diff options
Diffstat (limited to 'kernel/trace')
-rw-r--r-- | kernel/trace/ftrace.c | 326 | ||||
-rw-r--r-- | kernel/trace/ring_buffer.c | 7 | ||||
-rw-r--r-- | kernel/trace/trace.c | 456 | ||||
-rw-r--r-- | kernel/trace/trace.h | 5 | ||||
-rw-r--r-- | kernel/trace/trace_event_perf.c | 2 | ||||
-rw-r--r-- | kernel/trace/trace_events.c | 321 | ||||
-rw-r--r-- | kernel/trace/trace_events_filter.c | 17 | ||||
-rw-r--r-- | kernel/trace/trace_export.c | 7 | ||||
-rw-r--r-- | kernel/trace/trace_irqsoff.c | 4 | ||||
-rw-r--r-- | kernel/trace/trace_kprobe.c | 42 | ||||
-rw-r--r-- | kernel/trace/trace_syscalls.c | 21 | ||||
-rw-r--r-- | kernel/trace/trace_uprobe.c | 55 |
12 files changed, 941 insertions, 322 deletions
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 6c508ff33c6..4b93b841225 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -85,6 +85,8 @@ int function_trace_stop __read_mostly; /* Current function tracing op */ struct ftrace_ops *function_trace_op __read_mostly = &ftrace_list_end; +/* What to set function_trace_op to */ +static struct ftrace_ops *set_function_trace_op; /* List for set_ftrace_pid's pids. */ LIST_HEAD(ftrace_pids); @@ -278,6 +280,29 @@ static void update_global_ops(void) global_ops.func = func; } +static void ftrace_sync(struct work_struct *work) +{ + /* + * This function is just a stub to implement a hard force + * of synchronize_sched(). This requires synchronizing + * tasks even in userspace and idle. + * + * Yes, function tracing is rude. + */ +} + +static void ftrace_sync_ipi(void *data) +{ + /* Probably not needed, but do it anyway */ + smp_rmb(); +} + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +static void update_function_graph_func(void); +#else +static inline void update_function_graph_func(void) { } +#endif + static void update_ftrace_function(void) { ftrace_func_t func; @@ -296,16 +321,61 @@ static void update_ftrace_function(void) !FTRACE_FORCE_LIST_FUNC)) { /* Set the ftrace_ops that the arch callback uses */ if (ftrace_ops_list == &global_ops) - function_trace_op = ftrace_global_list; + set_function_trace_op = ftrace_global_list; else - function_trace_op = ftrace_ops_list; + set_function_trace_op = ftrace_ops_list; func = ftrace_ops_list->func; } else { /* Just use the default ftrace_ops */ - function_trace_op = &ftrace_list_end; + set_function_trace_op = &ftrace_list_end; func = ftrace_ops_list_func; } + /* If there's no change, then do nothing more here */ + if (ftrace_trace_function == func) + return; + + update_function_graph_func(); + + /* + * If we are using the list function, it doesn't care + * about the function_trace_ops. + */ + if (func == ftrace_ops_list_func) { + ftrace_trace_function = func; + /* + * Don't even bother setting function_trace_ops, + * it would be racy to do so anyway. + */ + return; + } + +#ifndef CONFIG_DYNAMIC_FTRACE + /* + * For static tracing, we need to be a bit more careful. + * The function change takes affect immediately. Thus, + * we need to coorditate the setting of the function_trace_ops + * with the setting of the ftrace_trace_function. + * + * Set the function to the list ops, which will call the + * function we want, albeit indirectly, but it handles the + * ftrace_ops and doesn't depend on function_trace_op. + */ + ftrace_trace_function = ftrace_ops_list_func; + /* + * Make sure all CPUs see this. Yes this is slow, but static + * tracing is slow and nasty to have enabled. + */ + schedule_on_each_cpu(ftrace_sync); + /* Now all cpus are using the list ops. */ + function_trace_op = set_function_trace_op; + /* Make sure the function_trace_op is visible on all CPUs */ + smp_wmb(); + /* Nasty way to force a rmb on all cpus */ + smp_call_function(ftrace_sync_ipi, NULL, 1); + /* OK, we are all set to update the ftrace_trace_function now! */ +#endif /* !CONFIG_DYNAMIC_FTRACE */ + ftrace_trace_function = func; } @@ -367,9 +437,6 @@ static int remove_ftrace_list_ops(struct ftrace_ops **list, static int __register_ftrace_function(struct ftrace_ops *ops) { - if (unlikely(ftrace_disabled)) - return -ENODEV; - if (FTRACE_WARN_ON(ops == &global_ops)) return -EINVAL; @@ -417,9 +484,6 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) { int ret; - if (ftrace_disabled) - return -ENODEV; - if (WARN_ON(!(ops->flags & FTRACE_OPS_FL_ENABLED))) return -EBUSY; @@ -434,16 +498,6 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) } else if (ops->flags & FTRACE_OPS_FL_CONTROL) { ret = remove_ftrace_list_ops(&ftrace_control_list, &control_ops, ops); - if (!ret) { - /* - * The ftrace_ops is now removed from the list, - * so there'll be no new users. We must ensure - * all current users are done before we free - * the control data. - */ - synchronize_sched(); - control_ops_free(ops); - } } else ret = remove_ftrace_ops(&ftrace_ops_list, ops); @@ -453,13 +507,6 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) if (ftrace_enabled) update_ftrace_function(); - /* - * Dynamic ops may be freed, we must make sure that all - * callers are done before leaving this function. - */ - if (ops->flags & FTRACE_OPS_FL_DYNAMIC) - synchronize_sched(); - return 0; } @@ -756,7 +803,7 @@ static int ftrace_profile_init(void) int cpu; int ret = 0; - for_each_online_cpu(cpu) { + for_each_possible_cpu(cpu) { ret = ftrace_profile_init_cpu(cpu); if (ret) break; @@ -1416,12 +1463,22 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable, * the hashes are freed with call_rcu_sched(). */ static int -ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip) +ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs) { struct ftrace_hash *filter_hash; struct ftrace_hash *notrace_hash; int ret; +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + /* + * There's a small race when adding ops that the ftrace handler + * that wants regs, may be called without them. We can not + * allow that handler to be called if regs is NULL. + */ + if (regs == NULL && (ops->flags & FTRACE_OPS_FL_SAVE_REGS)) + return 0; +#endif + filter_hash = rcu_dereference_raw_notrace(ops->filter_hash); notrace_hash = rcu_dereference_raw_notrace(ops->notrace_hash); @@ -1948,8 +2005,14 @@ void ftrace_modify_all_code(int command) else if (command & FTRACE_DISABLE_CALLS) ftrace_replace_code(0); - if (command & FTRACE_UPDATE_TRACE_FUNC) + if (command & FTRACE_UPDATE_TRACE_FUNC) { + function_trace_op = set_function_trace_op; + smp_wmb(); + /* If irqs are disabled, we are in stop machine */ + if (!irqs_disabled()) + smp_call_function(ftrace_sync_ipi, NULL, 1); ftrace_update_ftrace_func(ftrace_trace_function); + } if (command & FTRACE_START_FUNC_RET) ftrace_enable_ftrace_graph_caller(); @@ -2038,10 +2101,15 @@ static void ftrace_startup_enable(int command) static int ftrace_startup(struct ftrace_ops *ops, int command) { bool hash_enable = true; + int ret; if (unlikely(ftrace_disabled)) return -ENODEV; + ret = __register_ftrace_function(ops); + if (ret) + return ret; + ftrace_start_up++; command |= FTRACE_UPDATE_CALLS; @@ -2063,12 +2131,17 @@ static int ftrace_startup(struct ftrace_ops *ops, int command) return 0; } -static void ftrace_shutdown(struct ftrace_ops *ops, int command) +static int ftrace_shutdown(struct ftrace_ops *ops, int command) { bool hash_disable = true; + int ret; if (unlikely(ftrace_disabled)) - return; + return -ENODEV; + + ret = __unregister_ftrace_function(ops); + if (ret) + return ret; ftrace_start_up--; /* @@ -2102,10 +2175,42 @@ static void ftrace_shutdown(struct ftrace_ops *ops, int command) command |= FTRACE_UPDATE_TRACE_FUNC; } - if (!command || !ftrace_enabled) - return; + if (!command || !ftrace_enabled) { + /* + * If these are control ops, they still need their + * per_cpu field freed. Since, function tracing is + * not currently active, we can just free them + * without synchronizing all CPUs. + */ + if (ops->flags & FTRACE_OPS_FL_CONTROL) + control_ops_free(ops); + return 0; + } ftrace_run_update_code(command); + + /* + * Dynamic ops may be freed, we must make sure that all + * callers are done before leaving this function. + * The same goes for freeing the per_cpu data of the control + * ops. + * + * Again, normal synchronize_sched() is not good enough. + * We need to do a hard force of sched synchronization. + * This is because we use preempt_disable() to do RCU, but + * the function tracers can be called where RCU is not watching + * (like before user_exit()). We can not rely on the RCU + * infrastructure to do the synchronization, thus we must do it + * ourselves. + */ + if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_CONTROL)) { + schedule_on_each_cpu(ftrace_sync); + + if (ops->flags & FTRACE_OPS_FL_CONTROL) + control_ops_free(ops); + } + + return 0; } static void ftrace_startup_sysctl(void) @@ -2134,12 +2239,57 @@ static cycle_t ftrace_update_time; static unsigned long ftrace_update_cnt; unsigned long ftrace_update_tot_cnt; -static int ops_traces_mod(struct ftrace_ops *ops) +static inline int ops_traces_mod(struct ftrace_ops *ops) { - struct ftrace_hash *hash; + /* + * Filter_hash being empty will default to trace module. + * But notrace hash requires a test of individual module functions. + */ + return ftrace_hash_empty(ops->filter_hash) && + ftrace_hash_empty(ops->notrace_hash); +} - hash = ops->filter_hash; - return ftrace_hash_empty(hash); +/* + * Check if the current ops references the record. + * + * If the ops traces all functions, then it was already accounted for. + * If the ops does not trace the current record function, skip it. + * If the ops ignores the function via notrace filter, skip it. + */ +static inline bool +ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec) +{ + /* If ops isn't enabled, ignore it */ + if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) + return 0; + + /* If ops traces all mods, we already accounted for it */ + if (ops_traces_mod(ops)) + return 0; + + /* The function must be in the filter */ + if (!ftrace_hash_empty(ops->filter_hash) && + !ftrace_lookup_ip(ops->filter_hash, rec->ip)) + return 0; + + /* If in notrace hash, we ignore it too */ + if (ftrace_lookup_ip(ops->notrace_hash, rec->ip)) + return 0; + + return 1; +} + +static int referenced_filters(struct dyn_ftrace *rec) +{ + struct ftrace_ops *ops; + int cnt = 0; + + for (ops = ftrace_ops_list; ops != &ftrace_list_end; ops = ops->next) { + if (ops_references_rec(ops, rec)) + cnt++; + } + + return cnt; } static int ftrace_update_code(struct module *mod) @@ -2148,6 +2298,7 @@ static int ftrace_update_code(struct module *mod) struct dyn_ftrace *p; cycle_t start, stop; unsigned long ref = 0; + bool test = false; int i; /* @@ -2161,9 +2312,12 @@ static int ftrace_update_code(struct module *mod) for (ops = ftrace_ops_list; ops != &ftrace_list_end; ops = ops->next) { - if (ops->flags & FTRACE_OPS_FL_ENABLED && - ops_traces_mod(ops)) - ref++; + if (ops->flags & FTRACE_OPS_FL_ENABLED) { + if (ops_traces_mod(ops)) + ref++; + else + test = true; + } } } @@ -2173,12 +2327,16 @@ static int ftrace_update_code(struct module *mod) for (pg = ftrace_new_pgs; pg; pg = pg->next) { for (i = 0; i < pg->index; i++) { + int cnt = ref; + /* If something went wrong, bail without enabling anything */ if (unlikely(ftrace_disabled)) return -1; p = &pg->records[i]; - p->flags = ref; + if (test) + cnt += referenced_filters(p); + p->flags = cnt; /* * Do the initial record conversion from mcount jump @@ -2198,7 +2356,7 @@ static int ftrace_update_code(struct module *mod) * conversion puts the module to the correct state, thus * passing the ftrace_make_call check. */ - if (ftrace_start_up && ref) { + if (ftrace_start_up && cnt) { int failed = __ftrace_replace_code(p, 1); if (failed) ftrace_bug(failed, p->ip); @@ -2957,16 +3115,13 @@ static void __enable_ftrace_function_probe(void) if (i == FTRACE_FUNC_HASHSIZE) return; - ret = __register_ftrace_function(&trace_probe_ops); - if (!ret) - ret = ftrace_startup(&trace_probe_ops, 0); + ret = ftrace_startup(&trace_probe_ops, 0); ftrace_probe_registered = 1; } static void __disable_ftrace_function_probe(void) { - int ret; int i; if (!ftrace_probe_registered) @@ -2979,9 +3134,7 @@ static void __disable_ftrace_function_probe(void) } /* no more funcs left */ - ret = __unregister_ftrace_function(&trace_probe_ops); - if (!ret) - ftrace_shutdown(&trace_probe_ops, 0); + ftrace_shutdown(&trace_probe_ops, 0); ftrace_probe_registered = 0; } @@ -4178,17 +4331,20 @@ core_initcall(ftrace_nodyn_init); static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; } static inline void ftrace_startup_enable(int command) { } /* Keep as macros so we do not need to define the commands */ -# define ftrace_startup(ops, command) \ - ({ \ - (ops)->flags |= FTRACE_OPS_FL_ENABLED; \ - 0; \ +# define ftrace_startup(ops, command) \ + ({ \ + int ___ret = __register_ftrace_function(ops); \ + if (!___ret) \ + (ops)->flags |= FTRACE_OPS_FL_ENABLED; \ + ___ret; \ }) -# define ftrace_shutdown(ops, command) do { } while (0) +# define ftrace_shutdown(ops, command) __unregister_ftrace_function(ops) + # define ftrace_startup_sysctl() do { } while (0) # define ftrace_shutdown_sysctl() do { } while (0) static inline int -ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip) +ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs) { return 1; } @@ -4211,7 +4367,7 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip, do_for_each_ftrace_op(op, ftrace_control_list) { if (!(op->flags & FTRACE_OPS_FL_STUB) && !ftrace_function_local_disabled(op) && - ftrace_ops_test(op, ip)) + ftrace_ops_test(op, ip, regs)) op->func(ip, parent_ip, op, regs); } while_for_each_ftrace_op(op); trace_recursion_clear(TRACE_CONTROL_BIT); @@ -4244,7 +4400,7 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, */ preempt_disable_notrace(); do_for_each_ftrace_op(op, ftrace_ops_list) { - if (ftrace_ops_test(op, ip)) + if (ftrace_ops_test(op, ip, regs)) op->func(ip, parent_ip, op, regs); } while_for_each_ftrace_op(op); preempt_enable_notrace(); @@ -4583,9 +4739,7 @@ int register_ftrace_function(struct ftrace_ops *ops) mutex_lock(&ftrace_lock); - ret = __register_ftrace_function(ops); - if (!ret) - ret = ftrace_startup(ops, 0); + ret = ftrace_startup(ops, 0); mutex_unlock(&ftrace_lock); @@ -4604,9 +4758,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops) int ret; mutex_lock(&ftrace_lock); - ret = __unregister_ftrace_function(ops); - if (!ret) - ftrace_shutdown(ops, 0); + ret = ftrace_shutdown(ops, 0); mutex_unlock(&ftrace_lock); return ret; @@ -4666,6 +4818,7 @@ int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace) trace_func_graph_ret_t ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub; +static trace_func_graph_ent_t __ftrace_graph_entry = ftrace_graph_entry_stub; /* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) @@ -4800,6 +4953,37 @@ ftrace_suspend_notifier_call(struct notifier_block *bl, unsigned long state, return NOTIFY_DONE; } +/* Just a place holder for function graph */ +static struct ftrace_ops fgraph_ops __read_mostly = { + .func = ftrace_stub, + .flags = FTRACE_OPS_FL_STUB | FTRACE_OPS_FL_GLOBAL | + FTRACE_OPS_FL_RECURSION_SAFE, +}; + +static int ftrace_graph_entry_test(struct ftrace_graph_ent *trace) +{ + if (!ftrace_ops_test(&global_ops, trace->func, NULL)) + return 0; + return __ftrace_graph_entry(trace); +} + +/* + * The function graph tracer should only trace the functions defined + * by set_ftrace_filter and set_ftrace_notrace. If another function + * tracer ops is registered, the graph tracer requires testing the + * function against the global ops, and not just trace any function + * that any ftrace_ops registered. + */ +static void update_function_graph_func(void) +{ + if (ftrace_ops_list == &ftrace_list_end || + (ftrace_ops_list == &global_ops && + global_ops.next == &ftrace_list_end)) + ftrace_graph_entry = __ftrace_graph_entry; + else + ftrace_graph_entry = ftrace_graph_entry_test; +} + int register_ftrace_graph(trace_func_graph_ret_t retfunc, trace_func_graph_ent_t entryfunc) { @@ -4824,9 +5008,18 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc, } ftrace_graph_return = retfunc; - ftrace_graph_entry = entryfunc; - ret = ftrace_startup(&global_ops, FTRACE_START_FUNC_RET); + /* + * Update the indirect function to the entryfunc, and the + * function that gets called to the entry_test first. Then + * call the update fgraph entry function to determine if + * the entryfunc should be called directly or not. + */ + __ftrace_graph_entry = entryfunc; + ftrace_graph_entry = ftrace_graph_entry_test; + update_function_graph_func(); + + ret = ftrace_startup(&fgraph_ops, FTRACE_START_FUNC_RET); out: mutex_unlock(&ftrace_lock); @@ -4843,7 +5036,8 @@ void unregister_ftrace_graph(void) ftrace_graph_active--; ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; ftrace_graph_entry = ftrace_graph_entry_stub; - ftrace_shutdown(&global_ops, FTRACE_STOP_FUNC_RET); + __ftrace_graph_entry = ftrace_graph_entry_stub; + ftrace_shutdown(&fgraph_ops, FTRACE_STOP_FUNC_RET); unregister_pm_notifier(&ftrace_suspend_notifier); unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index e444ff88f0a..fd12cc56371 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2396,6 +2396,13 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, write &= RB_WRITE_MASK; tail = write - length; + /* + * If this is the first commit on the page, then it has the same + * timestamp as the page itself. + */ + if (!tail) + delta = 0; + /* See if we shot pass the end of this buffer page */ if (unlikely(write > BUF_PAGE_SIZE)) return rb_move_tail(cpu_buffer, length, tail, diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e71a8be4a6e..6dbdf277c8f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -193,6 +193,37 @@ static struct trace_array global_trace; LIST_HEAD(ftrace_trace_arrays); +int trace_array_get(struct trace_array *this_tr) +{ + struct trace_array *tr; + int ret = -ENODEV; + + mutex_lock(&trace_types_lock); + list_for_each_entry(tr, &ftrace_trace_arrays, list) { + if (tr == this_tr) { + tr->ref++; + ret = 0; + break; + } + } + mutex_unlock(&trace_types_lock); + + return ret; +} + +static void __trace_array_put(struct trace_array *this_tr) +{ + WARN_ON(!this_tr->ref); + this_tr->ref--; +} + +void trace_array_put(struct trace_array *this_tr) +{ + mutex_lock(&trace_types_lock); + __trace_array_put(this_tr); + mutex_unlock(&trace_types_lock); +} + int filter_current_check_discard(struct ring_buffer *buffer, struct ftrace_event_call *call, void *rec, struct ring_buffer_event *event) @@ -201,23 +232,43 @@ int filter_current_check_discard(struct ring_buffer *buffer, } EXPORT_SYMBOL_GPL(filter_current_check_discard); -cycle_t ftrace_now(int cpu) +cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu) { u64 ts; /* Early boot up does not have a buffer yet */ - if (!global_trace.trace_buffer.buffer) + if (!buf->buffer) return trace_clock_local(); - ts = ring_buffer_time_stamp(global_trace.trace_buffer.buffer, cpu); - ring_buffer_normalize_time_stamp(global_trace.trace_buffer.buffer, cpu, &ts); + ts = ring_buffer_time_stamp(buf->buffer, cpu); + ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts); return ts; } +cycle_t ftrace_now(int cpu) +{ + return buffer_ftrace_now(&global_trace.trace_buffer, cpu); +} + +/** + * tracing_is_enabled - Show if global_trace has been disabled + * + * Shows if the global trace has been enabled or not. It uses the + * mirror flag "buffer_disabled" to be used in fast paths such as for + * the irqsoff tracer. But it may be inaccurate due to races. If you + * need to know the accurate state, use tracing_is_on() which is a little + * slower, but accurate. + */ int tracing_is_enabled(void) { - return tracing_is_on(); + /* + * For quick access (irqsoff uses this in fast path), just + * return the mirror variable of the state of the ring buffer. + * It's a little racy, but we don't really care. + */ + smp_rmb(); + return !global_trace.buffer_disabled; } /* @@ -240,7 +291,7 @@ static struct tracer *trace_types __read_mostly; /* * trace_types_lock is used to protect the trace_types list. */ -static DEFINE_MUTEX(trace_types_lock); +DEFINE_MUTEX(trace_types_lock); /* * serialize the access of the ring buffer @@ -330,6 +381,23 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION; +void tracer_tracing_on(struct trace_array *tr) +{ + if (tr->trace_buffer.buffer) + ring_buffer_record_on(tr->trace_buffer.buffer); + /* + * This flag is looked at when buffers haven't been allocated + * yet, or by some tracers (like irqsoff), that just want to + * know if the ring buffer has been disabled, but it can handle + * races of where it gets disabled but we still do a record. + * As the check is in the fast path of the tracers, it is more + * important to be fast than accurate. + */ + tr->buffer_disabled = 0; + /* Make the flag seen by readers */ + smp_wmb(); +} + /** * tracing_on - enable tracing buffers * @@ -338,15 +406,7 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | */ void tracing_on(void) { - if (global_trace.trace_buffer.buffer) - ring_buffer_record_on(global_trace.trace_buffer.buffer); - /* - * This flag is only looked at when buffers haven't been - * allocated yet. We don't really care about the race - * between setting this flag and actually turning - * on the buffer. - */ - global_trace.buffer_disabled = 0; + tracer_tracing_on(&global_trace); } EXPORT_SYMBOL_GPL(tracing_on); @@ -364,6 +424,9 @@ int __trace_puts(unsigned long ip, const char *str, int size) unsigned long irq_flags; int alloc; + if (unlikely(tracing_selftest_running || tracing_disabled)) + return 0; + alloc = sizeof(*entry) + size + 2; /* possible \n added */ local_save_flags(irq_flags); @@ -404,6 +467,9 @@ int __trace_bputs(unsigned long ip, const char *str) unsigned long irq_flags; int size = sizeof(struct bputs_entry); + if (unlikely(tracing_selftest_running || tracing_disabled)) + return 0; + local_save_flags(irq_flags); buffer = global_trace.trace_buffer.buffer; event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size, @@ -540,6 +606,23 @@ void tracing_snapshot_alloc(void) EXPORT_SYMBOL_GPL(tracing_snapshot_alloc); #endif /* CONFIG_TRACER_SNAPSHOT */ +void tracer_tracing_off(struct trace_array *tr) +{ + if (tr->trace_buffer.buffer) + ring_buffer_record_off(tr->trace_buffer.buffer); + /* + * This flag is looked at when buffers haven't been allocated + * yet, or by some tracers (like irqsoff), that just want to + * know if the ring buffer has been disabled, but it can handle + * races of where it gets disabled but we still do a record. + * As the check is in the fast path of the tracers, it is more + * important to be fast than accurate. + */ + tr->buffer_disabled = 1; + /* Make the flag seen by readers */ + smp_wmb(); +} + /** * tracing_off - turn off tracing buffers * @@ -550,26 +633,29 @@ EXPORT_SYMBOL_GPL(tracing_snapshot_alloc); */ void tracing_off(void) { - if (global_trace.trace_buffer.buffer) - ring_buffer_record_off(global_trace.trace_buffer.buffer); - /* - * This flag is only looked at when buffers haven't been - * allocated yet. We don't really care about the race - * between setting this flag and actually turning - * on the buffer. - */ - global_trace.buffer_disabled = 1; + tracer_tracing_off(&global_trace); } EXPORT_SYMBOL_GPL(tracing_off); /** + * tracer_tracing_is_on - show real state of ring buffer enabled + * @tr : the trace array to know if ring buffer is enabled + * + * Shows real state of the ring buffer if it is enabled or not. + */ +int tracer_tracing_is_on(struct trace_array *tr) +{ + if (tr->trace_buffer.buffer) + return ring_buffer_record_is_on(tr->trace_buffer.buffer); + return !tr->buffer_disabled; +} + +/** * tracing_is_on - show state of ring buffers enabled */ int tracing_is_on(void) { - if (global_trace.trace_buffer.buffer) - return ring_buffer_record_is_on(global_trace.trace_buffer.buffer); - return !global_trace.buffer_disabled; + return tracer_tracing_is_on(&global_trace); } EXPORT_SYMBOL_GPL(tracing_is_on); @@ -746,9 +832,12 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf, if (isspace(ch)) { parser->buffer[parser->idx] = 0; parser->cont = false; - } else { + } else if (parser->idx < parser->size - 1) { parser->cont = true; parser->buffer[parser->idx++] = ch; + } else { + ret = -EINVAL; + goto out; } *ppos += read; @@ -1119,7 +1208,7 @@ void tracing_reset_online_cpus(struct trace_buffer *buf) /* Make sure all commits have finished */ synchronize_sched(); - buf->time_start = ftrace_now(buf->cpu); + buf->time_start = buffer_ftrace_now(buf, buf->cpu); for_each_online_cpu(cpu) ring_buffer_reset_cpu(buffer, cpu); @@ -1127,23 +1216,17 @@ void tracing_reset_online_cpus(struct trace_buffer *buf) ring_buffer_record_enable(buffer); } -void tracing_reset_current(int cpu) -{ - tracing_reset(&global_trace.trace_buffer, cpu); -} - +/* Must have trace_types_lock held */ void tracing_reset_all_online_cpus(void) { struct trace_array *tr; - mutex_lock(&trace_types_lock); list_for_each_entry(tr, &ftrace_trace_arrays, list) { tracing_reset_online_cpus(&tr->trace_buffer); #ifdef CONFIG_TRACER_MAX_TRACE tracing_reset_online_cpus(&tr->max_buffer); #endif } - mutex_unlock(&trace_types_lock); } #define SAVED_CMDLINES 128 @@ -2760,6 +2843,17 @@ static int s_show(struct seq_file *m, void *v) return 0; } +/* + * Should be used after trace_array_get(), trace_types_lock + * ensures that i_cdev was already initialized. + */ +static inline int tracing_get_cpu(struct inode *inode) +{ + if (inode->i_cdev) /* See trace_create_cpu_file() */ + return (long)inode->i_cdev - 1; + return RING_BUFFER_ALL_CPUS; +} + static const struct seq_operations tracer_seq_ops = { .start = s_start, .next = s_next, @@ -2770,8 +2864,7 @@ static const struct seq_operations tracer_seq_ops = { static struct trace_iterator * __tracing_open(struct inode *inode, struct file *file, bool snapshot) { - struct trace_cpu *tc = inode->i_private; - struct trace_array *tr = tc->tr; + struct trace_array *tr = inode->i_private; struct trace_iterator *iter; int cpu; @@ -2812,8 +2905,8 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) iter->trace_buffer = &tr->trace_buffer; iter->snapshot = snapshot; iter->pos = -1; + iter->cpu_file = tracing_get_cpu(inode); mutex_init(&iter->mutex); - iter->cpu_file = tc->cpu; /* Notify the tracer early; before we stop tracing. */ if (iter->trace && iter->trace->open) @@ -2850,8 +2943,6 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) tracing_iter_reset(iter, cpu); } - tr->ref++; - mutex_unlock(&trace_types_lock); return iter; @@ -2874,24 +2965,41 @@ int tracing_open_generic(struct inode *inode, struct file *filp) return 0; } +/* + * Open and update trace_array ref count. + * Must have the current trace_array passed to it. + */ +int tracing_open_generic_tr(struct inode *inode, struct file *filp) +{ + struct trace_array *tr = inode->i_private; + + if (tracing_disabled) + return -ENODEV; + + if (trace_array_get(tr) < 0) + return -ENODEV; + + filp->private_data = inode->i_private; + + return 0; +} + static int tracing_release(struct inode *inode, struct file *file) { + struct trace_array *tr = inode->i_private; struct seq_file *m = file->private_data; struct trace_iterator *iter; - struct trace_array *tr; int cpu; - if (!(file->f_mode & FMODE_READ)) + if (!(file->f_mode & FMODE_READ)) { + trace_array_put(tr); return 0; + } + /* Writes do not use seq_file */ iter = m->private; - tr = iter->tr; - mutex_lock(&trace_types_lock); - WARN_ON(!tr->ref); - tr->ref--; - for_each_tracing_cpu(cpu) { if (iter->buffer_iter[cpu]) ring_buffer_read_finish(iter->buffer_iter[cpu]); @@ -2903,6 +3011,9 @@ static int tracing_release(struct inode *inode, struct file *file) if (!iter->snapshot) /* reenable tracing if it was previously enabled */ tracing_start_tr(tr); + + __trace_array_put(tr); + mutex_unlock(&trace_types_lock); mutex_destroy(&iter->mutex); @@ -2910,24 +3021,44 @@ static int tracing_release(struct inode *inode, struct file *file) kfree(iter->trace); kfree(iter->buffer_iter); seq_release_private(inode, file); + return 0; } +static int tracing_release_generic_tr(struct inode *inode, struct file *file) +{ + struct trace_array *tr = inode->i_private; + + trace_array_put(tr); + return 0; +} + +static int tracing_single_release_tr(struct inode *inode, struct file *file) +{ + struct trace_array *tr = inode->i_private; + + trace_array_put(tr); + + return single_release(inode, file); +} + static int tracing_open(struct inode *inode, struct file *file) { + struct trace_array *tr = inode->i_private; struct trace_iterator *iter; int ret = 0; + if (trace_array_get(tr) < 0) + return -ENODEV; + /* If this file was open for write, then erase contents */ - if ((file->f_mode & FMODE_WRITE) && - (file->f_flags & O_TRUNC)) { - struct trace_cpu *tc = inode->i_private; - struct trace_array *tr = tc->tr; + if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { + int cpu = tracing_get_cpu(inode); - if (tc->cpu == RING_BUFFER_ALL_CPUS) + if (cpu == RING_BUFFER_ALL_CPUS) tracing_reset_online_cpus(&tr->trace_buffer); else - tracing_reset(&tr->trace_buffer, tc->cpu); + tracing_reset(&tr->trace_buffer, cpu); } if (file->f_mode & FMODE_READ) { @@ -2937,6 +3068,10 @@ static int tracing_open(struct inode *inode, struct file *file) else if (trace_flags & TRACE_ITER_LATENCY_FMT) iter->iter_flags |= TRACE_FILE_LAT_FMT; } + + if (ret < 0) + trace_array_put(tr); + return ret; } @@ -3293,17 +3428,27 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf, static int tracing_trace_options_open(struct inode *inode, struct file *file) { + struct trace_array *tr = inode->i_private; + int ret; + if (tracing_disabled) return -ENODEV; - return single_open(file, tracing_trace_options_show, inode->i_private); + if (trace_array_get(tr) < 0) + return -ENODEV; + + ret = single_open(file, tracing_trace_options_show, inode->i_private); + if (ret < 0) + trace_array_put(tr); + + return ret; } static const struct file_operations tracing_iter_fops = { .open = tracing_trace_options_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = tracing_single_release_tr, .write = tracing_trace_options_write, }; @@ -3783,20 +3928,23 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf, static int tracing_open_pipe(struct inode *inode, struct file *filp) { - struct trace_cpu *tc = inode->i_private; - struct trace_array *tr = tc->tr; + struct trace_array *tr = inode->i_private; struct trace_iterator *iter; int ret = 0; if (tracing_disabled) return -ENODEV; + if (trace_array_get(tr) < 0) + return -ENODEV; + mutex_lock(&trace_types_lock); /* create a buffer to store the information to pass to userspace */ iter = kzalloc(sizeof(*iter), GFP_KERNEL); if (!iter) { ret = -ENOMEM; + __trace_array_put(tr); goto out; } @@ -3826,9 +3974,9 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) if (trace_clocks[tr->clock_id].in_ns) iter->iter_flags |= TRACE_FILE_TIME_IN_NS; - iter->cpu_file = tc->cpu; - iter->tr = tc->tr; - iter->trace_buffer = &tc->tr->trace_buffer; + iter->tr = tr; + iter->trace_buffer = &tr->trace_buffer; + iter->cpu_file = tracing_get_cpu(inode); mutex_init(&iter->mutex); filp->private_data = iter; @@ -3843,6 +3991,7 @@ out: fail: kfree(iter->trace); kfree(iter); + __trace_array_put(tr); mutex_unlock(&trace_types_lock); return ret; } @@ -3850,6 +3999,7 @@ fail: static int tracing_release_pipe(struct inode *inode, struct file *file) { struct trace_iterator *iter = file->private_data; + struct trace_array *tr = inode->i_private; mutex_lock(&trace_types_lock); @@ -3863,6 +4013,8 @@ static int tracing_release_pipe(struct inode *inode, struct file *file) kfree(iter->trace); kfree(iter); + trace_array_put(tr); + return 0; } @@ -3939,7 +4091,7 @@ static int tracing_wait_pipe(struct file *filp) * * iter->pos will be 0 if we haven't read anything. */ - if (!tracing_is_enabled() && iter->pos) + if (!tracing_is_on() && iter->pos) break; } @@ -4000,6 +4152,7 @@ waitagain: memset(&iter->seq, 0, sizeof(struct trace_iterator) - offsetof(struct trace_iterator, seq)); + cpumask_clear(iter->started); iter->pos = -1; trace_event_read_lock(); @@ -4200,15 +4353,16 @@ static ssize_t tracing_entries_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - struct trace_cpu *tc = filp->private_data; - struct trace_array *tr = tc->tr; + struct inode *inode = file_inode(filp); + struct trace_array *tr = inode->i_private; + int cpu = tracing_get_cpu(inode); char buf[64]; int r = 0; ssize_t ret; mutex_lock(&trace_types_lock); - if (tc->cpu == RING_BUFFER_ALL_CPUS) { + if (cpu == RING_BUFFER_ALL_CPUS) { int cpu, buf_size_same; unsigned long size; @@ -4235,7 +4389,7 @@ tracing_entries_read(struct file *filp, char __user *ubuf, } else r = sprintf(buf, "X\n"); } else - r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, tc->cpu)->entries >> 10); + r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10); mutex_unlock(&trace_types_lock); @@ -4247,7 +4401,8 @@ static ssize_t tracing_entries_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - struct trace_cpu *tc = filp->private_data; + struct inode *inode = file_inode(filp); + struct trace_array *tr = inode->i_private; unsigned long val; int ret; @@ -4261,8 +4416,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, /* value is in KB */ val <<= 10; - - ret = tracing_resize_ring_buffer(tc->tr, val, tc->cpu); + ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode)); if (ret < 0) return ret; @@ -4316,10 +4470,12 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp) /* disable tracing ? */ if (trace_flags & TRACE_ITER_STOP_ON_FREE) - tracing_off(); + tracer_tracing_off(tr); /* resize the ring buffer to 0 */ tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS); + trace_array_put(tr); + return 0; } @@ -4328,6 +4484,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *fpos) { unsigned long addr = (unsigned long)ubuf; + struct trace_array *tr = filp->private_data; struct ring_buffer_event *event; struct ring_buffer *buffer; struct print_entry *entry; @@ -4387,7 +4544,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, local_save_flags(irq_flags); size = sizeof(*entry) + cnt + 2; /* possible \n added */ - buffer = global_trace.trace_buffer.buffer; + buffer = tr->trace_buffer.buffer; event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, irq_flags, preempt_count()); if (!event) { @@ -4478,12 +4635,12 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, * New clock may not be consistent with the previous clock. * Reset the buffer so that it doesn't have incomparable timestamps. */ - tracing_reset_online_cpus(&global_trace.trace_buffer); + tracing_reset_online_cpus(&tr->trace_buffer); #ifdef CONFIG_TRACER_MAX_TRACE if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer) ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func); - tracing_reset_online_cpus(&global_trace.max_buffer); + tracing_reset_online_cpus(&tr->max_buffer); #endif mutex_unlock(&trace_types_lock); @@ -4495,10 +4652,20 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, static int tracing_clock_open(struct inode *inode, struct file *file) { + struct trace_array *tr = inode->i_private; + int ret; + if (tracing_disabled) return -ENODEV; - return single_open(file, tracing_clock_show, inode->i_private); + if (trace_array_get(tr)) + return -ENODEV; + + ret = single_open(file, tracing_clock_show, inode->i_private); + if (ret < 0) + trace_array_put(tr); + + return ret; } struct ftrace_buffer_info { @@ -4510,31 +4677,40 @@ struct ftrace_buffer_info { #ifdef CONFIG_TRACER_SNAPSHOT static int tracing_snapshot_open(struct inode *inode, struct file *file) { - struct trace_cpu *tc = inode->i_private; + struct trace_array *tr = inode->i_private; struct trace_iterator *iter; struct seq_file *m; int ret = 0; + if (trace_array_get(tr) < 0) + return -ENODEV; + if (file->f_mode & FMODE_READ) { iter = __tracing_open(inode, file, true); if (IS_ERR(iter)) ret = PTR_ERR(iter); } else { /* Writes still need the seq_file to hold the private data */ + ret = -ENOMEM; m = kzalloc(sizeof(*m), GFP_KERNEL); if (!m) - return -ENOMEM; + goto out; iter = kzalloc(sizeof(*iter), GFP_KERNEL); if (!iter) { kfree(m); - return -ENOMEM; + goto out; } - iter->tr = tc->tr; - iter->trace_buffer = &tc->tr->max_buffer; - iter->cpu_file = tc->cpu; + ret = 0; + + iter->tr = tr; + iter->trace_buffer = &tr->max_buffer; + iter->cpu_file = tracing_get_cpu(inode); m->private = iter; file->private_data = m; } +out: + if (ret < 0) + trace_array_put(tr); return ret; } @@ -4616,9 +4792,12 @@ out: static int tracing_snapshot_release(struct inode *inode, struct file *file) { struct seq_file *m = file->private_data; + int ret; + + ret = tracing_release(inode, file); if (file->f_mode & FMODE_READ) - return tracing_release(inode, file); + return ret; /* If write only, the seq_file is just a stub */ if (m) @@ -4684,34 +4863,38 @@ static const struct file_operations tracing_pipe_fops = { }; static const struct file_operations tracing_entries_fops = { - .open = tracing_open_generic, + .open = tracing_open_generic_tr, .read = tracing_entries_read, .write = tracing_entries_write, .llseek = generic_file_llseek, + .release = tracing_release_generic_tr, }; static const struct file_operations tracing_total_entries_fops = { - .open = tracing_open_generic, + .open = tracing_open_generic_tr, .read = tracing_total_entries_read, .llseek = generic_file_llseek, + .release = tracing_release_generic_tr, }; static const struct file_operations tracing_free_buffer_fops = { + .open = tracing_open_generic_tr, .write = tracing_free_buffer_write, .release = tracing_free_buffer_release, }; static const struct file_operations tracing_mark_fops = { - .open = tracing_open_generic, + .open = tracing_open_generic_tr, .write = tracing_mark_write, .llseek = generic_file_llseek, + .release = tracing_release_generic_tr, }; static const struct file_operations trace_clock_fops = { .open = tracing_clock_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = tracing_single_release_tr, .write = tracing_clock_write, }; @@ -4736,23 +4919,26 @@ static const struct file_operations snapshot_raw_fops = { static int tracing_buffers_open(struct inode *inode, struct file *filp) { - struct trace_cpu *tc = inode->i_private; - struct trace_array *tr = tc->tr; + struct trace_array *tr = inode->i_private; struct ftrace_buffer_info *info; + int ret; if (tracing_disabled) return -ENODEV; + if (trace_array_get(tr) < 0) + return -ENODEV; + info = kzalloc(sizeof(*info), GFP_KERNEL); - if (!info) + if (!info) { + trace_array_put(tr); return -ENOMEM; + } mutex_lock(&trace_types_lock); - tr->ref++; - info->iter.tr = tr; - info->iter.cpu_file = tc->cpu; + info->iter.cpu_file = tracing_get_cpu(inode); info->iter.trace = tr->current_trace; info->iter.trace_buffer = &tr->trace_buffer; info->spare = NULL; @@ -4763,7 +4949,11 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp) mutex_unlock(&trace_types_lock); - return nonseekable_open(inode, filp); + ret = nonseekable_open(inode, filp); + if (ret < 0) + trace_array_put(tr); + + return ret; } static unsigned int @@ -4863,8 +5053,7 @@ static int tracing_buffers_release(struct inode *inode, struct file *file) mutex_lock(&trace_types_lock); - WARN_ON(!iter->tr->ref); - iter->tr->ref--; + __trace_array_put(iter->tr); if (info->spare) ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare); @@ -5066,14 +5255,14 @@ static ssize_t tracing_stats_read(struct file *filp, char __user *ubuf, size_t count, loff_t *ppos) { - struct trace_cpu *tc = filp->private_data; - struct trace_array *tr = tc->tr; + struct inode *inode = file_inode(filp); + struct trace_array *tr = inode->i_private; struct trace_buffer *trace_buf = &tr->trace_buffer; + int cpu = tracing_get_cpu(inode); struct trace_seq *s; unsigned long cnt; unsigned long long t; unsigned long usec_rem; - int cpu = tc->cpu; s = kmalloc(sizeof(*s), GFP_KERNEL); if (!s) @@ -5126,9 +5315,10 @@ tracing_stats_read(struct file *filp, char __user *ubuf, } static const struct file_operations tracing_stats_fops = { - .open = tracing_open_generic, + .open = tracing_open_generic_tr, .read = tracing_stats_read, .llseek = generic_file_llseek, + .release = tracing_release_generic_tr, }; #ifdef CONFIG_DYNAMIC_FTRACE @@ -5317,10 +5507,20 @@ static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu) return tr->percpu_dir; } +static struct dentry * +trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent, + void *data, long cpu, const struct file_operations *fops) +{ + struct dentry *ret = trace_create_file(name, mode, parent, data, fops); + + if (ret) /* See tracing_get_cpu() */ + ret->d_inode->i_cdev = (void *)(cpu + 1); + return ret; +} + static void tracing_init_debugfs_percpu(struct trace_array *tr, long cpu) { - struct trace_array_cpu *data = per_cpu_ptr(tr->trace_buffer.data, cpu); struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu); struct dentry *d_cpu; char cpu_dir[30]; /* 30 characters should be more than enough */ @@ -5336,28 +5536,28 @@ tracing_init_debugfs_percpu(struct trace_array *tr, long cpu) } /* per cpu trace_pipe */ - trace_create_file("trace_pipe", 0444, d_cpu, - (void *)&data->trace_cpu, &tracing_pipe_fops); + trace_create_cpu_file("trace_pipe", 0444, d_cpu, + tr, cpu, &tracing_pipe_fops); /* per cpu trace */ - trace_create_file("trace", 0644, d_cpu, - (void *)&data->trace_cpu, &tracing_fops); + trace_create_cpu_file("trace", 0644, d_cpu, + tr, cpu, &tracing_fops); - trace_create_file("trace_pipe_raw", 0444, d_cpu, - (void *)&data->trace_cpu, &tracing_buffers_fops); + trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu, + tr, cpu, &tracing_buffers_fops); - trace_create_file("stats", 0444, d_cpu, - (void *)&data->trace_cpu, &tracing_stats_fops); + trace_create_cpu_file("stats", 0444, d_cpu, + tr, cpu, &tracing_stats_fops); - trace_create_file("buffer_size_kb", 0444, d_cpu, - (void *)&data->trace_cpu, &tracing_entries_fops); + trace_create_cpu_file("buffer_size_kb", 0444, d_cpu, + tr, cpu, &tracing_entries_fops); #ifdef CONFIG_TRACER_SNAPSHOT - trace_create_file("snapshot", 0644, d_cpu, - (void *)&data->trace_cpu, &snapshot_fops); + trace_create_cpu_file("snapshot", 0644, d_cpu, + tr, cpu, &snapshot_fops); - trace_create_file("snapshot_raw", 0444, d_cpu, - (void *)&data->trace_cpu, &snapshot_raw_fops); + trace_create_cpu_file("snapshot_raw", 0444, d_cpu, + tr, cpu, &snapshot_raw_fops); #endif } @@ -5612,15 +5812,10 @@ rb_simple_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_array *tr = filp->private_data; - struct ring_buffer *buffer = tr->trace_buffer.buffer; char buf[64]; int r; - if (buffer) - r = ring_buffer_record_is_on(buffer); - else - r = 0; - + r = tracer_tracing_is_on(tr); r = sprintf(buf, "%d\n", r); return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); @@ -5642,11 +5837,11 @@ rb_simple_write(struct file *filp, const char __user *ubuf, if (buffer) { mutex_lock(&trace_types_lock); if (val) { - ring_buffer_record_on(buffer); + tracer_tracing_on(tr); if (tr->current_trace->start) tr->current_trace->start(tr); } else { - ring_buffer_record_off(buffer); + tracer_tracing_off(tr); if (tr->current_trace->stop) tr->current_trace->stop(tr); } @@ -5659,9 +5854,10 @@ rb_simple_write(struct file *filp, const char __user *ubuf, } static const struct file_operations rb_simple_fops = { - .open = tracing_open_generic, + .open = tracing_open_generic_tr, .read = rb_simple_read, .write = rb_simple_write, + .release = tracing_release_generic_tr, .llseek = default_llseek, }; @@ -5688,6 +5884,8 @@ allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0; + buf->tr = tr; + buf->buffer = ring_buffer_alloc(size, rb_flags); if (!buf->buffer) return -ENOMEM; @@ -5775,8 +5973,10 @@ static int new_instance_create(const char *name) goto out_free_tr; ret = event_trace_add_tracer(tr->dir, tr); - if (ret) + if (ret) { + debugfs_remove_recursive(tr->dir); goto out_free_tr; + } init_tracer_debugfs(tr, tr->dir); @@ -5922,13 +6122,13 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer) tr, &tracing_iter_fops); trace_create_file("trace", 0644, d_tracer, - (void *)&tr->trace_cpu, &tracing_fops); + tr, &tracing_fops); trace_create_file("trace_pipe", 0444, d_tracer, - (void *)&tr->trace_cpu, &tracing_pipe_fops); + tr, &tracing_pipe_fops); trace_create_file("buffer_size_kb", 0644, d_tracer, - (void *)&tr->trace_cpu, &tracing_entries_fops); + tr, &tracing_entries_fops); trace_create_file("buffer_total_size_kb", 0444, d_tracer, tr, &tracing_total_entries_fops); @@ -5943,11 +6143,11 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer) &trace_clock_fops); trace_create_file("tracing_on", 0644, d_tracer, - tr, &rb_simple_fops); + tr, &rb_simple_fops); #ifdef CONFIG_TRACER_SNAPSHOT trace_create_file("snapshot", 0644, d_tracer, - (void *)&tr->trace_cpu, &snapshot_fops); + tr, &snapshot_fops); #endif for_each_tracing_cpu(cpu) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 20572ed88c5..51b44483eb7 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -224,6 +224,11 @@ enum { extern struct list_head ftrace_trace_arrays; +extern struct mutex trace_types_lock; + +extern int trace_array_get(struct trace_array *tr); +extern void trace_array_put(struct trace_array *tr); + /* * The global tracer (top) should be the first trace array added, * but we check the flag anyway. diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 84b1e045fab..8354dc81ae6 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -26,7 +26,7 @@ static int perf_trace_event_perm(struct ftrace_event_call *tp_event, { /* The ftrace function trace is allowed only for root. */ if (ftrace_event_is_function(tp_event) && - perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) + perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) return -EPERM; /* No tracing, just counting, so no obvious leak */ diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 27963e2bf4b..001b349af93 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -27,12 +27,6 @@ DEFINE_MUTEX(event_mutex); -DEFINE_MUTEX(event_storage_mutex); -EXPORT_SYMBOL_GPL(event_storage_mutex); - -char event_storage[EVENT_STORAGE_SIZE]; -EXPORT_SYMBOL_GPL(event_storage); - LIST_HEAD(ftrace_events); static LIST_HEAD(ftrace_common_fields); @@ -41,6 +35,23 @@ static LIST_HEAD(ftrace_common_fields); static struct kmem_cache *field_cachep; static struct kmem_cache *file_cachep; +#define SYSTEM_FL_FREE_NAME (1 << 31) + +static inline int system_refcount(struct event_subsystem *system) +{ + return system->ref_count & ~SYSTEM_FL_FREE_NAME; +} + +static int system_refcount_inc(struct event_subsystem *system) +{ + return (system->ref_count++) & ~SYSTEM_FL_FREE_NAME; +} + +static int system_refcount_dec(struct event_subsystem *system) +{ + return (--system->ref_count) & ~SYSTEM_FL_FREE_NAME; +} + /* Double loops, do not use break, only goto's work */ #define do_for_each_event_file(tr, file) \ list_for_each_entry(tr, &ftrace_trace_arrays, list) { \ @@ -97,7 +108,7 @@ static int __trace_define_field(struct list_head *head, const char *type, field = kmem_cache_alloc(field_cachep, GFP_TRACE); if (!field) - goto err; + return -ENOMEM; field->name = name; field->type = type; @@ -114,11 +125,6 @@ static int __trace_define_field(struct list_head *head, const char *type, list_add(&field->link, head); return 0; - -err: - kmem_cache_free(field_cachep, field); - - return -ENOMEM; } int trace_define_field(struct ftrace_event_call *call, const char *type, @@ -349,8 +355,8 @@ static void __put_system(struct event_subsystem *system) { struct event_filter *filter = system->filter; - WARN_ON_ONCE(system->ref_count == 0); - if (--system->ref_count) + WARN_ON_ONCE(system_refcount(system) == 0); + if (system_refcount_dec(system)) return; list_del(&system->list); @@ -359,13 +365,15 @@ static void __put_system(struct event_subsystem *system) kfree(filter->filter_string); kfree(filter); } + if (system->ref_count & SYSTEM_FL_FREE_NAME) + kfree(system->name); kfree(system); } static void __get_system(struct event_subsystem *system) { - WARN_ON_ONCE(system->ref_count == 0); - system->ref_count++; + WARN_ON_ONCE(system_refcount(system) == 0); + system_refcount_inc(system); } static void __get_system_dir(struct ftrace_subsystem_dir *dir) @@ -379,7 +387,7 @@ static void __put_system_dir(struct ftrace_subsystem_dir *dir) { WARN_ON_ONCE(dir->ref_count == 0); /* If the subsystem is about to be freed, the dir must be too */ - WARN_ON_ONCE(dir->subsystem->ref_count == 1 && dir->ref_count != 1); + WARN_ON_ONCE(system_refcount(dir->subsystem) == 1 && dir->ref_count != 1); __put_system(dir->subsystem); if (!--dir->ref_count) @@ -393,17 +401,55 @@ static void put_system(struct ftrace_subsystem_dir *dir) mutex_unlock(&event_mutex); } +static void remove_subsystem(struct ftrace_subsystem_dir *dir) +{ + if (!dir) + return; + + if (!--dir->nr_events) { + debugfs_remove_recursive(dir->entry); + list_del(&dir->list); + __put_system_dir(dir); + } +} + +static void *event_file_data(struct file *filp) +{ + return ACCESS_ONCE(file_inode(filp)->i_private); +} + +static void remove_event_file_dir(struct ftrace_event_file *file) +{ + struct dentry *dir = file->dir; + struct dentry *child; + + if (dir) { + spin_lock(&dir->d_lock); /* probably unneeded */ + list_for_each_entry(child, &dir->d_subdirs, d_u.d_child) { + if (child->d_inode) /* probably unneeded */ + child->d_inode->i_private = NULL; + } + spin_unlock(&dir->d_lock); + + debugfs_remove_recursive(dir); + } + + list_del(&file->list); + remove_subsystem(file->system); + kmem_cache_free(file_cachep, file); +} + /* * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events. */ -static int __ftrace_set_clr_event(struct trace_array *tr, const char *match, - const char *sub, const char *event, int set) +static int +__ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match, + const char *sub, const char *event, int set) { struct ftrace_event_file *file; struct ftrace_event_call *call; int ret = -EINVAL; - mutex_lock(&event_mutex); list_for_each_entry(file, &tr->events, list) { call = file->event_call; @@ -429,6 +475,17 @@ static int __ftrace_set_clr_event(struct trace_array *tr, const char *match, ret = 0; } + + return ret; +} + +static int __ftrace_set_clr_event(struct trace_array *tr, const char *match, + const char *sub, const char *event, int set) +{ + int ret; + + mutex_lock(&event_mutex); + ret = __ftrace_set_clr_event_nolock(tr, match, sub, event, set); mutex_unlock(&event_mutex); return ret; @@ -623,13 +680,23 @@ static ssize_t event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ftrace_event_file *file = filp->private_data; + struct ftrace_event_file *file; + unsigned long flags; char *buf; - if (file->flags & FTRACE_EVENT_FL_ENABLED) { - if (file->flags & FTRACE_EVENT_FL_SOFT_DISABLED) + mutex_lock(&event_mutex); + file = event_file_data(filp); + if (likely(file)) + flags = file->flags; + mutex_unlock(&event_mutex); + + if (!file) + return -ENODEV; + + if (flags & FTRACE_EVENT_FL_ENABLED) { + if (flags & FTRACE_EVENT_FL_SOFT_DISABLED) buf = "0*\n"; - else if (file->flags & FTRACE_EVENT_FL_SOFT_MODE) + else if (flags & FTRACE_EVENT_FL_SOFT_MODE) buf = "1*\n"; else buf = "1\n"; @@ -643,13 +710,10 @@ static ssize_t event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ftrace_event_file *file = filp->private_data; + struct ftrace_event_file *file; unsigned long val; int ret; - if (!file) - return -EINVAL; - ret = kstrtoul_from_user(ubuf, cnt, 10, &val); if (ret) return ret; @@ -661,8 +725,11 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, switch (val) { case 0: case 1: + ret = -ENODEV; mutex_lock(&event_mutex); - ret = ftrace_event_enable_disable(file, val); + file = event_file_data(filp); + if (likely(file)) + ret = ftrace_event_enable_disable(file, val); mutex_unlock(&event_mutex); break; @@ -769,7 +836,7 @@ enum { static void *f_next(struct seq_file *m, void *v, loff_t *pos) { - struct ftrace_event_call *call = m->private; + struct ftrace_event_call *call = event_file_data(m->private); struct ftrace_event_field *field; struct list_head *common_head = &ftrace_common_fields; struct list_head *head = trace_get_fields(call); @@ -813,6 +880,11 @@ static void *f_start(struct seq_file *m, loff_t *pos) loff_t l = 0; void *p; + /* ->stop() is called even if ->start() fails */ + mutex_lock(&event_mutex); + if (!event_file_data(m->private)) + return ERR_PTR(-ENODEV); + /* Start by showing the header */ if (!*pos) return (void *)FORMAT_HEADER; @@ -827,7 +899,7 @@ static void *f_start(struct seq_file *m, loff_t *pos) static int f_show(struct seq_file *m, void *v) { - struct ftrace_event_call *call = m->private; + struct ftrace_event_call *call = event_file_data(m->private); struct ftrace_event_field *field; const char *array_descriptor; @@ -878,6 +950,7 @@ static int f_show(struct seq_file *m, void *v) static void f_stop(struct seq_file *m, void *p) { + mutex_unlock(&event_mutex); } static const struct seq_operations trace_format_seq_ops = { @@ -889,7 +962,6 @@ static const struct seq_operations trace_format_seq_ops = { static int trace_format_open(struct inode *inode, struct file *file) { - struct ftrace_event_call *call = inode->i_private; struct seq_file *m; int ret; @@ -898,7 +970,7 @@ static int trace_format_open(struct inode *inode, struct file *file) return ret; m = file->private_data; - m->private = call; + m->private = file; return 0; } @@ -906,19 +978,22 @@ static int trace_format_open(struct inode *inode, struct file *file) static ssize_t event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ftrace_event_call *call = filp->private_data; + int id = (long)event_file_data(filp); struct trace_seq *s; int r; if (*ppos) return 0; + if (unlikely(!id)) + return -ENODEV; + s = kmalloc(sizeof(*s), GFP_KERNEL); if (!s) return -ENOMEM; trace_seq_init(s); - trace_seq_printf(s, "%d\n", call->event.type); + trace_seq_printf(s, "%d\n", id); r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); @@ -930,21 +1005,28 @@ static ssize_t event_filter_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ftrace_event_call *call = filp->private_data; + struct ftrace_event_call *call; struct trace_seq *s; - int r; + int r = -ENODEV; if (*ppos) return 0; s = kmalloc(sizeof(*s), GFP_KERNEL); + if (!s) return -ENOMEM; trace_seq_init(s); - print_event_filter(call, s); - r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); + mutex_lock(&event_mutex); + call = event_file_data(filp); + if (call) + print_event_filter(call, s); + mutex_unlock(&event_mutex); + + if (call) + r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); kfree(s); @@ -955,9 +1037,9 @@ static ssize_t event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ftrace_event_call *call = filp->private_data; + struct ftrace_event_call *call; char *buf; - int err; + int err = -ENODEV; if (cnt >= PAGE_SIZE) return -EINVAL; @@ -972,7 +1054,12 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, } buf[cnt] = '\0'; - err = apply_event_filter(call, buf); + mutex_lock(&event_mutex); + call = event_file_data(filp); + if (call) + err = apply_event_filter(call, buf); + mutex_unlock(&event_mutex); + free_page((unsigned long) buf); if (err < 0) return err; @@ -992,6 +1079,7 @@ static int subsystem_open(struct inode *inode, struct file *filp) int ret; /* Make sure the system still exists */ + mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); list_for_each_entry(tr, &ftrace_trace_arrays, list) { list_for_each_entry(dir, &tr->systems, list) { @@ -1007,6 +1095,7 @@ static int subsystem_open(struct inode *inode, struct file *filp) } exit_loop: mutex_unlock(&event_mutex); + mutex_unlock(&trace_types_lock); if (!system) return -ENODEV; @@ -1014,9 +1103,17 @@ static int subsystem_open(struct inode *inode, struct file *filp) /* Some versions of gcc think dir can be uninitialized here */ WARN_ON(!dir); + /* Still need to increment the ref count of the system */ + if (trace_array_get(tr) < 0) { + put_system(dir); + return -ENODEV; + } + ret = tracing_open_generic(inode, filp); - if (ret < 0) + if (ret < 0) { + trace_array_put(tr); put_system(dir); + } return ret; } @@ -1027,16 +1124,23 @@ static int system_tr_open(struct inode *inode, struct file *filp) struct trace_array *tr = inode->i_private; int ret; + if (trace_array_get(tr) < 0) + return -ENODEV; + /* Make a temporary dir that has no system but points to tr */ dir = kzalloc(sizeof(*dir), GFP_KERNEL); - if (!dir) + if (!dir) { + trace_array_put(tr); return -ENOMEM; + } dir->tr = tr; ret = tracing_open_generic(inode, filp); - if (ret < 0) + if (ret < 0) { + trace_array_put(tr); kfree(dir); + } filp->private_data = dir; @@ -1047,6 +1151,8 @@ static int subsystem_release(struct inode *inode, struct file *file) { struct ftrace_subsystem_dir *dir = file->private_data; + trace_array_put(dir->tr); + /* * If dir->subsystem is NULL, then this is a temporary * descriptor that was made for a trace_array to enable @@ -1143,6 +1249,7 @@ show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) static int ftrace_event_avail_open(struct inode *inode, struct file *file); static int ftrace_event_set_open(struct inode *inode, struct file *file); +static int ftrace_event_release(struct inode *inode, struct file *file); static const struct seq_operations show_event_seq_ops = { .start = t_start, @@ -1170,7 +1277,7 @@ static const struct file_operations ftrace_set_event_fops = { .read = seq_read, .write = ftrace_event_write, .llseek = seq_lseek, - .release = seq_release, + .release = ftrace_event_release, }; static const struct file_operations ftrace_enable_fops = { @@ -1188,7 +1295,6 @@ static const struct file_operations ftrace_event_format_fops = { }; static const struct file_operations ftrace_event_id_fops = { - .open = tracing_open_generic, .read = event_id_read, .llseek = default_llseek, }; @@ -1247,6 +1353,15 @@ ftrace_event_open(struct inode *inode, struct file *file, return ret; } +static int ftrace_event_release(struct inode *inode, struct file *file) +{ + struct trace_array *tr = inode->i_private; + + trace_array_put(tr); + + return seq_release(inode, file); +} + static int ftrace_event_avail_open(struct inode *inode, struct file *file) { @@ -1260,12 +1375,19 @@ ftrace_event_set_open(struct inode *inode, struct file *file) { const struct seq_operations *seq_ops = &show_set_event_seq_ops; struct trace_array *tr = inode->i_private; + int ret; + + if (trace_array_get(tr) < 0) + return -ENODEV; if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) ftrace_clear_events(tr); - return ftrace_event_open(inode, file, seq_ops); + ret = ftrace_event_open(inode, file, seq_ops); + if (ret < 0) + trace_array_put(tr); + return ret; } static struct event_subsystem * @@ -1279,7 +1401,15 @@ create_new_subsystem(const char *name) return NULL; system->ref_count = 1; - system->name = name; + + /* Only allocate if dynamic (kprobes and modules) */ + if (!core_kernel_data((unsigned long)name)) { + system->ref_count |= SYSTEM_FL_FREE_NAME; + system->name = kstrdup(name, GFP_KERNEL); + if (!system->name) + goto out_free; + } else + system->name = name; system->filter = NULL; @@ -1292,6 +1422,8 @@ create_new_subsystem(const char *name) return system; out_free: + if (system->ref_count & SYSTEM_FL_FREE_NAME) + kfree(system->name); kfree(system); return NULL; } @@ -1410,8 +1542,8 @@ event_create_dir(struct dentry *parent, #ifdef CONFIG_PERF_EVENTS if (call->event.type && call->class->reg) - trace_create_file("id", 0444, file->dir, call, - id); + trace_create_file("id", 0444, file->dir, + (void *)(long)call->event.type, id); #endif /* @@ -1436,33 +1568,16 @@ event_create_dir(struct dentry *parent, return 0; } -static void remove_subsystem(struct ftrace_subsystem_dir *dir) -{ - if (!dir) - return; - - if (!--dir->nr_events) { - debugfs_remove_recursive(dir->entry); - list_del(&dir->list); - __put_system_dir(dir); - } -} - static void remove_event_from_tracers(struct ftrace_event_call *call) { struct ftrace_event_file *file; struct trace_array *tr; do_for_each_event_file_safe(tr, file) { - if (file->event_call != call) continue; - list_del(&file->list); - debugfs_remove_recursive(file->dir); - remove_subsystem(file->system); - kmem_cache_free(file_cachep, file); - + remove_event_file_dir(file); /* * The do_for_each_event_file_safe() is * a double loop. After finding the call for this @@ -1591,6 +1706,7 @@ static void __add_event_to_tracers(struct ftrace_event_call *call, int trace_add_event_call(struct ftrace_event_call *call) { int ret; + mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); ret = __register_event(call, NULL); @@ -1598,11 +1714,13 @@ int trace_add_event_call(struct ftrace_event_call *call) __add_event_to_tracers(call, NULL); mutex_unlock(&event_mutex); + mutex_unlock(&trace_types_lock); return ret; } /* - * Must be called under locking both of event_mutex and trace_event_sem. + * Must be called under locking of trace_types_lock, event_mutex and + * trace_event_sem. */ static void __trace_remove_event_call(struct ftrace_event_call *call) { @@ -1611,14 +1729,47 @@ static void __trace_remove_event_call(struct ftrace_event_call *call) destroy_preds(call); } +static int probe_remove_event_call(struct ftrace_event_call *call) +{ + struct trace_array *tr; + struct ftrace_event_file *file; + +#ifdef CONFIG_PERF_EVENTS + if (call->perf_refcount) + return -EBUSY; +#endif + do_for_each_event_file(tr, file) { + if (file->event_call != call) + continue; + /* + * We can't rely on ftrace_event_enable_disable(enable => 0) + * we are going to do, FTRACE_EVENT_FL_SOFT_MODE can suppress + * TRACE_REG_UNREGISTER. + */ + if (file->flags & FTRACE_EVENT_FL_ENABLED) + return -EBUSY; + break; + } while_for_each_event_file(); + + __trace_remove_event_call(call); + + return 0; +} + /* Remove an event_call */ -void trace_remove_event_call(struct ftrace_event_call *call) +int trace_remove_event_call(struct ftrace_event_call *call) { + int ret; + + mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); down_write(&trace_event_sem); - __trace_remove_event_call(call); + ret = probe_remove_event_call(call); up_write(&trace_event_sem); mutex_unlock(&event_mutex); + mutex_unlock(&trace_types_lock); + + return ret; } #define for_each_event(event, start, end) \ @@ -1703,6 +1854,16 @@ static void trace_module_add_events(struct module *mod) struct ftrace_module_file_ops *file_ops = NULL; struct ftrace_event_call **call, **start, **end; + if (!mod->num_trace_events) + return; + + /* Don't add infrastructure for mods without tracepoints */ + if (trace_module_has_bad_taint(mod)) { + pr_err("%s: module has bad taint, not creating trace events\n", + mod->name); + return; + } + start = mod->trace_events; end = mod->trace_events + mod->num_trace_events; @@ -1762,6 +1923,7 @@ static int trace_module_notify(struct notifier_block *self, { struct module *mod = data; + mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); switch (val) { case MODULE_STATE_COMING: @@ -1772,6 +1934,7 @@ static int trace_module_notify(struct notifier_block *self, break; } mutex_unlock(&event_mutex); + mutex_unlock(&trace_types_lock); return 0; } @@ -2188,12 +2351,8 @@ __trace_remove_event_dirs(struct trace_array *tr) { struct ftrace_event_file *file, *next; - list_for_each_entry_safe(file, next, &tr->events, list) { - list_del(&file->list); - debugfs_remove_recursive(file->dir); - remove_subsystem(file->system); - kmem_cache_free(file_cachep, file); - } + list_for_each_entry_safe(file, next, &tr->events, list) + remove_event_file_dir(file); } static void @@ -2329,11 +2488,11 @@ early_event_add_tracer(struct dentry *parent, struct trace_array *tr) int event_trace_del_tracer(struct trace_array *tr) { - /* Disable any running events */ - __ftrace_set_clr_event(tr, NULL, NULL, NULL, 0); - mutex_lock(&event_mutex); + /* Disable any running events */ + __ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0); + down_write(&trace_event_sem); __trace_remove_event_dirs(tr); debugfs_remove_recursive(tr->event_dir); diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index e1b653f7e1c..0a1edc694d6 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -631,17 +631,15 @@ static void append_filter_err(struct filter_parse_state *ps, free_page((unsigned long) buf); } +/* caller must hold event_mutex */ void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s) { - struct event_filter *filter; + struct event_filter *filter = call->filter; - mutex_lock(&event_mutex); - filter = call->filter; if (filter && filter->filter_string) trace_seq_printf(s, "%s\n", filter->filter_string); else trace_seq_printf(s, "none\n"); - mutex_unlock(&event_mutex); } void print_subsystem_event_filter(struct event_subsystem *system, @@ -1835,23 +1833,22 @@ static int create_system_filter(struct event_subsystem *system, return err; } +/* caller must hold event_mutex */ int apply_event_filter(struct ftrace_event_call *call, char *filter_string) { struct event_filter *filter; - int err = 0; - - mutex_lock(&event_mutex); + int err; if (!strcmp(strstrip(filter_string), "0")) { filter_disable(call); filter = call->filter; if (!filter) - goto out_unlock; + return 0; RCU_INIT_POINTER(call->filter, NULL); /* Make sure the filter is not being used */ synchronize_sched(); __free_filter(filter); - goto out_unlock; + return 0; } err = create_filter(call, filter_string, true, &filter); @@ -1878,8 +1875,6 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string) __free_filter(tmp); } } -out_unlock: - mutex_unlock(&event_mutex); return err; } diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index d21a7467008..d7d0b50b1b7 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -95,15 +95,12 @@ static void __always_unused ____ftrace_check_##name(void) \ #undef __array #define __array(type, item, len) \ do { \ + char *type_str = #type"["__stringify(len)"]"; \ BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ - mutex_lock(&event_storage_mutex); \ - snprintf(event_storage, sizeof(event_storage), \ - "%s[%d]", #type, len); \ - ret = trace_define_field(event_call, event_storage, #item, \ + ret = trace_define_field(event_call, type_str, #item, \ offsetof(typeof(field), item), \ sizeof(field.item), \ is_signed_type(type), filter_type); \ - mutex_unlock(&event_storage_mutex); \ if (ret) \ return ret; \ } while (0); diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index b19d065a28c..2aefbee93a6 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -373,7 +373,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip) struct trace_array_cpu *data; unsigned long flags; - if (likely(!tracer_enabled)) + if (!tracer_enabled || !tracing_is_enabled()) return; cpu = raw_smp_processor_id(); @@ -416,7 +416,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip) else return; - if (!tracer_enabled) + if (!tracer_enabled || !tracing_is_enabled()) return; data = per_cpu_ptr(tr->trace_buffer.data, cpu); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 9f46e98ba8f..64abc8ca928 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -90,7 +90,7 @@ static __kprobes bool trace_probe_is_on_module(struct trace_probe *tp) } static int register_probe_event(struct trace_probe *tp); -static void unregister_probe_event(struct trace_probe *tp); +static int unregister_probe_event(struct trace_probe *tp); static DEFINE_MUTEX(probe_lock); static LIST_HEAD(probe_list); @@ -281,6 +281,8 @@ trace_probe_file_index(struct trace_probe *tp, struct ftrace_event_file *file) static int disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) { + struct ftrace_event_file **old = NULL; + int wait = 0; int ret = 0; mutex_lock(&probe_enable_lock); @@ -314,10 +316,7 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) } rcu_assign_pointer(tp->files, new); - - /* Make sure the probe is done with old files */ - synchronize_sched(); - kfree(old); + wait = 1; } else tp->flags &= ~TP_FLAG_PROFILE; @@ -326,11 +325,25 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) disable_kretprobe(&tp->rp); else disable_kprobe(&tp->rp.kp); + wait = 1; } out_unlock: mutex_unlock(&probe_enable_lock); + if (wait) { + /* + * Synchronize with kprobe_trace_func/kretprobe_trace_func + * to ensure disabled (all running handlers are finished). + * This is not only for kfree(), but also the caller, + * trace_remove_event_call() supposes it for releasing + * event_call related objects, which will be accessed in + * the kprobe_trace_func/kretprobe_trace_func. + */ + synchronize_sched(); + kfree(old); /* Ignored if link == NULL */ + } + return ret; } @@ -398,9 +411,12 @@ static int unregister_trace_probe(struct trace_probe *tp) if (trace_probe_is_enabled(tp)) return -EBUSY; + /* Will fail if probe is being used by ftrace or perf */ + if (unregister_probe_event(tp)) + return -EBUSY; + __unregister_trace_probe(tp); list_del(&tp->list); - unregister_probe_event(tp); return 0; } @@ -679,7 +695,9 @@ static int release_all_trace_probes(void) /* TODO: Use batch unregistration */ while (!list_empty(&probe_list)) { tp = list_entry(probe_list.next, struct trace_probe, list); - unregister_trace_probe(tp); + ret = unregister_trace_probe(tp); + if (ret) + goto end; free_trace_probe(tp); } @@ -1312,11 +1330,15 @@ static int register_probe_event(struct trace_probe *tp) return ret; } -static void unregister_probe_event(struct trace_probe *tp) +static int unregister_probe_event(struct trace_probe *tp) { + int ret; + /* tp->event is unregistered in trace_remove_event_call() */ - trace_remove_event_call(&tp->call); - kfree(tp->call.print_fmt); + ret = trace_remove_event_call(&tp->call); + if (!ret) + kfree(tp->call.print_fmt); + return ret; } /* Make a debugfs interface for controlling probe points */ diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 8f2ac73c7a5..322e1646107 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -306,6 +306,8 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) struct syscall_metadata *sys_data; struct ring_buffer_event *event; struct ring_buffer *buffer; + unsigned long irq_flags; + int pc; int syscall_nr; int size; @@ -321,9 +323,12 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; + local_save_flags(irq_flags); + pc = preempt_count(); + buffer = tr->trace_buffer.buffer; event = trace_buffer_lock_reserve(buffer, - sys_data->enter_event->event.type, size, 0, 0); + sys_data->enter_event->event.type, size, irq_flags, pc); if (!event) return; @@ -333,7 +338,8 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) if (!filter_current_check_discard(buffer, sys_data->enter_event, entry, event)) - trace_current_buffer_unlock_commit(buffer, event, 0, 0); + trace_current_buffer_unlock_commit(buffer, event, + irq_flags, pc); } static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) @@ -343,6 +349,8 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) struct syscall_metadata *sys_data; struct ring_buffer_event *event; struct ring_buffer *buffer; + unsigned long irq_flags; + int pc; int syscall_nr; syscall_nr = trace_get_syscall_nr(current, regs); @@ -355,9 +363,13 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) if (!sys_data) return; + local_save_flags(irq_flags); + pc = preempt_count(); + buffer = tr->trace_buffer.buffer; event = trace_buffer_lock_reserve(buffer, - sys_data->exit_event->event.type, sizeof(*entry), 0, 0); + sys_data->exit_event->event.type, sizeof(*entry), + irq_flags, pc); if (!event) return; @@ -367,7 +379,8 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) if (!filter_current_check_discard(buffer, sys_data->exit_event, entry, event)) - trace_current_buffer_unlock_commit(buffer, event, 0, 0); + trace_current_buffer_unlock_commit(buffer, event, + irq_flags, pc); } static int reg_event_syscall_enter(struct ftrace_event_file *file, diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 32494fb0ee6..6fd72b76852 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -70,7 +70,7 @@ struct trace_uprobe { (sizeof(struct probe_arg) * (n))) static int register_uprobe_event(struct trace_uprobe *tu); -static void unregister_uprobe_event(struct trace_uprobe *tu); +static int unregister_uprobe_event(struct trace_uprobe *tu); static DEFINE_MUTEX(uprobe_lock); static LIST_HEAD(uprobe_list); @@ -164,11 +164,17 @@ static struct trace_uprobe *find_probe_event(const char *event, const char *grou } /* Unregister a trace_uprobe and probe_event: call with locking uprobe_lock */ -static void unregister_trace_uprobe(struct trace_uprobe *tu) +static int unregister_trace_uprobe(struct trace_uprobe *tu) { + int ret; + + ret = unregister_uprobe_event(tu); + if (ret) + return ret; + list_del(&tu->list); - unregister_uprobe_event(tu); free_trace_uprobe(tu); + return 0; } /* Register a trace_uprobe and probe_event */ @@ -181,9 +187,12 @@ static int register_trace_uprobe(struct trace_uprobe *tu) /* register as an event */ old_tp = find_probe_event(tu->call.name, tu->call.class->system); - if (old_tp) + if (old_tp) { /* delete old event */ - unregister_trace_uprobe(old_tp); + ret = unregister_trace_uprobe(old_tp); + if (ret) + goto end; + } ret = register_uprobe_event(tu); if (ret) { @@ -256,6 +265,8 @@ static int create_trace_uprobe(int argc, char **argv) group = UPROBE_EVENT_SYSTEM; if (is_delete) { + int ret; + if (!event) { pr_info("Delete command needs an event name.\n"); return -EINVAL; @@ -269,9 +280,9 @@ static int create_trace_uprobe(int argc, char **argv) return -ENOENT; } /* delete an event */ - unregister_trace_uprobe(tu); + ret = unregister_trace_uprobe(tu); mutex_unlock(&uprobe_lock); - return 0; + return ret; } if (argc < 2) { @@ -283,8 +294,10 @@ static int create_trace_uprobe(int argc, char **argv) return -EINVAL; } arg = strchr(argv[1], ':'); - if (!arg) + if (!arg) { + ret = -EINVAL; goto fail_address_parse; + } *arg++ = '\0'; filename = argv[1]; @@ -406,16 +419,20 @@ fail_address_parse: return ret; } -static void cleanup_all_probes(void) +static int cleanup_all_probes(void) { struct trace_uprobe *tu; + int ret = 0; mutex_lock(&uprobe_lock); while (!list_empty(&uprobe_list)) { tu = list_entry(uprobe_list.next, struct trace_uprobe, list); - unregister_trace_uprobe(tu); + ret = unregister_trace_uprobe(tu); + if (ret) + break; } mutex_unlock(&uprobe_lock); + return ret; } /* Probes listing interfaces */ @@ -460,8 +477,13 @@ static const struct seq_operations probes_seq_op = { static int probes_open(struct inode *inode, struct file *file) { - if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) - cleanup_all_probes(); + int ret; + + if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { + ret = cleanup_all_probes(); + if (ret) + return ret; + } return seq_open(file, &probes_seq_op); } @@ -968,12 +990,17 @@ static int register_uprobe_event(struct trace_uprobe *tu) return ret; } -static void unregister_uprobe_event(struct trace_uprobe *tu) +static int unregister_uprobe_event(struct trace_uprobe *tu) { + int ret; + /* tu->event is unregistered in trace_remove_event_call() */ - trace_remove_event_call(&tu->call); + ret = trace_remove_event_call(&tu->call); + if (ret) + return ret; kfree(tu->call.print_fmt); tu->call.print_fmt = NULL; + return 0; } /* Make a trace interface for controling probe points */ |