From 4fbb48cb110be653adcd97a87506e0ba8c16d585 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 30 Apr 2014 22:35:48 -0400 Subject: ftrace: Allow no regs if no more callbacks require it When registering a function callback for the function tracer, the ops can specify if it wants to save full regs (like an interrupt would) for each function that it traces, or if it does not care about regs and just wants to have the fastest return possible. Once a ops has registered a function, if other ops register that function they all will receive the regs too. That's because it does the work once, it does it for everyone. Now if the ops wanting regs unregisters the function so that there's only ops left that do not care about regs, those ops will still continue getting regs and going through the work for it on that function. This is because the disabling of the rec counter only sees the ops registered, and does not see the ops that are still attached, and does not know if the current ops that are still attached want regs or not. To play it safe, it just keeps regs being processed until no function is registered anymore. Instead of doing that, check the ops that are still registered for that function and if none want regs for it anymore, then disable the processing of regs. Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 5b372e3ed675..b867c647e5bc 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1492,6 +1492,26 @@ int ftrace_text_reserved(const void *start, const void *end) return (int)!!ret; } +/* Test if ops registered to this rec needs regs */ +static bool test_rec_ops_needs_regs(struct dyn_ftrace *rec) +{ + struct ftrace_ops *ops; + bool keep_regs = false; + + for (ops = ftrace_ops_list; + ops != &ftrace_list_end; ops = ops->next) { + /* pass rec in as regs to have non-NULL val */ + if (ftrace_ops_test(ops, rec->ip, rec)) { + if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) { + keep_regs = true; + break; + } + } + } + + return keep_regs; +} + static void __ftrace_hash_rec_update(struct ftrace_ops *ops, int filter_hash, bool inc) @@ -1584,6 +1604,18 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, if (FTRACE_WARN_ON((rec->flags & ~FTRACE_FL_MASK) == 0)) return; rec->flags--; + /* + * If the rec had REGS enabled and the ops that is + * being removed had REGS set, then see if there is + * still any ops for this record that wants regs. + * If not, we can stop recording them. + */ + if ((rec->flags & ~FTRACE_FL_MASK) > 0 && + rec->flags & FTRACE_FL_REGS && + ops->flags & FTRACE_OPS_FL_SAVE_REGS) { + if (!test_rec_ops_needs_regs(rec)) + rec->flags &= ~FTRACE_FL_REGS; + } } count++; /* Shortcut, if we handled all records, we are done. */ -- cgit v1.2.3 From 0376bde11be5b87c9fd7d6813ac5fd7e1798b1bf Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 7 May 2014 13:46:45 -0400 Subject: ftrace: Add ftrace_rec_counter() macro to simplify the code The ftrace dynamic record has a flags element that also has a counter. Instead of hard coding "rec->flags & ~FTRACE_FL_MASK" all over the place. Use a macro instead. Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b867c647e5bc..a58d840305c3 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1592,7 +1592,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, if (inc) { rec->flags++; - if (FTRACE_WARN_ON((rec->flags & ~FTRACE_FL_MASK) == FTRACE_REF_MAX)) + if (FTRACE_WARN_ON(ftrace_rec_count(rec) == FTRACE_REF_MAX)) return; /* * If any ops wants regs saved for this function @@ -1601,7 +1601,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) rec->flags |= FTRACE_FL_REGS; } else { - if (FTRACE_WARN_ON((rec->flags & ~FTRACE_FL_MASK) == 0)) + if (FTRACE_WARN_ON(ftrace_rec_count(rec) == 0)) return; rec->flags--; /* @@ -1610,7 +1610,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, * still any ops for this record that wants regs. * If not, we can stop recording them. */ - if ((rec->flags & ~FTRACE_FL_MASK) > 0 && + if (ftrace_rec_count(rec) > 0 && rec->flags & FTRACE_FL_REGS && ops->flags & FTRACE_OPS_FL_SAVE_REGS) { if (!test_rec_ops_needs_regs(rec)) @@ -1700,7 +1700,7 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) * If we are disabling calls, then disable all records that * are enabled. */ - if (enable && (rec->flags & ~FTRACE_FL_MASK)) + if (enable && ftrace_rec_count(rec)) flag = FTRACE_FL_ENABLED; /* @@ -1746,7 +1746,7 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) if (update) { /* If there's no more users, clear all flags */ - if (!(rec->flags & ~FTRACE_FL_MASK)) + if (!ftrace_rec_count(rec)) rec->flags = 0; else /* Just disable the record (keep REGS state) */ @@ -2685,7 +2685,7 @@ static int t_show(struct seq_file *m, void *v) seq_printf(m, "%ps", (void *)rec->ip); if (iter->flags & FTRACE_ITER_ENABLED) seq_printf(m, " (%ld)%s", - rec->flags & ~FTRACE_FL_MASK, + ftrace_rec_count(rec), rec->flags & FTRACE_FL_REGS ? " R" : ""); seq_printf(m, "\n"); -- cgit v1.2.3 From 48212542067a7ff6cbe829dbae279c2ff7557b44 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 27 Jun 2014 19:01:36 +0200 Subject: tracing/uprobes: Revert "Support mix of ftrace and perf" This reverts commit 43fe98913c9f67e3b523615ee3316f9520a623e0. This patch is very wrong. Firstly, this change leads to unbalanced uprobe_unregister(). Just for example, # perf probe -x /lib/libc.so.6 syscall # echo 1 >> /sys/kernel/debug/tracing/events/probe_libc/enable # perf record -e probe_libc:syscall whatever after that uprobe is dead (unregistered) but the user of ftrace/perf can't know this, and it looks as if nobody hits this probe. This would be easy to fix, but there are other reasons why it is not simple to mix ftrace and perf. If nothing else, they can't share the same ->consumer.filter. This is fixable too, but probably we need to fix the poorly designed uprobe_register() interface first. At least "register" and "apply" should be clearly separated. Link: http://lkml.kernel.org/p/20140627170136.GA18319@redhat.com Cc: Tom Zanussi Cc: "zhangwei(Jovi)" Cc: stable@vger.kernel.org # v3.14 Acked-by: Namhyung Kim Acked-by: Srikar Dronamraju Reviewed-by: Masami Hiramatsu Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt --- kernel/trace/trace_uprobe.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 04fdb5de823c..08e7970bf3f9 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -893,6 +893,9 @@ probe_event_enable(struct trace_uprobe *tu, struct ftrace_event_file *file, int ret; if (file) { + if (tu->tp.flags & TP_FLAG_PROFILE) + return -EINTR; + link = kmalloc(sizeof(*link), GFP_KERNEL); if (!link) return -ENOMEM; @@ -901,8 +904,12 @@ probe_event_enable(struct trace_uprobe *tu, struct ftrace_event_file *file, list_add_tail_rcu(&link->list, &tu->tp.files); tu->tp.flags |= TP_FLAG_TRACE; - } else + } else { + if (tu->tp.flags & TP_FLAG_TRACE) + return -EINTR; + tu->tp.flags |= TP_FLAG_PROFILE; + } ret = uprobe_buffer_enable(); if (ret < 0) -- cgit v1.2.3 From f786106e8081bbec57053fec7fcf25dc25d02144 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 27 Jun 2014 19:01:43 +0200 Subject: tracing/uprobes: Kill the bogus UPROBE_HANDLER_REMOVE code in uprobe_dispatcher() I do not know why dd9fa555d7bb "tracing/uprobes: Move argument fetching to uprobe_dispatcher()" added the UPROBE_HANDLER_REMOVE, but it looks wrong. OK, perhaps it makes sense to avoid store_trace_args() if the tracee is nacked by uprobe_perf_filter(). But then we should kill the same code in uprobe_perf_func() and unify the TRACE/PROFILE filtering (we need to do this anyway to mix perf/ftrace). Until then this code actually adds the pessimization because uprobe_perf_filter() will be called twice and return T in likely case. Link: http://lkml.kernel.org/p/20140627170143.GA18329@redhat.com Acked-by: Namhyung Kim Acked-by: Srikar Dronamraju Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt --- kernel/trace/trace_uprobe.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 08e7970bf3f9..c4cf0abd60ba 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1208,12 +1208,6 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) current->utask->vaddr = (unsigned long) &udd; -#ifdef CONFIG_PERF_EVENTS - if ((tu->tp.flags & TP_FLAG_TRACE) == 0 && - !uprobe_perf_filter(&tu->consumer, 0, current->mm)) - return UPROBE_HANDLER_REMOVE; -#endif - if (WARN_ON_ONCE(!uprobe_cpu_buffer)) return 0; -- cgit v1.2.3 From fb6bab6a5ad46d00b5ffa22268f21df1cd7c59df Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 27 Jun 2014 19:01:46 +0200 Subject: tracing/uprobes: Fix the usage of uprobe_buffer_enable() in probe_event_enable() The usage of uprobe_buffer_enable() added by dcad1a20 is very wrong, 1. uprobe_buffer_enable() and uprobe_buffer_disable() are not balanced, _enable() should be called only if !enabled. 2. If uprobe_buffer_enable() fails probe_event_enable() should clear tp.flags and free event_file_link. 3. If uprobe_register() fails it should do uprobe_buffer_disable(). Link: http://lkml.kernel.org/p/20140627170146.GA18332@redhat.com Acked-by: Namhyung Kim Acked-by: Srikar Dronamraju Reviewed-by: Masami Hiramatsu Fixes: dcad1a204f72 "tracing/uprobes: Fetch args before reserving a ring buffer" Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt --- kernel/trace/trace_uprobe.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index c4cf0abd60ba..3c9b97e6b1f4 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -911,26 +911,33 @@ probe_event_enable(struct trace_uprobe *tu, struct ftrace_event_file *file, tu->tp.flags |= TP_FLAG_PROFILE; } - ret = uprobe_buffer_enable(); - if (ret < 0) - return ret; - WARN_ON(!uprobe_filter_is_empty(&tu->filter)); if (enabled) return 0; + ret = uprobe_buffer_enable(); + if (ret) + goto err_flags; + tu->consumer.filter = filter; ret = uprobe_register(tu->inode, tu->offset, &tu->consumer); - if (ret) { - if (file) { - list_del(&link->list); - kfree(link); - tu->tp.flags &= ~TP_FLAG_TRACE; - } else - tu->tp.flags &= ~TP_FLAG_PROFILE; - } + if (ret) + goto err_buffer; + return 0; + + err_buffer: + uprobe_buffer_disable(); + + err_flags: + if (file) { + list_del(&link->list); + kfree(link); + tu->tp.flags &= ~TP_FLAG_TRACE; + } else { + tu->tp.flags &= ~TP_FLAG_PROFILE; + } return ret; } -- cgit v1.2.3 From 79922b8009c074e30d3a97f5a24519f11814ad03 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 6 May 2014 21:56:17 -0400 Subject: ftrace: Optimize function graph to be called directly Function graph tracing is a bit different than the function tracers, as it is processed after either the ftrace_caller or ftrace_regs_caller and we only have one place to modify the jump to ftrace_graph_caller, the jump needs to happen after the restore of registeres. The function graph tracer is dependent on the function tracer, where even if the function graph tracing is going on by itself, the save and restore of registers is still done for function tracing regardless of if function tracing is happening, before it calls the function graph code. If there's no function tracing happening, it is possible to just call the function graph tracer directly, and avoid the wasted effort to save and restore regs for function tracing. This requires adding new flags to the dyn_ftrace records: FTRACE_FL_TRAMP FTRACE_FL_TRAMP_EN The first is set if the count for the record is one, and the ftrace_ops associated to that record has its own trampoline. That way the mcount code can call that trampoline directly. In the future, trampolines can be added to arbitrary ftrace_ops, where you can have two or more ftrace_ops registered to ftrace (like kprobes and perf) and if they are not tracing the same functions, then instead of doing a loop to check all registered ftrace_ops against their hashes, just call the ftrace_ops trampoline directly, which would call the registered ftrace_ops function directly. Without this patch perf showed: 0.05% hackbench [kernel.kallsyms] [k] ftrace_caller 0.05% hackbench [kernel.kallsyms] [k] arch_local_irq_save 0.05% hackbench [kernel.kallsyms] [k] native_sched_clock 0.04% hackbench [kernel.kallsyms] [k] __buffer_unlock_commit 0.04% hackbench [kernel.kallsyms] [k] preempt_trace 0.04% hackbench [kernel.kallsyms] [k] prepare_ftrace_return 0.04% hackbench [kernel.kallsyms] [k] __this_cpu_preempt_check 0.04% hackbench [kernel.kallsyms] [k] ftrace_graph_caller See that the ftrace_caller took up more time than the ftrace_graph_caller did. With this patch: 0.05% hackbench [kernel.kallsyms] [k] __buffer_unlock_commit 0.04% hackbench [kernel.kallsyms] [k] call_filter_check_discard 0.04% hackbench [kernel.kallsyms] [k] ftrace_graph_caller 0.04% hackbench [kernel.kallsyms] [k] sched_clock The ftrace_caller is no where to be found and ftrace_graph_caller still takes up the same percentage. Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 242 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 235 insertions(+), 7 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index a58d840305c3..5d15eb8146a7 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1042,6 +1042,8 @@ static struct pid * const ftrace_swapper_pid = &init_struct_pid; #ifdef CONFIG_DYNAMIC_FTRACE +static struct ftrace_ops *removed_ops; + #ifndef CONFIG_FTRACE_MCOUNT_RECORD # error Dynamic ftrace depends on MCOUNT_RECORD #endif @@ -1512,6 +1514,33 @@ static bool test_rec_ops_needs_regs(struct dyn_ftrace *rec) return keep_regs; } +static void ftrace_remove_tramp(struct ftrace_ops *ops, + struct dyn_ftrace *rec) +{ + struct ftrace_func_entry *entry; + + entry = ftrace_lookup_ip(ops->tramp_hash, rec->ip); + if (!entry) + return; + + /* + * The tramp_hash entry will be removed at time + * of update. + */ + ops->trampolines--; + rec->flags &= ~FTRACE_FL_TRAMP; +} + +static void ftrace_clear_tramps(struct dyn_ftrace *rec) +{ + struct ftrace_ops *op; + + do_for_each_ftrace_op(op, ftrace_ops_list) { + if (op->trampolines) + ftrace_remove_tramp(op, rec); + } while_for_each_ftrace_op(op); +} + static void __ftrace_hash_rec_update(struct ftrace_ops *ops, int filter_hash, bool inc) @@ -1594,6 +1623,28 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, rec->flags++; if (FTRACE_WARN_ON(ftrace_rec_count(rec) == FTRACE_REF_MAX)) return; + + /* + * If there's only a single callback registered to a + * function, and the ops has a trampoline registered + * for it, then we can call it directly. + */ + if (ftrace_rec_count(rec) == 1 && ops->trampoline) { + rec->flags |= FTRACE_FL_TRAMP; + ops->trampolines++; + } else { + /* + * If we are adding another function callback + * to this function, and the previous had a + * trampoline used, then we need to go back to + * the default trampoline. + */ + rec->flags &= ~FTRACE_FL_TRAMP; + + /* remove trampolines from any ops for this rec */ + ftrace_clear_tramps(rec); + } + /* * If any ops wants regs saved for this function * then all ops will get saved regs. @@ -1604,6 +1655,10 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, if (FTRACE_WARN_ON(ftrace_rec_count(rec) == 0)) return; rec->flags--; + + if (ops->trampoline && !ftrace_rec_count(rec)) + ftrace_remove_tramp(ops, rec); + /* * If the rec had REGS enabled and the ops that is * being removed had REGS set, then see if there is @@ -1616,6 +1671,11 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, if (!test_rec_ops_needs_regs(rec)) rec->flags &= ~FTRACE_FL_REGS; } + + /* + * flags will be cleared in ftrace_check_record() + * if rec count is zero. + */ } count++; /* Shortcut, if we handled all records, we are done. */ @@ -1704,13 +1764,19 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) flag = FTRACE_FL_ENABLED; /* - * If enabling and the REGS flag does not match the REGS_EN, then - * do not ignore this record. Set flags to fail the compare against - * ENABLED. + * If enabling and the REGS flag does not match the REGS_EN, or + * the TRAMP flag doesn't match the TRAMP_EN, then do not ignore + * this record. Set flags to fail the compare against ENABLED. */ - if (flag && - (!(rec->flags & FTRACE_FL_REGS) != !(rec->flags & FTRACE_FL_REGS_EN))) - flag |= FTRACE_FL_REGS; + if (flag) { + if (!(rec->flags & FTRACE_FL_REGS) != + !(rec->flags & FTRACE_FL_REGS_EN)) + flag |= FTRACE_FL_REGS; + + if (!(rec->flags & FTRACE_FL_TRAMP) != + !(rec->flags & FTRACE_FL_TRAMP_EN)) + flag |= FTRACE_FL_TRAMP; + } /* If the state of this record hasn't changed, then do nothing */ if ((rec->flags & FTRACE_FL_ENABLED) == flag) @@ -1728,6 +1794,12 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) else rec->flags &= ~FTRACE_FL_REGS_EN; } + if (flag & FTRACE_FL_TRAMP) { + if (rec->flags & FTRACE_FL_TRAMP) + rec->flags |= FTRACE_FL_TRAMP_EN; + else + rec->flags &= ~FTRACE_FL_TRAMP_EN; + } } /* @@ -1736,7 +1808,7 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) * Otherwise, * return UPDATE_MODIFY_CALL to tell the caller to convert * from the save regs, to a non-save regs function or - * vice versa. + * vice versa, or from a trampoline call. */ if (flag & FTRACE_FL_ENABLED) return FTRACE_UPDATE_MAKE_CALL; @@ -1783,6 +1855,43 @@ int ftrace_test_record(struct dyn_ftrace *rec, int enable) return ftrace_check_record(rec, enable, 0); } +static struct ftrace_ops * +ftrace_find_tramp_ops_curr(struct dyn_ftrace *rec) +{ + struct ftrace_ops *op; + + /* Removed ops need to be tested first */ + if (removed_ops && removed_ops->tramp_hash) { + if (ftrace_lookup_ip(removed_ops->tramp_hash, rec->ip)) + return removed_ops; + } + + do_for_each_ftrace_op(op, ftrace_ops_list) { + if (!op->tramp_hash) + continue; + + if (ftrace_lookup_ip(op->tramp_hash, rec->ip)) + return op; + + } while_for_each_ftrace_op(op); + + return NULL; +} + +static struct ftrace_ops * +ftrace_find_tramp_ops_new(struct dyn_ftrace *rec) +{ + struct ftrace_ops *op; + + do_for_each_ftrace_op(op, ftrace_ops_list) { + /* pass rec in as regs to have non-NULL val */ + if (ftrace_ops_test(op, rec->ip, rec)) + return op; + } while_for_each_ftrace_op(op); + + return NULL; +} + /** * ftrace_get_addr_new - Get the call address to set to * @rec: The ftrace record descriptor @@ -1795,6 +1904,20 @@ int ftrace_test_record(struct dyn_ftrace *rec, int enable) */ unsigned long ftrace_get_addr_new(struct dyn_ftrace *rec) { + struct ftrace_ops *ops; + + /* Trampolines take precedence over regs */ + if (rec->flags & FTRACE_FL_TRAMP) { + ops = ftrace_find_tramp_ops_new(rec); + if (FTRACE_WARN_ON(!ops || !ops->trampoline)) { + pr_warning("Bad trampoline accounting at: %p (%pS)\n", + (void *)rec->ip, (void *)rec->ip); + /* Ftrace is shutting down, return anything */ + return (unsigned long)FTRACE_ADDR; + } + return ops->trampoline; + } + if (rec->flags & FTRACE_FL_REGS) return (unsigned long)FTRACE_REGS_ADDR; else @@ -1813,6 +1936,20 @@ unsigned long ftrace_get_addr_new(struct dyn_ftrace *rec) */ unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec) { + struct ftrace_ops *ops; + + /* Trampolines take precedence over regs */ + if (rec->flags & FTRACE_FL_TRAMP_EN) { + ops = ftrace_find_tramp_ops_curr(rec); + if (FTRACE_WARN_ON(!ops)) { + pr_warning("Bad trampoline accounting at: %p (%pS)\n", + (void *)rec->ip, (void *)rec->ip); + /* Ftrace is shutting down, return anything */ + return (unsigned long)FTRACE_ADDR; + } + return ops->trampoline; + } + if (rec->flags & FTRACE_FL_REGS_EN) return (unsigned long)FTRACE_REGS_ADDR; else @@ -2055,6 +2192,78 @@ void __weak arch_ftrace_update_code(int command) ftrace_run_stop_machine(command); } +static int ftrace_save_ops_tramp_hash(struct ftrace_ops *ops) +{ + struct ftrace_page *pg; + struct dyn_ftrace *rec; + int size, bits; + int ret; + + size = ops->trampolines; + bits = 0; + /* + * Make the hash size about 1/2 the # found + */ + for (size /= 2; size; size >>= 1) + bits++; + + ops->tramp_hash = alloc_ftrace_hash(bits); + /* + * TODO: a failed allocation is going to screw up + * the accounting of what needs to be modified + * and not. For now, we kill ftrace if we fail + * to allocate here. But there are ways around this, + * but that will take a little more work. + */ + if (!ops->tramp_hash) + return -ENOMEM; + + do_for_each_ftrace_rec(pg, rec) { + if (ftrace_rec_count(rec) == 1 && + ftrace_ops_test(ops, rec->ip, rec)) { + + /* This record had better have a trampoline */ + if (FTRACE_WARN_ON(!(rec->flags & FTRACE_FL_TRAMP_EN))) + return -1; + + ret = add_hash_entry(ops->tramp_hash, rec->ip); + if (ret < 0) + return ret; + } + } while_for_each_ftrace_rec(); + + return 0; +} + +static int ftrace_save_tramp_hashes(void) +{ + struct ftrace_ops *op; + int ret; + + /* + * Now that any trampoline is being used, we need to save the + * hashes for the ops that have them. This allows the mapping + * back from the record to the ops that has the trampoline to + * know what code is being replaced. Modifying code must always + * verify what it is changing. + */ + do_for_each_ftrace_op(op, ftrace_ops_list) { + + /* The tramp_hash is recreated each time. */ + free_ftrace_hash(op->tramp_hash); + op->tramp_hash = NULL; + + if (op->trampolines) { + ret = ftrace_save_ops_tramp_hash(op); + if (ret) + return ret; + } + + } while_for_each_ftrace_op(op); + + return 0; +} + static void ftrace_run_update_code(int command) { int ret; @@ -2081,6 +2290,9 @@ static void ftrace_run_update_code(int command) ret = ftrace_arch_code_modify_post_process(); FTRACE_WARN_ON(ret); + + ret = ftrace_save_tramp_hashes(); + FTRACE_WARN_ON(ret); } static ftrace_func_t saved_ftrace_func; @@ -2171,8 +2383,16 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) return 0; } + /* + * If the ops uses a trampoline, then it needs to be + * tested first on update. + */ + removed_ops = ops; + ftrace_run_update_code(command); + removed_ops = NULL; + /* * Dynamic ops may be freed, we must make sure that all * callers are done before leaving this function. @@ -5116,6 +5336,11 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc, /* Function graph doesn't use the .func field of global_ops */ global_ops.flags |= FTRACE_OPS_FL_STUB; +#ifdef CONFIG_DYNAMIC_FTRACE + /* Optimize function graph calling (if implemented by arch) */ + global_ops.trampoline = FTRACE_GRAPH_ADDR; +#endif + ret = ftrace_startup(&global_ops, FTRACE_START_FUNC_RET); out: @@ -5136,6 +5361,9 @@ void unregister_ftrace_graph(void) __ftrace_graph_entry = ftrace_graph_entry_stub; ftrace_shutdown(&global_ops, FTRACE_STOP_FUNC_RET); global_ops.flags &= ~FTRACE_OPS_FL_STUB; +#ifdef CONFIG_DYNAMIC_FTRACE + global_ops.trampoline = 0; +#endif unregister_pm_notifier(&ftrace_suspend_notifier); unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL); -- cgit v1.2.3 From 9674b2fadab636b1fe27b282f9a9fa0f9d8c9839 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 9 May 2014 16:54:59 -0400 Subject: ftrace: Add trampolines to enabled_functions debug file The enabled_functions is used to help debug the dynamic function tracing. Adding what trampolines are attached to files is useful for debugging. Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 5d15eb8146a7..3ded796e10e0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2903,10 +2903,22 @@ static int t_show(struct seq_file *m, void *v) return 0; seq_printf(m, "%ps", (void *)rec->ip); - if (iter->flags & FTRACE_ITER_ENABLED) + if (iter->flags & FTRACE_ITER_ENABLED) { seq_printf(m, " (%ld)%s", ftrace_rec_count(rec), - rec->flags & FTRACE_FL_REGS ? " R" : ""); + rec->flags & FTRACE_FL_REGS ? " R" : " "); + if (rec->flags & FTRACE_FL_TRAMP_EN) { + struct ftrace_ops *ops; + + ops = ftrace_find_tramp_ops_curr(rec); + if (ops && ops->trampoline) + seq_printf(m, "\ttramp: %pS", + (void *)ops->trampoline); + else + seq_printf(m, "\ttramp: ERROR!"); + } + } + seq_printf(m, "\n"); return 0; -- cgit v1.2.3 From 5c27c775d5e698d5b754d213747e9fb85290e3b8 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 17 Jun 2014 11:04:42 +0000 Subject: ftrace: Simplify ftrace_hash_disable/enable path in ftrace_hash_move Simplify ftrace_hash_disable/enable path in ftrace_hash_move for hardening the process if the memory allocation failed. Link: http://lkml.kernel.org/p/20140617110442.15167.81076.stgit@kbuild-fedora.novalocal Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 3ded796e10e0..8323082dbc21 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1306,25 +1306,15 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable, struct ftrace_hash *new_hash; int size = src->count; int bits = 0; - int ret; int i; - /* - * Remove the current set, update the hash and add - * them back. - */ - ftrace_hash_rec_disable(ops, enable); - /* * If the new source is empty, just free dst and assign it * the empty_hash. */ if (!src->count) { - free_ftrace_hash_rcu(*dst); - rcu_assign_pointer(*dst, EMPTY_HASH); - /* still need to update the function records */ - ret = 0; - goto out; + new_hash = EMPTY_HASH; + goto update; } /* @@ -1337,10 +1327,9 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable, if (bits > FTRACE_HASH_MAX_BITS) bits = FTRACE_HASH_MAX_BITS; - ret = -ENOMEM; new_hash = alloc_ftrace_hash(bits); if (!new_hash) - goto out; + return -ENOMEM; size = 1 << src->size_bits; for (i = 0; i < size; i++) { @@ -1351,20 +1340,20 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable, } } +update: + /* + * Remove the current set, update the hash and add + * them back. + */ + ftrace_hash_rec_disable(ops, enable); + old_hash = *dst; rcu_assign_pointer(*dst, new_hash); free_ftrace_hash_rcu(old_hash); - ret = 0; - out: - /* - * Enable regardless of ret: - * On success, we enable the new hash. - * On failure, we re-enable the original hash. - */ ftrace_hash_rec_enable(ops, enable); - return ret; + return 0; } /* -- cgit v1.2.3 From 12306276fabcb746a14979e96f43a13c724dec49 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 20 Jun 2014 13:38:54 -0400 Subject: tracing: Move the trace_seq_* functions into its own trace_seq.c file The trace_seq_*() functions are a nice utility that allows users to manipulate buffers with printf() like formats. It has its own trace_seq.h header in include/linux and should be in its own file. Being tied with trace_output.c is rather awkward. Signed-off-by: Steven Rostedt --- kernel/trace/Makefile | 1 + kernel/trace/trace.c | 24 ---- kernel/trace/trace_output.c | 268 --------------------------------------- kernel/trace/trace_output.h | 3 - kernel/trace/trace_seq.c | 303 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 304 insertions(+), 295 deletions(-) create mode 100644 kernel/trace/trace_seq.c (limited to 'kernel/trace') diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 2611613f14f1..67d6369ddf83 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -28,6 +28,7 @@ obj-$(CONFIG_RING_BUFFER_BENCHMARK) += ring_buffer_benchmark.o obj-$(CONFIG_TRACING) += trace.o obj-$(CONFIG_TRACING) += trace_output.o +obj-$(CONFIG_TRACING) += trace_seq.o obj-$(CONFIG_TRACING) += trace_stat.o obj-$(CONFIG_TRACING) += trace_printk.o obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 384ede311717..eeb233cbac4f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -923,30 +923,6 @@ out: return ret; } -ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) -{ - int len; - int ret; - - if (!cnt) - return 0; - - if (s->len <= s->readpos) - return -EBUSY; - - len = s->len - s->readpos; - if (cnt > len) - cnt = len; - ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt); - if (ret == cnt) - return -EFAULT; - - cnt -= ret; - - s->readpos += cnt; - return cnt; -} - static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) { int len; diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index f3dad80c20b2..b8930f79a04b 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -20,23 +20,6 @@ static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly; static int next_event_type = __TRACE_LAST_TYPE + 1; -int trace_print_seq(struct seq_file *m, struct trace_seq *s) -{ - int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len; - int ret; - - ret = seq_write(m, s->buffer, len); - - /* - * Only reset this buffer if we successfully wrote to the - * seq_file buffer. - */ - if (!ret) - trace_seq_init(s); - - return ret; -} - enum print_line_t trace_print_bputs_msg_only(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; @@ -85,257 +68,6 @@ enum print_line_t trace_print_printk_msg_only(struct trace_iterator *iter) return TRACE_TYPE_HANDLED; } -/** - * trace_seq_printf - sequence printing of trace information - * @s: trace sequence descriptor - * @fmt: printf format string - * - * It returns 0 if the trace oversizes the buffer's free - * space, 1 otherwise. - * - * The tracer may use either sequence operations or its own - * copy to user routines. To simplify formating of a trace - * trace_seq_printf is used to store strings into a special - * buffer (@s). Then the output may be either used by - * the sequencer or pulled into another buffer. - */ -int -trace_seq_printf(struct trace_seq *s, const char *fmt, ...) -{ - int len = (PAGE_SIZE - 1) - s->len; - va_list ap; - int ret; - - if (s->full || !len) - return 0; - - va_start(ap, fmt); - ret = vsnprintf(s->buffer + s->len, len, fmt, ap); - va_end(ap); - - /* If we can't write it all, don't bother writing anything */ - if (ret >= len) { - s->full = 1; - return 0; - } - - s->len += ret; - - return 1; -} -EXPORT_SYMBOL_GPL(trace_seq_printf); - -/** - * trace_seq_bitmask - put a list of longs as a bitmask print output - * @s: trace sequence descriptor - * @maskp: points to an array of unsigned longs that represent a bitmask - * @nmaskbits: The number of bits that are valid in @maskp - * - * It returns 0 if the trace oversizes the buffer's free - * space, 1 otherwise. - * - * Writes a ASCII representation of a bitmask string into @s. - */ -int -trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp, - int nmaskbits) -{ - int len = (PAGE_SIZE - 1) - s->len; - int ret; - - if (s->full || !len) - return 0; - - ret = bitmap_scnprintf(s->buffer, len, maskp, nmaskbits); - s->len += ret; - - return 1; -} -EXPORT_SYMBOL_GPL(trace_seq_bitmask); - -/** - * trace_seq_vprintf - sequence printing of trace information - * @s: trace sequence descriptor - * @fmt: printf format string - * - * The tracer may use either sequence operations or its own - * copy to user routines. To simplify formating of a trace - * trace_seq_printf is used to store strings into a special - * buffer (@s). Then the output may be either used by - * the sequencer or pulled into another buffer. - */ -int -trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args) -{ - int len = (PAGE_SIZE - 1) - s->len; - int ret; - - if (s->full || !len) - return 0; - - ret = vsnprintf(s->buffer + s->len, len, fmt, args); - - /* If we can't write it all, don't bother writing anything */ - if (ret >= len) { - s->full = 1; - return 0; - } - - s->len += ret; - - return len; -} -EXPORT_SYMBOL_GPL(trace_seq_vprintf); - -int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) -{ - int len = (PAGE_SIZE - 1) - s->len; - int ret; - - if (s->full || !len) - return 0; - - ret = bstr_printf(s->buffer + s->len, len, fmt, binary); - - /* If we can't write it all, don't bother writing anything */ - if (ret >= len) { - s->full = 1; - return 0; - } - - s->len += ret; - - return len; -} - -/** - * trace_seq_puts - trace sequence printing of simple string - * @s: trace sequence descriptor - * @str: simple string to record - * - * The tracer may use either the sequence operations or its own - * copy to user routines. This function records a simple string - * into a special buffer (@s) for later retrieval by a sequencer - * or other mechanism. - */ -int trace_seq_puts(struct trace_seq *s, const char *str) -{ - int len = strlen(str); - - if (s->full) - return 0; - - if (len > ((PAGE_SIZE - 1) - s->len)) { - s->full = 1; - return 0; - } - - memcpy(s->buffer + s->len, str, len); - s->len += len; - - return len; -} - -int trace_seq_putc(struct trace_seq *s, unsigned char c) -{ - if (s->full) - return 0; - - if (s->len >= (PAGE_SIZE - 1)) { - s->full = 1; - return 0; - } - - s->buffer[s->len++] = c; - - return 1; -} -EXPORT_SYMBOL(trace_seq_putc); - -int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len) -{ - if (s->full) - return 0; - - if (len > ((PAGE_SIZE - 1) - s->len)) { - s->full = 1; - return 0; - } - - memcpy(s->buffer + s->len, mem, len); - s->len += len; - - return len; -} - -int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, size_t len) -{ - unsigned char hex[HEX_CHARS]; - const unsigned char *data = mem; - int i, j; - - if (s->full) - return 0; - -#ifdef __BIG_ENDIAN - for (i = 0, j = 0; i < len; i++) { -#else - for (i = len-1, j = 0; i >= 0; i--) { -#endif - hex[j++] = hex_asc_hi(data[i]); - hex[j++] = hex_asc_lo(data[i]); - } - hex[j++] = ' '; - - return trace_seq_putmem(s, hex, j); -} - -void *trace_seq_reserve(struct trace_seq *s, size_t len) -{ - void *ret; - - if (s->full) - return NULL; - - if (len > ((PAGE_SIZE - 1) - s->len)) { - s->full = 1; - return NULL; - } - - ret = s->buffer + s->len; - s->len += len; - - return ret; -} - -int trace_seq_path(struct trace_seq *s, const struct path *path) -{ - unsigned char *p; - - if (s->full) - return 0; - - if (s->len >= (PAGE_SIZE - 1)) { - s->full = 1; - return 0; - } - - p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len); - if (!IS_ERR(p)) { - p = mangle_path(s->buffer + s->len, p, "\n"); - if (p) { - s->len = p - s->buffer; - return 1; - } - } else { - s->buffer[s->len++] = '?'; - return 1; - } - - s->full = 1; - return 0; -} - const char * ftrace_print_flags_seq(struct trace_seq *p, const char *delim, unsigned long flags, diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h index 127a9d8c8357..bf7daf2237ed 100644 --- a/kernel/trace/trace_output.h +++ b/kernel/trace/trace_output.h @@ -35,9 +35,6 @@ trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry); extern int __unregister_ftrace_event(struct trace_event *event); extern struct rw_semaphore trace_event_sem; -#define MAX_MEMHEX_BYTES 8 -#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1) - #define SEQ_PUT_FIELD_RET(s, x) \ do { \ if (!trace_seq_putmem(s, &(x), sizeof(x))) \ diff --git a/kernel/trace/trace_seq.c b/kernel/trace/trace_seq.c new file mode 100644 index 000000000000..5ba99c6cf834 --- /dev/null +++ b/kernel/trace/trace_seq.c @@ -0,0 +1,303 @@ +/* + * trace_seq.c + * + * Copyright (C) 2008-2014 Red Hat Inc, Steven Rostedt + * + */ +#include +#include +#include + +int trace_print_seq(struct seq_file *m, struct trace_seq *s) +{ + int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len; + int ret; + + ret = seq_write(m, s->buffer, len); + + /* + * Only reset this buffer if we successfully wrote to the + * seq_file buffer. + */ + if (!ret) + trace_seq_init(s); + + return ret; +} + +/** + * trace_seq_printf - sequence printing of trace information + * @s: trace sequence descriptor + * @fmt: printf format string + * + * It returns 0 if the trace oversizes the buffer's free + * space, 1 otherwise. + * + * The tracer may use either sequence operations or its own + * copy to user routines. To simplify formating of a trace + * trace_seq_printf is used to store strings into a special + * buffer (@s). Then the output may be either used by + * the sequencer or pulled into another buffer. + */ +int +trace_seq_printf(struct trace_seq *s, const char *fmt, ...) +{ + int len = (PAGE_SIZE - 1) - s->len; + va_list ap; + int ret; + + if (s->full || !len) + return 0; + + va_start(ap, fmt); + ret = vsnprintf(s->buffer + s->len, len, fmt, ap); + va_end(ap); + + /* If we can't write it all, don't bother writing anything */ + if (ret >= len) { + s->full = 1; + return 0; + } + + s->len += ret; + + return 1; +} +EXPORT_SYMBOL_GPL(trace_seq_printf); + +/** + * trace_seq_bitmask - put a list of longs as a bitmask print output + * @s: trace sequence descriptor + * @maskp: points to an array of unsigned longs that represent a bitmask + * @nmaskbits: The number of bits that are valid in @maskp + * + * It returns 0 if the trace oversizes the buffer's free + * space, 1 otherwise. + * + * Writes a ASCII representation of a bitmask string into @s. + */ +int +trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp, + int nmaskbits) +{ + int len = (PAGE_SIZE - 1) - s->len; + int ret; + + if (s->full || !len) + return 0; + + ret = bitmap_scnprintf(s->buffer, len, maskp, nmaskbits); + s->len += ret; + + return 1; +} +EXPORT_SYMBOL_GPL(trace_seq_bitmask); + +/** + * trace_seq_vprintf - sequence printing of trace information + * @s: trace sequence descriptor + * @fmt: printf format string + * + * The tracer may use either sequence operations or its own + * copy to user routines. To simplify formating of a trace + * trace_seq_printf is used to store strings into a special + * buffer (@s). Then the output may be either used by + * the sequencer or pulled into another buffer. + */ +int +trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args) +{ + int len = (PAGE_SIZE - 1) - s->len; + int ret; + + if (s->full || !len) + return 0; + + ret = vsnprintf(s->buffer + s->len, len, fmt, args); + + /* If we can't write it all, don't bother writing anything */ + if (ret >= len) { + s->full = 1; + return 0; + } + + s->len += ret; + + return len; +} +EXPORT_SYMBOL_GPL(trace_seq_vprintf); + +int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) +{ + int len = (PAGE_SIZE - 1) - s->len; + int ret; + + if (s->full || !len) + return 0; + + ret = bstr_printf(s->buffer + s->len, len, fmt, binary); + + /* If we can't write it all, don't bother writing anything */ + if (ret >= len) { + s->full = 1; + return 0; + } + + s->len += ret; + + return len; +} + +/** + * trace_seq_puts - trace sequence printing of simple string + * @s: trace sequence descriptor + * @str: simple string to record + * + * The tracer may use either the sequence operations or its own + * copy to user routines. This function records a simple string + * into a special buffer (@s) for later retrieval by a sequencer + * or other mechanism. + */ +int trace_seq_puts(struct trace_seq *s, const char *str) +{ + int len = strlen(str); + + if (s->full) + return 0; + + if (len > ((PAGE_SIZE - 1) - s->len)) { + s->full = 1; + return 0; + } + + memcpy(s->buffer + s->len, str, len); + s->len += len; + + return len; +} + +int trace_seq_putc(struct trace_seq *s, unsigned char c) +{ + if (s->full) + return 0; + + if (s->len >= (PAGE_SIZE - 1)) { + s->full = 1; + return 0; + } + + s->buffer[s->len++] = c; + + return 1; +} +EXPORT_SYMBOL(trace_seq_putc); + +int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len) +{ + if (s->full) + return 0; + + if (len > ((PAGE_SIZE - 1) - s->len)) { + s->full = 1; + return 0; + } + + memcpy(s->buffer + s->len, mem, len); + s->len += len; + + return len; +} + +#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1) + +int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, size_t len) +{ + unsigned char hex[HEX_CHARS]; + const unsigned char *data = mem; + int i, j; + + if (s->full) + return 0; + +#ifdef __BIG_ENDIAN + for (i = 0, j = 0; i < len; i++) { +#else + for (i = len-1, j = 0; i >= 0; i--) { +#endif + hex[j++] = hex_asc_hi(data[i]); + hex[j++] = hex_asc_lo(data[i]); + } + hex[j++] = ' '; + + return trace_seq_putmem(s, hex, j); +} + +void *trace_seq_reserve(struct trace_seq *s, size_t len) +{ + void *ret; + + if (s->full) + return NULL; + + if (len > ((PAGE_SIZE - 1) - s->len)) { + s->full = 1; + return NULL; + } + + ret = s->buffer + s->len; + s->len += len; + + return ret; +} + +int trace_seq_path(struct trace_seq *s, const struct path *path) +{ + unsigned char *p; + + if (s->full) + return 0; + + if (s->len >= (PAGE_SIZE - 1)) { + s->full = 1; + return 0; + } + + p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len); + if (!IS_ERR(p)) { + p = mangle_path(s->buffer + s->len, p, "\n"); + if (p) { + s->len = p - s->buffer; + return 1; + } + } else { + s->buffer[s->len++] = '?'; + return 1; + } + + s->full = 1; + return 0; +} + +ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) +{ + int len; + int ret; + + if (!cnt) + return 0; + + if (s->len <= s->readpos) + return -EBUSY; + + len = s->len - s->readpos; + if (cnt > len) + cnt = len; + ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt); + if (ret == cnt) + return -EFAULT; + + cnt -= ret; + + s->readpos += cnt; + return cnt; +} -- cgit v1.2.3 From 36aabfff50b6a03bcfd2c3cfbd7b83eb0a9ce0c1 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 20 Jun 2014 17:38:01 -0400 Subject: tracing: Clean up trace_seq.c For using trace_seq_*() functions in NMI context, I posted a patch to move it to the lib/ directory. This caused Andrew Morton to take a look at the code. He went through and gave a lot of comments about missing kernel doc, inconsistent types for the save variable, mix match of EXPORT_SYMBOL_GPL() and EXPORT_SYMBOL() as well as missing EXPORT_SYMBOL*()s. There were a few comments about the way variables were being compared (int vs uint). All these were good review comments and should be implemented regardless of if trace_seq.c should be moved to lib/ or not. Signed-off-by: Steven Rostedt --- kernel/trace/trace_seq.c | 207 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 175 insertions(+), 32 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_seq.c b/kernel/trace/trace_seq.c index 5ba99c6cf834..0fabca773e51 100644 --- a/kernel/trace/trace_seq.c +++ b/kernel/trace/trace_seq.c @@ -3,21 +3,55 @@ * * Copyright (C) 2008-2014 Red Hat Inc, Steven Rostedt * + * The trace_seq is a handy tool that allows you to pass a descriptor around + * to a buffer that other functions can write to. It is similar to the + * seq_file functionality but has some differences. + * + * To use it, the trace_seq must be initialized with trace_seq_init(). + * This will set up the counters within the descriptor. You can call + * trace_seq_init() more than once to reset the trace_seq to start + * from scratch. + * + * The buffer size is currently PAGE_SIZE, although it may become dynamic + * in the future. + * + * A write to the buffer will either succed or fail. That is, unlike + * sprintf() there will not be a partial write (well it may write into + * the buffer but it wont update the pointers). This allows users to + * try to write something into the trace_seq buffer and if it fails + * they can flush it and try again. + * */ #include #include #include +/* How much buffer is left on the trace_seq? */ +#define TRACE_SEQ_BUF_LEFT(s) ((PAGE_SIZE - 1) - (s)->len) + +/* How much buffer is written? */ +#define TRACE_SEQ_BUF_USED(s) min((s)->len, (unsigned int)(PAGE_SIZE - 1)) + +/** + * trace_print_seq - move the contents of trace_seq into a seq_file + * @m: the seq_file descriptor that is the destination + * @s: the trace_seq descriptor that is the source. + * + * Returns 0 on success and non zero on error. If it succeeds to + * write to the seq_file it will reset the trace_seq, otherwise + * it does not modify the trace_seq to let the caller try again. + */ int trace_print_seq(struct seq_file *m, struct trace_seq *s) { - int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len; + unsigned int len = TRACE_SEQ_BUF_USED(s); int ret; ret = seq_write(m, s->buffer, len); /* * Only reset this buffer if we successfully wrote to the - * seq_file buffer. + * seq_file buffer. This lets the caller try again or + * do something else with the contents. */ if (!ret) trace_seq_init(s); @@ -30,19 +64,20 @@ int trace_print_seq(struct seq_file *m, struct trace_seq *s) * @s: trace sequence descriptor * @fmt: printf format string * - * It returns 0 if the trace oversizes the buffer's free - * space, 1 otherwise. - * * The tracer may use either sequence operations or its own * copy to user routines. To simplify formating of a trace - * trace_seq_printf is used to store strings into a special + * trace_seq_printf() is used to store strings into a special * buffer (@s). Then the output may be either used by * the sequencer or pulled into another buffer. + * + * Returns 1 if we successfully written all the contents to + * the buffer. + * Returns 0 if we the length to write is bigger than the + * reserved buffer space. In this case, nothing gets written. */ -int -trace_seq_printf(struct trace_seq *s, const char *fmt, ...) +int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) { - int len = (PAGE_SIZE - 1) - s->len; + unsigned int len = TRACE_SEQ_BUF_LEFT(s); va_list ap; int ret; @@ -66,21 +101,22 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...) EXPORT_SYMBOL_GPL(trace_seq_printf); /** - * trace_seq_bitmask - put a list of longs as a bitmask print output + * trace_seq_bitmask - write a bitmask array in its ASCII representation * @s: trace sequence descriptor * @maskp: points to an array of unsigned longs that represent a bitmask * @nmaskbits: The number of bits that are valid in @maskp * - * It returns 0 if the trace oversizes the buffer's free - * space, 1 otherwise. - * * Writes a ASCII representation of a bitmask string into @s. + * + * Returns 1 if we successfully written all the contents to + * the buffer. + * Returns 0 if we the length to write is bigger than the + * reserved buffer space. In this case, nothing gets written. */ -int -trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp, - int nmaskbits) +int trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp, + int nmaskbits) { - int len = (PAGE_SIZE - 1) - s->len; + unsigned int len = TRACE_SEQ_BUF_LEFT(s); int ret; if (s->full || !len) @@ -103,11 +139,12 @@ EXPORT_SYMBOL_GPL(trace_seq_bitmask); * trace_seq_printf is used to store strings into a special * buffer (@s). Then the output may be either used by * the sequencer or pulled into another buffer. + * + * Returns how much it wrote to the buffer. */ -int -trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args) +int trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args) { - int len = (PAGE_SIZE - 1) - s->len; + unsigned int len = TRACE_SEQ_BUF_LEFT(s); int ret; if (s->full || !len) @@ -127,9 +164,26 @@ trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args) } EXPORT_SYMBOL_GPL(trace_seq_vprintf); +/** + * trace_seq_bprintf - Write the printf string from binary arguments + * @s: trace sequence descriptor + * @fmt: The format string for the @binary arguments + * @binary: The binary arguments for @fmt. + * + * When recording in a fast path, a printf may be recorded with just + * saving the format and the arguments as they were passed to the + * function, instead of wasting cycles converting the arguments into + * ASCII characters. Instead, the arguments are saved in a 32 bit + * word array that is defined by the format string constraints. + * + * This function will take the format and the binary array and finish + * the conversion into the ASCII string within the buffer. + * + * Returns how much it wrote to the buffer. + */ int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) { - int len = (PAGE_SIZE - 1) - s->len; + unsigned int len = TRACE_SEQ_BUF_LEFT(s); int ret; if (s->full || !len) @@ -147,6 +201,7 @@ int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) return len; } +EXPORT_SYMBOL_GPL(trace_seq_bprintf); /** * trace_seq_puts - trace sequence printing of simple string @@ -157,15 +212,17 @@ int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) * copy to user routines. This function records a simple string * into a special buffer (@s) for later retrieval by a sequencer * or other mechanism. + * + * Returns how much it wrote to the buffer. */ int trace_seq_puts(struct trace_seq *s, const char *str) { - int len = strlen(str); + unsigned int len = strlen(str); if (s->full) return 0; - if (len > ((PAGE_SIZE - 1) - s->len)) { + if (len > TRACE_SEQ_BUF_LEFT(s)) { s->full = 1; return 0; } @@ -175,13 +232,26 @@ int trace_seq_puts(struct trace_seq *s, const char *str) return len; } +EXPORT_SYMBOL_GPL(trace_seq_puts); +/** + * trace_seq_putc - trace sequence printing of simple character + * @s: trace sequence descriptor + * @c: simple character to record + * + * The tracer may use either the sequence operations or its own + * copy to user routines. This function records a simple charater + * into a special buffer (@s) for later retrieval by a sequencer + * or other mechanism. + * + * Returns how much it wrote to the buffer. + */ int trace_seq_putc(struct trace_seq *s, unsigned char c) { if (s->full) return 0; - if (s->len >= (PAGE_SIZE - 1)) { + if (TRACE_SEQ_BUF_LEFT(s) < 1) { s->full = 1; return 0; } @@ -190,14 +260,26 @@ int trace_seq_putc(struct trace_seq *s, unsigned char c) return 1; } -EXPORT_SYMBOL(trace_seq_putc); +EXPORT_SYMBOL_GPL(trace_seq_putc); -int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len) +/** + * trace_seq_putmem - write raw data into the trace_seq buffer + * @s: trace sequence descriptor + * @mem: The raw memory to copy into the buffer + * @len: The length of the raw memory to copy (in bytes) + * + * There may be cases where raw memory needs to be written into the + * buffer and a strcpy() would not work. Using this function allows + * for such cases. + * + * Returns how much it wrote to the buffer. + */ +int trace_seq_putmem(struct trace_seq *s, const void *mem, unsigned int len) { if (s->full) return 0; - if (len > ((PAGE_SIZE - 1) - s->len)) { + if (len > TRACE_SEQ_BUF_LEFT(s)) { s->full = 1; return 0; } @@ -207,10 +289,24 @@ int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len) return len; } +EXPORT_SYMBOL_GPL(trace_seq_putmem); #define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1) -int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, size_t len) +/** + * trace_seq_putmem_hex - write raw memory into the buffer in ASCII hex + * @s: trace sequence descriptor + * @mem: The raw memory to write its hex ASCII representation of + * @len: The length of the raw memory to copy (in bytes) + * + * This is similar to trace_seq_putmem() except instead of just copying the + * raw memory into the buffer it writes its ASCII representation of it + * in hex characters. + * + * Returns how much it wrote to the buffer. + */ +int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, + unsigned int len) { unsigned char hex[HEX_CHARS]; const unsigned char *data = mem; @@ -231,15 +327,27 @@ int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, size_t len) return trace_seq_putmem(s, hex, j); } +EXPORT_SYMBOL_GPL(trace_seq_putmem_hex); -void *trace_seq_reserve(struct trace_seq *s, size_t len) +/** + * trace_seq_reserve - reserve space on the sequence buffer + * @s: trace sequence descriptor + * @len: The amount to reserver. + * + * If for some reason there is a need to save some space on the + * buffer to fill in later, this function is used for that purpose. + * The given length will be reserved and the pointer to that + * location on the buffer is returned, unless there is not enough + * buffer left to hold the given length then NULL is returned. + */ +void *trace_seq_reserve(struct trace_seq *s, unsigned int len) { void *ret; if (s->full) return NULL; - if (len > ((PAGE_SIZE - 1) - s->len)) { + if (len > TRACE_SEQ_BUF_LEFT(s)) { s->full = 1; return NULL; } @@ -249,7 +357,20 @@ void *trace_seq_reserve(struct trace_seq *s, size_t len) return ret; } +EXPORT_SYMBOL_GPL(trace_seq_reserve); +/** + * trace_seq_path - copy a path into the sequence buffer + * @s: trace sequence descriptor + * @path: path to write into the sequence buffer. + * + * Write a path name into the sequence buffer. + * + * Returns 1 if we successfully written all the contents to + * the buffer. + * Returns 0 if we the length to write is bigger than the + * reserved buffer space. In this case, nothing gets written. + */ int trace_seq_path(struct trace_seq *s, const struct path *path) { unsigned char *p; @@ -257,7 +378,7 @@ int trace_seq_path(struct trace_seq *s, const struct path *path) if (s->full) return 0; - if (s->len >= (PAGE_SIZE - 1)) { + if (TRACE_SEQ_BUF_LEFT(s) < 1) { s->full = 1; return 0; } @@ -277,8 +398,29 @@ int trace_seq_path(struct trace_seq *s, const struct path *path) s->full = 1; return 0; } +EXPORT_SYMBOL_GPL(trace_seq_path); -ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) +/** + * trace_seq_to_user - copy the squence buffer to user space + * @s: trace sequence descriptor + * @ubuf: The userspace memory location to copy to + * @cnt: The amount to copy + * + * Copies the sequence buffer into the userspace memory pointed to + * by @ubuf. It starts from the last read position (@s->readpos) + * and writes up to @cnt characters or till it reaches the end of + * the content in the buffer (@s->len), which ever comes first. + * + * On success, it returns a positive number of the number of bytes + * it copied. + * + * On failure it returns -EBUSY if all of the content in the + * sequence has been already read, which includes nothing in the + * sequenc (@s->len == @s->readpos). + * + * Returns -EFAULT if the copy to userspace fails. + */ +int trace_seq_to_user(struct trace_seq *s, char __user *ubuf, int cnt) { int len; int ret; @@ -301,3 +443,4 @@ ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) s->readpos += cnt; return cnt; } +EXPORT_SYMBOL_GPL(trace_seq_to_user); -- cgit v1.2.3 From 6d2289f3faa71dcc5bba15c7aeba4f31c185b6df Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 20 Jun 2014 23:31:26 -0400 Subject: tracing: Make trace_seq_putmem_hex() more robust Currently trace_seq_putmem_hex() can only take as a parameter a pointer to something that is 8 bytes or less, otherwise it will overflow the buffer. This is protected by a macro that encompasses the call to trace_seq_putmem_hex() that has a BUILD_BUG_ON() for the variable before it is passed in. This is not very robust and if trace_seq_putmem_hex() ever gets used outside that macro it will cause issues. Instead of only being able to produce a hex output of memory that is for a single word, change it to be more robust and allow any size input. Signed-off-by: Steven Rostedt --- kernel/trace/trace_output.h | 1 - kernel/trace/trace_seq.c | 26 +++++++++++++++++++------- 2 files changed, 19 insertions(+), 8 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h index bf7daf2237ed..80b25b585a70 100644 --- a/kernel/trace/trace_output.h +++ b/kernel/trace/trace_output.h @@ -43,7 +43,6 @@ do { \ #define SEQ_PUT_HEX_FIELD_RET(s, x) \ do { \ - BUILD_BUG_ON(sizeof(x) > MAX_MEMHEX_BYTES); \ if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \ return TRACE_TYPE_PARTIAL_LINE; \ } while (0) diff --git a/kernel/trace/trace_seq.c b/kernel/trace/trace_seq.c index 0fabca773e51..88c0f80f0a1f 100644 --- a/kernel/trace/trace_seq.c +++ b/kernel/trace/trace_seq.c @@ -291,6 +291,7 @@ int trace_seq_putmem(struct trace_seq *s, const void *mem, unsigned int len) } EXPORT_SYMBOL_GPL(trace_seq_putmem); +#define MAX_MEMHEX_BYTES 8U #define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1) /** @@ -310,22 +311,33 @@ int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, { unsigned char hex[HEX_CHARS]; const unsigned char *data = mem; + unsigned int start_len; int i, j; + int cnt = 0; if (s->full) return 0; + while (len) { + start_len = min(len, HEX_CHARS - 1); #ifdef __BIG_ENDIAN - for (i = 0, j = 0; i < len; i++) { + for (i = 0, j = 0; i < start_len; i++) { #else - for (i = len-1, j = 0; i >= 0; i--) { + for (i = start_len-1, j = 0; i >= 0; i--) { #endif - hex[j++] = hex_asc_hi(data[i]); - hex[j++] = hex_asc_lo(data[i]); - } - hex[j++] = ' '; + hex[j++] = hex_asc_hi(data[i]); + hex[j++] = hex_asc_lo(data[i]); + } + if (WARN_ON_ONCE(j == 0 || j/2 > len)) + break; + + /* j increments twice per loop */ + len -= j / 2; + hex[j++] = ' '; - return trace_seq_putmem(s, hex, j); + cnt += trace_seq_putmem(s, hex, j); + } + return cnt; } EXPORT_SYMBOL_GPL(trace_seq_putmem_hex); -- cgit v1.2.3 From 9096032fbcdcdb80b76f1046346499e20417988e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 23 Jun 2014 16:42:07 -0400 Subject: tracing: Remove trace_seq_reserve() trace_seq_reserve() has no users in the kernel, it just wastes space. Remove it. Cc: Eduard - Gabriel Munteanu Signed-off-by: Steven Rostedt --- kernel/trace/trace_seq.c | 30 ------------------------------ 1 file changed, 30 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_seq.c b/kernel/trace/trace_seq.c index 88c0f80f0a1f..1f24ed99dca2 100644 --- a/kernel/trace/trace_seq.c +++ b/kernel/trace/trace_seq.c @@ -341,36 +341,6 @@ int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, } EXPORT_SYMBOL_GPL(trace_seq_putmem_hex); -/** - * trace_seq_reserve - reserve space on the sequence buffer - * @s: trace sequence descriptor - * @len: The amount to reserver. - * - * If for some reason there is a need to save some space on the - * buffer to fill in later, this function is used for that purpose. - * The given length will be reserved and the pointer to that - * location on the buffer is returned, unless there is not enough - * buffer left to hold the given length then NULL is returned. - */ -void *trace_seq_reserve(struct trace_seq *s, unsigned int len) -{ - void *ret; - - if (s->full) - return NULL; - - if (len > TRACE_SEQ_BUF_LEFT(s)) { - s->full = 1; - return NULL; - } - - ret = s->buffer + s->len; - s->len += len; - - return ret; -} -EXPORT_SYMBOL_GPL(trace_seq_reserve); - /** * trace_seq_path - copy a path into the sequence buffer * @s: trace sequence descriptor -- cgit v1.2.3 From 3f4d8f78a07dba1cb333ce749bd6a15c1ada362d Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Thu, 26 Jun 2014 19:14:31 +0200 Subject: tracing: Remove unnecessary null test before debugfs_remove() This fixes checkpatch warning: "WARNING: debugfs_remove(NULL) is safe this check is probably not required" Link: http://lkml.kernel.org/p/1403802871-8599-1-git-send-email-fabf@skynet.be Signed-off-by: Fabian Frederick Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index eeb233cbac4f..4caa814d41c3 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6071,10 +6071,8 @@ destroy_trace_option_files(struct trace_option_dentry *topts) if (!topts) return; - for (cnt = 0; topts[cnt].opt; cnt++) { - if (topts[cnt].entry) - debugfs_remove(topts[cnt].entry); - } + for (cnt = 0; topts[cnt].opt; cnt++) + debugfs_remove(topts[cnt].entry); kfree(topts); } -- cgit v1.2.3 From 7b039cb4c5a90d8ea576b17e096f7334457aeb57 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 26 Jun 2014 09:42:41 -0400 Subject: tracing: Add trace_seq_buffer_ptr() helper function There's several locations in the kernel that open code the calculation of the next location in the trace_seq buffer. This is usually done with p->buffer + p->len Instead of having this open coded, supply a helper function in the header to do it for them. This function is called trace_seq_buffer_ptr(). Link: http://lkml.kernel.org/p/20140626220129.452783019@goodmis.org Acked-by: Paolo Bonzini Signed-off-by: Steven Rostedt --- kernel/trace/trace_output.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index b8930f79a04b..c6977d5a9b12 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -75,7 +75,7 @@ ftrace_print_flags_seq(struct trace_seq *p, const char *delim, { unsigned long mask; const char *str; - const char *ret = p->buffer + p->len; + const char *ret = trace_seq_buffer_ptr(p); int i, first = 1; for (i = 0; flag_array[i].name && flags; i++) { @@ -111,7 +111,7 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val, const struct trace_print_flags *symbol_array) { int i; - const char *ret = p->buffer + p->len; + const char *ret = trace_seq_buffer_ptr(p); for (i = 0; symbol_array[i].name; i++) { @@ -122,7 +122,7 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val, break; } - if (ret == (const char *)(p->buffer + p->len)) + if (ret == (const char *)(trace_seq_buffer_ptr(p))) trace_seq_printf(p, "0x%lx", val); trace_seq_putc(p, 0); @@ -137,7 +137,7 @@ ftrace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val, const struct trace_print_flags_u64 *symbol_array) { int i; - const char *ret = p->buffer + p->len; + const char *ret = trace_seq_buffer_ptr(p); for (i = 0; symbol_array[i].name; i++) { @@ -148,7 +148,7 @@ ftrace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val, break; } - if (ret == (const char *)(p->buffer + p->len)) + if (ret == (const char *)(trace_seq_buffer_ptr(p))) trace_seq_printf(p, "0x%llx", val); trace_seq_putc(p, 0); @@ -162,7 +162,7 @@ const char * ftrace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr, unsigned int bitmask_size) { - const char *ret = p->buffer + p->len; + const char *ret = trace_seq_buffer_ptr(p); trace_seq_bitmask(p, bitmask_ptr, bitmask_size * 8); trace_seq_putc(p, 0); @@ -175,7 +175,7 @@ const char * ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len) { int i; - const char *ret = p->buffer + p->len; + const char *ret = trace_seq_buffer_ptr(p); for (i = 0; i < buf_len; i++) trace_seq_printf(p, "%s%2.2x", i == 0 ? "" : " ", buf[i]); -- cgit v1.2.3 From a737e6dd7bfbd6d87ce1525840e6957bcb6e47e6 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 12 Jun 2014 23:56:12 +0900 Subject: ftrace: Get rid of obsolete global_start_up variable It seems like it's a leftover from commit 4104d326b670 ("ftrace: Remove global function list and call function directly"). As it isn't updated at all, checking its value is meaningless. Let's get rid of it. Link: http://lkml.kernel.org/p/1402584972-17824-1-git-send-email-namhyung@kernel.org Signed-off-by: Namhyung Kim Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 8323082dbc21..39df3192e725 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2286,7 +2286,6 @@ static void ftrace_run_update_code(int command) static ftrace_func_t saved_ftrace_func; static int ftrace_start_up; -static int global_start_up; static void control_ops_free(struct ftrace_ops *ops) { @@ -2350,8 +2349,7 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) ftrace_hash_rec_disable(ops, 1); - if (!global_start_up) - ops->flags &= ~FTRACE_OPS_FL_ENABLED; + ops->flags &= ~FTRACE_OPS_FL_ENABLED; command |= FTRACE_UPDATE_CALLS; -- cgit v1.2.3 From 1f61be007e16a5d60b1cf868aa30d87f181e8e14 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 11 Jun 2014 17:06:53 +0900 Subject: ftrace: Fix memory leak on failure path in ftrace_allocate_pages() As struct ftrace_page is managed in a single linked list, it should free from the start page. Link: http://lkml.kernel.org/p/1402474014-28655-1-git-send-email-namhyung@kernel.org Signed-off-by: Namhyung Kim Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 39df3192e725..e14ff4c1713e 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2637,7 +2637,8 @@ ftrace_allocate_pages(unsigned long num_to_init) return start_pg; free_pages: - while (start_pg) { + pg = start_pg; + while (pg) { order = get_count_order(pg->size / ENTRIES_PER_PAGE); free_pages((unsigned long)pg->records, order); start_pg = pg->next; -- cgit v1.2.3 From ef2fbe16ac176c21e3b3013c169e6fdb71ec56c7 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 11 Jun 2014 17:06:54 +0900 Subject: ftrace: Do not copy hash if O_TRUNC is set When a filter file is open for writing and O_TRUNC is set, there's no need to copy and free the filter entries. Link: http://lkml.kernel.org/p/1402474014-28655-2-git-send-email-namhyung@kernel.org Signed-off-by: Namhyung Kim Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index e14ff4c1713e..232b898eebbd 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3010,7 +3010,13 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag, hash = ops->filter_hash; if (file->f_mode & FMODE_WRITE) { - iter->hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, hash); + const int size_bits = FTRACE_HASH_DEFAULT_BITS; + + if (file->f_flags & O_TRUNC) + iter->hash = alloc_ftrace_hash(size_bits); + else + iter->hash = alloc_and_copy_ftrace_hash(size_bits, hash); + if (!iter->hash) { trace_parser_put(&iter->parser); kfree(iter); @@ -3019,10 +3025,6 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag, } } - if ((file->f_mode & FMODE_WRITE) && - (file->f_flags & O_TRUNC)) - ftrace_filter_reset(iter->hash); - if (file->f_mode & FMODE_READ) { iter->pg = ftrace_pages_start; -- cgit v1.2.3 From 3448bac32953f051be91cef6d67025869f08dc4d Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Sat, 7 Jun 2014 13:43:08 +0200 Subject: tracing: Convert pr_warning() to pr_warn() in trace_events.c Convert pr_warning to standard pr_warn Define pr_fmt(fmt) fmt to avoid any future default fmt definition Link: http://lkml.kernel.org/p/1402141388-21144-1-git-send-email-fabf@skynet.be Signed-off-by: Fabian Frederick Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 56 ++++++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 29 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index f99e0b3bca8c..e7a814b3906b 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -8,6 +8,8 @@ * */ +#define pr_fmt(fmt) fmt + #include #include #include @@ -1490,7 +1492,7 @@ event_subsystem_dir(struct trace_array *tr, const char *name, dir->entry = debugfs_create_dir(name, parent); if (!dir->entry) { - pr_warning("Failed to create system directory %s\n", name); + pr_warn("Failed to create system directory %s\n", name); __put_system(system); goto out_free; } @@ -1506,7 +1508,7 @@ event_subsystem_dir(struct trace_array *tr, const char *name, if (!entry) { kfree(system->filter); system->filter = NULL; - pr_warning("Could not create debugfs '%s/filter' entry\n", name); + pr_warn("Could not create debugfs '%s/filter' entry\n", name); } trace_create_file("enable", 0644, dir->entry, dir, @@ -1521,8 +1523,7 @@ event_subsystem_dir(struct trace_array *tr, const char *name, out_fail: /* Only print this message if failed on memory allocation */ if (!dir || !system) - pr_warning("No memory to create event subsystem %s\n", - name); + pr_warn("No memory to create event subsystem %s\n", name); return NULL; } @@ -1550,8 +1551,7 @@ event_create_dir(struct dentry *parent, struct ftrace_event_file *file) name = ftrace_event_name(call); file->dir = debugfs_create_dir(name, d_events); if (!file->dir) { - pr_warning("Could not create debugfs '%s' directory\n", - name); + pr_warn("Could not create debugfs '%s' directory\n", name); return -1; } @@ -1574,8 +1574,8 @@ event_create_dir(struct dentry *parent, struct ftrace_event_file *file) if (list_empty(head)) { ret = call->class->define_fields(call); if (ret < 0) { - pr_warning("Could not initialize trace point" - " events/%s\n", name); + pr_warn("Could not initialize trace point events/%s\n", + name); return -1; } } @@ -1648,8 +1648,7 @@ static int event_init(struct ftrace_event_call *call) if (call->class->raw_init) { ret = call->class->raw_init(call); if (ret < 0 && ret != -ENOSYS) - pr_warn("Could not initialize trace events/%s\n", - name); + pr_warn("Could not initialize trace events/%s\n", name); } return ret; @@ -1894,8 +1893,8 @@ __trace_add_event_dirs(struct trace_array *tr) list_for_each_entry(call, &ftrace_events, list) { ret = __trace_add_new_event(call, tr); if (ret < 0) - pr_warning("Could not create directory for event %s\n", - ftrace_event_name(call)); + pr_warn("Could not create directory for event %s\n", + ftrace_event_name(call)); } } @@ -2207,8 +2206,8 @@ __trace_early_add_event_dirs(struct trace_array *tr) list_for_each_entry(file, &tr->events, list) { ret = event_create_dir(tr->event_dir, file); if (ret < 0) - pr_warning("Could not create directory for event %s\n", - ftrace_event_name(file->event_call)); + pr_warn("Could not create directory for event %s\n", + ftrace_event_name(file->event_call)); } } @@ -2231,8 +2230,8 @@ __trace_early_add_events(struct trace_array *tr) ret = __trace_early_add_new_event(call, tr); if (ret < 0) - pr_warning("Could not create early event %s\n", - ftrace_event_name(call)); + pr_warn("Could not create early event %s\n", + ftrace_event_name(call)); } } @@ -2279,13 +2278,13 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) entry = debugfs_create_file("set_event", 0644, parent, tr, &ftrace_set_event_fops); if (!entry) { - pr_warning("Could not create debugfs 'set_event' entry\n"); + pr_warn("Could not create debugfs 'set_event' entry\n"); return -ENOMEM; } d_events = debugfs_create_dir("events", parent); if (!d_events) { - pr_warning("Could not create debugfs 'events' directory\n"); + pr_warn("Could not create debugfs 'events' directory\n"); return -ENOMEM; } @@ -2461,11 +2460,10 @@ static __init int event_trace_init(void) entry = debugfs_create_file("available_events", 0444, d_tracer, tr, &ftrace_avail_fops); if (!entry) - pr_warning("Could not create debugfs " - "'available_events' entry\n"); + pr_warn("Could not create debugfs 'available_events' entry\n"); if (trace_define_common_fields()) - pr_warning("tracing: Failed to allocate common fields"); + pr_warn("tracing: Failed to allocate common fields"); ret = early_event_add_tracer(d_tracer, tr); if (ret) @@ -2474,7 +2472,7 @@ static __init int event_trace_init(void) #ifdef CONFIG_MODULES ret = register_module_notifier(&trace_module_nb); if (ret) - pr_warning("Failed to register trace events module notifier\n"); + pr_warn("Failed to register trace events module notifier\n"); #endif return 0; } @@ -2578,7 +2576,7 @@ static __init void event_trace_self_tests(void) * it and the self test should not be on. */ if (file->flags & FTRACE_EVENT_FL_ENABLED) { - pr_warning("Enabled event during self test!\n"); + pr_warn("Enabled event during self test!\n"); WARN_ON_ONCE(1); continue; } @@ -2606,8 +2604,8 @@ static __init void event_trace_self_tests(void) ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 1); if (WARN_ON_ONCE(ret)) { - pr_warning("error enabling system %s\n", - system->name); + pr_warn("error enabling system %s\n", + system->name); continue; } @@ -2615,8 +2613,8 @@ static __init void event_trace_self_tests(void) ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 0); if (WARN_ON_ONCE(ret)) { - pr_warning("error disabling system %s\n", - system->name); + pr_warn("error disabling system %s\n", + system->name); continue; } @@ -2630,7 +2628,7 @@ static __init void event_trace_self_tests(void) ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 1); if (WARN_ON_ONCE(ret)) { - pr_warning("error enabling all events\n"); + pr_warn("error enabling all events\n"); return; } @@ -2639,7 +2637,7 @@ static __init void event_trace_self_tests(void) /* reset sysname */ ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 0); if (WARN_ON_ONCE(ret)) { - pr_warning("error disabling all events\n"); + pr_warn("error disabling all events\n"); return; } -- cgit v1.2.3 From 0d7d9a16ce112687487fadb2b490519b45f6c70e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 13 Jun 2014 01:23:50 +0900 Subject: tracing: Add ftrace_graph_notrace boot parameter The ftrace_graph_notrace option is for specifying notrace filter for function graph tracer at boot time. It can be altered after boot using set_graph_notrace file on the debugfs. Link: http://lkml.kernel.org/p/1402590233-22321-2-git-send-email-namhyung@kernel.org Signed-off-by: Namhyung Kim Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 232b898eebbd..17885a27281c 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3884,6 +3884,7 @@ __setup("ftrace_filter=", set_ftrace_filter); #ifdef CONFIG_FUNCTION_GRAPH_TRACER static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata; +static char ftrace_graph_notrace_buf[FTRACE_FILTER_SIZE] __initdata; static int ftrace_set_func(unsigned long *array, int *idx, int size, char *buffer); static int __init set_graph_function(char *str) @@ -3893,16 +3894,29 @@ static int __init set_graph_function(char *str) } __setup("ftrace_graph_filter=", set_graph_function); -static void __init set_ftrace_early_graph(char *buf) +static int __init set_graph_notrace_function(char *str) +{ + strlcpy(ftrace_graph_notrace_buf, str, FTRACE_FILTER_SIZE); + return 1; +} +__setup("ftrace_graph_notrace=", set_graph_notrace_function); + +static void __init set_ftrace_early_graph(char *buf, int enable) { int ret; char *func; + unsigned long *table = ftrace_graph_funcs; + int *count = &ftrace_graph_count; + + if (!enable) { + table = ftrace_graph_notrace_funcs; + count = &ftrace_graph_notrace_count; + } while (buf) { func = strsep(&buf, ","); /* we allow only one expression at a time */ - ret = ftrace_set_func(ftrace_graph_funcs, &ftrace_graph_count, - FTRACE_GRAPH_MAX_FUNCS, func); + ret = ftrace_set_func(table, count, FTRACE_GRAPH_MAX_FUNCS, func); if (ret) printk(KERN_DEBUG "ftrace: function %s not " "traceable\n", func); @@ -3931,7 +3945,9 @@ static void __init set_ftrace_early_filters(void) ftrace_set_early_filter(&global_ops, ftrace_notrace_buf, 0); #ifdef CONFIG_FUNCTION_GRAPH_TRACER if (ftrace_graph_buf[0]) - set_ftrace_early_graph(ftrace_graph_buf); + set_ftrace_early_graph(ftrace_graph_buf, 1); + if (ftrace_graph_notrace_buf[0]) + set_ftrace_early_graph(ftrace_graph_notrace_buf, 0); #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ } -- cgit v1.2.3 From 280d1429b6a67432ead24fb68a504b4c90c3d96d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 13 Jun 2014 01:23:51 +0900 Subject: tracing: Improve message of empty set_graph_notrace file When there's no entry in set_graph_notrace, it'll print below message #### all functions enabled #### While this is technically correct, it's better to print like below: #### no functions disabled #### Link: http://lkml.kernel.org/p/1402590233-22321-3-git-send-email-namhyung@kernel.org Reported-by: Naoya Horiguchi Signed-off-by: Namhyung Kim Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 17885a27281c..ee245c0659a6 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4089,7 +4089,12 @@ static int g_show(struct seq_file *m, void *v) return 0; if (ptr == (unsigned long *)1) { - seq_printf(m, "#### all functions enabled ####\n"); + struct ftrace_graph_data *fgd = m->private; + + if (fgd->table == ftrace_graph_funcs) + seq_printf(m, "#### all functions enabled ####\n"); + else + seq_printf(m, "#### no functions disabled ####\n"); return 0; } -- cgit v1.2.3 From 8c006cf7a2130c4bfb600ae3a496910115804641 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 13 Jun 2014 16:24:06 +0900 Subject: tracing: Improve message of empty set_ftrace_notrace file When there's no entry in set_ftrace_notrace, it'll print nothing, but it's better to print something like below like set_graph_notrace does: #### no functions disabled #### Link: http://lkml.kernel.org/p/1402644246-4649-1-git-send-email-namhyung@kernel.org Reported-by: Naoya Horiguchi Signed-off-by: Namhyung Kim Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index ee245c0659a6..45aac1a742c5 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2835,8 +2835,10 @@ static void *t_start(struct seq_file *m, loff_t *pos) * off, we can short cut and just print out that all * functions are enabled. */ - if (iter->flags & FTRACE_ITER_FILTER && - ftrace_hash_empty(ops->filter_hash)) { + if ((iter->flags & FTRACE_ITER_FILTER && + ftrace_hash_empty(ops->filter_hash)) || + (iter->flags & FTRACE_ITER_NOTRACE && + ftrace_hash_empty(ops->notrace_hash))) { if (*pos > 0) return t_hash_start(m, pos); iter->flags |= FTRACE_ITER_PRINTALL; @@ -2881,7 +2883,10 @@ static int t_show(struct seq_file *m, void *v) return t_hash_show(m, iter); if (iter->flags & FTRACE_ITER_PRINTALL) { - seq_printf(m, "#### all functions enabled ####\n"); + if (iter->flags & FTRACE_ITER_NOTRACE) + seq_printf(m, "#### no functions disabled ####\n"); + else + seq_printf(m, "#### all functions enabled ####\n"); return 0; } -- cgit v1.2.3 From d048a8c7b509f35dd351e1415fe49fa99e4cb7ef Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 13 Jun 2014 01:23:53 +0900 Subject: tracing: Add description of set_graph_notrace to tracing/README It was missing the description of set_graph_notrace file. Add it. Link: http://lkml.kernel.org/p/1402590233-22321-5-git-send-email-namhyung@kernel.org Signed-off-by: Namhyung Kim Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4caa814d41c3..822f6a0894f7 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3663,6 +3663,7 @@ static const char readme_msg[] = #endif #ifdef CONFIG_FUNCTION_GRAPH_TRACER " set_graph_function\t- Trace the nested calls of a function (function_graph)\n" + " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n" " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n" #endif #ifdef CONFIG_TRACER_SNAPSHOT -- cgit v1.2.3 From 099ed151675cd1d2dbeae1dac697975f6a68716d Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 24 Jun 2014 23:50:09 -0400 Subject: tracing: Remove ftrace_stop/start() from reading the trace file Disabling reading and writing to the trace file should not be able to disable all function tracing callbacks. There's other users today (like kprobes and perf). Reading a trace file should not stop those from happening. Cc: stable@vger.kernel.org # 3.0+ Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 384ede311717..f243444a3772 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1396,7 +1396,6 @@ void tracing_start(void) arch_spin_unlock(&global_trace.max_lock); - ftrace_start(); out: raw_spin_unlock_irqrestore(&global_trace.start_lock, flags); } @@ -1443,7 +1442,6 @@ void tracing_stop(void) struct ring_buffer *buffer; unsigned long flags; - ftrace_stop(); raw_spin_lock_irqsave(&global_trace.start_lock, flags); if (global_trace.stop_count++) goto out; -- cgit v1.2.3 From 2448e3493cb3874baa90725c87869455ebf11cd2 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 11 Jul 2014 21:06:38 +0200 Subject: tracing: instance_rmdir() leaks ftrace_event_file->filter instance_rmdir() path destroys the event files but forgets to free file->filter. Change remove_event_file_dir() to free_event_filter(). Link: http://lkml.kernel.org/p/20140711190638.GA19517@redhat.com Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Srikar Dronamraju Cc: Tom Zanussi Cc: "zhangwei(Jovi)" Cc: stable@vger.kernel.org # 3.11+ Fixes: f6a84bdc75b5 "tracing: Introduce remove_event_file_dir()" Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index f99e0b3bca8c..2de53628689f 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -470,6 +470,7 @@ static void remove_event_file_dir(struct ftrace_event_file *file) list_del(&file->list); remove_subsystem(file->system); + free_event_filter(file->filter); kmem_cache_free(file_cachep, file); } -- cgit v1.2.3 From 8abfb8727f4a724d31f9ccfd8013fbd16d539445 Mon Sep 17 00:00:00 2001 From: "zhangwei(Jovi)" Date: Thu, 18 Jul 2013 16:31:05 +0800 Subject: tracing: Add ftrace_trace_stack into __trace_puts/__trace_bputs Currently trace option stacktrace is not applicable for trace_printk with constant string argument, the reason is in __trace_puts/__trace_bputs ftrace_trace_stack is missing. In contrast, when using trace_printk with non constant string argument(will call into __trace_printk/__trace_bprintk), then trace option stacktrace is workable, this inconstant result will confuses users a lot. Link: http://lkml.kernel.org/p/51E7A7C9.9040401@huawei.com Cc: stable@vger.kernel.org # 3.10+ Signed-off-by: zhangwei(Jovi) Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f243444a3772..a6ffc8918dda 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -466,6 +466,9 @@ int __trace_puts(unsigned long ip, const char *str, int size) struct print_entry *entry; unsigned long irq_flags; int alloc; + int pc; + + pc = preempt_count(); if (unlikely(tracing_selftest_running || tracing_disabled)) return 0; @@ -475,7 +478,7 @@ int __trace_puts(unsigned long ip, const char *str, int size) local_save_flags(irq_flags); buffer = global_trace.trace_buffer.buffer; event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, - irq_flags, preempt_count()); + irq_flags, pc); if (!event) return 0; @@ -492,6 +495,7 @@ int __trace_puts(unsigned long ip, const char *str, int size) entry->buf[size] = '\0'; __buffer_unlock_commit(buffer, event); + ftrace_trace_stack(buffer, irq_flags, 4, pc); return size; } @@ -509,6 +513,9 @@ int __trace_bputs(unsigned long ip, const char *str) struct bputs_entry *entry; unsigned long irq_flags; int size = sizeof(struct bputs_entry); + int pc; + + pc = preempt_count(); if (unlikely(tracing_selftest_running || tracing_disabled)) return 0; @@ -516,7 +523,7 @@ int __trace_bputs(unsigned long ip, const char *str) local_save_flags(irq_flags); buffer = global_trace.trace_buffer.buffer; event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size, - irq_flags, preempt_count()); + irq_flags, pc); if (!event) return 0; @@ -525,6 +532,7 @@ int __trace_bputs(unsigned long ip, const char *str) entry->str = str; __buffer_unlock_commit(buffer, event); + ftrace_trace_stack(buffer, irq_flags, 4, pc); return 1; } -- cgit v1.2.3 From 5f8bf2d263a20b986225ae1ed7d6759dc4b93af9 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 15 Jul 2014 11:05:12 -0400 Subject: tracing: Fix graph tracer with stack tracer on other archs Running my ftrace tests on PowerPC, it failed the test that checks if function_graph tracer is affected by the stack tracer. It was. Looking into this, I found that the update_function_graph_func() must be called even if the trampoline function is not changed. This is because archs like PowerPC do not support ftrace_ops being passed by assembly and instead uses a helper function (what the trampoline function points to). Since this function is not changed even when multiple ftrace_ops are added to the code, the test that falls out before calling update_function_graph_func() will miss that the update must still be done. Call update_function_graph_function() for all calls to update_ftrace_function() Cc: stable@vger.kernel.org # 3.3+ Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 5b372e3ed675..ac9d1dad630b 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -265,12 +265,12 @@ static void update_ftrace_function(void) func = ftrace_ops_list_func; } + update_function_graph_func(); + /* If there's no change, then do nothing more here */ if (ftrace_trace_function == func) return; - update_function_graph_func(); - /* * If we are using the list function, it doesn't care * about the function_trace_ops. -- cgit v1.2.3 From f0160a5a2912267c02cfe692eac955c360de5fdf Mon Sep 17 00:00:00 2001 From: "zhangwei(Jovi)" Date: Thu, 18 Jul 2013 16:31:18 +0800 Subject: tracing: Add TRACE_ITER_PRINTK flag check in __trace_puts/__trace_bputs The TRACE_ITER_PRINTK check in __trace_puts/__trace_bputs is missing, so add it, to be consistent with __trace_printk/__trace_bprintk. Those functions are all called by the same function: trace_printk(). Link: http://lkml.kernel.org/p/51E7A7D6.8090900@huawei.com Cc: stable@vger.kernel.org # 3.11+ Signed-off-by: zhangwei(Jovi) Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a6ffc8918dda..bda9621638cc 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -468,6 +468,9 @@ int __trace_puts(unsigned long ip, const char *str, int size) int alloc; int pc; + if (!(trace_flags & TRACE_ITER_PRINTK)) + return 0; + pc = preempt_count(); if (unlikely(tracing_selftest_running || tracing_disabled)) @@ -515,6 +518,9 @@ int __trace_bputs(unsigned long ip, const char *str) int size = sizeof(struct bputs_entry); int pc; + if (!(trace_flags & TRACE_ITER_PRINTK)) + return 0; + pc = preempt_count(); if (unlikely(tracing_selftest_running || tracing_disabled)) -- cgit v1.2.3 From 97b8ee845393701edc06e27ccec2876ff9596019 Mon Sep 17 00:00:00 2001 From: Martin Lau Date: Mon, 9 Jun 2014 23:06:42 -0700 Subject: ring-buffer: Fix polling on trace_pipe ring_buffer_poll_wait() should always put the poll_table to its wait_queue even there is immediate data available. Otherwise, the following epoll and read sequence will eventually hang forever: 1. Put some data to make the trace_pipe ring_buffer read ready first 2. epoll_ctl(efd, EPOLL_CTL_ADD, trace_pipe_fd, ee) 3. epoll_wait() 4. read(trace_pipe_fd) till EAGAIN 5. Add some more data to the trace_pipe ring_buffer 6. epoll_wait() -> this epoll_wait() will block forever ~ During the epoll_ctl(efd, EPOLL_CTL_ADD,...) call in step 2, ring_buffer_poll_wait() returns immediately without adding poll_table, which has poll_table->_qproc pointing to ep_poll_callback(), to its wait_queue. ~ During the epoll_wait() call in step 3 and step 6, ring_buffer_poll_wait() cannot add ep_poll_callback() to its wait_queue because the poll_table->_qproc is NULL and it is how epoll works. ~ When there is new data available in step 6, ring_buffer does not know it has to call ep_poll_callback() because it is not in its wait queue. Hence, block forever. Other poll implementation seems to call poll_wait() unconditionally as the very first thing to do. For example, tcp_poll() in tcp.c. Link: http://lkml.kernel.org/p/20140610060637.GA14045@devbig242.prn2.facebook.com Cc: stable@vger.kernel.org # 2.6.27 Fixes: 2a2cc8f7c4d0 "ftrace: allow the event pipe to be polled" Reviewed-by: Chris Mason Signed-off-by: Martin Lau Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 7c56c3d06943..ff7027199a9a 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -616,10 +616,6 @@ int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu, struct ring_buffer_per_cpu *cpu_buffer; struct rb_irq_work *work; - if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) || - (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu))) - return POLLIN | POLLRDNORM; - if (cpu == RING_BUFFER_ALL_CPUS) work = &buffer->irq_work; else { -- cgit v1.2.3 From 646d7043adf3d92de5d3db1244a82a12628303de Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 11 Jul 2014 14:39:10 -0400 Subject: ftrace: Allow archs to specify if they need a separate function graph trampoline Currently if an arch supports function graph tracing, the core code will just assign the function graph trampoline to the function graph addr that gets called. But as the old method for function graph tracing always calls the function trampoline first and that calls the function graph trampoline, some archs may have the function graph trampoline dependent on operations that were done in the function trampoline. This causes function graph tracer to break on those archs. Instead of having the default be to set the function graph ftrace_ops to the function graph trampoline, have it instead just set it to zero which will keep it from jumping to a trampoline that is not set up to be jumped directly too. Link: http://lkml.kernel.org/r/53BED155.9040607@nvidia.com Reported-by: Tuomas Tynkkynen Tested-by: Tuomas Tynkkynen Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 45aac1a742c5..1776153ea6e0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5366,7 +5366,8 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc, #ifdef CONFIG_DYNAMIC_FTRACE /* Optimize function graph calling (if implemented by arch) */ - global_ops.trampoline = FTRACE_GRAPH_ADDR; + if (FTRACE_GRAPH_TRAMP_ADDR != 0) + global_ops.trampoline = FTRACE_GRAPH_TRAMP_ADDR; #endif ret = ftrace_startup(&global_ops, FTRACE_START_FUNC_RET); @@ -5390,7 +5391,8 @@ void unregister_ftrace_graph(void) ftrace_shutdown(&global_ops, FTRACE_STOP_FUNC_RET); global_ops.flags &= ~FTRACE_OPS_FL_STUB; #ifdef CONFIG_DYNAMIC_FTRACE - global_ops.trampoline = 0; + if (FTRACE_GRAPH_TRAMP_ADDR != 0) + global_ops.trampoline = 0; #endif unregister_pm_notifier(&ftrace_suspend_notifier); unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL); -- cgit v1.2.3 From 3e5454d6568c203bca712e1976b052c345f47b44 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 15 Jul 2014 20:48:13 +0200 Subject: tracing: Kill destroy_preds() and destroy_file_preds() destroy_preds() makes no sense. The only caller, event_remove(), actually wants destroy_file_preds(). __trace_remove_event_call() does destroy_call_preds() which takes care of call->filter. And after the previous change we can simply remove destroy_preds() from event_remove(), we are going to call remove_event_from_tracers() which in turn calls remove_event_file_dir()->free_event_filter(). Link: http://lkml.kernel.org/p/20140715184813.GA20488@redhat.com Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 1 - kernel/trace/trace_events_filter.c | 20 -------------------- 2 files changed, 21 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 2de53628689f..85914edf5059 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1621,7 +1621,6 @@ static void event_remove(struct ftrace_event_call *call) if (file->event_call != call) continue; ftrace_event_enable_disable(file, 0); - destroy_preds(file); /* * The do_for_each_event_file() is * a double loop. After finding the call for this diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 8a8631926a07..30fc66f5cdca 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -810,26 +810,6 @@ void destroy_call_preds(struct ftrace_event_call *call) call->filter = NULL; } -static void destroy_file_preds(struct ftrace_event_file *file) -{ - __free_filter(file->filter); - file->filter = NULL; -} - -/* - * Called when destroying the ftrace_event_file. - * The file is being freed, so we do not need to worry about - * the file being currently used. This is for module code removing - * the tracepoints from within it. - */ -void destroy_preds(struct ftrace_event_file *file) -{ - if (file->event_call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) - destroy_call_preds(file->event_call); - else - destroy_file_preds(file); -} - static struct event_filter *__alloc_filter(void) { struct event_filter *filter; -- cgit v1.2.3 From 57375747b6fac0f6cf7b302c4a8adb9043ea8e3b Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 15 Jul 2014 20:48:16 +0200 Subject: tracing: Kill destroy_call_preds() Remove destroy_call_preds(). Its only caller, __trace_remove_event_call(), can use free_event_filter() and nullify ->filter by hand. Perhaps we could keep this trivial helper although imo it is pointless, but then it should be static in trace_events.c. Link: http://lkml.kernel.org/p/20140715184816.GA20495@redhat.com Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 3 ++- kernel/trace/trace_events_filter.c | 6 ------ 2 files changed, 2 insertions(+), 7 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 85914edf5059..0d8ee29f6b9a 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1748,7 +1748,8 @@ static void __trace_remove_event_call(struct ftrace_event_call *call) { event_remove(call); trace_destroy_fields(call); - destroy_call_preds(call); + free_event_filter(call->filter); + call->filter = NULL; } static int probe_remove_event_call(struct ftrace_event_call *call) diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 30fc66f5cdca..1edec329be29 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -804,12 +804,6 @@ void free_event_filter(struct event_filter *filter) __free_filter(filter); } -void destroy_call_preds(struct ftrace_event_call *call) -{ - __free_filter(call->filter); - call->filter = NULL; -} - static struct event_filter *__alloc_filter(void) { struct event_filter *filter; -- cgit v1.2.3 From b5d09db5ac02660a358f2ac477e85172929e02c8 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 15 Jul 2014 20:48:21 +0200 Subject: tracing: Kill call_filter_disable() It seems that the only purpose of call_filter_disable() is to make filter_disable() less clear and symmetrical, remove it. Link: http://lkml.kernel.org/p/20140715184821.GA20498@redhat.com Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt --- kernel/trace/trace_events_filter.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 1edec329be29..54a125c8467a 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -774,17 +774,12 @@ static void __free_preds(struct event_filter *filter) filter->n_preds = 0; } -static void call_filter_disable(struct ftrace_event_call *call) -{ - call->flags &= ~TRACE_EVENT_FL_FILTERED; -} - static void filter_disable(struct ftrace_event_file *file) { struct ftrace_event_call *call = file->event_call; if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) - call_filter_disable(call); + call->flags &= ~TRACE_EVENT_FL_FILTERED; else file->flags &= ~FTRACE_EVENT_FL_FILTERED; } -- cgit v1.2.3 From ede392a75090aab49b01ecd6f7694bb9130ad461 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 15 Jul 2014 20:48:24 +0200 Subject: tracing/uprobes: Kill the dead TRACE_EVENT_FL_USE_CALL_FILTER logic alloc_trace_uprobe() sets TRACE_EVENT_FL_USE_CALL_FILTER for unknown reason and this is simply wrong. Fortunately this has no effect because register_uprobe_event() clears call->flags after that. Kill both. This trace_uprobe was kzalloc'ed and we rely on this fact anyway. Link: http://lkml.kernel.org/p/20140715184824.GA20505@redhat.com Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Signed-off-by: Steven Rostedt --- kernel/trace/trace_uprobe.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 3c9b97e6b1f4..33ff6a24b802 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -265,7 +265,6 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret) if (is_ret) tu->consumer.ret_handler = uretprobe_dispatcher; init_trace_uprobe_filter(&tu->filter); - tu->tp.call.flags |= TRACE_EVENT_FL_USE_CALL_FILTER; return tu; error: @@ -1292,7 +1291,7 @@ static int register_uprobe_event(struct trace_uprobe *tu) kfree(call->print_fmt); return -ENODEV; } - call->flags = 0; + call->class->reg = trace_uprobe_register; call->data = tu; ret = trace_add_event_call(call); -- cgit v1.2.3 From bb9ef1cb7d8668d6b0038b6f9f783c849135e40d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 15 Jul 2014 20:48:29 +0200 Subject: tracing: Change apply_subsystem_event_filter() paths to check file->system == dir filter_free_subsystem_preds(), filter_free_subsystem_filters() and replace_system_preds() can simply check file->system->subsystem and avoid strcmp(call->class->system). Better yet, we can pass "struct ftrace_subsystem_dir *dir" instead of event_subsystem and just check file->system == dir. Thanks to Namhyung Kim who pointed out that replace_system_preds() can be changed too. Link: http://lkml.kernel.org/p/20140715184829.GA20516@redhat.com Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt --- kernel/trace/trace_events_filter.c | 39 ++++++++++++++++---------------------- 1 file changed, 16 insertions(+), 23 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 54a125c8467a..c2ef5a5f21da 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -842,17 +842,14 @@ static inline void __remove_filter(struct ftrace_event_file *file) remove_filter_string(file->filter); } -static void filter_free_subsystem_preds(struct event_subsystem *system, +static void filter_free_subsystem_preds(struct ftrace_subsystem_dir *dir, struct trace_array *tr) { struct ftrace_event_file *file; - struct ftrace_event_call *call; list_for_each_entry(file, &tr->events, list) { - call = file->event_call; - if (strcmp(call->class->system, system->name) != 0) + if (file->system != dir) continue; - __remove_filter(file); } } @@ -870,15 +867,13 @@ static inline void __free_subsystem_filter(struct ftrace_event_file *file) } } -static void filter_free_subsystem_filters(struct event_subsystem *system, +static void filter_free_subsystem_filters(struct ftrace_subsystem_dir *dir, struct trace_array *tr) { struct ftrace_event_file *file; - struct ftrace_event_call *call; list_for_each_entry(file, &tr->events, list) { - call = file->event_call; - if (strcmp(call->class->system, system->name) != 0) + if (file->system != dir) continue; __free_subsystem_filter(file); } @@ -1724,13 +1719,12 @@ struct filter_list { struct event_filter *filter; }; -static int replace_system_preds(struct event_subsystem *system, +static int replace_system_preds(struct ftrace_subsystem_dir *dir, struct trace_array *tr, struct filter_parse_state *ps, char *filter_string) { struct ftrace_event_file *file; - struct ftrace_event_call *call; struct filter_list *filter_item; struct filter_list *tmp; LIST_HEAD(filter_list); @@ -1738,15 +1732,15 @@ static int replace_system_preds(struct event_subsystem *system, int err; list_for_each_entry(file, &tr->events, list) { - call = file->event_call; - if (strcmp(call->class->system, system->name) != 0) + if (file->system != dir) continue; /* * Try to see if the filter can be applied * (filter arg is ignored on dry_run) */ - err = replace_preds(call, NULL, ps, filter_string, true); + err = replace_preds(file->event_call, NULL, ps, + filter_string, true); if (err) event_set_no_set_filter_flag(file); else @@ -1756,9 +1750,7 @@ static int replace_system_preds(struct event_subsystem *system, list_for_each_entry(file, &tr->events, list) { struct event_filter *filter; - call = file->event_call; - - if (strcmp(call->class->system, system->name) != 0) + if (file->system != dir) continue; if (event_no_set_filter_flag(file)) @@ -1780,7 +1772,8 @@ static int replace_system_preds(struct event_subsystem *system, if (err) goto fail_mem; - err = replace_preds(call, filter, ps, filter_string, false); + err = replace_preds(file->event_call, filter, ps, + filter_string, false); if (err) { filter_disable(file); parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0); @@ -1928,7 +1921,7 @@ int create_event_filter(struct ftrace_event_call *call, * Identical to create_filter() except that it creates a subsystem filter * and always remembers @filter_str. */ -static int create_system_filter(struct event_subsystem *system, +static int create_system_filter(struct ftrace_subsystem_dir *dir, struct trace_array *tr, char *filter_str, struct event_filter **filterp) { @@ -1938,7 +1931,7 @@ static int create_system_filter(struct event_subsystem *system, err = create_filter_start(filter_str, true, &ps, &filter); if (!err) { - err = replace_system_preds(system, tr, ps, filter_str); + err = replace_system_preds(dir, tr, ps, filter_str); if (!err) { /* System filters just show a default message */ kfree(filter->filter_string); @@ -2022,18 +2015,18 @@ int apply_subsystem_event_filter(struct ftrace_subsystem_dir *dir, } if (!strcmp(strstrip(filter_string), "0")) { - filter_free_subsystem_preds(system, tr); + filter_free_subsystem_preds(dir, tr); remove_filter_string(system->filter); filter = system->filter; system->filter = NULL; /* Ensure all filters are no longer used */ synchronize_sched(); - filter_free_subsystem_filters(system, tr); + filter_free_subsystem_filters(dir, tr); __free_filter(filter); goto out_unlock; } - err = create_system_filter(system, tr, filter_string, &filter); + err = create_system_filter(dir, tr, filter_string, &filter); if (filter) { /* * No event actually uses the system filter -- cgit v1.2.3 From 6355d54438bfc3b636cb6453cd091f782fb9b4d7 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 15 Jul 2014 20:48:32 +0200 Subject: tracing: Kill "filter_string" arg of replace_preds() Cosmetic, but replace_preds() doesn't need/use "char *filter_string". Remove it to microsimplify the code. Link: http://lkml.kernel.org/p/20140715184832.GA20519@redhat.com Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt --- kernel/trace/trace_events_filter.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index c2ef5a5f21da..7a8c1528e141 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1546,7 +1546,6 @@ static int fold_pred_tree(struct event_filter *filter, static int replace_preds(struct ftrace_event_call *call, struct event_filter *filter, struct filter_parse_state *ps, - char *filter_string, bool dry_run) { char *operand1 = NULL, *operand2 = NULL; @@ -1739,8 +1738,7 @@ static int replace_system_preds(struct ftrace_subsystem_dir *dir, * Try to see if the filter can be applied * (filter arg is ignored on dry_run) */ - err = replace_preds(file->event_call, NULL, ps, - filter_string, true); + err = replace_preds(file->event_call, NULL, ps, true); if (err) event_set_no_set_filter_flag(file); else @@ -1772,8 +1770,7 @@ static int replace_system_preds(struct ftrace_subsystem_dir *dir, if (err) goto fail_mem; - err = replace_preds(file->event_call, filter, ps, - filter_string, false); + err = replace_preds(file->event_call, filter, ps, false); if (err) { filter_disable(file); parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0); @@ -1895,7 +1892,7 @@ static int create_filter(struct ftrace_event_call *call, err = create_filter_start(filter_str, set_str, &ps, &filter); if (!err) { - err = replace_preds(call, filter, ps, filter_str, false); + err = replace_preds(call, filter, ps, false); if (err && set_str) append_filter_err(ps, filter); } -- cgit v1.2.3 From 1b2f121c1418249e56048d816754b479b3cb6fb3 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 25 Jun 2014 10:39:46 -0400 Subject: ftrace-graph: Remove dependency of ftrace_stop() from ftrace_graph_stop() ftrace_stop() is going away as it disables parts of function tracing that affects users that should not be affected. But ftrace_graph_stop() is built on ftrace_stop(). Here's another example of killing all of function tracing because something went wrong with function graph tracing. Instead of disabling all users of function tracing on function graph error, disable only function graph tracing. A new function is created called ftrace_graph_is_dead(). This is called in strategic paths to prevent function graph from doing more harm and allowing at least a warning to be printed before the system crashes. NOTE: ftrace_stop() is still used until all the archs are converted over to use ftrace_graph_is_dead(). After that, ftrace_stop() will be removed. Reviewed-by: Masami Hiramatsu Cc: Frederic Weisbecker Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 5 ----- kernel/trace/trace_functions_graph.c | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 5 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 1776153ea6e0..8063280fd53d 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5473,9 +5473,4 @@ void ftrace_graph_exit_task(struct task_struct *t) kfree(ret_stack); } - -void ftrace_graph_stop(void) -{ - ftrace_stop(); -} #endif diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 4de3e57f723c..3604690be70b 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -15,6 +15,38 @@ #include "trace.h" #include "trace_output.h" +static bool kill_ftrace_graph; + +/** + * ftrace_graph_is_dead - returns true if ftrace_graph_stop() was called + * + * ftrace_graph_stop() is called when a severe error is detected in + * the function graph tracing. This function is called by the critical + * paths of function graph to keep those paths from doing any more harm. + */ +bool ftrace_graph_is_dead(void) +{ + return kill_ftrace_graph; +} + +/** + * ftrace_graph_stop - set to permanently disable function graph tracincg + * + * In case of an error int function graph tracing, this is called + * to try to keep function graph tracing from causing any more harm. + * Usually this is pretty severe and this is called to try to at least + * get a warning out to the user. + */ +void ftrace_graph_stop(void) +{ + kill_ftrace_graph = true; + /* + * ftrace_stop() will be removed when all archs are updated to + * use ftrace_graph_is_dead() + */ + ftrace_stop(); +} + /* When set, irq functions will be ignored */ static int ftrace_graph_skip_irqs; @@ -92,6 +124,9 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth, unsigned long long calltime; int index; + if (unlikely(ftrace_graph_is_dead())) + return -EBUSY; + if (!current->ret_stack) return -EBUSY; -- cgit v1.2.3 From 545d47b8f359f7e9b5beabc28bfeecb3fc6af1ee Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 25 Jun 2014 11:13:27 -0400 Subject: ftrace-graph: Remove usage of ftrace_stop() in ftrace_graph_stop() All archs now use ftrace_graph_is_dead() to stop function graph tracing. Remove the usage of ftrace_stop() as that is no longer needed. Cc: Frederic Weisbecker Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- kernel/trace/trace_functions_graph.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 3604690be70b..2c944e6c4a9d 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -40,11 +40,6 @@ bool ftrace_graph_is_dead(void) void ftrace_graph_stop(void) { kill_ftrace_graph = true; - /* - * ftrace_stop() will be removed when all archs are updated to - * use ftrace_graph_is_dead() - */ - ftrace_stop(); } /* When set, irq functions will be ignored */ -- cgit v1.2.3 From 1820122a76c6d64adc6e2a7ff438029ffb8d7cb4 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 25 Jun 2014 11:28:20 -0400 Subject: ftrace: Do no disable function tracing on enabling function tracing When function tracing is being updated function_trace_stop is set to keep from tracing the updates. This was fine when function tracing was done from stop machine. But it is no longer done that way and this can cause real tracing to be missed. Remove it. Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 8063280fd53d..0fa1b87db95c 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2261,11 +2261,6 @@ static void ftrace_run_update_code(int command) FTRACE_WARN_ON(ret); if (ret) return; - /* - * Do not call function tracer while we update the code. - * We are in stop machine. - */ - function_trace_stop++; /* * By default we use stop_machine() to modify the code. @@ -2275,8 +2270,6 @@ static void ftrace_run_update_code(int command) */ arch_ftrace_update_code(command); - function_trace_stop--; - ret = ftrace_arch_code_modify_post_process(); FTRACE_WARN_ON(ret); -- cgit v1.2.3 From 1d48d5960f9f24b8afd5b1dbb10bfe17b5f29a35 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 25 Jun 2014 11:54:03 -0400 Subject: ftrace: Remove function_trace_stop check from list func function_trace_stop is no longer used to stop function tracing. Remove the check from __ftrace_ops_list_func(). Also, call FTRACE_WARN_ON() instead of setting function_trace_stop if a ops has no func to call. Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 0fa1b87db95c..70abf97d6e84 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4720,9 +4720,6 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op; int bit; - if (function_trace_stop) - return; - bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX); if (bit < 0) return; @@ -4734,9 +4731,8 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, preempt_disable_notrace(); do_for_each_ftrace_op(op, ftrace_ops_list) { if (ftrace_ops_test(op, ip, regs)) { - if (WARN_ON(!op->func)) { - function_trace_stop = 1; - printk("op=%p %pS\n", op, op); + if (FTRACE_WARN_ON(!op->func)) { + pr_warn("op=%p %pS\n", op, op); goto out; } op->func(ip, parent_ip, op, regs); -- cgit v1.2.3 From 3a636388bae8390d23f31e061c0c6fdc14525786 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 26 Jun 2014 11:24:52 -0400 Subject: tracing: Remove function_trace_stop and HAVE_FUNCTION_TRACE_MCOUNT_TEST All users of function_trace_stop and HAVE_FUNCTION_TRACE_MCOUNT_TEST have been removed. We can safely remove them from the kernel. Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- kernel/trace/Kconfig | 5 ----- kernel/trace/ftrace.c | 3 --- 2 files changed, 8 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index d4409356f40d..a5da09c899dd 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -29,11 +29,6 @@ config HAVE_FUNCTION_GRAPH_FP_TEST help See Documentation/trace/ftrace-design.txt -config HAVE_FUNCTION_TRACE_MCOUNT_TEST - bool - help - See Documentation/trace/ftrace-design.txt - config HAVE_DYNAMIC_FTRACE bool help diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 70abf97d6e84..4c61f28a08e0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -80,9 +80,6 @@ static struct ftrace_ops ftrace_list_end __read_mostly = { int ftrace_enabled __read_mostly; static int last_ftrace_enabled; -/* Quick disabling of function tracer. */ -int function_trace_stop __read_mostly; - /* Current function tracing op */ struct ftrace_ops *function_trace_op __read_mostly = &ftrace_list_end; /* What to set function_trace_op to */ -- cgit v1.2.3 From 021c5b34452d52e51664f09b98cd50c5495e74b6 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Wed, 16 Jul 2014 14:07:13 -0500 Subject: ring-buffer: Always run per-cpu ring buffer resize with schedule_work_on() The code for resizing the trace ring buffers has to run the per-cpu resize on the CPU itself. The code was using preempt_off() and running the code for the current CPU directly, otherwise calling schedule_work_on(). At least on RT this could result in the following: |BUG: sleeping function called from invalid context at kernel/rtmutex.c:673 |in_atomic(): 1, irqs_disabled(): 0, pid: 607, name: bash |3 locks held by bash/607: |CPU: 0 PID: 607 Comm: bash Not tainted 3.12.15-rt25+ #124 |(rt_spin_lock+0x28/0x68) |(free_hot_cold_page+0x84/0x3b8) |(free_buffer_page+0x14/0x20) |(rb_update_pages+0x280/0x338) |(ring_buffer_resize+0x32c/0x3dc) |(free_snapshot+0x18/0x38) |(tracing_set_tracer+0x27c/0x2ac) probably via |cd /sys/kernel/debug/tracing/ |echo 1 > events/enable ; sleep 2 |echo 1024 > buffer_size_kb If we just always use schedule_work_on(), there's no need for the preempt_off(). So do that. Link: http://lkml.kernel.org/p/1405537633-31518-1-git-send-email-cminyard@mvista.com Reported-by: Stanislav Meduna Signed-off-by: Corey Minyard Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 7c56c3d06943..35825a87d6a3 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1693,22 +1693,14 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, if (!cpu_buffer->nr_pages_to_update) continue; - /* The update must run on the CPU that is being updated. */ - preempt_disable(); - if (cpu == smp_processor_id() || !cpu_online(cpu)) { + /* Can't run something on an offline CPU. */ + if (!cpu_online(cpu)) { rb_update_pages(cpu_buffer); cpu_buffer->nr_pages_to_update = 0; } else { - /* - * Can not disable preemption for schedule_work_on() - * on PREEMPT_RT. - */ - preempt_enable(); schedule_work_on(cpu, &cpu_buffer->update_pages_work); - preempt_disable(); } - preempt_enable(); } /* wait for all the updates to complete */ @@ -1746,22 +1738,14 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, get_online_cpus(); - preempt_disable(); - /* The update must run on the CPU that is being updated. */ - if (cpu_id == smp_processor_id() || !cpu_online(cpu_id)) + /* Can't run something on an offline CPU. */ + if (!cpu_online(cpu_id)) rb_update_pages(cpu_buffer); else { - /* - * Can not disable preemption for schedule_work_on() - * on PREEMPT_RT. - */ - preempt_enable(); schedule_work_on(cpu_id, &cpu_buffer->update_pages_work); wait_for_completion(&cpu_buffer->update_done); - preempt_disable(); } - preempt_enable(); cpu_buffer->nr_pages_to_update = 0; put_online_cpus(); -- cgit v1.2.3 From 6508fa761c330a1d2b4ae36199d08dbcb70e3ddb Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Fri, 18 Jul 2014 15:17:27 +0400 Subject: tracing: let user specify tracing_thresh after selecting function_graph Currently, tracing_thresh works only if we specify it before selecting function_graph tracer. If we do the opposite, tracing_thresh will change it's value, but it will not be applied. To fix it, we add update_thresh callback which is called whenever tracing_thresh is updated and for function_graph tracer we register handler which reinitializes tracer depending on tracing_thresh. Link: http://lkml.kernel.org/p/20140718111727.GA3206@stfomichev-desktop.yandex.net Signed-off-by: Stanislav Fomichev Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 65 ++++++++++++++++++++++++++++++++---- kernel/trace/trace.h | 2 ++ kernel/trace/trace_functions_graph.c | 7 ++++ 3 files changed, 67 insertions(+), 7 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4a343db45d4e..2752147ed317 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4201,10 +4201,9 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf, } static ssize_t -tracing_max_lat_read(struct file *filp, char __user *ubuf, - size_t cnt, loff_t *ppos) +tracing_nsecs_read(unsigned long *ptr, char __user *ubuf, + size_t cnt, loff_t *ppos) { - unsigned long *ptr = filp->private_data; char buf[64]; int r; @@ -4216,10 +4215,9 @@ tracing_max_lat_read(struct file *filp, char __user *ubuf, } static ssize_t -tracing_max_lat_write(struct file *filp, const char __user *ubuf, - size_t cnt, loff_t *ppos) +tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf, + size_t cnt, loff_t *ppos) { - unsigned long *ptr = filp->private_data; unsigned long val; int ret; @@ -4232,6 +4230,52 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf, return cnt; } +static ssize_t +tracing_thresh_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos); +} + +static ssize_t +tracing_thresh_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct trace_array *tr = filp->private_data; + int ret; + + mutex_lock(&trace_types_lock); + ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos); + if (ret < 0) + goto out; + + if (tr->current_trace->update_thresh) { + ret = tr->current_trace->update_thresh(tr); + if (ret < 0) + goto out; + } + + ret = cnt; +out: + mutex_unlock(&trace_types_lock); + + return ret; +} + +static ssize_t +tracing_max_lat_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos); +} + +static ssize_t +tracing_max_lat_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos); +} + static int tracing_open_pipe(struct inode *inode, struct file *filp) { struct trace_array *tr = inode->i_private; @@ -5133,6 +5177,13 @@ static int snapshot_raw_open(struct inode *inode, struct file *filp) #endif /* CONFIG_TRACER_SNAPSHOT */ +static const struct file_operations tracing_thresh_fops = { + .open = tracing_open_generic, + .read = tracing_thresh_read, + .write = tracing_thresh_write, + .llseek = generic_file_llseek, +}; + static const struct file_operations tracing_max_lat_fops = { .open = tracing_open_generic, .read = tracing_max_lat_read, @@ -6494,7 +6545,7 @@ static __init int tracer_init_debugfs(void) init_tracer_debugfs(&global_trace, d_tracer); trace_create_file("tracing_thresh", 0644, d_tracer, - &tracing_thresh, &tracing_max_lat_fops); + &global_trace, &tracing_thresh_fops); trace_create_file("README", 0444, d_tracer, NULL, &tracing_readme_fops); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 9258f5a815db..385391fb1d3b 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -339,6 +339,7 @@ struct tracer_flags { * @reset: called when one switches to another tracer * @start: called when tracing is unpaused (echo 1 > tracing_enabled) * @stop: called when tracing is paused (echo 0 > tracing_enabled) + * @update_thresh: called when tracing_thresh is updated * @open: called when the trace file is opened * @pipe_open: called when the trace_pipe file is opened * @close: called when the trace file is released @@ -357,6 +358,7 @@ struct tracer { void (*reset)(struct trace_array *tr); void (*start)(struct trace_array *tr); void (*stop)(struct trace_array *tr); + int (*update_thresh)(struct trace_array *tr); void (*open)(struct trace_iterator *iter); void (*pipe_open)(struct trace_iterator *iter); void (*close)(struct trace_iterator *iter); diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 2c944e6c4a9d..74d98820497c 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -475,6 +475,12 @@ static void graph_trace_reset(struct trace_array *tr) unregister_ftrace_graph(); } +int graph_trace_update_thresh(struct trace_array *tr) +{ + graph_trace_reset(tr); + return graph_trace_init(tr); +} + static int max_bytes_for_cpu; static enum print_line_t @@ -1525,6 +1531,7 @@ static struct trace_event graph_trace_ret_event = { static struct tracer graph_trace __tracer_data = { .name = "function_graph", + .update_thresh = graph_trace_update_thresh, .open = graph_trace_open, .pipe_open = graph_trace_open, .close = graph_trace_close, -- cgit v1.2.3 From b972cc58ced01ba2cf1f67b36bcfbb3ed4fa706e Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Tue, 15 Jul 2014 08:40:20 +0800 Subject: ftrace: Do not copy old hash when resetting Do not waste time copying the old hash if the hash is going to be reset. Just allocate a new hash and free the old one, as that is the same result as copying te old one and then resetting it. Link: http://lkml.kernel.org/p/1405384820-48837-1-git-send-email-wangnan0@huawei.com Signed-off-by: Wang Nan [ SDR: Removed unused ftrace_filter_reset() function ] Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 4c61f28a08e0..762806026561 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2949,13 +2949,6 @@ ftrace_enabled_open(struct inode *inode, struct file *file) return iter ? 0 : -ENOMEM; } -static void ftrace_filter_reset(struct ftrace_hash *hash) -{ - mutex_lock(&ftrace_lock); - ftrace_hash_clear(hash); - mutex_unlock(&ftrace_lock); -} - /** * ftrace_regex_open - initialize function tracer filter files * @ops: The ftrace_ops that hold the hash filters @@ -3720,14 +3713,16 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len, else orig_hash = &ops->notrace_hash; - hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash); + if (reset) + hash = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS); + else + hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash); + if (!hash) { ret = -ENOMEM; goto out_regex_unlock; } - if (reset) - ftrace_filter_reset(hash); if (buf && !ftrace_match_records(hash, buf, len)) { ret = -EINVAL; goto out_regex_unlock; -- cgit v1.2.3 From ba1afef6a47c4133831fefcad4e0d7bf1d0ee99e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 18 Jul 2014 18:07:49 -0400 Subject: tracing: Convert local function_graph functions to static Local functions should be static. Reported-by: kbuild test robot Signed-off-by: Steven Rostedt --- kernel/trace/trace_functions_graph.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 74d98820497c..f0a0c982cde3 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -353,7 +353,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) return ret; } -int trace_graph_thresh_entry(struct ftrace_graph_ent *trace) +static int trace_graph_thresh_entry(struct ftrace_graph_ent *trace) { if (tracing_thresh) return 1; @@ -442,7 +442,7 @@ void set_graph_array(struct trace_array *tr) smp_mb(); } -void trace_graph_thresh_return(struct ftrace_graph_ret *trace) +static void trace_graph_thresh_return(struct ftrace_graph_ret *trace) { if (tracing_thresh && (trace->rettime - trace->calltime < tracing_thresh)) @@ -475,7 +475,7 @@ static void graph_trace_reset(struct trace_array *tr) unregister_ftrace_graph(); } -int graph_trace_update_thresh(struct trace_array *tr) +static int graph_trace_update_thresh(struct trace_array *tr) { graph_trace_reset(tr); return graph_trace_init(tr); @@ -1435,7 +1435,7 @@ static void __print_graph_headers_flags(struct seq_file *s, u32 flags) seq_printf(s, " | | | |\n"); } -void print_graph_headers(struct seq_file *s) +static void print_graph_headers(struct seq_file *s) { print_graph_headers_flags(s, tracer_flags.val); } -- cgit v1.2.3 From 58d4e21e50ff3cc57910a8abc20d7e14375d2f61 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 18 Jul 2014 11:43:01 -0700 Subject: tracing: Fix wraparound problems in "uptime" trace clock The "uptime" trace clock added in: commit 8aacf017b065a805d27467843490c976835eb4a5 tracing: Add "uptime" trace clock that uses jiffies has wraparound problems when the system has been up more than 1 hour 11 minutes and 34 seconds. It converts jiffies to nanoseconds using: (u64)jiffies_to_usecs(jiffy) * 1000ULL but since jiffies_to_usecs() only returns a 32-bit value, it truncates at 2^32 microseconds. An additional problem on 32-bit systems is that the argument is "unsigned long", so fixing the return value only helps until 2^32 jiffies (49.7 days on a HZ=1000 system). Avoid these problems by using jiffies_64 as our basis, and not converting to nanoseconds (we do convert to clock_t because user facing API must not be dependent on internal kernel HZ values). Link: http://lkml.kernel.org/p/99d63c5bfe9b320a3b428d773825a37095bf6a51.1405708254.git.tony.luck@intel.com Cc: stable@vger.kernel.org # 3.10+ Fixes: 8aacf017b065 "tracing: Add "uptime" trace clock that uses jiffies" Signed-off-by: Tony Luck Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 2 +- kernel/trace/trace_clock.c | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index bda9621638cc..291397e66669 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -823,7 +823,7 @@ static struct { { trace_clock_local, "local", 1 }, { trace_clock_global, "global", 1 }, { trace_clock_counter, "counter", 0 }, - { trace_clock_jiffies, "uptime", 1 }, + { trace_clock_jiffies, "uptime", 0 }, { trace_clock, "perf", 1 }, ARCH_TRACE_CLOCKS }; diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index 26dc348332b7..57b67b1f24d1 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c @@ -59,13 +59,14 @@ u64 notrace trace_clock(void) /* * trace_jiffy_clock(): Simply use jiffies as a clock counter. + * Note that this use of jiffies_64 is not completely safe on + * 32-bit systems. But the window is tiny, and the effect if + * we are affected is that we will have an obviously bogus + * timestamp on a trace event - i.e. not life threatening. */ u64 notrace trace_clock_jiffies(void) { - u64 jiffy = jiffies - INITIAL_JIFFIES; - - /* Return nsecs */ - return (u64)jiffies_to_usecs(jiffy) * 1000ULL; + return jiffies_64_to_clock_t(jiffies_64 - INITIAL_JIFFIES); } /* -- cgit v1.2.3 From 0162d621ddf3bd02bf7de324dcf002d9c84c5059 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 23 Jul 2014 15:03:00 -0400 Subject: ftrace: Rename ftrace_ops field from trampolines to nr_trampolines Having two fields within the same struct that is off by one character can be confusing and error prone. Rename the counter "trampolines" to "nr_trampolines" to explicitly show it is a counter and not to be confused by the "trampoline" field. Suggested-by: Oleg Nesterov Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 762806026561..eda69c9f78d0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1513,7 +1513,7 @@ static void ftrace_remove_tramp(struct ftrace_ops *ops, * The tramp_hash entry will be removed at time * of update. */ - ops->trampolines--; + ops->nr_trampolines--; rec->flags &= ~FTRACE_FL_TRAMP; } @@ -1522,7 +1522,7 @@ static void ftrace_clear_tramps(struct dyn_ftrace *rec) struct ftrace_ops *op; do_for_each_ftrace_op(op, ftrace_ops_list) { - if (op->trampolines) + if (op->nr_trampolines) ftrace_remove_tramp(op, rec); } while_for_each_ftrace_op(op); } @@ -1617,7 +1617,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, */ if (ftrace_rec_count(rec) == 1 && ops->trampoline) { rec->flags |= FTRACE_FL_TRAMP; - ops->trampolines++; + ops->nr_trampolines++; } else { /* * If we are adding another function callback @@ -2185,7 +2185,7 @@ static int ftrace_save_ops_tramp_hash(struct ftrace_ops *ops) int size, bits; int ret; - size = ops->trampolines; + size = ops->nr_trampolines; bits = 0; /* * Make the hash size about 1/2 the # found @@ -2239,7 +2239,7 @@ static int ftrace_save_tramp_hashes(void) free_ftrace_hash(op->tramp_hash); op->tramp_hash = NULL; - if (op->trampolines) { + if (op->nr_trampolines) { ret = ftrace_save_ops_tramp_hash(op); if (ret) return ret; -- cgit v1.2.3 From 1b3e5c0936046e7e023149ddc8946d21c2ea20eb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:05:25 +0000 Subject: ftrace: Provide trace clocks monotonic Expose the new NMI safe accessor to clock monotonic to the tracer. Signed-off-by: Thomas Gleixner Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Signed-off-by: John Stultz --- kernel/trace/trace.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f243444a3772..84e2b45c0934 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -806,11 +806,12 @@ static struct { const char *name; int in_ns; /* is this clock in nanoseconds? */ } trace_clocks[] = { - { trace_clock_local, "local", 1 }, - { trace_clock_global, "global", 1 }, - { trace_clock_counter, "counter", 0 }, - { trace_clock_jiffies, "uptime", 1 }, - { trace_clock, "perf", 1 }, + { trace_clock_local, "local", 1 }, + { trace_clock_global, "global", 1 }, + { trace_clock_counter, "counter", 0 }, + { trace_clock_jiffies, "uptime", 1 }, + { trace_clock, "perf", 1 }, + { ktime_get_mono_fast_ns, "mono", 1 }, ARCH_TRACE_CLOCKS }; -- cgit v1.2.3 From 10e83fd01ccbb7122ad2c9dce68fb01bebb3fc46 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 23 Jul 2014 19:45:12 -0400 Subject: ring-buffer: Use rb_page_size() instead of open coded head_page size There's a helper function to get a ring buffer page size (the number of bytes of data recorded on the page), called rb_page_size(). Use that instead of open coding it. Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 35825a87d6a3..d8c267ec5cca 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3763,7 +3763,7 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) if (rb_per_cpu_empty(cpu_buffer)) return NULL; - if (iter->head >= local_read(&iter->head_page->page->commit)) { + if (iter->head >= rb_page_size(iter->head_page)) { rb_inc_iter(iter); goto again; } -- cgit v1.2.3 From 2a0343baa4cc0d4e618898f8bdae8136bbb6e1b2 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 24 Jul 2014 09:56:27 -0400 Subject: ftrace: Fix trampoline hash update check on rec->flags In the loop of ftrace_save_ops_tramp_hash(), it adds all the recs to the ops hash if the rec has only one callback attached and the ops is connected to the rec. It gives a nasty warning and shuts down ftrace if the rec doesn't have a trampoline set for it. But this can happen with the following scenario: # cd /sys/kernel/debug/tracing # echo schedule do_IRQ > set_ftrace_filter # mkdir instances/foo # echo schedule > instances/foo/set_ftrace_filter # echo function_graph > current_function # echo function > instances/foo/current_function # echo nop > instances/foo/current_function The above would then trigger the following warning and disable ftrace: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 3145 at kernel/trace/ftrace.c:2212 ftrace_run_update_code+0xe4/0x15b() Modules linked in: ipt_MASQUERADE sunrpc ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ip [...] CPU: 1 PID: 3145 Comm: bash Not tainted 3.16.0-rc3-test+ #136 Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be filled by O.E.M., BIOS SDBLI944.86P 05/08/2007 0000000000000000 ffffffff81808a88 ffffffff81502130 0000000000000000 ffffffff81040ca1 ffff880077c08000 ffffffff810bd286 0000000000000001 ffffffff81a56830 ffff88007a041be0 ffff88007a872d60 00000000000001be Call Trace: [] ? dump_stack+0x4a/0x75 [] ? warn_slowpath_common+0x7e/0x97 [] ? ftrace_run_update_code+0xe4/0x15b [] ? ftrace_run_update_code+0xe4/0x15b [] ? ftrace_shutdown+0x11c/0x16b [] ? unregister_ftrace_function+0x1e/0x38 [] ? function_trace_reset+0x1a/0x28 [] ? tracing_set_tracer+0xc1/0x276 [] ? tracing_set_trace_write+0x73/0x91 [] ? __sb_start_write+0x9a/0xcc [] ? security_file_permission+0x1b/0x31 [] ? vfs_write+0xac/0x11c [] ? SyS_write+0x60/0x8e [] ? system_call_fastpath+0x16/0x1b ---[ end trace 938c4415cbc7dc96 ]--- ------------[ cut here ]------------ Link: http://lkml.kernel.org/r/20140723120805.GB21376@redhat.com Reported-by: Oleg Nesterov Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index eda69c9f78d0..6ef1989c2b2e 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2208,6 +2208,14 @@ static int ftrace_save_ops_tramp_hash(struct ftrace_ops *ops) if (ftrace_rec_count(rec) == 1 && ftrace_ops_test(ops, rec->ip, rec)) { + /* + * If another ops adds to a rec, the rec will + * lose its trampoline and never get it back + * until all ops are off of it. + */ + if (!(rec->flags & FTRACE_FL_TRAMP)) + continue; + /* This record had better have a trampoline */ if (FTRACE_WARN_ON(!(rec->flags & FTRACE_FL_TRAMP_EN))) return -1; -- cgit v1.2.3 From dc6f03f26f570104a2bb03f9d1deb588026d7c75 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 24 Jul 2014 11:26:11 -0400 Subject: ftrace: Add warning if tramp hash does not match nr_trampolines After adding all the records to the tramp_hash, add a check that makes sure that the number of records added matches the number of records expected to match and do a WARN_ON and disable ftrace if they do not match. Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 6ef1989c2b2e..979bd8cb4349 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2226,6 +2226,9 @@ static int ftrace_save_ops_tramp_hash(struct ftrace_ops *ops) } } while_for_each_ftrace_rec(); + /* The number of recs in the hash must match nr_trampolines */ + FTRACE_WARN_ON(ops->tramp_hash->count != ops->nr_trampolines); + return 0; } -- cgit v1.2.3 From f4be073db878d0e79f74bc36f1642847781791a0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 16 Jul 2014 14:33:29 +0200 Subject: perf: Check permission only for parent tracepoint event There's no need to check cloned event's permission once the parent was already checked. Also the code is checking 'current' process permissions, which is not owner process for cloned events, thus could end up with wrong permission check result. Reported-by: Alexander Yarygin Tested-by: Alexander Yarygin Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Corey Ashford Cc: Frederic Weisbecker Cc: Paul Mackerras Cc: Linus Torvalds Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1405079782-8139-1-git-send-email-jolsa@kernel.org Signed-off-by: Ingo Molnar --- kernel/trace/trace_event_perf.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 5d12bb407b44..4b9c114ee9de 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -30,6 +30,18 @@ static int perf_trace_event_perm(struct ftrace_event_call *tp_event, return ret; } + /* + * We checked and allowed to create parent, + * allow children without checking. + */ + if (p_event->parent) + return 0; + + /* + * It's ok to check current process (owner) permissions in here, + * because code below is called only via perf_event_open syscall. + */ + /* The ftrace function trace is allowed only for root. */ if (ftrace_event_is_function(tp_event)) { if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) -- cgit v1.2.3 From 021de3d904b88b1771a3a2cfc5b75023c391e646 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 6 Aug 2014 15:36:31 -0400 Subject: ring-buffer: Up rb_iter_peek() loop count to 3 After writting a test to try to trigger the bug that caused the ring buffer iterator to become corrupted, I hit another bug: WARNING: CPU: 1 PID: 5281 at kernel/trace/ring_buffer.c:3766 rb_iter_peek+0x113/0x238() Modules linked in: ipt_MASQUERADE sunrpc [...] CPU: 1 PID: 5281 Comm: grep Tainted: G W 3.16.0-rc3-test+ #143 Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be filled by O.E.M., BIOS SDBLI944.86P 05/08/2007 0000000000000000 ffffffff81809a80 ffffffff81503fb0 0000000000000000 ffffffff81040ca1 ffff8800796d6010 ffffffff810c138d ffff8800796d6010 ffff880077438c80 ffff8800796d6010 ffff88007abbe600 0000000000000003 Call Trace: [] ? dump_stack+0x4a/0x75 [] ? warn_slowpath_common+0x7e/0x97 [] ? rb_iter_peek+0x113/0x238 [] ? rb_iter_peek+0x113/0x238 [] ? ring_buffer_iter_peek+0x2d/0x5c [] ? tracing_iter_reset+0x6e/0x96 [] ? s_start+0xd7/0x17b [] ? kmem_cache_alloc_trace+0xda/0xea [] ? seq_read+0x148/0x361 [] ? vfs_read+0x93/0xf1 [] ? SyS_read+0x60/0x8e [] ? tracesys+0xdd/0xe2 Debugging this bug, which triggers when the rb_iter_peek() loops too many times (more than 2 times), I discovered there's a case that can cause that function to legitimately loop 3 times! rb_iter_peek() is different than rb_buffer_peek() as the rb_buffer_peek() only deals with the reader page (it's for consuming reads). The rb_iter_peek() is for traversing the buffer without consuming it, and as such, it can loop for one more reason. That is, if we hit the end of the reader page or any page, it will go to the next page and try again. That is, we have this: 1. iter->head > iter->head_page->page->commit (rb_inc_iter() which moves the iter to the next page) try again 2. event = rb_iter_head_event() event->type_len == RINGBUF_TYPE_TIME_EXTEND rb_advance_iter() try again 3. read the event. But we never get to 3, because the count is greater than 2 and we cause the WARNING and return NULL. Up the counter to 3. Cc: stable@vger.kernel.org # 2.6.37+ Fixes: 69d1b839f7ee "ring-buffer: Bind time extend and data events together" Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index ff7027199a9a..31a9edd7aa93 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1984,7 +1984,7 @@ rb_add_time_stamp(struct ring_buffer_event *event, u64 delta) /** * rb_update_event - update event type and data - * @event: the even to update + * @event: the event to update * @type: the type of event * @length: the size of the event field in the ring buffer * @@ -3764,12 +3764,14 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) return NULL; /* - * We repeat when a time extend is encountered. - * Since the time extend is always attached to a data event, - * we should never loop more than once. - * (We never hit the following condition more than twice). + * We repeat when a time extend is encountered or we hit + * the end of the page. Since the time extend is always attached + * to a data event, we should never loop more than three times. + * Once for going to next page, once on time extend, and + * finally once to get the event. + * (We never hit the following condition more than thrice). */ - if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2)) + if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) return NULL; if (rb_per_cpu_empty(cpu_buffer)) -- cgit v1.2.3 From 651e22f2701b4113989237c3048d17337dd2185c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 6 Aug 2014 14:11:33 -0400 Subject: ring-buffer: Always reset iterator to reader page When performing a consuming read, the ring buffer swaps out a page from the ring buffer with a empty page and this page that was swapped out becomes the new reader page. The reader page is owned by the reader and since it was swapped out of the ring buffer, writers do not have access to it (there's an exception to that rule, but it's out of scope for this commit). When reading the "trace" file, it is a non consuming read, which means that the data in the ring buffer will not be modified. When the trace file is opened, a ring buffer iterator is allocated and writes to the ring buffer are disabled, such that the iterator will not have issues iterating over the data. Although the ring buffer disabled writes, it does not disable other reads, or even consuming reads. If a consuming read happens, then the iterator is reset and starts reading from the beginning again. My tests would sometimes trigger this bug on my i386 box: WARNING: CPU: 0 PID: 5175 at kernel/trace/trace.c:1527 __trace_find_cmdline+0x66/0xaa() Modules linked in: CPU: 0 PID: 5175 Comm: grep Not tainted 3.16.0-rc3-test+ #8 Hardware name: /DG965MQ, BIOS MQ96510J.86A.0372.2006.0605.1717 06/05/2006 00000000 00000000 f09c9e1c c18796b3 c1b5d74c f09c9e4c c103a0e3 c1b5154b f09c9e78 00001437 c1b5d74c 000005f7 c10bd85a c10bd85a c1cac57c f09c9eb0 ed0e0000 f09c9e64 c103a185 00000009 f09c9e5c c1b5154b f09c9e78 f09c9e80^M Call Trace: [] dump_stack+0x4b/0x75 [] warn_slowpath_common+0x7e/0x95 [] ? __trace_find_cmdline+0x66/0xaa [] ? __trace_find_cmdline+0x66/0xaa [] warn_slowpath_fmt+0x33/0x35 [] __trace_find_cmdline+0x66/0xaa^M [] trace_find_cmdline+0x40/0x64 [] trace_print_context+0x27/0xec [] ? trace_seq_printf+0x37/0x5b [] print_trace_line+0x319/0x39b [] ? ring_buffer_read+0x47/0x50 [] s_show+0x192/0x1ab [] ? s_next+0x5a/0x7c [] seq_read+0x267/0x34c [] vfs_read+0x8c/0xef [] ? seq_lseek+0x154/0x154 [] SyS_read+0x54/0x7f [] syscall_call+0x7/0xb ---[ end trace 3f507febd6b4cc83 ]--- >>>> ##### CPU 1 buffer started #### Which was the __trace_find_cmdline() function complaining about the pid in the event record being negative. After adding more test cases, this would trigger more often. Strangely enough, it would never trigger on a single test, but instead would trigger only when running all the tests. I believe that was the case because it required one of the tests to be shutting down via delayed instances while a new test started up. After spending several days debugging this, I found that it was caused by the iterator becoming corrupted. Debugging further, I found out why the iterator became corrupted. It happened with the rb_iter_reset(). As consuming reads may not read the full reader page, and only part of it, there's a "read" field to know where the last read took place. The iterator, must also start at the read position. In the rb_iter_reset() code, if the reader page was disconnected from the ring buffer, the iterator would start at the head page within the ring buffer (where writes still happen). But the mistake there was that it still used the "read" field to start the iterator on the head page, where it should always start at zero because readers never read from within the ring buffer where writes occur. I originally wrote a patch to have it set the iter->head to 0 instead of iter->head_page->read, but then I questioned why it wasn't always setting the iter to point to the reader page, as the reader page is still valid. The list_empty(reader_page->list) just means that it was successful in swapping out. But the reader_page may still have data. There was a bug report a long time ago that was not reproducible that had something about trace_pipe (consuming read) not matching trace (iterator read). This may explain why that happened. Anyway, the correct answer to this bug is to always use the reader page an not reset the iterator to inside the writable ring buffer. Cc: stable@vger.kernel.org # 2.6.28+ Fixes: d769041f8653 "ring_buffer: implement new locking" Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 31a9edd7aa93..b95381ebdd5e 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3357,21 +3357,16 @@ static void rb_iter_reset(struct ring_buffer_iter *iter) struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; /* Iterator usage is expected to have record disabled */ - if (list_empty(&cpu_buffer->reader_page->list)) { - iter->head_page = rb_set_head_page(cpu_buffer); - if (unlikely(!iter->head_page)) - return; - iter->head = iter->head_page->read; - } else { - iter->head_page = cpu_buffer->reader_page; - iter->head = cpu_buffer->reader_page->read; - } + iter->head_page = cpu_buffer->reader_page; + iter->head = cpu_buffer->reader_page->read; + + iter->cache_reader_page = iter->head_page; + iter->cache_read = iter->head; + if (iter->head) iter->read_stamp = cpu_buffer->read_stamp; else iter->read_stamp = iter->head_page->page->time_stamp; - iter->cache_reader_page = cpu_buffer->reader_page; - iter->cache_read = cpu_buffer->read; } /** -- cgit v1.2.3