From fe7de49f9d4e53f24ec9ef762a503f70b562341c Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 20 Oct 2010 16:01:12 -0700 Subject: sched: Make sched_param argument static in sched_setscheduler() callers Andrew Morton pointed out almost all sched_setscheduler() callers are using fixed parameters and can be converted to static. It reduces runtime memory use a little. Signed-off-by: KOSAKI Motohiro Reported-by: Andrew Morton Acked-by: James Morris Cc: Ingo Molnar Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- kernel/trace/trace_selftest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 155a415b3209..562c56e048fd 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -558,7 +558,7 @@ trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr) static int trace_wakeup_test_thread(void *data) { /* Make this a RT thread, doesn't need to be too high */ - struct sched_param param = { .sched_priority = 5 }; + static struct sched_param param = { .sched_priority = 5 }; struct completion *x = data; sched_setscheduler(current, SCHED_FIFO, ¶m); -- cgit v1.2.3 From b595076a180a56d1bb170e6eceda6eb9d76f4cd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 1 Nov 2010 15:38:34 -0400 Subject: tree-wide: fix comment/printk typos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "gadget", "through", "command", "maintain", "maintain", "controller", "address", "between", "initiali[zs]e", "instead", "function", "select", "already", "equal", "access", "management", "hierarchy", "registration", "interest", "relative", "memory", "offset", "already", Signed-off-by: Uwe Kleine-König Signed-off-by: Jiri Kosina --- kernel/trace/trace_entries.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index e3dfecaf13e6..6cf223764be8 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -53,7 +53,7 @@ */ /* - * Function trace entry - function address and parent function addres: + * Function trace entry - function address and parent function address: */ FTRACE_ENTRY(function, ftrace_entry, -- cgit v1.2.3 From becf91f18750cf1c60828aa2ee63a36b05c2e4d0 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 10 Nov 2010 10:05:56 +0100 Subject: [S390] ftrace: build without frame pointers on s390 s390 doesn't need FRAME_POINTERS in order to have a working function tracer. We don't need frame pointers in order to get strack traces since we always have valid backchains by using the -mkernel-backchain gcc option. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- kernel/trace/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index e04b8bcdef88..ea37e2ff4164 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -126,7 +126,7 @@ if FTRACE config FUNCTION_TRACER bool "Kernel Function Tracer" depends on HAVE_FUNCTION_TRACER - select FRAME_POINTER if (!ARM_UNWIND) + select FRAME_POINTER if !ARM_UNWIND && !S390 select KALLSYMS select GENERIC_TRACER select CONTEXT_SWITCH_TRACER -- cgit v1.2.3 From 91e86e560d0b3ce4c5fc64fd2bbb99f856a30a4e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 10 Nov 2010 12:56:12 +0100 Subject: tracing: Fix recursive user stack trace The user stack trace can fault when examining the trace. Which would call the do_page_fault handler, which would trace again, which would do the user stack trace, which would fault and call do_page_fault again ... Thus this is causing a recursive bug. We need to have a recursion detector here. [ Resubmitted by Jiri Olsa ] [ Eric Dumazet recommended using __this_cpu_* instead of __get_cpu_* ] Cc: Eric Dumazet Signed-off-by: Jiri Olsa LKML-Reference: <1289390172-9730-3-git-send-email-jolsa@redhat.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 82d9b8106cd0..ee6a7339cf0e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1284,6 +1284,8 @@ void trace_dump_stack(void) __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count()); } +static DEFINE_PER_CPU(int, user_stack_count); + void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) { @@ -1302,6 +1304,18 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) if (unlikely(in_nmi())) return; + /* + * prevent recursion, since the user stack tracing may + * trigger other kernel events. + */ + preempt_disable(); + if (__this_cpu_read(user_stack_count)) + goto out; + + __this_cpu_inc(user_stack_count); + + + event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, sizeof(*entry), flags, pc); if (!event) @@ -1319,6 +1333,11 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) save_stack_trace_user(&trace); if (!filter_check_discard(call, entry, buffer, event)) ring_buffer_unlock_commit(buffer, event); + + __this_cpu_dec(user_stack_count); + + out: + preempt_enable(); } #ifdef UNUSED -- cgit v1.2.3 From d07335e51df0c6dec202d315fc4f1f7e100eec4e Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 16 Nov 2010 12:52:38 +0100 Subject: block: Rename "block_remap" tracepoint to "block_bio_remap" to clarify the event. Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe --- kernel/trace/blktrace.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 7b8ec0281548..2b8e2ee7c0ef 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -887,7 +887,7 @@ static void blk_add_trace_split(void *ignore, } /** - * blk_add_trace_remap - Add a trace for a remap operation + * blk_add_trace_bio_remap - Add a trace for a bio-remap operation * @ignore: trace callback data parameter (not used) * @q: queue the io is for * @bio: the source bio @@ -899,9 +899,9 @@ static void blk_add_trace_split(void *ignore, * it spans a stripe (or similar). Add a trace for that action. * **/ -static void blk_add_trace_remap(void *ignore, - struct request_queue *q, struct bio *bio, - dev_t dev, sector_t from) +static void blk_add_trace_bio_remap(void *ignore, + struct request_queue *q, struct bio *bio, + dev_t dev, sector_t from) { struct blk_trace *bt = q->blk_trace; struct blk_io_trace_remap r; @@ -1016,7 +1016,7 @@ static void blk_register_tracepoints(void) WARN_ON(ret); ret = register_trace_block_split(blk_add_trace_split, NULL); WARN_ON(ret); - ret = register_trace_block_remap(blk_add_trace_remap, NULL); + ret = register_trace_block_bio_remap(blk_add_trace_bio_remap, NULL); WARN_ON(ret); ret = register_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); WARN_ON(ret); @@ -1025,7 +1025,7 @@ static void blk_register_tracepoints(void) static void blk_unregister_tracepoints(void) { unregister_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); - unregister_trace_block_remap(blk_add_trace_remap, NULL); + unregister_trace_block_bio_remap(blk_add_trace_bio_remap, NULL); unregister_trace_block_split(blk_add_trace_split, NULL); unregister_trace_block_unplug_io(blk_add_trace_unplug_io, NULL); unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL); -- cgit v1.2.3 From 451a3c24b0135bce54542009b5fde43846c7cf67 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 17 Nov 2010 16:26:55 +0100 Subject: BKL: remove extraneous #include The big kernel lock has been removed from all these files at some point, leaving only the #include. Remove this too as a cleanup. Signed-off-by: Arnd Bergmann Signed-off-by: Linus Torvalds --- kernel/trace/trace.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 82d9b8106cd0..042084157980 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From 61c32659b12c44e62de32fbf99f7e4ca783dc38b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 18 Nov 2010 01:39:17 +0100 Subject: tracing: New flag to allow non privileged users to use a trace event This adds a new trace event internal flag that allows them to be used in perf by non privileged users in case of task bound tracing. This is desired for syscalls tracepoint because they don't leak global system informations, like some other tracepoints. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: Steven Rostedt Cc: Li Zefan Cc: Jason Baron --- kernel/trace/trace_event_perf.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 39c059ca670e..19a359d5e6d5 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -21,17 +21,46 @@ typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)]) /* Count the events in use (per event id, not per instance) */ static int total_ref_count; +static int perf_trace_event_perm(struct ftrace_event_call *tp_event, + struct perf_event *p_event) +{ + /* No tracing, just counting, so no obvious leak */ + if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW)) + return 0; + + /* Some events are ok to be traced by non-root users... */ + if (p_event->attach_state == PERF_ATTACH_TASK) { + if (tp_event->flags & TRACE_EVENT_FL_CAP_ANY) + return 0; + } + + /* + * ...otherwise raw tracepoint data can be a severe data leak, + * only allow root to have these. + */ + if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + return 0; +} + static int perf_trace_event_init(struct ftrace_event_call *tp_event, struct perf_event *p_event) { struct hlist_head __percpu *list; - int ret = -ENOMEM; + int ret; int cpu; + ret = perf_trace_event_perm(tp_event, p_event); + if (ret) + return ret; + p_event->tp_event = tp_event; if (tp_event->perf_refcount++ > 0) return 0; + ret = -ENOMEM; + list = alloc_percpu(struct hlist_head); if (!list) goto fail; -- cgit v1.2.3 From 042957801626465492b9428860de39a3cb2a8219 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 12 Nov 2010 22:32:11 -0500 Subject: tracing/events: Show real number in array fields Currently we have in something like the sched_switch event: field:char prev_comm[TASK_COMM_LEN]; offset:12; size:16; signed:1; When a userspace tool such as perf tries to parse this, the TASK_COMM_LEN is meaningless. This is done because the TRACE_EVENT() macro simply uses a #len to show the string of the length. When the length is an enum, we get a string that means nothing for tools. By adding a static buffer and a mutex to protect it, we can store the string into that buffer with snprintf and show the actual number. Now we get: field:char prev_comm[16]; offset:12; size:16; signed:1; Something much more useful. Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 6 ++++++ kernel/trace/trace_export.c | 14 ++++++++++---- 2 files changed, 16 insertions(+), 4 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 0725eeab1937..35fde09b81de 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -27,6 +27,12 @@ DEFINE_MUTEX(event_mutex); +DEFINE_MUTEX(event_storage_mutex); +EXPORT_SYMBOL_GPL(event_storage_mutex); + +char event_storage[EVENT_STORAGE_SIZE]; +EXPORT_SYMBOL_GPL(event_storage); + LIST_HEAD(ftrace_events); LIST_HEAD(ftrace_common_fields); diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 4ba44deaac25..4b74d71705c0 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -83,13 +83,19 @@ static void __always_unused ____ftrace_check_##name(void) \ #undef __array #define __array(type, item, len) \ - BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ - ret = trace_define_field(event_call, #type "[" #len "]", #item, \ + do { \ + BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ + mutex_lock(&event_storage_mutex); \ + snprintf(event_storage, sizeof(event_storage), \ + "%s[%d]", #type, len); \ + ret = trace_define_field(event_call, event_storage, #item, \ offsetof(typeof(field), item), \ sizeof(field.item), \ is_signed_type(type), FILTER_OTHER); \ - if (ret) \ - return ret; + mutex_unlock(&event_storage_mutex); \ + if (ret) \ + return ret; \ + } while (0); #undef __array_desc #define __array_desc(type, container, item, len) \ -- cgit v1.2.3 From 364829b1263b44aa60383824e4c1289d83d78ca7 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Wed, 24 Nov 2010 15:13:16 -0800 Subject: tracing: Fix panic when lseek() called on "trace" opened for writing The file_ops struct for the "trace" special file defined llseek as seq_lseek(). However, if the file was opened for writing only, seq_open() was not called, and the seek would dereference a null pointer, file->private_data. This patch introduces a new wrapper for seq_lseek() which checks if the file descriptor is opened for reading first. If not, it does nothing. Cc: Signed-off-by: Slava Pestov LKML-Reference: <1290640396-24179-1-git-send-email-slavapestov@google.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ee6a7339cf0e..21db0deb5c7d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2339,11 +2339,19 @@ tracing_write_stub(struct file *filp, const char __user *ubuf, return count; } +static loff_t tracing_seek(struct file *file, loff_t offset, int origin) +{ + if (file->f_mode & FMODE_READ) + return seq_lseek(file, offset, origin); + else + return 0; +} + static const struct file_operations tracing_fops = { .open = tracing_open, .read = seq_read, .write = tracing_write_stub, - .llseek = seq_lseek, + .llseek = tracing_seek, .release = tracing_release, }; -- cgit v1.2.3 From e1e359273576ee8fe27021356b064c772ed29af3 Mon Sep 17 00:00:00 2001 From: David Sharp Date: Wed, 22 Dec 2010 16:38:24 -0800 Subject: ring_buffer: Off-by-one and duplicate events in ring_buffer_read_page Fix two related problems in the event-copying loop of ring_buffer_read_page. The loop condition for copying events is off-by-one. "len" is the remaining space in the caller-supplied page. "size" is the size of the next event (or two events). If len == size, then there is just enough space for the next event. size was set to rb_event_ts_length, which may include the size of two events if the first event is a time-extend, in order to assure time- extends are kept together with the event after it. However, rb_advance_reader always advances by one event. This would result in the event after any time-extend being duplicated. Instead, get the size of a single event for the memcpy, but use rb_event_ts_length for the loop condition. Signed-off-by: David Sharp LKML-Reference: <1293064704-8101-1-git-send-email-dhsharp@google.com> LKML-Reference: Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 9ed509a015d8..bd1c35a4fbcc 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3853,6 +3853,13 @@ int ring_buffer_read_page(struct ring_buffer *buffer, /* Need to copy one event at a time */ do { + /* We need the size of one event, because + * rb_advance_reader only advances by one event, + * whereas rb_event_ts_length may include the size of + * one or two events. + * We have already ensured there's enough space if this + * is a time extend. */ + size = rb_event_length(event); memcpy(bpage->data + pos, rpage->data + rpos, size); len -= size; @@ -3867,7 +3874,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer, event = rb_reader_event(cpu_buffer); /* Always keep the time extend and data together */ size = rb_event_ts_length(event); - } while (len > size); + } while (len >= size); /* update bpage */ local_set(&bpage->commit, pos); -- cgit v1.2.3 From 61a0d49c33c7fd57c14895e5b0760bd02b65ac1f Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Mon, 3 Jan 2011 17:50:43 +0100 Subject: perf: Do not export power_frequency, but power_start event power_frequency moved to drivers/cpufreq/cpufreq.c which has to be compiled in, no need to export it. intel_idle can a be module though... Signed-off-by: Thomas Renninger Signed-off-by: Ingo Molnar Acked-by: Jean Pihet Cc: Jean Pihet Cc: Arjan van de Ven Cc: rjw@sisk.pl LKML-Reference: <1294073445-14812-2-git-send-email-trenn@suse.de> Signed-off-by: Ingo Molnar LKML-Reference: <1290072314-31155-2-git-send-email-trenn@suse.de> --- kernel/trace/power-traces.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c index a22582a06161..0e0497d9fade 100644 --- a/kernel/trace/power-traces.c +++ b/kernel/trace/power-traces.c @@ -13,5 +13,5 @@ #define CREATE_TRACE_POINTS #include -EXPORT_TRACEPOINT_SYMBOL_GPL(power_frequency); +EXPORT_TRACEPOINT_SYMBOL_GPL(power_start); -- cgit v1.2.3 From 25e41933b58777f2d020c3b0186b430ea004ec28 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Mon, 3 Jan 2011 17:50:44 +0100 Subject: perf: Clean up power events by introducing new, more generic ones Add these new power trace events: power:cpu_idle power:cpu_frequency power:machine_suspend The old C-state/idle accounting events: power:power_start power:power_end Have now a replacement (but we are still keeping the old tracepoints for compatibility): power:cpu_idle and power:power_frequency is replaced with: power:cpu_frequency power:machine_suspend is newly introduced. Jean Pihet has a patch integrated into the generic layer (kernel/power/suspend.c) which will make use of it. the type= field got removed from both, it was never used and the type is differed by the event type itself. perf timechart userspace tool gets adjusted in a separate patch. Signed-off-by: Thomas Renninger Signed-off-by: Ingo Molnar Acked-by: Arjan van de Ven Acked-by: Jean Pihet Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Linus Torvalds Cc: rjw@sisk.pl LKML-Reference: <1294073445-14812-3-git-send-email-trenn@suse.de> Signed-off-by: Ingo Molnar LKML-Reference: <1290072314-31155-2-git-send-email-trenn@suse.de> --- kernel/trace/Kconfig | 15 +++++++++++++++ kernel/trace/power-traces.c | 3 +++ 2 files changed, 18 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index ea37e2ff4164..14674dce77a6 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -69,6 +69,21 @@ config EVENT_TRACING select CONTEXT_SWITCH_TRACER bool +config EVENT_POWER_TRACING_DEPRECATED + depends on EVENT_TRACING + bool "Deprecated power event trace API, to be removed" + default y + help + Provides old power event types: + C-state/idle accounting events: + power:power_start + power:power_end + and old cpufreq accounting event: + power:power_frequency + This is for userspace compatibility + and will vanish after 5 kernel iterations, + namely 2.6.41. + config CONTEXT_SWITCH_TRACER bool diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c index 0e0497d9fade..f55fcf61b223 100644 --- a/kernel/trace/power-traces.c +++ b/kernel/trace/power-traces.c @@ -13,5 +13,8 @@ #define CREATE_TRACE_POINTS #include +#ifdef EVENT_POWER_TRACING_DEPRECATED EXPORT_TRACEPOINT_SYMBOL_GPL(power_start); +#endif +EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_idle); -- cgit v1.2.3 From 23036f1a340beec19cc451ba9719526c4ffb3a57 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 7 Jan 2011 08:53:58 +0100 Subject: blktrace: add missing probe argument to block_bio_complete blktrace.c block bio complete callback needs to gain a new argument to reflect the newly added "error" tracepoint argument. This is needed to match the new block_bio_complete TRACE_EVENT as of commit de983a7bfcb7c020901ca6e2314cf55a4207ab5a. Signed-off-by: Mathieu Desnoyers CC: Jeff Moyer CC: Steven Rostedt CC: Frederic Weisbecker CC: Ingo Molnar CC: Thomas Gleixner CC: Li Zefan Signed-off-by: Jens Axboe --- kernel/trace/blktrace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 2b8e2ee7c0ef..fab31aff9d97 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -782,7 +782,8 @@ static void blk_add_trace_bio_bounce(void *ignore, } static void blk_add_trace_bio_complete(void *ignore, - struct request_queue *q, struct bio *bio) + struct request_queue *q, struct bio *bio, + int error) { blk_add_trace_bio(q, bio, BLK_TA_COMPLETE); } -- cgit v1.2.3 From c9b5f501ef1580faa30c40c644b7691870462201 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 7 Jan 2011 13:41:40 +0100 Subject: sched: Constify function scope static struct sched_param usage Function-scope statics are discouraged because they are easily overlooked and can cause subtle bugs/races due to their global (non-SMP safe) nature. Linus noticed that we did this for sched_param - at minimum make the const. Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra LKML-Reference: Message-ID: Signed-off-by: Ingo Molnar --- kernel/trace/trace_selftest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 562c56e048fd..659732eba07c 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -558,7 +558,7 @@ trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr) static int trace_wakeup_test_thread(void *data) { /* Make this a RT thread, doesn't need to be too high */ - static struct sched_param param = { .sched_priority = 5 }; + static const struct sched_param param = { .sched_priority = 5 }; struct completion *x = data; sched_setscheduler(current, SCHED_FIFO, ¶m); -- cgit v1.2.3 From 1dbd1951f39e13da579ffe879cce19586d0462de Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 9 Dec 2010 15:47:56 +0800 Subject: tracing: Fix preempt count leak While running my ftrace stress test, this showed up: BUG: sleeping function called from invalid context at mm/mmap.c:233 ... note: cat[3293] exited with preempt_count 1 The bug was introduced by commit 91e86e560d0b3ce4c5fc64fd2bbb99f856a30a4e ("tracing: Fix recursive user stack trace") Cc: Signed-off-by: Li Zefan LKML-Reference: <4D0089AC.1020802@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f8cf959bad45..dc53ecb80589 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1313,12 +1313,10 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) __this_cpu_inc(user_stack_count); - - event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, sizeof(*entry), flags, pc); if (!event) - return; + goto out_drop_count; entry = ring_buffer_event_data(event); entry->tgid = current->tgid; @@ -1333,8 +1331,8 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) if (!filter_check_discard(call, entry, buffer, event)) ring_buffer_unlock_commit(buffer, event); + out_drop_count: __this_cpu_dec(user_stack_count); - out: preempt_enable(); } -- cgit v1.2.3 From 870915e047a2da695be118d32dd5a900f0c3e933 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 28 Oct 2010 11:31:17 -0400 Subject: tracing: Fix TRACE_EVENT power tracepoint creation DEFINE_TRACE should also exist when CONFIG_EVENT_TRACING=n. Otherwise, setting only TRACEPOINTS=y is broken. Acked-by: Arjan van de Ven Signed-off-by: Mathieu Desnoyers LKML-Reference: <20101028153117.GA4051@Krystal> Signed-off-by: Steven Rostedt --- kernel/trace/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 53f338190b26..761c510a06c5 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -52,7 +52,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o endif obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o -obj-$(CONFIG_EVENT_TRACING) += power-traces.o +obj-$(CONFIG_TRACEPOINTS) += power-traces.o ifeq ($(CONFIG_TRACING),y) obj-$(CONFIG_KGDB_KDB) += trace_kdb.o endif -- cgit v1.2.3 From 797a455d2c682476c3797dbfecf5bf84c1e3b9d3 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 10 Jan 2011 09:06:44 +0100 Subject: block: ensure that completion error gets properly traced We normally just use the BIO_UPTODATE flag to signal 0/-EIO. If we have more information available, we should pass that along to the trace output. Signed-off-by: Jens Axboe --- kernel/trace/blktrace.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index fab31aff9d97..153562d0b93c 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -758,54 +758,58 @@ static void blk_add_trace_rq_complete(void *ignore, * @q: queue the io is for * @bio: the source bio * @what: the action + * @error: error, if any * * Description: * Records an action against a bio. Will log the bio offset + size. * **/ static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, - u32 what) + u32 what, int error) { struct blk_trace *bt = q->blk_trace; if (likely(!bt)) return; + if (!error && !bio_flagged(bio, BIO_UPTODATE)) + error = EIO; + __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, - !bio_flagged(bio, BIO_UPTODATE), 0, NULL); + error, 0, NULL); } static void blk_add_trace_bio_bounce(void *ignore, struct request_queue *q, struct bio *bio) { - blk_add_trace_bio(q, bio, BLK_TA_BOUNCE); + blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0); } static void blk_add_trace_bio_complete(void *ignore, struct request_queue *q, struct bio *bio, int error) { - blk_add_trace_bio(q, bio, BLK_TA_COMPLETE); + blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error); } static void blk_add_trace_bio_backmerge(void *ignore, struct request_queue *q, struct bio *bio) { - blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); + blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0); } static void blk_add_trace_bio_frontmerge(void *ignore, struct request_queue *q, struct bio *bio) { - blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); + blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0); } static void blk_add_trace_bio_queue(void *ignore, struct request_queue *q, struct bio *bio) { - blk_add_trace_bio(q, bio, BLK_TA_QUEUE); + blk_add_trace_bio(q, bio, BLK_TA_QUEUE, 0); } static void blk_add_trace_getrq(void *ignore, @@ -813,7 +817,7 @@ static void blk_add_trace_getrq(void *ignore, struct bio *bio, int rw) { if (bio) - blk_add_trace_bio(q, bio, BLK_TA_GETRQ); + blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0); else { struct blk_trace *bt = q->blk_trace; @@ -828,7 +832,7 @@ static void blk_add_trace_sleeprq(void *ignore, struct bio *bio, int rw) { if (bio) - blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ); + blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0); else { struct blk_trace *bt = q->blk_trace; -- cgit v1.2.3 From 7f85803a26f304e698c673838aab06cc6d4d6e59 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 14 Jan 2011 17:49:02 +0800 Subject: tracing: Remove syscall_exit_fields There is no need for syscall_exit_fields as the syscall exit event class can already host the fields in its structure, like most other trace events do by default. Use that default behavior instead. Following this scheme, we don't need anymore to override the get_fields() callback of the syscall exit event class either. Hence both syscall_exit_fields and syscall_get_exit_fields() can be removed. Also changed some indentation to keep the following under 80 characters: ".fields = LIST_HEAD_INIT(event_class_syscall_exit.fields)," Acked-by: Frederic Weisbecker Signed-off-by: Lai Jiangshan LKML-Reference: <4D301C0E.8090408@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace_syscalls.c | 33 ++++++++++++--------------------- 1 file changed, 12 insertions(+), 21 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index bac752f0cfb5..b706529b4fc7 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -23,9 +23,6 @@ static int syscall_exit_register(struct ftrace_event_call *event, static int syscall_enter_define_fields(struct ftrace_event_call *call); static int syscall_exit_define_fields(struct ftrace_event_call *call); -/* All syscall exit events have the same fields */ -static LIST_HEAD(syscall_exit_fields); - static struct list_head * syscall_get_enter_fields(struct ftrace_event_call *call) { @@ -34,34 +31,28 @@ syscall_get_enter_fields(struct ftrace_event_call *call) return &entry->enter_fields; } -static struct list_head * -syscall_get_exit_fields(struct ftrace_event_call *call) -{ - return &syscall_exit_fields; -} - struct trace_event_functions enter_syscall_print_funcs = { - .trace = print_syscall_enter, + .trace = print_syscall_enter, }; struct trace_event_functions exit_syscall_print_funcs = { - .trace = print_syscall_exit, + .trace = print_syscall_exit, }; struct ftrace_event_class event_class_syscall_enter = { - .system = "syscalls", - .reg = syscall_enter_register, - .define_fields = syscall_enter_define_fields, - .get_fields = syscall_get_enter_fields, - .raw_init = init_syscall_trace, + .system = "syscalls", + .reg = syscall_enter_register, + .define_fields = syscall_enter_define_fields, + .get_fields = syscall_get_enter_fields, + .raw_init = init_syscall_trace, }; struct ftrace_event_class event_class_syscall_exit = { - .system = "syscalls", - .reg = syscall_exit_register, - .define_fields = syscall_exit_define_fields, - .get_fields = syscall_get_exit_fields, - .raw_init = init_syscall_trace, + .system = "syscalls", + .reg = syscall_exit_register, + .define_fields = syscall_exit_define_fields, + .fields = LIST_HEAD_INIT(event_class_syscall_exit.fields), + .raw_init = init_syscall_trace, }; extern unsigned long __start_syscalls_metadata[]; -- cgit v1.2.3 From 490da40d82b31c0562d3f5edb37810f492ca1c34 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Wed, 19 Jan 2011 10:51:44 +0800 Subject: blktrace: Don't output messages if NOTIFY isn't set. Now if we enable blktrace, cfq has too many messages output to the trace buffer. It is fine if we don't specify any action mask. But if I do like this: blktrace /dev/sdb -a issue -a complete -o - | blkparse -i - I only want to see 'D' and 'C', while with the following command dd if=/mnt/ocfs2/test of=/dev/null bs=4k count=1 iflag=direct I will get(with a 2.6.37 vanilla kernel): 8,16 0 0 0.000000000 0 m N cfq3805 alloced 8,16 0 0 0.000004126 0 m N cfq3805 insert_request 8,16 0 0 0.000004884 0 m N cfq3805 add_to_rr 8,16 0 0 0.000008417 0 m N cfq workload slice:300 8,16 0 0 0.000009557 0 m N cfq3805 set_active wl_prio:0 wl_type:2 8,16 0 0 0.000010640 0 m N cfq3805 fifo= (null) 8,16 0 0 0.000011193 0 m N cfq3805 dispatch_insert 8,16 0 0 0.000012221 0 m N cfq3805 dispatched a request 8,16 0 0 0.000012802 0 m N cfq3805 activate rq, drv=1 8,16 0 1 0.000013181 3805 D R 114759 + 8 [dd] 8,16 0 2 0.000164244 0 C R 114759 + 8 [0] 8,16 0 0 0.000167997 0 m N cfq3805 complete rqnoidle 0 8,16 0 0 0.000168782 0 m N cfq3805 set_slice=100 8,16 0 0 0.000169874 0 m N cfq3805 arm_idle: 8 group_idle: 0 8,16 0 0 0.000170189 0 m N cfq schedule dispatch 8,16 0 0 0.000397938 0 m N cfq3805 slice expired t=0 8,16 0 0 0.000399763 0 m N cfq3805 sl_used=1 disp=1 charge=1 iops=0 sect=8 8,16 0 0 0.000400227 0 m N cfq3805 del_from_rr 8,16 0 0 0.000400882 0 m N cfq3805 put_queue See, there are 19 lines while I only need 2. I don't think it is appropriate for a user. So this patch will disable any messages if the BLK_TC_NOTIFY isn't set. Now the output for the same command will look like: 8,16 0 1 0.000000000 4908 D R 114759 + 8 [dd] 8,16 0 2 0.000146827 0 C R 114759 + 8 [0] Yes, it is what I want to see. Cc: Steven Rostedt Cc: Jeff Moyer Signed-off-by: Tao Ma Signed-off-by: Jens Axboe --- kernel/trace/blktrace.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 153562d0b93c..d95721f33702 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -138,6 +138,13 @@ void __trace_note_message(struct blk_trace *bt, const char *fmt, ...) !blk_tracer_enabled)) return; + /* + * If the BLK_TC_NOTIFY action mask isn't set, don't send any note + * message to the trace. + */ + if (!(bt->act_mask & BLK_TC_NOTIFY)) + return; + local_irq_save(flags); buf = per_cpu_ptr(bt->msg_data, smp_processor_id()); va_start(args, fmt); -- cgit v1.2.3 From 2ce802f62ba32a7d95748ac92bf351f76affb6ff Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 20 Jan 2011 12:06:35 +0100 Subject: lockdep: Move early boot local IRQ enable/disable status to init/main.c During early boot, local IRQ is disabled until IRQ subsystem is properly initialized. During this time, no one should enable local IRQ and some operations which usually are not allowed with IRQ disabled, e.g. operations which might sleep or require communications with other processors, are allowed. lockdep tracked this with early_boot_irqs_off/on() callbacks. As other subsystems need this information too, move it to init/main.c and make it generally available. While at it, toggle the boolean to early_boot_irqs_disabled instead of enabled so that it can be initialized with %false and %true indicates the exceptional condition. Signed-off-by: Tejun Heo Acked-by: Peter Zijlstra Acked-by: Pekka Enberg Cc: Linus Torvalds LKML-Reference: <20110120110635.GB6036@htj.dyndns.org> Signed-off-by: Ingo Molnar --- kernel/trace/trace_irqsoff.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 5cf8c602b880..92b6e1e12d98 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -453,14 +453,6 @@ void time_hardirqs_off(unsigned long a0, unsigned long a1) * Stubs: */ -void early_boot_irqs_off(void) -{ -} - -void early_boot_irqs_on(void) -{ -} - void trace_softirqs_on(unsigned long ip) { } -- cgit v1.2.3 From e4a9ea5ee7c8812a7bf0c3fb725ceeaa3d4c2fcc Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 27 Jan 2011 09:15:30 -0500 Subject: tracing: Replace trace_event struct array with pointer array Currently the trace_event structures are placed in the _ftrace_events section, and at link time, the linker makes one large array of all the trace_event structures. On boot up, this array is read (much like the initcall sections) and the events are processed. The problem is that there is no guarantee that gcc will place complex structures nicely together in an array format. Two structures in the same file may be placed awkwardly, because gcc has no clue that they are suppose to be in an array. A hack was used previous to force the alignment to 4, to pack the structures together. But this caused alignment issues with other architectures (sparc). Instead of packing the structures into an array, the structures' addresses are now put into the _ftrace_event section. As pointers are always the natural alignment, gcc should always pack them tightly together (otherwise initcall, extable, etc would also fail). By having the pointers to the structures in the section, we can still iterate the trace_events without causing unnecessary alignment problems with other architectures, or depending on the current behaviour of gcc that will likely change in the future just to tick us kernel developers off a little more. The _ftrace_event section is also moved into the .init.data section as it is now only needed at boot up. Suggested-by: David Miller Cc: Mathieu Desnoyers Acked-by: David S. Miller Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 12 ++++++------ kernel/trace/trace_export.c | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 35fde09b81de..5f499e0438a4 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1284,7 +1284,7 @@ trace_create_file_ops(struct module *mod) static void trace_module_add_events(struct module *mod) { struct ftrace_module_file_ops *file_ops = NULL; - struct ftrace_event_call *call, *start, *end; + struct ftrace_event_call **call, **start, **end; start = mod->trace_events; end = mod->trace_events + mod->num_trace_events; @@ -1297,7 +1297,7 @@ static void trace_module_add_events(struct module *mod) return; for_each_event(call, start, end) { - __trace_add_event_call(call, mod, + __trace_add_event_call(*call, mod, &file_ops->id, &file_ops->enable, &file_ops->filter, &file_ops->format); } @@ -1367,8 +1367,8 @@ static struct notifier_block trace_module_nb = { .priority = 0, }; -extern struct ftrace_event_call __start_ftrace_events[]; -extern struct ftrace_event_call __stop_ftrace_events[]; +extern struct ftrace_event_call *__start_ftrace_events[]; +extern struct ftrace_event_call *__stop_ftrace_events[]; static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata; @@ -1384,7 +1384,7 @@ __setup("trace_event=", setup_trace_event); static __init int event_trace_init(void) { - struct ftrace_event_call *call; + struct ftrace_event_call **call; struct dentry *d_tracer; struct dentry *entry; struct dentry *d_events; @@ -1430,7 +1430,7 @@ static __init int event_trace_init(void) pr_warning("tracing: Failed to allocate common fields"); for_each_event(call, __start_ftrace_events, __stop_ftrace_events) { - __trace_add_event_call(call, NULL, &ftrace_event_id_fops, + __trace_add_event_call(*call, NULL, &ftrace_event_id_fops, &ftrace_enable_fops, &ftrace_event_filter_fops, &ftrace_event_format_fops); diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 4b74d71705c0..bbeec31e0ae3 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -161,13 +161,13 @@ struct ftrace_event_class event_class_ftrace_##call = { \ .fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\ }; \ \ -struct ftrace_event_call __used \ -__attribute__((__aligned__(4))) \ -__attribute__((section("_ftrace_events"))) event_##call = { \ +struct ftrace_event_call __used event_##call = { \ .name = #call, \ .event.type = etype, \ .class = &event_class_ftrace_##call, \ .print_fmt = print, \ }; \ +struct ftrace_event_call __used \ +__attribute__((section("_ftrace_events"))) *__event_##call = &event_##call; #include "trace_entries.h" -- cgit v1.2.3 From 3d56e331b6537671c66f1b510bed0f1e0331dfc8 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 2 Feb 2011 17:06:09 -0500 Subject: tracing: Replace syscall_meta_data struct array with pointer array Currently the syscall_meta structures for the syscall tracepoints are placed in the __syscall_metadata section, and at link time, the linker makes one large array of all these syscall metadata structures. On boot up, this array is read (much like the initcall sections) and the syscall data is processed. The problem is that there is no guarantee that gcc will place complex structures nicely together in an array format. Two structures in the same file may be placed awkwardly, because gcc has no clue that they are suppose to be in an array. A hack was used previous to force the alignment to 4, to pack the structures together. But this caused alignment issues with other architectures (sparc). Instead of packing the structures into an array, the structures' addresses are now put into the __syscall_metadata section. As pointers are always the natural alignment, gcc should always pack them tightly together (otherwise initcall, extable, etc would also fail). By having the pointers to the structures in the section, we can still iterate the trace_events without causing unnecessary alignment problems with other architectures, or depending on the current behaviour of gcc that will likely change in the future just to tick us kernel developers off a little more. The __syscall_metadata section is also moved into the .init.data section as it is now only needed at boot up. Suggested-by: David Miller Acked-by: David S. Miller Cc: Mathieu Desnoyers Signed-off-by: Steven Rostedt --- kernel/trace/trace_syscalls.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index b706529b4fc7..5c9fe08d2093 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -55,20 +55,21 @@ struct ftrace_event_class event_class_syscall_exit = { .raw_init = init_syscall_trace, }; -extern unsigned long __start_syscalls_metadata[]; -extern unsigned long __stop_syscalls_metadata[]; +extern struct syscall_metadata *__start_syscalls_metadata[]; +extern struct syscall_metadata *__stop_syscalls_metadata[]; static struct syscall_metadata **syscalls_metadata; -static struct syscall_metadata *find_syscall_meta(unsigned long syscall) +static __init struct syscall_metadata * +find_syscall_meta(unsigned long syscall) { - struct syscall_metadata *start; - struct syscall_metadata *stop; + struct syscall_metadata **start; + struct syscall_metadata **stop; char str[KSYM_SYMBOL_LEN]; - start = (struct syscall_metadata *)__start_syscalls_metadata; - stop = (struct syscall_metadata *)__stop_syscalls_metadata; + start = __start_syscalls_metadata; + stop = __stop_syscalls_metadata; kallsyms_lookup(syscall, NULL, NULL, NULL, str); for ( ; start < stop; start++) { @@ -78,8 +79,8 @@ static struct syscall_metadata *find_syscall_meta(unsigned long syscall) * with "SyS" instead of "sys", leading to an unwanted * mismatch. */ - if (start->name && !strcmp(start->name + 3, str + 3)) - return start; + if ((*start)->name && !strcmp((*start)->name + 3, str + 3)) + return *start; } return NULL; } -- cgit v1.2.3