From 76369139ceb955deefc509e6e12ce9d6ce50ccab Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Thu, 19 May 2011 19:55:04 +0200
Subject: perf: Split up buffer handling from core code

And create the internal perf events header.

v2: Keep an internal inlined perf_output_copy()

Signed-off-by: Frederic Weisbecker
Acked-by: Peter Zijlstra
Cc: Borislav Petkov
Cc: Stephane Eranian
Cc: Arnaldo Carvalho de Melo
Cc: Steven Rostedt
Link: http://lkml.kernel.org/r/1305827704-5607-1-git-send-email-fweisbec@gmail.com
[ v3: use clearer 'ring_buffer' and 'rb' naming ]
Signed-off-by: Ingo Molnar
---
 include/linux/perf_event.h | 33 ++++-----------------------------
 1 file changed, 4 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 3412684ce5d5..779f6ed54d52 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -680,33 +680,6 @@ enum perf_event_active_state {
 };

 struct file;
-
-#define PERF_BUFFER_WRITABLE	0x01
-
-struct perf_buffer {
-	atomic_t	refcount;
-	struct rcu_head	rcu_head;
-#ifdef CONFIG_PERF_USE_VMALLOC
-	struct work_struct	work;
-	int	page_order;	/* allocation order */
-#endif
-	int	nr_pages;	/* nr of data pages */
-	int	writable;	/* are we writable */
-
-	atomic_t	poll;	/* POLL_ for wakeups */
-
-	local_t	head;	/* write position */
-	local_t	nest;	/* nested writers */
-	local_t	events;	/* event limit */
-	local_t	wakeup;	/* wakeup stamp */
-	local_t	lost;	/* nr records lost */
-
-	long	watermark;	/* wakeup watermark */
-
-	struct perf_event_mmap_page	*user_page;
-	void	*data_pages[0];
-};
-
 struct perf_sample_data;

 typedef void (*perf_overflow_handler_t)(struct perf_event *, int,
@@ -745,6 +718,8 @@ struct perf_cgroup {
 };
 #endif

+struct ring_buffer;
+
 /**
  * struct perf_event - performance event kernel representation:
  */
@@ -834,7 +809,7 @@ struct perf_event {
 	atomic_t	mmap_count;
 	int	mmap_locked;
 	struct user_struct	*mmap_user;
-	struct perf_buffer	*buffer;
+	struct ring_buffer	*rb;

 	/* poll related */
 	wait_queue_head_t	waitq;
@@ -945,7 +920,7 @@ struct perf_cpu_context {

 struct perf_output_handle {
 	struct perf_event	*event;
-	struct perf_buffer	*buffer;
+	struct ring_buffer	*rb;
 	unsigned long	wakeup;
 	unsigned long	size;
 	void	*addr;
-- cgit v1.2.3

From 7ea5906405a1f3fc1c0033dfd7e02f2cfd1de5e5 Mon Sep 17 00:00:00 2001
From: Vaibhav Nagarnaik
Date: Tue, 3 May 2011 17:56:42 -0700
Subject: tracing: Use NUMA allocation for per-cpu ring buffer pages

The tracing ring buffer is a group of per-cpu ring buffers where allocation and logging are done on a per-cpu basis. The events that are generated on a particular CPU are logged in the corresponding buffer. This provides wait-free writes between CPUs and good NUMA node locality while accessing the ring buffer.

However, the allocation routines consider NUMA locality only for buffer page metadata and not for the actual buffer page. This causes the pages to be allocated on the NUMA node local to the CPU where the allocation routine is running at the time.

This patch fixes the problem by using a NUMA-node-specific allocation routine so that the pages are allocated from a NUMA node local to the logging CPU.

I tested with the getuid_microbench tool from autotest. It is a simple binary that calls getuid() in a loop and measures the average time for the syscall to complete.
The following command was used to test:

  $ getuid_microbench 1000000

Comparing the numbers on kernels with and without this patch shows that the logging latency decreases by 30-50 ns/call:

  tracing with non-NUMA allocation - 569 ns/call
  tracing with NUMA allocation     - 512 ns/call

Signed-off-by: Vaibhav Nagarnaik
Cc: Frederic Weisbecker
Cc: Ingo Molnar
Cc: Michael Rubin
Cc: David Sharp
Link: http://lkml.kernel.org/r/1304470602-20366-1-git-send-email-vnagarnaik@google.com
Signed-off-by: Steven Rostedt
---
 include/linux/ring_buffer.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index ab38ac80b0f9..b891de96000f 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -169,7 +169,7 @@ void ring_buffer_set_clock(struct ring_buffer *buffer,

 size_t ring_buffer_page_len(void *page);

-void *ring_buffer_alloc_read_page(struct ring_buffer *buffer);
+void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu);
 void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data);
 int ring_buffer_read_page(struct ring_buffer *buffer, void **data_page,
			  size_t len, int cpu, int full);
-- cgit v1.2.3

From 395810627b6a43c8d0ec948884043946fa162308 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu
Date: Wed, 8 Jun 2011 16:09:21 +0900
Subject: x86: Swap save_stack_trace_regs parameters

Swap the 1st and 2nd parameters of save_stack_trace_regs() so that they match the parameter order of save_stack_trace_tsk().

Signed-off-by: Masami Hiramatsu
Cc: yrl.pp-manager.tt@hitachi.com
Cc: Frederic Weisbecker
Cc: Peter Zijlstra
Cc: Namhyung Kim
Link: http://lkml.kernel.org/r/20110608070921.17777.31103.stgit@fedora15
Signed-off-by: Steven Rostedt
---
 include/linux/stacktrace.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
index 25310f1d7f37..115b570e3bff 100644
--- a/include/linux/stacktrace.h
+++ b/include/linux/stacktrace.h
@@ -14,8 +14,8 @@ struct stack_trace {
 };

 extern void save_stack_trace(struct stack_trace *trace);
-extern void save_stack_trace_regs(struct stack_trace *trace,
-				  struct pt_regs *regs);
+extern void save_stack_trace_regs(struct pt_regs *regs,
+				  struct stack_trace *trace);
 extern void save_stack_trace_tsk(struct task_struct *tsk,
				 struct stack_trace *trace);
-- cgit v1.2.3

From 1fd8df2c3970c9e7e4e262354154ee39e58bdd7c Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu
Date: Wed, 8 Jun 2011 16:09:34 +0900
Subject: tracing/kprobes: Fix kprobe-tracer to support stack trace

Fix the kprobe tracer to support kernel stack traces correctly. Since the execution path of kprobe-based dynamic events differs from that of other tracepoint-based events, the normal ftrace_trace_stack() doesn't work correctly. To fix that, this introduces ftrace_trace_stack_regs(), which traces the stack via pt_regs instead of the current stack register. e.g.
 # echo p schedule+4 > /sys/kernel/debug/tracing/kprobe_events
 # echo 1 > /sys/kernel/debug/tracing/options/stacktrace
 # echo 1 > /sys/kernel/debug/tracing/events/kprobes/enable
 # head -n 20 /sys/kernel/debug/tracing/trace
          bash-2968  [000] 10297.050245: p_schedule_4: (schedule+0x4/0x4ca)
          bash-2968  [000] 10297.050247:
 => schedule_timeout
 => n_tty_read
 => tty_read
 => vfs_read
 => sys_read
 => system_call_fastpath
   kworker/0:1-2940  [000] 10297.050265: p_schedule_4: (schedule+0x4/0x4ca)
   kworker/0:1-2940  [000] 10297.050266:
 => worker_thread
 => kthread
 => kernel_thread_helper
          sshd-1132  [000] 10297.050365: p_schedule_4: (schedule+0x4/0x4ca)
          sshd-1132  [000] 10297.050365:
 => sysret_careful

Note: Even with this fix, the first entry will be skipped if the probe is put on the function entry area before the frame pointer is set up (usually 4 bytes, i.e. "push %bp; mov %sp, %bp", on x86), because the stack unwinder depends on the frame pointer.

Signed-off-by: Masami Hiramatsu
Cc: Frederic Weisbecker
Cc: yrl.pp-manager.tt@hitachi.com
Cc: Peter Zijlstra
Cc: Namhyung Kim
Link: http://lkml.kernel.org/r/20110608070934.17777.17116.stgit@fedora15
Signed-off-by: Steven Rostedt
---
 include/linux/ftrace_event.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 59d3ef100eb9..b1e69eefc203 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -129,6 +129,10 @@ void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
 void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer,
				       struct ring_buffer_event *event,
				       unsigned long flags, int pc);
+void trace_nowake_buffer_unlock_commit_regs(struct ring_buffer *buffer,
+					    struct ring_buffer_event *event,
+					    unsigned long flags, int pc,
+					    struct pt_regs *regs);
 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
					 struct ring_buffer_event *event);
-- cgit v1.2.3

From 28009ce4a8130af7260a9271901b4419834ad152 Mon Sep 17 00:00:00 2001
From: Richard Kennedy
Date: Tue, 7 Jun 2011 16:33:38 +0100
Subject: perf: Remove 64-bit alignment padding from perf_event_context

Reorder perf_event_context to remove 8 bytes of 64-bit alignment padding, shrinking its size to 192 bytes. This allows it to fit into a smaller slab and use one fewer cache line.

Signed-off-by: Richard Kennedy
Signed-off-by: Peter Zijlstra
Link: http://lkml.kernel.org/r/1307460819.1950.5.camel@castor.rsk
Signed-off-by: Ingo Molnar
---
 include/linux/perf_event.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index e76a41010e1f..2f7b5d42ab41 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -894,8 +894,8 @@ struct perf_event_context {
 	u64	parent_gen;
 	u64	generation;
 	int	pin_count;
-	struct rcu_head	rcu_head;
 	int	nr_cgroups;	/* cgroup events present */
+	struct rcu_head	rcu_head;
 };

 /*
-- cgit v1.2.3

From a8b0ca17b80e92faab46ee7179ba9e99ccb61233 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Mon, 27 Jun 2011 14:41:57 +0200
Subject: perf: Remove the nmi parameter from the swevent and overflow interface

The nmi parameter indicated whether we could do wakeups from the current context; if not, we would set some state and self-IPI and let the resulting interrupt do the wakeup.

For the various event classes:

  - hardware: nmi=0; PMI is in fact an NMI, or we run irq_work_run from the PMI-tail (ARM etc.)
  - tracepoint: nmi=0; since tracepoints can fire from NMI context.

  - software: nmi=[0,1]; some, like the schedule event, cannot perform wakeups and hence need 0.

As one can see, there is very little nmi=1 usage, and the downside of not using it is that on some platforms some software events can have a jiffy delay in wakeup (when arch_irq_work_raise isn't implemented). The upside, however, is that we can remove the nmi parameter and save a bunch of conditionals in fast paths.

Signed-off-by: Peter Zijlstra
Cc: Michael Cree
Cc: Will Deacon
Cc: Deng-Cheng Zhu
Cc: Anton Blanchard
Cc: Eric B Munson
Cc: Heiko Carstens
Cc: Paul Mundt
Cc: David S. Miller
Cc: Frederic Weisbecker
Cc: Jason Wessel
Cc: Don Zickus
Link: http://lkml.kernel.org/n/tip-agjev8eu666tvknpb3iaj0fg@git.kernel.org
Signed-off-by: Ingo Molnar
---
 include/linux/perf_event.h | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 2f7b5d42ab41..0946a8bc098d 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -682,7 +682,7 @@ enum perf_event_active_state {
 struct file;
 struct perf_sample_data;

-typedef void (*perf_overflow_handler_t)(struct perf_event *, int,
+typedef void (*perf_overflow_handler_t)(struct perf_event *,
					struct perf_sample_data *,
					struct pt_regs *regs);
@@ -925,7 +925,6 @@ struct perf_output_handle {
 	unsigned long	size;
 	void	*addr;
 	int	page;
-	int	nmi;
 	int	sample;
 };
@@ -993,7 +992,7 @@ extern void perf_prepare_sample(struct perf_event_header *header,
				struct perf_event *event,
				struct pt_regs *regs);

-extern int perf_event_overflow(struct perf_event *event, int nmi,
+extern int perf_event_overflow(struct perf_event *event,
				 struct perf_sample_data *data,
				 struct pt_regs *regs);
@@ -1012,7 +1011,7 @@ static inline int is_software_event(struct perf_event *event)

 extern struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX];

-extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64);
+extern void __perf_sw_event(u32, u64, struct pt_regs *, u64);

 #ifndef perf_arch_fetch_caller_regs
 static inline void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { }
@@ -1034,7 +1033,7 @@ static inline void perf_fetch_caller_regs(struct pt_regs *regs)
 }

 static __always_inline void
-perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
+perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
 {
	struct pt_regs hot_regs;
@@ -1043,7 +1042,7 @@ perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
		perf_fetch_caller_regs(&hot_regs);
		regs = &hot_regs;
	}
-	__perf_sw_event(event_id, nr, nmi, regs, addr);
+	__perf_sw_event(event_id, nr, regs, addr);
	}
 }
@@ -1057,7 +1056,7 @@ static inline void perf_event_task_sched_in(struct task_struct *task)
 static inline void perf_event_task_sched_out(struct task_struct *task,
					     struct task_struct *next)
 {
-	perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0);
+	perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0);

	__perf_event_task_sched_out(task, next);
 }
@@ -1119,7 +1118,7 @@ extern void perf_bp_event(struct perf_event *event, void *data);

 extern int perf_output_begin(struct perf_output_handle *handle,
			     struct perf_event *event, unsigned int size,
-			     int nmi, int sample);
+			     int sample);
 extern void perf_output_end(struct perf_output_handle *handle);
 extern void perf_output_copy(struct perf_output_handle *handle,
			     const void *buf, unsigned int len);
@@ -1143,8 +1142,7 @@
 static inline int perf_event_task_disable(void)	{ return -EINVAL; }
 static inline int perf_event_task_enable(void)		{ return -EINVAL; }

 static inline void
-perf_sw_event(u32 event_id, u64 nr, int nmi,
-	      struct pt_regs *regs, u64 addr)	{ }
+perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)	{ }

 static inline void
 perf_bp_event(struct perf_event *event, void *data)	{ }
-- cgit v1.2.3

From a7ac67ea021b4603095d2aa458bc41641238f22c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Mon, 27 Jun 2011 16:47:16 +0200
Subject: perf: Remove the perf_output_begin(.sample) argument

Since only samples call perf_output_sample(), it's much saner (and more correct) to put the sample logic in there than in the perf_output_begin()/perf_output_end() pair. Saves a useless argument, reduces conditionals and shrinks struct perf_output_handle, win!

Signed-off-by: Peter Zijlstra
Link: http://lkml.kernel.org/n/tip-2crpvsx3cqu67q3zqjbnlpsc@git.kernel.org
Signed-off-by: Ingo Molnar
---
 include/linux/perf_event.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 0946a8bc098d..771b0b2845e4 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -925,7 +925,6 @@ struct perf_output_handle {
 	unsigned long	size;
 	void	*addr;
 	int	page;
-	int	sample;
 };

 #ifdef CONFIG_PERF_EVENTS
@@ -1117,8 +1116,7 @@ extern void perf_bp_event(struct perf_event *event, void *data);
 #endif

 extern int perf_output_begin(struct perf_output_handle *handle,
-			     struct perf_event *event, unsigned int size,
-			     int sample);
+			     struct perf_event *event, unsigned int size);
 extern void perf_output_end(struct perf_output_handle *handle);
 extern void perf_output_copy(struct perf_output_handle *handle,
			     const void *buf, unsigned int len);
-- cgit v1.2.3

From efc9f05df2dd171280dcb736a4d973ffefd5508e Mon Sep 17 00:00:00 2001
From: Stephane Eranian
Date: Mon, 6 Jun 2011 16:57:03 +0200
Subject: perf_events: Update Intel extra regs shared constraints management

This patch improves the code managing the extra shared registers used for offcore_response events on Intel Nehalem/Westmere. The idea is to use static allocation instead of dynamic allocation. This greatly simplifies the get and put constraint routines for those events.

The patch also renames per_core to shared_regs, because the same data structure gets used whether or not HT is on. When HT is off, those events still need coordination because they use an extra MSR that has to be shared within an event group.
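To illustrate the new layout, a sketch of what an offcore_response event's extra register state would carry (the MSR name and the use of attr.config1 are assumptions for illustration; the real tables live in the x86 PMU code):

	struct hw_perf_event *hwc = &event->hw;

	/* Illustrative values only, hypothetical for this sketch: */
	hwc->extra_reg.reg    = MSR_OFFCORE_RSP_0;	/* extra MSR to program */
	hwc->extra_reg.config = event->attr.config1;	/* value for that MSR */
	hwc->extra_reg.alloc  = 0;			/* not claimed yet */
	hwc->extra_reg.idx    = 0;			/* slot in shared_regs->regs[] */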
Signed-off-by: Stephane Eranian
Signed-off-by: Peter Zijlstra
Link: http://lkml.kernel.org/r/20110606145703.GA7258@quad
Signed-off-by: Ingo Molnar
---
 include/linux/perf_event.h | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 771b0b2845e4..069315eefb22 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -536,6 +536,16 @@ struct perf_branch_stack {

 struct task_struct;

+/*
+ * extra PMU register associated with an event
+ */
+struct hw_perf_event_extra {
+	u64		config;	/* register value */
+	unsigned int	reg;	/* register address or index */
+	int		alloc;	/* extra register already allocated */
+	int		idx;	/* index in shared_regs->regs[] */
+};
+
 /**
  * struct hw_perf_event - performance event hardware details:
  */
@@ -549,9 +559,7 @@ struct hw_perf_event {
 		unsigned long	event_base;
 		int		idx;
 		int		last_cpu;
-		unsigned int	extra_reg;
-		u64		extra_config;
-		int		extra_alloc;
+		struct hw_perf_event_extra extra_reg;
 	};
 	struct { /* software */
 		struct hrtimer	hrtimer;
-- cgit v1.2.3

From 89d6c0b5bdbb1927775584dcf532d98b3efe1477 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Fri, 22 Apr 2011 23:37:06 +0200
Subject: perf, arch: Add generic NODE cache events

Add a NODE level to the generic cache events, which is used to measure local vs remote memory accesses. Like all other cache events, an ACCESS is HIT+MISS; if there is no way to distinguish between reads and writes, do reads only, etc.

The below needs filling out for !x86 (which I filled out with unsupported events). I'm fairly sure ARM can leave it like that since it doesn't strike me as an architecture that even has NUMA support. SH might have something since it does appear to have some NUMA bits. Sparc64, PowerPC and MIPS certainly want a good look there since they clearly are NUMA capable.

Signed-off-by: Peter Zijlstra
Cc: David Miller
Cc: Anton Blanchard
Cc: David Daney
Cc: Deng-Cheng Zhu
Cc: Paul Mundt
Cc: Will Deacon
Cc: Robert Richter
Cc: Stephane Eranian
Link: http://lkml.kernel.org/r/1303508226.4865.8.camel@laptop
Signed-off-by: Ingo Molnar
---
 include/linux/perf_event.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 069315eefb22..a5f54b973bdb 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -61,7 +61,7 @@ enum perf_hw_id {
 /*
  * Generalized hardware cache events:
  *
- * { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x
+ * { L1-D, L1-I, LLC, ITLB, DTLB, BPU, NODE } x
  * { read, write, prefetch } x
  * { accesses, misses }
  */
@@ -72,6 +72,7 @@ enum perf_hw_cache_id {
	PERF_COUNT_HW_CACHE_DTLB	= 3,
	PERF_COUNT_HW_CACHE_ITLB	= 4,
	PERF_COUNT_HW_CACHE_BPU		= 5,
+	PERF_COUNT_HW_CACHE_NODE	= 6,

	PERF_COUNT_HW_CACHE_MAX,	/* non-ABI */
 };
-- cgit v1.2.3

From 4dc0da86967d5463708631d02a70cfed5b104884 Mon Sep 17 00:00:00 2001
From: Avi Kivity
Date: Wed, 29 Jun 2011 18:42:35 +0300
Subject: perf: Add context field to perf_event

The perf_event overflow handler does not receive any caller-derived argument, so many callers need to resort to looking up the perf_event in their local data structure. This is ugly and doesn't scale if a single callback services many perf_events.

Fix by adding a context parameter to perf_event_create_kernel_counter() (and the derived hardware breakpoint APIs) and storing it in the perf_event. The field can be accessed from the callback as event->overflow_handler_context.
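For illustration, a minimal sketch of how a kernel-side user might thread its own state through the callback (the handler and state names are hypothetical; the handler signature is the nmi-less perf_overflow_handler_t from earlier in this series):

	struct my_state {
		u64 overflows;
	};

	static void my_overflow_handler(struct perf_event *event,
					struct perf_sample_data *data,
					struct pt_regs *regs)
	{
		/* No global lookup needed: the context travels with the event. */
		struct my_state *state = event->overflow_handler_context;

		state->overflows++;
	}

	/* At creation time, the state is handed in as the new last argument: */
	event = perf_event_create_kernel_counter(&attr, cpu, NULL,
						 my_overflow_handler, state);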
All callers are updated.

Signed-off-by: Avi Kivity
Signed-off-by: Peter Zijlstra
Link: http://lkml.kernel.org/r/1309362157-6596-2-git-send-email-avi@redhat.com
Signed-off-by: Ingo Molnar
---
 include/linux/hw_breakpoint.h | 10 ++++++++--
 include/linux/perf_event.h    |  4 +++-
 2 files changed, 11 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index d1e55fed2c7d..6ae9c631a1be 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -73,6 +73,7 @@ static inline unsigned long hw_breakpoint_len(struct perf_event *bp)
 extern struct perf_event *
 register_user_hw_breakpoint(struct perf_event_attr *attr,
			    perf_overflow_handler_t triggered,
+			    void *context,
			    struct task_struct *tsk);

 /* FIXME: only change from the attr, and don't unregister */
@@ -85,11 +86,13 @@ modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr);
 extern struct perf_event *
 register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr,
				perf_overflow_handler_t triggered,
+				void *context,
				int cpu);

 extern struct perf_event * __percpu *
 register_wide_hw_breakpoint(struct perf_event_attr *attr,
-			    perf_overflow_handler_t triggered);
+			    perf_overflow_handler_t triggered,
+			    void *context);

 extern int register_perf_hw_breakpoint(struct perf_event *bp);
 extern int __register_perf_hw_breakpoint(struct perf_event *bp);
@@ -115,6 +118,7 @@ static inline int __init init_hw_breakpoint(void) { return 0; }
 static inline struct perf_event *
 register_user_hw_breakpoint(struct perf_event_attr *attr,
			    perf_overflow_handler_t triggered,
+			    void *context,
			    struct task_struct *tsk) { return NULL; }
 static inline int
 modify_user_hw_breakpoint(struct perf_event *bp,
@@ -122,10 +126,12 @@ modify_user_hw_breakpoint(struct perf_event *bp,
 static inline struct perf_event *
 register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr,
				perf_overflow_handler_t triggered,
+				void *context,
				int cpu) { return NULL; }
 static inline struct perf_event * __percpu *
 register_wide_hw_breakpoint(struct perf_event_attr *attr,
-			    perf_overflow_handler_t triggered) { return NULL; }
+			    perf_overflow_handler_t triggered,
+			    void *context) { return NULL; }
 static inline int
 register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; }
 static inline int
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index a5f54b973bdb..2a08cacb1628 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -839,6 +839,7 @@ struct perf_event {
 	u64	id;

 	perf_overflow_handler_t	overflow_handler;
+	void	*overflow_handler_context;

 #ifdef CONFIG_EVENT_TRACING
 	struct ftrace_event_call	*tp_event;
@@ -960,7 +961,8 @@ extern struct perf_event *
 perf_event_create_kernel_counter(struct perf_event_attr *attr,
				int cpu,
				struct task_struct *task,
-				perf_overflow_handler_t callback);
+				perf_overflow_handler_t callback,
+				void *context);
 extern u64 perf_event_read_value(struct perf_event *event,
				 u64 *enabled, u64 *running);
-- cgit v1.2.3

From 26ca5c11fb45ae2b2ac7e3574b8db6b3a3c7d350 Mon Sep 17 00:00:00 2001
From: Avi Kivity
Date: Wed, 29 Jun 2011 18:42:37 +0300
Subject: perf: export perf_event_refresh() to modules

KVM needs one-shot samples, since a PMC programmed to -X will fire after X events and then again after 2^40 events (i.e. variable period).
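As an illustration, hypothetical module-side code (overflow_fn, vcpu, attr and cpu are stand-ins, not taken from this patch) could arm a one-shot counter like this:

	struct perf_event *event;

	/* Sketch: count down to the next overflow, fire once, then stop. */
	event = perf_event_create_kernel_counter(&attr, cpu, NULL,
						 overflow_fn, vcpu);
	if (!IS_ERR(event))
		perf_event_refresh(event, 1);	/* allow exactly one overflow */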
Signed-off-by: Avi Kivity
Signed-off-by: Peter Zijlstra
Link: http://lkml.kernel.org/r/1309362157-6596-4-git-send-email-avi@redhat.com
Signed-off-by: Ingo Molnar
---
 include/linux/perf_event.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 2a08cacb1628..3f2711ccf910 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -955,6 +955,7 @@ extern void perf_pmu_disable(struct pmu *pmu);
 extern void perf_pmu_enable(struct pmu *pmu);
 extern int perf_event_task_disable(void);
 extern int perf_event_task_enable(void);
+extern int perf_event_refresh(struct perf_event *event, int refresh);
 extern void perf_event_update_userpage(struct perf_event *event);
 extern int perf_event_release_kernel(struct perf_event *event);
 extern struct perf_event *
@@ -1149,6 +1150,10 @@ static inline void perf_event_delayed_put(struct task_struct *task)	{ }
 static inline void perf_event_print_debug(void)	{ }
 static inline int perf_event_task_disable(void)	{ return -EINVAL; }
 static inline int perf_event_task_enable(void)	{ return -EINVAL; }
+static inline int perf_event_refresh(struct perf_event *event, int refresh)
+{
+	return -EINVAL;
+}
 static inline void
 perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)	{ }
 static inline void
 perf_bp_event(struct perf_event *event, void *data)	{ }
-- cgit v1.2.3

From 43dd61c9a09bd413e837df829e6bfb42159be52a Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Thu, 7 Jul 2011 11:09:22 -0400
Subject: ftrace: Fix regression of :mod: module function enabling

The new code that allows different utilities to pick and choose what functions they trace broke the :mod: hook that allows users to trace only functions of a particular module. The reason is that the :mod: hook bypasses the hash that is set up to allow individual users to trace their own functions, and uses the global hash directly. But if the global hash has not been set up, it will cause a bug:

  echo '*:mod:radeon' > /sys/kernel/debug/set_ftrace_filter

produces:

 [drm:drm_mode_getfb] *ERROR* invalid framebuffer id
 [drm:radeon_crtc_page_flip] *ERROR* failed to reserve new rbo buffer before flip
 BUG: unable to handle kernel paging request at ffffffff8160ec90
 IP: [] add_hash_entry+0x66/0xd0
 PGD 1a05067 PUD 1a09063 PMD 80000000016001e1
 Oops: 0003 [#1] SMP
 Jul 7 04:02:28 phyllis kernel: [55303.858604] CPU 1
 Modules linked in: cryptd aes_x86_64 aes_generic binfmt_misc rfcomm bnep ip6table_filter hid radeon r8169 ahci libahci mii ttm drm_kms_helper drm video i2c_algo_bit intel_agp intel_gtt

 Pid: 10344, comm: bash Tainted: G WC 3.0.0-rc5 #1 Dell Inc.
 Inspiron N5010/0YXXJJ
 RIP: 0010:[] [] add_hash_entry+0x66/0xd0
 RSP: 0018:ffff88003a96bda8 EFLAGS: 00010246
 RAX: ffff8801301735c0 RBX: ffffffff8160ec80 RCX: 0000000000306ee0
 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff880137c92940
 RBP: ffff88003a96bdb8 R08: ffff880137c95680 R09: 0000000000000000
 R10: 0000000000000001 R11: 0000000000000000 R12: ffffffff81c9df78
 R13: ffff8801153d1000 R14: 0000000000000000 R15: 0000000000000000
 FS: 00007f329c18a700(0000) GS:ffff880137c80000(0000) knlGS:0000000000000000
 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: ffffffff8160ec90 CR3: 000000003002b000 CR4: 00000000000006e0
 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
 Process bash (pid: 10344, threadinfo ffff88003a96a000, task ffff88012fcfc470)
 Stack:
  0000000000000fd0 00000000000000fc ffff88003a96be38 ffffffff810d92f5
  ffff88011c4c4e00 ffff880000000000 000000000b69f4d0 ffffffff8160ec80
  ffff8800300e6f06 0000000081130295 0000000000000282 ffff8800300e6f00
 Call Trace:
  [] match_records+0x155/0x1b0
  [] ftrace_mod_callback+0xbc/0x100
  [] ftrace_regex_write+0x16f/0x210
  [] ftrace_filter_write+0xf/0x20
  [] vfs_write+0xc8/0x190
  [] sys_write+0x51/0x90
  [] system_call_fastpath+0x16/0x1b
 Code: 48 8b 33 31 d2 48 85 f6 75 33 49 89 d4 4c 03 63 08 49 8b 14 24 48 85 d2 48 89 10 74 04 48 89 42 08 49 89 04 24 4c 89 60 08 31 d2
 RIP [] add_hash_entry+0x66/0xd0
 RSP
 CR2: ffffffff8160ec90
 ---[ end trace a5d031828efdd88e ]---

Reported-by: Brian Marete
Signed-off-by: Steven Rostedt
---
 include/linux/ftrace.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 9d88e1cb5dbb..ed0eb5254d1c 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -123,7 +123,8 @@ stack_trace_sysctl(struct ctl_table *table, int write,
 struct ftrace_func_command {
	struct list_head	list;
	char			*name;
-	int			(*func)(char *func, char *cmd,
+	int			(*func)(struct ftrace_hash *hash,
+					char *func, char *cmd,
					char *params, int enable);
 };
-- cgit v1.2.3

From 04da85b86188f224cc9b391b5bdd92a3ba20ffcf Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Mon, 11 Jul 2011 10:12:59 -0400
Subject: ftrace: Fix warning when CONFIG_FUNCTION_TRACER is not defined

The struct ftrace_hash was declared within CONFIG_FUNCTION_TRACER but was referenced outside of it.

Reported-by: Ingo Molnar
Signed-off-by: Steven Rostedt
---
 include/linux/ftrace.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index ed0eb5254d1c..f0c0e8a47ae6 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -19,6 +19,8 @@
 #include

+struct ftrace_hash;
+
 #ifdef CONFIG_FUNCTION_TRACER

 extern int ftrace_enabled;
@@ -29,8 +31,6 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
 typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip);

-struct ftrace_hash;
-
 enum {
	FTRACE_OPS_FL_ENABLED	= 1 << 0,
	FTRACE_OPS_FL_GLOBAL	= 1 << 1,
-- cgit v1.2.3

From 4a9bd3f134decd6d16ead8d288342d57aad486be Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Thu, 14 Jul 2011 16:36:53 -0400
Subject: tracing: Have dynamic size event stack traces

Currently the stack trace per event in ftrace is only 8 frames. This can be quite limiting and sometimes useless, especially when the "ignore frames" count is wrong and we also use up stack frames for the event processing itself.
Change this to be dynamic by adding a percpu buffer that we can write a large stack frame into and then copy into the ring buffer. Interrupts and NMIs that come in while another event is being processed will only get to use the 8-frame stack. That should be enough, as the task that was interrupted will have the full stack frame anyway.

Requested-by: Thomas Gleixner
Signed-off-by: Steven Rostedt
---
 include/linux/ftrace_event.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index b1e69eefc203..96efa6794ea5 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -76,6 +76,7 @@ struct trace_iterator {
	struct trace_entry	*ent;
	unsigned long		lost_events;
	int			leftover;
+	int			ent_size;
	int			cpu;
	u64			ts;
-- cgit v1.2.3
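To make the scheme described in the last patch concrete, here is a minimal sketch of the percpu-buffer approach (all names, sizes and the fallback path are hypothetical; this is not the code the patch adds):

	/* Sketch only: a per-cpu scratch area big enough for a deep trace. */
	#define STACK_SCRATCH_ENTRIES	128

	struct stack_scratch {
		unsigned long	calls[STACK_SCRATCH_ENTRIES];
	};
	static DEFINE_PER_CPU(struct stack_scratch, stack_scratch);
	static DEFINE_PER_CPU(int, stack_scratch_nesting);

	static void trace_event_stack(struct pt_regs *regs)
	{
		unsigned long nested[8];	/* small fallback buffer */
		struct stack_trace trace = {
			.entries	= this_cpu_ptr(&stack_scratch)->calls,
			.max_entries	= STACK_SCRATCH_ENTRIES,
			.skip		= 0,
		};

		/* Callers run with preemption disabled, so the per-cpu
		 * nesting counter is stable here. */
		if (this_cpu_inc_return(stack_scratch_nesting) > 1) {
			/* An interrupt/NMI hit while the scratch buffer was
			 * in use: degrade to an 8-entry on-stack trace so we
			 * don't corrupt the interrupted capture. */
			trace.entries = nested;
			trace.max_entries = 8;
		}

		if (regs)
			save_stack_trace_regs(regs, &trace);
		else
			save_stack_trace(&trace);

		/* ...reserve trace.nr_entries words in the ring buffer and
		 * copy trace.entries[] into the event... */

		this_cpu_dec(stack_scratch_nesting);
	}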