From 84e478c6f1eb9c4bfa1fff2f8108e9a061b46428 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Fri, 5 Feb 2010 21:47:05 -0500 Subject: nmi_watchdog: Config option to enable new nmi_watchdog These are the bits that enable the new nmi_watchdog and safely isolate the old nmi_watchdog. Only one or the other can run, not both at the same time. Signed-off-by: Don Zickus Cc: Linus Torvalds Cc: Andrew Morton Cc: gorcunov@gmail.com Cc: aris@redhat.com Cc: peterz@infradead.org LKML-Reference: <1265424425-31562-4-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar --- include/linux/nmi.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index b752e807adde..a42ff0bef708 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -47,4 +47,8 @@ static inline bool trigger_all_cpu_backtrace(void) } #endif +#ifdef CONFIG_NMI_WATCHDOG +int hw_nmi_is_cpu_stuck(struct pt_regs *); +#endif + #endif -- cgit v1.2.3 From 504d7cf10ee42bb76b9556859f23d4121dee0a77 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Fri, 12 Feb 2010 17:19:19 -0500 Subject: nmi_watchdog: Compile and portability fixes The original patch was x86_64 centric. Changed the code to make it less so. ested by building and running on a powerpc. Signed-off-by: Don Zickus Cc: peterz@infradead.org Cc: gorcunov@gmail.com Cc: aris@redhat.com LKML-Reference: <1266013161-31197-2-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar --- include/linux/nmi.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index a42ff0bef708..794e7354c5bf 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -20,10 +20,14 @@ extern void touch_nmi_watchdog(void); extern void acpi_nmi_disable(void); extern void acpi_nmi_enable(void); #else +#ifndef CONFIG_NMI_WATCHDOG static inline void touch_nmi_watchdog(void) { touch_softlockup_watchdog(); } +#else +extern void touch_nmi_watchdog(void); +#endif static inline void acpi_nmi_disable(void) { } static inline void acpi_nmi_enable(void) { } #endif @@ -49,6 +53,11 @@ static inline bool trigger_all_cpu_backtrace(void) #ifdef CONFIG_NMI_WATCHDOG int hw_nmi_is_cpu_stuck(struct pt_regs *); +u64 hw_nmi_get_sample_period(void); +extern int nmi_watchdog_enabled; +struct ctl_table; +extern int proc_nmi_enabled(struct ctl_table *, int , + void __user *, size_t *, loff_t *); #endif #endif -- cgit v1.2.3 From 47195d57636604ff6048b0d7aa3e4ed9643f6073 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Mon, 22 Feb 2010 18:09:03 -0500 Subject: nmi_watchdog: Clean up various small details Mostly copy/paste whitespace damage with a couple of nitpicks by the checkpatch script. Fix the struct definition as requested by Ingo too. Signed-off-by: Don Zickus Cc: peterz@infradead.org Cc: gorcunov@gmail.com Cc: aris@redhat.com LKML-Reference: <1266880143-24943-1-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar -- arch/x86/kernel/apic/hw_nmi.c | 14 +++++------ arch/x86/kernel/traps.c | 6 ++-- include/linux/nmi.h | 2 - kernel/nmi_watchdog.c | 51 ++++++++++++++++++++---------------------- 4 files changed, 36 insertions(+), 37 deletions(-) --- include/linux/nmi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 794e7354c5bf..22cc7960b649 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -57,7 +57,7 @@ u64 hw_nmi_get_sample_period(void); extern int nmi_watchdog_enabled; struct ctl_table; extern int proc_nmi_enabled(struct ctl_table *, int , - void __user *, size_t *, loff_t *); + void __user *, size_t *, loff_t *); #endif #endif -- cgit v1.2.3 From 58687acba59266735adb8ccd9b5b9aa2c7cd205b Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Fri, 7 May 2010 17:11:44 -0400 Subject: lockup_detector: Combine nmi_watchdog and softlockup detector The new nmi_watchdog (which uses the perf event subsystem) is very similar in structure to the softlockup detector. Using Ingo's suggestion, I combined the two functionalities into one file: kernel/watchdog.c. Now both the nmi_watchdog (or hardlockup detector) and softlockup detector sit on top of the perf event subsystem, which is run every 60 seconds or so to see if there are any lockups. To detect hardlockups, cpus not responding to interrupts, I implemented an hrtimer that runs 5 times for every perf event overflow event. If that stops counting on a cpu, then the cpu is most likely in trouble. To detect softlockups, tasks not yielding to the scheduler, I used the previous kthread idea that now gets kicked every time the hrtimer fires. If the kthread isn't being scheduled neither is anyone else and the warning is printed to the console. I tested this on x86_64 and both the softlockup and hardlockup paths work. V2: - cleaned up the Kconfig and softlockup combination - surrounded hardlockup cases with #ifdef CONFIG_PERF_EVENTS_NMI - seperated out the softlockup case from perf event subsystem - re-arranged the enabling/disabling nmi watchdog from proc space - added cpumasks for hardlockup failure cases - removed fallback to soft events if no PMU exists for hard events V3: - comment cleanups - drop support for older softlockup code - per_cpu cleanups - completely remove software clock base hardlockup detector - use per_cpu masking on hard/soft lockup detection - #ifdef cleanups - rename config option NMI_WATCHDOG to LOCKUP_DETECTOR - documentation additions V4: - documentation fixes - convert per_cpu to __get_cpu_var - powerpc compile fixes V5: - split apart warn flags for hard and soft lockups TODO: - figure out how to make an arch-agnostic clock2cycles call (if possible) to feed into perf events as a sample period [fweisbec: merged conflict patch] Signed-off-by: Don Zickus Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Cyrill Gorcunov Cc: Eric Paris Cc: Randy Dunlap LKML-Reference: <1273266711-18706-2-git-send-email-dzickus@redhat.com> Signed-off-by: Frederic Weisbecker --- include/linux/nmi.h | 8 ++++---- include/linux/sched.h | 6 ++++++ 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 22cc7960b649..abd48aacaf79 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -20,7 +20,7 @@ extern void touch_nmi_watchdog(void); extern void acpi_nmi_disable(void); extern void acpi_nmi_enable(void); #else -#ifndef CONFIG_NMI_WATCHDOG +#ifndef CONFIG_LOCKUP_DETECTOR static inline void touch_nmi_watchdog(void) { touch_softlockup_watchdog(); @@ -51,12 +51,12 @@ static inline bool trigger_all_cpu_backtrace(void) } #endif -#ifdef CONFIG_NMI_WATCHDOG +#ifdef CONFIG_LOCKUP_DETECTOR int hw_nmi_is_cpu_stuck(struct pt_regs *); u64 hw_nmi_get_sample_period(void); -extern int nmi_watchdog_enabled; +extern int watchdog_enabled; struct ctl_table; -extern int proc_nmi_enabled(struct ctl_table *, int , +extern int proc_dowatchdog_enabled(struct ctl_table *, int , void __user *, size_t *, loff_t *); #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index dad7f668ebf7..37efe8fa5306 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -346,6 +346,12 @@ extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, size_t *lenp, loff_t *ppos); #endif +#ifdef CONFIG_LOCKUP_DETECTOR +extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos); +#endif + /* Attach to any functions which should be ignored in wchan output. */ #define __sched __attribute__((__section__(".sched.text"))) -- cgit v1.2.3 From 332fbdbca3f7716c5620970755ae054d213bcc4e Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Fri, 7 May 2010 17:11:45 -0400 Subject: lockup_detector: Touch_softlockup cleanups and softlockup_tick removal Just some code cleanup to make touch_softlockup clearer and remove the softlockup_tick function as it is no longer needed. Also remove the /proc softlockup_thres call as it has been changed to watchdog_thres. Signed-off-by: Don Zickus Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Cyrill Gorcunov Cc: Eric Paris Cc: Randy Dunlap LKML-Reference: <1273266711-18706-3-git-send-email-dzickus@redhat.com> Signed-off-by: Frederic Weisbecker --- include/linux/sched.h | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 37efe8fa5306..33f9b2ad0bbb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -312,19 +312,15 @@ extern void scheduler_tick(void); extern void sched_show_task(struct task_struct *p); #ifdef CONFIG_DETECT_SOFTLOCKUP -extern void softlockup_tick(void); extern void touch_softlockup_watchdog(void); extern void touch_softlockup_watchdog_sync(void); extern void touch_all_softlockup_watchdogs(void); -extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos); +extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos); extern unsigned int softlockup_panic; extern int softlockup_thresh; #else -static inline void softlockup_tick(void) -{ -} static inline void touch_softlockup_watchdog(void) { } @@ -346,12 +342,6 @@ extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, size_t *lenp, loff_t *ppos); #endif -#ifdef CONFIG_LOCKUP_DETECTOR -extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos); -#endif - /* Attach to any functions which should be ignored in wchan output. */ #define __sched __attribute__((__section__(".sched.text"))) -- cgit v1.2.3 From 19cc36c0f0457e5c6629ec24036fbbe8255c88ec Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 13 May 2010 02:30:49 +0200 Subject: lockup_detector: Fix forgotten config conversion Fix forgotten CONFIG_DETECT_SOFTLOCKUP -> CONFIG_LOCKUP_DETECTOR in sched.h Fixes: arch/x86/built-in.o: In function `touch_nmi_watchdog': (.text+0x1bd59): undefined reference to `touch_softlockup_watchdog' kernel/built-in.o: In function `show_state_filter': (.text+0x10d01): undefined reference to `touch_all_softlockup_watchdogs' kernel/built-in.o: In function `sched_clock_idle_wakeup_event': (.text+0x362f9): undefined reference to `touch_softlockup_watchdog' kernel/built-in.o: In function `timekeeping_resume': timekeeping.c:(.text+0x38757): undefined reference to `touch_softlockup_watchdog' kernel/built-in.o: In function `tick_nohz_handler': tick-sched.c:(.text+0x3e5b9): undefined reference to `touch_softlockup_watchdog' kernel/built-in.o: In function `tick_sched_timer': tick-sched.c:(.text+0x3e671): undefined reference to `touch_softlockup_watchdog' kernel/built-in.o: In function `tick_check_idle': (.text+0x3e90b): undefined reference to `touch_softlockup_watchdog' Signed-off-by: Frederic Weisbecker Cc: Don Zickus Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Cyrill Gorcunov Cc: Eric Paris Cc: Randy Dunlap --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 33f9b2ad0bbb..3958e0cd24f7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -311,7 +311,7 @@ extern void scheduler_tick(void); extern void sched_show_task(struct task_struct *p); -#ifdef CONFIG_DETECT_SOFTLOCKUP +#ifdef CONFIG_LOCKUP_DETECTOR extern void touch_softlockup_watchdog(void); extern void touch_softlockup_watchdog_sync(void); extern void touch_all_softlockup_watchdogs(void); -- cgit v1.2.3 From cafcd80d216bc2136b8edbb794327e495792c666 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Fri, 14 May 2010 11:11:21 -0400 Subject: lockup_detector: Cross arch compile fixes Combining the softlockup and hardlockup code causes watchdog.c to build even without the hardlockup detection support. So if an arch, that has the previous and the new nmi watchdog implementations cohabiting, wants to know if the generic one is in use, CONFIG_LOCKUP_DETECTOR is not a reliable check. We need to use CONFIG_HARDLOCKUP_DETECTOR instead. Fixes: kernel/built-in.o: In function `touch_nmi_watchdog': (.text+0x449bc): multiple definition of `touch_nmi_watchdog' arch/sparc/kernel/built-in.o:(.text+0x11b28): first defined here Signed-off-by: Don Zickus Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Don Zickus Cc: Cyrill Gorcunov LKML-Reference: <20100514151121.GR15159@redhat.com> [ use CONFIG_HARDLOCKUP_DETECTOR instead of CONFIG_PERF_EVENTS_NMI] Signed-off-by: Frederic Weisbecker --- include/linux/nmi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index abd48aacaf79..06aab5eee134 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -20,7 +20,7 @@ extern void touch_nmi_watchdog(void); extern void acpi_nmi_disable(void); extern void acpi_nmi_enable(void); #else -#ifndef CONFIG_LOCKUP_DETECTOR +#ifndef CONFIG_HARDLOCKUP_DETECTOR static inline void touch_nmi_watchdog(void) { touch_softlockup_watchdog(); -- cgit v1.2.3 From b0f82b81fe6bbcf78d478071f33e44554726bc81 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 20 May 2010 07:47:21 +0200 Subject: perf: Drop the skip argument from perf_arch_fetch_regs_caller Drop this argument now that we always want to rewind only to the state of the first caller. It means frame pointers are not necessary anymore to reliably get the source of an event. But this also means we need this helper to be a macro now, as an inline function is not an option since we need to know when to provide a default implentation. Signed-off-by: Frederic Weisbecker Signed-off-by: Paul Mackerras Cc: David Miller Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 32 +++++++------------------------- 1 file changed, 7 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index fb6c91eac7e3..bea785cef493 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -905,8 +905,10 @@ extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64); -extern void -perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip); +#ifndef perf_arch_fetch_caller_regs +static inline void +perf_arch_fetch_caller_regs(struct regs *regs, unsigned long ip) { } +#endif /* * Take a snapshot of the regs. Skip ip and frame pointer to @@ -916,31 +918,11 @@ perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip); * - bp for callchains * - eflags, for future purposes, just in case */ -static inline void perf_fetch_caller_regs(struct pt_regs *regs, int skip) +static inline void perf_fetch_caller_regs(struct pt_regs *regs) { - unsigned long ip; - memset(regs, 0, sizeof(*regs)); - switch (skip) { - case 1 : - ip = CALLER_ADDR0; - break; - case 2 : - ip = CALLER_ADDR1; - break; - case 3 : - ip = CALLER_ADDR2; - break; - case 4: - ip = CALLER_ADDR3; - break; - /* No need to support further for now */ - default: - ip = 0; - } - - return perf_arch_fetch_caller_regs(regs, ip, skip); + perf_arch_fetch_caller_regs(regs, CALLER_ADDR0); } static inline void @@ -950,7 +932,7 @@ perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) struct pt_regs hot_regs; if (!regs) { - perf_fetch_caller_regs(&hot_regs, 1); + perf_fetch_caller_regs(&hot_regs); regs = &hot_regs; } __perf_sw_event(event_id, nr, nmi, regs, addr); -- cgit v1.2.3 From ecc55f84b2e9741f29daa787ded93986df6cbe17 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 21 May 2010 15:11:34 +0200 Subject: perf, trace: Inline perf_swevent_put_recursion_context() Inline perf_swevent_put_recursion_context into perf_tp_event(), this shrinks the per trace template code footprint and saves a function call. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 3 +-- include/linux/perf_event.h | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 3167f2df4126..0af31cd335d6 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -257,8 +257,7 @@ static inline void perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr, u64 count, struct pt_regs *regs, void *head) { - perf_tp_event(addr, count, raw_data, size, regs, head); - perf_swevent_put_recursion_context(rctx); + perf_tp_event(addr, count, raw_data, size, regs, head, rctx); } #endif diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 5d0266d94985..c691a0b27bcd 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1001,7 +1001,7 @@ static inline bool perf_paranoid_kernel(void) extern void perf_event_init(void); extern void perf_tp_event(u64 addr, u64 count, void *record, int entry_size, struct pt_regs *regs, - struct hlist_head *head); + struct hlist_head *head, int rctx); extern void perf_bp_event(struct perf_event *event, void *data); #ifndef perf_misc_flags -- cgit v1.2.3 From 3af9e859281bda7eb7c20b51879cf43aa788ac2e Mon Sep 17 00:00:00 2001 From: Eric B Munson Date: Tue, 18 May 2010 15:30:49 +0100 Subject: perf: Add non-exec mmap() tracking Add the capacility to track data mmap()s. This can be used together with PERF_SAMPLE_ADDR for data profiling. Signed-off-by: Anton Blanchard [Updated code for stable perf ABI] Signed-off-by: Eric B Munson Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Paul Mackerras Cc: Mike Galbraith Cc: Steven Rostedt LKML-Reference: <1274193049-25997-1-git-send-email-ebmunson@us.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index c691a0b27bcd..36efad90cd43 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -214,8 +214,9 @@ struct perf_event_attr { * See also PERF_RECORD_MISC_EXACT_IP */ precise_ip : 2, /* skid constraint */ + mmap_data : 1, /* non-exec mmap data */ - __reserved_1 : 47; + __reserved_1 : 46; union { __u32 wakeup_events; /* wakeup every n events */ @@ -962,14 +963,7 @@ perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) } } -extern void __perf_event_mmap(struct vm_area_struct *vma); - -static inline void perf_event_mmap(struct vm_area_struct *vma) -{ - if (vma->vm_flags & VM_EXEC) - __perf_event_mmap(vma); -} - +extern void perf_event_mmap(struct vm_area_struct *vma); extern struct perf_guest_info_callbacks *perf_guest_cbs; extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); -- cgit v1.2.3 From 8d2cacbbb8deadfae78aa16e4e1ee619bdd7019e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 25 May 2010 17:49:05 +0200 Subject: perf: Cleanup {start,commit,cancel}_txn details Clarify some of the transactional group scheduling API details and change it so that a successfull ->commit_txn also closes the transaction. Signed-off-by: Peter Zijlstra Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Paul Mackerras Cc: Mike Galbraith Cc: Steven Rostedt LKML-Reference: <1274803086.5882.1752.camel@twins> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 36efad90cd43..f1b6ba0770e0 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -549,7 +549,10 @@ struct hw_perf_event { struct perf_event; -#define PERF_EVENT_TXN_STARTED 1 +/* + * Common implementation detail of pmu::{start,commit,cancel}_txn + */ +#define PERF_EVENT_TXN 0x1 /** * struct pmu - generic performance monitoring unit @@ -563,14 +566,28 @@ struct pmu { void (*unthrottle) (struct perf_event *event); /* - * group events scheduling is treated as a transaction, - * add group events as a whole and perform one schedulability test. - * If test fails, roll back the whole group + * Group events scheduling is treated as a transaction, add group + * events as a whole and perform one schedulability test. If the test + * fails, roll back the whole group */ + /* + * Start the transaction, after this ->enable() doesn't need + * to do schedulability tests. + */ void (*start_txn) (const struct pmu *pmu); - void (*cancel_txn) (const struct pmu *pmu); + /* + * If ->start_txn() disabled the ->enable() schedulability test + * then ->commit_txn() is required to perform one. On success + * the transaction is closed. On error the transaction is kept + * open until ->cancel_txn() is called. + */ int (*commit_txn) (const struct pmu *pmu); + /* + * Will cancel the transaction, assumes ->disable() is called for + * each successfull ->enable() during the transaction. + */ + void (*cancel_txn) (const struct pmu *pmu); }; /** -- cgit v1.2.3 From ca5135e6b4a3cbc7e187737520fbc4b508f6f7a2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 28 May 2010 19:33:23 +0200 Subject: perf: Rename perf_mmap_data to perf_buffer Rename to clarify code. s/perf_mmap_data/perf_buffer/g and selective s/data/buffer/g Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Paul Mackerras Cc: Mike Galbraith Cc: Steven Rostedt LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index f1b6ba0770e0..2a0da021c23f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -602,7 +602,7 @@ enum perf_event_active_state { struct file; -struct perf_mmap_data { +struct perf_buffer { atomic_t refcount; struct rcu_head rcu_head; #ifdef CONFIG_PERF_USE_VMALLOC @@ -727,7 +727,7 @@ struct perf_event { atomic_t mmap_count; int mmap_locked; struct user_struct *mmap_user; - struct perf_mmap_data *data; + struct perf_buffer *buffer; /* poll related */ wait_queue_head_t waitq; @@ -825,7 +825,7 @@ struct perf_cpu_context { struct perf_output_handle { struct perf_event *event; - struct perf_mmap_data *data; + struct perf_buffer *buffer; unsigned long wakeup; unsigned long size; void *addr; -- cgit v1.2.3 From d57e34fdd60be7ffd0b1d86bfa1a553df86b7172 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 28 May 2010 19:41:35 +0200 Subject: perf: Simplify the ring-buffer logic: make perf_buffer_alloc() do everything needed Currently there are perf_buffer_alloc() + perf_buffer_init() + some separate bits, fold it all into a single perf_buffer_alloc() and only leave the attachment to the event separate. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2a0da021c23f..441992a9775c 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -602,6 +602,8 @@ enum perf_event_active_state { struct file; +#define PERF_BUFFER_WRITABLE 0x01 + struct perf_buffer { atomic_t refcount; struct rcu_head rcu_head; -- cgit v1.2.3 From a6e6dea68c18f705957573ee5596097c7e82d0e5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 21 May 2010 14:27:58 +0200 Subject: perf: Add perf_event::child_count Only child counters adding back their values into the parent counter are responsible for cross-cpu updates to event->count. So if we pull that out into a new child_count variable, we get an event->count that is only modified locally. Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Paul Mackerras Cc: Mike Galbraith Cc: Steven Rostedt LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 441992a9775c..f34dab9b275e 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -671,6 +671,7 @@ struct perf_event { enum perf_event_active_state state; unsigned int attach_state; atomic64_t count; + atomic64_t child_count; /* * These are the total time in nanoseconds that the event -- cgit v1.2.3 From e78505958cf123048fb48cb56b79cebb8edd15fb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 21 May 2010 14:43:08 +0200 Subject: perf: Convert perf_event to local_t Since now all modification to event->count (and ->prev_count and ->period_left) are local to a cpu, change then to local64_t so we avoid the LOCK'ed ops. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index f34dab9b275e..7342979f95f2 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -487,6 +487,7 @@ struct perf_guest_info_callbacks { #include #include #include +#include #define PERF_MAX_STACK_DEPTH 255 @@ -536,10 +537,10 @@ struct hw_perf_event { struct arch_hw_breakpoint info; #endif }; - atomic64_t prev_count; + local64_t prev_count; u64 sample_period; u64 last_period; - atomic64_t period_left; + local64_t period_left; u64 interrupts; u64 freq_time_stamp; @@ -670,7 +671,7 @@ struct perf_event { enum perf_event_active_state state; unsigned int attach_state; - atomic64_t count; + local64_t count; atomic64_t child_count; /* -- cgit v1.2.3 From 7be7923633a142402266d642ccebf74f556a649b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 9 Jun 2010 11:57:23 +0200 Subject: perf: Fix build breakage for architecutes without atomic64_t The local64.h include dependency was not dependent on PERF_EVENT=y, which meant that arch's without atomic64_t support ended up including it and failed to build. Signed-off-by: Peter Zijlstra LKML-Reference: --- include/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 7342979f95f2..1218d05728b9 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -462,6 +462,7 @@ enum perf_callchain_context { #ifdef CONFIG_PERF_EVENTS # include +# include #endif struct perf_guest_info_callbacks { @@ -487,7 +488,6 @@ struct perf_guest_info_callbacks { #include #include #include -#include #define PERF_MAX_STACK_DEPTH 255 -- cgit v1.2.3 From 039ca4e74a1cf60bd7487324a564ecf5c981f254 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 26 May 2010 17:22:17 +0800 Subject: tracing: Remove kmemtrace ftrace plugin We have been resisting new ftrace plugins and removing existing ones, and kmemtrace has been superseded by kmem trace events and perf-kmem, so we remove it. Signed-off-by: Li Zefan Acked-by: Pekka Enberg Acked-by: Eduard - Gabriel Munteanu Cc: Ingo Molnar Cc: Steven Rostedt [ remove kmemtrace from the makefile, handle slob too ] Signed-off-by: Frederic Weisbecker --- include/linux/kmemtrace.h | 25 ------------------------- include/linux/slab_def.h | 3 ++- include/linux/slub_def.h | 3 ++- 3 files changed, 4 insertions(+), 27 deletions(-) delete mode 100644 include/linux/kmemtrace.h (limited to 'include/linux') diff --git a/include/linux/kmemtrace.h b/include/linux/kmemtrace.h deleted file mode 100644 index b616d3930c3b..000000000000 --- a/include/linux/kmemtrace.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (C) 2008 Eduard - Gabriel Munteanu - * - * This file is released under GPL version 2. - */ - -#ifndef _LINUX_KMEMTRACE_H -#define _LINUX_KMEMTRACE_H - -#ifdef __KERNEL__ - -#include - -#ifdef CONFIG_KMEMTRACE -extern void kmemtrace_init(void); -#else -static inline void kmemtrace_init(void) -{ -} -#endif - -#endif /* __KERNEL__ */ - -#endif /* _LINUX_KMEMTRACE_H */ - diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 1812dac8c496..1acfa73ce2ac 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -14,7 +14,8 @@ #include /* kmalloc_sizes.h needs PAGE_SIZE */ #include /* kmalloc_sizes.h needs L1_CACHE_BYTES */ #include -#include + +#include #ifndef ARCH_KMALLOC_MINALIGN /* diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 55695c8d2f8a..2345d3a033e6 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -10,9 +10,10 @@ #include #include #include -#include #include +#include + enum stat_item { ALLOC_FASTPATH, /* Allocation from cpu slab */ ALLOC_SLOWPATH, /* Allocation by getting a new cpu slab */ -- cgit v1.2.3 From 45a73372efe4a63f44aa2e1125d4a777c2fdc8d8 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 23 Jun 2010 23:00:37 +0200 Subject: hw_breakpoints: Fix per task breakpoint tracking Freeing a perf event can happen in several ways. A task calls perf_event_exit_task() right before exiting. This helper will detach all the events from the task context and queue their removal through free_event() if they are child tasks. The task also loses its context reference there. Releasing the breakpoint slot from the constraint table is made from free_event() that calls release_bp_slot(). We count the number of breakpoints this task is running by looking at the task's perf_event_ctxp and iterating through its attached events. But at this time, the reference to this context has been cleaned up already. So looking at the event->ctx instead of task->perf_event_ctxp to count the remaining breakpoints should solve the problem. At least it would for child breakpoints, but not for parent ones. If the parent exits before the child, it will remove all its events from the context but free_event() will be called later, on fd release time. And checking the number of breakpoints the task has attached to its context at this time is unreliable as all events have been removed from the context. To solve this, we keep track of the list of per task breakpoints. On top of it, we maintain our array of numbers of breakpoints used by the tasks. We use the context address as a task id. So, instead of looking at the number of events attached to a context, we walk through our list of per task breakpoints and count the number of breakpoints that use the same ctx than the one to be reserved or released from the constraint table, and update the count on top of this result. In the meantime it solves a bad refcounting, it also solves a warning, reported by Paul. Badness at /home/paulus/kernel/perf/kernel/hw_breakpoint.c:114 NIP: c0000000000cb470 LR: c0000000000cb46c CTR: c00000000032d9b8 REGS: c000000118e7b570 TRAP: 0700 Not tainted (2.6.35-rc3-perf-00008-g76b0f13 ) MSR: 9000000000029032 CR: 44004424 XER: 000fffff TASK = c0000001187dcad0[3143] 'perf' THREAD: c000000118e78000 CPU: 1 GPR00: c0000000000cb46c c000000118e7b7f0 c0000000009866a0 0000000000000020 GPR04: 0000000000000000 000000000000001d 0000000000000000 0000000000000001 GPR08: c0000000009bed68 c00000000086dff8 c000000000a5bf10 0000000000000001 GPR12: 0000000024004422 c00000000ffff200 0000000000000000 0000000000000000 GPR16: 0000000000000000 0000000000000000 0000000000000018 00000000101150f4 GPR20: 0000000010206b40 0000000000000000 0000000000000000 00000000101150f4 GPR24: c0000001199090c0 0000000000000001 0000000000000000 0000000000000001 GPR28: 0000000000000000 0000000000000000 c0000000008ec290 0000000000000000 NIP [c0000000000cb470] .task_bp_pinned+0x5c/0x12c LR [c0000000000cb46c] .task_bp_pinned+0x58/0x12c Call Trace: [c000000118e7b7f0] [c0000000000cb46c] .task_bp_pinned+0x58/0x12c (unreliable) [c000000118e7b8a0] [c0000000000cb584] .toggle_bp_task_slot+0x44/0xe4 [c000000118e7b940] [c0000000000cb6c8] .toggle_bp_slot+0xa4/0x164 [c000000118e7b9f0] [c0000000000cbafc] .release_bp_slot+0x44/0x6c [c000000118e7ba80] [c0000000000c4178] .bp_perf_event_destroy+0x10/0x24 [c000000118e7bb00] [c0000000000c4aec] .free_event+0x180/0x1bc [c000000118e7bbc0] [c0000000000c54c4] .perf_event_release_kernel+0x14c/0x170 Reported-by: Paul Mackerras Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Prasad Cc: Mahesh Salgaonkar Cc: Will Deacon Cc: Jason Wessel --- include/linux/perf_event.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 63b5aa5dce69..0dd5f8ad77ac 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -533,8 +533,10 @@ struct hw_perf_event { struct hrtimer hrtimer; }; #ifdef CONFIG_HAVE_HW_BREAKPOINT - /* breakpoint */ - struct arch_hw_breakpoint info; + struct { /* breakpoint */ + struct arch_hw_breakpoint info; + struct list_head bp_list; + }; #endif }; local64_t prev_count; -- cgit v1.2.3 From 5cfaf214856eb934759ae500a0b812dd06a00bd9 Mon Sep 17 00:00:00 2001 From: Nobuhiro Iwamatsu Date: Wed, 23 Jun 2010 09:17:53 +0900 Subject: perf: Fix argument of perf_arch_fetch_caller_regs "struct regs" was set to argument of perf_arch_fetch_caller_regs off-case. It should be "struct pt_regs". This fixes various build errors in archs that have CONFIG_PERF_EVENTS=y but no overriden implementation of perf_arch_fetch_caller_regs. cc1: warnings being treated as errors In file included from include/linux/ftrace_event.h:8, from include/trace/syscall.h:6, from include/linux/syscalls.h:75, from arch/sh/kernel/sys_sh32.c:9: include/linux/perf_event.h:937: error: 'struct regs' declared inside parameter list include/linux/perf_event.h:937: error: its scope is only this definition or declaration, which is probably not what you want include/linux/perf_event.h: In function 'perf_fetch_caller_regs': include/linux/perf_event.h:952: error: passing argument 1 of 'perf_arch_fetch_caller_regs' from incompatible pointer type Signed-off-by: Nobuhiro Iwamatsu Reported-by: Stephen Rothwell Cc: Paul Mackerras Cc: David Miller Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Frederic Weisbecker --- include/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 0dd5f8ad77ac..937495c25073 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -936,7 +936,7 @@ extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64); #ifndef perf_arch_fetch_caller_regs static inline void -perf_arch_fetch_caller_regs(struct regs *regs, unsigned long ip) { } +perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { } #endif /* -- cgit v1.2.3 From c9642c49aae1272d7c24008a40ae614470b957a6 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 24 May 2010 16:22:30 +0800 Subject: tracing: Use a global field list for all syscall exit events All syscall exit events have the same fields. The kernel size drops 2.5K: text data bss dec hex filename 7018612 2034376 7251132 16304120 f8c7f8 vmlinux.o.orig 7018612 2031888 7251132 16301632 f8be40 vmlinux.o Signed-off-by: Li Zefan LKML-Reference: <4BFA3746.8070100@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- include/linux/syscalls.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 7f614ce274a9..7994bd44eb56 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -165,7 +165,6 @@ extern struct trace_event_functions exit_syscall_print_funcs; .enter_event = &event_enter_##sname, \ .exit_event = &event_exit_##sname, \ .enter_fields = LIST_HEAD_INIT(__syscall_meta_##sname.enter_fields), \ - .exit_fields = LIST_HEAD_INIT(__syscall_meta_##sname.exit_fields), \ }; #define SYSCALL_DEFINE0(sname) \ @@ -180,7 +179,6 @@ extern struct trace_event_functions exit_syscall_print_funcs; .enter_event = &event_enter__##sname, \ .exit_event = &event_exit__##sname, \ .enter_fields = LIST_HEAD_INIT(__syscall_meta__##sname.enter_fields), \ - .exit_fields = LIST_HEAD_INIT(__syscall_meta__##sname.exit_fields), \ }; \ asmlinkage long sys_##sname(void) #else -- cgit v1.2.3 From a1d0ce8213e9ddf4046ef5ba95c55762d075f541 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 8 Jun 2010 11:22:06 -0400 Subject: tracing: Use class->reg() for all registering of events Because kprobes and syscalls need special processing to register events, the class->reg() method was created to handle the differences. But instead of creating a default ->reg for perf and ftrace events, the code was scattered with: if (class->reg) class->reg(); else default_reg(); This is messy and can also lead to bugs. This patch cleans up this code and creates a default reg() entry for the events allowing for the code to directly call the class->reg() without the condition. Reported-by: Peter Zijlstra Acked-by: Peter Zijlstra Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 0af31cd335d6..01df7ca4ead7 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -146,6 +146,9 @@ struct ftrace_event_class { int (*raw_init)(struct ftrace_event_call *); }; +extern int ftrace_event_reg(struct ftrace_event_call *event, + enum trace_reg type); + enum { TRACE_EVENT_FL_ENABLED_BIT, TRACE_EVENT_FL_FILTERED_BIT, -- cgit v1.2.3 From eb7beb5c09af75494234ea6acd09d0a647cf7338 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 16 Jul 2010 00:50:03 +0200 Subject: tracing: Remove special traces Special traces type was only used by sysprof. Lets remove it now that sysprof ftrace plugin has been dropped. Signed-off-by: Frederic Weisbecker Acked-by: Soeren Sandmann Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Steven Rostedt Cc: Li Zefan --- include/linux/kernel.h | 5 ----- include/linux/sched.h | 12 ------------ 2 files changed, 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 8317ec4b9f3b..adee958b5989 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -508,9 +508,6 @@ extern void tracing_start(void); extern void tracing_stop(void); extern void ftrace_off_permanent(void); -extern void -ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); - static inline void __attribute__ ((format (printf, 1, 2))) ____trace_printk_check_format(const char *fmt, ...) { @@ -586,8 +583,6 @@ __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap); extern void ftrace_dump(enum ftrace_dump_mode oops_dump_mode); #else -static inline void -ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { } static inline int trace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); diff --git a/include/linux/sched.h b/include/linux/sched.h index 747fcaedddb7..f751ea9dcb7b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2434,18 +2434,6 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) #endif /* CONFIG_SMP */ -#ifdef CONFIG_TRACING -extern void -__trace_special(void *__tr, void *__data, - unsigned long arg1, unsigned long arg2, unsigned long arg3); -#else -static inline void -__trace_special(void *__tr, void *__data, - unsigned long arg1, unsigned long arg2, unsigned long arg3) -{ -} -#endif - extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); extern long sched_getaffinity(pid_t pid, struct cpumask *mask); -- cgit v1.2.3 From e870e9a1240bcef1157ffaaf71dac63362e71904 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 2 Jul 2010 11:07:32 +0800 Subject: tracing: Allow to disable cmdline recording We found that even enabling a single trace event that will rarely be triggered can add big overhead to context switch. (lmbench context switch test) ------------------------------------------------- 2p/0K 2p/16K 2p/64K 8p/16K 8p/64K 16p/16K 16p/64K ctxsw ctxsw ctxsw ctxsw ctxsw ctxsw ctxsw ------ ------ ------ ------ ------ ------- ------- 2.19 2.3 2.21 2.56 2.13 2.54 2.07 2.39 2.51 2.35 2.75 2.27 2.81 2.24 The overhead is 6% ~ 11%. It's because when a trace event is enabled 3 tracepoints (sched_switch, sched_wakeup, sched_wakeup_new) will be activated to map pid to cmdname. We'd like to avoid this overhead, so add a trace option '(no)record-cmd' to allow to disable cmdline recording. Signed-off-by: Li Zefan LKML-Reference: <4C2D57F4.2050204@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 01df7ca4ead7..2b7b1395b4d5 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -152,11 +152,13 @@ extern int ftrace_event_reg(struct ftrace_event_call *event, enum { TRACE_EVENT_FL_ENABLED_BIT, TRACE_EVENT_FL_FILTERED_BIT, + TRACE_EVENT_FL_RECORDED_CMD_BIT, }; enum { - TRACE_EVENT_FL_ENABLED = (1 << TRACE_EVENT_FL_ENABLED_BIT), - TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT), + TRACE_EVENT_FL_ENABLED = (1 << TRACE_EVENT_FL_ENABLED_BIT), + TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT), + TRACE_EVENT_FL_RECORDED_CMD = (1 << TRACE_EVENT_FL_RECORDED_CMD_BIT), }; struct ftrace_event_call { @@ -174,6 +176,7 @@ struct ftrace_event_call { * 32 bit flags: * bit 1: enabled * bit 2: filter_active + * bit 3: enabled cmd record * * Changes to flags must hold the event_mutex. * -- cgit v1.2.3 From bc289ae98b75d93228d24f521ef02a076e506e94 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 3 Jun 2010 18:26:24 +0800 Subject: tracing: Reduce latency and remove percpu trace_seq __print_flags() and __print_symbolic() use percpu trace_seq: 1) Its memory is allocated at compile time, it wastes memory if we don't use tracing. 2) It is percpu data and it wastes more memory for multi-cpus system. 3) It disables preemption when it executes its core routine "trace_seq_printf(s, "%s: ", #call);" and introduces latency. So we move this trace_seq to struct trace_iterator. Signed-off-by: Lai Jiangshan LKML-Reference: <4C078350.7090106@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 2b7b1395b4d5..02b8b24f8f51 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -11,8 +11,6 @@ struct trace_array; struct tracer; struct dentry; -DECLARE_PER_CPU(struct trace_seq, ftrace_event_seq); - struct trace_print_flags { unsigned long mask; const char *name; @@ -58,6 +56,9 @@ struct trace_iterator { struct ring_buffer_iter *buffer_iter[NR_CPUS]; unsigned long iter_flags; + /* trace_seq for __print_flags() and __print_symbolic() etc. */ + struct trace_seq tmp_seq; + /* The below is zeroed out in pipe_read */ struct trace_seq seq; struct trace_entry *ent; -- cgit v1.2.3 From 9849ed4d72251d273524efb8b70be0be9aecb1df Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Tue, 20 Jul 2010 03:13:35 -0400 Subject: tracing/documentation: Document dynamic ftracer internals Add more details to the dynamic function tracing design implementation. Signed-off-by: Mike Frysinger LKML-Reference: <1279610015-10250-1-git-send-email-vapier@gentoo.org> Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 41e46330d9be..dcd6a7c3a435 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1,3 +1,8 @@ +/* + * Ftrace header. For implementation details beyond the random comments + * scattered below, see: Documentation/trace/ftrace-design.txt + */ + #ifndef _LINUX_FTRACE_H #define _LINUX_FTRACE_H -- cgit v1.2.3