From 2f5f6ad9390c1ebbf738d130dbfe80b60eaa167e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 8 Aug 2011 16:57:47 -0400 Subject: ftrace: Pass ftrace_ops as third parameter to function trace callback Currently the function trace callback receives only the ip and parent_ip of the function that it traced. It would be more powerful to also return the ops that registered the function as well. This allows the same function to act differently depending on what ftrace_ops registered it. Link: http://lkml.kernel.org/r/20120612225424.267254552@goodmis.org Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 55e6d63d46d0..2d5964119885 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -18,6 +18,15 @@ #include +/* + * If the arch supports passing the variable contents of + * function_trace_op as the third parameter back from the + * mcount call, then the arch should define this as 1. + */ +#ifndef ARCH_SUPPORTS_FTRACE_OPS +#define ARCH_SUPPORTS_FTRACE_OPS 0 +#endif + struct module; struct ftrace_hash; @@ -29,7 +38,10 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); -typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); +struct ftrace_ops; + +typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op); /* * FTRACE_OPS_FL_* bits denote the state of ftrace_ops struct and are @@ -163,7 +175,7 @@ static inline int ftrace_function_local_disabled(struct ftrace_ops *ops) return *this_cpu_ptr(ops->disabled); } -extern void ftrace_stub(unsigned long a0, unsigned long a1); +extern void ftrace_stub(unsigned long a0, unsigned long a1, struct ftrace_ops *op); #else /* !CONFIG_FUNCTION_TRACER */ /* -- cgit v1.2.3 From ccf3672d530170c98c734dfc5db07d64bcbad2ad Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 5 Jun 2012 09:44:25 -0400 Subject: ftrace: Consolidate arch dependent functions with 'list' function As the function tracer starts to get more features, the support for theses features will spread out throughout the different architectures over time. These features boil down to what each arch does in the mcount trampoline (the ftrace_caller). Currently there's two features that are not the same throughout the archs. 1) Support to stop function tracing before the callback 2) passing of the ftrace ops Both of these require placing an indirect function to support the features if the mcount trampoline does not. On a side note, for all architectures, when more than one callback is registered to the function tracer, an intermediate 'list' function is called by the mcount trampoline to iterate through the callbacks that are registered. Instead of making a separate function for each of these features, and requiring several indirect calls, just use the single 'list' function as the intermediate, to handle all cases. If an arch does not support the 'stop function tracing' or the passing of ftrace ops, just force it to use the list function that will handle the features required. This makes the code cleaner and simpler and removes a lot of #ifdefs in the code. Link: http://lkml.kernel.org/r/20120612225424.495625483@goodmis.org Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 2d5964119885..3651fdc3bec9 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -27,6 +27,19 @@ #define ARCH_SUPPORTS_FTRACE_OPS 0 #endif +/* + * If the arch's mcount caller does not support all of ftrace's + * features, then it must call an indirect function that + * does. Or at least does enough to prevent any unwelcomed side effects. + */ +#if !defined(CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST) || \ + !ARCH_SUPPORTS_FTRACE_OPS +# define FTRACE_FORCE_LIST_FUNC 1 +#else +# define FTRACE_FORCE_LIST_FUNC 0 +#endif + + struct module; struct ftrace_hash; -- cgit v1.2.3 From a1e2e31d175a1349274eba3465d17616c6725f8c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 9 Aug 2011 12:50:46 -0400 Subject: ftrace: Return pt_regs to function trace callback Return as the 4th paramater to the function tracer callback the pt_regs. Later patches that implement regs passing for the architectures will require having the ftrace_ops set the SAVE_REGS flag, which will tell the arch to take the time to pass a full set of pt_regs to the ftrace_ops callback function. If the arch does not support it then it should pass NULL. If an arch can pass full regs, then it should define: ARCH_SUPPORTS_FTRACE_SAVE_REGS to 1 Link: http://lkml.kernel.org/r/20120702201821.019966811@goodmis.org Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 3651fdc3bec9..e4202881fb00 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -54,7 +55,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, struct ftrace_ops; typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op); + struct ftrace_ops *op, struct pt_regs *regs); /* * FTRACE_OPS_FL_* bits denote the state of ftrace_ops struct and are @@ -188,7 +189,8 @@ static inline int ftrace_function_local_disabled(struct ftrace_ops *ops) return *this_cpu_ptr(ops->disabled); } -extern void ftrace_stub(unsigned long a0, unsigned long a1, struct ftrace_ops *op); +extern void ftrace_stub(unsigned long a0, unsigned long a1, + struct ftrace_ops *op, struct pt_regs *regs); #else /* !CONFIG_FUNCTION_TRACER */ /* -- cgit v1.2.3 From 08f6fba503111e0336f2b4d6915a4a18f9b60e51 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 30 Apr 2012 16:20:23 -0400 Subject: ftrace/x86: Add separate function to save regs Add a way to have different functions calling different trampolines. If a ftrace_ops wants regs saved on the return, then have only the functions with ops registered to save regs. Functions registered by other ops would not be affected, unless the functions overlap. If one ftrace_ops registered functions A, B and C and another ops registered fucntions to save regs on A, and D, then only functions A and D would be saving regs. Function B and C would work as normal. Although A is registered by both ops: normal and saves regs; this is fine as saving the regs is needed to satisfy one of the ops that calls it but the regs are ignored by the other ops function. x86_64 implements the full regs saving, and i386 just passes a NULL for regs to satisfy the ftrace_ops passing. Where an arch must supply both regs and ftrace_ops parameters, even if regs is just NULL. It is OK for an arch to pass NULL regs. All function trace users that require regs passing must add the flag FTRACE_OPS_FL_SAVE_REGS when registering the ftrace_ops. If the arch does not support saving regs then the ftrace_ops will fail to register. The flag FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED may be set that will prevent the ftrace_ops from failing to register. In this case, the handler may either check if regs is not NULL or check if ARCH_SUPPORTS_FTRACE_SAVE_REGS. If the arch supports passing regs it will set this macro and pass regs for ops that request them. All other archs will just pass NULL. Link: Link: http://lkml.kernel.org/r/20120711195745.107705970@goodmis.org Cc: Alexander van Heukelum Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 107 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 100 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index e4202881fb00..ab39990cc43f 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -71,12 +71,28 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, * could be controled by following calls: * ftrace_function_local_enable * ftrace_function_local_disable + * SAVE_REGS - The ftrace_ops wants regs saved at each function called + * and passed to the callback. If this flag is set, but the + * architecture does not support passing regs + * (ARCH_SUPPORTS_FTRACE_SAVE_REGS is not defined), then the + * ftrace_ops will fail to register, unless the next flag + * is set. + * SAVE_REGS_IF_SUPPORTED - This is the same as SAVE_REGS, but if the + * handler can handle an arch that does not save regs + * (the handler tests if regs == NULL), then it can set + * this flag instead. It will not fail registering the ftrace_ops + * but, the regs field will be NULL if the arch does not support + * passing regs to the handler. + * Note, if this flag is set, the SAVE_REGS flag will automatically + * get set upon registering the ftrace_ops, if the arch supports it. */ enum { - FTRACE_OPS_FL_ENABLED = 1 << 0, - FTRACE_OPS_FL_GLOBAL = 1 << 1, - FTRACE_OPS_FL_DYNAMIC = 1 << 2, - FTRACE_OPS_FL_CONTROL = 1 << 3, + FTRACE_OPS_FL_ENABLED = 1 << 0, + FTRACE_OPS_FL_GLOBAL = 1 << 1, + FTRACE_OPS_FL_DYNAMIC = 1 << 2, + FTRACE_OPS_FL_CONTROL = 1 << 3, + FTRACE_OPS_FL_SAVE_REGS = 1 << 4, + FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED = 1 << 5, }; struct ftrace_ops { @@ -254,12 +270,31 @@ extern void unregister_ftrace_function_probe_all(char *glob); extern int ftrace_text_reserved(void *start, void *end); +/* + * The dyn_ftrace record's flags field is split into two parts. + * the first part which is '0-FTRACE_REF_MAX' is a counter of + * the number of callbacks that have registered the function that + * the dyn_ftrace descriptor represents. + * + * The second part is a mask: + * ENABLED - the function is being traced + * REGS - the record wants the function to save regs + * REGS_EN - the function is set up to save regs. + * + * When a new ftrace_ops is registered and wants a function to save + * pt_regs, the rec->flag REGS is set. When the function has been + * set up to save regs, the REG_EN flag is set. Once a function + * starts saving regs it will do so until all ftrace_ops are removed + * from tracing that function. + */ enum { - FTRACE_FL_ENABLED = (1 << 30), + FTRACE_FL_ENABLED = (1UL << 29), + FTRACE_FL_REGS = (1UL << 30), + FTRACE_FL_REGS_EN = (1UL << 31) }; -#define FTRACE_FL_MASK (0x3UL << 30) -#define FTRACE_REF_MAX ((1 << 30) - 1) +#define FTRACE_FL_MASK (0x7UL << 29) +#define FTRACE_REF_MAX ((1UL << 29) - 1) struct dyn_ftrace { union { @@ -290,9 +325,23 @@ enum { FTRACE_STOP_FUNC_RET = (1 << 4), }; +/* + * The FTRACE_UPDATE_* enum is used to pass information back + * from the ftrace_update_record() and ftrace_test_record() + * functions. These are called by the code update routines + * to find out what is to be done for a given function. + * + * IGNORE - The function is already what we want it to be + * MAKE_CALL - Start tracing the function + * MODIFY_CALL - Stop saving regs for the function + * MODIFY_CALL_REGS - Start saving regs for the function + * MAKE_NOP - Stop tracing the function + */ enum { FTRACE_UPDATE_IGNORE, FTRACE_UPDATE_MAKE_CALL, + FTRACE_UPDATE_MODIFY_CALL, + FTRACE_UPDATE_MODIFY_CALL_REGS, FTRACE_UPDATE_MAKE_NOP, }; @@ -344,7 +393,9 @@ extern int ftrace_dyn_arch_init(void *data); extern void ftrace_replace_code(int enable); extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); +extern void ftrace_regs_caller(void); extern void ftrace_call(void); +extern void ftrace_regs_call(void); extern void mcount_call(void); void ftrace_modify_all_code(int command); @@ -352,6 +403,15 @@ void ftrace_modify_all_code(int command); #ifndef FTRACE_ADDR #define FTRACE_ADDR ((unsigned long)ftrace_caller) #endif + +#ifndef FTRACE_REGS_ADDR +#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS +# define FTRACE_REGS_ADDR ((unsigned long)ftrace_regs_caller) +#else +# define FTRACE_REGS_ADDR FTRACE_ADDR +#endif +#endif + #ifdef CONFIG_FUNCTION_GRAPH_TRACER extern void ftrace_graph_caller(void); extern int ftrace_enable_ftrace_graph_caller(void); @@ -407,6 +467,39 @@ extern int ftrace_make_nop(struct module *mod, */ extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr); +#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS +/** + * ftrace_modify_call - convert from one addr to another (no nop) + * @rec: the mcount call site record + * @old_addr: the address expected to be currently called to + * @addr: the address to change to + * + * This is a very sensitive operation and great care needs + * to be taken by the arch. The operation should carefully + * read the location, check to see if what is read is indeed + * what we expect it to be, and then on success of the compare, + * it should write to the location. + * + * The code segment at @rec->ip should be a caller to @old_addr + * + * Return must be: + * 0 on success + * -EFAULT on error reading the location + * -EINVAL on a failed compare of the contents + * -EPERM on error writing to the location + * Any other value will be considered a failure. + */ +extern int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr); +#else +/* Should never be called */ +static inline int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + return -EINVAL; +} +#endif + /* May be defined in arch */ extern int ftrace_arch_read_dyn_info(char *buf, int size); -- cgit v1.2.3 From 4740974a6844156c14d741b0080b59d275679a23 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 20 Jul 2012 11:04:44 -0400 Subject: ftrace: Add default recursion protection for function tracing As more users of the function tracer utility are being added, they do not always add the necessary recursion protection. To protect from function recursion due to tracing, if the callback ftrace_ops does not specifically specify that it protects against recursion (by setting the FTRACE_OPS_FL_RECURSION_SAFE flag), the list operation will be called by the mcount trampoline which adds recursion protection. If the flag is set, then the function will be called directly with no extra protection. Note, the list operation is called if more than one function callback is registered, or if the arch does not support all of the function tracer features. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index ab39990cc43f..65a14e47a0db 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -85,6 +85,10 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, * passing regs to the handler. * Note, if this flag is set, the SAVE_REGS flag will automatically * get set upon registering the ftrace_ops, if the arch supports it. + * RECURSION_SAFE - The ftrace_ops can set this to tell the ftrace infrastructure + * that the call back has its own recursion protection. If it does + * not set this, then the ftrace infrastructure will add recursion + * protection for the caller. */ enum { FTRACE_OPS_FL_ENABLED = 1 << 0, @@ -93,6 +97,7 @@ enum { FTRACE_OPS_FL_CONTROL = 1 << 3, FTRACE_OPS_FL_SAVE_REGS = 1 << 4, FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED = 1 << 5, + FTRACE_OPS_FL_RECURSION_SAFE = 1 << 6, }; struct ftrace_ops { -- cgit v1.2.3 From ea701f11da44b44907af226fe5a5f57d2f26eeb2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 20 Jul 2012 13:08:05 -0400 Subject: ftrace: Add selftest to test function trace recursion protection Add selftests to test the function tracing recursion protection actually does work. It also tests if a ftrace_ops states it will perform its own protection. Although, even if the ftrace_ops states it will protect itself, the ftrace infrastructure may still provide protection if the arch does not support all features or another ftrace_ops is registered. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 65a14e47a0db..9962e954a633 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -220,6 +220,10 @@ extern void ftrace_stub(unsigned long a0, unsigned long a1, */ #define register_ftrace_function(ops) ({ 0; }) #define unregister_ftrace_function(ops) ({ 0; }) +static inline int ftrace_nr_registered_ops(void) +{ + return 0; +} static inline void clear_ftrace_function(void) { } static inline void ftrace_kill(void) { } static inline void ftrace_stop(void) { } @@ -275,6 +279,8 @@ extern void unregister_ftrace_function_probe_all(char *glob); extern int ftrace_text_reserved(void *start, void *end); +extern int ftrace_nr_registered_ops(void); + /* * The dyn_ftrace record's flags field is split into two parts. * the first part which is '0-FTRACE_REF_MAX' is a counter of -- cgit v1.2.3 From 647664eaf4033501739ac1f42dd52ce8c9266ccc Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 5 Jun 2012 19:28:08 +0900 Subject: ftrace: add ftrace_set_filter_ip() for address based filter Add a new filter update interface ftrace_set_filter_ip() to set ftrace filter by ip address, not only glob pattern. Link: http://lkml.kernel.org/r/20120605102808.27845.67952.stgit@localhost.localdomain Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Ananth N Mavinakayanahalli Cc: "Frank Ch. Eigler" Cc: Andrew Morton Cc: Frederic Weisbecker Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 9962e954a633..3e711122f66c 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -317,6 +317,8 @@ struct dyn_ftrace { }; int ftrace_force_update(void); +int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip, + int remove, int reset); int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, int len, int reset); int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, @@ -544,6 +546,7 @@ static inline int ftrace_text_reserved(void *start, void *end) */ #define ftrace_regex_open(ops, flag, inod, file) ({ -ENODEV; }) #define ftrace_set_early_filter(ops, buf, enable) do { } while (0) +#define ftrace_set_filter_ip(ops, ip, remove, reset) ({ -ENODEV; }) #define ftrace_set_filter(ops, buf, len, reset) ({ -ENODEV; }) #define ftrace_set_notrace(ops, buf, len, reset) ({ -ENODEV; }) #define ftrace_free_filter(ops) do { } while (0) -- cgit v1.2.3 From 4dc936769e8a6382a4cc12375e8a4daa2b829fda Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 6 Jun 2012 13:45:31 -0400 Subject: ftrace: Make ftrace_location() a nop on !DYNAMIC_FTRACE When CONFIG_DYNAMIC_FTRACE is not set, ftrace_location() is not defined. If a user (like kprobes) references this function, it will break the compile when CONFIG_DYNAMIC_FTRACE is not set. Add ftrace_location() as a nop (return 0) when DYNAMIC_FTRACE is not defined. Link: http://lkml.kernel.org/r/20120612225426.961092717@goodmis.org Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 3e711122f66c..a52f2f4fe030 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -520,7 +520,7 @@ extern int skip_trace(unsigned long ip); extern void ftrace_disable_daemon(void); extern void ftrace_enable_daemon(void); -#else +#else /* CONFIG_DYNAMIC_FTRACE */ static inline int skip_trace(unsigned long ip) { return 0; } static inline int ftrace_force_update(void) { return 0; } static inline void ftrace_disable_daemon(void) { } @@ -538,6 +538,10 @@ static inline int ftrace_text_reserved(void *start, void *end) { return 0; } +static inline unsigned long ftrace_location(unsigned long ip) +{ + return 0; +} /* * Again users of functions that have ftrace_ops may not -- cgit v1.2.3 From ae6aa16fdc163afe6b04b6c073ad4ddd4663c03b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 5 Jun 2012 19:28:32 +0900 Subject: kprobes: introduce ftrace based optimization Introduce function trace based kprobes optimization. With using ftrace optimization, kprobes on the mcount calling address, use ftrace's mcount call instead of breakpoint. Furthermore, this optimization works with preemptive kernel not like as current jump-based optimization. Of cource, this feature works only if the probe is on mcount call. Only if kprobe.break_handler is set, that probe is not optimized with ftrace (nor put on ftrace). The reason why this limitation comes is that this break_handler may be used only from jprobes which changes ip address (for fetching the function arguments), but function tracer ignores modified ip address. Changes in v2: - Fix ftrace_ops registering right after setting its filter. - Unregister ftrace_ops if there is no kprobe using. - Remove notrace dependency from __kprobes macro. Link: http://lkml.kernel.org/r/20120605102832.27845.63461.stgit@localhost.localdomain Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Ananth N Mavinakayanahalli Cc: "Frank Ch. Eigler" Cc: Andrew Morton Cc: Frederic Weisbecker Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- include/linux/kprobes.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index b6e1f8c00577..aa0d05e852e3 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -38,6 +38,7 @@ #include #include #include +#include #ifdef CONFIG_KPROBES #include @@ -48,14 +49,26 @@ #define KPROBE_REENTER 0x00000004 #define KPROBE_HIT_SSDONE 0x00000008 +/* + * If function tracer is enabled and the arch supports full + * passing of pt_regs to function tracing, then kprobes can + * optimize on top of function tracing. + */ +#if defined(CONFIG_FUNCTION_TRACER) && defined(ARCH_SUPPORTS_FTRACE_SAVE_REGS) \ + && defined(ARCH_SUPPORTS_KPROBES_ON_FTRACE) +# define KPROBES_CAN_USE_FTRACE +#endif + /* Attach to insert probes on any functions which should be ignored*/ #define __kprobes __attribute__((__section__(".kprobes.text"))) + #else /* CONFIG_KPROBES */ typedef int kprobe_opcode_t; struct arch_specific_insn { int dummy; }; #define __kprobes + #endif /* CONFIG_KPROBES */ struct kprobe; @@ -128,6 +141,7 @@ struct kprobe { * NOTE: * this flag is only for optimized_kprobe. */ +#define KPROBE_FLAG_FTRACE 8 /* probe is using ftrace */ /* Has this kprobe gone ? */ static inline int kprobe_gone(struct kprobe *p) @@ -146,6 +160,13 @@ static inline int kprobe_optimized(struct kprobe *p) { return p->flags & KPROBE_FLAG_OPTIMIZED; } + +/* Is this kprobe uses ftrace ? */ +static inline int kprobe_ftrace(struct kprobe *p) +{ + return p->flags & KPROBE_FLAG_FTRACE; +} + /* * Special probe type that uses setjmp-longjmp type tricks to resume * execution at a specified entry with a matching prototype corresponding @@ -295,6 +316,12 @@ extern int proc_kprobes_optimization_handler(struct ctl_table *table, #endif #endif /* CONFIG_OPTPROBES */ +#ifdef KPROBES_CAN_USE_FTRACE +extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, + struct pt_regs *regs); +extern int arch_prepare_kprobe_ftrace(struct kprobe *p); +#endif + /* Get the kprobe at this addr (if any) - called with preemption disabled */ struct kprobe *get_kprobe(void *addr); -- cgit v1.2.3 From e52538965119319447c0800c534da73142c27be2 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 5 Jun 2012 19:28:38 +0900 Subject: kprobes/x86: ftrace based optimization for x86 Add function tracer based kprobe optimization support handlers on x86. This allows kprobes to use function tracer for probing on mcount call. Link: http://lkml.kernel.org/r/20120605102838.27845.26317.stgit@localhost.localdomain Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Ananth N Mavinakayanahalli Cc: "Frank Ch. Eigler" Cc: Andrew Morton Cc: Frederic Weisbecker Signed-off-by: Masami Hiramatsu [ Updated to new port of ftrace save regs functions ] Signed-off-by: Steven Rostedt --- include/linux/kprobes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index aa0d05e852e3..23755ba42abc 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -318,7 +318,7 @@ extern int proc_kprobes_optimization_handler(struct ctl_table *table, #endif /* CONFIG_OPTPROBES */ #ifdef KPROBES_CAN_USE_FTRACE extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, - struct pt_regs *regs); + struct ftrace_ops *ops, struct pt_regs *regs); extern int arch_prepare_kprobe_ftrace(struct kprobe *p); #endif -- cgit v1.2.3 From c5e63197db519bae1c33e41ea0342a50f39e7a93 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 7 Aug 2012 15:20:36 +0200 Subject: perf: Unified API to record selective sets of arch registers This brings a new API to help the selective dump of registers on event sampling, and its implementation for x86 arch. Added HAVE_PERF_REGS config option to determine if the architecture provides perf registers ABI. The information about desired registers will be passed in u64 mask. It's up to the architecture to map the registers into the mask bits. For the x86 arch implementation, both 32 and 64 bit registers bits are defined within single enum to ensure 64 bit system can provide register dump for compat task if needed in the future. Original-patch-by: Frederic Weisbecker [ Added missing linux/errno.h include ] Signed-off-by: Jiri Olsa Cc: "Frank Ch. Eigler" Cc: Arun Sharma Cc: Benjamin Redelings Cc: Corey Ashford Cc: Cyrill Gorcunov Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Cc: Tom Zanussi Cc: Ulrich Drepper Link: http://lkml.kernel.org/r/1344345647-11536-2-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_regs.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 include/linux/perf_regs.h (limited to 'include/linux') diff --git a/include/linux/perf_regs.h b/include/linux/perf_regs.h new file mode 100644 index 000000000000..a2f1a98f7839 --- /dev/null +++ b/include/linux/perf_regs.h @@ -0,0 +1,19 @@ +#ifndef _LINUX_PERF_REGS_H +#define _LINUX_PERF_REGS_H + +#ifdef CONFIG_HAVE_PERF_REGS +#include +u64 perf_reg_value(struct pt_regs *regs, int idx); +int perf_reg_validate(u64 mask); +#else +static inline u64 perf_reg_value(struct pt_regs *regs, int idx) +{ + return 0; +} + +static inline int perf_reg_validate(u64 mask) +{ + return mask ? -ENOSYS : 0; +} +#endif /* CONFIG_HAVE_PERF_REGS */ +#endif /* _LINUX_PERF_REGS_H */ -- cgit v1.2.3 From 4018994f3d8785275ef0e7391b75c3462c029e56 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 7 Aug 2012 15:20:37 +0200 Subject: perf: Add ability to attach user level registers dump to sample Introducing PERF_SAMPLE_REGS_USER sample type bit to trigger the dump of user level registers on sample. Registers we want to dump are specified by sample_regs_user bitmask. Only user level registers are dumped at the moment. Meaning the register values of the user space context as it was before the user entered the kernel for whatever reason (syscall, irq, exception, or a PMI happening in userspace). The layout of the sample_regs_user bitmap is described in asm/perf_regs.h for archs that support register dump. This is going to be useful to bring Dwarf CFI based stack unwinding on top of samples. Original-patch-by: Frederic Weisbecker [ Dump registers ABI specification. ] Signed-off-by: Jiri Olsa Suggested-by: Stephane Eranian Cc: "Frank Ch. Eigler" Cc: Arun Sharma Cc: Benjamin Redelings Cc: Corey Ashford Cc: Cyrill Gorcunov Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Cc: Tom Zanussi Cc: Ulrich Drepper Link: http://lkml.kernel.org/r/1344345647-11536-3-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 35 ++++++++++++++++++++++++++++++++--- include/linux/perf_regs.h | 6 ++++++ 2 files changed, 38 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 7602ccb3f40e..3d4d84745f07 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -130,8 +130,9 @@ enum perf_event_sample_format { PERF_SAMPLE_STREAM_ID = 1U << 9, PERF_SAMPLE_RAW = 1U << 10, PERF_SAMPLE_BRANCH_STACK = 1U << 11, + PERF_SAMPLE_REGS_USER = 1U << 12, - PERF_SAMPLE_MAX = 1U << 12, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 13, /* non-ABI */ }; /* @@ -162,6 +163,15 @@ enum perf_branch_sample_type { PERF_SAMPLE_BRANCH_KERNEL|\ PERF_SAMPLE_BRANCH_HV) +/* + * Values to determine ABI of the registers dump. + */ +enum perf_sample_regs_abi { + PERF_SAMPLE_REGS_ABI_NONE = 0, + PERF_SAMPLE_REGS_ABI_32 = 1, + PERF_SAMPLE_REGS_ABI_64 = 2, +}; + /* * The format of the data returned by read() on a perf event fd, * as specified by attr.read_format: @@ -194,6 +204,7 @@ enum perf_event_read_format { #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ #define PERF_ATTR_SIZE_VER1 72 /* add: config2 */ #define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */ +#define PERF_ATTR_SIZE_VER3 88 /* add: sample_regs_user */ /* * Hardware event_id to monitor via a performance monitoring event: @@ -271,7 +282,13 @@ struct perf_event_attr { __u64 bp_len; __u64 config2; /* extension of config1 */ }; - __u64 branch_sample_type; /* enum branch_sample_type */ + __u64 branch_sample_type; /* enum perf_branch_sample_type */ + + /* + * Defines set of user regs to dump on samples. + * See asm/perf_regs.h for details. + */ + __u64 sample_regs_user; }; /* @@ -548,6 +565,9 @@ enum perf_event_type { * char data[size];}&& PERF_SAMPLE_RAW * * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK + * + * { u64 abi; # enum perf_sample_regs_abi + * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER * }; */ PERF_RECORD_SAMPLE = 9, @@ -609,6 +629,7 @@ struct perf_guest_info_callbacks { #include #include #include +#include #include struct perf_callchain_entry { @@ -654,6 +675,11 @@ struct perf_branch_stack { struct perf_branch_entry entries[0]; }; +struct perf_regs_user { + __u64 abi; + struct pt_regs *regs; +}; + struct task_struct; /* @@ -1133,6 +1159,7 @@ struct perf_sample_data { struct perf_callchain_entry *callchain; struct perf_raw_record *raw; struct perf_branch_stack *br_stack; + struct perf_regs_user regs_user; }; static inline void perf_sample_data_init(struct perf_sample_data *data, @@ -1142,7 +1169,9 @@ static inline void perf_sample_data_init(struct perf_sample_data *data, data->addr = addr; data->raw = NULL; data->br_stack = NULL; - data->period = period; + data->period = period; + data->regs_user.abi = PERF_SAMPLE_REGS_ABI_NONE; + data->regs_user.regs = NULL; } extern void perf_output_sample(struct perf_output_handle *handle, diff --git a/include/linux/perf_regs.h b/include/linux/perf_regs.h index a2f1a98f7839..3c73d5fe18be 100644 --- a/include/linux/perf_regs.h +++ b/include/linux/perf_regs.h @@ -5,6 +5,7 @@ #include u64 perf_reg_value(struct pt_regs *regs, int idx); int perf_reg_validate(u64 mask); +u64 perf_reg_abi(struct task_struct *task); #else static inline u64 perf_reg_value(struct pt_regs *regs, int idx) { @@ -15,5 +16,10 @@ static inline int perf_reg_validate(u64 mask) { return mask ? -ENOSYS : 0; } + +static inline u64 perf_reg_abi(struct task_struct *task) +{ + return PERF_SAMPLE_REGS_ABI_NONE; +} #endif /* CONFIG_HAVE_PERF_REGS */ #endif /* _LINUX_PERF_REGS_H */ -- cgit v1.2.3 From 91d7753a45f8525dc75b6be01e427dc1c378dc16 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 7 Aug 2012 15:20:38 +0200 Subject: perf: Factor __output_copy to be usable with specific copy function Adding a generic way to use __output_copy function with specific copy function via DEFINE_PERF_OUTPUT_COPY macro. Using this to add new __output_copy_user function, that provides output copy from user pointers. For x86 the copy_from_user_nmi function is used and __copy_from_user_inatomic for the rest of the architectures. This new function will be used in user stack dump on sample, coming in next patches. Signed-off-by: Jiri Olsa Cc: "Frank Ch. Eigler" Cc: Arun Sharma Cc: Benjamin Redelings Cc: Corey Ashford Cc: Cyrill Gorcunov Cc: Frank Ch. Eigler Cc: Ingo Molnar Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Cc: Tom Zanussi Cc: Ulrich Drepper Link: http://lkml.kernel.org/r/1344345647-11536-4-git-send-email-jolsa@redhat.com Signed-off-by: Frederic Weisbecker Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 3d4d84745f07..d41394a1af36 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1319,7 +1319,7 @@ static inline bool has_branch_stack(struct perf_event *event) extern int perf_output_begin(struct perf_output_handle *handle, struct perf_event *event, unsigned int size); extern void perf_output_end(struct perf_output_handle *handle); -extern void perf_output_copy(struct perf_output_handle *handle, +extern unsigned int perf_output_copy(struct perf_output_handle *handle, const void *buf, unsigned int len); extern int perf_swevent_get_recursion_context(void); extern void perf_swevent_put_recursion_context(int rctx); -- cgit v1.2.3 From 5685e0ff45f5df67e79e9b052b6ffd501ff38c11 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 7 Aug 2012 15:20:39 +0200 Subject: perf: Add perf_output_skip function to skip bytes in sample Introducing perf_output_skip function to be able to skip data within the perf ring buffer. When writing data into perf ring buffer we first reserve needed place in ring buffer and then copy the actual data. There's a possibility we won't be able to fill all the reserved size with data, so we need a way to skip the remaining bytes. This is going to be useful when storing the user stack dump, where we might end up with less data than we originally requested. Signed-off-by: Jiri Olsa Acked-by: Frederic Weisbecker Cc: "Frank Ch. Eigler" Cc: Arun Sharma Cc: Benjamin Redelings Cc: Corey Ashford Cc: Cyrill Gorcunov Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Cc: Tom Zanussi Cc: Ulrich Drepper Link: http://lkml.kernel.org/r/1344345647-11536-5-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index d41394a1af36..8a73f75beb16 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1321,6 +1321,8 @@ extern int perf_output_begin(struct perf_output_handle *handle, extern void perf_output_end(struct perf_output_handle *handle); extern unsigned int perf_output_copy(struct perf_output_handle *handle, const void *buf, unsigned int len); +extern unsigned int perf_output_skip(struct perf_output_handle *handle, + unsigned int len); extern int perf_swevent_get_recursion_context(void); extern void perf_swevent_put_recursion_context(int rctx); extern void perf_event_enable(struct perf_event *event); -- cgit v1.2.3 From c5ebcedb566ef17bda7b02686e0d658a7bb42ee7 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 7 Aug 2012 15:20:40 +0200 Subject: perf: Add ability to attach user stack dump to sample Introducing PERF_SAMPLE_STACK_USER sample type bit to trigger the dump of the user level stack on sample. The size of the dump is specified by sample_stack_user value. Being able to dump parts of the user stack, starting from the stack pointer, will be useful to make a post mortem dwarf CFI based stack unwinding. Added HAVE_PERF_USER_STACK_DUMP config option to determine if the architecture provides user stack dump on perf event samples. This needs access to the user stack pointer which is not unified across architectures. Enabling this for x86 architecture. Signed-off-by: Jiri Olsa Original-patch-by: Frederic Weisbecker Cc: "Frank Ch. Eigler" Cc: Arun Sharma Cc: Benjamin Redelings Cc: Corey Ashford Cc: Cyrill Gorcunov Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Cc: Tom Zanussi Cc: Ulrich Drepper Link: http://lkml.kernel.org/r/1344345647-11536-6-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 8a73f75beb16..d1d25f6a5e24 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -131,8 +131,9 @@ enum perf_event_sample_format { PERF_SAMPLE_RAW = 1U << 10, PERF_SAMPLE_BRANCH_STACK = 1U << 11, PERF_SAMPLE_REGS_USER = 1U << 12, + PERF_SAMPLE_STACK_USER = 1U << 13, - PERF_SAMPLE_MAX = 1U << 13, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 14, /* non-ABI */ }; /* @@ -205,6 +206,7 @@ enum perf_event_read_format { #define PERF_ATTR_SIZE_VER1 72 /* add: config2 */ #define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */ #define PERF_ATTR_SIZE_VER3 88 /* add: sample_regs_user */ +#define PERF_ATTR_SIZE_VER4 96 /* add: sample_stack_user */ /* * Hardware event_id to monitor via a performance monitoring event: @@ -289,6 +291,14 @@ struct perf_event_attr { * See asm/perf_regs.h for details. */ __u64 sample_regs_user; + + /* + * Defines size of the user stack to dump on samples. + */ + __u32 sample_stack_user; + + /* Align to u64. */ + __u32 __reserved_2; }; /* @@ -568,6 +578,10 @@ enum perf_event_type { * * { u64 abi; # enum perf_sample_regs_abi * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER + * + * { u64 size; + * char data[size]; + * u64 dyn_size; } && PERF_SAMPLE_STACK_USER * }; */ PERF_RECORD_SAMPLE = 9, @@ -1160,6 +1174,7 @@ struct perf_sample_data { struct perf_raw_record *raw; struct perf_branch_stack *br_stack; struct perf_regs_user regs_user; + u64 stack_user_size; }; static inline void perf_sample_data_init(struct perf_sample_data *data, @@ -1172,6 +1187,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data, data->period = period; data->regs_user.abi = PERF_SAMPLE_REGS_ABI_NONE; data->regs_user.regs = NULL; + data->stack_user_size = 0; } extern void perf_output_sample(struct perf_output_handle *handle, -- cgit v1.2.3 From d077526485d5c9b12fe85d0b2b3b7041e6bc5f91 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 7 Aug 2012 15:20:41 +0200 Subject: perf: Add attribute to filter out callchains Introducing following bits to the the perf_event_attr struct: - exclude_callchain_kernel to filter out kernel callchain from the sample dump - exclude_callchain_user to filter out user callchain from the sample dump We need to be able to disable standard user callchain dump when we use the dwarf cfi callchain mode, because frame pointer based user callchains are useless in this mode. Implementing also exclude_callchain_kernel to have complete set of options. Signed-off-by: Jiri Olsa [ Added kernel callchains filtering ] Cc: "Frank Ch. Eigler" Cc: Arun Sharma Cc: Benjamin Redelings Cc: Corey Ashford Cc: Cyrill Gorcunov Cc: Frank Ch. Eigler Cc: Ingo Molnar Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Cc: Tom Zanussi Cc: Ulrich Drepper Link: http://lkml.kernel.org/r/1344345647-11536-7-git-send-email-jolsa@redhat.com Signed-off-by: Frederic Weisbecker Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index d1d25f6a5e24..297ca3db6b4a 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -268,7 +268,10 @@ struct perf_event_attr { exclude_host : 1, /* don't count in host */ exclude_guest : 1, /* don't count in guest */ - __reserved_1 : 43; + exclude_callchain_kernel : 1, /* exclude kernel callchains */ + exclude_callchain_user : 1, /* exclude user callchains */ + + __reserved_1 : 41; union { __u32 wakeup_events; /* wakeup every n events */ -- cgit v1.2.3 From 1659d129ed014b715b0b2120e6fd929bdd33ed03 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 22 Aug 2012 10:35:40 +0200 Subject: perf tools: Keep the perf_event_attr on version 3 Stashing version 4 under version 3 and removing version 4, because both version changes were within single patchset. Reported-by: Peter Zijlstra Signed-off-by: Jiri Olsa Acked-by: Peter Zijlstra Cc: Arun Sharma Cc: Benjamin Redelings Cc: Corey Ashford Cc: Cyrill Gorcunov Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Tom Zanussi Cc: Ulrich Drepper Link: http://lkml.kernel.org/r/20120822083540.GB1003@krava.brq.redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 297ca3db6b4a..28f9cee3fbc3 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -205,8 +205,8 @@ enum perf_event_read_format { #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ #define PERF_ATTR_SIZE_VER1 72 /* add: config2 */ #define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */ -#define PERF_ATTR_SIZE_VER3 88 /* add: sample_regs_user */ -#define PERF_ATTR_SIZE_VER4 96 /* add: sample_stack_user */ +#define PERF_ATTR_SIZE_VER3 96 /* add: sample_regs_user */ + /* add: sample_stack_user */ /* * Hardware event_id to monitor via a performance monitoring event: -- cgit v1.2.3 From 647c42dfd40fec032a4c8525a755160f0765921f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 6 Aug 2012 13:15:09 +0200 Subject: uprobes: Kill uprobes_state->count uprobes_state->count is only needed to avoid the slow path in uprobe_pre_sstep_notifier(). It is also checked in uprobe_munmap() but ironically its only goal to decrement this counter. However, it is very broken. Just some examples: - uprobe_mmap() can race with uprobe_unregister() and wrongly increment the counter if it hits the non-uprobe "int3". Note that install_breakpoint() checks ->consumers first and returns -EEXIST if it is NULL. "atomic_sub() if error" in uprobe_mmap() looks obviously wrong too. - uprobe_munmap() can race with uprobe_register() and wrongly decrement the counter by the same reason. - Suppose an appication tries to increase the mmapped area via sys_mremap(). vma_adjust() does uprobe_munmap(whole_vma) first, this can nullify the counter temporarily and race with another thread which can hit the bp, the application will be killed by SIGTRAP. - Suppose an application mmaps 2 consecutive areas in the same file and one (or both) of these areas has uprobes. In the likely case mmap_region()->vma_merge() suceeds. Like above, this leads to uprobe_munmap/uprobe_mmap from vma_merge()->vma_adjust() but then mmap_region() does another uprobe_mmap(resulting_vma) and doubles the counter. This patch only removes this counter and fixes the compile errors, then we will try to cleanup the changed code and add something else instead. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- include/linux/uprobes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index efe4b3308c74..03ae547c1c31 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -99,8 +99,8 @@ struct xol_area { struct uprobes_state { struct xol_area *xol_area; - atomic_t count; }; + extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr, bool verify); extern bool __weak is_swbp_insn(uprobe_opcode_t *insn); -- cgit v1.2.3 From f8ac4ec9c064b330dcc49e03c450fe74298c4622 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 8 Aug 2012 17:11:42 +0200 Subject: uprobes: Introduce MMF_HAS_UPROBES Add the new MMF_HAS_UPROBES flag. It is set by install_breakpoint() and it is copied by dup_mmap(), uprobe_pre_sstep_notifier() checks it to avoid the slow path if the task was never probed. Perhaps it makes sense to check it in valid_vma(is_register => false) as well. This needs the new dup_mmap()->uprobe_dup_mmap() hook. We can't use uprobe_reset_state() or put MMF_HAS_UPROBES into MMF_INIT_MASK, we need oldmm->mmap_sem to avoid the race with uprobe_register() or mmap() from another thread. Currently we never clear this bit, it can be false-positive after uprobe_unregister() or uprobe_munmap() or if dup_mmap() hits the probed VM_DONTCOPY vma. But this is fine correctness-wise and has no effect unless the task hits the non-uprobe breakpoint. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- include/linux/sched.h | 2 ++ include/linux/uprobes.h | 5 +++++ 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index b8c86648a2f9..3667c332e61d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -446,6 +446,8 @@ extern int get_dumpable(struct mm_struct *mm); #define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */ #define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ +#define MMF_HAS_UPROBES 19 /* might have uprobes */ + #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) struct sighand_struct { diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 03ae547c1c31..4a37ab153247 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -108,6 +108,7 @@ extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_con extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); extern int uprobe_mmap(struct vm_area_struct *vma); extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end); +extern void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm); extern void uprobe_free_utask(struct task_struct *t); extern void uprobe_copy_process(struct task_struct *t); extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs); @@ -138,6 +139,10 @@ static inline void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end) { } +static inline void +uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm) +{ +} static inline void uprobe_notify_resume(struct pt_regs *regs) { } -- cgit v1.2.3 From 61559a8165da2b6bab7621ac36379c6280efacb6 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 8 Aug 2012 17:17:46 +0200 Subject: uprobes: Fold uprobe_reset_state() into uprobe_dup_mmap() Now that we have uprobe_dup_mmap() we can fold uprobe_reset_state() into the new hook and remove it. mmput()->uprobe_clear_state() can't be called before dup_mmap(). Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- include/linux/uprobes.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 4a37ab153247..30297f95c8af 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -118,7 +118,6 @@ extern void uprobe_notify_resume(struct pt_regs *regs); extern bool uprobe_deny_signal(void); extern bool __weak arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs); extern void uprobe_clear_state(struct mm_struct *mm); -extern void uprobe_reset_state(struct mm_struct *mm); #else /* !CONFIG_UPROBES */ struct uprobes_state { }; @@ -163,8 +162,5 @@ static inline void uprobe_copy_process(struct task_struct *t) static inline void uprobe_clear_state(struct mm_struct *mm) { } -static inline void uprobe_reset_state(struct mm_struct *mm) -{ -} #endif /* !CONFIG_UPROBES */ #endif /* _LINUX_UPROBES_H */ -- cgit v1.2.3 From ded86e7c8fc4404414c4700010c9962ea8bd083a Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 8 Aug 2012 18:07:03 +0200 Subject: uprobes: Remove "verify" argument from set_orig_insn() Nobody does set_orig_insn(verify => false), and I think nobody will. Remove this argument. IIUC set_orig_insn(verify => false) was needed to single-step without xol area. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- include/linux/uprobes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 30297f95c8af..6d4fe79a1a6a 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -102,7 +102,7 @@ struct uprobes_state { }; extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); -extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr, bool verify); +extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); extern bool __weak is_swbp_insn(uprobe_opcode_t *insn); extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); -- cgit v1.2.3 From 9f68f672c47b9bd4cfe0a667ecb0b1382c61e2de Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 19 Aug 2012 16:15:09 +0200 Subject: uprobes: Introduce MMF_RECALC_UPROBES Add the new MMF_RECALC_UPROBES flag, it means that MMF_HAS_UPROBES can be false positive after remove_breakpoint() or uprobe_munmap(). It is also set by uprobe_dup_mmap(), this is not optimal but simple. We could add the new hook, uprobe_dup_vma(), to set MMF_HAS_UPROBES only if the new mm actually has uprobes, but I don't think this makes sense. The next patch will use this flag to clear MMF_HAS_UPROBES. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- include/linux/sched.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 3667c332e61d..255661d48834 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -446,7 +446,8 @@ extern int get_dumpable(struct mm_struct *mm); #define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */ #define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ -#define MMF_HAS_UPROBES 19 /* might have uprobes */ +#define MMF_HAS_UPROBES 19 /* has uprobes */ +#define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) -- cgit v1.2.3 From 9d778782266f95e5c6ec43ed8195ba331c821018 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 7 Aug 2012 18:12:28 +0200 Subject: uprobes: Introduce arch_uprobe_enable/disable_step() As Oleg pointed out in [0] uprobe should not use the ptrace interface for enabling/disabling single stepping. [0] http://lkml.kernel.org/r/20120730141638.GA5306@redhat.com Add the new "__weak arch" helpers which simply call user_*_single_step() as a preparation. This is only needed to not break the powerpc port, we will fold this logic into arch_uprobe_pre/post_xol() hooks later. We should also change handle_singlestep(), _disable_step(&uprobe->arch) should be called before put_uprobe(). Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- include/linux/uprobes.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 6d4fe79a1a6a..e6f0331e3d45 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -112,6 +112,8 @@ extern void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm); extern void uprobe_free_utask(struct task_struct *t); extern void uprobe_copy_process(struct task_struct *t); extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs); +extern void __weak arch_uprobe_enable_step(struct arch_uprobe *arch); +extern void __weak arch_uprobe_disable_step(struct arch_uprobe *arch); extern int uprobe_post_sstep_notifier(struct pt_regs *regs); extern int uprobe_pre_sstep_notifier(struct pt_regs *regs); extern void uprobe_notify_resume(struct pt_regs *regs); -- cgit v1.2.3