From 8f0820183056ad26dabc0202115848a92f1143fc Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 20 Apr 2010 10:47:33 -0400 Subject: tracing: Create class struct for events This patch creates a ftrace_event_class struct that event structs point to. This class struct will be made to hold information to modify the events. Currently the class struct only holds the events system name. This patch slightly increases the size, but this change lays the ground work of other changes to make the footprint of tracepoints smaller. With 82 standard tracepoints, and 618 system call tracepoints (two tracepoints per syscall: enter and exit): text data bss dec hex filename 4913961 1088356 861512 6863829 68bbd5 vmlinux.orig 4914025 1088868 861512 6864405 68be15 vmlinux.class This patch also cleans up some stale comments in ftrace.h. v2: Fixed missing semi-colon in macro. Acked-by: Frederic Weisbecker Acked-by: Mathieu Desnoyers Acked-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 6 +++++- include/linux/syscalls.h | 6 ++++-- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 39e71b0a3bfd..496eea898ee4 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -113,10 +113,14 @@ void tracing_record_cmdline(struct task_struct *tsk); struct event_filter; +struct ftrace_event_class { + char *system; +}; + struct ftrace_event_call { struct list_head list; + struct ftrace_event_class *class; char *name; - char *system; struct dentry *dir; struct trace_event *event; int enabled; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 057929b0a651..ac5791df2506 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -134,6 +134,8 @@ struct perf_event_attr; #define __SC_STR_TDECL5(t, a, ...) #t, __SC_STR_TDECL4(__VA_ARGS__) #define __SC_STR_TDECL6(t, a, ...) 
#t, __SC_STR_TDECL5(__VA_ARGS__) +extern struct ftrace_event_class event_class_syscalls; + #define SYSCALL_TRACE_ENTER_EVENT(sname) \ static const struct syscall_metadata __syscall_meta_##sname; \ static struct ftrace_event_call \ @@ -146,7 +148,7 @@ struct perf_event_attr; __attribute__((section("_ftrace_events"))) \ event_enter_##sname = { \ .name = "sys_enter"#sname, \ - .system = "syscalls", \ + .class = &event_class_syscalls, \ .event = &enter_syscall_print_##sname, \ .raw_init = init_syscall_trace, \ .define_fields = syscall_enter_define_fields, \ @@ -168,7 +170,7 @@ struct perf_event_attr; __attribute__((section("_ftrace_events"))) \ event_exit_##sname = { \ .name = "sys_exit"#sname, \ - .system = "syscalls", \ + .class = &event_class_syscalls, \ .event = &exit_syscall_print_##sname, \ .raw_init = init_syscall_trace, \ .define_fields = syscall_exit_define_fields, \ -- cgit v1.2.3 From 53da59aa6dd881fd0bbdd058a8a299d90ce9dd1d Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 30 Apr 2010 12:59:59 -0400 Subject: tracepoints: Add check trace callback type This check is meant to be used by tracepoint users which do a direct cast of callbacks to (void *) for direct registration, thus bypassing the register_trace_##name and unregister_trace_##name checks. This permits to ensure that the callback type matches the function type at the call site, but without generating any code. 
Acked-by: Masami Hiramatsu Acked-by: Frederic Weisbecker Signed-off-by: Mathieu Desnoyers LKML-Reference: <20100430165959.GA25605@Krystal> CC: Ingo Molnar CC: Andrew Morton CC: Thomas Gleixner CC: Peter Zijlstra CC: Arnaldo Carvalho de Melo CC: Lai Jiangshan CC: Li Zefan CC: Christoph Hellwig Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 1d85f9a6a199..8d5e4f6d96d0 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -137,9 +137,11 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin, static inline int unregister_trace_##name(void (*probe)(proto)) \ { \ return tracepoint_probe_unregister(#name, (void *)probe);\ + } \ + static inline void check_trace_callback_type_##name(void (*cb)(proto)) \ + { \ } - #define DEFINE_TRACE_FN(name, reg, unreg) \ static const char __tpstrtab_##name[] \ __attribute__((section("__tracepoints_strings"))) = #name; \ @@ -168,6 +170,9 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin, static inline int unregister_trace_##name(void (*probe)(proto)) \ { \ return -ENOSYS; \ + } \ + static inline void check_trace_callback_type_##name(void (*cb)(proto)) \ + { \ } #define DEFINE_TRACE_FN(name, reg, unreg) -- cgit v1.2.3 From 38516ab59fbc5b3bb278cf5e1fe2867c70cff32e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 20 Apr 2010 17:04:50 -0400 Subject: tracing: Let tracepoints have data passed to tracepoint callbacks This patch adds data to be passed to tracepoint callbacks. The created functions from DECLARE_TRACE() now need a mandatory data parameter. 
For example: DECLARE_TRACE(mytracepoint, int value, value) will create the register function: int register_trace_mytracepoint(void (*probe)(void *data, int value), void *data); As the first argument, all callbacks (probes) must take a (void *data) parameter. So a callback for the above tracepoint will look like: void myprobe(void *data, int value) { } The callback may choose to ignore the data parameter. This change allows callbacks to register a private data pointer along with the function probe. void mycallback(void *data, int value); register_trace_mytracepoint(mycallback, mydata); Then mycallback() will receive "mydata" as the first parameter before the args. A more detailed example: DECLARE_TRACE(mytracepoint, TP_PROTO(int status), TP_ARGS(status)); /* In the C file */ DEFINE_TRACE(mytracepoint, TP_PROTO(int status), TP_ARGS(status)); [...] trace_mytracepoint(status); /* In a file registering this tracepoint */ void my_callback(void *data, int status) { struct my_struct *my_data = data; [...] } [...] my_data = kmalloc(sizeof(*my_data), GFP_KERNEL); init_my_data(my_data); register_trace_mytracepoint(my_callback, my_data); The same callback can also be registered to the same tracepoint as long as the data registered is different. Note, the data must also be used to unregister the callback: unregister_trace_mytracepoint(my_callback, my_data); Because of the data parameter, tracepoints declared this way cannot be declared with no arguments. That is: DECLARE_TRACE(mytracepoint, TP_PROTO(void), TP_ARGS()); will cause an error. If no arguments are needed, a new macro can be used instead: DECLARE_TRACE_NOARGS(mytracepoint); Since there are no arguments, the proto and args fields are left out.
This is part of a series to make the tracepoint footprint smaller: text data bss dec hex filename 4913961 1088356 861512 6863829 68bbd5 vmlinux.orig 4914025 1088868 861512 6864405 68be15 vmlinux.class 4918492 1084612 861512 6864616 68bee8 vmlinux.tracepoint Again, this patch also increases the size of the kernel, but lays the ground work for decreasing it. v5: Fixed net/core/drop_monitor.c to handle these updates. v4: Moved the DECLARE_TRACE() DECLARE_TRACE_NOARGS out of the #ifdef CONFIG_TRACE_POINTS, since the two are the same in both cases. The __DECLARE_TRACE() is what changes. Thanks to Frederic Weisbecker for pointing this out. v3: Made all register_* functions require data to be passed and all callbacks to take a void * parameter as its first argument. This makes the calling functions comply with C standards. Also added more comments to the modifications of DECLARE_TRACE(). v2: Made the DECLARE_TRACE() have the ability to pass arguments and added a new DECLARE_TRACE_NOARGS() for tracepoints that do not need any arguments. Acked-by: Mathieu Desnoyers Acked-by: Masami Hiramatsu Acked-by: Frederic Weisbecker Cc: Neil Horman Cc: David S. Miller Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 95 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 72 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 8d5e4f6d96d0..9a59d1f98cd4 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -20,12 +20,17 @@ struct module; struct tracepoint; +struct tracepoint_func { + void *func; + void *data; +}; + struct tracepoint { const char *name; /* Tracepoint name */ int state; /* State. 
*/ void (*regfunc)(void); void (*unregfunc)(void); - void **funcs; + struct tracepoint_func *funcs; } __attribute__((aligned(32))); /* * Aligned on 32 bytes because it is * globally visible and gcc happily @@ -37,16 +42,19 @@ struct tracepoint { * Connect a probe to a tracepoint. * Internal API, should not be used directly. */ -extern int tracepoint_probe_register(const char *name, void *probe); +extern int tracepoint_probe_register(const char *name, void *probe, void *data); /* * Disconnect a probe from a tracepoint. * Internal API, should not be used directly. */ -extern int tracepoint_probe_unregister(const char *name, void *probe); +extern int +tracepoint_probe_unregister(const char *name, void *probe, void *data); -extern int tracepoint_probe_register_noupdate(const char *name, void *probe); -extern int tracepoint_probe_unregister_noupdate(const char *name, void *probe); +extern int tracepoint_probe_register_noupdate(const char *name, void *probe, + void *data); +extern int tracepoint_probe_unregister_noupdate(const char *name, void *probe, + void *data); extern void tracepoint_probe_update_all(void); struct tracepoint_iter { @@ -102,17 +110,27 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin, /* * it_func[0] is never NULL because there is at least one element in the array * when the array itself is non NULL. + * + * Note, the proto and args passed in includes "__data" as the first parameter. + * The reason for this is to handle the "void" prototype. If a tracepoint + * has a "void" prototype, then it is invalid to declare a function + * as "(void *, void)". The DECLARE_TRACE_NOARGS() will pass in just + * "void *data", where as the DECLARE_TRACE() will pass in "void *data, proto". 
*/ #define __DO_TRACE(tp, proto, args) \ do { \ - void **it_func; \ + struct tracepoint_func *it_func_ptr; \ + void *it_func; \ + void *__data; \ \ rcu_read_lock_sched_notrace(); \ - it_func = rcu_dereference_sched((tp)->funcs); \ - if (it_func) { \ + it_func_ptr = rcu_dereference_sched((tp)->funcs); \ + if (it_func_ptr) { \ do { \ - ((void(*)(proto))(*it_func))(args); \ - } while (*(++it_func)); \ + it_func = (it_func_ptr)->func; \ + __data = (it_func_ptr)->data; \ + ((void(*)(proto))(it_func))(args); \ + } while ((++it_func_ptr)->func); \ } \ rcu_read_unlock_sched_notrace(); \ } while (0) @@ -122,23 +140,29 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin, * not add unwanted padding between the beginning of the section and the * structure. Force alignment to the same alignment as the section start. */ -#define DECLARE_TRACE(name, proto, args) \ +#define __DECLARE_TRACE(name, proto, args, data_proto, data_args) \ extern struct tracepoint __tracepoint_##name; \ static inline void trace_##name(proto) \ { \ if (unlikely(__tracepoint_##name.state)) \ __DO_TRACE(&__tracepoint_##name, \ - TP_PROTO(proto), TP_ARGS(args)); \ + TP_PROTO(data_proto), \ + TP_ARGS(data_args)); \ } \ - static inline int register_trace_##name(void (*probe)(proto)) \ + static inline int \ + register_trace_##name(void (*probe)(data_proto), void *data) \ { \ - return tracepoint_probe_register(#name, (void *)probe); \ + return tracepoint_probe_register(#name, (void *)probe, \ + data); \ } \ - static inline int unregister_trace_##name(void (*probe)(proto)) \ + static inline int \ + unregister_trace_##name(void (*probe)(data_proto), void *data) \ { \ - return tracepoint_probe_unregister(#name, (void *)probe);\ + return tracepoint_probe_unregister(#name, (void *)probe, \ + data); \ } \ - static inline void check_trace_callback_type_##name(void (*cb)(proto)) \ + static inline void \ + check_trace_callback_type_##name(void (*cb)(data_proto)) \ { \ } @@ -158,20 +182,22 @@ 
static inline void tracepoint_update_probe_range(struct tracepoint *begin, EXPORT_SYMBOL(__tracepoint_##name) #else /* !CONFIG_TRACEPOINTS */ -#define DECLARE_TRACE(name, proto, args) \ - static inline void _do_trace_##name(struct tracepoint *tp, proto) \ - { } \ +#define __DECLARE_TRACE(name, proto, args, data_proto, data_args) \ static inline void trace_##name(proto) \ { } \ - static inline int register_trace_##name(void (*probe)(proto)) \ + static inline int \ + register_trace_##name(void (*probe)(data_proto), \ + void *data) \ { \ return -ENOSYS; \ } \ - static inline int unregister_trace_##name(void (*probe)(proto)) \ + static inline int \ + unregister_trace_##name(void (*probe)(data_proto), \ + void *data) \ { \ return -ENOSYS; \ } \ - static inline void check_trace_callback_type_##name(void (*cb)(proto)) \ + static inline void check_trace_callback_type_##name(void (*cb)(data_proto)) \ { \ } @@ -181,6 +207,29 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin, #define EXPORT_TRACEPOINT_SYMBOL(name) #endif /* CONFIG_TRACEPOINTS */ + +/* + * The need for the DECLARE_TRACE_NOARGS() is to handle the prototype + * (void). "void" is a special value in a function prototype and can + * not be combined with other arguments. Since the DECLARE_TRACE() + * macro adds a data element at the beginning of the prototype, + * we need a way to differentiate "(void *data, proto)" from + * "(void *data, void)". The second prototype is invalid. + * + * DECLARE_TRACE_NOARGS() passes "void" as the tracepoint prototype + * and "void *__data" as the callback prototype. + * + * DECLARE_TRACE() passes "proto" as the tracepoint protoype and + * "void *__data, proto" as the callback prototype. 
+ */ +#define DECLARE_TRACE_NOARGS(name) \ + __DECLARE_TRACE(name, void, , void *__data, __data) + +#define DECLARE_TRACE(name, proto, args) \ + __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \ + PARAMS(void *__data, proto), \ + PARAMS(__data, args)) + #endif /* DECLARE_TRACE */ #ifndef TRACE_EVENT -- cgit v1.2.3 From 2239291aeb0379fe47980b0e560e0eb9fd7e82ec Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 21 Apr 2010 12:27:06 -0400 Subject: tracing: Remove per event trace registering This patch removes the register functions of TRACE_EVENT() to enable and disable tracepoints. The registering of an event is now done directly in the trace_events.c file. The tracepoint_probe_register() is now called directly. The prototypes are no longer type checked, but this should not be an issue since the tracepoints are created automatically by the macros. If a prototype is incorrect in the TRACE_EVENT() macro, then other macros will catch it. The ftrace_event_class structure now holds the probes to be called by the callbacks. This removes needing to have each event have a separate pointer for the probe. To handle kprobes and syscalls, since they register probes in a different manner, a "reg" field is added to the ftrace_event_class structure. If the "reg" field is assigned, then it will be called for enabling and disabling of the probe for either ftrace or perf. To let the reg function know what is happening, a new enum (trace_reg) is created that has the type of control that is needed. With this new rework, the 82 kernel events and 618 syscall events have their footprint dramatically lowered: text data bss dec hex filename 4913961 1088356 861512 6863829 68bbd5 vmlinux.orig 4914025 1088868 861512 6864405 68be15 vmlinux.class 4918492 1084612 861512 6864616 68bee8 vmlinux.tracepoint 4900252 1057412 861512 6819176 680d68 vmlinux.regs The size went from 6863829 to 6819176, that's a total of 44K in savings.
With tracepoints being continuously added, this is critical that the footprint becomes minimal. v5: Added #ifdef CONFIG_PERF_EVENTS around a reference to perf specific structure in trace_events.c. v4: Fixed trace self tests to check probe because regfunc no longer exists. v3: Updated to handle void *data in beginning of probe parameters. Also added the tracepoint: check_trace_callback_type_##call(). v2: Changed the callback probes to pass void * and typecast the value within the function. Acked-by: Mathieu Desnoyers Acked-by: Masami Hiramatsu Acked-by: Frederic Weisbecker Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 19 +++++++++++++++---- include/linux/syscalls.h | 29 ++++------------------------- 2 files changed, 19 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 496eea898ee4..e665ed38b4bf 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -113,8 +113,23 @@ void tracing_record_cmdline(struct task_struct *tsk); struct event_filter; +enum trace_reg { + TRACE_REG_REGISTER, + TRACE_REG_UNREGISTER, + TRACE_REG_PERF_REGISTER, + TRACE_REG_PERF_UNREGISTER, +}; + +struct ftrace_event_call; + struct ftrace_event_class { char *system; + void *probe; +#ifdef CONFIG_PERF_EVENTS + void *perf_probe; +#endif + int (*reg)(struct ftrace_event_call *event, + enum trace_reg type); }; struct ftrace_event_call { @@ -124,8 +139,6 @@ struct ftrace_event_call { struct dentry *dir; struct trace_event *event; int enabled; - int (*regfunc)(struct ftrace_event_call *); - void (*unregfunc)(struct ftrace_event_call *); int id; const char *print_fmt; int (*raw_init)(struct ftrace_event_call *); @@ -137,8 +150,6 @@ struct ftrace_event_call { void *data; int perf_refcount; - int (*perf_event_enable)(struct ftrace_event_call *); - void (*perf_event_disable)(struct ftrace_event_call *); }; #define PERF_MAX_TRACE_SIZE 2048 diff --git a/include/linux/syscalls.h 
b/include/linux/syscalls.h index ac5791df2506..e3348c4c22e8 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -103,22 +103,6 @@ struct perf_event_attr; #define __SC_TEST5(t5, a5, ...) __SC_TEST(t5); __SC_TEST4(__VA_ARGS__) #define __SC_TEST6(t6, a6, ...) __SC_TEST(t6); __SC_TEST5(__VA_ARGS__) -#ifdef CONFIG_PERF_EVENTS - -#define TRACE_SYS_ENTER_PERF_INIT(sname) \ - .perf_event_enable = perf_sysenter_enable, \ - .perf_event_disable = perf_sysenter_disable, - -#define TRACE_SYS_EXIT_PERF_INIT(sname) \ - .perf_event_enable = perf_sysexit_enable, \ - .perf_event_disable = perf_sysexit_disable, -#else -#define TRACE_SYS_ENTER_PERF(sname) -#define TRACE_SYS_ENTER_PERF_INIT(sname) -#define TRACE_SYS_EXIT_PERF(sname) -#define TRACE_SYS_EXIT_PERF_INIT(sname) -#endif /* CONFIG_PERF_EVENTS */ - #ifdef CONFIG_FTRACE_SYSCALLS #define __SC_STR_ADECL1(t, a) #a #define __SC_STR_ADECL2(t, a, ...) #a, __SC_STR_ADECL1(__VA_ARGS__) @@ -134,7 +118,8 @@ struct perf_event_attr; #define __SC_STR_TDECL5(t, a, ...) #t, __SC_STR_TDECL4(__VA_ARGS__) #define __SC_STR_TDECL6(t, a, ...) 
#t, __SC_STR_TDECL5(__VA_ARGS__) -extern struct ftrace_event_class event_class_syscalls; +extern struct ftrace_event_class event_class_syscall_enter; +extern struct ftrace_event_class event_class_syscall_exit; #define SYSCALL_TRACE_ENTER_EVENT(sname) \ static const struct syscall_metadata __syscall_meta_##sname; \ @@ -148,14 +133,11 @@ extern struct ftrace_event_class event_class_syscalls; __attribute__((section("_ftrace_events"))) \ event_enter_##sname = { \ .name = "sys_enter"#sname, \ - .class = &event_class_syscalls, \ + .class = &event_class_syscall_enter, \ .event = &enter_syscall_print_##sname, \ .raw_init = init_syscall_trace, \ .define_fields = syscall_enter_define_fields, \ - .regfunc = reg_event_syscall_enter, \ - .unregfunc = unreg_event_syscall_enter, \ .data = (void *)&__syscall_meta_##sname,\ - TRACE_SYS_ENTER_PERF_INIT(sname) \ } #define SYSCALL_TRACE_EXIT_EVENT(sname) \ @@ -170,14 +152,11 @@ extern struct ftrace_event_class event_class_syscalls; __attribute__((section("_ftrace_events"))) \ event_exit_##sname = { \ .name = "sys_exit"#sname, \ - .class = &event_class_syscalls, \ + .class = &event_class_syscall_exit, \ .event = &exit_syscall_print_##sname, \ .raw_init = init_syscall_trace, \ .define_fields = syscall_exit_define_fields, \ - .regfunc = reg_event_syscall_exit, \ - .unregfunc = unreg_event_syscall_exit, \ .data = (void *)&__syscall_meta_##sname,\ - TRACE_SYS_EXIT_PERF_INIT(sname) \ } #define SYSCALL_METADATA(sname, nb) \ -- cgit v1.2.3 From 2e33af029556cb8bd22bf4f86f42d540249177ea Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 22 Apr 2010 10:35:55 -0400 Subject: tracing: Move fields from event to class structure Move the defined fields from the event to the class structure. Since the fields of the event are defined by the class they belong to, it makes sense to have the class hold the information instead of the individual events. The events of the same class would just hold duplicate information. 
After this change the size of the kernel dropped another 3K: text data bss dec hex filename 4913961 1088356 861512 6863829 68bbd5 vmlinux.orig 4900252 1057412 861512 6819176 680d68 vmlinux.regs 4900375 1053380 861512 6815267 67fe23 vmlinux.fields Although the text increased, this was mainly due to the C files having to adapt to the change. This is a constant increase, where new tracepoints will not increase the text. But the big drop is in the data size (as well as needed allocations to hold the fields). This will give even more savings as more tracepoints are created. Note, if just TRACE_EVENT()s are used and not DECLARE_EVENT_CLASS() with several DEFINE_EVENT()s, then the savings will be lost. But we are pushing developers to consolidate events with DEFINE_EVENT() so this should not be an issue. The kprobes define a unique class for every new event, but are dynamic so it should not be an issue. The syscalls, however, have a single class but the fields for the individual events are different. The syscalls use metadata to define the fields. I moved the fields list from the event to the metadata and added a "get_fields()" function to the class. This function is used to find the fields. For normal events and kprobes, get_fields() just returns a pointer to the fields list_head in the class. For syscall events, it returns the fields list_head in the metadata for the event. v2: Fixed the syscall fields. The syscall metadata needs a list of fields for both enter and exit.
Acked-by: Frederic Weisbecker Acked-by: Mathieu Desnoyers Acked-by: Masami Hiramatsu Cc: Tom Zanussi Cc: Peter Zijlstra Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 5 +++-- include/linux/syscalls.h | 14 ++++++++------ 2 files changed, 11 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index e665ed38b4bf..479c3c1876e6 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -130,6 +130,9 @@ struct ftrace_event_class { #endif int (*reg)(struct ftrace_event_call *event, enum trace_reg type); + int (*define_fields)(struct ftrace_event_call *); + struct list_head *(*get_fields)(struct ftrace_event_call *); + struct list_head fields; }; struct ftrace_event_call { @@ -142,8 +145,6 @@ struct ftrace_event_call { int id; const char *print_fmt; int (*raw_init)(struct ftrace_event_call *); - int (*define_fields)(struct ftrace_event_call *); - struct list_head fields; int filter_active; struct event_filter *filter; void *mod; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index e3348c4c22e8..fd0f1f248cd8 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -122,7 +122,7 @@ extern struct ftrace_event_class event_class_syscall_enter; extern struct ftrace_event_class event_class_syscall_exit; #define SYSCALL_TRACE_ENTER_EVENT(sname) \ - static const struct syscall_metadata __syscall_meta_##sname; \ + static struct syscall_metadata __syscall_meta_##sname; \ static struct ftrace_event_call \ __attribute__((__aligned__(4))) event_enter_##sname; \ static struct trace_event enter_syscall_print_##sname = { \ @@ -136,12 +136,11 @@ extern struct ftrace_event_class event_class_syscall_exit; .class = &event_class_syscall_enter, \ .event = &enter_syscall_print_##sname, \ .raw_init = init_syscall_trace, \ - .define_fields = syscall_enter_define_fields, \ .data = (void *)&__syscall_meta_##sname,\ } #define 
SYSCALL_TRACE_EXIT_EVENT(sname) \ - static const struct syscall_metadata __syscall_meta_##sname; \ + static struct syscall_metadata __syscall_meta_##sname; \ static struct ftrace_event_call \ __attribute__((__aligned__(4))) event_exit_##sname; \ static struct trace_event exit_syscall_print_##sname = { \ @@ -155,14 +154,13 @@ extern struct ftrace_event_class event_class_syscall_exit; .class = &event_class_syscall_exit, \ .event = &exit_syscall_print_##sname, \ .raw_init = init_syscall_trace, \ - .define_fields = syscall_exit_define_fields, \ .data = (void *)&__syscall_meta_##sname,\ } #define SYSCALL_METADATA(sname, nb) \ SYSCALL_TRACE_ENTER_EVENT(sname); \ SYSCALL_TRACE_EXIT_EVENT(sname); \ - static const struct syscall_metadata __used \ + static struct syscall_metadata __used \ __attribute__((__aligned__(4))) \ __attribute__((section("__syscalls_metadata"))) \ __syscall_meta_##sname = { \ @@ -172,12 +170,14 @@ extern struct ftrace_event_class event_class_syscall_exit; .args = args_##sname, \ .enter_event = &event_enter_##sname, \ .exit_event = &event_exit_##sname, \ + .enter_fields = LIST_HEAD_INIT(__syscall_meta_##sname.enter_fields), \ + .exit_fields = LIST_HEAD_INIT(__syscall_meta_##sname.exit_fields), \ }; #define SYSCALL_DEFINE0(sname) \ SYSCALL_TRACE_ENTER_EVENT(_##sname); \ SYSCALL_TRACE_EXIT_EVENT(_##sname); \ - static const struct syscall_metadata __used \ + static struct syscall_metadata __used \ __attribute__((__aligned__(4))) \ __attribute__((section("__syscalls_metadata"))) \ __syscall_meta__##sname = { \ @@ -185,6 +185,8 @@ extern struct ftrace_event_class event_class_syscall_exit; .nb_args = 0, \ .enter_event = &event_enter__##sname, \ .exit_event = &event_exit__##sname, \ + .enter_fields = LIST_HEAD_INIT(__syscall_meta__##sname.enter_fields), \ + .exit_fields = LIST_HEAD_INIT(__syscall_meta__##sname.exit_fields), \ }; \ asmlinkage long sys_##sname(void) #else -- cgit v1.2.3 From 0405ab80aa94afb13bf9ac4a6fc9f2923d4b9114 Mon Sep 17 00:00:00 2001 
From: Steven Rostedt Date: Thu, 22 Apr 2010 11:46:44 -0400 Subject: tracing: Move raw_init from events to class The raw_init function pointer in the event is used to initialize various kinds of events. The type of initialization needed is usually determined by the kind of event it is. Two events with the same class will always have the same initialization function, so it makes sense to move this to the class structure. Perhaps even making a special system structure would work since the initialization is the same for all events within a system. But since there's no system structure (yet), this will just move it to the class. text data bss dec hex filename 4913961 1088356 861512 6863829 68bbd5 vmlinux.orig 4900375 1053380 861512 6815267 67fe23 vmlinux.fields 4900382 1048964 861512 6810858 67ecea vmlinux.init The text grew very slightly, but this is a constant growth that happened with the changing of the C files that call the init code. The bigger savings is in the data, and it grows the more events share a class.
Acked-by: Mathieu Desnoyers Acked-by: Masami Hiramatsu Acked-by: Frederic Weisbecker Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 2 +- include/linux/syscalls.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 479c3c1876e6..393a8394df0e 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -133,6 +133,7 @@ struct ftrace_event_class { int (*define_fields)(struct ftrace_event_call *); struct list_head *(*get_fields)(struct ftrace_event_call *); struct list_head fields; + int (*raw_init)(struct ftrace_event_call *); }; struct ftrace_event_call { @@ -144,7 +145,6 @@ struct ftrace_event_call { int enabled; int id; const char *print_fmt; - int (*raw_init)(struct ftrace_event_call *); int filter_active; struct event_filter *filter; void *mod; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index fd0f1f248cd8..211c704a71ed 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -135,7 +135,6 @@ extern struct ftrace_event_class event_class_syscall_exit; .name = "sys_enter"#sname, \ .class = &event_class_syscall_enter, \ .event = &enter_syscall_print_##sname, \ - .raw_init = init_syscall_trace, \ .data = (void *)&__syscall_meta_##sname,\ } @@ -153,7 +152,6 @@ extern struct ftrace_event_class event_class_syscall_exit; .name = "sys_exit"#sname, \ .class = &event_class_syscall_exit, \ .event = &exit_syscall_print_##sname, \ - .raw_init = init_syscall_trace, \ .data = (void *)&__syscall_meta_##sname,\ } -- cgit v1.2.3 From a9a5776380208a3e48a92d0c763ee1a3b486fb73 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 22 Apr 2010 18:46:14 -0400 Subject: tracing: Allow events to share their print functions Multiple events may use the same method to print their data. 
Instead of having all events have a pointer to their print functions, the trace_event structure now points to a trace_event_functions structure that will hold the way to print out the event. The event itself is now passed to the print function to let the print function know what kind of event it should print. This opens the door to consolidating the way several events print their output. text data bss dec hex filename 4913961 1088356 861512 6863829 68bbd5 vmlinux.orig 4900382 1048964 861512 6810858 67ecea vmlinux.init 4900446 1049028 861512 6810986 67ed6a vmlinux.preprint This change slightly increases the size but is needed for the next change. v3: Fix the branch tracer events to handle this change. v2: Fix the new function graph tracer event calls to handle this change. Acked-by: Mathieu Desnoyers Acked-by: Masami Hiramatsu Acked-by: Frederic Weisbecker Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 17 ++++++++++++----- include/linux/syscalls.h | 10 ++++++++-- 2 files changed, 20 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 393a8394df0e..4f77932b0983 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -70,18 +70,25 @@ struct trace_iterator { }; +struct trace_event; + typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter, - int flags); -struct trace_event { - struct hlist_node node; - struct list_head list; - int type; + int flags, struct trace_event *event); + +struct trace_event_functions { trace_print_func trace; trace_print_func raw; trace_print_func hex; trace_print_func binary; }; +struct trace_event { + struct hlist_node node; + struct list_head list; + int type; + struct trace_event_functions *funcs; +}; + extern int register_ftrace_event(struct trace_event *event); extern int unregister_ftrace_event(struct trace_event *event); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index
211c704a71ed..f7256770a20f 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -125,9 +125,12 @@ extern struct ftrace_event_class event_class_syscall_exit; static struct syscall_metadata __syscall_meta_##sname; \ static struct ftrace_event_call \ __attribute__((__aligned__(4))) event_enter_##sname; \ - static struct trace_event enter_syscall_print_##sname = { \ + static struct trace_event_functions enter_syscall_print_funcs_##sname = { \ .trace = print_syscall_enter, \ }; \ + static struct trace_event enter_syscall_print_##sname = { \ + .funcs = &enter_syscall_print_funcs_##sname, \ + }; \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) \ @@ -142,9 +145,12 @@ extern struct ftrace_event_class event_class_syscall_exit; static struct syscall_metadata __syscall_meta_##sname; \ static struct ftrace_event_call \ __attribute__((__aligned__(4))) event_exit_##sname; \ - static struct trace_event exit_syscall_print_##sname = { \ + static struct trace_event_functions exit_syscall_print_funcs_##sname = { \ .trace = print_syscall_exit, \ }; \ + static struct trace_event exit_syscall_print_##sname = { \ + .funcs = &exit_syscall_print_funcs_##sname, \ + }; \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) \ -- cgit v1.2.3 From 80decc70afc57c87eee9d6b836aec2ecacba3457 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 23 Apr 2010 10:00:22 -0400 Subject: tracing: Move print functions into event class Currently, every event has its own trace_event structure. This is fine since the structure is needed anyway. But the print function structure (trace_event_functions) is now separate. Since the output of the trace event is done by the class (with the exception of events defined by DEFINE_EVENT_PRINT), it makes sense to have the class define the print functions that all events in the class can use. 
This makes a bigger deal with the syscall events since all syscall events use the same class. The savings here is another 30K. text data bss dec hex filename 4913961 1088356 861512 6863829 68bbd5 vmlinux.orig 4900382 1048964 861512 6810858 67ecea vmlinux.init 4900446 1049028 861512 6810986 67ed6a vmlinux.preprint 4895024 1023812 861512 6780348 6775bc vmlinux.print To accomplish this, and to let the class know what event is being printed, the event structure is embedded in the ftrace_event_call structure. This should not be an issue since the event structure was created for each event anyway. Acked-by: Mathieu Desnoyers Acked-by: Masami Hiramatsu Acked-by: Frederic Weisbecker Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 2 +- include/linux/syscalls.h | 18 ++++-------------- 2 files changed, 5 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 4f77932b0983..b1a007d6e8fd 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -148,7 +148,7 @@ struct ftrace_event_call { struct ftrace_event_class *class; char *name; struct dentry *dir; - struct trace_event *event; + struct trace_event event; int enabled; int id; const char *print_fmt; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index f7256770a20f..a1a86a53bc73 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -120,24 +120,20 @@ struct perf_event_attr; extern struct ftrace_event_class event_class_syscall_enter; extern struct ftrace_event_class event_class_syscall_exit; +extern struct trace_event_functions enter_syscall_print_funcs; +extern struct trace_event_functions exit_syscall_print_funcs; #define SYSCALL_TRACE_ENTER_EVENT(sname) \ static struct syscall_metadata __syscall_meta_##sname; \ static struct ftrace_event_call \ __attribute__((__aligned__(4))) event_enter_##sname; \ - static struct trace_event_functions enter_syscall_print_funcs_##sname 
= { \ - .trace = print_syscall_enter, \ - }; \ - static struct trace_event enter_syscall_print_##sname = { \ - .funcs = &enter_syscall_print_funcs_##sname, \ - }; \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) \ event_enter_##sname = { \ .name = "sys_enter"#sname, \ .class = &event_class_syscall_enter, \ - .event = &enter_syscall_print_##sname, \ + .event.funcs = &enter_syscall_print_funcs, \ .data = (void *)&__syscall_meta_##sname,\ } @@ -145,19 +141,13 @@ extern struct ftrace_event_class event_class_syscall_exit; static struct syscall_metadata __syscall_meta_##sname; \ static struct ftrace_event_call \ __attribute__((__aligned__(4))) event_exit_##sname; \ - static struct trace_event_functions exit_syscall_print_funcs_##sname = { \ - .trace = print_syscall_exit, \ - }; \ - static struct trace_event exit_syscall_print_##sname = { \ - .funcs = &exit_syscall_print_funcs_##sname, \ - }; \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) \ event_exit_##sname = { \ .name = "sys_exit"#sname, \ .class = &event_class_syscall_exit, \ - .event = &exit_syscall_print_##sname, \ + .event.funcs = &exit_syscall_print_funcs, \ .data = (void *)&__syscall_meta_##sname,\ } -- cgit v1.2.3 From 32c0edaeaad74a7883e736ae0f3798784cfc2a80 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 23 Apr 2010 10:38:03 -0400 Subject: tracing: Remove duplicate id information in event structure Now that the trace_event structure is embedded in the ftrace_event_call structure, there is no need for the ftrace_event_call id field. The id field is the same as the trace_event type field. Removing the id and re-arranging the structure brings down the tracepoint footprint by another 5K. 
text data bss dec hex filename 4913961 1088356 861512 6863829 68bbd5 vmlinux.orig 4895024 1023812 861512 6780348 6775bc vmlinux.print 4894944 1018052 861512 6774508 675eec vmlinux.id Acked-by: Mathieu Desnoyers Acked-by: Masami Hiramatsu Acked-by: Frederic Weisbecker Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index b1a007d6e8fd..0be028527633 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -149,14 +149,13 @@ struct ftrace_event_call { char *name; struct dentry *dir; struct trace_event event; - int enabled; - int id; const char *print_fmt; - int filter_active; struct event_filter *filter; void *mod; void *data; + int enabled; + int filter_active; int perf_refcount; }; -- cgit v1.2.3 From 553552ce1796c32cf4e3d4f45cd5b537de91dd1d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 23 Apr 2010 11:12:36 -0400 Subject: tracing: Combine event filter_active and enable into single flags field The filter_active and enable both use an int (4 bytes each) to set a single flag. We can save 4 bytes per event by combining the two into a single integer. text data bss dec hex filename 4913961 1088356 861512 6863829 68bbd5 vmlinux.orig 4894944 1018052 861512 6774508 675eec vmlinux.id 4894871 1012292 861512 6768675 674823 vmlinux.flags This gives us another 5K in savings. The modification of both the enable and filter fields are done under the event_mutex, so it is still safe to combine the two. Note: Although Mathieu gave his Acked-by, he would like it documented that the reads of flags are not protected by the mutex. The way the code works, these reads will not break anything, but will have a residual effect. 
Since this behavior is the same even before this patch, describing this situation is left to another patch, as this patch does not change the behavior, but just brought it to Mathieu's attention. v2: Updated the event trace self test for this change. Acked-by: Mathieu Desnoyers Acked-by: Masami Hiramatsu Acked-by: Frederic Weisbecker Cc: Tom Zanussi Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 0be028527633..5ac97a42950d 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -143,6 +143,16 @@ struct ftrace_event_class { int (*raw_init)(struct ftrace_event_call *); }; +enum { + TRACE_EVENT_FL_ENABLED_BIT, + TRACE_EVENT_FL_FILTERED_BIT, +}; + +enum { + TRACE_EVENT_FL_ENABLED = (1 << TRACE_EVENT_FL_ENABLED_BIT), + TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT), +}; + struct ftrace_event_call { struct list_head list; struct ftrace_event_class *class; @@ -154,8 +164,15 @@ struct ftrace_event_call { void *mod; void *data; - int enabled; - int filter_active; + /* + * 32 bit flags: + * bit 1: enabled + * bit 2: filter_active + * + * Must hold event_mutex to change. + */ + unsigned int flags; + int perf_refcount; }; -- cgit v1.2.3 From 1eaa4787a774c4896518c81f24e8bccaa2244924 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 14 May 2010 10:19:13 -0400 Subject: tracing: Comment the use of event_mutex with trace event flags The flags variable is protected by the event_mutex when modifying, but the event_mutex is not held when reading the variable. This is due to the fact that the reads occur in critical sections where taking a mutex (or even a spinlock) is not wanted. But the two flags that exist (enable and filter_active) have the code written as such to handle the reads to not need a lock. 
The enable flag is used just to know if the event is enabled or not and its use is always under the event_mutex. Whether or not the event is actually enabled is really determined by the tracepoint being registered. The flag is just a way to let the code know if the tracepoint is registered. The filter_active is different. It is read without the lock. If it is set, then the event probes jump to the filter code. There can be a slight mismatch between filters available and filter_active. If the flag is set but no filters are available, the code safely jumps to a filter nop. If the flag is not set and the filters are available, then the filters are skipped. This is acceptable since filters are usually set before tracing or they are set by humans, which would not notice the slight delay that this causes. v2: Fixed typo: "cacheing" -> "caching" Reported-by: Mathieu Desnoyers Acked-by: Mathieu Desnoyers Cc: Tom Zanussi Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 5ac97a42950d..dc7fc646fa2e 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -169,7 +169,14 @@ struct ftrace_event_call { * bit 1: enabled * bit 2: filter_active * - * Must hold event_mutex to change. + * Changes to flags must hold the event_mutex. + * + * Note: Reads of flags do not hold the event_mutex since + * they occur in critical sections. But the way flags + * is currently used, these changes do no affect the code + * except that when a change is made, it may have a slight + * delay in propagating the changes to other CPUs due to + * caching and such. */ unsigned int flags; -- cgit v1.2.3