From ae63b31e4d0e2ec09c569306ea46f664508ef717 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 3 May 2012 23:09:03 -0400
Subject: tracing: Separate out trace events from global variables

The trace events for ftrace are all defined via global variables.
The arrays of events and event systems are linked to a global list.
This prevents multiple users of the event system (what to enable and
what not to).

By adding descriptors to represent the event/file relation, as well
as to which trace_array descriptor they are associated with, allows
for more than one set of events to be defined. Once the trace events
files have a link between the trace event and the trace_array they
are associated with, we can create multiple trace_arrays that can
record separate events in separate buffers.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h | 51 ++++++++++++++++++++++++++++++++++----------
 1 file changed, 40 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 13a54d0bdfa8..c7191d482f98 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -182,18 +182,20 @@ extern int ftrace_event_reg(struct ftrace_event_call *event,
 			    enum trace_reg type, void *data);
 
 enum {
-	TRACE_EVENT_FL_ENABLED_BIT,
 	TRACE_EVENT_FL_FILTERED_BIT,
-	TRACE_EVENT_FL_RECORDED_CMD_BIT,
 	TRACE_EVENT_FL_CAP_ANY_BIT,
 	TRACE_EVENT_FL_NO_SET_FILTER_BIT,
 	TRACE_EVENT_FL_IGNORE_ENABLE_BIT,
 };
 
+/*
+ * Event flags:
+ *  FILTERED	  - The event has a filter attached
+ *  CAP_ANY	  - Any user can enable for perf
+ *  NO_SET_FILTER - Set when filter has error and is to be ignored
+ */
 enum {
-	TRACE_EVENT_FL_ENABLED		= (1 << TRACE_EVENT_FL_ENABLED_BIT),
 	TRACE_EVENT_FL_FILTERED		= (1 << TRACE_EVENT_FL_FILTERED_BIT),
-	TRACE_EVENT_FL_RECORDED_CMD	= (1 << TRACE_EVENT_FL_RECORDED_CMD_BIT),
 	TRACE_EVENT_FL_CAP_ANY		= (1 << TRACE_EVENT_FL_CAP_ANY_BIT),
 	TRACE_EVENT_FL_NO_SET_FILTER	= (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT),
 	TRACE_EVENT_FL_IGNORE_ENABLE	= (1 << TRACE_EVENT_FL_IGNORE_ENABLE_BIT),
@@ -203,12 +205,44 @@ struct ftrace_event_call {
 	struct list_head	list;
 	struct ftrace_event_class *class;
 	char			*name;
-	struct dentry		*dir;
 	struct trace_event	event;
 	const char		*print_fmt;
 	struct event_filter	*filter;
+	struct list_head	*files;
 	void			*mod;
 	void			*data;
+	int			flags; /* static flags of different events */
+
+#ifdef CONFIG_PERF_EVENTS
+	int				perf_refcount;
+	struct hlist_head __percpu	*perf_events;
+#endif
+};
+
+struct trace_array;
+struct ftrace_subsystem_dir;
+
+enum {
+	FTRACE_EVENT_FL_ENABLED_BIT,
+	FTRACE_EVENT_FL_RECORDED_CMD_BIT,
+};
+
+/*
+ * Ftrace event file flags:
+ *  ENABELD	  - The event is enabled
+ *  RECORDED_CMD  - The comms should be recorded at sched_switch
+ */
+enum {
+	FTRACE_EVENT_FL_ENABLED		= (1 << FTRACE_EVENT_FL_ENABLED_BIT),
+	FTRACE_EVENT_FL_RECORDED_CMD	= (1 << FTRACE_EVENT_FL_RECORDED_CMD_BIT),
+};
+
+struct ftrace_event_file {
+	struct list_head		list;
+	struct ftrace_event_call	*event_call;
+	struct dentry			*dir;
+	struct trace_array		*tr;
+	struct ftrace_subsystem_dir	*system;
 
 	/*
 	 * 32 bit flags:
@@ -223,17 +257,12 @@ struct ftrace_event_call {
 	 *
 	 * Note: Reads of flags do not hold the event_mutex since
 	 * they occur in critical sections. But the way flags
-	 * is currently used, these changes do no affect the code
+	 * is currently used, these changes do not affect the code
 	 * except that when a change is made, it may have a slight
 	 * delay in propagating the changes to other CPUs due to
 	 * caching and such.
 	 */
 	unsigned int		flags;
-
-#ifdef CONFIG_PERF_EVENTS
-	int				perf_refcount;
-	struct hlist_head __percpu	*perf_events;
-#endif
 };
 
 #define __TRACE_EVENT_FLAGS(name, value)				\
-- 
cgit v1.2.3


From ccb469a198cffac94a7eea0b69f715f06e2ddf15 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 2 Aug 2012 10:32:10 -0400
Subject: tracing: Pass the ftrace_file to the buffer lock reserve code

Pass the struct ftrace_event_file *ftrace_file to the
trace_event_buffer_lock_reserve() (new function that replaces the
trace_current_buffer_lock_reserver()).

The ftrace_file holds a pointer to the trace_array that is in use.
In the case of multiple buffers with different trace_arrays, this
allows different events to be recorded into different buffers.

Also fixed some of the stale comments in include/trace/ftrace.h

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index c7191d482f98..fd28c170c597 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -128,6 +128,13 @@ enum print_line_t {
 void tracing_generic_entry_update(struct trace_entry *entry,
 				  unsigned long flags,
 				  int pc);
+struct ftrace_event_file;
+
+struct ring_buffer_event *
+trace_event_buffer_lock_reserve(struct ring_buffer **current_buffer,
+				struct ftrace_event_file *ftrace_file,
+				int type, unsigned long len,
+				unsigned long flags, int pc);
 struct ring_buffer_event *
 trace_current_buffer_lock_reserve(struct ring_buffer **current_buffer,
 				  int type, unsigned long len,
-- 
cgit v1.2.3


From 15693458c4bc0693fd63a50d60f35b628fcf4e29 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Thu, 28 Feb 2013 19:59:17 -0500
Subject: tracing/ring-buffer: Move poll wake ups into ring buffer code

Move the logic to wake up on ring buffer data into the ring buffer
code itself. This simplifies the tracing code a lot and also has the
added benefit that waiters on one of the instance buffers can be woken
only when data is added to that instance instead of data added to
any instance.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ring_buffer.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 1342e69542f3..d69cf637a15a 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -4,6 +4,7 @@
 #include <linux/kmemcheck.h>
 #include <linux/mm.h>
 #include <linux/seq_file.h>
+#include <linux/poll.h>
 
 struct ring_buffer;
 struct ring_buffer_iter;
@@ -96,6 +97,11 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k
 	__ring_buffer_alloc((size), (flags), &__key);	\
 })
 
+void ring_buffer_wait(struct ring_buffer *buffer, int cpu);
+int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
+			  struct file *filp, poll_table *poll_table);
+
+
 #define RING_BUFFER_ALL_CPUS -1
 
 void ring_buffer_free(struct ring_buffer *buffer);
-- 
cgit v1.2.3


From f71130de5c7fba92faf3901784714e37a234c08f Mon Sep 17 00:00:00 2001
From: Li Zefan <lizefan@huawei.com>
Date: Thu, 21 Feb 2013 10:32:38 +0800
Subject: tracing: Add a helper function for event print functions

Move duplicate code in event print functions to a helper function.

This shrinks the size of the kernel by ~13K.

   text    data     bss     dec     hex filename
6596137 1743966 10138672        18478775        119f6b7 vmlinux.o.old
6583002 1743849 10138672        18465523        119c2f3 vmlinux.o.new

Link: http://lkml.kernel.org/r/51258746.2060304@huawei.com

Signed-off-by: Li Zefan <lizefan@huawei.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index fd28c170c597..4d79d2dc189c 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -38,6 +38,12 @@ const char *ftrace_print_symbols_seq_u64(struct trace_seq *p,
 const char *ftrace_print_hex_seq(struct trace_seq *p,
 				 const unsigned char *buf, int len);
 
+struct trace_iterator;
+struct trace_event;
+
+int ftrace_raw_output_prep(struct trace_iterator *iter,
+			   struct trace_event *event);
+
 /*
  * The trace entry - the most basic unit of tracing. This is what
  * is printed in the end as a single line in the trace output, such as:
@@ -95,8 +101,6 @@ enum trace_iter_flags {
 };
 
 
-struct trace_event;
-
 typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter,
 				      int flags, struct trace_event *event);
 
-- 
cgit v1.2.3


From 2a30c11f6a037e2475f3c651bc57e697e79fa963 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <srostedt@redhat.com>
Date: Mon, 4 Mar 2013 22:27:04 -0500
Subject: tracing: Add comment for trace event flag IGNORE_ENABLE

All the trace event flags have comments but the IGNORE_ENABLE flag
which is set for ftrace internal events that should not be enabled
via the debugfs "enable" file. That is, if the top level enable file
is set, it will enable all events. It use to just check the ftrace
event call descriptor "reg" field and skip those whithout it, but now
some ftrace internal events have a reg field but still need to be
skipped. The flag was created to ignore those events.

Now document it.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 4d79d2dc189c..0b0814d90164 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -204,6 +204,7 @@ enum {
  *  FILTERED	  - The event has a filter attached
  *  CAP_ANY	  - Any user can enable for perf
  *  NO_SET_FILTER - Set when filter has error and is to be ignored
+ *  IGNORE_ENABLE - For ftrace internal events, do not enable with debugfs file
  */
 enum {
 	TRACE_EVENT_FL_FILTERED		= (1 << TRACE_EVENT_FL_FILTERED_BIT),
-- 
cgit v1.2.3


From 575380da8b46969a2c6a7e14a51056a63b30fe2e Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <srostedt@redhat.com>
Date: Mon, 4 Mar 2013 23:05:12 -0500
Subject: tracing: Only clear trace buffer on module unload if event was traced

Currently, when a module with events is unloaded, the trace buffer is
cleared. This is just a safety net in case the module might have some
strange callback when its event is outputted. But there's no reason
to reset the buffer if the module didn't have any of its events traced.

Add a flag to the event "call" structure called WAS_ENABLED and gets set
when the event is ever enabled, and this flag never gets cleared. When a
module gets unloaded, if any of its events have this flag set, then the
trace buffer will get cleared.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 0b0814d90164..d6964244e567 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -197,6 +197,7 @@ enum {
 	TRACE_EVENT_FL_CAP_ANY_BIT,
 	TRACE_EVENT_FL_NO_SET_FILTER_BIT,
 	TRACE_EVENT_FL_IGNORE_ENABLE_BIT,
+	TRACE_EVENT_FL_WAS_ENABLED_BIT,
 };
 
 /*
@@ -205,12 +206,16 @@ enum {
  *  CAP_ANY	  - Any user can enable for perf
  *  NO_SET_FILTER - Set when filter has error and is to be ignored
  *  IGNORE_ENABLE - For ftrace internal events, do not enable with debugfs file
+ *  WAS_ENABLED   - Set and stays set when an event was ever enabled
+ *                    (used for module unloading, if a module event is enabled,
+ *                     it is best to clear the buffers that used it).
  */
 enum {
 	TRACE_EVENT_FL_FILTERED		= (1 << TRACE_EVENT_FL_FILTERED_BIT),
 	TRACE_EVENT_FL_CAP_ANY		= (1 << TRACE_EVENT_FL_CAP_ANY_BIT),
 	TRACE_EVENT_FL_NO_SET_FILTER	= (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT),
 	TRACE_EVENT_FL_IGNORE_ENABLE	= (1 << TRACE_EVENT_FL_IGNORE_ENABLE_BIT),
+	TRACE_EVENT_FL_WAS_ENABLED	= (1 << TRACE_EVENT_FL_WAS_ENABLED_BIT),
 };
 
 struct ftrace_event_call {
-- 
cgit v1.2.3


From 12883efb670c28dff57dcd7f4f995a1ffe153b2d Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <srostedt@redhat.com>
Date: Tue, 5 Mar 2013 09:24:35 -0500
Subject: tracing: Consolidate max_tr into main trace_array structure

Currently, the way the latency tracers and snapshot feature works
is to have a separate trace_array called "max_tr" that holds the
snapshot buffer. For latency tracers, this snapshot buffer is used
to swap the running buffer with this buffer to save the current max
latency.

The only items needed for the max_tr is really just a copy of the buffer
itself, the per_cpu data pointers, the time_start timestamp that states
when the max latency was triggered, and the cpu that the max latency
was triggered on. All other fields in trace_array are unused by the
max_tr, making the max_tr mostly bloat.

This change removes the max_tr completely, and adds a new structure
called trace_buffer, that holds the buffer pointer, the per_cpu data
pointers, the time_start timestamp, and the cpu where the latency occurred.

The trace_array, now has two trace_buffers, one for the normal trace and
one for the max trace or snapshot. By doing this, not only do we remove
the bloat from the max_trace but the instances of traces can now use
their own snapshot feature and not have just the top level global_trace have
the snapshot feature and latency tracers for itself.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index d6964244e567..d84c4a575514 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -8,6 +8,7 @@
 #include <linux/perf_event.h>
 
 struct trace_array;
+struct trace_buffer;
 struct tracer;
 struct dentry;
 
@@ -67,6 +68,7 @@ struct trace_entry {
 struct trace_iterator {
 	struct trace_array	*tr;
 	struct tracer		*trace;
+	struct trace_buffer	*trace_buffer;
 	void			*private;
 	int			cpu_file;
 	struct mutex		mutex;
-- 
cgit v1.2.3


From ad909e21bbe69f1d39055d346540abd827190eca Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <srostedt@redhat.com>
Date: Wed, 6 Mar 2013 21:45:37 -0500
Subject: tracing: Add internal tracing_snapshot() functions

The new snapshot feature is quite handy. It's a way for the user
to take advantage of the spare buffer that, until then, only
the latency tracers used to "snapshot" the buffer when it hit
a max latency. Now users can trigger a "snapshot" manually when
some condition is hit in a program. But a snapshot currently can
not be triggered by a condition inside the kernel.

With the addition of tracing_snapshot() and tracing_snapshot_alloc(),
snapshots can now be taking when a condition is hit, and the
developer wants to snapshot the case without stopping the trace.

Note, any snapshot will overwrite the old one, so take care
in how this is done.

These new functions are to be used like tracing_on(), tracing_off()
and trace_printk() are. That is, they should never be called
in the mainline Linux kernel. They are solely for the purpose
of debugging.

The tracing_snapshot() will not allocate a buffer, but it is
safe to be called from any context (except NMIs). But if a
snapshot buffer isn't allocated when it is called, it will write
to the live buffer, complaining about the lack of a snapshot
buffer, and then stop tracing (giving you the "permanent snapshot").

tracing_snapshot_alloc() will allocate the snapshot buffer if
it was not already allocated and then take the snapshot. This routine
*may sleep*, and must be called from context that can sleep.
The allocation is done with GFP_KERNEL and not atomic.

If you need a snapshot in an atomic context, say in early boot,
then it is best to call the tracing_snapshot_alloc() before then,
where it will allocate the buffer, and then you can use the
tracing_snapshot() anywhere you want and still get snapshots.

Cc: Hiraku Toyooka <hiraku.toyooka.gu@hitachi.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/kernel.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index c566927efcbd..bc5392a326ab 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -483,6 +483,8 @@ enum ftrace_dump_mode {
 void tracing_on(void);
 void tracing_off(void);
 int tracing_is_on(void);
+void tracing_snapshot(void);
+void tracing_snapshot_alloc(void);
 
 extern void tracing_start(void);
 extern void tracing_stop(void);
@@ -570,6 +572,8 @@ static inline void trace_dump_stack(void) { }
 static inline void tracing_on(void) { }
 static inline void tracing_off(void) { }
 static inline int tracing_is_on(void) { return 0; }
+static inline void tracing_snapshot(void) { }
+static inline void tracing_snapshot_alloc(void) { }
 
 static inline __printf(1, 2)
 int trace_printk(const char *fmt, ...)
-- 
cgit v1.2.3


From 09ae72348eccb60e304cf8ce94653f4a78fcd407 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Fri, 8 Mar 2013 21:02:34 -0500
Subject: tracing: Add trace_puts() for even faster trace_printk() tracing

The trace_printk() is extremely fast and is very handy as it can be
used in any context (including NMIs!). But it still requires scanning
the fmt string for parsing the args. Even the trace_bprintk() requires
a scan to know what args will be saved, although it doesn't copy the
format string itself.

Several times trace_printk() has no args, and wastes cpu cycles scanning
the fmt string.

Adding trace_puts() allows the developer to use an even faster
tracing method that only saves the pointer to the string in the
ring buffer without doing any format parsing at all. This will
help remove even more of the "Heisenbug" effect, when debugging.

Also fixed up the F_printk()s for the ftrace internal bprint and print events.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/kernel.h | 41 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 40 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index bc5392a326ab..a3a5574a61fc 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -514,7 +514,8 @@ do {									\
  *
  * This is intended as a debugging tool for the developer only.
  * Please refrain from leaving trace_printks scattered around in
- * your code.
+ * your code. (Extra memory is used for special buffers that are
+ * allocated when trace_printk() is used)
  */
 
 #define trace_printk(fmt, args...)					\
@@ -537,6 +538,44 @@ int __trace_bprintk(unsigned long ip, const char *fmt, ...);
 extern __printf(2, 3)
 int __trace_printk(unsigned long ip, const char *fmt, ...);
 
+/**
+ * trace_puts - write a string into the ftrace buffer
+ * @str: the string to record
+ *
+ * Note: __trace_bputs is an internal function for trace_puts and
+ *       the @ip is passed in via the trace_puts macro.
+ *
+ * This is similar to trace_printk() but is made for those really fast
+ * paths that a developer wants the least amount of "Heisenbug" affects,
+ * where the processing of the print format is still too much.
+ *
+ * This function allows a kernel developer to debug fast path sections
+ * that printk is not appropriate for. By scattering in various
+ * printk like tracing in the code, a developer can quickly see
+ * where problems are occurring.
+ *
+ * This is intended as a debugging tool for the developer only.
+ * Please refrain from leaving trace_puts scattered around in
+ * your code. (Extra memory is used for special buffers that are
+ * allocated when trace_puts() is used)
+ *
+ * Returns: 0 if nothing was written, positive # if string was.
+ *  (1 when __trace_bputs is used, strlen(str) when __trace_puts is used)
+ */
+
+extern int __trace_bputs(unsigned long ip, const char *str);
+extern int __trace_puts(unsigned long ip, const char *str, int size);
+#define trace_puts(str) ({						\
+	static const char *trace_printk_fmt				\
+		__attribute__((section("__trace_printk_fmt"))) =	\
+		__builtin_constant_p(str) ? str : NULL;			\
+									\
+	if (__builtin_constant_p(str))					\
+		__trace_bputs(_THIS_IP_, trace_printk_fmt);		\
+	else								\
+		__trace_puts(_THIS_IP_, str, strlen(str));		\
+})
+
 extern void trace_dump_stack(void);
 
 /*
-- 
cgit v1.2.3


From 9d3c752c062e3266f1051ba0825276ea1e2777da Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Fri, 8 Mar 2013 22:11:57 -0500
Subject: tracing: Optimize trace_printk() with one arg to use trace_puts()

Although trace_printk() is extremely fast, especially when it uses
trace_bprintk() (writes args straight to buffer instead of inserting
into string), it still has the overhead of calling one of the printf
sprintf() functions, that need to scan the fmt string to determine
what, if any args it has.

This is a waste of precious CPU cycles if the printk format has no
args but a single constant string. It is better to use trace_puts()
which does not have the overhead of the fmt scanning.

But wouldn't it be nice if the developer didn't have to think about
such things, and the compile would just do it for them?

  trace_printk("this string has no args\n");
  [...]
  trace_printk("this sting does %p %d\n", foo, bar);

As tracing is critical to have the least amount of overhead,
especially when dealing with race conditions, and you want to
eliminate any "Heisenbugs", you want the trace_printk() to use the
fastest possible means of tracing.

Currently the macro magic determines if it will use trace_bprintk()
or if the fmt is a dynamic string (a variable), it will fall
back to the slow trace_printk() method that does a full snprintf()
before copying it into the buffer, where as trace_bprintk() only
copys the pointer to the fmt and the args into the buffer.

Well, now there's a way to spend some more Hogwarts cash and come
up with new fancy macro magic.

  #define trace_printk(fmt, ...)			\
  do {							\
	char _______STR[] = __stringify((__VA_ARGS__));	\
	if (sizeof(_______STR) > 3)			\
		do_trace_printk(fmt, ##__VA_ARGS__);	\
	else						\
		trace_puts(fmt);			\
  } while (0)

The above needs a bit of explaining (both here and in the comments).

By stringifying the __VA_ARGS__, we can, at compile time, determine
the number of args that are being passed to trace_printk(). The extra
parenthesis are required, otherwise the compiler complains about
too many parameters for __stringify if there is more than one arg.

When there are no args, the __stringify((__VA_ARGS__)) converts into
"()\0", a string of 3 characters. Anything else, will be a string
containing more than 3 characters. Now we assign that string to a
dynamic char array, and then take the sizeof() of that array.
If it is greater than 3 characters, we know trace_printk() has args
and we need to do the full "do_trace_printk()" on them, otherwise
it was only passed a single arg and we can optimize to use trace_puts().

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Steven "The King of Nasty Macros!" Rostedt <rostedt@goodmis.org>
---
 include/linux/kernel.h | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index a3a5574a61fc..d0a16fe03fef 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -516,9 +516,30 @@ do {									\
  * Please refrain from leaving trace_printks scattered around in
  * your code. (Extra memory is used for special buffers that are
  * allocated when trace_printk() is used)
+ *
+ * A little optization trick is done here. If there's only one
+ * argument, there's no need to scan the string for printf formats.
+ * The trace_puts() will suffice. But how can we take advantage of
+ * using trace_puts() when trace_printk() has only one argument?
+ * By stringifying the args and checking the size we can tell
+ * whether or not there are args. __stringify((__VA_ARGS__)) will
+ * turn into "()\0" with a size of 3 when there are no args, anything
+ * else will be bigger. All we need to do is define a string to this,
+ * and then take its size and compare to 3. If it's bigger, use
+ * do_trace_printk() otherwise, optimize it to trace_puts(). Then just
+ * let gcc optimize the rest.
  */
 
-#define trace_printk(fmt, args...)					\
+#define trace_printk(fmt, ...)				\
+do {							\
+	char _______STR[] = __stringify((__VA_ARGS__));	\
+	if (sizeof(_______STR) > 3)			\
+		do_trace_printk(fmt, ##__VA_ARGS__);	\
+	else						\
+		trace_puts(fmt);			\
+} while (0)
+
+#define do_trace_printk(fmt, args...)					\
 do {									\
 	static const char *trace_printk_fmt				\
 		__attribute__((section("__trace_printk_fmt"))) =	\
-- 
cgit v1.2.3


From 57d01ad09721fb7719c4c8c72b434398186f35a0 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Tue, 12 Mar 2013 12:38:06 -0400
Subject: tracing: Fix comments for ftrace_event_file/call flags

Most of the flags for the struct ftrace_event_file were moved over
to the flags of the struct ftrace_event_call, but the comments were
never updated.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index d84c4a575514..4cb6cd8338a4 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -230,6 +230,13 @@ struct ftrace_event_call {
 	struct list_head	*files;
 	void			*mod;
 	void			*data;
+	/*
+	 *   bit 0:		filter_active
+	 *   bit 1:		allow trace by non root (cap any)
+	 *   bit 2:		failed to apply filter
+	 *   bit 3:		ftrace internal event (do not enable)
+	 *   bit 4:		Event was enabled by module
+	 */
 	int			flags; /* static flags of different events */
 
 #ifdef CONFIG_PERF_EVENTS
@@ -248,7 +255,7 @@ enum {
 
 /*
  * Ftrace event file flags:
- *  ENABELD	  - The event is enabled
+ *  ENABLED	  - The event is enabled
  *  RECORDED_CMD  - The comms should be recorded at sched_switch
  */
 enum {
@@ -265,12 +272,8 @@ struct ftrace_event_file {
 
 	/*
 	 * 32 bit flags:
-	 *   bit 1:		enabled
-	 *   bit 2:		filter_active
-	 *   bit 3:		enabled cmd record
-	 *   bit 4:		allow trace by non root (cap any)
-	 *   bit 5:		failed to apply filter
-	 *   bit 6:		ftrace internal event (do not enable)
+	 *   bit 0:		enabled
+	 *   bit 1:		enabled cmd record
 	 *
 	 * Changes to flags must hold the event_mutex.
 	 *
-- 
cgit v1.2.3


From e67efb93f0e9130174293ffaa5975f87b301b531 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Tue, 12 Mar 2013 15:07:59 -0400
Subject: ftrace: Clean up function probe methods

When a function probe is created, each function that the probe is
attached to, a "callback" method is called. On release of the probe,
each function entry calls the "free" method.

First, "callback" is a confusing name and does not really match what
it does. Callback sounds like it will be called when the probe
triggers. But that's not the case. This is really an "init" function,
so lets rename it as such.

Secondly, both "init" and "free" do not pass enough information back
to the handlers. Pass back the ops, ip and data for each time the
method is called. We have the information, might as well use it.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index e5ca8ef50e9b..832422d706f4 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -259,8 +259,10 @@ struct ftrace_probe_ops {
 	void			(*func)(unsigned long ip,
 					unsigned long parent_ip,
 					void **data);
-	int			(*callback)(unsigned long ip, void **data);
-	void			(*free)(void **data);
+	int			(*init)(struct ftrace_probe_ops *ops,
+					unsigned long ip, void **data);
+	void			(*free)(struct ftrace_probe_ops *ops,
+					unsigned long ip, void **data);
 	int			(*print)(struct seq_file *m,
 					 unsigned long ip,
 					 struct ftrace_probe_ops *ops,
-- 
cgit v1.2.3


From 417944c4c7a0f657158d0515f3b8e8c043fd788f Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Tue, 12 Mar 2013 13:26:18 -0400
Subject: tracing: Add a way to soft disable trace events

In order to let triggers enable or disable events, we need a 'soft'
method for doing so. For example, if a function probe is added that
lets a user enable or disable events when a function is called, that
change must be done without taking locks or a mutex, and definitely
it can't sleep. But the full enabling of a tracepoint is expensive.

By adding a 'SOFT_DISABLE' flag, and converting the flags to be updated
without the protection of a mutex (using set/clear_bit()), this soft
disable flag can be used to allow critical sections to enable or disable
events from being traced (after the event has been placed into "SOFT_MODE").

Some caveats though: The comm recorder (to map pids with a comm) can not
be soft disabled (yet). If you disable an event with with a "soft"
disable and wait a while before reading the trace, the comm cache may be
replaced and you'll get a bunch of <...> for comms in the trace.

Reading the "enable" file for an event that is disabled will now give
you "0*" where the '*' denotes that the tracepoint is still active but
the event itself is "disabled".

[ fixed _BIT used in & operation : thanks to Dan Carpenter and smatch ]

Cc: Dan Carpenter <dan.carpenter@oracle.com>
Cc: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 4cb6cd8338a4..4e28b011e63b 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -251,16 +251,23 @@ struct ftrace_subsystem_dir;
 enum {
 	FTRACE_EVENT_FL_ENABLED_BIT,
 	FTRACE_EVENT_FL_RECORDED_CMD_BIT,
+	FTRACE_EVENT_FL_SOFT_MODE_BIT,
+	FTRACE_EVENT_FL_SOFT_DISABLED_BIT,
 };
 
 /*
  * Ftrace event file flags:
  *  ENABLED	  - The event is enabled
  *  RECORDED_CMD  - The comms should be recorded at sched_switch
+ *  SOFT_MODE     - The event is enabled/disabled by SOFT_DISABLED
+ *  SOFT_DISABLED - When set, do not trace the event (even though its
+ *                   tracepoint may be enabled)
  */
 enum {
 	FTRACE_EVENT_FL_ENABLED		= (1 << FTRACE_EVENT_FL_ENABLED_BIT),
 	FTRACE_EVENT_FL_RECORDED_CMD	= (1 << FTRACE_EVENT_FL_RECORDED_CMD_BIT),
+	FTRACE_EVENT_FL_SOFT_MODE	= (1 << FTRACE_EVENT_FL_SOFT_MODE_BIT),
+	FTRACE_EVENT_FL_SOFT_DISABLED	= (1 << FTRACE_EVENT_FL_SOFT_DISABLED_BIT),
 };
 
 struct ftrace_event_file {
@@ -274,17 +281,18 @@ struct ftrace_event_file {
 	 * 32 bit flags:
 	 *   bit 0:		enabled
 	 *   bit 1:		enabled cmd record
+	 *   bit 2:		enable/disable with the soft disable bit
+	 *   bit 3:		soft disabled
 	 *
-	 * Changes to flags must hold the event_mutex.
-	 *
-	 * Note: Reads of flags do not hold the event_mutex since
-	 * they occur in critical sections. But the way flags
+	 * Note: The bits must be set atomically to prevent races
+	 * from other writers. Reads of flags do not need to be in
+	 * sync as they occur in critical sections. But the way flags
 	 * is currently used, these changes do not affect the code
 	 * except that when a change is made, it may have a slight
 	 * delay in propagating the changes to other CPUs due to
-	 * caching and such.
+	 * caching and such. Which is mostly OK ;-)
 	 */
-	unsigned int		flags;
+	unsigned long		flags;
 };
 
 #define __TRACE_EVENT_FLAGS(name, value)				\
-- 
cgit v1.2.3


From c142be8ebe0b7bf73c8a0063925623f3e4b980c0 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Wed, 13 Mar 2013 09:55:57 -0400
Subject: tracing: Add skip argument to trace_dump_stack()

Altough the trace_dump_stack() already skips three functions in
the call to stack trace, which gets the stack trace to start
at the caller of the function, the caller may want to skip some
more too (as it may have helper functions).

Add a skip argument to the trace_dump_stack() that lets the caller
skip back tracing functions that it doesn't care about.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/kernel.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d0a16fe03fef..239dbb9627ca 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -597,7 +597,7 @@ extern int __trace_puts(unsigned long ip, const char *str, int size);
 		__trace_puts(_THIS_IP_, str, strlen(str));		\
 })
 
-extern void trace_dump_stack(void);
+extern void trace_dump_stack(int skip);
 
 /*
  * The double __builtin_constant_p is because gcc will give us an error
-- 
cgit v1.2.3


From 8aacf017b065a805d27467843490c976835eb4a5 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Thu, 14 Mar 2013 13:13:45 -0400
Subject: tracing: Add "uptime" trace clock that uses jiffies

Add a simple trace clock called "uptime" for those that are
interested in the uptime of the trace. It uses jiffies as that's
the safest method, as other uptime clocks grab seq locks, which could
cause a deadlock if taken from an event or function tracer.

Requested-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/trace_clock.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/trace_clock.h b/include/linux/trace_clock.h
index d563f37e1a1d..1d7ca2739272 100644
--- a/include/linux/trace_clock.h
+++ b/include/linux/trace_clock.h
@@ -16,6 +16,7 @@
 
 extern u64 notrace trace_clock_local(void);
 extern u64 notrace trace_clock(void);
+extern u64 notrace trace_clock_jiffies(void);
 extern u64 notrace trace_clock_global(void);
 extern u64 notrace trace_clock_counter(void);
 
-- 
cgit v1.2.3


From d2802d0739dcc61af5e5ea00773ce7ddead4e9c2 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Fri, 19 Apr 2013 17:10:27 -0400
Subject: tracing: Compare to 1 instead of zero for is_signed_type()

The formats of the trace events show if the type of a event field
is signed or not via a macro called is_signed_type(). This does
a trick with the type and compares a -1 to zero after typecasting
to the tested type. If it returns true, it's signed, otherwise
its not. But this unfortunately triggers a warning by gcc:

  warning: comparison of unsigned expression < 0 is always false

As we know it is always false (that's why we do it), this is a
false warning. Luckily for us, the comparison works with a 1 as
well, without giving the warning.

Convert the check to compare (type)-1 < (type)0 to (type)-1 < (type)1
to determine if the type is signed or not.

Link: http://lkml.kernel.org/r/CAErSpo4YXcY9fuOKWYGDkddJwk68kmZTohsmVB6QvrhjboOh1Q@mail.gmail.com

Reported-by: Bjorn Helgaas <bhelgaas@google.com>
Reported-by: Gary Hade <garyhade@us.ibm.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 4e28b011e63b..34e00fb49bec 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -333,7 +333,7 @@ extern int trace_define_field(struct ftrace_event_call *call, const char *type,
 extern int trace_add_event_call(struct ftrace_event_call *call);
 extern void trace_remove_event_call(struct ftrace_event_call *call);
 
-#define is_signed_type(type)	(((type)(-1)) < (type)0)
+#define is_signed_type(type)	(((type)(-1)) < (type)1)
 
 int trace_set_clr_event(const char *system, const char *event, int set);
 
-- 
cgit v1.2.3