Diffstat (limited to 'kernel/trace')
-rw-r--r--  kernel/trace/Kconfig                 | 125
-rw-r--r--  kernel/trace/Makefile                |   4
-rw-r--r--  kernel/trace/ftrace.c                | 111
-rw-r--r--  kernel/trace/ring_buffer.c           |  28
-rw-r--r--  kernel/trace/trace.c                 | 151
-rw-r--r--  kernel/trace/trace.h                 |   6
-rw-r--r--  kernel/trace/trace_branch.c          |  19
-rw-r--r--  kernel/trace/trace_event_profile.c   |  52
-rw-r--r--  kernel/trace/trace_events.c          |  81
-rw-r--r--  kernel/trace/trace_events_filter.c   |  33
-rw-r--r--  kernel/trace/trace_export.c          |  94
-rw-r--r--  kernel/trace/trace_functions_graph.c |  78
-rw-r--r--  kernel/trace/trace_kprobe.c          | 305
-rw-r--r--  kernel/trace/trace_ksym.c            | 140
-rw-r--r--  kernel/trace/trace_stack.c           |  24
-rw-r--r--  kernel/trace/trace_syscalls.c        | 189
16 files changed, 716 insertions, 724 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index d006554888dc..13e13d428cd3 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -12,39 +12,37 @@ config NOP_TRACER
config HAVE_FTRACE_NMI_ENTER
bool
help
- See Documentation/trace/ftrace-implementation.txt
+ See Documentation/trace/ftrace-design.txt
config HAVE_FUNCTION_TRACER
bool
help
- See Documentation/trace/ftrace-implementation.txt
+ See Documentation/trace/ftrace-design.txt
config HAVE_FUNCTION_GRAPH_TRACER
bool
help
- See Documentation/trace/ftrace-implementation.txt
+ See Documentation/trace/ftrace-design.txt
config HAVE_FUNCTION_GRAPH_FP_TEST
bool
help
- An arch may pass in a unique value (frame pointer) to both the
- entering and exiting of a function. On exit, the value is compared
- and if it does not match, then it will panic the kernel.
+ See Documentation/trace/ftrace-design.txt
config HAVE_FUNCTION_TRACE_MCOUNT_TEST
bool
help
- See Documentation/trace/ftrace-implementation.txt
+ See Documentation/trace/ftrace-design.txt
config HAVE_DYNAMIC_FTRACE
bool
help
- See Documentation/trace/ftrace-implementation.txt
+ See Documentation/trace/ftrace-design.txt
config HAVE_FTRACE_MCOUNT_RECORD
bool
help
- See Documentation/trace/ftrace-implementation.txt
+ See Documentation/trace/ftrace-design.txt
config HAVE_HW_BRANCH_TRACER
bool
@@ -52,7 +50,7 @@ config HAVE_HW_BRANCH_TRACER
config HAVE_SYSCALL_TRACEPOINTS
bool
help
- See Documentation/trace/ftrace-implementation.txt
+ See Documentation/trace/ftrace-design.txt
config TRACER_MAX_TRACE
bool
@@ -83,7 +81,7 @@ config RING_BUFFER_ALLOW_SWAP
# This allows those options to appear when no other tracer is selected. But the
# options do not appear when something else selects it. We need the two options
# GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the
-# hidding of the automatic options.
+# hiding of the automatic options.
config TRACING
bool
@@ -119,7 +117,7 @@ menuconfig FTRACE
bool "Tracers"
default y if DEBUG_KERNEL
help
- Enable the kernel tracing infrastructure.
+ Enable the kernel tracing infrastructure.
if FTRACE
@@ -133,7 +131,7 @@ config FUNCTION_TRACER
help
Enable the kernel to trace every kernel function. This is done
by using a compiler feature to insert a small, 5-byte No-Operation
- instruction to the beginning of every kernel function, which NOP
+ instruction at the beginning of every kernel function, which NOP
sequence is then dynamically patched into a tracer call when
tracing is enabled by the administrator. If it's runtime disabled
(the bootup default), then the overhead of the instructions is very
@@ -150,7 +148,7 @@ config FUNCTION_GRAPH_TRACER
and its entry.
Its first purpose is to trace the duration of functions and
draw a call graph for each thread with some information like
- the return value. This is done by setting the current return
+ the return value. This is done by setting the current return
address on the current task structure into a stack of calls.
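A minimal userspace sketch of the mechanism described above (illustrative only, not part of this patch; every name here is invented): entry hooks push a timestamped frame onto a per-thread array and exit hooks pop it and report the duration, which is roughly what the graph tracer does with the return-address stack kept on the task structure.

  #include <stdio.h>
  #include <time.h>

  #define MAX_DEPTH 64

  struct frame { const char *func; struct timespec start; };

  static struct frame call_stack[MAX_DEPTH];      /* stand-in for the per-task stack */
  static int depth;

  static void graph_enter(const char *func)
  {
          call_stack[depth].func = func;
          clock_gettime(CLOCK_MONOTONIC, &call_stack[depth].start);
          depth++;
  }

  static void graph_exit(void)
  {
          struct timespec end;
          struct frame *f = &call_stack[--depth];
          long ns;

          clock_gettime(CLOCK_MONOTONIC, &end);
          ns = (end.tv_sec - f->start.tv_sec) * 1000000000L +
               (end.tv_nsec - f->start.tv_nsec);
          printf("%*s%s() took %ld ns\n", depth * 2, "", f->func, ns);
  }

  static void child(void)  { graph_enter("child");  graph_exit(); }
  static void parent(void) { graph_enter("parent"); child(); graph_exit(); }

  int main(void)
  {
          parent();
          return 0;
  }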
@@ -173,7 +171,7 @@ config IRQSOFF_TRACER
echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
- (Note that kernel size and overhead increases with this option
+ (Note that kernel size and overhead increase with this option
enabled. This option and the preempt-off timing option can be
used together or separately.)
@@ -186,7 +184,7 @@ config PREEMPT_TRACER
select TRACER_MAX_TRACE
select RING_BUFFER_ALLOW_SWAP
help
- This option measures the time spent in preemption off critical
+ This option measures the time spent in preemption-off critical
sections, with microsecond accuracy.
The default measurement method is a maximum search, which is
@@ -195,7 +193,7 @@ config PREEMPT_TRACER
echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
- (Note that kernel size and overhead increases with this option
+ (Note that kernel size and overhead increase with this option
enabled. This option and the irqs-off timing option can be
used together or separately.)
@@ -222,7 +220,7 @@ config ENABLE_DEFAULT_TRACERS
depends on !GENERIC_TRACER
select TRACING
help
- This tracer hooks to various trace points in the kernel
+ This tracer hooks to various trace points in the kernel,
allowing the user to pick and choose which trace point they
want to trace. It also includes the sched_switch tracer plugin.
@@ -265,19 +263,19 @@ choice
The likely/unlikely profiler only looks at the conditions that
are annotated with a likely or unlikely macro.
- The "all branch" profiler will profile every if statement in the
+ The "all branch" profiler will profile every if-statement in the
kernel. This profiler will also enable the likely/unlikely
- profiler as well.
+ profiler.
- Either of the above profilers add a bit of overhead to the system.
- If unsure choose "No branch profiling".
+ Either of the above profilers adds a bit of overhead to the system.
+ If unsure, choose "No branch profiling".
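For reference, an "annotated" branch is simply a condition wrapped in the kernel's likely()/unlikely() macros; the profiler counts how often each annotation's prediction turns out to be wrong. A self-contained userspace sketch using the same __builtin_expect() hint (illustrative only, not part of this patch):

  #include <stdio.h>

  /* Userspace stand-ins for the kernel's annotations. */
  #define likely(x)   __builtin_expect(!!(x), 1)
  #define unlikely(x) __builtin_expect(!!(x), 0)

  static int parse_digit(int c)
  {
          if (unlikely(c < '0' || c > '9'))       /* annotated as the rare path */
                  return -1;
          return c - '0';
  }

  int main(void)
  {
          printf("%d %d\n", parse_digit('7'), parse_digit('x'));  /* 7 -1 */
          return 0;
  }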
config BRANCH_PROFILE_NONE
bool "No branch profiling"
help
- No branch profiling. Branch profiling adds a bit of overhead.
- Only enable it if you want to analyse the branching behavior.
- Otherwise keep it disabled.
+ No branch profiling. Branch profiling adds a bit of overhead.
+ Only enable it if you want to analyse the branching behavior.
+ Otherwise keep it disabled.
config PROFILE_ANNOTATED_BRANCHES
bool "Trace likely/unlikely profiler"
@@ -288,7 +286,7 @@ config PROFILE_ANNOTATED_BRANCHES
/sys/kernel/debug/tracing/profile_annotated_branch
- Note: this will add a significant overhead, only turn this
+ Note: this will add a significant overhead; only turn this
on if you need to profile the system's use of these macros.
config PROFILE_ALL_BRANCHES
@@ -305,7 +303,7 @@ config PROFILE_ALL_BRANCHES
This configuration, when enabled, will impose a great overhead
on the system. This should only be enabled when the system
- is to be analyzed
+ is to be analyzed in much detail.
endchoice
config TRACING_BRANCHES
@@ -330,15 +328,6 @@ config BRANCH_TRACER
Say N if unsure.
-config POWER_TRACER
- bool "Trace power consumption behavior"
- depends on X86
- select GENERIC_TRACER
- help
- This tracer helps developers to analyze and optimize the kernels
- power management decisions, specifically the C-state and P-state
- behavior.
-
config KSYM_TRACER
bool "Trace read and write access on kernel memory locations"
depends on HAVE_HW_BREAKPOINT
@@ -391,14 +380,14 @@ config HW_BRANCH_TRACER
select GENERIC_TRACER
help
This tracer records all branches on the system in a circular
- buffer giving access to the last N branches for each cpu.
+ buffer, giving access to the last N branches for each cpu.
config KMEMTRACE
bool "Trace SLAB allocations"
select GENERIC_TRACER
help
kmemtrace provides tracing for slab allocator functions, such as
- kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. Collected
+ kmalloc, kfree, kmem_cache_alloc, kmem_cache_free, etc. Collected
data is then fed to the userspace application in order to analyse
allocation hotspots, internal fragmentation and so on, making it
possible to see how well an allocator performs, as well as debug
@@ -417,15 +406,15 @@ config WORKQUEUE_TRACER
bool "Trace workqueues"
select GENERIC_TRACER
help
- The workqueue tracer provides some statistical informations
+ The workqueue tracer provides some statistical information
about each cpu workqueue thread such as the number of the
works inserted and executed since their creation. It can help
- to evaluate the amount of work each of them have to perform.
+ to evaluate the amount of work each of them has to perform.
For example it can help a developer to decide whether he should
- choose a per cpu workqueue instead of a singlethreaded one.
+ choose a per-cpu workqueue instead of a singlethreaded one.
config BLK_DEV_IO_TRACE
- bool "Support for tracing block io actions"
+ bool "Support for tracing block IO actions"
depends on SYSFS
depends on BLOCK
select RELAY
@@ -451,20 +440,20 @@ config BLK_DEV_IO_TRACE
config KPROBE_EVENT
depends on KPROBES
- depends on X86
+ depends on HAVE_REGS_AND_STACK_ACCESS_API
bool "Enable kprobes-based dynamic events"
select TRACING
default y
help
- This allows the user to add tracing events (similar to tracepoints) on the fly
- via the ftrace interface. See Documentation/trace/kprobetrace.txt
- for more details.
+ This allows the user to add tracing events (similar to tracepoints)
+ on the fly via the ftrace interface. See
+ Documentation/trace/kprobetrace.txt for more details.
Those events can be inserted wherever kprobes can probe, and record
various register and memory values.
- This option is also required by perf-probe subcommand of perf tools. If
- you want to use perf tools, this option is strongly recommended.
+ This option is also required by perf-probe subcommand of perf tools.
+ If you want to use perf tools, this option is strongly recommended.
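A rough usage sketch (not part of this patch; the probe name, probed symbol and debugfs mount point are assumptions, see Documentation/trace/kprobetrace.txt for the authoritative syntax): a dynamic event is created by writing one definition line into the kprobe_events control file.

  #include <stdio.h>

  int main(void)
  {
          /* Assumes debugfs is mounted at /sys/kernel/debug. */
          FILE *f = fopen("/sys/kernel/debug/tracing/kprobe_events", "w");

          if (!f) {
                  perror("kprobe_events");
                  return 1;
          }
          /* "p[:[GRP/]EVENT] KSYM[+OFFS] [FETCHARGS]" -- probe do_sys_open and
           * record the top of the stack; the event name "myprobe" is arbitrary. */
          fprintf(f, "p:myprobe do_sys_open $stack0\n");
          fclose(f);
          return 0;
  }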
config DYNAMIC_FTRACE
bool "enable/disable ftrace tracepoints dynamically"
@@ -472,32 +461,32 @@ config DYNAMIC_FTRACE
depends on HAVE_DYNAMIC_FTRACE
default y
help
- This option will modify all the calls to ftrace dynamically
- (will patch them out of the binary image and replaces them
- with a No-Op instruction) as they are called. A table is
- created to dynamically enable them again.
+ This option will modify all the calls to ftrace dynamically
+ (will patch them out of the binary image and replace them
+ with a No-Op instruction) as they are called. A table is
+ created to dynamically enable them again.
- This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but otherwise
- has native performance as long as no tracing is active.
+ This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but
+ otherwise has native performance as long as no tracing is active.
- The changes to the code are done by a kernel thread that
- wakes up once a second and checks to see if any ftrace calls
- were made. If so, it runs stop_machine (stops all CPUS)
- and modifies the code to jump over the call to ftrace.
+ The changes to the code are done by a kernel thread that
+ wakes up once a second and checks to see if any ftrace calls
+ were made. If so, it runs stop_machine (stops all CPUS)
+ and modifies the code to jump over the call to ftrace.
config FUNCTION_PROFILER
bool "Kernel function profiler"
depends on FUNCTION_TRACER
default n
help
- This option enables the kernel function profiler. A file is created
- in debugfs called function_profile_enabled which defaults to zero.
- When a 1 is echoed into this file profiling begins, and when a
- zero is entered, profiling stops. A file in the trace_stats
- directory called functions, that show the list of functions that
- have been hit and their counters.
+ This option enables the kernel function profiler. A file is created
+ in debugfs called function_profile_enabled which defaults to zero.
+ When a 1 is echoed into this file profiling begins, and when a
+ zero is entered, profiling stops. A "functions" file is created in
+ the trace_stats directory; this file shows the list of functions that
+ have been hit and their counters.
- If in doubt, say N
+ If in doubt, say N.
config FTRACE_MCOUNT_RECORD
def_bool y
@@ -556,8 +545,8 @@ config RING_BUFFER_BENCHMARK
tristate "Ring buffer benchmark stress tester"
depends on RING_BUFFER
help
- This option creates a test to stress the ring buffer and bench mark it.
- It creates its own ring buffer such that it will not interfer with
+ This option creates a test to stress the ring buffer and benchmark it.
+ It creates its own ring buffer such that it will not interfere with
any other users of the ring buffer (such as ftrace). It then creates
a producer and consumer that will run for 10 seconds and sleep for
10 seconds. Each interval it will print out the number of events
@@ -566,7 +555,7 @@ config RING_BUFFER_BENCHMARK
It does not disable interrupts or raise its priority, so it may be
affected by processes that are running.
- If unsure, say N
+ If unsure, say N.
endif # FTRACE
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index cd9ecd89ec77..d00c6fe23f54 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -51,7 +51,9 @@ endif
obj-$(CONFIG_EVENT_TRACING) += trace_events.o
obj-$(CONFIG_EVENT_TRACING) += trace_export.o
obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
-obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
+ifeq ($(CONFIG_PERF_EVENTS),y)
+obj-$(CONFIG_EVENT_TRACING) += trace_event_profile.o
+endif
obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 7968762c8167..83783579378f 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -22,7 +22,6 @@
#include <linux/hardirq.h>
#include <linux/kthread.h>
#include <linux/uaccess.h>
-#include <linux/kprobes.h>
#include <linux/ftrace.h>
#include <linux/sysctl.h>
#include <linux/ctype.h>
@@ -898,36 +897,6 @@ static struct dyn_ftrace *ftrace_free_records;
} \
}
-#ifdef CONFIG_KPROBES
-
-static int frozen_record_count;
-
-static inline void freeze_record(struct dyn_ftrace *rec)
-{
- if (!(rec->flags & FTRACE_FL_FROZEN)) {
- rec->flags |= FTRACE_FL_FROZEN;
- frozen_record_count++;
- }
-}
-
-static inline void unfreeze_record(struct dyn_ftrace *rec)
-{
- if (rec->flags & FTRACE_FL_FROZEN) {
- rec->flags &= ~FTRACE_FL_FROZEN;
- frozen_record_count--;
- }
-}
-
-static inline int record_frozen(struct dyn_ftrace *rec)
-{
- return rec->flags & FTRACE_FL_FROZEN;
-}
-#else
-# define freeze_record(rec) ({ 0; })
-# define unfreeze_record(rec) ({ 0; })
-# define record_frozen(rec) ({ 0; })
-#endif /* CONFIG_KPROBES */
-
static void ftrace_free_rec(struct dyn_ftrace *rec)
{
rec->freelist = ftrace_free_records;
@@ -1025,6 +994,21 @@ static void ftrace_bug(int failed, unsigned long ip)
}
+/* Return 1 if the address range is reserved for ftrace */
+int ftrace_text_reserved(void *start, void *end)
+{
+ struct dyn_ftrace *rec;
+ struct ftrace_page *pg;
+
+ do_for_each_ftrace_rec(pg, rec) {
+ if (rec->ip <= (unsigned long)end &&
+ rec->ip + MCOUNT_INSN_SIZE > (unsigned long)start)
+ return 1;
+ } while_for_each_ftrace_rec();
+ return 0;
+}
+
+
static int
__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
{
@@ -1076,14 +1060,6 @@ static void ftrace_replace_code(int enable)
!(rec->flags & FTRACE_FL_CONVERTED))
continue;
- /* ignore updates to this record's mcount site */
- if (get_kprobe((void *)rec->ip)) {
- freeze_record(rec);
- continue;
- } else {
- unfreeze_record(rec);
- }
-
failed = __ftrace_replace_code(rec, enable);
if (failed) {
rec->flags |= FTRACE_FL_FAILED;
@@ -1690,7 +1666,7 @@ ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
static int ftrace_match(char *str, char *regex, int len, int type)
{
int matched = 0;
- char *ptr;
+ int slen;
switch (type) {
case MATCH_FULL:
@@ -1706,8 +1682,8 @@ static int ftrace_match(char *str, char *regex, int len, int type)
matched = 1;
break;
case MATCH_END_ONLY:
- ptr = strstr(str, regex);
- if (ptr && (ptr[len] == 0))
+ slen = strlen(str);
+ if (slen >= len && memcmp(str + slen - len, regex, len) == 0)
matched = 1;
break;
}
@@ -2426,6 +2402,7 @@ static const struct file_operations ftrace_notrace_fops = {
static DEFINE_MUTEX(graph_lock);
int ftrace_graph_count;
+int ftrace_graph_filter_enabled;
unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
static void *
@@ -2448,7 +2425,7 @@ static void *g_start(struct seq_file *m, loff_t *pos)
mutex_lock(&graph_lock);
/* Nothing, tell g_show to print all functions are enabled */
- if (!ftrace_graph_count && !*pos)
+ if (!ftrace_graph_filter_enabled && !*pos)
return (void *)1;
return __g_next(m, pos);
@@ -2494,6 +2471,7 @@ ftrace_graph_open(struct inode *inode, struct file *file)
mutex_lock(&graph_lock);
if ((file->f_mode & FMODE_WRITE) &&
(file->f_flags & O_TRUNC)) {
+ ftrace_graph_filter_enabled = 0;
ftrace_graph_count = 0;
memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs));
}
@@ -2519,7 +2497,7 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
struct dyn_ftrace *rec;
struct ftrace_page *pg;
int search_len;
- int found = 0;
+ int fail = 1;
int type, not;
char *search;
bool exists;
@@ -2530,37 +2508,51 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
/* decode regex */
type = filter_parse_regex(buffer, strlen(buffer), &search, &not);
- if (not)
- return -EINVAL;
+ if (!not && *idx >= FTRACE_GRAPH_MAX_FUNCS)
+ return -EBUSY;
search_len = strlen(search);
mutex_lock(&ftrace_lock);
do_for_each_ftrace_rec(pg, rec) {
- if (*idx >= FTRACE_GRAPH_MAX_FUNCS)
- break;
-
if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE))
continue;
if (ftrace_match_record(rec, search, search_len, type)) {
- /* ensure it is not already in the array */
+ /* if it is in the array */
exists = false;
- for (i = 0; i < *idx; i++)
+ for (i = 0; i < *idx; i++) {
if (array[i] == rec->ip) {
exists = true;
break;
}
- if (!exists)
- array[(*idx)++] = rec->ip;
- found = 1;
+ }
+
+ if (!not) {
+ fail = 0;
+ if (!exists) {
+ array[(*idx)++] = rec->ip;
+ if (*idx >= FTRACE_GRAPH_MAX_FUNCS)
+ goto out;
+ }
+ } else {
+ if (exists) {
+ array[i] = array[--(*idx)];
+ array[*idx] = 0;
+ fail = 0;
+ }
+ }
}
} while_for_each_ftrace_rec();
-
+out:
mutex_unlock(&ftrace_lock);
- return found ? 0 : -EINVAL;
+ if (fail)
+ return -EINVAL;
+
+ ftrace_graph_filter_enabled = 1;
+ return 0;
}
static ssize_t
@@ -2570,16 +2562,11 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
struct trace_parser parser;
ssize_t read, ret;
- if (!cnt || cnt < 0)
+ if (!cnt)
return 0;
mutex_lock(&graph_lock);
- if (ftrace_graph_count >= FTRACE_GRAPH_MAX_FUNCS) {
- ret = -EBUSY;
- goto out_unlock;
- }
-
if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) {
ret = -ENOMEM;
goto out_unlock;
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 2326b04c95c4..8c1b2d290718 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -464,6 +464,8 @@ struct ring_buffer_iter {
struct ring_buffer_per_cpu *cpu_buffer;
unsigned long head;
struct buffer_page *head_page;
+ struct buffer_page *cache_reader_page;
+ unsigned long cache_read;
u64 read_stamp;
};
@@ -2716,6 +2718,8 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
iter->read_stamp = cpu_buffer->read_stamp;
else
iter->read_stamp = iter->head_page->page->time_stamp;
+ iter->cache_reader_page = cpu_buffer->reader_page;
+ iter->cache_read = cpu_buffer->read;
}
/**
@@ -2869,7 +2873,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
* Splice the empty reader page into the list around the head.
*/
reader = rb_set_head_page(cpu_buffer);
- cpu_buffer->reader_page->list.next = reader->list.next;
+ cpu_buffer->reader_page->list.next = rb_list_head(reader->list.next);
cpu_buffer->reader_page->list.prev = reader->list.prev;
/*
@@ -2906,7 +2910,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
*
* Now make the new head point back to the reader page.
*/
- reader->list.next->prev = &cpu_buffer->reader_page->list;
+ rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list;
rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
/* Finally update the reader page to the new head */
@@ -3060,13 +3064,22 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
struct ring_buffer_event *event;
int nr_loops = 0;
- if (ring_buffer_iter_empty(iter))
- return NULL;
-
cpu_buffer = iter->cpu_buffer;
buffer = cpu_buffer->buffer;
+ /*
+ * Check if someone performed a consuming read to
+ * the buffer. A consuming read invalidates the iterator
+ * and we need to reset the iterator in this case.
+ */
+ if (unlikely(iter->cache_read != cpu_buffer->read ||
+ iter->cache_reader_page != cpu_buffer->reader_page))
+ rb_iter_reset(iter);
+
again:
+ if (ring_buffer_iter_empty(iter))
+ return NULL;
+
/*
* We repeat when a timestamp is encountered.
* We can get multiple timestamps by nested interrupts or also
@@ -3081,6 +3094,11 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
if (rb_per_cpu_empty(cpu_buffer))
return NULL;
+ if (iter->head >= local_read(&iter->head_page->page->commit)) {
+ rb_inc_iter(iter);
+ goto again;
+ }
+
event = rb_iter_head_event(iter);
switch (event->type_len) {
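The cache_read/cache_reader_page check added above is a generation-style invalidation scheme: the iterator snapshots the reader state when it is reset and starts over if a consuming read has changed that state underneath it. A minimal userspace sketch of the same idea (illustrative only; all names are invented):

  #include <stdio.h>

  struct buffer   { int data[8]; int read_count; };     /* bumped on every consume */
  struct iterator { const struct buffer *b; int pos; int cache_read; };

  static void iter_reset(struct iterator *it, const struct buffer *b)
  {
          it->b = b;
          it->pos = 0;
          it->cache_read = b->read_count;         /* snapshot the generation */
  }

  static int iter_next(struct iterator *it, int *out)
  {
          /* A consuming read invalidated us -- start over. */
          if (it->cache_read != it->b->read_count)
                  iter_reset(it, it->b);

          if (it->pos >= 8)
                  return 0;
          *out = it->b->data[it->pos++];
          return 1;
  }

  int main(void)
  {
          struct buffer b = { { 1, 2, 3, 4, 5, 6, 7, 8 }, 0 };
          struct iterator it;
          int v;

          iter_reset(&it, &b);
          iter_next(&it, &v);
          b.read_count++;                         /* someone consumed an event */
          iter_next(&it, &v);                     /* iterator resets, v == 1 again */
          printf("%d\n", v);
          return 0;
  }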
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8b9f20ab8eed..032c57ca6502 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -32,6 +32,7 @@
#include <linux/splice.h>
#include <linux/kdebug.h>
#include <linux/string.h>
+#include <linux/rwsem.h>
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/poll.h>
@@ -102,9 +103,6 @@ static inline void ftrace_enable_cpu(void)
static cpumask_var_t __read_mostly tracing_buffer_mask;
-/* Define which cpu buffers are currently read in trace_pipe */
-static cpumask_var_t tracing_reader_cpumask;
-
#define for_each_tracing_cpu(cpu) \
for_each_cpu(cpu, tracing_buffer_mask)
@@ -243,12 +241,91 @@ static struct tracer *current_trace __read_mostly;
/*
* trace_types_lock is used to protect the trace_types list.
- * This lock is also used to keep user access serialized.
- * Accesses from userspace will grab this lock while userspace
- * activities happen inside the kernel.
*/
static DEFINE_MUTEX(trace_types_lock);
+/*
+ * serialize access to the ring buffer
+ *
+ * The ring buffer serializes readers, but that is only low-level protection.
+ * The validity of the events (which are returned by ring_buffer_peek() etc.)
+ * is not protected by the ring buffer.
+ *
+ * The content of events may become garbage if we allow another process to
+ * consume these events concurrently:
+ * A) the page holding the consumed events may become a normal page
+ * (not a reader page) in the ring buffer, and this page will be
+ * rewritten by the events producer.
+ * B) the page holding the consumed events may become a page for splice_read,
+ * and this page will be returned to the system.
+ *
+ * These primitives allow multiple processes to access different per-cpu
+ * ring buffers concurrently.
+ *
+ * These primitives don't distinguish read-only and read-consume access.
+ * Multiple read-only accesses are also serialized.
+ */
+
+#ifdef CONFIG_SMP
+static DECLARE_RWSEM(all_cpu_access_lock);
+static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
+
+static inline void trace_access_lock(int cpu)
+{
+ if (cpu == TRACE_PIPE_ALL_CPU) {
+ /* gain it for accessing the whole ring buffer. */
+ down_write(&all_cpu_access_lock);
+ } else {
+ /* gain it for accessing a cpu ring buffer. */
+
+ /* Firstly block other trace_access_lock(TRACE_PIPE_ALL_CPU). */
+ down_read(&all_cpu_access_lock);
+
+ /* Secondly block other access to this @cpu ring buffer. */
+ mutex_lock(&per_cpu(cpu_access_lock, cpu));
+ }
+}
+
+static inline void trace_access_unlock(int cpu)
+{
+ if (cpu == TRACE_PIPE_ALL_CPU) {
+ up_write(&all_cpu_access_lock);
+ } else {
+ mutex_unlock(&per_cpu(cpu_access_lock, cpu));
+ up_read(&all_cpu_access_lock);
+ }
+}
+
+static inline void trace_access_lock_init(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ mutex_init(&per_cpu(cpu_access_lock, cpu));
+}
+
+#else
+
+static DEFINE_MUTEX(access_lock);
+
+static inline void trace_access_lock(int cpu)
+{
+ (void)cpu;
+ mutex_lock(&access_lock);
+}
+
+static inline void trace_access_unlock(int cpu)
+{
+ (void)cpu;
+ mutex_unlock(&access_lock);
+}
+
+static inline void trace_access_lock_init(void)
+{
+}
+
+#endif
+
/* trace_wait is a waitqueue for tasks blocked on trace_poll */
static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
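A self-contained userspace analogue of the locking scheme introduced above (illustrative only, not kernel code; pthreads stand in for the rwsem and per-cpu mutexes): whole-buffer access takes a rwlock for writing, per-slot access takes it for reading plus that slot's own mutex, so per-slot readers run concurrently while an "all slots" reader excludes everyone.

  #include <pthread.h>
  #include <stdio.h>

  #define NR_SLOTS 4

  static pthread_rwlock_t all_access = PTHREAD_RWLOCK_INITIALIZER;
  static pthread_mutex_t slot_lock[NR_SLOTS];

  static void access_lock(int slot)
  {
          if (slot < 0) {                         /* "all cpus" reader */
                  pthread_rwlock_wrlock(&all_access);
          } else {
                  pthread_rwlock_rdlock(&all_access);
                  pthread_mutex_lock(&slot_lock[slot]);
          }
  }

  static void access_unlock(int slot)
  {
          if (slot < 0) {
                  pthread_rwlock_unlock(&all_access);
          } else {
                  pthread_mutex_unlock(&slot_lock[slot]);
                  pthread_rwlock_unlock(&all_access);
          }
  }

  int main(void)
  {
          for (int i = 0; i < NR_SLOTS; i++)
                  pthread_mutex_init(&slot_lock[i], NULL);

          access_lock(2);                 /* exclusive access to slot 2 only */
          puts("reading slot 2");
          access_unlock(2);

          access_lock(-1);                /* exclusive access to all slots */
          puts("reading all slots");
          access_unlock(-1);
          return 0;
  }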
@@ -951,6 +1028,11 @@ void trace_find_cmdline(int pid, char comm[])
return;
}
+ if (WARN_ON_ONCE(pid < 0)) {
+ strcpy(comm, "<XXX>");
+ return;
+ }
+
if (pid > PID_MAX_DEFAULT) {
strcpy(comm, "<...>");
return;
@@ -1315,8 +1397,10 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
entry->fmt = fmt;
memcpy(entry->buf, trace_buf, sizeof(u32) * len);
- if (!filter_check_discard(call, entry, buffer, event))
+ if (!filter_check_discard(call, entry, buffer, event)) {
ring_buffer_unlock_commit(buffer, event);
+ ftrace_trace_stack(buffer, flags, 6, pc);
+ }
out_unlock:
arch_spin_unlock(&trace_buf_lock);
@@ -1389,8 +1473,10 @@ int trace_array_vprintk(struct trace_array *tr,
memcpy(&entry->buf, trace_buf, len);
entry->buf[len] = '\0';
- if (!filter_check_discard(call, entry, buffer, event))
+ if (!filter_check_discard(call, entry, buffer, event)) {
ring_buffer_unlock_commit(buffer, event);
+ ftrace_trace_stack(buffer, irq_flags, 6, pc);
+ }
out_unlock:
arch_spin_unlock(&trace_buf_lock);
@@ -1580,12 +1666,6 @@ static void tracing_iter_reset(struct trace_iterator *iter, int cpu)
}
/*
- * No necessary locking here. The worst thing which can
- * happen is loosing events consumed at the same time
- * by a trace_pipe reader.
- * Other than that, we don't risk to crash the ring buffer
- * because it serializes the readers.
- *
* The current tracer is copied to avoid a global locking
* all around.
*/
@@ -1640,12 +1720,16 @@ static void *s_start(struct seq_file *m, loff_t *pos)
}
trace_event_read_lock();
+ trace_access_lock(cpu_file);
return p;
}
static void s_stop(struct seq_file *m, void *p)
{
+ struct trace_iterator *iter = m->private;
+
atomic_dec(&trace_record_cmdline_disabled);
+ trace_access_unlock(iter->cpu_file);
trace_event_read_unlock();
}
@@ -2836,22 +2920,6 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
mutex_lock(&trace_types_lock);
- /* We only allow one reader per cpu */
- if (cpu_file == TRACE_PIPE_ALL_CPU) {
- if (!cpumask_empty(tracing_reader_cpumask)) {
- ret = -EBUSY;
- goto out;
- }
- cpumask_setall(tracing_reader_cpumask);
- } else {
- if (!cpumask_test_cpu(cpu_file, tracing_reader_cpumask))
- cpumask_set_cpu(cpu_file, tracing_reader_cpumask);
- else {
- ret = -EBUSY;
- goto out;
- }
- }
-
/* create a buffer to store the information to pass to userspace */
iter = kzalloc(sizeof(*iter), GFP_KERNEL);
if (!iter) {
@@ -2907,12 +2975,6 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
mutex_lock(&trace_types_lock);
- if (iter->cpu_file == TRACE_PIPE_ALL_CPU)
- cpumask_clear(tracing_reader_cpumask);
- else
- cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask);
-
-
if (iter->trace->pipe_close)
iter->trace->pipe_close(iter);
@@ -3074,6 +3136,7 @@ waitagain:
iter->pos = -1;
trace_event_read_lock();
+ trace_access_lock(iter->cpu_file);
while (find_next_entry_inc(iter) != NULL) {
enum print_line_t ret;
int len = iter->seq.len;
@@ -3090,6 +3153,7 @@ waitagain:
if (iter->seq.len >= cnt)
break;
}
+ trace_access_unlock(iter->cpu_file);
trace_event_read_unlock();
/* Now copy what we have to the user */
@@ -3215,6 +3279,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
}
trace_event_read_lock();
+ trace_access_lock(iter->cpu_file);
/* Fill as many pages as possible. */
for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) {
@@ -3238,6 +3303,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
trace_seq_init(&iter->seq);
}
+ trace_access_unlock(iter->cpu_file);
trace_event_read_unlock();
mutex_unlock(&iter->mutex);
@@ -3539,10 +3605,12 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
info->read = 0;
+ trace_access_lock(info->cpu);
ret = ring_buffer_read_page(info->tr->buffer,
&info->spare,
count,
info->cpu, 0);
+ trace_access_unlock(info->cpu);
if (ret < 0)
return 0;
@@ -3670,6 +3738,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
len &= PAGE_MASK;
}
+ trace_access_lock(info->cpu);
entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) {
@@ -3717,6 +3786,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
}
+ trace_access_unlock(info->cpu);
spd.nr_pages = i;
/* did we read anything? */
@@ -3949,7 +4019,7 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
if (!!(topt->flags->val & topt->opt->bit) != val) {
mutex_lock(&trace_types_lock);
ret = __set_tracer_option(current_trace, topt->flags,
- topt->opt, val);
+ topt->opt, !val);
mutex_unlock(&trace_types_lock);
if (ret)
return ret;
@@ -4153,6 +4223,8 @@ static __init int tracer_init_debugfs(void)
struct dentry *d_tracer;
int cpu;
+ trace_access_lock_init();
+
d_tracer = tracing_init_dentry();
trace_create_file("tracing_enabled", 0644, d_tracer,
@@ -4387,9 +4459,6 @@ __init static int tracer_alloc_buffers(void)
if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
goto out_free_buffer_mask;
- if (!zalloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL))
- goto out_free_tracing_cpumask;
-
/* To save memory, keep the ring buffer size to its minimum */
if (ring_buffer_expanded)
ring_buf_size = trace_buf_size;
@@ -4447,8 +4516,6 @@ __init static int tracer_alloc_buffers(void)
return 0;
out_free_cpumask:
- free_cpumask_var(tracing_reader_cpumask);
-out_free_tracing_cpumask:
free_cpumask_var(tracing_cpumask);
out_free_buffer_mask:
free_cpumask_var(tracing_buffer_mask);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 4df6a77eb196..fd05bcaf91b0 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -497,6 +497,7 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s);
#ifdef CONFIG_DYNAMIC_FTRACE
/* TODO: make this variable */
#define FTRACE_GRAPH_MAX_FUNCS 32
+extern int ftrace_graph_filter_enabled;
extern int ftrace_graph_count;
extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS];
@@ -504,7 +505,7 @@ static inline int ftrace_graph_addr(unsigned long addr)
{
int i;
- if (!ftrace_graph_count || test_tsk_trace_graph(current))
+ if (!ftrace_graph_filter_enabled)
return 1;
for (i = 0; i < ftrace_graph_count; i++) {
@@ -791,7 +792,8 @@ extern const char *__stop___trace_bprintk_fmt[];
#undef FTRACE_ENTRY
#define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \
- extern struct ftrace_event_call event_##call;
+ extern struct ftrace_event_call \
+ __attribute__((__aligned__(4))) event_##call;
#undef FTRACE_ENTRY_DUP
#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print) \
FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print))
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 4a194f08f88c..b9bc4d470177 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -307,8 +307,23 @@ static int annotated_branch_stat_cmp(void *p1, void *p2)
return -1;
if (percent_a > percent_b)
return 1;
- else
- return 0;
+
+ if (a->incorrect < b->incorrect)
+ return -1;
+ if (a->incorrect > b->incorrect)
+ return 1;
+
+ /*
+ * Since the above shows worse (incorrect) cases
+ * first, we continue that by showing best (correct)
+ * cases last.
+ */
+ if (a->correct > b->correct)
+ return -1;
+ if (a->correct < b->correct)
+ return 1;
+
+ return 0;
}
static struct tracer_stat annotated_branch_stats = {
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index 9e25573242cf..f0d693005075 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -6,14 +6,12 @@
*/
#include <linux/module.h>
+#include <linux/kprobes.h>
#include "trace.h"
-char *perf_trace_buf;
-EXPORT_SYMBOL_GPL(perf_trace_buf);
-
-char *perf_trace_buf_nmi;
-EXPORT_SYMBOL_GPL(perf_trace_buf_nmi);
+static char *perf_trace_buf;
+static char *perf_trace_buf_nmi;
typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ;
@@ -120,3 +118,47 @@ void ftrace_profile_disable(int event_id)
}
mutex_unlock(&event_mutex);
}
+
+__kprobes void *ftrace_perf_buf_prepare(int size, unsigned short type,
+ int *rctxp, unsigned long *irq_flags)
+{
+ struct trace_entry *entry;
+ char *trace_buf, *raw_data;
+ int pc, cpu;
+
+ pc = preempt_count();
+
+ /* Protect the per cpu buffer, begin the rcu read side */
+ local_irq_save(*irq_flags);
+
+ *rctxp = perf_swevent_get_recursion_context();
+ if (*rctxp < 0)
+ goto err_recursion;
+
+ cpu = smp_processor_id();
+
+ if (in_nmi())
+ trace_buf = rcu_dereference(perf_trace_buf_nmi);
+ else
+ trace_buf = rcu_dereference(perf_trace_buf);
+
+ if (!trace_buf)
+ goto err;
+
+ raw_data = per_cpu_ptr(trace_buf, cpu);
+
+ /* zero the dead bytes from alignment to avoid leaking stack data to userspace */
+ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+
+ entry = (struct trace_entry *)raw_data;
+ tracing_generic_entry_update(entry, *irq_flags, pc);
+ entry->type = type;
+
+ return raw_data;
+err:
+ perf_swevent_put_recursion_context(*rctxp);
+err_recursion:
+ local_irq_restore(*irq_flags);
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(ftrace_perf_buf_prepare);
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 189b09baf4fb..3f972ad98d04 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -60,10 +60,8 @@ int trace_define_field(struct ftrace_event_call *call, const char *type,
return 0;
err:
- if (field) {
+ if (field)
kfree(field->name);
- kfree(field->type);
- }
kfree(field);
return -ENOMEM;
@@ -520,41 +518,16 @@ out:
return ret;
}
-extern char *__bad_type_size(void);
-
-#undef FIELD
-#define FIELD(type, name) \
- sizeof(type) != sizeof(field.name) ? __bad_type_size() : \
- #type, "common_" #name, offsetof(typeof(field), name), \
- sizeof(field.name), is_signed_type(type)
-
-static int trace_write_header(struct trace_seq *s)
-{
- struct trace_entry field;
-
- /* struct trace_entry */
- return trace_seq_printf(s,
- "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
- "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
- "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
- "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
- "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
- "\n",
- FIELD(unsigned short, type),
- FIELD(unsigned char, flags),
- FIELD(unsigned char, preempt_count),
- FIELD(int, pid),
- FIELD(int, lock_depth));
-}
-
static ssize_t
event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
loff_t *ppos)
{
struct ftrace_event_call *call = filp->private_data;
+ struct ftrace_event_field *field;
struct trace_seq *s;
+ int common_field_count = 5;
char *buf;
- int r;
+ int r = 0;
if (*ppos)
return 0;
@@ -565,14 +538,48 @@ event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
trace_seq_init(s);
- /* If any of the first writes fail, so will the show_format. */
-
trace_seq_printf(s, "name: %s\n", call->name);
trace_seq_printf(s, "ID: %d\n", call->id);
trace_seq_printf(s, "format:\n");
- trace_write_header(s);
- r = call->show_format(call, s);
+ list_for_each_entry_reverse(field, &call->fields, link) {
+ /*
+ * Smartly shows the array type (except dynamic arrays).
+ * Normal:
+ * field:TYPE VAR
+ * If TYPE := TYPE[LEN], it is shown:
+ * field:TYPE VAR[LEN]
+ */
+ const char *array_descriptor = strchr(field->type, '[');
+
+ if (!strncmp(field->type, "__data_loc", 10))
+ array_descriptor = NULL;
+
+ if (!array_descriptor) {
+ r = trace_seq_printf(s, "\tfield:%s %s;\toffset:%u;"
+ "\tsize:%u;\tsigned:%d;\n",
+ field->type, field->name, field->offset,
+ field->size, !!field->is_signed);
+ } else {
+ r = trace_seq_printf(s, "\tfield:%.*s %s%s;\toffset:%u;"
+ "\tsize:%u;\tsigned:%d;\n",
+ (int)(array_descriptor - field->type),
+ field->type, field->name,
+ array_descriptor, field->offset,
+ field->size, !!field->is_signed);
+ }
+
+ if (--common_field_count == 0)
+ r = trace_seq_printf(s, "\n");
+
+ if (!r)
+ break;
+ }
+
+ if (r)
+ r = trace_seq_printf(s, "\nprint fmt: %s\n",
+ call->print_fmt);
+
if (!r) {
/*
* ug! The format output is bigger than a PAGE!!
@@ -948,10 +955,6 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
filter);
}
- /* A trace may not want to export its format */
- if (!call->show_format)
- return 0;
-
trace_create_file("format", 0444, call->dir, call,
format);
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 50504cb228de..4615f62a04f1 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -211,8 +211,9 @@ static int filter_pred_pchar(struct filter_pred *pred, void *event,
{
char **addr = (char **)(event + pred->offset);
int cmp, match;
+ int len = strlen(*addr) + 1; /* including trailing '\0' */
- cmp = pred->regex.match(*addr, &pred->regex, pred->regex.field_len);
+ cmp = pred->regex.match(*addr, &pred->regex, len);
match = cmp ^ pred->not;
@@ -251,7 +252,18 @@ static int filter_pred_none(struct filter_pred *pred, void *event,
return 0;
}
-/* Basic regex callbacks */
+/*
+ * regex_match_foo - Basic regex callbacks
+ *
+ * @str: the string to be searched
+ * @r: the regex structure containing the pattern string
+ * @len: the length of the string to be searched (including '\0')
+ *
+ * Note:
+ * - @str might not be NULL-terminated if it's of type DYN_STRING
+ * or STATIC_STRING
+ */
+
static int regex_match_full(char *str, struct regex *r, int len)
{
if (strncmp(str, r->pattern, len) == 0)
@@ -261,23 +273,24 @@ static int regex_match_full(char *str, struct regex *r, int len)
static int regex_match_front(char *str, struct regex *r, int len)
{
- if (strncmp(str, r->pattern, len) == 0)
+ if (strncmp(str, r->pattern, r->len) == 0)
return 1;
return 0;
}
static int regex_match_middle(char *str, struct regex *r, int len)
{
- if (strstr(str, r->pattern))
+ if (strnstr(str, r->pattern, len))
return 1;
return 0;
}
static int regex_match_end(char *str, struct regex *r, int len)
{
- char *ptr = strstr(str, r->pattern);
+ int strlen = len - 1;
- if (ptr && (ptr[r->len] == 0))
+ if (strlen >= r->len &&
+ memcmp(str + strlen - r->len, r->pattern, r->len) == 0)
return 1;
return 0;
}
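A standalone sketch of the length-bounded suffix match that regex_match_end() now performs (illustrative only; as documented above, len includes the trailing '\0', hence the len - 1):

  #include <stdio.h>
  #include <string.h>

  static int match_end(const char *str, const char *pattern, int len)
  {
          int slen = len - 1;
          int plen = (int)strlen(pattern);

          return slen >= plen &&
                 memcmp(str + slen - plen, pattern, plen) == 0;
  }

  int main(void)
  {
          char buf[] = "sys_openat";

          printf("%d\n", match_end(buf, "openat", (int)sizeof(buf)));  /* 1 */
          printf("%d\n", match_end(buf, "close", (int)sizeof(buf)));   /* 0 */
          return 0;
  }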
@@ -781,10 +794,8 @@ static int filter_add_pred(struct filter_parse_state *ps,
pred->regex.field_len = field->size;
} else if (field->filter_type == FILTER_DYN_STRING)
fn = filter_pred_strloc;
- else {
+ else
fn = filter_pred_pchar;
- pred->regex.field_len = strlen(pred->regex.pattern);
- }
} else {
if (field->is_signed)
ret = strict_strtoll(pred->regex.pattern, 0, &val);
@@ -1360,7 +1371,7 @@ out_unlock:
return err;
}
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
void ftrace_profile_free_filter(struct perf_event *event)
{
@@ -1428,5 +1439,5 @@ out_unlock:
return err;
}
-#endif /* CONFIG_EVENT_PROFILE */
+#endif /* CONFIG_PERF_EVENTS */
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 458e5bfe26d0..e091f64ba6ce 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -62,78 +62,6 @@ static void __always_unused ____ftrace_check_##name(void) \
#include "trace_entries.h"
-
-#undef __field
-#define __field(type, item) \
- ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
- "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
- offsetof(typeof(field), item), \
- sizeof(field.item), is_signed_type(type)); \
- if (!ret) \
- return 0;
-
-#undef __field_desc
-#define __field_desc(type, container, item) \
- ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
- "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
- offsetof(typeof(field), container.item), \
- sizeof(field.container.item), \
- is_signed_type(type)); \
- if (!ret) \
- return 0;
-
-#undef __array
-#define __array(type, item, len) \
- ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
- "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
- offsetof(typeof(field), item), \
- sizeof(field.item), is_signed_type(type)); \
- if (!ret) \
- return 0;
-
-#undef __array_desc
-#define __array_desc(type, container, item, len) \
- ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
- "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
- offsetof(typeof(field), container.item), \
- sizeof(field.container.item), \
- is_signed_type(type)); \
- if (!ret) \
- return 0;
-
-#undef __dynamic_array
-#define __dynamic_array(type, item) \
- ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
- "offset:%zu;\tsize:0;\tsigned:%u;\n", \
- offsetof(typeof(field), item), \
- is_signed_type(type)); \
- if (!ret) \
- return 0;
-
-#undef F_printk
-#define F_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
-
-#undef __entry
-#define __entry REC
-
-#undef FTRACE_ENTRY
-#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
-static int \
-ftrace_format_##name(struct ftrace_event_call *unused, \
- struct trace_seq *s) \
-{ \
- struct struct_name field __attribute__((unused)); \
- int ret = 0; \
- \
- tstruct; \
- \
- trace_seq_printf(s, "\nprint fmt: " print); \
- \
- return ret; \
-}
-
-#include "trace_entries.h"
-
#undef __field
#define __field(type, item) \
ret = trace_define_field(event_call, #type, #item, \
@@ -158,7 +86,8 @@ ftrace_format_##name(struct ftrace_event_call *unused, \
BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
ret = trace_define_field(event_call, #type "[" #len "]", #item, \
offsetof(typeof(field), item), \
- sizeof(field.item), 0, FILTER_OTHER); \
+ sizeof(field.item), \
+ is_signed_type(type), FILTER_OTHER); \
if (ret) \
return ret;
@@ -168,13 +97,18 @@ ftrace_format_##name(struct ftrace_event_call *unused, \
ret = trace_define_field(event_call, #type "[" #len "]", #item, \
offsetof(typeof(field), \
container.item), \
- sizeof(field.container.item), 0, \
- FILTER_OTHER); \
+ sizeof(field.container.item), \
+ is_signed_type(type), FILTER_OTHER); \
if (ret) \
return ret;
#undef __dynamic_array
-#define __dynamic_array(type, item)
+#define __dynamic_array(type, item) \
+ ret = trace_define_field(event_call, #type, #item, \
+ offsetof(typeof(field), item), \
+ 0, is_signed_type(type), FILTER_OTHER);\
+ if (ret) \
+ return ret;
#undef FTRACE_ENTRY
#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
@@ -197,6 +131,9 @@ static int ftrace_raw_init_event(struct ftrace_event_call *call)
return 0;
}
+#undef __entry
+#define __entry REC
+
#undef __field
#define __field(type, item)
@@ -212,6 +149,9 @@ static int ftrace_raw_init_event(struct ftrace_event_call *call)
#undef __dynamic_array
#define __dynamic_array(type, item)
+#undef F_printk
+#define F_printk(fmt, args...) #fmt ", " __stringify(args)
+
#undef FTRACE_ENTRY
#define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \
\
@@ -222,7 +162,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
.id = type, \
.system = __stringify(TRACE_SYSTEM), \
.raw_init = ftrace_raw_init_event, \
- .show_format = ftrace_format_##call, \
+ .print_fmt = print, \
.define_fields = ftrace_define_fields_##call, \
}; \
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index b1342c5d37cf..e998a824e9db 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -18,6 +18,7 @@ struct fgraph_cpu_data {
pid_t last_pid;
int depth;
int ignore;
+ unsigned long enter_funcs[FTRACE_RETFUNC_DEPTH];
};
struct fgraph_data {
@@ -212,13 +213,11 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
int cpu;
int pc;
- if (unlikely(!tr))
- return 0;
-
if (!ftrace_trace_task(current))
return 0;
- if (!ftrace_graph_addr(trace->func))
+ /* trace it when it is nested in, or is itself, an enabled function. */
+ if (!(trace->depth || ftrace_graph_addr(trace->func)))
return 0;
local_irq_save(flags);
@@ -231,9 +230,6 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
} else {
ret = 0;
}
- /* Only do the atomic if it is not already set */
- if (!test_tsk_trace_graph(current))
- set_tsk_trace_graph(current);
atomic_dec(&data->disabled);
local_irq_restore(flags);
@@ -281,17 +277,24 @@ void trace_graph_return(struct ftrace_graph_ret *trace)
pc = preempt_count();
__trace_graph_return(tr, trace, flags, pc);
}
- if (!trace->depth)
- clear_tsk_trace_graph(current);
atomic_dec(&data->disabled);
local_irq_restore(flags);
}
+void set_graph_array(struct trace_array *tr)
+{
+ graph_array = tr;
+
+ /* Make graph_array visible before we start tracing */
+
+ smp_mb();
+}
+
static int graph_trace_init(struct trace_array *tr)
{
int ret;
- graph_array = tr;
+ set_graph_array(tr);
ret = register_ftrace_graph(&trace_graph_return,
&trace_graph_entry);
if (ret)
@@ -301,11 +304,6 @@ static int graph_trace_init(struct trace_array *tr)
return 0;
}
-void set_graph_array(struct trace_array *tr)
-{
- graph_array = tr;
-}
-
static void graph_trace_reset(struct trace_array *tr)
{
tracing_stop_cmdline_record();
@@ -673,15 +671,21 @@ print_graph_entry_leaf(struct trace_iterator *iter,
duration = graph_ret->rettime - graph_ret->calltime;
if (data) {
+ struct fgraph_cpu_data *cpu_data;
int cpu = iter->cpu;
- int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
+
+ cpu_data = per_cpu_ptr(data->cpu_data, cpu);
/*
* Comments display at + 1 to depth. Since
* this is a leaf function, keep the comments
* equal to this depth.
*/
- *depth = call->depth - 1;
+ cpu_data->depth = call->depth - 1;
+
+ /* No need to keep this function around for this depth */
+ if (call->depth < FTRACE_RETFUNC_DEPTH)
+ cpu_data->enter_funcs[call->depth] = 0;
}
/* Overhead */
@@ -721,10 +725,15 @@ print_graph_entry_nested(struct trace_iterator *iter,
int i;
if (data) {
+ struct fgraph_cpu_data *cpu_data;
int cpu = iter->cpu;
- int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
- *depth = call->depth;
+ cpu_data = per_cpu_ptr(data->cpu_data, cpu);
+ cpu_data->depth = call->depth;
+
+ /* Save this function pointer to see if the exit matches */
+ if (call->depth < FTRACE_RETFUNC_DEPTH)
+ cpu_data->enter_funcs[call->depth] = call->func;
}
/* No overhead */
@@ -854,19 +863,28 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
struct fgraph_data *data = iter->private;
pid_t pid = ent->pid;
int cpu = iter->cpu;
+ int func_match = 1;
int ret;
int i;
if (data) {
+ struct fgraph_cpu_data *cpu_data;
int cpu = iter->cpu;
- int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
+
+ cpu_data = per_cpu_ptr(data->cpu_data, cpu);
/*
* Comments display at + 1 to depth. This is the
* return from a function, we now want the comments
* to display at the same level of the bracket.
*/
- *depth = trace->depth - 1;
+ cpu_data->depth = trace->depth - 1;
+
+ if (trace->depth < FTRACE_RETFUNC_DEPTH) {
+ if (cpu_data->enter_funcs[trace->depth] != trace->func)
+ func_match = 0;
+ cpu_data->enter_funcs[trace->depth] = 0;
+ }
}
if (print_graph_prologue(iter, s, 0, 0))
@@ -891,9 +909,21 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
return TRACE_TYPE_PARTIAL_LINE;
}
- ret = trace_seq_printf(s, "}\n");
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
+ /*
+ * If the return function does not have a matching entry,
+ * then the entry was lost. Instead of just printing
+ * the '}' and letting the user guess what function this
+ * belongs to, write out the function name.
+ */
+ if (func_match) {
+ ret = trace_seq_printf(s, "}\n");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ } else {
+ ret = trace_seq_printf(s, "} (%ps)\n", (void *)trace->func);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
/* Overrun */
if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) {
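The enter_funcs[] bookkeeping above pairs each return with the entry recorded at the same depth, so a lost entry can be reported instead of printing a bare '}'. A minimal userspace sketch of that pairing check (illustrative only; the names and addresses are invented):

  #include <stdio.h>

  #define MAX_DEPTH 16

  static void *enter_funcs[MAX_DEPTH];    /* function recorded per depth */

  static void on_entry(int depth, void *func)
  {
          if (depth < MAX_DEPTH)
                  enter_funcs[depth] = func;
  }

  static void on_return(int depth, void *func)
  {
          int func_match = 1;

          if (depth < MAX_DEPTH) {
                  if (enter_funcs[depth] != func)
                          func_match = 0;         /* the entry was lost */
                  enter_funcs[depth] = NULL;
          }

          if (func_match)
                  printf("}\n");
          else
                  printf("} (%p)\n", func);
  }

  int main(void)
  {
          on_entry(0, (void *)0x1000);
          on_return(0, (void *)0x1000);   /* prints "}" */
          on_return(0, (void *)0x2000);   /* prints "} (0x2000)" */
          return 0;
  }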
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 375f81a568dc..505c92273b1a 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -91,11 +91,6 @@ static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr)
return retval;
}
-static __kprobes unsigned long fetch_argument(struct pt_regs *regs, void *num)
-{
- return regs_get_argument_nth(regs, (unsigned int)((unsigned long)num));
-}
-
static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs,
void *dummy)
{
@@ -231,9 +226,7 @@ static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff)
{
int ret = -EINVAL;
- if (ff->func == fetch_argument)
- ret = snprintf(buf, n, "$arg%lu", (unsigned long)ff->data);
- else if (ff->func == fetch_register) {
+ if (ff->func == fetch_register) {
const char *name;
name = regs_query_register_name((unsigned int)((long)ff->data));
ret = snprintf(buf, n, "%%%s", name);
@@ -489,14 +482,6 @@ static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return)
}
} else
ret = -EINVAL;
- } else if (strncmp(arg, "arg", 3) == 0 && isdigit(arg[3])) {
- ret = strict_strtoul(arg + 3, 10, &param);
- if (ret || param > PARAM_MAX_ARGS)
- ret = -EINVAL;
- else {
- ff->func = fetch_argument;
- ff->data = (void *)param;
- }
} else
ret = -EINVAL;
return ret;
@@ -611,7 +596,6 @@ static int create_trace_probe(int argc, char **argv)
* - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS]
* - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS]
* Fetch args:
- * $argN : fetch Nth of function argument. (N:0-)
* $retval : fetch return value
* $stack : fetch stack address
* $stackN : fetch Nth of stack (N:0-)
@@ -651,12 +635,12 @@ static int create_trace_probe(int argc, char **argv)
event = strchr(group, '/') + 1;
event[-1] = '\0';
if (strlen(group) == 0) {
- pr_info("Group name is not specifiled\n");
+ pr_info("Group name is not specified\n");
return -EINVAL;
}
}
if (strlen(event) == 0) {
- pr_info("Event name is not specifiled\n");
+ pr_info("Event name is not specified\n");
return -EINVAL;
}
}
@@ -689,7 +673,7 @@ static int create_trace_probe(int argc, char **argv)
return -EINVAL;
}
/* an address specified */
- ret = strict_strtoul(&argv[0][2], 0, (unsigned long *)&addr);
+ ret = strict_strtoul(&argv[1][0], 0, (unsigned long *)&addr);
if (ret) {
pr_info("Failed to parse address.\n");
return ret;
@@ -958,7 +942,7 @@ static const struct file_operations kprobe_profile_ops = {
};
/* Kprobe handler */
-static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
+static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
{
struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
struct kprobe_trace_entry *entry;
@@ -978,7 +962,7 @@ static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
irq_flags, pc);
if (!event)
- return 0;
+ return;
entry = ring_buffer_event_data(event);
entry->nargs = tp->nr_args;
@@ -988,11 +972,10 @@ static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
if (!filter_current_check_discard(buffer, call, entry, event))
trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
- return 0;
}
/* Kretprobe handler */
-static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
+static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
struct pt_regs *regs)
{
struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
@@ -1011,7 +994,7 @@ static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
irq_flags, pc);
if (!event)
- return 0;
+ return;
entry = ring_buffer_event_data(event);
entry->nargs = tp->nr_args;
@@ -1022,8 +1005,6 @@ static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
if (!filter_current_check_discard(buffer, call, entry, event))
trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
-
- return 0;
}
/* Event entry printers */
@@ -1174,212 +1155,123 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
return 0;
}
-static int __probe_event_show_format(struct trace_seq *s,
- struct trace_probe *tp, const char *fmt,
- const char *arg)
+static int __set_print_fmt(struct trace_probe *tp, char *buf, int len)
{
int i;
+ int pos = 0;
- /* Show format */
- if (!trace_seq_printf(s, "\nprint fmt: \"%s", fmt))
- return 0;
+ const char *fmt, *arg;
- for (i = 0; i < tp->nr_args; i++)
- if (!trace_seq_printf(s, " %s=%%lx", tp->args[i].name))
- return 0;
+ if (!probe_is_return(tp)) {
+ fmt = "(%lx)";
+ arg = "REC->" FIELD_STRING_IP;
+ } else {
+ fmt = "(%lx <- %lx)";
+ arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
+ }
- if (!trace_seq_printf(s, "\", %s", arg))
- return 0;
+ /* When len=0, we just calculate the needed length */
+#define LEN_OR_ZERO (len ? len - pos : 0)
- for (i = 0; i < tp->nr_args; i++)
- if (!trace_seq_printf(s, ", REC->%s", tp->args[i].name))
- return 0;
-
- return trace_seq_puts(s, "\n");
-}
+ pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);
-#undef SHOW_FIELD
-#define SHOW_FIELD(type, item, name) \
- do { \
- ret = trace_seq_printf(s, "\tfield: " #type " %s;\t" \
- "offset:%u;\tsize:%u;\n", name, \
- (unsigned int)offsetof(typeof(field), item),\
- (unsigned int)sizeof(type)); \
- if (!ret) \
- return 0; \
- } while (0)
+ for (i = 0; i < tp->nr_args; i++) {
+ pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%%lx",
+ tp->args[i].name);
+ }
-static int kprobe_event_show_format(struct ftrace_event_call *call,
- struct trace_seq *s)
-{
- struct kprobe_trace_entry field __attribute__((unused));
- int ret, i;
- struct trace_probe *tp = (struct trace_probe *)call->data;
+ pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
- SHOW_FIELD(unsigned long, ip, FIELD_STRING_IP);
- SHOW_FIELD(int, nargs, FIELD_STRING_NARGS);
+ for (i = 0; i < tp->nr_args; i++) {
+ pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
+ tp->args[i].name);
+ }
- /* Show fields */
- for (i = 0; i < tp->nr_args; i++)
- SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
- trace_seq_puts(s, "\n");
+#undef LEN_OR_ZERO
- return __probe_event_show_format(s, tp, "(%lx)",
- "REC->" FIELD_STRING_IP);
+ /* return the length of print_fmt */
+ return pos;
}
-static int kretprobe_event_show_format(struct ftrace_event_call *call,
- struct trace_seq *s)
+static int set_print_fmt(struct trace_probe *tp)
{
- struct kretprobe_trace_entry field __attribute__((unused));
- int ret, i;
- struct trace_probe *tp = (struct trace_probe *)call->data;
+ int len;
+ char *print_fmt;
- SHOW_FIELD(unsigned long, func, FIELD_STRING_FUNC);
- SHOW_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP);
- SHOW_FIELD(int, nargs, FIELD_STRING_NARGS);
+ /* First: called with 0 length to calculate the needed length */
+ len = __set_print_fmt(tp, NULL, 0);
+ print_fmt = kmalloc(len + 1, GFP_KERNEL);
+ if (!print_fmt)
+ return -ENOMEM;
- /* Show fields */
- for (i = 0; i < tp->nr_args; i++)
- SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
- trace_seq_puts(s, "\n");
+ /* Second: actually write the @print_fmt */
+ __set_print_fmt(tp, print_fmt, len + 1);
+ tp->call.print_fmt = print_fmt;
- return __probe_event_show_format(s, tp, "(%lx <- %lx)",
- "REC->" FIELD_STRING_FUNC
- ", REC->" FIELD_STRING_RETIP);
+ return 0;
}
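A standalone illustration of the two-pass sizing pattern used by set_print_fmt() above (the format pieces and argument names here are invented): the first call passes len 0, so every snprintf() only reports how much it would have written; the second call then fills an exactly-sized buffer.

  #include <stdio.h>
  #include <stdlib.h>

  static int build_fmt(char *buf, int len, const char *names[], int n)
  {
          int pos = 0;

  /* When len == 0, every snprintf() gets size 0 and only reports lengths. */
  #define LEN_OR_ZERO (len ? len - pos : 0)
          pos += snprintf(buf + pos, LEN_OR_ZERO, "\"(%%lx)");
          for (int i = 0; i < n; i++)
                  pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%%lx", names[i]);
          pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
  #undef LEN_OR_ZERO

          return pos;     /* length that would have been written */
  }

  int main(void)
  {
          const char *names[] = { "dfd", "filename" };
          int len = build_fmt(NULL, 0, names, 2);         /* first pass: size it */
          char *fmt = malloc(len + 1);

          build_fmt(fmt, len + 1, names, 2);              /* second pass: fill it */
          printf("%s\n", fmt);    /* "(%lx) dfd=%lx filename=%lx" */
          free(fmt);
          return 0;
  }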
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
/* Kprobe profile handler */
-static __kprobes int kprobe_profile_func(struct kprobe *kp,
+static __kprobes void kprobe_profile_func(struct kprobe *kp,
struct pt_regs *regs)
{
struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
struct ftrace_event_call *call = &tp->call;
struct kprobe_trace_entry *entry;
- struct trace_entry *ent;
- int size, __size, i, pc, __cpu;
+ int size, __size, i;
unsigned long irq_flags;
- char *trace_buf;
- char *raw_data;
int rctx;
- pc = preempt_count();
__size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
size = ALIGN(__size + sizeof(u32), sizeof(u64));
size -= sizeof(u32);
if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
"profile buffer not large enough"))
- return 0;
-
- /*
- * Protect the non nmi buffer
- * This also protects the rcu read side
- */
- local_irq_save(irq_flags);
-
- rctx = perf_swevent_get_recursion_context();
- if (rctx < 0)
- goto end_recursion;
-
- __cpu = smp_processor_id();
-
- if (in_nmi())
- trace_buf = rcu_dereference(perf_trace_buf_nmi);
- else
- trace_buf = rcu_dereference(perf_trace_buf);
+ return;
- if (!trace_buf)
- goto end;
-
- raw_data = per_cpu_ptr(trace_buf, __cpu);
-
- /* Zero dead bytes from alignment to avoid buffer leak to userspace */
- *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
- entry = (struct kprobe_trace_entry *)raw_data;
- ent = &entry->ent;
+ entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags);
+ if (!entry)
+ return;
- tracing_generic_entry_update(ent, irq_flags, pc);
- ent->type = call->id;
entry->nargs = tp->nr_args;
entry->ip = (unsigned long)kp->addr;
for (i = 0; i < tp->nr_args; i++)
entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
- perf_tp_event(call->id, entry->ip, 1, entry, size);
-
-end:
- perf_swevent_put_recursion_context(rctx);
-end_recursion:
- local_irq_restore(irq_flags);
- return 0;
+ ftrace_perf_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags);
}
/* Kretprobe profile handler */
-static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
+static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri,
struct pt_regs *regs)
{
struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
struct ftrace_event_call *call = &tp->call;
struct kretprobe_trace_entry *entry;
- struct trace_entry *ent;
- int size, __size, i, pc, __cpu;
+ int size, __size, i;
unsigned long irq_flags;
- char *trace_buf;
- char *raw_data;
int rctx;
- pc = preempt_count();
__size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
size = ALIGN(__size + sizeof(u32), sizeof(u64));
size -= sizeof(u32);
if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
"profile buffer not large enough"))
- return 0;
-
- /*
- * Protect the non nmi buffer
- * This also protects the rcu read side
- */
- local_irq_save(irq_flags);
-
- rctx = perf_swevent_get_recursion_context();
- if (rctx < 0)
- goto end_recursion;
-
- __cpu = smp_processor_id();
+ return;
- if (in_nmi())
- trace_buf = rcu_dereference(perf_trace_buf_nmi);
- else
- trace_buf = rcu_dereference(perf_trace_buf);
-
- if (!trace_buf)
- goto end;
-
- raw_data = per_cpu_ptr(trace_buf, __cpu);
-
- /* Zero dead bytes from alignment to avoid buffer leak to userspace */
- *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
- entry = (struct kretprobe_trace_entry *)raw_data;
- ent = &entry->ent;
+ entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags);
+ if (!entry)
+ return;
- tracing_generic_entry_update(ent, irq_flags, pc);
- ent->type = call->id;
entry->nargs = tp->nr_args;
entry->func = (unsigned long)tp->rp.kp.addr;
entry->ret_ip = (unsigned long)ri->ret_addr;
for (i = 0; i < tp->nr_args; i++)
entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
- perf_tp_event(call->id, entry->ret_ip, 1, entry, size);
-
-end:
- perf_swevent_put_recursion_context(rctx);
-end_recursion:
- local_irq_restore(irq_flags);
- return 0;
+ ftrace_perf_buf_submit(entry, size, rctx, entry->ret_ip, 1, irq_flags);
}
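
Both profile handlers now hand the bookkeeping that the deleted lines open-coded -- recursion-context acquisition, per-cpu buffer selection, zeroing the alignment padding, and the generic entry setup -- to ftrace_perf_buf_prepare()/ftrace_perf_buf_submit(). The following is only a rough sketch of what the prepare side covers, reconstructed from the removed code rather than from the helper's actual implementation.

/*
 * Sketch (not the real implementation) of the work ftrace_perf_buf_prepare()
 * takes over from the open-coded sequence deleted above;
 * ftrace_perf_buf_submit() is the matching perf_tp_event() +
 * put_recursion_context() + irq-restore half.
 */
static void *perf_buf_prepare_sketch(int size, unsigned short type,
				     int *rctxp, unsigned long *irq_flags)
{
	struct trace_entry *ent;
	char *trace_buf, *raw_data;
	int rctx;

	local_irq_save(*irq_flags);	/* protect the per-cpu buffer / RCU read side */

	rctx = perf_swevent_get_recursion_context();
	if (rctx < 0)
		goto fail_irq;
	*rctxp = rctx;

	if (in_nmi())
		trace_buf = rcu_dereference(perf_trace_buf_nmi);
	else
		trace_buf = rcu_dereference(perf_trace_buf);
	if (!trace_buf)
		goto fail_rctx;

	raw_data = per_cpu_ptr(trace_buf, smp_processor_id());

	/* zero the alignment padding so no kernel data leaks to user space */
	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;

	ent = (struct trace_entry *)raw_data;
	tracing_generic_entry_update(ent, *irq_flags, preempt_count());
	ent->type = type;

	return raw_data;

fail_rctx:
	perf_swevent_put_recursion_context(rctx);
fail_irq:
	local_irq_restore(*irq_flags);
	return NULL;
}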
static int probe_profile_enable(struct ftrace_event_call *call)
@@ -1407,7 +1299,7 @@ static void probe_profile_disable(struct ftrace_event_call *call)
disable_kprobe(&tp->rp.kp);
}
}
-#endif /* CONFIG_EVENT_PROFILE */
+#endif /* CONFIG_PERF_EVENTS */
static __kprobes
@@ -1417,10 +1309,10 @@ int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
if (tp->flags & TP_FLAG_TRACE)
kprobe_trace_func(kp, regs);
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
if (tp->flags & TP_FLAG_PROFILE)
kprobe_profile_func(kp, regs);
-#endif /* CONFIG_EVENT_PROFILE */
+#endif
return 0; /* We don't tweak the kernel, so just return 0 */
}
@@ -1431,10 +1323,10 @@ int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
if (tp->flags & TP_FLAG_TRACE)
kretprobe_trace_func(ri, regs);
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
if (tp->flags & TP_FLAG_PROFILE)
kretprobe_profile_func(ri, regs);
-#endif /* CONFIG_EVENT_PROFILE */
+#endif
return 0; /* We don't tweak the kernel, so just return 0 */
}
@@ -1447,23 +1339,25 @@ static int register_probe_event(struct trace_probe *tp)
if (probe_is_return(tp)) {
tp->event.trace = print_kretprobe_event;
call->raw_init = probe_event_raw_init;
- call->show_format = kretprobe_event_show_format;
call->define_fields = kretprobe_event_define_fields;
} else {
tp->event.trace = print_kprobe_event;
call->raw_init = probe_event_raw_init;
- call->show_format = kprobe_event_show_format;
call->define_fields = kprobe_event_define_fields;
}
+ if (set_print_fmt(tp) < 0)
+ return -ENOMEM;
call->event = &tp->event;
call->id = register_ftrace_event(&tp->event);
- if (!call->id)
+ if (!call->id) {
+ kfree(call->print_fmt);
return -ENODEV;
+ }
call->enabled = 0;
call->regfunc = probe_event_enable;
call->unregfunc = probe_event_disable;
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
call->profile_enable = probe_profile_enable;
call->profile_disable = probe_profile_disable;
#endif
@@ -1471,6 +1365,7 @@ static int register_probe_event(struct trace_probe *tp)
ret = trace_add_event_call(call);
if (ret) {
pr_info("Failed to register kprobe event: %s\n", call->name);
+ kfree(call->print_fmt);
unregister_ftrace_event(&tp->event);
}
return ret;
@@ -1480,6 +1375,7 @@ static void unregister_probe_event(struct trace_probe *tp)
{
/* tp->event is unregistered in trace_remove_event_call() */
trace_remove_event_call(&tp->call);
+ kfree(tp->call.print_fmt);
}
/* Make a debugfs interface for controlling probe points */
@@ -1522,28 +1418,67 @@ static int kprobe_trace_selftest_target(int a1, int a2, int a3,
static __init int kprobe_trace_self_tests_init(void)
{
- int ret;
+ int ret, warn = 0;
int (*target)(int, int, int, int, int, int);
+ struct trace_probe *tp;
target = kprobe_trace_selftest_target;
pr_info("Testing kprobe tracing: ");
ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target "
- "$arg1 $arg2 $arg3 $arg4 $stack $stack0");
- if (WARN_ON_ONCE(ret))
- pr_warning("error enabling function entry\n");
+ "$stack $stack0 +0($stack)");
+ if (WARN_ON_ONCE(ret)) {
+ pr_warning("error on probing function entry.\n");
+ warn++;
+ } else {
+ /* Enable trace point */
+ tp = find_probe_event("testprobe", KPROBE_EVENT_SYSTEM);
+ if (WARN_ON_ONCE(tp == NULL)) {
+ pr_warning("error on getting new probe.\n");
+ warn++;
+ } else
+ probe_event_enable(&tp->call);
+ }
ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
"$retval");
- if (WARN_ON_ONCE(ret))
- pr_warning("error enabling function return\n");
+ if (WARN_ON_ONCE(ret)) {
+ pr_warning("error on probing function return.\n");
+ warn++;
+ } else {
+ /* Enable trace point */
+ tp = find_probe_event("testprobe2", KPROBE_EVENT_SYSTEM);
+ if (WARN_ON_ONCE(tp == NULL)) {
+ pr_warning("error on getting new probe.\n");
+ warn++;
+ } else
+ probe_event_enable(&tp->call);
+ }
+
+ if (warn)
+ goto end;
ret = target(1, 2, 3, 4, 5, 6);
- cleanup_all_probes();
+ ret = command_trace_probe("-:testprobe");
+ if (WARN_ON_ONCE(ret)) {
+ pr_warning("error on deleting a probe.\n");
+ warn++;
+ }
- pr_cont("OK\n");
+ ret = command_trace_probe("-:testprobe2");
+ if (WARN_ON_ONCE(ret)) {
+ pr_warning("error on deleting a probe.\n");
+ warn++;
+ }
+
+end:
+ cleanup_all_probes();
+ if (warn)
+ pr_cont("NG: Some tests are failed. Please check them.\n");
+ else
+ pr_cont("OK\n");
return 0;
}
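
The commands the reworked selftest feeds to command_trace_probe() are the same text a user writes to the kprobe_events control file (see Documentation/trace/kprobetrace.txt), so the test now exercises the full define/enable/hit/delete cycle through the normal interface. Assuming the usual debugfs mount point, the equivalent interaction from user space looks roughly like this:

/*
 * Equivalent debugfs interaction (paths assume debugfs mounted at
 * /sys/kernel/debug, per kprobetrace.txt):
 *
 *   # echo 'p:testprobe kprobe_trace_selftest_target $stack $stack0 +0($stack)' \
 *               >> /sys/kernel/debug/tracing/kprobe_events
 *   # echo 'r:testprobe2 kprobe_trace_selftest_target $retval' \
 *               >> /sys/kernel/debug/tracing/kprobe_events
 *   # echo 1 > /sys/kernel/debug/tracing/events/kprobes/testprobe/enable
 *   # echo 1 > /sys/kernel/debug/tracing/events/kprobes/testprobe2/enable
 *   ... trigger the traced function ...
 *   # echo 0 > /sys/kernel/debug/tracing/events/kprobes/testprobe/enable
 *   # echo 0 > /sys/kernel/debug/tracing/events/kprobes/testprobe2/enable
 *   # echo '-:testprobe'  >> /sys/kernel/debug/tracing/kprobe_events
 *   # echo '-:testprobe2' >> /sys/kernel/debug/tracing/kprobe_events
 */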
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
index faf37fa4408c..94103cdcf9d8 100644
--- a/kernel/trace/trace_ksym.c
+++ b/kernel/trace/trace_ksym.c
@@ -26,12 +26,13 @@
#include <linux/fs.h>
#include "trace_output.h"
-#include "trace_stat.h"
#include "trace.h"
#include <linux/hw_breakpoint.h>
#include <asm/hw_breakpoint.h>
+#include <asm/atomic.h>
+
/*
* For now, let us restrict the number of symbols traced simultaneously to the
* number of available hardware breakpoint registers.
@@ -44,7 +45,7 @@ struct trace_ksym {
struct perf_event **ksym_hbp;
struct perf_event_attr attr;
#ifdef CONFIG_PROFILE_KSYM_TRACER
- unsigned long counter;
+ atomic64_t counter;
#endif
struct hlist_node ksym_hlist;
};
@@ -69,9 +70,8 @@ void ksym_collect_stats(unsigned long hbp_hit_addr)
rcu_read_lock();
hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
- if ((entry->attr.bp_addr == hbp_hit_addr) &&
- (entry->counter <= MAX_UL_INT)) {
- entry->counter++;
+ if (entry->attr.bp_addr == hbp_hit_addr) {
+ atomic64_inc(&entry->counter);
break;
}
}
@@ -197,7 +197,6 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
entry->attr.bp_addr = addr;
entry->attr.bp_len = HW_BREAKPOINT_LEN_4;
- ret = -EAGAIN;
entry->ksym_hbp = register_wide_hw_breakpoint(&entry->attr,
ksym_hbp_handler);
@@ -300,8 +299,8 @@ static ssize_t ksym_trace_filter_write(struct file *file,
* 2: echo 0 > ksym_trace_filter
* 3: echo "*:---" > ksym_trace_filter
*/
- if (!buf[0] || !strcmp(buf, "0") ||
- !strcmp(buf, "*:---")) {
+ if (!input_string[0] || !strcmp(input_string, "0") ||
+ !strcmp(input_string, "*:---")) {
__ksym_trace_reset();
ret = 0;
goto out;
@@ -444,102 +443,77 @@ struct tracer ksym_tracer __read_mostly =
.print_line = ksym_trace_output
};
-__init static int init_ksym_trace(void)
-{
- struct dentry *d_tracer;
- struct dentry *entry;
-
- d_tracer = tracing_init_dentry();
- ksym_filter_entry_count = 0;
-
- entry = debugfs_create_file("ksym_trace_filter", 0644, d_tracer,
- NULL, &ksym_tracing_fops);
- if (!entry)
- pr_warning("Could not create debugfs "
- "'ksym_trace_filter' file\n");
-
- return register_tracer(&ksym_tracer);
-}
-device_initcall(init_ksym_trace);
-
-
#ifdef CONFIG_PROFILE_KSYM_TRACER
-static int ksym_tracer_stat_headers(struct seq_file *m)
+static int ksym_profile_show(struct seq_file *m, void *v)
{
+ struct hlist_node *node;
+ struct trace_ksym *entry;
+ int access_type = 0;
+ char fn_name[KSYM_NAME_LEN];
+
seq_puts(m, " Access Type ");
seq_puts(m, " Symbol Counter\n");
seq_puts(m, " ----------- ");
seq_puts(m, " ------ -------\n");
- return 0;
-}
-static int ksym_tracer_stat_show(struct seq_file *m, void *v)
-{
- struct hlist_node *stat = v;
- struct trace_ksym *entry;
- int access_type = 0;
- char fn_name[KSYM_NAME_LEN];
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
- entry = hlist_entry(stat, struct trace_ksym, ksym_hlist);
+ access_type = entry->attr.bp_type;
- access_type = entry->attr.bp_type;
+ switch (access_type) {
+ case HW_BREAKPOINT_R:
+ seq_puts(m, " R ");
+ break;
+ case HW_BREAKPOINT_W:
+ seq_puts(m, " W ");
+ break;
+ case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
+ seq_puts(m, " RW ");
+ break;
+ default:
+ seq_puts(m, " NA ");
+ }
- switch (access_type) {
- case HW_BREAKPOINT_R:
- seq_puts(m, " R ");
- break;
- case HW_BREAKPOINT_W:
- seq_puts(m, " W ");
- break;
- case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
- seq_puts(m, " RW ");
- break;
- default:
- seq_puts(m, " NA ");
+ if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0)
+ seq_printf(m, " %-36s", fn_name);
+ else
+ seq_printf(m, " %-36s", "<NA>");
+ seq_printf(m, " %15llu\n",
+ (unsigned long long)atomic64_read(&entry->counter));
}
-
- if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0)
- seq_printf(m, " %-36s", fn_name);
- else
- seq_printf(m, " %-36s", "<NA>");
- seq_printf(m, " %15lu\n", entry->counter);
+ rcu_read_unlock();
return 0;
}
-static void *ksym_tracer_stat_start(struct tracer_stat *trace)
+static int ksym_profile_open(struct inode *node, struct file *file)
{
- return ksym_filter_head.first;
-}
-
-static void *
-ksym_tracer_stat_next(void *v, int idx)
-{
- struct hlist_node *stat = v;
-
- return stat->next;
+ return single_open(file, ksym_profile_show, NULL);
}
-static struct tracer_stat ksym_tracer_stats = {
- .name = "ksym_tracer",
- .stat_start = ksym_tracer_stat_start,
- .stat_next = ksym_tracer_stat_next,
- .stat_headers = ksym_tracer_stat_headers,
- .stat_show = ksym_tracer_stat_show
+static const struct file_operations ksym_profile_fops = {
+ .open = ksym_profile_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
};
+#endif /* CONFIG_PROFILE_KSYM_TRACER */
-__init static int ksym_tracer_stat_init(void)
+__init static int init_ksym_trace(void)
{
- int ret;
+ struct dentry *d_tracer;
- ret = register_stat_tracer(&ksym_tracer_stats);
- if (ret) {
- printk(KERN_WARNING "Warning: could not register "
- "ksym tracer stats\n");
- return 1;
- }
+ d_tracer = tracing_init_dentry();
- return 0;
+ trace_create_file("ksym_trace_filter", 0644, d_tracer,
+ NULL, &ksym_tracing_fops);
+
+#ifdef CONFIG_PROFILE_KSYM_TRACER
+ trace_create_file("ksym_profile", 0444, d_tracer,
+ NULL, &ksym_profile_fops);
+#endif
+
+ return register_tracer(&ksym_tracer);
}
-fs_initcall(ksym_tracer_stat_init);
-#endif /* CONFIG_PROFILE_KSYM_TRACER */
+device_initcall(init_ksym_trace);
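
With the stat-tracer plumbing gone, the per-symbol hit counts are exposed through a plain seq_file: single_open() binds ksym_profile_show() to the file, and the stock seq_read/seq_lseek/single_release round out the fops. Below is a minimal, self-contained sketch of the same pattern for a hypothetical read-only debugfs file (names here are illustrative only).

/*
 * Minimal sketch of the single_open() seq_file pattern used above for
 * "ksym_profile".  The file name and show routine are hypothetical.
 */
#include <linux/debugfs.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/seq_file.h>

static int example_show(struct seq_file *m, void *v)
{
	seq_puts(m, "hello from a single_open() file\n");
	return 0;
}

static int example_open(struct inode *inode, struct file *file)
{
	return single_open(file, example_show, inode->i_private);
}

static const struct file_operations example_fops = {
	.open		= example_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

static __init int example_init(void)
{
	debugfs_create_file("example_profile", 0444, NULL, NULL,
			    &example_fops);
	return 0;
}
device_initcall(example_init);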
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 678a5120ee30..f4bc9b27de5f 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -157,6 +157,7 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
unsigned long val, flags;
char buf[64];
int ret;
+ int cpu;
if (count >= sizeof(buf))
return -EINVAL;
@@ -171,9 +172,20 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
return ret;
local_irq_save(flags);
+
+ /*
+ * In case we trace inside arch_spin_lock() or after it (from an NMI),
+ * we would deadlock on max_stack_lock, so we also need to increase
+ * the per-cpu trace_active count here.
+ */
+ cpu = smp_processor_id();
+ per_cpu(trace_active, cpu)++;
+
arch_spin_lock(&max_stack_lock);
*ptr = val;
arch_spin_unlock(&max_stack_lock);
+
+ per_cpu(trace_active, cpu)--;
local_irq_restore(flags);
return count;
@@ -206,7 +218,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
static void *t_start(struct seq_file *m, loff_t *pos)
{
+ int cpu;
+
local_irq_disable();
+
+ cpu = smp_processor_id();
+ per_cpu(trace_active, cpu)++;
+
arch_spin_lock(&max_stack_lock);
if (*pos == 0)
@@ -217,7 +235,13 @@ static void *t_start(struct seq_file *m, loff_t *pos)
static void t_stop(struct seq_file *m, void *p)
{
+ int cpu;
+
arch_spin_unlock(&max_stack_lock);
+
+ cpu = smp_processor_id();
+ per_cpu(trace_active, cpu)--;
+
local_irq_enable();
}
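
stack_max_size_write(), t_start() and t_stop() now all follow the same rule: with interrupts already disabled, bump this CPU's trace_active counter before taking max_stack_lock, so that a nested stack_trace_call() (for instance from an NMI) sees the counter non-zero and backs off instead of spinning on the lock the CPU already holds. Expressed as a helper pair, purely as a sketch (helper names are hypothetical; trace_active and max_stack_lock are the existing statics in trace_stack.c):

/*
 * Hypothetical helpers capturing the guard added above.
 */
static void max_stack_lock_irqsave(unsigned long *flags)
{
	local_irq_save(*flags);
	per_cpu(trace_active, smp_processor_id())++;	/* stack_trace_call() now bails out */
	arch_spin_lock(&max_stack_lock);
}

static void max_stack_unlock_irqrestore(unsigned long *flags)
{
	arch_spin_unlock(&max_stack_lock);
	per_cpu(trace_active, smp_processor_id())--;
	local_irq_restore(*flags);
}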
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 75289f372dd2..cba47d7935cc 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -143,70 +143,65 @@ extern char *__bad_type_size(void);
#type, #name, offsetof(typeof(trace), name), \
sizeof(trace.name), is_signed_type(type)
-int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
+static
+int __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
{
int i;
- int ret;
- struct syscall_metadata *entry = call->data;
- struct syscall_trace_enter trace;
- int offset = offsetof(struct syscall_trace_enter, args);
+ int pos = 0;
- ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
- "\tsigned:%u;\n",
- SYSCALL_FIELD(int, nr));
- if (!ret)
- return 0;
+ /* When len=0, we just calculate the needed length */
+#define LEN_OR_ZERO (len ? len - pos : 0)
+ pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
for (i = 0; i < entry->nb_args; i++) {
- ret = trace_seq_printf(s, "\tfield:%s %s;", entry->types[i],
- entry->args[i]);
- if (!ret)
- return 0;
- ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;"
- "\tsigned:%u;\n", offset,
- sizeof(unsigned long),
- is_signed_type(unsigned long));
- if (!ret)
- return 0;
- offset += sizeof(unsigned long);
+ pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s",
+ entry->args[i], sizeof(unsigned long),
+ i == entry->nb_args - 1 ? "" : ", ");
}
+ pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
- trace_seq_puts(s, "\nprint fmt: \"");
for (i = 0; i < entry->nb_args; i++) {
- ret = trace_seq_printf(s, "%s: 0x%%0%zulx%s", entry->args[i],
- sizeof(unsigned long),
- i == entry->nb_args - 1 ? "" : ", ");
- if (!ret)
- return 0;
+ pos += snprintf(buf + pos, LEN_OR_ZERO,
+ ", ((unsigned long)(REC->%s))", entry->args[i]);
}
- trace_seq_putc(s, '"');
- for (i = 0; i < entry->nb_args; i++) {
- ret = trace_seq_printf(s, ", ((unsigned long)(REC->%s))",
- entry->args[i]);
- if (!ret)
- return 0;
- }
+#undef LEN_OR_ZERO
- return trace_seq_putc(s, '\n');
+ /* return the length of print_fmt */
+ return pos;
}
-int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
+static int set_syscall_print_fmt(struct ftrace_event_call *call)
{
- int ret;
- struct syscall_trace_exit trace;
+ char *print_fmt;
+ int len;
+ struct syscall_metadata *entry = call->data;
- ret = trace_seq_printf(s,
- "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
- "\tsigned:%u;\n"
- "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
- "\tsigned:%u;\n",
- SYSCALL_FIELD(int, nr),
- SYSCALL_FIELD(long, ret));
- if (!ret)
+ if (entry->enter_event != call) {
+ call->print_fmt = "\"0x%lx\", REC->ret";
return 0;
+ }
+
+ /* First: called with 0 length to calculate the needed length */
+ len = __set_enter_print_fmt(entry, NULL, 0);
+
+ print_fmt = kmalloc(len + 1, GFP_KERNEL);
+ if (!print_fmt)
+ return -ENOMEM;
+
+ /* Second: actually write the @print_fmt */
+ __set_enter_print_fmt(entry, print_fmt, len + 1);
+ call->print_fmt = print_fmt;
- return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n");
+ return 0;
+}
+
+static void free_syscall_print_fmt(struct ftrace_event_call *call)
+{
+ struct syscall_metadata *entry = call->data;
+
+ if (entry->enter_event == call)
+ kfree(call->print_fmt);
}
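
For a hypothetical three-argument syscall (arguments named "fd", "buf", "count"), the enter-event string built by __set_enter_print_fmt() comes out as shown below on a 64-bit kernel, where sizeof(unsigned long) == 8 supplies the field width; exit events always get the fixed "\"0x%lx\", REC->ret".

/*
 * Example only -- hypothetical argument names:
 *
 *   "fd: 0x%08lx, buf: 0x%08lx, count: 0x%08lx",
 *   ((unsigned long)(REC->fd)), ((unsigned long)(REC->buf)),
 *   ((unsigned long)(REC->count))
 */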
int syscall_enter_define_fields(struct ftrace_event_call *call)
@@ -386,12 +381,22 @@ int init_syscall_trace(struct ftrace_event_call *call)
{
int id;
- id = register_ftrace_event(call->event);
- if (!id)
- return -ENODEV;
- call->id = id;
- INIT_LIST_HEAD(&call->fields);
- return 0;
+ if (set_syscall_print_fmt(call) < 0)
+ return -ENOMEM;
+
+ id = trace_event_raw_init(call);
+
+ if (id < 0) {
+ free_syscall_print_fmt(call);
+ return id;
+ }
+
+ return id;
+}
+
+unsigned long __init arch_syscall_addr(int nr)
+{
+ return (unsigned long)sys_call_table[nr];
}
int __init init_ftrace_syscalls(void)
@@ -421,7 +426,7 @@ int __init init_ftrace_syscalls(void)
}
core_initcall(init_ftrace_syscalls);
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls);
@@ -433,12 +438,9 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
struct syscall_metadata *sys_data;
struct syscall_trace_enter *rec;
unsigned long flags;
- char *trace_buf;
- char *raw_data;
int syscall_nr;
int rctx;
int size;
- int cpu;
syscall_nr = syscall_get_nr(current, regs);
if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
@@ -457,37 +459,15 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
"profile buffer not large enough"))
return;
- /* Protect the per cpu buffer, begin the rcu read side */
- local_irq_save(flags);
-
- rctx = perf_swevent_get_recursion_context();
- if (rctx < 0)
- goto end_recursion;
-
- cpu = smp_processor_id();
-
- trace_buf = rcu_dereference(perf_trace_buf);
-
- if (!trace_buf)
- goto end;
-
- raw_data = per_cpu_ptr(trace_buf, cpu);
-
- /* zero the dead bytes from align to not leak stack to user */
- *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+ rec = (struct syscall_trace_enter *)ftrace_perf_buf_prepare(size,
+ sys_data->enter_event->id, &rctx, &flags);
+ if (!rec)
+ return;
- rec = (struct syscall_trace_enter *) raw_data;
- tracing_generic_entry_update(&rec->ent, 0, 0);
- rec->ent.type = sys_data->enter_event->id;
rec->nr = syscall_nr;
syscall_get_arguments(current, regs, 0, sys_data->nb_args,
(unsigned long *)&rec->args);
- perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size);
-
-end:
- perf_swevent_put_recursion_context(rctx);
-end_recursion:
- local_irq_restore(flags);
+ ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags);
}
int prof_sysenter_enable(struct ftrace_event_call *call)
@@ -531,11 +511,8 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
struct syscall_trace_exit *rec;
unsigned long flags;
int syscall_nr;
- char *trace_buf;
- char *raw_data;
int rctx;
int size;
- int cpu;
syscall_nr = syscall_get_nr(current, regs);
if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
@@ -557,38 +534,15 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
"exit event has grown above profile buffer size"))
return;
- /* Protect the per cpu buffer, begin the rcu read side */
- local_irq_save(flags);
-
- rctx = perf_swevent_get_recursion_context();
- if (rctx < 0)
- goto end_recursion;
-
- cpu = smp_processor_id();
-
- trace_buf = rcu_dereference(perf_trace_buf);
-
- if (!trace_buf)
- goto end;
-
- raw_data = per_cpu_ptr(trace_buf, cpu);
-
- /* zero the dead bytes from align to not leak stack to user */
- *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
-
- rec = (struct syscall_trace_exit *)raw_data;
+ rec = (struct syscall_trace_exit *)ftrace_perf_buf_prepare(size,
+ sys_data->exit_event->id, &rctx, &flags);
+ if (!rec)
+ return;
- tracing_generic_entry_update(&rec->ent, 0, 0);
- rec->ent.type = sys_data->exit_event->id;
rec->nr = syscall_nr;
rec->ret = syscall_get_return_value(current, regs);
- perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size);
-
-end:
- perf_swevent_put_recursion_context(rctx);
-end_recursion:
- local_irq_restore(flags);
+ ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags);
}
int prof_sysexit_enable(struct ftrace_event_call *call)
@@ -603,7 +557,7 @@ int prof_sysexit_enable(struct ftrace_event_call *call)
ret = register_trace_sys_exit(prof_syscall_exit);
if (ret) {
pr_info("event trace: Could not activate"
- "syscall entry trace point");
+ "syscall exit trace point");
} else {
set_bit(num, enabled_prof_exit_syscalls);
sys_prof_refcount_exit++;
@@ -626,6 +580,5 @@ void prof_sysexit_disable(struct ftrace_event_call *call)
mutex_unlock(&syscall_trace_lock);
}
-#endif
-
+#endif /* CONFIG_PERF_EVENTS */