From 1b46cddfccfec4cc67b187fb53d78198de6a057c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 28 Sep 2009 14:48:46 -0300 Subject: perf tools: Use rb_tree for maps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Threads can have many and kernel modules will be represented as a tree of maps as well. Ah, and for a perf.data with 146607 samples: Before: [root@doppio ~]# perf stat -r 5 perf report > /dev/null Performance counter stats for 'perf report' (5 runs): 699.823680 task-clock-msecs # 0.991 CPUs ( +- 0.454% ) 74 context-switches # 0.000 M/sec ( +- 1.709% ) 2 CPU-migrations # 0.000 M/sec ( +- 17.008% ) 23114 page-faults # 0.033 M/sec ( +- 0.000% ) 1381257019 cycles # 1973.721 M/sec ( +- 0.290% ) 1456894438 instructions # 1.055 IPC ( +- 0.007% ) 18779818 cache-references # 26.835 M/sec ( +- 0.380% ) 641799 cache-misses # 0.917 M/sec ( +- 1.200% ) 0.705972729 seconds time elapsed ( +- 0.501% ) [root@doppio ~]# After Performance counter stats for 'perf report' (5 runs): 691.261451 task-clock-msecs # 0.993 CPUs ( +- 0.307% ) 72 context-switches # 0.000 M/sec ( +- 0.829% ) 6 CPU-migrations # 0.000 M/sec ( +- 18.409% ) 23127 page-faults # 0.033 M/sec ( +- 0.000% ) 1366395876 cycles # 1976.670 M/sec ( +- 0.153% ) 1443136016 instructions # 1.056 IPC ( +- 0.012% ) 17956402 cache-references # 25.976 M/sec ( +- 0.325% ) 661924 cache-misses # 0.958 M/sec ( +- 1.335% ) 0.696127275 seconds time elapsed ( +- 0.377% ) I.e. we see some speedup too. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: "H. Peter Anvin" LKML-Reference: <20090928174846.GA3361@ghostprotocols.net> Signed-off-by: Ingo Molnar --- tools/perf/util/thread.c | 129 ++++++++++++++++++++++++++++++----------------- 1 file changed, 83 insertions(+), 46 deletions(-) (limited to 'tools/perf/util/thread.c') diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 45efb5db0d19..9d0945cc66d1 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -15,7 +15,7 @@ static struct thread *thread__new(pid_t pid) self->comm = malloc(32); if (self->comm) snprintf(self->comm, 32, ":%d", self->pid); - INIT_LIST_HEAD(&self->maps); + self->maps = RB_ROOT; } return self; @@ -31,11 +31,13 @@ int thread__set_comm(struct thread *self, const char *comm) static size_t thread__fprintf(struct thread *self, FILE *fp) { - struct map *pos; + struct rb_node *nd; size_t ret = fprintf(fp, "Thread %d %s\n", self->pid, self->comm); - list_for_each_entry(pos, &self->maps, node) + for (nd = rb_first(&self->maps); nd; nd = rb_next(nd)) { + struct map *pos = rb_entry(nd, struct map, rb_node); ret += map__fprintf(pos, fp); + } return ret; } @@ -93,42 +95,90 @@ register_idle_thread(struct rb_root *threads, struct thread **last_match) return thread; } -void thread__insert_map(struct thread *self, struct map *map) +static void thread__remove_overlappings(struct thread *self, struct map *map) { - struct map *pos, *tmp; - - list_for_each_entry_safe(pos, tmp, &self->maps, node) { - if (map__overlap(pos, map)) { - if (verbose >= 2) { - printf("overlapping maps:\n"); - map__fprintf(map, stdout); - map__fprintf(pos, stdout); - } - - if (map->start <= pos->start && map->end > pos->start) - pos->start = map->end; - - if (map->end >= pos->end && map->start < pos->end) - pos->end = map->start; - - if (verbose >= 2) { - printf("after collision:\n"); - map__fprintf(pos, stdout); - } - - if (pos->start >= pos->end) { - list_del_init(&pos->node); - free(pos); - } + struct rb_node *next = rb_first(&self->maps); + + while (next) { + struct map *pos = rb_entry(next, struct map, rb_node); + next = rb_next(&pos->rb_node); + + if (!map__overlap(pos, map)) + continue; + + if (verbose >= 2) { + printf("overlapping maps:\n"); + map__fprintf(map, stdout); + map__fprintf(pos, stdout); + } + + if (map->start <= pos->start && map->end > pos->start) + pos->start = map->end; + + if (map->end >= pos->end && map->start < pos->end) + pos->end = map->start; + + if (verbose >= 2) { + printf("after collision:\n"); + map__fprintf(pos, stdout); + } + + if (pos->start >= pos->end) { + rb_erase(&pos->rb_node, &self->maps); + free(pos); } } +} + +void maps__insert(struct rb_root *maps, struct map *map) +{ + struct rb_node **p = &maps->rb_node; + struct rb_node *parent = NULL; + const u64 ip = map->start; + struct map *m; + + while (*p != NULL) { + parent = *p; + m = rb_entry(parent, struct map, rb_node); + if (ip < m->start) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + rb_link_node(&map->rb_node, parent, p); + rb_insert_color(&map->rb_node, maps); +} + +struct map *maps__find(struct rb_root *maps, u64 ip) +{ + struct rb_node **p = &maps->rb_node; + struct rb_node *parent = NULL; + struct map *m; + + while (*p != NULL) { + parent = *p; + m = rb_entry(parent, struct map, rb_node); + if (ip < m->start) + p = &(*p)->rb_left; + else if (ip > m->end) + p = &(*p)->rb_right; + else + return m; + } + + return NULL; +} - list_add_tail(&map->node, &self->maps); +void thread__insert_map(struct thread *self, struct map *map) +{ + thread__remove_overlappings(self, map); + maps__insert(&self->maps, map); } int thread__fork(struct thread *self, struct thread *parent) { - struct map *map; + struct rb_node *nd; if (self->comm) free(self->comm); @@ -136,7 +186,8 @@ int thread__fork(struct thread *self, struct thread *parent) if (!self->comm) return -ENOMEM; - list_for_each_entry(map, &parent->maps, node) { + for (nd = rb_first(&parent->maps); nd; nd = rb_next(nd)) { + struct map *map = rb_entry(nd, struct map, rb_node); struct map *new = map__clone(map); if (!new) return -ENOMEM; @@ -146,20 +197,6 @@ int thread__fork(struct thread *self, struct thread *parent) return 0; } -struct map *thread__find_map(struct thread *self, u64 ip) -{ - struct map *pos; - - if (self == NULL) - return NULL; - - list_for_each_entry(pos, &self->maps, node) - if (ip >= pos->start && ip <= pos->end) - return pos; - - return NULL; -} - size_t threads__fprintf(FILE *fp, struct rb_root *threads) { size_t ret = 0; -- cgit v1.2.3 From 439d473b4777de510e1322168ac6f2f377ecd5bc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 2 Oct 2009 03:29:58 -0300 Subject: perf tools: Rewrite and improve support for kernel modules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Representing modules as struct map entries, backed by a DSO, etc, using /proc/modules to find where the module is loaded. DSOs now can have a short and long name, so that in verbose mode we can show exactly which .ko or vmlinux image was used. As kernel modules now are a DSO separate from the kernel, we can ask for just the hits for a particular set of kernel modules, just like we can do with shared libraries: [root@doppio linux-2.6-tip]# perf report -n --vmlinux /home/acme/git/build/tip-recvmmsg/vmlinux --modules --dsos \[drm\] | head -15 84.58% 13266 Xorg [k] drm_clflush_pages 4.02% 630 Xorg [k] trace_kmalloc.clone.0 3.95% 619 Xorg [k] drm_ioctl 2.07% 324 Xorg [k] drm_addbufs 1.68% 263 Xorg [k] drm_gem_close_ioctl 0.77% 120 Xorg [k] drm_setmaster_ioctl 0.70% 110 Xorg [k] drm_lastclose 0.68% 106 Xorg [k] drm_open 0.54% 85 Xorg [k] drm_mm_search_free [root@doppio linux-2.6-tip]# Specifying --dsos /lib/modules/2.6.31-tip/kernel/drivers/gpu/drm/drm.ko would have the same effect. Allowing specifying just 'drm.ko' is left for another patch. Processing kallsyms so that per kernel module struct map are instantiated was also left for another patch. That will allow removing the module name from each of its symbols. struct symbol was reduced by removing the ->module backpointer and moving it (well now the map) to struct symbol_entry in perf top, that is its only user right now. The total linecount went down by ~500 lines. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: "H. Peter Anvin" Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Avi Kivity Signed-off-by: Ingo Molnar --- tools/perf/util/thread.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'tools/perf/util/thread.c') diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 9d0945cc66d1..3b56aebb1f4b 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -16,6 +16,7 @@ static struct thread *thread__new(pid_t pid) if (self->comm) snprintf(self->comm, 32, ":%d", self->pid); self->maps = RB_ROOT; + INIT_LIST_HEAD(&self->removed_maps); } return self; @@ -32,13 +33,20 @@ int thread__set_comm(struct thread *self, const char *comm) static size_t thread__fprintf(struct thread *self, FILE *fp) { struct rb_node *nd; - size_t ret = fprintf(fp, "Thread %d %s\n", self->pid, self->comm); + struct map *pos; + size_t ret = fprintf(fp, "Thread %d %s\nCurrent maps:\n", + self->pid, self->comm); for (nd = rb_first(&self->maps); nd; nd = rb_next(nd)) { - struct map *pos = rb_entry(nd, struct map, rb_node); + pos = rb_entry(nd, struct map, rb_node); ret += map__fprintf(pos, fp); } + ret = fprintf(fp, "Removed maps:\n"); + + list_for_each_entry(pos, &self->removed_maps, node) + ret += map__fprintf(pos, fp); + return ret; } @@ -112,21 +120,13 @@ static void thread__remove_overlappings(struct thread *self, struct map *map) map__fprintf(pos, stdout); } - if (map->start <= pos->start && map->end > pos->start) - pos->start = map->end; - - if (map->end >= pos->end && map->start < pos->end) - pos->end = map->start; - - if (verbose >= 2) { - printf("after collision:\n"); - map__fprintf(pos, stdout); - } - - if (pos->start >= pos->end) { - rb_erase(&pos->rb_node, &self->maps); - free(pos); - } + rb_erase(&pos->rb_node, &self->maps); + /* + * We may have references to this map, for instance in some + * hist_entry instances, so just move them to a separate + * list. + */ + list_add_tail(&pos->node, &self->removed_maps); } } -- cgit v1.2.3 From 9a92b479b2f088ee2d3194243f4c8e59b1b8c9c2 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 8 Oct 2009 16:37:12 +0200 Subject: perf tools: Improve thread comm resolution in perf sched When we get sched traces that involve a task that was already created before opening the event, we won't have the comm event for it. So if we can't find the comm event for a given thread, we look at the traces that may contain these informations. Before: ata/1:371 | 0.000 ms | 1 | avg: 3988.693 ms | max: 3988.693 ms | kondemand/1:421 | 0.096 ms | 3 | avg: 345.346 ms | max: 1035.989 ms | kondemand/0:420 | 0.025 ms | 3 | avg: 421.332 ms | max: 964.014 ms | :5124:5124 | 0.103 ms | 5 | avg: 74.082 ms | max: 277.194 ms | :6244:6244 | 0.691 ms | 9 | avg: 125.655 ms | max: 271.306 ms | firefox:5080 | 0.924 ms | 5 | avg: 53.833 ms | max: 257.828 ms | npviewer.bin:6225 | 21.871 ms | 53 | avg: 22.462 ms | max: 220.835 ms | :6245:6245 | 9.631 ms | 21 | avg: 41.864 ms | max: 213.349 ms | After: ata/1:371 | 0.000 ms | 1 | avg: 3988.693 ms | max: 3988.693 ms | kondemand/1:421 | 0.096 ms | 3 | avg: 345.346 ms | max: 1035.989 ms | kondemand/0:420 | 0.025 ms | 3 | avg: 421.332 ms | max: 964.014 ms | firefox:5124 | 0.103 ms | 5 | avg: 74.082 ms | max: 277.194 ms | npviewer.bin:6244 | 0.691 ms | 9 | avg: 125.655 ms | max: 271.306 ms | firefox:5080 | 0.924 ms | 5 | avg: 53.833 ms | max: 257.828 ms | npviewer.bin:6225 | 21.871 ms | 53 | avg: 22.462 ms | max: 220.835 ms | npviewer.bin:6245 | 9.631 ms | 21 | avg: 41.864 ms | max: 213.349 ms | Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Paul Mackerras LKML-Reference: <1255012632-7882-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/util/thread.c | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) (limited to 'tools/perf/util/thread.c') diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 3b56aebb1f4b..8bd5ca2d2f28 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -6,15 +6,17 @@ #include "util.h" #include "debug.h" -static struct thread *thread__new(pid_t pid) +static struct thread *thread__new(pid_t pid, int set_comm) { struct thread *self = calloc(1, sizeof(*self)); if (self != NULL) { self->pid = pid; - self->comm = malloc(32); - if (self->comm) - snprintf(self->comm, 32, ":%d", self->pid); + if (set_comm) { + self->comm = malloc(32); + if (self->comm) + snprintf(self->comm, 32, ":%d", self->pid); + } self->maps = RB_ROOT; INIT_LIST_HEAD(&self->removed_maps); } @@ -50,8 +52,10 @@ static size_t thread__fprintf(struct thread *self, FILE *fp) return ret; } -struct thread * -threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match) +static struct thread * +__threads__findnew(pid_t pid, struct rb_root *threads, + struct thread **last_match, + int set_comm) { struct rb_node **p = &threads->rb_node; struct rb_node *parent = NULL; @@ -80,7 +84,8 @@ threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match) p = &(*p)->rb_right; } - th = thread__new(pid); + th = thread__new(pid, set_comm); + if (th != NULL) { rb_link_node(&th->rb_node, parent, p); rb_insert_color(&th->rb_node, threads); @@ -90,6 +95,19 @@ threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match) return th; } +struct thread * +threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match) +{ + return __threads__findnew(pid, threads, last_match, 1); +} + +struct thread * +threads__findnew_nocomm(pid_t pid, struct rb_root *threads, + struct thread **last_match) +{ + return __threads__findnew(pid, threads, last_match, 0); +} + struct thread * register_idle_thread(struct rb_root *threads, struct thread **last_match) { -- cgit v1.2.3 From 97ea1a7fa62af0d8d49a0fc12796b0073537c9d8 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 8 Oct 2009 21:04:17 +0200 Subject: perf tools: Fix thread comm resolution in perf sched This reverts commit 9a92b479b2f088ee2d3194243f4c8e59b1b8c9c2 ("perf tools: Improve thread comm resolution in perf sched") and fixes the real bug. The bug was elsewhere: We are failing to resolve thread names in perf sched because the table of threads we are building, on top of comm events, has a per process granularity. But perf sched, unlike the other perf tools, needs a per thread granularity as we are profiling every tasks individually. So fix it by building our threads table using the tid instead of the pid as the thread identifier. v2: Revert the previous fix - it is not really needed Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Paul Mackerras LKML-Reference: <1255028657-11158-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/util/thread.c | 32 +++++++------------------------- 1 file changed, 7 insertions(+), 25 deletions(-) (limited to 'tools/perf/util/thread.c') diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 8bd5ca2d2f28..3b56aebb1f4b 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -6,17 +6,15 @@ #include "util.h" #include "debug.h" -static struct thread *thread__new(pid_t pid, int set_comm) +static struct thread *thread__new(pid_t pid) { struct thread *self = calloc(1, sizeof(*self)); if (self != NULL) { self->pid = pid; - if (set_comm) { - self->comm = malloc(32); - if (self->comm) - snprintf(self->comm, 32, ":%d", self->pid); - } + self->comm = malloc(32); + if (self->comm) + snprintf(self->comm, 32, ":%d", self->pid); self->maps = RB_ROOT; INIT_LIST_HEAD(&self->removed_maps); } @@ -52,10 +50,8 @@ static size_t thread__fprintf(struct thread *self, FILE *fp) return ret; } -static struct thread * -__threads__findnew(pid_t pid, struct rb_root *threads, - struct thread **last_match, - int set_comm) +struct thread * +threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match) { struct rb_node **p = &threads->rb_node; struct rb_node *parent = NULL; @@ -84,8 +80,7 @@ __threads__findnew(pid_t pid, struct rb_root *threads, p = &(*p)->rb_right; } - th = thread__new(pid, set_comm); - + th = thread__new(pid); if (th != NULL) { rb_link_node(&th->rb_node, parent, p); rb_insert_color(&th->rb_node, threads); @@ -95,19 +90,6 @@ __threads__findnew(pid_t pid, struct rb_root *threads, return th; } -struct thread * -threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match) -{ - return __threads__findnew(pid, threads, last_match, 1); -} - -struct thread * -threads__findnew_nocomm(pid_t pid, struct rb_root *threads, - struct thread **last_match) -{ - return __threads__findnew(pid, threads, last_match, 0); -} - struct thread * register_idle_thread(struct rb_root *threads, struct thread **last_match) { -- cgit v1.2.3 From d5b889f2ecec7849e851ddd31c34bdfb3482b5de Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 13 Oct 2009 11:16:29 -0300 Subject: perf tools: Move threads & last_match to threads.c This was just being copy'n'pasted all over. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Mike Galbraith LKML-Reference: <20091013141629.GD21809@ghostprotocols.net> Signed-off-by: Ingo Molnar --- tools/perf/util/thread.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'tools/perf/util/thread.c') diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 3b56aebb1f4b..f53fad7c0a8d 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -6,6 +6,9 @@ #include "util.h" #include "debug.h" +static struct rb_root threads; +static struct thread *last_match; + static struct thread *thread__new(pid_t pid) { struct thread *self = calloc(1, sizeof(*self)); @@ -50,10 +53,9 @@ static size_t thread__fprintf(struct thread *self, FILE *fp) return ret; } -struct thread * -threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match) +struct thread *threads__findnew(pid_t pid) { - struct rb_node **p = &threads->rb_node; + struct rb_node **p = &threads.rb_node; struct rb_node *parent = NULL; struct thread *th; @@ -62,15 +64,15 @@ threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match) * so most of the time we dont have to look up * the full rbtree: */ - if (*last_match && (*last_match)->pid == pid) - return *last_match; + if (last_match && last_match->pid == pid) + return last_match; while (*p != NULL) { parent = *p; th = rb_entry(parent, struct thread, rb_node); if (th->pid == pid) { - *last_match = th; + last_match = th; return th; } @@ -83,17 +85,16 @@ threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match) th = thread__new(pid); if (th != NULL) { rb_link_node(&th->rb_node, parent, p); - rb_insert_color(&th->rb_node, threads); - *last_match = th; + rb_insert_color(&th->rb_node, &threads); + last_match = th; } return th; } -struct thread * -register_idle_thread(struct rb_root *threads, struct thread **last_match) +struct thread *register_idle_thread(void) { - struct thread *thread = threads__findnew(0, threads, last_match); + struct thread *thread = threads__findnew(0); if (!thread || thread__set_comm(thread, "swapper")) { fprintf(stderr, "problem inserting idle task.\n"); @@ -197,12 +198,12 @@ int thread__fork(struct thread *self, struct thread *parent) return 0; } -size_t threads__fprintf(FILE *fp, struct rb_root *threads) +size_t threads__fprintf(FILE *fp) { size_t ret = 0; struct rb_node *nd; - for (nd = rb_first(threads); nd; nd = rb_next(nd)) { + for (nd = rb_first(&threads); nd; nd = rb_next(nd)) { struct thread *pos = rb_entry(nd, struct thread, rb_node); ret += thread__fprintf(pos, fp); -- cgit v1.2.3 From a4fb581b15949cfd10b64c8af37bc106e95307f3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 22 Oct 2009 23:23:23 +0200 Subject: perf tools: Bind callchains to the first sort dimension column Currently, the callchains are displayed using a constant left margin. So depending on the current sort dimension configuration, callchains may appear to be well attached to the first sort dimension column field which is mostly the case, except when the first dimension of sorting is done by comm, because these are right aligned. This patch binds the callchain to the first letter in the first column, whatever type of column it is (dso, comm, symbol). Before: 0.80% perf [k] __lock_acquire __lock_acquire lock_acquire | |--58.33%-- _spin_lock | | | |--28.57%-- inotify_should_send_event | | fsnotify | | __fsnotify_parent After: 0.80% perf [k] __lock_acquire __lock_acquire lock_acquire | |--58.33%-- _spin_lock | | | |--28.57%-- inotify_should_send_event | | fsnotify | | __fsnotify_parent Also, for clarity, we don't put anymore the callchain as is but: - If we have a top level ancestor in the callchain, start it with a first ascii hook. Before: 0.80% perf [kernel] [k] __lock_acquire __lock_acquire lock_acquire | |--58.33%-- _spin_lock | | | |--28.57%-- inotify_should_send_event | | fsnotify [..] [..] After: 0.80% perf [kernel] [k] __lock_acquire | --- __lock_acquire lock_acquire | |--58.33%-- _spin_lock | | | |--28.57%-- inotify_should_send_event | | fsnotify [..] [..] - Otherwise, if we have several top level ancestors, then display these like we did before: 1.69% Xorg | |--21.21%-- vread_hpet | 0x7fffd85b46fc | 0x7fffd85b494d | 0x7f4fafb4e54d | |--15.15%-- exaOffscreenAlloc | |--9.09%-- I830WaitLpRing Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Paul Mackerras Cc: Anton Blanchard LKML-Reference: <1256246604-17156-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/util/thread.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'tools/perf/util/thread.c') diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index f53fad7c0a8d..8cb47f1d8a76 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -33,6 +33,17 @@ int thread__set_comm(struct thread *self, const char *comm) return self->comm ? 0 : -ENOMEM; } +int thread__comm_len(struct thread *self) +{ + if (!self->comm_len) { + if (!self->comm) + return 0; + self->comm_len = strlen(self->comm); + } + + return self->comm_len; +} + static size_t thread__fprintf(struct thread *self, FILE *fp) { struct rb_node *nd; -- cgit v1.2.3 From 6beba7adbe092e63dfe8d09fbd1e3ec140474a13 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 21 Oct 2009 17:34:06 -0200 Subject: perf tools: Unify debug messages mechanisms We were using eprintf in some places, that looks at a global 'verbose' level, and at other places passing a 'v' parameter to specify the verbosity level, unify it by introducing pr_{err,warning,debug,etc}, just like in the kernel. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Mike Galbraith LKML-Reference: <1256153646-10097-1-git-send-email-acme@redhat.com> Signed-off-by: Ingo Molnar --- tools/perf/util/thread.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools/perf/util/thread.c') diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 8cb47f1d8a76..0f6d78c9863a 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -127,9 +127,9 @@ static void thread__remove_overlappings(struct thread *self, struct map *map) continue; if (verbose >= 2) { - printf("overlapping maps:\n"); - map__fprintf(map, stdout); - map__fprintf(pos, stdout); + fputs("overlapping maps:\n", stderr); + map__fprintf(map, stderr); + map__fprintf(pos, stderr); } rb_erase(&pos->rb_node, &self->maps); -- cgit v1.2.3 From 364794845cbc49e638b83d7ef739524291e1e961 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 24 Nov 2009 12:05:16 -0200 Subject: perf tools: Introduce zalloc() for the common calloc(1, N) case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This way we type less characters and it looks more like the kzalloc kernel counterpart. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1259071517-3242-3-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/util/thread.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/util/thread.c') diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 0f6d78c9863a..1796625f7784 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -11,7 +11,7 @@ static struct thread *last_match; static struct thread *thread__new(pid_t pid) { - struct thread *self = calloc(1, sizeof(*self)); + struct thread *self = zalloc(sizeof(*self)); if (self != NULL) { self->pid = pid; -- cgit v1.2.3 From 95011c600740837288a3b34b411244a4d9157c4e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 27 Nov 2009 16:29:20 -0200 Subject: perf symbols: Support multiple symtabs in struct thread MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Making the routines that were so far specific to the kernel maps useful for all threads. This is done by making the kernel maps be contained in a kernel "thread". This gets the kernel specific routines closer to the userspace counterparts, which will help in reducing the boilerplate for resolving a symbol, as will be demonstrated in the next patches. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1259346563-12568-9-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/util/thread.c | 115 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 89 insertions(+), 26 deletions(-) (limited to 'tools/perf/util/thread.c') diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 1796625f7784..2229f82cd630 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -9,17 +9,26 @@ static struct rb_root threads; static struct thread *last_match; +void thread__init(struct thread *self, pid_t pid) +{ + int i; + self->pid = pid; + self->comm = NULL; + for (i = 0; i < MAP__NR_TYPES; ++i) { + self->maps[i] = RB_ROOT; + INIT_LIST_HEAD(&self->removed_maps[i]); + } +} + static struct thread *thread__new(pid_t pid) { struct thread *self = zalloc(sizeof(*self)); if (self != NULL) { - self->pid = pid; + thread__init(self, pid); self->comm = malloc(32); if (self->comm) snprintf(self->comm, 32, ":%d", self->pid); - self->maps = RB_ROOT; - INIT_LIST_HEAD(&self->removed_maps); } return self; @@ -44,24 +53,68 @@ int thread__comm_len(struct thread *self) return self->comm_len; } -static size_t thread__fprintf(struct thread *self, FILE *fp) +static const char *map_type__name[MAP__NR_TYPES] = { + [MAP__FUNCTION] = "Functions", +}; + +static size_t __thread__fprintf_maps(struct thread *self, + enum map_type type, FILE *fp) { + size_t printed = fprintf(fp, "%s:\n", map_type__name[type]); struct rb_node *nd; - struct map *pos; - size_t ret = fprintf(fp, "Thread %d %s\nCurrent maps:\n", - self->pid, self->comm); - for (nd = rb_first(&self->maps); nd; nd = rb_next(nd)) { - pos = rb_entry(nd, struct map, rb_node); - ret += map__fprintf(pos, fp); + for (nd = rb_first(&self->maps[type]); nd; nd = rb_next(nd)) { + struct map *pos = rb_entry(nd, struct map, rb_node); + printed += fprintf(fp, "Map:"); + printed += map__fprintf(pos, fp); + if (verbose > 1) { + printed += dso__fprintf(pos->dso, type, fp); + printed += fprintf(fp, "--\n"); + } } - ret = fprintf(fp, "Removed maps:\n"); + return printed; +} + +size_t thread__fprintf_maps(struct thread *self, FILE *fp) +{ + size_t printed = 0, i; + for (i = 0; i < MAP__NR_TYPES; ++i) + printed += __thread__fprintf_maps(self, i, fp); + return printed; +} - list_for_each_entry(pos, &self->removed_maps, node) - ret += map__fprintf(pos, fp); +static size_t __thread__fprintf_removed_maps(struct thread *self, + enum map_type type, FILE *fp) +{ + struct map *pos; + size_t printed = 0; + + list_for_each_entry(pos, &self->removed_maps[type], node) { + printed += fprintf(fp, "Map:"); + printed += map__fprintf(pos, fp); + if (verbose > 1) { + printed += dso__fprintf(pos->dso, type, fp); + printed += fprintf(fp, "--\n"); + } + } + return printed; +} - return ret; +static size_t thread__fprintf_removed_maps(struct thread *self, FILE *fp) +{ + size_t printed = 0, i; + for (i = 0; i < MAP__NR_TYPES; ++i) + printed += __thread__fprintf_removed_maps(self, i, fp); + return printed; +} + +static size_t thread__fprintf(struct thread *self, FILE *fp) +{ + size_t printed = fprintf(fp, "Thread %d %s\n", self->pid, self->comm); + printed += thread__fprintf_removed_maps(self, fp); + printed += fprintf(fp, "Removed maps:\n"); + return printed + thread__fprintf_removed_maps(self, fp); } struct thread *threads__findnew(pid_t pid) @@ -117,7 +170,8 @@ struct thread *register_idle_thread(void) static void thread__remove_overlappings(struct thread *self, struct map *map) { - struct rb_node *next = rb_first(&self->maps); + struct rb_root *root = &self->maps[map->type]; + struct rb_node *next = rb_first(root); while (next) { struct map *pos = rb_entry(next, struct map, rb_node); @@ -132,13 +186,13 @@ static void thread__remove_overlappings(struct thread *self, struct map *map) map__fprintf(pos, stderr); } - rb_erase(&pos->rb_node, &self->maps); + rb_erase(&pos->rb_node, root); /* * We may have references to this map, for instance in some * hist_entry instances, so just move them to a separate * list. */ - list_add_tail(&pos->node, &self->removed_maps); + list_add_tail(&pos->node, &self->removed_maps[map->type]); } } @@ -185,12 +239,26 @@ struct map *maps__find(struct rb_root *maps, u64 ip) void thread__insert_map(struct thread *self, struct map *map) { thread__remove_overlappings(self, map); - maps__insert(&self->maps, map); + maps__insert(&self->maps[map->type], map); } -int thread__fork(struct thread *self, struct thread *parent) +static int thread__clone_maps(struct thread *self, struct thread *parent, + enum map_type type) { struct rb_node *nd; + for (nd = rb_first(&parent->maps[type]); nd; nd = rb_next(nd)) { + struct map *map = rb_entry(nd, struct map, rb_node); + struct map *new = map__clone(map); + if (new == NULL) + return -ENOMEM; + thread__insert_map(self, new); + } + return 0; +} + +int thread__fork(struct thread *self, struct thread *parent) +{ + int i; if (self->comm) free(self->comm); @@ -198,14 +266,9 @@ int thread__fork(struct thread *self, struct thread *parent) if (!self->comm) return -ENOMEM; - for (nd = rb_first(&parent->maps); nd; nd = rb_next(nd)) { - struct map *map = rb_entry(nd, struct map, rb_node); - struct map *new = map__clone(map); - if (!new) + for (i = 0; i < MAP__NR_TYPES; ++i) + if (thread__clone_maps(self, parent, i) < 0) return -ENOMEM; - thread__insert_map(self, new); - } - return 0; } -- cgit v1.2.3 From 1ed091c45ae33b2179d387573c3fe3f3b4adf60a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 27 Nov 2009 16:29:23 -0200 Subject: perf tools: Consolidate symbol resolving across all tools MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now we have a very high level routine for simple tools to process IP sample events: int event__preprocess_sample(const event_t *self, struct addr_location *al, symbol_filter_t filter) It receives the event itself and will insert new threads in the global threads list and resolve the map and symbol, filling all this info into the new addr_location struct, so that tools like annotate and report can further process the event by creating hist_entries in their specific way (with or without callgraphs, etc). It in turn uses the new next layer function: void thread__find_addr_location(struct thread *self, u8 cpumode, enum map_type type, u64 addr, struct addr_location *al, symbol_filter_t filter) This one will, given a thread (userspace or the kernel kthread one), will find the given type (MAP__FUNCTION now, MAP__VARIABLE too in the near future) at the given cpumode, taking vdsos into account (userspace hit, but kernel symbol) and will fill all these details in the addr_location given. Tools that need a more compact API for plain function resolution, like 'kmem', can use this other one: struct symbol *thread__find_function(struct thread *self, u64 addr, symbol_filter_t filter) So, to resolve a kernel symbol, that is all the 'kmem' tool needs, its just a matter of calling: sym = thread__find_function(kthread, addr, NULL); The 'filter' parameter is needed because we do lazy parsing/loading of ELF symtabs or /proc/kallsyms. With this we remove more code duplication all around, which is always good, huh? :-) Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: John Kacur Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1259346563-12568-12-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/util/thread.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'tools/perf/util/thread.c') diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 2229f82cd630..603f5610861b 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -285,3 +285,15 @@ size_t threads__fprintf(FILE *fp) return ret; } + +struct symbol *thread__find_symbol(struct thread *self, + enum map_type type, u64 addr, + symbol_filter_t filter) +{ + struct map *map = thread__find_map(self, type, addr); + + if (map != NULL) + return map__find_symbol(map, map->map_ip(map, addr), filter); + + return NULL; +} -- cgit v1.2.3