From bdfebd848f2a14e639031a0b0e61d7c7ee5e5fd2 Mon Sep 17 00:00:00 2001 From: Roberto Agostino Vitillo Date: Thu, 9 Feb 2012 23:21:02 +0100 Subject: perf record: Add support for sampling taken branch This patch adds a new option to enable taken branch stack sampling, i.e., leverage the PERF_SAMPLE_BRANCH_STACK feature of perf_events. There is a new option to activate this mode: -b. It is possible to pass a set of filters to select the type of branches to sample. The following filters are available: - any : any type of branches - any_call : any function call or system call - any_ret : any function return or system call return - ind_call : any indirect call - u: only when the branch target is at the user level - k: only when the branch target is in the kernel - hv: only when the branch target is in the hypervisor Filters can be combined by passing a comma-separated list to the option: $ perf record -b any_call,u -e cycles:u branchy Signed-off-by: Roberto Agostino Vitillo Signed-off-by: Stephane Eranian Cc: peterz@infradead.org Cc: acme@redhat.com Cc: robert.richter@amd.com Cc: ming.m.lin@intel.com Cc: andi@firstfloor.org Cc: asharma@fb.com Cc: vweaver1@eecs.utk.edu Cc: khandual@linux.vnet.ibm.com Cc: dsahern@gmail.com Link: http://lkml.kernel.org/r/1328826068-11713-13-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 74 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) (limited to 'tools/perf/builtin-record.c') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 75d230fef202..1c49d4e8767c 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -638,6 +638,77 @@ out_delete_session: return err; } +#define BRANCH_OPT(n, m) \ + { .name = n, .mode = (m) } + +#define BRANCH_END { .name = NULL } + +struct branch_mode { + const char *name; + int mode; +}; + +static const struct branch_mode branch_modes[] = { + BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER), + 
BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL), + BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV), + BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY), + BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL), + BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN), + BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL), + BRANCH_END +}; + +static int +parse_branch_stack(const struct option *opt, const char *str, int unset __used) +{ +#define ONLY_PLM \ + (PERF_SAMPLE_BRANCH_USER |\ + PERF_SAMPLE_BRANCH_KERNEL |\ + PERF_SAMPLE_BRANCH_HV) + + uint64_t *mode = (uint64_t *)opt->value; + const struct branch_mode *br; + char *s, *os, *p; + int ret = -1; + + *mode = 0; + + /* because str is read-only */ + s = os = strdup(str); + if (!s) + return -1; + + for (;;) { + p = strchr(s, ','); + if (p) + *p = '\0'; + + for (br = branch_modes; br->name; br++) { + if (!strcasecmp(s, br->name)) + break; + } + if (!br->name) + goto error; + + *mode |= br->mode; + + if (!p) + break; + + s = p + 1; + } + ret = 0; + + if ((*mode & ~ONLY_PLM) == 0) { + error("need at least one branch type with -b\n"); + ret = -1; + } +error: + free(os); + return ret; +} + static const char * const record_usage[] = { "perf record [] []", "perf record [] -- []", @@ -727,6 +798,9 @@ const struct option record_options[] = { "monitor event in cgroup name only", parse_cgroups), OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"), + OPT_CALLBACK('b', "branch-stack", &record.opts.branch_stack, + "branch mode mask", "branch stack sampling modes", + parse_branch_stack), OPT_END() }; -- cgit v1.2.3 From a5aabdacde9caff54886ae454e0fad2f26929753 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 8 Mar 2012 23:47:45 +0100 Subject: perf record: Provide default branch stack sampling mode option This patch changes the logic of the -b, --branch-stack options of perf record. Based on users' request, the patch provides a default filter mode with the -b (or --branch-any) option. 
With this option, any type of taken branch is sampled. With -j (or --branch-filter), the user can specify any valid combination of branch types and privilege levels if supported by the underlying hardware. The -b (--branch-any) is a shortcut for: --branch-filter any. $ perf record -b foo or: $ perf record --branch-filter any foo For more specific filtering: $ perf record --branch-filter ind_call,u foo Signed-off-by: Stephane Eranian Cc: peterz@infradead.org Cc: acme@redhat.com Cc: asharma@fb.com Cc: ravitillo@lbl.gov Cc: vweaver1@eecs.utk.edu Cc: khandual@linux.vnet.ibm.com Cc: dsahern@gmail.com Link: http://lkml.kernel.org/r/1331246868-19905-2-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 68 ++++++++++++++++++++++++++++----------------- 1 file changed, 43 insertions(+), 25 deletions(-) (limited to 'tools/perf/builtin-record.c') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 1c49d4e8767c..a7c53a9ef372 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -660,7 +660,7 @@ static const struct branch_mode branch_modes[] = { }; static int -parse_branch_stack(const struct option *opt, const char *str, int unset __used) +parse_branch_stack(const struct option *opt, const char *str, int unset) { #define ONLY_PLM \ (PERF_SAMPLE_BRANCH_USER |\ @@ -669,40 +669,53 @@ parse_branch_stack(const struct option *opt, const char *str, int unset __used) uint64_t *mode = (uint64_t *)opt->value; const struct branch_mode *br; - char *s, *os, *p; + char *s, *os = NULL, *p; int ret = -1; - *mode = 0; + if (unset) + return 0; - /* because str is read-only */ - s = os = strdup(str); - if (!s) + /* + * cannot set it twice, -b + --branch-filter for instance + */ + if (*mode) return -1; - for (;;) { - p = strchr(s, ','); - if (p) - *p = '\0'; - - for (br = branch_modes; br->name; br++) { - if (!strcasecmp(s, br->name)) - break; - } - if (!br->name) - goto error; + /* str may be NULL in case 
no arg is passed to -b */ + if (str) { + /* because str is read-only */ + s = os = strdup(str); + if (!s) + return -1; + + for (;;) { + p = strchr(s, ','); + if (p) + *p = '\0'; + + for (br = branch_modes; br->name; br++) { + if (!strcasecmp(s, br->name)) + break; + } + if (!br->name) { + ui__warning("unknown branch filter %s," + " check man page\n", s); + goto error; + } - *mode |= br->mode; + *mode |= br->mode; - if (!p) - break; + if (!p) + break; - s = p + 1; + s = p + 1; + } } ret = 0; + /* default to any branch */ if ((*mode & ~ONLY_PLM) == 0) { - error("need at least one branch type with -b\n"); - ret = -1; + *mode = PERF_SAMPLE_BRANCH_ANY; } error: free(os); @@ -798,8 +811,13 @@ const struct option record_options[] = { "monitor event in cgroup name only", parse_cgroups), OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"), - OPT_CALLBACK('b', "branch-stack", &record.opts.branch_stack, - "branch mode mask", "branch stack sampling modes", + + OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack, + "branch any", "sample any taken branches", + parse_branch_stack), + + OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack, + "branch filter mask", "branch stack filter modes", parse_branch_stack), OPT_END() }; -- cgit v1.2.3 From 330aa675b4f92a422cb6d3acbbfd16a628017520 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 8 Mar 2012 23:47:46 +0100 Subject: perf record: Add HEADER_BRANCH_STACK tag This patch adds a new feature bit, namely, HEADER_BRANCH_STACK. When present, it indicates that sample records may contain branch stack. This could be useful to a viewer to switch to branch mode without having to parse all the samples or without a specific cmdline option. This will be used in a subsequent patch to enhance perf report with branch stacks. 
Signed-off-by: Stephane Eranian Cc: peterz@infradead.org Cc: acme@redhat.com Cc: asharma@fb.com Cc: ravitillo@lbl.gov Cc: vweaver1@eecs.utk.edu Cc: khandual@linux.vnet.ibm.com Cc: dsahern@gmail.com Link: http://lkml.kernel.org/r/1331246868-19905-3-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools/perf/builtin-record.c') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index a7c53a9ef372..be4e1eee782e 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -473,6 +473,9 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) if (!have_tracepoints(&evsel_list->entries)) perf_header__clear_feat(&session->header, HEADER_TRACE_INFO); + if (!rec->opts.branch_stack) + perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); + if (!rec->file_new) { err = perf_session__read_header(session, output); if (err < 0) -- cgit v1.2.3 From 5a7ed29c7572d00a75e8c4529e30c5ac2ef82271 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 5 Apr 2012 18:26:28 +0200 Subject: perf record: Use sw counter only if hw pmu is not detected Use cpu-clock-tick sw counter for cpu-cycles only if there is no hw pmu available. This is the case if the syscall reports ENOENT. In other cases (e.g. invalid attributes) we don't want the sw counter to be used. 
Cc: Ingo Molnar Link: http://lkml.kernel.org/r/1333643188-26895-5-git-send-email-robert.richter@amd.com Signed-off-by: Robert Richter Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/builtin-record.c') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index be4e1eee782e..10b1f1f25ed7 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -245,7 +245,7 @@ try_again: * based cpu-clock-tick sw counter, which * is always available even if no PMU support: */ - if (attr->type == PERF_TYPE_HARDWARE + if (err == ENOENT && attr->type == PERF_TYPE_HARDWARE && attr->config == PERF_COUNT_HW_CPU_CYCLES) { if (verbose) -- cgit v1.2.3