From 97da89767d398c1dfa1f34e5f312eb8ebb382f7f Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 21 Jul 2015 15:40:16 +0200
Subject: uprobes: Export 'struct return_instance', introduce
 arch_uretprobe_is_alive()

Add the new "weak" helper, arch_uretprobe_is_alive(), used by
the next patches. It should return true if this return_instance
is still valid. The arch agnostic version just always returns
true.

The patch exports "struct return_instance" for the architectures
which want to override this hook. We can also clean up
prepare_uretprobe() if we pass the new return_instance to
arch_uretprobe_hijack_return_addr().

Tested-by: Pratyush Anand <panand@redhat.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Acked-by: Anton Arapov <arapov@gmail.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20150721134016.GA4762@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/uprobes.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 60beb5dc7977..50d2764d66a8 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -92,6 +92,15 @@ struct uprobe_task {
 	unsigned int			depth;
 };
 
+struct return_instance {
+	struct uprobe		*uprobe;
+	unsigned long		func;
+	unsigned long		orig_ret_vaddr; /* original return address */
+	bool			chained;	/* true, if instance is nested */
+
+	struct return_instance	*next;		/* keep as stack */
+};
+
 struct xol_area;
 
 struct uprobes_state {
@@ -128,6 +137,7 @@ extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
 extern int  arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data);
 extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
 extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs);
+extern bool arch_uretprobe_is_alive(struct return_instance *ret, struct pt_regs *regs);
 extern bool arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs);
 extern void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
 					 void *src, unsigned long len);
-- 
cgit v1.2.3


From 7b868e4802a86d867aad1be0471b5767d9c20e10 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 21 Jul 2015 15:40:18 +0200
Subject: uprobes/x86: Reimplement arch_uretprobe_is_alive()

Add the x86 specific version of arch_uretprobe_is_alive()
helper. It returns true if the stack frame mangled by
prepare_uretprobe() is still on stack. So if it returns false,
we know that the probed function has already returned.

We add the new return_instance->stack member and change the
generic code to initialize it in prepare_uretprobe, but it
should be equally useful for other architectures.

TODO: this assumes that the probed application can't use
      multiple stacks (say sigaltstack). We will try to improve
      this logic later.

Tested-by: Pratyush Anand <panand@redhat.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Acked-by: Anton Arapov <arapov@gmail.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20150721134018.GA4766@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/uprobes.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 50d2764d66a8..7ab6d2c8be49 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -95,6 +95,7 @@ struct uprobe_task {
 struct return_instance {
 	struct uprobe		*uprobe;
 	unsigned long		func;
+	unsigned long		stack;		/* stack pointer */
 	unsigned long		orig_ret_vaddr; /* original return address */
 	bool			chained;	/* true, if instance is nested */
 
-- 
cgit v1.2.3


From 86dcb702e74b8ab7d3b2d36984ef00671cea73b9 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 21 Jul 2015 15:40:26 +0200
Subject: uprobes: Add the "enum rp_check ctx" arg to arch_uretprobe_is_alive()

arch/x86 doesn't care (so far), but as Pratyush Anand pointed
out, other architectures might want to know why
arch_uretprobe_is_alive() was called and use different checks
depending on the context. Add the new argument to distinguish
the 2 callers.

Tested-by: Pratyush Anand <panand@redhat.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Acked-by: Anton Arapov <arapov@gmail.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20150721134026.GA4779@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/uprobes.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 7ab6d2c8be49..c0a540239ab6 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -102,6 +102,11 @@ struct return_instance {
 	struct return_instance	*next;		/* keep as stack */
 };
 
+enum rp_check {
+	RP_CHECK_CALL,
+	RP_CHECK_RET,
+};
+
 struct xol_area;
 
 struct uprobes_state {
@@ -138,7 +143,7 @@ extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
 extern int  arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data);
 extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
 extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs);
-extern bool arch_uretprobe_is_alive(struct return_instance *ret, struct pt_regs *regs);
+extern bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx, struct pt_regs *regs);
 extern bool arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs);
 extern void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
 					 void *src, unsigned long len);
-- 
cgit v1.2.3


From db087ef69a2b155ae001665bf0b3806abde7ee34 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 21 Jul 2015 15:40:28 +0200
Subject: uprobes/x86: Make arch_uretprobe_is_alive(RP_CHECK_CALL) more clever

The previous change documents that cleanup_return_instances()
can't always detect the dead frames, since the stack can grow. But
there is one special case which imho is worth fixing:
arch_uretprobe_is_alive() can return true when the stack didn't
actually grow, but the next "call" insn uses the already
invalidated frame.

Test-case:

	#include <stdio.h>
	#include <setjmp.h>

	jmp_buf jmp;
	int nr = 1024;

	void func_2(void)
	{
		if (--nr == 0)
			return;
		longjmp(jmp, 1);
	}

	void func_1(void)
	{
		setjmp(jmp);
		func_2();
	}

	int main(void)
	{
		func_1();
		return 0;
	}

If you ret-probe func_1() and func_2() prepare_uretprobe() hits
the MAX_URETPROBE_DEPTH limit and "return" from func_2() is not
reported.

When we know that the new call is not chained, we can do the
more strict check. In this case "sp" points to the new ret-addr,
so every frame which uses the same "sp" must be dead. The only
complication is that arch_uretprobe_is_alive() needs to know whether
it was chained or not, so we add the new RP_CHECK_CHAIN_CALL enum
and change prepare_uretprobe() to pass RP_CHECK_CALL only if
!chained.

Note: arch_uretprobe_is_alive() could also re-read *sp and check
if this word is still trampoline_vaddr. This could obviously
improve the logic, but I would like to avoid another
copy_from_user() especially in the case when we can't avoid the
false "alive == T" positives.

Tested-by: Pratyush Anand <panand@redhat.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Acked-by: Anton Arapov <arapov@gmail.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20150721134028.GA4786@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/uprobes.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index c0a540239ab6..0bdc72f36905 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -104,6 +104,7 @@ struct return_instance {
 
 enum rp_check {
 	RP_CHECK_CALL,
+	RP_CHECK_CHAIN_CALL,
 	RP_CHECK_RET,
 };
 
-- 
cgit v1.2.3


From e5779e8e12299f77c2421a707855d8d124171d85 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 30 Jul 2015 20:32:40 -0700
Subject: perf/x86/hw_breakpoints: Disallow kernel breakpoints unless
 kprobe-safe

Code on the kprobe blacklist doesn't want unexpected int3
exceptions. It probably doesn't want unexpected debug exceptions
either. Be safe: disallow breakpoints in nokprobes code.

On non-CONFIG_KPROBES kernels, there is no kprobe blacklist.  In
that case, disallow kernel breakpoints entirely.

It will be particularly important to keep hw breakpoints out of the
entry and NMI code once we move debug exceptions off the IST stack.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/e14b152af99640448d895e3c2a8c2d5ee19a1325.1438312874.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/kprobes.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 1ab54754a86d..8f6849084248 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -267,6 +267,8 @@ extern void show_registers(struct pt_regs *regs);
 extern void kprobes_inc_nmissed_count(struct kprobe *p);
 extern bool arch_within_kprobe_blacklist(unsigned long addr);
 
+extern bool within_kprobe_blacklist(unsigned long addr);
+
 struct kprobe_insn_cache {
 	struct mutex mutex;
 	void *(*alloc)(void);	/* allocate insn page */
-- 
cgit v1.2.3


From 098d2164e3441c252eaa28906d45e16b7bf1bd2b Mon Sep 17 00:00:00 2001
From: Wang Nan <wangnan0@huawei.com>
Date: Wed, 1 Jul 2015 02:13:49 +0000
Subject: bpf: Use correct #ifdef controller for trace_call_bpf()

Commit e1abf2cc8d5d80b41c4419368ec743ccadbb131e ("bpf: Fix the build on
BPF_SYSCALL=y && !CONFIG_TRACING kernels, make it more configurable")
updated the building condition of bpf_trace.o from CONFIG_BPF_SYSCALL
to CONFIG_BPF_EVENTS, but the corresponding #ifdef controller in
trace_events.h for trace_call_bpf() was not changed, which, in theory,
is incorrect.

With current Kconfigs, we can create a .config with CONFIG_BPF_SYSCALL=y
and CONFIG_BPF_EVENTS=n by unselecting CONFIG_KPROBE_EVENT and
selecting CONFIG_BPF_SYSCALL. With these options, trace_call_bpf() will
be declared as an extern function, but if anyone calls it, a missing-symbol
error will be triggered since bpf_trace.o was not built.

This patch changes the #ifdef controller for trace_call_bpf() from
CONFIG_BPF_SYSCALL to CONFIG_BPF_EVENTS. I'll show its correctness:

Before this patch:

   BPF_SYSCALL   BPF_EVENTS   trace_call_bpf   bpf_trace.o
   y             y            normal           compiled
   n             n            inline           not compiled
   y             n            normal           not compiled (incorrect)
   n             y            impossible (BPF_EVENTS depends on BPF_SYSCALL)

After this patch:

   BPF_SYSCALL   BPF_EVENTS   trace_call_bpf   bpf_trace.o
   y             y            normal           compiled
   n             n            inline           not compiled
   y             n            inline           not compiled (fixed)
   n             y            impossible (BPF_EVENTS depends on BPF_SYSCALL)

So this patch doesn't break anything. QED.

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David Ahern <dsahern@gmail.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kaixu Xia <xiakaixu@huawei.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
Link: http://lkml.kernel.org/r/1435716878-189507-2-git-send-email-wangnan0@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 include/linux/trace_events.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 1063c850dbab..180dbf8720f9 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -542,7 +542,7 @@ event_trigger_unlock_commit_regs(struct trace_event_file *file,
 		event_triggers_post_call(file, tt);
 }
 
-#ifdef CONFIG_BPF_SYSCALL
+#ifdef CONFIG_BPF_EVENTS
 unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx);
 #else
 static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
-- 
cgit v1.2.3


From 04a22fae4cbc1f7d3f7471e9b36359f98bd3f043 Mon Sep 17 00:00:00 2001
From: Wang Nan <wangnan0@huawei.com>
Date: Wed, 1 Jul 2015 02:13:50 +0000
Subject: tracing, perf: Implement BPF programs attached to uprobes

By copying the BPF-related operations to the uprobe processing path,
this patch allows users to attach BPF programs to uprobes, just as they
already can to kprobes.

After this patch, users are allowed to use PERF_EVENT_IOC_SET_BPF on a
uprobe perf event, which makes it possible to profile user space
programs and kernel events together using BPF.

Because of this patch, CONFIG_BPF_EVENTS should be selected by
CONFIG_UPROBE_EVENT to ensure trace_call_bpf() is compiled even if
KPROBE_EVENT is not set.

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David Ahern <dsahern@gmail.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kaixu Xia <xiakaixu@huawei.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
Link: http://lkml.kernel.org/r/1435716878-189507-3-git-send-email-wangnan0@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 include/linux/trace_events.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 180dbf8720f9..ed27917cabc9 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -243,6 +243,7 @@ enum {
 	TRACE_EVENT_FL_USE_CALL_FILTER_BIT,
 	TRACE_EVENT_FL_TRACEPOINT_BIT,
 	TRACE_EVENT_FL_KPROBE_BIT,
+	TRACE_EVENT_FL_UPROBE_BIT,
 };
 
 /*
@@ -257,6 +258,7 @@ enum {
  *  USE_CALL_FILTER - For trace internal events, don't use file filter
  *  TRACEPOINT    - Event is a tracepoint
  *  KPROBE        - Event is a kprobe
+ *  UPROBE        - Event is a uprobe
  */
 enum {
 	TRACE_EVENT_FL_FILTERED		= (1 << TRACE_EVENT_FL_FILTERED_BIT),
@@ -267,8 +269,11 @@ enum {
 	TRACE_EVENT_FL_USE_CALL_FILTER	= (1 << TRACE_EVENT_FL_USE_CALL_FILTER_BIT),
 	TRACE_EVENT_FL_TRACEPOINT	= (1 << TRACE_EVENT_FL_TRACEPOINT_BIT),
 	TRACE_EVENT_FL_KPROBE		= (1 << TRACE_EVENT_FL_KPROBE_BIT),
+	TRACE_EVENT_FL_UPROBE		= (1 << TRACE_EVENT_FL_UPROBE_BIT),
 };
 
+#define TRACE_EVENT_FL_UKPROBE (TRACE_EVENT_FL_KPROBE | TRACE_EVENT_FL_UPROBE)
+
 struct trace_event_call {
 	struct list_head	list;
 	struct trace_event_class *class;
-- 
cgit v1.2.3