Merge branch 'ebpf-tail-call'

Alexei Starovoitov says: ==================== bpf: introduce bpf_tail_call() helper introduce bpf_tail_call(ctx, &jmp_table, index) helper function which can be used from BPF programs like: int bpf_prog(struct pt_regs *ctx) { ... bpf_tail_call(ctx, &jmp_table, index); ... } that is roughly equivalent to: int bpf_prog(struct pt_regs *ctx) { ... if (jmp_table[index]) return (*jmp_table[index])(ctx); ... } The important detail that it's not a normal call, but a tail call. The kernel stack is precious, so this helper reuses the current stack frame and jumps into another BPF program without adding extra call frame. It's trivially done in interpreter and a bit trickier in JITs. Use cases: - simplify complex programs - dispatch into other programs (for example: index in jump table can be syscall number or network protocol) - build dynamic chains of programs The chain of tail calls can form unpredictable dynamic loops therefore tail_call_cnt is used to limit the number of calls and currently is set to 32. patch 1 - support bpf_tail_call() in interpreter patch 2 - support in x64 JIT We've discussed what's neccessary to support it in arm64/s390 JITs and it looks fine. patch 3 - sample example for tracing patch 4 - sample example for networking More details in every patch. This set went through several iterations of reviews/fixes and older attempts can be seen: https://git.kernel.org/cgit/linux/kernel/git/ast/bpf.git/log/?h=tail_call_v[123456] - tail_call_v1 does it without touching JITs but introduces overhead for all programs that don't use this helper function. - tail_call_v2 still has some overhead and x64 JIT does full stack unwind (prologue skipping optimization wasn't there) - tail_call_v3 reuses 'call' instruction encoding and has interpreter overhead for every normal call - tail_call_v4 fixes above architectural shortcomings and v5,v6 fix few more bugs This last tail_call_v6 approach seems to be the best. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
author: David S. Miller <davem@davemloft.net> 2015-05-21 17:08:00 -0400
committer: David S. Miller <davem@davemloft.net> 2015-05-21 17:08:00 -0400
commit: 3f55b7ed5e5d4aa7291e3a1e2f7224eeba5810ba (patch)
tree: 632ea9b7f4a1df31760730bf432bae625a4fda8e /include/linux
parent: e7582bab5d28ea72e07cf2c74632eaf46a6c1a50 (diff)
parent: 530b2c8619f25f9c332c85510579943aa46df515 (diff)
2 files changed, 23 insertions, 1 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index d5cda067115a..8821b9a8689e 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -126,6 +126,27 @@ struct bpf_prog_aux {
 	struct work_struct work;
 };
 
+struct bpf_array {
+	struct bpf_map map;
+	u32 elem_size;
+	/* 'ownership' of prog_array is claimed by the first program that
+	 * is going to use this map or by the first program which FD is stored
+	 * in the map to make sure that all callers and callees have the same
+	 * prog_type and JITed flag
+	 */
+	enum bpf_prog_type owner_prog_type;
+	bool owner_jited;
+	union {
+		char value[0] __aligned(8);
+		struct bpf_prog *prog[0] __aligned(8);
+	};
+};
+#define MAX_TAIL_CALL_CNT 32
+
+u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5);
+void bpf_prog_array_map_clear(struct bpf_map *map);
+bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp);
+
 #ifdef CONFIG_BPF_SYSCALL
 void bpf_register_prog_type(struct bpf_prog_type_list *tl);
 void bpf_register_map_type(struct bpf_map_type_list *tl);
@@ -160,5 +181,6 @@ extern const struct bpf_func_proto bpf_map_delete_elem_proto;
 
 extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
 extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
+extern const struct bpf_func_proto bpf_tail_call_proto;
 
 #endif /* _LINUX_BPF_H */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 200be4a74a33..17724f6ea983 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -378,7 +378,7 @@ static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
 
 int sk_filter(struct sock *sk, struct sk_buff *skb);
 
-void bpf_prog_select_runtime(struct bpf_prog *fp);
+int bpf_prog_select_runtime(struct bpf_prog *fp);
 void bpf_prog_free(struct bpf_prog *fp);
 
 struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags);
author	David S. Miller <davem@davemloft.net>	2015-05-21 17:08:00 -0400
committer	David S. Miller <davem@davemloft.net>	2015-05-21 17:08:00 -0400
commit	3f55b7ed5e5d4aa7291e3a1e2f7224eeba5810ba (patch)
tree	632ea9b7f4a1df31760730bf432bae625a4fda8e /include/linux
parent	e7582bab5d28ea72e07cf2c74632eaf46a6c1a50 (diff)
parent	530b2c8619f25f9c332c85510579943aa46df515 (diff)