From 4666b6e2b27d91e05a5b8459e40e4a05dbc1c7b0 Mon Sep 17 00:00:00 2001 From: Yaogong Wang Date: Fri, 14 Sep 2018 16:24:06 +0800 Subject: tcp: use an RB tree for ooo receive queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9f5afeae51526b3ad7b7cb21ee8b145ce6ea7a7a ] Over the years, TCP BDP has increased by several orders of magnitude, and some people are considering to reach the 2 Gbytes limit. Even with current window scale limit of 14, ~1 Gbytes maps to ~740,000 MSS. In presence of packet losses (or reorders), TCP stores incoming packets into an out of order queue, and number of skbs sitting there waiting for the missing packets to be received can be in the 10^5 range. Most packets are appended to the tail of this queue, and when packets can finally be transferred to receive queue, we scan the queue from its head. However, in presence of heavy losses, we might have to find an arbitrary point in this queue, involving a linear scan for every incoming packet, throwing away cpu caches. This patch converts it to a RB tree, to get bounded latencies. Yaogong wrote a preliminary patch about 2 years ago. Eric did the rebase, added ofo_last_skb cache, polishing and tests. Tested with network dropping between 1 and 10 % packets, with good success (about 30 % increase of throughput in stress tests) Next step would be to also use an RB tree for the write queue at sender side ;) Signed-off-by: Yaogong Wang Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Cc: Neal Cardwell Cc: Ilpo Järvinen Acked-By: Ilpo Järvinen Signed-off-by: David S. Miller Signed-off-by: Mao Wenan Signed-off-by: Greg Kroah-Hartman --- include/linux/skbuff.h | 8 ++++++++ include/linux/tcp.h | 7 +++---- 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c28bd8be290a..a490dd718654 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2273,6 +2273,8 @@ static inline void __skb_queue_purge(struct sk_buff_head *list) kfree_skb(skb); } +void skb_rbtree_purge(struct rb_root *root); + void *netdev_alloc_frag(unsigned int fragsz); struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length, @@ -2807,6 +2809,12 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) return __pskb_trim(skb, len); } +#define rb_to_skb(rb) rb_entry_safe(rb, struct sk_buff, rbnode) +#define skb_rb_first(root) rb_to_skb(rb_first(root)) +#define skb_rb_last(root) rb_to_skb(rb_last(root)) +#define skb_rb_next(skb) rb_to_skb(rb_next(&(skb)->rbnode)) +#define skb_rb_prev(skb) rb_to_skb(rb_prev(&(skb)->rbnode)) + #define skb_queue_walk(queue, skb) \ for (skb = (queue)->next; \ skb != (struct sk_buff *)(queue); \ diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 5b6df1a8dc74..747404dbe506 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -279,10 +279,9 @@ struct tcp_sock { struct sk_buff* lost_skb_hint; struct sk_buff *retransmit_skb_hint; - /* OOO segments go in this list. Note that socket lock must be held, - * as we do not use sk_buff_head lock. - */ - struct sk_buff_head out_of_order_queue; + /* OOO segments go in this rbtree. Socket lock must be held. */ + struct rb_root out_of_order_queue; + struct sk_buff *ooo_last_skb; /* cache rb_last(out_of_order_queue) */ /* SACKs data, these 2 need to be together (see tcp_options_write) */ struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */ -- cgit v1.2.3 From 3a07d58f20c8e0efd9e5d43ee929c3f5c5f35874 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Tue, 16 May 2017 09:30:18 +0800 Subject: ebtables: arpreply: Add the standard target sanity check commit c953d63548207a085abcb12a15fefc8a11ffdf0a upstream. The info->target comes from userspace and it would be used directly. So we need to add the sanity check to make sure it is a valid standard target, although the ebtables tool has already checked it. Kernel needs to validate anything coming from userspace. If the target is set as an evil value, it would break the ebtables and cause a panic. Because the non-standard target is treated as one offset. Now add one helper function ebt_invalid_target, and we would replace the macro INVALID_TARGET later. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso Cc: Loic Signed-off-by: Greg Kroah-Hartman --- include/linux/netfilter_bridge/ebtables.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 2ea517c7c6b9..bffd096fae3b 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -125,4 +125,9 @@ extern unsigned int ebt_do_table(struct sk_buff *skb, /* True if the target is not a standard target */ #define INVALID_TARGET (info->target < -NUM_STANDARD_TARGETS || info->target >= 0) +static inline bool ebt_invalid_target(int target) +{ + return (target < -NUM_STANDARD_TARGETS || target >= 0); +} + #endif -- cgit v1.2.3