Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/af_inet.c              |  27
-rw-r--r-- | net/ipv4/fib_trie.c             |   2
-rw-r--r-- | net/ipv4/inet_connection_sock.c |   7
-rw-r--r-- | net/ipv4/inet_lro.c             | 316
-rw-r--r-- | net/ipv4/ping.c                 |   2
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c      |  30
-rw-r--r-- | net/ipv4/tcp.c                  |  21
-rw-r--r-- | net/ipv4/tcp_input.c            |   3
-rw-r--r-- | net/ipv4/tcp_ipv4.c             |  41
-rw-r--r-- | net/ipv4/tcp_timer.c            |  34
-rw-r--r-- | net/ipv4/udp.c                  |  13
-rw-r--r-- | net/ipv4/xfrm4_policy.c         |  11
12 files changed, 452 insertions(+), 55 deletions(-)
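
Note: the diffstat is limited to net/ipv4, so the companion header change (include/linux/inet_lro.h) that declares the hardware-LRO hinting interface used below is not shown. What follows is a minimal sketch of what that header plausibly declares, reconstructed from how __lro_proc_skb() and lro_init_desc() consume it; the field and flag names come from this diff, but the types, bit values, and ordering are assumptions:

/* Sketch only -- reconstructed from usage in net/ipv4/inet_lro.c below,
 * not the actual header.
 */
struct net_lro_info {
	u64 valid_fields;		/* LRO_* bits, which fields the NIC filled in */
	struct net_lro_desc *lro_desc;	/* used when LRO_DESC is set */
	u8 lro_eligible;		/* used when LRO_ELIGIBILITY_CHECKED is set */
	u32 tcp_seq_num;		/* host order; used when LRO_TCP_SEQ_NUM is set */
	u32 tcp_ack_num;		/* host order; used when LRO_TCP_ACK_NUM is set */
	u16 tcp_win;			/* host order; used when LRO_TCP_WIN is set */
	__wsum tcp_data_csum;		/* used when LRO_TCP_DATA_CSUM is set */
};

/* valid_fields bits -- the exact values are assumptions */
#define LRO_DESC			0x0001
#define LRO_ELIGIBILITY_CHECKED		0x0002
#define LRO_TCP_SEQ_NUM			0x0004
#define LRO_TCP_ACK_NUM			0x0008
#define LRO_TCP_WIN			0x0010
#define LRO_TCP_DATA_CSUM		0x0020

/* lro_mgr->features bit -- the exact value is an assumption */
#define LRO_F_NI			0x04	/* deliver via netif_rx_ni() */

A driver that has already validated the flow in hardware passes such a descriptor through the new lro_receive_skb_ext() entry point; any field not marked in valid_fields falls back to being parsed from the packet headers in software.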
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index c31e5fa8f1a8..aad274c67b5e 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -136,6 +136,8 @@ static inline int current_has_network(void)
 }
 #endif
 
+int sysctl_reserved_port_bind __read_mostly = 1;
+
 /* The inetsw table contains everything that inet_create needs to
  * build a new socket.
  */
@@ -1346,6 +1348,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
 
 	for (p = *head; p; p = p->next) {
 		struct iphdr *iph2;
+		u16 flush_id;
 
 		if (!NAPI_GRO_CB(p)->same_flow)
 			continue;
@@ -1369,14 +1372,24 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
 			(iph->tos ^ iph2->tos) |
 			((iph->frag_off ^ iph2->frag_off) & htons(IP_DF));
 
-		/* Save the IP ID check to be included later when we get to
-		 * the transport layer so only the inner most IP ID is checked.
-		 * This is because some GSO/TSO implementations do not
-		 * correctly increment the IP ID for the outer hdrs.
-		 */
-		NAPI_GRO_CB(p)->flush_id =
-			((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id);
 		NAPI_GRO_CB(p)->flush |= flush;
+
+		/* We must save the offset as it is possible to have multiple
+		 * flows using the same protocol and address pairs so we
+		 * need to wait until we can validate this is part of the
+		 * same flow with a 5-tuple or better to avoid unnecessary
+		 * collisions between flows.  We can support one of two
+		 * possible scenarios, either a fixed value with DF bit set
+		 * or an incrementing value with DF either set or unset.
+		 * In the case of a fixed value we will end up losing the
+		 * data that the IP ID was a fixed value, however per RFC
+		 * 6864 in such a case the actual value of the IP ID is
+		 * meant to be ignored anyway.
+		 */
+		flush_id = (u16)(id - ntohs(iph2->id));
+		if (flush_id || !(iph2->frag_off & htons(IP_DF)))
+			NAPI_GRO_CB(p)->flush_id |= flush_id ^
+						    NAPI_GRO_CB(p)->count;
 	}
 
 	NAPI_GRO_CB(skb)->flush |= flush;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 5c598f99a500..fa59bc35dbb5 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1694,7 +1694,7 @@ struct fib_table *fib_trie_unmerge(struct fib_table *oldtb)
 	lt = (struct trie *)local_tb->tb_data;
 
 	while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
-		struct key_vector *local_l = NULL, *local_tp;
+		struct key_vector *local_l = NULL, *local_tp = NULL;
 
 		hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
 			struct fib_alias *new_fa;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index a7e7aa1f6a47..81fcff83d309 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -179,6 +179,13 @@ have_snum:
 		head = &hashinfo->bhash[inet_bhashfn(net, snum,
 				hashinfo->bhash_size)];
 		spin_lock(&head->lock);
+
+		if (inet_is_local_reserved_port(net, snum) &&
+		    !sysctl_reserved_port_bind) {
+			ret = 1;
+			goto fail_unlock;
+		}
+
 		inet_bind_bucket_for_each(tb, &head->chain)
 			if (net_eq(ib_net(tb), net) && tb->port == snum)
 				goto tb_found;
diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c
index f17ea49b28fb..8f26145c34e2 100644
--- a/net/ipv4/inet_lro.c
+++ b/net/ipv4/inet_lro.c
@@ -145,20 +145,37 @@ static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len)
 }
 
 static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb,
-			  struct iphdr *iph, struct tcphdr *tcph)
+			  struct iphdr *iph, struct tcphdr *tcph,
+			  struct net_lro_info *lro_info)
 {
 	int nr_frags;
 	__be32 *ptr;
 	u32 tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
+	u64 hw_marked = 0;
+
+	if (lro_info)
+		hw_marked = lro_info->valid_fields;
 
 	nr_frags = skb_shinfo(skb)->nr_frags;
 	lro_desc->parent = skb;
 	lro_desc->next_frag = &(skb_shinfo(skb)->frags[nr_frags]);
 	lro_desc->iph = iph;
 	lro_desc->tcph = tcph;
-	lro_desc->tcp_next_seq = ntohl(tcph->seq) + tcp_data_len;
-	lro_desc->tcp_ack = tcph->ack_seq;
-	lro_desc->tcp_window = tcph->window;
+
+	if (hw_marked & LRO_TCP_SEQ_NUM)
+		lro_desc->tcp_next_seq = lro_info->tcp_seq_num + tcp_data_len;
+	else
+		lro_desc->tcp_next_seq = ntohl(tcph->seq) + tcp_data_len;
+
+	if (hw_marked & LRO_TCP_ACK_NUM)
+		lro_desc->tcp_ack = htonl(lro_info->tcp_ack_num);
+	else
+		lro_desc->tcp_ack = tcph->ack_seq;
+
+	if (hw_marked & LRO_TCP_WIN)
+		lro_desc->tcp_window = htons(lro_info->tcp_win);
+	else
+		lro_desc->tcp_window = tcph->window;
 
 	lro_desc->pkt_aggr_cnt = 1;
 	lro_desc->ip_tot_len = ntohs(iph->tot_len);
@@ -173,8 +190,11 @@ static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb,
 
 	lro_desc->mss = tcp_data_len;
 	lro_desc->active = 1;
-	lro_desc->data_csum = lro_tcp_data_csum(iph, tcph,
-						tcp_data_len);
+	if (hw_marked & LRO_TCP_DATA_CSUM)
+		lro_desc->data_csum = lro_info->tcp_data_csum;
+	else
+		lro_desc->data_csum = lro_tcp_data_csum(iph, tcph,
+							tcp_data_len);
 }
 
 static inline void lro_clear_desc(struct net_lro_desc *lro_desc)
@@ -183,16 +203,29 @@ static inline void lro_clear_desc(struct net_lro_desc *lro_desc)
 }
 
 static void lro_add_common(struct net_lro_desc *lro_desc, struct iphdr *iph,
-			   struct tcphdr *tcph, int tcp_data_len)
+			   struct tcphdr *tcph, int tcp_data_len,
+			   struct net_lro_info *lro_info)
 {
 	struct sk_buff *parent = lro_desc->parent;
 	__be32 *topt;
+	u64 hw_marked = 0;
+
+	if (lro_info)
+		hw_marked = lro_info->valid_fields;
 
 	lro_desc->pkt_aggr_cnt++;
 	lro_desc->ip_tot_len += tcp_data_len;
 	lro_desc->tcp_next_seq += tcp_data_len;
-	lro_desc->tcp_window = tcph->window;
-	lro_desc->tcp_ack = tcph->ack_seq;
+
+	if (hw_marked & LRO_TCP_WIN)
+		lro_desc->tcp_window = htons(lro_info->tcp_win);
+	else
+		lro_desc->tcp_window = tcph->window;
+
+	if (hw_marked & LRO_TCP_ACK_NUM)
+		lro_desc->tcp_ack = htonl(lro_info->tcp_ack_num);
+	else
+		lro_desc->tcp_ack = tcph->ack_seq;
 
 	/* don't update tcp_rcv_tsval, would not work with PAWS */
 	if (lro_desc->tcp_saw_tstamp) {
@@ -200,10 +233,17 @@ static void lro_add_common(struct net_lro_desc *lro_desc, struct iphdr *iph,
 		lro_desc->tcp_rcv_tsecr = *(topt + 2);
 	}
 
-	lro_desc->data_csum = csum_block_add(lro_desc->data_csum,
-					     lro_tcp_data_csum(iph, tcph,
-							       tcp_data_len),
-					     parent->len);
+	if (hw_marked & LRO_TCP_DATA_CSUM)
+		lro_desc->data_csum = csum_block_add(lro_desc->data_csum,
+						     lro_info->tcp_data_csum,
+						     parent->len);
+	else
+		lro_desc->data_csum =
+			csum_block_add(lro_desc->data_csum,
+				       lro_tcp_data_csum(iph,
+							 tcph,
+							 tcp_data_len),
+				       parent->len);
 
 	parent->len += tcp_data_len;
 	parent->data_len += tcp_data_len;
@@ -212,12 +252,13 @@ static void lro_add_common(struct net_lro_desc *lro_desc, struct iphdr *iph,
 }
 
 static void lro_add_packet(struct net_lro_desc *lro_desc, struct sk_buff *skb,
-			   struct iphdr *iph, struct tcphdr *tcph)
+			   struct iphdr *iph, struct tcphdr *tcph,
+			   struct net_lro_info *lro_info)
 {
 	struct sk_buff *parent = lro_desc->parent;
 	int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
 
-	lro_add_common(lro_desc, iph, tcph, tcp_data_len);
+	lro_add_common(lro_desc, iph, tcph, tcp_data_len, lro_info);
 
 	skb_pull(skb, (skb->len - tcp_data_len));
 	parent->truesize += skb->truesize;
@@ -230,6 +271,29 @@ static void lro_add_packet(struct net_lro_desc *lro_desc, struct sk_buff *skb,
 	lro_desc->last_skb = skb;
 }
 
+static void
+lro_add_frags(struct net_lro_desc *lro_desc,
+	      int len, int hlen, int truesize,
+	      struct skb_frag_struct *skb_frags,
+	      struct iphdr *iph, struct tcphdr *tcph)
+{
+	struct sk_buff *skb = lro_desc->parent;
+	int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
+
+	lro_add_common(lro_desc, iph, tcph, tcp_data_len, NULL);
+
+	skb->truesize += truesize;
+
+	skb_frags[0].page_offset += hlen;
+	skb_frag_size_sub(&skb_frags[0], hlen);
+
+	while (tcp_data_len > 0) {
+		*lro_desc->next_frag = *skb_frags;
+		tcp_data_len -= skb_frag_size(skb_frags);
+		lro_desc->next_frag++;
+		skb_frags++;
+		skb_shinfo(skb)->nr_frags++;
+	}
+}
 
 static int lro_check_tcp_conn(struct net_lro_desc *lro_desc,
 			      struct iphdr *iph,
@@ -284,6 +348,8 @@ static void lro_flush(struct net_lro_mgr *lro_mgr,
 
 	if (lro_mgr->features & LRO_F_NAPI)
 		netif_receive_skb(lro_desc->parent);
+	else if (lro_mgr->features & LRO_F_NI)
+		netif_rx_ni(lro_desc->parent);
 	else
 		netif_rx(lro_desc->parent);
 
@@ -292,12 +358,13 @@ static void lro_flush(struct net_lro_mgr *lro_mgr,
 }
 
 static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
-			  void *priv)
+			  void *priv, struct net_lro_info *lro_info)
 {
 	struct net_lro_desc *lro_desc;
 	struct iphdr *iph;
 	struct tcphdr *tcph;
 	u64 flags;
+	u64 hw_marked = 0;
 	int vlan_hdr_len = 0;
 
 	if (!lro_mgr->get_skb_header ||
@@ -308,7 +375,14 @@ static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
 	if (!(flags & LRO_IPV4) || !(flags & LRO_TCP))
 		goto out;
 
-	lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
+	if (lro_info)
+		hw_marked = lro_info->valid_fields;
+
+	if (hw_marked & LRO_DESC)
+		lro_desc = lro_info->lro_desc;
+	else
+		lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
+
 	if (!lro_desc)
 		goto out;
 
@@ -317,22 +391,38 @@ static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
 		vlan_hdr_len = VLAN_HLEN;
 
 	if (!lro_desc->active) { /* start new lro session */
-		if (lro_tcp_ip_check(iph, tcph, skb->len - vlan_hdr_len, NULL))
-			goto out;
+		if (hw_marked & LRO_ELIGIBILITY_CHECKED) {
+			if (!lro_info->lro_eligible)
+				goto out;
+		} else {
+			if (lro_tcp_ip_check(iph, tcph,
+					     skb->len - vlan_hdr_len, NULL))
+				goto out;
+		}
 
 		skb->ip_summed = lro_mgr->ip_summed_aggr;
-		lro_init_desc(lro_desc, skb, iph, tcph);
+		lro_init_desc(lro_desc, skb, iph, tcph, lro_info);
 		LRO_INC_STATS(lro_mgr, aggregated);
 		return 0;
 	}
 
-	if (lro_desc->tcp_next_seq != ntohl(tcph->seq))
-		goto out2;
+	if (hw_marked & LRO_TCP_SEQ_NUM) {
+		if (lro_desc->tcp_next_seq != lro_info->tcp_seq_num)
+			goto out2;
+	} else {
+		if (lro_desc->tcp_next_seq != ntohl(tcph->seq))
+			goto out2;
+	}
 
-	if (lro_tcp_ip_check(iph, tcph, skb->len, lro_desc))
-		goto out2;
+	if (hw_marked & LRO_ELIGIBILITY_CHECKED) {
+		if (!lro_info->lro_eligible)
+			goto out2;
+	} else {
+		if (lro_tcp_ip_check(iph, tcph, skb->len, lro_desc))
+			goto out2;
+	}
 
-	lro_add_packet(lro_desc, skb, iph, tcph);
+	lro_add_packet(lro_desc, skb, iph, tcph, lro_info);
 	LRO_INC_STATS(lro_mgr, aggregated);
 
 	if ((lro_desc->pkt_aggr_cnt >= lro_mgr->max_aggr) ||
@@ -348,19 +438,161 @@ out:
 	return 1;
 }
 
+static struct sk_buff *lro_gen_skb(struct net_lro_mgr *lro_mgr,
+				   struct skb_frag_struct *frags,
+				   int len, int true_size,
+				   void *mac_hdr,
+				   int hlen, __wsum sum,
+				   u32 ip_summed)
+{
+	struct sk_buff *skb;
+	struct skb_frag_struct *skb_frags;
+	int data_len = len;
+	int hdr_len = min(len, hlen);
+
+	skb = netdev_alloc_skb(lro_mgr->dev, hlen + lro_mgr->frag_align_pad);
+	if (!skb)
+		return NULL;
+
+	skb_reserve(skb, lro_mgr->frag_align_pad);
+	skb->len = len;
+	skb->data_len = len - hdr_len;
+	skb->truesize += true_size;
+
+	skb->tail += hdr_len;
+
+	memcpy(skb->data, mac_hdr, hdr_len);
+
+	skb_frags = skb_shinfo(skb)->frags;
+	while (data_len > 0) {
+		*skb_frags = *frags;
+		data_len -= skb_frag_size(frags);
+		skb_frags++;
+		frags++;
+		skb_shinfo(skb)->nr_frags++;
+	}
+
+	skb_shinfo(skb)->frags[0].page_offset += hdr_len;
+	skb_frag_size_sub(&skb_shinfo(skb)->frags[0], hdr_len);
+
+	skb->ip_summed = ip_summed;
+	skb->csum = sum;
+	skb->protocol = eth_type_trans(skb, lro_mgr->dev);
+	return skb;
+}
+
+static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr,
+					  struct skb_frag_struct *frags,
+					  int len, int true_size,
+					  void *priv, __wsum sum)
+{
+	struct net_lro_desc *lro_desc;
+	struct iphdr *iph;
+	struct tcphdr *tcph;
+	struct sk_buff *skb;
+	u64 flags;
+	void *mac_hdr;
+	int mac_hdr_len;
+	int hdr_len = LRO_MAX_PG_HLEN;
+	int vlan_hdr_len = 0;
+
+	if (!lro_mgr->get_frag_header ||
+	    lro_mgr->get_frag_header(frags, (void *)&mac_hdr, (void *)&iph,
+				     (void *)&tcph, &flags, priv)) {
+		mac_hdr = skb_frag_address(frags);
+		goto out1;
+	}
+
+	if (!(flags & LRO_IPV4) || !(flags & LRO_TCP))
+		goto out1;
+
+	hdr_len = (int)((void *)(tcph) + TCP_HDR_LEN(tcph) - mac_hdr);
+	mac_hdr_len = (int)((void *)(iph) - mac_hdr);
+
+	lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
+	if (!lro_desc)
+		goto out1;
+
+	if (!lro_desc->active) { /* start new lro session */
+		if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, NULL))
+			goto out1;
+
+		skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr,
+				  hdr_len, 0, lro_mgr->ip_summed_aggr);
+		if (!skb)
+			goto out;
+
+		if ((skb->protocol == htons(ETH_P_8021Q)) &&
+		    !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID))
+			vlan_hdr_len = VLAN_HLEN;
+
+		iph = (void *)(skb->data + vlan_hdr_len);
+		tcph = (void *)((u8 *)skb->data + vlan_hdr_len
+				+ IP_HDR_LEN(iph));
+
+		lro_init_desc(lro_desc, skb, iph, tcph, NULL);
+		LRO_INC_STATS(lro_mgr, aggregated);
+		return NULL;
+	}
+
+	if (lro_desc->tcp_next_seq != ntohl(tcph->seq))
+		goto out2;
+
+	if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, lro_desc))
+		goto out2;
+
+	lro_add_frags(lro_desc, len, hdr_len, true_size, frags, iph, tcph);
+	LRO_INC_STATS(lro_mgr, aggregated);
+
+	if ((skb_shinfo(lro_desc->parent)->nr_frags >= lro_mgr->max_aggr) ||
+	    lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu))
+		lro_flush(lro_mgr, lro_desc);
+
+	return NULL;
+
+out2: /* send aggregated packets to the stack */
+	lro_flush(lro_mgr, lro_desc);
+
+out1: /* Original packet has to be posted to the stack */
+	skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr,
+			  hdr_len, sum, lro_mgr->ip_summed);
+out:
+	return skb;
+}
+
 void lro_receive_skb(struct net_lro_mgr *lro_mgr,
 		     struct sk_buff *skb,
 		     void *priv)
 {
-	if (__lro_proc_skb(lro_mgr, skb, priv)) {
+	if (__lro_proc_skb(lro_mgr, skb, priv, NULL)) {
 		if (lro_mgr->features & LRO_F_NAPI)
 			netif_receive_skb(skb);
+		else if (lro_mgr->features & LRO_F_NI)
+			netif_rx_ni(skb);
 		else
 			netif_rx(skb);
 	}
 }
 EXPORT_SYMBOL(lro_receive_skb);
 
+void lro_receive_frags(struct net_lro_mgr *lro_mgr,
+		       struct skb_frag_struct *frags,
+		       int len, int true_size, void *priv, __wsum sum)
+{
+	struct sk_buff *skb;
+
+	skb = __lro_proc_segment(lro_mgr, frags, len, true_size, priv, sum);
+	if (!skb)
+		return;
+
+	if (lro_mgr->features & LRO_F_NAPI)
+		netif_receive_skb(skb);
+	else if (lro_mgr->features & LRO_F_NI)
+		netif_rx_ni(skb);
+	else
+		netif_rx(skb);
+}
+EXPORT_SYMBOL(lro_receive_frags);
+
 void lro_flush_all(struct net_lro_mgr *lro_mgr)
 {
 	int i;
@@ -372,3 +604,35 @@ void lro_flush_all(struct net_lro_mgr *lro_mgr)
 	}
 }
 EXPORT_SYMBOL(lro_flush_all);
+
+void lro_flush_pkt(struct net_lro_mgr *lro_mgr, struct iphdr *iph,
+		   struct tcphdr *tcph)
+{
+	struct net_lro_desc *lro_desc;
+
+	lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
+	if (lro_desc && lro_desc->active)
+		lro_flush(lro_mgr, lro_desc);
+}
+EXPORT_SYMBOL(lro_flush_pkt);
+
+void lro_flush_desc(struct net_lro_mgr *lro_mgr, struct net_lro_desc *lro_desc)
+{
+	if (lro_desc->active)
+		lro_flush(lro_mgr, lro_desc);
+}
+EXPORT_SYMBOL(lro_flush_desc);
+
+void lro_receive_skb_ext(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
+			 void *priv, struct net_lro_info *lro_info)
+{
+	if (__lro_proc_skb(lro_mgr, skb, priv, lro_info)) {
+		if (lro_mgr->features & LRO_F_NAPI)
+			netif_receive_skb(skb);
+		else if (lro_mgr->features & LRO_F_NI)
+			netif_rx_ni(skb);
+		else
+			netif_rx(skb);
+	}
+}
+EXPORT_SYMBOL(lro_receive_skb_ext);
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index a989aba861e0..72e1e831589a 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -660,7 +660,7 @@ int ping_common_sendmsg(int family, struct msghdr *msg, size_t len,
 			void *user_icmph, size_t icmph_len)
 {
 	u8 type, code;
 
-	if (len > 0xFFFF)
+	if (len > 0xFFFF || len < icmph_len)
 		return -EMSGSIZE;
 
 	/* Must have at least a full ICMP header. */
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 46123369144f..8233e27679f2 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -42,6 +42,10 @@ static int tcp_syn_retries_min = 1;
 static int tcp_syn_retries_max = MAX_TCP_SYNCNT;
 static int ip_ping_group_range_min[] = { 0, 0 };
 static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
+static int tcp_delack_seg_min = TCP_DELACK_MIN;
+static int tcp_delack_seg_max = 60;
+static int tcp_use_userconfig_min;
+static int tcp_use_userconfig_max = 1;
 
 /* Update system visible IP port range */
 static void set_local_port_range(struct net *net, int range[2])
@@ -821,6 +825,25 @@ static struct ctl_table ipv4_table[] = {
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= &one
 	},
+	{
+		.procname	= "tcp_delack_seg",
+		.data		= &sysctl_tcp_delack_seg,
+		.maxlen		= sizeof(sysctl_tcp_delack_seg),
+		.mode		= 0644,
+		.proc_handler	= tcp_proc_delayed_ack_control,
+		.extra1		= &tcp_delack_seg_min,
+		.extra2		= &tcp_delack_seg_max,
+	},
+	{
+		.procname	= "tcp_use_userconfig",
+		.data		= &sysctl_tcp_use_userconfig,
+		.maxlen		= sizeof(sysctl_tcp_use_userconfig),
+		.mode		= 0644,
+		.proc_handler	= tcp_use_userconfig_sysctl_handler,
+		.extra1		= &tcp_use_userconfig_min,
+		.extra2		= &tcp_use_userconfig_max,
+	},
+
 	{ }
 };
 
@@ -903,6 +926,13 @@ static struct ctl_table ipv4_net_table[] = {
 		.proc_handler	= proc_do_large_bitmap,
 	},
 	{
+		.procname	= "reserved_port_bind",
+		.data		= &sysctl_reserved_port_bind,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
 		.procname	= "ip_no_pmtu_disc",
 		.data		= &init_net.ipv4.sysctl_ip_no_pmtu_disc,
 		.maxlen		= sizeof(int),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 48e6509426b0..9bdd7847ef3a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -302,6 +302,12 @@ EXPORT_SYMBOL(sysctl_tcp_wmem);
 atomic_long_t tcp_memory_allocated;	/* Current allocated memory. */
 EXPORT_SYMBOL(tcp_memory_allocated);
 
+int sysctl_tcp_delack_seg __read_mostly = TCP_DELACK_SEG;
+EXPORT_SYMBOL(sysctl_tcp_delack_seg);
+
+int sysctl_tcp_use_userconfig __read_mostly;
+EXPORT_SYMBOL(sysctl_tcp_use_userconfig);
+
 /*
  * Current number of TCP sockets.
  */
@@ -1407,8 +1413,11 @@ static void tcp_cleanup_rbuf(struct sock *sk, int copied)
 		   /* Delayed ACKs frequently hit locked sockets during bulk
 		    * receive. */
 		if (icsk->icsk_ack.blocked ||
-		    /* Once-per-two-segments ACK was not sent by tcp_input.c */
-		    tp->rcv_nxt - tp->rcv_wup > icsk->icsk_ack.rcv_mss ||
+		    /* Once-per-sysctl_tcp_delack_seg segments
+		     * ACK was not sent by tcp_input.c
+		     */
+		    tp->rcv_nxt - tp->rcv_wup > (icsk->icsk_ack.rcv_mss) *
+						sysctl_tcp_delack_seg ||
 		    /*
 		     * If this read emptied read buffer, we send ACK, if
 		     * connection is not bidirectional, user drained
@@ -2723,6 +2732,14 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	rate64 = rate != ~0U ? rate : ~0ULL;
 	put_unaligned(rate64, &info->tcpi_max_pacing_rate);
 
+	/* Expose reference count for socket */
+	if (sk->sk_socket) {
+		struct file *filep = sk->sk_socket->file;
+
+		if (filep)
+			info->tcpi_count = file_count(filep);
+	}
+
 	do {
 		start = u64_stats_fetch_begin_irq(&tp->syncp);
 		put_unaligned(tp->bytes_acked, &info->tcpi_bytes_acked);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b6d99c308bef..3dbff307218e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4965,7 +4965,8 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	    /* More than one full frame received... */
-	if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
+	if (((tp->rcv_nxt - tp->rcv_wup) > (inet_csk(sk)->icsk_ack.rcv_mss) *
+					   sysctl_tcp_delack_seg &&
 	     /* ... and right edge of window advances far enough.
 	      * (tcp_recvmsg() will send ACK otherwise). Or...
 	      */
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 984de80590bc..3845ab04a9b4 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -713,8 +713,8 @@ release_sk1:
    outside socket context is ugly, certainly. What can I do?
  */
 
-static void tcp_v4_send_ack(const struct sock *sk, struct sk_buff *skb,
-			    u32 seq, u32 ack,
+static void tcp_v4_send_ack(const struct sock *sk,
+			    struct sk_buff *skb, u32 seq, u32 ack,
 			    u32 win, u32 tsval, u32 tsecr, int oif,
 			    struct tcp_md5sig_key *key,
 			    int reply_flags, u8 tos)
@@ -728,8 +728,8 @@ static void tcp_v4_send_ack(const struct sock *sk, struct sk_buff *skb,
 #endif
 			];
 	} rep;
-	struct ip_reply_arg arg;
 	struct net *net = sock_net(sk);
+	struct ip_reply_arg arg;
 
 	memset(&rep.th, 0, sizeof(struct tcphdr));
 	memset(&arg, 0, sizeof(arg));
@@ -792,7 +792,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
 	struct inet_timewait_sock *tw = inet_twsk(sk);
 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
 
-	tcp_v4_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
+	tcp_v4_send_ack(sk, skb,
+			tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
 			tcp_time_stamp + tcptw->tw_ts_offset,
 			tcptw->tw_ts_recent,
@@ -811,9 +812,17 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
 	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
 	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
 	 */
-	tcp_v4_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
-			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
-			tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd,
+	u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
+						 tcp_sk(sk)->snd_nxt;
+
+	/* RFC 7323 2.3
+	 * The window field (SEG.WND) of every outgoing segment, with the
+	 * exception of <SYN> segments, MUST be right-shifted by
+	 * Rcv.Wind.Shift bits:
+	 */
+	tcp_v4_send_ack(sk, skb, seq,
+			tcp_rsk(req)->rcv_nxt,
+			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
 			tcp_time_stamp,
 			req->ts_recent,
 			0,
@@ -840,7 +849,8 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
 	struct sk_buff *skb;
 
 	/* First, grab a route. */
-	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
+	if (!dst && (dst = inet_csk_route_req(
+			(struct sock *)sk, &fl4, req)) == NULL)
 		return -1;
 
 	skb = tcp_make_synack(sk, dst, req, foc, attach_req);
@@ -1199,7 +1209,8 @@ static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
 					  const struct request_sock *req,
 					  bool *strict)
 {
-	struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req);
+	struct dst_entry *dst = inet_csk_route_req(
+			(struct sock *)sk, &fl->u.ip4, req);
 
 	if (strict) {
 		if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr)
@@ -1574,6 +1585,12 @@ int tcp_v4_rcv(struct sk_buff *skb)
 	if (!pskb_may_pull(skb, th->doff * 4))
 		goto discard_it;
 
+	/* Assuming a trustworthy entity did the checksum and found the csum
+	 * invalid, drop the packet.
+	 */
+	if (skb->ip_summed == CHECKSUM_COMPLETE && skb->csum_valid == 0)
+		goto csum_error;
+
 	/* An explanation is required here, I think.
 	 * Packet length and doff are validated by header prediction,
 	 * provided case of th->doff==0 is eliminated.
@@ -2186,6 +2203,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
 	__be32 src = inet->inet_rcv_saddr;
 	__u16 destp = ntohs(inet->inet_dport);
 	__u16 srcp = ntohs(inet->inet_sport);
+	__u8 seq_state = sk->sk_state;
 	int rx_queue;
 	int state;
 
@@ -2205,6 +2223,9 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
 		timer_expires = jiffies;
 	}
 
+	if (inet->transparent)
+		seq_state |= 0x80;
+
 	state = sk_state_load(sk);
 	if (state == TCP_LISTEN)
 		rx_queue = sk->sk_ack_backlog;
@@ -2216,7 +2237,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
 
 	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
 			"%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
-		i, src, srcp, dest, destp, state,
+		i, src, srcp, dest, destp, seq_state,
 		tp->write_seq - tp->snd_una,
 		rx_queue,
 		timer_active,
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 1ec12a4f327e..c1a84472cc0c 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -32,6 +32,40 @@ int sysctl_tcp_retries2 __read_mostly = TCP_RETR2;
 int sysctl_tcp_orphan_retries __read_mostly;
 int sysctl_tcp_thin_linear_timeouts __read_mostly;
 
+/* Function to reset tcp_ack related sysctl on resetting master control */
+void set_tcp_default(void)
+{
+	sysctl_tcp_delack_seg = TCP_DELACK_SEG;
+}
+
+/* sysctl handler for tcp_ack related master control */
+int tcp_proc_delayed_ack_control(struct ctl_table *table, int write,
+				 void __user *buffer, size_t *length,
+				 loff_t *ppos)
+{
+	int ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
+
+	/* The ret value will be 0 if the input validation is successful
+	 * and the values are written to sysctl table. If not, the stack
+	 * will continue to work with currently configured values.
+	 */
+	return ret;
+}
+
+/* sysctl handler for tcp_ack related master control */
+int tcp_use_userconfig_sysctl_handler(struct ctl_table *table, int write,
+				      void __user *buffer, size_t *length,
+				      loff_t *ppos)
+{
+	int ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
+
+	if (write && ret == 0) {
+		if (!sysctl_tcp_use_userconfig)
+			set_tcp_default();
+	}
+	return ret;
+}
+
 static void tcp_write_err(struct sock *sk)
 {
 	sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 9ee5087b9b5e..4d6f09c05a12 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -257,6 +257,11 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
 	} else {
 		hslot = udp_hashslot(udptable, net, snum);
 		spin_lock_bh(&hslot->lock);
+
+		if (inet_is_local_reserved_port(net, snum) &&
+		    !sysctl_reserved_port_bind)
+			goto fail_unlock;
+
 		if (hslot->count > 10) {
 			int exist;
 			unsigned int slot2 = udp_sk(sk)->udp_portaddr_hash ^ snum;
@@ -2448,14 +2453,20 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f,
 		int bucket)
 {
 	struct inet_sock *inet = inet_sk(sp);
+	struct udp_sock *up = udp_sk(sp);
 	__be32 dest = inet->inet_daddr;
 	__be32 src  = inet->inet_rcv_saddr;
 	__u16 destp = ntohs(inet->inet_dport);
 	__u16 srcp  = ntohs(inet->inet_sport);
+	__u8 state = sp->sk_state;
+
+	if (up->encap_rcv)
+		state |= 0xF0;
+	else if (inet->transparent)
+		state |= 0x80;
 
 	seq_printf(f, "%5d: %08X:%04X %08X:%04X"
 		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d",
-		bucket, src, srcp, dest, destp, sp->sk_state,
+		bucket, src, srcp, dest, destp, state,
 		sk_wmem_alloc_get(sp),
 		sk_rmem_alloc_get(sp),
 		0, 0L, 0,
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 7b0edb37a115..ab7ab839b057 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -20,7 +20,7 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo;
 
 static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
-					    int tos, int oif,
+					    int tos,
 					    const xfrm_address_t *saddr,
 					    const xfrm_address_t *daddr)
 {
@@ -29,7 +29,6 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
 	memset(fl4, 0, sizeof(*fl4));
 	fl4->daddr = daddr->a4;
 	fl4->flowi4_tos = tos;
-	fl4->flowi4_oif = oif;
 	if (saddr)
 		fl4->saddr = saddr->a4;
 
@@ -42,22 +41,22 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
 	return ERR_CAST(rt);
 }
 
-static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, int oif,
+static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos,
 					  const xfrm_address_t *saddr,
 					  const xfrm_address_t *daddr)
 {
 	struct flowi4 fl4;
 
-	return __xfrm4_dst_lookup(net, &fl4, tos, oif, saddr, daddr);
+	return __xfrm4_dst_lookup(net, &fl4, tos, saddr, daddr);
 }
 
-static int xfrm4_get_saddr(struct net *net, int oif,
+static int xfrm4_get_saddr(struct net *net,
 			   xfrm_address_t *saddr, xfrm_address_t *daddr)
 {
 	struct dst_entry *dst;
 	struct flowi4 fl4;
 
-	dst = __xfrm4_dst_lookup(net, &fl4, 0, oif, NULL, daddr);
+	dst = __xfrm4_dst_lookup(net, &fl4, 0, NULL, daddr);
 	if (IS_ERR(dst))
 		return -EHOSTUNREACH;
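
The reserved_port_bind knob added above (net/ipv4/af_inet.c, inet_connection_sock.c, udp.c, sysctl_net_ipv4.c) makes an explicit bind() to a port listed in ip_local_reserved_ports fail when the knob is cleared; with the default of 1 the old behavior is kept. A hypothetical userspace check, assuming the administrator has first run "echo 5000 > /proc/sys/net/ipv4/ip_local_reserved_ports" and "echo 0 > /proc/sys/net/ipv4/reserved_port_bind" (port 5000 is illustrative only):

#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct sockaddr_in addr;
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0) {
		perror("socket");
		return 1;
	}

	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
	addr.sin_port = htons(5000);	/* the reserved port (hypothetical) */

	/* With reserved_port_bind = 0, inet_csk_get_port()/udp_lib_get_port()
	 * now reject the bind, which surfaces here as EADDRINUSE; with the
	 * default reserved_port_bind = 1 the bind succeeds as before.
	 */
	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		perror("bind");
	else
		puts("bind succeeded");

	close(fd);
	return 0;
}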
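The two tcp_ack sysctls work as a pair: tcp_delack_seg scales the delayed-ACK threshold in tcp_cleanup_rbuf() and __tcp_ack_snd_check() from one rcv_mss to rcv_mss * tcp_delack_seg, and clearing the master switch tcp_use_userconfig resets tcp_delack_seg back to TCP_DELACK_SEG through set_tcp_default(). A sketch of driving the two knobs from userspace; the /proc paths follow from the ctl_table entries above, while the helper function and the chosen value 20 are hypothetical:

#include <stdio.h>

/* Hypothetical helper: write one integer to a sysctl /proc file. */
static int sysctl_write(const char *path, int val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fprintf(f, "%d\n", val);
	return fclose(f);
}

int main(void)
{
	/* Mark the delayed-ACK tuning as user-configured; in this patch the
	 * flag's visible kernel-side effect is the reset-to-default on clear.
	 */
	sysctl_write("/proc/sys/net/ipv4/tcp_use_userconfig", 1);

	/* ACK only once more than 20 * rcv_mss bytes are unacknowledged
	 * (the write is clamped by the handler's extra1/extra2 bounds).
	 */
	sysctl_write("/proc/sys/net/ipv4/tcp_delack_seg", 20);

	/* Clearing the master switch restores TCP_DELACK_SEG. */
	sysctl_write("/proc/sys/net/ipv4/tcp_use_userconfig", 0);
	return 0;
}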