Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/af_inet.c               |  27
-rw-r--r--  net/ipv4/fib_trie.c              |   2
-rw-r--r--  net/ipv4/inet_connection_sock.c  |   7
-rw-r--r--  net/ipv4/inet_lro.c              | 316
-rw-r--r--  net/ipv4/ping.c                  |   2
-rw-r--r--  net/ipv4/sysctl_net_ipv4.c       |  30
-rw-r--r--  net/ipv4/tcp.c                   |  21
-rw-r--r--  net/ipv4/tcp_input.c             |   3
-rw-r--r--  net/ipv4/tcp_ipv4.c              |  41
-rw-r--r--  net/ipv4/tcp_timer.c             |  34
-rw-r--r--  net/ipv4/udp.c                   |  13
-rw-r--r--  net/ipv4/xfrm4_policy.c          |  11
12 files changed, 452 insertions, 55 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index c31e5fa8f1a8..aad274c67b5e 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -136,6 +136,8 @@ static inline int current_has_network(void)
}
#endif
+int sysctl_reserved_port_bind __read_mostly = 1;
+
/* The inetsw table contains everything that inet_create needs to
* build a new socket.
*/
@@ -1346,6 +1348,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
for (p = *head; p; p = p->next) {
struct iphdr *iph2;
+ u16 flush_id;
if (!NAPI_GRO_CB(p)->same_flow)
continue;
@@ -1369,14 +1372,24 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
(iph->tos ^ iph2->tos) |
((iph->frag_off ^ iph2->frag_off) & htons(IP_DF));
- /* Save the IP ID check to be included later when we get to
- * the transport layer so only the inner most IP ID is checked.
- * This is because some GSO/TSO implementations do not
- * correctly increment the IP ID for the outer hdrs.
- */
- NAPI_GRO_CB(p)->flush_id =
- ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id);
NAPI_GRO_CB(p)->flush |= flush;
+
+ /* We must save the offset as it is possible to have multiple
+ * flows using the same protocol and address pairs so we
+ * need to wait until we can validate this is part of the
+ * same flow with a 5-tuple or better to avoid unnecessary
+ * collisions between flows. We can support one of two
+ * possible scenarios, either a fixed value with DF bit set
+ * or an incrementing value with DF either set or unset.
+ * In the case of a fixed value we will end up losing the
+ * information that the IP ID was fixed; however, per RFC
+ * 6864 the actual value of the IP ID is meant to be
+ * ignored in such a case anyway.
+ */
+ flush_id = (u16)(id - ntohs(iph2->id));
+ if (flush_id || !(iph2->frag_off & htons(IP_DF)))
+ NAPI_GRO_CB(p)->flush_id |= flush_id ^
+ NAPI_GRO_CB(p)->count;
}
NAPI_GRO_CB(skb)->flush |= flush;
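
A quick standalone illustration (hypothetical values, not part of the patch) of the flush_id rule added above: an IP ID that advances by exactly the aggregate count, or a fixed ID with DF set, leaves the mark at zero; anything else records a non-zero value for the transport layer to act on. The df flag stands in for the kernel's iph2->frag_off & htons(IP_DF) test.

#include <stdint.h>
#include <stdio.h>

static uint16_t gro_flush_mark(uint16_t id, uint16_t head_id,
			       uint16_t count, int df)
{
	/* mirrors: flush_id = (u16)(id - ntohs(iph2->id)); */
	uint16_t flush_id = (uint16_t)(id - head_id);

	if (flush_id || !df)
		return flush_id ^ count;
	return 0;
}

int main(void)
{
	/* incrementing IDs: 1003 vs head 1000 after 3 segments -> 0 */
	printf("%u\n", (unsigned)gro_flush_mark(1003, 1000, 3, 1));
	/* fixed ID with DF set -> 0 (RFC 6864: value is ignored) */
	printf("%u\n", (unsigned)gro_flush_mark(1000, 1000, 3, 1));
	/* fixed ID with DF clear -> non-zero, transport layer flushes */
	printf("%u\n", (unsigned)gro_flush_mark(1000, 1000, 3, 0));
	return 0;
}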
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 5c598f99a500..fa59bc35dbb5 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1694,7 +1694,7 @@ struct fib_table *fib_trie_unmerge(struct fib_table *oldtb)
lt = (struct trie *)local_tb->tb_data;
while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
- struct key_vector *local_l = NULL, *local_tp;
+ struct key_vector *local_l = NULL, *local_tp = NULL;
hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
struct fib_alias *new_fa;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index a7e7aa1f6a47..81fcff83d309 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -179,6 +179,13 @@ have_snum:
head = &hashinfo->bhash[inet_bhashfn(net, snum,
hashinfo->bhash_size)];
spin_lock(&head->lock);
+
+ if (inet_is_local_reserved_port(net, snum) &&
+ !sysctl_reserved_port_bind) {
+ ret = 1;
+ goto fail_unlock;
+ }
+
inet_bind_bucket_for_each(tb, &head->chain)
if (net_eq(ib_net(tb), net) && tb->port == snum)
goto tb_found;
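
For context, a minimal userspace sketch of what the new check changes (assumes root and a kernel with this patch; port 5000 is a hypothetical choice): once a port is listed in ip_local_reserved_ports and reserved_port_bind is 0, an explicit bind() to it fails, since the non-zero return from inet_csk_get_port surfaces as -EADDRINUSE.

#include <arpa/inet.h>
#include <errno.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	/* beforehand, as root:
	 *   echo 5000 > /proc/sys/net/ipv4/ip_local_reserved_ports
	 *   echo 0    > /proc/sys/net/ipv4/reserved_port_bind
	 */
	struct sockaddr_in sa;
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0)
		return 1;
	memset(&sa, 0, sizeof(sa));
	sa.sin_family = AF_INET;
	sa.sin_port = htons(5000);		/* hypothetical reserved port */
	sa.sin_addr.s_addr = htonl(INADDR_ANY);

	if (bind(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0)
		printf("bind: %s\n", strerror(errno));	/* EADDRINUSE */
	close(fd);
	return 0;
}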
diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c
index f17ea49b28fb..8f26145c34e2 100644
--- a/net/ipv4/inet_lro.c
+++ b/net/ipv4/inet_lro.c
@@ -145,20 +145,37 @@ static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len)
}
static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb,
- struct iphdr *iph, struct tcphdr *tcph)
+ struct iphdr *iph, struct tcphdr *tcph,
+ struct net_lro_info *lro_info)
{
int nr_frags;
__be32 *ptr;
u32 tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
+ u64 hw_marked = 0;
+
+ if (lro_info)
+ hw_marked = lro_info->valid_fields;
nr_frags = skb_shinfo(skb)->nr_frags;
lro_desc->parent = skb;
lro_desc->next_frag = &(skb_shinfo(skb)->frags[nr_frags]);
lro_desc->iph = iph;
lro_desc->tcph = tcph;
- lro_desc->tcp_next_seq = ntohl(tcph->seq) + tcp_data_len;
- lro_desc->tcp_ack = tcph->ack_seq;
- lro_desc->tcp_window = tcph->window;
+
+ if (hw_marked & LRO_TCP_SEQ_NUM)
+ lro_desc->tcp_next_seq = lro_info->tcp_seq_num + tcp_data_len;
+ else
+ lro_desc->tcp_next_seq = ntohl(tcph->seq) + tcp_data_len;
+
+ if (hw_marked & LRO_TCP_ACK_NUM)
+ lro_desc->tcp_ack = htonl(lro_info->tcp_ack_num);
+ else
+ lro_desc->tcp_ack = tcph->ack_seq;
+
+ if (hw_marked & LRO_TCP_WIN)
+ lro_desc->tcp_window = htons(lro_info->tcp_win);
+ else
+ lro_desc->tcp_window = tcph->window;
lro_desc->pkt_aggr_cnt = 1;
lro_desc->ip_tot_len = ntohs(iph->tot_len);
@@ -173,8 +190,11 @@ static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb,
lro_desc->mss = tcp_data_len;
lro_desc->active = 1;
- lro_desc->data_csum = lro_tcp_data_csum(iph, tcph,
- tcp_data_len);
+ if (hw_marked & LRO_TCP_DATA_CSUM)
+ lro_desc->data_csum = lro_info->tcp_data_csum;
+ else
+ lro_desc->data_csum = lro_tcp_data_csum(iph, tcph,
+ tcp_data_len);
}
static inline void lro_clear_desc(struct net_lro_desc *lro_desc)
@@ -183,16 +203,29 @@ static inline void lro_clear_desc(struct net_lro_desc *lro_desc)
}
static void lro_add_common(struct net_lro_desc *lro_desc, struct iphdr *iph,
- struct tcphdr *tcph, int tcp_data_len)
+ struct tcphdr *tcph, int tcp_data_len,
+ struct net_lro_info *lro_info)
{
struct sk_buff *parent = lro_desc->parent;
__be32 *topt;
+ u64 hw_marked = 0;
+
+ if (lro_info)
+ hw_marked = lro_info->valid_fields;
lro_desc->pkt_aggr_cnt++;
lro_desc->ip_tot_len += tcp_data_len;
lro_desc->tcp_next_seq += tcp_data_len;
- lro_desc->tcp_window = tcph->window;
- lro_desc->tcp_ack = tcph->ack_seq;
+
+ if (hw_marked & LRO_TCP_WIN)
+ lro_desc->tcp_window = htons(lro_info->tcp_win);
+ else
+ lro_desc->tcp_window = tcph->window;
+
+ if (hw_marked & LRO_TCP_ACK_NUM)
+ lro_desc->tcp_ack = htonl(lro_info->tcp_ack_num);
+ else
+ lro_desc->tcp_ack = tcph->ack_seq;
/* don't update tcp_rcv_tsval, would not work with PAWS */
if (lro_desc->tcp_saw_tstamp) {
@@ -200,10 +233,17 @@ static void lro_add_common(struct net_lro_desc *lro_desc, struct iphdr *iph,
lro_desc->tcp_rcv_tsecr = *(topt + 2);
}
- lro_desc->data_csum = csum_block_add(lro_desc->data_csum,
- lro_tcp_data_csum(iph, tcph,
- tcp_data_len),
- parent->len);
+ if (hw_marked & LRO_TCP_DATA_CSUM)
+ lro_desc->data_csum = csum_block_add(lro_desc->data_csum,
+ lro_info->tcp_data_csum,
+ parent->len);
+ else
+ lro_desc->data_csum =
+ csum_block_add(lro_desc->data_csum,
+ lro_tcp_data_csum(iph,
+ tcph,
+ tcp_data_len),
+ parent->len);
parent->len += tcp_data_len;
parent->data_len += tcp_data_len;
@@ -212,12 +252,13 @@ static void lro_add_common(struct net_lro_desc *lro_desc, struct iphdr *iph,
}
static void lro_add_packet(struct net_lro_desc *lro_desc, struct sk_buff *skb,
- struct iphdr *iph, struct tcphdr *tcph)
+ struct iphdr *iph, struct tcphdr *tcph,
+ struct net_lro_info *lro_info)
{
struct sk_buff *parent = lro_desc->parent;
int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
- lro_add_common(lro_desc, iph, tcph, tcp_data_len);
+ lro_add_common(lro_desc, iph, tcph, tcp_data_len, lro_info);
skb_pull(skb, (skb->len - tcp_data_len));
parent->truesize += skb->truesize;
@@ -230,6 +271,29 @@ static void lro_add_packet(struct net_lro_desc *lro_desc, struct sk_buff *skb,
lro_desc->last_skb = skb;
}
+static void lro_add_frags(struct net_lro_desc *lro_desc,
+ int len, int hlen, int truesize,
+ struct skb_frag_struct *skb_frags,
+ struct iphdr *iph, struct tcphdr *tcph)
+{
+ struct sk_buff *skb = lro_desc->parent;
+ int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
+
+ lro_add_common(lro_desc, iph, tcph, tcp_data_len, NULL);
+
+ skb->truesize += truesize;
+
+ skb_frags[0].page_offset += hlen;
+ skb_frag_size_sub(&skb_frags[0], hlen);
+
+ while (tcp_data_len > 0) {
+ *lro_desc->next_frag = *skb_frags;
+ tcp_data_len -= skb_frag_size(skb_frags);
+ lro_desc->next_frag++;
+ skb_frags++;
+ skb_shinfo(skb)->nr_frags++;
+ }
+}
static int lro_check_tcp_conn(struct net_lro_desc *lro_desc,
struct iphdr *iph,
@@ -284,6 +348,8 @@ static void lro_flush(struct net_lro_mgr *lro_mgr,
if (lro_mgr->features & LRO_F_NAPI)
netif_receive_skb(lro_desc->parent);
+ else if (lro_mgr->features & LRO_F_NI)
+ netif_rx_ni(lro_desc->parent);
else
netif_rx(lro_desc->parent);
@@ -292,12 +358,13 @@ static void lro_flush(struct net_lro_mgr *lro_mgr,
}
static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
- void *priv)
+ void *priv, struct net_lro_info *lro_info)
{
struct net_lro_desc *lro_desc;
struct iphdr *iph;
struct tcphdr *tcph;
u64 flags;
+ u64 hw_marked = 0;
int vlan_hdr_len = 0;
if (!lro_mgr->get_skb_header ||
@@ -308,7 +375,14 @@ static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
if (!(flags & LRO_IPV4) || !(flags & LRO_TCP))
goto out;
- lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
+ if (lro_info)
+ hw_marked = lro_info->valid_fields;
+
+ if (hw_marked & LRO_DESC)
+ lro_desc = lro_info->lro_desc;
+ else
+ lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
+
if (!lro_desc)
goto out;
@@ -317,22 +391,38 @@ static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
vlan_hdr_len = VLAN_HLEN;
if (!lro_desc->active) { /* start new lro session */
- if (lro_tcp_ip_check(iph, tcph, skb->len - vlan_hdr_len, NULL))
- goto out;
+ if (hw_marked & LRO_ELIGIBILITY_CHECKED) {
+ if (!lro_info->lro_eligible)
+ goto out;
+ } else {
+ if (lro_tcp_ip_check(iph, tcph,
+ skb->len - vlan_hdr_len, NULL))
+ goto out;
+ }
skb->ip_summed = lro_mgr->ip_summed_aggr;
- lro_init_desc(lro_desc, skb, iph, tcph);
+ lro_init_desc(lro_desc, skb, iph, tcph, lro_info);
LRO_INC_STATS(lro_mgr, aggregated);
return 0;
}
- if (lro_desc->tcp_next_seq != ntohl(tcph->seq))
- goto out2;
+ if (hw_marked & LRO_TCP_SEQ_NUM) {
+ if (lro_desc->tcp_next_seq != lro_info->tcp_seq_num)
+ goto out2;
+ } else {
+ if (lro_desc->tcp_next_seq != ntohl(tcph->seq))
+ goto out2;
+ }
- if (lro_tcp_ip_check(iph, tcph, skb->len, lro_desc))
- goto out2;
+ if (hw_marked & LRO_ELIGIBILITY_CHECKED) {
+ if (!lro_info->lro_eligible)
+ goto out2;
+ } else {
+ if (lro_tcp_ip_check(iph, tcph, skb->len, lro_desc))
+ goto out2;
+ }
- lro_add_packet(lro_desc, skb, iph, tcph);
+ lro_add_packet(lro_desc, skb, iph, tcph, lro_info);
LRO_INC_STATS(lro_mgr, aggregated);
if ((lro_desc->pkt_aggr_cnt >= lro_mgr->max_aggr) ||
@@ -348,19 +438,161 @@ out:
return 1;
}
+static struct sk_buff *lro_gen_skb(struct net_lro_mgr *lro_mgr,
+ struct skb_frag_struct *frags,
+ int len, int true_size,
+ void *mac_hdr,
+ int hlen, __wsum sum,
+ u32 ip_summed)
+{
+ struct sk_buff *skb;
+ struct skb_frag_struct *skb_frags;
+ int data_len = len;
+ int hdr_len = min(len, hlen);
+
+ skb = netdev_alloc_skb(lro_mgr->dev, hlen + lro_mgr->frag_align_pad);
+ if (!skb)
+ return NULL;
+
+ skb_reserve(skb, lro_mgr->frag_align_pad);
+ skb->len = len;
+ skb->data_len = len - hdr_len;
+ skb->truesize += true_size;
+ skb->tail += hdr_len;
+
+ memcpy(skb->data, mac_hdr, hdr_len);
+
+ skb_frags = skb_shinfo(skb)->frags;
+ while (data_len > 0) {
+ *skb_frags = *frags;
+ data_len -= skb_frag_size(frags);
+ skb_frags++;
+ frags++;
+ skb_shinfo(skb)->nr_frags++;
+ }
+
+ skb_shinfo(skb)->frags[0].page_offset += hdr_len;
+ skb_frag_size_sub(&skb_shinfo(skb)->frags[0], hdr_len);
+
+ skb->ip_summed = ip_summed;
+ skb->csum = sum;
+ skb->protocol = eth_type_trans(skb, lro_mgr->dev);
+ return skb;
+}
+
+static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr,
+ struct skb_frag_struct *frags,
+ int len, int true_size,
+ void *priv, __wsum sum)
+{
+ struct net_lro_desc *lro_desc;
+ struct iphdr *iph;
+ struct tcphdr *tcph;
+ struct sk_buff *skb;
+ u64 flags;
+ void *mac_hdr;
+ int mac_hdr_len;
+ int hdr_len = LRO_MAX_PG_HLEN;
+ int vlan_hdr_len = 0;
+
+ if (!lro_mgr->get_frag_header ||
+ lro_mgr->get_frag_header(frags, (void *)&mac_hdr, (void *)&iph,
+ (void *)&tcph, &flags, priv)) {
+ mac_hdr = skb_frag_address(frags);
+ goto out1;
+ }
+
+ if (!(flags & LRO_IPV4) || !(flags & LRO_TCP))
+ goto out1;
+
+ hdr_len = (int)((void *)(tcph) + TCP_HDR_LEN(tcph) - mac_hdr);
+ mac_hdr_len = (int)((void *)(iph) - mac_hdr);
+
+ lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
+ if (!lro_desc)
+ goto out1;
+
+ if (!lro_desc->active) { /* start new lro session */
+ if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, NULL))
+ goto out1;
+
+ skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr,
+ hdr_len, 0, lro_mgr->ip_summed_aggr);
+ if (!skb)
+ goto out;
+
+ if ((skb->protocol == htons(ETH_P_8021Q)) &&
+ !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID))
+ vlan_hdr_len = VLAN_HLEN;
+
+ iph = (void *)(skb->data + vlan_hdr_len);
+ tcph = (void *)((u8 *)skb->data + vlan_hdr_len
+ + IP_HDR_LEN(iph));
+
+ lro_init_desc(lro_desc, skb, iph, tcph, NULL);
+ LRO_INC_STATS(lro_mgr, aggregated);
+ return NULL;
+ }
+
+ if (lro_desc->tcp_next_seq != ntohl(tcph->seq))
+ goto out2;
+
+ if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, lro_desc))
+ goto out2;
+
+ lro_add_frags(lro_desc, len, hdr_len, true_size, frags, iph, tcph);
+ LRO_INC_STATS(lro_mgr, aggregated);
+
+ if ((skb_shinfo(lro_desc->parent)->nr_frags >= lro_mgr->max_aggr) ||
+ lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu))
+ lro_flush(lro_mgr, lro_desc);
+
+ return NULL;
+
+out2: /* send aggregated packets to the stack */
+ lro_flush(lro_mgr, lro_desc);
+
+out1: /* Original packet has to be posted to the stack */
+ skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr,
+ hdr_len, sum, lro_mgr->ip_summed);
+out:
+ return skb;
+}
+
void lro_receive_skb(struct net_lro_mgr *lro_mgr,
struct sk_buff *skb,
void *priv)
{
- if (__lro_proc_skb(lro_mgr, skb, priv)) {
+ if (__lro_proc_skb(lro_mgr, skb, priv, NULL)) {
if (lro_mgr->features & LRO_F_NAPI)
netif_receive_skb(skb);
+ else if (lro_mgr->features & LRO_F_NI)
+ netif_rx_ni(skb);
else
netif_rx(skb);
}
}
EXPORT_SYMBOL(lro_receive_skb);
+void lro_receive_frags(struct net_lro_mgr *lro_mgr,
+ struct skb_frag_struct *frags,
+ int len, int true_size, void *priv, __wsum sum)
+{
+ struct sk_buff *skb;
+
+ skb = __lro_proc_segment(lro_mgr, frags, len, true_size, priv, sum);
+ if (!skb)
+ return;
+
+ if (lro_mgr->features & LRO_F_NAPI)
+ netif_receive_skb(skb);
+ else if (lro_mgr->features & LRO_F_NI)
+ netif_rx_ni(skb);
+ else
+ netif_rx(skb);
+}
+EXPORT_SYMBOL(lro_receive_frags);
+
void lro_flush_all(struct net_lro_mgr *lro_mgr)
{
int i;
@@ -372,3 +604,35 @@ void lro_flush_all(struct net_lro_mgr *lro_mgr)
}
}
EXPORT_SYMBOL(lro_flush_all);
+
+void lro_flush_pkt(struct net_lro_mgr *lro_mgr, struct iphdr *iph,
+ struct tcphdr *tcph)
+{
+ struct net_lro_desc *lro_desc;
+
+ lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
+ if (lro_desc && lro_desc->active)
+ lro_flush(lro_mgr, lro_desc);
+}
+EXPORT_SYMBOL(lro_flush_pkt);
+
+void lro_flush_desc(struct net_lro_mgr *lro_mgr, struct net_lro_desc *lro_desc)
+{
+ if (lro_desc->active)
+ lro_flush(lro_mgr, lro_desc);
+}
+EXPORT_SYMBOL(lro_flush_desc);
+
+void lro_receive_skb_ext(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
+ void *priv, struct net_lro_info *lro_info)
+{
+ if (__lro_proc_skb(lro_mgr, skb, priv, lro_info)) {
+ if (lro_mgr->features & LRO_F_NAPI)
+ netif_receive_skb(skb);
+ else if (lro_mgr->features & LRO_F_NI)
+ netif_rx_ni(skb);
+ else
+ netif_rx(skb);
+ }
+}
+EXPORT_SYMBOL(lro_receive_skb_ext);
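
A driver-side sketch of the new extended entry point (my_nic and my_rx_desc are hypothetical; only net_lro_info, the LRO_* valid_fields bits, and lro_receive_skb_ext come from this patch): hardware-supplied TCP fields are passed through in host byte order, matching what lro_init_desc() expects, and any bit left clear in valid_fields falls back to the software parse.

#include <linux/inet_lro.h>

/* hypothetical NIC types, for illustration only */
struct my_rx_desc {
	bool hw_lro_parsed;
	bool lro_ok;
	u32 tcp_seq;	/* host byte order */
	u32 tcp_ack;	/* host byte order */
	u16 tcp_win;	/* host byte order */
};

struct my_nic {
	struct net_lro_mgr lro_mgr;
};

static void my_nic_rx(struct my_nic *nic, struct sk_buff *skb,
		      struct my_rx_desc *rxd)
{
	struct net_lro_info lro_info = { 0 };

	if (rxd->hw_lro_parsed) {
		lro_info.valid_fields = LRO_TCP_SEQ_NUM | LRO_TCP_ACK_NUM |
					LRO_TCP_WIN | LRO_ELIGIBILITY_CHECKED;
		lro_info.tcp_seq_num = rxd->tcp_seq;
		lro_info.tcp_ack_num = rxd->tcp_ack;
		lro_info.tcp_win = rxd->tcp_win;
		lro_info.lro_eligible = rxd->lro_ok;
	}

	/* a zeroed valid_fields behaves like plain lro_receive_skb() */
	lro_receive_skb_ext(&nic->lro_mgr, skb, NULL, &lro_info);
}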
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index a989aba861e0..72e1e831589a 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -660,7 +660,7 @@ int ping_common_sendmsg(int family, struct msghdr *msg, size_t len,
void *user_icmph, size_t icmph_len) {
u8 type, code;
- if (len > 0xFFFF)
+ if (len > 0xFFFF || len < icmph_len)
return -EMSGSIZE;
/* Must have at least a full ICMP header. */
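
An aside on why the new lower bound matters (illustration, not kernel code): len is a size_t, so without the check a len smaller than icmph_len would make any later payload-length subtraction wrap to a huge unsigned value rather than go negative.

#include <stddef.h>
#include <stdio.h>

int main(void)
{
	size_t len = 4, icmph_len = 8;	/* user sent less than a header */

	/* prints 18446744073709551612 on a 64-bit system */
	printf("%zu\n", len - icmph_len);
	return 0;
}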
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 46123369144f..8233e27679f2 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -42,6 +42,10 @@ static int tcp_syn_retries_min = 1;
static int tcp_syn_retries_max = MAX_TCP_SYNCNT;
static int ip_ping_group_range_min[] = { 0, 0 };
static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
+static int tcp_delack_seg_min = TCP_DELACK_MIN;
+static int tcp_delack_seg_max = 60;
+static int tcp_use_userconfig_min;
+static int tcp_use_userconfig_max = 1;
/* Update system visible IP port range */
static void set_local_port_range(struct net *net, int range[2])
@@ -821,6 +825,25 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = &one
},
+ {
+ .procname = "tcp_delack_seg",
+ .data = &sysctl_tcp_delack_seg,
+ .maxlen = sizeof(sysctl_tcp_delack_seg),
+ .mode = 0644,
+ .proc_handler = tcp_proc_delayed_ack_control,
+ .extra1 = &tcp_delack_seg_min,
+ .extra2 = &tcp_delack_seg_max,
+ },
+ {
+ .procname = "tcp_use_userconfig",
+ .data = &sysctl_tcp_use_userconfig,
+ .maxlen = sizeof(sysctl_tcp_use_userconfig),
+ .mode = 0644,
+ .proc_handler = tcp_use_userconfig_sysctl_handler,
+ .extra1 = &tcp_use_userconfig_min,
+ .extra2 = &tcp_use_userconfig_max,
+ },
+
{ }
};
@@ -903,6 +926,13 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_do_large_bitmap,
},
{
+ .procname = "reserved_port_bind",
+ .data = &sysctl_reserved_port_bind,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
.procname = "ip_no_pmtu_disc",
.data = &init_net.ipv4.sysctl_ip_no_pmtu_disc,
.maxlen = sizeof(int),
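
A userspace sketch of driving the two new knobs (assumes root and this patch applied): proc_dointvec_minmax enforces the extra1/extra2 bounds above, so an out-of-range write fails with EINVAL.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int write_sysctl(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);
	int ret = -1;

	if (fd >= 0) {
		ret = write(fd, val, strlen(val)) < 0 ? -1 : 0;
		close(fd);
	}
	return ret;
}

int main(void)
{
	/* enable user-configured delayed ACKs, then set the segment count */
	write_sysctl("/proc/sys/net/ipv4/tcp_use_userconfig", "1");
	write_sysctl("/proc/sys/net/ipv4/tcp_delack_seg", "10");

	/* above tcp_delack_seg_max (60): the write should fail */
	if (write_sysctl("/proc/sys/net/ipv4/tcp_delack_seg", "100"))
		perror("tcp_delack_seg=100");
	return 0;
}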
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 48e6509426b0..9bdd7847ef3a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -302,6 +302,12 @@ EXPORT_SYMBOL(sysctl_tcp_wmem);
atomic_long_t tcp_memory_allocated; /* Current allocated memory. */
EXPORT_SYMBOL(tcp_memory_allocated);
+int sysctl_tcp_delack_seg __read_mostly = TCP_DELACK_SEG;
+EXPORT_SYMBOL(sysctl_tcp_delack_seg);
+
+int sysctl_tcp_use_userconfig __read_mostly;
+EXPORT_SYMBOL(sysctl_tcp_use_userconfig);
+
/*
* Current number of TCP sockets.
*/
@@ -1407,8 +1413,11 @@ static void tcp_cleanup_rbuf(struct sock *sk, int copied)
/* Delayed ACKs frequently hit locked sockets during bulk
* receive. */
if (icsk->icsk_ack.blocked ||
- /* Once-per-two-segments ACK was not sent by tcp_input.c */
- tp->rcv_nxt - tp->rcv_wup > icsk->icsk_ack.rcv_mss ||
+ /* An ACK per sysctl_tcp_delack_seg segments was not
+ * sent by tcp_input.c
+ */
+ tp->rcv_nxt - tp->rcv_wup > (icsk->icsk_ack.rcv_mss) *
+ sysctl_tcp_delack_seg ||
/*
* If this read emptied read buffer, we send ACK, if
* connection is not bidirectional, user drained
@@ -2723,6 +2732,14 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
rate64 = rate != ~0U ? rate : ~0ULL;
put_unaligned(rate64, &info->tcpi_max_pacing_rate);
+ /* Expose reference count for socket */
+ if (sk->sk_socket) {
+ struct file *filep = sk->sk_socket->file;
+
+ if (filep)
+ info->tcpi_count = file_count(filep);
+ }
+
do {
start = u64_stats_fetch_begin_irq(&tp->syncp);
put_unaligned(tp->bytes_acked, &info->tcpi_bytes_acked);
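
On the tcpi_count addition, a sketch of how it would be read from userspace (the field itself is assumed to exist in this tree's uapi struct tcp_info, which this diff does not show, so the code below only touches stock fields and leaves tcpi_count to a comment):

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct tcp_info ti;
	socklen_t len = sizeof(ti);
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0)
		return 1;
	memset(&ti, 0, sizeof(ti));
	if (getsockopt(fd, IPPROTO_TCP, TCP_INFO, &ti, &len) == 0)
		printf("rto=%u\n", ti.tcpi_rto);
	/* With uapi headers matching this kernel, ti.tcpi_count would
	 * report file_count() of the socket's struct file, e.g. it
	 * rises above 1 after fork() or dup() of the socket fd.
	 */
	close(fd);
	return 0;
}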
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b6d99c308bef..3dbff307218e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4965,7 +4965,8 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
struct tcp_sock *tp = tcp_sk(sk);
/* More than one full frame received... */
- if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
+ if (((tp->rcv_nxt - tp->rcv_wup) > (inet_csk(sk)->icsk_ack.rcv_mss) *
+ sysctl_tcp_delack_seg &&
/* ... and right edge of window advances far enough.
* (tcp_recvmsg() will send ACK otherwise). Or...
*/
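
A worked example of the relaxed threshold (standalone, hypothetical numbers): with rcv_mss = 1460 and tcp_delack_seg = 4, the input path now forces an ACK only once more than 5840 unacknowledged bytes have arrived, instead of the stock two segments.

#include <stdio.h>

int main(void)
{
	unsigned int rcv_mss = 1460;
	unsigned int tcp_delack_seg = 4;	/* sysctl value */
	unsigned int rcv_nxt = 100000, rcv_wup = 94000;

	if (rcv_nxt - rcv_wup > rcv_mss * tcp_delack_seg)
		printf("send ACK now (%u > %u)\n",
		       rcv_nxt - rcv_wup, rcv_mss * tcp_delack_seg);
	return 0;
}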
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 984de80590bc..3845ab04a9b4 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -713,8 +713,8 @@ release_sk1:
outside socket context is ugly, certainly. What can I do?
*/
-static void tcp_v4_send_ack(const struct sock *sk, struct sk_buff *skb,
- u32 seq, u32 ack,
+static void tcp_v4_send_ack(const struct sock *sk,
+ struct sk_buff *skb, u32 seq, u32 ack,
u32 win, u32 tsval, u32 tsecr, int oif,
struct tcp_md5sig_key *key,
int reply_flags, u8 tos)
@@ -728,8 +728,8 @@ static void tcp_v4_send_ack(const struct sock *sk, struct sk_buff *skb,
#endif
];
} rep;
- struct ip_reply_arg arg;
struct net *net = sock_net(sk);
+ struct ip_reply_arg arg;
memset(&rep.th, 0, sizeof(struct tcphdr));
memset(&arg, 0, sizeof(arg));
@@ -792,7 +792,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
struct inet_timewait_sock *tw = inet_twsk(sk);
struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
- tcp_v4_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
+ tcp_v4_send_ack(sk, skb,
+ tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_time_stamp + tcptw->tw_ts_offset,
tcptw->tw_ts_recent,
@@ -811,9 +812,17 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
* sk->sk_state == TCP_SYN_RECV -> for Fast Open.
*/
- tcp_v4_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
- tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
- tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd,
+ u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
+ tcp_sk(sk)->snd_nxt;
+
+ /* RFC 7323 2.3
+ * The window field (SEG.WND) of every outgoing segment, with the
+ * exception of <SYN> segments, MUST be right-shifted by
+ * Rcv.Wind.Shift bits:
+ */
+ tcp_v4_send_ack(sk, skb, seq,
+ tcp_rsk(req)->rcv_nxt,
+ req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
tcp_time_stamp,
req->ts_recent,
0,
@@ -840,7 +849,8 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
struct sk_buff *skb;
/* First, grab a route. */
- if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
+ if (!dst && (dst = inet_csk_route_req(
+ (struct sock *)sk, &fl4, req)) == NULL)
return -1;
skb = tcp_make_synack(sk, dst, req, foc, attach_req);
@@ -1199,7 +1209,8 @@ static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
const struct request_sock *req,
bool *strict)
{
- struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req);
+ struct dst_entry *dst = inet_csk_route_req(
+ (struct sock *)sk, &fl->u.ip4, req);
if (strict) {
if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr)
@@ -1574,6 +1585,12 @@ int tcp_v4_rcv(struct sk_buff *skb)
if (!pskb_may_pull(skb, th->doff * 4))
goto discard_it;
+ /* Assuming a trustworthy entity did the checksum and found the csum
+ * invalid, drop the packet.
+ */
+ if (skb->ip_summed == CHECKSUM_COMPLETE && skb->csum_valid == 0)
+ goto csum_error;
+
/* An explanation is required here, I think.
* Packet length and doff are validated by header prediction,
* provided case of th->doff==0 is eliminated.
@@ -2186,6 +2203,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
__be32 src = inet->inet_rcv_saddr;
__u16 destp = ntohs(inet->inet_dport);
__u16 srcp = ntohs(inet->inet_sport);
+ __u8 seq_state = sk->sk_state;
int rx_queue;
int state;
@@ -2205,6 +2223,9 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
timer_expires = jiffies;
}
+ if (inet->transparent)
+ seq_state |= 0x80;
+
state = sk_state_load(sk);
if (state == TCP_LISTEN)
rx_queue = sk->sk_ack_backlog;
@@ -2216,7 +2237,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
"%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
- i, src, srcp, dest, destp, state,
+ i, src, srcp, dest, destp, seq_state,
tp->write_seq - tp->snd_una,
rx_queue,
timer_active,
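
A numeric illustration of the RFC 7323 fix in tcp_v4_reqsk_send_ack (standalone, hypothetical values): without the right shift, a request socket with rsk_rcv_wnd = 65535 and rcv_wscale = 7 would put 65535 in the window field, which the peer then multiplies by 2^7 to 8388480 bytes, wildly overstating the offered window.

#include <stdio.h>

int main(void)
{
	unsigned int rsk_rcv_wnd = 65535;
	unsigned int rcv_wscale = 7;

	/* unscaled on the wire: peer computes 65535 << 7 = 8388480 */
	printf("wrong: %u\n", rsk_rcv_wnd);
	/* shifted on the wire: peer computes 511 << 7 = 65408 */
	printf("right: %u\n", rsk_rcv_wnd >> rcv_wscale);
	return 0;
}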
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 1ec12a4f327e..c1a84472cc0c 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -32,6 +32,40 @@ int sysctl_tcp_retries2 __read_mostly = TCP_RETR2;
int sysctl_tcp_orphan_retries __read_mostly;
int sysctl_tcp_thin_linear_timeouts __read_mostly;
+/* Function to reset the tcp_ack related sysctl when the master control is reset */
+void set_tcp_default(void)
+{
+ sysctl_tcp_delack_seg = TCP_DELACK_SEG;
+}
+
+/* sysctl handler for the tcp_ack related tcp_delack_seg control */
+int tcp_proc_delayed_ack_control(struct ctl_table *table, int write,
+ void __user *buffer, size_t *length,
+ loff_t *ppos)
+{
+ int ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
+
+ /* The return value will be 0 if input validation succeeds
+ * and the values are written to the sysctl table; if not,
+ * the stack continues to work with the currently
+ * configured values.
+ */
+ return ret;
+}
+
+/* sysctl handler for the tcp_ack related master control (tcp_use_userconfig) */
+int tcp_use_userconfig_sysctl_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *length,
+ loff_t *ppos)
+{
+ int ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
+
+ if (write && ret == 0) {
+ if (!sysctl_tcp_use_userconfig)
+ set_tcp_default();
+ }
+ return ret;
+}
+
static void tcp_write_err(struct sock *sk)
{
sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
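
The observable effect of the master-control handler, sketched from userspace (assumes root; paths as added in sysctl_net_ipv4.c above): clearing tcp_use_userconfig is expected to snap tcp_delack_seg back to TCP_DELACK_SEG via set_tcp_default().

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static void put_val(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);

	if (fd >= 0) {
		(void)!write(fd, val, strlen(val));
		close(fd);
	}
}

int main(void)
{
	char buf[16] = "";
	int fd;

	put_val("/proc/sys/net/ipv4/tcp_use_userconfig", "1");
	put_val("/proc/sys/net/ipv4/tcp_delack_seg", "20");
	/* clearing the master switch triggers set_tcp_default() */
	put_val("/proc/sys/net/ipv4/tcp_use_userconfig", "0");

	fd = open("/proc/sys/net/ipv4/tcp_delack_seg", O_RDONLY);
	if (fd >= 0 && read(fd, buf, sizeof(buf) - 1) > 0)
		printf("tcp_delack_seg after reset: %s", buf);
	if (fd >= 0)
		close(fd);
	return 0;
}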
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 9ee5087b9b5e..4d6f09c05a12 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -257,6 +257,11 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
} else {
hslot = udp_hashslot(udptable, net, snum);
spin_lock_bh(&hslot->lock);
+
+ if (inet_is_local_reserved_port(net, snum) &&
+ !sysctl_reserved_port_bind)
+ goto fail_unlock;
+
if (hslot->count > 10) {
int exist;
unsigned int slot2 = udp_sk(sk)->udp_portaddr_hash ^ snum;
@@ -2448,14 +2453,20 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f,
int bucket)
{
struct inet_sock *inet = inet_sk(sp);
+ struct udp_sock *up = udp_sk(sp);
__be32 dest = inet->inet_daddr;
__be32 src = inet->inet_rcv_saddr;
__u16 destp = ntohs(inet->inet_dport);
__u16 srcp = ntohs(inet->inet_sport);
+ __u8 state = sp->sk_state;
+ if (up->encap_rcv)
+ state |= 0xF0;
+ else if (inet->transparent)
+ state |= 0x80;
seq_printf(f, "%5d: %08X:%04X %08X:%04X"
" %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d",
- bucket, src, srcp, dest, destp, sp->sk_state,
+ bucket, src, srcp, dest, destp, state,
sk_wmem_alloc_get(sp),
sk_rmem_alloc_get(sp),
0, 0L, 0,
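
Both this hunk and the tcp_ipv4.c one overload the "st" column of the /proc/net output with flag bits. A small decoder sketch (the column value is hypothetical): the low bits keep sk_state, 0x80 marks a transparent socket, and 0xF0 marks a UDP encapsulation socket (encap_rcv set).

#include <stdio.h>

int main(void)
{
	unsigned int st = 0x87;	/* hypothetical value from /proc/net/udp */

	if ((st & 0xF0) == 0xF0)
		printf("udp encap socket, state %u\n", st & 0x0F);
	else if (st & 0x80)
		printf("transparent socket, state %u\n", st & 0x7F);
	else
		printf("state %u\n", st);
	return 0;
}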
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 7b0edb37a115..ab7ab839b057 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -20,7 +20,7 @@
static struct xfrm_policy_afinfo xfrm4_policy_afinfo;
static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
- int tos, int oif,
+ int tos,
const xfrm_address_t *saddr,
const xfrm_address_t *daddr)
{
@@ -29,7 +29,6 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
memset(fl4, 0, sizeof(*fl4));
fl4->daddr = daddr->a4;
fl4->flowi4_tos = tos;
- fl4->flowi4_oif = oif;
if (saddr)
fl4->saddr = saddr->a4;
@@ -42,22 +41,22 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
return ERR_CAST(rt);
}
-static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, int oif,
+static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos,
const xfrm_address_t *saddr,
const xfrm_address_t *daddr)
{
struct flowi4 fl4;
- return __xfrm4_dst_lookup(net, &fl4, tos, oif, saddr, daddr);
+ return __xfrm4_dst_lookup(net, &fl4, tos, saddr, daddr);
}
-static int xfrm4_get_saddr(struct net *net, int oif,
+static int xfrm4_get_saddr(struct net *net,
xfrm_address_t *saddr, xfrm_address_t *daddr)
{
struct dst_entry *dst;
struct flowi4 fl4;
- dst = __xfrm4_dst_lookup(net, &fl4, 0, oif, NULL, daddr);
+ dst = __xfrm4_dst_lookup(net, &fl4, 0, NULL, daddr);
if (IS_ERR(dst))
return -EHOSTUNREACH;