diff options
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r-- | net/ipv4/tcp_input.c | 116 |
1 files changed, 65 insertions, 51 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 12b98e257c5f..d3b7172b9ee0 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -102,6 +102,7 @@ int sysctl_tcp_thin_dupack __read_mostly; int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; int sysctl_tcp_early_retrans __read_mostly = 3; int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; +int sysctl_tcp_default_init_rwnd __read_mostly = TCP_INIT_CWND * 2; #define FLAG_DATA 0x01 /* Incoming frame contained data. */ #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ @@ -117,6 +118,7 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; #define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ #define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ #define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */ +#define FLAG_NO_CHALLENGE_ACK 0x8000 /* do not call tcp_send_challenge_ack() */ #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) @@ -556,8 +558,8 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, void tcp_rcv_space_adjust(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + u32 copied; int time; - int copied; time = tcp_time_stamp - tp->rcvq_space.time; if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0) @@ -579,12 +581,13 @@ void tcp_rcv_space_adjust(struct sock *sk) if (sysctl_tcp_moderate_rcvbuf && !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { - int rcvwin, rcvmem, rcvbuf; + int rcvmem, rcvbuf; + u64 rcvwin; /* minimal window to cope with packet losses, assuming * steady state. Add some cushion because of small variations. */ - rcvwin = (copied << 1) + 16 * tp->advmss; + rcvwin = ((u64)copied << 1) + 16 * tp->advmss; /* If rate increased by 25%, * assume slow start, rcvwin = 3 * copied @@ -604,12 +607,13 @@ void tcp_rcv_space_adjust(struct sock *sk) while (tcp_win_from_space(rcvmem) < tp->advmss) rcvmem += 128; - rcvbuf = min(rcvwin / tp->advmss * rcvmem, sysctl_tcp_rmem[2]); + do_div(rcvwin, tp->advmss); + rcvbuf = min_t(u64, rcvwin * rcvmem, sysctl_tcp_rmem[2]); if (rcvbuf > sk->sk_rcvbuf) { sk->sk_rcvbuf = rcvbuf; /* Make the window clamp follow along. */ - tp->window_clamp = rcvwin; + tp->window_clamp = tcp_win_from_space(rcvbuf); } } tp->rcvq_space.space = copied; @@ -1134,13 +1138,14 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, */ if (pkt_len > mss) { unsigned int new_len = (pkt_len / mss) * mss; - if (!in_sack && new_len < pkt_len) { + if (!in_sack && new_len < pkt_len) new_len += mss; - if (new_len >= skb->len) - return 0; - } pkt_len = new_len; } + + if (pkt_len >= skb->len && !in_sack) + return 0; + err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC); if (err < 0) return err; @@ -2164,8 +2169,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - int cnt, oldcnt; - int err; + int cnt, oldcnt, lost; unsigned int mss; /* Use SACK to deduce losses of new sequences sent during recovery */ const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq; @@ -2205,9 +2209,10 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) break; mss = tcp_skb_mss(skb); - err = tcp_fragment(sk, skb, (packets - oldcnt) * mss, - mss, GFP_ATOMIC); - if (err < 0) + /* If needed, chop off the prefix to mark as lost. */ + lost = (packets - oldcnt) * mss; + if (lost < skb->len && + tcp_fragment(sk, skb, lost, mss, GFP_ATOMIC) < 0) break; cnt = packets; } @@ -2324,10 +2329,9 @@ static void DBGUNDO(struct sock *sk, const char *msg) } #if IS_ENABLED(CONFIG_IPV6) else if (sk->sk_family == AF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", msg, - &np->daddr, ntohs(inet->inet_dport), + &sk->sk_v6_daddr, ntohs(inet->inet_dport), tp->snd_cwnd, tcp_left_out(tp), tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); @@ -2503,8 +2507,8 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); /* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */ - if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || - (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) { + if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH && + (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) { tp->snd_cwnd = tp->snd_ssthresh; tp->snd_cwnd_stamp = tcp_time_stamp; } @@ -3028,8 +3032,7 @@ void tcp_rearm_rto(struct sock *sk) /* delta may not be positive if the socket is locked * when the retrans timer fires and is rescheduled. */ - if (delta > 0) - rto = delta; + rto = max(delta, 1); } inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto, TCP_RTO_MAX); @@ -3220,7 +3223,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, int delta; /* Non-retransmitted hole got filled? That's reordering */ - if (reord < prior_fackets) + if (reord < prior_fackets && reord <= tp->fackets_out) tcp_update_reordering(sk, tp->fackets_out - reord, 0); delta = tcp_is_fack(tp) ? pkts_acked : @@ -3544,7 +3547,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (before(ack, prior_snd_una)) { /* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */ if (before(ack, prior_snd_una - tp->max_window)) { - tcp_send_challenge_ack(sk, skb); + if (!(flag & FLAG_NO_CHALLENGE_ACK)) + tcp_send_challenge_ack(sk, skb); return -1; } goto old_ack; @@ -3868,11 +3872,8 @@ const u8 *tcp_parse_md5sig_option(const struct tcphdr *th) int length = (th->doff << 2) - sizeof(*th); const u8 *ptr = (const u8 *)(th + 1); - /* If the TCP option is too short, we can short cut */ - if (length < TCPOLEN_MD5SIG) - return NULL; - - while (length > 0) { + /* If not enough data remaining, we can short cut */ + while (length >= TCPOLEN_MD5SIG) { int opcode = *ptr++; int opsize; @@ -4779,6 +4780,7 @@ restart: static void tcp_collapse_ofo_queue(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + u32 range_truesize, sum_tiny = 0; struct sk_buff *skb = skb_peek(&tp->out_of_order_queue); struct sk_buff *head; u32 start, end; @@ -4788,6 +4790,7 @@ static void tcp_collapse_ofo_queue(struct sock *sk) start = TCP_SKB_CB(skb)->seq; end = TCP_SKB_CB(skb)->end_seq; + range_truesize = skb->truesize; head = skb; for (;;) { @@ -4802,14 +4805,24 @@ static void tcp_collapse_ofo_queue(struct sock *sk) if (!skb || after(TCP_SKB_CB(skb)->seq, end) || before(TCP_SKB_CB(skb)->end_seq, start)) { - tcp_collapse(sk, &tp->out_of_order_queue, - head, skb, start, end); + /* Do not attempt collapsing tiny skbs */ + if (range_truesize != head->truesize || + end - start >= SKB_WITH_OVERHEAD(SK_MEM_QUANTUM)) { + tcp_collapse(sk, &tp->out_of_order_queue, + head, skb, start, end); + } else { + sum_tiny += range_truesize; + if (sum_tiny > sk->sk_rcvbuf >> 3) + return; + } + head = skb; if (!skb) break; /* Start new segment */ start = TCP_SKB_CB(skb)->seq; end = TCP_SKB_CB(skb)->end_seq; + range_truesize = skb->truesize; } else { if (before(TCP_SKB_CB(skb)->seq, start)) start = TCP_SKB_CB(skb)->seq; @@ -4865,6 +4878,9 @@ static int tcp_prune_queue(struct sock *sk) else if (tcp_under_memory_pressure(sk)) tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); + if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) + return 0; + tcp_collapse_ofo_queue(sk); if (!skb_queue_empty(&sk->sk_receive_queue)) tcp_collapse(sk, &sk->sk_receive_queue, @@ -4943,7 +4959,7 @@ static void tcp_check_space(struct sock *sk) if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) { sock_reset_flag(sk, SOCK_QUEUE_SHRUNK); /* pairs with tcp_poll() */ - smp_mb__after_atomic(); + smp_mb(); if (sk->sk_socket && test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) tcp_new_space(sk); @@ -5436,6 +5452,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) struct inet_connection_sock *icsk = inet_csk(sk); tcp_set_state(sk, TCP_ESTABLISHED); + icsk->icsk_ack.lrcvtime = tcp_time_stamp; if (skb) { icsk->icsk_af_ops->sk_rx_dst_set(sk, skb); @@ -5464,10 +5481,6 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) else tp->pred_flags = 0; - if (!sock_flag(sk, SOCK_DEAD)) { - sk->sk_state_change(sk); - sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); - } } static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, @@ -5531,6 +5544,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, struct tcp_sock *tp = tcp_sk(sk); struct tcp_fastopen_cookie foc = { .len = -1 }; int saved_clamp = tp->rx_opt.mss_clamp; + bool fastopen_fail; tcp_parse_options(skb, &tp->rx_opt, 0, &foc); if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) @@ -5633,10 +5647,15 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tcp_finish_connect(sk, skb); - if ((tp->syn_fastopen || tp->syn_data) && - tcp_rcv_fastopen_synack(sk, skb, &foc)) - return -1; + fastopen_fail = (tp->syn_fastopen || tp->syn_data) && + tcp_rcv_fastopen_synack(sk, skb, &foc); + if (!sock_flag(sk, SOCK_DEAD)) { + sk->sk_state_change(sk); + sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); + } + if (fastopen_fail) + return -1; if (sk->sk_write_pending || icsk->icsk_accept_queue.rskq_defer_accept || icsk->icsk_ack.pingpong) { @@ -5648,7 +5667,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * to stand against the temptation 8) --ANK */ inet_csk_schedule_ack(sk); - icsk->icsk_ack.lrcvtime = tcp_time_stamp; tcp_enter_quickack_mode(sk); inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX, TCP_RTO_MAX); @@ -5831,13 +5849,17 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) /* step 5: check the ACK field */ acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH | - FLAG_UPDATE_TS_RECENT) > 0; + FLAG_UPDATE_TS_RECENT | + FLAG_NO_CHALLENGE_ACK) > 0; + if (!acceptable) { + if (sk->sk_state == TCP_SYN_RECV) + return 1; /* send one RST */ + tcp_send_challenge_ack(sk, skb); + goto discard; + } switch (sk->sk_state) { case TCP_SYN_RECV: - if (!acceptable) - return 1; - if (!tp->srtt_us) tcp_synack_rtt_meas(sk, req); @@ -5906,14 +5928,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) * our SYNACK so stop the SYNACK timer. */ if (req) { - /* Return RST if ack_seq is invalid. - * Note that RFC793 only says to generate a - * DUPACK for it but for TCP Fast Open it seems - * better to treat this case like TCP_SYN_RECV - * above. - */ - if (!acceptable) - return 1; /* We no longer need the request sock. */ reqsk_fastopen_remove(sk, req, false); tcp_rearm_rto(sk); @@ -6106,7 +6120,7 @@ struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, struct inet_request_sock *ireq = inet_rsk(req); kmemcheck_annotate_bitfield(ireq, flags); - ireq->opt = NULL; + ireq->ireq_opt = NULL; atomic64_set(&ireq->ir_cookie, 0); ireq->ireq_state = TCP_NEW_SYN_RECV; write_pnet(&ireq->ireq_net, sock_net(sk_listener)); |