summaryrefslogtreecommitdiff
path: root/net/ipv4/tcp.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r--net/ipv4/tcp.c332
1 files changed, 145 insertions, 187 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c4638e6f0238..86023b9be47f 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -274,7 +274,6 @@
#include <net/tcp.h>
#include <net/xfrm.h>
#include <net/ip.h>
-#include <net/netdma.h>
#include <net/sock.h>
#include <asm/uaccess.h>
@@ -285,6 +284,8 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
int sysctl_tcp_min_tso_segs __read_mostly = 2;
+int sysctl_tcp_autocorking __read_mostly = 1;
+
struct percpu_counter tcp_orphan_count;
EXPORT_SYMBOL_GPL(tcp_orphan_count);
@@ -379,13 +380,13 @@ void tcp_init_sock(struct sock *sk)
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
- skb_queue_head_init(&tp->out_of_order_queue);
+ __skb_queue_head_init(&tp->out_of_order_queue);
tcp_init_xmit_timers(sk);
tcp_prequeue_init(tp);
INIT_LIST_HEAD(&tp->tsq_node);
icsk->icsk_rto = TCP_TIMEOUT_INIT;
- tp->mdev = TCP_TIMEOUT_INIT;
+ tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
/* So many TCP implementations out there (incorrectly) count the
* initial SYN frame in their delayed-ACK and congestion control
@@ -403,7 +404,7 @@ void tcp_init_sock(struct sock *sk)
tp->reordering = sysctl_tcp_reordering;
tcp_enable_early_retrans(tp);
- icsk->icsk_ca_ops = &tcp_init_congestion_ops;
+ tcp_assign_congestion_control(sk);
tp->tsoffset = 0;
@@ -424,6 +425,17 @@ void tcp_init_sock(struct sock *sk)
}
EXPORT_SYMBOL(tcp_init_sock);
+static void tcp_tx_timestamp(struct sock *sk, struct sk_buff *skb)
+{
+ if (sk->sk_tsflags) {
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+
+ sock_tx_timestamp(sk, &shinfo->tx_flags);
+ if (shinfo->tx_flags & SKBTX_ANY_TSTAMP)
+ shinfo->tskey = TCP_SKB_CB(skb)->seq + skb->len - 1;
+ }
+}
+
/*
* Wait for a TCP event.
*
@@ -521,7 +533,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
}
/* This barrier is coupled with smp_wmb() in tcp_reset() */
smp_rmb();
- if (sk->sk_err)
+ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
mask |= POLLERR;
return mask;
@@ -596,7 +608,7 @@ static inline bool forced_push(const struct tcp_sock *tp)
return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1));
}
-static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
+static void skb_entail(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
@@ -605,7 +617,7 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
tcb->seq = tcb->end_seq = tp->write_seq;
tcb->tcp_flags = TCPHDR_ACK;
tcb->sacked = 0;
- skb_header_release(skb);
+ __skb_header_release(skb);
tcp_add_write_queue_tail(sk, skb);
sk->sk_wmem_queued += skb->truesize;
sk_mem_charge(sk, skb->truesize);
@@ -619,19 +631,58 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags)
tp->snd_up = tp->write_seq;
}
-static inline void tcp_push(struct sock *sk, int flags, int mss_now,
- int nonagle)
+/* If a not yet filled skb is pushed, do not send it if
+ * we have data packets in Qdisc or NIC queues :
+ * Because TX completion will happen shortly, it gives a chance
+ * to coalesce future sendmsg() payload into this skb, without
+ * need for a timer, and with no latency trade off.
+ * As packets containing data payload have a bigger truesize
+ * than pure acks (dataless) packets, the last checks prevent
+ * autocorking if we only have an ACK in Qdisc/NIC queues,
+ * or if TX completion was delayed after we processed ACK packet.
+ */
+static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
+ int size_goal)
{
- if (tcp_send_head(sk)) {
- struct tcp_sock *tp = tcp_sk(sk);
+ return skb->len < size_goal &&
+ sysctl_tcp_autocorking &&
+ skb != tcp_write_queue_head(sk) &&
+ atomic_read(&sk->sk_wmem_alloc) > skb->truesize;
+}
+
+static void tcp_push(struct sock *sk, int flags, int mss_now,
+ int nonagle, int size_goal)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct sk_buff *skb;
+
+ if (!tcp_send_head(sk))
+ return;
+
+ skb = tcp_write_queue_tail(sk);
+ if (!(flags & MSG_MORE) || forced_push(tp))
+ tcp_mark_push(tp, skb);
+
+ tcp_mark_urg(tp, flags);
- if (!(flags & MSG_MORE) || forced_push(tp))
- tcp_mark_push(tp, tcp_write_queue_tail(sk));
+ if (tcp_should_autocork(sk, skb, size_goal)) {
- tcp_mark_urg(tp, flags);
- __tcp_push_pending_frames(sk, mss_now,
- (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle);
+ /* avoid atomic op if TSQ_THROTTLED bit is already set */
+ if (!test_bit(TSQ_THROTTLED, &tp->tsq_flags)) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAUTOCORKING);
+ set_bit(TSQ_THROTTLED, &tp->tsq_flags);
+ }
+ /* It is possible TX completion already happened
+ * before we set TSQ_THROTTLED.
+ */
+ if (atomic_read(&sk->sk_wmem_alloc) > skb->truesize)
+ return;
}
+
+ if (flags & MSG_MORE)
+ nonagle = TCP_NAGLE_CORK;
+
+ __tcp_push_pending_frames(sk, mss_now, nonagle);
}
static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
@@ -911,15 +962,17 @@ new_segment:
skb->ip_summed = CHECKSUM_PARTIAL;
tp->write_seq += copy;
TCP_SKB_CB(skb)->end_seq += copy;
- skb_shinfo(skb)->gso_segs = 0;
+ tcp_skb_pcount_set(skb, 0);
if (!copied)
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
copied += copy;
offset += copy;
- if (!(size -= copy))
+ if (!(size -= copy)) {
+ tcp_tx_timestamp(sk, skb);
goto out;
+ }
if (skb->len < size_goal || (flags & MSG_OOB))
continue;
@@ -934,7 +987,8 @@ new_segment:
wait_for_sndbuf:
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
- tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
+ tcp_push(sk, flags & ~MSG_MORE, mss_now,
+ TCP_NAGLE_PUSH, size_goal);
if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
goto do_error;
@@ -944,7 +998,7 @@ wait_for_memory:
out:
if (copied && !(flags & MSG_SENDPAGE_NOTLAST))
- tcp_push(sk, flags, mss_now, tp->nonagle);
+ tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
return copied;
do_error:
@@ -1002,7 +1056,8 @@ void tcp_free_fastopen_req(struct tcp_sock *tp)
}
}
-static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size)
+static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
+ int *copied, size_t size)
{
struct tcp_sock *tp = tcp_sk(sk);
int err, flags;
@@ -1017,11 +1072,12 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size)
if (unlikely(tp->fastopen_req == NULL))
return -ENOBUFS;
tp->fastopen_req->data = msg;
+ tp->fastopen_req->size = size;
flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
err = __inet_stream_connect(sk->sk_socket, msg->msg_name,
msg->msg_namelen, flags);
- *size = tp->fastopen_req->copied;
+ *copied = tp->fastopen_req->copied;
tcp_free_fastopen_req(tp);
return err;
}
@@ -1041,7 +1097,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
flags = msg->msg_flags;
if (flags & MSG_FASTOPEN) {
- err = tcp_sendmsg_fastopen(sk, msg, &copied_syn);
+ err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size);
if (err == -EINPROGRESS && copied_syn > 0)
goto out;
else if (err)
@@ -1064,7 +1120,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
if (unlikely(tp->repair)) {
if (tp->repair_queue == TCP_RECV_QUEUE) {
copied = tcp_send_rcvq(sk, msg, size);
- goto out;
+ goto out_nopush;
}
err = -EINVAL;
@@ -1131,13 +1187,6 @@ new_segment:
goto wait_for_memory;
/*
- * All packets are restored as if they have
- * already been sent.
- */
- if (tp->repair)
- TCP_SKB_CB(skb)->when = tcp_time_stamp;
-
- /*
* Check whether we can use HW checksum.
*/
if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
@@ -1146,6 +1195,13 @@ new_segment:
skb_entail(sk, skb);
copy = size_goal;
max = size_goal;
+
+ /* All packets are restored as if they have
+ * already been sent. skb_mstamp isn't set to
+ * avoid wrong rtt estimation.
+ */
+ if (tp->repair)
+ TCP_SKB_CB(skb)->sacked |= TCPCB_REPAIRED;
}
/* Try to append data to the end of skb. */
@@ -1204,12 +1260,14 @@ new_segment:
tp->write_seq += copy;
TCP_SKB_CB(skb)->end_seq += copy;
- skb_shinfo(skb)->gso_segs = 0;
+ tcp_skb_pcount_set(skb, 0);
from += copy;
copied += copy;
- if ((seglen -= copy) == 0 && iovlen == 0)
+ if ((seglen -= copy) == 0 && iovlen == 0) {
+ tcp_tx_timestamp(sk, skb);
goto out;
+ }
if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair))
continue;
@@ -1225,7 +1283,8 @@ wait_for_sndbuf:
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
if (copied)
- tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
+ tcp_push(sk, flags & ~MSG_MORE, mss_now,
+ TCP_NAGLE_PUSH, size_goal);
if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
goto do_error;
@@ -1236,7 +1295,8 @@ wait_for_memory:
out:
if (copied)
- tcp_push(sk, flags, mss_now, tp->nonagle);
+ tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
+out_nopush:
release_sock(sk);
return copied + copied_syn;
@@ -1333,7 +1393,7 @@ static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len)
* calculation of whether or not we must ACK for the sake of
* a window update.
*/
-void tcp_cleanup_rbuf(struct sock *sk, int copied)
+static void tcp_cleanup_rbuf(struct sock *sk, int copied)
{
struct tcp_sock *tp = tcp_sk(sk);
bool time_to_ack = false;
@@ -1409,39 +1469,6 @@ static void tcp_prequeue_process(struct sock *sk)
tp->ucopy.memory = 0;
}
-#ifdef CONFIG_NET_DMA
-static void tcp_service_net_dma(struct sock *sk, bool wait)
-{
- dma_cookie_t done, used;
- dma_cookie_t last_issued;
- struct tcp_sock *tp = tcp_sk(sk);
-
- if (!tp->ucopy.dma_chan)
- return;
-
- last_issued = tp->ucopy.dma_cookie;
- dma_async_issue_pending(tp->ucopy.dma_chan);
-
- do {
- if (dma_async_is_tx_complete(tp->ucopy.dma_chan,
- last_issued, &done,
- &used) == DMA_COMPLETE) {
- /* Safe to free early-copied skbs now */
- __skb_queue_purge(&sk->sk_async_wait_queue);
- break;
- } else {
- struct sk_buff *skb;
- while ((skb = skb_peek(&sk->sk_async_wait_queue)) &&
- (dma_async_is_complete(skb->dma_cookie, done,
- used) == DMA_COMPLETE)) {
- __skb_dequeue(&sk->sk_async_wait_queue);
- kfree_skb(skb);
- }
- }
- } while (wait);
-}
-#endif
-
static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
{
struct sk_buff *skb;
@@ -1449,9 +1476,9 @@ static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) {
offset = seq - TCP_SKB_CB(skb)->seq;
- if (tcp_hdr(skb)->syn)
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
offset--;
- if (offset < skb->len || tcp_hdr(skb)->fin) {
+ if (offset < skb->len || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) {
*off = offset;
return skb;
}
@@ -1459,7 +1486,7 @@ static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
* splitted a fat GRO packet, while we released socket lock
* in skb_splice_bits()
*/
- sk_eat_skb(sk, skb, false);
+ sk_eat_skb(sk, skb);
}
return NULL;
}
@@ -1524,12 +1551,12 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
if (offset + 1 != skb->len)
continue;
}
- if (tcp_hdr(skb)->fin) {
- sk_eat_skb(sk, skb, false);
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) {
+ sk_eat_skb(sk, skb);
++seq;
break;
}
- sk_eat_skb(sk, skb, false);
+ sk_eat_skb(sk, skb);
if (!desc->count)
break;
tp->copied_seq = seq;
@@ -1567,10 +1594,12 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
int target; /* Read at least this many bytes */
long timeo;
struct task_struct *user_recv = NULL;
- bool copied_early = false;
struct sk_buff *skb;
u32 urg_hole = 0;
+ if (unlikely(flags & MSG_ERRQUEUE))
+ return ip_recv_error(sk, msg, len, addr_len);
+
if (sk_can_busy_loop(sk) && skb_queue_empty(&sk->sk_receive_queue) &&
(sk->sk_state == TCP_ESTABLISHED))
sk_busy_loop(sk, nonblock);
@@ -1610,28 +1639,6 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
-#ifdef CONFIG_NET_DMA
- tp->ucopy.dma_chan = NULL;
- preempt_disable();
- skb = skb_peek_tail(&sk->sk_receive_queue);
- {
- int available = 0;
-
- if (skb)
- available = TCP_SKB_CB(skb)->seq + skb->len - (*seq);
- if ((available < target) &&
- (len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) &&
- !sysctl_tcp_low_latency &&
- net_dma_find_channel()) {
- preempt_enable_no_resched();
- tp->ucopy.pinned_list =
- dma_pin_iovec_pages(msg->msg_iov, len);
- } else {
- preempt_enable_no_resched();
- }
- }
-#endif
-
do {
u32 offset;
@@ -1658,11 +1665,11 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
break;
offset = *seq - TCP_SKB_CB(skb)->seq;
- if (tcp_hdr(skb)->syn)
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
offset--;
if (offset < skb->len)
goto found_ok_skb;
- if (tcp_hdr(skb)->fin)
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
goto found_fin_ok;
WARN(!(flags & MSG_PEEK),
"recvmsg bug 2: copied %X seq %X rcvnxt %X fl %X\n",
@@ -1762,16 +1769,6 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
/* __ Set realtime policy in scheduler __ */
}
-#ifdef CONFIG_NET_DMA
- if (tp->ucopy.dma_chan) {
- if (tp->rcv_wnd == 0 &&
- !skb_queue_empty(&sk->sk_async_wait_queue)) {
- tcp_service_net_dma(sk, true);
- tcp_cleanup_rbuf(sk, copied);
- } else
- dma_async_issue_pending(tp->ucopy.dma_chan);
- }
-#endif
if (copied >= target) {
/* Do not sleep, just process backlog. */
release_sock(sk);
@@ -1779,11 +1776,6 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
} else
sk_wait_data(sk, &timeo);
-#ifdef CONFIG_NET_DMA
- tcp_service_net_dma(sk, false); /* Don't block */
- tp->ucopy.wakeup = 0;
-#endif
-
if (user_recv) {
int chunk;
@@ -1841,43 +1833,13 @@ do_prequeue:
}
if (!(flags & MSG_TRUNC)) {
-#ifdef CONFIG_NET_DMA
- if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
- tp->ucopy.dma_chan = net_dma_find_channel();
-
- if (tp->ucopy.dma_chan) {
- tp->ucopy.dma_cookie = dma_skb_copy_datagram_iovec(
- tp->ucopy.dma_chan, skb, offset,
- msg->msg_iov, used,
- tp->ucopy.pinned_list);
-
- if (tp->ucopy.dma_cookie < 0) {
-
- pr_alert("%s: dma_cookie < 0\n",
- __func__);
-
- /* Exception. Bailout! */
- if (!copied)
- copied = -EFAULT;
- break;
- }
-
- dma_async_issue_pending(tp->ucopy.dma_chan);
-
- if ((offset + used) == skb->len)
- copied_early = true;
-
- } else
-#endif
- {
- err = skb_copy_datagram_iovec(skb, offset,
- msg->msg_iov, used);
- if (err) {
- /* Exception. Bailout! */
- if (!copied)
- copied = -EFAULT;
- break;
- }
+ err = skb_copy_datagram_iovec(skb, offset,
+ msg->msg_iov, used);
+ if (err) {
+ /* Exception. Bailout! */
+ if (!copied)
+ copied = -EFAULT;
+ break;
}
}
@@ -1895,21 +1857,17 @@ skip_copy:
if (used + offset < skb->len)
continue;
- if (tcp_hdr(skb)->fin)
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
goto found_fin_ok;
- if (!(flags & MSG_PEEK)) {
- sk_eat_skb(sk, skb, copied_early);
- copied_early = false;
- }
+ if (!(flags & MSG_PEEK))
+ sk_eat_skb(sk, skb);
continue;
found_fin_ok:
/* Process the FIN. */
++*seq;
- if (!(flags & MSG_PEEK)) {
- sk_eat_skb(sk, skb, copied_early);
- copied_early = false;
- }
+ if (!(flags & MSG_PEEK))
+ sk_eat_skb(sk, skb);
break;
} while (len > 0);
@@ -1932,16 +1890,6 @@ skip_copy:
tp->ucopy.len = 0;
}
-#ifdef CONFIG_NET_DMA
- tcp_service_net_dma(sk, true); /* Wait for queue to drain */
- tp->ucopy.dma_chan = NULL;
-
- if (tp->ucopy.pinned_list) {
- dma_unpin_iovec_pages(tp->ucopy.pinned_list);
- tp->ucopy.pinned_list = NULL;
- }
-#endif
-
/* According to UNIX98, msg_name/msg_namelen are ignored
* on connected socket. I was just happy when found this 8) --ANK
*/
@@ -2096,8 +2044,10 @@ void tcp_close(struct sock *sk, long timeout)
* reader process may not have drained the data yet!
*/
while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
- u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq -
- tcp_hdr(skb)->fin;
+ u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq;
+
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
+ len--;
data_was_unread += len;
__kfree_skb(skb);
}
@@ -2186,7 +2136,7 @@ adjudge_to_death:
/* This is a (useful) BSD violating of the RFC. There is a
* problem with TCP as specified in that the other end could
* keep a socket open forever with no application left this end.
- * We use a 3 minute timeout (about the same as BSD) then kill
+ * We use a 1 minute timeout (about the same as BSD) then kill
* our end. If they send after that then tough - BUT: long enough
* that we won't make the old 4*rto = almost no time - whoops
* reset mistake.
@@ -2285,9 +2235,6 @@ int tcp_disconnect(struct sock *sk, int flags)
__skb_queue_purge(&sk->sk_receive_queue);
tcp_write_queue_purge(sk);
__skb_queue_purge(&tp->out_of_order_queue);
-#ifdef CONFIG_NET_DMA
- __skb_queue_purge(&sk->sk_async_wait_queue);
-#endif
inet->inet_dport = 0;
@@ -2296,7 +2243,7 @@ int tcp_disconnect(struct sock *sk, int flags)
sk->sk_shutdown = 0;
sock_reset_flag(sk, SOCK_DONE);
- tp->srtt = 0;
+ tp->srtt_us = 0;
if ((tp->write_seq += tp->max_window + 2) == 0)
tp->write_seq = 1;
icsk->icsk_backoff = 0;
@@ -2627,7 +2574,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
break;
#endif
case TCP_USER_TIMEOUT:
- /* Cap the max timeout in ms TCP will retry/retrans
+ /* Cap the max time in ms TCP will retry or probe the window
* before giving up and aborting (ETIMEDOUT) a connection.
*/
if (val < 0)
@@ -2740,8 +2687,8 @@ void tcp_get_info(const struct sock *sk, struct tcp_info *info)
info->tcpi_pmtu = icsk->icsk_pmtu_cookie;
info->tcpi_rcv_ssthresh = tp->rcv_ssthresh;
- info->tcpi_rtt = jiffies_to_usecs(tp->srtt)>>3;
- info->tcpi_rttvar = jiffies_to_usecs(tp->mdev)>>2;
+ info->tcpi_rtt = tp->srtt_us >> 3;
+ info->tcpi_rttvar = tp->mdev_us >> 2;
info->tcpi_snd_ssthresh = tp->snd_ssthresh;
info->tcpi_snd_cwnd = tp->snd_cwnd;
info->tcpi_advmss = tp->advmss;
@@ -2751,6 +2698,11 @@ void tcp_get_info(const struct sock *sk, struct tcp_info *info)
info->tcpi_rcv_space = tp->rcvq_space.space;
info->tcpi_total_retrans = tp->total_retrans;
+
+ info->tcpi_pacing_rate = sk->sk_pacing_rate != ~0U ?
+ sk->sk_pacing_rate : ~0ULL;
+ info->tcpi_max_pacing_rate = sk->sk_max_pacing_rate != ~0U ?
+ sk->sk_max_pacing_rate : ~0ULL;
}
EXPORT_SYMBOL_GPL(tcp_get_info);
@@ -2866,6 +2818,14 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
case TCP_USER_TIMEOUT:
val = jiffies_to_msecs(icsk->icsk_user_timeout);
break;
+
+ case TCP_FASTOPEN:
+ if (icsk->icsk_accept_queue.fastopenq != NULL)
+ val = icsk->icsk_accept_queue.fastopenq->max_qlen;
+ else
+ val = 0;
+ break;
+
case TCP_TIMESTAMP:
val = tcp_time_stamp + tp->tsoffset;
break;
@@ -3093,7 +3053,7 @@ static int __init set_thash_entries(char *str)
}
__setup("thash_entries=", set_thash_entries);
-static void tcp_init_mem(void)
+static void __init tcp_init_mem(void)
{
unsigned long limit = nr_free_buffer_pages() / 8;
limit = max(limit, 128UL);
@@ -3111,8 +3071,8 @@ void __init tcp_init(void)
BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
- percpu_counter_init(&tcp_sockets_allocated, 0);
- percpu_counter_init(&tcp_orphan_count, 0);
+ percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL);
+ percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL);
tcp_hashinfo.bind_bucket_cachep =
kmem_cache_create("tcp_bind_bucket",
sizeof(struct inet_bind_bucket), 0,
@@ -3179,8 +3139,6 @@ void __init tcp_init(void)
tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
tcp_metrics_init();
-
- tcp_register_congestion_control(&tcp_reno);
-
+ BUG_ON(tcp_register_congestion_control(&tcp_reno) != 0);
tcp_tasklet_init();
}