From 38f0ec724f5306c81130ca9343c856aa37a76d54 Mon Sep 17 00:00:00 2001 From: Robert Love Date: Mon, 12 May 2008 17:08:29 -0400 Subject: net: socket ioctl to reset connections matching local address Introduce a new socket ioctl, SIOCKILLADDR, that nukes all sockets bound to the same local address. This is useful in situations with dynamic IPs, to kill stuck connections. Signed-off-by: Brian Swetland net: fix tcp_v4_nuke_addr Signed-off-by: Dima Zavin net: ipv4: Fix a spinlock recursion bug in tcp_v4_nuke. We can't hold the lock while calling to tcp_done(), so we drop it before calling. We then have to start at the top of the chain again. Signed-off-by: Dima Zavin net: ipv4: Fix race in tcp_v4_nuke_addr(). To fix a recursive deadlock in 2.6.29, we stopped holding the hash table lock across tcp_done() calls. This fixed the deadlock, but introduced a race where the socket could die or change state. Fix: Before unlocking the hash table, we grab a reference to the socket. We can then unlock the hash table without risk of the socket going away. We then lock the socket, which is safe because it is pinned. We can then call tcp_done() without recursive deadlock and without race. Upon return, we unlock the socket and then unpin it, killing it. Change-Id: Idcdae072b48238b01bdbc8823b60310f1976e045 Signed-off-by: Robert Love Acked-by: Dima Zavin ipv4: disable bottom halves around call to tcp_done(). Signed-off-by: Robert Love Signed-off-by: Colin Cross ipv4: Move sk_error_report inside bh_lock_sock in tcp_v4_nuke_addr When sk_error_report is called, it wakes up the user-space thread, which then calls tcp_close. When the tcp_close is interrupted by the tcp_v4_nuke_addr ioctl thread running tcp_done, it leaks 392 bytes and triggers a WARN_ON. This patch moves the call to sk_error_report inside the bh_lock_sock, which matches the locking used in tcp_v4_err. 
Signed-off-by: Colin Cross --- net/ipv4/tcp.c | 107 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) (limited to 'net/ipv4/tcp.c') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c82cca18c90f..f94bc2cf50d3 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -275,6 +275,9 @@ #include #include #include +#include +#include +#include #include #include @@ -3187,3 +3190,107 @@ void __init tcp_init(void) BUG_ON(tcp_register_congestion_control(&tcp_reno) != 0); tcp_tasklet_init(); } + +static int tcp_is_local(struct net *net, __be32 addr) { + struct rtable *rt; + struct flowi4 fl4 = { .daddr = addr }; + rt = ip_route_output_key(net, &fl4); + if (IS_ERR_OR_NULL(rt)) + return 0; + return rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK); +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static int tcp_is_local6(struct net *net, struct in6_addr *addr) { + struct rt6_info *rt6 = rt6_lookup(net, addr, addr, 0, 0); + return rt6 && rt6->dst.dev && (rt6->dst.dev->flags & IFF_LOOPBACK); +} +#endif + +/* + * tcp_nuke_addr - destroy all sockets on the given local address + * if local address is the unspecified address (0.0.0.0 or ::), destroy all + * sockets with local addresses that are not configured. 
+ */ +int tcp_nuke_addr(struct net *net, struct sockaddr *addr) +{ + int family = addr->sa_family; + unsigned int bucket; + + struct in_addr *in; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct in6_addr *in6; +#endif + if (family == AF_INET) { + in = &((struct sockaddr_in *)addr)->sin_addr; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + } else if (family == AF_INET6) { + in6 = &((struct sockaddr_in6 *)addr)->sin6_addr; +#endif + } else { + return -EAFNOSUPPORT; + } + + for (bucket = 0; bucket < tcp_hashinfo.ehash_mask; bucket++) { + struct hlist_nulls_node *node; + struct sock *sk; + spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, bucket); + +restart: + spin_lock_bh(lock); + sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[bucket].chain) { + struct inet_sock *inet = inet_sk(sk); + + if (sysctl_ip_dynaddr && sk->sk_state == TCP_SYN_SENT) + continue; + if (sock_flag(sk, SOCK_DEAD)) + continue; + + if (family == AF_INET) { + __be32 s4 = inet->inet_rcv_saddr; + if (s4 == LOOPBACK4_IPV6) + continue; + + if (in->s_addr != s4 && + !(in->s_addr == INADDR_ANY && + !tcp_is_local(net, s4))) + continue; + } + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + if (family == AF_INET6) { + struct in6_addr *s6; + if (!inet->pinet6) + continue; + + s6 = &inet->pinet6->rcv_saddr; + if (ipv6_addr_type(s6) == IPV6_ADDR_MAPPED) + continue; + + if (!ipv6_addr_equal(in6, s6) && + !(ipv6_addr_equal(in6, &in6addr_any) && + !tcp_is_local6(net, s6))) + continue; + } +#endif + + sock_hold(sk); + spin_unlock_bh(lock); + + local_bh_disable(); + bh_lock_sock(sk); + sk->sk_err = ETIMEDOUT; + sk->sk_error_report(sk); + + tcp_done(sk); + bh_unlock_sock(sk); + local_bh_enable(); + sock_put(sk); + + goto restart; + } + spin_unlock_bh(lock); + } + + return 0; +} -- cgit v1.2.3 From 4747299b2c8e8778927b3df0501023d76fe4f2d5 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 9 Jul 2015 17:17:57 -0700 Subject: net: fix iterating over hashtable in 
tcp_nuke_addr() The actual size of the tcp hashinfo table is tcp_hashinfo.ehash_mask + 1, so we need to adjust the loop accordingly to get the sockets hashed into the last bucket. Change-Id: I796b3c7b4a1a7fa35fba9e5192a4a403eb6e17de Signed-off-by: Dmitry Torokhov --- net/ipv4/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/tcp.c') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f94bc2cf50d3..c8cfe784c79a 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3231,7 +3231,7 @@ int tcp_nuke_addr(struct net *net, struct sockaddr *addr) return -EAFNOSUPPORT; } - for (bucket = 0; bucket < tcp_hashinfo.ehash_mask; bucket++) { + for (bucket = 0; bucket <= tcp_hashinfo.ehash_mask; bucket++) { struct hlist_nulls_node *node; struct sock *sk; spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, bucket); -- cgit v1.2.3 From 08f7c4280cd5efe9e274240c42177f459431bac2 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 3 Sep 2015 13:08:37 -0700 Subject: net: fix crash in tcp_nuke_addr() When iterating through sockets we need to skip sockets in TIME_WAIT state, as they use the lightweight structure inet_timewait_sock, which does not have an sk_lock member; if we try to lock them we'll crash thusly: [ 89.376383] BUG: spinlock lockup suspected on CPU#0, netd/431 [ 89.382139] lock: 0xffffffc039d05070, .magic: 66d30606, .owner: /-1682098992, .owner_cpu: 0 [ 89.390598] CPU: 0 PID: 431 Comm: netd Tainted: G U W 3.18.0 #5 [ 89.397389] Hardware name: Google Tegra210 Smaug Rev 1+ (DT) [ 89.403049] Call trace: [ 89.405501] [] dump_backtrace+0x0/0x10c [ 89.410918] [] show_stack+0x10/0x1c [ 89.415971] [] dump_stack+0x74/0x94 [ 89.421018] [] spin_dump+0x78/0x88 [ 89.425984] [] do_raw_spin_lock+0xfc/0x158 [ 89.431666] [] _raw_spin_lock+0x34/0x44 [ 89.437059] [] tcp_nuke_addr+0x1fc/0x29c [ 89.442548] [] devinet_ioctl+0x288/0x680 [ 89.448053] [] inet_ioctl+0xc4/0xf4 [ 89.453103] [] sock_do_ioctl+0x2c/0x5c [ 89.458408] [] sock_ioctl+0x210/0x230 [ 89.463633] [] 
do_vfs_ioctl+0x4ac/0x590 [ 89.469049] [] SyS_ioctl+0x5c/0x88 (or with NULL pointer dereference if lockdep is still working). Change-Id: I07c70d9a60b125b1070ff05c4eec27daee1a3e90 Signed-off-by: Dmitry Torokhov --- net/ipv4/tcp.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'net/ipv4/tcp.c') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c8cfe784c79a..47b147ca69d1 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3241,8 +3241,19 @@ restart: sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[bucket].chain) { struct inet_sock *inet = inet_sk(sk); + if (sk->sk_state == TCP_TIME_WAIT) { + /* + * Sockets that are in TIME_WAIT state are + * instances of lightweight inet_timewait_sock, + * we should simply skip them (or we'll try to + * access non-existing fields and crash). + */ + continue; + } + if (sysctl_ip_dynaddr && sk->sk_state == TCP_SYN_SENT) continue; + if (sock_flag(sk, SOCK_DEAD)) continue; -- cgit v1.2.3 From 6b6d5fbf9ae567aefb58099a30bbb6d25fa8925b Mon Sep 17 00:00:00 2001 From: Mike Chan Date: Wed, 7 Jan 2009 11:40:42 -0800 Subject: misc: uidstat: Adding uid stat driver to collect network statistics. 
Signed-off-by: Mike Chan --- net/ipv4/tcp.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'net/ipv4/tcp.c') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 47b147ca69d1..cab28756a96b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -269,6 +269,7 @@ #include #include #include +#include #include #include @@ -1286,6 +1287,9 @@ out: tcp_push(sk, flags, mss_now, tp->nonagle, size_goal); out_nopush: release_sock(sk); + + if (copied + copied_syn) + uid_stat_tcp_snd(current_uid(), copied + copied_syn); return copied + copied_syn; do_fault: @@ -1560,6 +1564,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, if (copied > 0) { tcp_recv_skb(sk, seq, &offset); tcp_cleanup_rbuf(sk, copied); + uid_stat_tcp_rcv(current_uid(), copied); } return copied; } @@ -1893,6 +1898,9 @@ skip_copy: tcp_cleanup_rbuf(sk, copied); release_sock(sk); + + if (copied > 0) + uid_stat_tcp_rcv(current_uid(), copied); return copied; out: @@ -1901,6 +1909,8 @@ out: recv_urg: err = tcp_recv_urg(sk, msg, len, flags); + if (err > 0) + uid_stat_tcp_rcv(current_uid(), err); goto out; recv_sndq: -- cgit v1.2.3 From 8a8ad1cf6587b127429ed180788cfe5137a53a23 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 28 Mar 2014 12:19:27 -0700 Subject: net: kuid/kguid build fixes Small build fixes for xt_quota2 and ipv4 changes Change-Id: Ib098768040c8875887b2081c3165a6c83b37e180 Signed-off-by: John Stultz --- net/ipv4/tcp.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'net/ipv4/tcp.c') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index cab28756a96b..ced1683b2f3b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1289,7 +1289,8 @@ out_nopush: release_sock(sk); if (copied + copied_syn) - uid_stat_tcp_snd(current_uid(), copied + copied_syn); + uid_stat_tcp_snd(from_kuid(&init_user_ns, current_uid()), + copied + copied_syn); return copied + copied_syn; do_fault: @@ -1564,7 +1565,8 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, if 
(copied > 0) { tcp_recv_skb(sk, seq, &offset); tcp_cleanup_rbuf(sk, copied); - uid_stat_tcp_rcv(current_uid(), copied); + uid_stat_tcp_rcv(from_kuid(&init_user_ns, current_uid()), + copied); } return copied; } @@ -1900,7 +1902,8 @@ skip_copy: release_sock(sk); if (copied > 0) - uid_stat_tcp_rcv(current_uid(), copied); + uid_stat_tcp_rcv(from_kuid(&init_user_ns, current_uid()), + copied); return copied; out: @@ -1910,7 +1913,8 @@ out: recv_urg: err = tcp_recv_urg(sk, msg, len, flags); if (err > 0) - uid_stat_tcp_rcv(current_uid(), err); + uid_stat_tcp_rcv(from_kuid(&init_user_ns, current_uid()), + err); goto out; recv_sndq: @@ -3229,7 +3233,7 @@ int tcp_nuke_addr(struct net *net, struct sockaddr *addr) struct in_addr *in; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - struct in6_addr *in6; + struct in6_addr *in6 = NULL; #endif if (family == AF_INET) { in = &((struct sockaddr_in *)addr)->sin_addr; @@ -3281,10 +3285,8 @@ restart: #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) if (family == AF_INET6) { struct in6_addr *s6; - if (!inet->pinet6) - continue; - s6 = &inet->pinet6->rcv_saddr; + s6 = &sk->sk_v6_rcv_saddr; if (ipv6_addr_type(s6) == IPV6_ADDR_MAPPED) continue; -- cgit v1.2.3 From 8bf4413b4f54e24120b90ecbfee426beeddc3ff0 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Wed, 28 Oct 2015 15:56:59 +0900 Subject: Don't kill IPv4 sockets when killing IPv6 sockets was requested. c7c3ec4903d32c60423ee013d96e94602f66042c cherry-picked the tcp_nuke_addr ioctl, but omitted a check that ensures that a socket is an IPv6 socket. This makes it so that if we issue a SIOCKILLADDR on ::, it kills IPv4 sockets as well. This is because every IPv4 socket has an IPv6 source address (sk_v6_rcv_saddr) of ::. Thus, when we iterate over an IPv4 socket, and compare the source address of the socket to the source address in the ioctl, it matches the :: that was passed in, and we kill the socket. 
Change-Id: I736431a898e6ec91536536d352936a210aa10100 --- net/ipv4/tcp.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net/ipv4/tcp.c') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ced1683b2f3b..65dc38a429ae 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3285,6 +3285,8 @@ restart: #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) if (family == AF_INET6) { struct in6_addr *s6; + if (!inet->pinet6) + continue; s6 = &sk->sk_v6_rcv_saddr; if (ipv6_addr_type(s6) == IPV6_ADDR_MAPPED) -- cgit v1.2.3 From 3823c8136f2170b3ac5e6a5f8b857746a786e845 Mon Sep 17 00:00:00 2001 From: Tushar Behera Date: Wed, 26 Mar 2014 15:27:05 +0530 Subject: tcp: Fix IPV6 module build errors If CONFIG_IPV6=m is selected, we get the following build errors: net/built-in.o: In function `tcp_is_local6': net/ipv4/tcp.c:3261: undefined reference to `rt6_lookup' Making the code conditional upon only CONFIG_IPV6=y fixes this issue. Also export tcp_nuke_addr to build IPv6 modules. Otherwise we run into the following build error: CC [M] lib/zlib_deflate/deftree.o CC [M] lib/zlib_deflate/deflate_syms.o LD [M] lib/zlib_deflate/zlib_deflate.o Building modules, stage 2. MODPOST 46 modules ERROR: "tcp_nuke_addr" [net/ipv6/ipv6.ko] undefined! 
make[2]: *** [__modpost] Error 1 Signed-off-by: Tushar Behera CC: John Stultz Signed-off-by: John Stultz Signed-off-by: Amit Pundir --- net/ipv4/tcp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net/ipv4/tcp.c') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 65dc38a429ae..e8c126a52551 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3214,7 +3214,7 @@ static int tcp_is_local(struct net *net, __be32 addr) { return rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK); } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if defined(CONFIG_IPV6) static int tcp_is_local6(struct net *net, struct in6_addr *addr) { struct rt6_info *rt6 = rt6_lookup(net, addr, addr, 0, 0); return rt6 && rt6->dst.dev && (rt6->dst.dev->flags & IFF_LOOPBACK); @@ -3282,7 +3282,7 @@ restart: continue; } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if defined(CONFIG_IPV6) if (family == AF_INET6) { struct in6_addr *s6; if (!inet->pinet6) @@ -3319,3 +3319,4 @@ restart: return 0; } +EXPORT_SYMBOL_GPL(tcp_nuke_addr); -- cgit v1.2.3