From b0dd663b60944a3ce86430fa35549fb37968bda0 Mon Sep 17 00:00:00 2001 From: Sonic Zhang Date: Wed, 11 Sep 2013 11:31:53 +0800 Subject: netpoll: Should handle ETH_P_ARP other than ETH_P_IP in netpoll_neigh_reply The received ARP request type in the Ethernet packet head is ETH_P_ARP other than ETH_P_IP. [ Bug introduced by commit b7394d2429c198b1da3d46ac39192e891029ec0f ("netpoll: prepare for ipv6") ] Signed-off-by: Sonic Zhang Signed-off-by: David S. Miller --- net/core/netpoll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 2c637e9a0b27..c3c7b27c112d 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -550,7 +550,7 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo return; proto = ntohs(eth_hdr(skb)->h_proto); - if (proto == ETH_P_IP) { + if (proto == ETH_P_ARP) { struct arphdr *arp; unsigned char *arp_ptr; /* No arp on this interface */ -- cgit v1.2.3 From d0fe8c888b1fd1a2f84b9962cabcb98a70988aec Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 19 Sep 2013 15:02:35 +0200 Subject: netpoll: fix NULL pointer dereference in netpoll_cleanup I've been hitting a NULL ptr deref while using netconsole because the np->dev check and the pointer manipulation in netpoll_cleanup are done without rtnl and the following sequence happens when having a netconsole over a vlan and we remove the vlan while disabling the netconsole: CPU 1 CPU2 removes vlan and calls the notifier enters store_enabled(), calls netdev_cleanup which checks np->dev and then waits for rtnl executes the netconsole netdev release notifier making np->dev == NULL and releases rtnl continues to dereference a member of np->dev which at this point is == NULL Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/core/netpoll.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net/core') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index c3c7b27c112d..fc75c9e461b8 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -1284,15 +1284,14 @@ EXPORT_SYMBOL_GPL(__netpoll_free_async); void netpoll_cleanup(struct netpoll *np) { - if (!np->dev) - return; - rtnl_lock(); + if (!np->dev) + goto out; __netpoll_cleanup(np); - rtnl_unlock(); - dev_put(np->dev); np->dev = NULL; +out: + rtnl_unlock(); } EXPORT_SYMBOL(netpoll_cleanup); -- cgit v1.2.3 From 50624c934db18ab90aaea4908f60dd39aab4e6e5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 23 Sep 2013 21:19:49 -0700 Subject: net: Delay default_device_exit_batch until no devices are unregistering v2 There is currently serialization network namespaces exiting and network devices exiting as the final part of netdev_run_todo does not happen under the rtnl_lock. This is compounded by the fact that the only list of devices unregistering in netdev_run_todo is local to the netdev_run_todo. This lack of serialization in extreme cases results in network devices unregistering in netdev_run_todo after the loopback device of their network namespace has been freed (making dst_ifdown unsafe), and after the their network namespace has exited (making the NETDEV_UNREGISTER, and NETDEV_UNREGISTER_FINAL callbacks unsafe). Add the missing serialization by a per network namespace count of how many network devices are unregistering and having a wait queue that is woken up whenever the count is decreased. The count and wait queue allow default_device_exit_batch to wait until all of the unregistration activity for a network namespace has finished before proceeding to unregister the loopback device and then allowing the network namespace to exit. Only a single global wait queue is used because there is a single global lock, and there is a single waiter, per network namespace wait queues would be a waste of resources. The per network namespace count of unregistering devices gives a progress guarantee because the number of network devices unregistering in an exiting network namespace must ultimately drop to zero (assuming network device unregistration completes). The basic logic remains the same as in v1. This patch is now half comment and half rtnl_lock_unregistering an expanded version of wait_event performs no extra work in the common case where no network devices are unregistering when we get to default_device_exit_batch. Reported-by: Francesco Ruggeri Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/core/dev.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 5c713f2239cc..65f829cfd928 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5247,10 +5247,12 @@ static int dev_new_index(struct net *net) /* Delayed registration/unregisteration */ static LIST_HEAD(net_todo_list); +static DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq); static void net_set_todo(struct net_device *dev) { list_add_tail(&dev->todo_list, &net_todo_list); + dev_net(dev)->dev_unreg_count++; } static void rollback_registered_many(struct list_head *head) @@ -5918,6 +5920,12 @@ void netdev_run_todo(void) if (dev->destructor) dev->destructor(dev); + /* Report a network device has been unregistered */ + rtnl_lock(); + dev_net(dev)->dev_unreg_count--; + __rtnl_unlock(); + wake_up(&netdev_unregistering_wq); + /* Free network device */ kobject_put(&dev->dev.kobj); } @@ -6603,6 +6611,34 @@ static void __net_exit default_device_exit(struct net *net) rtnl_unlock(); } +static void __net_exit rtnl_lock_unregistering(struct list_head *net_list) +{ + /* Return with the rtnl_lock held when there are no network + * devices unregistering in any network namespace in net_list. + */ + struct net *net; + bool unregistering; + DEFINE_WAIT(wait); + + for (;;) { + prepare_to_wait(&netdev_unregistering_wq, &wait, + TASK_UNINTERRUPTIBLE); + unregistering = false; + rtnl_lock(); + list_for_each_entry(net, net_list, exit_list) { + if (net->dev_unreg_count > 0) { + unregistering = true; + break; + } + } + if (!unregistering) + break; + __rtnl_unlock(); + schedule(); + } + finish_wait(&netdev_unregistering_wq, &wait); +} + static void __net_exit default_device_exit_batch(struct list_head *net_list) { /* At exit all network devices most be removed from a network @@ -6614,7 +6650,18 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list) struct net *net; LIST_HEAD(dev_kill_list); - rtnl_lock(); + /* To prevent network device cleanup code from dereferencing + * loopback devices or network devices that have been freed + * wait here for all pending unregistrations to complete, + * before unregistring the loopback device and allowing the + * network namespace be freed. + * + * The netdev todo list containing all network devices + * unregistrations that happen in default_device_exit_batch + * will run in the rtnl_unlock() at the end of + * default_device_exit_batch. + */ + rtnl_lock_unregistering(net_list); list_for_each_entry(net, net_list, exit_list) { for_each_netdev_reverse(net, dev) { if (dev->rtnl_link_ops) -- cgit v1.2.3 From 9a3bab6b05383f1e4c3716b3615500c51285959e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 24 Sep 2013 06:19:57 -0700 Subject: net: net_secret should not depend on TCP A host might need net_secret[] and never open a single socket. Problem added in commit aebda156a570782 ("net: defer net_secret[] initialization") Based on prior patch from Hannes Frederic Sowa. Reported-by: Hannes Frederic Sowa Signed-off-by: Eric Dumazet Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/core/secure_seq.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) (limited to 'net/core') diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 6a2f13cee86a..3f1ec1586ae1 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -10,11 +10,24 @@ #include -static u32 net_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned; +#define NET_SECRET_SIZE (MD5_MESSAGE_BYTES / 4) -void net_secret_init(void) +static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned; + +static void net_secret_init(void) { - get_random_bytes(net_secret, sizeof(net_secret)); + u32 tmp; + int i; + + if (likely(net_secret[0])) + return; + + for (i = NET_SECRET_SIZE; i > 0;) { + do { + get_random_bytes(&tmp, sizeof(tmp)); + } while (!tmp); + cmpxchg(&net_secret[--i], 0, tmp); + } } #ifdef CONFIG_INET @@ -42,6 +55,7 @@ __u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, u32 hash[MD5_DIGEST_WORDS]; u32 i; + net_secret_init(); memcpy(hash, saddr, 16); for (i = 0; i < 4; i++) secret[i] = net_secret[i] + (__force u32)daddr[i]; @@ -63,6 +77,7 @@ u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, u32 hash[MD5_DIGEST_WORDS]; u32 i; + net_secret_init(); memcpy(hash, saddr, 16); for (i = 0; i < 4; i++) secret[i] = net_secret[i] + (__force u32) daddr[i]; @@ -82,6 +97,7 @@ __u32 secure_ip_id(__be32 daddr) { u32 hash[MD5_DIGEST_WORDS]; + net_secret_init(); hash[0] = (__force __u32) daddr; hash[1] = net_secret[13]; hash[2] = net_secret[14]; @@ -96,6 +112,7 @@ __u32 secure_ipv6_id(const __be32 daddr[4]) { __u32 hash[4]; + net_secret_init(); memcpy(hash, daddr, 16); md5_transform(hash, net_secret); @@ -107,6 +124,7 @@ __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, { u32 hash[MD5_DIGEST_WORDS]; + net_secret_init(); hash[0] = (__force u32)saddr; hash[1] = (__force u32)daddr; hash[2] = ((__force u16)sport << 16) + (__force u16)dport; @@ -121,6 +139,7 @@ u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) { u32 hash[MD5_DIGEST_WORDS]; + net_secret_init(); hash[0] = (__force u32)saddr; hash[1] = (__force u32)daddr; hash[2] = (__force u32)dport ^ net_secret[14]; @@ -140,6 +159,7 @@ u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, u32 hash[MD5_DIGEST_WORDS]; u64 seq; + net_secret_init(); hash[0] = (__force u32)saddr; hash[1] = (__force u32)daddr; hash[2] = ((__force u16)sport << 16) + (__force u16)dport; @@ -164,6 +184,7 @@ u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, u64 seq; u32 i; + net_secret_init(); memcpy(hash, saddr, 16); for (i = 0; i < 4; i++) secret[i] = net_secret[i] + daddr[i]; -- cgit v1.2.3 From b86783587b3d1d552326d955acee37eac48800f1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Sep 2013 08:44:06 -0700 Subject: net: flow_dissector: fix thoff for IPPROTO_AH In commit 8ed781668dd49 ("flow_keys: include thoff into flow_keys for later usage"), we missed that existing code was using nhoff as a temporary variable that could not always contain transport header offset. This is not a problem for TCP/UDP because port offset (@poff) is 0 for these protocols. Signed-off-by: Eric Dumazet Cc: Daniel Borkmann Cc: Nikolay Aleksandrov Acked-by: Nikolay Aleksandrov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/core/flow_dissector.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/core') diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 1929af87b260..8d7d0dd72db2 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -154,8 +154,8 @@ ipv6: if (poff >= 0) { __be32 *ports, _ports; - nhoff += poff; - ports = skb_header_pointer(skb, nhoff, sizeof(_ports), &_ports); + ports = skb_header_pointer(skb, nhoff + poff, + sizeof(_ports), &_ports); if (ports) flow->ports = *ports; } -- cgit v1.2.3