diff options
| author | David Ahern <dsa@cumulusnetworks.com> | 2016-04-07 11:10:06 -0700 | 
|---|---|---|
| committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2020-05-10 10:26:22 +0200 | 
| commit | 77ef57530f1140ce60b31125359819a185a68c58 (patch) | |
| tree | 6318a79135268864fa4643cb9a07fbcc9558f6a5 | |
| parent | 396250c2041a9190e1d8199dfb2598c461de4343 (diff) | |
net: vrf: Fix dst reference counting
commit 9ab179d83b4e31ea277a123492e419067c2f129a upstream.
Vivek reported a kernel exception deleting a VRF with an active
connection through it. The root cause is that the socket has a cached
reference to a dst that is destroyed. Converting the dst_destroy to
dst_release and letting proper reference counting kick in does not
work as the dst has a reference to the device which needs to be released
as well.
I talked to Hannes about this at netdev and he pointed out the ipv4 and
ipv6 dst handling has dst_ifdown for just this scenario. Rather than
continuing with the reinvented dst wheel in VRF just remove it and
leverage the ipv4 and ipv6 versions.
Fixes: 193125dbd8eb2 ("net: Introduce VRF device driver")
Fixes: 35402e3136634 ("net: Add IPv6 support to VRF device")
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
| -rw-r--r-- | drivers/net/vrf.c | 177 | ||||
| -rw-r--r-- | include/net/ip6_route.h | 3 | ||||
| -rw-r--r-- | include/net/route.h | 3 | ||||
| -rw-r--r-- | net/ipv4/route.c | 7 | ||||
| -rw-r--r-- | net/ipv6/route.c | 7 | 
5 files changed, 30 insertions, 167 deletions
| diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index d3d59122a357..27fd5640a273 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -71,41 +71,6 @@ struct pcpu_dstats {  	struct u64_stats_sync	syncp;  }; -static struct dst_entry *vrf_ip_check(struct dst_entry *dst, u32 cookie) -{ -	return dst; -} - -static int vrf_ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) -{ -	return ip_local_out(net, sk, skb); -} - -static unsigned int vrf_v4_mtu(const struct dst_entry *dst) -{ -	/* TO-DO: return max ethernet size? */ -	return dst->dev->mtu; -} - -static void vrf_dst_destroy(struct dst_entry *dst) -{ -	/* our dst lives forever - or until the device is closed */ -} - -static unsigned int vrf_default_advmss(const struct dst_entry *dst) -{ -	return 65535 - 40; -} - -static struct dst_ops vrf_dst_ops = { -	.family		= AF_INET, -	.local_out	= vrf_ip_local_out, -	.check		= vrf_ip_check, -	.mtu		= vrf_v4_mtu, -	.destroy	= vrf_dst_destroy, -	.default_advmss	= vrf_default_advmss, -}; -  /* neighbor handling is done with actual device; do not want   * to flip skb->dev for those ndisc packets. This really fails   * for multiple next protocols (e.g., NEXTHDR_HOP). But it is @@ -363,46 +328,6 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev)  }  #if IS_ENABLED(CONFIG_IPV6) -static struct dst_entry *vrf_ip6_check(struct dst_entry *dst, u32 cookie) -{ -	return dst; -} - -static struct dst_ops vrf_dst_ops6 = { -	.family		= AF_INET6, -	.local_out	= ip6_local_out, -	.check		= vrf_ip6_check, -	.mtu		= vrf_v4_mtu, -	.destroy	= vrf_dst_destroy, -	.default_advmss	= vrf_default_advmss, -}; - -static int init_dst_ops6_kmem_cachep(void) -{ -	vrf_dst_ops6.kmem_cachep = kmem_cache_create("vrf_ip6_dst_cache", -						     sizeof(struct rt6_info), -						     0, -						     SLAB_HWCACHE_ALIGN, -						     NULL); - -	if (!vrf_dst_ops6.kmem_cachep) -		return -ENOMEM; - -	return 0; -} - -static void free_dst_ops6_kmem_cachep(void) -{ -	kmem_cache_destroy(vrf_dst_ops6.kmem_cachep); -} - -static int vrf_input6(struct sk_buff *skb) -{ -	skb->dev->stats.rx_errors++; -	kfree_skb(skb); -	return 0; -} -  /* modelled after ip6_finish_output2 */  static int vrf_finish_output6(struct net *net, struct sock *sk,  			      struct sk_buff *skb) @@ -445,67 +370,34 @@ static int vrf_output6(struct net *net, struct sock *sk, struct sk_buff *skb)  			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));  } -static void vrf_rt6_destroy(struct net_vrf *vrf) +static void vrf_rt6_release(struct net_vrf *vrf)  { -	dst_destroy(&vrf->rt6->dst); -	free_percpu(vrf->rt6->rt6i_pcpu); +	dst_release(&vrf->rt6->dst);  	vrf->rt6 = NULL;  }  static int vrf_rt6_create(struct net_device *dev)  {  	struct net_vrf *vrf = netdev_priv(dev); -	struct dst_entry *dst; +	struct net *net = dev_net(dev);  	struct rt6_info *rt6; -	int cpu;  	int rc = -ENOMEM; -	rt6 = dst_alloc(&vrf_dst_ops6, dev, 0, -			DST_OBSOLETE_NONE, -			(DST_HOST | DST_NOPOLICY | DST_NOXFRM)); +	rt6 = ip6_dst_alloc(net, dev, +			    DST_HOST | DST_NOPOLICY | DST_NOXFRM | DST_NOCACHE);  	if (!rt6)  		goto out; -	dst = &rt6->dst; - -	rt6->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_KERNEL); -	if (!rt6->rt6i_pcpu) { -		dst_destroy(dst); -		goto out; -	} -	for_each_possible_cpu(cpu) { -		struct rt6_info **p = per_cpu_ptr(rt6->rt6i_pcpu, cpu); -		*p =  NULL; -	} - -	memset(dst + 1, 0, sizeof(*rt6) - sizeof(*dst)); - -	INIT_LIST_HEAD(&rt6->rt6i_siblings); -	INIT_LIST_HEAD(&rt6->rt6i_uncached); - -	rt6->dst.input	= vrf_input6;  	rt6->dst.output	= vrf_output6; - -	rt6->rt6i_table = fib6_get_table(dev_net(dev), vrf->tb_id); - -	atomic_set(&rt6->dst.__refcnt, 2); - +	rt6->rt6i_table = fib6_get_table(net, vrf->tb_id); +	dst_hold(&rt6->dst);  	vrf->rt6 = rt6;  	rc = 0;  out:  	return rc;  }  #else -static int init_dst_ops6_kmem_cachep(void) -{ -	return 0; -} - -static void free_dst_ops6_kmem_cachep(void) -{ -} - -static void vrf_rt6_destroy(struct net_vrf *vrf) +static void vrf_rt6_release(struct net_vrf *vrf)  {  } @@ -577,11 +469,11 @@ static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb)  			    !(IPCB(skb)->flags & IPSKB_REROUTED));  } -static void vrf_rtable_destroy(struct net_vrf *vrf) +static void vrf_rtable_release(struct net_vrf *vrf)  {  	struct dst_entry *dst = (struct dst_entry *)vrf->rth; -	dst_destroy(dst); +	dst_release(dst);  	vrf->rth = NULL;  } @@ -590,22 +482,10 @@ static struct rtable *vrf_rtable_create(struct net_device *dev)  	struct net_vrf *vrf = netdev_priv(dev);  	struct rtable *rth; -	rth = dst_alloc(&vrf_dst_ops, dev, 2, -			DST_OBSOLETE_NONE, -			(DST_HOST | DST_NOPOLICY | DST_NOXFRM)); +	rth = rt_dst_alloc(dev, 0, RTN_UNICAST, 1, 1, 0);  	if (rth) {  		rth->dst.output	= vrf_output; -		rth->rt_genid	= rt_genid_ipv4(dev_net(dev)); -		rth->rt_flags	= 0; -		rth->rt_type	= RTN_UNICAST; -		rth->rt_is_input = 0; -		rth->rt_iif	= 0; -		rth->rt_pmtu	= 0; -		rth->rt_gateway	= 0; -		rth->rt_uses_gateway = 0;  		rth->rt_table_id = vrf->tb_id; -		INIT_LIST_HEAD(&rth->rt_uncached); -		rth->rt_uncached_list = NULL;  	}  	return rth; @@ -739,8 +619,8 @@ static void vrf_dev_uninit(struct net_device *dev)  //	struct list_head *head = &queue->all_slaves;  //	struct slave *slave, *next; -	vrf_rtable_destroy(vrf); -	vrf_rt6_destroy(vrf); +	vrf_rtable_release(vrf); +	vrf_rt6_release(vrf);  //	list_for_each_entry_safe(slave, next, head, list)  //		vrf_del_slave(dev, slave->dev); @@ -772,7 +652,7 @@ static int vrf_dev_init(struct net_device *dev)  	return 0;  out_rth: -	vrf_rtable_destroy(vrf); +	vrf_rtable_release(vrf);  out_stats:  	free_percpu(dev->dstats);  	dev->dstats = NULL; @@ -805,7 +685,7 @@ static struct rtable *vrf_get_rtable(const struct net_device *dev,  		struct net_vrf *vrf = netdev_priv(dev);  		rth = vrf->rth; -		atomic_inc(&rth->dst.__refcnt); +		dst_hold(&rth->dst);  	}  	return rth; @@ -856,7 +736,7 @@ static struct dst_entry *vrf_get_rt6_dst(const struct net_device *dev,  		struct net_vrf *vrf = netdev_priv(dev);  		rt = vrf->rt6; -		atomic_inc(&rt->dst.__refcnt); +		dst_hold(&rt->dst);  	}  	return (struct dst_entry *)rt; @@ -1003,19 +883,6 @@ static int __init vrf_init_module(void)  {  	int rc; -	vrf_dst_ops.kmem_cachep = -		kmem_cache_create("vrf_ip_dst_cache", -				  sizeof(struct rtable), 0, -				  SLAB_HWCACHE_ALIGN, -				  NULL); - -	if (!vrf_dst_ops.kmem_cachep) -		return -ENOMEM; - -	rc = init_dst_ops6_kmem_cachep(); -	if (rc != 0) -		goto error2; -  	register_netdevice_notifier(&vrf_notifier_block);  	rc = rtnl_link_register(&vrf_link_ops); @@ -1026,22 +893,10 @@ static int __init vrf_init_module(void)  error:  	unregister_netdevice_notifier(&vrf_notifier_block); -	free_dst_ops6_kmem_cachep(); -error2: -	kmem_cache_destroy(vrf_dst_ops.kmem_cachep);  	return rc;  } -static void __exit vrf_cleanup_module(void) -{ -	rtnl_link_unregister(&vrf_link_ops); -	unregister_netdevice_notifier(&vrf_notifier_block); -	kmem_cache_destroy(vrf_dst_ops.kmem_cachep); -	free_dst_ops6_kmem_cachep(); -} -  module_init(vrf_init_module); -module_exit(vrf_cleanup_module);  MODULE_AUTHOR("Shrijeet Mukherjee, David Ahern");  MODULE_DESCRIPTION("Device driver to instantiate VRF domains");  MODULE_LICENSE("GPL"); diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index df6474c37ca0..8d0a9b1fc39a 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -103,6 +103,9 @@ void fib6_force_start_gc(struct net *net);  struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,  				    const struct in6_addr *addr, bool anycast); +struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev, +			       int flags); +  /*   *	support functions for ND   * diff --git a/include/net/route.h b/include/net/route.h index d2a92d94ff72..6be55d00a200 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -210,6 +210,9 @@ unsigned int inet_addr_type_dev_table(struct net *net,  void ip_rt_multicast_event(struct in_device *);  int ip_rt_ioctl(struct net *, unsigned int cmd, void __user *arg);  void ip_rt_get_source(u8 *src, struct sk_buff *skb, struct rtable *rt); +struct rtable *rt_dst_alloc(struct net_device *dev, +			     unsigned int flags, u16 type, +			     bool nopolicy, bool noxfrm, bool will_cache);  struct in_ifaddr;  void fib_add_ifaddr(struct in_ifaddr *); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d76752a28bfa..29a87fadf01b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1500,9 +1500,9 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,  #endif  } -static struct rtable *rt_dst_alloc(struct net_device *dev, -				   unsigned int flags, u16 type, -				   bool nopolicy, bool noxfrm, bool will_cache) +struct rtable *rt_dst_alloc(struct net_device *dev, +			    unsigned int flags, u16 type, +			    bool nopolicy, bool noxfrm, bool will_cache)  {  	struct rtable *rt; @@ -1531,6 +1531,7 @@ static struct rtable *rt_dst_alloc(struct net_device *dev,  	return rt;  } +EXPORT_SYMBOL(rt_dst_alloc);  /* called in rcu_read_lock() section */  static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 86550ee5da32..50eba77f5a0d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -339,9 +339,9 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net,  	return rt;  } -static struct rt6_info *ip6_dst_alloc(struct net *net, -				      struct net_device *dev, -				      int flags) +struct rt6_info *ip6_dst_alloc(struct net *net, +			       struct net_device *dev, +			       int flags)  {  	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags); @@ -365,6 +365,7 @@ static struct rt6_info *ip6_dst_alloc(struct net *net,  	return rt;  } +EXPORT_SYMBOL(ip6_dst_alloc);  static void ip6_dst_destroy(struct dst_entry *dst)  { | 
