| author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-12-12 18:07:07 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-12-12 18:07:07 -0800 |
| commit | 6be35c700f742e911ecedd07fcc43d4439922334 | |
| tree | ca9f37214d204465fcc2d79c82efd291e357c53c /drivers/net/tun.c | |
| parent | e37aa63e87bd581f9be5555ed0ba83f5295c92fc | |
| parent | 520dfe3a3645257bf83660f672c47f8558f3d4c4 | |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking changes from David Miller:
1) Allow dumping, monitoring, and changing the bridge multicast database
using netlink. From Cong Wang.
2) RFC 5961 TCP blind data injection attack mitigation, from Eric
Dumazet.
3) Networking user namespace support from Eric W. Biederman.
4) tuntap/virtio-net multiqueue support by Jason Wang (a userspace usage
sketch follows this list).
5) Support for checksum offload of encapsulated packets (basically,
tunneled traffic can still be checksummed by HW). From Joseph
Gasparakis.
6) Allow BPF filter access to VLAN tags, from Eric Dumazet and
Daniel Borkmann (a small classic-BPF sketch also appears after the list).
7) Bridge port parameters over netlink and BPDU blocking support
from Stephen Hemminger.
8) Improve data access patterns during inet socket demux by rearranging
socket layout, from Eric Dumazet.
9) TIPC protocol updates and cleanups from Ying Xue, Paul Gortmaker, and
Jon Maloy.
10) Update TCP socket hash sizing to be more in line with current day
realities. The existing heuristics were chosen a decade ago.
From Eric Dumazet.
11) Fix races, queue bloat, and excessive wakeups in ATM and
associated drivers, from Krzysztof Mazur and David Woodhouse.
12) Support DOVE (Distributed Overlay Virtual Ethernet) extensions
in VXLAN driver, from David Stevens.
13) Add "oops_only" mode to netconsole, from Amerigo Wang.
14) Support set and query of VEB/VEPA bridge mode via PF_BRIDGE, also
allow DCB netlink to work on namespaces other than the initial
namespace. From John Fastabend.
15) Support PTP in the Tigon3 driver, from Matt Carlson.
16) tun/vhost zero copy fixes and improvements, plus turn it on
by default, from Michael S. Tsirkin.
17) Support per-association statistics in SCTP, from Michele
Baldessari.
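
The tun.c diff below is the bulk of item 4. For orientation, here is a
minimal userspace sketch of the new multiqueue API: each open of
/dev/net/tun with IFF_MULTI_QUEUE set becomes one queue of the same
device. The device name "mqtap0" and the shortcut error handling are
illustrative assumptions, not part of the kernel change.

```c
#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/if.h>
#include <linux/if_tun.h>

/* Open one queue of a multiqueue tap device. The first call creates the
 * device; subsequent calls with the same name attach additional queues.
 * "mqtap0" is an arbitrary example name. */
static int tap_open_queue(const char *name)
{
        struct ifreq ifr;
        int fd = open("/dev/net/tun", O_RDWR);

        if (fd < 0)
                return -1;

        memset(&ifr, 0, sizeof(ifr));
        ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_MULTI_QUEUE;
        strncpy(ifr.ifr_name, name, IFNAMSIZ - 1);

        if (ioctl(fd, TUNSETIFF, &ifr) < 0) {
                close(fd);
                return -1;
        }
        return fd;      /* one fd == one queue */
}
```

A queue can later be taken out of service and re-enabled without closing
its fd via TUNSETQUEUE with IFF_DETACH_QUEUE / IFF_ATTACH_QUEUE, which the
diff below implements in tun_set_queue().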
And many, many driver updates, cleanups, and improvements. Too
numerous to mention individually.
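
For item 6, the extension exposes VLAN metadata to classic BPF through
ancillary loads. A minimal sketch, assuming the SKF_AD_VLAN_TAG /
SKF_AD_VLAN_TAG_PRESENT offsets added in this cycle; filtering on VLAN
id 100 is an arbitrary example:

```c
#include <linux/filter.h>

/* Accept only frames tagged with VLAN id 100; drop everything else.
 * SKF_AD_* are the "negative offset" ancillary loads of classic BPF. */
struct sock_filter vlan100_prog[] = {
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS, SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT),
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 1, 0, 4),   /* untagged -> drop */
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS, SKF_AD_OFF + SKF_AD_VLAN_TAG),
        BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0x0fff),    /* mask off PCP/DEI bits */
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 100, 0, 1), /* vid != 100 -> drop */
        BPF_STMT(BPF_RET | BPF_K, 0xffff),              /* accept */
        BPF_STMT(BPF_RET | BPF_K, 0),                   /* drop */
};
```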
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1722 commits)
net/mlx4_en: Add support for destination MAC in steering rules
net/mlx4_en: Use generic etherdevice.h functions.
net: ethtool: Add destination MAC address to flow steering API
bridge: add support of adding and deleting mdb entries
bridge: notify mdb changes via netlink
ndisc: Unexport ndisc_{build,send}_skb().
uapi: add missing netconf.h to export list
pkt_sched: avoid requeues if possible
solos-pci: fix double-free of TX skb in DMA mode
bnx2: Fix accidental reversions.
bna: Driver Version Updated to 3.1.2.1
bna: Firmware update
bna: Add RX State
bna: Rx Page Based Allocation
bna: TX Intr Coalescing Fix
bna: Tx and Rx Optimizations
bna: Code Cleanup and Enhancements
ath9k: check pdata variable before dereferencing it
ath5k: RX timestamp is reported at end of frame
ath9k_htc: RX timestamp is reported at end of frame
...
Diffstat (limited to 'drivers/net/tun.c')
| -rw-r--r-- | drivers/net/tun.c | 875 |
1 file changed, 640 insertions, 235 deletions
```diff
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 0873cdcf39be..2ac2164a1e39 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -68,7 +68,6 @@
 #include <net/netns/generic.h>
 #include <net/rtnetlink.h>
 #include <net/sock.h>
-#include <net/cls_cgroup.h>
 
 #include <asm/uaccess.h>
 
@@ -110,16 +109,56 @@ struct tap_filter {
        unsigned char addr[FLT_EXACT_COUNT][ETH_ALEN];
 };
 
+/* 1024 is probably a high enough limit: modern hypervisors seem to support on
+ * the order of 100-200 CPUs so this leaves us some breathing space if we want
+ * to match a queue per guest CPU.
+ */
+#define MAX_TAP_QUEUES 1024
+
+#define TUN_FLOW_EXPIRE (3 * HZ)
+
+/* A tun_file connects an open character device to a tuntap netdevice. It
+ * also contains all socket related structures (except sock_fprog and tap_filter)
+ * to serve as one transmit queue for the tuntap device. The sock_fprog and
+ * tap_filter are kept in tun_struct since they are used for filtering for the
+ * netdevice, not for a specific queue (at least I didn't see the requirement for
+ * this).
+ *
+ * RCU usage:
+ * The tun_file and tun_struct are loosely coupled, the pointer from one to the
+ * other can only be read while rcu_read_lock or rtnl_lock is held.
+ */
 struct tun_file {
-       atomic_t count;
-       struct tun_struct *tun;
+       struct sock sk;
+       struct socket socket;
+       struct socket_wq wq;
+       struct tun_struct __rcu *tun;
        struct net *net;
+       struct fasync_struct *fasync;
+       /* only used for fasync */
+       unsigned int flags;
+       u16 queue_index;
+};
+
+struct tun_flow_entry {
+       struct hlist_node hash_link;
+       struct rcu_head rcu;
+       struct tun_struct *tun;
+
+       u32 rxhash;
+       int queue_index;
+       unsigned long updated;
 };
 
-struct tun_sock;
+#define TUN_NUM_FLOW_ENTRIES 1024
 
+/* Since the sockets were moved to tun_file, to preserve the behavior of a
+ * persist device, the socket filter, sndbuf and vnet header size are restored
+ * when the file is attached to a persist device.
+ */
 struct tun_struct {
-       struct tun_file *tfile;
+       struct tun_file __rcu *tfiles[MAX_TAP_QUEUES];
+       unsigned int numqueues;
        unsigned int flags;
        kuid_t owner;
        kgid_t group;
@@ -128,88 +167,349 @@ struct tun_struct {
        netdev_features_t set_features;
 #define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \
                           NETIF_F_TSO6|NETIF_F_UFO)
-       struct fasync_struct *fasync;
-
-       struct tap_filter txflt;
-       struct socket socket;
-       struct socket_wq wq;
 
        int vnet_hdr_sz;
-
+       int sndbuf;
+       struct tap_filter txflt;
+       struct sock_fprog fprog;
+       /* protected by rtnl lock */
+       bool filter_attached;
 #ifdef TUN_DEBUG
        int debug;
 #endif
+       spinlock_t lock;
+       struct kmem_cache *flow_cache;
+       struct hlist_head flows[TUN_NUM_FLOW_ENTRIES];
+       struct timer_list flow_gc_timer;
+       unsigned long ageing_time;
 };
 
-struct tun_sock {
-       struct sock sk;
-       struct tun_struct *tun;
-};
+static inline u32 tun_hashfn(u32 rxhash)
+{
+       return rxhash & 0x3ff;
+}
 
-static inline struct tun_sock *tun_sk(struct sock *sk)
+static struct tun_flow_entry *tun_flow_find(struct hlist_head *head, u32 rxhash)
 {
-       return container_of(sk, struct tun_sock, sk);
+       struct tun_flow_entry *e;
+       struct hlist_node *n;
+
+       hlist_for_each_entry_rcu(e, n, head, hash_link) {
+               if (e->rxhash == rxhash)
+                       return e;
+       }
+       return NULL;
 }
 
-static int tun_attach(struct tun_struct *tun, struct file *file)
+static struct tun_flow_entry *tun_flow_create(struct tun_struct *tun,
+                                             struct hlist_head *head,
+                                             u32 rxhash, u16 queue_index)
 {
-       struct tun_file *tfile = file->private_data;
-       int err;
+       struct tun_flow_entry *e = kmem_cache_alloc(tun->flow_cache,
+                                                   GFP_ATOMIC);
+       if (e) {
+               tun_debug(KERN_INFO, tun, "create flow: hash %u index %u\n",
+                         rxhash, queue_index);
+               e->updated = jiffies;
+               e->rxhash = rxhash;
+               e->queue_index = queue_index;
+               e->tun = tun;
+               hlist_add_head_rcu(&e->hash_link, head);
+       }
+       return e;
+}
 
-       ASSERT_RTNL();
+static void tun_flow_free(struct rcu_head *head)
+{
+       struct tun_flow_entry *e
+               = container_of(head, struct tun_flow_entry, rcu);
+       kmem_cache_free(e->tun->flow_cache, e);
+}
 
-       netif_tx_lock_bh(tun->dev);
+static void tun_flow_delete(struct tun_struct *tun, struct tun_flow_entry *e)
+{
+       tun_debug(KERN_INFO, tun, "delete flow: hash %u index %u\n",
+                 e->rxhash, e->queue_index);
+       hlist_del_rcu(&e->hash_link);
+       call_rcu(&e->rcu, tun_flow_free);
+}
 
-       err = -EINVAL;
-       if (tfile->tun)
-               goto out;
+static void tun_flow_flush(struct tun_struct *tun)
+{
+       int i;
 
-       err = -EBUSY;
-       if (tun->tfile)
-               goto out;
+       spin_lock_bh(&tun->lock);
+       for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
+               struct tun_flow_entry *e;
+               struct hlist_node *h, *n;
 
-       err = 0;
-       tfile->tun = tun;
-       tun->tfile = tfile;
-       tun->socket.file = file;
-       netif_carrier_on(tun->dev);
-       dev_hold(tun->dev);
-       sock_hold(tun->socket.sk);
-       atomic_inc(&tfile->count);
+               hlist_for_each_entry_safe(e, h, n, &tun->flows[i], hash_link)
+                       tun_flow_delete(tun, e);
+       }
+       spin_unlock_bh(&tun->lock);
+}
 
-out:
-       netif_tx_unlock_bh(tun->dev);
-       return err;
+static void tun_flow_delete_by_queue(struct tun_struct *tun, u16 queue_index)
+{
+       int i;
+
+       spin_lock_bh(&tun->lock);
+       for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
+               struct tun_flow_entry *e;
+               struct hlist_node *h, *n;
+
+               hlist_for_each_entry_safe(e, h, n, &tun->flows[i], hash_link) {
+                       if (e->queue_index == queue_index)
+                               tun_flow_delete(tun, e);
+               }
+       }
+       spin_unlock_bh(&tun->lock);
 }
 
-static void __tun_detach(struct tun_struct *tun)
+static void tun_flow_cleanup(unsigned long data)
 {
-       /* Detach from net device */
-       netif_tx_lock_bh(tun->dev);
-       netif_carrier_off(tun->dev);
-       tun->tfile = NULL;
-       netif_tx_unlock_bh(tun->dev);
+       struct tun_struct *tun = (struct tun_struct *)data;
+       unsigned long delay = tun->ageing_time;
+       unsigned long next_timer = jiffies + delay;
+       unsigned long count = 0;
+       int i;
 
-       /* Drop read queue */
-       skb_queue_purge(&tun->socket.sk->sk_receive_queue);
+       tun_debug(KERN_INFO, tun, "tun_flow_cleanup\n");
+
+       spin_lock_bh(&tun->lock);
+       for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
+               struct tun_flow_entry *e;
+               struct hlist_node *h, *n;
+
+               hlist_for_each_entry_safe(e, h, n, &tun->flows[i], hash_link) {
+                       unsigned long this_timer;
+                       count++;
+                       this_timer = e->updated + delay;
+                       if (time_before_eq(this_timer, jiffies))
+                               tun_flow_delete(tun, e);
+                       else if (time_before(this_timer, next_timer))
+                               next_timer = this_timer;
+               }
+       }
 
-       /* Drop the extra count on the net device */
-       dev_put(tun->dev);
+       if (count)
+               mod_timer(&tun->flow_gc_timer, round_jiffies_up(next_timer));
+       spin_unlock_bh(&tun->lock);
 }
 
-static void tun_detach(struct tun_struct *tun)
+static void tun_flow_update(struct tun_struct *tun, struct sk_buff *skb,
+                           u16 queue_index)
+{
+       struct hlist_head *head;
+       struct tun_flow_entry *e;
+       unsigned long delay = tun->ageing_time;
+       u32 rxhash = skb_get_rxhash(skb);
+
+       if (!rxhash)
+               return;
+       else
+               head = &tun->flows[tun_hashfn(rxhash)];
+
+       rcu_read_lock();
+
+       if (tun->numqueues == 1)
+               goto unlock;
+
+       e = tun_flow_find(head, rxhash);
+       if (likely(e)) {
+               /* TODO: keep queueing to old queue until it's empty? */
+               e->queue_index = queue_index;
+               e->updated = jiffies;
+       } else {
+               spin_lock_bh(&tun->lock);
+               if (!tun_flow_find(head, rxhash))
+                       tun_flow_create(tun, head, rxhash, queue_index);
+
+               if (!timer_pending(&tun->flow_gc_timer))
+                       mod_timer(&tun->flow_gc_timer,
+                                 round_jiffies_up(jiffies + delay));
+               spin_unlock_bh(&tun->lock);
+       }
+
+unlock:
+       rcu_read_unlock();
+}
```
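
A quick note on tun_hashfn() above: TUN_NUM_FLOW_ENTRIES is 1024 (2^10), so
masking with 0x3ff is exactly `rxhash % 1024`, keeping every bucket index in
range without a division. A tiny standalone check of that identity:

```c
#include <assert.h>
#include <stdint.h>

/* For a power-of-two table size N, (hash & (N - 1)) == hash % N.
 * tun_hashfn() relies on this with N = 1024, mask 0x3ff. */
int main(void)
{
        uint32_t rxhash = 0xdeadbeefu;  /* arbitrary example hash */

        assert((rxhash & 0x3ffu) == (rxhash % 1024u));
        return 0;
}
```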
```diff
+/* We try to identify a flow through its rxhash first. The reason that
+ * we do not check rxq no. is because some cards (e.g. 82599) choose
+ * the rxq based on the txq where the last packet of the flow comes. As
+ * the userspace application moves between processors, we may get a
+ * different rxq no. here. If we could not get rxhash, then we would
+ * hope the rxq no. may help here.
+ */
+static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb)
+{
+       struct tun_struct *tun = netdev_priv(dev);
+       struct tun_flow_entry *e;
+       u32 txq = 0;
+       u32 numqueues = 0;
+
+       rcu_read_lock();
+       numqueues = tun->numqueues;
+
+       txq = skb_get_rxhash(skb);
+       if (txq) {
+               e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq);
+               if (e)
+                       txq = e->queue_index;
+               else
+                       /* use multiply and shift instead of expensive divide */
+                       txq = ((u64)txq * numqueues) >> 32;
+       } else if (likely(skb_rx_queue_recorded(skb))) {
+               txq = skb_get_rx_queue(skb);
+               while (unlikely(txq >= numqueues))
+                       txq -= numqueues;
+       }
+
+       rcu_read_unlock();
+       return txq;
+}
+
+static inline bool tun_not_capable(struct tun_struct *tun)
+{
+       const struct cred *cred = current_cred();
+       struct net *net = dev_net(tun->dev);
+
+       return ((uid_valid(tun->owner) && !uid_eq(cred->euid, tun->owner)) ||
+               (gid_valid(tun->group) && !in_egroup_p(tun->group))) &&
+               !ns_capable(net->user_ns, CAP_NET_ADMIN);
+}
+
+static void tun_set_real_num_queues(struct tun_struct *tun)
+{
+       netif_set_real_num_tx_queues(tun->dev, tun->numqueues);
+       netif_set_real_num_rx_queues(tun->dev, tun->numqueues);
+}
+
+static void __tun_detach(struct tun_file *tfile, bool clean)
+{
+       struct tun_file *ntfile;
+       struct tun_struct *tun;
+       struct net_device *dev;
+
+       tun = rcu_dereference_protected(tfile->tun,
+                                       lockdep_rtnl_is_held());
+       if (tun) {
+               u16 index = tfile->queue_index;
+               BUG_ON(index >= tun->numqueues);
+               dev = tun->dev;
+
+               rcu_assign_pointer(tun->tfiles[index],
+                                  tun->tfiles[tun->numqueues - 1]);
+               rcu_assign_pointer(tfile->tun, NULL);
+               ntfile = rcu_dereference_protected(tun->tfiles[index],
+                                                  lockdep_rtnl_is_held());
+               ntfile->queue_index = index;
+
+               --tun->numqueues;
+               sock_put(&tfile->sk);
+
+               synchronize_net();
+               tun_flow_delete_by_queue(tun, tun->numqueues + 1);
+               /* Drop read queue */
+               skb_queue_purge(&tfile->sk.sk_receive_queue);
+               tun_set_real_num_queues(tun);
+
+               if (tun->numqueues == 0 && !(tun->flags & TUN_PERSIST))
+                       if (dev->reg_state == NETREG_REGISTERED)
+                               unregister_netdevice(dev);
+       }
+
+       if (clean) {
+               BUG_ON(!test_bit(SOCK_EXTERNALLY_ALLOCATED,
+                                &tfile->socket.flags));
+               sk_release_kernel(&tfile->sk);
+       }
+}
+
+static void tun_detach(struct tun_file *tfile, bool clean)
 {
        rtnl_lock();
-       __tun_detach(tun);
+       __tun_detach(tfile, clean);
        rtnl_unlock();
 }
 
+static void tun_detach_all(struct net_device *dev)
+{
+       struct tun_struct *tun = netdev_priv(dev);
+       struct tun_file *tfile;
+       int i, n = tun->numqueues;
+
+       for (i = 0; i < n; i++) {
+               tfile = rcu_dereference_protected(tun->tfiles[i],
+                                                 lockdep_rtnl_is_held());
+               BUG_ON(!tfile);
+               wake_up_all(&tfile->wq.wait);
+               rcu_assign_pointer(tfile->tun, NULL);
+               --tun->numqueues;
+       }
+       BUG_ON(tun->numqueues != 0);
+
+       synchronize_net();
+       for (i = 0; i < n; i++) {
+               tfile = rcu_dereference_protected(tun->tfiles[i],
+                                                 lockdep_rtnl_is_held());
+               /* Drop read queue */
+               skb_queue_purge(&tfile->sk.sk_receive_queue);
+               sock_put(&tfile->sk);
+       }
+}
+
+static int tun_attach(struct tun_struct *tun, struct file *file)
+{
+       struct tun_file *tfile = file->private_data;
+       int err;
+
+       err = -EINVAL;
+       if (rcu_dereference_protected(tfile->tun, lockdep_rtnl_is_held()))
+               goto out;
+
+       err = -EBUSY;
+       if (!(tun->flags & TUN_TAP_MQ) && tun->numqueues == 1)
+               goto out;
+
+       err = -E2BIG;
+       if (tun->numqueues == MAX_TAP_QUEUES)
+               goto out;
+
+       err = 0;
+
+       /* Re-attach the filter to persist device */
+       if (tun->filter_attached == true) {
+               err = sk_attach_filter(&tun->fprog, tfile->socket.sk);
+               if (!err)
+                       goto out;
+       }
+       tfile->queue_index = tun->numqueues;
+       rcu_assign_pointer(tfile->tun, tun);
+       rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile);
+       sock_hold(&tfile->sk);
+       tun->numqueues++;
+
+       tun_set_real_num_queues(tun);
+
+       /* device is allowed to go away first, so no need to hold extra
+        * refcnt.
+        */
+
+out:
+       return err;
+}
+
 static struct tun_struct *__tun_get(struct tun_file *tfile)
 {
-       struct tun_struct *tun = NULL;
+       struct tun_struct *tun;
 
-       if (atomic_inc_not_zero(&tfile->count))
-               tun = tfile->tun;
+       rcu_read_lock();
+       tun = rcu_dereference(tfile->tun);
+       if (tun)
+               dev_hold(tun->dev);
+       rcu_read_unlock();
 
        return tun;
 }
@@ -221,10 +521,7 @@ static struct tun_struct *tun_get(struct file *file)
 
 static void tun_put(struct tun_struct *tun)
 {
-       struct tun_file *tfile = tun->tfile;
-
-       if (atomic_dec_and_test(&tfile->count))
-               tun_detach(tfile->tun);
+       dev_put(tun->dev);
 }
 
 /* TAP filtering */
@@ -344,38 +641,20 @@ static const struct ethtool_ops tun_ethtool_ops;
 /* Net device detach from fd. */
 static void tun_net_uninit(struct net_device *dev)
 {
-       struct tun_struct *tun = netdev_priv(dev);
-       struct tun_file *tfile = tun->tfile;
-
-       /* Inform the methods they need to stop using the dev.
-        */
-       if (tfile) {
-               wake_up_all(&tun->wq.wait);
-               if (atomic_dec_and_test(&tfile->count))
-                       __tun_detach(tun);
-       }
-}
-
-static void tun_free_netdev(struct net_device *dev)
-{
-       struct tun_struct *tun = netdev_priv(dev);
-
-       BUG_ON(!test_bit(SOCK_EXTERNALLY_ALLOCATED, &tun->socket.flags));
-
-       sk_release_kernel(tun->socket.sk);
+       tun_detach_all(dev);
 }
 
 /* Net device open. */
 static int tun_net_open(struct net_device *dev)
 {
-       netif_start_queue(dev);
+       netif_tx_start_all_queues(dev);
        return 0;
 }
 
 /* Net device close. */
 static int tun_net_close(struct net_device *dev)
 {
-       netif_stop_queue(dev);
+       netif_tx_stop_all_queues(dev);
        return 0;
 }
 
@@ -383,38 +662,36 @@ static int tun_net_close(struct net_device *dev)
 static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct tun_struct *tun = netdev_priv(dev);
+       int txq = skb->queue_mapping;
+       struct tun_file *tfile;
 
-       tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len);
+       rcu_read_lock();
+       tfile = rcu_dereference(tun->tfiles[txq]);
 
        /* Drop packet if interface is not attached */
-       if (!tun->tfile)
+       if (txq >= tun->numqueues)
                goto drop;
 
+       tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len);
+
+       BUG_ON(!tfile);
+
        /* Drop if the filter does not like it.
         * This is a noop if the filter is disabled.
         * Filter can be enabled only for the TAP devices. */
        if (!check_filter(&tun->txflt, skb))
                goto drop;
 
-       if (tun->socket.sk->sk_filter &&
-           sk_filter(tun->socket.sk, skb))
+       if (tfile->socket.sk->sk_filter &&
+           sk_filter(tfile->socket.sk, skb))
                goto drop;
 
-       if (skb_queue_len(&tun->socket.sk->sk_receive_queue) >= dev->tx_queue_len) {
-               if (!(tun->flags & TUN_ONE_QUEUE)) {
-                       /* Normal queueing mode. */
-                       /* Packet scheduler handles dropping of further packets. */
-                       netif_stop_queue(dev);
-
-                       /* We won't see all dropped packets individually, so overrun
-                        * error is more appropriate. */
-                       dev->stats.tx_fifo_errors++;
-               } else {
-                       /* Single queue mode.
-                        * Driver handles dropping of all packets itself. */
-                       goto drop;
-               }
-       }
+       /* Limit the number of packets queued by dividing txq length with the
+        * number of queues.
+        */
+       if (skb_queue_len(&tfile->socket.sk->sk_receive_queue)
+                         >= dev->tx_queue_len / tun->numqueues)
+               goto drop;
 
        /* Orphan the skb - required as we might hang on to it
         * for indefinite time.
         */
```
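
The "multiply and shift instead of expensive divide" line in
tun_select_queue() above maps a 32-bit hash onto [0, numqueues) by treating
the hash as a fraction of 2^32 and scaling it. A small userspace check of
that identity (queue counts are arbitrary examples):

```c
#include <assert.h>
#include <stdint.h>

/* Scale a 32-bit hash uniformly into [0, n) without a division, the same
 * trick tun_select_queue() uses when no flow entry matches. */
static uint32_t scale_to_queue(uint32_t hash, uint32_t n)
{
        return (uint32_t)(((uint64_t)hash * n) >> 32);
}

int main(void)
{
        assert(scale_to_queue(0, 8) == 0);
        assert(scale_to_queue(UINT32_MAX, 8) == 7);     /* always < n */
        assert(scale_to_queue(0x80000000u, 8) == 4);    /* midpoint -> middle */
        return 0;
}
```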
```diff
@@ -423,18 +700,22 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
        skb_orphan(skb);
 
        /* Enqueue packet */
-       skb_queue_tail(&tun->socket.sk->sk_receive_queue, skb);
+       skb_queue_tail(&tfile->socket.sk->sk_receive_queue, skb);
 
        /* Notify and wake up reader process */
-       if (tun->flags & TUN_FASYNC)
-               kill_fasync(&tun->fasync, SIGIO, POLL_IN);
-       wake_up_interruptible_poll(&tun->wq.wait, POLLIN |
+       if (tfile->flags & TUN_FASYNC)
+               kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
+       wake_up_interruptible_poll(&tfile->wq.wait, POLLIN |
                                   POLLRDNORM | POLLRDBAND);
+
+       rcu_read_unlock();
        return NETDEV_TX_OK;
 
 drop:
        dev->stats.tx_dropped++;
+       skb_tx_error(skb);
        kfree_skb(skb);
+       rcu_read_unlock();
        return NETDEV_TX_OK;
 }
 
@@ -490,6 +771,7 @@ static const struct net_device_ops tun_netdev_ops = {
        .ndo_start_xmit = tun_net_xmit,
        .ndo_change_mtu = tun_net_change_mtu,
        .ndo_fix_features = tun_net_fix_features,
+       .ndo_select_queue = tun_select_queue,
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller = tun_poll_controller,
 #endif
@@ -505,11 +787,43 @@ static const struct net_device_ops tap_netdev_ops = {
        .ndo_set_rx_mode = tun_net_mclist,
        .ndo_set_mac_address = eth_mac_addr,
        .ndo_validate_addr = eth_validate_addr,
+       .ndo_select_queue = tun_select_queue,
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller = tun_poll_controller,
 #endif
 };
 
+static int tun_flow_init(struct tun_struct *tun)
+{
+       int i;
+
+       tun->flow_cache = kmem_cache_create("tun_flow_cache",
+                                           sizeof(struct tun_flow_entry), 0, 0,
+                                           NULL);
+       if (!tun->flow_cache)
+               return -ENOMEM;
+
+       for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++)
+               INIT_HLIST_HEAD(&tun->flows[i]);
+
+       tun->ageing_time = TUN_FLOW_EXPIRE;
+       setup_timer(&tun->flow_gc_timer, tun_flow_cleanup, (unsigned long)tun);
+       mod_timer(&tun->flow_gc_timer,
+                 round_jiffies_up(jiffies + tun->ageing_time));
+
+       return 0;
+}
+
+static void tun_flow_uninit(struct tun_struct *tun)
+{
+       del_timer_sync(&tun->flow_gc_timer);
+       tun_flow_flush(tun);
+
+       /* Wait for completion of call_rcu()'s */
+       rcu_barrier();
+       kmem_cache_destroy(tun->flow_cache);
+}
+
 /* Initialize net device. */
 static void tun_net_init(struct net_device *dev)
 {
@@ -535,6 +849,7 @@ static void tun_net_init(struct net_device *dev)
                /* Ethernet TAP Device */
                ether_setup(dev);
                dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+               dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
 
                eth_hw_addr_random(dev);
 
@@ -546,7 +861,7 @@ static void tun_net_init(struct net_device *dev)
 /* Character device part */
 
 /* Poll */
-static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
+static unsigned int tun_chr_poll(struct file *file, poll_table *wait)
 {
        struct tun_file *tfile = file->private_data;
        struct tun_struct *tun = __tun_get(tfile);
@@ -556,11 +871,11 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
        if (!tun)
                return POLLERR;
 
-       sk = tun->socket.sk;
+       sk = tfile->socket.sk;
 
        tun_debug(KERN_INFO, tun, "tun_chr_poll\n");
 
-       poll_wait(file, &tun->wq.wait, wait);
+       poll_wait(file, &tfile->wq.wait, wait);
 
        if (!skb_queue_empty(&sk->sk_receive_queue))
                mask |= POLLIN | POLLRDNORM;
@@ -579,16 +894,14 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
 /* prepad is the amount to reserve at front. len is length after that.
  * linear is a hint as to how much to copy (usually headers).
  */
-static struct sk_buff *tun_alloc_skb(struct tun_struct *tun,
+static struct sk_buff *tun_alloc_skb(struct tun_file *tfile,
                                     size_t prepad, size_t len,
                                     size_t linear, int noblock)
 {
-       struct sock *sk = tun->socket.sk;
+       struct sock *sk = tfile->socket.sk;
        struct sk_buff *skb;
        int err;
 
-       sock_update_classid(sk);
-
        /* Under a page? Don't bother with paged skb. */
        if (prepad + len < PAGE_SIZE || !linear)
                linear = len;
@@ -685,9 +998,9 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
 }
 
 /* Get packet from user space buffer */
-static ssize_t tun_get_user(struct tun_struct *tun, void *msg_control,
-                           const struct iovec *iv, size_t total_len,
-                           size_t count, int noblock)
+static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
+                           void *msg_control, const struct iovec *iv,
+                           size_t total_len, size_t count, int noblock)
 {
        struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) };
        struct sk_buff *skb;
@@ -757,7 +1070,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, void *msg_control,
        } else
                copylen = len;
 
-       skb = tun_alloc_skb(tun, align, copylen, gso.hdr_len, noblock);
+       skb = tun_alloc_skb(tfile, align, copylen, gso.hdr_len, noblock);
        if (IS_ERR(skb)) {
                if (PTR_ERR(skb) != -EAGAIN)
                        tun->dev->stats.rx_dropped++;
@@ -854,6 +1167,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, void *msg_control,
        tun->dev->stats.rx_packets++;
        tun->dev->stats.rx_bytes += len;
 
+       tun_flow_update(tun, skb, tfile->queue_index);
        return total_len;
 }
 
@@ -862,6 +1176,7 @@ static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv,
 {
        struct file *file = iocb->ki_filp;
        struct tun_struct *tun = tun_get(file);
+       struct tun_file *tfile = file->private_data;
        ssize_t result;
 
        if (!tun)
@@ -869,8 +1184,8 @@ static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv,
 
        tun_debug(KERN_INFO, tun, "tun_chr_write %ld\n", count);
 
-       result = tun_get_user(tun, NULL, iv, iov_length(iv, count), count,
-                             file->f_flags & O_NONBLOCK);
+       result = tun_get_user(tun, tfile, NULL, iv, iov_length(iv, count),
+                             count, file->f_flags & O_NONBLOCK);
 
        tun_put(tun);
        return result;
@@ -878,6 +1193,7 @@ static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv,
 
 /* Put packet to the user space buffer */
 static ssize_t tun_put_user(struct tun_struct *tun,
+                           struct tun_file *tfile,
                            struct sk_buff *skb,
                            const struct iovec *iv, int len)
 {
@@ -957,7 +1273,7 @@ static ssize_t tun_put_user(struct tun_struct *tun,
        return total;
 }
 
-static ssize_t tun_do_read(struct tun_struct *tun,
+static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
                           struct kiocb *iocb, const struct iovec *iv,
                           ssize_t len, int noblock)
 {
@@ -965,15 +1281,15 @@ static ssize_t tun_do_read(struct tun_struct *tun,
        struct sk_buff *skb;
        ssize_t ret = 0;
 
-       tun_debug(KERN_INFO, tun, "tun_chr_read\n");
+       tun_debug(KERN_INFO, tun, "tun_do_read\n");
 
        if (unlikely(!noblock))
-               add_wait_queue(&tun->wq.wait, &wait);
+               add_wait_queue(&tfile->wq.wait, &wait);
        while (len) {
                current->state = TASK_INTERRUPTIBLE;
 
                /* Read frames from the queue */
-               if (!(skb=skb_dequeue(&tun->socket.sk->sk_receive_queue))) {
+               if (!(skb = skb_dequeue(&tfile->socket.sk->sk_receive_queue))) {
                        if (noblock) {
                                ret = -EAGAIN;
                                break;
@@ -991,16 +1307,15 @@ static ssize_t tun_do_read(struct tun_struct *tun,
                        schedule();
                        continue;
                }
-               netif_wake_queue(tun->dev);
 
-               ret = tun_put_user(tun, skb, iv, len);
+               ret = tun_put_user(tun, tfile, skb, iv, len);
                kfree_skb(skb);
                break;
        }
 
        current->state = TASK_RUNNING;
        if (unlikely(!noblock))
-               remove_wait_queue(&tun->wq.wait, &wait);
+               remove_wait_queue(&tfile->wq.wait, &wait);
 
        return ret;
 }
```
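
Since each queue now owns its socket and wait queue (tun_chr_poll() above
polls tfile->wq), a multiqueue reader typically services its own fd
independently. A minimal sketch of one queue's read loop; buffer sizing and
blocking behavior are illustrative choices:

```c
#include <poll.h>
#include <unistd.h>

/* Block until a frame is queued on this queue's fd, then read it. With the
 * per-queue sockets above, this only wakes for traffic steered to this
 * queue. */
static ssize_t read_one_frame(int queue_fd, void *buf, size_t len)
{
        struct pollfd pfd = { .fd = queue_fd, .events = POLLIN };

        if (poll(&pfd, 1, -1) <= 0)
                return -1;
        return read(queue_fd, buf, len);
}
```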
```diff
@@ -1021,13 +1336,22 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
                goto out;
        }
 
-       ret = tun_do_read(tun, iocb, iv, len, file->f_flags & O_NONBLOCK);
+       ret = tun_do_read(tun, tfile, iocb, iv, len,
+                         file->f_flags & O_NONBLOCK);
        ret = min_t(ssize_t, ret, len);
 out:
        tun_put(tun);
        return ret;
 }
 
+static void tun_free_netdev(struct net_device *dev)
+{
+       struct tun_struct *tun = netdev_priv(dev);
+
+       tun_flow_uninit(tun);
+       free_netdev(dev);
+}
+
 static void tun_setup(struct net_device *dev)
 {
        struct tun_struct *tun = netdev_priv(dev);
@@ -1056,7 +1380,7 @@ static struct rtnl_link_ops tun_link_ops __read_mostly = {
 
 static void tun_sock_write_space(struct sock *sk)
 {
-       struct tun_struct *tun;
+       struct tun_file *tfile;
        wait_queue_head_t *wqueue;
 
        if (!sock_writeable(sk))
@@ -1070,37 +1394,46 @@ static void tun_sock_write_space(struct sock *sk)
                wake_up_interruptible_sync_poll(wqueue, POLLOUT |
                                                POLLWRNORM | POLLWRBAND);
 
-       tun = tun_sk(sk)->tun;
-       kill_fasync(&tun->fasync, SIGIO, POLL_OUT);
-}
-
-static void tun_sock_destruct(struct sock *sk)
-{
-       free_netdev(tun_sk(sk)->tun->dev);
+       tfile = container_of(sk, struct tun_file, sk);
+       kill_fasync(&tfile->fasync, SIGIO, POLL_OUT);
 }
 
 static int tun_sendmsg(struct kiocb *iocb, struct socket *sock,
                       struct msghdr *m, size_t total_len)
 {
-       struct tun_struct *tun = container_of(sock, struct tun_struct, socket);
-       return tun_get_user(tun, m->msg_control, m->msg_iov, total_len,
-                           m->msg_iovlen, m->msg_flags & MSG_DONTWAIT);
+       int ret;
+       struct tun_file *tfile = container_of(sock, struct tun_file, socket);
+       struct tun_struct *tun = __tun_get(tfile);
+
+       if (!tun)
+               return -EBADFD;
+       ret = tun_get_user(tun, tfile, m->msg_control, m->msg_iov, total_len,
+                          m->msg_iovlen, m->msg_flags & MSG_DONTWAIT);
+       tun_put(tun);
+       return ret;
 }
 
+
 static int tun_recvmsg(struct kiocb *iocb, struct socket *sock,
                       struct msghdr *m, size_t total_len,
                       int flags)
 {
-       struct tun_struct *tun = container_of(sock, struct tun_struct, socket);
+       struct tun_file *tfile = container_of(sock, struct tun_file, socket);
+       struct tun_struct *tun = __tun_get(tfile);
        int ret;
+
+       if (!tun)
+               return -EBADFD;
+
        if (flags & ~(MSG_DONTWAIT|MSG_TRUNC))
                return -EINVAL;
-       ret = tun_do_read(tun, iocb, m->msg_iov, total_len,
+       ret = tun_do_read(tun, tfile, iocb, m->msg_iov, total_len,
                          flags & MSG_DONTWAIT);
        if (ret > total_len) {
                m->msg_flags |= MSG_TRUNC;
                ret = flags & MSG_TRUNC ? ret : total_len;
        }
+       tun_put(tun);
        return ret;
 }
 
@@ -1121,7 +1454,7 @@ static const struct proto_ops tun_socket_ops = {
 static struct proto tun_proto = {
        .name = "tun",
        .owner = THIS_MODULE,
-       .obj_size = sizeof(struct tun_sock),
+       .obj_size = sizeof(struct tun_file),
 };
 
 static int tun_flags(struct tun_struct *tun)
@@ -1136,12 +1469,18 @@ static int tun_flags(struct tun_struct *tun)
        if (tun->flags & TUN_NO_PI)
                flags |= IFF_NO_PI;
 
+       /* This flag has no real effect. We track the value for backwards
+        * compatibility.
+        */
        if (tun->flags & TUN_ONE_QUEUE)
                flags |= IFF_ONE_QUEUE;
 
        if (tun->flags & TUN_VNET_HDR)
                flags |= IFF_VNET_HDR;
 
+       if (tun->flags & TUN_TAP_MQ)
+               flags |= IFF_MULTI_QUEUE;
+
        return flags;
 }
 
@@ -1178,15 +1517,13 @@ static DEVICE_ATTR(group, 0444, tun_show_group, NULL);
 
 static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 {
-       struct sock *sk;
        struct tun_struct *tun;
+       struct tun_file *tfile = file->private_data;
        struct net_device *dev;
        int err;
 
        dev = __dev_get_by_name(net, ifr->ifr_name);
        if (dev) {
-               const struct cred *cred = current_cred();
-
                if (ifr->ifr_flags & IFF_TUN_EXCL)
                        return -EBUSY;
                if ((ifr->ifr_flags & IFF_TUN) && dev->netdev_ops == &tun_netdev_ops)
@@ -1196,11 +1533,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
                else
                        return -EINVAL;
 
-               if (((uid_valid(tun->owner) && !uid_eq(cred->euid, tun->owner)) ||
-                    (gid_valid(tun->group) && !in_egroup_p(tun->group))) &&
-                   !capable(CAP_NET_ADMIN))
+               if (tun_not_capable(tun))
                        return -EPERM;
-               err = security_tun_dev_attach(tun->socket.sk);
+               err = security_tun_dev_attach(tfile->socket.sk);
                if (err < 0)
                        return err;
 
@@ -1212,7 +1547,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
                char *name;
                unsigned long flags = 0;
 
-               if (!capable(CAP_NET_ADMIN))
+               if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        return -EPERM;
                err = security_tun_dev_create();
                if (err < 0)
@@ -1233,8 +1568,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
                if (*ifr->ifr_name)
                        name = ifr->ifr_name;
 
-               dev = alloc_netdev(sizeof(struct tun_struct), name,
-                                  tun_setup);
+               dev = alloc_netdev_mqs(sizeof(struct tun_struct), name,
+                                      tun_setup,
+                                      MAX_TAP_QUEUES, MAX_TAP_QUEUES);
                if (!dev)
                        return -ENOMEM;
 
@@ -1246,46 +1582,38 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
                tun->flags = flags;
                tun->txflt.count = 0;
                tun->vnet_hdr_sz = sizeof(struct virtio_net_hdr);
-               set_bit(SOCK_EXTERNALLY_ALLOCATED, &tun->socket.flags);
-
-               err = -ENOMEM;
-               sk = sk_alloc(&init_net, AF_UNSPEC, GFP_KERNEL, &tun_proto);
-               if (!sk)
-                       goto err_free_dev;
 
-               sk_change_net(sk, net);
-               tun->socket.wq = &tun->wq;
-               init_waitqueue_head(&tun->wq.wait);
-               tun->socket.ops = &tun_socket_ops;
-               sock_init_data(&tun->socket, sk);
-               sk->sk_write_space = tun_sock_write_space;
-               sk->sk_sndbuf = INT_MAX;
-               sock_set_flag(sk, SOCK_ZEROCOPY);
+               tun->filter_attached = false;
+               tun->sndbuf = tfile->socket.sk->sk_sndbuf;
 
-               tun_sk(sk)->tun = tun;
+               spin_lock_init(&tun->lock);
 
-               security_tun_dev_post_create(sk);
+               security_tun_dev_post_create(&tfile->sk);
 
                tun_net_init(dev);
 
+               err = tun_flow_init(tun);
+               if (err < 0)
+                       goto err_free_dev;
+
                dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST |
                        TUN_USER_FEATURES;
                dev->features = dev->hw_features;
 
+               err = tun_attach(tun, file);
+               if (err < 0)
+                       goto err_free_dev;
+
                err = register_netdevice(tun->dev);
                if (err < 0)
-                       goto err_free_sk;
+                       goto err_free_dev;
 
                if (device_create_file(&tun->dev->dev, &dev_attr_tun_flags) ||
                    device_create_file(&tun->dev->dev, &dev_attr_owner) ||
                    device_create_file(&tun->dev->dev, &dev_attr_group))
                        pr_err("Failed to create tun sysfs files\n");
 
-               sk->sk_destruct = tun_sock_destruct;
-
-               err = tun_attach(tun, file);
-               if (err < 0)
-                       goto failed;
+               netif_carrier_on(tun->dev);
        }
 
        tun_debug(KERN_INFO, tun, "tun_set_iff\n");
@@ -1295,6 +1623,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
        else
                tun->flags &= ~TUN_NO_PI;
 
+       /* This flag has no real effect. We track the value for backwards
+        * compatibility.
+        */
        if (ifr->ifr_flags & IFF_ONE_QUEUE)
                tun->flags |= TUN_ONE_QUEUE;
        else
@@ -1305,24 +1636,26 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
        else
                tun->flags &= ~TUN_VNET_HDR;
 
+       if (ifr->ifr_flags & IFF_MULTI_QUEUE)
+               tun->flags |= TUN_TAP_MQ;
+       else
+               tun->flags &= ~TUN_TAP_MQ;
+
        /* Make sure persistent devices do not get stuck in
        * xoff state.
        */
        if (netif_running(tun->dev))
-               netif_wake_queue(tun->dev);
+               netif_tx_wake_all_queues(tun->dev);
 
        strcpy(ifr->ifr_name, tun->dev->name);
        return 0;
 
- err_free_sk:
-       tun_free_netdev(dev);
  err_free_dev:
        free_netdev(dev);
- failed:
        return err;
 }
 
-static int tun_get_iff(struct net *net, struct tun_struct *tun,
+static void tun_get_iff(struct net *net, struct tun_struct *tun,
                       struct ifreq *ifr)
 {
        tun_debug(KERN_INFO, tun, "tun_get_iff\n");
@@ -1331,7 +1664,6 @@ static int tun_get_iff(struct net *net, struct tun_struct *tun,
 
        ifr->ifr_flags = tun_flags(tun);
-
-       return 0;
 }
 
 /* This is like a cut-down ethtool ops, except done via tun fd so no
@@ -1373,13 +1705,91 @@ static int set_offload(struct tun_struct *tun, unsigned long arg)
        return 0;
 }
 
+static void tun_detach_filter(struct tun_struct *tun, int n)
+{
+       int i;
+       struct tun_file *tfile;
+
+       for (i = 0; i < n; i++) {
+               tfile = rcu_dereference_protected(tun->tfiles[i],
+                                                 lockdep_rtnl_is_held());
+               sk_detach_filter(tfile->socket.sk);
+       }
+
+       tun->filter_attached = false;
+}
+
+static int tun_attach_filter(struct tun_struct *tun)
+{
+       int i, ret = 0;
+       struct tun_file *tfile;
+
+       for (i = 0; i < tun->numqueues; i++) {
+               tfile = rcu_dereference_protected(tun->tfiles[i],
+                                                 lockdep_rtnl_is_held());
+               ret = sk_attach_filter(&tun->fprog, tfile->socket.sk);
+               if (ret) {
+                       tun_detach_filter(tun, i);
+                       return ret;
+               }
+       }
+
+       tun->filter_attached = true;
+       return ret;
+}
+
+static void tun_set_sndbuf(struct tun_struct *tun)
+{
+       struct tun_file *tfile;
+       int i;
+
+       for (i = 0; i < tun->numqueues; i++) {
+               tfile = rcu_dereference_protected(tun->tfiles[i],
+                                                 lockdep_rtnl_is_held());
+               tfile->socket.sk->sk_sndbuf = tun->sndbuf;
+       }
+}
+
+static int tun_set_queue(struct file *file, struct ifreq *ifr)
+{
+       struct tun_file *tfile = file->private_data;
+       struct tun_struct *tun;
+       struct net_device *dev;
+       int ret = 0;
+
+       rtnl_lock();
+
+       if (ifr->ifr_flags & IFF_ATTACH_QUEUE) {
+               dev = __dev_get_by_name(tfile->net, ifr->ifr_name);
+               if (!dev) {
+                       ret = -EINVAL;
+                       goto unlock;
+               }
+
+               tun = netdev_priv(dev);
+               if (dev->netdev_ops != &tap_netdev_ops &&
+                   dev->netdev_ops != &tun_netdev_ops)
+                       ret = -EINVAL;
+               else if (tun_not_capable(tun))
+                       ret = -EPERM;
+               else
+                       ret = tun_attach(tun, file);
+       } else if (ifr->ifr_flags & IFF_DETACH_QUEUE)
+               __tun_detach(tfile, false);
+       else
+               ret = -EINVAL;
+
+unlock:
+       rtnl_unlock();
+       return ret;
+}
+
 static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
                            unsigned long arg, int ifreq_len)
 {
        struct tun_file *tfile = file->private_data;
        struct tun_struct *tun;
        void __user* argp = (void __user*)arg;
-       struct sock_fprog fprog;
        struct ifreq ifr;
        kuid_t owner;
        kgid_t group;
@@ -1387,7 +1797,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
        int vnet_hdr_sz;
        int ret;
 
-       if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89) {
+       if (cmd == TUNSETIFF || cmd == TUNSETQUEUE || _IOC_TYPE(cmd) == 0x89) {
                if (copy_from_user(&ifr, argp, ifreq_len))
                        return -EFAULT;
        } else {
@@ -1398,10 +1808,12 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
                 * This is needed because we never checked for invalid flags on
                 * TUNSETIFF.
                 */
                return put_user(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE |
-                               IFF_VNET_HDR,
+                               IFF_VNET_HDR | IFF_MULTI_QUEUE,
                                (unsigned int __user*)argp);
-       }
+       } else if (cmd == TUNSETQUEUE)
+               return tun_set_queue(file, &ifr);
 
+       ret = 0;
        rtnl_lock();
 
        tun = __tun_get(tfile);
@@ -1422,14 +1834,12 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
        if (!tun)
                goto unlock;
 
-       tun_debug(KERN_INFO, tun, "tun_chr_ioctl cmd %d\n", cmd);
+       tun_debug(KERN_INFO, tun, "tun_chr_ioctl cmd %u\n", cmd);
 
        ret = 0;
        switch (cmd) {
        case TUNGETIFF:
-               ret = tun_get_iff(current->nsproxy->net_ns, tun, &ifr);
-               if (ret)
-                       break;
+               tun_get_iff(current->nsproxy->net_ns, tun, &ifr);
 
                if (copy_to_user(argp, &ifr, ifreq_len))
                        ret = -EFAULT;
@@ -1444,11 +1854,16 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
                break;
 
        case TUNSETPERSIST:
-               /* Disable/Enable persist mode */
-               if (arg)
+               /* Disable/Enable persist mode. Keep an extra reference to the
+                * module to prevent the module from being unloaded.
+                */
+               if (arg) {
                        tun->flags |= TUN_PERSIST;
-               else
+                       __module_get(THIS_MODULE);
+               } else {
                        tun->flags &= ~TUN_PERSIST;
+                       module_put(THIS_MODULE);
+               }
 
                tun_debug(KERN_INFO, tun, "persist %s\n",
                          arg ? "enabled" : "disabled");
@@ -1462,7 +1877,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
                        break;
                }
                tun->owner = owner;
-               tun_debug(KERN_INFO, tun, "owner set to %d\n",
+               tun_debug(KERN_INFO, tun, "owner set to %u\n",
                          from_kuid(&init_user_ns, tun->owner));
                break;
 
@@ -1474,7 +1889,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
                        break;
                }
                tun->group = group;
-               tun_debug(KERN_INFO, tun, "group set to %d\n",
+               tun_debug(KERN_INFO, tun, "group set to %u\n",
                          from_kgid(&init_user_ns, tun->group));
                break;
 
@@ -1526,7 +1941,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
                break;
 
        case TUNGETSNDBUF:
-               sndbuf = tun->socket.sk->sk_sndbuf;
+               sndbuf = tfile->socket.sk->sk_sndbuf;
                if (copy_to_user(argp, &sndbuf, sizeof(sndbuf)))
                        ret = -EFAULT;
                break;
@@ -1537,7 +1952,8 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
                        break;
                }
 
-               tun->socket.sk->sk_sndbuf = sndbuf;
+               tun->sndbuf = sndbuf;
+               tun_set_sndbuf(tun);
                break;
 
        case TUNGETVNETHDRSZ:
@@ -1565,10 +1981,10 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
                if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV)
                        break;
                ret = -EFAULT;
-               if (copy_from_user(&fprog, argp, sizeof(fprog)))
+               if (copy_from_user(&tun->fprog, argp, sizeof(tun->fprog)))
                        break;
 
-               ret = sk_attach_filter(&fprog, tun->socket.sk);
+               ret = tun_attach_filter(tun);
                break;
 
        case TUNDETACHFILTER:
@@ -1576,7 +1992,8 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
                ret = -EINVAL;
                if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV)
                        break;
-               ret = sk_detach_filter(tun->socket.sk);
+               ret = 0;
+               tun_detach_filter(tun, tun->numqueues);
                break;
 
        default:
@@ -1628,27 +2045,21 @@ static long tun_chr_compat_ioctl(struct file *file,
 
 static int tun_chr_fasync(int fd, struct file *file, int on)
 {
-       struct tun_struct *tun = tun_get(file);
+       struct tun_file *tfile = file->private_data;
        int ret;
 
-       if (!tun)
-               return -EBADFD;
-
-       tun_debug(KERN_INFO, tun, "tun_chr_fasync %d\n", on);
-
-       if ((ret = fasync_helper(fd, file, on, &tun->fasync)) < 0)
+       if ((ret = fasync_helper(fd, file, on, &tfile->fasync)) < 0)
                goto out;
 
        if (on) {
                ret = __f_setown(file, task_pid(current), PIDTYPE_PID, 0);
                if (ret)
                        goto out;
-               tun->flags |= TUN_FASYNC;
+               tfile->flags |= TUN_FASYNC;
        } else
-               tun->flags &= ~TUN_FASYNC;
+               tfile->flags &= ~TUN_FASYNC;
        ret = 0;
 out:
-       tun_put(tun);
        return ret;
 }
 
@@ -1658,44 +2069,39 @@ static int tun_chr_open(struct inode *inode, struct file * file)
 
        DBG1(KERN_INFO, "tunX: tun_chr_open\n");
 
-       tfile = kmalloc(sizeof(*tfile), GFP_KERNEL);
+       tfile = (struct tun_file *)sk_alloc(&init_net, AF_UNSPEC, GFP_KERNEL,
+                                           &tun_proto);
        if (!tfile)
                return -ENOMEM;
-       atomic_set(&tfile->count, 0);
-       tfile->tun = NULL;
+       rcu_assign_pointer(tfile->tun, NULL);
        tfile->net = get_net(current->nsproxy->net_ns);
+       tfile->flags = 0;
+
+       rcu_assign_pointer(tfile->socket.wq, &tfile->wq);
+       init_waitqueue_head(&tfile->wq.wait);
+
+       tfile->socket.file = file;
+       tfile->socket.ops = &tun_socket_ops;
+
+       sock_init_data(&tfile->socket, &tfile->sk);
+       sk_change_net(&tfile->sk, tfile->net);
+
+       tfile->sk.sk_write_space = tun_sock_write_space;
+       tfile->sk.sk_sndbuf = INT_MAX;
+
        file->private_data = tfile;
+       set_bit(SOCK_EXTERNALLY_ALLOCATED, &tfile->socket.flags);
+
        return 0;
 }
 
 static int tun_chr_close(struct inode *inode, struct file *file)
 {
        struct tun_file *tfile = file->private_data;
-       struct tun_struct *tun;
-
-       tun = __tun_get(tfile);
-       if (tun) {
-               struct net_device *dev = tun->dev;
-
-               tun_debug(KERN_INFO, tun, "tun_chr_close\n");
-
-               __tun_detach(tun);
-
-               /* If desirable, unregister the netdevice. */
-               if (!(tun->flags & TUN_PERSIST)) {
-                       rtnl_lock();
-                       if (dev->reg_state == NETREG_REGISTERED)
-                               unregister_netdevice(dev);
-                       rtnl_unlock();
-               }
-       }
+       struct net *net = tfile->net;
 
-       tun = tfile->tun;
-       if (tun)
-               sock_put(tun->socket.sk);
-
-       put_net(tfile->net);
-       kfree(tfile);
+       tun_detach(tfile, true);
+       put_net(net);
 
        return 0;
 }
@@ -1822,14 +2228,13 @@ static void tun_cleanup(void)
  * holding a reference to the file for as long as the socket is in use. */
 struct socket *tun_get_socket(struct file *file)
 {
-       struct tun_struct *tun;
+       struct tun_file *tfile;
        if (file->f_op != &tun_fops)
                return ERR_PTR(-EINVAL);
-       tun = tun_get(file);
-       if (!tun)
+       tfile = file->private_data;
+       if (!tfile)
                return ERR_PTR(-EBADFD);
-       tun_put(tun);
-       return &tun->socket;
+       return &tfile->socket;
 }
 EXPORT_SYMBOL_GPL(tun_get_socket);
```
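
One consequence of the per-queue sockets worth noting: TUNSETFILTER now has
to be replicated onto every queue (tun_attach_filter() above), and the
program is also re-attached when a queue joins a persistent device. From
userspace the ioctl is unchanged. A minimal sketch with a trivial
accept-all program (the program itself is an arbitrary example):

```c
#include <sys/ioctl.h>
#include <linux/filter.h>
#include <linux/if_tun.h>

/* Attach a trivial accept-all classic BPF program to a tap fd. The kernel
 * side now fans this out to each queue's socket. */
static int tap_attach_accept_all(int tap_fd)
{
        static struct sock_filter code[] = {
                BPF_STMT(BPF_RET | BPF_K, 0xffff),      /* accept whole packet */
        };
        struct sock_fprog prog = {
                .len = sizeof(code) / sizeof(code[0]),
                .filter = code,
        };

        return ioctl(tap_fd, TUNSETFILTER, &prog);
}
```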
