From 92d4ea6e412a023434b477c04fd5632aefb233d4 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 5 Dec 2013 20:42:58 -0800 Subject: tun: spelling fixes Fix spelling errors in tun driver. Signed-off-by: Stephen Hemminger Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/tun.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/net/tun.c') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 782e38bfc1ee..5ac2877a89c3 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -110,7 +110,7 @@ struct tap_filter { unsigned char addr[FLT_EXACT_COUNT][ETH_ALEN]; }; -/* DEFAULT_MAX_NUM_RSS_QUEUES were choosed to let the rx/tx queues allocated for +/* DEFAULT_MAX_NUM_RSS_QUEUES were chosen to let the rx/tx queues allocated for * the netdevice to be fit in one page. So we can make sure the success of * memory allocation. TODO: increase the limit. */ #define MAX_TAP_QUEUES DEFAULT_MAX_NUM_RSS_QUEUES @@ -119,7 +119,7 @@ struct tap_filter { #define TUN_FLOW_EXPIRE (3 * HZ) /* A tun_file connects an open character device to a tuntap netdevice. It - * also contains all socket related strctures (except sock_fprog and tap_filter) + * also contains all socket related structures (except sock_fprog and tap_filter) * to serve as one transmit queue for tuntap device. The sock_fprog and * tap_filter were kept in tun_struct since they were used for filtering for the * netdevice not for a specific queue (at least I didn't see the requirement for @@ -342,7 +342,7 @@ unlock: } /* We try to identify a flow through its rxhash first. The reason that - * we do not check rxq no. is becuase some cards(e.g 82599), chooses + * we do not check rxq no. is because some cards(e.g 82599), chooses * the rxq based on the txq where the last packet of the flow comes. As * the userspace application move between processors, we may get a * different rxq no. here. If we could not get rxhash, then we would @@ -531,7 +531,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filte err = 0; - /* Re-attach the filter to presist device */ + /* Re-attach the filter to persist device */ if (!skip_filter && (tun->filter_attached == true)) { err = sk_attach_filter(&tun->fprog, tfile->socket.sk); if (!err) @@ -819,9 +819,9 @@ static void tun_poll_controller(struct net_device *dev) * Tun only receives frames when: * 1) the char device endpoint gets data from user space * 2) the tun socket gets a sendmsg call from user space - * Since both of those are syncronous operations, we are guaranteed + * Since both of those are synchronous operations, we are guaranteed * never to have pending data when we poll for it - * so theres nothing to do here but return. + * so there is nothing to do here but return. * We need this though so netpoll recognizes us as an interface that * supports polling, which enables bridge devices in virt setups to * still use netconsole -- cgit v1.2.3 From f96eb74c84ebb09a03031f7f1e51789a928f9de0 Mon Sep 17 00:00:00 2001 From: Zhi Yong Wu Date: Sat, 7 Dec 2013 04:13:06 +0800 Subject: tun: remove unused parameter in tun_do_read() Signed-off-by: Zhi Yong Wu Signed-off-by: David S. Miller --- drivers/net/tun.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'drivers/net/tun.c') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 5ac2877a89c3..6b0b2a057a47 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1289,8 +1289,7 @@ done: } static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile, - struct kiocb *iocb, const struct iovec *iv, - ssize_t len, int noblock) + const struct iovec *iv, ssize_t len, int noblock) { DECLARE_WAITQUEUE(wait, current); struct sk_buff *skb; @@ -1353,7 +1352,7 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, goto out; } - ret = tun_do_read(tun, tfile, iocb, iv, len, + ret = tun_do_read(tun, tfile, iv, len, file->f_flags & O_NONBLOCK); ret = min_t(ssize_t, ret, len); out: @@ -1452,7 +1451,7 @@ static int tun_recvmsg(struct kiocb *iocb, struct socket *sock, SOL_PACKET, TUN_TX_TIMESTAMP); goto out; } - ret = tun_do_read(tun, tfile, iocb, m->msg_iov, total_len, + ret = tun_do_read(tun, tfile, m->msg_iov, total_len, flags & MSG_DONTWAIT); if (ret > total_len) { m->msg_flags |= MSG_TRUNC; -- cgit v1.2.3 From 73713357ab58aacda1af715bb5a623528dbbfd79 Mon Sep 17 00:00:00 2001 From: Zhi Yong Wu Date: Sat, 7 Dec 2013 04:55:00 +0800 Subject: tun: remove useless codes in tun_chr_aio_read() and tun_recvmsg() By checking related codes, it is impossible that ret > len or total_len, so we should remove some useless codes in both above functions. Signed-off-by: Zhi Yong Wu Signed-off-by: David S. Miller --- drivers/net/tun.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'drivers/net/tun.c') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 3c5a8d8cde50..bbb693512918 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1354,7 +1354,6 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, ret = tun_do_read(tun, tfile, iv, len, file->f_flags & O_NONBLOCK); - ret = min_t(ssize_t, ret, len); if (ret > 0) iocb->ki_pos = ret; out: @@ -1455,10 +1454,6 @@ static int tun_recvmsg(struct kiocb *iocb, struct socket *sock, } ret = tun_do_read(tun, tfile, m->msg_iov, total_len, flags & MSG_DONTWAIT); - if (ret > total_len) { - m->msg_flags |= MSG_TRUNC; - ret = flags & MSG_TRUNC ? ret : total_len; - } out: tun_put(tun); return ret; -- cgit v1.2.3 From 42404c09151029689659818e2196ee2fd97142aa Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 10 Dec 2013 22:05:45 -0500 Subject: Revert "tun: remove useless codes in tun_chr_aio_read() and tun_recvmsg()" This reverts commit 73713357ab58aacda1af715bb5a623528dbbfd79. MSG_TRUNC handling was broken and is going to be fixed in the 'net' tree, so revert this. Signed-off-by: David S. Miller --- drivers/net/tun.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/net/tun.c') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index bbb693512918..3c5a8d8cde50 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1354,6 +1354,7 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, ret = tun_do_read(tun, tfile, iv, len, file->f_flags & O_NONBLOCK); + ret = min_t(ssize_t, ret, len); if (ret > 0) iocb->ki_pos = ret; out: @@ -1454,6 +1455,10 @@ static int tun_recvmsg(struct kiocb *iocb, struct socket *sock, } ret = tun_do_read(tun, tfile, m->msg_iov, total_len, flags & MSG_DONTWAIT); + if (ret > total_len) { + m->msg_flags |= MSG_TRUNC; + ret = flags & MSG_TRUNC ? ret : total_len; + } out: tun_put(tun); return ret; -- cgit v1.2.3 From 3958afa1b272eb07109fd31549e69193b4d7c364 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 15 Dec 2013 22:12:06 -0800 Subject: net: Change skb_get_rxhash to skb_get_hash Changing name of function as part of making the hash in skbuff to be generic property, not just for receive path. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- drivers/net/tun.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/tun.c') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 3c5a8d8cde50..8569da248336 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -358,7 +358,7 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb) rcu_read_lock(); numqueues = ACCESS_ONCE(tun->numqueues); - txq = skb_get_rxhash(skb); + txq = skb_get_hash(skb); if (txq) { e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq); if (e) @@ -1146,7 +1146,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, skb_reset_network_header(skb); skb_probe_transport_header(skb, 0); - rxhash = skb_get_rxhash(skb); + rxhash = skb_get_hash(skb); netif_rx_ni(skb); tun->dev->stats.rx_packets++; -- cgit v1.2.3 From 9bc8893937c8369c9c3a045c05f5820f2242bc96 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 22 Dec 2013 18:54:32 +0800 Subject: tun: Add support for RFS on tun flows This patch adds support so that the rps_flow_tables (RFS) can be programmed using the tun flows which are already set up to track flows for the purposes of queue selection. On the receive path (corresponding to select_queue and tun_net_xmit) the rxhash is saved in the flow_entry. The original code only does flow lookup in select_queue, so this patch adds a flow lookup in tun_net_xmit if num_queues == 1 (select_queue is not called from dev_queue_xmit->netdev_pick_tx in that case). The flow is recorded (processing CPU) in tun_flow_update (TX path), and reset when flow is deleted. Signed-off-by: Tom Herbert Signed-off-by: Zhi Yong Wu Signed-off-by: David S. Miller --- drivers/net/tun.c | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) (limited to 'drivers/net/tun.c') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index a17a7018db19..3cf0457f5c69 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -152,6 +152,7 @@ struct tun_flow_entry { struct tun_struct *tun; u32 rxhash; + u32 rps_rxhash; int queue_index; unsigned long updated; }; @@ -220,6 +221,7 @@ static struct tun_flow_entry *tun_flow_create(struct tun_struct *tun, rxhash, queue_index); e->updated = jiffies; e->rxhash = rxhash; + e->rps_rxhash = 0; e->queue_index = queue_index; e->tun = tun; hlist_add_head_rcu(&e->hash_link, head); @@ -232,6 +234,7 @@ static void tun_flow_delete(struct tun_struct *tun, struct tun_flow_entry *e) { tun_debug(KERN_INFO, tun, "delete flow: hash %u index %u\n", e->rxhash, e->queue_index); + sock_rps_reset_flow_hash(e->rps_rxhash); hlist_del_rcu(&e->hash_link); kfree_rcu(e, rcu); --tun->flow_count; @@ -325,6 +328,7 @@ static void tun_flow_update(struct tun_struct *tun, u32 rxhash, /* TODO: keep queueing to old queue until it's empty? */ e->queue_index = queue_index; e->updated = jiffies; + sock_rps_record_flow_hash(e->rps_rxhash); } else { spin_lock_bh(&tun->lock); if (!tun_flow_find(head, rxhash) && @@ -341,6 +345,18 @@ unlock: rcu_read_unlock(); } +/** + * Save the hash received in the stack receive path and update the + * flow_hash table accordingly. + */ +static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash) +{ + if (unlikely(e->rps_rxhash != hash)) { + sock_rps_reset_flow_hash(e->rps_rxhash); + e->rps_rxhash = hash; + } +} + /* We try to identify a flow through its rxhash first. The reason that * we do not check rxq no. is because some cards(e.g 82599), chooses * the rxq based on the txq where the last packet of the flow comes. As @@ -361,9 +377,10 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb) txq = skb_get_hash(skb); if (txq) { e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq); - if (e) + if (e) { txq = e->queue_index; - else + tun_flow_save_rps_rxhash(e, txq); + } else /* use multiply and shift instead of expensive divide */ txq = ((u64)txq * numqueues) >> 32; } else if (likely(skb_rx_queue_recorded(skb))) { @@ -728,6 +745,22 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) if (txq >= tun->numqueues) goto drop; + if (tun->numqueues == 1) { + /* Select queue was not called for the skbuff, so we extract the + * RPS hash and save it into the flow_table here. + */ + __u32 rxhash; + + rxhash = skb_get_hash(skb); + if (rxhash) { + struct tun_flow_entry *e; + e = tun_flow_find(&tun->flows[tun_hashfn(rxhash)], + rxhash); + if (e) + tun_flow_save_rps_rxhash(e, rxhash); + } + } + tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len); BUG_ON(!tfile); -- cgit v1.2.3 From fbe4d4565badba393a94927e16ae66654a025dca Mon Sep 17 00:00:00 2001 From: Zhi Yong Wu Date: Thu, 2 Jan 2014 13:24:28 +0800 Subject: tun, rfs: fix the incorrect hash value The code incorrectly save the queue index as the hash, so this patch is fixing it with the hash received in the stack receive path. Signed-off-by: Zhi Yong Wu Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/tun.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/tun.c') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 3cf0457f5c69..09f66624eaca 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -378,8 +378,8 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb) if (txq) { e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq); if (e) { - txq = e->queue_index; tun_flow_save_rps_rxhash(e, txq); + txq = e->queue_index; } else /* use multiply and shift instead of expensive divide */ txq = ((u64)txq * numqueues) >> 32; -- cgit v1.2.3 From fa35864e0bb7f7c13b9c6d6751ddac9b42d4810f Mon Sep 17 00:00:00 2001 From: Dominic Curran Date: Wed, 22 Jan 2014 03:03:23 +0000 Subject: tuntap: Fix for a race in accessing numqueues A patch for fixing a race between queue selection and changing queues was introduced in commit 92bb73ea2("tuntap: fix a possible race between queue selection and changing queues"). The fix was to prevent the driver from re-reading the tun->numqueues more than once within tun_select_queue() using ACCESS_ONCE(). We have been experiancing 'Divide-by-zero' errors in tun_net_xmit() since we moved from 3.6 to 3.10, and believe that they come from a simular source where the value of tun->numqueues changes to zero between the first and a subsequent read of tun->numqueues. The fix is a simular use of ACCESS_ONCE(), as well as a multiply instead of a divide in the if statement. Signed-off-by: Dominic Curran Cc: Jason Wang Cc: Maxim Krasnyansky Acked-by: Jason Wang Acked-by: Max Krasnyansky Signed-off-by: David S. Miller --- drivers/net/tun.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/net/tun.c') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 34cca74c99ed..bcf01af4b879 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -738,15 +738,17 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) struct tun_struct *tun = netdev_priv(dev); int txq = skb->queue_mapping; struct tun_file *tfile; + u32 numqueues = 0; rcu_read_lock(); tfile = rcu_dereference(tun->tfiles[txq]); + numqueues = ACCESS_ONCE(tun->numqueues); /* Drop packet if interface is not attached */ - if (txq >= tun->numqueues) + if (txq >= numqueues) goto drop; - if (tun->numqueues == 1) { + if (numqueues == 1) { /* Select queue was not called for the skbuff, so we extract the * RPS hash and save it into the flow_table here. */ @@ -779,8 +781,8 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) /* Limit the number of packets queued by dividing txq length with the * number of queues. */ - if (skb_queue_len(&tfile->socket.sk->sk_receive_queue) - >= dev->tx_queue_len / tun->numqueues) + if (skb_queue_len(&tfile->socket.sk->sk_receive_queue) * numqueues + >= dev->tx_queue_len) goto drop; if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC))) -- cgit v1.2.3 From 93e14b6d776e850a371fe4234a06088f210d8651 Mon Sep 17 00:00:00 2001 From: Masatake YAMATO Date: Wed, 29 Jan 2014 16:43:31 +0900 Subject: tun: add device name(iff) field to proc fdinfo entry A file descriptor opened for /dev/net/tun and a tun device are connected with ioctl. Though understanding the connection is important for trouble shooting, no way is given to a user to know the connected device for a given file descriptor at userland. This patch adds a new fdinfo field for the device name connected to a file descriptor opened for /dev/net/tun. Here is an example of the field: # lsof | grep tun qemu-syst 4565 qemu 25u CHR 10,200 0t138 12921 /dev/net/tun ... # cat /proc/4565/fdinfo/25 pos: 138 flags: 0104002 iff: vnet0 # ip link show dev vnet0 8: vnet0: mtu 1500 ... changelog: v2: indent iff just like the other fdinfo fields are. v3: remove unused variable. Both are suggested by David Miller . Signed-off-by: Masatake YAMATO Signed-off-by: David S. Miller --- drivers/net/tun.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) (limited to 'drivers/net/tun.c') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index bcf01af4b879..44c4db8450f0 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -69,6 +69,7 @@ #include #include #include +#include #include @@ -2228,6 +2229,27 @@ static int tun_chr_close(struct inode *inode, struct file *file) return 0; } +#ifdef CONFIG_PROC_FS +static int tun_chr_show_fdinfo(struct seq_file *m, struct file *f) +{ + struct tun_struct *tun; + struct ifreq ifr; + + memset(&ifr, 0, sizeof(ifr)); + + rtnl_lock(); + tun = tun_get(f); + if (tun) + tun_get_iff(current->nsproxy->net_ns, tun, &ifr); + rtnl_unlock(); + + if (tun) + tun_put(tun); + + return seq_printf(m, "iff:\t%s\n", ifr.ifr_name); +} +#endif + static const struct file_operations tun_fops = { .owner = THIS_MODULE, .llseek = no_llseek, @@ -2242,7 +2264,10 @@ static const struct file_operations tun_fops = { #endif .open = tun_chr_open, .release = tun_chr_close, - .fasync = tun_chr_fasync + .fasync = tun_chr_fasync, +#ifdef CONFIG_PROC_FS + .show_fdinfo = tun_chr_show_fdinfo, +#endif }; static struct miscdevice tun_miscdev = { -- cgit v1.2.3