summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/bridge/br_device.c8
-rw-r--r--net/bridge/br_if.c5
-rw-r--r--net/bridge/br_netlink.c2
-rw-r--r--net/bridge/br_private.h24
-rw-r--r--net/bridge/br_stp.c15
-rw-r--r--net/bridge/br_stp_if.c4
-rw-r--r--net/bridge/br_stp_timer.c4
-rw-r--r--net/bridge/br_sysfs_br.c17
-rw-r--r--net/bridge/br_vlan.c147
-rw-r--r--net/core/dev.c29
-rw-r--r--net/core/pktgen.c26
-rw-r--r--net/core/skbuff.c66
-rw-r--r--net/dccp/ccid.c2
-rw-r--r--net/dccp/proto.c2
-rw-r--r--net/dsa/dsa.c2
-rw-r--r--net/dsa/slave.c1
-rw-r--r--net/ipv4/cipso_ipv4.c2
-rw-r--r--net/ipv4/fou.c224
-rw-r--r--net/ipv4/ip_fragment.c4
-rw-r--r--net/ipv4/ip_gre.c2
-rw-r--r--net/ipv4/ip_tunnel.c26
-rw-r--r--net/ipv4/ipip.c2
-rw-r--r--net/ipv4/route.c2
-rw-r--r--net/ipv4/tcp.c4
-rw-r--r--net/ipv4/tcp_output.c5
-rw-r--r--net/ipv4/tcp_timer.c41
-rw-r--r--net/ipv4/udp_offload.c52
-rw-r--r--net/ipv6/addrconf.c3
-rw-r--r--net/ipv6/addrconf_core.c7
-rw-r--r--net/ipv6/ip6_fib.c20
-rw-r--r--net/ipv6/ip6_gre.c11
-rw-r--r--net/ipv6/ip6_tunnel.c6
-rw-r--r--net/ipv6/ip6_vti.c6
-rw-r--r--net/ipv6/route.c4
-rw-r--r--net/ipv6/sit.c4
-rw-r--r--net/ipv6/udp_offload.c3
-rw-r--r--net/netfilter/Kconfig1
-rw-r--r--net/netfilter/nfnetlink.c64
-rw-r--r--net/netfilter/nft_hash.c12
-rw-r--r--net/netfilter/nft_rbtree.c2
-rw-r--r--net/sched/cls_tcindex.c12
-rw-r--r--net/sched/cls_u32.c10
-rw-r--r--net/sched/ematch.c6
-rw-r--r--net/sched/sch_api.c2
-rw-r--r--net/sched/sch_generic.c55
-rw-r--r--net/xfrm/xfrm_policy.c4
46 files changed, 772 insertions, 178 deletions
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 659cac15c0df..ffd379db5938 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -88,12 +88,17 @@ out:
static int br_dev_init(struct net_device *dev)
{
struct net_bridge *br = netdev_priv(dev);
+ int err;
br->stats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!br->stats)
return -ENOMEM;
- return 0;
+ err = br_vlan_init(br);
+ if (err)
+ free_percpu(br->stats);
+
+ return err;
}
static int br_dev_open(struct net_device *dev)
@@ -389,5 +394,4 @@ void br_dev_setup(struct net_device *dev)
br_netfilter_rtable_init(br);
br_stp_timer_init(br);
br_multicast_init(br);
- br_vlan_init(br);
}
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index a9f54a9b6690..ed307db7a12b 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -332,7 +332,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br,
p->port_no = index;
p->flags = BR_LEARNING | BR_FLOOD;
br_init_port(p);
- p->state = BR_STATE_DISABLED;
+ br_set_state(p, BR_STATE_DISABLED);
br_stp_port_timer_init(p);
br_multicast_add_port(p);
@@ -500,6 +500,9 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
if (br_fdb_insert(br, p, dev->dev_addr, 0))
netdev_err(dev, "failed insert local address bridge forwarding table\n");
+ if (nbp_vlan_init(p))
+ netdev_err(dev, "failed to initialize vlan filtering on this port\n");
+
spin_lock_bh(&br->lock);
changed_addr = br_stp_recalculate_bridge_id(br);
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 0fa66b83685f..2ff9706647f2 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -301,7 +301,7 @@ static int br_set_port_state(struct net_bridge_port *p, u8 state)
(!netif_oper_up(p->dev) && state != BR_STATE_DISABLED))
return -ENETDOWN;
- p->state = state;
+ br_set_state(p, state);
br_log_state(p);
br_port_state_selection(p->br);
return 0;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index f53592fc3ef9..d8cbaa694227 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -299,6 +299,7 @@ struct net_bridge
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
u8 vlan_enabled;
__be16 vlan_proto;
+ u16 default_pvid;
struct net_port_vlans __rcu *vlan_info;
#endif
};
@@ -604,11 +605,13 @@ bool br_vlan_find(struct net_bridge *br, u16 vid);
void br_recalculate_fwd_mask(struct net_bridge *br);
int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val);
int br_vlan_set_proto(struct net_bridge *br, unsigned long val);
-void br_vlan_init(struct net_bridge *br);
+int br_vlan_init(struct net_bridge *br);
+int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val);
int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags);
int nbp_vlan_delete(struct net_bridge_port *port, u16 vid);
void nbp_vlan_flush(struct net_bridge_port *port);
bool nbp_vlan_find(struct net_bridge_port *port, u16 vid);
+int nbp_vlan_init(struct net_bridge_port *port);
static inline struct net_port_vlans *br_get_vlan_info(
const struct net_bridge *br)
@@ -641,11 +644,11 @@ static inline int br_vlan_get_tag(const struct sk_buff *skb, u16 *vid)
static inline u16 br_get_pvid(const struct net_port_vlans *v)
{
- /* Return just the VID if it is set, or VLAN_N_VID (invalid vid) if
- * vid wasn't set
- */
+ if (!v)
+ return 0;
+
smp_rmb();
- return v->pvid ?: VLAN_N_VID;
+ return v->pvid;
}
static inline int br_vlan_enabled(struct net_bridge *br)
@@ -704,8 +707,9 @@ static inline void br_recalculate_fwd_mask(struct net_bridge *br)
{
}
-static inline void br_vlan_init(struct net_bridge *br)
+static inline int br_vlan_init(struct net_bridge *br)
{
+ return 0;
}
static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags)
@@ -738,13 +742,18 @@ static inline bool nbp_vlan_find(struct net_bridge_port *port, u16 vid)
return false;
}
+static inline int nbp_vlan_init(struct net_bridge_port *port)
+{
+ return 0;
+}
+
static inline u16 br_vlan_get_tag(const struct sk_buff *skb, u16 *tag)
{
return 0;
}
static inline u16 br_get_pvid(const struct net_port_vlans *v)
{
- return VLAN_N_VID; /* Returns invalid vid */
+ return 0;
}
static inline int br_vlan_enabled(struct net_bridge *br)
@@ -766,6 +775,7 @@ static inline void br_nf_core_fini(void) {}
/* br_stp.c */
void br_log_state(const struct net_bridge_port *p);
+void br_set_state(struct net_bridge_port *p, unsigned int state);
struct net_bridge_port *br_get_port(struct net_bridge *br, u16 port_no);
void br_init_port(struct net_bridge_port *p);
void br_become_designated_port(struct net_bridge_port *p);
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 3c86f0538cbb..2b047bcf42a4 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -36,6 +36,11 @@ void br_log_state(const struct net_bridge_port *p)
br_port_state_names[p->state]);
}
+void br_set_state(struct net_bridge_port *p, unsigned int state)
+{
+ p->state = state;
+}
+
/* called under bridge lock */
struct net_bridge_port *br_get_port(struct net_bridge *br, u16 port_no)
{
@@ -107,7 +112,7 @@ static void br_root_port_block(const struct net_bridge *br,
br_notice(br, "port %u(%s) tried to become root port (blocked)",
(unsigned int) p->port_no, p->dev->name);
- p->state = BR_STATE_LISTENING;
+ br_set_state(p, BR_STATE_LISTENING);
br_log_state(p);
br_ifinfo_notify(RTM_NEWLINK, p);
@@ -387,7 +392,7 @@ static void br_make_blocking(struct net_bridge_port *p)
p->state == BR_STATE_LEARNING)
br_topology_change_detection(p->br);
- p->state = BR_STATE_BLOCKING;
+ br_set_state(p, BR_STATE_BLOCKING);
br_log_state(p);
br_ifinfo_notify(RTM_NEWLINK, p);
@@ -404,13 +409,13 @@ static void br_make_forwarding(struct net_bridge_port *p)
return;
if (br->stp_enabled == BR_NO_STP || br->forward_delay == 0) {
- p->state = BR_STATE_FORWARDING;
+ br_set_state(p, BR_STATE_FORWARDING);
br_topology_change_detection(br);
del_timer(&p->forward_delay_timer);
} else if (br->stp_enabled == BR_KERNEL_STP)
- p->state = BR_STATE_LISTENING;
+ br_set_state(p, BR_STATE_LISTENING);
else
- p->state = BR_STATE_LEARNING;
+ br_set_state(p, BR_STATE_LEARNING);
br_multicast_enable_port(p);
br_log_state(p);
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 189ba1e7d851..41146872c1b4 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -37,7 +37,7 @@ void br_init_port(struct net_bridge_port *p)
{
p->port_id = br_make_port_id(p->priority, p->port_no);
br_become_designated_port(p);
- p->state = BR_STATE_BLOCKING;
+ br_set_state(p, BR_STATE_BLOCKING);
p->topology_change_ack = 0;
p->config_pending = 0;
}
@@ -100,7 +100,7 @@ void br_stp_disable_port(struct net_bridge_port *p)
wasroot = br_is_root_bridge(br);
br_become_designated_port(p);
- p->state = BR_STATE_DISABLED;
+ br_set_state(p, BR_STATE_DISABLED);
p->topology_change_ack = 0;
p->config_pending = 0;
diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c
index 558c46d19e05..4fcaa67750fd 100644
--- a/net/bridge/br_stp_timer.c
+++ b/net/bridge/br_stp_timer.c
@@ -87,11 +87,11 @@ static void br_forward_delay_timer_expired(unsigned long arg)
(unsigned int) p->port_no, p->dev->name);
spin_lock(&br->lock);
if (p->state == BR_STATE_LISTENING) {
- p->state = BR_STATE_LEARNING;
+ br_set_state(p, BR_STATE_LEARNING);
mod_timer(&p->forward_delay_timer,
jiffies + br->forward_delay);
} else if (p->state == BR_STATE_LEARNING) {
- p->state = BR_STATE_FORWARDING;
+ br_set_state(p, BR_STATE_FORWARDING);
if (br_is_designated_for_some_port(br))
br_topology_change_detection(br);
netif_carrier_on(br->dev);
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index cb431c6016ee..4c97fc50fb70 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -725,6 +725,22 @@ static ssize_t vlan_protocol_store(struct device *d,
return store_bridge_parm(d, buf, len, br_vlan_set_proto);
}
static DEVICE_ATTR_RW(vlan_protocol);
+
+static ssize_t default_pvid_show(struct device *d,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct net_bridge *br = to_bridge(d);
+ return sprintf(buf, "%d\n", br->default_pvid);
+}
+
+static ssize_t default_pvid_store(struct device *d,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ return store_bridge_parm(d, buf, len, br_vlan_set_default_pvid);
+}
+static DEVICE_ATTR_RW(default_pvid);
#endif
static struct attribute *bridge_attrs[] = {
@@ -771,6 +787,7 @@ static struct attribute *bridge_attrs[] = {
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
&dev_attr_vlan_filtering.attr,
&dev_attr_vlan_protocol.attr,
+ &dev_attr_default_pvid.attr,
#endif
NULL
};
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 3ba57fcdcd13..150048fb99b0 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -223,7 +223,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
* See if pvid is set on this port. That tells us which
* vlan untagged or priority-tagged traffic belongs to.
*/
- if (pvid == VLAN_N_VID)
+ if (!pvid)
goto drop;
/* PVID is set on this port. Any untagged or priority-tagged
@@ -292,7 +292,7 @@ bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid)
if (!*vid) {
*vid = br_get_pvid(v);
- if (*vid == VLAN_N_VID)
+ if (!*vid)
return false;
return true;
@@ -499,9 +499,141 @@ err_filt:
goto unlock;
}
-void br_vlan_init(struct net_bridge *br)
+static bool vlan_default_pvid(struct net_port_vlans *pv, u16 vid)
+{
+ return pv && vid == pv->pvid && test_bit(vid, pv->untagged_bitmap);
+}
+
+static void br_vlan_disable_default_pvid(struct net_bridge *br)
+{
+ struct net_bridge_port *p;
+ u16 pvid = br->default_pvid;
+
+ /* Disable default_pvid on all ports where it is still
+ * configured.
+ */
+ if (vlan_default_pvid(br_get_vlan_info(br), pvid))
+ br_vlan_delete(br, pvid);
+
+ list_for_each_entry(p, &br->port_list, list) {
+ if (vlan_default_pvid(nbp_get_vlan_info(p), pvid))
+ nbp_vlan_delete(p, pvid);
+ }
+
+ br->default_pvid = 0;
+}
+
+static int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid)
+{
+ struct net_bridge_port *p;
+ u16 old_pvid;
+ int err = 0;
+ unsigned long *changed;
+
+ changed = kcalloc(BITS_TO_LONGS(BR_MAX_PORTS), sizeof(unsigned long),
+ GFP_KERNEL);
+ if (!changed)
+ return -ENOMEM;
+
+ old_pvid = br->default_pvid;
+
+ /* Update default_pvid config only if we do not conflict with
+ * user configuration.
+ */
+ if ((!old_pvid || vlan_default_pvid(br_get_vlan_info(br), old_pvid)) &&
+ !br_vlan_find(br, pvid)) {
+ err = br_vlan_add(br, pvid,
+ BRIDGE_VLAN_INFO_PVID |
+ BRIDGE_VLAN_INFO_UNTAGGED);
+ if (err)
+ goto out;
+ br_vlan_delete(br, old_pvid);
+ set_bit(0, changed);
+ }
+
+ list_for_each_entry(p, &br->port_list, list) {
+ /* Update default_pvid config only if we do not conflict with
+ * user configuration.
+ */
+ if ((old_pvid &&
+ !vlan_default_pvid(nbp_get_vlan_info(p), old_pvid)) ||
+ nbp_vlan_find(p, pvid))
+ continue;
+
+ err = nbp_vlan_add(p, pvid,
+ BRIDGE_VLAN_INFO_PVID |
+ BRIDGE_VLAN_INFO_UNTAGGED);
+ if (err)
+ goto err_port;
+ nbp_vlan_delete(p, old_pvid);
+ set_bit(p->port_no, changed);
+ }
+
+ br->default_pvid = pvid;
+
+out:
+ kfree(changed);
+ return err;
+
+err_port:
+ list_for_each_entry_continue_reverse(p, &br->port_list, list) {
+ if (!test_bit(p->port_no, changed))
+ continue;
+
+ if (old_pvid)
+ nbp_vlan_add(p, old_pvid,
+ BRIDGE_VLAN_INFO_PVID |
+ BRIDGE_VLAN_INFO_UNTAGGED);
+ nbp_vlan_delete(p, pvid);
+ }
+
+ if (test_bit(0, changed)) {
+ if (old_pvid)
+ br_vlan_add(br, old_pvid,
+ BRIDGE_VLAN_INFO_PVID |
+ BRIDGE_VLAN_INFO_UNTAGGED);
+ br_vlan_delete(br, pvid);
+ }
+ goto out;
+}
+
+int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val)
+{
+ u16 pvid = val;
+ int err = 0;
+
+ if (val >= VLAN_VID_MASK)
+ return -EINVAL;
+
+ if (!rtnl_trylock())
+ return restart_syscall();
+
+ if (pvid == br->default_pvid)
+ goto unlock;
+
+ /* Only allow default pvid change when filtering is disabled */
+ if (br->vlan_enabled) {
+ pr_info_once("Please disable vlan filtering to change default_pvid\n");
+ err = -EPERM;
+ goto unlock;
+ }
+
+ if (!pvid)
+ br_vlan_disable_default_pvid(br);
+ else
+ err = __br_vlan_set_default_pvid(br, pvid);
+
+unlock:
+ rtnl_unlock();
+ return err;
+}
+
+int br_vlan_init(struct net_bridge *br)
{
br->vlan_proto = htons(ETH_P_8021Q);
+ br->default_pvid = 1;
+ return br_vlan_add(br, 1,
+ BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED);
}
/* Must be protected by RTNL.
@@ -593,3 +725,12 @@ out:
rcu_read_unlock();
return found;
}
+
+int nbp_vlan_init(struct net_bridge_port *p)
+{
+ return p->br->default_pvid ?
+ nbp_vlan_add(p, p->br->default_pvid,
+ BRIDGE_VLAN_INFO_PVID |
+ BRIDGE_VLAN_INFO_UNTAGGED) :
+ 0;
+}
diff --git a/net/core/dev.c b/net/core/dev.c
index e55c546717d4..1a90530f83ff 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2655,7 +2655,7 @@ struct sk_buff *validate_xmit_vlan(struct sk_buff *skb, netdev_features_t featur
return skb;
}
-struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev)
+static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev)
{
netdev_features_t features;
@@ -2720,6 +2720,30 @@ out_null:
return NULL;
}
+struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev)
+{
+ struct sk_buff *next, *head = NULL, *tail;
+
+ while (skb) {
+ next = skb->next;
+ skb->next = NULL;
+ skb = validate_xmit_skb(skb, dev);
+ if (skb) {
+ struct sk_buff *end = skb;
+
+ while (end->next)
+ end = end->next;
+ if (!head)
+ head = skb;
+ else
+ tail->next = skb;
+ tail = end;
+ }
+ skb = next;
+ }
+ return head;
+}
+
static void qdisc_pkt_len_init(struct sk_buff *skb)
{
const struct skb_shared_info *shinfo = skb_shinfo(skb);
@@ -2786,8 +2810,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
qdisc_bstats_update(q, skb);
- skb = validate_xmit_skb(skb, dev);
- if (skb && sch_direct_xmit(skb, q, dev, txq, root_lock)) {
+ if (sch_direct_xmit(skb, q, dev, txq, root_lock, true)) {
if (unlikely(contended)) {
spin_unlock(&q->busylock);
contended = false;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 5c728aaf8d6c..443256bdcddc 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -387,6 +387,7 @@ struct pktgen_dev {
u16 queue_map_min;
u16 queue_map_max;
__u32 skb_priority; /* skb priority field */
+ unsigned int burst; /* number of duplicated packets to burst */
int node; /* Memory node */
#ifdef CONFIG_XFRM
@@ -613,6 +614,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
if (pkt_dev->traffic_class)
seq_printf(seq, " traffic_class: 0x%02x\n", pkt_dev->traffic_class);
+ if (pkt_dev->burst > 1)
+ seq_printf(seq, " burst: %d\n", pkt_dev->burst);
+
if (pkt_dev->node >= 0)
seq_printf(seq, " node: %d\n", pkt_dev->node);
@@ -1124,6 +1128,16 @@ static ssize_t pktgen_if_write(struct file *file,
pkt_dev->dst_mac_count);
return count;
}
+ if (!strcmp(name, "burst")) {
+ len = num_arg(&user_buffer[i], 10, &value);
+ if (len < 0)
+ return len;
+
+ i += len;
+ pkt_dev->burst = value < 1 ? 1 : value;
+ sprintf(pg_result, "OK: burst=%d", pkt_dev->burst);
+ return count;
+ }
if (!strcmp(name, "node")) {
len = num_arg(&user_buffer[i], 10, &value);
if (len < 0)
@@ -3297,6 +3311,7 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev)
static void pktgen_xmit(struct pktgen_dev *pkt_dev)
{
+ unsigned int burst = ACCESS_ONCE(pkt_dev->burst);
struct net_device *odev = pkt_dev->odev;
struct netdev_queue *txq;
int ret;
@@ -3347,8 +3362,10 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
pkt_dev->last_ok = 0;
goto unlock;
}
- atomic_inc(&(pkt_dev->skb->users));
- ret = netdev_start_xmit(pkt_dev->skb, odev, txq, false);
+ atomic_add(burst, &pkt_dev->skb->users);
+
+xmit_more:
+ ret = netdev_start_xmit(pkt_dev->skb, odev, txq, --burst > 0);
switch (ret) {
case NETDEV_TX_OK:
@@ -3356,6 +3373,8 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
pkt_dev->sofar++;
pkt_dev->seq_num++;
pkt_dev->tx_bytes += pkt_dev->last_pkt_size;
+ if (burst > 0 && !netif_xmit_frozen_or_drv_stopped(txq))
+ goto xmit_more;
break;
case NET_XMIT_DROP:
case NET_XMIT_CN:
@@ -3374,6 +3393,8 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
atomic_dec(&(pkt_dev->skb->users));
pkt_dev->last_ok = 0;
}
+ if (unlikely(burst))
+ atomic_sub(burst, &pkt_dev->skb->users);
unlock:
HARD_TX_UNLOCK(odev, txq);
@@ -3572,6 +3593,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
pkt_dev->svlan_p = 0;
pkt_dev->svlan_cfi = 0;
pkt_dev->svlan_id = 0xffff;
+ pkt_dev->burst = 1;
pkt_dev->node = -1;
err = pktgen_setup_dev(t->net, pkt_dev, ifname);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7de3d679f3e0..9a423e2c5766 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -257,15 +257,16 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
kmemcheck_annotate_variable(shinfo->destructor_arg);
if (flags & SKB_ALLOC_FCLONE) {
- struct sk_buff *child = skb + 1;
- atomic_t *fclone_ref = (atomic_t *) (child + 1);
+ struct sk_buff_fclones *fclones;
- kmemcheck_annotate_bitfield(child, flags1);
+ fclones = container_of(skb, struct sk_buff_fclones, skb1);
+
+ kmemcheck_annotate_bitfield(&fclones->skb2, flags1);
skb->fclone = SKB_FCLONE_ORIG;
- atomic_set(fclone_ref, 1);
+ atomic_set(&fclones->fclone_ref, 1);
- child->fclone = SKB_FCLONE_UNAVAILABLE;
- child->pfmemalloc = pfmemalloc;
+ fclones->skb2.fclone = SKB_FCLONE_FREE;
+ fclones->skb2.pfmemalloc = pfmemalloc;
}
out:
return skb;
@@ -524,8 +525,7 @@ static void skb_release_data(struct sk_buff *skb)
*/
static void kfree_skbmem(struct sk_buff *skb)
{
- struct sk_buff *other;
- atomic_t *fclone_ref;
+ struct sk_buff_fclones *fclones;
switch (skb->fclone) {
case SKB_FCLONE_UNAVAILABLE:
@@ -533,22 +533,28 @@ static void kfree_skbmem(struct sk_buff *skb)
break;
case SKB_FCLONE_ORIG:
- fclone_ref = (atomic_t *) (skb + 2);
- if (atomic_dec_and_test(fclone_ref))
- kmem_cache_free(skbuff_fclone_cache, skb);
+ fclones = container_of(skb, struct sk_buff_fclones, skb1);
+ if (atomic_dec_and_test(&fclones->fclone_ref))
+ kmem_cache_free(skbuff_fclone_cache, fclones);
break;
case SKB_FCLONE_CLONE:
- fclone_ref = (atomic_t *) (skb + 1);
- other = skb - 1;
+ fclones = container_of(skb, struct sk_buff_fclones, skb2);
- /* The clone portion is available for
- * fast-cloning again.
+ /* Warning : We must perform the atomic_dec_and_test() before
+ * setting skb->fclone back to SKB_FCLONE_FREE, otherwise
+ * skb_clone() could set clone_ref to 2 before our decrement.
+ * Anyway, if we are going to free the structure, no need to
+ * rewrite skb->fclone.
*/
- skb->fclone = SKB_FCLONE_UNAVAILABLE;
-
- if (atomic_dec_and_test(fclone_ref))
- kmem_cache_free(skbuff_fclone_cache, other);
+ if (atomic_dec_and_test(&fclones->fclone_ref)) {
+ kmem_cache_free(skbuff_fclone_cache, fclones);
+ } else {
+ /* The clone portion is available for
+ * fast-cloning again.
+ */
+ skb->fclone = SKB_FCLONE_FREE;
+ }
break;
}
}
@@ -859,17 +865,22 @@ EXPORT_SYMBOL_GPL(skb_copy_ubufs);
struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
{
- struct sk_buff *n;
+ struct sk_buff_fclones *fclones = container_of(skb,
+ struct sk_buff_fclones,
+ skb1);
+ struct sk_buff *n = &fclones->skb2;
if (skb_orphan_frags(skb, gfp_mask))
return NULL;
- n = skb + 1;
if (skb->fclone == SKB_FCLONE_ORIG &&
- n->fclone == SKB_FCLONE_UNAVAILABLE) {
- atomic_t *fclone_ref = (atomic_t *) (n + 1);
+ n->fclone == SKB_FCLONE_FREE) {
n->fclone = SKB_FCLONE_CLONE;
- atomic_inc(fclone_ref);
+ /* As our fastclone was free, clone_ref must be 1 at this point.
+ * We could use atomic_inc() here, but it is faster
+ * to set the final value.
+ */
+ atomic_set(&fclones->fclone_ref, 2);
} else {
if (skb_pfmemalloc(skb))
gfp_mask |= __GFP_MEMALLOC;
@@ -3155,6 +3166,9 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD;
goto done;
}
+ /* switch back to head shinfo */
+ pinfo = skb_shinfo(p);
+
if (pinfo->frag_list)
goto merge;
if (skb_gro_len(p) != pinfo->gso_size)
@@ -3230,7 +3244,6 @@ done:
NAPI_GRO_CB(skb)->same_flow = 1;
return 0;
}
-EXPORT_SYMBOL_GPL(skb_gro_receive);
void __init skb_init(void)
{
@@ -3240,8 +3253,7 @@ void __init skb_init(void)
SLAB_HWCACHE_ALIGN|SLAB_PANIC,
NULL);
skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
- (2*sizeof(struct sk_buff)) +
- sizeof(atomic_t),
+ sizeof(struct sk_buff_fclones),
0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC,
NULL);
diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c
index 597557254ddb..83498975165f 100644
--- a/net/dccp/ccid.c
+++ b/net/dccp/ccid.c
@@ -99,7 +99,7 @@ static void ccid_kmem_cache_destroy(struct kmem_cache *slab)
kmem_cache_destroy(slab);
}
-static int ccid_activate(struct ccid_operations *ccid_ops)
+static int __init ccid_activate(struct ccid_operations *ccid_ops)
{
int err = -ENOBUFS;
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index de2c1e719305..1ad150ed57cf 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1082,7 +1082,7 @@ void dccp_shutdown(struct sock *sk, int how)
EXPORT_SYMBOL_GPL(dccp_shutdown);
-static inline int dccp_mib_init(void)
+static inline int __init dccp_mib_init(void)
{
dccp_statistics = alloc_percpu(struct dccp_mib);
if (!dccp_statistics)
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 6905f2d84c44..22f34cf4cb27 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -238,6 +238,7 @@ static void dsa_switch_destroy(struct dsa_switch *ds)
{
}
+#ifdef CONFIG_PM_SLEEP
static int dsa_switch_suspend(struct dsa_switch *ds)
{
int i, ret = 0;
@@ -280,6 +281,7 @@ static int dsa_switch_resume(struct dsa_switch *ds)
return 0;
}
+#endif
/* link polling *************************************************************/
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 36953c84ff2d..8030489d9cbe 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -608,7 +608,6 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent,
p->phy->speed = 0;
p->phy->duplex = 0;
p->phy->advertising = p->phy->supported | ADVERTISED_Autoneg;
- phy_start_aneg(p->phy);
}
return slave_dev;
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 05b708bbdb0d..4715f25dfe03 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -246,7 +246,7 @@ static u32 cipso_v4_map_cache_hash(const unsigned char *key, u32 key_len)
* success, negative values on error.
*
*/
-static int cipso_v4_cache_init(void)
+static int __init cipso_v4_cache_init(void)
{
u32 iter;
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index dced89fbe480..efa70ad44906 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -7,6 +7,7 @@
#include <linux/types.h>
#include <linux/kernel.h>
#include <net/genetlink.h>
+#include <net/gue.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/udp.h>
@@ -27,6 +28,7 @@ struct fou {
};
struct fou_cfg {
+ u16 type;
u8 protocol;
struct udp_port_cfg udp_config;
};
@@ -64,15 +66,51 @@ static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
sizeof(struct udphdr));
}
+static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
+{
+ struct fou *fou = fou_from_sock(sk);
+ size_t len;
+ struct guehdr *guehdr;
+ struct udphdr *uh;
+
+ if (!fou)
+ return 1;
+
+ len = sizeof(struct udphdr) + sizeof(struct guehdr);
+ if (!pskb_may_pull(skb, len))
+ goto drop;
+
+ uh = udp_hdr(skb);
+ guehdr = (struct guehdr *)&uh[1];
+
+ len += guehdr->hlen << 2;
+ if (!pskb_may_pull(skb, len))
+ goto drop;
+
+ if (guehdr->version != 0)
+ goto drop;
+
+ if (guehdr->flags) {
+ /* No support yet */
+ goto drop;
+ }
+
+ return fou_udp_encap_recv_deliver(skb, guehdr->next_hdr, len);
+drop:
+ kfree_skb(skb);
+ return 0;
+}
+
static struct sk_buff **fou_gro_receive(struct sk_buff **head,
- struct sk_buff *skb,
- const struct net_offload **offloads)
+ struct sk_buff *skb)
{
const struct net_offload *ops;
struct sk_buff **pp = NULL;
u8 proto = NAPI_GRO_CB(skb)->proto;
+ const struct net_offload **offloads;
rcu_read_lock();
+ offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
ops = rcu_dereference(offloads[proto]);
if (!ops || !ops->callbacks.gro_receive)
goto out_unlock;
@@ -85,14 +123,15 @@ out_unlock:
return pp;
}
-static int fou_gro_complete(struct sk_buff *skb, int nhoff,
- const struct net_offload **offloads)
+static int fou_gro_complete(struct sk_buff *skb, int nhoff)
{
const struct net_offload *ops;
u8 proto = NAPI_GRO_CB(skb)->proto;
int err = -ENOSYS;
+ const struct net_offload **offloads;
rcu_read_lock();
+ offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
ops = rcu_dereference(offloads[proto]);
if (WARN_ON(!ops || !ops->callbacks.gro_complete))
goto out_unlock;
@@ -105,26 +144,110 @@ out_unlock:
return err;
}
-static struct sk_buff **fou4_gro_receive(struct sk_buff **head,
- struct sk_buff *skb)
+static struct sk_buff **gue_gro_receive(struct sk_buff **head,
+ struct sk_buff *skb)
{
- return fou_gro_receive(head, skb, inet_offloads);
-}
+ const struct net_offload **offloads;
+ const struct net_offload *ops;
+ struct sk_buff **pp = NULL;
+ struct sk_buff *p;
+ u8 proto;
+ struct guehdr *guehdr;
+ unsigned int hlen, guehlen;
+ unsigned int off;
+ int flush = 1;
+
+ off = skb_gro_offset(skb);
+ hlen = off + sizeof(*guehdr);
+ guehdr = skb_gro_header_fast(skb, off);
+ if (skb_gro_header_hard(skb, hlen)) {
+ guehdr = skb_gro_header_slow(skb, hlen, off);
+ if (unlikely(!guehdr))
+ goto out;
+ }
-static int fou4_gro_complete(struct sk_buff *skb, int nhoff)
-{
- return fou_gro_complete(skb, nhoff, inet_offloads);
-}
+ proto = guehdr->next_hdr;
-static struct sk_buff **fou6_gro_receive(struct sk_buff **head,
- struct sk_buff *skb)
-{
- return fou_gro_receive(head, skb, inet6_offloads);
+ rcu_read_lock();
+ offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
+ ops = rcu_dereference(offloads[proto]);
+ if (WARN_ON(!ops || !ops->callbacks.gro_receive))
+ goto out_unlock;
+
+ guehlen = sizeof(*guehdr) + (guehdr->hlen << 2);
+
+ hlen = off + guehlen;
+ if (skb_gro_header_hard(skb, hlen)) {
+ guehdr = skb_gro_header_slow(skb, hlen, off);
+ if (unlikely(!guehdr))
+ goto out_unlock;
+ }
+
+ flush = 0;
+
+ for (p = *head; p; p = p->next) {
+ const struct guehdr *guehdr2;
+
+ if (!NAPI_GRO_CB(p)->same_flow)
+ continue;
+
+ guehdr2 = (struct guehdr *)(p->data + off);
+
+ /* Compare base GUE header to be equal (covers
+ * hlen, version, next_hdr, and flags.
+ */
+ if (guehdr->word != guehdr2->word) {
+ NAPI_GRO_CB(p)->same_flow = 0;
+ continue;
+ }
+
+ /* Compare optional fields are the same. */
+ if (guehdr->hlen && memcmp(&guehdr[1], &guehdr2[1],
+ guehdr->hlen << 2)) {
+ NAPI_GRO_CB(p)->same_flow = 0;
+ continue;
+ }
+ }
+
+ skb_gro_pull(skb, guehlen);
+
+ /* Adjusted NAPI_GRO_CB(skb)->csum after skb_gro_pull()*/
+ skb_gro_postpull_rcsum(skb, guehdr, guehlen);
+
+ pp = ops->callbacks.gro_receive(head, skb);
+
+out_unlock:
+ rcu_read_unlock();
+out:
+ NAPI_GRO_CB(skb)->flush |= flush;
+
+ return pp;
}
-static int fou6_gro_complete(struct sk_buff *skb, int nhoff)
+static int gue_gro_complete(struct sk_buff *skb, int nhoff)
{
- return fou_gro_complete(skb, nhoff, inet6_offloads);
+ const struct net_offload **offloads;
+ struct guehdr *guehdr = (struct guehdr *)(skb->data + nhoff);
+ const struct net_offload *ops;
+ unsigned int guehlen;
+ u8 proto;
+ int err = -ENOENT;
+
+ proto = guehdr->next_hdr;
+
+ guehlen = sizeof(*guehdr) + (guehdr->hlen << 2);
+
+ rcu_read_lock();
+ offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
+ ops = rcu_dereference(offloads[proto]);
+ if (WARN_ON(!ops || !ops->callbacks.gro_complete))
+ goto out_unlock;
+
+ err = ops->callbacks.gro_complete(skb, nhoff + guehlen);
+
+out_unlock:
+ rcu_read_unlock();
+ return err;
}
static int fou_add_to_port_list(struct fou *fou)
@@ -162,6 +285,28 @@ static void fou_release(struct fou *fou)
kfree(fou);
}
+static int fou_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg)
+{
+ udp_sk(sk)->encap_rcv = fou_udp_recv;
+ fou->protocol = cfg->protocol;
+ fou->udp_offloads.callbacks.gro_receive = fou_gro_receive;
+ fou->udp_offloads.callbacks.gro_complete = fou_gro_complete;
+ fou->udp_offloads.port = cfg->udp_config.local_udp_port;
+ fou->udp_offloads.ipproto = cfg->protocol;
+
+ return 0;
+}
+
+static int gue_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg)
+{
+ udp_sk(sk)->encap_rcv = gue_udp_recv;
+ fou->udp_offloads.callbacks.gro_receive = gue_gro_receive;
+ fou->udp_offloads.callbacks.gro_complete = gue_gro_complete;
+ fou->udp_offloads.port = cfg->udp_config.local_udp_port;
+
+ return 0;
+}
+
static int fou_create(struct net *net, struct fou_cfg *cfg,
struct socket **sockp)
{
@@ -184,10 +329,24 @@ static int fou_create(struct net *net, struct fou_cfg *cfg,
sk = sock->sk;
- /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
- fou->protocol = cfg->protocol;
- fou->port = cfg->udp_config.local_udp_port;
- udp_sk(sk)->encap_rcv = fou_udp_recv;
+ fou->port = cfg->udp_config.local_udp_port;
+
+ /* Initial for fou type */
+ switch (cfg->type) {
+ case FOU_ENCAP_DIRECT:
+ err = fou_encap_init(sk, fou, cfg);
+ if (err)
+ goto error;
+ break;
+ case FOU_ENCAP_GUE:
+ err = gue_encap_init(sk, fou, cfg);
+ if (err)
+ goto error;
+ break;
+ default:
+ err = -EINVAL;
+ goto error;
+ }
udp_sk(sk)->encap_type = 1;
udp_encap_enable();
@@ -199,23 +358,6 @@ static int fou_create(struct net *net, struct fou_cfg *cfg,
sk->sk_allocation = GFP_ATOMIC;
- switch (cfg->udp_config.family) {
- case AF_INET:
- fou->udp_offloads.callbacks.gro_receive = fou4_gro_receive;
- fou->udp_offloads.callbacks.gro_complete = fou4_gro_complete;
- break;
- case AF_INET6:
- fou->udp_offloads.callbacks.gro_receive = fou6_gro_receive;
- fou->udp_offloads.callbacks.gro_complete = fou6_gro_complete;
- break;
- default:
- err = -EPFNOSUPPORT;
- goto error;
- }
-
- fou->udp_offloads.port = cfg->udp_config.local_udp_port;
- fou->udp_offloads.ipproto = cfg->protocol;
-
if (cfg->udp_config.family == AF_INET) {
err = udp_add_offload(&fou->udp_offloads);
if (err)
@@ -272,6 +414,7 @@ static struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = {
[FOU_ATTR_PORT] = { .type = NLA_U16, },
[FOU_ATTR_AF] = { .type = NLA_U8, },
[FOU_ATTR_IPPROTO] = { .type = NLA_U8, },
+ [FOU_ATTR_TYPE] = { .type = NLA_U8, },
};
static int parse_nl_config(struct genl_info *info,
@@ -299,6 +442,9 @@ static int parse_nl_config(struct genl_info *info,
if (info->attrs[FOU_ATTR_IPPROTO])
cfg->protocol = nla_get_u8(info->attrs[FOU_ATTR_IPPROTO]);
+ if (info->attrs[FOU_ATTR_TYPE])
+ cfg->type = nla_get_u8(info->attrs[FOU_ATTR_TYPE]);
+
return 0;
}
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 15f0e2bad7ad..2811cc18701a 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -790,7 +790,7 @@ static void __net_exit ip4_frags_ns_ctl_unregister(struct net *net)
kfree(table);
}
-static void ip4_frags_ctl_register(void)
+static void __init ip4_frags_ctl_register(void)
{
register_net_sysctl(&init_net, "net/ipv4", ip4_frags_ctl_table);
}
@@ -804,7 +804,7 @@ static inline void ip4_frags_ns_ctl_unregister(struct net *net)
{
}
-static inline void ip4_frags_ctl_register(void)
+static inline void __init ip4_frags_ctl_register(void)
{
}
#endif
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 829aff8bf723..0485ef18d254 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -241,6 +241,8 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
/* Push GRE header. */
gre_build_header(skb, &tpi, tunnel->tun_hlen);
+ skb_set_inner_protocol(skb, tpi.proto);
+
ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index b75b47b0a223..0bb8e141eacc 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -56,6 +56,7 @@
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/udp.h>
+#include <net/gue.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
@@ -495,6 +496,8 @@ static int ip_encap_hlen(struct ip_tunnel_encap *e)
return 0;
case TUNNEL_ENCAP_FOU:
return sizeof(struct udphdr);
+ case TUNNEL_ENCAP_GUE:
+ return sizeof(struct udphdr) + sizeof(struct guehdr);
default:
return -EINVAL;
}
@@ -546,6 +549,15 @@ static int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
skb_reset_transport_header(skb);
uh = udp_hdr(skb);
+ if (e->type == TUNNEL_ENCAP_GUE) {
+ struct guehdr *guehdr = (struct guehdr *)&uh[1];
+
+ guehdr->version = 0;
+ guehdr->hlen = 0;
+ guehdr->flags = 0;
+ guehdr->next_hdr = *protocol;
+ }
+
uh->dest = e->dport;
uh->source = sport;
uh->len = htons(skb->len);
@@ -565,6 +577,7 @@ int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
case TUNNEL_ENCAP_NONE:
return 0;
case TUNNEL_ENCAP_FOU:
+ case TUNNEL_ENCAP_GUE:
return fou_build_header(skb, &t->encap, t->encap_hlen,
protocol, fl4);
default:
@@ -759,7 +772,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
df |= (inner_iph->frag_off&htons(IP_DF));
max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
- + rt->dst.header_len;
+ + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
if (max_headroom > dev->needed_headroom)
dev->needed_headroom = max_headroom;
@@ -853,9 +866,14 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
- if (!t && (cmd == SIOCADDTUNNEL)) {
- t = ip_tunnel_create(net, itn, p);
- err = PTR_ERR_OR_ZERO(t);
+ if (cmd == SIOCADDTUNNEL) {
+ if (!t) {
+ t = ip_tunnel_create(net, itn, p);
+ err = PTR_ERR_OR_ZERO(t);
+ break;
+ }
+
+ err = -EEXIST;
break;
}
if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index bfec31df8b21..ea88ab3102a8 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -224,6 +224,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
if (IS_ERR(skb))
goto out;
+ skb_set_inner_ipproto(skb, IPPROTO_IPIP);
+
ip_tunnel_xmit(skb, dev, tiph, tiph->protocol);
return NETDEV_TX_OK;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d4bd68dcdc39..793c0bb8c4fd 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -746,7 +746,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
}
n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw);
- if (n) {
+ if (!IS_ERR(n)) {
if (!(n->nud_state & NUD_VALID)) {
neigh_event_send(n, NULL);
} else {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index cf5e508e1ef5..26a6f113f00c 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2693,7 +2693,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
break;
#endif
case TCP_USER_TIMEOUT:
- /* Cap the max timeout in ms TCP will retry/retrans
+ /* Cap the max time in ms TCP will retry or probe the window
* before giving up and aborting (ETIMEDOUT) a connection.
*/
if (val < 0)
@@ -3172,7 +3172,7 @@ static int __init set_thash_entries(char *str)
}
__setup("thash_entries=", set_thash_entries);
-static void tcp_init_mem(void)
+static void __init tcp_init_mem(void)
{
unsigned long limit = nr_free_buffer_pages() / 8;
limit = max(limit, 128UL);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ee567e9e98c3..8d4eac793700 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2110,10 +2110,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
static bool skb_still_in_host_queue(const struct sock *sk,
const struct sk_buff *skb)
{
- const struct sk_buff *fclone = skb + 1;
-
- if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
- fclone->fclone == SKB_FCLONE_CLONE)) {
+ if (unlikely(skb_fclone_busy(skb))) {
NET_INC_STATS_BH(sock_net(sk),
LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
return true;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index b24360f6e293..9b21ae8b2e31 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -52,7 +52,7 @@ static void tcp_write_err(struct sock *sk)
* limit.
* 2. If we have strong memory pressure.
*/
-static int tcp_out_of_resources(struct sock *sk, int do_reset)
+static int tcp_out_of_resources(struct sock *sk, bool do_reset)
{
struct tcp_sock *tp = tcp_sk(sk);
int shift = 0;
@@ -72,7 +72,7 @@ static int tcp_out_of_resources(struct sock *sk, int do_reset)
if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN ||
/* 2. Window is closed. */
(!tp->snd_wnd && !tp->packets_out))
- do_reset = 1;
+ do_reset = true;
if (do_reset)
tcp_send_active_reset(sk, GFP_ATOMIC);
tcp_done(sk);
@@ -270,40 +270,41 @@ static void tcp_probe_timer(struct sock *sk)
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
int max_probes;
+ u32 start_ts;
if (tp->packets_out || !tcp_send_head(sk)) {
icsk->icsk_probes_out = 0;
return;
}
- /* *WARNING* RFC 1122 forbids this
- *
- * It doesn't AFAIK, because we kill the retransmit timer -AK
- *
- * FIXME: We ought not to do it, Solaris 2.5 actually has fixing
- * this behaviour in Solaris down as a bug fix. [AC]
- *
- * Let me to explain. icsk_probes_out is zeroed by incoming ACKs
- * even if they advertise zero window. Hence, connection is killed only
- * if we received no ACKs for normal connection timeout. It is not killed
- * only because window stays zero for some time, window may be zero
- * until armageddon and even later. We are in full accordance
- * with RFCs, only probe timer combines both retransmission timeout
- * and probe timeout in one bottle. --ANK
+ /* RFC 1122 4.2.2.17 requires the sender to stay open indefinitely as
+ * long as the receiver continues to respond probes. We support this by
+ * default and reset icsk_probes_out with incoming ACKs. But if the
+ * socket is orphaned or the user specifies TCP_USER_TIMEOUT, we
+ * kill the socket when the retry count and the time exceeds the
+ * corresponding system limit. We also implement similar policy when
+ * we use RTO to probe window in tcp_retransmit_timer().
*/
- max_probes = sysctl_tcp_retries2;
+ start_ts = tcp_skb_timestamp(tcp_send_head(sk));
+ if (!start_ts)
+ skb_mstamp_get(&tcp_send_head(sk)->skb_mstamp);
+ else if (icsk->icsk_user_timeout &&
+ (s32)(tcp_time_stamp - start_ts) > icsk->icsk_user_timeout)
+ goto abort;
+ max_probes = sysctl_tcp_retries2;
if (sock_flag(sk, SOCK_DEAD)) {
const int alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;
max_probes = tcp_orphan_retries(sk, alive);
-
- if (tcp_out_of_resources(sk, alive || icsk->icsk_probes_out <= max_probes))
+ if (!alive && icsk->icsk_backoff >= max_probes)
+ goto abort;
+ if (tcp_out_of_resources(sk, true))
return;
}
if (icsk->icsk_probes_out > max_probes) {
- tcp_write_err(sk);
+abort: tcp_write_err(sk);
} else {
/* Only send another probe if we didn't close things up. */
tcp_send_probe0(sk);
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 19ebe6a39ddc..507310ef4b56 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -25,8 +25,11 @@ struct udp_offload_priv {
struct udp_offload_priv __rcu *next;
};
-struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
- netdev_features_t features)
+static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
+ netdev_features_t features,
+ struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
+ netdev_features_t features),
+ __be16 new_protocol)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
u16 mac_offset = skb->mac_header;
@@ -48,7 +51,7 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
skb_reset_mac_header(skb);
skb_set_network_header(skb, skb_inner_network_offset(skb));
skb->mac_len = skb_inner_network_offset(skb);
- skb->protocol = htons(ETH_P_TEB);
+ skb->protocol = new_protocol;
need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
if (need_csum)
@@ -56,7 +59,7 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
/* segment inner packet. */
enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
- segs = skb_mac_gso_segment(skb, enc_features);
+ segs = gso_inner_segment(skb, enc_features);
if (IS_ERR_OR_NULL(segs)) {
skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset,
mac_len);
@@ -101,6 +104,44 @@ out:
return segs;
}
+struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
+ netdev_features_t features,
+ bool is_ipv6)
+{
+ __be16 protocol = skb->protocol;
+ const struct net_offload **offloads;
+ const struct net_offload *ops;
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
+ netdev_features_t features);
+
+ rcu_read_lock();
+
+ switch (skb->inner_protocol_type) {
+ case ENCAP_TYPE_ETHER:
+ protocol = skb->inner_protocol;
+ gso_inner_segment = skb_mac_gso_segment;
+ break;
+ case ENCAP_TYPE_IPPROTO:
+ offloads = is_ipv6 ? inet6_offloads : inet_offloads;
+ ops = rcu_dereference(offloads[skb->inner_ipproto]);
+ if (!ops || !ops->callbacks.gso_segment)
+ goto out_unlock;
+ gso_inner_segment = ops->callbacks.gso_segment;
+ break;
+ default:
+ goto out_unlock;
+ }
+
+ segs = __skb_udp_tunnel_segment(skb, features, gso_inner_segment,
+ protocol);
+
+out_unlock:
+ rcu_read_unlock();
+
+ return segs;
+}
+
static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
netdev_features_t features)
{
@@ -113,7 +154,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
if (skb->encapsulation &&
(skb_shinfo(skb)->gso_type &
(SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) {
- segs = skb_udp_tunnel_segment(skb, features);
+ segs = skb_udp_tunnel_segment(skb, features, false);
goto out;
}
@@ -293,6 +334,7 @@ static struct sk_buff **udp4_gro_receive(struct sk_buff **head,
skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
inet_gro_compute_pseudo);
skip:
+ NAPI_GRO_CB(skb)->is_ipv6 = 0;
return udp_gro_receive(head, skb, uh);
flush:
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index e189480f8fd6..725c763270a0 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4783,10 +4783,11 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
if (ip6_del_rt(ifp->rt))
dst_free(&ifp->rt->dst);
+
+ rt_genid_bump_ipv6(net);
break;
}
atomic_inc(&net->ipv6.dev_addr_genid);
- rt_genid_bump_ipv6(net);
}
static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index e6960457f625..98cc4cd570e2 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -8,6 +8,13 @@
#include <net/addrconf.h>
#include <net/ip.h>
+/* if ipv6 module registers this function is used by xfrm to force all
+ * sockets to relookup their nodes - this is fairly expensive, be
+ * careful
+ */
+void (*__fib6_flush_trees)(struct net *);
+EXPORT_SYMBOL(__fib6_flush_trees);
+
#define IPV6_ADDR_SCOPE_TYPE(scope) ((scope) << 16)
static inline unsigned int ipv6_addr_scope2type(unsigned int scope)
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 76b7f5ee8f4c..97b9fa8de377 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1605,6 +1605,24 @@ static void fib6_prune_clones(struct net *net, struct fib6_node *fn)
fib6_clean_tree(net, fn, fib6_prune_clone, 1, NULL);
}
+static int fib6_update_sernum(struct rt6_info *rt, void *arg)
+{
+ __u32 sernum = *(__u32 *)arg;
+
+ if (rt->rt6i_node &&
+ rt->rt6i_node->fn_sernum != sernum)
+ rt->rt6i_node->fn_sernum = sernum;
+
+ return 0;
+}
+
+static void fib6_flush_trees(struct net *net)
+{
+ __u32 new_sernum = fib6_new_sernum();
+
+ fib6_clean_all(net, fib6_update_sernum, &new_sernum);
+}
+
/*
* Garbage collection
*/
@@ -1788,6 +1806,8 @@ int __init fib6_init(void)
NULL);
if (ret)
goto out_unregister_subsys;
+
+ __fib6_flush_trees = fib6_flush_trees;
out:
return ret;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 5f19dfbc4c6a..74b677916a70 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -314,6 +314,8 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
t = ip6gre_tunnel_find(net, parms, ARPHRD_IP6GRE);
+ if (t && create)
+ return NULL;
if (t || !create)
return t;
@@ -616,6 +618,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
int err = -1;
u8 proto;
struct sk_buff *new_skb;
+ __be16 protocol;
if (dev->type == ARPHRD_ETHER)
IPCB(skb)->flags = 0;
@@ -732,8 +735,9 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
ipv6h->daddr = fl6->daddr;
((__be16 *)(ipv6h + 1))[0] = tunnel->parms.o_flags;
- ((__be16 *)(ipv6h + 1))[1] = (dev->type == ARPHRD_ETHER) ?
- htons(ETH_P_TEB) : skb->protocol;
+ protocol = (dev->type == ARPHRD_ETHER) ?
+ htons(ETH_P_TEB) : skb->protocol;
+ ((__be16 *)(ipv6h + 1))[1] = protocol;
if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
__be32 *ptr = (__be32 *)(((u8 *)ipv6h) + tunnel->hlen - 4);
@@ -754,6 +758,8 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
}
}
+ skb_set_inner_protocol(skb, protocol);
+
ip6tunnel_xmit(skb, dev);
if (ndst)
ip6_tnl_dst_store(tunnel, ndst);
@@ -1724,4 +1730,5 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)");
MODULE_DESCRIPTION("GRE over IPv6 tunneling device");
MODULE_ALIAS_RTNL_LINK("ip6gre");
+MODULE_ALIAS_RTNL_LINK("ip6gretap");
MODULE_ALIAS_NETDEV("ip6gre0");
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index e01bd0399297..d3e8888ad611 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -364,8 +364,12 @@ static struct ip6_tnl *ip6_tnl_locate(struct net *net,
(t = rtnl_dereference(*tp)) != NULL;
tp = &t->next) {
if (ipv6_addr_equal(local, &t->parms.laddr) &&
- ipv6_addr_equal(remote, &t->parms.raddr))
+ ipv6_addr_equal(remote, &t->parms.raddr)) {
+ if (create)
+ return NULL;
+
return t;
+ }
}
if (!create)
return NULL;
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 7f52fd9fa7b0..5833a2244467 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -253,8 +253,12 @@ static struct ip6_tnl *vti6_locate(struct net *net, struct __ip6_tnl_parm *p,
(t = rtnl_dereference(*tp)) != NULL;
tp = &t->next) {
if (ipv6_addr_equal(local, &t->parms.laddr) &&
- ipv6_addr_equal(remote, &t->parms.raddr))
+ ipv6_addr_equal(remote, &t->parms.raddr)) {
+ if (create)
+ return NULL;
+
return t;
+ }
}
if (!create)
return NULL;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index f74b0417bd60..a318dd89b6d9 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -314,7 +314,6 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net,
memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
- rt->rt6i_genid = rt_genid_ipv6(net);
INIT_LIST_HEAD(&rt->rt6i_siblings);
}
return rt;
@@ -1096,9 +1095,6 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
* DST_OBSOLETE_FORCE_CHK which forces validation calls down
* into this function always.
*/
- if (rt->rt6i_genid != rt_genid_ipv6(dev_net(rt->dst.dev)))
- return NULL;
-
if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
return NULL;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index db75809ab843..0d4e27466f82 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -982,6 +982,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
goto tx_error;
}
+ skb_set_inner_ipproto(skb, IPPROTO_IPV6);
+
err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr,
protocol, tos, ttl, df,
!net_eq(tunnel->net, dev_net(dev)));
@@ -1006,6 +1008,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
if (IS_ERR(skb))
goto out;
+ skb_set_inner_ipproto(skb, IPPROTO_IPIP);
+
ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP);
return NETDEV_TX_OK;
out:
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 212ebfc7973f..6b8f543f6ac6 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -58,7 +58,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
if (skb->encapsulation && skb_shinfo(skb)->gso_type &
(SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))
- segs = skb_udp_tunnel_segment(skb, features);
+ segs = skb_udp_tunnel_segment(skb, features, true);
else {
const struct ipv6hdr *ipv6h;
struct udphdr *uh;
@@ -140,6 +140,7 @@ static struct sk_buff **udp6_gro_receive(struct sk_buff **head,
ip6_gro_compute_pseudo);
skip:
+ NAPI_GRO_CB(skb)->is_ipv6 = 1;
return udp_gro_receive(head, skb, uh);
flush:
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 608d18986923..ae5096ab65eb 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -856,6 +856,7 @@ config NETFILTER_XT_TARGET_TPROXY
tristate '"TPROXY" target transparent proxying support'
depends on NETFILTER_XTABLES
depends on NETFILTER_ADVANCED
+ depends on (IPV6 || IPV6=n)
depends on IP_NF_MANGLE
select NF_DEFRAG_IPV4
select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index f77d3f7f22b5..6c5a915cfa75 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -222,6 +222,51 @@ replay:
}
}
+struct nfnl_err {
+ struct list_head head;
+ struct nlmsghdr *nlh;
+ int err;
+};
+
+static int nfnl_err_add(struct list_head *list, struct nlmsghdr *nlh, int err)
+{
+ struct nfnl_err *nfnl_err;
+
+ nfnl_err = kmalloc(sizeof(struct nfnl_err), GFP_KERNEL);
+ if (nfnl_err == NULL)
+ return -ENOMEM;
+
+ nfnl_err->nlh = nlh;
+ nfnl_err->err = err;
+ list_add_tail(&nfnl_err->head, list);
+
+ return 0;
+}
+
+static void nfnl_err_del(struct nfnl_err *nfnl_err)
+{
+ list_del(&nfnl_err->head);
+ kfree(nfnl_err);
+}
+
+static void nfnl_err_reset(struct list_head *err_list)
+{
+ struct nfnl_err *nfnl_err, *next;
+
+ list_for_each_entry_safe(nfnl_err, next, err_list, head)
+ nfnl_err_del(nfnl_err);
+}
+
+static void nfnl_err_deliver(struct list_head *err_list, struct sk_buff *skb)
+{
+ struct nfnl_err *nfnl_err, *next;
+
+ list_for_each_entry_safe(nfnl_err, next, err_list, head) {
+ netlink_ack(skb, nfnl_err->nlh, nfnl_err->err);
+ nfnl_err_del(nfnl_err);
+ }
+}
+
static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
u_int16_t subsys_id)
{
@@ -230,6 +275,7 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
const struct nfnetlink_subsystem *ss;
const struct nfnl_callback *nc;
bool success = true, done = false;
+ static LIST_HEAD(err_list);
int err;
if (subsys_id >= NFNL_SUBSYS_COUNT)
@@ -287,6 +333,7 @@ replay:
type = nlh->nlmsg_type;
if (type == NFNL_MSG_BATCH_BEGIN) {
/* Malformed: Batch begin twice */
+ nfnl_err_reset(&err_list);
success = false;
goto done;
} else if (type == NFNL_MSG_BATCH_END) {
@@ -333,6 +380,7 @@ replay:
* original skb.
*/
if (err == -EAGAIN) {
+ nfnl_err_reset(&err_list);
ss->abort(oskb);
nfnl_unlock(subsys_id);
kfree_skb(nskb);
@@ -341,11 +389,24 @@ replay:
}
ack:
if (nlh->nlmsg_flags & NLM_F_ACK || err) {
+ /* Errors are delivered once the full batch has been
+ * processed, this avoids that the same error is
+ * reported several times when replaying the batch.
+ */
+ if (nfnl_err_add(&err_list, nlh, err) < 0) {
+ /* We failed to enqueue an error, reset the
+ * list of errors and send OOM to userspace
+ * pointing to the batch header.
+ */
+ nfnl_err_reset(&err_list);
+ netlink_ack(skb, nlmsg_hdr(oskb), -ENOMEM);
+ success = false;
+ goto done;
+ }
/* We don't stop processing the batch on errors, thus,
* userspace gets all the errors that the batch
* triggers.
*/
- netlink_ack(skb, nlh, err);
if (err)
success = false;
}
@@ -361,6 +422,7 @@ done:
else
ss->abort(oskb);
+ nfnl_err_deliver(&err_list, oskb);
nfnl_unlock(subsys_id);
kfree_skb(nskb);
}
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 28fb8f38e6ba..8892b7b6184a 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -180,15 +180,17 @@ static int nft_hash_init(const struct nft_set *set,
static void nft_hash_destroy(const struct nft_set *set)
{
const struct rhashtable *priv = nft_set_priv(set);
- const struct bucket_table *tbl;
+ const struct bucket_table *tbl = priv->tbl;
struct nft_hash_elem *he, *next;
unsigned int i;
- tbl = rht_dereference(priv->tbl, priv);
- for (i = 0; i < tbl->size; i++)
- rht_for_each_entry_safe(he, next, tbl->buckets[i], priv, node)
+ for (i = 0; i < tbl->size; i++) {
+ for (he = rht_entry(tbl->buckets[i], struct nft_hash_elem, node);
+ he != NULL; he = next) {
+ next = rht_entry(he->node.next, struct nft_hash_elem, node);
nft_hash_elem_destroy(set, he);
-
+ }
+ }
rhashtable_destroy(priv);
}
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c
index e1836ff88199..46214f245665 100644
--- a/net/netfilter/nft_rbtree.c
+++ b/net/netfilter/nft_rbtree.c
@@ -234,13 +234,11 @@ static void nft_rbtree_destroy(const struct nft_set *set)
struct nft_rbtree_elem *rbe;
struct rb_node *node;
- spin_lock_bh(&nft_rbtree_lock);
while ((node = priv->root.rb_node) != NULL) {
rb_erase(node, &priv->root);
rbe = rb_entry(node, struct nft_rbtree_elem, node);
nft_rbtree_elem_destroy(set, rbe);
}
- spin_unlock_bh(&nft_rbtree_lock);
}
static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 8d0e83d6903e..30f10fb07f4a 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -254,10 +254,15 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
cp->tp = tp;
if (p->perfect) {
+ int i;
+
cp->perfect = kmemdup(p->perfect,
sizeof(*r) * cp->hash, GFP_KERNEL);
if (!cp->perfect)
goto errout;
+ for (i = 0; i < cp->hash; i++)
+ tcf_exts_init(&cp->perfect[i].exts,
+ TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
balloc = 1;
}
cp->h = p->h;
@@ -353,6 +358,9 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
f = kzalloc(sizeof(*f), GFP_KERNEL);
if (!f)
goto errout_alloc;
+ f->key = handle;
+ tcindex_filter_result_init(&f->result);
+ f->next = NULL;
}
if (tb[TCA_TCINDEX_CLASSID]) {
@@ -376,9 +384,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
struct tcindex_filter *nfp;
struct tcindex_filter __rcu **fp;
- f->key = handle;
- f->result = new_filter_result;
- f->next = NULL;
+ tcf_exts_change(tp, &f->result.exts, &r->exts);
fp = cp->h + (handle % cp->hash);
for (nfp = rtnl_dereference(*fp);
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 4be3ebf46d67..0472909bb014 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -358,7 +358,6 @@ static int u32_destroy_key(struct tcf_proto *tp,
struct tc_u_knode *n,
bool free_pf)
{
- tcf_unbind_filter(tp, &n->res);
tcf_exts_destroy(&n->exts);
if (n->ht_down)
n->ht_down->refcnt--;
@@ -416,6 +415,7 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
if (pkp == key) {
RCU_INIT_POINTER(*kp, key->next);
+ tcf_unbind_filter(tp, &key->res);
call_rcu(&key->rcu, u32_delete_key_freepf_rcu);
return 0;
}
@@ -425,7 +425,7 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
return 0;
}
-static void u32_clear_hnode(struct tc_u_hnode *ht)
+static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
{
struct tc_u_knode *n;
unsigned int h;
@@ -434,6 +434,7 @@ static void u32_clear_hnode(struct tc_u_hnode *ht)
while ((n = rtnl_dereference(ht->ht[h])) != NULL) {
RCU_INIT_POINTER(ht->ht[h],
rtnl_dereference(n->next));
+ tcf_unbind_filter(tp, &n->res);
call_rcu(&n->rcu, u32_delete_key_freepf_rcu);
}
}
@@ -447,7 +448,7 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
WARN_ON(ht->refcnt);
- u32_clear_hnode(ht);
+ u32_clear_hnode(tp, ht);
hn = &tp_c->hlist;
for (phn = rtnl_dereference(*hn);
@@ -482,7 +483,7 @@ static void u32_destroy(struct tcf_proto *tp)
ht;
ht = rtnl_dereference(ht->next)) {
ht->refcnt--;
- u32_clear_hnode(ht);
+ u32_clear_hnode(tp, ht);
}
while ((ht = rtnl_dereference(tp_c->hlist)) != NULL) {
@@ -731,6 +732,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
}
u32_replace_knode(tp, tp_c, new);
+ tcf_unbind_filter(tp, &n->res);
call_rcu(&n->rcu, u32_delete_key_rcu);
return 0;
}
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 3a633debb6df..ad57f4444b9c 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -526,9 +526,11 @@ pop_stack:
match_idx = stack[--stackp];
cur_match = tcf_em_get_match(tree, match_idx);
- if (tcf_em_early_end(cur_match, res))
+ if (tcf_em_early_end(cur_match, res)) {
+ if (tcf_em_is_inverted(cur_match))
+ res = !res;
goto pop_stack;
- else {
+ } else {
match_idx++;
goto proceed;
}
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index aa8329508dba..c79a226cc25c 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -578,8 +578,10 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
timer);
+ rcu_read_lock();
qdisc_unthrottled(wd->qdisc);
__netif_schedule(qdisc_root(wd->qdisc));
+ rcu_read_unlock();
return HRTIMER_NORESTART;
}
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 7c8e5d73d433..2b349a4de3c8 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -56,11 +56,34 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
return 0;
}
-static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
+static void try_bulk_dequeue_skb(struct Qdisc *q,
+ struct sk_buff *skb,
+ const struct netdev_queue *txq)
+{
+ int bytelimit = qdisc_avail_bulklimit(txq) - skb->len;
+
+ while (bytelimit > 0) {
+ struct sk_buff *nskb = q->dequeue(q);
+
+ if (!nskb)
+ break;
+
+ bytelimit -= nskb->len; /* covers GSO len */
+ skb->next = nskb;
+ skb = nskb;
+ }
+ skb->next = NULL;
+}
+
+/* Note that dequeue_skb can possibly return a SKB list (via skb->next).
+ * A requeued skb (via q->gso_skb) can also be a SKB list.
+ */
+static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate)
{
struct sk_buff *skb = q->gso_skb;
const struct netdev_queue *txq = q->dev_queue;
+ *validate = true;
if (unlikely(skb)) {
/* check the reason of requeuing without tx lock first */
txq = skb_get_tx_queue(txq->dev, skb);
@@ -69,14 +92,16 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
q->q.qlen--;
} else
skb = NULL;
+ /* skb in gso_skb were already validated */
+ *validate = false;
} else {
- if (!(q->flags & TCQ_F_ONETXQUEUE) || !netif_xmit_frozen_or_stopped(txq)) {
+ if (!(q->flags & TCQ_F_ONETXQUEUE) ||
+ !netif_xmit_frozen_or_stopped(txq)) {
skb = q->dequeue(q);
- if (skb)
- skb = validate_xmit_skb(skb, qdisc_dev(q));
+ if (skb && qdisc_may_bulk(q))
+ try_bulk_dequeue_skb(q, skb, txq);
}
}
-
return skb;
}
@@ -120,19 +145,24 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
*/
int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
struct net_device *dev, struct netdev_queue *txq,
- spinlock_t *root_lock)
+ spinlock_t *root_lock, bool validate)
{
int ret = NETDEV_TX_BUSY;
/* And release qdisc */
spin_unlock(root_lock);
- HARD_TX_LOCK(dev, txq, smp_processor_id());
- if (!netif_xmit_frozen_or_stopped(txq))
- skb = dev_hard_start_xmit(skb, dev, txq, &ret);
+ /* Note that we validate skb (GSO, checksum, ...) outside of locks */
+ if (validate)
+ skb = validate_xmit_skb_list(skb, dev);
- HARD_TX_UNLOCK(dev, txq);
+ if (skb) {
+ HARD_TX_LOCK(dev, txq, smp_processor_id());
+ if (!netif_xmit_frozen_or_stopped(txq))
+ skb = dev_hard_start_xmit(skb, dev, txq, &ret);
+ HARD_TX_UNLOCK(dev, txq);
+ }
spin_lock(root_lock);
if (dev_xmit_complete(ret)) {
@@ -181,9 +211,10 @@ static inline int qdisc_restart(struct Qdisc *q)
struct net_device *dev;
spinlock_t *root_lock;
struct sk_buff *skb;
+ bool validate;
/* Dequeue packet */
- skb = dequeue_skb(q);
+ skb = dequeue_skb(q, &validate);
if (unlikely(!skb))
return 0;
@@ -193,7 +224,7 @@ static inline int qdisc_restart(struct Qdisc *q)
dev = qdisc_dev(q);
txq = skb_get_tx_queue(dev, skb);
- return sch_direct_xmit(skb, q, dev, txq, root_lock);
+ return sch_direct_xmit(skb, q, dev, txq, root_lock, validate);
}
void __qdisc_run(struct Qdisc *q)
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index f623dca6ce30..4c4e457e7888 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1961,10 +1961,8 @@ static int xdst_queue_output(struct sock *sk, struct sk_buff *skb)
struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
struct xfrm_policy *pol = xdst->pols[0];
struct xfrm_policy_queue *pq = &pol->polq;
- const struct sk_buff *fclone = skb + 1;
- if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
- fclone->fclone == SKB_FCLONE_CLONE)) {
+ if (unlikely(skb_fclone_busy(skb))) {
kfree_skb(skb);
return 0;
}