diff options
Diffstat (limited to 'net')
46 files changed, 772 insertions, 178 deletions
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 659cac15c0df..ffd379db5938 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -88,12 +88,17 @@ out: static int br_dev_init(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); + int err; br->stats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!br->stats) return -ENOMEM; - return 0; + err = br_vlan_init(br); + if (err) + free_percpu(br->stats); + + return err; } static int br_dev_open(struct net_device *dev) @@ -389,5 +394,4 @@ void br_dev_setup(struct net_device *dev) br_netfilter_rtable_init(br); br_stp_timer_init(br); br_multicast_init(br); - br_vlan_init(br); } diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index a9f54a9b6690..ed307db7a12b 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -332,7 +332,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, p->port_no = index; p->flags = BR_LEARNING | BR_FLOOD; br_init_port(p); - p->state = BR_STATE_DISABLED; + br_set_state(p, BR_STATE_DISABLED); br_stp_port_timer_init(p); br_multicast_add_port(p); @@ -500,6 +500,9 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) if (br_fdb_insert(br, p, dev->dev_addr, 0)) netdev_err(dev, "failed insert local address bridge forwarding table\n"); + if (nbp_vlan_init(p)) + netdev_err(dev, "failed to initialize vlan filtering on this port\n"); + spin_lock_bh(&br->lock); changed_addr = br_stp_recalculate_bridge_id(br); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 0fa66b83685f..2ff9706647f2 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -301,7 +301,7 @@ static int br_set_port_state(struct net_bridge_port *p, u8 state) (!netif_oper_up(p->dev) && state != BR_STATE_DISABLED)) return -ENETDOWN; - p->state = state; + br_set_state(p, state); br_log_state(p); br_port_state_selection(p->br); return 0; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index f53592fc3ef9..d8cbaa694227 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -299,6 +299,7 @@ struct net_bridge #ifdef CONFIG_BRIDGE_VLAN_FILTERING u8 vlan_enabled; __be16 vlan_proto; + u16 default_pvid; struct net_port_vlans __rcu *vlan_info; #endif }; @@ -604,11 +605,13 @@ bool br_vlan_find(struct net_bridge *br, u16 vid); void br_recalculate_fwd_mask(struct net_bridge *br); int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val); int br_vlan_set_proto(struct net_bridge *br, unsigned long val); -void br_vlan_init(struct net_bridge *br); +int br_vlan_init(struct net_bridge *br); +int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val); int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags); int nbp_vlan_delete(struct net_bridge_port *port, u16 vid); void nbp_vlan_flush(struct net_bridge_port *port); bool nbp_vlan_find(struct net_bridge_port *port, u16 vid); +int nbp_vlan_init(struct net_bridge_port *port); static inline struct net_port_vlans *br_get_vlan_info( const struct net_bridge *br) @@ -641,11 +644,11 @@ static inline int br_vlan_get_tag(const struct sk_buff *skb, u16 *vid) static inline u16 br_get_pvid(const struct net_port_vlans *v) { - /* Return just the VID if it is set, or VLAN_N_VID (invalid vid) if - * vid wasn't set - */ + if (!v) + return 0; + smp_rmb(); - return v->pvid ?: VLAN_N_VID; + return v->pvid; } static inline int br_vlan_enabled(struct net_bridge *br) @@ -704,8 +707,9 @@ static inline void br_recalculate_fwd_mask(struct net_bridge *br) { } -static inline void br_vlan_init(struct net_bridge *br) +static inline int br_vlan_init(struct net_bridge *br) { + return 0; } static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) @@ -738,13 +742,18 @@ static inline bool nbp_vlan_find(struct net_bridge_port *port, u16 vid) return false; } +static inline int nbp_vlan_init(struct net_bridge_port *port) +{ + return 0; +} + static inline u16 br_vlan_get_tag(const struct sk_buff *skb, u16 *tag) { return 0; } static inline u16 br_get_pvid(const struct net_port_vlans *v) { - return VLAN_N_VID; /* Returns invalid vid */ + return 0; } static inline int br_vlan_enabled(struct net_bridge *br) @@ -766,6 +775,7 @@ static inline void br_nf_core_fini(void) {} /* br_stp.c */ void br_log_state(const struct net_bridge_port *p); +void br_set_state(struct net_bridge_port *p, unsigned int state); struct net_bridge_port *br_get_port(struct net_bridge *br, u16 port_no); void br_init_port(struct net_bridge_port *p); void br_become_designated_port(struct net_bridge_port *p); diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 3c86f0538cbb..2b047bcf42a4 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -36,6 +36,11 @@ void br_log_state(const struct net_bridge_port *p) br_port_state_names[p->state]); } +void br_set_state(struct net_bridge_port *p, unsigned int state) +{ + p->state = state; +} + /* called under bridge lock */ struct net_bridge_port *br_get_port(struct net_bridge *br, u16 port_no) { @@ -107,7 +112,7 @@ static void br_root_port_block(const struct net_bridge *br, br_notice(br, "port %u(%s) tried to become root port (blocked)", (unsigned int) p->port_no, p->dev->name); - p->state = BR_STATE_LISTENING; + br_set_state(p, BR_STATE_LISTENING); br_log_state(p); br_ifinfo_notify(RTM_NEWLINK, p); @@ -387,7 +392,7 @@ static void br_make_blocking(struct net_bridge_port *p) p->state == BR_STATE_LEARNING) br_topology_change_detection(p->br); - p->state = BR_STATE_BLOCKING; + br_set_state(p, BR_STATE_BLOCKING); br_log_state(p); br_ifinfo_notify(RTM_NEWLINK, p); @@ -404,13 +409,13 @@ static void br_make_forwarding(struct net_bridge_port *p) return; if (br->stp_enabled == BR_NO_STP || br->forward_delay == 0) { - p->state = BR_STATE_FORWARDING; + br_set_state(p, BR_STATE_FORWARDING); br_topology_change_detection(br); del_timer(&p->forward_delay_timer); } else if (br->stp_enabled == BR_KERNEL_STP) - p->state = BR_STATE_LISTENING; + br_set_state(p, BR_STATE_LISTENING); else - p->state = BR_STATE_LEARNING; + br_set_state(p, BR_STATE_LEARNING); br_multicast_enable_port(p); br_log_state(p); diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 189ba1e7d851..41146872c1b4 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -37,7 +37,7 @@ void br_init_port(struct net_bridge_port *p) { p->port_id = br_make_port_id(p->priority, p->port_no); br_become_designated_port(p); - p->state = BR_STATE_BLOCKING; + br_set_state(p, BR_STATE_BLOCKING); p->topology_change_ack = 0; p->config_pending = 0; } @@ -100,7 +100,7 @@ void br_stp_disable_port(struct net_bridge_port *p) wasroot = br_is_root_bridge(br); br_become_designated_port(p); - p->state = BR_STATE_DISABLED; + br_set_state(p, BR_STATE_DISABLED); p->topology_change_ack = 0; p->config_pending = 0; diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index 558c46d19e05..4fcaa67750fd 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -87,11 +87,11 @@ static void br_forward_delay_timer_expired(unsigned long arg) (unsigned int) p->port_no, p->dev->name); spin_lock(&br->lock); if (p->state == BR_STATE_LISTENING) { - p->state = BR_STATE_LEARNING; + br_set_state(p, BR_STATE_LEARNING); mod_timer(&p->forward_delay_timer, jiffies + br->forward_delay); } else if (p->state == BR_STATE_LEARNING) { - p->state = BR_STATE_FORWARDING; + br_set_state(p, BR_STATE_FORWARDING); if (br_is_designated_for_some_port(br)) br_topology_change_detection(br); netif_carrier_on(br->dev); diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index cb431c6016ee..4c97fc50fb70 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -725,6 +725,22 @@ static ssize_t vlan_protocol_store(struct device *d, return store_bridge_parm(d, buf, len, br_vlan_set_proto); } static DEVICE_ATTR_RW(vlan_protocol); + +static ssize_t default_pvid_show(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%d\n", br->default_pvid); +} + +static ssize_t default_pvid_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, br_vlan_set_default_pvid); +} +static DEVICE_ATTR_RW(default_pvid); #endif static struct attribute *bridge_attrs[] = { @@ -771,6 +787,7 @@ static struct attribute *bridge_attrs[] = { #ifdef CONFIG_BRIDGE_VLAN_FILTERING &dev_attr_vlan_filtering.attr, &dev_attr_vlan_protocol.attr, + &dev_attr_default_pvid.attr, #endif NULL }; diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 3ba57fcdcd13..150048fb99b0 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -223,7 +223,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, * See if pvid is set on this port. That tells us which * vlan untagged or priority-tagged traffic belongs to. */ - if (pvid == VLAN_N_VID) + if (!pvid) goto drop; /* PVID is set on this port. Any untagged or priority-tagged @@ -292,7 +292,7 @@ bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid) if (!*vid) { *vid = br_get_pvid(v); - if (*vid == VLAN_N_VID) + if (!*vid) return false; return true; @@ -499,9 +499,141 @@ err_filt: goto unlock; } -void br_vlan_init(struct net_bridge *br) +static bool vlan_default_pvid(struct net_port_vlans *pv, u16 vid) +{ + return pv && vid == pv->pvid && test_bit(vid, pv->untagged_bitmap); +} + +static void br_vlan_disable_default_pvid(struct net_bridge *br) +{ + struct net_bridge_port *p; + u16 pvid = br->default_pvid; + + /* Disable default_pvid on all ports where it is still + * configured. + */ + if (vlan_default_pvid(br_get_vlan_info(br), pvid)) + br_vlan_delete(br, pvid); + + list_for_each_entry(p, &br->port_list, list) { + if (vlan_default_pvid(nbp_get_vlan_info(p), pvid)) + nbp_vlan_delete(p, pvid); + } + + br->default_pvid = 0; +} + +static int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid) +{ + struct net_bridge_port *p; + u16 old_pvid; + int err = 0; + unsigned long *changed; + + changed = kcalloc(BITS_TO_LONGS(BR_MAX_PORTS), sizeof(unsigned long), + GFP_KERNEL); + if (!changed) + return -ENOMEM; + + old_pvid = br->default_pvid; + + /* Update default_pvid config only if we do not conflict with + * user configuration. + */ + if ((!old_pvid || vlan_default_pvid(br_get_vlan_info(br), old_pvid)) && + !br_vlan_find(br, pvid)) { + err = br_vlan_add(br, pvid, + BRIDGE_VLAN_INFO_PVID | + BRIDGE_VLAN_INFO_UNTAGGED); + if (err) + goto out; + br_vlan_delete(br, old_pvid); + set_bit(0, changed); + } + + list_for_each_entry(p, &br->port_list, list) { + /* Update default_pvid config only if we do not conflict with + * user configuration. + */ + if ((old_pvid && + !vlan_default_pvid(nbp_get_vlan_info(p), old_pvid)) || + nbp_vlan_find(p, pvid)) + continue; + + err = nbp_vlan_add(p, pvid, + BRIDGE_VLAN_INFO_PVID | + BRIDGE_VLAN_INFO_UNTAGGED); + if (err) + goto err_port; + nbp_vlan_delete(p, old_pvid); + set_bit(p->port_no, changed); + } + + br->default_pvid = pvid; + +out: + kfree(changed); + return err; + +err_port: + list_for_each_entry_continue_reverse(p, &br->port_list, list) { + if (!test_bit(p->port_no, changed)) + continue; + + if (old_pvid) + nbp_vlan_add(p, old_pvid, + BRIDGE_VLAN_INFO_PVID | + BRIDGE_VLAN_INFO_UNTAGGED); + nbp_vlan_delete(p, pvid); + } + + if (test_bit(0, changed)) { + if (old_pvid) + br_vlan_add(br, old_pvid, + BRIDGE_VLAN_INFO_PVID | + BRIDGE_VLAN_INFO_UNTAGGED); + br_vlan_delete(br, pvid); + } + goto out; +} + +int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val) +{ + u16 pvid = val; + int err = 0; + + if (val >= VLAN_VID_MASK) + return -EINVAL; + + if (!rtnl_trylock()) + return restart_syscall(); + + if (pvid == br->default_pvid) + goto unlock; + + /* Only allow default pvid change when filtering is disabled */ + if (br->vlan_enabled) { + pr_info_once("Please disable vlan filtering to change default_pvid\n"); + err = -EPERM; + goto unlock; + } + + if (!pvid) + br_vlan_disable_default_pvid(br); + else + err = __br_vlan_set_default_pvid(br, pvid); + +unlock: + rtnl_unlock(); + return err; +} + +int br_vlan_init(struct net_bridge *br) { br->vlan_proto = htons(ETH_P_8021Q); + br->default_pvid = 1; + return br_vlan_add(br, 1, + BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED); } /* Must be protected by RTNL. @@ -593,3 +725,12 @@ out: rcu_read_unlock(); return found; } + +int nbp_vlan_init(struct net_bridge_port *p) +{ + return p->br->default_pvid ? + nbp_vlan_add(p, p->br->default_pvid, + BRIDGE_VLAN_INFO_PVID | + BRIDGE_VLAN_INFO_UNTAGGED) : + 0; +} diff --git a/net/core/dev.c b/net/core/dev.c index e55c546717d4..1a90530f83ff 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2655,7 +2655,7 @@ struct sk_buff *validate_xmit_vlan(struct sk_buff *skb, netdev_features_t featur return skb; } -struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev) +static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev) { netdev_features_t features; @@ -2720,6 +2720,30 @@ out_null: return NULL; } +struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev) +{ + struct sk_buff *next, *head = NULL, *tail; + + while (skb) { + next = skb->next; + skb->next = NULL; + skb = validate_xmit_skb(skb, dev); + if (skb) { + struct sk_buff *end = skb; + + while (end->next) + end = end->next; + if (!head) + head = skb; + else + tail->next = skb; + tail = end; + } + skb = next; + } + return head; +} + static void qdisc_pkt_len_init(struct sk_buff *skb) { const struct skb_shared_info *shinfo = skb_shinfo(skb); @@ -2786,8 +2810,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, qdisc_bstats_update(q, skb); - skb = validate_xmit_skb(skb, dev); - if (skb && sch_direct_xmit(skb, q, dev, txq, root_lock)) { + if (sch_direct_xmit(skb, q, dev, txq, root_lock, true)) { if (unlikely(contended)) { spin_unlock(&q->busylock); contended = false; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 5c728aaf8d6c..443256bdcddc 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -387,6 +387,7 @@ struct pktgen_dev { u16 queue_map_min; u16 queue_map_max; __u32 skb_priority; /* skb priority field */ + unsigned int burst; /* number of duplicated packets to burst */ int node; /* Memory node */ #ifdef CONFIG_XFRM @@ -613,6 +614,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v) if (pkt_dev->traffic_class) seq_printf(seq, " traffic_class: 0x%02x\n", pkt_dev->traffic_class); + if (pkt_dev->burst > 1) + seq_printf(seq, " burst: %d\n", pkt_dev->burst); + if (pkt_dev->node >= 0) seq_printf(seq, " node: %d\n", pkt_dev->node); @@ -1124,6 +1128,16 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->dst_mac_count); return count; } + if (!strcmp(name, "burst")) { + len = num_arg(&user_buffer[i], 10, &value); + if (len < 0) + return len; + + i += len; + pkt_dev->burst = value < 1 ? 1 : value; + sprintf(pg_result, "OK: burst=%d", pkt_dev->burst); + return count; + } if (!strcmp(name, "node")) { len = num_arg(&user_buffer[i], 10, &value); if (len < 0) @@ -3297,6 +3311,7 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev) static void pktgen_xmit(struct pktgen_dev *pkt_dev) { + unsigned int burst = ACCESS_ONCE(pkt_dev->burst); struct net_device *odev = pkt_dev->odev; struct netdev_queue *txq; int ret; @@ -3347,8 +3362,10 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->last_ok = 0; goto unlock; } - atomic_inc(&(pkt_dev->skb->users)); - ret = netdev_start_xmit(pkt_dev->skb, odev, txq, false); + atomic_add(burst, &pkt_dev->skb->users); + +xmit_more: + ret = netdev_start_xmit(pkt_dev->skb, odev, txq, --burst > 0); switch (ret) { case NETDEV_TX_OK: @@ -3356,6 +3373,8 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->sofar++; pkt_dev->seq_num++; pkt_dev->tx_bytes += pkt_dev->last_pkt_size; + if (burst > 0 && !netif_xmit_frozen_or_drv_stopped(txq)) + goto xmit_more; break; case NET_XMIT_DROP: case NET_XMIT_CN: @@ -3374,6 +3393,8 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) atomic_dec(&(pkt_dev->skb->users)); pkt_dev->last_ok = 0; } + if (unlikely(burst)) + atomic_sub(burst, &pkt_dev->skb->users); unlock: HARD_TX_UNLOCK(odev, txq); @@ -3572,6 +3593,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) pkt_dev->svlan_p = 0; pkt_dev->svlan_cfi = 0; pkt_dev->svlan_id = 0xffff; + pkt_dev->burst = 1; pkt_dev->node = -1; err = pktgen_setup_dev(t->net, pkt_dev, ifname); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 7de3d679f3e0..9a423e2c5766 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -257,15 +257,16 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, kmemcheck_annotate_variable(shinfo->destructor_arg); if (flags & SKB_ALLOC_FCLONE) { - struct sk_buff *child = skb + 1; - atomic_t *fclone_ref = (atomic_t *) (child + 1); + struct sk_buff_fclones *fclones; - kmemcheck_annotate_bitfield(child, flags1); + fclones = container_of(skb, struct sk_buff_fclones, skb1); + + kmemcheck_annotate_bitfield(&fclones->skb2, flags1); skb->fclone = SKB_FCLONE_ORIG; - atomic_set(fclone_ref, 1); + atomic_set(&fclones->fclone_ref, 1); - child->fclone = SKB_FCLONE_UNAVAILABLE; - child->pfmemalloc = pfmemalloc; + fclones->skb2.fclone = SKB_FCLONE_FREE; + fclones->skb2.pfmemalloc = pfmemalloc; } out: return skb; @@ -524,8 +525,7 @@ static void skb_release_data(struct sk_buff *skb) */ static void kfree_skbmem(struct sk_buff *skb) { - struct sk_buff *other; - atomic_t *fclone_ref; + struct sk_buff_fclones *fclones; switch (skb->fclone) { case SKB_FCLONE_UNAVAILABLE: @@ -533,22 +533,28 @@ static void kfree_skbmem(struct sk_buff *skb) break; case SKB_FCLONE_ORIG: - fclone_ref = (atomic_t *) (skb + 2); - if (atomic_dec_and_test(fclone_ref)) - kmem_cache_free(skbuff_fclone_cache, skb); + fclones = container_of(skb, struct sk_buff_fclones, skb1); + if (atomic_dec_and_test(&fclones->fclone_ref)) + kmem_cache_free(skbuff_fclone_cache, fclones); break; case SKB_FCLONE_CLONE: - fclone_ref = (atomic_t *) (skb + 1); - other = skb - 1; + fclones = container_of(skb, struct sk_buff_fclones, skb2); - /* The clone portion is available for - * fast-cloning again. + /* Warning : We must perform the atomic_dec_and_test() before + * setting skb->fclone back to SKB_FCLONE_FREE, otherwise + * skb_clone() could set clone_ref to 2 before our decrement. + * Anyway, if we are going to free the structure, no need to + * rewrite skb->fclone. */ - skb->fclone = SKB_FCLONE_UNAVAILABLE; - - if (atomic_dec_and_test(fclone_ref)) - kmem_cache_free(skbuff_fclone_cache, other); + if (atomic_dec_and_test(&fclones->fclone_ref)) { + kmem_cache_free(skbuff_fclone_cache, fclones); + } else { + /* The clone portion is available for + * fast-cloning again. + */ + skb->fclone = SKB_FCLONE_FREE; + } break; } } @@ -859,17 +865,22 @@ EXPORT_SYMBOL_GPL(skb_copy_ubufs); struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) { - struct sk_buff *n; + struct sk_buff_fclones *fclones = container_of(skb, + struct sk_buff_fclones, + skb1); + struct sk_buff *n = &fclones->skb2; if (skb_orphan_frags(skb, gfp_mask)) return NULL; - n = skb + 1; if (skb->fclone == SKB_FCLONE_ORIG && - n->fclone == SKB_FCLONE_UNAVAILABLE) { - atomic_t *fclone_ref = (atomic_t *) (n + 1); + n->fclone == SKB_FCLONE_FREE) { n->fclone = SKB_FCLONE_CLONE; - atomic_inc(fclone_ref); + /* As our fastclone was free, clone_ref must be 1 at this point. + * We could use atomic_inc() here, but it is faster + * to set the final value. + */ + atomic_set(&fclones->fclone_ref, 2); } else { if (skb_pfmemalloc(skb)) gfp_mask |= __GFP_MEMALLOC; @@ -3155,6 +3166,9 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD; goto done; } + /* switch back to head shinfo */ + pinfo = skb_shinfo(p); + if (pinfo->frag_list) goto merge; if (skb_gro_len(p) != pinfo->gso_size) @@ -3230,7 +3244,6 @@ done: NAPI_GRO_CB(skb)->same_flow = 1; return 0; } -EXPORT_SYMBOL_GPL(skb_gro_receive); void __init skb_init(void) { @@ -3240,8 +3253,7 @@ void __init skb_init(void) SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache", - (2*sizeof(struct sk_buff)) + - sizeof(atomic_t), + sizeof(struct sk_buff_fclones), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c index 597557254ddb..83498975165f 100644 --- a/net/dccp/ccid.c +++ b/net/dccp/ccid.c @@ -99,7 +99,7 @@ static void ccid_kmem_cache_destroy(struct kmem_cache *slab) kmem_cache_destroy(slab); } -static int ccid_activate(struct ccid_operations *ccid_ops) +static int __init ccid_activate(struct ccid_operations *ccid_ops) { int err = -ENOBUFS; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index de2c1e719305..1ad150ed57cf 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1082,7 +1082,7 @@ void dccp_shutdown(struct sock *sk, int how) EXPORT_SYMBOL_GPL(dccp_shutdown); -static inline int dccp_mib_init(void) +static inline int __init dccp_mib_init(void) { dccp_statistics = alloc_percpu(struct dccp_mib); if (!dccp_statistics) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 6905f2d84c44..22f34cf4cb27 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -238,6 +238,7 @@ static void dsa_switch_destroy(struct dsa_switch *ds) { } +#ifdef CONFIG_PM_SLEEP static int dsa_switch_suspend(struct dsa_switch *ds) { int i, ret = 0; @@ -280,6 +281,7 @@ static int dsa_switch_resume(struct dsa_switch *ds) return 0; } +#endif /* link polling *************************************************************/ diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 36953c84ff2d..8030489d9cbe 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -608,7 +608,6 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent, p->phy->speed = 0; p->phy->duplex = 0; p->phy->advertising = p->phy->supported | ADVERTISED_Autoneg; - phy_start_aneg(p->phy); } return slave_dev; diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 05b708bbdb0d..4715f25dfe03 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -246,7 +246,7 @@ static u32 cipso_v4_map_cache_hash(const unsigned char *key, u32 key_len) * success, negative values on error. * */ -static int cipso_v4_cache_init(void) +static int __init cipso_v4_cache_init(void) { u32 iter; diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index dced89fbe480..efa70ad44906 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -7,6 +7,7 @@ #include <linux/types.h> #include <linux/kernel.h> #include <net/genetlink.h> +#include <net/gue.h> #include <net/ip.h> #include <net/protocol.h> #include <net/udp.h> @@ -27,6 +28,7 @@ struct fou { }; struct fou_cfg { + u16 type; u8 protocol; struct udp_port_cfg udp_config; }; @@ -64,15 +66,51 @@ static int fou_udp_recv(struct sock *sk, struct sk_buff *skb) sizeof(struct udphdr)); } +static int gue_udp_recv(struct sock *sk, struct sk_buff *skb) +{ + struct fou *fou = fou_from_sock(sk); + size_t len; + struct guehdr *guehdr; + struct udphdr *uh; + + if (!fou) + return 1; + + len = sizeof(struct udphdr) + sizeof(struct guehdr); + if (!pskb_may_pull(skb, len)) + goto drop; + + uh = udp_hdr(skb); + guehdr = (struct guehdr *)&uh[1]; + + len += guehdr->hlen << 2; + if (!pskb_may_pull(skb, len)) + goto drop; + + if (guehdr->version != 0) + goto drop; + + if (guehdr->flags) { + /* No support yet */ + goto drop; + } + + return fou_udp_encap_recv_deliver(skb, guehdr->next_hdr, len); +drop: + kfree_skb(skb); + return 0; +} + static struct sk_buff **fou_gro_receive(struct sk_buff **head, - struct sk_buff *skb, - const struct net_offload **offloads) + struct sk_buff *skb) { const struct net_offload *ops; struct sk_buff **pp = NULL; u8 proto = NAPI_GRO_CB(skb)->proto; + const struct net_offload **offloads; rcu_read_lock(); + offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; ops = rcu_dereference(offloads[proto]); if (!ops || !ops->callbacks.gro_receive) goto out_unlock; @@ -85,14 +123,15 @@ out_unlock: return pp; } -static int fou_gro_complete(struct sk_buff *skb, int nhoff, - const struct net_offload **offloads) +static int fou_gro_complete(struct sk_buff *skb, int nhoff) { const struct net_offload *ops; u8 proto = NAPI_GRO_CB(skb)->proto; int err = -ENOSYS; + const struct net_offload **offloads; rcu_read_lock(); + offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; ops = rcu_dereference(offloads[proto]); if (WARN_ON(!ops || !ops->callbacks.gro_complete)) goto out_unlock; @@ -105,26 +144,110 @@ out_unlock: return err; } -static struct sk_buff **fou4_gro_receive(struct sk_buff **head, - struct sk_buff *skb) +static struct sk_buff **gue_gro_receive(struct sk_buff **head, + struct sk_buff *skb) { - return fou_gro_receive(head, skb, inet_offloads); -} + const struct net_offload **offloads; + const struct net_offload *ops; + struct sk_buff **pp = NULL; + struct sk_buff *p; + u8 proto; + struct guehdr *guehdr; + unsigned int hlen, guehlen; + unsigned int off; + int flush = 1; + + off = skb_gro_offset(skb); + hlen = off + sizeof(*guehdr); + guehdr = skb_gro_header_fast(skb, off); + if (skb_gro_header_hard(skb, hlen)) { + guehdr = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!guehdr)) + goto out; + } -static int fou4_gro_complete(struct sk_buff *skb, int nhoff) -{ - return fou_gro_complete(skb, nhoff, inet_offloads); -} + proto = guehdr->next_hdr; -static struct sk_buff **fou6_gro_receive(struct sk_buff **head, - struct sk_buff *skb) -{ - return fou_gro_receive(head, skb, inet6_offloads); + rcu_read_lock(); + offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; + ops = rcu_dereference(offloads[proto]); + if (WARN_ON(!ops || !ops->callbacks.gro_receive)) + goto out_unlock; + + guehlen = sizeof(*guehdr) + (guehdr->hlen << 2); + + hlen = off + guehlen; + if (skb_gro_header_hard(skb, hlen)) { + guehdr = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!guehdr)) + goto out_unlock; + } + + flush = 0; + + for (p = *head; p; p = p->next) { + const struct guehdr *guehdr2; + + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + guehdr2 = (struct guehdr *)(p->data + off); + + /* Compare base GUE header to be equal (covers + * hlen, version, next_hdr, and flags. + */ + if (guehdr->word != guehdr2->word) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + + /* Compare optional fields are the same. */ + if (guehdr->hlen && memcmp(&guehdr[1], &guehdr2[1], + guehdr->hlen << 2)) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + } + + skb_gro_pull(skb, guehlen); + + /* Adjusted NAPI_GRO_CB(skb)->csum after skb_gro_pull()*/ + skb_gro_postpull_rcsum(skb, guehdr, guehlen); + + pp = ops->callbacks.gro_receive(head, skb); + +out_unlock: + rcu_read_unlock(); +out: + NAPI_GRO_CB(skb)->flush |= flush; + + return pp; } -static int fou6_gro_complete(struct sk_buff *skb, int nhoff) +static int gue_gro_complete(struct sk_buff *skb, int nhoff) { - return fou_gro_complete(skb, nhoff, inet6_offloads); + const struct net_offload **offloads; + struct guehdr *guehdr = (struct guehdr *)(skb->data + nhoff); + const struct net_offload *ops; + unsigned int guehlen; + u8 proto; + int err = -ENOENT; + + proto = guehdr->next_hdr; + + guehlen = sizeof(*guehdr) + (guehdr->hlen << 2); + + rcu_read_lock(); + offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; + ops = rcu_dereference(offloads[proto]); + if (WARN_ON(!ops || !ops->callbacks.gro_complete)) + goto out_unlock; + + err = ops->callbacks.gro_complete(skb, nhoff + guehlen); + +out_unlock: + rcu_read_unlock(); + return err; } static int fou_add_to_port_list(struct fou *fou) @@ -162,6 +285,28 @@ static void fou_release(struct fou *fou) kfree(fou); } +static int fou_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg) +{ + udp_sk(sk)->encap_rcv = fou_udp_recv; + fou->protocol = cfg->protocol; + fou->udp_offloads.callbacks.gro_receive = fou_gro_receive; + fou->udp_offloads.callbacks.gro_complete = fou_gro_complete; + fou->udp_offloads.port = cfg->udp_config.local_udp_port; + fou->udp_offloads.ipproto = cfg->protocol; + + return 0; +} + +static int gue_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg) +{ + udp_sk(sk)->encap_rcv = gue_udp_recv; + fou->udp_offloads.callbacks.gro_receive = gue_gro_receive; + fou->udp_offloads.callbacks.gro_complete = gue_gro_complete; + fou->udp_offloads.port = cfg->udp_config.local_udp_port; + + return 0; +} + static int fou_create(struct net *net, struct fou_cfg *cfg, struct socket **sockp) { @@ -184,10 +329,24 @@ static int fou_create(struct net *net, struct fou_cfg *cfg, sk = sock->sk; - /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */ - fou->protocol = cfg->protocol; - fou->port = cfg->udp_config.local_udp_port; - udp_sk(sk)->encap_rcv = fou_udp_recv; + fou->port = cfg->udp_config.local_udp_port; + + /* Initial for fou type */ + switch (cfg->type) { + case FOU_ENCAP_DIRECT: + err = fou_encap_init(sk, fou, cfg); + if (err) + goto error; + break; + case FOU_ENCAP_GUE: + err = gue_encap_init(sk, fou, cfg); + if (err) + goto error; + break; + default: + err = -EINVAL; + goto error; + } udp_sk(sk)->encap_type = 1; udp_encap_enable(); @@ -199,23 +358,6 @@ static int fou_create(struct net *net, struct fou_cfg *cfg, sk->sk_allocation = GFP_ATOMIC; - switch (cfg->udp_config.family) { - case AF_INET: - fou->udp_offloads.callbacks.gro_receive = fou4_gro_receive; - fou->udp_offloads.callbacks.gro_complete = fou4_gro_complete; - break; - case AF_INET6: - fou->udp_offloads.callbacks.gro_receive = fou6_gro_receive; - fou->udp_offloads.callbacks.gro_complete = fou6_gro_complete; - break; - default: - err = -EPFNOSUPPORT; - goto error; - } - - fou->udp_offloads.port = cfg->udp_config.local_udp_port; - fou->udp_offloads.ipproto = cfg->protocol; - if (cfg->udp_config.family == AF_INET) { err = udp_add_offload(&fou->udp_offloads); if (err) @@ -272,6 +414,7 @@ static struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = { [FOU_ATTR_PORT] = { .type = NLA_U16, }, [FOU_ATTR_AF] = { .type = NLA_U8, }, [FOU_ATTR_IPPROTO] = { .type = NLA_U8, }, + [FOU_ATTR_TYPE] = { .type = NLA_U8, }, }; static int parse_nl_config(struct genl_info *info, @@ -299,6 +442,9 @@ static int parse_nl_config(struct genl_info *info, if (info->attrs[FOU_ATTR_IPPROTO]) cfg->protocol = nla_get_u8(info->attrs[FOU_ATTR_IPPROTO]); + if (info->attrs[FOU_ATTR_TYPE]) + cfg->type = nla_get_u8(info->attrs[FOU_ATTR_TYPE]); + return 0; } diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 15f0e2bad7ad..2811cc18701a 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -790,7 +790,7 @@ static void __net_exit ip4_frags_ns_ctl_unregister(struct net *net) kfree(table); } -static void ip4_frags_ctl_register(void) +static void __init ip4_frags_ctl_register(void) { register_net_sysctl(&init_net, "net/ipv4", ip4_frags_ctl_table); } @@ -804,7 +804,7 @@ static inline void ip4_frags_ns_ctl_unregister(struct net *net) { } -static inline void ip4_frags_ctl_register(void) +static inline void __init ip4_frags_ctl_register(void) { } #endif diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 829aff8bf723..0485ef18d254 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -241,6 +241,8 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, /* Push GRE header. */ gre_build_header(skb, &tpi, tunnel->tun_hlen); + skb_set_inner_protocol(skb, tpi.proto); + ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol); } diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index b75b47b0a223..0bb8e141eacc 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -56,6 +56,7 @@ #include <net/netns/generic.h> #include <net/rtnetlink.h> #include <net/udp.h> +#include <net/gue.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/ipv6.h> @@ -495,6 +496,8 @@ static int ip_encap_hlen(struct ip_tunnel_encap *e) return 0; case TUNNEL_ENCAP_FOU: return sizeof(struct udphdr); + case TUNNEL_ENCAP_GUE: + return sizeof(struct udphdr) + sizeof(struct guehdr); default: return -EINVAL; } @@ -546,6 +549,15 @@ static int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, skb_reset_transport_header(skb); uh = udp_hdr(skb); + if (e->type == TUNNEL_ENCAP_GUE) { + struct guehdr *guehdr = (struct guehdr *)&uh[1]; + + guehdr->version = 0; + guehdr->hlen = 0; + guehdr->flags = 0; + guehdr->next_hdr = *protocol; + } + uh->dest = e->dport; uh->source = sport; uh->len = htons(skb->len); @@ -565,6 +577,7 @@ int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t, case TUNNEL_ENCAP_NONE: return 0; case TUNNEL_ENCAP_FOU: + case TUNNEL_ENCAP_GUE: return fou_build_header(skb, &t->encap, t->encap_hlen, protocol, fl4); default: @@ -759,7 +772,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, df |= (inner_iph->frag_off&htons(IP_DF)); max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) - + rt->dst.header_len; + + rt->dst.header_len + ip_encap_hlen(&tunnel->encap); if (max_headroom > dev->needed_headroom) dev->needed_headroom = max_headroom; @@ -853,9 +866,14 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); - if (!t && (cmd == SIOCADDTUNNEL)) { - t = ip_tunnel_create(net, itn, p); - err = PTR_ERR_OR_ZERO(t); + if (cmd == SIOCADDTUNNEL) { + if (!t) { + t = ip_tunnel_create(net, itn, p); + err = PTR_ERR_OR_ZERO(t); + break; + } + + err = -EEXIST; break; } if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index bfec31df8b21..ea88ab3102a8 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -224,6 +224,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (IS_ERR(skb)) goto out; + skb_set_inner_ipproto(skb, IPPROTO_IPIP); + ip_tunnel_xmit(skb, dev, tiph, tiph->protocol); return NETDEV_TX_OK; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d4bd68dcdc39..793c0bb8c4fd 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -746,7 +746,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow } n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw); - if (n) { + if (!IS_ERR(n)) { if (!(n->nud_state & NUD_VALID)) { neigh_event_send(n, NULL); } else { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index cf5e508e1ef5..26a6f113f00c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2693,7 +2693,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, break; #endif case TCP_USER_TIMEOUT: - /* Cap the max timeout in ms TCP will retry/retrans + /* Cap the max time in ms TCP will retry or probe the window * before giving up and aborting (ETIMEDOUT) a connection. */ if (val < 0) @@ -3172,7 +3172,7 @@ static int __init set_thash_entries(char *str) } __setup("thash_entries=", set_thash_entries); -static void tcp_init_mem(void) +static void __init tcp_init_mem(void) { unsigned long limit = nr_free_buffer_pages() / 8; limit = max(limit, 128UL); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ee567e9e98c3..8d4eac793700 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2110,10 +2110,7 @@ bool tcp_schedule_loss_probe(struct sock *sk) static bool skb_still_in_host_queue(const struct sock *sk, const struct sk_buff *skb) { - const struct sk_buff *fclone = skb + 1; - - if (unlikely(skb->fclone == SKB_FCLONE_ORIG && - fclone->fclone == SKB_FCLONE_CLONE)) { + if (unlikely(skb_fclone_busy(skb))) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES); return true; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index b24360f6e293..9b21ae8b2e31 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -52,7 +52,7 @@ static void tcp_write_err(struct sock *sk) * limit. * 2. If we have strong memory pressure. */ -static int tcp_out_of_resources(struct sock *sk, int do_reset) +static int tcp_out_of_resources(struct sock *sk, bool do_reset) { struct tcp_sock *tp = tcp_sk(sk); int shift = 0; @@ -72,7 +72,7 @@ static int tcp_out_of_resources(struct sock *sk, int do_reset) if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN || /* 2. Window is closed. */ (!tp->snd_wnd && !tp->packets_out)) - do_reset = 1; + do_reset = true; if (do_reset) tcp_send_active_reset(sk, GFP_ATOMIC); tcp_done(sk); @@ -270,40 +270,41 @@ static void tcp_probe_timer(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); int max_probes; + u32 start_ts; if (tp->packets_out || !tcp_send_head(sk)) { icsk->icsk_probes_out = 0; return; } - /* *WARNING* RFC 1122 forbids this - * - * It doesn't AFAIK, because we kill the retransmit timer -AK - * - * FIXME: We ought not to do it, Solaris 2.5 actually has fixing - * this behaviour in Solaris down as a bug fix. [AC] - * - * Let me to explain. icsk_probes_out is zeroed by incoming ACKs - * even if they advertise zero window. Hence, connection is killed only - * if we received no ACKs for normal connection timeout. It is not killed - * only because window stays zero for some time, window may be zero - * until armageddon and even later. We are in full accordance - * with RFCs, only probe timer combines both retransmission timeout - * and probe timeout in one bottle. --ANK + /* RFC 1122 4.2.2.17 requires the sender to stay open indefinitely as + * long as the receiver continues to respond probes. We support this by + * default and reset icsk_probes_out with incoming ACKs. But if the + * socket is orphaned or the user specifies TCP_USER_TIMEOUT, we + * kill the socket when the retry count and the time exceeds the + * corresponding system limit. We also implement similar policy when + * we use RTO to probe window in tcp_retransmit_timer(). */ - max_probes = sysctl_tcp_retries2; + start_ts = tcp_skb_timestamp(tcp_send_head(sk)); + if (!start_ts) + skb_mstamp_get(&tcp_send_head(sk)->skb_mstamp); + else if (icsk->icsk_user_timeout && + (s32)(tcp_time_stamp - start_ts) > icsk->icsk_user_timeout) + goto abort; + max_probes = sysctl_tcp_retries2; if (sock_flag(sk, SOCK_DEAD)) { const int alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX; max_probes = tcp_orphan_retries(sk, alive); - - if (tcp_out_of_resources(sk, alive || icsk->icsk_probes_out <= max_probes)) + if (!alive && icsk->icsk_backoff >= max_probes) + goto abort; + if (tcp_out_of_resources(sk, true)) return; } if (icsk->icsk_probes_out > max_probes) { - tcp_write_err(sk); +abort: tcp_write_err(sk); } else { /* Only send another probe if we didn't close things up. */ tcp_send_probe0(sk); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 19ebe6a39ddc..507310ef4b56 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -25,8 +25,11 @@ struct udp_offload_priv { struct udp_offload_priv __rcu *next; }; -struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, - netdev_features_t features) +static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, + netdev_features_t features, + struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb, + netdev_features_t features), + __be16 new_protocol) { struct sk_buff *segs = ERR_PTR(-EINVAL); u16 mac_offset = skb->mac_header; @@ -48,7 +51,7 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, skb_reset_mac_header(skb); skb_set_network_header(skb, skb_inner_network_offset(skb)); skb->mac_len = skb_inner_network_offset(skb); - skb->protocol = htons(ETH_P_TEB); + skb->protocol = new_protocol; need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM); if (need_csum) @@ -56,7 +59,7 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, /* segment inner packet. */ enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); - segs = skb_mac_gso_segment(skb, enc_features); + segs = gso_inner_segment(skb, enc_features); if (IS_ERR_OR_NULL(segs)) { skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset, mac_len); @@ -101,6 +104,44 @@ out: return segs; } +struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, + netdev_features_t features, + bool is_ipv6) +{ + __be16 protocol = skb->protocol; + const struct net_offload **offloads; + const struct net_offload *ops; + struct sk_buff *segs = ERR_PTR(-EINVAL); + struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb, + netdev_features_t features); + + rcu_read_lock(); + + switch (skb->inner_protocol_type) { + case ENCAP_TYPE_ETHER: + protocol = skb->inner_protocol; + gso_inner_segment = skb_mac_gso_segment; + break; + case ENCAP_TYPE_IPPROTO: + offloads = is_ipv6 ? inet6_offloads : inet_offloads; + ops = rcu_dereference(offloads[skb->inner_ipproto]); + if (!ops || !ops->callbacks.gso_segment) + goto out_unlock; + gso_inner_segment = ops->callbacks.gso_segment; + break; + default: + goto out_unlock; + } + + segs = __skb_udp_tunnel_segment(skb, features, gso_inner_segment, + protocol); + +out_unlock: + rcu_read_unlock(); + + return segs; +} + static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, netdev_features_t features) { @@ -113,7 +154,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, if (skb->encapsulation && (skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) { - segs = skb_udp_tunnel_segment(skb, features); + segs = skb_udp_tunnel_segment(skb, features, false); goto out; } @@ -293,6 +334,7 @@ static struct sk_buff **udp4_gro_receive(struct sk_buff **head, skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check, inet_gro_compute_pseudo); skip: + NAPI_GRO_CB(skb)->is_ipv6 = 0; return udp_gro_receive(head, skb, uh); flush: diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e189480f8fd6..725c763270a0 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4783,10 +4783,11 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) if (ip6_del_rt(ifp->rt)) dst_free(&ifp->rt->dst); + + rt_genid_bump_ipv6(net); break; } atomic_inc(&net->ipv6.dev_addr_genid); - rt_genid_bump_ipv6(net); } static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index e6960457f625..98cc4cd570e2 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -8,6 +8,13 @@ #include <net/addrconf.h> #include <net/ip.h> +/* if ipv6 module registers this function is used by xfrm to force all + * sockets to relookup their nodes - this is fairly expensive, be + * careful + */ +void (*__fib6_flush_trees)(struct net *); +EXPORT_SYMBOL(__fib6_flush_trees); + #define IPV6_ADDR_SCOPE_TYPE(scope) ((scope) << 16) static inline unsigned int ipv6_addr_scope2type(unsigned int scope) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 76b7f5ee8f4c..97b9fa8de377 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1605,6 +1605,24 @@ static void fib6_prune_clones(struct net *net, struct fib6_node *fn) fib6_clean_tree(net, fn, fib6_prune_clone, 1, NULL); } +static int fib6_update_sernum(struct rt6_info *rt, void *arg) +{ + __u32 sernum = *(__u32 *)arg; + + if (rt->rt6i_node && + rt->rt6i_node->fn_sernum != sernum) + rt->rt6i_node->fn_sernum = sernum; + + return 0; +} + +static void fib6_flush_trees(struct net *net) +{ + __u32 new_sernum = fib6_new_sernum(); + + fib6_clean_all(net, fib6_update_sernum, &new_sernum); +} + /* * Garbage collection */ @@ -1788,6 +1806,8 @@ int __init fib6_init(void) NULL); if (ret) goto out_unregister_subsys; + + __fib6_flush_trees = fib6_flush_trees; out: return ret; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 5f19dfbc4c6a..74b677916a70 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -314,6 +314,8 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net, struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); t = ip6gre_tunnel_find(net, parms, ARPHRD_IP6GRE); + if (t && create) + return NULL; if (t || !create) return t; @@ -616,6 +618,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, int err = -1; u8 proto; struct sk_buff *new_skb; + __be16 protocol; if (dev->type == ARPHRD_ETHER) IPCB(skb)->flags = 0; @@ -732,8 +735,9 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, ipv6h->daddr = fl6->daddr; ((__be16 *)(ipv6h + 1))[0] = tunnel->parms.o_flags; - ((__be16 *)(ipv6h + 1))[1] = (dev->type == ARPHRD_ETHER) ? - htons(ETH_P_TEB) : skb->protocol; + protocol = (dev->type == ARPHRD_ETHER) ? + htons(ETH_P_TEB) : skb->protocol; + ((__be16 *)(ipv6h + 1))[1] = protocol; if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) { __be32 *ptr = (__be32 *)(((u8 *)ipv6h) + tunnel->hlen - 4); @@ -754,6 +758,8 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, } } + skb_set_inner_protocol(skb, protocol); + ip6tunnel_xmit(skb, dev); if (ndst) ip6_tnl_dst_store(tunnel, ndst); @@ -1724,4 +1730,5 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)"); MODULE_DESCRIPTION("GRE over IPv6 tunneling device"); MODULE_ALIAS_RTNL_LINK("ip6gre"); +MODULE_ALIAS_RTNL_LINK("ip6gretap"); MODULE_ALIAS_NETDEV("ip6gre0"); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index e01bd0399297..d3e8888ad611 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -364,8 +364,12 @@ static struct ip6_tnl *ip6_tnl_locate(struct net *net, (t = rtnl_dereference(*tp)) != NULL; tp = &t->next) { if (ipv6_addr_equal(local, &t->parms.laddr) && - ipv6_addr_equal(remote, &t->parms.raddr)) + ipv6_addr_equal(remote, &t->parms.raddr)) { + if (create) + return NULL; + return t; + } } if (!create) return NULL; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 7f52fd9fa7b0..5833a2244467 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -253,8 +253,12 @@ static struct ip6_tnl *vti6_locate(struct net *net, struct __ip6_tnl_parm *p, (t = rtnl_dereference(*tp)) != NULL; tp = &t->next) { if (ipv6_addr_equal(local, &t->parms.laddr) && - ipv6_addr_equal(remote, &t->parms.raddr)) + ipv6_addr_equal(remote, &t->parms.raddr)) { + if (create) + return NULL; + return t; + } } if (!create) return NULL; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f74b0417bd60..a318dd89b6d9 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -314,7 +314,6 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net, memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers); - rt->rt6i_genid = rt_genid_ipv6(net); INIT_LIST_HEAD(&rt->rt6i_siblings); } return rt; @@ -1096,9 +1095,6 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) * DST_OBSOLETE_FORCE_CHK which forces validation calls down * into this function always. */ - if (rt->rt6i_genid != rt_genid_ipv6(dev_net(rt->dst.dev))) - return NULL; - if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie)) return NULL; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index db75809ab843..0d4e27466f82 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -982,6 +982,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, goto tx_error; } + skb_set_inner_ipproto(skb, IPPROTO_IPV6); + err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); @@ -1006,6 +1008,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (IS_ERR(skb)) goto out; + skb_set_inner_ipproto(skb, IPPROTO_IPIP); + ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP); return NETDEV_TX_OK; out: diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 212ebfc7973f..6b8f543f6ac6 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -58,7 +58,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, if (skb->encapsulation && skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM)) - segs = skb_udp_tunnel_segment(skb, features); + segs = skb_udp_tunnel_segment(skb, features, true); else { const struct ipv6hdr *ipv6h; struct udphdr *uh; @@ -140,6 +140,7 @@ static struct sk_buff **udp6_gro_receive(struct sk_buff **head, ip6_gro_compute_pseudo); skip: + NAPI_GRO_CB(skb)->is_ipv6 = 1; return udp_gro_receive(head, skb, uh); flush: diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 608d18986923..ae5096ab65eb 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -856,6 +856,7 @@ config NETFILTER_XT_TARGET_TPROXY tristate '"TPROXY" target transparent proxying support' depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED + depends on (IPV6 || IPV6=n) depends on IP_NF_MANGLE select NF_DEFRAG_IPV4 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index f77d3f7f22b5..6c5a915cfa75 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -222,6 +222,51 @@ replay: } } +struct nfnl_err { + struct list_head head; + struct nlmsghdr *nlh; + int err; +}; + +static int nfnl_err_add(struct list_head *list, struct nlmsghdr *nlh, int err) +{ + struct nfnl_err *nfnl_err; + + nfnl_err = kmalloc(sizeof(struct nfnl_err), GFP_KERNEL); + if (nfnl_err == NULL) + return -ENOMEM; + + nfnl_err->nlh = nlh; + nfnl_err->err = err; + list_add_tail(&nfnl_err->head, list); + + return 0; +} + +static void nfnl_err_del(struct nfnl_err *nfnl_err) +{ + list_del(&nfnl_err->head); + kfree(nfnl_err); +} + +static void nfnl_err_reset(struct list_head *err_list) +{ + struct nfnl_err *nfnl_err, *next; + + list_for_each_entry_safe(nfnl_err, next, err_list, head) + nfnl_err_del(nfnl_err); +} + +static void nfnl_err_deliver(struct list_head *err_list, struct sk_buff *skb) +{ + struct nfnl_err *nfnl_err, *next; + + list_for_each_entry_safe(nfnl_err, next, err_list, head) { + netlink_ack(skb, nfnl_err->nlh, nfnl_err->err); + nfnl_err_del(nfnl_err); + } +} + static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, u_int16_t subsys_id) { @@ -230,6 +275,7 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, const struct nfnetlink_subsystem *ss; const struct nfnl_callback *nc; bool success = true, done = false; + static LIST_HEAD(err_list); int err; if (subsys_id >= NFNL_SUBSYS_COUNT) @@ -287,6 +333,7 @@ replay: type = nlh->nlmsg_type; if (type == NFNL_MSG_BATCH_BEGIN) { /* Malformed: Batch begin twice */ + nfnl_err_reset(&err_list); success = false; goto done; } else if (type == NFNL_MSG_BATCH_END) { @@ -333,6 +380,7 @@ replay: * original skb. */ if (err == -EAGAIN) { + nfnl_err_reset(&err_list); ss->abort(oskb); nfnl_unlock(subsys_id); kfree_skb(nskb); @@ -341,11 +389,24 @@ replay: } ack: if (nlh->nlmsg_flags & NLM_F_ACK || err) { + /* Errors are delivered once the full batch has been + * processed, this avoids that the same error is + * reported several times when replaying the batch. + */ + if (nfnl_err_add(&err_list, nlh, err) < 0) { + /* We failed to enqueue an error, reset the + * list of errors and send OOM to userspace + * pointing to the batch header. + */ + nfnl_err_reset(&err_list); + netlink_ack(skb, nlmsg_hdr(oskb), -ENOMEM); + success = false; + goto done; + } /* We don't stop processing the batch on errors, thus, * userspace gets all the errors that the batch * triggers. */ - netlink_ack(skb, nlh, err); if (err) success = false; } @@ -361,6 +422,7 @@ done: else ss->abort(oskb); + nfnl_err_deliver(&err_list, oskb); nfnl_unlock(subsys_id); kfree_skb(nskb); } diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 28fb8f38e6ba..8892b7b6184a 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -180,15 +180,17 @@ static int nft_hash_init(const struct nft_set *set, static void nft_hash_destroy(const struct nft_set *set) { const struct rhashtable *priv = nft_set_priv(set); - const struct bucket_table *tbl; + const struct bucket_table *tbl = priv->tbl; struct nft_hash_elem *he, *next; unsigned int i; - tbl = rht_dereference(priv->tbl, priv); - for (i = 0; i < tbl->size; i++) - rht_for_each_entry_safe(he, next, tbl->buckets[i], priv, node) + for (i = 0; i < tbl->size; i++) { + for (he = rht_entry(tbl->buckets[i], struct nft_hash_elem, node); + he != NULL; he = next) { + next = rht_entry(he->node.next, struct nft_hash_elem, node); nft_hash_elem_destroy(set, he); - + } + } rhashtable_destroy(priv); } diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c index e1836ff88199..46214f245665 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_rbtree.c @@ -234,13 +234,11 @@ static void nft_rbtree_destroy(const struct nft_set *set) struct nft_rbtree_elem *rbe; struct rb_node *node; - spin_lock_bh(&nft_rbtree_lock); while ((node = priv->root.rb_node) != NULL) { rb_erase(node, &priv->root); rbe = rb_entry(node, struct nft_rbtree_elem, node); nft_rbtree_elem_destroy(set, rbe); } - spin_unlock_bh(&nft_rbtree_lock); } static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features, diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 8d0e83d6903e..30f10fb07f4a 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -254,10 +254,15 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, cp->tp = tp; if (p->perfect) { + int i; + cp->perfect = kmemdup(p->perfect, sizeof(*r) * cp->hash, GFP_KERNEL); if (!cp->perfect) goto errout; + for (i = 0; i < cp->hash; i++) + tcf_exts_init(&cp->perfect[i].exts, + TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); balloc = 1; } cp->h = p->h; @@ -353,6 +358,9 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, f = kzalloc(sizeof(*f), GFP_KERNEL); if (!f) goto errout_alloc; + f->key = handle; + tcindex_filter_result_init(&f->result); + f->next = NULL; } if (tb[TCA_TCINDEX_CLASSID]) { @@ -376,9 +384,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct tcindex_filter *nfp; struct tcindex_filter __rcu **fp; - f->key = handle; - f->result = new_filter_result; - f->next = NULL; + tcf_exts_change(tp, &f->result.exts, &r->exts); fp = cp->h + (handle % cp->hash); for (nfp = rtnl_dereference(*fp); diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 4be3ebf46d67..0472909bb014 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -358,7 +358,6 @@ static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n, bool free_pf) { - tcf_unbind_filter(tp, &n->res); tcf_exts_destroy(&n->exts); if (n->ht_down) n->ht_down->refcnt--; @@ -416,6 +415,7 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) if (pkp == key) { RCU_INIT_POINTER(*kp, key->next); + tcf_unbind_filter(tp, &key->res); call_rcu(&key->rcu, u32_delete_key_freepf_rcu); return 0; } @@ -425,7 +425,7 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) return 0; } -static void u32_clear_hnode(struct tc_u_hnode *ht) +static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) { struct tc_u_knode *n; unsigned int h; @@ -434,6 +434,7 @@ static void u32_clear_hnode(struct tc_u_hnode *ht) while ((n = rtnl_dereference(ht->ht[h])) != NULL) { RCU_INIT_POINTER(ht->ht[h], rtnl_dereference(n->next)); + tcf_unbind_filter(tp, &n->res); call_rcu(&n->rcu, u32_delete_key_freepf_rcu); } } @@ -447,7 +448,7 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) WARN_ON(ht->refcnt); - u32_clear_hnode(ht); + u32_clear_hnode(tp, ht); hn = &tp_c->hlist; for (phn = rtnl_dereference(*hn); @@ -482,7 +483,7 @@ static void u32_destroy(struct tcf_proto *tp) ht; ht = rtnl_dereference(ht->next)) { ht->refcnt--; - u32_clear_hnode(ht); + u32_clear_hnode(tp, ht); } while ((ht = rtnl_dereference(tp_c->hlist)) != NULL) { @@ -731,6 +732,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, } u32_replace_knode(tp, tp_c, new); + tcf_unbind_filter(tp, &n->res); call_rcu(&n->rcu, u32_delete_key_rcu); return 0; } diff --git a/net/sched/ematch.c b/net/sched/ematch.c index 3a633debb6df..ad57f4444b9c 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -526,9 +526,11 @@ pop_stack: match_idx = stack[--stackp]; cur_match = tcf_em_get_match(tree, match_idx); - if (tcf_em_early_end(cur_match, res)) + if (tcf_em_early_end(cur_match, res)) { + if (tcf_em_is_inverted(cur_match)) + res = !res; goto pop_stack; - else { + } else { match_idx++; goto proceed; } diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index aa8329508dba..c79a226cc25c 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -578,8 +578,10 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, timer); + rcu_read_lock(); qdisc_unthrottled(wd->qdisc); __netif_schedule(qdisc_root(wd->qdisc)); + rcu_read_unlock(); return HRTIMER_NORESTART; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 7c8e5d73d433..2b349a4de3c8 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -56,11 +56,34 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) return 0; } -static inline struct sk_buff *dequeue_skb(struct Qdisc *q) +static void try_bulk_dequeue_skb(struct Qdisc *q, + struct sk_buff *skb, + const struct netdev_queue *txq) +{ + int bytelimit = qdisc_avail_bulklimit(txq) - skb->len; + + while (bytelimit > 0) { + struct sk_buff *nskb = q->dequeue(q); + + if (!nskb) + break; + + bytelimit -= nskb->len; /* covers GSO len */ + skb->next = nskb; + skb = nskb; + } + skb->next = NULL; +} + +/* Note that dequeue_skb can possibly return a SKB list (via skb->next). + * A requeued skb (via q->gso_skb) can also be a SKB list. + */ +static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate) { struct sk_buff *skb = q->gso_skb; const struct netdev_queue *txq = q->dev_queue; + *validate = true; if (unlikely(skb)) { /* check the reason of requeuing without tx lock first */ txq = skb_get_tx_queue(txq->dev, skb); @@ -69,14 +92,16 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q) q->q.qlen--; } else skb = NULL; + /* skb in gso_skb were already validated */ + *validate = false; } else { - if (!(q->flags & TCQ_F_ONETXQUEUE) || !netif_xmit_frozen_or_stopped(txq)) { + if (!(q->flags & TCQ_F_ONETXQUEUE) || + !netif_xmit_frozen_or_stopped(txq)) { skb = q->dequeue(q); - if (skb) - skb = validate_xmit_skb(skb, qdisc_dev(q)); + if (skb && qdisc_may_bulk(q)) + try_bulk_dequeue_skb(q, skb, txq); } } - return skb; } @@ -120,19 +145,24 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, */ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, struct net_device *dev, struct netdev_queue *txq, - spinlock_t *root_lock) + spinlock_t *root_lock, bool validate) { int ret = NETDEV_TX_BUSY; /* And release qdisc */ spin_unlock(root_lock); - HARD_TX_LOCK(dev, txq, smp_processor_id()); - if (!netif_xmit_frozen_or_stopped(txq)) - skb = dev_hard_start_xmit(skb, dev, txq, &ret); + /* Note that we validate skb (GSO, checksum, ...) outside of locks */ + if (validate) + skb = validate_xmit_skb_list(skb, dev); - HARD_TX_UNLOCK(dev, txq); + if (skb) { + HARD_TX_LOCK(dev, txq, smp_processor_id()); + if (!netif_xmit_frozen_or_stopped(txq)) + skb = dev_hard_start_xmit(skb, dev, txq, &ret); + HARD_TX_UNLOCK(dev, txq); + } spin_lock(root_lock); if (dev_xmit_complete(ret)) { @@ -181,9 +211,10 @@ static inline int qdisc_restart(struct Qdisc *q) struct net_device *dev; spinlock_t *root_lock; struct sk_buff *skb; + bool validate; /* Dequeue packet */ - skb = dequeue_skb(q); + skb = dequeue_skb(q, &validate); if (unlikely(!skb)) return 0; @@ -193,7 +224,7 @@ static inline int qdisc_restart(struct Qdisc *q) dev = qdisc_dev(q); txq = skb_get_tx_queue(dev, skb); - return sch_direct_xmit(skb, q, dev, txq, root_lock); + return sch_direct_xmit(skb, q, dev, txq, root_lock, validate); } void __qdisc_run(struct Qdisc *q) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index f623dca6ce30..4c4e457e7888 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1961,10 +1961,8 @@ static int xdst_queue_output(struct sock *sk, struct sk_buff *skb) struct xfrm_dst *xdst = (struct xfrm_dst *) dst; struct xfrm_policy *pol = xdst->pols[0]; struct xfrm_policy_queue *pq = &pol->polq; - const struct sk_buff *fclone = skb + 1; - if (unlikely(skb->fclone == SKB_FCLONE_ORIG && - fclone->fclone == SKB_FCLONE_CLONE)) { + if (unlikely(skb_fclone_busy(skb))) { kfree_skb(skb); return 0; } |
