diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-11-04 09:41:05 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-11-04 09:41:05 -0800 |
| commit | b0f85fa11aefc4f3e03306b4cd47f113bd57dcba (patch) | |
| tree | 1333d36d99fde3f97210795941fc246f0ad08a75 /net/core | |
| parent | ccc9d4a6d640cbde05d519edeb727881646cf71b (diff) | |
| parent | f32bfb9a8ca083f8d148ea90ae5ba66f4831836e (diff) | |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
Changes of note:
1) Allow to schedule ICMP packets in IPVS, from Alex Gartrell.
2) Provide FIB table ID in ipv4 route dumps just as ipv6 does, from
David Ahern.
3) Allow the user to ask for the statistics to be filtered out of
ipv4/ipv6 address netlink dumps. From Sowmini Varadhan.
4) More work to pass the network namespace context around deep into
various packet path APIs, starting with the netfilter hooks. From
Eric W Biederman.
5) Add layer 2 TX/RX checksum offloading to qeth driver, from Thomas
Richter.
6) Use usec resolution for SYN/ACK RTTs in TCP, from Yuchung Cheng.
7) Support Very High Throughput in wireless MESH code, from Bob
Copeland.
8) Allow setting the ageing_time in switchdev/rocker. From Scott
Feldman.
9) Properly autoload L2TP type modules, from Stephen Hemminger.
10) Fix and enable offload features by default in 8139cp driver, from
David Woodhouse.
11) Support both ipv4 and ipv6 sockets in a single vxlan device, from
Jiri Benc.
12) Fix CWND limiting of thin streams in TCP, from Bendik Rønning
Opstad.
13) Fix IPSEC flowcache overflows on large systems, from Steffen
Klassert.
14) Convert bridging to track VLANs using rhashtable entries rather than
a bitmap. From Nikolay Aleksandrov.
15) Make TCP listener handling completely lockless, this is a major
accomplishment. Incoming request sockets now live in the
established hash table just like any other socket too.
From Eric Dumazet.
15) Provide more bridging attributes to netlink, from Nikolay
Aleksandrov.
16) Use hash based algorithm for ipv4 multipath routing, this was very
long overdue. From Peter Nørlund.
17) Several y2038 cures, mostly avoiding timespec. From Arnd Bergmann.
18) Allow non-root execution of EBPF programs, from Alexei Starovoitov.
19) Support SO_INCOMING_CPU as setsockopt, from Eric Dumazet. This
influences the port binding selection logic used by SO_REUSEPORT.
20) Add ipv6 support to VRF, from David Ahern.
21) Add support for Mellanox Spectrum switch ASIC, from Jiri Pirko.
22) Add rtl8xxxu Realtek wireless driver, from Jes Sorensen.
23) Implement RACK loss recovery in TCP, from Yuchung Cheng.
24) Support multipath routes in MPLS, from Roopa Prabhu.
25) Fix POLLOUT notification for listening sockets in AF_UNIX, from Eric
Dumazet.
26) Add new QED Qlogic river, from Yuval Mintz, Manish Chopra, and
Sudarsana Kalluru.
27) Don't fetch timestamps on AF_UNIX sockets, from Hannes Frederic
Sowa.
28) Support ipv6 geneve tunnels, from John W Linville.
29) Add flood control support to switchdev layer, from Ido Schimmel.
30) Fix CHECKSUM_PARTIAL handling of potentially fragmented frames, from
Hannes Frederic Sowa.
31) Support persistent maps and progs in bpf, from Daniel Borkmann.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1790 commits)
sh_eth: use DMA barriers
switchdev: respect SKIP_EOPNOTSUPP flag in case there is no recursion
net: sched: kill dead code in sch_choke.c
irda: Delete an unnecessary check before the function call "irlmp_unregister_service"
net: dsa: mv88e6xxx: include DSA ports in VLANs
net: dsa: mv88e6xxx: disable SA learning for DSA and CPU ports
net/core: fix for_each_netdev_feature
vlan: Invoke driver vlan hooks only if device is present
arcnet/com20020: add LEDS_CLASS dependency
bpf, verifier: annotate verbose printer with __printf
dp83640: Only wait for timestamps for packets with timestamping enabled.
ptp: Change ptp_class to a proper bitmask
dp83640: Prune rx timestamp list before reading from it
dp83640: Delay scheduled work.
dp83640: Include hash in timestamp/packet matching
ipv6: fix tunnel error handling
net/mlx5e: Fix LSO vlan insertion
net/mlx5e: Re-eanble client vlan TX acceleration
net/mlx5e: Return error in case mlx5e_set_features() fails
net/mlx5e: Don't allow more than max supported channels
...
Diffstat (limited to 'net/core')
| -rw-r--r-- | net/core/dev.c | 99 | ||||
| -rw-r--r-- | net/core/dst.c | 14 | ||||
| -rw-r--r-- | net/core/filter.c | 135 | ||||
| -rw-r--r-- | net/core/lwtunnel.c | 4 | ||||
| -rw-r--r-- | net/core/neighbour.c | 45 | ||||
| -rw-r--r-- | net/core/net-sysfs.c | 11 | ||||
| -rw-r--r-- | net/core/netpoll.c | 23 | ||||
| -rw-r--r-- | net/core/ptp_classifier.c | 16 | ||||
| -rw-r--r-- | net/core/request_sock.c | 88 | ||||
| -rw-r--r-- | net/core/rtnetlink.c | 38 | ||||
| -rw-r--r-- | net/core/sock.c | 75 | ||||
| -rw-r--r-- | net/core/sock_diag.c | 14 | ||||
| -rw-r--r-- | net/core/tso.c | 18 | ||||
| -rw-r--r-- | net/core/utils.c | 49 |
14 files changed, 395 insertions, 234 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index c14748d051e7..8ce3f74cd6b9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2942,9 +2942,11 @@ EXPORT_SYMBOL(xmit_recursion); /** * dev_loopback_xmit - loop back @skb + * @net: network namespace this loopback is happening in + * @sk: sk needed to be a netfilter okfn * @skb: buffer to transmit */ -int dev_loopback_xmit(struct sock *sk, struct sk_buff *skb) +int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) { skb_reset_mac_header(skb); __skb_pull(skb, skb_network_offset(skb)); @@ -2999,6 +3001,7 @@ static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb) new_index = skb_tx_hash(dev, skb); if (queue_index != new_index && sk && + sk_fullsock(sk) && rcu_access_pointer(sk->sk_dst_cache)) sk_tx_queue_set(sk, new_index); @@ -3170,11 +3173,11 @@ out: return rc; } -int dev_queue_xmit_sk(struct sock *sk, struct sk_buff *skb) +int dev_queue_xmit(struct sk_buff *skb) { return __dev_queue_xmit(skb, NULL); } -EXPORT_SYMBOL(dev_queue_xmit_sk); +EXPORT_SYMBOL(dev_queue_xmit); int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv) { @@ -3695,6 +3698,14 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, case TC_ACT_QUEUED: kfree_skb(skb); return NULL; + case TC_ACT_REDIRECT: + /* skb_mac_header check was done by cls/act_bpf, so + * we can safely push the L2 header back before + * redirecting to another netdev + */ + __skb_push(skb, skb->mac_len); + skb_do_redirect(skb); + return NULL; default: break; } @@ -4009,13 +4020,13 @@ static int netif_receive_skb_internal(struct sk_buff *skb) * NET_RX_SUCCESS: no congestion * NET_RX_DROP: packet was dropped */ -int netif_receive_skb_sk(struct sock *sk, struct sk_buff *skb) +int netif_receive_skb(struct sk_buff *skb) { trace_netif_receive_skb_entry(skb); return netif_receive_skb_internal(skb); } -EXPORT_SYMBOL(netif_receive_skb_sk); +EXPORT_SYMBOL(netif_receive_skb); /* Network device is going away, flush any packets still pending * Called with irqs disabled. @@ -4884,8 +4895,7 @@ struct netdev_adjacent { struct rcu_head rcu; }; -static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev, - struct net_device *adj_dev, +static struct netdev_adjacent *__netdev_find_adj(struct net_device *adj_dev, struct list_head *adj_list) { struct netdev_adjacent *adj; @@ -4911,7 +4921,7 @@ bool netdev_has_upper_dev(struct net_device *dev, { ASSERT_RTNL(); - return __netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper); + return __netdev_find_adj(upper_dev, &dev->all_adj_list.upper); } EXPORT_SYMBOL(netdev_has_upper_dev); @@ -5173,7 +5183,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, struct netdev_adjacent *adj; int ret; - adj = __netdev_find_adj(dev, adj_dev, dev_list); + adj = __netdev_find_adj(adj_dev, dev_list); if (adj) { adj->ref_nr++; @@ -5229,7 +5239,7 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev, { struct netdev_adjacent *adj; - adj = __netdev_find_adj(dev, adj_dev, dev_list); + adj = __netdev_find_adj(adj_dev, dev_list); if (!adj) { pr_err("tried to remove device %s from %s\n", @@ -5350,10 +5360,10 @@ static int __netdev_upper_dev_link(struct net_device *dev, return -EBUSY; /* To prevent loops, check if dev is not upper device to upper_dev. */ - if (__netdev_find_adj(upper_dev, dev, &upper_dev->all_adj_list.upper)) + if (__netdev_find_adj(dev, &upper_dev->all_adj_list.upper)) return -EBUSY; - if (__netdev_find_adj(dev, upper_dev, &dev->adj_list.upper)) + if (__netdev_find_adj(upper_dev, &dev->adj_list.upper)) return -EEXIST; if (master && netdev_master_upper_dev_get(dev)) @@ -5363,6 +5373,12 @@ static int __netdev_upper_dev_link(struct net_device *dev, changeupper_info.master = master; changeupper_info.linking = true; + ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev, + &changeupper_info.info); + ret = notifier_to_errno(ret); + if (ret) + return ret; + ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private, master); if (ret) @@ -5505,6 +5521,9 @@ void netdev_upper_dev_unlink(struct net_device *dev, changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev; changeupper_info.linking = false; + call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev, + &changeupper_info.info); + __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); /* Here is the tricky part. We must remove all dev's lower @@ -5631,7 +5650,7 @@ void *netdev_lower_dev_get_private(struct net_device *dev, if (!lower_dev) return NULL; - lower = __netdev_find_adj(dev, lower_dev, &dev->adj_list.lower); + lower = __netdev_find_adj(lower_dev, &dev->adj_list.lower); if (!lower) return NULL; @@ -6269,6 +6288,48 @@ static void rollback_registered(struct net_device *dev) list_del(&single); } +static netdev_features_t netdev_sync_upper_features(struct net_device *lower, + struct net_device *upper, netdev_features_t features) +{ + netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES; + netdev_features_t feature; + int feature_bit; + + for_each_netdev_feature(&upper_disables, feature_bit) { + feature = __NETIF_F_BIT(feature_bit); + if (!(upper->wanted_features & feature) + && (features & feature)) { + netdev_dbg(lower, "Dropping feature %pNF, upper dev %s has it off.\n", + &feature, upper->name); + features &= ~feature; + } + } + + return features; +} + +static void netdev_sync_lower_features(struct net_device *upper, + struct net_device *lower, netdev_features_t features) +{ + netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES; + netdev_features_t feature; + int feature_bit; + + for_each_netdev_feature(&upper_disables, feature_bit) { + feature = __NETIF_F_BIT(feature_bit); + if (!(features & feature) && (lower->features & feature)) { + netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n", + &feature, lower->name); + lower->wanted_features &= ~feature; + netdev_update_features(lower); + + if (unlikely(lower->features & feature)) + netdev_WARN(upper, "failed to disable %pNF on %s!\n", + &feature, lower->name); + } + } +} + static netdev_features_t netdev_fix_features(struct net_device *dev, netdev_features_t features) { @@ -6338,7 +6399,9 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, int __netdev_update_features(struct net_device *dev) { + struct net_device *upper, *lower; netdev_features_t features; + struct list_head *iter; int err = 0; ASSERT_RTNL(); @@ -6351,6 +6414,10 @@ int __netdev_update_features(struct net_device *dev) /* driver might be less strict about feature dependencies */ features = netdev_fix_features(dev, features); + /* some features can't be enabled if they're off an an upper device */ + netdev_for_each_upper_dev_rcu(dev, upper, iter) + features = netdev_sync_upper_features(dev, upper, features); + if (dev->features == features) return 0; @@ -6367,6 +6434,12 @@ int __netdev_update_features(struct net_device *dev) return -1; } + /* some features must be disabled on lower devices when disabled + * on an upper device (think: bonding master or bridge) + */ + netdev_for_each_lower_dev(dev, lower, iter) + netdev_sync_lower_features(dev, lower, features); + if (!err) dev->features = features; diff --git a/net/core/dst.c b/net/core/dst.c index 0771c8cb9307..2a1818065e12 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -144,12 +144,12 @@ loop: mutex_unlock(&dst_gc_mutex); } -int dst_discard_sk(struct sock *sk, struct sk_buff *skb) +int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb) { kfree_skb(skb); return 0; } -EXPORT_SYMBOL(dst_discard_sk); +EXPORT_SYMBOL(dst_discard_out); const u32 dst_default_metrics[RTAX_MAX + 1] = { /* This initializer is needed to force linker to place this variable @@ -177,7 +177,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops, dst->xfrm = NULL; #endif dst->input = dst_discard; - dst->output = dst_discard_sk; + dst->output = dst_discard_out; dst->error = 0; dst->obsolete = initial_obsolete; dst->header_len = 0; @@ -224,7 +224,7 @@ static void ___dst_free(struct dst_entry *dst) */ if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) { dst->input = dst_discard; - dst->output = dst_discard_sk; + dst->output = dst_discard_out; } dst->obsolete = DST_OBSOLETE_DEAD; } @@ -352,7 +352,7 @@ static struct dst_ops md_dst_ops = { .family = AF_UNSPEC, }; -static int dst_md_discard_sk(struct sock *sk, struct sk_buff *skb) +static int dst_md_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb) { WARN_ONCE(1, "Attempting to call output on metadata dst\n"); kfree_skb(skb); @@ -375,7 +375,7 @@ static void __metadata_dst_init(struct metadata_dst *md_dst, u8 optslen) DST_METADATA | DST_NOCACHE | DST_NOCOUNT); dst->input = dst_md_discard; - dst->output = dst_md_discard_sk; + dst->output = dst_md_discard_out; memset(dst + 1, 0, sizeof(*md_dst) + optslen - sizeof(*dst)); } @@ -430,7 +430,7 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev, if (!unregister) { dst->input = dst_discard; - dst->output = dst_discard_sk; + dst->output = dst_discard_out; } else { dst->dev = dev_net(dst->dev)->loopback_dev; dev_hold(dst->dev); diff --git a/net/core/filter.c b/net/core/filter.c index bb18c3680001..672eefbfbe99 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -49,16 +49,17 @@ #include <net/sch_generic.h> #include <net/cls_cgroup.h> #include <net/dst_metadata.h> +#include <net/dst.h> /** * sk_filter - run a packet through a socket filter * @sk: sock associated with &sk_buff * @skb: buffer to filter * - * Run the filter code and then cut skb->data to correct size returned by - * SK_RUN_FILTER. If pkt_len is 0 we toss packet. If skb->len is smaller + * Run the eBPF program and then cut skb->data to correct size returned by + * the program. If pkt_len is 0 we toss packet. If skb->len is smaller * than pkt_len we keep whole skb->data. This is the socket level - * wrapper to SK_RUN_FILTER. It returns 0 if the packet should + * wrapper to BPF_PROG_RUN. It returns 0 if the packet should * be accepted or -EPERM if the packet should be tossed. * */ @@ -82,7 +83,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb) rcu_read_lock(); filter = rcu_dereference(sk->sk_filter); if (filter) { - unsigned int pkt_len = SK_RUN_FILTER(filter, skb); + unsigned int pkt_len = bpf_prog_run_save_cb(filter->prog, skb); err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; } @@ -148,12 +149,6 @@ static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) return raw_smp_processor_id(); } -/* note that this only generates 32-bit random numbers */ -static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) -{ - return prandom_u32(); -} - static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg, struct bpf_insn *insn_buf) { @@ -312,7 +307,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, *insn = BPF_EMIT_CALL(__get_raw_cpu_id); break; case SKF_AD_OFF + SKF_AD_RANDOM: - *insn = BPF_EMIT_CALL(__get_random_u32); + *insn = BPF_EMIT_CALL(bpf_user_rnd_u32); + bpf_user_rnd_init_once(); break; } break; @@ -1001,7 +997,7 @@ static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp, int err; fp->bpf_func = NULL; - fp->jited = false; + fp->jited = 0; err = bpf_check_classic(fp->insns, fp->len); if (err) { @@ -1083,16 +1079,18 @@ EXPORT_SYMBOL_GPL(bpf_prog_create); * @pfp: the unattached filter that is created * @fprog: the filter program * @trans: post-classic verifier transformation handler + * @save_orig: save classic BPF program * * This function effectively does the same as bpf_prog_create(), only * that it builds up its insns buffer from user space provided buffer. * It also allows for passing a bpf_aux_classic_check_t handler. */ int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, - bpf_aux_classic_check_t trans) + bpf_aux_classic_check_t trans, bool save_orig) { unsigned int fsize = bpf_classic_proglen(fprog); struct bpf_prog *fp; + int err; /* Make sure new filter is there and in the right amounts. */ if (fprog->filter == NULL) @@ -1108,12 +1106,16 @@ int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, } fp->len = fprog->len; - /* Since unattached filters are not copied back to user - * space through sk_get_filter(), we do not need to hold - * a copy here, and can spare us the work. - */ fp->orig_prog = NULL; + if (save_orig) { + err = bpf_prog_store_orig_filter(fp, fprog); + if (err) { + __bpf_prog_free(fp); + return -ENOMEM; + } + } + /* bpf_prepare_filter() already takes care of freeing * memory in case something goes wrong. */ @@ -1404,9 +1406,6 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5) if (unlikely(!dev)) return -EINVAL; - if (unlikely(!(dev->flags & IFF_UP))) - return -EINVAL; - skb2 = skb_clone(skb, GFP_ATOMIC); if (unlikely(!skb2)) return -ENOMEM; @@ -1428,6 +1427,49 @@ const struct bpf_func_proto bpf_clone_redirect_proto = { .arg3_type = ARG_ANYTHING, }; +struct redirect_info { + u32 ifindex; + u32 flags; +}; + +static DEFINE_PER_CPU(struct redirect_info, redirect_info); +static u64 bpf_redirect(u64 ifindex, u64 flags, u64 r3, u64 r4, u64 r5) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + + ri->ifindex = ifindex; + ri->flags = flags; + return TC_ACT_REDIRECT; +} + +int skb_do_redirect(struct sk_buff *skb) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + struct net_device *dev; + + dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex); + ri->ifindex = 0; + if (unlikely(!dev)) { + kfree_skb(skb); + return -EINVAL; + } + + if (BPF_IS_REDIRECT_INGRESS(ri->flags)) + return dev_forward_skb(dev, skb); + + skb->dev = dev; + skb_sender_cpu_clear(skb); + return dev_queue_xmit(skb); +} + +const struct bpf_func_proto bpf_redirect_proto = { + .func = bpf_redirect, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + .arg2_type = ARG_ANYTHING, +}; + static u64 bpf_get_cgroup_classid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) { return task_get_classid((struct sk_buff *) (unsigned long) r1); @@ -1440,6 +1482,25 @@ static const struct bpf_func_proto bpf_get_cgroup_classid_proto = { .arg1_type = ARG_PTR_TO_CTX, }; +static u64 bpf_get_route_realm(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ +#ifdef CONFIG_IP_ROUTE_CLASSID + const struct dst_entry *dst; + + dst = skb_dst((struct sk_buff *) (unsigned long) r1); + if (dst) + return dst->tclassid; +#endif + return 0; +} + +static const struct bpf_func_proto bpf_get_route_realm_proto = { + .func = bpf_get_route_realm, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5) { struct sk_buff *skb = (struct sk_buff *) (long) r1; @@ -1580,7 +1641,8 @@ sk_filter_func_proto(enum bpf_func_id func_id) case BPF_FUNC_ktime_get_ns: return &bpf_ktime_get_ns_proto; case BPF_FUNC_trace_printk: - return bpf_get_trace_printk_proto(); + if (capable(CAP_SYS_ADMIN)) + return bpf_get_trace_printk_proto(); default: return NULL; } @@ -1608,6 +1670,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_skb_get_tunnel_key_proto; case BPF_FUNC_skb_set_tunnel_key: return bpf_get_skb_set_tunnel_key_proto(); + case BPF_FUNC_redirect: + return &bpf_redirect_proto; + case BPF_FUNC_get_route_realm: + return &bpf_get_route_realm_proto; default: return sk_filter_func_proto(func_id); } @@ -1633,6 +1699,9 @@ static bool __is_valid_access(int off, int size, enum bpf_access_type type) static bool sk_filter_is_valid_access(int off, int size, enum bpf_access_type type) { + if (off == offsetof(struct __sk_buff, tc_classid)) + return false; + if (type == BPF_WRITE) { switch (off) { case offsetof(struct __sk_buff, cb[0]) ... @@ -1649,10 +1718,14 @@ static bool sk_filter_is_valid_access(int off, int size, static bool tc_cls_act_is_valid_access(int off, int size, enum bpf_access_type type) { + if (off == offsetof(struct __sk_buff, tc_classid)) + return type == BPF_WRITE ? true : false; + if (type == BPF_WRITE) { switch (off) { case offsetof(struct __sk_buff, mark): case offsetof(struct __sk_buff, tc_index): + case offsetof(struct __sk_buff, priority): case offsetof(struct __sk_buff, cb[0]) ... offsetof(struct __sk_buff, cb[4]): break; @@ -1665,7 +1738,8 @@ static bool tc_cls_act_is_valid_access(int off, int size, static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, int src_reg, int ctx_off, - struct bpf_insn *insn_buf) + struct bpf_insn *insn_buf, + struct bpf_prog *prog) { struct bpf_insn *insn = insn_buf; @@ -1694,8 +1768,12 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, case offsetof(struct __sk_buff, priority): BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, priority) != 4); - *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, - offsetof(struct sk_buff, priority)); + if (type == BPF_WRITE) + *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg, + offsetof(struct sk_buff, priority)); + else + *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, + offsetof(struct sk_buff, priority)); break; case offsetof(struct __sk_buff, ingress_ifindex): @@ -1752,6 +1830,7 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, offsetof(struct __sk_buff, cb[4]): BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20); + prog->cb_access = 1; ctx_off -= offsetof(struct __sk_buff, cb[0]); ctx_off += offsetof(struct sk_buff, cb); ctx_off += offsetof(struct qdisc_skb_cb, data); @@ -1761,6 +1840,14 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, ctx_off); break; + case offsetof(struct __sk_buff, tc_classid): + ctx_off -= offsetof(struct __sk_buff, tc_classid); + ctx_off += offsetof(struct sk_buff, cb); + ctx_off += offsetof(struct qdisc_skb_cb, tc_classid); + WARN_ON(type != BPF_WRITE); + *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg, ctx_off); + break; + case offsetof(struct __sk_buff, tc_index): #ifdef CONFIG_NET_SCHED BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2); diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index dfb1a9ca0835..299cfc24d888 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -180,7 +180,7 @@ int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b) } EXPORT_SYMBOL(lwtunnel_cmp_encap); -int lwtunnel_output(struct sock *sk, struct sk_buff *skb) +int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); const struct lwtunnel_encap_ops *ops; @@ -199,7 +199,7 @@ int lwtunnel_output(struct sock *sk, struct sk_buff *skb) rcu_read_lock(); ops = rcu_dereference(lwtun_encaps[lwtstate->type]); if (likely(ops && ops->output)) - ret = ops->output(sk, skb); + ret = ops->output(net, sk, skb); rcu_read_unlock(); if (ret == -EOPNOTSUPP) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 2b515ba7e94f..1aa8437ed6c4 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2235,14 +2235,53 @@ static void neigh_update_notify(struct neighbour *neigh) __neigh_notify(neigh, RTM_NEWNEIGH, 0); } +static bool neigh_master_filtered(struct net_device *dev, int master_idx) +{ + struct net_device *master; + + if (!master_idx) + return false; + + master = netdev_master_upper_dev_get(dev); + if (!master || master->ifindex != master_idx) + return true; + + return false; +} + +static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx) +{ + if (filter_idx && dev->ifindex != filter_idx) + return true; + + return false; +} + static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); + const struct nlmsghdr *nlh = cb->nlh; + struct nlattr *tb[NDA_MAX + 1]; struct neighbour *n; int rc, h, s_h = cb->args[1]; int idx, s_idx = idx = cb->args[2]; struct neigh_hash_table *nht; + int filter_master_idx = 0, filter_idx = 0; + unsigned int flags = NLM_F_MULTI; + int err; + + err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL); + if (!err) { + if (tb[NDA_IFINDEX]) + filter_idx = nla_get_u32(tb[NDA_IFINDEX]); + + if (tb[NDA_MASTER]) + filter_master_idx = nla_get_u32(tb[NDA_MASTER]); + + if (filter_idx || filter_master_idx) + flags |= NLM_F_DUMP_FILTERED; + } rcu_read_lock_bh(); nht = rcu_dereference_bh(tbl->nht); @@ -2255,12 +2294,16 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, n = rcu_dereference_bh(n->next)) { if (!net_eq(dev_net(n->dev), net)) continue; + if (neigh_ifindex_filtered(n->dev, filter_idx)) + continue; + if (neigh_master_filtered(n->dev, filter_master_idx)) + continue; if (idx < s_idx) goto next; if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, - NLM_F_MULTI) < 0) { + flags) < 0) { rc = -1; goto out; } diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 830f8a7c1cb1..f88a62ab019d 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -471,7 +471,7 @@ static ssize_t phys_switch_id_show(struct device *dev, if (dev_isalive(netdev)) { struct switchdev_attr attr = { - .id = SWITCHDEV_ATTR_PORT_PARENT_ID, + .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, .flags = SWITCHDEV_F_NO_RECURSE, }; @@ -1003,15 +1003,12 @@ static ssize_t show_trans_timeout(struct netdev_queue *queue, } #ifdef CONFIG_XPS -static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue) +static unsigned int get_netdev_queue_index(struct netdev_queue *queue) { struct net_device *dev = queue->dev; - int i; - - for (i = 0; i < dev->num_tx_queues; i++) - if (queue == &dev->_tx[i]) - break; + unsigned int i; + i = queue - dev->_tx; BUG_ON(i >= dev->num_tx_queues); return i; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 8bdada242a7d..94acfc89ad97 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -140,7 +140,7 @@ static void queue_process(struct work_struct *work) * case. Further, we test the poll_owner to avoid recursion on UP * systems where the lock doesn't exist. */ -static int poll_one_napi(struct napi_struct *napi, int budget) +static void poll_one_napi(struct napi_struct *napi) { int work = 0; @@ -149,33 +149,33 @@ static int poll_one_napi(struct napi_struct *napi, int budget) * holding the napi->poll_lock. */ if (!test_bit(NAPI_STATE_SCHED, &napi->state)) - return budget; + return; /* If we set this bit but see that it has already been set, * that indicates that napi has been disabled and we need * to abort this operation */ if (test_and_set_bit(NAPI_STATE_NPSVC, &napi->state)) - goto out; + return; - work = napi->poll(napi, budget); - WARN_ONCE(work > budget, "%pF exceeded budget in poll\n", napi->poll); + /* We explicilty pass the polling call a budget of 0 to + * indicate that we are clearing the Tx path only. + */ + work = napi->poll(napi, 0); + WARN_ONCE(work, "%pF exceeded budget in poll\n", napi->poll); trace_napi_poll(napi); clear_bit(NAPI_STATE_NPSVC, &napi->state); - -out: - return budget - work; } -static void poll_napi(struct net_device *dev, int budget) +static void poll_napi(struct net_device *dev) { struct napi_struct *napi; list_for_each_entry(napi, &dev->napi_list, dev_list) { if (napi->poll_owner != smp_processor_id() && spin_trylock(&napi->poll_lock)) { - budget = poll_one_napi(napi, budget); + poll_one_napi(napi); spin_unlock(&napi->poll_lock); } } @@ -185,7 +185,6 @@ static void netpoll_poll_dev(struct net_device *dev) { const struct net_device_ops *ops; struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo); - int budget = 0; /* Don't do any rx activity if the dev_lock mutex is held * the dev_open/close paths use this to block netpoll activity @@ -208,7 +207,7 @@ static void netpoll_poll_dev(struct net_device *dev) /* Process pending work on NIC */ ops->ndo_poll_controller(dev); - poll_napi(dev, budget); + poll_napi(dev); up(&ni->dev_lock); diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c index 4eab4a94a59d..703cf76aa7c2 100644 --- a/net/core/ptp_classifier.c +++ b/net/core/ptp_classifier.c @@ -58,7 +58,7 @@ * jneq #0x0, drop_ieee1588 ; for PTP_GEN_BIT and drop these * ldh [18] ; reload payload * and #0xf ; mask PTP_CLASS_VMASK - * or #0x70 ; PTP_CLASS_VLAN|PTP_CLASS_L2 + * or #0xc0 ; PTP_CLASS_VLAN|PTP_CLASS_L2 * ret a ; return PTP class * * ; PTP over UDP over IPv4 over 802.1Q over Ethernet @@ -73,7 +73,7 @@ * jneq #319, drop_8021q_ipv4 ; is port PTP_EV_PORT ? * ldh [x + 26] ; load payload * and #0xf ; mask PTP_CLASS_VMASK - * or #0x50 ; PTP_CLASS_VLAN|PTP_CLASS_IPV4 + * or #0x90 ; PTP_CLASS_VLAN|PTP_CLASS_IPV4 * ret a ; return PTP class * drop_8021q_ipv4: ret #0x0 ; PTP_CLASS_NONE * @@ -86,7 +86,7 @@ * jneq #319, drop_8021q_ipv6 ; is port PTP_EV_PORT ? * ldh [66] ; load payload * and #0xf ; mask PTP_CLASS_VMASK - * or #0x60 ; PTP_CLASS_VLAN|PTP_CLASS_IPV6 + * or #0xa0 ; PTP_CLASS_VLAN|PTP_CLASS_IPV6 * ret a ; return PTP class * drop_8021q_ipv6: ret #0x0 ; PTP_CLASS_NONE * @@ -98,7 +98,7 @@ * jneq #0x0, drop_ieee1588 ; for PTP_GEN_BIT and drop these * ldh [14] ; reload payload * and #0xf ; mask PTP_CLASS_VMASK - * or #0x30 ; PTP_CLASS_L2 + * or #0x40 ; PTP_CLASS_L2 * ret a ; return PTP class * drop_ieee1588: ret #0x0 ; PTP_CLASS_NONE */ @@ -150,7 +150,7 @@ void __init ptp_classifier_init(void) { 0x15, 0, 35, 0x00000000 }, { 0x28, 0, 0, 0x00000012 }, { 0x54, 0, 0, 0x0000000f }, - { 0x44, 0, 0, 0x00000070 }, + { 0x44, 0, 0, 0x000000c0 }, { 0x16, 0, 0, 0x00000000 }, { 0x15, 0, 12, 0x00000800 }, { 0x30, 0, 0, 0x0000001b }, @@ -162,7 +162,7 @@ void __init ptp_classifier_init(void) { 0x15, 0, 4, 0x0000013f }, { 0x48, 0, 0, 0x0000001a }, { 0x54, 0, 0, 0x0000000f }, - { 0x44, 0, 0, 0x00000050 }, + { 0x44, 0, 0, 0x00000090 }, { 0x16, 0, 0, 0x00000000 }, { 0x06, 0, 0, 0x00000000 }, { 0x15, 0, 8, 0x000086dd }, @@ -172,7 +172,7 @@ void __init ptp_classifier_init(void) { 0x15, 0, 4, 0x0000013f }, { 0x28, 0, 0, 0x00000042 }, { 0x54, 0, 0, 0x0000000f }, - { 0x44, 0, 0, 0x00000060 }, + { 0x44, 0, 0, 0x000000a0 }, { 0x16, 0, 0, 0x00000000 }, { 0x06, 0, 0, 0x00000000 }, { 0x15, 0, 7, 0x000088f7 }, @@ -181,7 +181,7 @@ void __init ptp_classifier_init(void) { 0x15, 0, 4, 0x00000000 }, { 0x28, 0, 0, 0x0000000e }, { 0x54, 0, 0, 0x0000000f }, - { 0x44, 0, 0, 0x00000030 }, + { 0x44, 0, 0, 0x00000040 }, { 0x16, 0, 0, 0x00000000 }, { 0x06, 0, 0, 0x00000000 }, }; diff --git a/net/core/request_sock.c b/net/core/request_sock.c index b42f0e26f89e..5d26056b6d8f 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -37,90 +37,16 @@ int sysctl_max_syn_backlog = 256; EXPORT_SYMBOL(sysctl_max_syn_backlog); -int reqsk_queue_alloc(struct request_sock_queue *queue, - unsigned int nr_table_entries) +void reqsk_queue_alloc(struct request_sock_queue *queue) { - size_t lopt_size = sizeof(struct listen_sock); - struct listen_sock *lopt = NULL; + spin_lock_init(&queue->rskq_lock); - nr_table_entries = min_t(u32, nr_table_entries, sysctl_max_syn_backlog); - nr_table_entries = max_t(u32, nr_table_entries, 8); - nr_table_entries = roundup_pow_of_two(nr_table_entries + 1); - lopt_size += nr_table_entries * sizeof(struct request_sock *); + spin_lock_init(&queue->fastopenq.lock); + queue->fastopenq.rskq_rst_head = NULL; + queue->fastopenq.rskq_rst_tail = NULL; + queue->fastopenq.qlen = 0; - if (lopt_size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) - lopt = kzalloc(lopt_size, GFP_KERNEL | - __GFP_NOWARN | - __GFP_NORETRY); - if (!lopt) - lopt = vzalloc(lopt_size); - if (!lopt) - return -ENOMEM; - - get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd)); - spin_lock_init(&queue->syn_wait_lock); queue->rskq_accept_head = NULL; - lopt->nr_table_entries = nr_table_entries; - lopt->max_qlen_log = ilog2(nr_table_entries); - - spin_lock_bh(&queue->syn_wait_lock); - queue->listen_opt = lopt; - spin_unlock_bh(&queue->syn_wait_lock); - - return 0; -} - -void __reqsk_queue_destroy(struct request_sock_queue *queue) -{ - /* This is an error recovery path only, no locking needed */ - kvfree(queue->listen_opt); -} - -static inline struct listen_sock *reqsk_queue_yank_listen_sk( - struct request_sock_queue *queue) -{ - struct listen_sock *lopt; - - spin_lock_bh(&queue->syn_wait_lock); - lopt = queue->listen_opt; - queue->listen_opt = NULL; - spin_unlock_bh(&queue->syn_wait_lock); - - return lopt; -} - -void reqsk_queue_destroy(struct request_sock_queue *queue) -{ - /* make all the listen_opt local to us */ - struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue); - - if (listen_sock_qlen(lopt) != 0) { - unsigned int i; - - for (i = 0; i < lopt->nr_table_entries; i++) { - struct request_sock *req; - - spin_lock_bh(&queue->syn_wait_lock); - while ((req = lopt->syn_table[i]) != NULL) { - lopt->syn_table[i] = req->dl_next; - /* Because of following del_timer_sync(), - * we must release the spinlock here - * or risk a dead lock. - */ - spin_unlock_bh(&queue->syn_wait_lock); - atomic_inc(&lopt->qlen_dec); - if (del_timer_sync(&req->rsk_timer)) - reqsk_put(req); - reqsk_put(req); - spin_lock_bh(&queue->syn_wait_lock); - } - spin_unlock_bh(&queue->syn_wait_lock); - } - } - - if (WARN_ON(listen_sock_qlen(lopt) != 0)) - pr_err("qlen %u\n", listen_sock_qlen(lopt)); - kvfree(lopt); } /* @@ -174,7 +100,7 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, struct sock *lsk = req->rsk_listener; struct fastopen_queue *fastopenq; - fastopenq = inet_csk(lsk)->icsk_accept_queue.fastopenq; + fastopenq = &inet_csk(lsk)->icsk_accept_queue.fastopenq; tcp_sk(sk)->fastopen_rsk = NULL; spin_lock_bh(&fastopenq->lock); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 0ec48403ed68..504bd17b7456 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -96,7 +96,7 @@ int rtnl_is_locked(void) EXPORT_SYMBOL(rtnl_is_locked); #ifdef CONFIG_PROVE_LOCKING -int lockdep_rtnl_is_held(void) +bool lockdep_rtnl_is_held(void) { return lockdep_is_held(&rtnl_mutex); } @@ -497,7 +497,8 @@ void rtnl_af_unregister(struct rtnl_af_ops *ops) } EXPORT_SYMBOL_GPL(rtnl_af_unregister); -static size_t rtnl_link_get_af_size(const struct net_device *dev) +static size_t rtnl_link_get_af_size(const struct net_device *dev, + u32 ext_filter_mask) { struct rtnl_af_ops *af_ops; size_t size; @@ -509,7 +510,7 @@ static size_t rtnl_link_get_af_size(const struct net_device *dev) if (af_ops->get_link_af_size) { /* AF_* + nested data */ size += nla_total_size(sizeof(struct nlattr)) + - af_ops->get_link_af_size(dev); + af_ops->get_link_af_size(dev, ext_filter_mask); } } @@ -837,7 +838,8 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev, /* IFLA_VF_STATS_BROADCAST */ nla_total_size(sizeof(__u64)) + /* IFLA_VF_STATS_MULTICAST */ - nla_total_size(sizeof(__u64))); + nla_total_size(sizeof(__u64)) + + nla_total_size(sizeof(struct ifla_vf_trust))); return size; } else return 0; @@ -900,7 +902,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */ + rtnl_port_size(dev, ext_filter_mask) /* IFLA_VF_PORTS + IFLA_PORT_SELF */ + rtnl_link_get_size(dev) /* IFLA_LINKINFO */ - + rtnl_link_get_af_size(dev) /* IFLA_AF_SPEC */ + + rtnl_link_get_af_size(dev, ext_filter_mask) /* IFLA_AF_SPEC */ + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */ + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */ + nla_total_size(1); /* IFLA_PROTO_DOWN */ @@ -1025,7 +1027,7 @@ static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev) { int err; struct switchdev_attr attr = { - .id = SWITCHDEV_ATTR_PORT_PARENT_ID, + .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, .flags = SWITCHDEV_F_NO_RECURSE, }; @@ -1160,6 +1162,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct ifla_vf_link_state vf_linkstate; struct ifla_vf_rss_query_en vf_rss_query_en; struct ifla_vf_stats vf_stats; + struct ifla_vf_trust vf_trust; /* * Not all SR-IOV capable drivers support the @@ -1169,6 +1172,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, */ ivi.spoofchk = -1; ivi.rss_query_en = -1; + ivi.trusted = -1; memset(ivi.mac, 0, sizeof(ivi.mac)); /* The default value for VF link state is "auto" * IFLA_VF_LINK_STATE_AUTO which equals zero @@ -1182,7 +1186,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, vf_tx_rate.vf = vf_spoofchk.vf = vf_linkstate.vf = - vf_rss_query_en.vf = ivi.vf; + vf_rss_query_en.vf = + vf_trust.vf = ivi.vf; memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); vf_vlan.vlan = ivi.vlan; @@ -1193,6 +1198,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, vf_spoofchk.setting = ivi.spoofchk; vf_linkstate.link_state = ivi.linkstate; vf_rss_query_en.setting = ivi.rss_query_en; + vf_trust.setting = ivi.trusted; vf = nla_nest_start(skb, IFLA_VF_INFO); if (!vf) { nla_nest_cancel(skb, vfinfo); @@ -1210,7 +1216,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, &vf_linkstate) || nla_put(skb, IFLA_VF_RSS_QUERY_EN, sizeof(vf_rss_query_en), - &vf_rss_query_en)) + &vf_rss_query_en) || + nla_put(skb, IFLA_VF_TRUST, + sizeof(vf_trust), &vf_trust)) goto nla_put_failure; memset(&vf_stats, 0, sizeof(vf_stats)); if (dev->netdev_ops->ndo_get_vf_stats) @@ -1272,7 +1280,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (!(af = nla_nest_start(skb, af_ops->family))) goto nla_put_failure; - err = af_ops->fill_link_af(skb, dev); + err = af_ops->fill_link_af(skb, dev, ext_filter_mask); /* * Caller may return ENODATA to indicate that there @@ -1347,6 +1355,7 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { [IFLA_VF_LINK_STATE] = { .len = sizeof(struct ifla_vf_link_state) }, [IFLA_VF_RSS_QUERY_EN] = { .len = sizeof(struct ifla_vf_rss_query_en) }, [IFLA_VF_STATS] = { .type = NLA_NESTED }, + [IFLA_VF_TRUST] = { .len = sizeof(struct ifla_vf_trust) }, }; static const struct nla_policy ifla_vf_stats_policy[IFLA_VF_STATS_MAX + 1] = { @@ -1586,6 +1595,16 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) return err; } + if (tb[IFLA_VF_TRUST]) { + struct ifla_vf_trust *ivt = nla_data(tb[IFLA_VF_TRUST]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_trust) + err = ops->ndo_set_vf_trust(dev, ivt->vf, ivt->setting); + if (err < 0) + return err; + } + return err; } @@ -3443,4 +3462,3 @@ void __init rtnetlink_init(void) rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, NULL); rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL); } - diff --git a/net/core/sock.c b/net/core/sock.c index 3307c02244d3..7529eb9463be 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -422,13 +422,25 @@ static void sock_warn_obsolete_bsdism(const char *name) } } +static bool sock_needs_netstamp(const struct sock *sk) +{ + switch (sk->sk_family) { + case AF_UNSPEC: + case AF_UNIX: + return false; + default: + return true; + } +} + #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) static void sock_disable_timestamp(struct sock *sk, unsigned long flags) { if (sk->sk_flags & flags) { sk->sk_flags &= ~flags; - if (!(sk->sk_flags & SK_FLAGS_TIMESTAMP)) + if (sock_needs_netstamp(sk) && + !(sk->sk_flags & SK_FLAGS_TIMESTAMP)) net_disable_timestamp(); } } @@ -988,6 +1000,10 @@ set_rcvbuf: sk->sk_max_pacing_rate); break; + case SO_INCOMING_CPU: + sk->sk_incoming_cpu = val; + break; + default: ret = -ENOPROTOOPT; break; @@ -1578,7 +1594,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) if (newsk->sk_prot->sockets_allocated) sk_sockets_allocated_inc(newsk); - if (newsk->sk_flags & SK_FLAGS_TIMESTAMP) + if (sock_needs_netstamp(sk) && + newsk->sk_flags & SK_FLAGS_TIMESTAMP) net_enable_timestamp(); } out: @@ -1639,6 +1656,28 @@ void sock_wfree(struct sk_buff *skb) } EXPORT_SYMBOL(sock_wfree); +void skb_set_owner_w(struct sk_buff *skb, struct sock *sk) +{ + skb_orphan(skb); + skb->sk = sk; +#ifdef CONFIG_INET + if (unlikely(!sk_fullsock(sk))) { + skb->destructor = sock_edemux; + sock_hold(sk); + return; + } +#endif + skb->destructor = sock_wfree; + skb_set_hash_from_sk(skb, sk); + /* + * We used to take a refcount on sk, but following operation + * is enough to guarantee sk_free() wont free this sock until + * all in-flight packets are completed + */ + atomic_add(skb->truesize, &sk->sk_wmem_alloc); +} +EXPORT_SYMBOL(skb_set_owner_w); + void skb_orphan_partial(struct sk_buff *skb) { /* TCP stack sets skb->ooo_okay based on sk_wmem_alloc, @@ -1852,6 +1891,32 @@ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, } EXPORT_SYMBOL(sock_alloc_send_skb); +int sock_cmsg_send(struct sock *sk, struct msghdr *msg, + struct sockcm_cookie *sockc) +{ + struct cmsghdr *cmsg; + + for_each_cmsghdr(cmsg, msg) { + if (!CMSG_OK(msg, cmsg)) + return -EINVAL; + if (cmsg->cmsg_level != SOL_SOCKET) + continue; + switch (cmsg->cmsg_type) { + case SO_MARK: + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) + return -EINVAL; + sockc->mark = *(u32 *)CMSG_DATA(cmsg); + break; + default: + return -EINVAL; + } + } + return 0; +} +EXPORT_SYMBOL(sock_cmsg_send); + /* On 32bit arches, an skb frag is limited to 2^15 */ #define SKB_FRAG_PAGE_ORDER get_order(32768) @@ -2353,6 +2418,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_max_pacing_rate = ~0U; sk->sk_pacing_rate = ~0U; + sk->sk_incoming_cpu = -1; /* * Before updating sk_refcnt, we must commit prior changes to memory * (Documentation/RCU/rculist_nulls.txt for details) @@ -2479,7 +2545,8 @@ void sock_enable_timestamp(struct sock *sk, int flag) * time stamping, but time stamping might have been on * already because of the other one */ - if (!(previous_flags & SK_FLAGS_TIMESTAMP)) + if (sock_needs_netstamp(sk) && + !(previous_flags & SK_FLAGS_TIMESTAMP)) net_enable_timestamp(); } } @@ -2758,7 +2825,7 @@ static int req_prot_init(const struct proto *prot) rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name, rsk_prot->obj_size, 0, - 0, NULL); + prot->slab_flags, NULL); if (!rsk_prot->slab) { pr_crit("%s: Can't create request sock SLAB cache!\n", diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 817622f3dbb7..0c1d58d43f67 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -1,3 +1,5 @@ +/* License: GPL */ + #include <linux/mutex.h> #include <linux/socket.h> #include <linux/skbuff.h> @@ -323,14 +325,4 @@ static int __init sock_diag_init(void) BUG_ON(!broadcast_wq); return register_pernet_subsys(&diag_net_ops); } - -static void __exit sock_diag_exit(void) -{ - unregister_pernet_subsys(&diag_net_ops); - destroy_workqueue(broadcast_wq); -} - -module_init(sock_diag_init); -module_exit(sock_diag_exit); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_SOCK_DIAG); +device_initcall(sock_diag_init); diff --git a/net/core/tso.c b/net/core/tso.c index 630b30b4fb53..5dca7ce8ee9f 100644 --- a/net/core/tso.c +++ b/net/core/tso.c @@ -1,4 +1,5 @@ #include <linux/export.h> +#include <linux/if_vlan.h> #include <net/ip.h> #include <net/tso.h> #include <asm/unaligned.h> @@ -14,18 +15,24 @@ EXPORT_SYMBOL(tso_count_descs); void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso, int size, bool is_last) { - struct iphdr *iph; struct tcphdr *tcph; int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); int mac_hdr_len = skb_network_offset(skb); memcpy(hdr, skb->data, hdr_len); - iph = (struct iphdr *)(hdr + mac_hdr_len); - iph->id = htons(tso->ip_id); - iph->tot_len = htons(size + hdr_len - mac_hdr_len); + if (!tso->ipv6) { + struct iphdr *iph = (void *)(hdr + mac_hdr_len); + + iph->id = htons(tso->ip_id); + iph->tot_len = htons(size + hdr_len - mac_hdr_len); + tso->ip_id++; + } else { + struct ipv6hdr *iph = (void *)(hdr + mac_hdr_len); + + iph->payload_len = htons(size + tcp_hdrlen(skb)); + } tcph = (struct tcphdr *)(hdr + skb_transport_offset(skb)); put_unaligned_be32(tso->tcp_seq, &tcph->seq); - tso->ip_id++; if (!is_last) { /* Clear all special flags for not last packet */ @@ -61,6 +68,7 @@ void tso_start(struct sk_buff *skb, struct tso_t *tso) tso->ip_id = ntohs(ip_hdr(skb)->id); tso->tcp_seq = ntohl(tcp_hdr(skb)->seq); tso->next_frag_idx = 0; + tso->ipv6 = vlan_get_protocol(skb) == htons(ETH_P_IPV6); /* Build first data */ tso->size = skb_headlen(skb) - hdr_len; diff --git a/net/core/utils.c b/net/core/utils.c index 3dffce953c39..3d17ca8b4744 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -348,52 +348,3 @@ void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb, } } EXPORT_SYMBOL(inet_proto_csum_replace_by_diff); - -struct __net_random_once_work { - struct work_struct work; - struct static_key *key; -}; - -static void __net_random_once_deferred(struct work_struct *w) -{ - struct __net_random_once_work *work = - container_of(w, struct __net_random_once_work, work); - BUG_ON(!static_key_enabled(work->key)); - static_key_slow_dec(work->key); - kfree(work); -} - -static void __net_random_once_disable_jump(struct static_key *key) -{ - struct __net_random_once_work *w; - - w = kmalloc(sizeof(*w), GFP_ATOMIC); - if (!w) - return; - - INIT_WORK(&w->work, __net_random_once_deferred); - w->key = key; - schedule_work(&w->work); -} - -bool __net_get_random_once(void *buf, int nbytes, bool *done, - struct static_key *once_key) -{ - static DEFINE_SPINLOCK(lock); - unsigned long flags; - - spin_lock_irqsave(&lock, flags); - if (*done) { - spin_unlock_irqrestore(&lock, flags); - return false; - } - - get_random_bytes(buf, nbytes); - *done = true; - spin_unlock_irqrestore(&lock, flags); - - __net_random_once_disable_jump(once_key); - - return true; -} -EXPORT_SYMBOL(__net_get_random_once); |
