Diffstat (limited to 'net'): 37 files changed, 283 insertions, 981 deletions
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index f7fba74108a9..e24754a0e052 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -29,6 +29,7 @@ EXPORT_SYMBOL(br_should_route_hook); static int br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb) { + br_drop_fake_rtable(skb); return netif_receive_skb(skb); }
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 7ddbe7ec81d6..97fc19f001bf 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -516,21 +516,6 @@ static unsigned int br_nf_pre_routing(void *priv, } -/* PF_BRIDGE/LOCAL_IN ************************************************/ -/* The packet is locally destined, which requires a real - * dst_entry, so detach the fake one. On the way up, the - * packet would pass through PRE_ROUTING again (which already - * took place when the packet entered the bridge), but we - * register an IPv4 PRE_ROUTING 'sabotage' hook that will - * prevent this from happening. */ -static unsigned int br_nf_local_in(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - br_drop_fake_rtable(skb); - return NF_ACCEPT; -} - /* PF_BRIDGE/FORWARD *************************************************/ static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { @@ -901,12 +886,6 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = { .priority = NF_BR_PRI_BRNF, }, { - .hook = br_nf_local_in, - .pf = NFPROTO_BRIDGE, - .hooknum = NF_BR_LOCAL_IN, - .priority = NF_BR_PRI_BRNF, - }, - { .hook = br_nf_forward_ip, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_FORWARD,
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index ddc3573894b0..bc95e48d5cfb 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -1265,7 +1265,6 @@ static int decode_new_up_state_weight(void **p, void *end, if ((map->osd_state[osd] & CEPH_OSD_EXISTS) && (xorstate & CEPH_OSD_EXISTS)) { pr_info("osd%d does not exist\n", osd); - map->osd_weight[osd] = CEPH_OSD_IN; ret = set_primary_affinity(map, osd, CEPH_OSD_DEFAULT_PRIMARY_AFFINITY); if (ret)
diff --git a/net/core/dev.c b/net/core/dev.c index 51aed87e8eec..2587d7f30191 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1679,27 +1679,54 @@ EXPORT_SYMBOL_GPL(net_dec_ingress_queue); static struct static_key netstamp_needed __read_mostly; #ifdef HAVE_JUMP_LABEL static atomic_t netstamp_needed_deferred; +static atomic_t netstamp_wanted; static void netstamp_clear(struct work_struct *work) { int deferred = atomic_xchg(&netstamp_needed_deferred, 0); + int wanted; - while (deferred--) - static_key_slow_dec(&netstamp_needed); + wanted = atomic_add_return(deferred, &netstamp_wanted); + if (wanted > 0) + static_key_enable(&netstamp_needed); + else + static_key_disable(&netstamp_needed); } static DECLARE_WORK(netstamp_work, netstamp_clear); #endif void net_enable_timestamp(void) { +#ifdef HAVE_JUMP_LABEL + int wanted; + + while (1) { + wanted = atomic_read(&netstamp_wanted); + if (wanted <= 0) + break; + if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted + 1) == wanted) + return; + } + atomic_inc(&netstamp_needed_deferred); + schedule_work(&netstamp_work); +#else static_key_slow_inc(&netstamp_needed); +#endif } EXPORT_SYMBOL(net_enable_timestamp); void net_disable_timestamp(void) { #ifdef HAVE_JUMP_LABEL - /* net_disable_timestamp() can be called from non process context */ - atomic_inc(&netstamp_needed_deferred); + int wanted; + + while (1) { + wanted = atomic_read(&netstamp_wanted); + if (wanted <= 1) + break; + if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted - 1) == wanted) + return; + } + atomic_dec(&netstamp_needed_deferred); schedule_work(&netstamp_work); #else static_key_slow_dec(&netstamp_needed);
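The net/core/dev.c hunk above turns the timestamp static key into a reference-counted facility: fast-path callers bump netstamp_wanted up or down with a cmpxchg loop, and only the 0 <-> 1 transitions fall through to the deferred work item that actually flips the static key, which is what makes net_enable_timestamp()/net_disable_timestamp() safe to call from atomic context. A minimal user-space model of that counter protocol, with C11 atomics standing in for the kernel's atomic_t and a plain function standing in for schedule_work() (all names invented for the sketch, not kernel symbols):

#include <stdatomic.h>
#include <stdio.h>

static atomic_int wanted;   /* models netstamp_wanted */
static atomic_int deferred; /* models netstamp_needed_deferred */

/* kernel: atomic_inc/dec(&netstamp_needed_deferred); schedule_work(...); */
static void defer_key_update(int delta)
{
    atomic_fetch_add(&deferred, delta);
}

static void timestamp_enable(void)
{
    int w = atomic_load(&wanted);

    while (w > 0) {         /* already enabled: just take a reference */
        if (atomic_compare_exchange_weak(&wanted, &w, w + 1))
            return;
    }
    defer_key_update(+1);   /* 0 -> 1 transition: defer the key flip */
}

static void timestamp_disable(void)
{
    int w = atomic_load(&wanted);

    while (w > 1) {         /* not the last user: just drop a reference */
        if (atomic_compare_exchange_weak(&wanted, &w, w - 1))
            return;
    }
    defer_key_update(-1);   /* 1 -> 0 transition: defer the key flip */
}

/* The worker folds the pending delta back into 'wanted' and would then
 * call static_key_enable()/static_key_disable() based on the result. */
static void worker(void)
{
    int d = atomic_exchange(&deferred, 0);
    int w = atomic_fetch_add(&wanted, d) + d;

    printf("static key would now be %s (wanted=%d)\n",
           w > 0 ? "on" : "off", w);
}

int main(void)
{
    timestamp_enable();
    worker();               /* key on */
    timestamp_disable();
    worker();               /* key off */
    return 0;
}

Only the first enabler and the last disabler pay for the (possibly sleeping) static-key update; every other caller piggybacks on the counter, which is why the cmpxchg loop never has to block.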
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5dd643d524d6..fedcee8263b6 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3704,13 +3704,14 @@ void skb_complete_tx_timestamp(struct sk_buff *skb, if (!skb_may_tx_timestamp(sk, false)) return; - /* take a reference to prevent skb_orphan() from freeing the socket */ - sock_hold(sk); - - *skb_hwtstamps(skb) = *hwtstamps; - __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND); - - sock_put(sk); + /* Take a reference to prevent skb_orphan() from freeing the socket, + * but only if the socket refcount is not zero. + */ + if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) { + *skb_hwtstamps(skb) = *hwtstamps; + __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND); + sock_put(sk); + } } EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp); @@ -3761,7 +3762,7 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked) { struct sock *sk = skb->sk; struct sock_exterr_skb *serr; - int err; + int err = 1; skb->wifi_acked_valid = 1; skb->wifi_acked = acked; @@ -3771,14 +3772,15 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked) serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS; - /* take a reference to prevent skb_orphan() from freeing the socket */ - sock_hold(sk); - - err = sock_queue_err_skb(sk, skb); + /* Take a reference to prevent skb_orphan() from freeing the socket, + * but only if the socket refcount is not zero. + */ + if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) { + err = sock_queue_err_skb(sk, skb); + sock_put(sk); + } if (err) kfree_skb(skb); - - sock_put(sk); } EXPORT_SYMBOL_GPL(skb_complete_wifi_ack);
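The skbuff.c change above swaps sock_hold() for atomic_inc_not_zero(&sk->sk_refcnt): a blind increment can resurrect a socket whose refcount has already hit zero and is on its way to being freed, while inc-not-zero simply fails so the timestamp or ack is dropped instead. A stand-alone sketch of that take-a-reference-only-if-alive pattern (illustrative user-space code, not the kernel refcount API):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct object {
    atomic_int refcnt;
};

/* Succeeds, and takes a reference, only while refcnt is still > 0. */
static bool get_not_zero(struct object *obj)
{
    int old = atomic_load(&obj->refcnt);

    do {
        if (old == 0)
            return false;           /* already dying: don't resurrect it */
    } while (!atomic_compare_exchange_weak(&obj->refcnt, &old, old + 1));

    return true;
}

static void put_ref(struct object *obj)
{
    if (atomic_fetch_sub(&obj->refcnt, 1) == 1)
        printf("last reference dropped: object would be freed here\n");
}

int main(void)
{
    struct object obj = { .refcnt = 1 };

    if (get_not_zero(&obj)) {       /* alive: refcnt 1 -> 2 */
        /* ... deliver the tx timestamp / wifi ack to the socket ... */
        put_ref(&obj);
    }

    put_ref(&obj);                  /* refcnt reaches 0, object "freed" */

    if (!get_not_zero(&obj))        /* now fails instead of reviving it */
        printf("object already gone: delivery skipped\n");
    return 0;
}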
diff --git a/net/core/sock.c b/net/core/sock.c index a84a154cdf0c..4efaa3b6633d 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1463,6 +1463,11 @@ static void __sk_destruct(struct rcu_head *head) pr_debug("%s: optmem leakage (%d bytes) detected\n", __func__, atomic_read(&sk->sk_omem_alloc)); + if (sk->sk_frag.page) { + put_page(sk->sk_frag.page); + sk->sk_frag.page = NULL; + } + if (sk->sk_peer_cred) put_cred(sk->sk_peer_cred); put_pid(sk->sk_peer_pid); @@ -1564,6 +1569,12 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) is_charged = sk_filter_charge(newsk, filter); if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) { + /* We need to make sure that we don't uncharge the new + * socket if we couldn't charge it in the first place + * as otherwise we uncharge the parent's filter. + */ + if (!is_charged) + RCU_INIT_POINTER(newsk->sk_filter, NULL); /* It is still raw copy of parent, so invalidate * destructor and make plain sk_free() */ newsk->sk_destruct = NULL; @@ -2706,11 +2717,6 @@ void sk_common_release(struct sock *sk) sk_refcnt_debug_release(sk); - if (sk->sk_frag.page) { - put_page(sk->sk_frag.page); - sk->sk_frag.page = NULL; - } - sock_put(sk); } EXPORT_SYMBOL(sk_common_release);
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index f053198e730c..5e3a7302f774 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -749,6 +749,7 @@ static void ccid2_hc_tx_exit(struct sock *sk) for (i = 0; i < hc->tx_seqbufc; i++) kfree(hc->tx_seqbuf[i]); hc->tx_seqbufc = 0; + dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks); } static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 0759f5b9180e..6467bf392e1b 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -289,7 +289,8 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) switch (type) { case ICMP_REDIRECT: - dccp_do_redirect(skb, sk); + if (!sock_owned_by_user(sk)) + dccp_do_redirect(skb, sk); goto out; case ICMP_SOURCE_QUENCH: /* Just silently ignore these. */
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 27c4e81efa24..8113ad58fcb4 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -122,10 +122,12 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, np = inet6_sk(sk); if (type == NDISC_REDIRECT) { - struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); + if (!sock_owned_by_user(sk)) { + struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); - if (dst) - dst->ops->redirect(dst, sk, skb); + if (dst) + dst->ops->redirect(dst, sk, skb); + } goto out; }
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 1994f8af646b..68eed344b471 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -122,6 +122,7 @@ struct sock *dccp_create_openreq_child(const struct sock *sk, /* It is still raw copy of parent, so invalidate * destructor and make plain sk_free() */ newsk->sk_destruct = NULL; + bh_unlock_sock(newsk); sk_free(newsk); return NULL; } @@ -145,6 +146,13 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, struct dccp_request_sock *dreq = dccp_rsk(req); bool own_req; + /* TCP/DCCP listeners became lockless. + * DCCP stores complex state in its request_sock, so we need + * a protection for them, now this code runs without being protected + * by the parent (listener) lock.
+ */ + spin_lock_bh(&dreq->dreq_lock); + /* Check for retransmitted REQUEST */ if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) { @@ -159,7 +167,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, inet_rtx_syn_ack(sk, req); } /* Network Duplicate, discard packet */ - return NULL; + goto out; } DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR; @@ -185,20 +193,20 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL, req, &own_req); - if (!child) - goto listen_overflow; - - return inet_csk_complete_hashdance(sk, child, req, own_req); + if (child) { + child = inet_csk_complete_hashdance(sk, child, req, own_req); + goto out; + } -listen_overflow: - dccp_pr_debug("listen_overflow!\n"); DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY; drop: if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET) req->rsk_ops->send_reset(sk, skb); inet_csk_reqsk_queue_drop(sk, req); - return NULL; +out: + spin_unlock_bh(&dreq->dreq_lock); + return child; } EXPORT_SYMBOL_GPL(dccp_check_req); @@ -249,6 +257,7 @@ int dccp_reqsk_init(struct request_sock *req, { struct dccp_request_sock *dreq = dccp_rsk(req); + spin_lock_init(&dreq->dreq_lock); inet_rsk(req)->ir_rmt_port = dccp_hdr(skb)->dccph_sport; inet_rsk(req)->ir_num = ntohs(dccp_hdr(skb)->dccph_dport); inet_rsk(req)->acked = 0; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 7e30c7b50a28..a353d1d92f01 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1081,7 +1081,8 @@ static void nl_fib_input(struct sk_buff *skb) net = sock_net(skb->sk); nlh = nlmsg_hdr(skb); - if (skb->len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len || + if (skb->len < nlmsg_total_size(sizeof(*frn)) || + skb->len < nlh->nlmsg_len || nlmsg_len(nlh) < sizeof(*frn)) return; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6ace04d14e30..9af0f461b00d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1963,6 +1963,7 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, { int res; + tos &= IPTOS_RT_MASK; rcu_read_lock(); /* Multicast recognition logic is moved from route cache to here. 
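The nl_fib_input() fix above is the canonical netlink validation pattern: before the payload is touched, the buffer must hold an aligned header plus payload, must be at least as long as the header claims, and the claimed length must cover the expected structure. A user-space sketch of the same three checks built on the uapi NLMSG_* macros; struct fib_query here is a hypothetical stand-in for struct fib_result_nl:

#include <linux/netlink.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

struct fib_query {              /* hypothetical fixed-size payload */
    unsigned int dst;
    int err;
};

static bool nlmsg_sane(const struct nlmsghdr *nlh, size_t buf_len)
{
    /* 1: buffer can hold an aligned header plus our payload at all */
    if (buf_len < NLMSG_SPACE(sizeof(struct fib_query)))
        return false;
    /* 2: header must not claim more bytes than were received */
    if (buf_len < nlh->nlmsg_len)
        return false;
    /* 3: claimed length must cover header plus the expected struct */
    if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct fib_query)))
        return false;
    return true;
}

int main(void)
{
    union {
        struct nlmsghdr nlh;
        unsigned char buf[NLMSG_SPACE(sizeof(struct fib_query))];
    } m;

    memset(&m, 0, sizeof(m));
    m.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct fib_query));
    printf("well-formed: %d\n", nlmsg_sane(&m.nlh, sizeof(m.buf)));

    m.nlh.nlmsg_len = sizeof(m.buf) + 64;   /* header lies about its size */
    printf("overclaimed: %d\n", nlmsg_sane(&m.nlh, sizeof(m.buf)));
    return 0;
}

Check 3 also covers the underflow case (an nlmsg_len smaller than the header itself), which is why it is phrased against NLMSG_LENGTH() rather than by subtracting NLMSG_HDRLEN from an unsigned field.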
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e7e227f6760f..3f87c731477f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5437,6 +5437,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) struct inet_connection_sock *icsk = inet_csk(sk); tcp_set_state(sk, TCP_ESTABLISHED); + icsk->icsk_ack.lrcvtime = tcp_time_stamp; if (skb) { icsk->icsk_af_ops->sk_rx_dst_set(sk, skb); @@ -5649,7 +5650,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * to stand against the temptation 8) --ANK */ inet_csk_schedule_ack(sk); - icsk->icsk_ack.lrcvtime = tcp_time_stamp; tcp_enter_quickack_mode(sk); inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX, TCP_RTO_MAX); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 51a77e20f6c6..1660613ddae4 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -271,10 +271,13 @@ EXPORT_SYMBOL(tcp_v4_connect); */ void tcp_v4_mtu_reduced(struct sock *sk) { - struct dst_entry *dst; struct inet_sock *inet = inet_sk(sk); - u32 mtu = tcp_sk(sk)->mtu_info; + struct dst_entry *dst; + u32 mtu; + if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) + return; + mtu = tcp_sk(sk)->mtu_info; dst = inet_csk_update_pmtu(sk, mtu); if (!dst) return; @@ -420,7 +423,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) switch (type) { case ICMP_REDIRECT: - do_redirect(icmp_skb, sk); + if (!sock_owned_by_user(sk)) + do_redirect(icmp_skb, sk); goto out; case ICMP_SOURCE_QUENCH: /* Just silently ignore these. */ diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 9475a2748a9a..019db68bdb9f 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -472,6 +472,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); newtp->rtt_min[0].rtt = ~0U; newicsk->icsk_rto = TCP_TIMEOUT_INIT; + newicsk->icsk_ack.lrcvtime = tcp_time_stamp; newtp->packets_out = 0; newtp->retrans_out = 0; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index ce20968de667..4aef80d30fab 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -257,7 +257,8 @@ void tcp_delack_timer_handler(struct sock *sk) sk_mem_reclaim_partial(sk); - if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) + if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) || + !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) goto out; if (time_after(icsk->icsk_ack.timeout, jiffies)) { @@ -538,7 +539,8 @@ void tcp_write_timer_handler(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); int event; - if (sk->sk_state == TCP_CLOSE || !icsk->icsk_pending) + if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) || + !icsk->icsk_pending) goto out; if (time_after(icsk->icsk_timeout, jiffies)) { diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 01455f492e17..3d72aeffa3f1 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -112,6 +112,27 @@ static inline u32 cstamp_delta(unsigned long cstamp) return (cstamp - INITIAL_JIFFIES) * 100UL / HZ; } +static inline s32 rfc3315_s14_backoff_init(s32 irt) +{ + /* multiply 'initial retransmission time' by 0.9 .. 1.1 */ + u64 tmp = (900000 + prandom_u32() % 200001) * (u64)irt; + do_div(tmp, 1000000); + return (s32)tmp; +} + +static inline s32 rfc3315_s14_backoff_update(s32 rt, s32 mrt) +{ + /* multiply 'retransmission timeout' by 1.9 .. 
2.1 */ + u64 tmp = (1900000 + prandom_u32() % 200001) * (u64)rt; + do_div(tmp, 1000000); + if ((s32)tmp > mrt) { + /* multiply 'maximum retransmission time' by 0.9 .. 1.1 */ + tmp = (900000 + prandom_u32() % 200001) * (u64)mrt; + do_div(tmp, 1000000); + } + return (s32)tmp; +} + #ifdef CONFIG_SYSCTL static int addrconf_sysctl_register(struct inet6_dev *idev); static void addrconf_sysctl_unregister(struct inet6_dev *idev); @@ -187,6 +208,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .dad_transmits = 1, .rtr_solicits = MAX_RTR_SOLICITATIONS, .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL, + .rtr_solicit_max_interval = RTR_SOLICITATION_MAX_INTERVAL, .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY, .use_tempaddr = 0, .temp_valid_lft = TEMP_VALID_LIFETIME, @@ -202,6 +224,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .accept_ra_rtr_pref = 1, .rtr_probe_interval = 60 * HZ, #ifdef CONFIG_IPV6_ROUTE_INFO + .accept_ra_rt_info_min_plen = 0, .accept_ra_rt_info_max_plen = 0, #endif #endif @@ -233,6 +256,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .dad_transmits = 1, .rtr_solicits = MAX_RTR_SOLICITATIONS, .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL, + .rtr_solicit_max_interval = RTR_SOLICITATION_MAX_INTERVAL, .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY, .use_tempaddr = 0, .temp_valid_lft = TEMP_VALID_LIFETIME, @@ -248,6 +272,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .accept_ra_rtr_pref = 1, .rtr_probe_interval = 60 * HZ, #ifdef CONFIG_IPV6_ROUTE_INFO + .accept_ra_rt_info_min_plen = 0, .accept_ra_rt_info_max_plen = 0, #endif #endif @@ -3502,7 +3527,7 @@ static void addrconf_rs_timer(unsigned long data) if (idev->if_flags & IF_RA_RCVD) goto out; - if (idev->rs_probes++ < idev->cnf.rtr_solicits) { + if (idev->rs_probes++ < idev->cnf.rtr_solicits || idev->cnf.rtr_solicits < 0) { write_unlock(&idev->lock); if (!ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE)) ndisc_send_rs(dev, &lladdr, @@ -3511,11 +3536,13 @@ static void addrconf_rs_timer(unsigned long data) goto put; write_lock(&idev->lock); + idev->rs_interval = rfc3315_s14_backoff_update( + idev->rs_interval, idev->cnf.rtr_solicit_max_interval); /* The wait after the last probe can be shorter */ addrconf_mod_rs_timer(idev, (idev->rs_probes == idev->cnf.rtr_solicits) ? 
idev->cnf.rtr_solicit_delay : - idev->cnf.rtr_solicit_interval); + idev->rs_interval); } else { /* * Note: we do not support deprecated "all on-link" @@ -3743,7 +3770,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) send_mld = ifp->scope == IFA_LINK && ipv6_lonely_lladdr(ifp); send_rs = send_mld && ipv6_accept_ra(ifp->idev) && - ifp->idev->cnf.rtr_solicits > 0 && + ifp->idev->cnf.rtr_solicits != 0 && (dev->flags&IFF_LOOPBACK) == 0; read_unlock_bh(&ifp->idev->lock); @@ -3765,10 +3792,11 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) write_lock_bh(&ifp->idev->lock); spin_lock(&ifp->lock); + ifp->idev->rs_interval = rfc3315_s14_backoff_init( + ifp->idev->cnf.rtr_solicit_interval); ifp->idev->rs_probes = 1; ifp->idev->if_flags |= IF_RS_SENT; - addrconf_mod_rs_timer(ifp->idev, - ifp->idev->cnf.rtr_solicit_interval); + addrconf_mod_rs_timer(ifp->idev, ifp->idev->rs_interval); spin_unlock(&ifp->lock); write_unlock_bh(&ifp->idev->lock); } @@ -4685,6 +4713,8 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_RTR_SOLICITS] = cnf->rtr_solicits; array[DEVCONF_RTR_SOLICIT_INTERVAL] = jiffies_to_msecs(cnf->rtr_solicit_interval); + array[DEVCONF_RTR_SOLICIT_MAX_INTERVAL] = + jiffies_to_msecs(cnf->rtr_solicit_max_interval); array[DEVCONF_RTR_SOLICIT_DELAY] = jiffies_to_msecs(cnf->rtr_solicit_delay); array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version; @@ -4706,6 +4736,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_RTR_PROBE_INTERVAL] = jiffies_to_msecs(cnf->rtr_probe_interval); #ifdef CONFIG_IPV6_ROUTE_INFO + array[DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN] = cnf->accept_ra_rt_info_min_plen; array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen; #endif #endif @@ -4893,7 +4924,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) return -EINVAL; if (!ipv6_accept_ra(idev)) return -EINVAL; - if (idev->cnf.rtr_solicits <= 0) + if (idev->cnf.rtr_solicits == 0) return -EINVAL; write_lock_bh(&idev->lock); @@ -4918,8 +4949,10 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) if (update_rs) { idev->if_flags |= IF_RS_SENT; + idev->rs_interval = rfc3315_s14_backoff_init( + idev->cnf.rtr_solicit_interval); idev->rs_probes = 1; - addrconf_mod_rs_timer(idev, idev->cnf.rtr_solicit_interval); + addrconf_mod_rs_timer(idev, idev->rs_interval); } /* Well, that's kinda nasty ... 
*/ @@ -5557,6 +5590,13 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec_jiffies, }, { + .procname = "router_solicitation_max_interval", + .data = &ipv6_devconf.rtr_solicit_max_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { .procname = "router_solicitation_delay", .data = &ipv6_devconf.rtr_solicit_delay, .maxlen = sizeof(int), @@ -5666,6 +5706,13 @@ static struct addrconf_sysctl_table }, #ifdef CONFIG_IPV6_ROUTE_INFO { + .procname = "accept_ra_rt_info_min_plen", + .data = &ipv6_devconf.accept_ra_rt_info_min_plen, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { .procname = "accept_ra_rt_info_max_plen", .data = &ipv6_devconf.accept_ra_rt_info_max_plen, .maxlen = sizeof(int),
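The rfc3315_s14_backoff_init()/rfc3315_s14_backoff_update() helpers added to addrconf.c above do the RFC 3315 section 14 retransmission backoff purely in integer arithmetic: a random factor in [0.9, 1.1] becomes (900000 + prandom_u32() % 200001) / 1000000, each interval then grows by a factor in [1.9, 2.1], and once it would exceed the maximum it is clamped to a re-randomised multiple of that maximum. The same arithmetic in user-space form, with rand() standing in for prandom_u32() and illustrative millisecond values rather than the kernel's jiffies:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* IRT * [0.9 .. 1.1], i.e. rfc3315_s14_backoff_init() */
static int32_t backoff_init(int32_t irt_ms)
{
    uint64_t tmp = (900000ull + rand() % 200001) * (uint64_t)irt_ms;
    return (int32_t)(tmp / 1000000);
}

/* RT * [1.9 .. 2.1], clamped to MRT * [0.9 .. 1.1], i.e. ..._update() */
static int32_t backoff_update(int32_t rt_ms, int32_t mrt_ms)
{
    uint64_t tmp = (1900000ull + rand() % 200001) * (uint64_t)rt_ms;

    tmp /= 1000000;
    if ((int32_t)tmp > mrt_ms) {
        tmp = (900000ull + rand() % 200001) * (uint64_t)mrt_ms;
        tmp /= 1000000;
    }
    return (int32_t)tmp;
}

int main(void)
{
    int32_t rt = backoff_init(4000);        /* 4 s initial interval,
                                               chosen arbitrarily here */
    for (int i = 0; i < 6; i++) {
        printf("solicit #%d after %d ms\n", i, rt);
        rt = backoff_update(rt, 3600000);   /* cap around one hour */
    }
    return 0;
}

The randomisation desynchronises hosts that booted at the same moment, and the roughly-doubling interval is what lets rtr_solicits go negative ("solicit forever") in the hunks above without flooding the network.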
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 34cf46d74554..85bf86458706 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -903,6 +903,8 @@ add: ins = &rt->dst.rt6_next; iter = *ins; while (iter) { + if (iter->rt6i_metric > rt->rt6i_metric) + break; if (rt6_qualify_for_ecmp(iter)) { *ins = iter->dst.rt6_next; fib6_purge_rt(iter, fn, info->nl_net);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index cd96a01032a2..cf90a9bf26a3 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -742,13 +742,14 @@ slow_path: * Fragment the datagram. */ - *prevhdr = NEXTHDR_FRAGMENT; troom = rt->dst.dev->needed_tailroom; /* * Keep copying data until we run out. */ while (left > 0) { + u8 *fragnexthdr_offset; + len = left; /* IF: it doesn't fit, use 'mtu' - the data space left */ if (len > mtu) @@ -793,6 +794,10 @@ slow_path: */ skb_copy_from_linear_data(skb, skb_network_header(frag), hlen); + fragnexthdr_offset = skb_network_header(frag); + fragnexthdr_offset += prevhdr - skb_network_header(skb); + *fragnexthdr_offset = NEXTHDR_FRAGMENT; + /* * Build fragment header. */
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 24fb9c0efd00..5b7433887eda 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -681,6 +681,10 @@ vti6_parm_to_user(struct ip6_tnl_parm2 *u, const struct __ip6_tnl_parm *p) u->link = p->link; u->i_key = p->i_key; u->o_key = p->o_key; + if (u->i_key) + u->i_flags |= GRE_KEY; + if (u->o_key) + u->o_flags |= GRE_KEY; u->proto = p->proto; memcpy(u->name, p->name, sizeof(u->name));
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 84afb9a77278..3452f9037ad4 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1358,6 +1358,8 @@ skip_linkparms: if (ri->prefix_len == 0 && !in6_dev->cnf.accept_ra_defrtr) continue; + if (ri->prefix_len < in6_dev->cnf.accept_ra_rt_info_min_plen) + continue; if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen) continue; rt6_route_rcv(skb->dev, (u8 *)p, (p->nd_opt_len) << 3,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 108b39967694..8532768b4eaa 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -377,10 +377,12 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, np = inet6_sk(sk); if (type == NDISC_REDIRECT) { - struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); + if (!sock_owned_by_user(sk)) { + struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); - if (dst) - dst->ops->redirect(dst, sk, skb); + if (dst) + dst->ops->redirect(dst, sk, skb); + } goto out; }
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 445b7cd0826a..48ab93842322 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -383,7 +383,7 @@ static int l2tp_ip_backlog_recv(struct sock *sk, struct sk_buff *skb) drop: IP_INC_STATS(sock_net(sk), IPSTATS_MIB_INDISCARDS); kfree_skb(skb); - return -1; + return 0; } /* Userspace will call sendmsg() on the tunnel socket to send L2TP
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 881bc2072809..52cfc4478511 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1567,6 +1567,7 @@ static void mpls_net_exit(struct net *net) for (index = 0; index < platform_labels; index++) { struct mpls_route *rt = rtnl_dereference(platform_label[index]); RCU_INIT_POINTER(platform_label[index], NULL); + mpls_notify_route(net, index, rt, NULL, NULL); mpls_rt_free(rt); } rtnl_unlock();
diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index ececa65868ef..e0d8e9ad315b 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -1802,8 +1802,11 @@ ret_res: } #ifdef DDEBUG -/* This function is not in xt_qtaguid_print.c because of locks visibility */ -static void prdebug_full_state(int indent_level, const char *fmt, ...) +/* + * This function is not in xt_qtaguid_print.c because of locks visibility. + * The lock of sock_tag_list must be acquired before calling this function + */ +static void prdebug_full_state_locked(int indent_level, const char *fmt, ...) { va_list args; char *fmt_buff; @@ -1824,16 +1827,12 @@ static void prdebug_full_state(int indent_level, const char *fmt, ...)
kfree(buff); va_end(args); - spin_lock_bh(&sock_tag_list_lock); prdebug_sock_tag_tree(indent_level, &sock_tag_tree); - spin_unlock_bh(&sock_tag_list_lock); - spin_lock_bh(&sock_tag_list_lock); spin_lock_bh(&uid_tag_data_tree_lock); prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree); prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree); spin_unlock_bh(&uid_tag_data_tree_lock); - spin_unlock_bh(&sock_tag_list_lock); spin_lock_bh(&iface_stat_list_lock); prdebug_iface_stat_list(indent_level, &iface_stat_list); @@ -1842,7 +1841,7 @@ static void prdebug_full_state(int indent_level, const char *fmt, ...) pr_debug("qtaguid: %s(): }\n", __func__); } #else -static void prdebug_full_state(int indent_level, const char *fmt, ...) {} +static void prdebug_full_state_locked(int indent_level, const char *fmt, ...) {} #endif struct proc_ctrl_print_info { @@ -1965,8 +1964,11 @@ static int qtaguid_ctrl_proc_show(struct seq_file *m, void *v) (u64)atomic64_read(&qtu_events.match_no_sk), (u64)atomic64_read(&qtu_events.match_no_sk_file)); - /* Count the following as part of the last item_index */ - prdebug_full_state(0, "proc ctrl"); + /* Count the following as part of the last item_index. No need + * to lock the sock_tag_list here since it is already locked when + * starting the seq_file operation + */ + prdebug_full_state_locked(0, "proc ctrl"); } return 0; @@ -2875,8 +2877,10 @@ static int qtudev_release(struct inode *inode, struct file *file) sock_tag_tree_erase(&st_to_free_tree); - prdebug_full_state(0, "%s(): pid=%u tgid=%u", __func__, + spin_lock_bh(&sock_tag_list_lock); + prdebug_full_state_locked(0, "%s(): pid=%u tgid=%u", __func__, current->pid, current->tgid); + spin_unlock_bh(&sock_tag_list_lock); return 0; } diff --git a/net/netlink/Kconfig b/net/netlink/Kconfig index 2c5e95e9bfbd..5d6e8c05b3d4 100644 --- a/net/netlink/Kconfig +++ b/net/netlink/Kconfig @@ -2,15 +2,6 @@ # Netlink Sockets # -config NETLINK_MMAP - bool "NETLINK: mmaped IO" - ---help--- - This option enables support for memory mapped netlink IO. This - reduces overhead by avoiding copying data between kernel- and - userspace. - - If unsure, say N. 
- config NETLINK_DIAG tristate "NETLINK: socket monitoring interface" default n diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index f4dd706c6cd1..96fe1c103bf9 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -225,7 +225,7 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb, dev_hold(dev); - if (netlink_skb_is_mmaped(skb) || is_vmalloc_addr(skb->head)) + if (is_vmalloc_addr(skb->head)) nskb = netlink_to_full_skb(skb, GFP_ATOMIC); else nskb = skb_clone(skb, GFP_ATOMIC); @@ -300,610 +300,8 @@ static void netlink_rcv_wake(struct sock *sk) wake_up_interruptible(&nlk->wait); } -#ifdef CONFIG_NETLINK_MMAP -static bool netlink_rx_is_mmaped(struct sock *sk) -{ - return nlk_sk(sk)->rx_ring.pg_vec != NULL; -} - -static bool netlink_tx_is_mmaped(struct sock *sk) -{ - return nlk_sk(sk)->tx_ring.pg_vec != NULL; -} - -static __pure struct page *pgvec_to_page(const void *addr) -{ - if (is_vmalloc_addr(addr)) - return vmalloc_to_page(addr); - else - return virt_to_page(addr); -} - -static void free_pg_vec(void **pg_vec, unsigned int order, unsigned int len) -{ - unsigned int i; - - for (i = 0; i < len; i++) { - if (pg_vec[i] != NULL) { - if (is_vmalloc_addr(pg_vec[i])) - vfree(pg_vec[i]); - else - free_pages((unsigned long)pg_vec[i], order); - } - } - kfree(pg_vec); -} - -static void *alloc_one_pg_vec_page(unsigned long order) -{ - void *buffer; - gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | - __GFP_NOWARN | __GFP_NORETRY; - - buffer = (void *)__get_free_pages(gfp_flags, order); - if (buffer != NULL) - return buffer; - - buffer = vzalloc((1 << order) * PAGE_SIZE); - if (buffer != NULL) - return buffer; - - gfp_flags &= ~__GFP_NORETRY; - return (void *)__get_free_pages(gfp_flags, order); -} - -static void **alloc_pg_vec(struct netlink_sock *nlk, - struct nl_mmap_req *req, unsigned int order) -{ - unsigned int block_nr = req->nm_block_nr; - unsigned int i; - void **pg_vec; - - pg_vec = kcalloc(block_nr, sizeof(void *), GFP_KERNEL); - if (pg_vec == NULL) - return NULL; - - for (i = 0; i < block_nr; i++) { - pg_vec[i] = alloc_one_pg_vec_page(order); - if (pg_vec[i] == NULL) - goto err1; - } - - return pg_vec; -err1: - free_pg_vec(pg_vec, order, block_nr); - return NULL; -} - - -static void -__netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, bool tx_ring, void **pg_vec, - unsigned int order) -{ - struct netlink_sock *nlk = nlk_sk(sk); - struct sk_buff_head *queue; - struct netlink_ring *ring; - - queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue; - ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring; - - spin_lock_bh(&queue->lock); - - ring->frame_max = req->nm_frame_nr - 1; - ring->head = 0; - ring->frame_size = req->nm_frame_size; - ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE; - - swap(ring->pg_vec_len, req->nm_block_nr); - swap(ring->pg_vec_order, order); - swap(ring->pg_vec, pg_vec); - - __skb_queue_purge(queue); - spin_unlock_bh(&queue->lock); - - WARN_ON(atomic_read(&nlk->mapped)); - - if (pg_vec) - free_pg_vec(pg_vec, order, req->nm_block_nr); -} - -static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, - bool tx_ring) -{ - struct netlink_sock *nlk = nlk_sk(sk); - struct netlink_ring *ring; - void **pg_vec = NULL; - unsigned int order = 0; - - ring = tx_ring ? 
&nlk->tx_ring : &nlk->rx_ring; - - if (atomic_read(&nlk->mapped)) - return -EBUSY; - if (atomic_read(&ring->pending)) - return -EBUSY; - - if (req->nm_block_nr) { - if (ring->pg_vec != NULL) - return -EBUSY; - - if ((int)req->nm_block_size <= 0) - return -EINVAL; - if (!PAGE_ALIGNED(req->nm_block_size)) - return -EINVAL; - if (req->nm_frame_size < NL_MMAP_HDRLEN) - return -EINVAL; - if (!IS_ALIGNED(req->nm_frame_size, NL_MMAP_MSG_ALIGNMENT)) - return -EINVAL; - - ring->frames_per_block = req->nm_block_size / - req->nm_frame_size; - if (ring->frames_per_block == 0) - return -EINVAL; - if (ring->frames_per_block * req->nm_block_nr != - req->nm_frame_nr) - return -EINVAL; - - order = get_order(req->nm_block_size); - pg_vec = alloc_pg_vec(nlk, req, order); - if (pg_vec == NULL) - return -ENOMEM; - } else { - if (req->nm_frame_nr) - return -EINVAL; - } - - mutex_lock(&nlk->pg_vec_lock); - if (atomic_read(&nlk->mapped) == 0) { - __netlink_set_ring(sk, req, tx_ring, pg_vec, order); - mutex_unlock(&nlk->pg_vec_lock); - return 0; - } - - mutex_unlock(&nlk->pg_vec_lock); - - if (pg_vec) - free_pg_vec(pg_vec, order, req->nm_block_nr); - - return -EBUSY; -} - -static void netlink_mm_open(struct vm_area_struct *vma) -{ - struct file *file = vma->vm_file; - struct socket *sock = file->private_data; - struct sock *sk = sock->sk; - - if (sk) - atomic_inc(&nlk_sk(sk)->mapped); -} - -static void netlink_mm_close(struct vm_area_struct *vma) -{ - struct file *file = vma->vm_file; - struct socket *sock = file->private_data; - struct sock *sk = sock->sk; - - if (sk) - atomic_dec(&nlk_sk(sk)->mapped); -} - -static const struct vm_operations_struct netlink_mmap_ops = { - .open = netlink_mm_open, - .close = netlink_mm_close, -}; - -static int netlink_mmap(struct file *file, struct socket *sock, - struct vm_area_struct *vma) -{ - struct sock *sk = sock->sk; - struct netlink_sock *nlk = nlk_sk(sk); - struct netlink_ring *ring; - unsigned long start, size, expected; - unsigned int i; - int err = -EINVAL; - - if (vma->vm_pgoff) - return -EINVAL; - - mutex_lock(&nlk->pg_vec_lock); - - expected = 0; - for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) { - if (ring->pg_vec == NULL) - continue; - expected += ring->pg_vec_len * ring->pg_vec_pages * PAGE_SIZE; - } - - if (expected == 0) - goto out; - - size = vma->vm_end - vma->vm_start; - if (size != expected) - goto out; - - start = vma->vm_start; - for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) { - if (ring->pg_vec == NULL) - continue; - - for (i = 0; i < ring->pg_vec_len; i++) { - struct page *page; - void *kaddr = ring->pg_vec[i]; - unsigned int pg_num; - - for (pg_num = 0; pg_num < ring->pg_vec_pages; pg_num++) { - page = pgvec_to_page(kaddr); - err = vm_insert_page(vma, start, page); - if (err < 0) - goto out; - start += PAGE_SIZE; - kaddr += PAGE_SIZE; - } - } - } - - atomic_inc(&nlk->mapped); - vma->vm_ops = &netlink_mmap_ops; - err = 0; -out: - mutex_unlock(&nlk->pg_vec_lock); - return err; -} - -static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr, unsigned int nm_len) -{ -#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 - struct page *p_start, *p_end; - - /* First page is flushed through netlink_{get,set}_status */ - p_start = pgvec_to_page(hdr + PAGE_SIZE); - p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + nm_len - 1); - while (p_start <= p_end) { - flush_dcache_page(p_start); - p_start++; - } -#endif -} - -static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr) -{ - smp_rmb(); - 
flush_dcache_page(pgvec_to_page(hdr)); - return hdr->nm_status; -} - -static void netlink_set_status(struct nl_mmap_hdr *hdr, - enum nl_mmap_status status) -{ - smp_mb(); - hdr->nm_status = status; - flush_dcache_page(pgvec_to_page(hdr)); -} - -static struct nl_mmap_hdr * -__netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos) -{ - unsigned int pg_vec_pos, frame_off; - - pg_vec_pos = pos / ring->frames_per_block; - frame_off = pos % ring->frames_per_block; - - return ring->pg_vec[pg_vec_pos] + (frame_off * ring->frame_size); -} - -static struct nl_mmap_hdr * -netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos, - enum nl_mmap_status status) -{ - struct nl_mmap_hdr *hdr; - - hdr = __netlink_lookup_frame(ring, pos); - if (netlink_get_status(hdr) != status) - return NULL; - - return hdr; -} - -static struct nl_mmap_hdr * -netlink_current_frame(const struct netlink_ring *ring, - enum nl_mmap_status status) -{ - return netlink_lookup_frame(ring, ring->head, status); -} - -static void netlink_increment_head(struct netlink_ring *ring) -{ - ring->head = ring->head != ring->frame_max ? ring->head + 1 : 0; -} - -static void netlink_forward_ring(struct netlink_ring *ring) -{ - unsigned int head = ring->head; - const struct nl_mmap_hdr *hdr; - - do { - hdr = __netlink_lookup_frame(ring, ring->head); - if (hdr->nm_status == NL_MMAP_STATUS_UNUSED) - break; - if (hdr->nm_status != NL_MMAP_STATUS_SKIP) - break; - netlink_increment_head(ring); - } while (ring->head != head); -} - -static bool netlink_has_valid_frame(struct netlink_ring *ring) -{ - unsigned int head = ring->head, pos = head; - const struct nl_mmap_hdr *hdr; - - do { - hdr = __netlink_lookup_frame(ring, pos); - if (hdr->nm_status == NL_MMAP_STATUS_VALID) - return true; - pos = pos != 0 ? pos - 1 : ring->frame_max; - } while (pos != head); - - return false; -} - -static bool netlink_dump_space(struct netlink_sock *nlk) -{ - struct netlink_ring *ring = &nlk->rx_ring; - struct nl_mmap_hdr *hdr; - unsigned int n; - - hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED); - if (hdr == NULL) - return false; - - n = ring->head + ring->frame_max / 2; - if (n > ring->frame_max) - n -= ring->frame_max; - - hdr = __netlink_lookup_frame(ring, n); - - return hdr->nm_status == NL_MMAP_STATUS_UNUSED; -} - -static unsigned int netlink_poll(struct file *file, struct socket *sock, - poll_table *wait) -{ - struct sock *sk = sock->sk; - struct netlink_sock *nlk = nlk_sk(sk); - unsigned int mask; - int err; - - if (nlk->rx_ring.pg_vec != NULL) { - /* Memory mapped sockets don't call recvmsg(), so flow control - * for dumps is performed here. A dump is allowed to continue - * if at least half the ring is unused. - */ - while (nlk->cb_running && netlink_dump_space(nlk)) { - err = netlink_dump(sk); - if (err < 0) { - sk->sk_err = -err; - sk->sk_error_report(sk); - break; - } - } - netlink_rcv_wake(sk); - } - - mask = datagram_poll(file, sock, wait); - - /* We could already have received frames in the normal receive - * queue, that will show up as NL_MMAP_STATUS_COPY in the ring, - * so if mask contains pollin/etc already, there's no point - * walking the ring. 
- */ - if ((mask & (POLLIN | POLLRDNORM)) != (POLLIN | POLLRDNORM)) { - spin_lock_bh(&sk->sk_receive_queue.lock); - if (nlk->rx_ring.pg_vec) { - if (netlink_has_valid_frame(&nlk->rx_ring)) - mask |= POLLIN | POLLRDNORM; - } - spin_unlock_bh(&sk->sk_receive_queue.lock); - } - - spin_lock_bh(&sk->sk_write_queue.lock); - if (nlk->tx_ring.pg_vec) { - if (netlink_current_frame(&nlk->tx_ring, NL_MMAP_STATUS_UNUSED)) - mask |= POLLOUT | POLLWRNORM; - } - spin_unlock_bh(&sk->sk_write_queue.lock); - - return mask; -} - -static struct nl_mmap_hdr *netlink_mmap_hdr(struct sk_buff *skb) -{ - return (struct nl_mmap_hdr *)(skb->head - NL_MMAP_HDRLEN); -} - -static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk, - struct netlink_ring *ring, - struct nl_mmap_hdr *hdr) -{ - unsigned int size; - void *data; - - size = ring->frame_size - NL_MMAP_HDRLEN; - data = (void *)hdr + NL_MMAP_HDRLEN; - - skb->head = data; - skb->data = data; - skb_reset_tail_pointer(skb); - skb->end = skb->tail + size; - skb->len = 0; - - skb->destructor = netlink_skb_destructor; - NETLINK_CB(skb).flags |= NETLINK_SKB_MMAPED; - NETLINK_CB(skb).sk = sk; -} - -static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg, - u32 dst_portid, u32 dst_group, - struct scm_cookie *scm) -{ - struct netlink_sock *nlk = nlk_sk(sk); - struct netlink_ring *ring; - struct nl_mmap_hdr *hdr; - struct sk_buff *skb; - unsigned int maxlen; - int err = 0, len = 0; - - mutex_lock(&nlk->pg_vec_lock); - - ring = &nlk->tx_ring; - maxlen = ring->frame_size - NL_MMAP_HDRLEN; - - do { - unsigned int nm_len; - - hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID); - if (hdr == NULL) { - if (!(msg->msg_flags & MSG_DONTWAIT) && - atomic_read(&nlk->tx_ring.pending)) - schedule(); - continue; - } - - nm_len = ACCESS_ONCE(hdr->nm_len); - if (nm_len > maxlen) { - err = -EINVAL; - goto out; - } - - netlink_frame_flush_dcache(hdr, nm_len); - - skb = alloc_skb(nm_len, GFP_KERNEL); - if (skb == NULL) { - err = -ENOBUFS; - goto out; - } - __skb_put(skb, nm_len); - memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, nm_len); - netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED); - - netlink_increment_head(ring); - - NETLINK_CB(skb).portid = nlk->portid; - NETLINK_CB(skb).dst_group = dst_group; - NETLINK_CB(skb).creds = scm->creds; - - err = security_netlink_send(sk, skb); - if (err) { - kfree_skb(skb); - goto out; - } - - if (unlikely(dst_group)) { - atomic_inc(&skb->users); - netlink_broadcast(sk, skb, dst_portid, dst_group, - GFP_KERNEL); - } - err = netlink_unicast(sk, skb, dst_portid, - msg->msg_flags & MSG_DONTWAIT); - if (err < 0) - goto out; - len += err; - - } while (hdr != NULL || - (!(msg->msg_flags & MSG_DONTWAIT) && - atomic_read(&nlk->tx_ring.pending))); - - if (len > 0) - err = len; -out: - mutex_unlock(&nlk->pg_vec_lock); - return err; -} - -static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb) -{ - struct nl_mmap_hdr *hdr; - - hdr = netlink_mmap_hdr(skb); - hdr->nm_len = skb->len; - hdr->nm_group = NETLINK_CB(skb).dst_group; - hdr->nm_pid = NETLINK_CB(skb).creds.pid; - hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid); - hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid); - netlink_frame_flush_dcache(hdr, hdr->nm_len); - netlink_set_status(hdr, NL_MMAP_STATUS_VALID); - - NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED; - kfree_skb(skb); -} - -static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb) -{ - struct netlink_sock *nlk = nlk_sk(sk); - struct netlink_ring *ring 
= &nlk->rx_ring; - struct nl_mmap_hdr *hdr; - - spin_lock_bh(&sk->sk_receive_queue.lock); - hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED); - if (hdr == NULL) { - spin_unlock_bh(&sk->sk_receive_queue.lock); - kfree_skb(skb); - netlink_overrun(sk); - return; - } - netlink_increment_head(ring); - __skb_queue_tail(&sk->sk_receive_queue, skb); - spin_unlock_bh(&sk->sk_receive_queue.lock); - - hdr->nm_len = skb->len; - hdr->nm_group = NETLINK_CB(skb).dst_group; - hdr->nm_pid = NETLINK_CB(skb).creds.pid; - hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid); - hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid); - netlink_set_status(hdr, NL_MMAP_STATUS_COPY); -} - -#else /* CONFIG_NETLINK_MMAP */ -#define netlink_rx_is_mmaped(sk) false -#define netlink_tx_is_mmaped(sk) false -#define netlink_mmap sock_no_mmap -#define netlink_poll datagram_poll -#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, scm) 0 -#endif /* CONFIG_NETLINK_MMAP */ - static void netlink_skb_destructor(struct sk_buff *skb) { -#ifdef CONFIG_NETLINK_MMAP - struct nl_mmap_hdr *hdr; - struct netlink_ring *ring; - struct sock *sk; - - /* If a packet from the kernel to userspace was freed because of an - * error without being delivered to userspace, the kernel must reset - * the status. In the direction userspace to kernel, the status is - * always reset here after the packet was processed and freed. - */ - if (netlink_skb_is_mmaped(skb)) { - hdr = netlink_mmap_hdr(skb); - sk = NETLINK_CB(skb).sk; - - if (NETLINK_CB(skb).flags & NETLINK_SKB_TX) { - netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED); - ring = &nlk_sk(sk)->tx_ring; - } else { - if (!(NETLINK_CB(skb).flags & NETLINK_SKB_DELIVERED)) { - hdr->nm_len = 0; - netlink_set_status(hdr, NL_MMAP_STATUS_VALID); - } - ring = &nlk_sk(sk)->rx_ring; - } - - WARN_ON(atomic_read(&ring->pending) == 0); - atomic_dec(&ring->pending); - sock_put(sk); - - skb->head = NULL; - } -#endif if (is_vmalloc_addr(skb->head)) { if (!skb->cloned || !atomic_dec_return(&(skb_shinfo(skb)->dataref))) @@ -927,18 +325,6 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) static void netlink_sock_destruct(struct sock *sk) { skb_queue_purge(&sk->sk_receive_queue); -#ifdef CONFIG_NETLINK_MMAP - if (1) { - struct nl_mmap_req req; - - memset(&req, 0, sizeof(req)); - if (nlk->rx_ring.pg_vec) - __netlink_set_ring(sk, &req, false, NULL, 0); - memset(&req, 0, sizeof(req)); - if (nlk->tx_ring.pg_vec) - __netlink_set_ring(sk, &req, true, NULL, 0); - } -#endif /* CONFIG_NETLINK_MMAP */ if (!sock_flag(sk, SOCK_DEAD)) { printk(KERN_ERR "Freeing alive netlink socket %p\n", sk); @@ -1185,9 +571,6 @@ static int __netlink_create(struct net *net, struct socket *sock, mutex_init(nlk->cb_mutex); } init_waitqueue_head(&nlk->wait); -#ifdef CONFIG_NETLINK_MMAP - mutex_init(&nlk->pg_vec_lock); -#endif sock_set_flag(sk, SOCK_RCU_FREE); sk->sk_destruct = netlink_sock_destruct; @@ -1725,8 +1108,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, nlk = nlk_sk(sk); if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || - test_bit(NETLINK_S_CONGESTED, &nlk->state)) && - !netlink_skb_is_mmaped(skb)) { + test_bit(NETLINK_S_CONGESTED, &nlk->state))) { DECLARE_WAITQUEUE(wait, current); if (!*timeo) { if (!ssk || netlink_is_kernel(ssk)) @@ -1764,14 +1146,7 @@ static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb) netlink_deliver_tap(skb); -#ifdef CONFIG_NETLINK_MMAP - if (netlink_skb_is_mmaped(skb)) - netlink_queue_mmaped_skb(sk, skb); - else if 
(netlink_rx_is_mmaped(sk)) - netlink_ring_set_copied(sk, skb); - else -#endif /* CONFIG_NETLINK_MMAP */ - skb_queue_tail(&sk->sk_receive_queue, skb); + skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk); return len; } @@ -1795,9 +1170,6 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation) int delta; WARN_ON(skb->sk != NULL); - if (netlink_skb_is_mmaped(skb)) - return skb; - delta = skb->end - skb->tail; if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize) return skb; @@ -1877,71 +1249,6 @@ struct sk_buff *__netlink_alloc_skb(struct sock *ssk, unsigned int size, unsigned int ldiff, u32 dst_portid, gfp_t gfp_mask) { -#ifdef CONFIG_NETLINK_MMAP - unsigned int maxlen, linear_size; - struct sock *sk = NULL; - struct sk_buff *skb; - struct netlink_ring *ring; - struct nl_mmap_hdr *hdr; - - sk = netlink_getsockbyportid(ssk, dst_portid); - if (IS_ERR(sk)) - goto out; - - ring = &nlk_sk(sk)->rx_ring; - /* fast-path without atomic ops for common case: non-mmaped receiver */ - if (ring->pg_vec == NULL) - goto out_put; - - /* We need to account the full linear size needed as a ring - * slot cannot have non-linear parts. - */ - linear_size = size + ldiff; - if (ring->frame_size - NL_MMAP_HDRLEN < linear_size) - goto out_put; - - skb = alloc_skb_head(gfp_mask); - if (skb == NULL) - goto err1; - - spin_lock_bh(&sk->sk_receive_queue.lock); - /* check again under lock */ - if (ring->pg_vec == NULL) - goto out_free; - - /* check again under lock */ - maxlen = ring->frame_size - NL_MMAP_HDRLEN; - if (maxlen < linear_size) - goto out_free; - - netlink_forward_ring(ring); - hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED); - if (hdr == NULL) - goto err2; - - netlink_ring_setup_skb(skb, sk, ring, hdr); - netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED); - atomic_inc(&ring->pending); - netlink_increment_head(ring); - - spin_unlock_bh(&sk->sk_receive_queue.lock); - return skb; - -err2: - kfree_skb(skb); - spin_unlock_bh(&sk->sk_receive_queue.lock); - netlink_overrun(sk); -err1: - sock_put(sk); - return NULL; - -out_free: - kfree_skb(skb); - spin_unlock_bh(&sk->sk_receive_queue.lock); -out_put: - sock_put(sk); -out: -#endif return alloc_skb(size, gfp_mask); } EXPORT_SYMBOL_GPL(__netlink_alloc_skb); @@ -2222,8 +1529,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, if (level != SOL_NETLINK) return -ENOPROTOOPT; - if (optname != NETLINK_RX_RING && optname != NETLINK_TX_RING && - optlen >= sizeof(int) && + if (optlen >= sizeof(int) && get_user(val, (unsigned int __user *)optval)) return -EFAULT; @@ -2276,25 +1582,6 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, } err = 0; break; -#ifdef CONFIG_NETLINK_MMAP - case NETLINK_RX_RING: - case NETLINK_TX_RING: { - struct nl_mmap_req req; - - /* Rings might consume more memory than queue limits, require - * CAP_NET_ADMIN. - */ - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - if (optlen < sizeof(req)) - return -EINVAL; - if (copy_from_user(&req, optval, sizeof(req))) - return -EFAULT; - err = netlink_set_ring(sk, &req, - optname == NETLINK_TX_RING); - break; - } -#endif /* CONFIG_NETLINK_MMAP */ case NETLINK_LISTEN_ALL_NSID: if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_BROADCAST)) return -EPERM; @@ -2464,18 +1751,6 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) smp_rmb(); } - /* It's a really convoluted way for userland to ask for mmaped - * sendmsg(), but that's what we've got... 
- */ - if (netlink_tx_is_mmaped(sk) && - iter_is_iovec(&msg->msg_iter) && - msg->msg_iter.nr_segs == 1 && - msg->msg_iter.iov->iov_base == NULL) { - err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, - &scm); - goto out; - } - err = -EMSGSIZE; if (len > sk->sk_sndbuf - 32) goto out; @@ -2792,8 +2067,7 @@ static int netlink_dump(struct sock *sk) goto errout_skb; } - if (!netlink_rx_is_mmaped(sk) && - atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) + if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) goto errout_skb; /* NLMSG_GOODSIZE is small to avoid high order allocations being @@ -2882,16 +2156,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, struct netlink_sock *nlk; int ret; - /* Memory mapped dump requests need to be copied to avoid looping - * on the pending state in netlink_mmap_sendmsg() while the CB hold - * a reference to the skb. - */ - if (netlink_skb_is_mmaped(skb)) { - skb = skb_copy(skb, GFP_KERNEL); - if (skb == NULL) - return -ENOBUFS; - } else - atomic_inc(&skb->users); + atomic_inc(&skb->users); sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid); if (sk == NULL) { @@ -3235,7 +2500,7 @@ static const struct proto_ops netlink_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = netlink_getname, - .poll = netlink_poll, + .poll = datagram_poll, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, @@ -3243,7 +2508,7 @@ static const struct proto_ops netlink_ops = { .getsockopt = netlink_getsockopt, .sendmsg = netlink_sendmsg, .recvmsg = netlink_recvmsg, - .mmap = netlink_mmap, + .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, }; diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index b0c1ddc97260..15e62973cfc6 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -45,12 +45,6 @@ struct netlink_sock { int (*netlink_bind)(struct net *net, int group); void (*netlink_unbind)(struct net *net, int group); struct module *module; -#ifdef CONFIG_NETLINK_MMAP - struct mutex pg_vec_lock; - struct netlink_ring rx_ring; - struct netlink_ring tx_ring; - atomic_t mapped; -#endif /* CONFIG_NETLINK_MMAP */ struct rhash_head node; struct work_struct work; @@ -61,15 +55,6 @@ static inline struct netlink_sock *nlk_sk(struct sock *sk) return container_of(sk, struct netlink_sock, sk); } -static inline bool netlink_skb_is_mmaped(const struct sk_buff *skb) -{ -#ifdef CONFIG_NETLINK_MMAP - return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED; -#else - return false; -#endif /* CONFIG_NETLINK_MMAP */ -} - struct netlink_table { struct rhashtable hash; struct hlist_head mc_list; diff --git a/net/netlink/diag.c b/net/netlink/diag.c index 3ee63a3cff30..8dd836a8dd60 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -8,41 +8,6 @@ #include "af_netlink.h" -#ifdef CONFIG_NETLINK_MMAP -static int sk_diag_put_ring(struct netlink_ring *ring, int nl_type, - struct sk_buff *nlskb) -{ - struct netlink_diag_ring ndr; - - ndr.ndr_block_size = ring->pg_vec_pages << PAGE_SHIFT; - ndr.ndr_block_nr = ring->pg_vec_len; - ndr.ndr_frame_size = ring->frame_size; - ndr.ndr_frame_nr = ring->frame_max + 1; - - return nla_put(nlskb, nl_type, sizeof(ndr), &ndr); -} - -static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb) -{ - struct netlink_sock *nlk = nlk_sk(sk); - int ret; - - mutex_lock(&nlk->pg_vec_lock); - ret = sk_diag_put_ring(&nlk->rx_ring, NETLINK_DIAG_RX_RING, nlskb); - if (!ret) - ret = sk_diag_put_ring(&nlk->tx_ring, NETLINK_DIAG_TX_RING, - nlskb); 
- mutex_unlock(&nlk->pg_vec_lock); - - return ret; -} -#else -static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb) -{ - return 0; -} -#endif - static int sk_diag_dump_groups(struct sock *sk, struct sk_buff *nlskb) { struct netlink_sock *nlk = nlk_sk(sk); @@ -87,10 +52,6 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, sock_diag_put_meminfo(sk, skb, NETLINK_DIAG_MEMINFO)) goto out_nlmsg_trim; - if ((req->ndiag_show & NDIAG_SHOW_RING_CFG) && - sk_diag_put_rings_cfg(sk, skb)) - goto out_nlmsg_trim; - nlmsg_end(skb, nlh); return 0; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index d1bd4a45ca2d..d26b28def310 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -588,7 +588,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, ipv4 = true; break; case OVS_TUNNEL_KEY_ATTR_IPV6_SRC: - SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst, + SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.src, nla_get_in6_addr(a), is_mask); ipv6 = true; break; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index d805cd577a60..3975ac809934 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3021,7 +3021,7 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; - char name[15]; + char name[sizeof(uaddr->sa_data) + 1]; /* * Check legality @@ -3029,7 +3029,11 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, if (addr_len != sizeof(struct sockaddr)) return -EINVAL; - strlcpy(name, uaddr->sa_data, sizeof(name)); + /* uaddr->sa_data comes from the userspace, it's not guaranteed to be + * zero-terminated. + */ + memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data)); + name[sizeof(uaddr->sa_data)] = 0; return packet_do_bind(sk, name, 0, pkt_sk(sk)->num); } diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 06e7c4a37245..694a06f1e0d5 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -820,10 +820,8 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, goto out_module_put; err = a.ops->walk(skb, &dcb, RTM_DELACTION, &a); - if (err < 0) + if (err <= 0) goto out_module_put; - if (err == 0) - goto noflush_out; nla_nest_end(skb, nest); @@ -840,7 +838,6 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, out_module_put: module_put(a.ops->owner); err_out: -noflush_out: kfree_skb(skb); return err; } diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index bb41699c6c49..7ecb14f3db54 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -109,6 +109,9 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, if (ret < 0) return ret; + if (!tb[TCA_CONNMARK_PARMS]) + return -EINVAL; + parm = nla_data(tb[TCA_CONNMARK_PARMS]); if (!tcf_hash_check(parm->index, a, bind)) { diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 6a0d48525fcf..c36757e72844 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -146,6 +146,7 @@ void unix_notinflight(struct user_struct *user, struct file *fp) if (s) { struct unix_sock *u = unix_sk(s); + BUG_ON(!atomic_long_read(&u->inflight)); BUG_ON(list_empty(&u->link)); if (atomic_long_dec_and_test(&u->inflight)) @@ -341,6 +342,14 @@ void unix_gc(void) } list_del(&cursor); + /* Now gc_candidates contains only garbage. Restore original + * inflight counters for these as well, and remove the skbuffs + * which are creating the cycle(s). 
+ */ + skb_queue_head_init(&hitlist); + list_for_each_entry(u, &gc_candidates, link) + scan_children(&u->sk, inc_inflight, &hitlist); + /* not_cycle_list contains those sockets which do not make up a * cycle. Restore these to the inflight list. */ @@ -350,14 +359,6 @@ void unix_gc(void) list_move_tail(&u->link, &gc_inflight_list); } - /* Now gc_candidates contains only garbage. Restore original - * inflight counters for these as well, and remove the skbuffs - * which are creating the cycle(s). - */ - skb_queue_head_init(&hitlist); - list_for_each_entry(u, &gc_candidates, link) - scan_children(&u->sk, inc_inflight, &hitlist); - spin_unlock(&unix_gc_lock); /* Here we are. Hitlist is filled. Die. */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index adc62ec5507a..074cf0d25558 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -509,21 +509,17 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, { int err; - rtnl_lock(); - if (!cb->args[0]) { err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, nl80211_fam.attrbuf, nl80211_fam.maxattr, nl80211_policy); if (err) - goto out_unlock; + return err; *wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk), nl80211_fam.attrbuf); - if (IS_ERR(*wdev)) { - err = PTR_ERR(*wdev); - goto out_unlock; - } + if (IS_ERR(*wdev)) + return PTR_ERR(*wdev); *rdev = wiphy_to_rdev((*wdev)->wiphy); /* 0 is the first index - add 1 to parse only once */ cb->args[0] = (*rdev)->wiphy_idx + 1; @@ -533,10 +529,8 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0] - 1); struct wireless_dev *tmp; - if (!wiphy) { - err = -ENODEV; - goto out_unlock; - } + if (!wiphy) + return -ENODEV; *rdev = wiphy_to_rdev(wiphy); *wdev = NULL; @@ -547,21 +541,11 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, } } - if (!*wdev) { - err = -ENODEV; - goto out_unlock; - } + if (!*wdev) + return -ENODEV; } return 0; - out_unlock: - rtnl_unlock(); - return err; -} - -static void nl80211_finish_wdev_dump(struct cfg80211_registered_device *rdev) -{ - rtnl_unlock(); } /* IE validation */ @@ -4278,9 +4262,10 @@ static int nl80211_dump_station(struct sk_buff *skb, int sta_idx = cb->args[2]; int err; + rtnl_lock(); err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev); if (err) - return err; + goto out_err; if (!wdev->netdev) { err = -EINVAL; @@ -4316,7 +4301,7 @@ static int nl80211_dump_station(struct sk_buff *skb, cb->args[2] = sta_idx; err = skb->len; out_err: - nl80211_finish_wdev_dump(rdev); + rtnl_unlock(); return err; } @@ -5033,9 +5018,10 @@ static int nl80211_dump_mpath(struct sk_buff *skb, int path_idx = cb->args[2]; int err; + rtnl_lock(); err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev); if (err) - return err; + goto out_err; if (!rdev->ops->dump_mpath) { err = -EOPNOTSUPP; @@ -5069,7 +5055,7 @@ static int nl80211_dump_mpath(struct sk_buff *skb, cb->args[2] = path_idx; err = skb->len; out_err: - nl80211_finish_wdev_dump(rdev); + rtnl_unlock(); return err; } @@ -5229,9 +5215,10 @@ static int nl80211_dump_mpp(struct sk_buff *skb, int path_idx = cb->args[2]; int err; + rtnl_lock(); err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev); if (err) - return err; + goto out_err; if (!rdev->ops->dump_mpp) { err = -EOPNOTSUPP; @@ -5264,7 +5251,7 @@ static int nl80211_dump_mpp(struct sk_buff *skb, cb->args[2] = path_idx; err = skb->len; out_err: - nl80211_finish_wdev_dump(rdev); + rtnl_unlock(); return err; } @@ -7333,9 +7320,12 @@ static int nl80211_dump_scan(struct sk_buff 
*skb, struct netlink_callback *cb) int start = cb->args[2], idx = 0; int err; + rtnl_lock(); err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev); - if (err) + if (err) { + rtnl_unlock(); return err; + } wdev_lock(wdev); spin_lock_bh(&rdev->bss_lock); @@ -7358,7 +7348,7 @@ static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb) wdev_unlock(wdev); cb->args[2] = idx; - nl80211_finish_wdev_dump(rdev); + rtnl_unlock(); return skb->len; } @@ -7442,9 +7432,10 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb) int res; bool radio_stats; + rtnl_lock(); res = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev); if (res) - return res; + goto out_err; /* prepare_wdev_dump parsed the attributes */ radio_stats = nl80211_fam.attrbuf[NL80211_ATTR_SURVEY_RADIO_STATS]; @@ -7485,7 +7476,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb) cb->args[2] = survey_idx; res = skb->len; out_err: - nl80211_finish_wdev_dump(rdev); + rtnl_unlock(); return res; } @@ -10561,17 +10552,13 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, void *data = NULL; unsigned int data_len = 0; - rtnl_lock(); - if (cb->args[0]) { /* subtract the 1 again here */ struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0] - 1); struct wireless_dev *tmp; - if (!wiphy) { - err = -ENODEV; - goto out_unlock; - } + if (!wiphy) + return -ENODEV; *rdev = wiphy_to_rdev(wiphy); *wdev = NULL; @@ -10592,13 +10579,11 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, nl80211_fam.attrbuf, nl80211_fam.maxattr, nl80211_policy); if (err) - goto out_unlock; + return err; if (!nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_ID] || - !nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_SUBCMD]) { - err = -EINVAL; - goto out_unlock; - } + !nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_SUBCMD]) + return -EINVAL; *wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk), nl80211_fam.attrbuf); @@ -10607,10 +10592,8 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, *rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), nl80211_fam.attrbuf); - if (IS_ERR(*rdev)) { - err = PTR_ERR(*rdev); - goto out_unlock; - } + if (IS_ERR(*rdev)) + return PTR_ERR(*rdev); vid = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_ID]); subcmd = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_SUBCMD]); @@ -10623,19 +10606,15 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, if (vcmd->info.vendor_id != vid || vcmd->info.subcmd != subcmd) continue; - if (!vcmd->dumpit) { - err = -EOPNOTSUPP; - goto out_unlock; - } + if (!vcmd->dumpit) + return -EOPNOTSUPP; vcmd_idx = i; break; } - if (vcmd_idx < 0) { - err = -EOPNOTSUPP; - goto out_unlock; - } + if (vcmd_idx < 0) + return -EOPNOTSUPP; if (nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_DATA]) { data = nla_data(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_DATA]); @@ -10652,9 +10631,6 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, /* keep rtnl locked in successful case */ return 0; - out_unlock: - rtnl_unlock(); - return err; } static int nl80211_vendor_cmd_dump(struct sk_buff *skb, @@ -10669,9 +10645,10 @@ static int nl80211_vendor_cmd_dump(struct sk_buff *skb, int err; struct nlattr *vendor_data; + rtnl_lock(); err = nl80211_prepare_vendor_dump(skb, cb, &rdev, &wdev); if (err) - return err; + goto out; vcmd_idx = cb->args[2]; data = (void *)cb->args[3]; @@ -10680,18 +10657,26 @@ static int nl80211_vendor_cmd_dump(struct sk_buff *skb, if (vcmd->flags & (WIPHY_VENDOR_CMD_NEED_WDEV | WIPHY_VENDOR_CMD_NEED_NETDEV)) { - if 
(!wdev) - return -EINVAL; + if (!wdev) { + err = -EINVAL; + goto out; + } if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_NETDEV && - !wdev->netdev) - return -EINVAL; + !wdev->netdev) { + err = -EINVAL; + goto out; + } if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_RUNNING) { if (wdev->netdev && - !netif_running(wdev->netdev)) - return -ENETDOWN; - if (!wdev->netdev && !wdev->p2p_started) - return -ENETDOWN; + !netif_running(wdev->netdev)) { + err = -ENETDOWN; + goto out; + } + if (!wdev->netdev && !wdev->p2p_started) { + err = -ENETDOWN; + goto out; + } } } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index cf0193b74ae3..d5b4ac7bf0d8 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3027,6 +3027,11 @@ static int __net_init xfrm_net_init(struct net *net) { int rv; + /* Initialize the per-net locks here */ + spin_lock_init(&net->xfrm.xfrm_state_lock); + rwlock_init(&net->xfrm.xfrm_policy_lock); + mutex_init(&net->xfrm.xfrm_cfg_mutex); + rv = xfrm_statistics_init(net); if (rv < 0) goto out_statistics; @@ -3043,11 +3048,6 @@ static int __net_init xfrm_net_init(struct net *net) if (rv < 0) goto out; - /* Initialize the per-net locks here */ - spin_lock_init(&net->xfrm.xfrm_state_lock); - rwlock_init(&net->xfrm.xfrm_policy_lock); - mutex_init(&net->xfrm.xfrm_cfg_mutex); - return 0; out: diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 805681a7d356..7a5a64e70b4d 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -412,7 +412,14 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es up = nla_data(rp); ulen = xfrm_replay_state_esn_len(up); - if (nla_len(rp) < ulen || xfrm_replay_state_esn_len(replay_esn) != ulen) + /* Check the overall length and the internal bitmap length to avoid + * potential overflow. */ + if (nla_len(rp) < ulen || + xfrm_replay_state_esn_len(replay_esn) != ulen || + replay_esn->bmp_len != up->bmp_len) + return -EINVAL; + + if (up->replay_window > up->bmp_len * sizeof(__u32) * 8) return -EINVAL; return 0; |
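A closing note on the xfrm_user.c hunk: xfrm_replay_verify_len() now also demands that the user-supplied ESN state carries the same bmp_len as the kernel's copy and that replay_window fits inside bmp_len 32-bit words, because a larger window would later be used to index past the end of the in-kernel bitmap. A stand-alone sketch of that bounds check; struct esn_state mirrors only the length-relevant fields of xfrm_replay_state_esn and the bitmap payload itself is left out:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct esn_state {
    uint32_t bmp_len;           /* bitmap length, in 32-bit words */
    uint32_t replay_window;     /* replay window, in bits */
    /* uint32_t bmp[bmp_len] would follow in the real structure */
};

/* Total size of header plus bitmap, like xfrm_replay_state_esn_len(). */
static size_t esn_len(const struct esn_state *s)
{
    return sizeof(*s) + (size_t)s->bmp_len * sizeof(uint32_t);
}

static bool esn_update_ok(const struct esn_state *kern,
                          const struct esn_state *user, size_t user_bytes)
{
    if (user_bytes < esn_len(user))
        return false;               /* attribute shorter than it claims */
    if (esn_len(kern) != esn_len(user) || kern->bmp_len != user->bmp_len)
        return false;               /* size must match the kernel state */
    if (user->replay_window > user->bmp_len * 32)
        return false;               /* window would overrun the bitmap */
    return true;
}

int main(void)
{
    struct esn_state kern = { .bmp_len = 4 };
    struct esn_state user = { .bmp_len = 4, .replay_window = 128 };

    /* exactly 4 * 32 = 128 bits: accepted */
    printf("ok:  %d\n", esn_update_ok(&kern, &user, esn_len(&user)));

    user.replay_window = 129;   /* one bit past the bitmap: rejected */
    printf("bad: %d\n", esn_update_ok(&kern, &user, esn_len(&user)));
    return 0;
}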