summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorDmitry Shmidt <dimitrysh@google.com>2016-08-01 15:51:01 -0700
committerDmitry Shmidt <dimitrysh@google.com>2016-08-01 15:57:55 -0700
commitb558f17a13b10761eb6f838e713425b9e83f8a01 (patch)
tree425828a423411d6c65e5b18a3330d244eef987b0 /net
parent818aa36ea868ba8f2985f9ca0906fd9cba3e437d (diff)
parentb05965f284db3e086022f4e318e46cb5bffb1376 (diff)
Merge tag 'v4.4.16' into android-4.4.y
This is the 4.4.16 stable release Change-Id: Ibaf7b7e03695e1acebc654a2ca1a4bfcc48fcea4
Diffstat (limited to 'net')
-rw-r--r--net/ax25/af_ax25.c3
-rw-r--r--net/ax25/ax25_ds_timer.c5
-rw-r--r--net/ax25/ax25_ip.c15
-rw-r--r--net/ax25/ax25_std_timer.c5
-rw-r--r--net/ax25/ax25_subr.c3
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c10
-rw-r--r--net/batman-adv/distributed-arp-table.c17
-rw-r--r--net/batman-adv/hard-interface.h12
-rw-r--r--net/batman-adv/network-coding.c19
-rw-r--r--net/batman-adv/originator.c149
-rw-r--r--net/batman-adv/originator.h1
-rw-r--r--net/batman-adv/routing.c9
-rw-r--r--net/batman-adv/send.c6
-rw-r--r--net/batman-adv/soft-interface.c8
-rw-r--r--net/batman-adv/translation-table.c28
-rw-r--r--net/bluetooth/6lowpan.c7
-rw-r--r--net/bluetooth/hci_conn.c6
-rw-r--r--net/bluetooth/hci_request.c28
-rw-r--r--net/bluetooth/mgmt.c4
-rw-r--r--net/bluetooth/smp.c16
-rw-r--r--net/bridge/br.c3
-rw-r--r--net/bridge/br_device.c8
-rw-r--r--net/bridge/br_fdb.c2
-rw-r--r--net/bridge/br_ioctl.c5
-rw-r--r--net/bridge/br_multicast.c16
-rw-r--r--net/bridge/br_private.h23
-rw-r--r--net/bridge/br_stp.c13
-rw-r--r--net/ceph/messenger.c91
-rw-r--r--net/ceph/osd_client.c4
-rw-r--r--net/core/dev.c16
-rw-r--r--net/core/filter.c56
-rw-r--r--net/core/flow_dissector.c59
-rw-r--r--net/core/neighbour.c6
-rw-r--r--net/core/pktgen.c4
-rw-r--r--net/core/rtnetlink.c1
-rw-r--r--net/core/scm.c7
-rw-r--r--net/core/skbuff.c17
-rw-r--r--net/core/sysctl_net_core.c10
-rw-r--r--net/dccp/ipv4.c16
-rw-r--r--net/dccp/ipv6.c14
-rw-r--r--net/decnet/dn_route.c9
-rw-r--r--net/ipv4/devinet.c6
-rw-r--r--net/ipv4/esp4.c52
-rw-r--r--net/ipv4/fib_frontend.c26
-rw-r--r--net/ipv4/fib_semantics.c2
-rw-r--r--net/ipv4/igmp.c3
-rw-r--r--net/ipv4/inet_connection_sock.c14
-rw-r--r--net/ipv4/ip_fragment.c1
-rw-r--r--net/ipv4/ip_gre.c19
-rw-r--r--net/ipv4/ip_output.c8
-rw-r--r--net/ipv4/ip_sockglue.c2
-rw-r--r--net/ipv4/ip_tunnel.c23
-rw-r--r--net/ipv4/ipmr.c4
-rw-r--r--net/ipv4/netfilter/arp_tables.c293
-rw-r--r--net/ipv4/netfilter/ip_tables.c318
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_masquerade_ipv4.c12
-rw-r--r--net/ipv4/ping.c4
-rw-r--r--net/ipv4/raw.c4
-rw-r--r--net/ipv4/route.c89
-rw-r--r--net/ipv4/tcp.c16
-rw-r--r--net/ipv4/tcp_ipv4.c46
-rw-r--r--net/ipv4/tcp_metrics.c2
-rw-r--r--net/ipv4/tcp_minisocks.c3
-rw-r--r--net/ipv4/tcp_output.c6
-rw-r--r--net/ipv4/tcp_yeah.c2
-rw-r--r--net/ipv4/udp.c18
-rw-r--r--net/ipv4/udp_tunnel.c2
-rw-r--r--net/ipv6/addrconf.c7
-rw-r--r--net/ipv6/datagram.c3
-rw-r--r--net/ipv6/exthdrs_core.c6
-rw-r--r--net/ipv6/ip6_fib.c1
-rw-r--r--net/ipv6/ip6_flowlabel.c5
-rw-r--r--net/ipv6/ip6_gre.c2
-rw-r--r--net/ipv6/ip6_output.c27
-rw-r--r--net/ipv6/ip6_tunnel.c4
-rw-r--r--net/ipv6/ip6mr.c1
-rw-r--r--net/ipv6/mcast.c3
-rw-r--r--net/ipv6/netfilter/ip6_tables.c311
-rw-r--r--net/ipv6/reassembly.c6
-rw-r--r--net/ipv6/route.c9
-rw-r--r--net/ipv6/sit.c4
-rw-r--r--net/ipv6/tcp_ipv6.c25
-rw-r--r--net/ipv6/udp.c12
-rw-r--r--net/ipv6/xfrm6_mode_tunnel.c2
-rw-r--r--net/iucv/af_iucv.c3
-rw-r--r--net/l2tp/l2tp_core.c2
-rw-r--r--net/l2tp/l2tp_ip.c8
-rw-r--r--net/l2tp/l2tp_ip6.c8
-rw-r--r--net/l2tp/l2tp_netlink.c18
-rw-r--r--net/llc/af_llc.c1
-rw-r--r--net/mac80211/agg-rx.c2
-rw-r--r--net/mac80211/ibss.c23
-rw-r--r--net/mac80211/ieee80211_i.h2
-rw-r--r--net/mac80211/iface.c7
-rw-r--r--net/mac80211/mesh.c22
-rw-r--r--net/mac80211/mesh.h4
-rw-r--r--net/mac80211/mlme.c2
-rw-r--r--net/mac80211/rc80211_minstrel.c2
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c7
-rw-r--r--net/mac80211/rx.c42
-rw-r--r--net/mac80211/scan.c12
-rw-r--r--net/mac80211/sta_info.c37
-rw-r--r--net/mac80211/sta_info.h2
-rw-r--r--net/mpls/af_mpls.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c37
-rw-r--r--net/netfilter/ipvs/ip_vs_pe_sip.c6
-rw-r--r--net/netfilter/nf_conntrack_core.c4
-rw-r--r--net/netfilter/x_tables.c245
-rw-r--r--net/netlink/af_netlink.c9
-rw-r--r--net/openvswitch/actions.c12
-rw-r--r--net/openvswitch/datapath.c5
-rw-r--r--net/openvswitch/vport-netdev.c2
-rw-r--r--net/openvswitch/vport-vxlan.c4
-rw-r--r--net/openvswitch/vport.h7
-rw-r--r--net/packet/af_packet.c40
-rw-r--r--net/phonet/af_phonet.c4
-rw-r--r--net/rfkill/core.c16
-rw-r--r--net/sched/act_csum.c8
-rw-r--r--net/sched/act_mirred.c2
-rw-r--r--net/sched/act_nat.c18
-rw-r--r--net/sched/cls_flower.c10
-rw-r--r--net/sched/sch_api.c9
-rw-r--r--net/sched/sch_cbq.c12
-rw-r--r--net/sched/sch_choke.c6
-rw-r--r--net/sched/sch_codel.c10
-rw-r--r--net/sched/sch_drr.c9
-rw-r--r--net/sched/sch_dsmark.c11
-rw-r--r--net/sched/sch_fifo.c4
-rw-r--r--net/sched/sch_fq.c4
-rw-r--r--net/sched/sch_fq_codel.c17
-rw-r--r--net/sched/sch_generic.c5
-rw-r--r--net/sched/sch_hfsc.c9
-rw-r--r--net/sched/sch_hhf.c10
-rw-r--r--net/sched/sch_htb.c24
-rw-r--r--net/sched/sch_multiq.c16
-rw-r--r--net/sched/sch_netem.c82
-rw-r--r--net/sched/sch_pie.c5
-rw-r--r--net/sched/sch_prio.c15
-rw-r--r--net/sched/sch_qfq.c9
-rw-r--r--net/sched/sch_red.c10
-rw-r--r--net/sched/sch_sfb.c10
-rw-r--r--net/sched/sch_sfq.c16
-rw-r--r--net/sched/sch_tbf.c15
-rw-r--r--net/sctp/ipv6.c2
-rw-r--r--net/sctp/protocol.c43
-rw-r--r--net/sctp/socket.c11
-rw-r--r--net/sctp/sysctl.c2
-rw-r--r--net/socket.c38
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c4
-rw-r--r--net/sunrpc/cache.c8
-rw-r--r--net/sunrpc/clnt.c12
-rw-r--r--net/sunrpc/xprtsock.c49
-rw-r--r--net/switchdev/switchdev.c21
-rw-r--r--net/tipc/bcast.c4
-rw-r--r--net/tipc/netlink_compat.c2
-rw-r--r--net/tipc/node.c12
-rw-r--r--net/tipc/socket.c36
-rw-r--r--net/tipc/subscr.c11
-rw-r--r--net/unix/af_unix.c56
-rw-r--r--net/unix/diag.c2
-rw-r--r--net/unix/garbage.c17
-rw-r--r--net/vmw_vsock/af_vsock.c21
-rw-r--r--net/wireless/core.c2
-rw-r--r--net/wireless/nl80211.c2
-rw-r--r--net/wireless/wext-core.c77
-rw-r--r--net/x25/x25_facilities.c1
-rw-r--r--net/xfrm/xfrm_input.c3
-rw-r--r--net/xfrm/xfrm_output.c2
169 files changed, 1961 insertions, 1557 deletions
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index fbd0acf80b13..2fdebabbfacd 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -976,7 +976,8 @@ static int ax25_release(struct socket *sock)
release_sock(sk);
ax25_disconnect(ax25, 0);
lock_sock(sk);
- ax25_destroy_socket(ax25);
+ if (!sock_flag(ax25->sk, SOCK_DESTROY))
+ ax25_destroy_socket(ax25);
break;
case AX25_STATE_3:
diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c
index 951cd57bb07d..5237dff6941d 100644
--- a/net/ax25/ax25_ds_timer.c
+++ b/net/ax25/ax25_ds_timer.c
@@ -102,6 +102,7 @@ void ax25_ds_heartbeat_expiry(ax25_cb *ax25)
switch (ax25->state) {
case AX25_STATE_0:
+ case AX25_STATE_2:
/* Magic here: If we listen() and a new link dies before it
is accepted() it isn't 'dead' so doesn't get removed. */
if (!sk || sock_flag(sk, SOCK_DESTROY) ||
@@ -111,6 +112,7 @@ void ax25_ds_heartbeat_expiry(ax25_cb *ax25)
sock_hold(sk);
ax25_destroy_socket(ax25);
bh_unlock_sock(sk);
+ /* Ungrab socket and destroy it */
sock_put(sk);
} else
ax25_destroy_socket(ax25);
@@ -213,7 +215,8 @@ void ax25_ds_t1_timeout(ax25_cb *ax25)
case AX25_STATE_2:
if (ax25->n2count == ax25->n2) {
ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND);
- ax25_disconnect(ax25, ETIMEDOUT);
+ if (!sock_flag(ax25->sk, SOCK_DESTROY))
+ ax25_disconnect(ax25, ETIMEDOUT);
return;
} else {
ax25->n2count++;
diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c
index b563a3f5f2a8..2fa3be965101 100644
--- a/net/ax25/ax25_ip.c
+++ b/net/ax25/ax25_ip.c
@@ -228,8 +228,23 @@ netdev_tx_t ax25_ip_xmit(struct sk_buff *skb)
}
#endif
+static bool ax25_validate_header(const char *header, unsigned int len)
+{
+ ax25_digi digi;
+
+ if (!len)
+ return false;
+
+ if (header[0])
+ return true;
+
+ return ax25_addr_parse(header + 1, len - 1, NULL, NULL, &digi, NULL,
+ NULL);
+}
+
const struct header_ops ax25_header_ops = {
.create = ax25_hard_header,
+ .validate = ax25_validate_header,
};
EXPORT_SYMBOL(ax25_header_ops);
diff --git a/net/ax25/ax25_std_timer.c b/net/ax25/ax25_std_timer.c
index 004467c9e6e1..2c0d6ef66f9d 100644
--- a/net/ax25/ax25_std_timer.c
+++ b/net/ax25/ax25_std_timer.c
@@ -38,6 +38,7 @@ void ax25_std_heartbeat_expiry(ax25_cb *ax25)
switch (ax25->state) {
case AX25_STATE_0:
+ case AX25_STATE_2:
/* Magic here: If we listen() and a new link dies before it
is accepted() it isn't 'dead' so doesn't get removed. */
if (!sk || sock_flag(sk, SOCK_DESTROY) ||
@@ -47,6 +48,7 @@ void ax25_std_heartbeat_expiry(ax25_cb *ax25)
sock_hold(sk);
ax25_destroy_socket(ax25);
bh_unlock_sock(sk);
+ /* Ungrab socket and destroy it */
sock_put(sk);
} else
ax25_destroy_socket(ax25);
@@ -144,7 +146,8 @@ void ax25_std_t1timer_expiry(ax25_cb *ax25)
case AX25_STATE_2:
if (ax25->n2count == ax25->n2) {
ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND);
- ax25_disconnect(ax25, ETIMEDOUT);
+ if (!sock_flag(ax25->sk, SOCK_DESTROY))
+ ax25_disconnect(ax25, ETIMEDOUT);
return;
} else {
ax25->n2count++;
diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c
index 3b78e8473a01..655a7d4c96e1 100644
--- a/net/ax25/ax25_subr.c
+++ b/net/ax25/ax25_subr.c
@@ -264,7 +264,8 @@ void ax25_disconnect(ax25_cb *ax25, int reason)
{
ax25_clear_queues(ax25);
- ax25_stop_heartbeat(ax25);
+ if (!sock_flag(ax25->sk, SOCK_DESTROY))
+ ax25_stop_heartbeat(ax25);
ax25_stop_t1timer(ax25);
ax25_stop_t2timer(ax25);
ax25_stop_t3timer(ax25);
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 191a70290dca..f5d2fe5e31cc 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -127,21 +127,17 @@ batadv_backbone_gw_free_ref(struct batadv_bla_backbone_gw *backbone_gw)
}
/* finally deinitialize the claim */
-static void batadv_claim_free_rcu(struct rcu_head *rcu)
+static void batadv_claim_release(struct batadv_bla_claim *claim)
{
- struct batadv_bla_claim *claim;
-
- claim = container_of(rcu, struct batadv_bla_claim, rcu);
-
batadv_backbone_gw_free_ref(claim->backbone_gw);
- kfree(claim);
+ kfree_rcu(claim, rcu);
}
/* free a claim, call claim_free_rcu if its the last reference */
static void batadv_claim_free_ref(struct batadv_bla_claim *claim)
{
if (atomic_dec_and_test(&claim->refcount))
- call_rcu(&claim->rcu, batadv_claim_free_rcu);
+ batadv_claim_release(claim);
}
/**
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index a49c705fb86b..5f19133c5530 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -553,6 +553,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv,
* be sent to
* @bat_priv: the bat priv with all the soft interface information
* @ip_dst: ipv4 to look up in the DHT
+ * @vid: VLAN identifier
*
* An originator O is selected if and only if its DHT_ID value is one of three
* closest values (from the LEFT, with wrap around if needed) then the hash
@@ -561,7 +562,8 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv,
* Returns the candidate array of size BATADV_DAT_CANDIDATE_NUM.
*/
static struct batadv_dat_candidate *
-batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
+batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst,
+ unsigned short vid)
{
int select;
batadv_dat_addr_t last_max = BATADV_DAT_ADDR_MAX, ip_key;
@@ -577,7 +579,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
return NULL;
dat.ip = ip_dst;
- dat.vid = 0;
+ dat.vid = vid;
ip_key = (batadv_dat_addr_t)batadv_hash_dat(&dat,
BATADV_DAT_ADDR_MAX);
@@ -597,6 +599,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
* @bat_priv: the bat priv with all the soft interface information
* @skb: payload to send
* @ip: the DHT key
+ * @vid: VLAN identifier
* @packet_subtype: unicast4addr packet subtype to use
*
* This function copies the skb with pskb_copy() and is sent as unicast packet
@@ -607,7 +610,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
*/
static bool batadv_dat_send_data(struct batadv_priv *bat_priv,
struct sk_buff *skb, __be32 ip,
- int packet_subtype)
+ unsigned short vid, int packet_subtype)
{
int i;
bool ret = false;
@@ -616,7 +619,7 @@ static bool batadv_dat_send_data(struct batadv_priv *bat_priv,
struct sk_buff *tmp_skb;
struct batadv_dat_candidate *cand;
- cand = batadv_dat_select_candidates(bat_priv, ip);
+ cand = batadv_dat_select_candidates(bat_priv, ip, vid);
if (!cand)
goto out;
@@ -1004,7 +1007,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
ret = true;
} else {
/* Send the request to the DHT */
- ret = batadv_dat_send_data(bat_priv, skb, ip_dst,
+ ret = batadv_dat_send_data(bat_priv, skb, ip_dst, vid,
BATADV_P_DAT_DHT_GET);
}
out:
@@ -1132,8 +1135,8 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv,
/* Send the ARP reply to the candidates for both the IP addresses that
* the node obtained from the ARP reply
*/
- batadv_dat_send_data(bat_priv, skb, ip_src, BATADV_P_DAT_DHT_PUT);
- batadv_dat_send_data(bat_priv, skb, ip_dst, BATADV_P_DAT_DHT_PUT);
+ batadv_dat_send_data(bat_priv, skb, ip_src, vid, BATADV_P_DAT_DHT_PUT);
+ batadv_dat_send_data(bat_priv, skb, ip_dst, vid, BATADV_P_DAT_DHT_PUT);
}
/**
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index 5a31420513e1..7b12ea8ea29d 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -75,18 +75,6 @@ batadv_hardif_free_ref(struct batadv_hard_iface *hard_iface)
call_rcu(&hard_iface->rcu, batadv_hardif_free_rcu);
}
-/**
- * batadv_hardif_free_ref_now - decrement the hard interface refcounter and
- * possibly free it (without rcu callback)
- * @hard_iface: the hard interface to free
- */
-static inline void
-batadv_hardif_free_ref_now(struct batadv_hard_iface *hard_iface)
-{
- if (atomic_dec_and_test(&hard_iface->refcount))
- batadv_hardif_free_rcu(&hard_iface->rcu);
-}
-
static inline struct batadv_hard_iface *
batadv_primary_if_get_selected(struct batadv_priv *bat_priv)
{
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index f5276be2c77c..d0956f726547 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -203,28 +203,25 @@ void batadv_nc_init_orig(struct batadv_orig_node *orig_node)
}
/**
- * batadv_nc_node_free_rcu - rcu callback to free an nc node and remove
- * its refcount on the orig_node
- * @rcu: rcu pointer of the nc node
+ * batadv_nc_node_release - release nc_node from lists and queue for free after
+ * rcu grace period
+ * @nc_node: the nc node to free
*/
-static void batadv_nc_node_free_rcu(struct rcu_head *rcu)
+static void batadv_nc_node_release(struct batadv_nc_node *nc_node)
{
- struct batadv_nc_node *nc_node;
-
- nc_node = container_of(rcu, struct batadv_nc_node, rcu);
batadv_orig_node_free_ref(nc_node->orig_node);
- kfree(nc_node);
+ kfree_rcu(nc_node, rcu);
}
/**
- * batadv_nc_node_free_ref - decrements the nc node refcounter and possibly
- * frees it
+ * batadv_nc_node_free_ref - decrement the nc node refcounter and possibly
+ * release it
* @nc_node: the nc node to free
*/
static void batadv_nc_node_free_ref(struct batadv_nc_node *nc_node)
{
if (atomic_dec_and_test(&nc_node->refcount))
- call_rcu(&nc_node->rcu, batadv_nc_node_free_rcu);
+ batadv_nc_node_release(nc_node);
}
/**
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 7486df9ed48d..17851d3aaf22 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -163,92 +163,66 @@ err:
}
/**
- * batadv_neigh_ifinfo_free_rcu - free the neigh_ifinfo object
- * @rcu: rcu pointer of the neigh_ifinfo object
- */
-static void batadv_neigh_ifinfo_free_rcu(struct rcu_head *rcu)
-{
- struct batadv_neigh_ifinfo *neigh_ifinfo;
-
- neigh_ifinfo = container_of(rcu, struct batadv_neigh_ifinfo, rcu);
-
- if (neigh_ifinfo->if_outgoing != BATADV_IF_DEFAULT)
- batadv_hardif_free_ref_now(neigh_ifinfo->if_outgoing);
-
- kfree(neigh_ifinfo);
-}
-
-/**
- * batadv_neigh_ifinfo_free_now - decrement the refcounter and possibly free
- * the neigh_ifinfo (without rcu callback)
+ * batadv_neigh_ifinfo_release - release neigh_ifinfo from lists and queue for
+ * free after rcu grace period
* @neigh_ifinfo: the neigh_ifinfo object to release
*/
static void
-batadv_neigh_ifinfo_free_ref_now(struct batadv_neigh_ifinfo *neigh_ifinfo)
+batadv_neigh_ifinfo_release(struct batadv_neigh_ifinfo *neigh_ifinfo)
{
- if (atomic_dec_and_test(&neigh_ifinfo->refcount))
- batadv_neigh_ifinfo_free_rcu(&neigh_ifinfo->rcu);
+ if (neigh_ifinfo->if_outgoing != BATADV_IF_DEFAULT)
+ batadv_hardif_free_ref(neigh_ifinfo->if_outgoing);
+
+ kfree_rcu(neigh_ifinfo, rcu);
}
/**
- * batadv_neigh_ifinfo_free_ref - decrement the refcounter and possibly free
+ * batadv_neigh_ifinfo_free_ref - decrement the refcounter and possibly release
* the neigh_ifinfo
* @neigh_ifinfo: the neigh_ifinfo object to release
*/
void batadv_neigh_ifinfo_free_ref(struct batadv_neigh_ifinfo *neigh_ifinfo)
{
if (atomic_dec_and_test(&neigh_ifinfo->refcount))
- call_rcu(&neigh_ifinfo->rcu, batadv_neigh_ifinfo_free_rcu);
+ batadv_neigh_ifinfo_release(neigh_ifinfo);
}
/**
* batadv_neigh_node_free_rcu - free the neigh_node
- * @rcu: rcu pointer of the neigh_node
+ * batadv_neigh_node_release - release neigh_node from lists and queue for
+ * free after rcu grace period
+ * @neigh_node: neigh neighbor to free
*/
-static void batadv_neigh_node_free_rcu(struct rcu_head *rcu)
+static void batadv_neigh_node_release(struct batadv_neigh_node *neigh_node)
{
struct hlist_node *node_tmp;
- struct batadv_neigh_node *neigh_node;
struct batadv_neigh_ifinfo *neigh_ifinfo;
struct batadv_algo_ops *bao;
- neigh_node = container_of(rcu, struct batadv_neigh_node, rcu);
bao = neigh_node->orig_node->bat_priv->bat_algo_ops;
hlist_for_each_entry_safe(neigh_ifinfo, node_tmp,
&neigh_node->ifinfo_list, list) {
- batadv_neigh_ifinfo_free_ref_now(neigh_ifinfo);
+ batadv_neigh_ifinfo_free_ref(neigh_ifinfo);
}
if (bao->bat_neigh_free)
bao->bat_neigh_free(neigh_node);
- batadv_hardif_free_ref_now(neigh_node->if_incoming);
+ batadv_hardif_free_ref(neigh_node->if_incoming);
- kfree(neigh_node);
-}
-
-/**
- * batadv_neigh_node_free_ref_now - decrement the neighbors refcounter
- * and possibly free it (without rcu callback)
- * @neigh_node: neigh neighbor to free
- */
-static void
-batadv_neigh_node_free_ref_now(struct batadv_neigh_node *neigh_node)
-{
- if (atomic_dec_and_test(&neigh_node->refcount))
- batadv_neigh_node_free_rcu(&neigh_node->rcu);
+ kfree_rcu(neigh_node, rcu);
}
/**
* batadv_neigh_node_free_ref - decrement the neighbors refcounter
- * and possibly free it
+ * and possibly release it
* @neigh_node: neigh neighbor to free
*/
void batadv_neigh_node_free_ref(struct batadv_neigh_node *neigh_node)
{
if (atomic_dec_and_test(&neigh_node->refcount))
- call_rcu(&neigh_node->rcu, batadv_neigh_node_free_rcu);
+ batadv_neigh_node_release(neigh_node);
}
/**
@@ -532,108 +506,99 @@ out:
}
/**
- * batadv_orig_ifinfo_free_rcu - free the orig_ifinfo object
- * @rcu: rcu pointer of the orig_ifinfo object
+ * batadv_orig_ifinfo_release - release orig_ifinfo from lists and queue for
+ * free after rcu grace period
+ * @orig_ifinfo: the orig_ifinfo object to release
*/
-static void batadv_orig_ifinfo_free_rcu(struct rcu_head *rcu)
+static void batadv_orig_ifinfo_release(struct batadv_orig_ifinfo *orig_ifinfo)
{
- struct batadv_orig_ifinfo *orig_ifinfo;
struct batadv_neigh_node *router;
- orig_ifinfo = container_of(rcu, struct batadv_orig_ifinfo, rcu);
-
if (orig_ifinfo->if_outgoing != BATADV_IF_DEFAULT)
- batadv_hardif_free_ref_now(orig_ifinfo->if_outgoing);
+ batadv_hardif_free_ref(orig_ifinfo->if_outgoing);
/* this is the last reference to this object */
router = rcu_dereference_protected(orig_ifinfo->router, true);
if (router)
- batadv_neigh_node_free_ref_now(router);
- kfree(orig_ifinfo);
+ batadv_neigh_node_free_ref(router);
+
+ kfree_rcu(orig_ifinfo, rcu);
}
/**
- * batadv_orig_ifinfo_free_ref - decrement the refcounter and possibly free
- * the orig_ifinfo (without rcu callback)
+ * batadv_orig_ifinfo_free_ref - decrement the refcounter and possibly release
+ * the orig_ifinfo
* @orig_ifinfo: the orig_ifinfo object to release
*/
-static void
-batadv_orig_ifinfo_free_ref_now(struct batadv_orig_ifinfo *orig_ifinfo)
+void batadv_orig_ifinfo_free_ref(struct batadv_orig_ifinfo *orig_ifinfo)
{
if (atomic_dec_and_test(&orig_ifinfo->refcount))
- batadv_orig_ifinfo_free_rcu(&orig_ifinfo->rcu);
+ batadv_orig_ifinfo_release(orig_ifinfo);
}
/**
- * batadv_orig_ifinfo_free_ref - decrement the refcounter and possibly free
- * the orig_ifinfo
- * @orig_ifinfo: the orig_ifinfo object to release
+ * batadv_orig_node_free_rcu - free the orig_node
+ * @rcu: rcu pointer of the orig_node
*/
-void batadv_orig_ifinfo_free_ref(struct batadv_orig_ifinfo *orig_ifinfo)
+static void batadv_orig_node_free_rcu(struct rcu_head *rcu)
{
- if (atomic_dec_and_test(&orig_ifinfo->refcount))
- call_rcu(&orig_ifinfo->rcu, batadv_orig_ifinfo_free_rcu);
+ struct batadv_orig_node *orig_node;
+
+ orig_node = container_of(rcu, struct batadv_orig_node, rcu);
+
+ batadv_mcast_purge_orig(orig_node);
+
+ batadv_frag_purge_orig(orig_node, NULL);
+
+ if (orig_node->bat_priv->bat_algo_ops->bat_orig_free)
+ orig_node->bat_priv->bat_algo_ops->bat_orig_free(orig_node);
+
+ kfree(orig_node->tt_buff);
+ kfree(orig_node);
}
-static void batadv_orig_node_free_rcu(struct rcu_head *rcu)
+/**
+ * batadv_orig_node_release - release orig_node from lists and queue for
+ * free after rcu grace period
+ * @orig_node: the orig node to free
+ */
+static void batadv_orig_node_release(struct batadv_orig_node *orig_node)
{
struct hlist_node *node_tmp;
struct batadv_neigh_node *neigh_node;
- struct batadv_orig_node *orig_node;
struct batadv_orig_ifinfo *orig_ifinfo;
- orig_node = container_of(rcu, struct batadv_orig_node, rcu);
-
spin_lock_bh(&orig_node->neigh_list_lock);
/* for all neighbors towards this originator ... */
hlist_for_each_entry_safe(neigh_node, node_tmp,
&orig_node->neigh_list, list) {
hlist_del_rcu(&neigh_node->list);
- batadv_neigh_node_free_ref_now(neigh_node);
+ batadv_neigh_node_free_ref(neigh_node);
}
hlist_for_each_entry_safe(orig_ifinfo, node_tmp,
&orig_node->ifinfo_list, list) {
hlist_del_rcu(&orig_ifinfo->list);
- batadv_orig_ifinfo_free_ref_now(orig_ifinfo);
+ batadv_orig_ifinfo_free_ref(orig_ifinfo);
}
spin_unlock_bh(&orig_node->neigh_list_lock);
- batadv_mcast_purge_orig(orig_node);
-
/* Free nc_nodes */
batadv_nc_purge_orig(orig_node->bat_priv, orig_node, NULL);
- batadv_frag_purge_orig(orig_node, NULL);
-
- if (orig_node->bat_priv->bat_algo_ops->bat_orig_free)
- orig_node->bat_priv->bat_algo_ops->bat_orig_free(orig_node);
-
- kfree(orig_node->tt_buff);
- kfree(orig_node);
+ call_rcu(&orig_node->rcu, batadv_orig_node_free_rcu);
}
/**
* batadv_orig_node_free_ref - decrement the orig node refcounter and possibly
- * schedule an rcu callback for freeing it
+ * release it
* @orig_node: the orig node to free
*/
void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node)
{
if (atomic_dec_and_test(&orig_node->refcount))
- call_rcu(&orig_node->rcu, batadv_orig_node_free_rcu);
-}
-
-/**
- * batadv_orig_node_free_ref_now - decrement the orig node refcounter and
- * possibly free it (without rcu callback)
- * @orig_node: the orig node to free
- */
-void batadv_orig_node_free_ref_now(struct batadv_orig_node *orig_node)
-{
- if (atomic_dec_and_test(&orig_node->refcount))
- batadv_orig_node_free_rcu(&orig_node->rcu);
+ batadv_orig_node_release(orig_node);
}
void batadv_originator_free(struct batadv_priv *bat_priv)
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index fa18f9bf266b..a5c37882b409 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -38,7 +38,6 @@ int batadv_originator_init(struct batadv_priv *bat_priv);
void batadv_originator_free(struct batadv_priv *bat_priv);
void batadv_purge_orig_ref(struct batadv_priv *bat_priv);
void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node);
-void batadv_orig_node_free_ref_now(struct batadv_orig_node *orig_node);
struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
const u8 *addr);
struct batadv_neigh_node *
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 3207667e69de..d8a2f33e60e5 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -104,6 +104,15 @@ static void _batadv_update_route(struct batadv_priv *bat_priv,
neigh_node = NULL;
spin_lock_bh(&orig_node->neigh_list_lock);
+ /* curr_router used earlier may not be the current orig_ifinfo->router
+ * anymore because it was dereferenced outside of the neigh_list_lock
+ * protected region. After the new best neighbor has replace the current
+ * best neighbor the reference counter needs to decrease. Consequently,
+ * the code needs to ensure the curr_router variable contains a pointer
+ * to the replaced best neighbor.
+ */
+ curr_router = rcu_dereference_protected(orig_ifinfo->router, true);
+
rcu_assign_pointer(orig_ifinfo->router, neigh_node);
spin_unlock_bh(&orig_node->neigh_list_lock);
batadv_orig_ifinfo_free_ref(orig_ifinfo);
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index f664324805eb..0e0c3b8ed927 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -630,6 +630,9 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
if (pending) {
hlist_del(&forw_packet->list);
+ if (!forw_packet->own)
+ atomic_inc(&bat_priv->bcast_queue_left);
+
batadv_forw_packet_free(forw_packet);
}
}
@@ -657,6 +660,9 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
if (pending) {
hlist_del(&forw_packet->list);
+ if (!forw_packet->own)
+ atomic_inc(&bat_priv->batman_queue_left);
+
batadv_forw_packet_free(forw_packet);
}
}
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index ac4d08de5df4..720f1a5b81ac 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -407,11 +407,17 @@ void batadv_interface_rx(struct net_device *soft_iface,
*/
nf_reset(skb);
+ if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
+ goto dropped;
+
vid = batadv_get_vid(skb, 0);
ethhdr = eth_hdr(skb);
switch (ntohs(ethhdr->h_proto)) {
case ETH_P_8021Q:
+ if (!pskb_may_pull(skb, VLAN_ETH_HLEN))
+ goto dropped;
+
vhdr = (struct vlan_ethhdr *)skb->data;
if (vhdr->h_vlan_encapsulated_proto != ethertype)
@@ -423,8 +429,6 @@ void batadv_interface_rx(struct net_device *soft_iface,
}
/* skb->dev & skb->pkt_type are set here */
- if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
- goto dropped;
skb->protocol = eth_type_trans(skb, soft_iface);
/* should not be necessary anymore as we use skb_pull_rcsum()
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 76f19ba62462..83b0ca27a45e 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -240,20 +240,6 @@ int batadv_tt_global_hash_count(struct batadv_priv *bat_priv,
return count;
}
-static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu)
-{
- struct batadv_tt_orig_list_entry *orig_entry;
-
- orig_entry = container_of(rcu, struct batadv_tt_orig_list_entry, rcu);
-
- /* We are in an rcu callback here, therefore we cannot use
- * batadv_orig_node_free_ref() and its call_rcu():
- * An rcu_barrier() wouldn't wait for that to finish
- */
- batadv_orig_node_free_ref_now(orig_entry->orig_node);
- kfree(orig_entry);
-}
-
/**
* batadv_tt_local_size_mod - change the size by v of the local table identified
* by vid
@@ -349,13 +335,25 @@ static void batadv_tt_global_size_dec(struct batadv_orig_node *orig_node,
batadv_tt_global_size_mod(orig_node, vid, -1);
}
+/**
+ * batadv_tt_orig_list_entry_release - release tt orig entry from lists and
+ * queue for free after rcu grace period
+ * @orig_entry: tt orig entry to be free'd
+ */
+static void
+batadv_tt_orig_list_entry_release(struct batadv_tt_orig_list_entry *orig_entry)
+{
+ batadv_orig_node_free_ref(orig_entry->orig_node);
+ kfree_rcu(orig_entry, rcu);
+}
+
static void
batadv_tt_orig_list_entry_free_ref(struct batadv_tt_orig_list_entry *orig_entry)
{
if (!atomic_dec_and_test(&orig_entry->refcount))
return;
- call_rcu(&orig_entry->rcu, batadv_tt_orig_list_entry_free_rcu);
+ batadv_tt_orig_list_entry_release(orig_entry);
}
/**
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 9e9cca3689a0..795ddd8b2f77 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -307,6 +307,9 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
/* check that it's our buffer */
if (lowpan_is_ipv6(*skb_network_header(skb))) {
+ /* Pull off the 1-byte of 6lowpan header. */
+ skb_pull(skb, 1);
+
/* Copy the packet so that the IPv6 header is
* properly aligned.
*/
@@ -317,6 +320,7 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
local_skb->protocol = htons(ETH_P_IPV6);
local_skb->pkt_type = PACKET_HOST;
+ local_skb->dev = dev;
skb_set_transport_header(local_skb, sizeof(struct ipv6hdr));
@@ -335,6 +339,8 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
if (!local_skb)
goto drop;
+ local_skb->dev = dev;
+
ret = iphc_decompress(local_skb, dev, chan);
if (ret < 0) {
kfree_skb(local_skb);
@@ -343,7 +349,6 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
local_skb->protocol = htons(ETH_P_IPV6);
local_skb->pkt_type = PACKET_HOST;
- local_skb->dev = dev;
if (give_skb_to_upper(local_skb, dev)
!= NET_RX_SUCCESS) {
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 85b82f7adbd2..24e9410923d0 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -722,8 +722,12 @@ static void hci_req_add_le_create_conn(struct hci_request *req,
if (hci_update_random_address(req, false, &own_addr_type))
return;
+ /* Set window to be the same value as the interval to enable
+ * continuous scanning.
+ */
cp.scan_interval = cpu_to_le16(hdev->le_scan_interval);
- cp.scan_window = cpu_to_le16(hdev->le_scan_window);
+ cp.scan_window = cp.scan_interval;
+
bacpy(&cp.peer_addr, &conn->dst);
cp.peer_addr_type = conn->dst_type;
cp.own_address_type = own_addr_type;
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 981f8a202c27..02778c5bc149 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -175,21 +175,29 @@ static u8 update_white_list(struct hci_request *req)
* command to remove it from the controller.
*/
list_for_each_entry(b, &hdev->le_white_list, list) {
- struct hci_cp_le_del_from_white_list cp;
+ /* If the device is neither in pend_le_conns nor
+ * pend_le_reports then remove it from the whitelist.
+ */
+ if (!hci_pend_le_action_lookup(&hdev->pend_le_conns,
+ &b->bdaddr, b->bdaddr_type) &&
+ !hci_pend_le_action_lookup(&hdev->pend_le_reports,
+ &b->bdaddr, b->bdaddr_type)) {
+ struct hci_cp_le_del_from_white_list cp;
+
+ cp.bdaddr_type = b->bdaddr_type;
+ bacpy(&cp.bdaddr, &b->bdaddr);
- if (hci_pend_le_action_lookup(&hdev->pend_le_conns,
- &b->bdaddr, b->bdaddr_type) ||
- hci_pend_le_action_lookup(&hdev->pend_le_reports,
- &b->bdaddr, b->bdaddr_type)) {
- white_list_entries++;
+ hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST,
+ sizeof(cp), &cp);
continue;
}
- cp.bdaddr_type = b->bdaddr_type;
- bacpy(&cp.bdaddr, &b->bdaddr);
+ if (hci_find_irk_by_addr(hdev, &b->bdaddr, b->bdaddr_type)) {
+ /* White list can not be used with RPAs */
+ return 0x00;
+ }
- hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST,
- sizeof(cp), &cp);
+ white_list_entries++;
}
/* Since all no longer valid white list entries have been
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 7f22119276f3..b1b0a1c0bd8d 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -7155,6 +7155,10 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev,
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING,
status);
+ if (data_len != sizeof(*cp) + cp->adv_data_len + cp->scan_rsp_len)
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING,
+ MGMT_STATUS_INVALID_PARAMS);
+
flags = __le32_to_cpu(cp->flags);
timeout = __le16_to_cpu(cp->timeout);
duration = __le16_to_cpu(cp->duration);
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index ffed8a1d4f27..4b175df35184 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -1072,22 +1072,6 @@ static void smp_notify_keys(struct l2cap_conn *conn)
hcon->dst_type = smp->remote_irk->addr_type;
queue_work(hdev->workqueue, &conn->id_addr_update_work);
}
-
- /* When receiving an indentity resolving key for
- * a remote device that does not use a resolvable
- * private address, just remove the key so that
- * it is possible to use the controller white
- * list for scanning.
- *
- * Userspace will have been told to not store
- * this key at this point. So it is safe to
- * just remove it.
- */
- if (!bacmp(&smp->remote_irk->rpa, BDADDR_ANY)) {
- list_del_rcu(&smp->remote_irk->list);
- kfree_rcu(smp->remote_irk, rcu);
- smp->remote_irk = NULL;
- }
}
if (smp->csrk) {
diff --git a/net/bridge/br.c b/net/bridge/br.c
index a1abe4936fe1..3addc05b9a16 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -121,6 +121,7 @@ static struct notifier_block br_device_notifier = {
.notifier_call = br_device_event
};
+/* called with RTNL */
static int br_switchdev_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
@@ -130,7 +131,6 @@ static int br_switchdev_event(struct notifier_block *unused,
struct switchdev_notifier_fdb_info *fdb_info;
int err = NOTIFY_DONE;
- rtnl_lock();
p = br_port_get_rtnl(dev);
if (!p)
goto out;
@@ -155,7 +155,6 @@ static int br_switchdev_event(struct notifier_block *unused,
}
out:
- rtnl_unlock();
return err;
}
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 689b8412c58e..0346c215ff6a 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -28,6 +28,8 @@
const struct nf_br_ops __rcu *nf_br_ops __read_mostly;
EXPORT_SYMBOL_GPL(nf_br_ops);
+static struct lock_class_key bridge_netdev_addr_lock_key;
+
/* net device transmit always called with BH disabled */
netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
{
@@ -88,6 +90,11 @@ out:
return NETDEV_TX_OK;
}
+static void br_set_lockdep_class(struct net_device *dev)
+{
+ lockdep_set_class(&dev->addr_list_lock, &bridge_netdev_addr_lock_key);
+}
+
static int br_dev_init(struct net_device *dev)
{
struct net_bridge *br = netdev_priv(dev);
@@ -100,6 +107,7 @@ static int br_dev_init(struct net_device *dev)
err = br_vlan_init(br);
if (err)
free_percpu(br->stats);
+ br_set_lockdep_class(dev);
return err;
}
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index a642bb829d09..09442e0f7f67 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -278,6 +278,8 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
* change from under us.
*/
list_for_each_entry(v, &vg->vlan_list, vlist) {
+ if (!br_vlan_should_use(v))
+ continue;
f = __br_fdb_get(br, br->dev->dev_addr, v->vid);
if (f && f->is_local && !f->dst)
fdb_delete_local(br, NULL, f);
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index 263b4de4de57..60a3dbfca8a1 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -21,18 +21,19 @@
#include <asm/uaccess.h>
#include "br_private.h"
-/* called with RTNL */
static int get_bridge_ifindices(struct net *net, int *indices, int num)
{
struct net_device *dev;
int i = 0;
- for_each_netdev(net, dev) {
+ rcu_read_lock();
+ for_each_netdev_rcu(net, dev) {
if (i >= num)
break;
if (dev->priv_flags & IFF_EBRIDGE)
indices[i++] = dev->ifindex;
}
+ rcu_read_unlock();
return i;
}
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 03661d97463c..7173a685309a 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -464,8 +464,11 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
if (ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0,
&ip6h->saddr)) {
kfree_skb(skb);
+ br->has_ipv6_addr = 0;
return NULL;
}
+
+ br->has_ipv6_addr = 1;
ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest);
hopopt = (u8 *)(ip6h + 1);
@@ -1270,6 +1273,7 @@ static int br_ip4_multicast_query(struct net_bridge *br,
struct br_ip saddr;
unsigned long max_delay;
unsigned long now = jiffies;
+ unsigned int offset = skb_transport_offset(skb);
__be32 group;
int err = 0;
@@ -1280,14 +1284,14 @@ static int br_ip4_multicast_query(struct net_bridge *br,
group = ih->group;
- if (skb->len == sizeof(*ih)) {
+ if (skb->len == offset + sizeof(*ih)) {
max_delay = ih->code * (HZ / IGMP_TIMER_SCALE);
if (!max_delay) {
max_delay = 10 * HZ;
group = 0;
}
- } else if (skb->len >= sizeof(*ih3)) {
+ } else if (skb->len >= offset + sizeof(*ih3)) {
ih3 = igmpv3_query_hdr(skb);
if (ih3->nsrcs)
goto out;
@@ -1348,6 +1352,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
struct br_ip saddr;
unsigned long max_delay;
unsigned long now = jiffies;
+ unsigned int offset = skb_transport_offset(skb);
const struct in6_addr *group = NULL;
bool is_general_query;
int err = 0;
@@ -1357,8 +1362,8 @@ static int br_ip6_multicast_query(struct net_bridge *br,
(port && port->state == BR_STATE_DISABLED))
goto out;
- if (skb->len == sizeof(*mld)) {
- if (!pskb_may_pull(skb, sizeof(*mld))) {
+ if (skb->len == offset + sizeof(*mld)) {
+ if (!pskb_may_pull(skb, offset + sizeof(*mld))) {
err = -EINVAL;
goto out;
}
@@ -1367,7 +1372,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
if (max_delay)
group = &mld->mld_mca;
} else {
- if (!pskb_may_pull(skb, sizeof(*mld2q))) {
+ if (!pskb_may_pull(skb, offset + sizeof(*mld2q))) {
err = -EINVAL;
goto out;
}
@@ -1734,6 +1739,7 @@ void br_multicast_init(struct net_bridge *br)
br->ip6_other_query.delay_time = 0;
br->ip6_querier.port = NULL;
#endif
+ br->has_ipv6_addr = 1;
spin_lock_init(&br->multicast_lock);
setup_timer(&br->multicast_router_timer,
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 216018c76018..1001a1b7df9b 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -301,6 +301,7 @@ struct net_bridge
u8 multicast_disabled:1;
u8 multicast_querier:1;
u8 multicast_query_use_ifaddr:1;
+ u8 has_ipv6_addr:1;
u32 hash_elasticity;
u32 hash_max;
@@ -574,10 +575,22 @@ static inline bool br_multicast_is_router(struct net_bridge *br)
static inline bool
__br_multicast_querier_exists(struct net_bridge *br,
- struct bridge_mcast_other_query *querier)
+ struct bridge_mcast_other_query *querier,
+ const bool is_ipv6)
{
+ bool own_querier_enabled;
+
+ if (br->multicast_querier) {
+ if (is_ipv6 && !br->has_ipv6_addr)
+ own_querier_enabled = false;
+ else
+ own_querier_enabled = true;
+ } else {
+ own_querier_enabled = false;
+ }
+
return time_is_before_jiffies(querier->delay_time) &&
- (br->multicast_querier || timer_pending(&querier->timer));
+ (own_querier_enabled || timer_pending(&querier->timer));
}
static inline bool br_multicast_querier_exists(struct net_bridge *br,
@@ -585,10 +598,12 @@ static inline bool br_multicast_querier_exists(struct net_bridge *br,
{
switch (eth->h_proto) {
case (htons(ETH_P_IP)):
- return __br_multicast_querier_exists(br, &br->ip4_other_query);
+ return __br_multicast_querier_exists(br,
+ &br->ip4_other_query, false);
#if IS_ENABLED(CONFIG_IPV6)
case (htons(ETH_P_IPV6)):
- return __br_multicast_querier_exists(br, &br->ip6_other_query);
+ return __br_multicast_querier_exists(br,
+ &br->ip6_other_query, true);
#endif
default:
return false;
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 5f3f64553179..eff69cb270d2 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -567,6 +567,14 @@ int br_set_max_age(struct net_bridge *br, unsigned long val)
}
+/* Set time interval that dynamic forwarding entries live
+ * For pure software bridge, allow values outside the 802.1
+ * standard specification for special cases:
+ * 0 - entry never ages (all permanant)
+ * 1 - entry disappears (no persistance)
+ *
+ * Offloaded switch entries maybe more restrictive
+ */
int br_set_ageing_time(struct net_bridge *br, u32 ageing_time)
{
struct switchdev_attr attr = {
@@ -577,11 +585,8 @@ int br_set_ageing_time(struct net_bridge *br, u32 ageing_time)
unsigned long t = clock_t_to_jiffies(ageing_time);
int err;
- if (t < BR_MIN_AGEING_TIME || t > BR_MAX_AGEING_TIME)
- return -ERANGE;
-
err = switchdev_port_attr_set(br->dev, &attr);
- if (err)
+ if (err && err != -EOPNOTSUPP)
return err;
br->ageing_time = t;
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 9981039ef4ff..63ae5dd24fc5 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -672,6 +672,8 @@ static void reset_connection(struct ceph_connection *con)
}
con->in_seq = 0;
con->in_seq_acked = 0;
+
+ con->out_skip = 0;
}
/*
@@ -771,6 +773,8 @@ static u32 get_global_seq(struct ceph_messenger *msgr, u32 gt)
static void con_out_kvec_reset(struct ceph_connection *con)
{
+ BUG_ON(con->out_skip);
+
con->out_kvec_left = 0;
con->out_kvec_bytes = 0;
con->out_kvec_cur = &con->out_kvec[0];
@@ -779,9 +783,9 @@ static void con_out_kvec_reset(struct ceph_connection *con)
static void con_out_kvec_add(struct ceph_connection *con,
size_t size, void *data)
{
- int index;
+ int index = con->out_kvec_left;
- index = con->out_kvec_left;
+ BUG_ON(con->out_skip);
BUG_ON(index >= ARRAY_SIZE(con->out_kvec));
con->out_kvec[index].iov_len = size;
@@ -790,6 +794,27 @@ static void con_out_kvec_add(struct ceph_connection *con,
con->out_kvec_bytes += size;
}
+/*
+ * Chop off a kvec from the end. Return residual number of bytes for
+ * that kvec, i.e. how many bytes would have been written if the kvec
+ * hadn't been nuked.
+ */
+static int con_out_kvec_skip(struct ceph_connection *con)
+{
+ int off = con->out_kvec_cur - con->out_kvec;
+ int skip = 0;
+
+ if (con->out_kvec_bytes > 0) {
+ skip = con->out_kvec[off + con->out_kvec_left - 1].iov_len;
+ BUG_ON(con->out_kvec_bytes < skip);
+ BUG_ON(!con->out_kvec_left);
+ con->out_kvec_bytes -= skip;
+ con->out_kvec_left--;
+ }
+
+ return skip;
+}
+
#ifdef CONFIG_BLOCK
/*
@@ -1175,6 +1200,13 @@ static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor,
return new_piece;
}
+static size_t sizeof_footer(struct ceph_connection *con)
+{
+ return (con->peer_features & CEPH_FEATURE_MSG_AUTH) ?
+ sizeof(struct ceph_msg_footer) :
+ sizeof(struct ceph_msg_footer_old);
+}
+
static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
{
BUG_ON(!msg);
@@ -1197,7 +1229,6 @@ static void prepare_write_message_footer(struct ceph_connection *con)
m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE;
dout("prepare_write_message_footer %p\n", con);
- con->out_kvec_is_msg = true;
con->out_kvec[v].iov_base = &m->footer;
if (con->peer_features & CEPH_FEATURE_MSG_AUTH) {
if (con->ops->sign_message)
@@ -1225,7 +1256,6 @@ static void prepare_write_message(struct ceph_connection *con)
u32 crc;
con_out_kvec_reset(con);
- con->out_kvec_is_msg = true;
con->out_msg_done = false;
/* Sneak an ack in there first? If we can get it into the same
@@ -1265,18 +1295,19 @@ static void prepare_write_message(struct ceph_connection *con)
/* tag + hdr + front + middle */
con_out_kvec_add(con, sizeof (tag_msg), &tag_msg);
- con_out_kvec_add(con, sizeof (m->hdr), &m->hdr);
+ con_out_kvec_add(con, sizeof(con->out_hdr), &con->out_hdr);
con_out_kvec_add(con, m->front.iov_len, m->front.iov_base);
if (m->middle)
con_out_kvec_add(con, m->middle->vec.iov_len,
m->middle->vec.iov_base);
- /* fill in crc (except data pages), footer */
+ /* fill in hdr crc and finalize hdr */
crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc));
con->out_msg->hdr.crc = cpu_to_le32(crc);
- con->out_msg->footer.flags = 0;
+ memcpy(&con->out_hdr, &con->out_msg->hdr, sizeof(con->out_hdr));
+ /* fill in front and middle crc, footer */
crc = crc32c(0, m->front.iov_base, m->front.iov_len);
con->out_msg->footer.front_crc = cpu_to_le32(crc);
if (m->middle) {
@@ -1288,6 +1319,7 @@ static void prepare_write_message(struct ceph_connection *con)
dout("%s front_crc %u middle_crc %u\n", __func__,
le32_to_cpu(con->out_msg->footer.front_crc),
le32_to_cpu(con->out_msg->footer.middle_crc));
+ con->out_msg->footer.flags = 0;
/* is there a data payload? */
con->out_msg->footer.data_crc = 0;
@@ -1492,7 +1524,6 @@ static int write_partial_kvec(struct ceph_connection *con)
}
}
con->out_kvec_left = 0;
- con->out_kvec_is_msg = false;
ret = 1;
out:
dout("write_partial_kvec %p %d left in %d kvecs ret = %d\n", con,
@@ -1584,6 +1615,7 @@ static int write_partial_skip(struct ceph_connection *con)
{
int ret;
+ dout("%s %p %d left\n", __func__, con, con->out_skip);
while (con->out_skip > 0) {
size_t size = min(con->out_skip, (int) PAGE_CACHE_SIZE);
@@ -2313,9 +2345,9 @@ static int read_partial_message(struct ceph_connection *con)
ceph_pr_addr(&con->peer_addr.in_addr),
seq, con->in_seq + 1);
con->in_base_pos = -front_len - middle_len - data_len -
- sizeof(m->footer);
+ sizeof_footer(con);
con->in_tag = CEPH_MSGR_TAG_READY;
- return 0;
+ return 1;
} else if ((s64)seq - (s64)con->in_seq > 1) {
pr_err("read_partial_message bad seq %lld expected %lld\n",
seq, con->in_seq + 1);
@@ -2338,10 +2370,10 @@ static int read_partial_message(struct ceph_connection *con)
/* skip this message */
dout("alloc_msg said skip message\n");
con->in_base_pos = -front_len - middle_len - data_len -
- sizeof(m->footer);
+ sizeof_footer(con);
con->in_tag = CEPH_MSGR_TAG_READY;
con->in_seq++;
- return 0;
+ return 1;
}
BUG_ON(!con->in_msg);
@@ -2506,13 +2538,13 @@ more:
more_kvec:
/* kvec data queued? */
- if (con->out_skip) {
- ret = write_partial_skip(con);
+ if (con->out_kvec_left) {
+ ret = write_partial_kvec(con);
if (ret <= 0)
goto out;
}
- if (con->out_kvec_left) {
- ret = write_partial_kvec(con);
+ if (con->out_skip) {
+ ret = write_partial_skip(con);
if (ret <= 0)
goto out;
}
@@ -3050,16 +3082,31 @@ void ceph_msg_revoke(struct ceph_msg *msg)
ceph_msg_put(msg);
}
if (con->out_msg == msg) {
- dout("%s %p msg %p - was sending\n", __func__, con, msg);
- con->out_msg = NULL;
- if (con->out_kvec_is_msg) {
- con->out_skip = con->out_kvec_bytes;
- con->out_kvec_is_msg = false;
+ BUG_ON(con->out_skip);
+ /* footer */
+ if (con->out_msg_done) {
+ con->out_skip += con_out_kvec_skip(con);
+ } else {
+ BUG_ON(!msg->data_length);
+ if (con->peer_features & CEPH_FEATURE_MSG_AUTH)
+ con->out_skip += sizeof(msg->footer);
+ else
+ con->out_skip += sizeof(msg->old_footer);
}
+ /* data, middle, front */
+ if (msg->data_length)
+ con->out_skip += msg->cursor.total_resid;
+ if (msg->middle)
+ con->out_skip += con_out_kvec_skip(con);
+ con->out_skip += con_out_kvec_skip(con);
+
+ dout("%s %p msg %p - was sending, will write %d skip %d\n",
+ __func__, con, msg, con->out_kvec_bytes, con->out_skip);
msg->hdr.seq = 0;
-
+ con->out_msg = NULL;
ceph_msg_put(msg);
}
+
mutex_unlock(&con->mutex);
}
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index f8f235930d88..a28e47ff1b1b 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -2843,8 +2843,8 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
mutex_lock(&osdc->request_mutex);
req = __lookup_request(osdc, tid);
if (!req) {
- pr_warn("%s osd%d tid %llu unknown, skipping\n",
- __func__, osd->o_osd, tid);
+ dout("%s osd%d tid %llu unknown, skipping\n", __func__,
+ osd->o_osd, tid);
m = NULL;
*skip = 1;
goto out;
diff --git a/net/core/dev.c b/net/core/dev.c
index ae00b894e675..9efbdb3ff78a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2542,6 +2542,8 @@ static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
*
* It may return NULL if the skb requires no segmentation. This is
* only possible when GSO is used for verifying header integrity.
+ *
+ * Segmentation preserves SKB_SGO_CB_OFFSET bytes of previous skb cb.
*/
struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
netdev_features_t features, bool tx_path)
@@ -2556,6 +2558,9 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
return ERR_PTR(err);
}
+ BUILD_BUG_ON(SKB_SGO_CB_OFFSET +
+ sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb));
+
SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
SKB_GSO_CB(skb)->encap_level = 0;
@@ -4140,6 +4145,7 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
diffs |= p->vlan_tci ^ skb->vlan_tci;
+ diffs |= skb_metadata_dst_cmp(p, skb);
if (maclen == ETH_HLEN)
diffs |= compare_ether_header(skb_mac_header(p),
skb_mac_header(skb));
@@ -4337,10 +4343,12 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
break;
case GRO_MERGED_FREE:
- if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
+ if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) {
+ skb_dst_drop(skb);
kmem_cache_free(skbuff_head_cache, skb);
- else
+ } else {
__kfree_skb(skb);
+ }
break;
case GRO_HELD:
@@ -7120,8 +7128,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
setup(dev);
- if (!dev->tx_queue_len)
+ if (!dev->tx_queue_len) {
dev->priv_flags |= IFF_NO_QUEUE;
+ dev->tx_queue_len = 1;
+ }
dev->num_tx_queues = txqs;
dev->real_num_tx_queues = txqs;
diff --git a/net/core/filter.c b/net/core/filter.c
index 672eefbfbe99..75e9b2b2336d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -777,6 +777,11 @@ static int bpf_check_classic(const struct sock_filter *filter,
if (ftest->k == 0)
return -EINVAL;
break;
+ case BPF_ALU | BPF_LSH | BPF_K:
+ case BPF_ALU | BPF_RSH | BPF_K:
+ if (ftest->k >= 32)
+ return -EINVAL;
+ break;
case BPF_LD | BPF_MEM:
case BPF_LDX | BPF_MEM:
case BPF_ST:
@@ -1134,7 +1139,8 @@ void bpf_prog_destroy(struct bpf_prog *fp)
}
EXPORT_SYMBOL_GPL(bpf_prog_destroy);
-static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
+static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk,
+ bool locked)
{
struct sk_filter *fp, *old_fp;
@@ -1150,10 +1156,8 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
return -ENOMEM;
}
- old_fp = rcu_dereference_protected(sk->sk_filter,
- sock_owned_by_user(sk));
+ old_fp = rcu_dereference_protected(sk->sk_filter, locked);
rcu_assign_pointer(sk->sk_filter, fp);
-
if (old_fp)
sk_filter_uncharge(sk, old_fp);
@@ -1170,7 +1174,8 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
* occurs or there is insufficient memory for the filter a negative
* errno code is returned. On success the return is zero.
*/
-int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
+int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk,
+ bool locked)
{
unsigned int fsize = bpf_classic_proglen(fprog);
unsigned int bpf_fsize = bpf_prog_size(fprog->len);
@@ -1208,7 +1213,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
if (IS_ERR(prog))
return PTR_ERR(prog);
- err = __sk_attach_prog(prog, sk);
+ err = __sk_attach_prog(prog, sk, locked);
if (err < 0) {
__bpf_prog_release(prog);
return err;
@@ -1216,7 +1221,12 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
return 0;
}
-EXPORT_SYMBOL_GPL(sk_attach_filter);
+EXPORT_SYMBOL_GPL(__sk_attach_filter);
+
+int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
+{
+ return __sk_attach_filter(fprog, sk, sock_owned_by_user(sk));
+}
int sk_attach_bpf(u32 ufd, struct sock *sk)
{
@@ -1235,7 +1245,7 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
return -EINVAL;
}
- err = __sk_attach_prog(prog, sk);
+ err = __sk_attach_prog(prog, sk, sock_owned_by_user(sk));
if (err < 0) {
bpf_prog_put(prog);
return err;
@@ -1265,9 +1275,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
*/
if (unlikely((u32) offset > 0xffff || len > sizeof(buf)))
return -EFAULT;
-
- if (unlikely(skb_cloned(skb) &&
- !skb_clone_writable(skb, offset + len)))
+ if (unlikely(skb_try_make_writable(skb, offset + len)))
return -EFAULT;
ptr = skb_header_pointer(skb, offset, len, buf);
@@ -1311,8 +1319,7 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
if (unlikely((u32) offset > 0xffff))
return -EFAULT;
- if (unlikely(skb_cloned(skb) &&
- !skb_clone_writable(skb, offset + sizeof(sum))))
+ if (unlikely(skb_try_make_writable(skb, offset + sizeof(sum))))
return -EFAULT;
ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
@@ -1357,9 +1364,7 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
if (unlikely((u32) offset > 0xffff))
return -EFAULT;
-
- if (unlikely(skb_cloned(skb) &&
- !skb_clone_writable(skb, offset + sizeof(sum))))
+ if (unlikely(skb_try_make_writable(skb, offset + sizeof(sum))))
return -EFAULT;
ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
@@ -1544,6 +1549,13 @@ bool bpf_helper_changes_skb_data(void *func)
return true;
if (func == bpf_skb_vlan_pop)
return true;
+ if (func == bpf_skb_store_bytes)
+ return true;
+ if (func == bpf_l3_csum_replace)
+ return true;
+ if (func == bpf_l4_csum_replace)
+ return true;
+
return false;
}
@@ -1908,7 +1920,7 @@ static int __init register_sk_filter_ops(void)
}
late_initcall(register_sk_filter_ops);
-int sk_detach_filter(struct sock *sk)
+int __sk_detach_filter(struct sock *sk, bool locked)
{
int ret = -ENOENT;
struct sk_filter *filter;
@@ -1916,8 +1928,7 @@ int sk_detach_filter(struct sock *sk)
if (sock_flag(sk, SOCK_FILTER_LOCKED))
return -EPERM;
- filter = rcu_dereference_protected(sk->sk_filter,
- sock_owned_by_user(sk));
+ filter = rcu_dereference_protected(sk->sk_filter, locked);
if (filter) {
RCU_INIT_POINTER(sk->sk_filter, NULL);
sk_filter_uncharge(sk, filter);
@@ -1926,7 +1937,12 @@ int sk_detach_filter(struct sock *sk)
return ret;
}
-EXPORT_SYMBOL_GPL(sk_detach_filter);
+EXPORT_SYMBOL_GPL(__sk_detach_filter);
+
+int sk_detach_filter(struct sock *sk)
+{
+ return __sk_detach_filter(sk, sock_owned_by_user(sk));
+}
int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
unsigned int len)
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index d79699c9d1b9..4ab6ead3d8ee 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -208,7 +208,6 @@ ip:
case htons(ETH_P_IPV6): {
const struct ipv6hdr *iph;
struct ipv6hdr _iph;
- __be32 flow_label;
ipv6:
iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
@@ -230,8 +229,12 @@ ipv6:
key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
}
- flow_label = ip6_flowlabel(iph);
- if (flow_label) {
+ if ((dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_FLOW_LABEL) ||
+ (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)) &&
+ ip6_flowlabel(iph)) {
+ __be32 flow_label = ip6_flowlabel(iph);
+
if (dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_FLOW_LABEL)) {
key_tags = skb_flow_dissector_target(flow_dissector,
@@ -396,6 +399,13 @@ ip_proto_again:
goto out_bad;
proto = eth->h_proto;
nhoff += sizeof(*eth);
+
+ /* Cap headers that we access via pointers at the
+ * end of the Ethernet header as our maximum alignment
+ * at that point is only 2 bytes.
+ */
+ if (NET_IP_ALIGN)
+ hlen = nhoff;
}
key_control->flags |= FLOW_DIS_ENCAPSULATION;
@@ -652,6 +662,23 @@ void make_flow_keys_digest(struct flow_keys_digest *digest,
}
EXPORT_SYMBOL(make_flow_keys_digest);
+static struct flow_dissector flow_keys_dissector_symmetric __read_mostly;
+
+u32 __skb_get_hash_symmetric(struct sk_buff *skb)
+{
+ struct flow_keys keys;
+
+ __flow_hash_secret_init();
+
+ memset(&keys, 0, sizeof(keys));
+ __skb_flow_dissect(skb, &flow_keys_dissector_symmetric, &keys,
+ NULL, 0, 0, 0,
+ FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
+
+ return __flow_hash_from_keys(&keys, hashrnd);
+}
+EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric);
+
/**
* __skb_get_hash: calculate a flow hash
* @skb: sk_buff to calculate flow hash from
@@ -864,6 +891,29 @@ static const struct flow_dissector_key flow_keys_dissector_keys[] = {
},
};
+static const struct flow_dissector_key flow_keys_dissector_symmetric_keys[] = {
+ {
+ .key_id = FLOW_DISSECTOR_KEY_CONTROL,
+ .offset = offsetof(struct flow_keys, control),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_BASIC,
+ .offset = offsetof(struct flow_keys, basic),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ .offset = offsetof(struct flow_keys, addrs.v4addrs),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+ .offset = offsetof(struct flow_keys, addrs.v6addrs),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_PORTS,
+ .offset = offsetof(struct flow_keys, ports),
+ },
+};
+
static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = {
{
.key_id = FLOW_DISSECTOR_KEY_CONTROL,
@@ -885,6 +935,9 @@ static int __init init_default_flow_dissectors(void)
skb_flow_dissector_init(&flow_keys_dissector,
flow_keys_dissector_keys,
ARRAY_SIZE(flow_keys_dissector_keys));
+ skb_flow_dissector_init(&flow_keys_dissector_symmetric,
+ flow_keys_dissector_symmetric_keys,
+ ARRAY_SIZE(flow_keys_dissector_symmetric_keys));
skb_flow_dissector_init(&flow_keys_buf_dissector,
flow_keys_buf_dissector_keys,
ARRAY_SIZE(flow_keys_buf_dissector_keys));
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index f18ae91b652e..769cece9b00b 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2467,13 +2467,17 @@ int neigh_xmit(int index, struct net_device *dev,
tbl = neigh_tables[index];
if (!tbl)
goto out;
+ rcu_read_lock_bh();
neigh = __neigh_lookup_noref(tbl, addr, dev);
if (!neigh)
neigh = __neigh_create(tbl, addr, dev, false);
err = PTR_ERR(neigh);
- if (IS_ERR(neigh))
+ if (IS_ERR(neigh)) {
+ rcu_read_unlock_bh();
goto out_kfree_skb;
+ }
err = neigh->output(neigh, skb);
+ rcu_read_unlock_bh();
}
else if (index == NEIGH_LINK_TABLE) {
err = dev_hard_header(skb, dev, ntohs(skb->protocol),
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index de8d5cc5eb24..4da4d51a2ccf 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2787,7 +2787,9 @@ static struct sk_buff *pktgen_alloc_skb(struct net_device *dev,
} else {
skb = __netdev_alloc_skb(dev, size, GFP_NOWAIT);
}
- skb_reserve(skb, LL_RESERVED_SPACE(dev));
+
+ if (likely(skb))
+ skb_reserve(skb, LL_RESERVED_SPACE(dev));
return skb;
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 9c6d15756e7a..87b91ffbdec3 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -905,6 +905,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ rtnl_link_get_af_size(dev, ext_filter_mask) /* IFLA_AF_SPEC */
+ nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */
+ nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */
+ + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */
+ nla_total_size(1); /* IFLA_PROTO_DOWN */
}
diff --git a/net/core/scm.c b/net/core/scm.c
index 8a1741b14302..dce0acb929f1 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -87,6 +87,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
*fplp = fpl;
fpl->count = 0;
fpl->max = SCM_MAX_FD;
+ fpl->user = NULL;
}
fpp = &fpl->fp[fpl->count];
@@ -107,6 +108,10 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
*fpp++ = file;
fpl->count++;
}
+
+ if (!fpl->user)
+ fpl->user = get_uid(current_user());
+
return num;
}
@@ -119,6 +124,7 @@ void __scm_destroy(struct scm_cookie *scm)
scm->fp = NULL;
for (i=fpl->count-1; i>=0; i--)
fput(fpl->fp[i]);
+ free_uid(fpl->user);
kfree(fpl);
}
}
@@ -336,6 +342,7 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
for (i = 0; i < fpl->count; i++)
get_file(fpl->fp[i]);
new_fpl->max = new_fpl->count;
+ new_fpl->user = get_uid(fpl->user);
}
return new_fpl;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b2df375ec9c2..4968b5ddea69 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -79,6 +79,8 @@
struct kmem_cache *skbuff_head_cache __read_mostly;
static struct kmem_cache *skbuff_fclone_cache __read_mostly;
+int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
+EXPORT_SYMBOL(sysctl_max_skb_frags);
/**
* skb_panic - private function for out-of-line support
@@ -4082,9 +4084,9 @@ struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb,
if (!pskb_may_pull(skb_chk, offset))
goto err;
- __skb_pull(skb_chk, offset);
+ skb_pull_rcsum(skb_chk, offset);
ret = skb_chkf(skb_chk);
- __skb_push(skb_chk, offset);
+ skb_push_rcsum(skb_chk, offset);
if (ret)
goto err;
@@ -4407,15 +4409,16 @@ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
__skb_push(skb, offset);
err = __vlan_insert_tag(skb, skb->vlan_proto,
skb_vlan_tag_get(skb));
- if (err)
+ if (err) {
+ __skb_pull(skb, offset);
return err;
+ }
+
skb->protocol = skb->vlan_proto;
skb->mac_len += VLAN_HLEN;
- __skb_pull(skb, offset);
- if (skb->ip_summed == CHECKSUM_COMPLETE)
- skb->csum = csum_add(skb->csum, csum_partial(skb->data
- + (2 * ETH_ALEN), VLAN_HLEN, 0));
+ skb_postpush_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN);
+ __skb_pull(skb, offset);
}
__vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
return 0;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 95b6139d710c..a6beb7b6ae55 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -26,6 +26,7 @@ static int zero = 0;
static int one = 1;
static int min_sndbuf = SOCK_MIN_SNDBUF;
static int min_rcvbuf = SOCK_MIN_RCVBUF;
+static int max_skb_frags = MAX_SKB_FRAGS;
static int net_msg_warn; /* Unused, but still a sysctl */
@@ -392,6 +393,15 @@ static struct ctl_table net_core_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "max_skb_frags",
+ .data = &sysctl_max_skb_frags,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one,
+ .extra2 = &max_skb_frags,
+ },
{ }
};
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 5684e14932bd..8be8f27bfacc 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -204,8 +204,6 @@ void dccp_req_err(struct sock *sk, u64 seq)
* ICMPs are not backlogged, hence we cannot get an established
* socket here.
*/
- WARN_ON(req->sk);
-
if (!between48(seq, dccp_rsk(req)->dreq_iss, dccp_rsk(req)->dreq_gss)) {
NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
} else {
@@ -824,26 +822,26 @@ lookup:
if (sk->sk_state == DCCP_NEW_SYN_RECV) {
struct request_sock *req = inet_reqsk(sk);
- struct sock *nsk = NULL;
+ struct sock *nsk;
sk = req->rsk_listener;
- if (likely(sk->sk_state == DCCP_LISTEN)) {
- nsk = dccp_check_req(sk, skb, req);
- } else {
+ if (unlikely(sk->sk_state != DCCP_LISTEN)) {
inet_csk_reqsk_queue_drop_and_put(sk, req);
goto lookup;
}
+ sock_hold(sk);
+ nsk = dccp_check_req(sk, skb, req);
if (!nsk) {
reqsk_put(req);
- goto discard_it;
+ goto discard_and_relse;
}
if (nsk == sk) {
- sock_hold(sk);
reqsk_put(req);
} else if (dccp_child_process(sk, nsk, skb)) {
dccp_v4_ctl_send_reset(sk, skb);
- goto discard_it;
+ goto discard_and_relse;
} else {
+ sock_put(sk);
return 0;
}
}
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 9c6d0508e63a..b8608b71a66d 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -691,26 +691,26 @@ lookup:
if (sk->sk_state == DCCP_NEW_SYN_RECV) {
struct request_sock *req = inet_reqsk(sk);
- struct sock *nsk = NULL;
+ struct sock *nsk;
sk = req->rsk_listener;
- if (likely(sk->sk_state == DCCP_LISTEN)) {
- nsk = dccp_check_req(sk, skb, req);
- } else {
+ if (unlikely(sk->sk_state != DCCP_LISTEN)) {
inet_csk_reqsk_queue_drop_and_put(sk, req);
goto lookup;
}
+ sock_hold(sk);
+ nsk = dccp_check_req(sk, skb, req);
if (!nsk) {
reqsk_put(req);
- goto discard_it;
+ goto discard_and_relse;
}
if (nsk == sk) {
- sock_hold(sk);
reqsk_put(req);
} else if (dccp_child_process(sk, nsk, skb)) {
dccp_v6_ctl_send_reset(sk, skb);
- goto discard_it;
+ goto discard_and_relse;
} else {
+ sock_put(sk);
return 0;
}
}
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 607a14f20d88..b1dc096d22f8 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1034,10 +1034,13 @@ source_ok:
if (!fld.daddr) {
fld.daddr = fld.saddr;
- err = -EADDRNOTAVAIL;
if (dev_out)
dev_put(dev_out);
+ err = -EINVAL;
dev_out = init_net.loopback_dev;
+ if (!dev_out->dn_ptr)
+ goto out;
+ err = -EADDRNOTAVAIL;
dev_hold(dev_out);
if (!fld.daddr) {
fld.daddr =
@@ -1110,6 +1113,8 @@ source_ok:
if (dev_out == NULL)
goto out;
dn_db = rcu_dereference_raw(dev_out->dn_ptr);
+ if (!dn_db)
+ goto e_inval;
/* Possible improvement - check all devices for local addr */
if (dn_dev_islocal(dev_out, fld.daddr)) {
dev_put(dev_out);
@@ -1151,6 +1156,8 @@ select_source:
dev_put(dev_out);
dev_out = init_net.loopback_dev;
dev_hold(dev_out);
+ if (!dev_out->dn_ptr)
+ goto e_inval;
fld.flowidn_oif = dev_out->ifindex;
if (res.fi)
dn_fib_info_put(res.fi);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index cebd9d31e65a..0212591b0077 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -334,6 +334,9 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
ASSERT_RTNL();
+ if (in_dev->dead)
+ goto no_promotions;
+
/* 1. Deleting primary ifaddr forces deletion all secondaries
* unless alias promotion is set
**/
@@ -380,6 +383,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
fib_del_ifaddr(ifa, ifa1);
}
+no_promotions:
/* 2. Unlink it */
*ifap = ifa1->ifa_next;
@@ -1847,7 +1851,7 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
if (err < 0)
goto errout;
- err = EINVAL;
+ err = -EINVAL;
if (!tb[NETCONFA_IFINDEX])
goto errout;
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 477937465a20..d95631d09248 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -23,6 +23,11 @@ struct esp_skb_cb {
void *tmp;
};
+struct esp_output_extra {
+ __be32 seqhi;
+ u32 esphoff;
+};
+
#define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0]))
static u32 esp4_get_mtu(struct xfrm_state *x, int mtu);
@@ -35,11 +40,11 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu);
*
* TODO: Use spare space in skb for this where possible.
*/
-static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqhilen)
+static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int extralen)
{
unsigned int len;
- len = seqhilen;
+ len = extralen;
len += crypto_aead_ivsize(aead);
@@ -57,15 +62,16 @@ static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqhilen)
return kmalloc(len, GFP_ATOMIC);
}
-static inline __be32 *esp_tmp_seqhi(void *tmp)
+static inline void *esp_tmp_extra(void *tmp)
{
- return PTR_ALIGN((__be32 *)tmp, __alignof__(__be32));
+ return PTR_ALIGN(tmp, __alignof__(struct esp_output_extra));
}
-static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen)
+
+static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int extralen)
{
return crypto_aead_ivsize(aead) ?
- PTR_ALIGN((u8 *)tmp + seqhilen,
- crypto_aead_alignmask(aead) + 1) : tmp + seqhilen;
+ PTR_ALIGN((u8 *)tmp + extralen,
+ crypto_aead_alignmask(aead) + 1) : tmp + extralen;
}
static inline struct aead_request *esp_tmp_req(struct crypto_aead *aead, u8 *iv)
@@ -99,7 +105,7 @@ static void esp_restore_header(struct sk_buff *skb, unsigned int offset)
{
struct ip_esp_hdr *esph = (void *)(skb->data + offset);
void *tmp = ESP_SKB_CB(skb)->tmp;
- __be32 *seqhi = esp_tmp_seqhi(tmp);
+ __be32 *seqhi = esp_tmp_extra(tmp);
esph->seq_no = esph->spi;
esph->spi = *seqhi;
@@ -107,7 +113,11 @@ static void esp_restore_header(struct sk_buff *skb, unsigned int offset)
static void esp_output_restore_header(struct sk_buff *skb)
{
- esp_restore_header(skb, skb_transport_offset(skb) - sizeof(__be32));
+ void *tmp = ESP_SKB_CB(skb)->tmp;
+ struct esp_output_extra *extra = esp_tmp_extra(tmp);
+
+ esp_restore_header(skb, skb_transport_offset(skb) + extra->esphoff -
+ sizeof(__be32));
}
static void esp_output_done_esn(struct crypto_async_request *base, int err)
@@ -121,6 +131,7 @@ static void esp_output_done_esn(struct crypto_async_request *base, int err)
static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
{
int err;
+ struct esp_output_extra *extra;
struct ip_esp_hdr *esph;
struct crypto_aead *aead;
struct aead_request *req;
@@ -137,8 +148,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
int tfclen;
int nfrags;
int assoclen;
- int seqhilen;
- __be32 *seqhi;
+ int extralen;
__be64 seqno;
/* skb is pure payload to encrypt */
@@ -166,21 +176,21 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
nfrags = err;
assoclen = sizeof(*esph);
- seqhilen = 0;
+ extralen = 0;
if (x->props.flags & XFRM_STATE_ESN) {
- seqhilen += sizeof(__be32);
- assoclen += seqhilen;
+ extralen += sizeof(*extra);
+ assoclen += sizeof(__be32);
}
- tmp = esp_alloc_tmp(aead, nfrags, seqhilen);
+ tmp = esp_alloc_tmp(aead, nfrags, extralen);
if (!tmp) {
err = -ENOMEM;
goto error;
}
- seqhi = esp_tmp_seqhi(tmp);
- iv = esp_tmp_iv(aead, tmp, seqhilen);
+ extra = esp_tmp_extra(tmp);
+ iv = esp_tmp_iv(aead, tmp, extralen);
req = esp_tmp_req(aead, iv);
sg = esp_req_sg(aead, req);
@@ -247,8 +257,10 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
* encryption.
*/
if ((x->props.flags & XFRM_STATE_ESN)) {
- esph = (void *)(skb_transport_header(skb) - sizeof(__be32));
- *seqhi = esph->spi;
+ extra->esphoff = (unsigned char *)esph -
+ skb_transport_header(skb);
+ esph = (struct ip_esp_hdr *)((unsigned char *)esph - 4);
+ extra->seqhi = esph->spi;
esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
aead_request_set_callback(req, 0, esp_output_done_esn, skb);
}
@@ -445,7 +457,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
goto out;
ESP_SKB_CB(skb)->tmp = tmp;
- seqhi = esp_tmp_seqhi(tmp);
+ seqhi = esp_tmp_extra(tmp);
iv = esp_tmp_iv(aead, tmp, seqhilen);
req = esp_tmp_req(aead, iv);
sg = esp_req_sg(aead, req);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index e10edb5e78b0..98c754e61024 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -280,7 +280,6 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
struct in_device *in_dev;
struct fib_result res;
struct rtable *rt;
- struct flowi4 fl4;
struct net *net;
int scope;
@@ -296,14 +295,13 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
scope = RT_SCOPE_UNIVERSE;
if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) {
- fl4.flowi4_oif = 0;
- fl4.flowi4_iif = LOOPBACK_IFINDEX;
- fl4.daddr = ip_hdr(skb)->saddr;
- fl4.saddr = 0;
- fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
- fl4.flowi4_scope = scope;
- fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0;
- fl4.flowi4_tun_key.tun_id = 0;
+ struct flowi4 fl4 = {
+ .flowi4_iif = LOOPBACK_IFINDEX,
+ .daddr = ip_hdr(skb)->saddr,
+ .flowi4_tos = RT_TOS(ip_hdr(skb)->tos),
+ .flowi4_scope = scope,
+ .flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0,
+ };
if (!fib_lookup(net, &fl4, &res, 0))
return FIB_RES_PREFSRC(net, res);
} else {
@@ -907,7 +905,11 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
if (ifa->ifa_flags & IFA_F_SECONDARY) {
prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
if (!prim) {
- pr_warn("%s: bug: prim == NULL\n", __func__);
+ /* if the device has been deleted, we don't perform
+ * address promotion
+ */
+ if (!in_dev->dead)
+ pr_warn("%s: bug: prim == NULL\n", __func__);
return;
}
if (iprim && iprim != prim) {
@@ -923,6 +925,9 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
subnet = 1;
}
+ if (in_dev->dead)
+ goto no_promotions;
+
/* Deletion is more complicated than add.
* We should take care of not to delete too much :-)
*
@@ -998,6 +1003,7 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
}
}
+no_promotions:
if (!(ok & BRD_OK))
fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
if (subnet && ifa->ifa_prefixlen < 31) {
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index d97268e8ff10..2b68418c7198 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -975,6 +975,8 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
val = 65535 - 40;
if (type == RTAX_MTU && val > 65535 - 15)
val = 65535 - 15;
+ if (type == RTAX_HOPLIMIT && val > 255)
+ val = 255;
if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
return -EINVAL;
fi->fib_metrics[type - 1] = val;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 05e4cba14162..b3086cf27027 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -356,9 +356,8 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
skb_dst_set(skb, &rt->dst);
skb->dev = dev;
- skb->reserved_tailroom = skb_end_offset(skb) -
- min(mtu, skb_end_offset(skb));
skb_reserve(skb, hlen);
+ skb_tailroom_reserve(skb, mtu, tlen);
skb_reset_network_header(skb);
pip = ip_hdr(skb);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 728414dcea3b..030cd09dd2a2 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -789,14 +789,16 @@ static void inet_child_forget(struct sock *sk, struct request_sock *req,
reqsk_put(req);
}
-void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req,
- struct sock *child)
+struct sock *inet_csk_reqsk_queue_add(struct sock *sk,
+ struct request_sock *req,
+ struct sock *child)
{
struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
spin_lock(&queue->rskq_lock);
if (unlikely(sk->sk_state != TCP_LISTEN)) {
inet_child_forget(sk, req, child);
+ child = NULL;
} else {
req->sk = child;
req->dl_next = NULL;
@@ -808,6 +810,7 @@ void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req,
sk_acceptq_added(sk);
}
spin_unlock(&queue->rskq_lock);
+ return child;
}
EXPORT_SYMBOL(inet_csk_reqsk_queue_add);
@@ -817,11 +820,8 @@ struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
if (own_req) {
inet_csk_reqsk_queue_drop(sk, req);
reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
- inet_csk_reqsk_queue_add(sk, req, child);
- /* Warning: caller must not call reqsk_put(req);
- * child stole last reference on it.
- */
- return child;
+ if (inet_csk_reqsk_queue_add(sk, req, child))
+ return child;
}
/* Too bad, another child took ownership of the request, undo. */
bh_unlock_sock(child);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 1fe55ae81781..b8a0607dab96 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -661,6 +661,7 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
struct ipq *qp;
IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS);
+ skb_orphan(skb);
/* Lookup (or create) queue header */
qp = ip_find(net, ip_hdr(skb), user, vif);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 614521437e30..3e4184088082 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -180,6 +180,7 @@ static __be16 tnl_flags_to_gre_flags(__be16 tflags)
return flags;
}
+/* Fills in tpi and returns header length to be pulled. */
static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
bool *csum_err)
{
@@ -239,7 +240,7 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
return -EINVAL;
}
}
- return iptunnel_pull_header(skb, hdr_len, tpi->proto);
+ return hdr_len;
}
static void ipgre_err(struct sk_buff *skb, u32 info,
@@ -342,7 +343,7 @@ static void gre_err(struct sk_buff *skb, u32 info)
struct tnl_ptk_info tpi;
bool csum_err = false;
- if (parse_gre_header(skb, &tpi, &csum_err)) {
+ if (parse_gre_header(skb, &tpi, &csum_err) < 0) {
if (!csum_err) /* ignore csum errors. */
return;
}
@@ -420,6 +421,7 @@ static int gre_rcv(struct sk_buff *skb)
{
struct tnl_ptk_info tpi;
bool csum_err = false;
+ int hdr_len;
#ifdef CONFIG_NET_IPGRE_BROADCAST
if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
@@ -429,7 +431,10 @@ static int gre_rcv(struct sk_buff *skb)
}
#endif
- if (parse_gre_header(skb, &tpi, &csum_err) < 0)
+ hdr_len = parse_gre_header(skb, &tpi, &csum_err);
+ if (hdr_len < 0)
+ goto drop;
+ if (iptunnel_pull_header(skb, hdr_len, tpi.proto) < 0)
goto drop;
if (ipgre_rcv(skb, &tpi) == PACKET_RCVD)
@@ -1242,6 +1247,14 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
err = ipgre_newlink(net, dev, tb, NULL);
if (err < 0)
goto out;
+
+ /* openvswitch users expect packet sizes to be unrestricted,
+ * so set the largest MTU we can.
+ */
+ err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
+ if (err)
+ goto out;
+
return dev;
out:
free_netdev(dev);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 33bef2763c72..dbf7f7ee2958 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -240,6 +240,7 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk,
* from host network stack.
*/
features = netif_skb_features(skb);
+ BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_SGO_CB_OFFSET);
segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
if (IS_ERR_OR_NULL(segs)) {
kfree_skb(skb);
@@ -921,7 +922,7 @@ static int __ip_append_data(struct sock *sk,
if (((length > mtu) || (skb && skb_is_gso(skb))) &&
(sk->sk_protocol == IPPROTO_UDP) &&
(rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
- (sk->sk_type == SOCK_DGRAM)) {
+ (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) {
err = ip_ufo_append_data(sk, queue, getfrag, from, length,
hh_len, fragheaderlen, transhdrlen,
maxfraglen, flags);
@@ -1236,13 +1237,16 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
if (!skb)
return -EINVAL;
- cork->length += size;
if ((size + skb->len > mtu) &&
(sk->sk_protocol == IPPROTO_UDP) &&
(rt->dst.dev->features & NETIF_F_UFO)) {
+ if (skb->ip_summed != CHECKSUM_PARTIAL)
+ return -EOPNOTSUPP;
+
skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
}
+ cork->length += size;
while (size > 0) {
if (skb_is_gso(skb)) {
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 5f73a7c03e27..a50124260f5a 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -249,6 +249,8 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc,
switch (cmsg->cmsg_type) {
case IP_RETOPTS:
err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr));
+
+ /* Our caller is responsible for freeing ipc->opt */
err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg),
err < 40 ? err : 40);
if (err)
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index cbb51f3fac06..3310ac75e3f3 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -663,6 +663,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
connected = (tunnel->parms.iph.daddr != 0);
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
dst = tnl_params->daddr;
if (dst == 0) {
/* NBMA tunnel */
@@ -760,7 +762,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
tunnel->err_count--;
- memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
dst_link_failure(skb);
} else
tunnel->err_count = 0;
@@ -947,17 +948,31 @@ done:
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
-int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
+int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
int t_hlen = tunnel->hlen + sizeof(struct iphdr);
+ int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
- if (new_mtu < 68 ||
- new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
+ if (new_mtu < 68)
return -EINVAL;
+
+ if (new_mtu > max_mtu) {
+ if (strict)
+ return -EINVAL;
+
+ new_mtu = max_mtu;
+ }
+
dev->mtu = new_mtu;
return 0;
}
+EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
+
+int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
+{
+ return __ip_tunnel_change_mtu(dev, new_mtu, true);
+}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
static void ip_tunnel_dev_free(struct net_device *dev)
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index c3a38353f5dc..9d1e555496e3 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -882,8 +882,10 @@ static struct mfc_cache *ipmr_cache_alloc(void)
{
struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
- if (c)
+ if (c) {
+ c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
c->mfc_un.res.minvif = MAXVIFS;
+ }
return c;
}
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 36a30fab8625..6e3e0e8b1ce3 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -367,6 +367,18 @@ static inline bool unconditional(const struct arpt_entry *e)
memcmp(&e->arp, &uncond, sizeof(uncond)) == 0;
}
+static bool find_jump_target(const struct xt_table_info *t,
+ const struct arpt_entry *target)
+{
+ struct arpt_entry *iter;
+
+ xt_entry_foreach(iter, t->entries, t->size) {
+ if (iter == target)
+ return true;
+ }
+ return false;
+}
+
/* Figures out from what hook each rule can be called: returns 0 if
* there are loops. Puts hook bitmask in comefrom.
*/
@@ -439,6 +451,8 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
size = e->next_offset;
e = (struct arpt_entry *)
(entry0 + pos + size);
+ if (pos + size >= newinfo->size)
+ return 0;
e->counters.pcnt = pos;
pos += size;
} else {
@@ -458,9 +472,15 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
/* This a jump; chase it. */
duprintf("Jump rule %u -> %u\n",
pos, newpos);
+ e = (struct arpt_entry *)
+ (entry0 + newpos);
+ if (!find_jump_target(newinfo, e))
+ return 0;
} else {
/* ... this is a fallthru */
newpos = pos + e->next_offset;
+ if (newpos >= newinfo->size)
+ return 0;
}
e = (struct arpt_entry *)
(entry0 + newpos);
@@ -474,23 +494,6 @@ next:
return 1;
}
-static inline int check_entry(const struct arpt_entry *e)
-{
- const struct xt_entry_target *t;
-
- if (!arp_checkentry(&e->arp))
- return -EINVAL;
-
- if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset)
- return -EINVAL;
-
- t = arpt_get_target_c(e);
- if (e->target_offset + t->u.target_size > e->next_offset)
- return -EINVAL;
-
- return 0;
-}
-
static inline int check_target(struct arpt_entry *e, const char *name)
{
struct xt_entry_target *t = arpt_get_target(e);
@@ -586,7 +589,11 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e,
return -EINVAL;
}
- err = check_entry(e);
+ if (!arp_checkentry(&e->arp))
+ return -EINVAL;
+
+ err = xt_check_entry_offsets(e, e->elems, e->target_offset,
+ e->next_offset);
if (err)
return err;
@@ -691,10 +698,8 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
}
}
- if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) {
- duprintf("Looping hook\n");
+ if (!mark_source_chains(newinfo, repl->valid_hooks, entry0))
return -ELOOP;
- }
/* Finally, each sanity check must pass */
i = 0;
@@ -1125,55 +1130,17 @@ static int do_add_counters(struct net *net, const void __user *user,
unsigned int i;
struct xt_counters_info tmp;
struct xt_counters *paddc;
- unsigned int num_counters;
- const char *name;
- int size;
- void *ptmp;
struct xt_table *t;
const struct xt_table_info *private;
int ret = 0;
struct arpt_entry *iter;
unsigned int addend;
-#ifdef CONFIG_COMPAT
- struct compat_xt_counters_info compat_tmp;
- if (compat) {
- ptmp = &compat_tmp;
- size = sizeof(struct compat_xt_counters_info);
- } else
-#endif
- {
- ptmp = &tmp;
- size = sizeof(struct xt_counters_info);
- }
+ paddc = xt_copy_counters_from_user(user, len, &tmp, compat);
+ if (IS_ERR(paddc))
+ return PTR_ERR(paddc);
- if (copy_from_user(ptmp, user, size) != 0)
- return -EFAULT;
-
-#ifdef CONFIG_COMPAT
- if (compat) {
- num_counters = compat_tmp.num_counters;
- name = compat_tmp.name;
- } else
-#endif
- {
- num_counters = tmp.num_counters;
- name = tmp.name;
- }
-
- if (len != size + num_counters * sizeof(struct xt_counters))
- return -EINVAL;
-
- paddc = vmalloc(len - size);
- if (!paddc)
- return -ENOMEM;
-
- if (copy_from_user(paddc, user + size, len - size) != 0) {
- ret = -EFAULT;
- goto free;
- }
-
- t = xt_find_table_lock(net, NFPROTO_ARP, name);
+ t = xt_find_table_lock(net, NFPROTO_ARP, tmp.name);
if (IS_ERR_OR_NULL(t)) {
ret = t ? PTR_ERR(t) : -ENOENT;
goto free;
@@ -1181,7 +1148,7 @@ static int do_add_counters(struct net *net, const void __user *user,
local_bh_disable();
private = t->private;
- if (private->number != num_counters) {
+ if (private->number != tmp.num_counters) {
ret = -EINVAL;
goto unlock_up_free;
}
@@ -1208,6 +1175,18 @@ static int do_add_counters(struct net *net, const void __user *user,
}
#ifdef CONFIG_COMPAT
+struct compat_arpt_replace {
+ char name[XT_TABLE_MAXNAMELEN];
+ u32 valid_hooks;
+ u32 num_entries;
+ u32 size;
+ u32 hook_entry[NF_ARP_NUMHOOKS];
+ u32 underflow[NF_ARP_NUMHOOKS];
+ u32 num_counters;
+ compat_uptr_t counters;
+ struct compat_arpt_entry entries[0];
+};
+
static inline void compat_release_entry(struct compat_arpt_entry *e)
{
struct xt_entry_target *t;
@@ -1216,20 +1195,17 @@ static inline void compat_release_entry(struct compat_arpt_entry *e)
module_put(t->u.kernel.target->me);
}
-static inline int
+static int
check_compat_entry_size_and_hooks(struct compat_arpt_entry *e,
struct xt_table_info *newinfo,
unsigned int *size,
const unsigned char *base,
- const unsigned char *limit,
- const unsigned int *hook_entries,
- const unsigned int *underflows,
- const char *name)
+ const unsigned char *limit)
{
struct xt_entry_target *t;
struct xt_target *target;
unsigned int entry_offset;
- int ret, off, h;
+ int ret, off;
duprintf("check_compat_entry_size_and_hooks %p\n", e);
if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0 ||
@@ -1246,8 +1222,11 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e,
return -EINVAL;
}
- /* For purposes of check_entry casting the compat entry is fine */
- ret = check_entry((struct arpt_entry *)e);
+ if (!arp_checkentry(&e->arp))
+ return -EINVAL;
+
+ ret = xt_compat_check_entry_offsets(e, e->elems, e->target_offset,
+ e->next_offset);
if (ret)
return ret;
@@ -1271,17 +1250,6 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e,
if (ret)
goto release_target;
- /* Check hooks & underflows */
- for (h = 0; h < NF_ARP_NUMHOOKS; h++) {
- if ((unsigned char *)e - base == hook_entries[h])
- newinfo->hook_entry[h] = hook_entries[h];
- if ((unsigned char *)e - base == underflows[h])
- newinfo->underflow[h] = underflows[h];
- }
-
- /* Clear counters and comefrom */
- memset(&e->counters, 0, sizeof(e->counters));
- e->comefrom = 0;
return 0;
release_target:
@@ -1290,18 +1258,17 @@ out:
return ret;
}
-static int
+static void
compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr,
- unsigned int *size, const char *name,
+ unsigned int *size,
struct xt_table_info *newinfo, unsigned char *base)
{
struct xt_entry_target *t;
struct xt_target *target;
struct arpt_entry *de;
unsigned int origsize;
- int ret, h;
+ int h;
- ret = 0;
origsize = *size;
de = (struct arpt_entry *)*dstptr;
memcpy(de, e, sizeof(struct arpt_entry));
@@ -1322,148 +1289,82 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr,
if ((unsigned char *)de - base < newinfo->underflow[h])
newinfo->underflow[h] -= origsize - *size;
}
- return ret;
}
-static int translate_compat_table(const char *name,
- unsigned int valid_hooks,
- struct xt_table_info **pinfo,
+static int translate_compat_table(struct xt_table_info **pinfo,
void **pentry0,
- unsigned int total_size,
- unsigned int number,
- unsigned int *hook_entries,
- unsigned int *underflows)
+ const struct compat_arpt_replace *compatr)
{
unsigned int i, j;
struct xt_table_info *newinfo, *info;
void *pos, *entry0, *entry1;
struct compat_arpt_entry *iter0;
- struct arpt_entry *iter1;
+ struct arpt_replace repl;
unsigned int size;
int ret = 0;
info = *pinfo;
entry0 = *pentry0;
- size = total_size;
- info->number = number;
-
- /* Init all hooks to impossible value. */
- for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
- info->hook_entry[i] = 0xFFFFFFFF;
- info->underflow[i] = 0xFFFFFFFF;
- }
+ size = compatr->size;
+ info->number = compatr->num_entries;
duprintf("translate_compat_table: size %u\n", info->size);
j = 0;
xt_compat_lock(NFPROTO_ARP);
- xt_compat_init_offsets(NFPROTO_ARP, number);
+ xt_compat_init_offsets(NFPROTO_ARP, compatr->num_entries);
/* Walk through entries, checking offsets. */
- xt_entry_foreach(iter0, entry0, total_size) {
+ xt_entry_foreach(iter0, entry0, compatr->size) {
ret = check_compat_entry_size_and_hooks(iter0, info, &size,
entry0,
- entry0 + total_size,
- hook_entries,
- underflows,
- name);
+ entry0 + compatr->size);
if (ret != 0)
goto out_unlock;
++j;
}
ret = -EINVAL;
- if (j != number) {
+ if (j != compatr->num_entries) {
duprintf("translate_compat_table: %u not %u entries\n",
- j, number);
+ j, compatr->num_entries);
goto out_unlock;
}
- /* Check hooks all assigned */
- for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
- /* Only hooks which are valid */
- if (!(valid_hooks & (1 << i)))
- continue;
- if (info->hook_entry[i] == 0xFFFFFFFF) {
- duprintf("Invalid hook entry %u %u\n",
- i, hook_entries[i]);
- goto out_unlock;
- }
- if (info->underflow[i] == 0xFFFFFFFF) {
- duprintf("Invalid underflow %u %u\n",
- i, underflows[i]);
- goto out_unlock;
- }
- }
-
ret = -ENOMEM;
newinfo = xt_alloc_table_info(size);
if (!newinfo)
goto out_unlock;
- newinfo->number = number;
+ newinfo->number = compatr->num_entries;
for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
newinfo->hook_entry[i] = info->hook_entry[i];
newinfo->underflow[i] = info->underflow[i];
}
entry1 = newinfo->entries;
pos = entry1;
- size = total_size;
- xt_entry_foreach(iter0, entry0, total_size) {
- ret = compat_copy_entry_from_user(iter0, &pos, &size,
- name, newinfo, entry1);
- if (ret != 0)
- break;
- }
+ size = compatr->size;
+ xt_entry_foreach(iter0, entry0, compatr->size)
+ compat_copy_entry_from_user(iter0, &pos, &size,
+ newinfo, entry1);
+
+ /* all module references in entry0 are now gone */
+
xt_compat_flush_offsets(NFPROTO_ARP);
xt_compat_unlock(NFPROTO_ARP);
- if (ret)
- goto free_newinfo;
- ret = -ELOOP;
- if (!mark_source_chains(newinfo, valid_hooks, entry1))
- goto free_newinfo;
+ memcpy(&repl, compatr, sizeof(*compatr));
- i = 0;
- xt_entry_foreach(iter1, entry1, newinfo->size) {
- iter1->counters.pcnt = xt_percpu_counter_alloc();
- if (IS_ERR_VALUE(iter1->counters.pcnt)) {
- ret = -ENOMEM;
- break;
- }
-
- ret = check_target(iter1, name);
- if (ret != 0) {
- xt_percpu_counter_free(iter1->counters.pcnt);
- break;
- }
- ++i;
- if (strcmp(arpt_get_target(iter1)->u.user.name,
- XT_ERROR_TARGET) == 0)
- ++newinfo->stacksize;
- }
- if (ret) {
- /*
- * The first i matches need cleanup_entry (calls ->destroy)
- * because they had called ->check already. The other j-i
- * entries need only release.
- */
- int skip = i;
- j -= i;
- xt_entry_foreach(iter0, entry0, newinfo->size) {
- if (skip-- > 0)
- continue;
- if (j-- == 0)
- break;
- compat_release_entry(iter0);
- }
- xt_entry_foreach(iter1, entry1, newinfo->size) {
- if (i-- == 0)
- break;
- cleanup_entry(iter1);
- }
- xt_free_table_info(newinfo);
- return ret;
+ for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
+ repl.hook_entry[i] = newinfo->hook_entry[i];
+ repl.underflow[i] = newinfo->underflow[i];
}
+ repl.num_counters = 0;
+ repl.counters = NULL;
+ repl.size = newinfo->size;
+ ret = translate_table(newinfo, entry1, &repl);
+ if (ret)
+ goto free_newinfo;
+
*pinfo = newinfo;
*pentry0 = entry1;
xt_free_table_info(info);
@@ -1471,31 +1372,18 @@ static int translate_compat_table(const char *name,
free_newinfo:
xt_free_table_info(newinfo);
-out:
- xt_entry_foreach(iter0, entry0, total_size) {
+ return ret;
+out_unlock:
+ xt_compat_flush_offsets(NFPROTO_ARP);
+ xt_compat_unlock(NFPROTO_ARP);
+ xt_entry_foreach(iter0, entry0, compatr->size) {
if (j-- == 0)
break;
compat_release_entry(iter0);
}
return ret;
-out_unlock:
- xt_compat_flush_offsets(NFPROTO_ARP);
- xt_compat_unlock(NFPROTO_ARP);
- goto out;
}
-struct compat_arpt_replace {
- char name[XT_TABLE_MAXNAMELEN];
- u32 valid_hooks;
- u32 num_entries;
- u32 size;
- u32 hook_entry[NF_ARP_NUMHOOKS];
- u32 underflow[NF_ARP_NUMHOOKS];
- u32 num_counters;
- compat_uptr_t counters;
- struct compat_arpt_entry entries[0];
-};
-
static int compat_do_replace(struct net *net, void __user *user,
unsigned int len)
{
@@ -1528,10 +1416,7 @@ static int compat_do_replace(struct net *net, void __user *user,
goto free_newinfo;
}
- ret = translate_compat_table(tmp.name, tmp.valid_hooks,
- &newinfo, &loc_cpu_entry, tmp.size,
- tmp.num_entries, tmp.hook_entry,
- tmp.underflow);
+ ret = translate_compat_table(&newinfo, &loc_cpu_entry, &tmp);
if (ret != 0)
goto free_newinfo;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 99d46b0a4ead..a399c5419622 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -443,6 +443,18 @@ ipt_do_table(struct sk_buff *skb,
#endif
}
+static bool find_jump_target(const struct xt_table_info *t,
+ const struct ipt_entry *target)
+{
+ struct ipt_entry *iter;
+
+ xt_entry_foreach(iter, t->entries, t->size) {
+ if (iter == target)
+ return true;
+ }
+ return false;
+}
+
/* Figures out from what hook each rule can be called: returns 0 if
there are loops. Puts hook bitmask in comefrom. */
static int
@@ -520,6 +532,8 @@ mark_source_chains(const struct xt_table_info *newinfo,
size = e->next_offset;
e = (struct ipt_entry *)
(entry0 + pos + size);
+ if (pos + size >= newinfo->size)
+ return 0;
e->counters.pcnt = pos;
pos += size;
} else {
@@ -538,9 +552,15 @@ mark_source_chains(const struct xt_table_info *newinfo,
/* This a jump; chase it. */
duprintf("Jump rule %u -> %u\n",
pos, newpos);
+ e = (struct ipt_entry *)
+ (entry0 + newpos);
+ if (!find_jump_target(newinfo, e))
+ return 0;
} else {
/* ... this is a fallthru */
newpos = pos + e->next_offset;
+ if (newpos >= newinfo->size)
+ return 0;
}
e = (struct ipt_entry *)
(entry0 + newpos);
@@ -568,25 +588,6 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net)
}
static int
-check_entry(const struct ipt_entry *e)
-{
- const struct xt_entry_target *t;
-
- if (!ip_checkentry(&e->ip))
- return -EINVAL;
-
- if (e->target_offset + sizeof(struct xt_entry_target) >
- e->next_offset)
- return -EINVAL;
-
- t = ipt_get_target_c(e);
- if (e->target_offset + t->u.target_size > e->next_offset)
- return -EINVAL;
-
- return 0;
-}
-
-static int
check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
{
const struct ipt_ip *ip = par->entryinfo;
@@ -750,7 +751,11 @@ check_entry_size_and_hooks(struct ipt_entry *e,
return -EINVAL;
}
- err = check_entry(e);
+ if (!ip_checkentry(&e->ip))
+ return -EINVAL;
+
+ err = xt_check_entry_offsets(e, e->elems, e->target_offset,
+ e->next_offset);
if (err)
return err;
@@ -1308,55 +1313,17 @@ do_add_counters(struct net *net, const void __user *user,
unsigned int i;
struct xt_counters_info tmp;
struct xt_counters *paddc;
- unsigned int num_counters;
- const char *name;
- int size;
- void *ptmp;
struct xt_table *t;
const struct xt_table_info *private;
int ret = 0;
struct ipt_entry *iter;
unsigned int addend;
-#ifdef CONFIG_COMPAT
- struct compat_xt_counters_info compat_tmp;
- if (compat) {
- ptmp = &compat_tmp;
- size = sizeof(struct compat_xt_counters_info);
- } else
-#endif
- {
- ptmp = &tmp;
- size = sizeof(struct xt_counters_info);
- }
-
- if (copy_from_user(ptmp, user, size) != 0)
- return -EFAULT;
-
-#ifdef CONFIG_COMPAT
- if (compat) {
- num_counters = compat_tmp.num_counters;
- name = compat_tmp.name;
- } else
-#endif
- {
- num_counters = tmp.num_counters;
- name = tmp.name;
- }
+ paddc = xt_copy_counters_from_user(user, len, &tmp, compat);
+ if (IS_ERR(paddc))
+ return PTR_ERR(paddc);
- if (len != size + num_counters * sizeof(struct xt_counters))
- return -EINVAL;
-
- paddc = vmalloc(len - size);
- if (!paddc)
- return -ENOMEM;
-
- if (copy_from_user(paddc, user + size, len - size) != 0) {
- ret = -EFAULT;
- goto free;
- }
-
- t = xt_find_table_lock(net, AF_INET, name);
+ t = xt_find_table_lock(net, AF_INET, tmp.name);
if (IS_ERR_OR_NULL(t)) {
ret = t ? PTR_ERR(t) : -ENOENT;
goto free;
@@ -1364,7 +1331,7 @@ do_add_counters(struct net *net, const void __user *user,
local_bh_disable();
private = t->private;
- if (private->number != num_counters) {
+ if (private->number != tmp.num_counters) {
ret = -EINVAL;
goto unlock_up_free;
}
@@ -1443,7 +1410,6 @@ compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
static int
compat_find_calc_match(struct xt_entry_match *m,
- const char *name,
const struct ipt_ip *ip,
int *size)
{
@@ -1478,17 +1444,14 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
struct xt_table_info *newinfo,
unsigned int *size,
const unsigned char *base,
- const unsigned char *limit,
- const unsigned int *hook_entries,
- const unsigned int *underflows,
- const char *name)
+ const unsigned char *limit)
{
struct xt_entry_match *ematch;
struct xt_entry_target *t;
struct xt_target *target;
unsigned int entry_offset;
unsigned int j;
- int ret, off, h;
+ int ret, off;
duprintf("check_compat_entry_size_and_hooks %p\n", e);
if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 ||
@@ -1505,8 +1468,11 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
return -EINVAL;
}
- /* For purposes of check_entry casting the compat entry is fine */
- ret = check_entry((struct ipt_entry *)e);
+ if (!ip_checkentry(&e->ip))
+ return -EINVAL;
+
+ ret = xt_compat_check_entry_offsets(e, e->elems,
+ e->target_offset, e->next_offset);
if (ret)
return ret;
@@ -1514,7 +1480,7 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
entry_offset = (void *)e - (void *)base;
j = 0;
xt_ematch_foreach(ematch, e) {
- ret = compat_find_calc_match(ematch, name, &e->ip, &off);
+ ret = compat_find_calc_match(ematch, &e->ip, &off);
if (ret != 0)
goto release_matches;
++j;
@@ -1537,17 +1503,6 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
if (ret)
goto out;
- /* Check hooks & underflows */
- for (h = 0; h < NF_INET_NUMHOOKS; h++) {
- if ((unsigned char *)e - base == hook_entries[h])
- newinfo->hook_entry[h] = hook_entries[h];
- if ((unsigned char *)e - base == underflows[h])
- newinfo->underflow[h] = underflows[h];
- }
-
- /* Clear counters and comefrom */
- memset(&e->counters, 0, sizeof(e->counters));
- e->comefrom = 0;
return 0;
out:
@@ -1561,19 +1516,18 @@ release_matches:
return ret;
}
-static int
+static void
compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
- unsigned int *size, const char *name,
+ unsigned int *size,
struct xt_table_info *newinfo, unsigned char *base)
{
struct xt_entry_target *t;
struct xt_target *target;
struct ipt_entry *de;
unsigned int origsize;
- int ret, h;
+ int h;
struct xt_entry_match *ematch;
- ret = 0;
origsize = *size;
de = (struct ipt_entry *)*dstptr;
memcpy(de, e, sizeof(struct ipt_entry));
@@ -1582,201 +1536,105 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
*dstptr += sizeof(struct ipt_entry);
*size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
- xt_ematch_foreach(ematch, e) {
- ret = xt_compat_match_from_user(ematch, dstptr, size);
- if (ret != 0)
- return ret;
- }
+ xt_ematch_foreach(ematch, e)
+ xt_compat_match_from_user(ematch, dstptr, size);
+
de->target_offset = e->target_offset - (origsize - *size);
t = compat_ipt_get_target(e);
target = t->u.kernel.target;
xt_compat_target_from_user(t, dstptr, size);
de->next_offset = e->next_offset - (origsize - *size);
+
for (h = 0; h < NF_INET_NUMHOOKS; h++) {
if ((unsigned char *)de - base < newinfo->hook_entry[h])
newinfo->hook_entry[h] -= origsize - *size;
if ((unsigned char *)de - base < newinfo->underflow[h])
newinfo->underflow[h] -= origsize - *size;
}
- return ret;
-}
-
-static int
-compat_check_entry(struct ipt_entry *e, struct net *net, const char *name)
-{
- struct xt_entry_match *ematch;
- struct xt_mtchk_param mtpar;
- unsigned int j;
- int ret = 0;
-
- e->counters.pcnt = xt_percpu_counter_alloc();
- if (IS_ERR_VALUE(e->counters.pcnt))
- return -ENOMEM;
-
- j = 0;
- mtpar.net = net;
- mtpar.table = name;
- mtpar.entryinfo = &e->ip;
- mtpar.hook_mask = e->comefrom;
- mtpar.family = NFPROTO_IPV4;
- xt_ematch_foreach(ematch, e) {
- ret = check_match(ematch, &mtpar);
- if (ret != 0)
- goto cleanup_matches;
- ++j;
- }
-
- ret = check_target(e, net, name);
- if (ret)
- goto cleanup_matches;
- return 0;
-
- cleanup_matches:
- xt_ematch_foreach(ematch, e) {
- if (j-- == 0)
- break;
- cleanup_match(ematch, net);
- }
-
- xt_percpu_counter_free(e->counters.pcnt);
-
- return ret;
}
static int
translate_compat_table(struct net *net,
- const char *name,
- unsigned int valid_hooks,
struct xt_table_info **pinfo,
void **pentry0,
- unsigned int total_size,
- unsigned int number,
- unsigned int *hook_entries,
- unsigned int *underflows)
+ const struct compat_ipt_replace *compatr)
{
unsigned int i, j;
struct xt_table_info *newinfo, *info;
void *pos, *entry0, *entry1;
struct compat_ipt_entry *iter0;
- struct ipt_entry *iter1;
+ struct ipt_replace repl;
unsigned int size;
int ret;
info = *pinfo;
entry0 = *pentry0;
- size = total_size;
- info->number = number;
-
- /* Init all hooks to impossible value. */
- for (i = 0; i < NF_INET_NUMHOOKS; i++) {
- info->hook_entry[i] = 0xFFFFFFFF;
- info->underflow[i] = 0xFFFFFFFF;
- }
+ size = compatr->size;
+ info->number = compatr->num_entries;
duprintf("translate_compat_table: size %u\n", info->size);
j = 0;
xt_compat_lock(AF_INET);
- xt_compat_init_offsets(AF_INET, number);
+ xt_compat_init_offsets(AF_INET, compatr->num_entries);
/* Walk through entries, checking offsets. */
- xt_entry_foreach(iter0, entry0, total_size) {
+ xt_entry_foreach(iter0, entry0, compatr->size) {
ret = check_compat_entry_size_and_hooks(iter0, info, &size,
entry0,
- entry0 + total_size,
- hook_entries,
- underflows,
- name);
+ entry0 + compatr->size);
if (ret != 0)
goto out_unlock;
++j;
}
ret = -EINVAL;
- if (j != number) {
+ if (j != compatr->num_entries) {
duprintf("translate_compat_table: %u not %u entries\n",
- j, number);
+ j, compatr->num_entries);
goto out_unlock;
}
- /* Check hooks all assigned */
- for (i = 0; i < NF_INET_NUMHOOKS; i++) {
- /* Only hooks which are valid */
- if (!(valid_hooks & (1 << i)))
- continue;
- if (info->hook_entry[i] == 0xFFFFFFFF) {
- duprintf("Invalid hook entry %u %u\n",
- i, hook_entries[i]);
- goto out_unlock;
- }
- if (info->underflow[i] == 0xFFFFFFFF) {
- duprintf("Invalid underflow %u %u\n",
- i, underflows[i]);
- goto out_unlock;
- }
- }
-
ret = -ENOMEM;
newinfo = xt_alloc_table_info(size);
if (!newinfo)
goto out_unlock;
- newinfo->number = number;
+ newinfo->number = compatr->num_entries;
for (i = 0; i < NF_INET_NUMHOOKS; i++) {
- newinfo->hook_entry[i] = info->hook_entry[i];
- newinfo->underflow[i] = info->underflow[i];
+ newinfo->hook_entry[i] = compatr->hook_entry[i];
+ newinfo->underflow[i] = compatr->underflow[i];
}
entry1 = newinfo->entries;
pos = entry1;
- size = total_size;
- xt_entry_foreach(iter0, entry0, total_size) {
- ret = compat_copy_entry_from_user(iter0, &pos, &size,
- name, newinfo, entry1);
- if (ret != 0)
- break;
- }
+ size = compatr->size;
+ xt_entry_foreach(iter0, entry0, compatr->size)
+ compat_copy_entry_from_user(iter0, &pos, &size,
+ newinfo, entry1);
+
+ /* all module references in entry0 are now gone.
+ * entry1/newinfo contains a 64bit ruleset that looks exactly as
+ * generated by 64bit userspace.
+ *
+ * Call standard translate_table() to validate all hook_entrys,
+ * underflows, check for loops, etc.
+ */
xt_compat_flush_offsets(AF_INET);
xt_compat_unlock(AF_INET);
- if (ret)
- goto free_newinfo;
- ret = -ELOOP;
- if (!mark_source_chains(newinfo, valid_hooks, entry1))
- goto free_newinfo;
+ memcpy(&repl, compatr, sizeof(*compatr));
- i = 0;
- xt_entry_foreach(iter1, entry1, newinfo->size) {
- ret = compat_check_entry(iter1, net, name);
- if (ret != 0)
- break;
- ++i;
- if (strcmp(ipt_get_target(iter1)->u.user.name,
- XT_ERROR_TARGET) == 0)
- ++newinfo->stacksize;
- }
- if (ret) {
- /*
- * The first i matches need cleanup_entry (calls ->destroy)
- * because they had called ->check already. The other j-i
- * entries need only release.
- */
- int skip = i;
- j -= i;
- xt_entry_foreach(iter0, entry0, newinfo->size) {
- if (skip-- > 0)
- continue;
- if (j-- == 0)
- break;
- compat_release_entry(iter0);
- }
- xt_entry_foreach(iter1, entry1, newinfo->size) {
- if (i-- == 0)
- break;
- cleanup_entry(iter1, net);
- }
- xt_free_table_info(newinfo);
- return ret;
+ for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+ repl.hook_entry[i] = newinfo->hook_entry[i];
+ repl.underflow[i] = newinfo->underflow[i];
}
+ repl.num_counters = 0;
+ repl.counters = NULL;
+ repl.size = newinfo->size;
+ ret = translate_table(net, newinfo, entry1, &repl);
+ if (ret)
+ goto free_newinfo;
+
*pinfo = newinfo;
*pentry0 = entry1;
xt_free_table_info(info);
@@ -1784,17 +1642,16 @@ translate_compat_table(struct net *net,
free_newinfo:
xt_free_table_info(newinfo);
-out:
- xt_entry_foreach(iter0, entry0, total_size) {
+ return ret;
+out_unlock:
+ xt_compat_flush_offsets(AF_INET);
+ xt_compat_unlock(AF_INET);
+ xt_entry_foreach(iter0, entry0, compatr->size) {
if (j-- == 0)
break;
compat_release_entry(iter0);
}
return ret;
-out_unlock:
- xt_compat_flush_offsets(AF_INET);
- xt_compat_unlock(AF_INET);
- goto out;
}
static int
@@ -1830,10 +1687,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
goto free_newinfo;
}
- ret = translate_compat_table(net, tmp.name, tmp.valid_hooks,
- &newinfo, &loc_cpu_entry, tmp.size,
- tmp.num_entries, tmp.hook_entry,
- tmp.underflow);
+ ret = translate_compat_table(net, &newinfo, &loc_cpu_entry, &tmp);
if (ret != 0)
goto free_newinfo;
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 6fb869f646bf..a04dee536b8e 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -27,8 +27,6 @@ static int nf_ct_ipv4_gather_frags(struct net *net, struct sk_buff *skb,
{
int err;
- skb_orphan(skb);
-
local_bh_disable();
err = ip_defrag(net, skb, user);
local_bh_enable();
diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
index c6eb42100e9a..ea91058b5f6f 100644
--- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
@@ -108,10 +108,18 @@ static int masq_inet_event(struct notifier_block *this,
unsigned long event,
void *ptr)
{
- struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev;
+ struct in_device *idev = ((struct in_ifaddr *)ptr)->ifa_dev;
struct netdev_notifier_info info;
- netdev_notifier_info_init(&info, dev);
+ /* The masq_dev_notifier will catch the case of the device going
+ * down. So if the inetdev is dead and being destroyed we have
+ * no work to do. Otherwise this is an individual address removal
+ * and we have to perform the flush.
+ */
+ if (idev->dead)
+ return NOTIFY_DONE;
+
+ netdev_notifier_info_init(&info, idev->dev);
return masq_device_event(this, event, &info);
}
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index b27e98010dea..0d5278ca4777 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -746,8 +746,10 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (msg->msg_controllen) {
err = ip_cmsg_send(sock_net(sk), msg, &ipc, false);
- if (err)
+ if (unlikely(err)) {
+ kfree(ipc.opt);
return err;
+ }
if (ipc.opt)
free = 1;
}
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 865895d3fb27..a9b479a1c4a0 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -547,8 +547,10 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (msg->msg_controllen) {
err = ip_cmsg_send(net, msg, &ipc, false);
- if (err)
+ if (unlikely(err)) {
+ kfree(ipc.opt);
goto out;
+ }
if (ipc.opt)
free = 1;
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a0d842f4e9cf..fb54659320d8 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -129,6 +129,7 @@ static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
static int ip_rt_min_advmss __read_mostly = 256;
+static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
/*
* Interface to generic destination cache.
*/
@@ -757,7 +758,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
struct fib_nh *nh = &FIB_RES_NH(res);
update_or_create_fnhe(nh, fl4->daddr, new_gw,
- 0, 0);
+ 0, jiffies + ip_rt_gc_timeout);
}
if (kill_route)
rt->dst.obsolete = DST_OBSOLETE_KILL;
@@ -1558,6 +1559,36 @@ static void ip_handle_martian_source(struct net_device *dev,
#endif
}
+static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
+{
+ struct fnhe_hash_bucket *hash;
+ struct fib_nh_exception *fnhe, __rcu **fnhe_p;
+ u32 hval = fnhe_hashfun(daddr);
+
+ spin_lock_bh(&fnhe_lock);
+
+ hash = rcu_dereference_protected(nh->nh_exceptions,
+ lockdep_is_held(&fnhe_lock));
+ hash += hval;
+
+ fnhe_p = &hash->chain;
+ fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
+ while (fnhe) {
+ if (fnhe->fnhe_daddr == daddr) {
+ rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
+ fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
+ fnhe_flush_routes(fnhe);
+ kfree_rcu(fnhe, rcu);
+ break;
+ }
+ fnhe_p = &fnhe->fnhe_next;
+ fnhe = rcu_dereference_protected(fnhe->fnhe_next,
+ lockdep_is_held(&fnhe_lock));
+ }
+
+ spin_unlock_bh(&fnhe_lock);
+}
+
/* called in rcu_read_lock() section */
static int __mkroute_input(struct sk_buff *skb,
const struct fib_result *res,
@@ -1611,11 +1642,20 @@ static int __mkroute_input(struct sk_buff *skb,
fnhe = find_exception(&FIB_RES_NH(*res), daddr);
if (do_cache) {
- if (fnhe)
+ if (fnhe) {
rth = rcu_dereference(fnhe->fnhe_rth_input);
- else
- rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
+ if (rth && rth->dst.expires &&
+ time_after(jiffies, rth->dst.expires)) {
+ ip_del_fnhe(&FIB_RES_NH(*res), daddr);
+ fnhe = NULL;
+ } else {
+ goto rt_cache;
+ }
+ }
+
+ rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
+rt_cache:
if (rt_cache_valid(rth)) {
skb_dst_set_noref(skb, &rth->dst);
goto out;
@@ -2007,6 +2047,18 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
*/
if (fi && res->prefixlen < 4)
fi = NULL;
+ } else if ((type == RTN_LOCAL) && (orig_oif != 0) &&
+ (orig_oif != dev_out->ifindex)) {
+ /* For local routes that require a particular output interface
+ * we do not want to cache the result. Caching the result
+ * causes incorrect behaviour when there are multiple source
+ * addresses on the interface, the end result being that if the
+ * intended recipient is waiting on that interface for the
+ * packet he won't receive it because it will be delivered on
+ * the loopback interface and the IP_PKTINFO ipi_ifindex will
+ * be set to the loopback interface as well.
+ */
+ fi = NULL;
}
fnhe = NULL;
@@ -2016,19 +2068,29 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
struct fib_nh *nh = &FIB_RES_NH(*res);
fnhe = find_exception(nh, fl4->daddr);
- if (fnhe)
+ if (fnhe) {
prth = &fnhe->fnhe_rth_output;
- else {
- if (unlikely(fl4->flowi4_flags &
- FLOWI_FLAG_KNOWN_NH &&
- !(nh->nh_gw &&
- nh->nh_scope == RT_SCOPE_LINK))) {
- do_cache = false;
- goto add;
+ rth = rcu_dereference(*prth);
+ if (rth && rth->dst.expires &&
+ time_after(jiffies, rth->dst.expires)) {
+ ip_del_fnhe(nh, fl4->daddr);
+ fnhe = NULL;
+ } else {
+ goto rt_cache;
}
- prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
}
+
+ if (unlikely(fl4->flowi4_flags &
+ FLOWI_FLAG_KNOWN_NH &&
+ !(nh->nh_gw &&
+ nh->nh_scope == RT_SCOPE_LINK))) {
+ do_cache = false;
+ goto add;
+ }
+ prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
rth = rcu_dereference(*prth);
+
+rt_cache:
if (rt_cache_valid(rth)) {
dst_hold(&rth->dst);
return rth;
@@ -2582,7 +2644,6 @@ void ip_rt_multicast_event(struct in_device *in_dev)
}
#ifdef CONFIG_SYSCTL
-static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
static int ip_rt_gc_interval __read_mostly = 60 * HZ;
static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
static int ip_rt_gc_elasticity __read_mostly = 8;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index a1d5c67e251d..6ecfc9de599c 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -279,6 +279,7 @@
#include <asm/uaccess.h>
#include <asm/ioctls.h>
+#include <asm/unaligned.h>
#include <net/busy_poll.h>
int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
@@ -938,7 +939,7 @@ new_segment:
i = skb_shinfo(skb)->nr_frags;
can_coalesce = skb_can_coalesce(skb, i, page, offset);
- if (!can_coalesce && i >= MAX_SKB_FRAGS) {
+ if (!can_coalesce && i >= sysctl_max_skb_frags) {
tcp_mark_push(tp, skb);
goto new_segment;
}
@@ -1211,7 +1212,7 @@ new_segment:
if (!skb_can_coalesce(skb, i, pfrag->page,
pfrag->offset)) {
- if (i == MAX_SKB_FRAGS || !sg) {
+ if (i == sysctl_max_skb_frags || !sg) {
tcp_mark_push(tp, skb);
goto new_segment;
}
@@ -2637,6 +2638,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
const struct inet_connection_sock *icsk = inet_csk(sk);
u32 now = tcp_time_stamp;
unsigned int start;
+ u64 rate64;
u32 rate;
memset(info, 0, sizeof(*info));
@@ -2702,15 +2704,17 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_total_retrans = tp->total_retrans;
rate = READ_ONCE(sk->sk_pacing_rate);
- info->tcpi_pacing_rate = rate != ~0U ? rate : ~0ULL;
+ rate64 = rate != ~0U ? rate : ~0ULL;
+ put_unaligned(rate64, &info->tcpi_pacing_rate);
rate = READ_ONCE(sk->sk_max_pacing_rate);
- info->tcpi_max_pacing_rate = rate != ~0U ? rate : ~0ULL;
+ rate64 = rate != ~0U ? rate : ~0ULL;
+ put_unaligned(rate64, &info->tcpi_max_pacing_rate);
do {
start = u64_stats_fetch_begin_irq(&tp->syncp);
- info->tcpi_bytes_acked = tp->bytes_acked;
- info->tcpi_bytes_received = tp->bytes_received;
+ put_unaligned(tp->bytes_acked, &info->tcpi_bytes_acked);
+ put_unaligned(tp->bytes_received, &info->tcpi_bytes_received);
} while (u64_stats_fetch_retry_irq(&tp->syncp, start));
info->tcpi_segs_out = tp->segs_out;
info->tcpi_segs_in = tp->segs_in;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 205e6745393f..7decaa439360 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -312,7 +312,7 @@ static void do_redirect(struct sk_buff *skb, struct sock *sk)
/* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */
-void tcp_req_err(struct sock *sk, u32 seq)
+void tcp_req_err(struct sock *sk, u32 seq, bool abort)
{
struct request_sock *req = inet_reqsk(sk);
struct net *net = sock_net(sk);
@@ -320,11 +320,9 @@ void tcp_req_err(struct sock *sk, u32 seq)
/* ICMPs are not backlogged, hence we cannot get
* an established socket here.
*/
- WARN_ON(req->sk);
-
if (seq != tcp_rsk(req)->snt_isn) {
NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
- } else {
+ } else if (abort) {
/*
* Still in SYN_RECV, just remove it silently.
* There is no good way to pass the error to the newly
@@ -384,7 +382,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
}
seq = ntohl(th->seq);
if (sk->sk_state == TCP_NEW_SYN_RECV)
- return tcp_req_err(sk, seq);
+ return tcp_req_err(sk, seq,
+ type == ICMP_PARAMETERPROB ||
+ type == ICMP_TIME_EXCEEDED ||
+ (type == ICMP_DEST_UNREACH &&
+ (code == ICMP_NET_UNREACH ||
+ code == ICMP_HOST_UNREACH)));
bh_lock_sock(sk);
/* If too many ICMPs get dropped on busy
@@ -705,7 +708,8 @@ release_sk1:
outside socket context is ugly, certainly. What can I do?
*/
-static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
+static void tcp_v4_send_ack(struct net *net,
+ struct sk_buff *skb, u32 seq, u32 ack,
u32 win, u32 tsval, u32 tsecr, int oif,
struct tcp_md5sig_key *key,
int reply_flags, u8 tos)
@@ -720,7 +724,6 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
];
} rep;
struct ip_reply_arg arg;
- struct net *net = dev_net(skb_dst(skb)->dev);
memset(&rep.th, 0, sizeof(struct tcphdr));
memset(&arg, 0, sizeof(arg));
@@ -782,7 +785,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
struct inet_timewait_sock *tw = inet_twsk(sk);
struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
- tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
+ tcp_v4_send_ack(sock_net(sk), skb,
+ tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_time_stamp + tcptw->tw_ts_offset,
tcptw->tw_ts_recent,
@@ -801,8 +805,10 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
* sk->sk_state == TCP_SYN_RECV -> for Fast Open.
*/
- tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
- tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
+ u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
+ tcp_sk(sk)->snd_nxt;
+
+ tcp_v4_send_ack(sock_net(sk), skb, seq,
tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd,
tcp_time_stamp,
req->ts_recent,
@@ -1586,28 +1592,30 @@ process:
if (sk->sk_state == TCP_NEW_SYN_RECV) {
struct request_sock *req = inet_reqsk(sk);
- struct sock *nsk = NULL;
+ struct sock *nsk;
sk = req->rsk_listener;
- if (tcp_v4_inbound_md5_hash(sk, skb))
- goto discard_and_relse;
- if (likely(sk->sk_state == TCP_LISTEN)) {
- nsk = tcp_check_req(sk, skb, req, false);
- } else {
+ if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
+ reqsk_put(req);
+ goto discard_it;
+ }
+ if (unlikely(sk->sk_state != TCP_LISTEN)) {
inet_csk_reqsk_queue_drop_and_put(sk, req);
goto lookup;
}
+ sock_hold(sk);
+ nsk = tcp_check_req(sk, skb, req, false);
if (!nsk) {
reqsk_put(req);
- goto discard_it;
+ goto discard_and_relse;
}
if (nsk == sk) {
- sock_hold(sk);
reqsk_put(req);
} else if (tcp_child_process(sk, nsk, skb)) {
tcp_v4_send_reset(nsk, skb);
- goto discard_it;
+ goto discard_and_relse;
} else {
+ sock_put(sk);
return 0;
}
}
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index c8cbc2b4b792..a726d7853ce5 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -550,7 +550,7 @@ reset:
*/
if (crtt > tp->srtt_us) {
/* Set RTO like tcp_rtt_estimator(), but from cached RTT. */
- crtt /= 8 * USEC_PER_MSEC;
+ crtt /= 8 * USEC_PER_SEC / HZ;
inet_csk(sk)->icsk_rto = crtt + max(2 * crtt, tcp_rto_min(sk));
} else if (tp->srtt_us == 0) {
/* RFC6298: 5.7 We've failed to get a valid RTT sample from
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index ac6b1961ffeb..9475a2748a9a 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -458,7 +458,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->rcv_wup = newtp->copied_seq =
newtp->rcv_nxt = treq->rcv_isn + 1;
- newtp->segs_in = 0;
+ newtp->segs_in = 1;
newtp->snd_sml = newtp->snd_una =
newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
@@ -818,6 +818,7 @@ int tcp_child_process(struct sock *parent, struct sock *child,
int ret = 0;
int state = child->sk_state;
+ tcp_sk(child)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
if (!sock_owned_by_user(child)) {
ret = tcp_rcv_state_process(child, skb);
/* Wakeup parent, send SIGIO */
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 9f069bd9de46..0dd207cd1f38 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2625,8 +2625,10 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
*/
if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) ||
skb_headroom(skb) >= 0xFFFF)) {
- struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
- GFP_ATOMIC);
+ struct sk_buff *nskb;
+
+ skb_mstamp_get(&skb->skb_mstamp);
+ nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
-ENOBUFS;
} else {
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 17d35662930d..3e6a472e6b88 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -219,7 +219,7 @@ static u32 tcp_yeah_ssthresh(struct sock *sk)
yeah->fast_count = 0;
yeah->reno_count = max(yeah->reno_count>>1, 2U);
- return tp->snd_cwnd - reduction;
+ return max_t(int, tp->snd_cwnd - reduction, 2);
}
static struct tcp_congestion_ops tcp_yeah __read_mostly = {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index d8946929813e..7473dad69c92 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -966,8 +966,10 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (msg->msg_controllen) {
err = ip_cmsg_send(sock_net(sk), msg, &ipc,
sk->sk_family == AF_INET6);
- if (err)
+ if (unlikely(err)) {
+ kfree(ipc.opt);
return err;
+ }
if (ipc.opt)
free = 1;
connected = 0;
@@ -1530,7 +1532,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
/* if we're overly short, let UDP handle it */
encap_rcv = ACCESS_ONCE(up->encap_rcv);
- if (skb->len > sizeof(struct udphdr) && encap_rcv) {
+ if (encap_rcv) {
int ret;
/* Verify checksum before giving to encap */
@@ -1989,10 +1991,14 @@ void udp_v4_early_demux(struct sk_buff *skb)
if (!in_dev)
return;
- ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr,
- iph->protocol);
- if (!ours)
- return;
+ /* we are supposed to accept bcast packets */
+ if (skb->pkt_type == PACKET_MULTICAST) {
+ ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr,
+ iph->protocol);
+ if (!ours)
+ return;
+ }
+
sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr,
uh->source, iph->saddr, dif);
} else if (skb->pkt_type == PACKET_HOST) {
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index aba428626b52..280a9bdeddee 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -89,6 +89,8 @@ int udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
uh->source = src_port;
uh->len = htons(skb->len);
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
udp_set_csum(nocheck, skb, src, dst, skb->len);
return iptunnel_xmit(sk, rt, skb, src, dst, IPPROTO_UDP,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 4f7d6e0189cc..3cdf59161a7e 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -585,7 +585,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
if (err < 0)
goto errout;
- err = EINVAL;
+ err = -EINVAL;
if (!tb[NETCONFA_IFINDEX])
goto errout;
@@ -3533,6 +3533,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
{
struct inet6_dev *idev = ifp->idev;
struct net_device *dev = idev->dev;
+ bool notify = false;
addrconf_join_solict(dev, &ifp->addr);
@@ -3578,7 +3579,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
/* Because optimistic nodes can use this address,
* notify listeners. If DAD fails, RTM_DELADDR is sent.
*/
- ipv6_ifa_notify(RTM_NEWADDR, ifp);
+ notify = true;
}
}
@@ -3586,6 +3587,8 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
out:
spin_unlock(&ifp->lock);
read_unlock_bh(&idev->lock);
+ if (notify)
+ ipv6_ifa_notify(RTM_NEWADDR, ifp);
}
static void addrconf_dad_start(struct inet6_ifaddr *ifp)
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 0743a5f4c533..183ff87dacf3 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -163,6 +163,9 @@ ipv4_connected:
fl6.fl6_sport = inet->inet_sport;
fl6.flowi6_uid = sock_i_uid(sk);
+ if (!fl6.flowi6_oif)
+ fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
+
if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST))
fl6.flowi6_oif = np->mcast_oif;
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 835ec57c233b..840a4388f860 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -260,7 +260,11 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
return -EINVAL;
}
}
- return -ENOENT;
+ if (!found)
+ return -ENOENT;
+ if (fragoff)
+ *fragoff = _frag_off;
+ break;
}
hdrlen = 8;
} else if (nexthdr == NEXTHDR_AUTH) {
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 0c7e276c230e..34cf46d74554 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -179,6 +179,7 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
}
}
+ free_percpu(non_pcpu_rt->rt6i_pcpu);
non_pcpu_rt->rt6i_pcpu = NULL;
}
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 1f9ebe3cbb4a..dc2db4f7b182 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -540,12 +540,13 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
}
spin_lock_bh(&ip6_sk_fl_lock);
for (sflp = &np->ipv6_fl_list;
- (sfl = rcu_dereference(*sflp)) != NULL;
+ (sfl = rcu_dereference_protected(*sflp,
+ lockdep_is_held(&ip6_sk_fl_lock))) != NULL;
sflp = &sfl->next) {
if (sfl->fl->label == freq.flr_label) {
if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK))
np->flow_label &= ~IPV6_FLOWLABEL_MASK;
- *sflp = rcu_dereference(sfl->next);
+ *sflp = sfl->next;
spin_unlock_bh(&ip6_sk_fl_lock);
fl_release(sfl->fl);
kfree_rcu(sfl, rcu);
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index e5ea177d34c6..4650c6824783 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -778,6 +778,8 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
__u32 mtu;
int err;
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
encap_limit = t->parms.encap_limit;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index e6a7bd15b9b7..58900c21e4e4 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -909,6 +909,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
struct rt6_info *rt;
#endif
int err;
+ int flags = 0;
/* The correct way to handle this would be to do
* ip6_route_get_saddr, and then ip6_route_output; however,
@@ -940,10 +941,13 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
dst_release(*dst);
*dst = NULL;
}
+
+ if (fl6->flowi6_oif)
+ flags |= RT6_LOOKUP_F_IFACE;
}
if (!*dst)
- *dst = ip6_route_output(net, sk, fl6);
+ *dst = ip6_route_output_flags(net, sk, fl6, flags);
err = (*dst)->error;
if (err)
@@ -1068,17 +1072,12 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
const struct in6_addr *final_dst)
{
struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
- int err;
dst = ip6_sk_dst_check(sk, dst, fl6);
+ if (!dst)
+ dst = ip6_dst_lookup_flow(sk, fl6, final_dst);
- err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
- if (err)
- return ERR_PTR(err);
- if (final_dst)
- fl6->daddr = *final_dst;
-
- return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
+ return dst;
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
@@ -1087,8 +1086,8 @@ static inline int ip6_ufo_append_data(struct sock *sk,
int getfrag(void *from, char *to, int offset, int len,
int odd, struct sk_buff *skb),
void *from, int length, int hh_len, int fragheaderlen,
- int transhdrlen, int mtu, unsigned int flags,
- const struct flowi6 *fl6)
+ int exthdrlen, int transhdrlen, int mtu,
+ unsigned int flags, const struct flowi6 *fl6)
{
struct sk_buff *skb;
@@ -1113,7 +1112,7 @@ static inline int ip6_ufo_append_data(struct sock *sk,
skb_put(skb, fragheaderlen + transhdrlen);
/* initialize network header pointer */
- skb_reset_network_header(skb);
+ skb_set_network_header(skb, exthdrlen);
/* initialize protocol header pointer */
skb->transport_header = skb->network_header + fragheaderlen;
@@ -1353,9 +1352,9 @@ emsgsize:
(skb && skb_is_gso(skb))) &&
(sk->sk_protocol == IPPROTO_UDP) &&
(rt->dst.dev->features & NETIF_F_UFO) &&
- (sk->sk_type == SOCK_DGRAM)) {
+ (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) {
err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
- hh_len, fragheaderlen,
+ hh_len, fragheaderlen, exthdrlen,
transhdrlen, mtu, flags, fl6);
if (err)
goto error;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 137fca42aaa6..3991b21e24ad 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -343,12 +343,12 @@ static int ip6_tnl_create2(struct net_device *dev)
t = netdev_priv(dev);
+ dev->rtnl_link_ops = &ip6_link_ops;
err = register_netdevice(dev);
if (err < 0)
goto out;
strcpy(t->parms.name, dev->name);
- dev->rtnl_link_ops = &ip6_link_ops;
dev_hold(dev);
ip6_tnl_link(ip6n, t);
@@ -1180,6 +1180,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
u8 tproto;
int err;
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
tproto = ACCESS_ONCE(t->parms.proto);
if (tproto != IPPROTO_IPIP && tproto != 0)
return -1;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index a10e77103c88..e207cb2468da 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1074,6 +1074,7 @@ static struct mfc6_cache *ip6mr_cache_alloc(void)
struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
if (!c)
return NULL;
+ c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
c->mfc_un.res.minvif = MAXMIFS;
return c;
}
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 5ee56d0a8699..d64ee7e83664 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1574,9 +1574,8 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
return NULL;
skb->priority = TC_PRIO_CONTROL;
- skb->reserved_tailroom = skb_end_offset(skb) -
- min(mtu, skb_end_offset(skb));
skb_reserve(skb, hlen);
+ skb_tailroom_reserve(skb, mtu, tlen);
if (__ipv6_get_lladdr(idev, &addr_buf, IFA_F_TENTATIVE)) {
/* <draft-ietf-magma-mld-source-05.txt>:
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 6198807e06f4..22f39e00bef3 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -455,6 +455,18 @@ ip6t_do_table(struct sk_buff *skb,
#endif
}
+static bool find_jump_target(const struct xt_table_info *t,
+ const struct ip6t_entry *target)
+{
+ struct ip6t_entry *iter;
+
+ xt_entry_foreach(iter, t->entries, t->size) {
+ if (iter == target)
+ return true;
+ }
+ return false;
+}
+
/* Figures out from what hook each rule can be called: returns 0 if
there are loops. Puts hook bitmask in comefrom. */
static int
@@ -532,6 +544,8 @@ mark_source_chains(const struct xt_table_info *newinfo,
size = e->next_offset;
e = (struct ip6t_entry *)
(entry0 + pos + size);
+ if (pos + size >= newinfo->size)
+ return 0;
e->counters.pcnt = pos;
pos += size;
} else {
@@ -550,9 +564,15 @@ mark_source_chains(const struct xt_table_info *newinfo,
/* This a jump; chase it. */
duprintf("Jump rule %u -> %u\n",
pos, newpos);
+ e = (struct ip6t_entry *)
+ (entry0 + newpos);
+ if (!find_jump_target(newinfo, e))
+ return 0;
} else {
/* ... this is a fallthru */
newpos = pos + e->next_offset;
+ if (newpos >= newinfo->size)
+ return 0;
}
e = (struct ip6t_entry *)
(entry0 + newpos);
@@ -579,25 +599,6 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net)
module_put(par.match->me);
}
-static int
-check_entry(const struct ip6t_entry *e)
-{
- const struct xt_entry_target *t;
-
- if (!ip6_checkentry(&e->ipv6))
- return -EINVAL;
-
- if (e->target_offset + sizeof(struct xt_entry_target) >
- e->next_offset)
- return -EINVAL;
-
- t = ip6t_get_target_c(e);
- if (e->target_offset + t->u.target_size > e->next_offset)
- return -EINVAL;
-
- return 0;
-}
-
static int check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
{
const struct ip6t_ip6 *ipv6 = par->entryinfo;
@@ -762,7 +763,11 @@ check_entry_size_and_hooks(struct ip6t_entry *e,
return -EINVAL;
}
- err = check_entry(e);
+ if (!ip6_checkentry(&e->ipv6))
+ return -EINVAL;
+
+ err = xt_check_entry_offsets(e, e->elems, e->target_offset,
+ e->next_offset);
if (err)
return err;
@@ -1320,55 +1325,16 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
unsigned int i;
struct xt_counters_info tmp;
struct xt_counters *paddc;
- unsigned int num_counters;
- char *name;
- int size;
- void *ptmp;
struct xt_table *t;
const struct xt_table_info *private;
int ret = 0;
struct ip6t_entry *iter;
unsigned int addend;
-#ifdef CONFIG_COMPAT
- struct compat_xt_counters_info compat_tmp;
-
- if (compat) {
- ptmp = &compat_tmp;
- size = sizeof(struct compat_xt_counters_info);
- } else
-#endif
- {
- ptmp = &tmp;
- size = sizeof(struct xt_counters_info);
- }
-
- if (copy_from_user(ptmp, user, size) != 0)
- return -EFAULT;
-
-#ifdef CONFIG_COMPAT
- if (compat) {
- num_counters = compat_tmp.num_counters;
- name = compat_tmp.name;
- } else
-#endif
- {
- num_counters = tmp.num_counters;
- name = tmp.name;
- }
-
- if (len != size + num_counters * sizeof(struct xt_counters))
- return -EINVAL;
-
- paddc = vmalloc(len - size);
- if (!paddc)
- return -ENOMEM;
- if (copy_from_user(paddc, user + size, len - size) != 0) {
- ret = -EFAULT;
- goto free;
- }
-
- t = xt_find_table_lock(net, AF_INET6, name);
+ paddc = xt_copy_counters_from_user(user, len, &tmp, compat);
+ if (IS_ERR(paddc))
+ return PTR_ERR(paddc);
+ t = xt_find_table_lock(net, AF_INET6, tmp.name);
if (IS_ERR_OR_NULL(t)) {
ret = t ? PTR_ERR(t) : -ENOENT;
goto free;
@@ -1376,7 +1342,7 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
local_bh_disable();
private = t->private;
- if (private->number != num_counters) {
+ if (private->number != tmp.num_counters) {
ret = -EINVAL;
goto unlock_up_free;
}
@@ -1455,7 +1421,6 @@ compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr,
static int
compat_find_calc_match(struct xt_entry_match *m,
- const char *name,
const struct ip6t_ip6 *ipv6,
int *size)
{
@@ -1490,17 +1455,14 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
struct xt_table_info *newinfo,
unsigned int *size,
const unsigned char *base,
- const unsigned char *limit,
- const unsigned int *hook_entries,
- const unsigned int *underflows,
- const char *name)
+ const unsigned char *limit)
{
struct xt_entry_match *ematch;
struct xt_entry_target *t;
struct xt_target *target;
unsigned int entry_offset;
unsigned int j;
- int ret, off, h;
+ int ret, off;
duprintf("check_compat_entry_size_and_hooks %p\n", e);
if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 ||
@@ -1517,8 +1479,11 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
return -EINVAL;
}
- /* For purposes of check_entry casting the compat entry is fine */
- ret = check_entry((struct ip6t_entry *)e);
+ if (!ip6_checkentry(&e->ipv6))
+ return -EINVAL;
+
+ ret = xt_compat_check_entry_offsets(e, e->elems,
+ e->target_offset, e->next_offset);
if (ret)
return ret;
@@ -1526,7 +1491,7 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
entry_offset = (void *)e - (void *)base;
j = 0;
xt_ematch_foreach(ematch, e) {
- ret = compat_find_calc_match(ematch, name, &e->ipv6, &off);
+ ret = compat_find_calc_match(ematch, &e->ipv6, &off);
if (ret != 0)
goto release_matches;
++j;
@@ -1549,17 +1514,6 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
if (ret)
goto out;
- /* Check hooks & underflows */
- for (h = 0; h < NF_INET_NUMHOOKS; h++) {
- if ((unsigned char *)e - base == hook_entries[h])
- newinfo->hook_entry[h] = hook_entries[h];
- if ((unsigned char *)e - base == underflows[h])
- newinfo->underflow[h] = underflows[h];
- }
-
- /* Clear counters and comefrom */
- memset(&e->counters, 0, sizeof(e->counters));
- e->comefrom = 0;
return 0;
out:
@@ -1573,18 +1527,17 @@ release_matches:
return ret;
}
-static int
+static void
compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr,
- unsigned int *size, const char *name,
+ unsigned int *size,
struct xt_table_info *newinfo, unsigned char *base)
{
struct xt_entry_target *t;
struct ip6t_entry *de;
unsigned int origsize;
- int ret, h;
+ int h;
struct xt_entry_match *ematch;
- ret = 0;
origsize = *size;
de = (struct ip6t_entry *)*dstptr;
memcpy(de, e, sizeof(struct ip6t_entry));
@@ -1593,11 +1546,9 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr,
*dstptr += sizeof(struct ip6t_entry);
*size += sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry);
- xt_ematch_foreach(ematch, e) {
- ret = xt_compat_match_from_user(ematch, dstptr, size);
- if (ret != 0)
- return ret;
- }
+ xt_ematch_foreach(ematch, e)
+ xt_compat_match_from_user(ematch, dstptr, size);
+
de->target_offset = e->target_offset - (origsize - *size);
t = compat_ip6t_get_target(e);
xt_compat_target_from_user(t, dstptr, size);
@@ -1609,183 +1560,83 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr,
if ((unsigned char *)de - base < newinfo->underflow[h])
newinfo->underflow[h] -= origsize - *size;
}
- return ret;
-}
-
-static int compat_check_entry(struct ip6t_entry *e, struct net *net,
- const char *name)
-{
- unsigned int j;
- int ret = 0;
- struct xt_mtchk_param mtpar;
- struct xt_entry_match *ematch;
-
- e->counters.pcnt = xt_percpu_counter_alloc();
- if (IS_ERR_VALUE(e->counters.pcnt))
- return -ENOMEM;
- j = 0;
- mtpar.net = net;
- mtpar.table = name;
- mtpar.entryinfo = &e->ipv6;
- mtpar.hook_mask = e->comefrom;
- mtpar.family = NFPROTO_IPV6;
- xt_ematch_foreach(ematch, e) {
- ret = check_match(ematch, &mtpar);
- if (ret != 0)
- goto cleanup_matches;
- ++j;
- }
-
- ret = check_target(e, net, name);
- if (ret)
- goto cleanup_matches;
- return 0;
-
- cleanup_matches:
- xt_ematch_foreach(ematch, e) {
- if (j-- == 0)
- break;
- cleanup_match(ematch, net);
- }
-
- xt_percpu_counter_free(e->counters.pcnt);
-
- return ret;
}
static int
translate_compat_table(struct net *net,
- const char *name,
- unsigned int valid_hooks,
struct xt_table_info **pinfo,
void **pentry0,
- unsigned int total_size,
- unsigned int number,
- unsigned int *hook_entries,
- unsigned int *underflows)
+ const struct compat_ip6t_replace *compatr)
{
unsigned int i, j;
struct xt_table_info *newinfo, *info;
void *pos, *entry0, *entry1;
struct compat_ip6t_entry *iter0;
- struct ip6t_entry *iter1;
+ struct ip6t_replace repl;
unsigned int size;
int ret = 0;
info = *pinfo;
entry0 = *pentry0;
- size = total_size;
- info->number = number;
-
- /* Init all hooks to impossible value. */
- for (i = 0; i < NF_INET_NUMHOOKS; i++) {
- info->hook_entry[i] = 0xFFFFFFFF;
- info->underflow[i] = 0xFFFFFFFF;
- }
+ size = compatr->size;
+ info->number = compatr->num_entries;
duprintf("translate_compat_table: size %u\n", info->size);
j = 0;
xt_compat_lock(AF_INET6);
- xt_compat_init_offsets(AF_INET6, number);
+ xt_compat_init_offsets(AF_INET6, compatr->num_entries);
/* Walk through entries, checking offsets. */
- xt_entry_foreach(iter0, entry0, total_size) {
+ xt_entry_foreach(iter0, entry0, compatr->size) {
ret = check_compat_entry_size_and_hooks(iter0, info, &size,
entry0,
- entry0 + total_size,
- hook_entries,
- underflows,
- name);
+ entry0 + compatr->size);
if (ret != 0)
goto out_unlock;
++j;
}
ret = -EINVAL;
- if (j != number) {
+ if (j != compatr->num_entries) {
duprintf("translate_compat_table: %u not %u entries\n",
- j, number);
+ j, compatr->num_entries);
goto out_unlock;
}
- /* Check hooks all assigned */
- for (i = 0; i < NF_INET_NUMHOOKS; i++) {
- /* Only hooks which are valid */
- if (!(valid_hooks & (1 << i)))
- continue;
- if (info->hook_entry[i] == 0xFFFFFFFF) {
- duprintf("Invalid hook entry %u %u\n",
- i, hook_entries[i]);
- goto out_unlock;
- }
- if (info->underflow[i] == 0xFFFFFFFF) {
- duprintf("Invalid underflow %u %u\n",
- i, underflows[i]);
- goto out_unlock;
- }
- }
-
ret = -ENOMEM;
newinfo = xt_alloc_table_info(size);
if (!newinfo)
goto out_unlock;
- newinfo->number = number;
+ newinfo->number = compatr->num_entries;
for (i = 0; i < NF_INET_NUMHOOKS; i++) {
- newinfo->hook_entry[i] = info->hook_entry[i];
- newinfo->underflow[i] = info->underflow[i];
+ newinfo->hook_entry[i] = compatr->hook_entry[i];
+ newinfo->underflow[i] = compatr->underflow[i];
}
entry1 = newinfo->entries;
pos = entry1;
- size = total_size;
- xt_entry_foreach(iter0, entry0, total_size) {
- ret = compat_copy_entry_from_user(iter0, &pos, &size,
- name, newinfo, entry1);
- if (ret != 0)
- break;
- }
+ size = compatr->size;
+ xt_entry_foreach(iter0, entry0, compatr->size)
+ compat_copy_entry_from_user(iter0, &pos, &size,
+ newinfo, entry1);
+
+ /* all module references in entry0 are now gone. */
xt_compat_flush_offsets(AF_INET6);
xt_compat_unlock(AF_INET6);
- if (ret)
- goto free_newinfo;
- ret = -ELOOP;
- if (!mark_source_chains(newinfo, valid_hooks, entry1))
- goto free_newinfo;
+ memcpy(&repl, compatr, sizeof(*compatr));
- i = 0;
- xt_entry_foreach(iter1, entry1, newinfo->size) {
- ret = compat_check_entry(iter1, net, name);
- if (ret != 0)
- break;
- ++i;
- if (strcmp(ip6t_get_target(iter1)->u.user.name,
- XT_ERROR_TARGET) == 0)
- ++newinfo->stacksize;
- }
- if (ret) {
- /*
- * The first i matches need cleanup_entry (calls ->destroy)
- * because they had called ->check already. The other j-i
- * entries need only release.
- */
- int skip = i;
- j -= i;
- xt_entry_foreach(iter0, entry0, newinfo->size) {
- if (skip-- > 0)
- continue;
- if (j-- == 0)
- break;
- compat_release_entry(iter0);
- }
- xt_entry_foreach(iter1, entry1, newinfo->size) {
- if (i-- == 0)
- break;
- cleanup_entry(iter1, net);
- }
- xt_free_table_info(newinfo);
- return ret;
+ for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+ repl.hook_entry[i] = newinfo->hook_entry[i];
+ repl.underflow[i] = newinfo->underflow[i];
}
+ repl.num_counters = 0;
+ repl.counters = NULL;
+ repl.size = newinfo->size;
+ ret = translate_table(net, newinfo, entry1, &repl);
+ if (ret)
+ goto free_newinfo;
+
*pinfo = newinfo;
*pentry0 = entry1;
xt_free_table_info(info);
@@ -1793,17 +1644,16 @@ translate_compat_table(struct net *net,
free_newinfo:
xt_free_table_info(newinfo);
-out:
- xt_entry_foreach(iter0, entry0, total_size) {
+ return ret;
+out_unlock:
+ xt_compat_flush_offsets(AF_INET6);
+ xt_compat_unlock(AF_INET6);
+ xt_entry_foreach(iter0, entry0, compatr->size) {
if (j-- == 0)
break;
compat_release_entry(iter0);
}
return ret;
-out_unlock:
- xt_compat_flush_offsets(AF_INET6);
- xt_compat_unlock(AF_INET6);
- goto out;
}
static int
@@ -1839,10 +1689,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
goto free_newinfo;
}
- ret = translate_compat_table(net, tmp.name, tmp.valid_hooks,
- &newinfo, &loc_cpu_entry, tmp.size,
- tmp.num_entries, tmp.hook_entry,
- tmp.underflow);
+ ret = translate_compat_table(net, &newinfo, &loc_cpu_entry, &tmp);
if (ret != 0)
goto free_newinfo;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 45f5ae51de65..a234552a7e3d 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -496,10 +496,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
IP6CB(head)->flags |= IP6SKB_FRAGMENTED;
/* Yes, and fold redundant checksum back. 8) */
- if (head->ip_summed == CHECKSUM_COMPLETE)
- head->csum = csum_partial(skb_network_header(head),
- skb_network_header_len(head),
- head->csum);
+ skb_postpush_rcsum(head, skb_network_header(head),
+ skb_network_header_len(head));
rcu_read_lock();
IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 02ba70201e05..aed4f305f5f6 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1170,11 +1170,10 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table
return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
}
-struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
- struct flowi6 *fl6)
+struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
+ struct flowi6 *fl6, int flags)
{
struct dst_entry *dst;
- int flags = 0;
bool any_src;
dst = l3mdev_rt6_dst_by_oif(net, fl6);
@@ -1195,7 +1194,7 @@ struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
}
-EXPORT_SYMBOL(ip6_route_output);
+EXPORT_SYMBOL_GPL(ip6_route_output_flags);
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
@@ -1725,6 +1724,8 @@ static int ip6_convert_metrics(struct mx6_config *mxc,
} else {
val = nla_get_u32(nla);
}
+ if (type == RTAX_HOPLIMIT && val > 255)
+ val = 255;
if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
goto err;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index dcccae86190f..ba3d2f3d66d2 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -560,13 +560,13 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
ipv4_update_pmtu(skb, dev_net(skb->dev), info,
- t->parms.link, 0, IPPROTO_IPV6, 0);
+ t->parms.link, 0, iph->protocol, 0);
err = 0;
goto out;
}
if (type == ICMP_REDIRECT) {
ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
- IPPROTO_IPV6, 0);
+ iph->protocol, 0);
err = 0;
goto out;
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index e6b044480333..3c6acb67d8e5 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -329,6 +329,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct tcp_sock *tp;
__u32 seq, snd_una;
struct sock *sk;
+ bool fatal;
int err;
sk = __inet6_lookup_established(net, &tcp_hashinfo,
@@ -347,8 +348,9 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return;
}
seq = ntohl(th->seq);
+ fatal = icmpv6_err_convert(type, code, &err);
if (sk->sk_state == TCP_NEW_SYN_RECV)
- return tcp_req_err(sk, seq);
+ return tcp_req_err(sk, seq, fatal);
bh_lock_sock(sk);
if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
@@ -402,7 +404,6 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
goto out;
}
- icmpv6_err_convert(type, code, &err);
/* Might be for an request_sock */
switch (sk->sk_state) {
@@ -463,8 +464,10 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
if (np->repflow && ireq->pktopts)
fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
+ rcu_read_lock();
err = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt),
np->tclass);
+ rcu_read_unlock();
err = net_xmit_eval(err);
}
@@ -1386,7 +1389,7 @@ process:
if (sk->sk_state == TCP_NEW_SYN_RECV) {
struct request_sock *req = inet_reqsk(sk);
- struct sock *nsk = NULL;
+ struct sock *nsk;
sk = req->rsk_listener;
tcp_v6_fill_cb(skb, hdr, th);
@@ -1394,24 +1397,24 @@ process:
reqsk_put(req);
goto discard_it;
}
- if (likely(sk->sk_state == TCP_LISTEN)) {
- nsk = tcp_check_req(sk, skb, req, false);
- } else {
+ if (unlikely(sk->sk_state != TCP_LISTEN)) {
inet_csk_reqsk_queue_drop_and_put(sk, req);
goto lookup;
}
+ sock_hold(sk);
+ nsk = tcp_check_req(sk, skb, req, false);
if (!nsk) {
reqsk_put(req);
- goto discard_it;
+ goto discard_and_relse;
}
if (nsk == sk) {
- sock_hold(sk);
reqsk_put(req);
tcp_v6_restore_cb(skb);
} else if (tcp_child_process(sk, nsk, skb)) {
tcp_v6_send_reset(nsk, skb);
- goto discard_it;
+ goto discard_and_relse;
} else {
+ sock_put(sk);
return 0;
}
}
@@ -1704,7 +1707,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
destp = ntohs(inet->inet_dport);
srcp = ntohs(inet->inet_sport);
- if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
+ if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
+ icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
+ icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
timer_active = 1;
timer_expires = icsk->icsk_timeout;
} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index a1b6adc20e1e..ed7f4a81a932 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -647,7 +647,7 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
/* if we're overly short, let UDP handle it */
encap_rcv = ACCESS_ONCE(up->encap_rcv);
- if (skb->len > sizeof(struct udphdr) && encap_rcv) {
+ if (encap_rcv) {
int ret;
/* Verify checksum before giving to encap */
@@ -837,8 +837,8 @@ start_lookup:
flush_stack(stack, count, skb, count - 1);
} else {
if (!inner_flushed)
- UDP_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI,
- proto == IPPROTO_UDPLITE);
+ UDP6_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI,
+ proto == IPPROTO_UDPLITE);
consume_skb(skb);
}
return 0;
@@ -916,11 +916,9 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
ret = udpv6_queue_rcv_skb(sk, skb);
sock_put(sk);
- /* a return value > 0 means to resubmit the input, but
- * it wants the return to be -protocol, or 0
- */
+ /* a return value > 0 means to resubmit the input */
if (ret > 0)
- return -ret;
+ return ret;
return 0;
}
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index f7fbdbabe50e..372855eeaf42 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -23,7 +23,7 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
struct ipv6hdr *inner_iph = ipipv6_hdr(skb);
if (INET_ECN_is_ce(XFRM_MODE_SKB_CB(skb)->tos))
- IP6_ECN_set_ce(inner_iph);
+ IP6_ECN_set_ce(skb, inner_iph);
}
/* Add encapsulation header.
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 435608c4306d..20ab7b2ec463 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -708,6 +708,9 @@ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr,
if (!addr || addr->sa_family != AF_IUCV)
return -EINVAL;
+ if (addr_len < sizeof(struct sockaddr_iucv))
+ return -EINVAL;
+
lock_sock(sk);
if (sk->sk_state != IUCV_OPEN) {
err = -EBADFD;
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index afca2eb4dfa7..ec17cbe8a02b 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1581,7 +1581,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
/* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
tunnel->encap = encap;
if (encap == L2TP_ENCAPTYPE_UDP) {
- struct udp_tunnel_sock_cfg udp_cfg;
+ struct udp_tunnel_sock_cfg udp_cfg = { };
udp_cfg.sk_user_data = tunnel;
udp_cfg.encap_type = UDP_ENCAP_L2TPINUDP;
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index ec22078b0914..42de4ccd159f 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -123,12 +123,11 @@ static int l2tp_ip_recv(struct sk_buff *skb)
struct l2tp_tunnel *tunnel = NULL;
int length;
- /* Point to L2TP header */
- optr = ptr = skb->data;
-
if (!pskb_may_pull(skb, 4))
goto discard;
+ /* Point to L2TP header */
+ optr = ptr = skb->data;
session_id = ntohl(*((__be32 *) ptr));
ptr += 4;
@@ -156,6 +155,9 @@ static int l2tp_ip_recv(struct sk_buff *skb)
if (!pskb_may_pull(skb, length))
goto discard;
+ /* Point to L2TP header */
+ optr = ptr = skb->data;
+ ptr += 4;
pr_debug("%s: ip recv\n", tunnel->name);
print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length);
}
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index a2c8747d2936..9ee4ddb6b397 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -135,12 +135,11 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
struct l2tp_tunnel *tunnel = NULL;
int length;
- /* Point to L2TP header */
- optr = ptr = skb->data;
-
if (!pskb_may_pull(skb, 4))
goto discard;
+ /* Point to L2TP header */
+ optr = ptr = skb->data;
session_id = ntohl(*((__be32 *) ptr));
ptr += 4;
@@ -168,6 +167,9 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
if (!pskb_may_pull(skb, length))
goto discard;
+ /* Point to L2TP header */
+ optr = ptr = skb->data;
+ ptr += 4;
pr_debug("%s: ip recv\n", tunnel->name);
print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length);
}
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index f93c5be612a7..2caaa84ce92d 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -124,8 +124,13 @@ static int l2tp_tunnel_notify(struct genl_family *family,
ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq,
NLM_F_ACK, tunnel, cmd);
- if (ret >= 0)
- return genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC);
+ if (ret >= 0) {
+ ret = genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC);
+ /* We don't care if no one is listening */
+ if (ret == -ESRCH)
+ ret = 0;
+ return ret;
+ }
nlmsg_free(msg);
@@ -147,8 +152,13 @@ static int l2tp_session_notify(struct genl_family *family,
ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq,
NLM_F_ACK, session, cmd);
- if (ret >= 0)
- return genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC);
+ if (ret >= 0) {
+ ret = genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC);
+ /* We don't care if no one is listening */
+ if (ret == -ESRCH)
+ ret = 0;
+ return ret;
+ }
nlmsg_free(msg);
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 8dab4e569571..bb8edb9ef506 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -626,6 +626,7 @@ static void llc_cmsg_rcv(struct msghdr *msg, struct sk_buff *skb)
if (llc->cmsg_flags & LLC_CMSG_PKTINFO) {
struct llc_pktinfo info;
+ memset(&info, 0, sizeof(info));
info.lpi_ifindex = llc_sk(skb->sk)->dev->ifindex;
llc_pdu_decode_dsap(skb, &info.lpi_sap);
llc_pdu_decode_da(skb, info.lpi_mac);
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 10ad4ac1fa0b..367784be5df2 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -291,7 +291,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
}
/* prepare A-MPDU MLME for Rx aggregation */
- tid_agg_rx = kmalloc(sizeof(struct tid_ampdu_rx), GFP_KERNEL);
+ tid_agg_rx = kzalloc(sizeof(*tid_agg_rx), GFP_KERNEL);
if (!tid_agg_rx)
goto end;
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 337bb5d78003..980e9e9b6684 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -7,6 +7,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2009, Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -1484,14 +1485,21 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata)
sdata_info(sdata, "Trigger new scan to find an IBSS to join\n");
- num = ieee80211_ibss_setup_scan_channels(local->hw.wiphy,
- &ifibss->chandef,
- channels,
- ARRAY_SIZE(channels));
scan_width = cfg80211_chandef_to_scan_width(&ifibss->chandef);
- ieee80211_request_ibss_scan(sdata, ifibss->ssid,
- ifibss->ssid_len, channels, num,
- scan_width);
+
+ if (ifibss->fixed_channel) {
+ num = ieee80211_ibss_setup_scan_channels(local->hw.wiphy,
+ &ifibss->chandef,
+ channels,
+ ARRAY_SIZE(channels));
+ ieee80211_request_ibss_scan(sdata, ifibss->ssid,
+ ifibss->ssid_len, channels,
+ num, scan_width);
+ } else {
+ ieee80211_request_ibss_scan(sdata, ifibss->ssid,
+ ifibss->ssid_len, NULL,
+ 0, scan_width);
+ }
} else {
int interval = IEEE80211_SCAN_INTERVAL;
@@ -1732,7 +1740,6 @@ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local)
if (sdata->vif.type != NL80211_IFTYPE_ADHOC)
continue;
sdata->u.ibss.last_scan_completed = jiffies;
- ieee80211_queue_work(&local->hw, &sdata->work);
}
mutex_unlock(&local->iflist_mtx);
}
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 5322b4c71630..6837a46ca4a2 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -92,7 +92,7 @@ struct ieee80211_fragment_entry {
u16 extra_len;
u16 last_frag;
u8 rx_queue;
- bool ccmp; /* Whether fragments were encrypted with CCMP */
+ bool check_sequential_pn; /* needed for CCMP/GCMP */
u8 last_pn[6]; /* PN of the last fragment if CCMP was used */
};
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index c9e325d2e120..bcb0a1b64556 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -977,7 +977,10 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
if (sdata->vif.txq) {
struct txq_info *txqi = to_txq_info(sdata->vif.txq);
+ spin_lock_bh(&txqi->queue.lock);
ieee80211_purge_tx_queue(&local->hw, &txqi->queue);
+ spin_unlock_bh(&txqi->queue.lock);
+
atomic_set(&sdata->txqs_len[txqi->txq.ac], 0);
}
@@ -1747,7 +1750,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
ret = dev_alloc_name(ndev, ndev->name);
if (ret < 0) {
- free_netdev(ndev);
+ ieee80211_if_free(ndev);
return ret;
}
@@ -1833,7 +1836,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
ret = register_netdevice(ndev);
if (ret) {
- free_netdev(ndev);
+ ieee80211_if_free(ndev);
return ret;
}
}
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index fa28500f28fd..f7bb6829b415 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -151,19 +151,26 @@ u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata)
void mesh_sta_cleanup(struct sta_info *sta)
{
struct ieee80211_sub_if_data *sdata = sta->sdata;
- u32 changed;
+ u32 changed = 0;
/*
* maybe userspace handles peer allocation and peering, but in either
* case the beacon is still generated by the kernel and we might need
* an update.
*/
- changed = mesh_accept_plinks_update(sdata);
+ if (sdata->u.mesh.user_mpm &&
+ sta->mesh->plink_state == NL80211_PLINK_ESTAB)
+ changed |= mesh_plink_dec_estab_count(sdata);
+ changed |= mesh_accept_plinks_update(sdata);
if (!sdata->u.mesh.user_mpm) {
changed |= mesh_plink_deactivate(sta);
del_timer_sync(&sta->mesh->plink_timer);
}
+ /* make sure no readers can access nexthop sta from here on */
+ mesh_path_flush_by_nexthop(sta);
+ synchronize_net();
+
if (changed)
ieee80211_mbss_info_change_notify(sdata, changed);
}
@@ -1370,17 +1377,6 @@ out:
sdata_unlock(sdata);
}
-void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local)
-{
- struct ieee80211_sub_if_data *sdata;
-
- rcu_read_lock();
- list_for_each_entry_rcu(sdata, &local->interfaces, list)
- if (ieee80211_vif_is_mesh(&sdata->vif) &&
- ieee80211_sdata_running(sdata))
- ieee80211_queue_work(&local->hw, &sdata->work);
- rcu_read_unlock();
-}
void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
{
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index a1596344c3ba..4a8019f79fb2 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -362,14 +362,10 @@ static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata)
return sdata->u.mesh.mesh_pp_id == IEEE80211_PATH_PROTOCOL_HWMP;
}
-void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local);
-
void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata);
void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata);
void ieee80211s_stop(void);
#else
-static inline void
-ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) {}
static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata)
{ return false; }
static inline void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 3aa04344942b..83097c3832d1 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -4003,8 +4003,6 @@ static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata)
if (!ieee80211_hw_check(&sdata->local->hw, CONNECTION_MONITOR))
ieee80211_queue_work(&sdata->local->hw,
&sdata->u.mgd.monitor_work);
- /* and do all the other regular work too */
- ieee80211_queue_work(&sdata->local->hw, &sdata->work);
}
}
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index 3ece7d1034c8..b54f398cda5d 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -711,7 +711,7 @@ static u32 minstrel_get_expected_throughput(void *priv_sta)
* computing cur_tp
*/
tmp_mrs = &mi->r[idx].stats;
- tmp_cur_tp = minstrel_get_tp_avg(&mi->r[idx], tmp_mrs->prob_ewma);
+ tmp_cur_tp = minstrel_get_tp_avg(&mi->r[idx], tmp_mrs->prob_ewma) * 10;
tmp_cur_tp = tmp_cur_tp * 1200 * 8 / 1024;
return tmp_cur_tp;
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 3928dbd24e25..239ed6e92b89 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -691,7 +691,7 @@ minstrel_aggr_check(struct ieee80211_sta *pubsta, struct sk_buff *skb)
if (likely(sta->ampdu_mlme.tid_tx[tid]))
return;
- ieee80211_start_tx_ba_session(pubsta, tid, 5000);
+ ieee80211_start_tx_ba_session(pubsta, tid, 0);
}
static void
@@ -871,7 +871,7 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
* - if station is in dynamic SMPS (and streams > 1)
* - for fallback rates, to increase chances of getting through
*/
- if (offset > 0 &&
+ if (offset > 0 ||
(mi->sta->smps_mode == IEEE80211_SMPS_DYNAMIC &&
group->streams > 1)) {
ratetbl->rate[offset].count = ratetbl->rate[offset].count_rts;
@@ -1334,7 +1334,8 @@ static u32 minstrel_ht_get_expected_throughput(void *priv_sta)
prob = mi->groups[i].rates[j].prob_ewma;
/* convert tp_avg from pkt per second in kbps */
- tp_avg = minstrel_ht_get_tp_avg(mi, i, j, prob) * AVG_PKT_SIZE * 8 / 1024;
+ tp_avg = minstrel_ht_get_tp_avg(mi, i, j, prob) * 10;
+ tp_avg = tp_avg * AVG_PKT_SIZE * 8 / 1024;
return tp_avg;
}
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 82af407fea7a..a3bb8f7f5fc5 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1754,7 +1754,7 @@ ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata,
entry->seq = seq;
entry->rx_queue = rx_queue;
entry->last_frag = frag;
- entry->ccmp = 0;
+ entry->check_sequential_pn = false;
entry->extra_len = 0;
return entry;
@@ -1850,15 +1850,27 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
rx->seqno_idx, &(rx->skb));
if (rx->key &&
(rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP ||
- rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP_256) &&
+ rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP_256 ||
+ rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP ||
+ rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP_256) &&
ieee80211_has_protected(fc)) {
int queue = rx->security_idx;
- /* Store CCMP PN so that we can verify that the next
- * fragment has a sequential PN value. */
- entry->ccmp = 1;
+
+ /* Store CCMP/GCMP PN so that we can verify that the
+ * next fragment has a sequential PN value.
+ */
+ entry->check_sequential_pn = true;
memcpy(entry->last_pn,
rx->key->u.ccmp.rx_pn[queue],
IEEE80211_CCMP_PN_LEN);
+ BUILD_BUG_ON(offsetof(struct ieee80211_key,
+ u.ccmp.rx_pn) !=
+ offsetof(struct ieee80211_key,
+ u.gcmp.rx_pn));
+ BUILD_BUG_ON(sizeof(rx->key->u.ccmp.rx_pn[queue]) !=
+ sizeof(rx->key->u.gcmp.rx_pn[queue]));
+ BUILD_BUG_ON(IEEE80211_CCMP_PN_LEN !=
+ IEEE80211_GCMP_PN_LEN);
}
return RX_QUEUED;
}
@@ -1873,15 +1885,21 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
return RX_DROP_MONITOR;
}
- /* Verify that MPDUs within one MSDU have sequential PN values.
- * (IEEE 802.11i, 8.3.3.4.5) */
- if (entry->ccmp) {
+ /* "The receiver shall discard MSDUs and MMPDUs whose constituent
+ * MPDU PN values are not incrementing in steps of 1."
+ * see IEEE P802.11-REVmc/D5.0, 12.5.3.4.4, item d (for CCMP)
+ * and IEEE P802.11-REVmc/D5.0, 12.5.5.4.4, item d (for GCMP)
+ */
+ if (entry->check_sequential_pn) {
int i;
u8 pn[IEEE80211_CCMP_PN_LEN], *rpn;
int queue;
+
if (!rx->key ||
(rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP &&
- rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP_256))
+ rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP_256 &&
+ rx->key->conf.cipher != WLAN_CIPHER_SUITE_GCMP &&
+ rx->key->conf.cipher != WLAN_CIPHER_SUITE_GCMP_256))
return RX_DROP_UNUSABLE;
memcpy(pn, entry->last_pn, IEEE80211_CCMP_PN_LEN);
for (i = IEEE80211_CCMP_PN_LEN - 1; i >= 0; i--) {
@@ -2232,7 +2250,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
struct ieee80211_local *local = rx->local;
struct ieee80211_sub_if_data *sdata = rx->sdata;
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
- u16 q, hdrlen;
+ u16 ac, q, hdrlen;
hdr = (struct ieee80211_hdr *) skb->data;
hdrlen = ieee80211_hdrlen(hdr->frame_control);
@@ -2301,7 +2319,8 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
ether_addr_equal(sdata->vif.addr, hdr->addr3))
return RX_CONTINUE;
- q = ieee80211_select_queue_80211(sdata, skb, hdr);
+ ac = ieee80211_select_queue_80211(sdata, skb, hdr);
+ q = sdata->vif.hw_queue[ac];
if (ieee80211_queue_stopped(&local->hw, q)) {
IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_congestion);
return RX_DROP_MONITOR;
@@ -3367,6 +3386,7 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
return false;
/* ignore action frames to TDLS-peers */
if (ieee80211_is_action(hdr->frame_control) &&
+ !is_broadcast_ether_addr(bssid) &&
!ether_addr_equal(bssid, hdr->addr1))
return false;
}
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index a413e52f7691..acbe182b75d1 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -314,6 +314,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
bool was_scanning = local->scanning;
struct cfg80211_scan_request *scan_req;
struct ieee80211_sub_if_data *scan_sdata;
+ struct ieee80211_sub_if_data *sdata;
lockdep_assert_held(&local->mtx);
@@ -373,7 +374,16 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
ieee80211_mlme_notify_scan_completed(local);
ieee80211_ibss_notify_scan_completed(local);
- ieee80211_mesh_notify_scan_completed(local);
+
+ /* Requeue all the work that might have been ignored while
+ * the scan was in progress; if there was none this will
+ * just be a no-op for the particular interface.
+ */
+ list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+ if (ieee80211_sdata_running(sdata))
+ ieee80211_queue_work(&sdata->local->hw, &sdata->work);
+ }
+
if (was_scanning)
ieee80211_start_next_roc(local);
}
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index f91d1873218c..67066d048e6f 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -256,11 +256,11 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta)
}
/* Caller must hold local->sta_mtx */
-static void sta_info_hash_add(struct ieee80211_local *local,
- struct sta_info *sta)
+static int sta_info_hash_add(struct ieee80211_local *local,
+ struct sta_info *sta)
{
- rhashtable_insert_fast(&local->sta_hash, &sta->hash_node,
- sta_rht_params);
+ return rhashtable_insert_fast(&local->sta_hash, &sta->hash_node,
+ sta_rht_params);
}
static void sta_deliver_ps_frames(struct work_struct *wk)
@@ -484,11 +484,17 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
{
struct ieee80211_local *local = sta->local;
struct ieee80211_sub_if_data *sdata = sta->sdata;
- struct station_info sinfo;
+ struct station_info *sinfo;
int err = 0;
lockdep_assert_held(&local->sta_mtx);
+ sinfo = kzalloc(sizeof(struct station_info), GFP_KERNEL);
+ if (!sinfo) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
/* check if STA exists already */
if (sta_info_get_bss(sdata, sta->sta.addr)) {
err = -EEXIST;
@@ -503,7 +509,9 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
set_sta_flag(sta, WLAN_STA_BLOCK_BA);
/* make the station visible */
- sta_info_hash_add(local, sta);
+ err = sta_info_hash_add(local, sta);
+ if (err)
+ goto out_drop_sta;
list_add_tail_rcu(&sta->list, &local->sta_list);
@@ -520,10 +528,9 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
ieee80211_sta_debugfs_add(sta);
rate_control_add_sta_debugfs(sta);
- memset(&sinfo, 0, sizeof(sinfo));
- sinfo.filled = 0;
- sinfo.generation = local->sta_generation;
- cfg80211_new_sta(sdata->dev, sta->sta.addr, &sinfo, GFP_KERNEL);
+ sinfo->generation = local->sta_generation;
+ cfg80211_new_sta(sdata->dev, sta->sta.addr, sinfo, GFP_KERNEL);
+ kfree(sinfo);
sta_dbg(sdata, "Inserted STA %pM\n", sta->sta.addr);
@@ -538,6 +545,7 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
out_remove:
sta_info_hash_del(local, sta);
list_del_rcu(&sta->list);
+ out_drop_sta:
local->num_sta--;
synchronize_net();
__cleanup_single_sta(sta);
@@ -882,7 +890,7 @@ static void __sta_info_destroy_part2(struct sta_info *sta)
{
struct ieee80211_local *local = sta->local;
struct ieee80211_sub_if_data *sdata = sta->sdata;
- struct station_info sinfo = {};
+ struct station_info *sinfo;
int ret;
/*
@@ -920,8 +928,11 @@ static void __sta_info_destroy_part2(struct sta_info *sta)
sta_dbg(sdata, "Removed STA %pM\n", sta->sta.addr);
- sta_set_sinfo(sta, &sinfo);
- cfg80211_del_sta_sinfo(sdata->dev, sta->sta.addr, &sinfo, GFP_KERNEL);
+ sinfo = kzalloc(sizeof(*sinfo), GFP_KERNEL);
+ if (sinfo)
+ sta_set_sinfo(sta, sinfo);
+ cfg80211_del_sta_sinfo(sdata->dev, sta->sta.addr, sinfo, GFP_KERNEL);
+ kfree(sinfo);
rate_control_remove_sta_debugfs(sta);
ieee80211_sta_debugfs_remove(sta);
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 2cafb21b422f..15b0150283b6 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -269,7 +269,7 @@ struct ieee80211_fast_tx {
u8 sa_offs, da_offs, pn_offs;
u8 band;
u8 hdr[30 + 2 + IEEE80211_FAST_XMIT_MAX_IV +
- sizeof(rfc1042_header)];
+ sizeof(rfc1042_header)] __aligned(2);
struct rcu_head rcu_head;
};
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index c32fc411a911..881bc2072809 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -518,6 +518,9 @@ static struct net_device *find_outdev(struct net *net,
if (!dev)
return ERR_PTR(-ENODEV);
+ if (IS_ERR(dev))
+ return dev;
+
/* The caller is holding rtnl anyways, so release the dev reference */
dev_put(dev);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index f57b4dcdb233..4da560005b0e 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1757,15 +1757,34 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
cp = pp->conn_in_get(ipvs, af, skb, &iph);
conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
- if (conn_reuse_mode && !iph.fragoffs &&
- is_new_conn(skb, &iph) && cp &&
- ((unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest &&
- unlikely(!atomic_read(&cp->dest->weight))) ||
- unlikely(is_new_conn_expected(cp, conn_reuse_mode)))) {
- if (!atomic_read(&cp->n_control))
- ip_vs_conn_expire_now(cp);
- __ip_vs_conn_put(cp);
- cp = NULL;
+ if (conn_reuse_mode && !iph.fragoffs && is_new_conn(skb, &iph) && cp) {
+ bool uses_ct = false, resched = false;
+
+ if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest &&
+ unlikely(!atomic_read(&cp->dest->weight))) {
+ resched = true;
+ uses_ct = ip_vs_conn_uses_conntrack(cp, skb);
+ } else if (is_new_conn_expected(cp, conn_reuse_mode)) {
+ uses_ct = ip_vs_conn_uses_conntrack(cp, skb);
+ if (!atomic_read(&cp->n_control)) {
+ resched = true;
+ } else {
+ /* Do not reschedule controlling connection
+ * that uses conntrack while it is still
+ * referenced by controlled connection(s).
+ */
+ resched = !uses_ct;
+ }
+ }
+
+ if (resched) {
+ if (!atomic_read(&cp->n_control))
+ ip_vs_conn_expire_now(cp);
+ __ip_vs_conn_put(cp);
+ if (uses_ct)
+ return NF_DROP;
+ cp = NULL;
+ }
}
if (unlikely(!cp)) {
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index 1b8d594e493a..0a6eb5c0d9e9 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -70,10 +70,10 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
const char *dptr;
int retc;
- ip_vs_fill_iph_skb(p->af, skb, false, &iph);
+ retc = ip_vs_fill_iph_skb(p->af, skb, false, &iph);
/* Only useful with UDP */
- if (iph.protocol != IPPROTO_UDP)
+ if (!retc || iph.protocol != IPPROTO_UDP)
return -EINVAL;
/* todo: IPv6 fragments:
* I think this only should be done for the first fragment. /HS
@@ -88,7 +88,7 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
dptr = skb->data + dataoff;
datalen = skb->len - dataoff;
- if (get_callid(dptr, dataoff, datalen, &matchoff, &matchlen))
+ if (get_callid(dptr, 0, datalen, &matchoff, &matchlen))
return -EINVAL;
/* N.B: pe_data is only set on success,
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 3cb3cb831591..86a3c6f0c871 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1757,6 +1757,7 @@ void nf_conntrack_init_end(void)
int nf_conntrack_init_net(struct net *net)
{
+ static atomic64_t unique_id;
int ret = -ENOMEM;
int cpu;
@@ -1779,7 +1780,8 @@ int nf_conntrack_init_net(struct net *net)
if (!net->ct.stat)
goto err_pcpu_lists;
- net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net);
+ net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%llu",
+ (u64)atomic64_inc_return(&unique_id));
if (!net->ct.slabname)
goto err_slabname;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index d4aaad747ea9..25391fb25516 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -415,6 +415,47 @@ int xt_check_match(struct xt_mtchk_param *par,
}
EXPORT_SYMBOL_GPL(xt_check_match);
+/** xt_check_entry_match - check that matches end before start of target
+ *
+ * @match: beginning of xt_entry_match
+ * @target: beginning of this rules target (alleged end of matches)
+ * @alignment: alignment requirement of match structures
+ *
+ * Validates that all matches add up to the beginning of the target,
+ * and that each match covers at least the base structure size.
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+static int xt_check_entry_match(const char *match, const char *target,
+ const size_t alignment)
+{
+ const struct xt_entry_match *pos;
+ int length = target - match;
+
+ if (length == 0) /* no matches */
+ return 0;
+
+ pos = (struct xt_entry_match *)match;
+ do {
+ if ((unsigned long)pos % alignment)
+ return -EINVAL;
+
+ if (length < (int)sizeof(struct xt_entry_match))
+ return -EINVAL;
+
+ if (pos->u.match_size < sizeof(struct xt_entry_match))
+ return -EINVAL;
+
+ if (pos->u.match_size > length)
+ return -EINVAL;
+
+ length -= pos->u.match_size;
+ pos = ((void *)((char *)(pos) + (pos)->u.match_size));
+ } while (length > 0);
+
+ return 0;
+}
+
#ifdef CONFIG_COMPAT
int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta)
{
@@ -484,13 +525,14 @@ int xt_compat_match_offset(const struct xt_match *match)
}
EXPORT_SYMBOL_GPL(xt_compat_match_offset);
-int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
- unsigned int *size)
+void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
+ unsigned int *size)
{
const struct xt_match *match = m->u.kernel.match;
struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m;
int pad, off = xt_compat_match_offset(match);
u_int16_t msize = cm->u.user.match_size;
+ char name[sizeof(m->u.user.name)];
m = *dstptr;
memcpy(m, cm, sizeof(*cm));
@@ -504,10 +546,12 @@ int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
msize += off;
m->u.user.match_size = msize;
+ strlcpy(name, match->name, sizeof(name));
+ module_put(match->me);
+ strncpy(m->u.user.name, name, sizeof(m->u.user.name));
*size += off;
*dstptr += msize;
- return 0;
}
EXPORT_SYMBOL_GPL(xt_compat_match_from_user);
@@ -538,8 +582,125 @@ int xt_compat_match_to_user(const struct xt_entry_match *m,
return 0;
}
EXPORT_SYMBOL_GPL(xt_compat_match_to_user);
+
+/* non-compat version may have padding after verdict */
+struct compat_xt_standard_target {
+ struct compat_xt_entry_target t;
+ compat_uint_t verdict;
+};
+
+int xt_compat_check_entry_offsets(const void *base, const char *elems,
+ unsigned int target_offset,
+ unsigned int next_offset)
+{
+ long size_of_base_struct = elems - (const char *)base;
+ const struct compat_xt_entry_target *t;
+ const char *e = base;
+
+ if (target_offset < size_of_base_struct)
+ return -EINVAL;
+
+ if (target_offset + sizeof(*t) > next_offset)
+ return -EINVAL;
+
+ t = (void *)(e + target_offset);
+ if (t->u.target_size < sizeof(*t))
+ return -EINVAL;
+
+ if (target_offset + t->u.target_size > next_offset)
+ return -EINVAL;
+
+ if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 &&
+ COMPAT_XT_ALIGN(target_offset + sizeof(struct compat_xt_standard_target)) != next_offset)
+ return -EINVAL;
+
+ /* compat_xt_entry match has less strict aligment requirements,
+ * otherwise they are identical. In case of padding differences
+ * we need to add compat version of xt_check_entry_match.
+ */
+ BUILD_BUG_ON(sizeof(struct compat_xt_entry_match) != sizeof(struct xt_entry_match));
+
+ return xt_check_entry_match(elems, base + target_offset,
+ __alignof__(struct compat_xt_entry_match));
+}
+EXPORT_SYMBOL(xt_compat_check_entry_offsets);
#endif /* CONFIG_COMPAT */
+/**
+ * xt_check_entry_offsets - validate arp/ip/ip6t_entry
+ *
+ * @base: pointer to arp/ip/ip6t_entry
+ * @elems: pointer to first xt_entry_match, i.e. ip(6)t_entry->elems
+ * @target_offset: the arp/ip/ip6_t->target_offset
+ * @next_offset: the arp/ip/ip6_t->next_offset
+ *
+ * validates that target_offset and next_offset are sane and that all
+ * match sizes (if any) align with the target offset.
+ *
+ * This function does not validate the targets or matches themselves, it
+ * only tests that all the offsets and sizes are correct, that all
+ * match structures are aligned, and that the last structure ends where
+ * the target structure begins.
+ *
+ * Also see xt_compat_check_entry_offsets for CONFIG_COMPAT version.
+ *
+ * The arp/ip/ip6t_entry structure @base must have passed following tests:
+ * - it must point to a valid memory location
+ * - base to base + next_offset must be accessible, i.e. not exceed allocated
+ * length.
+ *
+ * A well-formed entry looks like this:
+ *
+ * ip(6)t_entry match [mtdata] match [mtdata] target [tgdata] ip(6)t_entry
+ * e->elems[]-----' | |
+ * matchsize | |
+ * matchsize | |
+ * | |
+ * target_offset---------------------------------' |
+ * next_offset---------------------------------------------------'
+ *
+ * elems[]: flexible array member at end of ip(6)/arpt_entry struct.
+ * This is where matches (if any) and the target reside.
+ * target_offset: beginning of target.
+ * next_offset: start of the next rule; also: size of this rule.
+ * Since targets have a minimum size, target_offset + minlen <= next_offset.
+ *
+ * Every match stores its size, sum of sizes must not exceed target_offset.
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+int xt_check_entry_offsets(const void *base,
+ const char *elems,
+ unsigned int target_offset,
+ unsigned int next_offset)
+{
+ long size_of_base_struct = elems - (const char *)base;
+ const struct xt_entry_target *t;
+ const char *e = base;
+
+ /* target start is within the ip/ip6/arpt_entry struct */
+ if (target_offset < size_of_base_struct)
+ return -EINVAL;
+
+ if (target_offset + sizeof(*t) > next_offset)
+ return -EINVAL;
+
+ t = (void *)(e + target_offset);
+ if (t->u.target_size < sizeof(*t))
+ return -EINVAL;
+
+ if (target_offset + t->u.target_size > next_offset)
+ return -EINVAL;
+
+ if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 &&
+ XT_ALIGN(target_offset + sizeof(struct xt_standard_target)) != next_offset)
+ return -EINVAL;
+
+ return xt_check_entry_match(elems, base + target_offset,
+ __alignof__(struct xt_entry_match));
+}
+EXPORT_SYMBOL(xt_check_entry_offsets);
+
int xt_check_target(struct xt_tgchk_param *par,
unsigned int size, u_int8_t proto, bool inv_proto)
{
@@ -590,6 +751,80 @@ int xt_check_target(struct xt_tgchk_param *par,
}
EXPORT_SYMBOL_GPL(xt_check_target);
+/**
+ * xt_copy_counters_from_user - copy counters and metadata from userspace
+ *
+ * @user: src pointer to userspace memory
+ * @len: alleged size of userspace memory
+ * @info: where to store the xt_counters_info metadata
+ * @compat: true if we setsockopt call is done by 32bit task on 64bit kernel
+ *
+ * Copies counter meta data from @user and stores it in @info.
+ *
+ * vmallocs memory to hold the counters, then copies the counter data
+ * from @user to the new memory and returns a pointer to it.
+ *
+ * If @compat is true, @info gets converted automatically to the 64bit
+ * representation.
+ *
+ * The metadata associated with the counters is stored in @info.
+ *
+ * Return: returns pointer that caller has to test via IS_ERR().
+ * If IS_ERR is false, caller has to vfree the pointer.
+ */
+void *xt_copy_counters_from_user(const void __user *user, unsigned int len,
+ struct xt_counters_info *info, bool compat)
+{
+ void *mem;
+ u64 size;
+
+#ifdef CONFIG_COMPAT
+ if (compat) {
+ /* structures only differ in size due to alignment */
+ struct compat_xt_counters_info compat_tmp;
+
+ if (len <= sizeof(compat_tmp))
+ return ERR_PTR(-EINVAL);
+
+ len -= sizeof(compat_tmp);
+ if (copy_from_user(&compat_tmp, user, sizeof(compat_tmp)) != 0)
+ return ERR_PTR(-EFAULT);
+
+ strlcpy(info->name, compat_tmp.name, sizeof(info->name));
+ info->num_counters = compat_tmp.num_counters;
+ user += sizeof(compat_tmp);
+ } else
+#endif
+ {
+ if (len <= sizeof(*info))
+ return ERR_PTR(-EINVAL);
+
+ len -= sizeof(*info);
+ if (copy_from_user(info, user, sizeof(*info)) != 0)
+ return ERR_PTR(-EFAULT);
+
+ info->name[sizeof(info->name) - 1] = '\0';
+ user += sizeof(*info);
+ }
+
+ size = sizeof(struct xt_counters);
+ size *= info->num_counters;
+
+ if (size != (u64)len)
+ return ERR_PTR(-EINVAL);
+
+ mem = vmalloc(len);
+ if (!mem)
+ return ERR_PTR(-ENOMEM);
+
+ if (copy_from_user(mem, user, len) == 0)
+ return mem;
+
+ vfree(mem);
+ return ERR_PTR(-EFAULT);
+}
+EXPORT_SYMBOL_GPL(xt_copy_counters_from_user);
+
#ifdef CONFIG_COMPAT
int xt_compat_target_offset(const struct xt_target *target)
{
@@ -605,6 +840,7 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t;
int pad, off = xt_compat_target_offset(target);
u_int16_t tsize = ct->u.user.target_size;
+ char name[sizeof(t->u.user.name)];
t = *dstptr;
memcpy(t, ct, sizeof(*ct));
@@ -618,6 +854,9 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
tsize += off;
t->u.user.target_size = tsize;
+ strlcpy(name, target->name, sizeof(name));
+ module_put(target->me);
+ strncpy(t->u.user.name, name, sizeof(t->u.user.name));
*size += off;
*dstptr += tsize;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 59651af8cc27..7a5fa0c98377 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1305,7 +1305,7 @@ static int netlink_release(struct socket *sock)
skb_queue_purge(&sk->sk_write_queue);
- if (nlk->portid) {
+ if (nlk->portid && nlk->bound) {
struct netlink_notify n = {
.net = sock_net(sk),
.protocol = sk->sk_protocol,
@@ -2784,6 +2784,7 @@ static int netlink_dump(struct sock *sk)
struct netlink_callback *cb;
struct sk_buff *skb = NULL;
struct nlmsghdr *nlh;
+ struct module *module;
int len, err = -ENOBUFS;
int alloc_min_size;
int alloc_size;
@@ -2863,9 +2864,11 @@ static int netlink_dump(struct sock *sk)
cb->done(cb);
nlk->cb_running = false;
+ module = cb->module;
+ skb = cb->skb;
mutex_unlock(nlk->cb_mutex);
- module_put(cb->module);
- consume_skb(cb->skb);
+ module_put(module);
+ consume_skb(skb);
return 0;
errout_skb:
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index c88d0f2d3e01..7cb8184ac165 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -158,9 +158,7 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
new_mpls_lse = (__be32 *)skb_mpls_header(skb);
*new_mpls_lse = mpls->mpls_lse;
- if (skb->ip_summed == CHECKSUM_COMPLETE)
- skb->csum = csum_add(skb->csum, csum_partial(new_mpls_lse,
- MPLS_HLEN, 0));
+ skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN);
hdr = eth_hdr(skb);
hdr->h_proto = mpls->mpls_ethertype;
@@ -280,7 +278,7 @@ static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key,
ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst,
mask->eth_dst);
- ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
+ skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source);
ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest);
@@ -463,7 +461,7 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
mask_ipv6_addr(saddr, key->ipv6_src, mask->ipv6_src, masked);
if (unlikely(memcmp(saddr, masked, sizeof(masked)))) {
- set_ipv6_addr(skb, key->ipv6_proto, saddr, masked,
+ set_ipv6_addr(skb, flow_key->ip.proto, saddr, masked,
true);
memcpy(&flow_key->ipv6.addr.src, masked,
sizeof(flow_key->ipv6.addr.src));
@@ -485,7 +483,7 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
NULL, &flags)
!= NEXTHDR_ROUTING);
- set_ipv6_addr(skb, key->ipv6_proto, daddr, masked,
+ set_ipv6_addr(skb, flow_key->ip.proto, daddr, masked,
recalc_csum);
memcpy(&flow_key->ipv6.addr.dst, masked,
sizeof(flow_key->ipv6.addr.dst));
@@ -639,7 +637,7 @@ static int ovs_vport_output(struct net *net, struct sock *sk, struct sk_buff *sk
/* Reconstruct the MAC header. */
skb_push(skb, data->l2_len);
memcpy(skb->data, &data->l2_data, data->l2_len);
- ovs_skb_postpush_rcsum(skb, skb->data, data->l2_len);
+ skb_postpush_rcsum(skb, skb->data, data->l2_len);
skb_reset_mac_header(skb);
ovs_vport_send(vport, skb);
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 91a8b004dc51..deadfdab1bc3 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -336,12 +336,10 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
unsigned short gso_type = skb_shinfo(skb)->gso_type;
struct sw_flow_key later_key;
struct sk_buff *segs, *nskb;
- struct ovs_skb_cb ovs_cb;
int err;
- ovs_cb = *OVS_CB(skb);
+ BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_SGO_CB_OFFSET);
segs = __skb_gso_segment(skb, NETIF_F_SG, false);
- *OVS_CB(skb) = ovs_cb;
if (IS_ERR(segs))
return PTR_ERR(segs);
if (segs == NULL)
@@ -359,7 +357,6 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
/* Queue all of the segments. */
skb = segs;
do {
- *OVS_CB(skb) = ovs_cb;
if (gso_type & SKB_GSO_UDP && skb != segs)
key = &later_key;
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 6b0190b987ec..76fcaf1fd2a9 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -58,7 +58,7 @@ static void netdev_port_receive(struct sk_buff *skb)
return;
skb_push(skb, ETH_HLEN);
- ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
+ skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
ovs_vport_receive(vport, skb, skb_tunnel_info(skb));
return;
error:
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index 1605691d9414..5eb7694348b5 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -90,7 +90,9 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
int err;
struct vxlan_config conf = {
.no_share = true,
- .flags = VXLAN_F_COLLECT_METADATA,
+ .flags = VXLAN_F_COLLECT_METADATA | VXLAN_F_UDP_ZERO_CSUM6_RX,
+ /* Don't restrict the packets that can be sent by MTU */
+ .mtu = IP_MAX_MTU,
};
if (!options) {
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index 8ea3a96980ac..6e2b62f9d595 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -184,13 +184,6 @@ static inline struct vport *vport_from_priv(void *priv)
int ovs_vport_receive(struct vport *, struct sk_buff *,
const struct ip_tunnel_info *);
-static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb,
- const void *start, unsigned int len)
-{
- if (skb->ip_summed == CHECKSUM_COMPLETE)
- skb->csum = csum_add(skb->csum, csum_partial(start, len, 0));
-}
-
static inline const char *ovs_vport_name(struct vport *vport)
{
return vport->dev->name;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 992396aa635c..a86f26d05bc2 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1341,7 +1341,7 @@ static unsigned int fanout_demux_hash(struct packet_fanout *f,
struct sk_buff *skb,
unsigned int num)
{
- return reciprocal_scale(skb_get_hash(skb), num);
+ return reciprocal_scale(__skb_get_hash_symmetric(skb), num);
}
static unsigned int fanout_demux_lb(struct packet_fanout *f,
@@ -1916,6 +1916,10 @@ retry:
goto retry;
}
+ if (!dev_validate_header(dev, skb->data, len)) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
if (len > (dev->mtu + dev->hard_header_len + extra_len) &&
!packet_extra_vlan_len_allowed(dev, skb)) {
err = -EMSGSIZE;
@@ -2326,18 +2330,6 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
sock_wfree(skb);
}
-static bool ll_header_truncated(const struct net_device *dev, int len)
-{
- /* net device doesn't like empty head */
- if (unlikely(len < dev->hard_header_len)) {
- net_warn_ratelimited("%s: packet size is too short (%d < %d)\n",
- current->comm, len, dev->hard_header_len);
- return true;
- }
-
- return false;
-}
-
static void tpacket_set_protocol(const struct net_device *dev,
struct sk_buff *skb)
{
@@ -2420,19 +2412,19 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
if (unlikely(err < 0))
return -EINVAL;
} else if (dev->hard_header_len) {
- if (ll_header_truncated(dev, tp_len))
- return -EINVAL;
+ int hdrlen = min_t(int, dev->hard_header_len, tp_len);
skb_push(skb, dev->hard_header_len);
- err = skb_store_bits(skb, 0, data,
- dev->hard_header_len);
+ err = skb_store_bits(skb, 0, data, hdrlen);
if (unlikely(err))
return err;
+ if (!dev_validate_header(dev, skb->data, hdrlen))
+ return -EINVAL;
if (!skb->protocol)
tpacket_set_protocol(dev, skb);
- data += dev->hard_header_len;
- to_write -= dev->hard_header_len;
+ data += hdrlen;
+ to_write -= hdrlen;
}
offset = offset_in_page(data);
@@ -2763,9 +2755,6 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len);
if (unlikely(offset < 0))
goto out_free;
- } else {
- if (ll_header_truncated(dev, len))
- goto out_free;
}
/* Returns -EFAULT on error */
@@ -2773,6 +2762,12 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
if (err)
goto out_free;
+ if (sock->type == SOCK_RAW &&
+ !dev_validate_header(dev, skb->data, len)) {
+ err = -EINVAL;
+ goto out_free;
+ }
+
sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
if (!gso_type && (len > dev->mtu + reserve + extra_len) &&
@@ -3441,6 +3436,7 @@ static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
i->ifindex = mreq->mr_ifindex;
i->alen = mreq->mr_alen;
memcpy(i->addr, mreq->mr_address, i->alen);
+ memset(i->addr + i->alen, 0, sizeof(i->addr) - i->alen);
i->count = 1;
i->next = po->mclist;
po->mclist = i;
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index 10d42f3220ab..f925753668a7 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -377,6 +377,10 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev,
struct sockaddr_pn sa;
u16 len;
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (!skb)
+ return NET_RX_DROP;
+
/* check we have at least a full Phonet header */
if (!pskb_pull(skb, sizeof(struct phonethdr)))
goto out;
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 9f843bbe8c10..d778d99326df 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -1097,17 +1097,6 @@ static unsigned int rfkill_fop_poll(struct file *file, poll_table *wait)
return res;
}
-static bool rfkill_readable(struct rfkill_data *data)
-{
- bool r;
-
- mutex_lock(&data->mtx);
- r = !list_empty(&data->events);
- mutex_unlock(&data->mtx);
-
- return r;
-}
-
static ssize_t rfkill_fop_read(struct file *file, char __user *buf,
size_t count, loff_t *pos)
{
@@ -1124,8 +1113,11 @@ static ssize_t rfkill_fop_read(struct file *file, char __user *buf,
goto out;
}
mutex_unlock(&data->mtx);
+ /* since we re-check and it just compares pointers,
+ * using !list_empty() without locking isn't a problem
+ */
ret = wait_event_interruptible(data->read_wait,
- rfkill_readable(data));
+ !list_empty(&data->events));
mutex_lock(&data->mtx);
if (ret)
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index b07c535ba8e7..eeb3eb3ea9eb 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -105,9 +105,7 @@ static void *tcf_csum_skb_nextlayer(struct sk_buff *skb,
int hl = ihl + jhl;
if (!pskb_may_pull(skb, ipl + ntkoff) || (ipl < hl) ||
- (skb_cloned(skb) &&
- !skb_clone_writable(skb, hl + ntkoff) &&
- pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
+ skb_try_make_writable(skb, hl + ntkoff))
return NULL;
else
return (void *)(skb_network_header(skb) + ihl);
@@ -365,9 +363,7 @@ static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags)
}
if (update_flags & TCA_CSUM_UPDATE_FLAG_IPV4HDR) {
- if (skb_cloned(skb) &&
- !skb_clone_writable(skb, sizeof(*iph) + ntkoff) &&
- pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+ if (skb_try_make_writable(skb, sizeof(*iph) + ntkoff))
goto fail;
ip_send_check(ip_hdr(skb));
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 32fcdecdb9e2..e384d6aefa3a 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -170,7 +170,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
if (!(at & AT_EGRESS)) {
if (m->tcfm_ok_push)
- skb_push(skb2, skb->mac_len);
+ skb_push_rcsum(skb2, skb->mac_len);
}
/* mirror is always swallowed */
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index b7c4ead8b5a8..27607b863aba 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -126,9 +126,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
addr = iph->daddr;
if (!((old_addr ^ addr) & mask)) {
- if (skb_cloned(skb) &&
- !skb_clone_writable(skb, sizeof(*iph) + noff) &&
- pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+ if (skb_try_make_writable(skb, sizeof(*iph) + noff))
goto drop;
new_addr &= mask;
@@ -156,9 +154,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
struct tcphdr *tcph;
if (!pskb_may_pull(skb, ihl + sizeof(*tcph) + noff) ||
- (skb_cloned(skb) &&
- !skb_clone_writable(skb, ihl + sizeof(*tcph) + noff) &&
- pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
+ skb_try_make_writable(skb, ihl + sizeof(*tcph) + noff))
goto drop;
tcph = (void *)(skb_network_header(skb) + ihl);
@@ -171,9 +167,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
struct udphdr *udph;
if (!pskb_may_pull(skb, ihl + sizeof(*udph) + noff) ||
- (skb_cloned(skb) &&
- !skb_clone_writable(skb, ihl + sizeof(*udph) + noff) &&
- pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
+ skb_try_make_writable(skb, ihl + sizeof(*udph) + noff))
goto drop;
udph = (void *)(skb_network_header(skb) + ihl);
@@ -213,10 +207,8 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
if ((old_addr ^ addr) & mask)
break;
- if (skb_cloned(skb) &&
- !skb_clone_writable(skb, ihl + sizeof(*icmph) +
- sizeof(*iph) + noff) &&
- pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+ if (skb_try_make_writable(skb, ihl + sizeof(*icmph) +
+ sizeof(*iph) + noff))
goto drop;
icmph = (void *)(skb_network_header(skb) + ihl);
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 57692947ebbe..95b021243233 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -252,23 +252,28 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
fl_set_key_val(tb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
sizeof(key->eth.src));
+
fl_set_key_val(tb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE,
&mask->basic.n_proto, TCA_FLOWER_UNSPEC,
sizeof(key->basic.n_proto));
+
if (key->basic.n_proto == htons(ETH_P_IP) ||
key->basic.n_proto == htons(ETH_P_IPV6)) {
fl_set_key_val(tb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
&mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
sizeof(key->basic.ip_proto));
}
- if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+
+ if (tb[TCA_FLOWER_KEY_IPV4_SRC] || tb[TCA_FLOWER_KEY_IPV4_DST]) {
+ key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
fl_set_key_val(tb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
&mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
sizeof(key->ipv4.src));
fl_set_key_val(tb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST,
&mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
sizeof(key->ipv4.dst));
- } else if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ } else if (tb[TCA_FLOWER_KEY_IPV6_SRC] || tb[TCA_FLOWER_KEY_IPV6_DST]) {
+ key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
fl_set_key_val(tb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
&mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
sizeof(key->ipv6.src));
@@ -276,6 +281,7 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
&mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK,
sizeof(key->ipv6.dst));
}
+
if (key->basic.ip_proto == IPPROTO_TCP) {
fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
&mask->tp.src, TCA_FLOWER_UNSPEC,
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index b5c2cf2aa6d4..95b560f0b253 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -744,14 +744,15 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
return 0;
}
-void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
+void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
+ unsigned int len)
{
const struct Qdisc_class_ops *cops;
unsigned long cl;
u32 parentid;
int drops;
- if (n == 0)
+ if (n == 0 && len == 0)
return;
drops = max_t(int, n, 0);
rcu_read_lock();
@@ -774,11 +775,12 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
cops->put(sch, cl);
}
sch->q.qlen -= n;
+ sch->qstats.backlog -= len;
__qdisc_qstats_drop(sch, drops);
}
rcu_read_unlock();
}
-EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
+EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
static void notify_and_destroy(struct net *net, struct sk_buff *skb,
struct nlmsghdr *n, u32 clid,
@@ -1852,6 +1854,7 @@ reset:
}
tp = old_tp;
+ protocol = tc_skb_protocol(skb);
goto reclassify;
#endif
}
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index c538d9e4a8f6..baafddf229ce 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1624,13 +1624,8 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
new->reshape_fail = cbq_reshape_fail;
#endif
}
- sch_tree_lock(sch);
- *old = cl->q;
- cl->q = new;
- qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
- qdisc_reset(*old);
- sch_tree_unlock(sch);
+ *old = qdisc_replace(sch, new, &cl->q);
return 0;
}
@@ -1914,7 +1909,7 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg)
{
struct cbq_sched_data *q = qdisc_priv(sch);
struct cbq_class *cl = (struct cbq_class *)arg;
- unsigned int qlen;
+ unsigned int qlen, backlog;
if (cl->filters || cl->children || cl == &q->link)
return -EBUSY;
@@ -1922,8 +1917,9 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg)
sch_tree_lock(sch);
qlen = cl->q->q.qlen;
+ backlog = cl->q->qstats.backlog;
qdisc_reset(cl->q);
- qdisc_tree_decrease_qlen(cl->q, qlen);
+ qdisc_tree_reduce_backlog(cl->q, qlen, backlog);
if (cl->next_alive)
cbq_deactivate_class(cl);
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 5ffb8b8337c7..0a08c860eee4 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -128,8 +128,8 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx)
choke_zap_tail_holes(q);
qdisc_qstats_backlog_dec(sch, skb);
+ qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
qdisc_drop(skb, sch);
- qdisc_tree_decrease_qlen(sch, 1);
--sch->q.qlen;
}
@@ -456,6 +456,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
old = q->tab;
if (old) {
unsigned int oqlen = sch->q.qlen, tail = 0;
+ unsigned dropped = 0;
while (q->head != q->tail) {
struct sk_buff *skb = q->tab[q->head];
@@ -467,11 +468,12 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
ntab[tail++] = skb;
continue;
}
+ dropped += qdisc_pkt_len(skb);
qdisc_qstats_backlog_dec(sch, skb);
--sch->q.qlen;
qdisc_drop(skb, sch);
}
- qdisc_tree_decrease_qlen(sch, oqlen - sch->q.qlen);
+ qdisc_tree_reduce_backlog(sch, oqlen - sch->q.qlen, dropped);
q->head = 0;
q->tail = tail;
}
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index 535007d5f0b5..9b7e2980ee5c 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -79,12 +79,13 @@ static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch)
skb = codel_dequeue(sch, &q->params, &q->vars, &q->stats, dequeue);
- /* We cant call qdisc_tree_decrease_qlen() if our qlen is 0,
+ /* We cant call qdisc_tree_reduce_backlog() if our qlen is 0,
* or HTB crashes. Defer it for next round.
*/
if (q->stats.drop_count && sch->q.qlen) {
- qdisc_tree_decrease_qlen(sch, q->stats.drop_count);
+ qdisc_tree_reduce_backlog(sch, q->stats.drop_count, q->stats.drop_len);
q->stats.drop_count = 0;
+ q->stats.drop_len = 0;
}
if (skb)
qdisc_bstats_update(sch, skb);
@@ -116,7 +117,7 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt)
{
struct codel_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_CODEL_MAX + 1];
- unsigned int qlen;
+ unsigned int qlen, dropped = 0;
int err;
if (!opt)
@@ -156,10 +157,11 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt)
while (sch->q.qlen > sch->limit) {
struct sk_buff *skb = __skb_dequeue(&sch->q);
+ dropped += qdisc_pkt_len(skb);
qdisc_qstats_backlog_dec(sch, skb);
qdisc_drop(skb, sch);
}
- qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
+ qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
sch_tree_unlock(sch);
return 0;
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index f26bdea875c1..d6e3ad43cecb 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -53,9 +53,10 @@ static struct drr_class *drr_find_class(struct Qdisc *sch, u32 classid)
static void drr_purge_queue(struct drr_class *cl)
{
unsigned int len = cl->qdisc->q.qlen;
+ unsigned int backlog = cl->qdisc->qstats.backlog;
qdisc_reset(cl->qdisc);
- qdisc_tree_decrease_qlen(cl->qdisc, len);
+ qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
}
static const struct nla_policy drr_policy[TCA_DRR_MAX + 1] = {
@@ -226,11 +227,7 @@ static int drr_graft_class(struct Qdisc *sch, unsigned long arg,
new = &noop_qdisc;
}
- sch_tree_lock(sch);
- drr_purge_queue(cl);
- *old = cl->qdisc;
- cl->qdisc = new;
- sch_tree_unlock(sch);
+ *old = qdisc_replace(sch, new, &cl->qdisc);
return 0;
}
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index f357f34d02d2..d0dff0cd8186 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -73,13 +73,7 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
new = &noop_qdisc;
}
- sch_tree_lock(sch);
- *old = p->q;
- p->q = new;
- qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
- qdisc_reset(*old);
- sch_tree_unlock(sch);
-
+ *old = qdisc_replace(sch, new, &p->q);
return 0;
}
@@ -264,6 +258,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return err;
}
+ qdisc_qstats_backlog_inc(sch, skb);
sch->q.qlen++;
return NET_XMIT_SUCCESS;
@@ -286,6 +281,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
return NULL;
qdisc_bstats_update(sch, skb);
+ qdisc_qstats_backlog_dec(sch, skb);
sch->q.qlen--;
index = skb->tc_index & (p->indices - 1);
@@ -401,6 +397,7 @@ static void dsmark_reset(struct Qdisc *sch)
pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
qdisc_reset(p->q);
+ sch->qstats.backlog = 0;
sch->q.qlen = 0;
}
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 2177eac0a61e..2e4bd2c0a50c 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -37,14 +37,18 @@ static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch)
static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
+ unsigned int prev_backlog;
+
if (likely(skb_queue_len(&sch->q) < sch->limit))
return qdisc_enqueue_tail(skb, sch);
+ prev_backlog = sch->qstats.backlog;
/* queue full, remove one skb to fulfill the limit */
__qdisc_queue_drop_head(sch, &sch->q);
qdisc_qstats_drop(sch);
qdisc_enqueue_tail(skb, sch);
+ qdisc_tree_reduce_backlog(sch, 0, prev_backlog - sch->qstats.backlog);
return NET_XMIT_CN;
}
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 109b2322778f..3c6a47d66a04 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -662,6 +662,7 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
struct fq_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_FQ_MAX + 1];
int err, drop_count = 0;
+ unsigned drop_len = 0;
u32 fq_log;
if (!opt)
@@ -736,10 +737,11 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
if (!skb)
break;
+ drop_len += qdisc_pkt_len(skb);
kfree_skb(skb);
drop_count++;
}
- qdisc_tree_decrease_qlen(sch, drop_count);
+ qdisc_tree_reduce_backlog(sch, drop_count, drop_len);
sch_tree_unlock(sch);
return err;
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 4c834e93dafb..d3fc8f9dd3d4 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -175,7 +175,7 @@ static unsigned int fq_codel_qdisc_drop(struct Qdisc *sch)
static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
- unsigned int idx;
+ unsigned int idx, prev_backlog;
struct fq_codel_flow *flow;
int uninitialized_var(ret);
@@ -203,6 +203,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (++sch->q.qlen <= sch->limit)
return NET_XMIT_SUCCESS;
+ prev_backlog = sch->qstats.backlog;
q->drop_overlimit++;
/* Return Congestion Notification only if we dropped a packet
* from this flow.
@@ -211,7 +212,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return NET_XMIT_CN;
/* As we dropped a packet, better let upper stack know this */
- qdisc_tree_decrease_qlen(sch, 1);
+ qdisc_tree_reduce_backlog(sch, 1, prev_backlog - sch->qstats.backlog);
return NET_XMIT_SUCCESS;
}
@@ -241,6 +242,7 @@ static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch)
struct fq_codel_flow *flow;
struct list_head *head;
u32 prev_drop_count, prev_ecn_mark;
+ unsigned int prev_backlog;
begin:
head = &q->new_flows;
@@ -259,6 +261,7 @@ begin:
prev_drop_count = q->cstats.drop_count;
prev_ecn_mark = q->cstats.ecn_mark;
+ prev_backlog = sch->qstats.backlog;
skb = codel_dequeue(sch, &q->cparams, &flow->cvars, &q->cstats,
dequeue);
@@ -276,12 +279,14 @@ begin:
}
qdisc_bstats_update(sch, skb);
flow->deficit -= qdisc_pkt_len(skb);
- /* We cant call qdisc_tree_decrease_qlen() if our qlen is 0,
+ /* We cant call qdisc_tree_reduce_backlog() if our qlen is 0,
* or HTB crashes. Defer it for next round.
*/
if (q->cstats.drop_count && sch->q.qlen) {
- qdisc_tree_decrease_qlen(sch, q->cstats.drop_count);
+ qdisc_tree_reduce_backlog(sch, q->cstats.drop_count,
+ q->cstats.drop_len);
q->cstats.drop_count = 0;
+ q->cstats.drop_len = 0;
}
return skb;
}
@@ -372,11 +377,13 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt)
while (sch->q.qlen > sch->limit) {
struct sk_buff *skb = fq_codel_dequeue(sch);
+ q->cstats.drop_len += qdisc_pkt_len(skb);
kfree_skb(skb);
q->cstats.drop_count++;
}
- qdisc_tree_decrease_qlen(sch, q->cstats.drop_count);
+ qdisc_tree_reduce_backlog(sch, q->cstats.drop_count, q->cstats.drop_len);
q->cstats.drop_count = 0;
+ q->cstats.drop_len = 0;
sch_tree_unlock(sch);
return 0;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 16bc83b2842a..aa4725038f94 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -159,12 +159,15 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
if (validate)
skb = validate_xmit_skb_list(skb, dev);
- if (skb) {
+ if (likely(skb)) {
HARD_TX_LOCK(dev, txq, smp_processor_id());
if (!netif_xmit_frozen_or_stopped(txq))
skb = dev_hard_start_xmit(skb, dev, txq, &ret);
HARD_TX_UNLOCK(dev, txq);
+ } else {
+ spin_lock(root_lock);
+ return qdisc_qlen(q);
}
spin_lock(root_lock);
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index b7ebe2c87586..d783d7cc3348 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -895,9 +895,10 @@ static void
hfsc_purge_queue(struct Qdisc *sch, struct hfsc_class *cl)
{
unsigned int len = cl->qdisc->q.qlen;
+ unsigned int backlog = cl->qdisc->qstats.backlog;
qdisc_reset(cl->qdisc);
- qdisc_tree_decrease_qlen(cl->qdisc, len);
+ qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
}
static void
@@ -1215,11 +1216,7 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
new = &noop_qdisc;
}
- sch_tree_lock(sch);
- hfsc_purge_queue(sch, cl);
- *old = cl->qdisc;
- cl->qdisc = new;
- sch_tree_unlock(sch);
+ *old = qdisc_replace(sch, new, &cl->qdisc);
return 0;
}
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 86b04e31e60b..13d6f83ec491 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -382,6 +382,7 @@ static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
struct hhf_sched_data *q = qdisc_priv(sch);
enum wdrr_bucket_idx idx;
struct wdrr_bucket *bucket;
+ unsigned int prev_backlog;
idx = hhf_classify(skb, sch);
@@ -409,6 +410,7 @@ static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (++sch->q.qlen <= sch->limit)
return NET_XMIT_SUCCESS;
+ prev_backlog = sch->qstats.backlog;
q->drop_overlimit++;
/* Return Congestion Notification only if we dropped a packet from this
* bucket.
@@ -417,7 +419,7 @@ static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return NET_XMIT_CN;
/* As we dropped a packet, better let upper stack know this. */
- qdisc_tree_decrease_qlen(sch, 1);
+ qdisc_tree_reduce_backlog(sch, 1, prev_backlog - sch->qstats.backlog);
return NET_XMIT_SUCCESS;
}
@@ -527,7 +529,7 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt)
{
struct hhf_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_HHF_MAX + 1];
- unsigned int qlen;
+ unsigned int qlen, prev_backlog;
int err;
u64 non_hh_quantum;
u32 new_quantum = q->quantum;
@@ -577,12 +579,14 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt)
}
qlen = sch->q.qlen;
+ prev_backlog = sch->qstats.backlog;
while (sch->q.qlen > sch->limit) {
struct sk_buff *skb = hhf_dequeue(sch);
kfree_skb(skb);
}
- qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
+ qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen,
+ prev_backlog - sch->qstats.backlog);
sch_tree_unlock(sch);
return 0;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 15ccd7f8fb2a..87b02ed3d5f2 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -600,6 +600,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
htb_activate(q, cl);
}
+ qdisc_qstats_backlog_inc(sch, skb);
sch->q.qlen++;
return NET_XMIT_SUCCESS;
}
@@ -889,6 +890,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
ok:
qdisc_bstats_update(sch, skb);
qdisc_unthrottled(sch);
+ qdisc_qstats_backlog_dec(sch, skb);
sch->q.qlen--;
return skb;
}
@@ -955,6 +957,7 @@ static unsigned int htb_drop(struct Qdisc *sch)
unsigned int len;
if (cl->un.leaf.q->ops->drop &&
(len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
+ sch->qstats.backlog -= len;
sch->q.qlen--;
if (!cl->un.leaf.q->q.qlen)
htb_deactivate(q, cl);
@@ -984,12 +987,12 @@ static void htb_reset(struct Qdisc *sch)
}
cl->prio_activity = 0;
cl->cmode = HTB_CAN_SEND;
-
}
}
qdisc_watchdog_cancel(&q->watchdog);
__skb_queue_purge(&q->direct_queue);
sch->q.qlen = 0;
+ sch->qstats.backlog = 0;
memset(q->hlevel, 0, sizeof(q->hlevel));
memset(q->row_mask, 0, sizeof(q->row_mask));
for (i = 0; i < TC_HTB_NUMPRIO; i++)
@@ -1163,14 +1166,7 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
cl->common.classid)) == NULL)
return -ENOBUFS;
- sch_tree_lock(sch);
- *old = cl->un.leaf.q;
- cl->un.leaf.q = new;
- if (*old != NULL) {
- qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
- qdisc_reset(*old);
- }
- sch_tree_unlock(sch);
+ *old = qdisc_replace(sch, new, &cl->un.leaf.q);
return 0;
}
@@ -1272,7 +1268,6 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
{
struct htb_sched *q = qdisc_priv(sch);
struct htb_class *cl = (struct htb_class *)arg;
- unsigned int qlen;
struct Qdisc *new_q = NULL;
int last_child = 0;
@@ -1292,9 +1287,11 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
sch_tree_lock(sch);
if (!cl->level) {
- qlen = cl->un.leaf.q->q.qlen;
+ unsigned int qlen = cl->un.leaf.q->q.qlen;
+ unsigned int backlog = cl->un.leaf.q->qstats.backlog;
+
qdisc_reset(cl->un.leaf.q);
- qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen);
+ qdisc_tree_reduce_backlog(cl->un.leaf.q, qlen, backlog);
}
/* delete from hash and active; remainder in destroy_class */
@@ -1428,10 +1425,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
sch_tree_lock(sch);
if (parent && !parent->level) {
unsigned int qlen = parent->un.leaf.q->q.qlen;
+ unsigned int backlog = parent->un.leaf.q->qstats.backlog;
/* turn parent into inner node */
qdisc_reset(parent->un.leaf.q);
- qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen);
+ qdisc_tree_reduce_backlog(parent->un.leaf.q, qlen, backlog);
qdisc_destroy(parent->un.leaf.q);
if (parent->prio_activity)
htb_deactivate(q, parent);
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 4e904ca0af9d..bcdd54bb101c 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -218,7 +218,8 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
if (q->queues[i] != &noop_qdisc) {
struct Qdisc *child = q->queues[i];
q->queues[i] = &noop_qdisc;
- qdisc_tree_decrease_qlen(child, child->q.qlen);
+ qdisc_tree_reduce_backlog(child, child->q.qlen,
+ child->qstats.backlog);
qdisc_destroy(child);
}
}
@@ -238,8 +239,9 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
q->queues[i] = child;
if (old != &noop_qdisc) {
- qdisc_tree_decrease_qlen(old,
- old->q.qlen);
+ qdisc_tree_reduce_backlog(old,
+ old->q.qlen,
+ old->qstats.backlog);
qdisc_destroy(old);
}
sch_tree_unlock(sch);
@@ -303,13 +305,7 @@ static int multiq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
if (new == NULL)
new = &noop_qdisc;
- sch_tree_lock(sch);
- *old = q->queues[band];
- q->queues[band] = new;
- qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
- qdisc_reset(*old);
- sch_tree_unlock(sch);
-
+ *old = qdisc_replace(sch, new, &q->queues[band]);
return 0;
}
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 5abd1d9de989..b7c29d5b6f04 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -395,6 +395,25 @@ static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
sch->q.qlen++;
}
+/* netem can't properly corrupt a megapacket (like we get from GSO), so instead
+ * when we statistically choose to corrupt one, we instead segment it, returning
+ * the first packet to be corrupted, and re-enqueue the remaining frames
+ */
+static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch)
+{
+ struct sk_buff *segs;
+ netdev_features_t features = netif_skb_features(skb);
+
+ segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
+
+ if (IS_ERR_OR_NULL(segs)) {
+ qdisc_reshape_fail(skb, sch);
+ return NULL;
+ }
+ consume_skb(skb);
+ return segs;
+}
+
/*
* Insert one skb into qdisc.
* Note: parent depends on return value to account for queue length.
@@ -407,7 +426,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
/* We don't fill cb now as skb_unshare() may invalidate it */
struct netem_skb_cb *cb;
struct sk_buff *skb2;
+ struct sk_buff *segs = NULL;
+ unsigned int len = 0, last_len, prev_len = qdisc_pkt_len(skb);
+ int nb = 0;
int count = 1;
+ int rc = NET_XMIT_SUCCESS;
/* Random duplication */
if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
@@ -453,10 +476,23 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
* do it now in software before we mangle it.
*/
if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
+ if (skb_is_gso(skb)) {
+ segs = netem_segment(skb, sch);
+ if (!segs)
+ return NET_XMIT_DROP;
+ } else {
+ segs = skb;
+ }
+
+ skb = segs;
+ segs = segs->next;
+
if (!(skb = skb_unshare(skb, GFP_ATOMIC)) ||
(skb->ip_summed == CHECKSUM_PARTIAL &&
- skb_checksum_help(skb)))
- return qdisc_drop(skb, sch);
+ skb_checksum_help(skb))) {
+ rc = qdisc_drop(skb, sch);
+ goto finish_segs;
+ }
skb->data[prandom_u32() % skb_headlen(skb)] ^=
1<<(prandom_u32() % 8);
@@ -516,6 +552,27 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
sch->qstats.requeues++;
}
+finish_segs:
+ if (segs) {
+ while (segs) {
+ skb2 = segs->next;
+ segs->next = NULL;
+ qdisc_skb_cb(segs)->pkt_len = segs->len;
+ last_len = segs->len;
+ rc = qdisc_enqueue(segs, sch);
+ if (rc != NET_XMIT_SUCCESS) {
+ if (net_xmit_drop_count(rc))
+ qdisc_qstats_drop(sch);
+ } else {
+ nb++;
+ len += last_len;
+ }
+ segs = skb2;
+ }
+ sch->q.qlen += nb;
+ if (nb > 1)
+ qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
+ }
return NET_XMIT_SUCCESS;
}
@@ -593,13 +650,14 @@ deliver:
#endif
if (q->qdisc) {
+ unsigned int pkt_len = qdisc_pkt_len(skb);
int err = qdisc_enqueue(skb, q->qdisc);
- if (unlikely(err != NET_XMIT_SUCCESS)) {
- if (net_xmit_drop_count(err)) {
- qdisc_qstats_drop(sch);
- qdisc_tree_decrease_qlen(sch, 1);
- }
+ if (err != NET_XMIT_SUCCESS &&
+ net_xmit_drop_count(err)) {
+ qdisc_qstats_drop(sch);
+ qdisc_tree_reduce_backlog(sch, 1,
+ pkt_len);
}
goto tfifo_dequeue;
}
@@ -1037,15 +1095,7 @@ static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
{
struct netem_sched_data *q = qdisc_priv(sch);
- sch_tree_lock(sch);
- *old = q->qdisc;
- q->qdisc = new;
- if (*old) {
- qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
- qdisc_reset(*old);
- }
- sch_tree_unlock(sch);
-
+ *old = qdisc_replace(sch, new, &q->qdisc);
return 0;
}
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index b783a446d884..71ae3b9629f9 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -183,7 +183,7 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt)
{
struct pie_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_PIE_MAX + 1];
- unsigned int qlen;
+ unsigned int qlen, dropped = 0;
int err;
if (!opt)
@@ -232,10 +232,11 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt)
while (sch->q.qlen > sch->limit) {
struct sk_buff *skb = __skb_dequeue(&sch->q);
+ dropped += qdisc_pkt_len(skb);
qdisc_qstats_backlog_dec(sch, skb);
qdisc_drop(skb, sch);
}
- qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
+ qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
sch_tree_unlock(sch);
return 0;
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index ba6487f2741f..fee1b15506b2 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -191,7 +191,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
struct Qdisc *child = q->queues[i];
q->queues[i] = &noop_qdisc;
if (child != &noop_qdisc) {
- qdisc_tree_decrease_qlen(child, child->q.qlen);
+ qdisc_tree_reduce_backlog(child, child->q.qlen, child->qstats.backlog);
qdisc_destroy(child);
}
}
@@ -210,8 +210,9 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
q->queues[i] = child;
if (old != &noop_qdisc) {
- qdisc_tree_decrease_qlen(old,
- old->q.qlen);
+ qdisc_tree_reduce_backlog(old,
+ old->q.qlen,
+ old->qstats.backlog);
qdisc_destroy(old);
}
sch_tree_unlock(sch);
@@ -268,13 +269,7 @@ static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
if (new == NULL)
new = &noop_qdisc;
- sch_tree_lock(sch);
- *old = q->queues[band];
- q->queues[band] = new;
- qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
- qdisc_reset(*old);
- sch_tree_unlock(sch);
-
+ *old = qdisc_replace(sch, new, &q->queues[band]);
return 0;
}
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 3dc3a6e56052..8d2d8d953432 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -220,9 +220,10 @@ static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid)
static void qfq_purge_queue(struct qfq_class *cl)
{
unsigned int len = cl->qdisc->q.qlen;
+ unsigned int backlog = cl->qdisc->qstats.backlog;
qdisc_reset(cl->qdisc);
- qdisc_tree_decrease_qlen(cl->qdisc, len);
+ qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
}
static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = {
@@ -617,11 +618,7 @@ static int qfq_graft_class(struct Qdisc *sch, unsigned long arg,
new = &noop_qdisc;
}
- sch_tree_lock(sch);
- qfq_purge_queue(cl);
- *old = cl->qdisc;
- cl->qdisc = new;
- sch_tree_unlock(sch);
+ *old = qdisc_replace(sch, new, &cl->qdisc);
return 0;
}
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 6c0534cc7758..8c0508c0e287 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -210,7 +210,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
q->flags = ctl->flags;
q->limit = ctl->limit;
if (child) {
- qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
+ qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
+ q->qdisc->qstats.backlog);
qdisc_destroy(q->qdisc);
q->qdisc = child;
}
@@ -313,12 +314,7 @@ static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
if (new == NULL)
new = &noop_qdisc;
- sch_tree_lock(sch);
- *old = q->qdisc;
- q->qdisc = new;
- qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
- qdisc_reset(*old);
- sch_tree_unlock(sch);
+ *old = qdisc_replace(sch, new, &q->qdisc);
return 0;
}
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 5bbb6332ec57..c69611640fa5 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -510,7 +510,8 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt)
sch_tree_lock(sch);
- qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
+ qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
+ q->qdisc->qstats.backlog);
qdisc_destroy(q->qdisc);
q->qdisc = child;
@@ -606,12 +607,7 @@ static int sfb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
if (new == NULL)
new = &noop_qdisc;
- sch_tree_lock(sch);
- *old = q->qdisc;
- q->qdisc = new;
- qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
- qdisc_reset(*old);
- sch_tree_unlock(sch);
+ *old = qdisc_replace(sch, new, &q->qdisc);
return 0;
}
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 3abab534eb5c..498f0a2cb47f 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -346,7 +346,7 @@ static int
sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct sfq_sched_data *q = qdisc_priv(sch);
- unsigned int hash;
+ unsigned int hash, dropped;
sfq_index x, qlen;
struct sfq_slot *slot;
int uninitialized_var(ret);
@@ -461,7 +461,7 @@ enqueue:
return NET_XMIT_SUCCESS;
qlen = slot->qlen;
- sfq_drop(sch);
+ dropped = sfq_drop(sch);
/* Return Congestion Notification only if we dropped a packet
* from this flow.
*/
@@ -469,7 +469,7 @@ enqueue:
return NET_XMIT_CN;
/* As we dropped a packet, better let upper stack know this */
- qdisc_tree_decrease_qlen(sch, 1);
+ qdisc_tree_reduce_backlog(sch, 1, dropped);
return NET_XMIT_SUCCESS;
}
@@ -537,6 +537,7 @@ static void sfq_rehash(struct Qdisc *sch)
struct sfq_slot *slot;
struct sk_buff_head list;
int dropped = 0;
+ unsigned int drop_len = 0;
__skb_queue_head_init(&list);
@@ -565,6 +566,7 @@ static void sfq_rehash(struct Qdisc *sch)
if (x >= SFQ_MAX_FLOWS) {
drop:
qdisc_qstats_backlog_dec(sch, skb);
+ drop_len += qdisc_pkt_len(skb);
kfree_skb(skb);
dropped++;
continue;
@@ -594,7 +596,7 @@ drop:
}
}
sch->q.qlen -= dropped;
- qdisc_tree_decrease_qlen(sch, dropped);
+ qdisc_tree_reduce_backlog(sch, dropped, drop_len);
}
static void sfq_perturbation(unsigned long arg)
@@ -618,7 +620,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
struct sfq_sched_data *q = qdisc_priv(sch);
struct tc_sfq_qopt *ctl = nla_data(opt);
struct tc_sfq_qopt_v1 *ctl_v1 = NULL;
- unsigned int qlen;
+ unsigned int qlen, dropped = 0;
struct red_parms *p = NULL;
if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
@@ -667,8 +669,8 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
qlen = sch->q.qlen;
while (sch->q.qlen > q->limit)
- sfq_drop(sch);
- qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
+ dropped += sfq_drop(sch);
+ qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
del_timer(&q->perturb_timer);
if (q->perturb_period) {
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index a4afde14e865..c2fbde742f37 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -160,6 +160,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch)
struct tbf_sched_data *q = qdisc_priv(sch);
struct sk_buff *segs, *nskb;
netdev_features_t features = netif_skb_features(skb);
+ unsigned int len = 0, prev_len = qdisc_pkt_len(skb);
int ret, nb;
segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
@@ -172,6 +173,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch)
nskb = segs->next;
segs->next = NULL;
qdisc_skb_cb(segs)->pkt_len = segs->len;
+ len += segs->len;
ret = qdisc_enqueue(segs, q->qdisc);
if (ret != NET_XMIT_SUCCESS) {
if (net_xmit_drop_count(ret))
@@ -183,7 +185,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch)
}
sch->q.qlen += nb;
if (nb > 1)
- qdisc_tree_decrease_qlen(sch, 1 - nb);
+ qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
consume_skb(skb);
return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
}
@@ -399,7 +401,8 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
sch_tree_lock(sch);
if (child) {
- qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
+ qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
+ q->qdisc->qstats.backlog);
qdisc_destroy(q->qdisc);
q->qdisc = child;
}
@@ -502,13 +505,7 @@ static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
if (new == NULL)
new = &noop_qdisc;
- sch_tree_lock(sch);
- *old = q->qdisc;
- q->qdisc = new;
- qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
- qdisc_reset(*old);
- sch_tree_unlock(sch);
-
+ *old = qdisc_replace(sch, new, &q->qdisc);
return 0;
}
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index ec529121f38a..ce46f1c7f133 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -526,6 +526,8 @@ static int sctp_v6_cmp_addr(const union sctp_addr *addr1,
}
return 0;
}
+ if (addr1->v6.sin6_port != addr2->v6.sin6_port)
+ return 0;
if (!ipv6_addr_equal(&addr1->v6.sin6_addr, &addr2->v6.sin6_addr))
return 0;
/* If this is a linklocal address, compare the scope_id. */
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 3d9ea9a48289..8b4ff315695e 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -60,6 +60,8 @@
#include <net/inet_common.h>
#include <net/inet_ecn.h>
+#define MAX_SCTP_PORT_HASH_ENTRIES (64 * 1024)
+
/* Global data structures. */
struct sctp_globals sctp_globals __read_mostly;
@@ -1352,6 +1354,8 @@ static __init int sctp_init(void)
unsigned long limit;
int max_share;
int order;
+ int num_entries;
+ int max_entry_order;
sock_skb_cb_check_size(sizeof(struct sctp_ulpevent));
@@ -1404,14 +1408,24 @@ static __init int sctp_init(void)
/* Size and allocate the association hash table.
* The methodology is similar to that of the tcp hash tables.
+ * Though not identical. Start by getting a goal size
*/
if (totalram_pages >= (128 * 1024))
goal = totalram_pages >> (22 - PAGE_SHIFT);
else
goal = totalram_pages >> (24 - PAGE_SHIFT);
- for (order = 0; (1UL << order) < goal; order++)
- ;
+ /* Then compute the page order for said goal */
+ order = get_order(goal);
+
+ /* Now compute the required page order for the maximum sized table we
+ * want to create
+ */
+ max_entry_order = get_order(MAX_SCTP_PORT_HASH_ENTRIES *
+ sizeof(struct sctp_bind_hashbucket));
+
+ /* Limit the page order by that maximum hash table size */
+ order = min(order, max_entry_order);
do {
sctp_assoc_hashsize = (1UL << order) * PAGE_SIZE /
@@ -1445,20 +1459,35 @@ static __init int sctp_init(void)
INIT_HLIST_HEAD(&sctp_ep_hashtable[i].chain);
}
- /* Allocate and initialize the SCTP port hash table. */
+ /* Allocate and initialize the SCTP port hash table.
+ * Note that order is initalized to start at the max sized
+ * table we want to support. If we can't get that many pages
+ * reduce the order and try again
+ */
do {
- sctp_port_hashsize = (1UL << order) * PAGE_SIZE /
- sizeof(struct sctp_bind_hashbucket);
- if ((sctp_port_hashsize > (64 * 1024)) && order > 0)
- continue;
sctp_port_hashtable = (struct sctp_bind_hashbucket *)
__get_free_pages(GFP_ATOMIC|__GFP_NOWARN, order);
} while (!sctp_port_hashtable && --order > 0);
+
if (!sctp_port_hashtable) {
pr_err("Failed bind hash alloc\n");
status = -ENOMEM;
goto err_bhash_alloc;
}
+
+ /* Now compute the number of entries that will fit in the
+ * port hash space we allocated
+ */
+ num_entries = (1UL << order) * PAGE_SIZE /
+ sizeof(struct sctp_bind_hashbucket);
+
+ /* And finish by rounding it down to the nearest power of two
+ * this wastes some memory of course, but its needed because
+ * the hash function operates based on the assumption that
+ * that the number of entries is a power of two
+ */
+ sctp_port_hashsize = rounddown_pow_of_two(num_entries);
+
for (i = 0; i < sctp_port_hashsize; i++) {
spin_lock_init(&sctp_port_hashtable[i].lock);
INIT_HLIST_HEAD(&sctp_port_hashtable[i].chain);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index ef1d90fdc773..be1489fc3234 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -5542,6 +5542,7 @@ static int sctp_getsockopt_hmac_ident(struct sock *sk, int len,
struct sctp_hmac_algo_param *hmacs;
__u16 data_len = 0;
u32 num_idents;
+ int i;
if (!ep->auth_enable)
return -EACCES;
@@ -5559,8 +5560,12 @@ static int sctp_getsockopt_hmac_ident(struct sock *sk, int len,
return -EFAULT;
if (put_user(num_idents, &p->shmac_num_idents))
return -EFAULT;
- if (copy_to_user(p->shmac_idents, hmacs->hmac_ids, data_len))
- return -EFAULT;
+ for (i = 0; i < num_idents; i++) {
+ __u16 hmacid = ntohs(hmacs->hmac_ids[i]);
+
+ if (copy_to_user(&p->shmac_idents[i], &hmacid, sizeof(__u16)))
+ return -EFAULT;
+ }
return 0;
}
@@ -6640,6 +6645,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs)
if (cmsgs->srinfo->sinfo_flags &
~(SCTP_UNORDERED | SCTP_ADDR_OVER |
+ SCTP_SACK_IMMEDIATELY |
SCTP_ABORT | SCTP_EOF))
return -EINVAL;
break;
@@ -6663,6 +6669,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs)
if (cmsgs->sinfo->snd_flags &
~(SCTP_UNORDERED | SCTP_ADDR_OVER |
+ SCTP_SACK_IMMEDIATELY |
SCTP_ABORT | SCTP_EOF))
return -EINVAL;
break;
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 26d50c565f54..3e0fc5127225 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -320,7 +320,7 @@ static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write,
struct ctl_table tbl;
bool changed = false;
char *none = "none";
- char tmp[8];
+ char tmp[8] = {0};
int ret;
memset(&tbl, 0, sizeof(struct ctl_table));
diff --git a/net/socket.c b/net/socket.c
index d730ef9dfbf0..263b334ec5e4 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2238,31 +2238,31 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
break;
}
-out_put:
- fput_light(sock->file, fput_needed);
-
if (err == 0)
- return datagrams;
+ goto out_put;
- if (datagrams != 0) {
+ if (datagrams == 0) {
+ datagrams = err;
+ goto out_put;
+ }
+
+ /*
+ * We may return less entries than requested (vlen) if the
+ * sock is non block and there aren't enough datagrams...
+ */
+ if (err != -EAGAIN) {
/*
- * We may return less entries than requested (vlen) if the
- * sock is non block and there aren't enough datagrams...
+ * ... or if recvmsg returns an error after we
+ * received some datagrams, where we record the
+ * error to return on the next call or if the
+ * app asks about it using getsockopt(SO_ERROR).
*/
- if (err != -EAGAIN) {
- /*
- * ... or if recvmsg returns an error after we
- * received some datagrams, where we record the
- * error to return on the next call or if the
- * app asks about it using getsockopt(SO_ERROR).
- */
- sock->sk->sk_err = -err;
- }
-
- return datagrams;
+ sock->sk->sk_err = -err;
}
+out_put:
+ fput_light(sock->file, fput_needed);
- return err;
+ return datagrams;
}
SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 1095be9c80ab..4605dc73def6 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -857,8 +857,8 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g
goto out;
if (svc_getnl(&buf->head[0]) != seq)
goto out;
- /* trim off the mic at the end before returning */
- xdr_buf_trim(buf, mic.len + 4);
+ /* trim off the mic and padding at the end before returning */
+ xdr_buf_trim(buf, round_up_to_quad(mic.len) + 4);
stat = 0;
out:
kfree(mic.data);
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 5e4f815c2b34..63fb5ee212cf 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1182,14 +1182,14 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h)
}
crq->q.reader = 0;
- crq->item = cache_get(h);
crq->buf = buf;
crq->len = 0;
crq->readers = 0;
spin_lock(&queue_lock);
- if (test_bit(CACHE_PENDING, &h->flags))
+ if (test_bit(CACHE_PENDING, &h->flags)) {
+ crq->item = cache_get(h);
list_add_tail(&crq->q.list, &detail->queue);
- else
+ } else
/* Lost a race, no longer PENDING, so don't enqueue */
ret = -EAGAIN;
spin_unlock(&queue_lock);
@@ -1225,7 +1225,7 @@ int qword_get(char **bpp, char *dest, int bufsize)
if (bp[0] == '\\' && bp[1] == 'x') {
/* HEX STRING */
bp += 2;
- while (len < bufsize) {
+ while (len < bufsize - 1) {
int h, l;
h = hex_to_bin(bp[0]);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 23608eb0ded2..7a93922457ff 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -442,7 +442,7 @@ out_no_rpciod:
return ERR_PTR(err);
}
-struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
+static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
struct rpc_xprt *xprt)
{
struct rpc_clnt *clnt = NULL;
@@ -474,7 +474,6 @@ struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
return clnt;
}
-EXPORT_SYMBOL_GPL(rpc_create_xprt);
/**
* rpc_create - create an RPC client and transport with one call
@@ -500,6 +499,15 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
};
char servername[48];
+ if (args->bc_xprt) {
+ WARN_ON(args->protocol != XPRT_TRANSPORT_BC_TCP);
+ xprt = args->bc_xprt->xpt_bc_xprt;
+ if (xprt) {
+ xprt_get(xprt);
+ return rpc_create_xprt(args, xprt);
+ }
+ }
+
if (args->flags & RPC_CLNT_CREATE_INFINITE_SLOTS)
xprtargs.flags |= XPRT_CREATE_INFINITE_SLOTS;
if (args->flags & RPC_CLNT_CREATE_NO_IDLE_TIMEOUT)
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 2ffaf6a79499..027c9ef8a263 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -398,7 +398,6 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen,
if (unlikely(!sock))
return -ENOTSOCK;
- clear_bit(SOCKWQ_ASYNC_NOSPACE, &sock->flags);
if (base != 0) {
addr = NULL;
addrlen = 0;
@@ -442,7 +441,6 @@ static void xs_nospace_callback(struct rpc_task *task)
struct sock_xprt *transport = container_of(task->tk_rqstp->rq_xprt, struct sock_xprt, xprt);
transport->inet->sk_write_pending--;
- clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags);
}
/**
@@ -467,20 +465,11 @@ static int xs_nospace(struct rpc_task *task)
/* Don't race with disconnect */
if (xprt_connected(xprt)) {
- if (test_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags)) {
- /*
- * Notify TCP that we're limited by the application
- * window size
- */
- set_bit(SOCK_NOSPACE, &transport->sock->flags);
- sk->sk_write_pending++;
- /* ...and wait for more buffer space */
- xprt_wait_for_buffer_space(task, xs_nospace_callback);
- }
- } else {
- clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags);
+ /* wait for more buffer space */
+ sk->sk_write_pending++;
+ xprt_wait_for_buffer_space(task, xs_nospace_callback);
+ } else
ret = -ENOTCONN;
- }
spin_unlock_bh(&xprt->transport_lock);
@@ -616,9 +605,6 @@ process_status:
case -EAGAIN:
status = xs_nospace(task);
break;
- default:
- dprintk("RPC: sendmsg returned unrecognized error %d\n",
- -status);
case -ENETUNREACH:
case -ENOBUFS:
case -EPIPE:
@@ -626,7 +612,10 @@ process_status:
case -EPERM:
/* When the server has died, an ICMP port unreachable message
* prompts ECONNREFUSED. */
- clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags);
+ break;
+ default:
+ dprintk("RPC: sendmsg returned unrecognized error %d\n",
+ -status);
}
return status;
@@ -706,16 +695,16 @@ static int xs_tcp_send_request(struct rpc_task *task)
case -EAGAIN:
status = xs_nospace(task);
break;
- default:
- dprintk("RPC: sendmsg returned unrecognized error %d\n",
- -status);
case -ECONNRESET:
case -ECONNREFUSED:
case -ENOTCONN:
case -EADDRINUSE:
case -ENOBUFS:
case -EPIPE:
- clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags);
+ break;
+ default:
+ dprintk("RPC: sendmsg returned unrecognized error %d\n",
+ -status);
}
return status;
@@ -1609,19 +1598,23 @@ static void xs_tcp_state_change(struct sock *sk)
static void xs_write_space(struct sock *sk)
{
- struct socket *sock;
+ struct socket_wq *wq;
struct rpc_xprt *xprt;
- if (unlikely(!(sock = sk->sk_socket)))
+ if (!sk->sk_socket)
return;
- clear_bit(SOCK_NOSPACE, &sock->flags);
+ clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
if (unlikely(!(xprt = xprt_from_sock(sk))))
return;
- if (test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sock->flags) == 0)
- return;
+ rcu_read_lock();
+ wq = rcu_dereference(sk->sk_wq);
+ if (!wq || test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags) == 0)
+ goto out;
xprt_write_space(xprt);
+out:
+ rcu_read_unlock();
}
/**
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index f34e535e93bd..1b58866175e6 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -20,6 +20,7 @@
#include <linux/list.h>
#include <linux/workqueue.h>
#include <linux/if_vlan.h>
+#include <linux/rtnetlink.h>
#include <net/ip_fib.h>
#include <net/switchdev.h>
@@ -565,7 +566,6 @@ int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj,
}
EXPORT_SYMBOL_GPL(switchdev_port_obj_dump);
-static DEFINE_MUTEX(switchdev_mutex);
static RAW_NOTIFIER_HEAD(switchdev_notif_chain);
/**
@@ -580,9 +580,9 @@ int register_switchdev_notifier(struct notifier_block *nb)
{
int err;
- mutex_lock(&switchdev_mutex);
+ rtnl_lock();
err = raw_notifier_chain_register(&switchdev_notif_chain, nb);
- mutex_unlock(&switchdev_mutex);
+ rtnl_unlock();
return err;
}
EXPORT_SYMBOL_GPL(register_switchdev_notifier);
@@ -598,9 +598,9 @@ int unregister_switchdev_notifier(struct notifier_block *nb)
{
int err;
- mutex_lock(&switchdev_mutex);
+ rtnl_lock();
err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb);
- mutex_unlock(&switchdev_mutex);
+ rtnl_unlock();
return err;
}
EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
@@ -614,16 +614,17 @@ EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
* Call all network notifier blocks. This should be called by driver
* when it needs to propagate hardware event.
* Return values are same as for atomic_notifier_call_chain().
+ * rtnl_lock must be held.
*/
int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
struct switchdev_notifier_info *info)
{
int err;
+ ASSERT_RTNL();
+
info->dev = dev;
- mutex_lock(&switchdev_mutex);
err = raw_notifier_call_chain(&switchdev_notif_chain, val, info);
- mutex_unlock(&switchdev_mutex);
return err;
}
EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
@@ -1168,6 +1169,7 @@ int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
.obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB,
.dst = dst,
.dst_len = dst_len,
+ .fi = fi,
.tos = tos,
.type = type,
.nlflags = nlflags,
@@ -1176,8 +1178,6 @@ int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
struct net_device *dev;
int err = 0;
- memcpy(&ipv4_fib.fi, fi, sizeof(ipv4_fib.fi));
-
/* Don't offload route if using custom ip rules or if
* IPv4 FIB offloading has been disabled completely.
*/
@@ -1221,6 +1221,7 @@ int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
.obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB,
.dst = dst,
.dst_len = dst_len,
+ .fi = fi,
.tos = tos,
.type = type,
.nlflags = 0,
@@ -1229,8 +1230,6 @@ int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
struct net_device *dev;
int err = 0;
- memcpy(&ipv4_fib.fi, fi, sizeof(ipv4_fib.fi));
-
if (!(fi->fib_flags & RTNH_F_OFFLOAD))
return 0;
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 9dc239dfe192..92e367a0a5ce 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -399,8 +399,10 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg)
hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
NLM_F_MULTI, TIPC_NL_LINK_GET);
- if (!hdr)
+ if (!hdr) {
+ tipc_bcast_unlock(net);
return -EMSGSIZE;
+ }
attrs = nla_nest_start(msg->skb, TIPC_NLA_LINK);
if (!attrs)
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 1eadc95e1132..2ed732bfe94b 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -802,7 +802,7 @@ static int tipc_nl_compat_name_table_dump(struct tipc_nl_compat_msg *msg,
goto out;
tipc_tlv_sprintf(msg->rep, "%-10u %s",
- nla_get_u32(publ[TIPC_NLA_PUBL_REF]),
+ nla_get_u32(publ[TIPC_NLA_PUBL_KEY]),
scope_str[nla_get_u32(publ[TIPC_NLA_PUBL_SCOPE])]);
out:
tipc_tlv_sprintf(msg->rep, "\n");
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 20cddec0a43c..3926b561f873 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -168,12 +168,6 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
skb_queue_head_init(&n_ptr->bc_entry.inputq1);
__skb_queue_head_init(&n_ptr->bc_entry.arrvq);
skb_queue_head_init(&n_ptr->bc_entry.inputq2);
- hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]);
- list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
- if (n_ptr->addr < temp_node->addr)
- break;
- }
- list_add_tail_rcu(&n_ptr->list, &temp_node->list);
n_ptr->state = SELF_DOWN_PEER_LEAVING;
n_ptr->signature = INVALID_NODE_SIG;
n_ptr->active_links[0] = INVALID_BEARER_ID;
@@ -193,6 +187,12 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
tipc_node_get(n_ptr);
setup_timer(&n_ptr->timer, tipc_node_timeout, (unsigned long)n_ptr);
n_ptr->keepalive_intv = U32_MAX;
+ hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]);
+ list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
+ if (n_ptr->addr < temp_node->addr)
+ break;
+ }
+ list_add_tail_rcu(&n_ptr->list, &temp_node->list);
exit:
spin_unlock_bh(&tn->node_list_lock);
return n_ptr;
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index b53246fb0412..9b713e0ce00d 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -673,7 +673,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
struct tipc_sock *tsk = tipc_sk(sk);
struct net *net = sock_net(sk);
struct tipc_msg *mhdr = &tsk->phdr;
- struct sk_buff_head *pktchain = &sk->sk_write_queue;
+ struct sk_buff_head pktchain;
struct iov_iter save = msg->msg_iter;
uint mtu;
int rc;
@@ -687,14 +687,16 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
msg_set_nameupper(mhdr, seq->upper);
msg_set_hdr_sz(mhdr, MCAST_H_SIZE);
+ skb_queue_head_init(&pktchain);
+
new_mtu:
mtu = tipc_bcast_get_mtu(net);
- rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, pktchain);
+ rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, &pktchain);
if (unlikely(rc < 0))
return rc;
do {
- rc = tipc_bcast_xmit(net, pktchain);
+ rc = tipc_bcast_xmit(net, &pktchain);
if (likely(!rc))
return dsz;
@@ -704,7 +706,7 @@ new_mtu:
if (!rc)
continue;
}
- __skb_queue_purge(pktchain);
+ __skb_queue_purge(&pktchain);
if (rc == -EMSGSIZE) {
msg->msg_iter = save;
goto new_mtu;
@@ -863,7 +865,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz)
struct net *net = sock_net(sk);
struct tipc_msg *mhdr = &tsk->phdr;
u32 dnode, dport;
- struct sk_buff_head *pktchain = &sk->sk_write_queue;
+ struct sk_buff_head pktchain;
struct sk_buff *skb;
struct tipc_name_seq *seq;
struct iov_iter save;
@@ -924,17 +926,18 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz)
msg_set_hdr_sz(mhdr, BASIC_H_SIZE);
}
+ skb_queue_head_init(&pktchain);
save = m->msg_iter;
new_mtu:
mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
- rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, pktchain);
+ rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, &pktchain);
if (rc < 0)
return rc;
do {
- skb = skb_peek(pktchain);
+ skb = skb_peek(&pktchain);
TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong;
- rc = tipc_node_xmit(net, pktchain, dnode, tsk->portid);
+ rc = tipc_node_xmit(net, &pktchain, dnode, tsk->portid);
if (likely(!rc)) {
if (sock->state != SS_READY)
sock->state = SS_CONNECTING;
@@ -946,7 +949,7 @@ new_mtu:
if (!rc)
continue;
}
- __skb_queue_purge(pktchain);
+ __skb_queue_purge(&pktchain);
if (rc == -EMSGSIZE) {
m->msg_iter = save;
goto new_mtu;
@@ -1016,7 +1019,7 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz)
struct net *net = sock_net(sk);
struct tipc_sock *tsk = tipc_sk(sk);
struct tipc_msg *mhdr = &tsk->phdr;
- struct sk_buff_head *pktchain = &sk->sk_write_queue;
+ struct sk_buff_head pktchain;
DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
u32 portid = tsk->portid;
int rc = -EINVAL;
@@ -1044,17 +1047,19 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz)
timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
dnode = tsk_peer_node(tsk);
+ skb_queue_head_init(&pktchain);
next:
save = m->msg_iter;
mtu = tsk->max_pkt;
send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE);
- rc = tipc_msg_build(mhdr, m, sent, send, mtu, pktchain);
+ rc = tipc_msg_build(mhdr, m, sent, send, mtu, &pktchain);
if (unlikely(rc < 0))
return rc;
+
do {
if (likely(!tsk_conn_cong(tsk))) {
- rc = tipc_node_xmit(net, pktchain, dnode, portid);
+ rc = tipc_node_xmit(net, &pktchain, dnode, portid);
if (likely(!rc)) {
tsk->sent_unacked++;
sent += send;
@@ -1063,7 +1068,7 @@ next:
goto next;
}
if (rc == -EMSGSIZE) {
- __skb_queue_purge(pktchain);
+ __skb_queue_purge(&pktchain);
tsk->max_pkt = tipc_node_get_mtu(net, dnode,
portid);
m->msg_iter = save;
@@ -1077,7 +1082,7 @@ next:
rc = tipc_wait_for_sndpkt(sock, &timeo);
} while (!rc);
- __skb_queue_purge(pktchain);
+ __skb_queue_purge(&pktchain);
return sent ? sent : rc;
}
@@ -2809,6 +2814,9 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb)
if (err)
return err;
+ if (!attrs[TIPC_NLA_SOCK])
+ return -EINVAL;
+
err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX,
attrs[TIPC_NLA_SOCK],
tipc_nl_sock_policy);
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 350cca33ee0a..69ee2eeef968 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -289,15 +289,14 @@ static void tipc_subscrb_rcv_cb(struct net *net, int conid,
struct sockaddr_tipc *addr, void *usr_data,
void *buf, size_t len)
{
- struct tipc_subscriber *subscriber = usr_data;
+ struct tipc_subscriber *subscrb = usr_data;
struct tipc_subscription *sub = NULL;
struct tipc_net *tn = net_generic(net, tipc_net_id);
- tipc_subscrp_create(net, (struct tipc_subscr *)buf, subscriber, &sub);
- if (sub)
- tipc_nametbl_subscribe(sub);
- else
- tipc_conn_terminate(tn->topsrv, subscriber->conid);
+ if (tipc_subscrp_create(net, (struct tipc_subscr *)buf, subscrb, &sub))
+ return tipc_conn_terminate(tn->topsrv, subscrb->conid);
+
+ tipc_nametbl_subscribe(sub);
}
/* Handle one request to establish a new subscriber */
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index ef05cd9403d4..6579fd6e7459 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -315,7 +315,7 @@ static struct sock *unix_find_socket_byinode(struct inode *i)
&unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
struct dentry *dentry = unix_sk(s)->path.dentry;
- if (dentry && d_backing_inode(dentry) == i) {
+ if (dentry && d_real_inode(dentry) == i) {
sock_hold(s);
goto found;
}
@@ -911,7 +911,7 @@ static struct sock *unix_find_other(struct net *net,
err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
if (err)
goto fail;
- inode = d_backing_inode(path.dentry);
+ inode = d_real_inode(path.dentry);
err = inode_permission(inode, MAY_WRITE);
if (err)
goto put_fail;
@@ -1048,7 +1048,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
goto out_up;
}
addr->hash = UNIX_HASH_SIZE;
- hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
+ hash = d_real_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
spin_lock(&unix_table_lock);
u->path = u_path;
list = &unix_socket_table[hash];
@@ -1496,7 +1496,7 @@ static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
UNIXCB(skb).fp = NULL;
for (i = scm->fp->count-1; i >= 0; i--)
- unix_notinflight(scm->fp->fp[i]);
+ unix_notinflight(scm->fp->user, scm->fp->fp[i]);
}
static void unix_destruct_scm(struct sk_buff *skb)
@@ -1513,6 +1513,21 @@ static void unix_destruct_scm(struct sk_buff *skb)
sock_wfree(skb);
}
+/*
+ * The "user->unix_inflight" variable is protected by the garbage
+ * collection lock, and we just read it locklessly here. If you go
+ * over the limit, there might be a tiny race in actually noticing
+ * it across threads. Tough.
+ */
+static inline bool too_many_unix_fds(struct task_struct *p)
+{
+ struct user_struct *user = current_user();
+
+ if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
+ return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
+ return false;
+}
+
#define MAX_RECURSION_LEVEL 4
static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
@@ -1521,6 +1536,9 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
unsigned char max_level = 0;
int unix_sock_count = 0;
+ if (too_many_unix_fds(current))
+ return -ETOOMANYREFS;
+
for (i = scm->fp->count - 1; i >= 0; i--) {
struct sock *sk = unix_get_socket(scm->fp->fp[i]);
@@ -1542,10 +1560,8 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
if (!UNIXCB(skb).fp)
return -ENOMEM;
- if (unix_sock_count) {
- for (i = scm->fp->count - 1; i >= 0; i--)
- unix_inflight(scm->fp->fp[i]);
- }
+ for (i = scm->fp->count - 1; i >= 0; i--)
+ unix_inflight(scm->fp->user, scm->fp->fp[i]);
return max_level;
}
@@ -1765,7 +1781,12 @@ restart_locked:
goto out_unlock;
}
- if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
+ /* other == sk && unix_peer(other) != sk if
+ * - unix_peer(sk) == NULL, destination address bound to sk
+ * - unix_peer(sk) == sk by time of get but disconnected before lock
+ */
+ if (other != sk &&
+ unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
if (timeo) {
timeo = unix_wait_for_peer(other, timeo);
@@ -2254,13 +2275,15 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state)
size_t size = state->size;
unsigned int last_len;
- err = -EINVAL;
- if (sk->sk_state != TCP_ESTABLISHED)
+ if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
+ err = -EINVAL;
goto out;
+ }
- err = -EOPNOTSUPP;
- if (flags & MSG_OOB)
+ if (unlikely(flags & MSG_OOB)) {
+ err = -EOPNOTSUPP;
goto out;
+ }
target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
timeo = sock_rcvtimeo(sk, noblock);
@@ -2306,9 +2329,11 @@ again:
goto unlock;
unix_state_unlock(sk);
- err = -EAGAIN;
- if (!timeo)
+ if (!timeo) {
+ err = -EAGAIN;
break;
+ }
+
mutex_unlock(&u->readlock);
timeo = unix_stream_data_wait(sk, timeo, last,
@@ -2316,6 +2341,7 @@ again:
if (signal_pending(current)) {
err = sock_intr_errno(timeo);
+ scm_destroy(&scm);
goto out;
}
diff --git a/net/unix/diag.c b/net/unix/diag.c
index c512f64d5287..4d9679701a6d 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -220,7 +220,7 @@ done:
return skb->len;
}
-static struct sock *unix_lookup_by_ino(int ino)
+static struct sock *unix_lookup_by_ino(unsigned int ino)
{
int i;
struct sock *sk;
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index a73a226f2d33..6a0d48525fcf 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -116,15 +116,15 @@ struct sock *unix_get_socket(struct file *filp)
* descriptor if it is for an AF_UNIX socket.
*/
-void unix_inflight(struct file *fp)
+void unix_inflight(struct user_struct *user, struct file *fp)
{
struct sock *s = unix_get_socket(fp);
+ spin_lock(&unix_gc_lock);
+
if (s) {
struct unix_sock *u = unix_sk(s);
- spin_lock(&unix_gc_lock);
-
if (atomic_long_inc_return(&u->inflight) == 1) {
BUG_ON(!list_empty(&u->link));
list_add_tail(&u->link, &gc_inflight_list);
@@ -132,25 +132,28 @@ void unix_inflight(struct file *fp)
BUG_ON(list_empty(&u->link));
}
unix_tot_inflight++;
- spin_unlock(&unix_gc_lock);
}
+ user->unix_inflight++;
+ spin_unlock(&unix_gc_lock);
}
-void unix_notinflight(struct file *fp)
+void unix_notinflight(struct user_struct *user, struct file *fp)
{
struct sock *s = unix_get_socket(fp);
+ spin_lock(&unix_gc_lock);
+
if (s) {
struct unix_sock *u = unix_sk(s);
- spin_lock(&unix_gc_lock);
BUG_ON(list_empty(&u->link));
if (atomic_long_dec_and_test(&u->inflight))
list_del_init(&u->link);
unix_tot_inflight--;
- spin_unlock(&unix_gc_lock);
}
+ user->unix_inflight--;
+ spin_unlock(&unix_gc_lock);
}
static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 7fd1220fbfa0..9b5bd6d142dc 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1794,27 +1794,8 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
else if (sk->sk_shutdown & RCV_SHUTDOWN)
err = 0;
- if (copied > 0) {
- /* We only do these additional bookkeeping/notification steps
- * if we actually copied something out of the queue pair
- * instead of just peeking ahead.
- */
-
- if (!(flags & MSG_PEEK)) {
- /* If the other side has shutdown for sending and there
- * is nothing more to read, then modify the socket
- * state.
- */
- if (vsk->peer_shutdown & SEND_SHUTDOWN) {
- if (vsock_stream_has_data(vsk) <= 0) {
- sk->sk_state = SS_UNCONNECTED;
- sock_set_flag(sk, SOCK_DONE);
- sk->sk_state_change(sk);
- }
- }
- }
+ if (copied > 0)
err = copied;
- }
out_wait:
finish_wait(sk_sleep(sk), &wait);
diff --git a/net/wireless/core.c b/net/wireless/core.c
index b0915515640e..8f0bac7e03c4 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -1147,6 +1147,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
return NOTIFY_DONE;
}
+ wireless_nlevent_flush();
+
return NOTIFY_OK;
}
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 75b0d23ee882..5d89f13a98db 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -13161,7 +13161,7 @@ static int nl80211_netlink_notify(struct notifier_block * nb,
struct wireless_dev *wdev;
struct cfg80211_beacon_registration *reg, *tmp;
- if (state != NETLINK_URELEASE)
+ if (state != NETLINK_URELEASE || notify->protocol != NETLINK_GENERIC)
return NOTIFY_DONE;
rcu_read_lock();
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index c8717c1d082e..c753211cb83f 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -342,6 +342,40 @@ static const int compat_event_type_size[] = {
/* IW event code */
+void wireless_nlevent_flush(void)
+{
+ struct sk_buff *skb;
+ struct net *net;
+
+ ASSERT_RTNL();
+
+ for_each_net(net) {
+ while ((skb = skb_dequeue(&net->wext_nlevents)))
+ rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL,
+ GFP_KERNEL);
+ }
+}
+EXPORT_SYMBOL_GPL(wireless_nlevent_flush);
+
+static int wext_netdev_notifier_call(struct notifier_block *nb,
+ unsigned long state, void *ptr)
+{
+ /*
+ * When a netdev changes state in any way, flush all pending messages
+ * to avoid them going out in a strange order, e.g. RTM_NEWLINK after
+ * RTM_DELLINK, or with IFF_UP after without IFF_UP during dev_close()
+ * or similar - all of which could otherwise happen due to delays from
+ * schedule_work().
+ */
+ wireless_nlevent_flush();
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block wext_netdev_notifier = {
+ .notifier_call = wext_netdev_notifier_call,
+};
+
static int __net_init wext_pernet_init(struct net *net)
{
skb_queue_head_init(&net->wext_nlevents);
@@ -360,7 +394,12 @@ static struct pernet_operations wext_pernet_ops = {
static int __init wireless_nlevent_init(void)
{
- return register_pernet_subsys(&wext_pernet_ops);
+ int err = register_pernet_subsys(&wext_pernet_ops);
+
+ if (err)
+ return err;
+
+ return register_netdevice_notifier(&wext_netdev_notifier);
}
subsys_initcall(wireless_nlevent_init);
@@ -368,17 +407,8 @@ subsys_initcall(wireless_nlevent_init);
/* Process events generated by the wireless layer or the driver. */
static void wireless_nlevent_process(struct work_struct *work)
{
- struct sk_buff *skb;
- struct net *net;
-
rtnl_lock();
-
- for_each_net(net) {
- while ((skb = skb_dequeue(&net->wext_nlevents)))
- rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL,
- GFP_KERNEL);
- }
-
+ wireless_nlevent_flush();
rtnl_unlock();
}
@@ -925,8 +955,29 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr,
return private(dev, iwr, cmd, info, handler);
}
/* Old driver API : call driver ioctl handler */
- if (dev->netdev_ops->ndo_do_ioctl)
- return dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd);
+ if (dev->netdev_ops->ndo_do_ioctl) {
+#ifdef CONFIG_COMPAT
+ if (info->flags & IW_REQUEST_FLAG_COMPAT) {
+ int ret = 0;
+ struct iwreq iwr_lcl;
+ struct compat_iw_point *iwp_compat = (void *) &iwr->u.data;
+
+ memcpy(&iwr_lcl, iwr, sizeof(struct iwreq));
+ iwr_lcl.u.data.pointer = compat_ptr(iwp_compat->pointer);
+ iwr_lcl.u.data.length = iwp_compat->length;
+ iwr_lcl.u.data.flags = iwp_compat->flags;
+
+ ret = dev->netdev_ops->ndo_do_ioctl(dev, (void *) &iwr_lcl, cmd);
+
+ iwp_compat->pointer = ptr_to_compat(iwr_lcl.u.data.pointer);
+ iwp_compat->length = iwr_lcl.u.data.length;
+ iwp_compat->flags = iwr_lcl.u.data.flags;
+
+ return ret;
+ } else
+#endif
+ return dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd);
+ }
return -EOPNOTSUPP;
}
diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c
index 7ecd04c21360..997ff7b2509b 100644
--- a/net/x25/x25_facilities.c
+++ b/net/x25/x25_facilities.c
@@ -277,6 +277,7 @@ int x25_negotiate_facilities(struct sk_buff *skb, struct sock *sk,
memset(&theirs, 0, sizeof(theirs));
memcpy(new, ours, sizeof(*new));
+ memset(dte, 0, sizeof(*dte));
len = x25_parse_facilities(skb, &theirs, dte, &x25->vc_facil_mask);
if (len < 0)
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index ad7f5b3f9b61..1c4ad477ce93 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -292,12 +292,15 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
XFRM_SKB_CB(skb)->seq.input.hi = seq_hi;
skb_dst_force(skb);
+ dev_hold(skb->dev);
nexthdr = x->type->input(x, skb);
if (nexthdr == -EINPROGRESS)
return 0;
resume:
+ dev_put(skb->dev);
+
spin_lock(&x->lock);
if (nexthdr <= 0) {
if (nexthdr == -EBADMSG) {
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index cc3676eb6239..ff4a91fcab9f 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -167,6 +167,8 @@ static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb
{
struct sk_buff *segs;
+ BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_SGO_CB_OFFSET);
+ BUILD_BUG_ON(sizeof(*IP6CB(skb)) > SKB_SGO_CB_OFFSET);
segs = skb_gso_segment(skb, 0);
kfree_skb(skb);
if (IS_ERR(segs))