diff options
author | Dmitry Shmidt <dimitrysh@google.com> | 2016-08-01 15:51:01 -0700 |
---|---|---|
committer | Dmitry Shmidt <dimitrysh@google.com> | 2016-08-01 15:57:55 -0700 |
commit | b558f17a13b10761eb6f838e713425b9e83f8a01 (patch) | |
tree | 425828a423411d6c65e5b18a3330d244eef987b0 /net | |
parent | 818aa36ea868ba8f2985f9ca0906fd9cba3e437d (diff) | |
parent | b05965f284db3e086022f4e318e46cb5bffb1376 (diff) |
Merge tag 'v4.4.16' into android-4.4.y
This is the 4.4.16 stable release
Change-Id: Ibaf7b7e03695e1acebc654a2ca1a4bfcc48fcea4
Diffstat (limited to 'net')
169 files changed, 1961 insertions, 1557 deletions
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index fbd0acf80b13..2fdebabbfacd 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -976,7 +976,8 @@ static int ax25_release(struct socket *sock) release_sock(sk); ax25_disconnect(ax25, 0); lock_sock(sk); - ax25_destroy_socket(ax25); + if (!sock_flag(ax25->sk, SOCK_DESTROY)) + ax25_destroy_socket(ax25); break; case AX25_STATE_3: diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c index 951cd57bb07d..5237dff6941d 100644 --- a/net/ax25/ax25_ds_timer.c +++ b/net/ax25/ax25_ds_timer.c @@ -102,6 +102,7 @@ void ax25_ds_heartbeat_expiry(ax25_cb *ax25) switch (ax25->state) { case AX25_STATE_0: + case AX25_STATE_2: /* Magic here: If we listen() and a new link dies before it is accepted() it isn't 'dead' so doesn't get removed. */ if (!sk || sock_flag(sk, SOCK_DESTROY) || @@ -111,6 +112,7 @@ void ax25_ds_heartbeat_expiry(ax25_cb *ax25) sock_hold(sk); ax25_destroy_socket(ax25); bh_unlock_sock(sk); + /* Ungrab socket and destroy it */ sock_put(sk); } else ax25_destroy_socket(ax25); @@ -213,7 +215,8 @@ void ax25_ds_t1_timeout(ax25_cb *ax25) case AX25_STATE_2: if (ax25->n2count == ax25->n2) { ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND); - ax25_disconnect(ax25, ETIMEDOUT); + if (!sock_flag(ax25->sk, SOCK_DESTROY)) + ax25_disconnect(ax25, ETIMEDOUT); return; } else { ax25->n2count++; diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c index b563a3f5f2a8..2fa3be965101 100644 --- a/net/ax25/ax25_ip.c +++ b/net/ax25/ax25_ip.c @@ -228,8 +228,23 @@ netdev_tx_t ax25_ip_xmit(struct sk_buff *skb) } #endif +static bool ax25_validate_header(const char *header, unsigned int len) +{ + ax25_digi digi; + + if (!len) + return false; + + if (header[0]) + return true; + + return ax25_addr_parse(header + 1, len - 1, NULL, NULL, &digi, NULL, + NULL); +} + const struct header_ops ax25_header_ops = { .create = ax25_hard_header, + .validate = ax25_validate_header, }; EXPORT_SYMBOL(ax25_header_ops); diff --git 
a/net/ax25/ax25_std_timer.c b/net/ax25/ax25_std_timer.c index 004467c9e6e1..2c0d6ef66f9d 100644 --- a/net/ax25/ax25_std_timer.c +++ b/net/ax25/ax25_std_timer.c @@ -38,6 +38,7 @@ void ax25_std_heartbeat_expiry(ax25_cb *ax25) switch (ax25->state) { case AX25_STATE_0: + case AX25_STATE_2: /* Magic here: If we listen() and a new link dies before it is accepted() it isn't 'dead' so doesn't get removed. */ if (!sk || sock_flag(sk, SOCK_DESTROY) || @@ -47,6 +48,7 @@ void ax25_std_heartbeat_expiry(ax25_cb *ax25) sock_hold(sk); ax25_destroy_socket(ax25); bh_unlock_sock(sk); + /* Ungrab socket and destroy it */ sock_put(sk); } else ax25_destroy_socket(ax25); @@ -144,7 +146,8 @@ void ax25_std_t1timer_expiry(ax25_cb *ax25) case AX25_STATE_2: if (ax25->n2count == ax25->n2) { ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND); - ax25_disconnect(ax25, ETIMEDOUT); + if (!sock_flag(ax25->sk, SOCK_DESTROY)) + ax25_disconnect(ax25, ETIMEDOUT); return; } else { ax25->n2count++; diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c index 3b78e8473a01..655a7d4c96e1 100644 --- a/net/ax25/ax25_subr.c +++ b/net/ax25/ax25_subr.c @@ -264,7 +264,8 @@ void ax25_disconnect(ax25_cb *ax25, int reason) { ax25_clear_queues(ax25); - ax25_stop_heartbeat(ax25); + if (!sock_flag(ax25->sk, SOCK_DESTROY)) + ax25_stop_heartbeat(ax25); ax25_stop_t1timer(ax25); ax25_stop_t2timer(ax25); ax25_stop_t3timer(ax25); diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 191a70290dca..f5d2fe5e31cc 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -127,21 +127,17 @@ batadv_backbone_gw_free_ref(struct batadv_bla_backbone_gw *backbone_gw) } /* finally deinitialize the claim */ -static void batadv_claim_free_rcu(struct rcu_head *rcu) +static void batadv_claim_release(struct batadv_bla_claim *claim) { - struct batadv_bla_claim *claim; - - claim = container_of(rcu, struct batadv_bla_claim, rcu); - 
batadv_backbone_gw_free_ref(claim->backbone_gw); - kfree(claim); + kfree_rcu(claim, rcu); } /* free a claim, call claim_free_rcu if its the last reference */ static void batadv_claim_free_ref(struct batadv_bla_claim *claim) { if (atomic_dec_and_test(&claim->refcount)) - call_rcu(&claim->rcu, batadv_claim_free_rcu); + batadv_claim_release(claim); } /** diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index a49c705fb86b..5f19133c5530 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -553,6 +553,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, * be sent to * @bat_priv: the bat priv with all the soft interface information * @ip_dst: ipv4 to look up in the DHT + * @vid: VLAN identifier * * An originator O is selected if and only if its DHT_ID value is one of three * closest values (from the LEFT, with wrap around if needed) then the hash @@ -561,7 +562,8 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, * Returns the candidate array of size BATADV_DAT_CANDIDATE_NUM. 
*/ static struct batadv_dat_candidate * -batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) +batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst, + unsigned short vid) { int select; batadv_dat_addr_t last_max = BATADV_DAT_ADDR_MAX, ip_key; @@ -577,7 +579,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) return NULL; dat.ip = ip_dst; - dat.vid = 0; + dat.vid = vid; ip_key = (batadv_dat_addr_t)batadv_hash_dat(&dat, BATADV_DAT_ADDR_MAX); @@ -597,6 +599,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) * @bat_priv: the bat priv with all the soft interface information * @skb: payload to send * @ip: the DHT key + * @vid: VLAN identifier * @packet_subtype: unicast4addr packet subtype to use * * This function copies the skb with pskb_copy() and is sent as unicast packet @@ -607,7 +610,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) */ static bool batadv_dat_send_data(struct batadv_priv *bat_priv, struct sk_buff *skb, __be32 ip, - int packet_subtype) + unsigned short vid, int packet_subtype) { int i; bool ret = false; @@ -616,7 +619,7 @@ static bool batadv_dat_send_data(struct batadv_priv *bat_priv, struct sk_buff *tmp_skb; struct batadv_dat_candidate *cand; - cand = batadv_dat_select_candidates(bat_priv, ip); + cand = batadv_dat_select_candidates(bat_priv, ip, vid); if (!cand) goto out; @@ -1004,7 +1007,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, ret = true; } else { /* Send the request to the DHT */ - ret = batadv_dat_send_data(bat_priv, skb, ip_dst, + ret = batadv_dat_send_data(bat_priv, skb, ip_dst, vid, BATADV_P_DAT_DHT_GET); } out: @@ -1132,8 +1135,8 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, /* Send the ARP reply to the candidates for both the IP addresses that * the node obtained from the ARP reply */ - batadv_dat_send_data(bat_priv, skb, ip_src, BATADV_P_DAT_DHT_PUT); - 
batadv_dat_send_data(bat_priv, skb, ip_dst, BATADV_P_DAT_DHT_PUT); + batadv_dat_send_data(bat_priv, skb, ip_src, vid, BATADV_P_DAT_DHT_PUT); + batadv_dat_send_data(bat_priv, skb, ip_dst, vid, BATADV_P_DAT_DHT_PUT); } /** diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h index 5a31420513e1..7b12ea8ea29d 100644 --- a/net/batman-adv/hard-interface.h +++ b/net/batman-adv/hard-interface.h @@ -75,18 +75,6 @@ batadv_hardif_free_ref(struct batadv_hard_iface *hard_iface) call_rcu(&hard_iface->rcu, batadv_hardif_free_rcu); } -/** - * batadv_hardif_free_ref_now - decrement the hard interface refcounter and - * possibly free it (without rcu callback) - * @hard_iface: the hard interface to free - */ -static inline void -batadv_hardif_free_ref_now(struct batadv_hard_iface *hard_iface) -{ - if (atomic_dec_and_test(&hard_iface->refcount)) - batadv_hardif_free_rcu(&hard_iface->rcu); -} - static inline struct batadv_hard_iface * batadv_primary_if_get_selected(struct batadv_priv *bat_priv) { diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index f5276be2c77c..d0956f726547 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -203,28 +203,25 @@ void batadv_nc_init_orig(struct batadv_orig_node *orig_node) } /** - * batadv_nc_node_free_rcu - rcu callback to free an nc node and remove - * its refcount on the orig_node - * @rcu: rcu pointer of the nc node + * batadv_nc_node_release - release nc_node from lists and queue for free after + * rcu grace period + * @nc_node: the nc node to free */ -static void batadv_nc_node_free_rcu(struct rcu_head *rcu) +static void batadv_nc_node_release(struct batadv_nc_node *nc_node) { - struct batadv_nc_node *nc_node; - - nc_node = container_of(rcu, struct batadv_nc_node, rcu); batadv_orig_node_free_ref(nc_node->orig_node); - kfree(nc_node); + kfree_rcu(nc_node, rcu); } /** - * batadv_nc_node_free_ref - decrements the nc node refcounter and possibly - * frees it + * 
batadv_nc_node_free_ref - decrement the nc node refcounter and possibly + * release it * @nc_node: the nc node to free */ static void batadv_nc_node_free_ref(struct batadv_nc_node *nc_node) { if (atomic_dec_and_test(&nc_node->refcount)) - call_rcu(&nc_node->rcu, batadv_nc_node_free_rcu); + batadv_nc_node_release(nc_node); } /** diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 7486df9ed48d..17851d3aaf22 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -163,92 +163,66 @@ err: } /** - * batadv_neigh_ifinfo_free_rcu - free the neigh_ifinfo object - * @rcu: rcu pointer of the neigh_ifinfo object - */ -static void batadv_neigh_ifinfo_free_rcu(struct rcu_head *rcu) -{ - struct batadv_neigh_ifinfo *neigh_ifinfo; - - neigh_ifinfo = container_of(rcu, struct batadv_neigh_ifinfo, rcu); - - if (neigh_ifinfo->if_outgoing != BATADV_IF_DEFAULT) - batadv_hardif_free_ref_now(neigh_ifinfo->if_outgoing); - - kfree(neigh_ifinfo); -} - -/** - * batadv_neigh_ifinfo_free_now - decrement the refcounter and possibly free - * the neigh_ifinfo (without rcu callback) + * batadv_neigh_ifinfo_release - release neigh_ifinfo from lists and queue for + * free after rcu grace period * @neigh_ifinfo: the neigh_ifinfo object to release */ static void -batadv_neigh_ifinfo_free_ref_now(struct batadv_neigh_ifinfo *neigh_ifinfo) +batadv_neigh_ifinfo_release(struct batadv_neigh_ifinfo *neigh_ifinfo) { - if (atomic_dec_and_test(&neigh_ifinfo->refcount)) - batadv_neigh_ifinfo_free_rcu(&neigh_ifinfo->rcu); + if (neigh_ifinfo->if_outgoing != BATADV_IF_DEFAULT) + batadv_hardif_free_ref(neigh_ifinfo->if_outgoing); + + kfree_rcu(neigh_ifinfo, rcu); } /** - * batadv_neigh_ifinfo_free_ref - decrement the refcounter and possibly free + * batadv_neigh_ifinfo_free_ref - decrement the refcounter and possibly release * the neigh_ifinfo * @neigh_ifinfo: the neigh_ifinfo object to release */ void batadv_neigh_ifinfo_free_ref(struct batadv_neigh_ifinfo *neigh_ifinfo) 
{ if (atomic_dec_and_test(&neigh_ifinfo->refcount)) - call_rcu(&neigh_ifinfo->rcu, batadv_neigh_ifinfo_free_rcu); + batadv_neigh_ifinfo_release(neigh_ifinfo); } /** * batadv_neigh_node_free_rcu - free the neigh_node - * @rcu: rcu pointer of the neigh_node + * batadv_neigh_node_release - release neigh_node from lists and queue for + * free after rcu grace period + * @neigh_node: neigh neighbor to free */ -static void batadv_neigh_node_free_rcu(struct rcu_head *rcu) +static void batadv_neigh_node_release(struct batadv_neigh_node *neigh_node) { struct hlist_node *node_tmp; - struct batadv_neigh_node *neigh_node; struct batadv_neigh_ifinfo *neigh_ifinfo; struct batadv_algo_ops *bao; - neigh_node = container_of(rcu, struct batadv_neigh_node, rcu); bao = neigh_node->orig_node->bat_priv->bat_algo_ops; hlist_for_each_entry_safe(neigh_ifinfo, node_tmp, &neigh_node->ifinfo_list, list) { - batadv_neigh_ifinfo_free_ref_now(neigh_ifinfo); + batadv_neigh_ifinfo_free_ref(neigh_ifinfo); } if (bao->bat_neigh_free) bao->bat_neigh_free(neigh_node); - batadv_hardif_free_ref_now(neigh_node->if_incoming); + batadv_hardif_free_ref(neigh_node->if_incoming); - kfree(neigh_node); -} - -/** - * batadv_neigh_node_free_ref_now - decrement the neighbors refcounter - * and possibly free it (without rcu callback) - * @neigh_node: neigh neighbor to free - */ -static void -batadv_neigh_node_free_ref_now(struct batadv_neigh_node *neigh_node) -{ - if (atomic_dec_and_test(&neigh_node->refcount)) - batadv_neigh_node_free_rcu(&neigh_node->rcu); + kfree_rcu(neigh_node, rcu); } /** * batadv_neigh_node_free_ref - decrement the neighbors refcounter - * and possibly free it + * and possibly release it * @neigh_node: neigh neighbor to free */ void batadv_neigh_node_free_ref(struct batadv_neigh_node *neigh_node) { if (atomic_dec_and_test(&neigh_node->refcount)) - call_rcu(&neigh_node->rcu, batadv_neigh_node_free_rcu); + batadv_neigh_node_release(neigh_node); } /** @@ -532,108 +506,99 @@ out: } /** - * 
batadv_orig_ifinfo_free_rcu - free the orig_ifinfo object - * @rcu: rcu pointer of the orig_ifinfo object + * batadv_orig_ifinfo_release - release orig_ifinfo from lists and queue for + * free after rcu grace period + * @orig_ifinfo: the orig_ifinfo object to release */ -static void batadv_orig_ifinfo_free_rcu(struct rcu_head *rcu) +static void batadv_orig_ifinfo_release(struct batadv_orig_ifinfo *orig_ifinfo) { - struct batadv_orig_ifinfo *orig_ifinfo; struct batadv_neigh_node *router; - orig_ifinfo = container_of(rcu, struct batadv_orig_ifinfo, rcu); - if (orig_ifinfo->if_outgoing != BATADV_IF_DEFAULT) - batadv_hardif_free_ref_now(orig_ifinfo->if_outgoing); + batadv_hardif_free_ref(orig_ifinfo->if_outgoing); /* this is the last reference to this object */ router = rcu_dereference_protected(orig_ifinfo->router, true); if (router) - batadv_neigh_node_free_ref_now(router); - kfree(orig_ifinfo); + batadv_neigh_node_free_ref(router); + + kfree_rcu(orig_ifinfo, rcu); } /** - * batadv_orig_ifinfo_free_ref - decrement the refcounter and possibly free - * the orig_ifinfo (without rcu callback) + * batadv_orig_ifinfo_free_ref - decrement the refcounter and possibly release + * the orig_ifinfo * @orig_ifinfo: the orig_ifinfo object to release */ -static void -batadv_orig_ifinfo_free_ref_now(struct batadv_orig_ifinfo *orig_ifinfo) +void batadv_orig_ifinfo_free_ref(struct batadv_orig_ifinfo *orig_ifinfo) { if (atomic_dec_and_test(&orig_ifinfo->refcount)) - batadv_orig_ifinfo_free_rcu(&orig_ifinfo->rcu); + batadv_orig_ifinfo_release(orig_ifinfo); } /** - * batadv_orig_ifinfo_free_ref - decrement the refcounter and possibly free - * the orig_ifinfo - * @orig_ifinfo: the orig_ifinfo object to release + * batadv_orig_node_free_rcu - free the orig_node + * @rcu: rcu pointer of the orig_node */ -void batadv_orig_ifinfo_free_ref(struct batadv_orig_ifinfo *orig_ifinfo) +static void batadv_orig_node_free_rcu(struct rcu_head *rcu) { - if (atomic_dec_and_test(&orig_ifinfo->refcount)) - 
call_rcu(&orig_ifinfo->rcu, batadv_orig_ifinfo_free_rcu); + struct batadv_orig_node *orig_node; + + orig_node = container_of(rcu, struct batadv_orig_node, rcu); + + batadv_mcast_purge_orig(orig_node); + + batadv_frag_purge_orig(orig_node, NULL); + + if (orig_node->bat_priv->bat_algo_ops->bat_orig_free) + orig_node->bat_priv->bat_algo_ops->bat_orig_free(orig_node); + + kfree(orig_node->tt_buff); + kfree(orig_node); } -static void batadv_orig_node_free_rcu(struct rcu_head *rcu) +/** + * batadv_orig_node_release - release orig_node from lists and queue for + * free after rcu grace period + * @orig_node: the orig node to free + */ +static void batadv_orig_node_release(struct batadv_orig_node *orig_node) { struct hlist_node *node_tmp; struct batadv_neigh_node *neigh_node; - struct batadv_orig_node *orig_node; struct batadv_orig_ifinfo *orig_ifinfo; - orig_node = container_of(rcu, struct batadv_orig_node, rcu); - spin_lock_bh(&orig_node->neigh_list_lock); /* for all neighbors towards this originator ... 
*/ hlist_for_each_entry_safe(neigh_node, node_tmp, &orig_node->neigh_list, list) { hlist_del_rcu(&neigh_node->list); - batadv_neigh_node_free_ref_now(neigh_node); + batadv_neigh_node_free_ref(neigh_node); } hlist_for_each_entry_safe(orig_ifinfo, node_tmp, &orig_node->ifinfo_list, list) { hlist_del_rcu(&orig_ifinfo->list); - batadv_orig_ifinfo_free_ref_now(orig_ifinfo); + batadv_orig_ifinfo_free_ref(orig_ifinfo); } spin_unlock_bh(&orig_node->neigh_list_lock); - batadv_mcast_purge_orig(orig_node); - /* Free nc_nodes */ batadv_nc_purge_orig(orig_node->bat_priv, orig_node, NULL); - batadv_frag_purge_orig(orig_node, NULL); - - if (orig_node->bat_priv->bat_algo_ops->bat_orig_free) - orig_node->bat_priv->bat_algo_ops->bat_orig_free(orig_node); - - kfree(orig_node->tt_buff); - kfree(orig_node); + call_rcu(&orig_node->rcu, batadv_orig_node_free_rcu); } /** * batadv_orig_node_free_ref - decrement the orig node refcounter and possibly - * schedule an rcu callback for freeing it + * release it * @orig_node: the orig node to free */ void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node) { if (atomic_dec_and_test(&orig_node->refcount)) - call_rcu(&orig_node->rcu, batadv_orig_node_free_rcu); -} - -/** - * batadv_orig_node_free_ref_now - decrement the orig node refcounter and - * possibly free it (without rcu callback) - * @orig_node: the orig node to free - */ -void batadv_orig_node_free_ref_now(struct batadv_orig_node *orig_node) -{ - if (atomic_dec_and_test(&orig_node->refcount)) - batadv_orig_node_free_rcu(&orig_node->rcu); + batadv_orig_node_release(orig_node); } void batadv_originator_free(struct batadv_priv *bat_priv) diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index fa18f9bf266b..a5c37882b409 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -38,7 +38,6 @@ int batadv_originator_init(struct batadv_priv *bat_priv); void batadv_originator_free(struct batadv_priv *bat_priv); void batadv_purge_orig_ref(struct 
batadv_priv *bat_priv); void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node); -void batadv_orig_node_free_ref_now(struct batadv_orig_node *orig_node); struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, const u8 *addr); struct batadv_neigh_node * diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 3207667e69de..d8a2f33e60e5 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -104,6 +104,15 @@ static void _batadv_update_route(struct batadv_priv *bat_priv, neigh_node = NULL; spin_lock_bh(&orig_node->neigh_list_lock); + /* curr_router used earlier may not be the current orig_ifinfo->router + * anymore because it was dereferenced outside of the neigh_list_lock + * protected region. After the new best neighbor has replaced the current + * best neighbor the reference counter needs to decrease. Consequently, + * the code needs to ensure the curr_router variable contains a pointer + * to the replaced best neighbor. + */ + curr_router = rcu_dereference_protected(orig_ifinfo->router, true); + rcu_assign_pointer(orig_ifinfo->router, neigh_node); spin_unlock_bh(&orig_node->neigh_list_lock); batadv_orig_ifinfo_free_ref(orig_ifinfo); diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index f664324805eb..0e0c3b8ed927 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -630,6 +630,9 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, if (pending) { hlist_del(&forw_packet->list); + if (!forw_packet->own) + atomic_inc(&bat_priv->bcast_queue_left); + batadv_forw_packet_free(forw_packet); } } @@ -657,6 +660,9 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, if (pending) { hlist_del(&forw_packet->list); + if (!forw_packet->own) + atomic_inc(&bat_priv->batman_queue_left); + batadv_forw_packet_free(forw_packet); } } diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index ac4d08de5df4..720f1a5b81ac 100644 --- 
a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -407,11 +407,17 @@ void batadv_interface_rx(struct net_device *soft_iface, */ nf_reset(skb); + if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) + goto dropped; + vid = batadv_get_vid(skb, 0); ethhdr = eth_hdr(skb); switch (ntohs(ethhdr->h_proto)) { case ETH_P_8021Q: + if (!pskb_may_pull(skb, VLAN_ETH_HLEN)) + goto dropped; + vhdr = (struct vlan_ethhdr *)skb->data; if (vhdr->h_vlan_encapsulated_proto != ethertype) @@ -423,8 +429,6 @@ void batadv_interface_rx(struct net_device *soft_iface, } /* skb->dev & skb->pkt_type are set here */ - if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) - goto dropped; skb->protocol = eth_type_trans(skb, soft_iface); /* should not be necessary anymore as we use skb_pull_rcsum() diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 76f19ba62462..83b0ca27a45e 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -240,20 +240,6 @@ int batadv_tt_global_hash_count(struct batadv_priv *bat_priv, return count; } -static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu) -{ - struct batadv_tt_orig_list_entry *orig_entry; - - orig_entry = container_of(rcu, struct batadv_tt_orig_list_entry, rcu); - - /* We are in an rcu callback here, therefore we cannot use - * batadv_orig_node_free_ref() and its call_rcu(): - * An rcu_barrier() wouldn't wait for that to finish - */ - batadv_orig_node_free_ref_now(orig_entry->orig_node); - kfree(orig_entry); -} - /** * batadv_tt_local_size_mod - change the size by v of the local table identified * by vid @@ -349,13 +335,25 @@ static void batadv_tt_global_size_dec(struct batadv_orig_node *orig_node, batadv_tt_global_size_mod(orig_node, vid, -1); } +/** + * batadv_tt_orig_list_entry_release - release tt orig entry from lists and + * queue for free after rcu grace period + * @orig_entry: tt orig entry to be free'd + */ +static void 
+batadv_tt_orig_list_entry_release(struct batadv_tt_orig_list_entry *orig_entry) +{ + batadv_orig_node_free_ref(orig_entry->orig_node); + kfree_rcu(orig_entry, rcu); +} + static void batadv_tt_orig_list_entry_free_ref(struct batadv_tt_orig_list_entry *orig_entry) { if (!atomic_dec_and_test(&orig_entry->refcount)) return; - call_rcu(&orig_entry->rcu, batadv_tt_orig_list_entry_free_rcu); + batadv_tt_orig_list_entry_release(orig_entry); } /** diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 9e9cca3689a0..795ddd8b2f77 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -307,6 +307,9 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev, /* check that it's our buffer */ if (lowpan_is_ipv6(*skb_network_header(skb))) { + /* Pull off the 1-byte of 6lowpan header. */ + skb_pull(skb, 1); + /* Copy the packet so that the IPv6 header is * properly aligned. */ @@ -317,6 +320,7 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev, local_skb->protocol = htons(ETH_P_IPV6); local_skb->pkt_type = PACKET_HOST; + local_skb->dev = dev; skb_set_transport_header(local_skb, sizeof(struct ipv6hdr)); @@ -335,6 +339,8 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev, if (!local_skb) goto drop; + local_skb->dev = dev; + ret = iphc_decompress(local_skb, dev, chan); if (ret < 0) { kfree_skb(local_skb); @@ -343,7 +349,6 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev, local_skb->protocol = htons(ETH_P_IPV6); local_skb->pkt_type = PACKET_HOST; - local_skb->dev = dev; if (give_skb_to_upper(local_skb, dev) != NET_RX_SUCCESS) { diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 85b82f7adbd2..24e9410923d0 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -722,8 +722,12 @@ static void hci_req_add_le_create_conn(struct hci_request *req, if (hci_update_random_address(req, false, &own_addr_type)) return; + /* Set window to be the same value as the interval to 
enable + * continuous scanning. + */ cp.scan_interval = cpu_to_le16(hdev->le_scan_interval); - cp.scan_window = cpu_to_le16(hdev->le_scan_window); + cp.scan_window = cp.scan_interval; + bacpy(&cp.peer_addr, &conn->dst); cp.peer_addr_type = conn->dst_type; cp.own_address_type = own_addr_type; diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 981f8a202c27..02778c5bc149 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -175,21 +175,29 @@ static u8 update_white_list(struct hci_request *req) * command to remove it from the controller. */ list_for_each_entry(b, &hdev->le_white_list, list) { - struct hci_cp_le_del_from_white_list cp; + /* If the device is neither in pend_le_conns nor + * pend_le_reports then remove it from the whitelist. + */ + if (!hci_pend_le_action_lookup(&hdev->pend_le_conns, + &b->bdaddr, b->bdaddr_type) && + !hci_pend_le_action_lookup(&hdev->pend_le_reports, + &b->bdaddr, b->bdaddr_type)) { + struct hci_cp_le_del_from_white_list cp; + + cp.bdaddr_type = b->bdaddr_type; + bacpy(&cp.bdaddr, &b->bdaddr); - if (hci_pend_le_action_lookup(&hdev->pend_le_conns, - &b->bdaddr, b->bdaddr_type) || - hci_pend_le_action_lookup(&hdev->pend_le_reports, - &b->bdaddr, b->bdaddr_type)) { - white_list_entries++; + hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST, + sizeof(cp), &cp); continue; } - cp.bdaddr_type = b->bdaddr_type; - bacpy(&cp.bdaddr, &b->bdaddr); + if (hci_find_irk_by_addr(hdev, &b->bdaddr, b->bdaddr_type)) { + /* White list can not be used with RPAs */ + return 0x00; + } - hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST, - sizeof(cp), &cp); + white_list_entries++; } /* Since all no longer valid white list entries have been diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 7f22119276f3..b1b0a1c0bd8d 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -7155,6 +7155,10 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, return mgmt_cmd_status(sk, hdev->id, 
MGMT_OP_ADD_ADVERTISING, status); + if (data_len != sizeof(*cp) + cp->adv_data_len + cp->scan_rsp_len) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, + MGMT_STATUS_INVALID_PARAMS); + flags = __le32_to_cpu(cp->flags); timeout = __le16_to_cpu(cp->timeout); duration = __le16_to_cpu(cp->duration); diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index ffed8a1d4f27..4b175df35184 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -1072,22 +1072,6 @@ static void smp_notify_keys(struct l2cap_conn *conn) hcon->dst_type = smp->remote_irk->addr_type; queue_work(hdev->workqueue, &conn->id_addr_update_work); } - - /* When receiving an indentity resolving key for - * a remote device that does not use a resolvable - * private address, just remove the key so that - * it is possible to use the controller white - * list for scanning. - * - * Userspace will have been told to not store - * this key at this point. So it is safe to - * just remove it. - */ - if (!bacmp(&smp->remote_irk->rpa, BDADDR_ANY)) { - list_del_rcu(&smp->remote_irk->list); - kfree_rcu(smp->remote_irk, rcu); - smp->remote_irk = NULL; - } } if (smp->csrk) { diff --git a/net/bridge/br.c b/net/bridge/br.c index a1abe4936fe1..3addc05b9a16 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -121,6 +121,7 @@ static struct notifier_block br_device_notifier = { .notifier_call = br_device_event }; +/* called with RTNL */ static int br_switchdev_event(struct notifier_block *unused, unsigned long event, void *ptr) { @@ -130,7 +131,6 @@ static int br_switchdev_event(struct notifier_block *unused, struct switchdev_notifier_fdb_info *fdb_info; int err = NOTIFY_DONE; - rtnl_lock(); p = br_port_get_rtnl(dev); if (!p) goto out; @@ -155,7 +155,6 @@ static int br_switchdev_event(struct notifier_block *unused, } out: - rtnl_unlock(); return err; } diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 689b8412c58e..0346c215ff6a 100644 --- a/net/bridge/br_device.c +++ 
b/net/bridge/br_device.c @@ -28,6 +28,8 @@ const struct nf_br_ops __rcu *nf_br_ops __read_mostly; EXPORT_SYMBOL_GPL(nf_br_ops); +static struct lock_class_key bridge_netdev_addr_lock_key; + /* net device transmit always called with BH disabled */ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -88,6 +90,11 @@ out: return NETDEV_TX_OK; } +static void br_set_lockdep_class(struct net_device *dev) +{ + lockdep_set_class(&dev->addr_list_lock, &bridge_netdev_addr_lock_key); +} + static int br_dev_init(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); @@ -100,6 +107,7 @@ static int br_dev_init(struct net_device *dev) err = br_vlan_init(br); if (err) free_percpu(br->stats); + br_set_lockdep_class(dev); return err; } diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index a642bb829d09..09442e0f7f67 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -278,6 +278,8 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) * change from under us. 
*/ list_for_each_entry(v, &vg->vlan_list, vlist) { + if (!br_vlan_should_use(v)) + continue; f = __br_fdb_get(br, br->dev->dev_addr, v->vid); if (f && f->is_local && !f->dst) fdb_delete_local(br, NULL, f); diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index 263b4de4de57..60a3dbfca8a1 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -21,18 +21,19 @@ #include <asm/uaccess.h> #include "br_private.h" -/* called with RTNL */ static int get_bridge_ifindices(struct net *net, int *indices, int num) { struct net_device *dev; int i = 0; - for_each_netdev(net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { if (i >= num) break; if (dev->priv_flags & IFF_EBRIDGE) indices[i++] = dev->ifindex; } + rcu_read_unlock(); return i; } diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 03661d97463c..7173a685309a 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -464,8 +464,11 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, if (ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0, &ip6h->saddr)) { kfree_skb(skb); + br->has_ipv6_addr = 0; return NULL; } + + br->has_ipv6_addr = 1; ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest); hopopt = (u8 *)(ip6h + 1); @@ -1270,6 +1273,7 @@ static int br_ip4_multicast_query(struct net_bridge *br, struct br_ip saddr; unsigned long max_delay; unsigned long now = jiffies; + unsigned int offset = skb_transport_offset(skb); __be32 group; int err = 0; @@ -1280,14 +1284,14 @@ static int br_ip4_multicast_query(struct net_bridge *br, group = ih->group; - if (skb->len == sizeof(*ih)) { + if (skb->len == offset + sizeof(*ih)) { max_delay = ih->code * (HZ / IGMP_TIMER_SCALE); if (!max_delay) { max_delay = 10 * HZ; group = 0; } - } else if (skb->len >= sizeof(*ih3)) { + } else if (skb->len >= offset + sizeof(*ih3)) { ih3 = igmpv3_query_hdr(skb); if (ih3->nsrcs) goto out; @@ -1348,6 +1352,7 @@ static int br_ip6_multicast_query(struct net_bridge 
*br, struct br_ip saddr; unsigned long max_delay; unsigned long now = jiffies; + unsigned int offset = skb_transport_offset(skb); const struct in6_addr *group = NULL; bool is_general_query; int err = 0; @@ -1357,8 +1362,8 @@ static int br_ip6_multicast_query(struct net_bridge *br, (port && port->state == BR_STATE_DISABLED)) goto out; - if (skb->len == sizeof(*mld)) { - if (!pskb_may_pull(skb, sizeof(*mld))) { + if (skb->len == offset + sizeof(*mld)) { + if (!pskb_may_pull(skb, offset + sizeof(*mld))) { err = -EINVAL; goto out; } @@ -1367,7 +1372,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, if (max_delay) group = &mld->mld_mca; } else { - if (!pskb_may_pull(skb, sizeof(*mld2q))) { + if (!pskb_may_pull(skb, offset + sizeof(*mld2q))) { err = -EINVAL; goto out; } @@ -1734,6 +1739,7 @@ void br_multicast_init(struct net_bridge *br) br->ip6_other_query.delay_time = 0; br->ip6_querier.port = NULL; #endif + br->has_ipv6_addr = 1; spin_lock_init(&br->multicast_lock); setup_timer(&br->multicast_router_timer, diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 216018c76018..1001a1b7df9b 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -301,6 +301,7 @@ struct net_bridge u8 multicast_disabled:1; u8 multicast_querier:1; u8 multicast_query_use_ifaddr:1; + u8 has_ipv6_addr:1; u32 hash_elasticity; u32 hash_max; @@ -574,10 +575,22 @@ static inline bool br_multicast_is_router(struct net_bridge *br) static inline bool __br_multicast_querier_exists(struct net_bridge *br, - struct bridge_mcast_other_query *querier) + struct bridge_mcast_other_query *querier, + const bool is_ipv6) { + bool own_querier_enabled; + + if (br->multicast_querier) { + if (is_ipv6 && !br->has_ipv6_addr) + own_querier_enabled = false; + else + own_querier_enabled = true; + } else { + own_querier_enabled = false; + } + return time_is_before_jiffies(querier->delay_time) && - (br->multicast_querier || timer_pending(&querier->timer)); + (own_querier_enabled || 
timer_pending(&querier->timer)); } static inline bool br_multicast_querier_exists(struct net_bridge *br, @@ -585,10 +598,12 @@ static inline bool br_multicast_querier_exists(struct net_bridge *br, { switch (eth->h_proto) { case (htons(ETH_P_IP)): - return __br_multicast_querier_exists(br, &br->ip4_other_query); + return __br_multicast_querier_exists(br, + &br->ip4_other_query, false); #if IS_ENABLED(CONFIG_IPV6) case (htons(ETH_P_IPV6)): - return __br_multicast_querier_exists(br, &br->ip6_other_query); + return __br_multicast_querier_exists(br, + &br->ip6_other_query, true); #endif default: return false; diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 5f3f64553179..eff69cb270d2 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -567,6 +567,14 @@ int br_set_max_age(struct net_bridge *br, unsigned long val) } +/* Set time interval that dynamic forwarding entries live + * For pure software bridge, allow values outside the 802.1 + * standard specification for special cases: + * 0 - entry never ages (all permanant) + * 1 - entry disappears (no persistance) + * + * Offloaded switch entries maybe more restrictive + */ int br_set_ageing_time(struct net_bridge *br, u32 ageing_time) { struct switchdev_attr attr = { @@ -577,11 +585,8 @@ int br_set_ageing_time(struct net_bridge *br, u32 ageing_time) unsigned long t = clock_t_to_jiffies(ageing_time); int err; - if (t < BR_MIN_AGEING_TIME || t > BR_MAX_AGEING_TIME) - return -ERANGE; - err = switchdev_port_attr_set(br->dev, &attr); - if (err) + if (err && err != -EOPNOTSUPP) return err; br->ageing_time = t; diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 9981039ef4ff..63ae5dd24fc5 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -672,6 +672,8 @@ static void reset_connection(struct ceph_connection *con) } con->in_seq = 0; con->in_seq_acked = 0; + + con->out_skip = 0; } /* @@ -771,6 +773,8 @@ static u32 get_global_seq(struct ceph_messenger *msgr, u32 gt) static void 
con_out_kvec_reset(struct ceph_connection *con) { + BUG_ON(con->out_skip); + con->out_kvec_left = 0; con->out_kvec_bytes = 0; con->out_kvec_cur = &con->out_kvec[0]; @@ -779,9 +783,9 @@ static void con_out_kvec_reset(struct ceph_connection *con) static void con_out_kvec_add(struct ceph_connection *con, size_t size, void *data) { - int index; + int index = con->out_kvec_left; - index = con->out_kvec_left; + BUG_ON(con->out_skip); BUG_ON(index >= ARRAY_SIZE(con->out_kvec)); con->out_kvec[index].iov_len = size; @@ -790,6 +794,27 @@ static void con_out_kvec_add(struct ceph_connection *con, con->out_kvec_bytes += size; } +/* + * Chop off a kvec from the end. Return residual number of bytes for + * that kvec, i.e. how many bytes would have been written if the kvec + * hadn't been nuked. + */ +static int con_out_kvec_skip(struct ceph_connection *con) +{ + int off = con->out_kvec_cur - con->out_kvec; + int skip = 0; + + if (con->out_kvec_bytes > 0) { + skip = con->out_kvec[off + con->out_kvec_left - 1].iov_len; + BUG_ON(con->out_kvec_bytes < skip); + BUG_ON(!con->out_kvec_left); + con->out_kvec_bytes -= skip; + con->out_kvec_left--; + } + + return skip; +} + #ifdef CONFIG_BLOCK /* @@ -1175,6 +1200,13 @@ static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, return new_piece; } +static size_t sizeof_footer(struct ceph_connection *con) +{ + return (con->peer_features & CEPH_FEATURE_MSG_AUTH) ? 
+ sizeof(struct ceph_msg_footer) : + sizeof(struct ceph_msg_footer_old); +} + static void prepare_message_data(struct ceph_msg *msg, u32 data_len) { BUG_ON(!msg); @@ -1197,7 +1229,6 @@ static void prepare_write_message_footer(struct ceph_connection *con) m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE; dout("prepare_write_message_footer %p\n", con); - con->out_kvec_is_msg = true; con->out_kvec[v].iov_base = &m->footer; if (con->peer_features & CEPH_FEATURE_MSG_AUTH) { if (con->ops->sign_message) @@ -1225,7 +1256,6 @@ static void prepare_write_message(struct ceph_connection *con) u32 crc; con_out_kvec_reset(con); - con->out_kvec_is_msg = true; con->out_msg_done = false; /* Sneak an ack in there first? If we can get it into the same @@ -1265,18 +1295,19 @@ static void prepare_write_message(struct ceph_connection *con) /* tag + hdr + front + middle */ con_out_kvec_add(con, sizeof (tag_msg), &tag_msg); - con_out_kvec_add(con, sizeof (m->hdr), &m->hdr); + con_out_kvec_add(con, sizeof(con->out_hdr), &con->out_hdr); con_out_kvec_add(con, m->front.iov_len, m->front.iov_base); if (m->middle) con_out_kvec_add(con, m->middle->vec.iov_len, m->middle->vec.iov_base); - /* fill in crc (except data pages), footer */ + /* fill in hdr crc and finalize hdr */ crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc)); con->out_msg->hdr.crc = cpu_to_le32(crc); - con->out_msg->footer.flags = 0; + memcpy(&con->out_hdr, &con->out_msg->hdr, sizeof(con->out_hdr)); + /* fill in front and middle crc, footer */ crc = crc32c(0, m->front.iov_base, m->front.iov_len); con->out_msg->footer.front_crc = cpu_to_le32(crc); if (m->middle) { @@ -1288,6 +1319,7 @@ static void prepare_write_message(struct ceph_connection *con) dout("%s front_crc %u middle_crc %u\n", __func__, le32_to_cpu(con->out_msg->footer.front_crc), le32_to_cpu(con->out_msg->footer.middle_crc)); + con->out_msg->footer.flags = 0; /* is there a data payload? 
*/ con->out_msg->footer.data_crc = 0; @@ -1492,7 +1524,6 @@ static int write_partial_kvec(struct ceph_connection *con) } } con->out_kvec_left = 0; - con->out_kvec_is_msg = false; ret = 1; out: dout("write_partial_kvec %p %d left in %d kvecs ret = %d\n", con, @@ -1584,6 +1615,7 @@ static int write_partial_skip(struct ceph_connection *con) { int ret; + dout("%s %p %d left\n", __func__, con, con->out_skip); while (con->out_skip > 0) { size_t size = min(con->out_skip, (int) PAGE_CACHE_SIZE); @@ -2313,9 +2345,9 @@ static int read_partial_message(struct ceph_connection *con) ceph_pr_addr(&con->peer_addr.in_addr), seq, con->in_seq + 1); con->in_base_pos = -front_len - middle_len - data_len - - sizeof(m->footer); + sizeof_footer(con); con->in_tag = CEPH_MSGR_TAG_READY; - return 0; + return 1; } else if ((s64)seq - (s64)con->in_seq > 1) { pr_err("read_partial_message bad seq %lld expected %lld\n", seq, con->in_seq + 1); @@ -2338,10 +2370,10 @@ static int read_partial_message(struct ceph_connection *con) /* skip this message */ dout("alloc_msg said skip message\n"); con->in_base_pos = -front_len - middle_len - data_len - - sizeof(m->footer); + sizeof_footer(con); con->in_tag = CEPH_MSGR_TAG_READY; con->in_seq++; - return 0; + return 1; } BUG_ON(!con->in_msg); @@ -2506,13 +2538,13 @@ more: more_kvec: /* kvec data queued? 
*/ - if (con->out_skip) { - ret = write_partial_skip(con); + if (con->out_kvec_left) { + ret = write_partial_kvec(con); if (ret <= 0) goto out; } - if (con->out_kvec_left) { - ret = write_partial_kvec(con); + if (con->out_skip) { + ret = write_partial_skip(con); if (ret <= 0) goto out; } @@ -3050,16 +3082,31 @@ void ceph_msg_revoke(struct ceph_msg *msg) ceph_msg_put(msg); } if (con->out_msg == msg) { - dout("%s %p msg %p - was sending\n", __func__, con, msg); - con->out_msg = NULL; - if (con->out_kvec_is_msg) { - con->out_skip = con->out_kvec_bytes; - con->out_kvec_is_msg = false; + BUG_ON(con->out_skip); + /* footer */ + if (con->out_msg_done) { + con->out_skip += con_out_kvec_skip(con); + } else { + BUG_ON(!msg->data_length); + if (con->peer_features & CEPH_FEATURE_MSG_AUTH) + con->out_skip += sizeof(msg->footer); + else + con->out_skip += sizeof(msg->old_footer); } + /* data, middle, front */ + if (msg->data_length) + con->out_skip += msg->cursor.total_resid; + if (msg->middle) + con->out_skip += con_out_kvec_skip(con); + con->out_skip += con_out_kvec_skip(con); + + dout("%s %p msg %p - was sending, will write %d skip %d\n", + __func__, con, msg, con->out_kvec_bytes, con->out_skip); msg->hdr.seq = 0; - + con->out_msg = NULL; ceph_msg_put(msg); } + mutex_unlock(&con->mutex); } diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index f8f235930d88..a28e47ff1b1b 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -2843,8 +2843,8 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, mutex_lock(&osdc->request_mutex); req = __lookup_request(osdc, tid); if (!req) { - pr_warn("%s osd%d tid %llu unknown, skipping\n", - __func__, osd->o_osd, tid); + dout("%s osd%d tid %llu unknown, skipping\n", __func__, + osd->o_osd, tid); m = NULL; *skip = 1; goto out; diff --git a/net/core/dev.c b/net/core/dev.c index ae00b894e675..9efbdb3ff78a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2542,6 +2542,8 @@ static inline bool 
skb_needs_check(struct sk_buff *skb, bool tx_path) * * It may return NULL if the skb requires no segmentation. This is * only possible when GSO is used for verifying header integrity. + * + * Segmentation preserves SKB_SGO_CB_OFFSET bytes of previous skb cb. */ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, netdev_features_t features, bool tx_path) @@ -2556,6 +2558,9 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, return ERR_PTR(err); } + BUILD_BUG_ON(SKB_SGO_CB_OFFSET + + sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb)); + SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb); SKB_GSO_CB(skb)->encap_level = 0; @@ -4140,6 +4145,7 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb) diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; diffs |= p->vlan_tci ^ skb->vlan_tci; + diffs |= skb_metadata_dst_cmp(p, skb); if (maclen == ETH_HLEN) diffs |= compare_ether_header(skb_mac_header(p), skb_mac_header(skb)); @@ -4337,10 +4343,12 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) break; case GRO_MERGED_FREE: - if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) + if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) { + skb_dst_drop(skb); kmem_cache_free(skbuff_head_cache, skb); - else + } else { __kfree_skb(skb); + } break; case GRO_HELD: @@ -7120,8 +7128,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; setup(dev); - if (!dev->tx_queue_len) + if (!dev->tx_queue_len) { dev->priv_flags |= IFF_NO_QUEUE; + dev->tx_queue_len = 1; + } dev->num_tx_queues = txqs; dev->real_num_tx_queues = txqs; diff --git a/net/core/filter.c b/net/core/filter.c index 672eefbfbe99..75e9b2b2336d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -777,6 +777,11 @@ static int bpf_check_classic(const struct sock_filter *filter, if (ftest->k == 0) return -EINVAL; break; + case BPF_ALU | BPF_LSH | BPF_K: + case BPF_ALU | BPF_RSH 
| BPF_K: + if (ftest->k >= 32) + return -EINVAL; + break; case BPF_LD | BPF_MEM: case BPF_LDX | BPF_MEM: case BPF_ST: @@ -1134,7 +1139,8 @@ void bpf_prog_destroy(struct bpf_prog *fp) } EXPORT_SYMBOL_GPL(bpf_prog_destroy); -static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk) +static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk, + bool locked) { struct sk_filter *fp, *old_fp; @@ -1150,10 +1156,8 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk) return -ENOMEM; } - old_fp = rcu_dereference_protected(sk->sk_filter, - sock_owned_by_user(sk)); + old_fp = rcu_dereference_protected(sk->sk_filter, locked); rcu_assign_pointer(sk->sk_filter, fp); - if (old_fp) sk_filter_uncharge(sk, old_fp); @@ -1170,7 +1174,8 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk) * occurs or there is insufficient memory for the filter a negative * errno code is returned. On success the return is zero. */ -int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) +int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk, + bool locked) { unsigned int fsize = bpf_classic_proglen(fprog); unsigned int bpf_fsize = bpf_prog_size(fprog->len); @@ -1208,7 +1213,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) if (IS_ERR(prog)) return PTR_ERR(prog); - err = __sk_attach_prog(prog, sk); + err = __sk_attach_prog(prog, sk, locked); if (err < 0) { __bpf_prog_release(prog); return err; @@ -1216,7 +1221,12 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) return 0; } -EXPORT_SYMBOL_GPL(sk_attach_filter); +EXPORT_SYMBOL_GPL(__sk_attach_filter); + +int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) +{ + return __sk_attach_filter(fprog, sk, sock_owned_by_user(sk)); +} int sk_attach_bpf(u32 ufd, struct sock *sk) { @@ -1235,7 +1245,7 @@ int sk_attach_bpf(u32 ufd, struct sock *sk) return -EINVAL; } - err = __sk_attach_prog(prog, sk); + err = __sk_attach_prog(prog, sk, 
sock_owned_by_user(sk)); if (err < 0) { bpf_prog_put(prog); return err; @@ -1265,9 +1275,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) */ if (unlikely((u32) offset > 0xffff || len > sizeof(buf))) return -EFAULT; - - if (unlikely(skb_cloned(skb) && - !skb_clone_writable(skb, offset + len))) + if (unlikely(skb_try_make_writable(skb, offset + len))) return -EFAULT; ptr = skb_header_pointer(skb, offset, len, buf); @@ -1311,8 +1319,7 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) if (unlikely((u32) offset > 0xffff)) return -EFAULT; - if (unlikely(skb_cloned(skb) && - !skb_clone_writable(skb, offset + sizeof(sum)))) + if (unlikely(skb_try_make_writable(skb, offset + sizeof(sum)))) return -EFAULT; ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); @@ -1357,9 +1364,7 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) if (unlikely((u32) offset > 0xffff)) return -EFAULT; - - if (unlikely(skb_cloned(skb) && - !skb_clone_writable(skb, offset + sizeof(sum)))) + if (unlikely(skb_try_make_writable(skb, offset + sizeof(sum)))) return -EFAULT; ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); @@ -1544,6 +1549,13 @@ bool bpf_helper_changes_skb_data(void *func) return true; if (func == bpf_skb_vlan_pop) return true; + if (func == bpf_skb_store_bytes) + return true; + if (func == bpf_l3_csum_replace) + return true; + if (func == bpf_l4_csum_replace) + return true; + return false; } @@ -1908,7 +1920,7 @@ static int __init register_sk_filter_ops(void) } late_initcall(register_sk_filter_ops); -int sk_detach_filter(struct sock *sk) +int __sk_detach_filter(struct sock *sk, bool locked) { int ret = -ENOENT; struct sk_filter *filter; @@ -1916,8 +1928,7 @@ int sk_detach_filter(struct sock *sk) if (sock_flag(sk, SOCK_FILTER_LOCKED)) return -EPERM; - filter = rcu_dereference_protected(sk->sk_filter, - sock_owned_by_user(sk)); + filter = rcu_dereference_protected(sk->sk_filter, locked); if 
(filter) { RCU_INIT_POINTER(sk->sk_filter, NULL); sk_filter_uncharge(sk, filter); @@ -1926,7 +1937,12 @@ int sk_detach_filter(struct sock *sk) return ret; } -EXPORT_SYMBOL_GPL(sk_detach_filter); +EXPORT_SYMBOL_GPL(__sk_detach_filter); + +int sk_detach_filter(struct sock *sk) +{ + return __sk_detach_filter(sk, sock_owned_by_user(sk)); +} int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index d79699c9d1b9..4ab6ead3d8ee 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -208,7 +208,6 @@ ip: case htons(ETH_P_IPV6): { const struct ipv6hdr *iph; struct ipv6hdr _iph; - __be32 flow_label; ipv6: iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph); @@ -230,8 +229,12 @@ ipv6: key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; } - flow_label = ip6_flowlabel(iph); - if (flow_label) { + if ((dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_FLOW_LABEL) || + (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)) && + ip6_flowlabel(iph)) { + __be32 flow_label = ip6_flowlabel(iph); + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_FLOW_LABEL)) { key_tags = skb_flow_dissector_target(flow_dissector, @@ -396,6 +399,13 @@ ip_proto_again: goto out_bad; proto = eth->h_proto; nhoff += sizeof(*eth); + + /* Cap headers that we access via pointers at the + * end of the Ethernet header as our maximum alignment + * at that point is only 2 bytes. 
+ */ + if (NET_IP_ALIGN) + hlen = nhoff; } key_control->flags |= FLOW_DIS_ENCAPSULATION; @@ -652,6 +662,23 @@ void make_flow_keys_digest(struct flow_keys_digest *digest, } EXPORT_SYMBOL(make_flow_keys_digest); +static struct flow_dissector flow_keys_dissector_symmetric __read_mostly; + +u32 __skb_get_hash_symmetric(struct sk_buff *skb) +{ + struct flow_keys keys; + + __flow_hash_secret_init(); + + memset(&keys, 0, sizeof(keys)); + __skb_flow_dissect(skb, &flow_keys_dissector_symmetric, &keys, + NULL, 0, 0, 0, + FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); + + return __flow_hash_from_keys(&keys, hashrnd); +} +EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric); + /** * __skb_get_hash: calculate a flow hash * @skb: sk_buff to calculate flow hash from @@ -864,6 +891,29 @@ static const struct flow_dissector_key flow_keys_dissector_keys[] = { }, }; +static const struct flow_dissector_key flow_keys_dissector_symmetric_keys[] = { + { + .key_id = FLOW_DISSECTOR_KEY_CONTROL, + .offset = offsetof(struct flow_keys, control), + }, + { + .key_id = FLOW_DISSECTOR_KEY_BASIC, + .offset = offsetof(struct flow_keys, basic), + }, + { + .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS, + .offset = offsetof(struct flow_keys, addrs.v4addrs), + }, + { + .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS, + .offset = offsetof(struct flow_keys, addrs.v6addrs), + }, + { + .key_id = FLOW_DISSECTOR_KEY_PORTS, + .offset = offsetof(struct flow_keys, ports), + }, +}; + static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = { { .key_id = FLOW_DISSECTOR_KEY_CONTROL, @@ -885,6 +935,9 @@ static int __init init_default_flow_dissectors(void) skb_flow_dissector_init(&flow_keys_dissector, flow_keys_dissector_keys, ARRAY_SIZE(flow_keys_dissector_keys)); + skb_flow_dissector_init(&flow_keys_dissector_symmetric, + flow_keys_dissector_symmetric_keys, + ARRAY_SIZE(flow_keys_dissector_symmetric_keys)); skb_flow_dissector_init(&flow_keys_buf_dissector, flow_keys_buf_dissector_keys, ARRAY_SIZE(flow_keys_buf_dissector_keys)); 
diff --git a/net/core/neighbour.c b/net/core/neighbour.c index f18ae91b652e..769cece9b00b 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2467,13 +2467,17 @@ int neigh_xmit(int index, struct net_device *dev, tbl = neigh_tables[index]; if (!tbl) goto out; + rcu_read_lock_bh(); neigh = __neigh_lookup_noref(tbl, addr, dev); if (!neigh) neigh = __neigh_create(tbl, addr, dev, false); err = PTR_ERR(neigh); - if (IS_ERR(neigh)) + if (IS_ERR(neigh)) { + rcu_read_unlock_bh(); goto out_kfree_skb; + } err = neigh->output(neigh, skb); + rcu_read_unlock_bh(); } else if (index == NEIGH_LINK_TABLE) { err = dev_hard_header(skb, dev, ntohs(skb->protocol), diff --git a/net/core/pktgen.c b/net/core/pktgen.c index de8d5cc5eb24..4da4d51a2ccf 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2787,7 +2787,9 @@ static struct sk_buff *pktgen_alloc_skb(struct net_device *dev, } else { skb = __netdev_alloc_skb(dev, size, GFP_NOWAIT); } - skb_reserve(skb, LL_RESERVED_SPACE(dev)); + + if (likely(skb)) + skb_reserve(skb, LL_RESERVED_SPACE(dev)); return skb; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9c6d15756e7a..87b91ffbdec3 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -905,6 +905,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + rtnl_link_get_af_size(dev, ext_filter_mask) /* IFLA_AF_SPEC */ + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */ + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */ + + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */ + nla_total_size(1); /* IFLA_PROTO_DOWN */ } diff --git a/net/core/scm.c b/net/core/scm.c index 8a1741b14302..dce0acb929f1 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -87,6 +87,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) *fplp = fpl; fpl->count = 0; fpl->max = SCM_MAX_FD; + fpl->user = NULL; } fpp = &fpl->fp[fpl->count]; @@ -107,6 +108,10 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct 
scm_fp_list **fplp) *fpp++ = file; fpl->count++; } + + if (!fpl->user) + fpl->user = get_uid(current_user()); + return num; } @@ -119,6 +124,7 @@ void __scm_destroy(struct scm_cookie *scm) scm->fp = NULL; for (i=fpl->count-1; i>=0; i--) fput(fpl->fp[i]); + free_uid(fpl->user); kfree(fpl); } } @@ -336,6 +342,7 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl) for (i = 0; i < fpl->count; i++) get_file(fpl->fp[i]); new_fpl->max = new_fpl->count; + new_fpl->user = get_uid(fpl->user); } return new_fpl; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b2df375ec9c2..4968b5ddea69 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -79,6 +79,8 @@ struct kmem_cache *skbuff_head_cache __read_mostly; static struct kmem_cache *skbuff_fclone_cache __read_mostly; +int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS; +EXPORT_SYMBOL(sysctl_max_skb_frags); /** * skb_panic - private function for out-of-line support @@ -4082,9 +4084,9 @@ struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb, if (!pskb_may_pull(skb_chk, offset)) goto err; - __skb_pull(skb_chk, offset); + skb_pull_rcsum(skb_chk, offset); ret = skb_chkf(skb_chk); - __skb_push(skb_chk, offset); + skb_push_rcsum(skb_chk, offset); if (ret) goto err; @@ -4407,15 +4409,16 @@ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) __skb_push(skb, offset); err = __vlan_insert_tag(skb, skb->vlan_proto, skb_vlan_tag_get(skb)); - if (err) + if (err) { + __skb_pull(skb, offset); return err; + } + skb->protocol = skb->vlan_proto; skb->mac_len += VLAN_HLEN; - __skb_pull(skb, offset); - if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->csum = csum_add(skb->csum, csum_partial(skb->data - + (2 * ETH_ALEN), VLAN_HLEN, 0)); + skb_postpush_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN); + __skb_pull(skb, offset); } __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); return 0; diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 95b6139d710c..a6beb7b6ae55 100644 --- 
a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -26,6 +26,7 @@ static int zero = 0; static int one = 1; static int min_sndbuf = SOCK_MIN_SNDBUF; static int min_rcvbuf = SOCK_MIN_RCVBUF; +static int max_skb_frags = MAX_SKB_FRAGS; static int net_msg_warn; /* Unused, but still a sysctl */ @@ -392,6 +393,15 @@ static struct ctl_table net_core_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "max_skb_frags", + .data = &sysctl_max_skb_frags, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, + .extra2 = &max_skb_frags, + }, { } }; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 5684e14932bd..8be8f27bfacc 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -204,8 +204,6 @@ void dccp_req_err(struct sock *sk, u64 seq) * ICMPs are not backlogged, hence we cannot get an established * socket here. */ - WARN_ON(req->sk); - if (!between48(seq, dccp_rsk(req)->dreq_iss, dccp_rsk(req)->dreq_gss)) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); } else { @@ -824,26 +822,26 @@ lookup: if (sk->sk_state == DCCP_NEW_SYN_RECV) { struct request_sock *req = inet_reqsk(sk); - struct sock *nsk = NULL; + struct sock *nsk; sk = req->rsk_listener; - if (likely(sk->sk_state == DCCP_LISTEN)) { - nsk = dccp_check_req(sk, skb, req); - } else { + if (unlikely(sk->sk_state != DCCP_LISTEN)) { inet_csk_reqsk_queue_drop_and_put(sk, req); goto lookup; } + sock_hold(sk); + nsk = dccp_check_req(sk, skb, req); if (!nsk) { reqsk_put(req); - goto discard_it; + goto discard_and_relse; } if (nsk == sk) { - sock_hold(sk); reqsk_put(req); } else if (dccp_child_process(sk, nsk, skb)) { dccp_v4_ctl_send_reset(sk, skb); - goto discard_it; + goto discard_and_relse; } else { + sock_put(sk); return 0; } } diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 9c6d0508e63a..b8608b71a66d 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -691,26 +691,26 @@ lookup: if (sk->sk_state == DCCP_NEW_SYN_RECV) { struct 
request_sock *req = inet_reqsk(sk); - struct sock *nsk = NULL; + struct sock *nsk; sk = req->rsk_listener; - if (likely(sk->sk_state == DCCP_LISTEN)) { - nsk = dccp_check_req(sk, skb, req); - } else { + if (unlikely(sk->sk_state != DCCP_LISTEN)) { inet_csk_reqsk_queue_drop_and_put(sk, req); goto lookup; } + sock_hold(sk); + nsk = dccp_check_req(sk, skb, req); if (!nsk) { reqsk_put(req); - goto discard_it; + goto discard_and_relse; } if (nsk == sk) { - sock_hold(sk); reqsk_put(req); } else if (dccp_child_process(sk, nsk, skb)) { dccp_v6_ctl_send_reset(sk, skb); - goto discard_it; + goto discard_and_relse; } else { + sock_put(sk); return 0; } } diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 607a14f20d88..b1dc096d22f8 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1034,10 +1034,13 @@ source_ok: if (!fld.daddr) { fld.daddr = fld.saddr; - err = -EADDRNOTAVAIL; if (dev_out) dev_put(dev_out); + err = -EINVAL; dev_out = init_net.loopback_dev; + if (!dev_out->dn_ptr) + goto out; + err = -EADDRNOTAVAIL; dev_hold(dev_out); if (!fld.daddr) { fld.daddr = @@ -1110,6 +1113,8 @@ source_ok: if (dev_out == NULL) goto out; dn_db = rcu_dereference_raw(dev_out->dn_ptr); + if (!dn_db) + goto e_inval; /* Possible improvement - check all devices for local addr */ if (dn_dev_islocal(dev_out, fld.daddr)) { dev_put(dev_out); @@ -1151,6 +1156,8 @@ select_source: dev_put(dev_out); dev_out = init_net.loopback_dev; dev_hold(dev_out); + if (!dev_out->dn_ptr) + goto e_inval; fld.flowidn_oif = dev_out->ifindex; if (res.fi) dn_fib_info_put(res.fi); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index cebd9d31e65a..0212591b0077 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -334,6 +334,9 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, ASSERT_RTNL(); + if (in_dev->dead) + goto no_promotions; + /* 1. 
Deleting primary ifaddr forces deletion all secondaries * unless alias promotion is set **/ @@ -380,6 +383,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, fib_del_ifaddr(ifa, ifa1); } +no_promotions: /* 2. Unlink it */ *ifap = ifa1->ifa_next; @@ -1847,7 +1851,7 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb, if (err < 0) goto errout; - err = EINVAL; + err = -EINVAL; if (!tb[NETCONFA_IFINDEX]) goto errout; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 477937465a20..d95631d09248 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -23,6 +23,11 @@ struct esp_skb_cb { void *tmp; }; +struct esp_output_extra { + __be32 seqhi; + u32 esphoff; +}; + #define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0])) static u32 esp4_get_mtu(struct xfrm_state *x, int mtu); @@ -35,11 +40,11 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu); * * TODO: Use spare space in skb for this where possible. */ -static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqhilen) +static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int extralen) { unsigned int len; - len = seqhilen; + len = extralen; len += crypto_aead_ivsize(aead); @@ -57,15 +62,16 @@ static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqhilen) return kmalloc(len, GFP_ATOMIC); } -static inline __be32 *esp_tmp_seqhi(void *tmp) +static inline void *esp_tmp_extra(void *tmp) { - return PTR_ALIGN((__be32 *)tmp, __alignof__(__be32)); + return PTR_ALIGN(tmp, __alignof__(struct esp_output_extra)); } -static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen) + +static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int extralen) { return crypto_aead_ivsize(aead) ? 
- PTR_ALIGN((u8 *)tmp + seqhilen, - crypto_aead_alignmask(aead) + 1) : tmp + seqhilen; + PTR_ALIGN((u8 *)tmp + extralen, + crypto_aead_alignmask(aead) + 1) : tmp + extralen; } static inline struct aead_request *esp_tmp_req(struct crypto_aead *aead, u8 *iv) @@ -99,7 +105,7 @@ static void esp_restore_header(struct sk_buff *skb, unsigned int offset) { struct ip_esp_hdr *esph = (void *)(skb->data + offset); void *tmp = ESP_SKB_CB(skb)->tmp; - __be32 *seqhi = esp_tmp_seqhi(tmp); + __be32 *seqhi = esp_tmp_extra(tmp); esph->seq_no = esph->spi; esph->spi = *seqhi; @@ -107,7 +113,11 @@ static void esp_restore_header(struct sk_buff *skb, unsigned int offset) static void esp_output_restore_header(struct sk_buff *skb) { - esp_restore_header(skb, skb_transport_offset(skb) - sizeof(__be32)); + void *tmp = ESP_SKB_CB(skb)->tmp; + struct esp_output_extra *extra = esp_tmp_extra(tmp); + + esp_restore_header(skb, skb_transport_offset(skb) + extra->esphoff - + sizeof(__be32)); } static void esp_output_done_esn(struct crypto_async_request *base, int err) @@ -121,6 +131,7 @@ static void esp_output_done_esn(struct crypto_async_request *base, int err) static int esp_output(struct xfrm_state *x, struct sk_buff *skb) { int err; + struct esp_output_extra *extra; struct ip_esp_hdr *esph; struct crypto_aead *aead; struct aead_request *req; @@ -137,8 +148,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) int tfclen; int nfrags; int assoclen; - int seqhilen; - __be32 *seqhi; + int extralen; __be64 seqno; /* skb is pure payload to encrypt */ @@ -166,21 +176,21 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) nfrags = err; assoclen = sizeof(*esph); - seqhilen = 0; + extralen = 0; if (x->props.flags & XFRM_STATE_ESN) { - seqhilen += sizeof(__be32); - assoclen += seqhilen; + extralen += sizeof(*extra); + assoclen += sizeof(__be32); } - tmp = esp_alloc_tmp(aead, nfrags, seqhilen); + tmp = esp_alloc_tmp(aead, nfrags, extralen); if (!tmp) { err = -ENOMEM; goto 
error; } - seqhi = esp_tmp_seqhi(tmp); - iv = esp_tmp_iv(aead, tmp, seqhilen); + extra = esp_tmp_extra(tmp); + iv = esp_tmp_iv(aead, tmp, extralen); req = esp_tmp_req(aead, iv); sg = esp_req_sg(aead, req); @@ -247,8 +257,10 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) * encryption. */ if ((x->props.flags & XFRM_STATE_ESN)) { - esph = (void *)(skb_transport_header(skb) - sizeof(__be32)); - *seqhi = esph->spi; + extra->esphoff = (unsigned char *)esph - + skb_transport_header(skb); + esph = (struct ip_esp_hdr *)((unsigned char *)esph - 4); + extra->seqhi = esph->spi; esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi); aead_request_set_callback(req, 0, esp_output_done_esn, skb); } @@ -445,7 +457,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) goto out; ESP_SKB_CB(skb)->tmp = tmp; - seqhi = esp_tmp_seqhi(tmp); + seqhi = esp_tmp_extra(tmp); iv = esp_tmp_iv(aead, tmp, seqhilen); req = esp_tmp_req(aead, iv); sg = esp_req_sg(aead, req); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index e10edb5e78b0..98c754e61024 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -280,7 +280,6 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) struct in_device *in_dev; struct fib_result res; struct rtable *rt; - struct flowi4 fl4; struct net *net; int scope; @@ -296,14 +295,13 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) scope = RT_SCOPE_UNIVERSE; if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) { - fl4.flowi4_oif = 0; - fl4.flowi4_iif = LOOPBACK_IFINDEX; - fl4.daddr = ip_hdr(skb)->saddr; - fl4.saddr = 0; - fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); - fl4.flowi4_scope = scope; - fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0; - fl4.flowi4_tun_key.tun_id = 0; + struct flowi4 fl4 = { + .flowi4_iif = LOOPBACK_IFINDEX, + .daddr = ip_hdr(skb)->saddr, + .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), + .flowi4_scope = scope, + .flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? 
skb->mark : 0, + }; if (!fib_lookup(net, &fl4, &res, 0)) return FIB_RES_PREFSRC(net, res); } else { @@ -907,7 +905,11 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim) if (ifa->ifa_flags & IFA_F_SECONDARY) { prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask); if (!prim) { - pr_warn("%s: bug: prim == NULL\n", __func__); + /* if the device has been deleted, we don't perform + * address promotion + */ + if (!in_dev->dead) + pr_warn("%s: bug: prim == NULL\n", __func__); return; } if (iprim && iprim != prim) { @@ -923,6 +925,9 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim) subnet = 1; } + if (in_dev->dead) + goto no_promotions; + /* Deletion is more complicated than add. * We should take care of not to delete too much :-) * @@ -998,6 +1003,7 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim) } } +no_promotions: if (!(ok & BRD_OK)) fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); if (subnet && ifa->ifa_prefixlen < 31) { diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index d97268e8ff10..2b68418c7198 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -975,6 +975,8 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg) val = 65535 - 40; if (type == RTAX_MTU && val > 65535 - 15) val = 65535 - 15; + if (type == RTAX_HOPLIMIT && val > 255) + val = 255; if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) return -EINVAL; fi->fib_metrics[type - 1] = val; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 05e4cba14162..b3086cf27027 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -356,9 +356,8 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu) skb_dst_set(skb, &rt->dst); skb->dev = dev; - skb->reserved_tailroom = skb_end_offset(skb) - - min(mtu, skb_end_offset(skb)); skb_reserve(skb, hlen); + skb_tailroom_reserve(skb, mtu, tlen); skb_reset_network_header(skb); pip = ip_hdr(skb); diff --git 
a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 728414dcea3b..030cd09dd2a2 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -789,14 +789,16 @@ static void inet_child_forget(struct sock *sk, struct request_sock *req, reqsk_put(req); } -void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req, - struct sock *child) +struct sock *inet_csk_reqsk_queue_add(struct sock *sk, + struct request_sock *req, + struct sock *child) { struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; spin_lock(&queue->rskq_lock); if (unlikely(sk->sk_state != TCP_LISTEN)) { inet_child_forget(sk, req, child); + child = NULL; } else { req->sk = child; req->dl_next = NULL; @@ -808,6 +810,7 @@ void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req, sk_acceptq_added(sk); } spin_unlock(&queue->rskq_lock); + return child; } EXPORT_SYMBOL(inet_csk_reqsk_queue_add); @@ -817,11 +820,8 @@ struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child, if (own_req) { inet_csk_reqsk_queue_drop(sk, req); reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req); - inet_csk_reqsk_queue_add(sk, req, child); - /* Warning: caller must not call reqsk_put(req); - * child stole last reference on it. - */ - return child; + if (inet_csk_reqsk_queue_add(sk, req, child)) + return child; } /* Too bad, another child took ownership of the request, undo. 
*/ bh_unlock_sock(child); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 1fe55ae81781..b8a0607dab96 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -661,6 +661,7 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user) struct ipq *qp; IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS); + skb_orphan(skb); /* Lookup (or create) queue header */ qp = ip_find(net, ip_hdr(skb), user, vif); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 614521437e30..3e4184088082 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -180,6 +180,7 @@ static __be16 tnl_flags_to_gre_flags(__be16 tflags) return flags; } +/* Fills in tpi and returns header length to be pulled. */ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, bool *csum_err) { @@ -239,7 +240,7 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, return -EINVAL; } } - return iptunnel_pull_header(skb, hdr_len, tpi->proto); + return hdr_len; } static void ipgre_err(struct sk_buff *skb, u32 info, @@ -342,7 +343,7 @@ static void gre_err(struct sk_buff *skb, u32 info) struct tnl_ptk_info tpi; bool csum_err = false; - if (parse_gre_header(skb, &tpi, &csum_err)) { + if (parse_gre_header(skb, &tpi, &csum_err) < 0) { if (!csum_err) /* ignore csum errors. 
*/ return; } @@ -420,6 +421,7 @@ static int gre_rcv(struct sk_buff *skb) { struct tnl_ptk_info tpi; bool csum_err = false; + int hdr_len; #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(ip_hdr(skb)->daddr)) { @@ -429,7 +431,10 @@ static int gre_rcv(struct sk_buff *skb) } #endif - if (parse_gre_header(skb, &tpi, &csum_err) < 0) + hdr_len = parse_gre_header(skb, &tpi, &csum_err); + if (hdr_len < 0) + goto drop; + if (iptunnel_pull_header(skb, hdr_len, tpi.proto) < 0) goto drop; if (ipgre_rcv(skb, &tpi) == PACKET_RCVD) @@ -1242,6 +1247,14 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name, err = ipgre_newlink(net, dev, tb, NULL); if (err < 0) goto out; + + /* openvswitch users expect packet sizes to be unrestricted, + * so set the largest MTU we can. + */ + err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false); + if (err) + goto out; + return dev; out: free_netdev(dev); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 33bef2763c72..dbf7f7ee2958 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -240,6 +240,7 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk, * from host network stack. 
*/ features = netif_skb_features(skb); + BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_SGO_CB_OFFSET); segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); if (IS_ERR_OR_NULL(segs)) { kfree_skb(skb); @@ -921,7 +922,7 @@ static int __ip_append_data(struct sock *sk, if (((length > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len && - (sk->sk_type == SOCK_DGRAM)) { + (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) { err = ip_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, maxfraglen, flags); @@ -1236,13 +1237,16 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, if (!skb) return -EINVAL; - cork->length += size; if ((size + skb->len > mtu) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO)) { + if (skb->ip_summed != CHECKSUM_PARTIAL) + return -EOPNOTSUPP; + skb_shinfo(skb)->gso_size = mtu - fragheaderlen; skb_shinfo(skb)->gso_type = SKB_GSO_UDP; } + cork->length += size; while (size > 0) { if (skb_is_gso(skb)) { diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 5f73a7c03e27..a50124260f5a 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -249,6 +249,8 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc, switch (cmsg->cmsg_type) { case IP_RETOPTS: err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)); + + /* Our caller is responsible for freeing ipc->opt */ err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg), err < 40 ? 
err : 40); if (err) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index cbb51f3fac06..3310ac75e3f3 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -663,6 +663,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, inner_iph = (const struct iphdr *)skb_inner_network_header(skb); connected = (tunnel->parms.iph.daddr != 0); + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + dst = tnl_params->daddr; if (dst == 0) { /* NBMA tunnel */ @@ -760,7 +762,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { tunnel->err_count--; - memset(IPCB(skb), 0, sizeof(*IPCB(skb))); dst_link_failure(skb); } else tunnel->err_count = 0; @@ -947,17 +948,31 @@ done: } EXPORT_SYMBOL_GPL(ip_tunnel_ioctl); -int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu) +int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict) { struct ip_tunnel *tunnel = netdev_priv(dev); int t_hlen = tunnel->hlen + sizeof(struct iphdr); + int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen; - if (new_mtu < 68 || - new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen) + if (new_mtu < 68) return -EINVAL; + + if (new_mtu > max_mtu) { + if (strict) + return -EINVAL; + + new_mtu = max_mtu; + } + dev->mtu = new_mtu; return 0; } +EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu); + +int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu) +{ + return __ip_tunnel_change_mtu(dev, new_mtu, true); +} EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu); static void ip_tunnel_dev_free(struct net_device *dev) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index c3a38353f5dc..9d1e555496e3 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -882,8 +882,10 @@ static struct mfc_cache *ipmr_cache_alloc(void) { struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); - if (c) + if (c) { + c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; c->mfc_un.res.minvif = MAXVIFS; + } return c; } diff --git 
a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 36a30fab8625..6e3e0e8b1ce3 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -367,6 +367,18 @@ static inline bool unconditional(const struct arpt_entry *e) memcmp(&e->arp, &uncond, sizeof(uncond)) == 0; } +static bool find_jump_target(const struct xt_table_info *t, + const struct arpt_entry *target) +{ + struct arpt_entry *iter; + + xt_entry_foreach(iter, t->entries, t->size) { + if (iter == target) + return true; + } + return false; +} + /* Figures out from what hook each rule can be called: returns 0 if * there are loops. Puts hook bitmask in comefrom. */ @@ -439,6 +451,8 @@ static int mark_source_chains(const struct xt_table_info *newinfo, size = e->next_offset; e = (struct arpt_entry *) (entry0 + pos + size); + if (pos + size >= newinfo->size) + return 0; e->counters.pcnt = pos; pos += size; } else { @@ -458,9 +472,15 @@ static int mark_source_chains(const struct xt_table_info *newinfo, /* This a jump; chase it. */ duprintf("Jump rule %u -> %u\n", pos, newpos); + e = (struct arpt_entry *) + (entry0 + newpos); + if (!find_jump_target(newinfo, e)) + return 0; } else { /* ... 
this is a fallthru */ newpos = pos + e->next_offset; + if (newpos >= newinfo->size) + return 0; } e = (struct arpt_entry *) (entry0 + newpos); @@ -474,23 +494,6 @@ next: return 1; } -static inline int check_entry(const struct arpt_entry *e) -{ - const struct xt_entry_target *t; - - if (!arp_checkentry(&e->arp)) - return -EINVAL; - - if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset) - return -EINVAL; - - t = arpt_get_target_c(e); - if (e->target_offset + t->u.target_size > e->next_offset) - return -EINVAL; - - return 0; -} - static inline int check_target(struct arpt_entry *e, const char *name) { struct xt_entry_target *t = arpt_get_target(e); @@ -586,7 +589,11 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, return -EINVAL; } - err = check_entry(e); + if (!arp_checkentry(&e->arp)) + return -EINVAL; + + err = xt_check_entry_offsets(e, e->elems, e->target_offset, + e->next_offset); if (err) return err; @@ -691,10 +698,8 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0, } } - if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) { - duprintf("Looping hook\n"); + if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) return -ELOOP; - } /* Finally, each sanity check must pass */ i = 0; @@ -1125,55 +1130,17 @@ static int do_add_counters(struct net *net, const void __user *user, unsigned int i; struct xt_counters_info tmp; struct xt_counters *paddc; - unsigned int num_counters; - const char *name; - int size; - void *ptmp; struct xt_table *t; const struct xt_table_info *private; int ret = 0; struct arpt_entry *iter; unsigned int addend; -#ifdef CONFIG_COMPAT - struct compat_xt_counters_info compat_tmp; - if (compat) { - ptmp = &compat_tmp; - size = sizeof(struct compat_xt_counters_info); - } else -#endif - { - ptmp = &tmp; - size = sizeof(struct xt_counters_info); - } + paddc = xt_copy_counters_from_user(user, len, &tmp, compat); + if (IS_ERR(paddc)) + return PTR_ERR(paddc); - if 
(copy_from_user(ptmp, user, size) != 0) - return -EFAULT; - -#ifdef CONFIG_COMPAT - if (compat) { - num_counters = compat_tmp.num_counters; - name = compat_tmp.name; - } else -#endif - { - num_counters = tmp.num_counters; - name = tmp.name; - } - - if (len != size + num_counters * sizeof(struct xt_counters)) - return -EINVAL; - - paddc = vmalloc(len - size); - if (!paddc) - return -ENOMEM; - - if (copy_from_user(paddc, user + size, len - size) != 0) { - ret = -EFAULT; - goto free; - } - - t = xt_find_table_lock(net, NFPROTO_ARP, name); + t = xt_find_table_lock(net, NFPROTO_ARP, tmp.name); if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; @@ -1181,7 +1148,7 @@ static int do_add_counters(struct net *net, const void __user *user, local_bh_disable(); private = t->private; - if (private->number != num_counters) { + if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; } @@ -1208,6 +1175,18 @@ static int do_add_counters(struct net *net, const void __user *user, } #ifdef CONFIG_COMPAT +struct compat_arpt_replace { + char name[XT_TABLE_MAXNAMELEN]; + u32 valid_hooks; + u32 num_entries; + u32 size; + u32 hook_entry[NF_ARP_NUMHOOKS]; + u32 underflow[NF_ARP_NUMHOOKS]; + u32 num_counters; + compat_uptr_t counters; + struct compat_arpt_entry entries[0]; +}; + static inline void compat_release_entry(struct compat_arpt_entry *e) { struct xt_entry_target *t; @@ -1216,20 +1195,17 @@ static inline void compat_release_entry(struct compat_arpt_entry *e) module_put(t->u.kernel.target->me); } -static inline int +static int check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, struct xt_table_info *newinfo, unsigned int *size, const unsigned char *base, - const unsigned char *limit, - const unsigned int *hook_entries, - const unsigned int *underflows, - const char *name) + const unsigned char *limit) { struct xt_entry_target *t; struct xt_target *target; unsigned int entry_offset; - int ret, off, h; + int ret, off; 
duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0 || @@ -1246,8 +1222,11 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, return -EINVAL; } - /* For purposes of check_entry casting the compat entry is fine */ - ret = check_entry((struct arpt_entry *)e); + if (!arp_checkentry(&e->arp)) + return -EINVAL; + + ret = xt_compat_check_entry_offsets(e, e->elems, e->target_offset, + e->next_offset); if (ret) return ret; @@ -1271,17 +1250,6 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, if (ret) goto release_target; - /* Check hooks & underflows */ - for (h = 0; h < NF_ARP_NUMHOOKS; h++) { - if ((unsigned char *)e - base == hook_entries[h]) - newinfo->hook_entry[h] = hook_entries[h]; - if ((unsigned char *)e - base == underflows[h]) - newinfo->underflow[h] = underflows[h]; - } - - /* Clear counters and comefrom */ - memset(&e->counters, 0, sizeof(e->counters)); - e->comefrom = 0; return 0; release_target: @@ -1290,18 +1258,17 @@ out: return ret; } -static int +static void compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr, - unsigned int *size, const char *name, + unsigned int *size, struct xt_table_info *newinfo, unsigned char *base) { struct xt_entry_target *t; struct xt_target *target; struct arpt_entry *de; unsigned int origsize; - int ret, h; + int h; - ret = 0; origsize = *size; de = (struct arpt_entry *)*dstptr; memcpy(de, e, sizeof(struct arpt_entry)); @@ -1322,148 +1289,82 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr, if ((unsigned char *)de - base < newinfo->underflow[h]) newinfo->underflow[h] -= origsize - *size; } - return ret; } -static int translate_compat_table(const char *name, - unsigned int valid_hooks, - struct xt_table_info **pinfo, +static int translate_compat_table(struct xt_table_info **pinfo, void **pentry0, - unsigned int total_size, - unsigned int number, - unsigned int *hook_entries, - unsigned 
int *underflows) + const struct compat_arpt_replace *compatr) { unsigned int i, j; struct xt_table_info *newinfo, *info; void *pos, *entry0, *entry1; struct compat_arpt_entry *iter0; - struct arpt_entry *iter1; + struct arpt_replace repl; unsigned int size; int ret = 0; info = *pinfo; entry0 = *pentry0; - size = total_size; - info->number = number; - - /* Init all hooks to impossible value. */ - for (i = 0; i < NF_ARP_NUMHOOKS; i++) { - info->hook_entry[i] = 0xFFFFFFFF; - info->underflow[i] = 0xFFFFFFFF; - } + size = compatr->size; + info->number = compatr->num_entries; duprintf("translate_compat_table: size %u\n", info->size); j = 0; xt_compat_lock(NFPROTO_ARP); - xt_compat_init_offsets(NFPROTO_ARP, number); + xt_compat_init_offsets(NFPROTO_ARP, compatr->num_entries); /* Walk through entries, checking offsets. */ - xt_entry_foreach(iter0, entry0, total_size) { + xt_entry_foreach(iter0, entry0, compatr->size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, entry0, - entry0 + total_size, - hook_entries, - underflows, - name); + entry0 + compatr->size); if (ret != 0) goto out_unlock; ++j; } ret = -EINVAL; - if (j != number) { + if (j != compatr->num_entries) { duprintf("translate_compat_table: %u not %u entries\n", - j, number); + j, compatr->num_entries); goto out_unlock; } - /* Check hooks all assigned */ - for (i = 0; i < NF_ARP_NUMHOOKS; i++) { - /* Only hooks which are valid */ - if (!(valid_hooks & (1 << i))) - continue; - if (info->hook_entry[i] == 0xFFFFFFFF) { - duprintf("Invalid hook entry %u %u\n", - i, hook_entries[i]); - goto out_unlock; - } - if (info->underflow[i] == 0xFFFFFFFF) { - duprintf("Invalid underflow %u %u\n", - i, underflows[i]); - goto out_unlock; - } - } - ret = -ENOMEM; newinfo = xt_alloc_table_info(size); if (!newinfo) goto out_unlock; - newinfo->number = number; + newinfo->number = compatr->num_entries; for (i = 0; i < NF_ARP_NUMHOOKS; i++) { newinfo->hook_entry[i] = info->hook_entry[i]; newinfo->underflow[i] = 
info->underflow[i]; } entry1 = newinfo->entries; pos = entry1; - size = total_size; - xt_entry_foreach(iter0, entry0, total_size) { - ret = compat_copy_entry_from_user(iter0, &pos, &size, - name, newinfo, entry1); - if (ret != 0) - break; - } + size = compatr->size; + xt_entry_foreach(iter0, entry0, compatr->size) + compat_copy_entry_from_user(iter0, &pos, &size, + newinfo, entry1); + + /* all module references in entry0 are now gone */ + xt_compat_flush_offsets(NFPROTO_ARP); xt_compat_unlock(NFPROTO_ARP); - if (ret) - goto free_newinfo; - ret = -ELOOP; - if (!mark_source_chains(newinfo, valid_hooks, entry1)) - goto free_newinfo; + memcpy(&repl, compatr, sizeof(*compatr)); - i = 0; - xt_entry_foreach(iter1, entry1, newinfo->size) { - iter1->counters.pcnt = xt_percpu_counter_alloc(); - if (IS_ERR_VALUE(iter1->counters.pcnt)) { - ret = -ENOMEM; - break; - } - - ret = check_target(iter1, name); - if (ret != 0) { - xt_percpu_counter_free(iter1->counters.pcnt); - break; - } - ++i; - if (strcmp(arpt_get_target(iter1)->u.user.name, - XT_ERROR_TARGET) == 0) - ++newinfo->stacksize; - } - if (ret) { - /* - * The first i matches need cleanup_entry (calls ->destroy) - * because they had called ->check already. The other j-i - * entries need only release. 
- */ - int skip = i; - j -= i; - xt_entry_foreach(iter0, entry0, newinfo->size) { - if (skip-- > 0) - continue; - if (j-- == 0) - break; - compat_release_entry(iter0); - } - xt_entry_foreach(iter1, entry1, newinfo->size) { - if (i-- == 0) - break; - cleanup_entry(iter1); - } - xt_free_table_info(newinfo); - return ret; + for (i = 0; i < NF_ARP_NUMHOOKS; i++) { + repl.hook_entry[i] = newinfo->hook_entry[i]; + repl.underflow[i] = newinfo->underflow[i]; } + repl.num_counters = 0; + repl.counters = NULL; + repl.size = newinfo->size; + ret = translate_table(newinfo, entry1, &repl); + if (ret) + goto free_newinfo; + *pinfo = newinfo; *pentry0 = entry1; xt_free_table_info(info); @@ -1471,31 +1372,18 @@ static int translate_compat_table(const char *name, free_newinfo: xt_free_table_info(newinfo); -out: - xt_entry_foreach(iter0, entry0, total_size) { + return ret; +out_unlock: + xt_compat_flush_offsets(NFPROTO_ARP); + xt_compat_unlock(NFPROTO_ARP); + xt_entry_foreach(iter0, entry0, compatr->size) { if (j-- == 0) break; compat_release_entry(iter0); } return ret; -out_unlock: - xt_compat_flush_offsets(NFPROTO_ARP); - xt_compat_unlock(NFPROTO_ARP); - goto out; } -struct compat_arpt_replace { - char name[XT_TABLE_MAXNAMELEN]; - u32 valid_hooks; - u32 num_entries; - u32 size; - u32 hook_entry[NF_ARP_NUMHOOKS]; - u32 underflow[NF_ARP_NUMHOOKS]; - u32 num_counters; - compat_uptr_t counters; - struct compat_arpt_entry entries[0]; -}; - static int compat_do_replace(struct net *net, void __user *user, unsigned int len) { @@ -1528,10 +1416,7 @@ static int compat_do_replace(struct net *net, void __user *user, goto free_newinfo; } - ret = translate_compat_table(tmp.name, tmp.valid_hooks, - &newinfo, &loc_cpu_entry, tmp.size, - tmp.num_entries, tmp.hook_entry, - tmp.underflow); + ret = translate_compat_table(&newinfo, &loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 99d46b0a4ead..a399c5419622 100644 
--- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -443,6 +443,18 @@ ipt_do_table(struct sk_buff *skb, #endif } +static bool find_jump_target(const struct xt_table_info *t, + const struct ipt_entry *target) +{ + struct ipt_entry *iter; + + xt_entry_foreach(iter, t->entries, t->size) { + if (iter == target) + return true; + } + return false; +} + /* Figures out from what hook each rule can be called: returns 0 if there are loops. Puts hook bitmask in comefrom. */ static int @@ -520,6 +532,8 @@ mark_source_chains(const struct xt_table_info *newinfo, size = e->next_offset; e = (struct ipt_entry *) (entry0 + pos + size); + if (pos + size >= newinfo->size) + return 0; e->counters.pcnt = pos; pos += size; } else { @@ -538,9 +552,15 @@ mark_source_chains(const struct xt_table_info *newinfo, /* This a jump; chase it. */ duprintf("Jump rule %u -> %u\n", pos, newpos); + e = (struct ipt_entry *) + (entry0 + newpos); + if (!find_jump_target(newinfo, e)) + return 0; } else { /* ... 
this is a fallthru */ newpos = pos + e->next_offset; + if (newpos >= newinfo->size) + return 0; } e = (struct ipt_entry *) (entry0 + newpos); @@ -568,25 +588,6 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net) } static int -check_entry(const struct ipt_entry *e) -{ - const struct xt_entry_target *t; - - if (!ip_checkentry(&e->ip)) - return -EINVAL; - - if (e->target_offset + sizeof(struct xt_entry_target) > - e->next_offset) - return -EINVAL; - - t = ipt_get_target_c(e); - if (e->target_offset + t->u.target_size > e->next_offset) - return -EINVAL; - - return 0; -} - -static int check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) { const struct ipt_ip *ip = par->entryinfo; @@ -750,7 +751,11 @@ check_entry_size_and_hooks(struct ipt_entry *e, return -EINVAL; } - err = check_entry(e); + if (!ip_checkentry(&e->ip)) + return -EINVAL; + + err = xt_check_entry_offsets(e, e->elems, e->target_offset, + e->next_offset); if (err) return err; @@ -1308,55 +1313,17 @@ do_add_counters(struct net *net, const void __user *user, unsigned int i; struct xt_counters_info tmp; struct xt_counters *paddc; - unsigned int num_counters; - const char *name; - int size; - void *ptmp; struct xt_table *t; const struct xt_table_info *private; int ret = 0; struct ipt_entry *iter; unsigned int addend; -#ifdef CONFIG_COMPAT - struct compat_xt_counters_info compat_tmp; - if (compat) { - ptmp = &compat_tmp; - size = sizeof(struct compat_xt_counters_info); - } else -#endif - { - ptmp = &tmp; - size = sizeof(struct xt_counters_info); - } - - if (copy_from_user(ptmp, user, size) != 0) - return -EFAULT; - -#ifdef CONFIG_COMPAT - if (compat) { - num_counters = compat_tmp.num_counters; - name = compat_tmp.name; - } else -#endif - { - num_counters = tmp.num_counters; - name = tmp.name; - } + paddc = xt_copy_counters_from_user(user, len, &tmp, compat); + if (IS_ERR(paddc)) + return PTR_ERR(paddc); - if (len != size + num_counters * sizeof(struct xt_counters)) - return 
-EINVAL; - - paddc = vmalloc(len - size); - if (!paddc) - return -ENOMEM; - - if (copy_from_user(paddc, user + size, len - size) != 0) { - ret = -EFAULT; - goto free; - } - - t = xt_find_table_lock(net, AF_INET, name); + t = xt_find_table_lock(net, AF_INET, tmp.name); if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; @@ -1364,7 +1331,7 @@ do_add_counters(struct net *net, const void __user *user, local_bh_disable(); private = t->private; - if (private->number != num_counters) { + if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; } @@ -1443,7 +1410,6 @@ compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr, static int compat_find_calc_match(struct xt_entry_match *m, - const char *name, const struct ipt_ip *ip, int *size) { @@ -1478,17 +1444,14 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, struct xt_table_info *newinfo, unsigned int *size, const unsigned char *base, - const unsigned char *limit, - const unsigned int *hook_entries, - const unsigned int *underflows, - const char *name) + const unsigned char *limit) { struct xt_entry_match *ematch; struct xt_entry_target *t; struct xt_target *target; unsigned int entry_offset; unsigned int j; - int ret, off, h; + int ret, off; duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 || @@ -1505,8 +1468,11 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, return -EINVAL; } - /* For purposes of check_entry casting the compat entry is fine */ - ret = check_entry((struct ipt_entry *)e); + if (!ip_checkentry(&e->ip)) + return -EINVAL; + + ret = xt_compat_check_entry_offsets(e, e->elems, + e->target_offset, e->next_offset); if (ret) return ret; @@ -1514,7 +1480,7 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, entry_offset = (void *)e - (void *)base; j = 0; xt_ematch_foreach(ematch, e) { - ret = compat_find_calc_match(ematch, name, &e->ip, &off); + ret = 
compat_find_calc_match(ematch, &e->ip, &off); if (ret != 0) goto release_matches; ++j; @@ -1537,17 +1503,6 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, if (ret) goto out; - /* Check hooks & underflows */ - for (h = 0; h < NF_INET_NUMHOOKS; h++) { - if ((unsigned char *)e - base == hook_entries[h]) - newinfo->hook_entry[h] = hook_entries[h]; - if ((unsigned char *)e - base == underflows[h]) - newinfo->underflow[h] = underflows[h]; - } - - /* Clear counters and comefrom */ - memset(&e->counters, 0, sizeof(e->counters)); - e->comefrom = 0; return 0; out: @@ -1561,19 +1516,18 @@ release_matches: return ret; } -static int +static void compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, - unsigned int *size, const char *name, + unsigned int *size, struct xt_table_info *newinfo, unsigned char *base) { struct xt_entry_target *t; struct xt_target *target; struct ipt_entry *de; unsigned int origsize; - int ret, h; + int h; struct xt_entry_match *ematch; - ret = 0; origsize = *size; de = (struct ipt_entry *)*dstptr; memcpy(de, e, sizeof(struct ipt_entry)); @@ -1582,201 +1536,105 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, *dstptr += sizeof(struct ipt_entry); *size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); - xt_ematch_foreach(ematch, e) { - ret = xt_compat_match_from_user(ematch, dstptr, size); - if (ret != 0) - return ret; - } + xt_ematch_foreach(ematch, e) + xt_compat_match_from_user(ematch, dstptr, size); + de->target_offset = e->target_offset - (origsize - *size); t = compat_ipt_get_target(e); target = t->u.kernel.target; xt_compat_target_from_user(t, dstptr, size); de->next_offset = e->next_offset - (origsize - *size); + for (h = 0; h < NF_INET_NUMHOOKS; h++) { if ((unsigned char *)de - base < newinfo->hook_entry[h]) newinfo->hook_entry[h] -= origsize - *size; if ((unsigned char *)de - base < newinfo->underflow[h]) newinfo->underflow[h] -= origsize - *size; } - return ret; -} - 
-static int -compat_check_entry(struct ipt_entry *e, struct net *net, const char *name) -{ - struct xt_entry_match *ematch; - struct xt_mtchk_param mtpar; - unsigned int j; - int ret = 0; - - e->counters.pcnt = xt_percpu_counter_alloc(); - if (IS_ERR_VALUE(e->counters.pcnt)) - return -ENOMEM; - - j = 0; - mtpar.net = net; - mtpar.table = name; - mtpar.entryinfo = &e->ip; - mtpar.hook_mask = e->comefrom; - mtpar.family = NFPROTO_IPV4; - xt_ematch_foreach(ematch, e) { - ret = check_match(ematch, &mtpar); - if (ret != 0) - goto cleanup_matches; - ++j; - } - - ret = check_target(e, net, name); - if (ret) - goto cleanup_matches; - return 0; - - cleanup_matches: - xt_ematch_foreach(ematch, e) { - if (j-- == 0) - break; - cleanup_match(ematch, net); - } - - xt_percpu_counter_free(e->counters.pcnt); - - return ret; } static int translate_compat_table(struct net *net, - const char *name, - unsigned int valid_hooks, struct xt_table_info **pinfo, void **pentry0, - unsigned int total_size, - unsigned int number, - unsigned int *hook_entries, - unsigned int *underflows) + const struct compat_ipt_replace *compatr) { unsigned int i, j; struct xt_table_info *newinfo, *info; void *pos, *entry0, *entry1; struct compat_ipt_entry *iter0; - struct ipt_entry *iter1; + struct ipt_replace repl; unsigned int size; int ret; info = *pinfo; entry0 = *pentry0; - size = total_size; - info->number = number; - - /* Init all hooks to impossible value. */ - for (i = 0; i < NF_INET_NUMHOOKS; i++) { - info->hook_entry[i] = 0xFFFFFFFF; - info->underflow[i] = 0xFFFFFFFF; - } + size = compatr->size; + info->number = compatr->num_entries; duprintf("translate_compat_table: size %u\n", info->size); j = 0; xt_compat_lock(AF_INET); - xt_compat_init_offsets(AF_INET, number); + xt_compat_init_offsets(AF_INET, compatr->num_entries); /* Walk through entries, checking offsets. 
*/ - xt_entry_foreach(iter0, entry0, total_size) { + xt_entry_foreach(iter0, entry0, compatr->size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, entry0, - entry0 + total_size, - hook_entries, - underflows, - name); + entry0 + compatr->size); if (ret != 0) goto out_unlock; ++j; } ret = -EINVAL; - if (j != number) { + if (j != compatr->num_entries) { duprintf("translate_compat_table: %u not %u entries\n", - j, number); + j, compatr->num_entries); goto out_unlock; } - /* Check hooks all assigned */ - for (i = 0; i < NF_INET_NUMHOOKS; i++) { - /* Only hooks which are valid */ - if (!(valid_hooks & (1 << i))) - continue; - if (info->hook_entry[i] == 0xFFFFFFFF) { - duprintf("Invalid hook entry %u %u\n", - i, hook_entries[i]); - goto out_unlock; - } - if (info->underflow[i] == 0xFFFFFFFF) { - duprintf("Invalid underflow %u %u\n", - i, underflows[i]); - goto out_unlock; - } - } - ret = -ENOMEM; newinfo = xt_alloc_table_info(size); if (!newinfo) goto out_unlock; - newinfo->number = number; + newinfo->number = compatr->num_entries; for (i = 0; i < NF_INET_NUMHOOKS; i++) { - newinfo->hook_entry[i] = info->hook_entry[i]; - newinfo->underflow[i] = info->underflow[i]; + newinfo->hook_entry[i] = compatr->hook_entry[i]; + newinfo->underflow[i] = compatr->underflow[i]; } entry1 = newinfo->entries; pos = entry1; - size = total_size; - xt_entry_foreach(iter0, entry0, total_size) { - ret = compat_copy_entry_from_user(iter0, &pos, &size, - name, newinfo, entry1); - if (ret != 0) - break; - } + size = compatr->size; + xt_entry_foreach(iter0, entry0, compatr->size) + compat_copy_entry_from_user(iter0, &pos, &size, + newinfo, entry1); + + /* all module references in entry0 are now gone. + * entry1/newinfo contains a 64bit ruleset that looks exactly as + * generated by 64bit userspace. + * + * Call standard translate_table() to validate all hook_entrys, + * underflows, check for loops, etc. 
+ */ xt_compat_flush_offsets(AF_INET); xt_compat_unlock(AF_INET); - if (ret) - goto free_newinfo; - ret = -ELOOP; - if (!mark_source_chains(newinfo, valid_hooks, entry1)) - goto free_newinfo; + memcpy(&repl, compatr, sizeof(*compatr)); - i = 0; - xt_entry_foreach(iter1, entry1, newinfo->size) { - ret = compat_check_entry(iter1, net, name); - if (ret != 0) - break; - ++i; - if (strcmp(ipt_get_target(iter1)->u.user.name, - XT_ERROR_TARGET) == 0) - ++newinfo->stacksize; - } - if (ret) { - /* - * The first i matches need cleanup_entry (calls ->destroy) - * because they had called ->check already. The other j-i - * entries need only release. - */ - int skip = i; - j -= i; - xt_entry_foreach(iter0, entry0, newinfo->size) { - if (skip-- > 0) - continue; - if (j-- == 0) - break; - compat_release_entry(iter0); - } - xt_entry_foreach(iter1, entry1, newinfo->size) { - if (i-- == 0) - break; - cleanup_entry(iter1, net); - } - xt_free_table_info(newinfo); - return ret; + for (i = 0; i < NF_INET_NUMHOOKS; i++) { + repl.hook_entry[i] = newinfo->hook_entry[i]; + repl.underflow[i] = newinfo->underflow[i]; } + repl.num_counters = 0; + repl.counters = NULL; + repl.size = newinfo->size; + ret = translate_table(net, newinfo, entry1, &repl); + if (ret) + goto free_newinfo; + *pinfo = newinfo; *pentry0 = entry1; xt_free_table_info(info); @@ -1784,17 +1642,16 @@ translate_compat_table(struct net *net, free_newinfo: xt_free_table_info(newinfo); -out: - xt_entry_foreach(iter0, entry0, total_size) { + return ret; +out_unlock: + xt_compat_flush_offsets(AF_INET); + xt_compat_unlock(AF_INET); + xt_entry_foreach(iter0, entry0, compatr->size) { if (j-- == 0) break; compat_release_entry(iter0); } return ret; -out_unlock: - xt_compat_flush_offsets(AF_INET); - xt_compat_unlock(AF_INET); - goto out; } static int @@ -1830,10 +1687,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) goto free_newinfo; } - ret = translate_compat_table(net, tmp.name, tmp.valid_hooks, - &newinfo, 
&loc_cpu_entry, tmp.size, - tmp.num_entries, tmp.hook_entry, - tmp.underflow); + ret = translate_compat_table(net, &newinfo, &loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 6fb869f646bf..a04dee536b8e 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -27,8 +27,6 @@ static int nf_ct_ipv4_gather_frags(struct net *net, struct sk_buff *skb, { int err; - skb_orphan(skb); - local_bh_disable(); err = ip_defrag(net, skb, user); local_bh_enable(); diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c index c6eb42100e9a..ea91058b5f6f 100644 --- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c @@ -108,10 +108,18 @@ static int masq_inet_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev; + struct in_device *idev = ((struct in_ifaddr *)ptr)->ifa_dev; struct netdev_notifier_info info; - netdev_notifier_info_init(&info, dev); + /* The masq_dev_notifier will catch the case of the device going + * down. So if the inetdev is dead and being destroyed we have + * no work to do. Otherwise this is an individual address removal + * and we have to perform the flush. 
+ */ + if (idev->dead) + return NOTIFY_DONE; + + netdev_notifier_info_init(&info, idev->dev); return masq_device_event(this, event, &info); } diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index b27e98010dea..0d5278ca4777 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -746,8 +746,10 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (msg->msg_controllen) { err = ip_cmsg_send(sock_net(sk), msg, &ipc, false); - if (err) + if (unlikely(err)) { + kfree(ipc.opt); return err; + } if (ipc.opt) free = 1; } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 865895d3fb27..a9b479a1c4a0 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -547,8 +547,10 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (msg->msg_controllen) { err = ip_cmsg_send(net, msg, &ipc, false); - if (err) + if (unlikely(err)) { + kfree(ipc.opt); goto out; + } if (ipc.opt) free = 1; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a0d842f4e9cf..fb54659320d8 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -129,6 +129,7 @@ static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ; static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; static int ip_rt_min_advmss __read_mostly = 256; +static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; /* * Interface to generic destination cache. 
*/ @@ -757,7 +758,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow struct fib_nh *nh = &FIB_RES_NH(res); update_or_create_fnhe(nh, fl4->daddr, new_gw, - 0, 0); + 0, jiffies + ip_rt_gc_timeout); } if (kill_route) rt->dst.obsolete = DST_OBSOLETE_KILL; @@ -1558,6 +1559,36 @@ static void ip_handle_martian_source(struct net_device *dev, #endif } +static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr) +{ + struct fnhe_hash_bucket *hash; + struct fib_nh_exception *fnhe, __rcu **fnhe_p; + u32 hval = fnhe_hashfun(daddr); + + spin_lock_bh(&fnhe_lock); + + hash = rcu_dereference_protected(nh->nh_exceptions, + lockdep_is_held(&fnhe_lock)); + hash += hval; + + fnhe_p = &hash->chain; + fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock)); + while (fnhe) { + if (fnhe->fnhe_daddr == daddr) { + rcu_assign_pointer(*fnhe_p, rcu_dereference_protected( + fnhe->fnhe_next, lockdep_is_held(&fnhe_lock))); + fnhe_flush_routes(fnhe); + kfree_rcu(fnhe, rcu); + break; + } + fnhe_p = &fnhe->fnhe_next; + fnhe = rcu_dereference_protected(fnhe->fnhe_next, + lockdep_is_held(&fnhe_lock)); + } + + spin_unlock_bh(&fnhe_lock); +} + /* called in rcu_read_lock() section */ static int __mkroute_input(struct sk_buff *skb, const struct fib_result *res, @@ -1611,11 +1642,20 @@ static int __mkroute_input(struct sk_buff *skb, fnhe = find_exception(&FIB_RES_NH(*res), daddr); if (do_cache) { - if (fnhe) + if (fnhe) { rth = rcu_dereference(fnhe->fnhe_rth_input); - else - rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); + if (rth && rth->dst.expires && + time_after(jiffies, rth->dst.expires)) { + ip_del_fnhe(&FIB_RES_NH(*res), daddr); + fnhe = NULL; + } else { + goto rt_cache; + } + } + + rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); +rt_cache: if (rt_cache_valid(rth)) { skb_dst_set_noref(skb, &rth->dst); goto out; @@ -2007,6 +2047,18 @@ static struct rtable *__mkroute_output(const struct fib_result *res, */ if (fi && res->prefixlen < 4) fi = 
NULL; + } else if ((type == RTN_LOCAL) && (orig_oif != 0) && + (orig_oif != dev_out->ifindex)) { + /* For local routes that require a particular output interface + * we do not want to cache the result. Caching the result + * causes incorrect behaviour when there are multiple source + * addresses on the interface, the end result being that if the + * intended recipient is waiting on that interface for the + * packet he won't receive it because it will be delivered on + * the loopback interface and the IP_PKTINFO ipi_ifindex will + * be set to the loopback interface as well. + */ + fi = NULL; } fnhe = NULL; @@ -2016,19 +2068,29 @@ static struct rtable *__mkroute_output(const struct fib_result *res, struct fib_nh *nh = &FIB_RES_NH(*res); fnhe = find_exception(nh, fl4->daddr); - if (fnhe) + if (fnhe) { prth = &fnhe->fnhe_rth_output; - else { - if (unlikely(fl4->flowi4_flags & - FLOWI_FLAG_KNOWN_NH && - !(nh->nh_gw && - nh->nh_scope == RT_SCOPE_LINK))) { - do_cache = false; - goto add; + rth = rcu_dereference(*prth); + if (rth && rth->dst.expires && + time_after(jiffies, rth->dst.expires)) { + ip_del_fnhe(nh, fl4->daddr); + fnhe = NULL; + } else { + goto rt_cache; } - prth = raw_cpu_ptr(nh->nh_pcpu_rth_output); } + + if (unlikely(fl4->flowi4_flags & + FLOWI_FLAG_KNOWN_NH && + !(nh->nh_gw && + nh->nh_scope == RT_SCOPE_LINK))) { + do_cache = false; + goto add; + } + prth = raw_cpu_ptr(nh->nh_pcpu_rth_output); rth = rcu_dereference(*prth); + +rt_cache: if (rt_cache_valid(rth)) { dst_hold(&rth->dst); return rth; @@ -2582,7 +2644,6 @@ void ip_rt_multicast_event(struct in_device *in_dev) } #ifdef CONFIG_SYSCTL -static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; static int ip_rt_gc_interval __read_mostly = 60 * HZ; static int ip_rt_gc_min_interval __read_mostly = HZ / 2; static int ip_rt_gc_elasticity __read_mostly = 8; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a1d5c67e251d..6ecfc9de599c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -279,6 +279,7 @@ 
#include <asm/uaccess.h> #include <asm/ioctls.h> +#include <asm/unaligned.h> #include <net/busy_poll.h> int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; @@ -938,7 +939,7 @@ new_segment: i = skb_shinfo(skb)->nr_frags; can_coalesce = skb_can_coalesce(skb, i, page, offset); - if (!can_coalesce && i >= MAX_SKB_FRAGS) { + if (!can_coalesce && i >= sysctl_max_skb_frags) { tcp_mark_push(tp, skb); goto new_segment; } @@ -1211,7 +1212,7 @@ new_segment: if (!skb_can_coalesce(skb, i, pfrag->page, pfrag->offset)) { - if (i == MAX_SKB_FRAGS || !sg) { + if (i == sysctl_max_skb_frags || !sg) { tcp_mark_push(tp, skb); goto new_segment; } @@ -2637,6 +2638,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) const struct inet_connection_sock *icsk = inet_csk(sk); u32 now = tcp_time_stamp; unsigned int start; + u64 rate64; u32 rate; memset(info, 0, sizeof(*info)); @@ -2702,15 +2704,17 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_total_retrans = tp->total_retrans; rate = READ_ONCE(sk->sk_pacing_rate); - info->tcpi_pacing_rate = rate != ~0U ? rate : ~0ULL; + rate64 = rate != ~0U ? rate : ~0ULL; + put_unaligned(rate64, &info->tcpi_pacing_rate); rate = READ_ONCE(sk->sk_max_pacing_rate); - info->tcpi_max_pacing_rate = rate != ~0U ? rate : ~0ULL; + rate64 = rate != ~0U ? 
rate : ~0ULL; + put_unaligned(rate64, &info->tcpi_max_pacing_rate); do { start = u64_stats_fetch_begin_irq(&tp->syncp); - info->tcpi_bytes_acked = tp->bytes_acked; - info->tcpi_bytes_received = tp->bytes_received; + put_unaligned(tp->bytes_acked, &info->tcpi_bytes_acked); + put_unaligned(tp->bytes_received, &info->tcpi_bytes_received); } while (u64_stats_fetch_retry_irq(&tp->syncp, start)); info->tcpi_segs_out = tp->segs_out; info->tcpi_segs_in = tp->segs_in; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 205e6745393f..7decaa439360 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -312,7 +312,7 @@ static void do_redirect(struct sk_buff *skb, struct sock *sk) /* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */ -void tcp_req_err(struct sock *sk, u32 seq) +void tcp_req_err(struct sock *sk, u32 seq, bool abort) { struct request_sock *req = inet_reqsk(sk); struct net *net = sock_net(sk); @@ -320,11 +320,9 @@ void tcp_req_err(struct sock *sk, u32 seq) /* ICMPs are not backlogged, hence we cannot get * an established socket here. */ - WARN_ON(req->sk); - if (seq != tcp_rsk(req)->snt_isn) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); - } else { + } else if (abort) { /* * Still in SYN_RECV, just remove it silently. * There is no good way to pass the error to the newly @@ -384,7 +382,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) } seq = ntohl(th->seq); if (sk->sk_state == TCP_NEW_SYN_RECV) - return tcp_req_err(sk, seq); + return tcp_req_err(sk, seq, + type == ICMP_PARAMETERPROB || + type == ICMP_TIME_EXCEEDED || + (type == ICMP_DEST_UNREACH && + (code == ICMP_NET_UNREACH || + code == ICMP_HOST_UNREACH))); bh_lock_sock(sk); /* If too many ICMPs get dropped on busy @@ -705,7 +708,8 @@ release_sk1: outside socket context is ugly, certainly. What can I do? 
*/ -static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, +static void tcp_v4_send_ack(struct net *net, + struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, int reply_flags, u8 tos) @@ -720,7 +724,6 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, ]; } rep; struct ip_reply_arg arg; - struct net *net = dev_net(skb_dst(skb)->dev); memset(&rep.th, 0, sizeof(struct tcphdr)); memset(&arg, 0, sizeof(arg)); @@ -782,7 +785,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) struct inet_timewait_sock *tw = inet_twsk(sk); struct tcp_timewait_sock *tcptw = tcp_twsk(sk); - tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, + tcp_v4_send_ack(sock_net(sk), skb, + tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcp_time_stamp + tcptw->tw_ts_offset, tcptw->tw_ts_recent, @@ -801,8 +805,10 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV * sk->sk_state == TCP_SYN_RECV -> for Fast Open. */ - tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ? - tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, + u32 seq = (sk->sk_state == TCP_LISTEN) ? 
tcp_rsk(req)->snt_isn + 1 : + tcp_sk(sk)->snd_nxt; + + tcp_v4_send_ack(sock_net(sk), skb, seq, tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd, tcp_time_stamp, req->ts_recent, @@ -1586,28 +1592,30 @@ process: if (sk->sk_state == TCP_NEW_SYN_RECV) { struct request_sock *req = inet_reqsk(sk); - struct sock *nsk = NULL; + struct sock *nsk; sk = req->rsk_listener; - if (tcp_v4_inbound_md5_hash(sk, skb)) - goto discard_and_relse; - if (likely(sk->sk_state == TCP_LISTEN)) { - nsk = tcp_check_req(sk, skb, req, false); - } else { + if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) { + reqsk_put(req); + goto discard_it; + } + if (unlikely(sk->sk_state != TCP_LISTEN)) { inet_csk_reqsk_queue_drop_and_put(sk, req); goto lookup; } + sock_hold(sk); + nsk = tcp_check_req(sk, skb, req, false); if (!nsk) { reqsk_put(req); - goto discard_it; + goto discard_and_relse; } if (nsk == sk) { - sock_hold(sk); reqsk_put(req); } else if (tcp_child_process(sk, nsk, skb)) { tcp_v4_send_reset(nsk, skb); - goto discard_it; + goto discard_and_relse; } else { + sock_put(sk); return 0; } } diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index c8cbc2b4b792..a726d7853ce5 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -550,7 +550,7 @@ reset: */ if (crtt > tp->srtt_us) { /* Set RTO like tcp_rtt_estimator(), but from cached RTT. 
*/ - crtt /= 8 * USEC_PER_MSEC; + crtt /= 8 * USEC_PER_SEC / HZ; inet_csk(sk)->icsk_rto = crtt + max(2 * crtt, tcp_rto_min(sk)); } else if (tp->srtt_us == 0) { /* RFC6298: 5.7 We've failed to get a valid RTT sample from diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index ac6b1961ffeb..9475a2748a9a 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -458,7 +458,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1; - newtp->segs_in = 0; + newtp->segs_in = 1; newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1; @@ -818,6 +818,7 @@ int tcp_child_process(struct sock *parent, struct sock *child, int ret = 0; int state = child->sk_state; + tcp_sk(child)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); if (!sock_owned_by_user(child)) { ret = tcp_rcv_state_process(child, skb); /* Wakeup parent, send SIGIO */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 9f069bd9de46..0dd207cd1f38 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2625,8 +2625,10 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) */ if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) || skb_headroom(skb) >= 0xFFFF)) { - struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER, - GFP_ATOMIC); + struct sk_buff *nskb; + + skb_mstamp_get(&skb->skb_mstamp); + nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); err = nskb ? 
tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : -ENOBUFS; } else { diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index 17d35662930d..3e6a472e6b88 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -219,7 +219,7 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) yeah->fast_count = 0; yeah->reno_count = max(yeah->reno_count>>1, 2U); - return tp->snd_cwnd - reduction; + return max_t(int, tp->snd_cwnd - reduction, 2); } static struct tcp_congestion_ops tcp_yeah __read_mostly = { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d8946929813e..7473dad69c92 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -966,8 +966,10 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (msg->msg_controllen) { err = ip_cmsg_send(sock_net(sk), msg, &ipc, sk->sk_family == AF_INET6); - if (err) + if (unlikely(err)) { + kfree(ipc.opt); return err; + } if (ipc.opt) free = 1; connected = 0; @@ -1530,7 +1532,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) /* if we're overly short, let UDP handle it */ encap_rcv = ACCESS_ONCE(up->encap_rcv); - if (skb->len > sizeof(struct udphdr) && encap_rcv) { + if (encap_rcv) { int ret; /* Verify checksum before giving to encap */ @@ -1989,10 +1991,14 @@ void udp_v4_early_demux(struct sk_buff *skb) if (!in_dev) return; - ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr, - iph->protocol); - if (!ours) - return; + /* we are supposed to accept bcast packets */ + if (skb->pkt_type == PACKET_MULTICAST) { + ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr, + iph->protocol); + if (!ours) + return; + } + sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr, uh->source, iph->saddr, dif); } else if (skb->pkt_type == PACKET_HOST) { diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c index aba428626b52..280a9bdeddee 100644 --- a/net/ipv4/udp_tunnel.c +++ b/net/ipv4/udp_tunnel.c @@ -89,6 +89,8 @@ int udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, uh->source = 
src_port; uh->len = htons(skb->len); + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + udp_set_csum(nocheck, skb, src, dst, skb->len); return iptunnel_xmit(sk, rt, skb, src, dst, IPPROTO_UDP, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4f7d6e0189cc..3cdf59161a7e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -585,7 +585,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb, if (err < 0) goto errout; - err = EINVAL; + err = -EINVAL; if (!tb[NETCONFA_IFINDEX]) goto errout; @@ -3533,6 +3533,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) { struct inet6_dev *idev = ifp->idev; struct net_device *dev = idev->dev; + bool notify = false; addrconf_join_solict(dev, &ifp->addr); @@ -3578,7 +3579,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) /* Because optimistic nodes can use this address, * notify listeners. If DAD fails, RTM_DELADDR is sent. */ - ipv6_ifa_notify(RTM_NEWADDR, ifp); + notify = true; } } @@ -3586,6 +3587,8 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) out: spin_unlock(&ifp->lock); read_unlock_bh(&idev->lock); + if (notify) + ipv6_ifa_notify(RTM_NEWADDR, ifp); } static void addrconf_dad_start(struct inet6_ifaddr *ifp) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 0743a5f4c533..183ff87dacf3 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -163,6 +163,9 @@ ipv4_connected: fl6.fl6_sport = inet->inet_sport; fl6.flowi6_uid = sock_i_uid(sk); + if (!fl6.flowi6_oif) + fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; + if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST)) fl6.flowi6_oif = np->mcast_oif; diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index 835ec57c233b..840a4388f860 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -260,7 +260,11 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, return -EINVAL; } } - return -ENOENT; + if (!found) + return -ENOENT; + if (fragoff) + *fragoff = 
_frag_off; + break; } hdrlen = 8; } else if (nexthdr == NEXTHDR_AUTH) { diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 0c7e276c230e..34cf46d74554 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -179,6 +179,7 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) } } + free_percpu(non_pcpu_rt->rt6i_pcpu); non_pcpu_rt->rt6i_pcpu = NULL; } diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 1f9ebe3cbb4a..dc2db4f7b182 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -540,12 +540,13 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) } spin_lock_bh(&ip6_sk_fl_lock); for (sflp = &np->ipv6_fl_list; - (sfl = rcu_dereference(*sflp)) != NULL; + (sfl = rcu_dereference_protected(*sflp, + lockdep_is_held(&ip6_sk_fl_lock))) != NULL; sflp = &sfl->next) { if (sfl->fl->label == freq.flr_label) { if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK)) np->flow_label &= ~IPV6_FLOWLABEL_MASK; - *sflp = rcu_dereference(sfl->next); + *sflp = sfl->next; spin_unlock_bh(&ip6_sk_fl_lock); fl_release(sfl->fl); kfree_rcu(sfl, rcu); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index e5ea177d34c6..4650c6824783 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -778,6 +778,8 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) __u32 mtu; int err; + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) encap_limit = t->parms.encap_limit; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index e6a7bd15b9b7..58900c21e4e4 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -909,6 +909,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, struct rt6_info *rt; #endif int err; + int flags = 0; /* The correct way to handle this would be to do * ip6_route_get_saddr, and then ip6_route_output; however, @@ -940,10 +941,13 @@ static int ip6_dst_lookup_tail(struct net 
*net, const struct sock *sk, dst_release(*dst); *dst = NULL; } + + if (fl6->flowi6_oif) + flags |= RT6_LOOKUP_F_IFACE; } if (!*dst) - *dst = ip6_route_output(net, sk, fl6); + *dst = ip6_route_output_flags(net, sk, fl6, flags); err = (*dst)->error; if (err) @@ -1068,17 +1072,12 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst) { struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); - int err; dst = ip6_sk_dst_check(sk, dst, fl6); + if (!dst) + dst = ip6_dst_lookup_flow(sk, fl6, final_dst); - err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6); - if (err) - return ERR_PTR(err); - if (final_dst) - fl6->daddr = *final_dst; - - return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); + return dst; } EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); @@ -1087,8 +1086,8 @@ static inline int ip6_ufo_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int hh_len, int fragheaderlen, - int transhdrlen, int mtu, unsigned int flags, - const struct flowi6 *fl6) + int exthdrlen, int transhdrlen, int mtu, + unsigned int flags, const struct flowi6 *fl6) { struct sk_buff *skb; @@ -1113,7 +1112,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, skb_put(skb, fragheaderlen + transhdrlen); /* initialize network header pointer */ - skb_reset_network_header(skb); + skb_set_network_header(skb, exthdrlen); /* initialize protocol header pointer */ skb->transport_header = skb->network_header + fragheaderlen; @@ -1353,9 +1352,9 @@ emsgsize: (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO) && - (sk->sk_type == SOCK_DGRAM)) { + (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) { err = ip6_ufo_append_data(sk, queue, getfrag, from, length, - hh_len, fragheaderlen, + hh_len, fragheaderlen, exthdrlen, transhdrlen, mtu, flags, fl6); if (err) goto error; 
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 137fca42aaa6..3991b21e24ad 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -343,12 +343,12 @@ static int ip6_tnl_create2(struct net_device *dev) t = netdev_priv(dev); + dev->rtnl_link_ops = &ip6_link_ops; err = register_netdevice(dev); if (err < 0) goto out; strcpy(t->parms.name, dev->name); - dev->rtnl_link_ops = &ip6_link_ops; dev_hold(dev); ip6_tnl_link(ip6n, t); @@ -1180,6 +1180,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) u8 tproto; int err; + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + tproto = ACCESS_ONCE(t->parms.proto); if (tproto != IPPROTO_IPIP && tproto != 0) return -1; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index a10e77103c88..e207cb2468da 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1074,6 +1074,7 @@ static struct mfc6_cache *ip6mr_cache_alloc(void) struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); if (!c) return NULL; + c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; c->mfc_un.res.minvif = MAXMIFS; return c; } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 5ee56d0a8699..d64ee7e83664 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1574,9 +1574,8 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu) return NULL; skb->priority = TC_PRIO_CONTROL; - skb->reserved_tailroom = skb_end_offset(skb) - - min(mtu, skb_end_offset(skb)); skb_reserve(skb, hlen); + skb_tailroom_reserve(skb, mtu, tlen); if (__ipv6_get_lladdr(idev, &addr_buf, IFA_F_TENTATIVE)) { /* <draft-ietf-magma-mld-source-05.txt>: diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 6198807e06f4..22f39e00bef3 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -455,6 +455,18 @@ ip6t_do_table(struct sk_buff *skb, #endif } +static bool find_jump_target(const struct xt_table_info *t, + const struct ip6t_entry *target) +{ + struct 
ip6t_entry *iter; + + xt_entry_foreach(iter, t->entries, t->size) { + if (iter == target) + return true; + } + return false; +} + /* Figures out from what hook each rule can be called: returns 0 if there are loops. Puts hook bitmask in comefrom. */ static int @@ -532,6 +544,8 @@ mark_source_chains(const struct xt_table_info *newinfo, size = e->next_offset; e = (struct ip6t_entry *) (entry0 + pos + size); + if (pos + size >= newinfo->size) + return 0; e->counters.pcnt = pos; pos += size; } else { @@ -550,9 +564,15 @@ mark_source_chains(const struct xt_table_info *newinfo, /* This a jump; chase it. */ duprintf("Jump rule %u -> %u\n", pos, newpos); + e = (struct ip6t_entry *) + (entry0 + newpos); + if (!find_jump_target(newinfo, e)) + return 0; } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; + if (newpos >= newinfo->size) + return 0; } e = (struct ip6t_entry *) (entry0 + newpos); @@ -579,25 +599,6 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net) module_put(par.match->me); } -static int -check_entry(const struct ip6t_entry *e) -{ - const struct xt_entry_target *t; - - if (!ip6_checkentry(&e->ipv6)) - return -EINVAL; - - if (e->target_offset + sizeof(struct xt_entry_target) > - e->next_offset) - return -EINVAL; - - t = ip6t_get_target_c(e); - if (e->target_offset + t->u.target_size > e->next_offset) - return -EINVAL; - - return 0; -} - static int check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) { const struct ip6t_ip6 *ipv6 = par->entryinfo; @@ -762,7 +763,11 @@ check_entry_size_and_hooks(struct ip6t_entry *e, return -EINVAL; } - err = check_entry(e); + if (!ip6_checkentry(&e->ipv6)) + return -EINVAL; + + err = xt_check_entry_offsets(e, e->elems, e->target_offset, + e->next_offset); if (err) return err; @@ -1320,55 +1325,16 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, unsigned int i; struct xt_counters_info tmp; struct xt_counters *paddc; - unsigned int num_counters; - char 
*name; - int size; - void *ptmp; struct xt_table *t; const struct xt_table_info *private; int ret = 0; struct ip6t_entry *iter; unsigned int addend; -#ifdef CONFIG_COMPAT - struct compat_xt_counters_info compat_tmp; - - if (compat) { - ptmp = &compat_tmp; - size = sizeof(struct compat_xt_counters_info); - } else -#endif - { - ptmp = &tmp; - size = sizeof(struct xt_counters_info); - } - - if (copy_from_user(ptmp, user, size) != 0) - return -EFAULT; - -#ifdef CONFIG_COMPAT - if (compat) { - num_counters = compat_tmp.num_counters; - name = compat_tmp.name; - } else -#endif - { - num_counters = tmp.num_counters; - name = tmp.name; - } - - if (len != size + num_counters * sizeof(struct xt_counters)) - return -EINVAL; - - paddc = vmalloc(len - size); - if (!paddc) - return -ENOMEM; - if (copy_from_user(paddc, user + size, len - size) != 0) { - ret = -EFAULT; - goto free; - } - - t = xt_find_table_lock(net, AF_INET6, name); + paddc = xt_copy_counters_from_user(user, len, &tmp, compat); + if (IS_ERR(paddc)) + return PTR_ERR(paddc); + t = xt_find_table_lock(net, AF_INET6, tmp.name); if (IS_ERR_OR_NULL(t)) { ret = t ? 
PTR_ERR(t) : -ENOENT; goto free; @@ -1376,7 +1342,7 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, local_bh_disable(); private = t->private; - if (private->number != num_counters) { + if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; } @@ -1455,7 +1421,6 @@ compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr, static int compat_find_calc_match(struct xt_entry_match *m, - const char *name, const struct ip6t_ip6 *ipv6, int *size) { @@ -1490,17 +1455,14 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, struct xt_table_info *newinfo, unsigned int *size, const unsigned char *base, - const unsigned char *limit, - const unsigned int *hook_entries, - const unsigned int *underflows, - const char *name) + const unsigned char *limit) { struct xt_entry_match *ematch; struct xt_entry_target *t; struct xt_target *target; unsigned int entry_offset; unsigned int j; - int ret, off, h; + int ret, off; duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 || @@ -1517,8 +1479,11 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, return -EINVAL; } - /* For purposes of check_entry casting the compat entry is fine */ - ret = check_entry((struct ip6t_entry *)e); + if (!ip6_checkentry(&e->ipv6)) + return -EINVAL; + + ret = xt_compat_check_entry_offsets(e, e->elems, + e->target_offset, e->next_offset); if (ret) return ret; @@ -1526,7 +1491,7 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, entry_offset = (void *)e - (void *)base; j = 0; xt_ematch_foreach(ematch, e) { - ret = compat_find_calc_match(ematch, name, &e->ipv6, &off); + ret = compat_find_calc_match(ematch, &e->ipv6, &off); if (ret != 0) goto release_matches; ++j; @@ -1549,17 +1514,6 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, if (ret) goto out; - /* Check hooks & underflows */ - for (h = 0; h < NF_INET_NUMHOOKS; h++) { - 
if ((unsigned char *)e - base == hook_entries[h]) - newinfo->hook_entry[h] = hook_entries[h]; - if ((unsigned char *)e - base == underflows[h]) - newinfo->underflow[h] = underflows[h]; - } - - /* Clear counters and comefrom */ - memset(&e->counters, 0, sizeof(e->counters)); - e->comefrom = 0; return 0; out: @@ -1573,18 +1527,17 @@ release_matches: return ret; } -static int +static void compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, - unsigned int *size, const char *name, + unsigned int *size, struct xt_table_info *newinfo, unsigned char *base) { struct xt_entry_target *t; struct ip6t_entry *de; unsigned int origsize; - int ret, h; + int h; struct xt_entry_match *ematch; - ret = 0; origsize = *size; de = (struct ip6t_entry *)*dstptr; memcpy(de, e, sizeof(struct ip6t_entry)); @@ -1593,11 +1546,9 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, *dstptr += sizeof(struct ip6t_entry); *size += sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry); - xt_ematch_foreach(ematch, e) { - ret = xt_compat_match_from_user(ematch, dstptr, size); - if (ret != 0) - return ret; - } + xt_ematch_foreach(ematch, e) + xt_compat_match_from_user(ematch, dstptr, size); + de->target_offset = e->target_offset - (origsize - *size); t = compat_ip6t_get_target(e); xt_compat_target_from_user(t, dstptr, size); @@ -1609,183 +1560,83 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, if ((unsigned char *)de - base < newinfo->underflow[h]) newinfo->underflow[h] -= origsize - *size; } - return ret; -} - -static int compat_check_entry(struct ip6t_entry *e, struct net *net, - const char *name) -{ - unsigned int j; - int ret = 0; - struct xt_mtchk_param mtpar; - struct xt_entry_match *ematch; - - e->counters.pcnt = xt_percpu_counter_alloc(); - if (IS_ERR_VALUE(e->counters.pcnt)) - return -ENOMEM; - j = 0; - mtpar.net = net; - mtpar.table = name; - mtpar.entryinfo = &e->ipv6; - mtpar.hook_mask = e->comefrom; - mtpar.family = 
NFPROTO_IPV6; - xt_ematch_foreach(ematch, e) { - ret = check_match(ematch, &mtpar); - if (ret != 0) - goto cleanup_matches; - ++j; - } - - ret = check_target(e, net, name); - if (ret) - goto cleanup_matches; - return 0; - - cleanup_matches: - xt_ematch_foreach(ematch, e) { - if (j-- == 0) - break; - cleanup_match(ematch, net); - } - - xt_percpu_counter_free(e->counters.pcnt); - - return ret; } static int translate_compat_table(struct net *net, - const char *name, - unsigned int valid_hooks, struct xt_table_info **pinfo, void **pentry0, - unsigned int total_size, - unsigned int number, - unsigned int *hook_entries, - unsigned int *underflows) + const struct compat_ip6t_replace *compatr) { unsigned int i, j; struct xt_table_info *newinfo, *info; void *pos, *entry0, *entry1; struct compat_ip6t_entry *iter0; - struct ip6t_entry *iter1; + struct ip6t_replace repl; unsigned int size; int ret = 0; info = *pinfo; entry0 = *pentry0; - size = total_size; - info->number = number; - - /* Init all hooks to impossible value. */ - for (i = 0; i < NF_INET_NUMHOOKS; i++) { - info->hook_entry[i] = 0xFFFFFFFF; - info->underflow[i] = 0xFFFFFFFF; - } + size = compatr->size; + info->number = compatr->num_entries; duprintf("translate_compat_table: size %u\n", info->size); j = 0; xt_compat_lock(AF_INET6); - xt_compat_init_offsets(AF_INET6, number); + xt_compat_init_offsets(AF_INET6, compatr->num_entries); /* Walk through entries, checking offsets. 
*/ - xt_entry_foreach(iter0, entry0, total_size) { + xt_entry_foreach(iter0, entry0, compatr->size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, entry0, - entry0 + total_size, - hook_entries, - underflows, - name); + entry0 + compatr->size); if (ret != 0) goto out_unlock; ++j; } ret = -EINVAL; - if (j != number) { + if (j != compatr->num_entries) { duprintf("translate_compat_table: %u not %u entries\n", - j, number); + j, compatr->num_entries); goto out_unlock; } - /* Check hooks all assigned */ - for (i = 0; i < NF_INET_NUMHOOKS; i++) { - /* Only hooks which are valid */ - if (!(valid_hooks & (1 << i))) - continue; - if (info->hook_entry[i] == 0xFFFFFFFF) { - duprintf("Invalid hook entry %u %u\n", - i, hook_entries[i]); - goto out_unlock; - } - if (info->underflow[i] == 0xFFFFFFFF) { - duprintf("Invalid underflow %u %u\n", - i, underflows[i]); - goto out_unlock; - } - } - ret = -ENOMEM; newinfo = xt_alloc_table_info(size); if (!newinfo) goto out_unlock; - newinfo->number = number; + newinfo->number = compatr->num_entries; for (i = 0; i < NF_INET_NUMHOOKS; i++) { - newinfo->hook_entry[i] = info->hook_entry[i]; - newinfo->underflow[i] = info->underflow[i]; + newinfo->hook_entry[i] = compatr->hook_entry[i]; + newinfo->underflow[i] = compatr->underflow[i]; } entry1 = newinfo->entries; pos = entry1; - size = total_size; - xt_entry_foreach(iter0, entry0, total_size) { - ret = compat_copy_entry_from_user(iter0, &pos, &size, - name, newinfo, entry1); - if (ret != 0) - break; - } + size = compatr->size; + xt_entry_foreach(iter0, entry0, compatr->size) + compat_copy_entry_from_user(iter0, &pos, &size, + newinfo, entry1); + + /* all module references in entry0 are now gone. 
*/ xt_compat_flush_offsets(AF_INET6); xt_compat_unlock(AF_INET6); - if (ret) - goto free_newinfo; - ret = -ELOOP; - if (!mark_source_chains(newinfo, valid_hooks, entry1)) - goto free_newinfo; + memcpy(&repl, compatr, sizeof(*compatr)); - i = 0; - xt_entry_foreach(iter1, entry1, newinfo->size) { - ret = compat_check_entry(iter1, net, name); - if (ret != 0) - break; - ++i; - if (strcmp(ip6t_get_target(iter1)->u.user.name, - XT_ERROR_TARGET) == 0) - ++newinfo->stacksize; - } - if (ret) { - /* - * The first i matches need cleanup_entry (calls ->destroy) - * because they had called ->check already. The other j-i - * entries need only release. - */ - int skip = i; - j -= i; - xt_entry_foreach(iter0, entry0, newinfo->size) { - if (skip-- > 0) - continue; - if (j-- == 0) - break; - compat_release_entry(iter0); - } - xt_entry_foreach(iter1, entry1, newinfo->size) { - if (i-- == 0) - break; - cleanup_entry(iter1, net); - } - xt_free_table_info(newinfo); - return ret; + for (i = 0; i < NF_INET_NUMHOOKS; i++) { + repl.hook_entry[i] = newinfo->hook_entry[i]; + repl.underflow[i] = newinfo->underflow[i]; } + repl.num_counters = 0; + repl.counters = NULL; + repl.size = newinfo->size; + ret = translate_table(net, newinfo, entry1, &repl); + if (ret) + goto free_newinfo; + *pinfo = newinfo; *pentry0 = entry1; xt_free_table_info(info); @@ -1793,17 +1644,16 @@ translate_compat_table(struct net *net, free_newinfo: xt_free_table_info(newinfo); -out: - xt_entry_foreach(iter0, entry0, total_size) { + return ret; +out_unlock: + xt_compat_flush_offsets(AF_INET6); + xt_compat_unlock(AF_INET6); + xt_entry_foreach(iter0, entry0, compatr->size) { if (j-- == 0) break; compat_release_entry(iter0); } return ret; -out_unlock: - xt_compat_flush_offsets(AF_INET6); - xt_compat_unlock(AF_INET6); - goto out; } static int @@ -1839,10 +1689,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) goto free_newinfo; } - ret = translate_compat_table(net, tmp.name, tmp.valid_hooks, - 
&newinfo, &loc_cpu_entry, tmp.size, - tmp.num_entries, tmp.hook_entry, - tmp.underflow); + ret = translate_compat_table(net, &newinfo, &loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 45f5ae51de65..a234552a7e3d 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -496,10 +496,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, IP6CB(head)->flags |= IP6SKB_FRAGMENTED; /* Yes, and fold redundant checksum back. 8) */ - if (head->ip_summed == CHECKSUM_COMPLETE) - head->csum = csum_partial(skb_network_header(head), - skb_network_header_len(head), - head->csum); + skb_postpush_rcsum(head, skb_network_header(head), + skb_network_header_len(head)); rcu_read_lock(); IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 02ba70201e05..aed4f305f5f6 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1170,11 +1170,10 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags); } -struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk, - struct flowi6 *fl6) +struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, + struct flowi6 *fl6, int flags) { struct dst_entry *dst; - int flags = 0; bool any_src; dst = l3mdev_rt6_dst_by_oif(net, fl6); @@ -1195,7 +1194,7 @@ struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk, return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output); } -EXPORT_SYMBOL(ip6_route_output); +EXPORT_SYMBOL_GPL(ip6_route_output_flags); struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig) { @@ -1725,6 +1724,8 @@ static int ip6_convert_metrics(struct mx6_config *mxc, } else { val = nla_get_u32(nla); } + if (type == RTAX_HOPLIMIT && val > 255) + val = 255; if (type == RTAX_FEATURES && (val & 
~RTAX_FEATURE_MASK)) goto err; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index dcccae86190f..ba3d2f3d66d2 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -560,13 +560,13 @@ static int ipip6_err(struct sk_buff *skb, u32 info) if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { ipv4_update_pmtu(skb, dev_net(skb->dev), info, - t->parms.link, 0, IPPROTO_IPV6, 0); + t->parms.link, 0, iph->protocol, 0); err = 0; goto out; } if (type == ICMP_REDIRECT) { ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0, - IPPROTO_IPV6, 0); + iph->protocol, 0); err = 0; goto out; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index e6b044480333..3c6acb67d8e5 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -329,6 +329,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct tcp_sock *tp; __u32 seq, snd_una; struct sock *sk; + bool fatal; int err; sk = __inet6_lookup_established(net, &tcp_hashinfo, @@ -347,8 +348,9 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return; } seq = ntohl(th->seq); + fatal = icmpv6_err_convert(type, code, &err); if (sk->sk_state == TCP_NEW_SYN_RECV) - return tcp_req_err(sk, seq); + return tcp_req_err(sk, seq, fatal); bh_lock_sock(sk); if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG) @@ -402,7 +404,6 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; } - icmpv6_err_convert(type, code, &err); /* Might be for an request_sock */ switch (sk->sk_state) { @@ -463,8 +464,10 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, if (np->repflow && ireq->pktopts) fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); + rcu_read_lock(); err = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt), np->tclass); + rcu_read_unlock(); err = net_xmit_eval(err); } @@ -1386,7 +1389,7 @@ process: if (sk->sk_state == TCP_NEW_SYN_RECV) { struct request_sock *req = inet_reqsk(sk); - struct sock *nsk = NULL; + struct sock *nsk; 
sk = req->rsk_listener; tcp_v6_fill_cb(skb, hdr, th); @@ -1394,24 +1397,24 @@ process: reqsk_put(req); goto discard_it; } - if (likely(sk->sk_state == TCP_LISTEN)) { - nsk = tcp_check_req(sk, skb, req, false); - } else { + if (unlikely(sk->sk_state != TCP_LISTEN)) { inet_csk_reqsk_queue_drop_and_put(sk, req); goto lookup; } + sock_hold(sk); + nsk = tcp_check_req(sk, skb, req, false); if (!nsk) { reqsk_put(req); - goto discard_it; + goto discard_and_relse; } if (nsk == sk) { - sock_hold(sk); reqsk_put(req); tcp_v6_restore_cb(skb); } else if (tcp_child_process(sk, nsk, skb)) { tcp_v6_send_reset(nsk, skb); - goto discard_it; + goto discard_and_relse; } else { + sock_put(sk); return 0; } } @@ -1704,7 +1707,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) destp = ntohs(inet->inet_dport); srcp = ntohs(inet->inet_sport); - if (icsk->icsk_pending == ICSK_TIME_RETRANS) { + if (icsk->icsk_pending == ICSK_TIME_RETRANS || + icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; timer_expires = icsk->icsk_timeout; } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index a1b6adc20e1e..ed7f4a81a932 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -647,7 +647,7 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) /* if we're overly short, let UDP handle it */ encap_rcv = ACCESS_ONCE(up->encap_rcv); - if (skb->len > sizeof(struct udphdr) && encap_rcv) { + if (encap_rcv) { int ret; /* Verify checksum before giving to encap */ @@ -837,8 +837,8 @@ start_lookup: flush_stack(stack, count, skb, count - 1); } else { if (!inner_flushed) - UDP_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI, - proto == IPPROTO_UDPLITE); + UDP6_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI, + proto == IPPROTO_UDPLITE); consume_skb(skb); } return 0; @@ -916,11 +916,9 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, ret = udpv6_queue_rcv_skb(sk, skb); 
sock_put(sk); - /* a return value > 0 means to resubmit the input, but - * it wants the return to be -protocol, or 0 - */ + /* a return value > 0 means to resubmit the input */ if (ret > 0) - return -ret; + return ret; return 0; } diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index f7fbdbabe50e..372855eeaf42 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -23,7 +23,7 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb) struct ipv6hdr *inner_iph = ipipv6_hdr(skb); if (INET_ECN_is_ce(XFRM_MODE_SKB_CB(skb)->tos)) - IP6_ECN_set_ce(inner_iph); + IP6_ECN_set_ce(skb, inner_iph); } /* Add encapsulation header. diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 435608c4306d..20ab7b2ec463 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -708,6 +708,9 @@ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr, if (!addr || addr->sa_family != AF_IUCV) return -EINVAL; + if (addr_len < sizeof(struct sockaddr_iucv)) + return -EINVAL; + lock_sock(sk); if (sk->sk_state != IUCV_OPEN) { err = -EBADFD; diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index afca2eb4dfa7..ec17cbe8a02b 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1581,7 +1581,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 /* Mark socket as an encapsulation socket. 
See net/ipv4/udp.c */ tunnel->encap = encap; if (encap == L2TP_ENCAPTYPE_UDP) { - struct udp_tunnel_sock_cfg udp_cfg; + struct udp_tunnel_sock_cfg udp_cfg = { }; udp_cfg.sk_user_data = tunnel; udp_cfg.encap_type = UDP_ENCAP_L2TPINUDP; diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index ec22078b0914..42de4ccd159f 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -123,12 +123,11 @@ static int l2tp_ip_recv(struct sk_buff *skb) struct l2tp_tunnel *tunnel = NULL; int length; - /* Point to L2TP header */ - optr = ptr = skb->data; - if (!pskb_may_pull(skb, 4)) goto discard; + /* Point to L2TP header */ + optr = ptr = skb->data; session_id = ntohl(*((__be32 *) ptr)); ptr += 4; @@ -156,6 +155,9 @@ static int l2tp_ip_recv(struct sk_buff *skb) if (!pskb_may_pull(skb, length)) goto discard; + /* Point to L2TP header */ + optr = ptr = skb->data; + ptr += 4; pr_debug("%s: ip recv\n", tunnel->name); print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length); } diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index a2c8747d2936..9ee4ddb6b397 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -135,12 +135,11 @@ static int l2tp_ip6_recv(struct sk_buff *skb) struct l2tp_tunnel *tunnel = NULL; int length; - /* Point to L2TP header */ - optr = ptr = skb->data; - if (!pskb_may_pull(skb, 4)) goto discard; + /* Point to L2TP header */ + optr = ptr = skb->data; session_id = ntohl(*((__be32 *) ptr)); ptr += 4; @@ -168,6 +167,9 @@ static int l2tp_ip6_recv(struct sk_buff *skb) if (!pskb_may_pull(skb, length)) goto discard; + /* Point to L2TP header */ + optr = ptr = skb->data; + ptr += 4; pr_debug("%s: ip recv\n", tunnel->name); print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length); } diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index f93c5be612a7..2caaa84ce92d 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -124,8 +124,13 @@ static int l2tp_tunnel_notify(struct genl_family *family, ret = l2tp_nl_tunnel_send(msg, 
info->snd_portid, info->snd_seq, NLM_F_ACK, tunnel, cmd); - if (ret >= 0) - return genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC); + if (ret >= 0) { + ret = genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC); + /* We don't care if no one is listening */ + if (ret == -ESRCH) + ret = 0; + return ret; + } nlmsg_free(msg); @@ -147,8 +152,13 @@ static int l2tp_session_notify(struct genl_family *family, ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq, NLM_F_ACK, session, cmd); - if (ret >= 0) - return genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC); + if (ret >= 0) { + ret = genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC); + /* We don't care if no one is listening */ + if (ret == -ESRCH) + ret = 0; + return ret; + } nlmsg_free(msg); diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 8dab4e569571..bb8edb9ef506 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -626,6 +626,7 @@ static void llc_cmsg_rcv(struct msghdr *msg, struct sk_buff *skb) if (llc->cmsg_flags & LLC_CMSG_PKTINFO) { struct llc_pktinfo info; + memset(&info, 0, sizeof(info)); info.lpi_ifindex = llc_sk(skb->sk)->dev->ifindex; llc_pdu_decode_dsap(skb, &info.lpi_sap); llc_pdu_decode_da(skb, info.lpi_mac); diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 10ad4ac1fa0b..367784be5df2 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -291,7 +291,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, } /* prepare A-MPDU MLME for Rx aggregation */ - tid_agg_rx = kmalloc(sizeof(struct tid_ampdu_rx), GFP_KERNEL); + tid_agg_rx = kzalloc(sizeof(*tid_agg_rx), GFP_KERNEL); if (!tid_agg_rx) goto end; diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 337bb5d78003..980e9e9b6684 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -7,6 +7,7 @@ * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2009, Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH + * 
Copyright(c) 2016 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -1484,14 +1485,21 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) sdata_info(sdata, "Trigger new scan to find an IBSS to join\n"); - num = ieee80211_ibss_setup_scan_channels(local->hw.wiphy, - &ifibss->chandef, - channels, - ARRAY_SIZE(channels)); scan_width = cfg80211_chandef_to_scan_width(&ifibss->chandef); - ieee80211_request_ibss_scan(sdata, ifibss->ssid, - ifibss->ssid_len, channels, num, - scan_width); + + if (ifibss->fixed_channel) { + num = ieee80211_ibss_setup_scan_channels(local->hw.wiphy, + &ifibss->chandef, + channels, + ARRAY_SIZE(channels)); + ieee80211_request_ibss_scan(sdata, ifibss->ssid, + ifibss->ssid_len, channels, + num, scan_width); + } else { + ieee80211_request_ibss_scan(sdata, ifibss->ssid, + ifibss->ssid_len, NULL, + 0, scan_width); + } } else { int interval = IEEE80211_SCAN_INTERVAL; @@ -1732,7 +1740,6 @@ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local) if (sdata->vif.type != NL80211_IFTYPE_ADHOC) continue; sdata->u.ibss.last_scan_completed = jiffies; - ieee80211_queue_work(&local->hw, &sdata->work); } mutex_unlock(&local->iflist_mtx); } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 5322b4c71630..6837a46ca4a2 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -92,7 +92,7 @@ struct ieee80211_fragment_entry { u16 extra_len; u16 last_frag; u8 rx_queue; - bool ccmp; /* Whether fragments were encrypted with CCMP */ + bool check_sequential_pn; /* needed for CCMP/GCMP */ u8 last_pn[6]; /* PN of the last fragment if CCMP was used */ }; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index c9e325d2e120..bcb0a1b64556 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -977,7 +977,10 @@ static void ieee80211_do_stop(struct 
ieee80211_sub_if_data *sdata, if (sdata->vif.txq) { struct txq_info *txqi = to_txq_info(sdata->vif.txq); + spin_lock_bh(&txqi->queue.lock); ieee80211_purge_tx_queue(&local->hw, &txqi->queue); + spin_unlock_bh(&txqi->queue.lock); + atomic_set(&sdata->txqs_len[txqi->txq.ac], 0); } @@ -1747,7 +1750,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, ret = dev_alloc_name(ndev, ndev->name); if (ret < 0) { - free_netdev(ndev); + ieee80211_if_free(ndev); return ret; } @@ -1833,7 +1836,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, ret = register_netdevice(ndev); if (ret) { - free_netdev(ndev); + ieee80211_if_free(ndev); return ret; } } diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index fa28500f28fd..f7bb6829b415 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -151,19 +151,26 @@ u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata) void mesh_sta_cleanup(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; - u32 changed; + u32 changed = 0; /* * maybe userspace handles peer allocation and peering, but in either * case the beacon is still generated by the kernel and we might need * an update. 
*/ - changed = mesh_accept_plinks_update(sdata); + if (sdata->u.mesh.user_mpm && + sta->mesh->plink_state == NL80211_PLINK_ESTAB) + changed |= mesh_plink_dec_estab_count(sdata); + changed |= mesh_accept_plinks_update(sdata); if (!sdata->u.mesh.user_mpm) { changed |= mesh_plink_deactivate(sta); del_timer_sync(&sta->mesh->plink_timer); } + /* make sure no readers can access nexthop sta from here on */ + mesh_path_flush_by_nexthop(sta); + synchronize_net(); + if (changed) ieee80211_mbss_info_change_notify(sdata, changed); } @@ -1370,17 +1377,6 @@ out: sdata_unlock(sdata); } -void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) -{ - struct ieee80211_sub_if_data *sdata; - - rcu_read_lock(); - list_for_each_entry_rcu(sdata, &local->interfaces, list) - if (ieee80211_vif_is_mesh(&sdata->vif) && - ieee80211_sdata_running(sdata)) - ieee80211_queue_work(&local->hw, &sdata->work); - rcu_read_unlock(); -} void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) { diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index a1596344c3ba..4a8019f79fb2 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -362,14 +362,10 @@ static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata) return sdata->u.mesh.mesh_pp_id == IEEE80211_PATH_PROTOCOL_HWMP; } -void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local); - void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata); void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata); void ieee80211s_stop(void); #else -static inline void -ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) {} static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata) { return false; } static inline void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 3aa04344942b..83097c3832d1 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -4003,8 +4003,6 @@ static 
void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata) if (!ieee80211_hw_check(&sdata->local->hw, CONNECTION_MONITOR)) ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.monitor_work); - /* and do all the other regular work too */ - ieee80211_queue_work(&sdata->local->hw, &sdata->work); } } diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index 3ece7d1034c8..b54f398cda5d 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -711,7 +711,7 @@ static u32 minstrel_get_expected_throughput(void *priv_sta) * computing cur_tp */ tmp_mrs = &mi->r[idx].stats; - tmp_cur_tp = minstrel_get_tp_avg(&mi->r[idx], tmp_mrs->prob_ewma); + tmp_cur_tp = minstrel_get_tp_avg(&mi->r[idx], tmp_mrs->prob_ewma) * 10; tmp_cur_tp = tmp_cur_tp * 1200 * 8 / 1024; return tmp_cur_tp; diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 3928dbd24e25..239ed6e92b89 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -691,7 +691,7 @@ minstrel_aggr_check(struct ieee80211_sta *pubsta, struct sk_buff *skb) if (likely(sta->ampdu_mlme.tid_tx[tid])) return; - ieee80211_start_tx_ba_session(pubsta, tid, 5000); + ieee80211_start_tx_ba_session(pubsta, tid, 0); } static void @@ -871,7 +871,7 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, * - if station is in dynamic SMPS (and streams > 1) * - for fallback rates, to increase chances of getting through */ - if (offset > 0 && + if (offset > 0 || (mi->sta->smps_mode == IEEE80211_SMPS_DYNAMIC && group->streams > 1)) { ratetbl->rate[offset].count = ratetbl->rate[offset].count_rts; @@ -1334,7 +1334,8 @@ static u32 minstrel_ht_get_expected_throughput(void *priv_sta) prob = mi->groups[i].rates[j].prob_ewma; /* convert tp_avg from pkt per second in kbps */ - tp_avg = minstrel_ht_get_tp_avg(mi, i, j, prob) * AVG_PKT_SIZE * 8 / 1024; + tp_avg = minstrel_ht_get_tp_avg(mi, i, j, prob) * 10; 
+ tp_avg = tp_avg * AVG_PKT_SIZE * 8 / 1024; return tp_avg; } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 82af407fea7a..a3bb8f7f5fc5 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1754,7 +1754,7 @@ ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata, entry->seq = seq; entry->rx_queue = rx_queue; entry->last_frag = frag; - entry->ccmp = 0; + entry->check_sequential_pn = false; entry->extra_len = 0; return entry; @@ -1850,15 +1850,27 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) rx->seqno_idx, &(rx->skb)); if (rx->key && (rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP || - rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP_256) && + rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP_256 || + rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP || + rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP_256) && ieee80211_has_protected(fc)) { int queue = rx->security_idx; - /* Store CCMP PN so that we can verify that the next - * fragment has a sequential PN value. */ - entry->ccmp = 1; + + /* Store CCMP/GCMP PN so that we can verify that the + * next fragment has a sequential PN value. + */ + entry->check_sequential_pn = true; memcpy(entry->last_pn, rx->key->u.ccmp.rx_pn[queue], IEEE80211_CCMP_PN_LEN); + BUILD_BUG_ON(offsetof(struct ieee80211_key, + u.ccmp.rx_pn) != + offsetof(struct ieee80211_key, + u.gcmp.rx_pn)); + BUILD_BUG_ON(sizeof(rx->key->u.ccmp.rx_pn[queue]) != + sizeof(rx->key->u.gcmp.rx_pn[queue])); + BUILD_BUG_ON(IEEE80211_CCMP_PN_LEN != + IEEE80211_GCMP_PN_LEN); } return RX_QUEUED; } @@ -1873,15 +1885,21 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) return RX_DROP_MONITOR; } - /* Verify that MPDUs within one MSDU have sequential PN values. - * (IEEE 802.11i, 8.3.3.4.5) */ - if (entry->ccmp) { + /* "The receiver shall discard MSDUs and MMPDUs whose constituent + * MPDU PN values are not incrementing in steps of 1." 
+ * see IEEE P802.11-REVmc/D5.0, 12.5.3.4.4, item d (for CCMP) + * and IEEE P802.11-REVmc/D5.0, 12.5.5.4.4, item d (for GCMP) + */ + if (entry->check_sequential_pn) { int i; u8 pn[IEEE80211_CCMP_PN_LEN], *rpn; int queue; + if (!rx->key || (rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP && - rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP_256)) + rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP_256 && + rx->key->conf.cipher != WLAN_CIPHER_SUITE_GCMP && + rx->key->conf.cipher != WLAN_CIPHER_SUITE_GCMP_256)) return RX_DROP_UNUSABLE; memcpy(pn, entry->last_pn, IEEE80211_CCMP_PN_LEN); for (i = IEEE80211_CCMP_PN_LEN - 1; i >= 0; i--) { @@ -2232,7 +2250,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) struct ieee80211_local *local = rx->local; struct ieee80211_sub_if_data *sdata = rx->sdata; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - u16 q, hdrlen; + u16 ac, q, hdrlen; hdr = (struct ieee80211_hdr *) skb->data; hdrlen = ieee80211_hdrlen(hdr->frame_control); @@ -2301,7 +2319,8 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) ether_addr_equal(sdata->vif.addr, hdr->addr3)) return RX_CONTINUE; - q = ieee80211_select_queue_80211(sdata, skb, hdr); + ac = ieee80211_select_queue_80211(sdata, skb, hdr); + q = sdata->vif.hw_queue[ac]; if (ieee80211_queue_stopped(&local->hw, q)) { IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_congestion); return RX_DROP_MONITOR; @@ -3367,6 +3386,7 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx) return false; /* ignore action frames to TDLS-peers */ if (ieee80211_is_action(hdr->frame_control) && + !is_broadcast_ether_addr(bssid) && !ether_addr_equal(bssid, hdr->addr1)) return false; } diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index a413e52f7691..acbe182b75d1 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -314,6 +314,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) bool was_scanning = local->scanning; struct cfg80211_scan_request 
*scan_req; struct ieee80211_sub_if_data *scan_sdata; + struct ieee80211_sub_if_data *sdata; lockdep_assert_held(&local->mtx); @@ -373,7 +374,16 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) ieee80211_mlme_notify_scan_completed(local); ieee80211_ibss_notify_scan_completed(local); - ieee80211_mesh_notify_scan_completed(local); + + /* Requeue all the work that might have been ignored while + * the scan was in progress; if there was none this will + * just be a no-op for the particular interface. + */ + list_for_each_entry_rcu(sdata, &local->interfaces, list) { + if (ieee80211_sdata_running(sdata)) + ieee80211_queue_work(&sdata->local->hw, &sdata->work); + } + if (was_scanning) ieee80211_start_next_roc(local); } diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index f91d1873218c..67066d048e6f 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -256,11 +256,11 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) } /* Caller must hold local->sta_mtx */ -static void sta_info_hash_add(struct ieee80211_local *local, - struct sta_info *sta) +static int sta_info_hash_add(struct ieee80211_local *local, + struct sta_info *sta) { - rhashtable_insert_fast(&local->sta_hash, &sta->hash_node, - sta_rht_params); + return rhashtable_insert_fast(&local->sta_hash, &sta->hash_node, + sta_rht_params); } static void sta_deliver_ps_frames(struct work_struct *wk) @@ -484,11 +484,17 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) { struct ieee80211_local *local = sta->local; struct ieee80211_sub_if_data *sdata = sta->sdata; - struct station_info sinfo; + struct station_info *sinfo; int err = 0; lockdep_assert_held(&local->sta_mtx); + sinfo = kzalloc(sizeof(struct station_info), GFP_KERNEL); + if (!sinfo) { + err = -ENOMEM; + goto out_err; + } + /* check if STA exists already */ if (sta_info_get_bss(sdata, sta->sta.addr)) { err = -EEXIST; @@ -503,7 +509,9 @@ static int 
sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) set_sta_flag(sta, WLAN_STA_BLOCK_BA); /* make the station visible */ - sta_info_hash_add(local, sta); + err = sta_info_hash_add(local, sta); + if (err) + goto out_drop_sta; list_add_tail_rcu(&sta->list, &local->sta_list); @@ -520,10 +528,9 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) ieee80211_sta_debugfs_add(sta); rate_control_add_sta_debugfs(sta); - memset(&sinfo, 0, sizeof(sinfo)); - sinfo.filled = 0; - sinfo.generation = local->sta_generation; - cfg80211_new_sta(sdata->dev, sta->sta.addr, &sinfo, GFP_KERNEL); + sinfo->generation = local->sta_generation; + cfg80211_new_sta(sdata->dev, sta->sta.addr, sinfo, GFP_KERNEL); + kfree(sinfo); sta_dbg(sdata, "Inserted STA %pM\n", sta->sta.addr); @@ -538,6 +545,7 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) out_remove: sta_info_hash_del(local, sta); list_del_rcu(&sta->list); + out_drop_sta: local->num_sta--; synchronize_net(); __cleanup_single_sta(sta); @@ -882,7 +890,7 @@ static void __sta_info_destroy_part2(struct sta_info *sta) { struct ieee80211_local *local = sta->local; struct ieee80211_sub_if_data *sdata = sta->sdata; - struct station_info sinfo = {}; + struct station_info *sinfo; int ret; /* @@ -920,8 +928,11 @@ static void __sta_info_destroy_part2(struct sta_info *sta) sta_dbg(sdata, "Removed STA %pM\n", sta->sta.addr); - sta_set_sinfo(sta, &sinfo); - cfg80211_del_sta_sinfo(sdata->dev, sta->sta.addr, &sinfo, GFP_KERNEL); + sinfo = kzalloc(sizeof(*sinfo), GFP_KERNEL); + if (sinfo) + sta_set_sinfo(sta, sinfo); + cfg80211_del_sta_sinfo(sdata->dev, sta->sta.addr, sinfo, GFP_KERNEL); + kfree(sinfo); rate_control_remove_sta_debugfs(sta); ieee80211_sta_debugfs_remove(sta); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 2cafb21b422f..15b0150283b6 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -269,7 +269,7 @@ struct ieee80211_fast_tx { u8 sa_offs, da_offs, 
pn_offs; u8 band; u8 hdr[30 + 2 + IEEE80211_FAST_XMIT_MAX_IV + - sizeof(rfc1042_header)]; + sizeof(rfc1042_header)] __aligned(2); struct rcu_head rcu_head; }; diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index c32fc411a911..881bc2072809 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -518,6 +518,9 @@ static struct net_device *find_outdev(struct net *net, if (!dev) return ERR_PTR(-ENODEV); + if (IS_ERR(dev)) + return dev; + /* The caller is holding rtnl anyways, so release the dev reference */ dev_put(dev); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index f57b4dcdb233..4da560005b0e 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1757,15 +1757,34 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int cp = pp->conn_in_get(ipvs, af, skb, &iph); conn_reuse_mode = sysctl_conn_reuse_mode(ipvs); - if (conn_reuse_mode && !iph.fragoffs && - is_new_conn(skb, &iph) && cp && - ((unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest && - unlikely(!atomic_read(&cp->dest->weight))) || - unlikely(is_new_conn_expected(cp, conn_reuse_mode)))) { - if (!atomic_read(&cp->n_control)) - ip_vs_conn_expire_now(cp); - __ip_vs_conn_put(cp); - cp = NULL; + if (conn_reuse_mode && !iph.fragoffs && is_new_conn(skb, &iph) && cp) { + bool uses_ct = false, resched = false; + + if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest && + unlikely(!atomic_read(&cp->dest->weight))) { + resched = true; + uses_ct = ip_vs_conn_uses_conntrack(cp, skb); + } else if (is_new_conn_expected(cp, conn_reuse_mode)) { + uses_ct = ip_vs_conn_uses_conntrack(cp, skb); + if (!atomic_read(&cp->n_control)) { + resched = true; + } else { + /* Do not reschedule controlling connection + * that uses conntrack while it is still + * referenced by controlled connection(s). 
+ */ + resched = !uses_ct; + } + } + + if (resched) { + if (!atomic_read(&cp->n_control)) + ip_vs_conn_expire_now(cp); + __ip_vs_conn_put(cp); + if (uses_ct) + return NF_DROP; + cp = NULL; + } } if (unlikely(!cp)) { diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c index 1b8d594e493a..0a6eb5c0d9e9 100644 --- a/net/netfilter/ipvs/ip_vs_pe_sip.c +++ b/net/netfilter/ipvs/ip_vs_pe_sip.c @@ -70,10 +70,10 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb) const char *dptr; int retc; - ip_vs_fill_iph_skb(p->af, skb, false, &iph); + retc = ip_vs_fill_iph_skb(p->af, skb, false, &iph); /* Only useful with UDP */ - if (iph.protocol != IPPROTO_UDP) + if (!retc || iph.protocol != IPPROTO_UDP) return -EINVAL; /* todo: IPv6 fragments: * I think this only should be done for the first fragment. /HS @@ -88,7 +88,7 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb) dptr = skb->data + dataoff; datalen = skb->len - dataoff; - if (get_callid(dptr, dataoff, datalen, &matchoff, &matchlen)) + if (get_callid(dptr, 0, datalen, &matchoff, &matchlen)) return -EINVAL; /* N.B: pe_data is only set on success, diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 3cb3cb831591..86a3c6f0c871 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1757,6 +1757,7 @@ void nf_conntrack_init_end(void) int nf_conntrack_init_net(struct net *net) { + static atomic64_t unique_id; int ret = -ENOMEM; int cpu; @@ -1779,7 +1780,8 @@ int nf_conntrack_init_net(struct net *net) if (!net->ct.stat) goto err_pcpu_lists; - net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net); + net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%llu", + (u64)atomic64_inc_return(&unique_id)); if (!net->ct.slabname) goto err_slabname; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index d4aaad747ea9..25391fb25516 100644 --- a/net/netfilter/x_tables.c +++ 
b/net/netfilter/x_tables.c @@ -415,6 +415,47 @@ int xt_check_match(struct xt_mtchk_param *par, } EXPORT_SYMBOL_GPL(xt_check_match); +/** xt_check_entry_match - check that matches end before start of target + * + * @match: beginning of xt_entry_match + * @target: beginning of this rules target (alleged end of matches) + * @alignment: alignment requirement of match structures + * + * Validates that all matches add up to the beginning of the target, + * and that each match covers at least the base structure size. + * + * Return: 0 on success, negative errno on failure. + */ +static int xt_check_entry_match(const char *match, const char *target, + const size_t alignment) +{ + const struct xt_entry_match *pos; + int length = target - match; + + if (length == 0) /* no matches */ + return 0; + + pos = (struct xt_entry_match *)match; + do { + if ((unsigned long)pos % alignment) + return -EINVAL; + + if (length < (int)sizeof(struct xt_entry_match)) + return -EINVAL; + + if (pos->u.match_size < sizeof(struct xt_entry_match)) + return -EINVAL; + + if (pos->u.match_size > length) + return -EINVAL; + + length -= pos->u.match_size; + pos = ((void *)((char *)(pos) + (pos)->u.match_size)); + } while (length > 0); + + return 0; +} + #ifdef CONFIG_COMPAT int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta) { @@ -484,13 +525,14 @@ int xt_compat_match_offset(const struct xt_match *match) } EXPORT_SYMBOL_GPL(xt_compat_match_offset); -int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, - unsigned int *size) +void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, + unsigned int *size) { const struct xt_match *match = m->u.kernel.match; struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m; int pad, off = xt_compat_match_offset(match); u_int16_t msize = cm->u.user.match_size; + char name[sizeof(m->u.user.name)]; m = *dstptr; memcpy(m, cm, sizeof(*cm)); @@ -504,10 +546,12 @@ int xt_compat_match_from_user(struct 
xt_entry_match *m, void **dstptr, msize += off; m->u.user.match_size = msize; + strlcpy(name, match->name, sizeof(name)); + module_put(match->me); + strncpy(m->u.user.name, name, sizeof(m->u.user.name)); *size += off; *dstptr += msize; - return 0; } EXPORT_SYMBOL_GPL(xt_compat_match_from_user); @@ -538,8 +582,125 @@ int xt_compat_match_to_user(const struct xt_entry_match *m, return 0; } EXPORT_SYMBOL_GPL(xt_compat_match_to_user); + +/* non-compat version may have padding after verdict */ +struct compat_xt_standard_target { + struct compat_xt_entry_target t; + compat_uint_t verdict; +}; + +int xt_compat_check_entry_offsets(const void *base, const char *elems, + unsigned int target_offset, + unsigned int next_offset) +{ + long size_of_base_struct = elems - (const char *)base; + const struct compat_xt_entry_target *t; + const char *e = base; + + if (target_offset < size_of_base_struct) + return -EINVAL; + + if (target_offset + sizeof(*t) > next_offset) + return -EINVAL; + + t = (void *)(e + target_offset); + if (t->u.target_size < sizeof(*t)) + return -EINVAL; + + if (target_offset + t->u.target_size > next_offset) + return -EINVAL; + + if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 && + COMPAT_XT_ALIGN(target_offset + sizeof(struct compat_xt_standard_target)) != next_offset) + return -EINVAL; + + /* compat_xt_entry match has less strict aligment requirements, + * otherwise they are identical. In case of padding differences + * we need to add compat version of xt_check_entry_match. + */ + BUILD_BUG_ON(sizeof(struct compat_xt_entry_match) != sizeof(struct xt_entry_match)); + + return xt_check_entry_match(elems, base + target_offset, + __alignof__(struct compat_xt_entry_match)); +} +EXPORT_SYMBOL(xt_compat_check_entry_offsets); #endif /* CONFIG_COMPAT */ +/** + * xt_check_entry_offsets - validate arp/ip/ip6t_entry + * + * @base: pointer to arp/ip/ip6t_entry + * @elems: pointer to first xt_entry_match, i.e. 
ip(6)t_entry->elems + * @target_offset: the arp/ip/ip6_t->target_offset + * @next_offset: the arp/ip/ip6_t->next_offset + * + * validates that target_offset and next_offset are sane and that all + * match sizes (if any) align with the target offset. + * + * This function does not validate the targets or matches themselves, it + * only tests that all the offsets and sizes are correct, that all + * match structures are aligned, and that the last structure ends where + * the target structure begins. + * + * Also see xt_compat_check_entry_offsets for CONFIG_COMPAT version. + * + * The arp/ip/ip6t_entry structure @base must have passed following tests: + * - it must point to a valid memory location + * - base to base + next_offset must be accessible, i.e. not exceed allocated + * length. + * + * A well-formed entry looks like this: + * + * ip(6)t_entry match [mtdata] match [mtdata] target [tgdata] ip(6)t_entry + * e->elems[]-----' | | + * matchsize | | + * matchsize | | + * | | + * target_offset---------------------------------' | + * next_offset---------------------------------------------------' + * + * elems[]: flexible array member at end of ip(6)/arpt_entry struct. + * This is where matches (if any) and the target reside. + * target_offset: beginning of target. + * next_offset: start of the next rule; also: size of this rule. + * Since targets have a minimum size, target_offset + minlen <= next_offset. + * + * Every match stores its size, sum of sizes must not exceed target_offset. + * + * Return: 0 on success, negative errno on failure. 
+ */ +int xt_check_entry_offsets(const void *base, + const char *elems, + unsigned int target_offset, + unsigned int next_offset) +{ + long size_of_base_struct = elems - (const char *)base; + const struct xt_entry_target *t; + const char *e = base; + + /* target start is within the ip/ip6/arpt_entry struct */ + if (target_offset < size_of_base_struct) + return -EINVAL; + + if (target_offset + sizeof(*t) > next_offset) + return -EINVAL; + + t = (void *)(e + target_offset); + if (t->u.target_size < sizeof(*t)) + return -EINVAL; + + if (target_offset + t->u.target_size > next_offset) + return -EINVAL; + + if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 && + XT_ALIGN(target_offset + sizeof(struct xt_standard_target)) != next_offset) + return -EINVAL; + + return xt_check_entry_match(elems, base + target_offset, + __alignof__(struct xt_entry_match)); +} +EXPORT_SYMBOL(xt_check_entry_offsets); + int xt_check_target(struct xt_tgchk_param *par, unsigned int size, u_int8_t proto, bool inv_proto) { @@ -590,6 +751,80 @@ int xt_check_target(struct xt_tgchk_param *par, } EXPORT_SYMBOL_GPL(xt_check_target); +/** + * xt_copy_counters_from_user - copy counters and metadata from userspace + * + * @user: src pointer to userspace memory + * @len: alleged size of userspace memory + * @info: where to store the xt_counters_info metadata + * @compat: true if we setsockopt call is done by 32bit task on 64bit kernel + * + * Copies counter meta data from @user and stores it in @info. + * + * vmallocs memory to hold the counters, then copies the counter data + * from @user to the new memory and returns a pointer to it. + * + * If @compat is true, @info gets converted automatically to the 64bit + * representation. + * + * The metadata associated with the counters is stored in @info. + * + * Return: returns pointer that caller has to test via IS_ERR(). + * If IS_ERR is false, caller has to vfree the pointer. 
+ */ +void *xt_copy_counters_from_user(const void __user *user, unsigned int len, + struct xt_counters_info *info, bool compat) +{ + void *mem; + u64 size; + +#ifdef CONFIG_COMPAT + if (compat) { + /* structures only differ in size due to alignment */ + struct compat_xt_counters_info compat_tmp; + + if (len <= sizeof(compat_tmp)) + return ERR_PTR(-EINVAL); + + len -= sizeof(compat_tmp); + if (copy_from_user(&compat_tmp, user, sizeof(compat_tmp)) != 0) + return ERR_PTR(-EFAULT); + + strlcpy(info->name, compat_tmp.name, sizeof(info->name)); + info->num_counters = compat_tmp.num_counters; + user += sizeof(compat_tmp); + } else +#endif + { + if (len <= sizeof(*info)) + return ERR_PTR(-EINVAL); + + len -= sizeof(*info); + if (copy_from_user(info, user, sizeof(*info)) != 0) + return ERR_PTR(-EFAULT); + + info->name[sizeof(info->name) - 1] = '\0'; + user += sizeof(*info); + } + + size = sizeof(struct xt_counters); + size *= info->num_counters; + + if (size != (u64)len) + return ERR_PTR(-EINVAL); + + mem = vmalloc(len); + if (!mem) + return ERR_PTR(-ENOMEM); + + if (copy_from_user(mem, user, len) == 0) + return mem; + + vfree(mem); + return ERR_PTR(-EFAULT); +} +EXPORT_SYMBOL_GPL(xt_copy_counters_from_user); + #ifdef CONFIG_COMPAT int xt_compat_target_offset(const struct xt_target *target) { @@ -605,6 +840,7 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t; int pad, off = xt_compat_target_offset(target); u_int16_t tsize = ct->u.user.target_size; + char name[sizeof(t->u.user.name)]; t = *dstptr; memcpy(t, ct, sizeof(*ct)); @@ -618,6 +854,9 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, tsize += off; t->u.user.target_size = tsize; + strlcpy(name, target->name, sizeof(name)); + module_put(target->me); + strncpy(t->u.user.name, name, sizeof(t->u.user.name)); *size += off; *dstptr += tsize; diff --git a/net/netlink/af_netlink.c 
b/net/netlink/af_netlink.c index 59651af8cc27..7a5fa0c98377 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1305,7 +1305,7 @@ static int netlink_release(struct socket *sock) skb_queue_purge(&sk->sk_write_queue); - if (nlk->portid) { + if (nlk->portid && nlk->bound) { struct netlink_notify n = { .net = sock_net(sk), .protocol = sk->sk_protocol, @@ -2784,6 +2784,7 @@ static int netlink_dump(struct sock *sk) struct netlink_callback *cb; struct sk_buff *skb = NULL; struct nlmsghdr *nlh; + struct module *module; int len, err = -ENOBUFS; int alloc_min_size; int alloc_size; @@ -2863,9 +2864,11 @@ static int netlink_dump(struct sock *sk) cb->done(cb); nlk->cb_running = false; + module = cb->module; + skb = cb->skb; mutex_unlock(nlk->cb_mutex); - module_put(cb->module); - consume_skb(cb->skb); + module_put(module); + consume_skb(skb); return 0; errout_skb: diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index c88d0f2d3e01..7cb8184ac165 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -158,9 +158,7 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key, new_mpls_lse = (__be32 *)skb_mpls_header(skb); *new_mpls_lse = mpls->mpls_lse; - if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->csum = csum_add(skb->csum, csum_partial(new_mpls_lse, - MPLS_HLEN, 0)); + skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN); hdr = eth_hdr(skb); hdr->h_proto = mpls->mpls_ethertype; @@ -280,7 +278,7 @@ static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key, ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst, mask->eth_dst); - ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); + skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source); ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest); @@ -463,7 +461,7 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key, mask_ipv6_addr(saddr, key->ipv6_src, mask->ipv6_src, 
masked); if (unlikely(memcmp(saddr, masked, sizeof(masked)))) { - set_ipv6_addr(skb, key->ipv6_proto, saddr, masked, + set_ipv6_addr(skb, flow_key->ip.proto, saddr, masked, true); memcpy(&flow_key->ipv6.addr.src, masked, sizeof(flow_key->ipv6.addr.src)); @@ -485,7 +483,7 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key, NULL, &flags) != NEXTHDR_ROUTING); - set_ipv6_addr(skb, key->ipv6_proto, daddr, masked, + set_ipv6_addr(skb, flow_key->ip.proto, daddr, masked, recalc_csum); memcpy(&flow_key->ipv6.addr.dst, masked, sizeof(flow_key->ipv6.addr.dst)); @@ -639,7 +637,7 @@ static int ovs_vport_output(struct net *net, struct sock *sk, struct sk_buff *sk /* Reconstruct the MAC header. */ skb_push(skb, data->l2_len); memcpy(skb->data, &data->l2_data, data->l2_len); - ovs_skb_postpush_rcsum(skb, skb->data, data->l2_len); + skb_postpush_rcsum(skb, skb->data, data->l2_len); skb_reset_mac_header(skb); ovs_vport_send(vport, skb); diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 91a8b004dc51..deadfdab1bc3 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -336,12 +336,10 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, unsigned short gso_type = skb_shinfo(skb)->gso_type; struct sw_flow_key later_key; struct sk_buff *segs, *nskb; - struct ovs_skb_cb ovs_cb; int err; - ovs_cb = *OVS_CB(skb); + BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_SGO_CB_OFFSET); segs = __skb_gso_segment(skb, NETIF_F_SG, false); - *OVS_CB(skb) = ovs_cb; if (IS_ERR(segs)) return PTR_ERR(segs); if (segs == NULL) @@ -359,7 +357,6 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, /* Queue all of the segments. 
*/ skb = segs; do { - *OVS_CB(skb) = ovs_cb; if (gso_type & SKB_GSO_UDP && skb != segs) key = &later_key; diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 6b0190b987ec..76fcaf1fd2a9 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -58,7 +58,7 @@ static void netdev_port_receive(struct sk_buff *skb) return; skb_push(skb, ETH_HLEN); - ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN); + skb_postpush_rcsum(skb, skb->data, ETH_HLEN); ovs_vport_receive(vport, skb, skb_tunnel_info(skb)); return; error: diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 1605691d9414..5eb7694348b5 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -90,7 +90,9 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms) int err; struct vxlan_config conf = { .no_share = true, - .flags = VXLAN_F_COLLECT_METADATA, + .flags = VXLAN_F_COLLECT_METADATA | VXLAN_F_UDP_ZERO_CSUM6_RX, + /* Don't restrict the packets that can be sent by MTU */ + .mtu = IP_MAX_MTU, }; if (!options) { diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 8ea3a96980ac..6e2b62f9d595 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -184,13 +184,6 @@ static inline struct vport *vport_from_priv(void *priv) int ovs_vport_receive(struct vport *, struct sk_buff *, const struct ip_tunnel_info *); -static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb, - const void *start, unsigned int len) -{ - if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->csum = csum_add(skb->csum, csum_partial(start, len, 0)); -} - static inline const char *ovs_vport_name(struct vport *vport) { return vport->dev->name; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 992396aa635c..a86f26d05bc2 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1341,7 +1341,7 @@ static unsigned int fanout_demux_hash(struct packet_fanout *f, struct sk_buff 
*skb, unsigned int num) { - return reciprocal_scale(skb_get_hash(skb), num); + return reciprocal_scale(__skb_get_hash_symmetric(skb), num); } static unsigned int fanout_demux_lb(struct packet_fanout *f, @@ -1916,6 +1916,10 @@ retry: goto retry; } + if (!dev_validate_header(dev, skb->data, len)) { + err = -EINVAL; + goto out_unlock; + } if (len > (dev->mtu + dev->hard_header_len + extra_len) && !packet_extra_vlan_len_allowed(dev, skb)) { err = -EMSGSIZE; @@ -2326,18 +2330,6 @@ static void tpacket_destruct_skb(struct sk_buff *skb) sock_wfree(skb); } -static bool ll_header_truncated(const struct net_device *dev, int len) -{ - /* net device doesn't like empty head */ - if (unlikely(len < dev->hard_header_len)) { - net_warn_ratelimited("%s: packet size is too short (%d < %d)\n", - current->comm, len, dev->hard_header_len); - return true; - } - - return false; -} - static void tpacket_set_protocol(const struct net_device *dev, struct sk_buff *skb) { @@ -2420,19 +2412,19 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, if (unlikely(err < 0)) return -EINVAL; } else if (dev->hard_header_len) { - if (ll_header_truncated(dev, tp_len)) - return -EINVAL; + int hdrlen = min_t(int, dev->hard_header_len, tp_len); skb_push(skb, dev->hard_header_len); - err = skb_store_bits(skb, 0, data, - dev->hard_header_len); + err = skb_store_bits(skb, 0, data, hdrlen); if (unlikely(err)) return err; + if (!dev_validate_header(dev, skb->data, hdrlen)) + return -EINVAL; if (!skb->protocol) tpacket_set_protocol(dev, skb); - data += dev->hard_header_len; - to_write -= dev->hard_header_len; + data += hdrlen; + to_write -= hdrlen; } offset = offset_in_page(data); @@ -2763,9 +2755,6 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len); if (unlikely(offset < 0)) goto out_free; - } else { - if (ll_header_truncated(dev, len)) - goto out_free; } /* Returns -EFAULT on error */ @@ -2773,6 
+2762,12 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (err) goto out_free; + if (sock->type == SOCK_RAW && + !dev_validate_header(dev, skb->data, len)) { + err = -EINVAL; + goto out_free; + } + sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); if (!gso_type && (len > dev->mtu + reserve + extra_len) && @@ -3441,6 +3436,7 @@ static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq) i->ifindex = mreq->mr_ifindex; i->alen = mreq->mr_alen; memcpy(i->addr, mreq->mr_address, i->alen); + memset(i->addr + i->alen, 0, sizeof(i->addr) - i->alen); i->count = 1; i->next = po->mclist; po->mclist = i; diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 10d42f3220ab..f925753668a7 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -377,6 +377,10 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev, struct sockaddr_pn sa; u16 len; + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + return NET_RX_DROP; + /* check we have at least a full Phonet header */ if (!pskb_pull(skb, sizeof(struct phonethdr))) goto out; diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 9f843bbe8c10..d778d99326df 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -1097,17 +1097,6 @@ static unsigned int rfkill_fop_poll(struct file *file, poll_table *wait) return res; } -static bool rfkill_readable(struct rfkill_data *data) -{ - bool r; - - mutex_lock(&data->mtx); - r = !list_empty(&data->events); - mutex_unlock(&data->mtx); - - return r; -} - static ssize_t rfkill_fop_read(struct file *file, char __user *buf, size_t count, loff_t *pos) { @@ -1124,8 +1113,11 @@ static ssize_t rfkill_fop_read(struct file *file, char __user *buf, goto out; } mutex_unlock(&data->mtx); + /* since we re-check and it just compares pointers, + * using !list_empty() without locking isn't a problem + */ ret = wait_event_interruptible(data->read_wait, - rfkill_readable(data)); + !list_empty(&data->events)); 
mutex_lock(&data->mtx); if (ret) diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index b07c535ba8e7..eeb3eb3ea9eb 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -105,9 +105,7 @@ static void *tcf_csum_skb_nextlayer(struct sk_buff *skb, int hl = ihl + jhl; if (!pskb_may_pull(skb, ipl + ntkoff) || (ipl < hl) || - (skb_cloned(skb) && - !skb_clone_writable(skb, hl + ntkoff) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + skb_try_make_writable(skb, hl + ntkoff)) return NULL; else return (void *)(skb_network_header(skb) + ihl); @@ -365,9 +363,7 @@ static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags) } if (update_flags & TCA_CSUM_UPDATE_FLAG_IPV4HDR) { - if (skb_cloned(skb) && - !skb_clone_writable(skb, sizeof(*iph) + ntkoff) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_try_make_writable(skb, sizeof(*iph) + ntkoff)) goto fail; ip_send_check(ip_hdr(skb)); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 32fcdecdb9e2..e384d6aefa3a 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -170,7 +170,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, if (!(at & AT_EGRESS)) { if (m->tcfm_ok_push) - skb_push(skb2, skb->mac_len); + skb_push_rcsum(skb2, skb->mac_len); } /* mirror is always swallowed */ diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index b7c4ead8b5a8..27607b863aba 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -126,9 +126,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a, addr = iph->daddr; if (!((old_addr ^ addr) & mask)) { - if (skb_cloned(skb) && - !skb_clone_writable(skb, sizeof(*iph) + noff) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_try_make_writable(skb, sizeof(*iph) + noff)) goto drop; new_addr &= mask; @@ -156,9 +154,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a, struct tcphdr *tcph; if (!pskb_may_pull(skb, ihl + sizeof(*tcph) + noff) || - (skb_cloned(skb) && - 
!skb_clone_writable(skb, ihl + sizeof(*tcph) + noff) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + skb_try_make_writable(skb, ihl + sizeof(*tcph) + noff)) goto drop; tcph = (void *)(skb_network_header(skb) + ihl); @@ -171,9 +167,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a, struct udphdr *udph; if (!pskb_may_pull(skb, ihl + sizeof(*udph) + noff) || - (skb_cloned(skb) && - !skb_clone_writable(skb, ihl + sizeof(*udph) + noff) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + skb_try_make_writable(skb, ihl + sizeof(*udph) + noff)) goto drop; udph = (void *)(skb_network_header(skb) + ihl); @@ -213,10 +207,8 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a, if ((old_addr ^ addr) & mask) break; - if (skb_cloned(skb) && - !skb_clone_writable(skb, ihl + sizeof(*icmph) + - sizeof(*iph) + noff) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_try_make_writable(skb, ihl + sizeof(*icmph) + + sizeof(*iph) + noff)) goto drop; icmph = (void *)(skb_network_header(skb) + ihl); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 57692947ebbe..95b021243233 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -252,23 +252,28 @@ static int fl_set_key(struct net *net, struct nlattr **tb, fl_set_key_val(tb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC, mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK, sizeof(key->eth.src)); + fl_set_key_val(tb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE, &mask->basic.n_proto, TCA_FLOWER_UNSPEC, sizeof(key->basic.n_proto)); + if (key->basic.n_proto == htons(ETH_P_IP) || key->basic.n_proto == htons(ETH_P_IPV6)) { fl_set_key_val(tb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO, &mask->basic.ip_proto, TCA_FLOWER_UNSPEC, sizeof(key->basic.ip_proto)); } - if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + + if (tb[TCA_FLOWER_KEY_IPV4_SRC] || tb[TCA_FLOWER_KEY_IPV4_DST]) { + key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; fl_set_key_val(tb, &key->ipv4.src, 
TCA_FLOWER_KEY_IPV4_SRC, &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK, sizeof(key->ipv4.src)); fl_set_key_val(tb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST, &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK, sizeof(key->ipv4.dst)); - } else if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + } else if (tb[TCA_FLOWER_KEY_IPV6_SRC] || tb[TCA_FLOWER_KEY_IPV6_DST]) { + key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; fl_set_key_val(tb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC, &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK, sizeof(key->ipv6.src)); @@ -276,6 +281,7 @@ static int fl_set_key(struct net *net, struct nlattr **tb, &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK, sizeof(key->ipv6.dst)); } + if (key->basic.ip_proto == IPPROTO_TCP) { fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC, &mask->tp.src, TCA_FLOWER_UNSPEC, diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index b5c2cf2aa6d4..95b560f0b253 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -744,14 +744,15 @@ static u32 qdisc_alloc_handle(struct net_device *dev) return 0; } -void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) +void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n, + unsigned int len) { const struct Qdisc_class_ops *cops; unsigned long cl; u32 parentid; int drops; - if (n == 0) + if (n == 0 && len == 0) return; drops = max_t(int, n, 0); rcu_read_lock(); @@ -774,11 +775,12 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) cops->put(sch, cl); } sch->q.qlen -= n; + sch->qstats.backlog -= len; __qdisc_qstats_drop(sch, drops); } rcu_read_unlock(); } -EXPORT_SYMBOL(qdisc_tree_decrease_qlen); +EXPORT_SYMBOL(qdisc_tree_reduce_backlog); static void notify_and_destroy(struct net *net, struct sk_buff *skb, struct nlmsghdr *n, u32 clid, @@ -1852,6 +1854,7 @@ reset: } tp = old_tp; + protocol = tc_skb_protocol(skb); goto reclassify; #endif } diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index c538d9e4a8f6..baafddf229ce 
100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1624,13 +1624,8 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, new->reshape_fail = cbq_reshape_fail; #endif } - sch_tree_lock(sch); - *old = cl->q; - cl->q = new; - qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); - qdisc_reset(*old); - sch_tree_unlock(sch); + *old = qdisc_replace(sch, new, &cl->q); return 0; } @@ -1914,7 +1909,7 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg) { struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *cl = (struct cbq_class *)arg; - unsigned int qlen; + unsigned int qlen, backlog; if (cl->filters || cl->children || cl == &q->link) return -EBUSY; @@ -1922,8 +1917,9 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg) sch_tree_lock(sch); qlen = cl->q->q.qlen; + backlog = cl->q->qstats.backlog; qdisc_reset(cl->q); - qdisc_tree_decrease_qlen(cl->q, qlen); + qdisc_tree_reduce_backlog(cl->q, qlen, backlog); if (cl->next_alive) cbq_deactivate_class(cl); diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 5ffb8b8337c7..0a08c860eee4 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -128,8 +128,8 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx) choke_zap_tail_holes(q); qdisc_qstats_backlog_dec(sch, skb); + qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb)); qdisc_drop(skb, sch); - qdisc_tree_decrease_qlen(sch, 1); --sch->q.qlen; } @@ -456,6 +456,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt) old = q->tab; if (old) { unsigned int oqlen = sch->q.qlen, tail = 0; + unsigned dropped = 0; while (q->head != q->tail) { struct sk_buff *skb = q->tab[q->head]; @@ -467,11 +468,12 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt) ntab[tail++] = skb; continue; } + dropped += qdisc_pkt_len(skb); qdisc_qstats_backlog_dec(sch, skb); --sch->q.qlen; qdisc_drop(skb, sch); } - qdisc_tree_decrease_qlen(sch, oqlen - sch->q.qlen); + 
qdisc_tree_reduce_backlog(sch, oqlen - sch->q.qlen, dropped); q->head = 0; q->tail = tail; } diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index 535007d5f0b5..9b7e2980ee5c 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -79,12 +79,13 @@ static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch) skb = codel_dequeue(sch, &q->params, &q->vars, &q->stats, dequeue); - /* We cant call qdisc_tree_decrease_qlen() if our qlen is 0, + /* We cant call qdisc_tree_reduce_backlog() if our qlen is 0, * or HTB crashes. Defer it for next round. */ if (q->stats.drop_count && sch->q.qlen) { - qdisc_tree_decrease_qlen(sch, q->stats.drop_count); + qdisc_tree_reduce_backlog(sch, q->stats.drop_count, q->stats.drop_len); q->stats.drop_count = 0; + q->stats.drop_len = 0; } if (skb) qdisc_bstats_update(sch, skb); @@ -116,7 +117,7 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt) { struct codel_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_CODEL_MAX + 1]; - unsigned int qlen; + unsigned int qlen, dropped = 0; int err; if (!opt) @@ -156,10 +157,11 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt) while (sch->q.qlen > sch->limit) { struct sk_buff *skb = __skb_dequeue(&sch->q); + dropped += qdisc_pkt_len(skb); qdisc_qstats_backlog_dec(sch, skb); qdisc_drop(skb, sch); } - qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen); + qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped); sch_tree_unlock(sch); return 0; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index f26bdea875c1..d6e3ad43cecb 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -53,9 +53,10 @@ static struct drr_class *drr_find_class(struct Qdisc *sch, u32 classid) static void drr_purge_queue(struct drr_class *cl) { unsigned int len = cl->qdisc->q.qlen; + unsigned int backlog = cl->qdisc->qstats.backlog; qdisc_reset(cl->qdisc); - qdisc_tree_decrease_qlen(cl->qdisc, len); + qdisc_tree_reduce_backlog(cl->qdisc, len, backlog); } static const 
struct nla_policy drr_policy[TCA_DRR_MAX + 1] = { @@ -226,11 +227,7 @@ static int drr_graft_class(struct Qdisc *sch, unsigned long arg, new = &noop_qdisc; } - sch_tree_lock(sch); - drr_purge_queue(cl); - *old = cl->qdisc; - cl->qdisc = new; - sch_tree_unlock(sch); + *old = qdisc_replace(sch, new, &cl->qdisc); return 0; } diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index f357f34d02d2..d0dff0cd8186 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -73,13 +73,7 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg, new = &noop_qdisc; } - sch_tree_lock(sch); - *old = p->q; - p->q = new; - qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); - qdisc_reset(*old); - sch_tree_unlock(sch); - + *old = qdisc_replace(sch, new, &p->q); return 0; } @@ -264,6 +258,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) return err; } + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; @@ -286,6 +281,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) return NULL; qdisc_bstats_update(sch, skb); + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; index = skb->tc_index & (p->indices - 1); @@ -401,6 +397,7 @@ static void dsmark_reset(struct Qdisc *sch) pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); qdisc_reset(p->q); + sch->qstats.backlog = 0; sch->q.qlen = 0; } diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 2177eac0a61e..2e4bd2c0a50c 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -37,14 +37,18 @@ static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch) static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch) { + unsigned int prev_backlog; + if (likely(skb_queue_len(&sch->q) < sch->limit)) return qdisc_enqueue_tail(skb, sch); + prev_backlog = sch->qstats.backlog; /* queue full, remove one skb to fulfill the limit */ __qdisc_queue_drop_head(sch, &sch->q); qdisc_qstats_drop(sch); qdisc_enqueue_tail(skb, sch); + 
qdisc_tree_reduce_backlog(sch, 0, prev_backlog - sch->qstats.backlog); return NET_XMIT_CN; } diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 109b2322778f..3c6a47d66a04 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -662,6 +662,7 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) struct fq_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_FQ_MAX + 1]; int err, drop_count = 0; + unsigned drop_len = 0; u32 fq_log; if (!opt) @@ -736,10 +737,11 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) if (!skb) break; + drop_len += qdisc_pkt_len(skb); kfree_skb(skb); drop_count++; } - qdisc_tree_decrease_qlen(sch, drop_count); + qdisc_tree_reduce_backlog(sch, drop_count, drop_len); sch_tree_unlock(sch); return err; diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 4c834e93dafb..d3fc8f9dd3d4 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -175,7 +175,7 @@ static unsigned int fq_codel_qdisc_drop(struct Qdisc *sch) static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct fq_codel_sched_data *q = qdisc_priv(sch); - unsigned int idx; + unsigned int idx, prev_backlog; struct fq_codel_flow *flow; int uninitialized_var(ret); @@ -203,6 +203,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (++sch->q.qlen <= sch->limit) return NET_XMIT_SUCCESS; + prev_backlog = sch->qstats.backlog; q->drop_overlimit++; /* Return Congestion Notification only if we dropped a packet * from this flow. 
@@ -211,7 +212,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) return NET_XMIT_CN; /* As we dropped a packet, better let upper stack know this */ - qdisc_tree_decrease_qlen(sch, 1); + qdisc_tree_reduce_backlog(sch, 1, prev_backlog - sch->qstats.backlog); return NET_XMIT_SUCCESS; } @@ -241,6 +242,7 @@ static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch) struct fq_codel_flow *flow; struct list_head *head; u32 prev_drop_count, prev_ecn_mark; + unsigned int prev_backlog; begin: head = &q->new_flows; @@ -259,6 +261,7 @@ begin: prev_drop_count = q->cstats.drop_count; prev_ecn_mark = q->cstats.ecn_mark; + prev_backlog = sch->qstats.backlog; skb = codel_dequeue(sch, &q->cparams, &flow->cvars, &q->cstats, dequeue); @@ -276,12 +279,14 @@ begin: } qdisc_bstats_update(sch, skb); flow->deficit -= qdisc_pkt_len(skb); - /* We cant call qdisc_tree_decrease_qlen() if our qlen is 0, + /* We cant call qdisc_tree_reduce_backlog() if our qlen is 0, * or HTB crashes. Defer it for next round. 
*/ if (q->cstats.drop_count && sch->q.qlen) { - qdisc_tree_decrease_qlen(sch, q->cstats.drop_count); + qdisc_tree_reduce_backlog(sch, q->cstats.drop_count, + q->cstats.drop_len); q->cstats.drop_count = 0; + q->cstats.drop_len = 0; } return skb; } @@ -372,11 +377,13 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt) while (sch->q.qlen > sch->limit) { struct sk_buff *skb = fq_codel_dequeue(sch); + q->cstats.drop_len += qdisc_pkt_len(skb); kfree_skb(skb); q->cstats.drop_count++; } - qdisc_tree_decrease_qlen(sch, q->cstats.drop_count); + qdisc_tree_reduce_backlog(sch, q->cstats.drop_count, q->cstats.drop_len); q->cstats.drop_count = 0; + q->cstats.drop_len = 0; sch_tree_unlock(sch); return 0; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 16bc83b2842a..aa4725038f94 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -159,12 +159,15 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, if (validate) skb = validate_xmit_skb_list(skb, dev); - if (skb) { + if (likely(skb)) { HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_xmit_frozen_or_stopped(txq)) skb = dev_hard_start_xmit(skb, dev, txq, &ret); HARD_TX_UNLOCK(dev, txq); + } else { + spin_lock(root_lock); + return qdisc_qlen(q); } spin_lock(root_lock); diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index b7ebe2c87586..d783d7cc3348 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -895,9 +895,10 @@ static void hfsc_purge_queue(struct Qdisc *sch, struct hfsc_class *cl) { unsigned int len = cl->qdisc->q.qlen; + unsigned int backlog = cl->qdisc->qstats.backlog; qdisc_reset(cl->qdisc); - qdisc_tree_decrease_qlen(cl->qdisc, len); + qdisc_tree_reduce_backlog(cl->qdisc, len, backlog); } static void @@ -1215,11 +1216,7 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, new = &noop_qdisc; } - sch_tree_lock(sch); - hfsc_purge_queue(sch, cl); - *old = cl->qdisc; - cl->qdisc = new; - sch_tree_unlock(sch); + *old = 
qdisc_replace(sch, new, &cl->qdisc); return 0; } diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index 86b04e31e60b..13d6f83ec491 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -382,6 +382,7 @@ static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch) struct hhf_sched_data *q = qdisc_priv(sch); enum wdrr_bucket_idx idx; struct wdrr_bucket *bucket; + unsigned int prev_backlog; idx = hhf_classify(skb, sch); @@ -409,6 +410,7 @@ static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (++sch->q.qlen <= sch->limit) return NET_XMIT_SUCCESS; + prev_backlog = sch->qstats.backlog; q->drop_overlimit++; /* Return Congestion Notification only if we dropped a packet from this * bucket. @@ -417,7 +419,7 @@ static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch) return NET_XMIT_CN; /* As we dropped a packet, better let upper stack know this. */ - qdisc_tree_decrease_qlen(sch, 1); + qdisc_tree_reduce_backlog(sch, 1, prev_backlog - sch->qstats.backlog); return NET_XMIT_SUCCESS; } @@ -527,7 +529,7 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt) { struct hhf_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_HHF_MAX + 1]; - unsigned int qlen; + unsigned int qlen, prev_backlog; int err; u64 non_hh_quantum; u32 new_quantum = q->quantum; @@ -577,12 +579,14 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt) } qlen = sch->q.qlen; + prev_backlog = sch->qstats.backlog; while (sch->q.qlen > sch->limit) { struct sk_buff *skb = hhf_dequeue(sch); kfree_skb(skb); } - qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen); + qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, + prev_backlog - sch->qstats.backlog); sch_tree_unlock(sch); return 0; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 15ccd7f8fb2a..87b02ed3d5f2 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -600,6 +600,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) htb_activate(q, cl); } + qdisc_qstats_backlog_inc(sch, 
skb); sch->q.qlen++; return NET_XMIT_SUCCESS; } @@ -889,6 +890,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) ok: qdisc_bstats_update(sch, skb); qdisc_unthrottled(sch); + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; return skb; } @@ -955,6 +957,7 @@ static unsigned int htb_drop(struct Qdisc *sch) unsigned int len; if (cl->un.leaf.q->ops->drop && (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) { + sch->qstats.backlog -= len; sch->q.qlen--; if (!cl->un.leaf.q->q.qlen) htb_deactivate(q, cl); @@ -984,12 +987,12 @@ static void htb_reset(struct Qdisc *sch) } cl->prio_activity = 0; cl->cmode = HTB_CAN_SEND; - } } qdisc_watchdog_cancel(&q->watchdog); __skb_queue_purge(&q->direct_queue); sch->q.qlen = 0; + sch->qstats.backlog = 0; memset(q->hlevel, 0, sizeof(q->hlevel)); memset(q->row_mask, 0, sizeof(q->row_mask)); for (i = 0; i < TC_HTB_NUMPRIO; i++) @@ -1163,14 +1166,7 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, cl->common.classid)) == NULL) return -ENOBUFS; - sch_tree_lock(sch); - *old = cl->un.leaf.q; - cl->un.leaf.q = new; - if (*old != NULL) { - qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); - qdisc_reset(*old); - } - sch_tree_unlock(sch); + *old = qdisc_replace(sch, new, &cl->un.leaf.q); return 0; } @@ -1272,7 +1268,6 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) { struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)arg; - unsigned int qlen; struct Qdisc *new_q = NULL; int last_child = 0; @@ -1292,9 +1287,11 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) sch_tree_lock(sch); if (!cl->level) { - qlen = cl->un.leaf.q->q.qlen; + unsigned int qlen = cl->un.leaf.q->q.qlen; + unsigned int backlog = cl->un.leaf.q->qstats.backlog; + qdisc_reset(cl->un.leaf.q); - qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen); + qdisc_tree_reduce_backlog(cl->un.leaf.q, qlen, backlog); } /* delete from hash and active; remainder in destroy_class */ @@ -1428,10 +1425,11 @@ 
static int htb_change_class(struct Qdisc *sch, u32 classid, sch_tree_lock(sch); if (parent && !parent->level) { unsigned int qlen = parent->un.leaf.q->q.qlen; + unsigned int backlog = parent->un.leaf.q->qstats.backlog; /* turn parent into inner node */ qdisc_reset(parent->un.leaf.q); - qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen); + qdisc_tree_reduce_backlog(parent->un.leaf.q, qlen, backlog); qdisc_destroy(parent->un.leaf.q); if (parent->prio_activity) htb_deactivate(q, parent); diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 4e904ca0af9d..bcdd54bb101c 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -218,7 +218,8 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt) if (q->queues[i] != &noop_qdisc) { struct Qdisc *child = q->queues[i]; q->queues[i] = &noop_qdisc; - qdisc_tree_decrease_qlen(child, child->q.qlen); + qdisc_tree_reduce_backlog(child, child->q.qlen, + child->qstats.backlog); qdisc_destroy(child); } } @@ -238,8 +239,9 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt) q->queues[i] = child; if (old != &noop_qdisc) { - qdisc_tree_decrease_qlen(old, - old->q.qlen); + qdisc_tree_reduce_backlog(old, + old->q.qlen, + old->qstats.backlog); qdisc_destroy(old); } sch_tree_unlock(sch); @@ -303,13 +305,7 @@ static int multiq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, if (new == NULL) new = &noop_qdisc; - sch_tree_lock(sch); - *old = q->queues[band]; - q->queues[band] = new; - qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); - qdisc_reset(*old); - sch_tree_unlock(sch); - + *old = qdisc_replace(sch, new, &q->queues[band]); return 0; } diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 5abd1d9de989..b7c29d5b6f04 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -395,6 +395,25 @@ static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) sch->q.qlen++; } +/* netem can't properly corrupt a megapacket (like we get from GSO), so instead + * when we 
statistically choose to corrupt one, we instead segment it, returning + * the first packet to be corrupted, and re-enqueue the remaining frames + */ +static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch) +{ + struct sk_buff *segs; + netdev_features_t features = netif_skb_features(skb); + + segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); + + if (IS_ERR_OR_NULL(segs)) { + qdisc_reshape_fail(skb, sch); + return NULL; + } + consume_skb(skb); + return segs; +} + /* * Insert one skb into qdisc. * Note: parent depends on return value to account for queue length. @@ -407,7 +426,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) /* We don't fill cb now as skb_unshare() may invalidate it */ struct netem_skb_cb *cb; struct sk_buff *skb2; + struct sk_buff *segs = NULL; + unsigned int len = 0, last_len, prev_len = qdisc_pkt_len(skb); + int nb = 0; int count = 1; + int rc = NET_XMIT_SUCCESS; /* Random duplication */ if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) @@ -453,10 +476,23 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) * do it now in software before we mangle it. 
*/ if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) { + if (skb_is_gso(skb)) { + segs = netem_segment(skb, sch); + if (!segs) + return NET_XMIT_DROP; + } else { + segs = skb; + } + + skb = segs; + segs = segs->next; + if (!(skb = skb_unshare(skb, GFP_ATOMIC)) || (skb->ip_summed == CHECKSUM_PARTIAL && - skb_checksum_help(skb))) - return qdisc_drop(skb, sch); + skb_checksum_help(skb))) { + rc = qdisc_drop(skb, sch); + goto finish_segs; + } skb->data[prandom_u32() % skb_headlen(skb)] ^= 1<<(prandom_u32() % 8); @@ -516,6 +552,27 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) sch->qstats.requeues++; } +finish_segs: + if (segs) { + while (segs) { + skb2 = segs->next; + segs->next = NULL; + qdisc_skb_cb(segs)->pkt_len = segs->len; + last_len = segs->len; + rc = qdisc_enqueue(segs, sch); + if (rc != NET_XMIT_SUCCESS) { + if (net_xmit_drop_count(rc)) + qdisc_qstats_drop(sch); + } else { + nb++; + len += last_len; + } + segs = skb2; + } + sch->q.qlen += nb; + if (nb > 1) + qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len); + } return NET_XMIT_SUCCESS; } @@ -593,13 +650,14 @@ deliver: #endif if (q->qdisc) { + unsigned int pkt_len = qdisc_pkt_len(skb); int err = qdisc_enqueue(skb, q->qdisc); - if (unlikely(err != NET_XMIT_SUCCESS)) { - if (net_xmit_drop_count(err)) { - qdisc_qstats_drop(sch); - qdisc_tree_decrease_qlen(sch, 1); - } + if (err != NET_XMIT_SUCCESS && + net_xmit_drop_count(err)) { + qdisc_qstats_drop(sch); + qdisc_tree_reduce_backlog(sch, 1, + pkt_len); } goto tfifo_dequeue; } @@ -1037,15 +1095,7 @@ static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, { struct netem_sched_data *q = qdisc_priv(sch); - sch_tree_lock(sch); - *old = q->qdisc; - q->qdisc = new; - if (*old) { - qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); - qdisc_reset(*old); - } - sch_tree_unlock(sch); - + *old = qdisc_replace(sch, new, &q->qdisc); return 0; } diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index 
b783a446d884..71ae3b9629f9 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -183,7 +183,7 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt) { struct pie_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_PIE_MAX + 1]; - unsigned int qlen; + unsigned int qlen, dropped = 0; int err; if (!opt) @@ -232,10 +232,11 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt) while (sch->q.qlen > sch->limit) { struct sk_buff *skb = __skb_dequeue(&sch->q); + dropped += qdisc_pkt_len(skb); qdisc_qstats_backlog_dec(sch, skb); qdisc_drop(skb, sch); } - qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen); + qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped); sch_tree_unlock(sch); return 0; diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index ba6487f2741f..fee1b15506b2 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -191,7 +191,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) struct Qdisc *child = q->queues[i]; q->queues[i] = &noop_qdisc; if (child != &noop_qdisc) { - qdisc_tree_decrease_qlen(child, child->q.qlen); + qdisc_tree_reduce_backlog(child, child->q.qlen, child->qstats.backlog); qdisc_destroy(child); } } @@ -210,8 +210,9 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) q->queues[i] = child; if (old != &noop_qdisc) { - qdisc_tree_decrease_qlen(old, - old->q.qlen); + qdisc_tree_reduce_backlog(old, + old->q.qlen, + old->qstats.backlog); qdisc_destroy(old); } sch_tree_unlock(sch); @@ -268,13 +269,7 @@ static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, if (new == NULL) new = &noop_qdisc; - sch_tree_lock(sch); - *old = q->queues[band]; - q->queues[band] = new; - qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); - qdisc_reset(*old); - sch_tree_unlock(sch); - + *old = qdisc_replace(sch, new, &q->queues[band]); return 0; } diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 3dc3a6e56052..8d2d8d953432 100644 --- a/net/sched/sch_qfq.c +++ 
b/net/sched/sch_qfq.c @@ -220,9 +220,10 @@ static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid) static void qfq_purge_queue(struct qfq_class *cl) { unsigned int len = cl->qdisc->q.qlen; + unsigned int backlog = cl->qdisc->qstats.backlog; qdisc_reset(cl->qdisc); - qdisc_tree_decrease_qlen(cl->qdisc, len); + qdisc_tree_reduce_backlog(cl->qdisc, len, backlog); } static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = { @@ -617,11 +618,7 @@ static int qfq_graft_class(struct Qdisc *sch, unsigned long arg, new = &noop_qdisc; } - sch_tree_lock(sch); - qfq_purge_queue(cl); - *old = cl->qdisc; - cl->qdisc = new; - sch_tree_unlock(sch); + *old = qdisc_replace(sch, new, &cl->qdisc); return 0; } diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 6c0534cc7758..8c0508c0e287 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -210,7 +210,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt) q->flags = ctl->flags; q->limit = ctl->limit; if (child) { - qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); + qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen, + q->qdisc->qstats.backlog); qdisc_destroy(q->qdisc); q->qdisc = child; } @@ -313,12 +314,7 @@ static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, if (new == NULL) new = &noop_qdisc; - sch_tree_lock(sch); - *old = q->qdisc; - q->qdisc = new; - qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); - qdisc_reset(*old); - sch_tree_unlock(sch); + *old = qdisc_replace(sch, new, &q->qdisc); return 0; } diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 5bbb6332ec57..c69611640fa5 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -510,7 +510,8 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt) sch_tree_lock(sch); - qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); + qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen, + q->qdisc->qstats.backlog); qdisc_destroy(q->qdisc); q->qdisc = child; @@ -606,12 +607,7 @@ static int 
sfb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, if (new == NULL) new = &noop_qdisc; - sch_tree_lock(sch); - *old = q->qdisc; - q->qdisc = new; - qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); - qdisc_reset(*old); - sch_tree_unlock(sch); + *old = qdisc_replace(sch, new, &q->qdisc); return 0; } diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 3abab534eb5c..498f0a2cb47f 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -346,7 +346,7 @@ static int sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct sfq_sched_data *q = qdisc_priv(sch); - unsigned int hash; + unsigned int hash, dropped; sfq_index x, qlen; struct sfq_slot *slot; int uninitialized_var(ret); @@ -461,7 +461,7 @@ enqueue: return NET_XMIT_SUCCESS; qlen = slot->qlen; - sfq_drop(sch); + dropped = sfq_drop(sch); /* Return Congestion Notification only if we dropped a packet * from this flow. */ @@ -469,7 +469,7 @@ enqueue: return NET_XMIT_CN; /* As we dropped a packet, better let upper stack know this */ - qdisc_tree_decrease_qlen(sch, 1); + qdisc_tree_reduce_backlog(sch, 1, dropped); return NET_XMIT_SUCCESS; } @@ -537,6 +537,7 @@ static void sfq_rehash(struct Qdisc *sch) struct sfq_slot *slot; struct sk_buff_head list; int dropped = 0; + unsigned int drop_len = 0; __skb_queue_head_init(&list); @@ -565,6 +566,7 @@ static void sfq_rehash(struct Qdisc *sch) if (x >= SFQ_MAX_FLOWS) { drop: qdisc_qstats_backlog_dec(sch, skb); + drop_len += qdisc_pkt_len(skb); kfree_skb(skb); dropped++; continue; @@ -594,7 +596,7 @@ drop: } } sch->q.qlen -= dropped; - qdisc_tree_decrease_qlen(sch, dropped); + qdisc_tree_reduce_backlog(sch, dropped, drop_len); } static void sfq_perturbation(unsigned long arg) @@ -618,7 +620,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) struct sfq_sched_data *q = qdisc_priv(sch); struct tc_sfq_qopt *ctl = nla_data(opt); struct tc_sfq_qopt_v1 *ctl_v1 = NULL; - unsigned int qlen; + unsigned int qlen, dropped = 0; struct red_parms *p = 
NULL; if (opt->nla_len < nla_attr_size(sizeof(*ctl))) @@ -667,8 +669,8 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) qlen = sch->q.qlen; while (sch->q.qlen > q->limit) - sfq_drop(sch); - qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen); + dropped += sfq_drop(sch); + qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped); del_timer(&q->perturb_timer); if (q->perturb_period) { diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index a4afde14e865..c2fbde742f37 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -160,6 +160,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch) struct tbf_sched_data *q = qdisc_priv(sch); struct sk_buff *segs, *nskb; netdev_features_t features = netif_skb_features(skb); + unsigned int len = 0, prev_len = qdisc_pkt_len(skb); int ret, nb; segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); @@ -172,6 +173,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch) nskb = segs->next; segs->next = NULL; qdisc_skb_cb(segs)->pkt_len = segs->len; + len += segs->len; ret = qdisc_enqueue(segs, q->qdisc); if (ret != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(ret)) @@ -183,7 +185,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch) } sch->q.qlen += nb; if (nb > 1) - qdisc_tree_decrease_qlen(sch, 1 - nb); + qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len); consume_skb(skb); return nb > 0 ? 
NET_XMIT_SUCCESS : NET_XMIT_DROP; } @@ -399,7 +401,8 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) sch_tree_lock(sch); if (child) { - qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); + qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen, + q->qdisc->qstats.backlog); qdisc_destroy(q->qdisc); q->qdisc = child; } @@ -502,13 +505,7 @@ static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, if (new == NULL) new = &noop_qdisc; - sch_tree_lock(sch); - *old = q->qdisc; - q->qdisc = new; - qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); - qdisc_reset(*old); - sch_tree_unlock(sch); - + *old = qdisc_replace(sch, new, &q->qdisc); return 0; } diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index ec529121f38a..ce46f1c7f133 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -526,6 +526,8 @@ static int sctp_v6_cmp_addr(const union sctp_addr *addr1, } return 0; } + if (addr1->v6.sin6_port != addr2->v6.sin6_port) + return 0; if (!ipv6_addr_equal(&addr1->v6.sin6_addr, &addr2->v6.sin6_addr)) return 0; /* If this is a linklocal address, compare the scope_id. */ diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 3d9ea9a48289..8b4ff315695e 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -60,6 +60,8 @@ #include <net/inet_common.h> #include <net/inet_ecn.h> +#define MAX_SCTP_PORT_HASH_ENTRIES (64 * 1024) + /* Global data structures. */ struct sctp_globals sctp_globals __read_mostly; @@ -1352,6 +1354,8 @@ static __init int sctp_init(void) unsigned long limit; int max_share; int order; + int num_entries; + int max_entry_order; sock_skb_cb_check_size(sizeof(struct sctp_ulpevent)); @@ -1404,14 +1408,24 @@ static __init int sctp_init(void) /* Size and allocate the association hash table. * The methodology is similar to that of the tcp hash tables. + * Though not identical. 
Start by getting a goal size */ if (totalram_pages >= (128 * 1024)) goal = totalram_pages >> (22 - PAGE_SHIFT); else goal = totalram_pages >> (24 - PAGE_SHIFT); - for (order = 0; (1UL << order) < goal; order++) - ; + /* Then compute the page order for said goal */ + order = get_order(goal); + + /* Now compute the required page order for the maximum sized table we + * want to create + */ + max_entry_order = get_order(MAX_SCTP_PORT_HASH_ENTRIES * + sizeof(struct sctp_bind_hashbucket)); + + /* Limit the page order by that maximum hash table size */ + order = min(order, max_entry_order); do { sctp_assoc_hashsize = (1UL << order) * PAGE_SIZE / @@ -1445,20 +1459,35 @@ static __init int sctp_init(void) INIT_HLIST_HEAD(&sctp_ep_hashtable[i].chain); } - /* Allocate and initialize the SCTP port hash table. */ + /* Allocate and initialize the SCTP port hash table. + * Note that order is initalized to start at the max sized + * table we want to support. If we can't get that many pages + * reduce the order and try again + */ do { - sctp_port_hashsize = (1UL << order) * PAGE_SIZE / - sizeof(struct sctp_bind_hashbucket); - if ((sctp_port_hashsize > (64 * 1024)) && order > 0) - continue; sctp_port_hashtable = (struct sctp_bind_hashbucket *) __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, order); } while (!sctp_port_hashtable && --order > 0); + if (!sctp_port_hashtable) { pr_err("Failed bind hash alloc\n"); status = -ENOMEM; goto err_bhash_alloc; } + + /* Now compute the number of entries that will fit in the + * port hash space we allocated + */ + num_entries = (1UL << order) * PAGE_SIZE / + sizeof(struct sctp_bind_hashbucket); + + /* And finish by rounding it down to the nearest power of two + * this wastes some memory of course, but its needed because + * the hash function operates based on the assumption that + * that the number of entries is a power of two + */ + sctp_port_hashsize = rounddown_pow_of_two(num_entries); + for (i = 0; i < sctp_port_hashsize; i++) { 
spin_lock_init(&sctp_port_hashtable[i].lock); INIT_HLIST_HEAD(&sctp_port_hashtable[i].chain); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index ef1d90fdc773..be1489fc3234 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5542,6 +5542,7 @@ static int sctp_getsockopt_hmac_ident(struct sock *sk, int len, struct sctp_hmac_algo_param *hmacs; __u16 data_len = 0; u32 num_idents; + int i; if (!ep->auth_enable) return -EACCES; @@ -5559,8 +5560,12 @@ static int sctp_getsockopt_hmac_ident(struct sock *sk, int len, return -EFAULT; if (put_user(num_idents, &p->shmac_num_idents)) return -EFAULT; - if (copy_to_user(p->shmac_idents, hmacs->hmac_ids, data_len)) - return -EFAULT; + for (i = 0; i < num_idents; i++) { + __u16 hmacid = ntohs(hmacs->hmac_ids[i]); + + if (copy_to_user(&p->shmac_idents[i], &hmacid, sizeof(__u16))) + return -EFAULT; + } return 0; } @@ -6640,6 +6645,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs) if (cmsgs->srinfo->sinfo_flags & ~(SCTP_UNORDERED | SCTP_ADDR_OVER | + SCTP_SACK_IMMEDIATELY | SCTP_ABORT | SCTP_EOF)) return -EINVAL; break; @@ -6663,6 +6669,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs) if (cmsgs->sinfo->snd_flags & ~(SCTP_UNORDERED | SCTP_ADDR_OVER | + SCTP_SACK_IMMEDIATELY | SCTP_ABORT | SCTP_EOF)) return -EINVAL; break; diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 26d50c565f54..3e0fc5127225 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -320,7 +320,7 @@ static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write, struct ctl_table tbl; bool changed = false; char *none = "none"; - char tmp[8]; + char tmp[8] = {0}; int ret; memset(&tbl, 0, sizeof(struct ctl_table)); diff --git a/net/socket.c b/net/socket.c index d730ef9dfbf0..263b334ec5e4 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2238,31 +2238,31 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, break; } -out_put: - fput_light(sock->file, 
fput_needed); - if (err == 0) - return datagrams; + goto out_put; - if (datagrams != 0) { + if (datagrams == 0) { + datagrams = err; + goto out_put; + } + + /* + * We may return less entries than requested (vlen) if the + * sock is non block and there aren't enough datagrams... + */ + if (err != -EAGAIN) { /* - * We may return less entries than requested (vlen) if the - * sock is non block and there aren't enough datagrams... + * ... or if recvmsg returns an error after we + * received some datagrams, where we record the + * error to return on the next call or if the + * app asks about it using getsockopt(SO_ERROR). */ - if (err != -EAGAIN) { - /* - * ... or if recvmsg returns an error after we - * received some datagrams, where we record the - * error to return on the next call or if the - * app asks about it using getsockopt(SO_ERROR). - */ - sock->sk->sk_err = -err; - } - - return datagrams; + sock->sk->sk_err = -err; } +out_put: + fput_light(sock->file, fput_needed); - return err; + return datagrams; } SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg, diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 1095be9c80ab..4605dc73def6 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -857,8 +857,8 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g goto out; if (svc_getnl(&buf->head[0]) != seq) goto out; - /* trim off the mic at the end before returning */ - xdr_buf_trim(buf, mic.len + 4); + /* trim off the mic and padding at the end before returning */ + xdr_buf_trim(buf, round_up_to_quad(mic.len) + 4); stat = 0; out: kfree(mic.data); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 5e4f815c2b34..63fb5ee212cf 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1182,14 +1182,14 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h) } crq->q.reader = 0; - crq->item = cache_get(h); crq->buf = buf; 
crq->len = 0; crq->readers = 0; spin_lock(&queue_lock); - if (test_bit(CACHE_PENDING, &h->flags)) + if (test_bit(CACHE_PENDING, &h->flags)) { + crq->item = cache_get(h); list_add_tail(&crq->q.list, &detail->queue); - else + } else /* Lost a race, no longer PENDING, so don't enqueue */ ret = -EAGAIN; spin_unlock(&queue_lock); @@ -1225,7 +1225,7 @@ int qword_get(char **bpp, char *dest, int bufsize) if (bp[0] == '\\' && bp[1] == 'x') { /* HEX STRING */ bp += 2; - while (len < bufsize) { + while (len < bufsize - 1) { int h, l; h = hex_to_bin(bp[0]); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 23608eb0ded2..7a93922457ff 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -442,7 +442,7 @@ out_no_rpciod: return ERR_PTR(err); } -struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, +static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, struct rpc_xprt *xprt) { struct rpc_clnt *clnt = NULL; @@ -474,7 +474,6 @@ struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, return clnt; } -EXPORT_SYMBOL_GPL(rpc_create_xprt); /** * rpc_create - create an RPC client and transport with one call @@ -500,6 +499,15 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) }; char servername[48]; + if (args->bc_xprt) { + WARN_ON(args->protocol != XPRT_TRANSPORT_BC_TCP); + xprt = args->bc_xprt->xpt_bc_xprt; + if (xprt) { + xprt_get(xprt); + return rpc_create_xprt(args, xprt); + } + } + if (args->flags & RPC_CLNT_CREATE_INFINITE_SLOTS) xprtargs.flags |= XPRT_CREATE_INFINITE_SLOTS; if (args->flags & RPC_CLNT_CREATE_NO_IDLE_TIMEOUT) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 2ffaf6a79499..027c9ef8a263 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -398,7 +398,6 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, if (unlikely(!sock)) return -ENOTSOCK; - clear_bit(SOCKWQ_ASYNC_NOSPACE, &sock->flags); if (base != 0) { addr = NULL; addrlen = 0; @@ -442,7 +441,6 @@ static 
void xs_nospace_callback(struct rpc_task *task) struct sock_xprt *transport = container_of(task->tk_rqstp->rq_xprt, struct sock_xprt, xprt); transport->inet->sk_write_pending--; - clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags); } /** @@ -467,20 +465,11 @@ static int xs_nospace(struct rpc_task *task) /* Don't race with disconnect */ if (xprt_connected(xprt)) { - if (test_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags)) { - /* - * Notify TCP that we're limited by the application - * window size - */ - set_bit(SOCK_NOSPACE, &transport->sock->flags); - sk->sk_write_pending++; - /* ...and wait for more buffer space */ - xprt_wait_for_buffer_space(task, xs_nospace_callback); - } - } else { - clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags); + /* wait for more buffer space */ + sk->sk_write_pending++; + xprt_wait_for_buffer_space(task, xs_nospace_callback); + } else ret = -ENOTCONN; - } spin_unlock_bh(&xprt->transport_lock); @@ -616,9 +605,6 @@ process_status: case -EAGAIN: status = xs_nospace(task); break; - default: - dprintk("RPC: sendmsg returned unrecognized error %d\n", - -status); case -ENETUNREACH: case -ENOBUFS: case -EPIPE: @@ -626,7 +612,10 @@ process_status: case -EPERM: /* When the server has died, an ICMP port unreachable message * prompts ECONNREFUSED. 
*/ - clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags); + break; + default: + dprintk("RPC: sendmsg returned unrecognized error %d\n", + -status); } return status; @@ -706,16 +695,16 @@ static int xs_tcp_send_request(struct rpc_task *task) case -EAGAIN: status = xs_nospace(task); break; - default: - dprintk("RPC: sendmsg returned unrecognized error %d\n", - -status); case -ECONNRESET: case -ECONNREFUSED: case -ENOTCONN: case -EADDRINUSE: case -ENOBUFS: case -EPIPE: - clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags); + break; + default: + dprintk("RPC: sendmsg returned unrecognized error %d\n", + -status); } return status; @@ -1609,19 +1598,23 @@ static void xs_tcp_state_change(struct sock *sk) static void xs_write_space(struct sock *sk) { - struct socket *sock; + struct socket_wq *wq; struct rpc_xprt *xprt; - if (unlikely(!(sock = sk->sk_socket))) + if (!sk->sk_socket) return; - clear_bit(SOCK_NOSPACE, &sock->flags); + clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); if (unlikely(!(xprt = xprt_from_sock(sk)))) return; - if (test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sock->flags) == 0) - return; + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (!wq || test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags) == 0) + goto out; xprt_write_space(xprt); +out: + rcu_read_unlock(); } /** diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index f34e535e93bd..1b58866175e6 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -20,6 +20,7 @@ #include <linux/list.h> #include <linux/workqueue.h> #include <linux/if_vlan.h> +#include <linux/rtnetlink.h> #include <net/ip_fib.h> #include <net/switchdev.h> @@ -565,7 +566,6 @@ int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj, } EXPORT_SYMBOL_GPL(switchdev_port_obj_dump); -static DEFINE_MUTEX(switchdev_mutex); static RAW_NOTIFIER_HEAD(switchdev_notif_chain); /** @@ -580,9 +580,9 @@ int register_switchdev_notifier(struct notifier_block *nb) { int err; - 
mutex_lock(&switchdev_mutex); + rtnl_lock(); err = raw_notifier_chain_register(&switchdev_notif_chain, nb); - mutex_unlock(&switchdev_mutex); + rtnl_unlock(); return err; } EXPORT_SYMBOL_GPL(register_switchdev_notifier); @@ -598,9 +598,9 @@ int unregister_switchdev_notifier(struct notifier_block *nb) { int err; - mutex_lock(&switchdev_mutex); + rtnl_lock(); err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb); - mutex_unlock(&switchdev_mutex); + rtnl_unlock(); return err; } EXPORT_SYMBOL_GPL(unregister_switchdev_notifier); @@ -614,16 +614,17 @@ EXPORT_SYMBOL_GPL(unregister_switchdev_notifier); * Call all network notifier blocks. This should be called by driver * when it needs to propagate hardware event. * Return values are same as for atomic_notifier_call_chain(). + * rtnl_lock must be held. */ int call_switchdev_notifiers(unsigned long val, struct net_device *dev, struct switchdev_notifier_info *info) { int err; + ASSERT_RTNL(); + info->dev = dev; - mutex_lock(&switchdev_mutex); err = raw_notifier_call_chain(&switchdev_notif_chain, val, info); - mutex_unlock(&switchdev_mutex); return err; } EXPORT_SYMBOL_GPL(call_switchdev_notifiers); @@ -1168,6 +1169,7 @@ int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, .obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB, .dst = dst, .dst_len = dst_len, + .fi = fi, .tos = tos, .type = type, .nlflags = nlflags, @@ -1176,8 +1178,6 @@ int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, struct net_device *dev; int err = 0; - memcpy(&ipv4_fib.fi, fi, sizeof(ipv4_fib.fi)); - /* Don't offload route if using custom ip rules or if * IPv4 FIB offloading has been disabled completely. 
*/ @@ -1221,6 +1221,7 @@ int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, .obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB, .dst = dst, .dst_len = dst_len, + .fi = fi, .tos = tos, .type = type, .nlflags = 0, @@ -1229,8 +1230,6 @@ int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, struct net_device *dev; int err = 0; - memcpy(&ipv4_fib.fi, fi, sizeof(ipv4_fib.fi)); - if (!(fi->fib_flags & RTNH_F_OFFLOAD)) return 0; diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 9dc239dfe192..92e367a0a5ce 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -399,8 +399,10 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_LINK_GET); - if (!hdr) + if (!hdr) { + tipc_bcast_unlock(net); return -EMSGSIZE; + } attrs = nla_nest_start(msg->skb, TIPC_NLA_LINK); if (!attrs) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 1eadc95e1132..2ed732bfe94b 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -802,7 +802,7 @@ static int tipc_nl_compat_name_table_dump(struct tipc_nl_compat_msg *msg, goto out; tipc_tlv_sprintf(msg->rep, "%-10u %s", - nla_get_u32(publ[TIPC_NLA_PUBL_REF]), + nla_get_u32(publ[TIPC_NLA_PUBL_KEY]), scope_str[nla_get_u32(publ[TIPC_NLA_PUBL_SCOPE])]); out: tipc_tlv_sprintf(msg->rep, "\n"); diff --git a/net/tipc/node.c b/net/tipc/node.c index 20cddec0a43c..3926b561f873 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -168,12 +168,6 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities) skb_queue_head_init(&n_ptr->bc_entry.inputq1); __skb_queue_head_init(&n_ptr->bc_entry.arrvq); skb_queue_head_init(&n_ptr->bc_entry.inputq2); - hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]); - list_for_each_entry_rcu(temp_node, &tn->node_list, list) { - if (n_ptr->addr < temp_node->addr) - break; - } - list_add_tail_rcu(&n_ptr->list, &temp_node->list); 
n_ptr->state = SELF_DOWN_PEER_LEAVING; n_ptr->signature = INVALID_NODE_SIG; n_ptr->active_links[0] = INVALID_BEARER_ID; @@ -193,6 +187,12 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities) tipc_node_get(n_ptr); setup_timer(&n_ptr->timer, tipc_node_timeout, (unsigned long)n_ptr); n_ptr->keepalive_intv = U32_MAX; + hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]); + list_for_each_entry_rcu(temp_node, &tn->node_list, list) { + if (n_ptr->addr < temp_node->addr) + break; + } + list_add_tail_rcu(&n_ptr->list, &temp_node->list); exit: spin_unlock_bh(&tn->node_list_lock); return n_ptr; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index b53246fb0412..9b713e0ce00d 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -673,7 +673,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, struct tipc_sock *tsk = tipc_sk(sk); struct net *net = sock_net(sk); struct tipc_msg *mhdr = &tsk->phdr; - struct sk_buff_head *pktchain = &sk->sk_write_queue; + struct sk_buff_head pktchain; struct iov_iter save = msg->msg_iter; uint mtu; int rc; @@ -687,14 +687,16 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, msg_set_nameupper(mhdr, seq->upper); msg_set_hdr_sz(mhdr, MCAST_H_SIZE); + skb_queue_head_init(&pktchain); + new_mtu: mtu = tipc_bcast_get_mtu(net); - rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, pktchain); + rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, &pktchain); if (unlikely(rc < 0)) return rc; do { - rc = tipc_bcast_xmit(net, pktchain); + rc = tipc_bcast_xmit(net, &pktchain); if (likely(!rc)) return dsz; @@ -704,7 +706,7 @@ new_mtu: if (!rc) continue; } - __skb_queue_purge(pktchain); + __skb_queue_purge(&pktchain); if (rc == -EMSGSIZE) { msg->msg_iter = save; goto new_mtu; @@ -863,7 +865,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz) struct net *net = sock_net(sk); struct tipc_msg *mhdr = &tsk->phdr; u32 dnode, dport; - struct 
sk_buff_head *pktchain = &sk->sk_write_queue; + struct sk_buff_head pktchain; struct sk_buff *skb; struct tipc_name_seq *seq; struct iov_iter save; @@ -924,17 +926,18 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz) msg_set_hdr_sz(mhdr, BASIC_H_SIZE); } + skb_queue_head_init(&pktchain); save = m->msg_iter; new_mtu: mtu = tipc_node_get_mtu(net, dnode, tsk->portid); - rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, pktchain); + rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, &pktchain); if (rc < 0) return rc; do { - skb = skb_peek(pktchain); + skb = skb_peek(&pktchain); TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong; - rc = tipc_node_xmit(net, pktchain, dnode, tsk->portid); + rc = tipc_node_xmit(net, &pktchain, dnode, tsk->portid); if (likely(!rc)) { if (sock->state != SS_READY) sock->state = SS_CONNECTING; @@ -946,7 +949,7 @@ new_mtu: if (!rc) continue; } - __skb_queue_purge(pktchain); + __skb_queue_purge(&pktchain); if (rc == -EMSGSIZE) { m->msg_iter = save; goto new_mtu; @@ -1016,7 +1019,7 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz) struct net *net = sock_net(sk); struct tipc_sock *tsk = tipc_sk(sk); struct tipc_msg *mhdr = &tsk->phdr; - struct sk_buff_head *pktchain = &sk->sk_write_queue; + struct sk_buff_head pktchain; DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); u32 portid = tsk->portid; int rc = -EINVAL; @@ -1044,17 +1047,19 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz) timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); dnode = tsk_peer_node(tsk); + skb_queue_head_init(&pktchain); next: save = m->msg_iter; mtu = tsk->max_pkt; send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE); - rc = tipc_msg_build(mhdr, m, sent, send, mtu, pktchain); + rc = tipc_msg_build(mhdr, m, sent, send, mtu, &pktchain); if (unlikely(rc < 0)) return rc; + do { if (likely(!tsk_conn_cong(tsk))) { - rc = tipc_node_xmit(net, pktchain, dnode, portid); + rc = 
tipc_node_xmit(net, &pktchain, dnode, portid); if (likely(!rc)) { tsk->sent_unacked++; sent += send; @@ -1063,7 +1068,7 @@ next: goto next; } if (rc == -EMSGSIZE) { - __skb_queue_purge(pktchain); + __skb_queue_purge(&pktchain); tsk->max_pkt = tipc_node_get_mtu(net, dnode, portid); m->msg_iter = save; @@ -1077,7 +1082,7 @@ next: rc = tipc_wait_for_sndpkt(sock, &timeo); } while (!rc); - __skb_queue_purge(pktchain); + __skb_queue_purge(&pktchain); return sent ? sent : rc; } @@ -2809,6 +2814,9 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb) if (err) return err; + if (!attrs[TIPC_NLA_SOCK]) + return -EINVAL; + err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX, attrs[TIPC_NLA_SOCK], tipc_nl_sock_policy); diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 350cca33ee0a..69ee2eeef968 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -289,15 +289,14 @@ static void tipc_subscrb_rcv_cb(struct net *net, int conid, struct sockaddr_tipc *addr, void *usr_data, void *buf, size_t len) { - struct tipc_subscriber *subscriber = usr_data; + struct tipc_subscriber *subscrb = usr_data; struct tipc_subscription *sub = NULL; struct tipc_net *tn = net_generic(net, tipc_net_id); - tipc_subscrp_create(net, (struct tipc_subscr *)buf, subscriber, &sub); - if (sub) - tipc_nametbl_subscribe(sub); - else - tipc_conn_terminate(tn->topsrv, subscriber->conid); + if (tipc_subscrp_create(net, (struct tipc_subscr *)buf, subscrb, &sub)) + return tipc_conn_terminate(tn->topsrv, subscrb->conid); + + tipc_nametbl_subscribe(sub); } /* Handle one request to establish a new subscriber */ diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index ef05cd9403d4..6579fd6e7459 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -315,7 +315,7 @@ static struct sock *unix_find_socket_byinode(struct inode *i) &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { struct dentry *dentry = unix_sk(s)->path.dentry; - if (dentry && d_backing_inode(dentry) == i) { + if (dentry && 
d_real_inode(dentry) == i) { sock_hold(s); goto found; } @@ -911,7 +911,7 @@ static struct sock *unix_find_other(struct net *net, err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path); if (err) goto fail; - inode = d_backing_inode(path.dentry); + inode = d_real_inode(path.dentry); err = inode_permission(inode, MAY_WRITE); if (err) goto put_fail; @@ -1048,7 +1048,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out_up; } addr->hash = UNIX_HASH_SIZE; - hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1); + hash = d_real_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1); spin_lock(&unix_table_lock); u->path = u_path; list = &unix_socket_table[hash]; @@ -1496,7 +1496,7 @@ static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb) UNIXCB(skb).fp = NULL; for (i = scm->fp->count-1; i >= 0; i--) - unix_notinflight(scm->fp->fp[i]); + unix_notinflight(scm->fp->user, scm->fp->fp[i]); } static void unix_destruct_scm(struct sk_buff *skb) @@ -1513,6 +1513,21 @@ static void unix_destruct_scm(struct sk_buff *skb) sock_wfree(skb); } +/* + * The "user->unix_inflight" variable is protected by the garbage + * collection lock, and we just read it locklessly here. If you go + * over the limit, there might be a tiny race in actually noticing + * it across threads. Tough. 
+ */ +static inline bool too_many_unix_fds(struct task_struct *p) +{ + struct user_struct *user = current_user(); + + if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE))) + return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN); + return false; +} + #define MAX_RECURSION_LEVEL 4 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) @@ -1521,6 +1536,9 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) unsigned char max_level = 0; int unix_sock_count = 0; + if (too_many_unix_fds(current)) + return -ETOOMANYREFS; + for (i = scm->fp->count - 1; i >= 0; i--) { struct sock *sk = unix_get_socket(scm->fp->fp[i]); @@ -1542,10 +1560,8 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) if (!UNIXCB(skb).fp) return -ENOMEM; - if (unix_sock_count) { - for (i = scm->fp->count - 1; i >= 0; i--) - unix_inflight(scm->fp->fp[i]); - } + for (i = scm->fp->count - 1; i >= 0; i--) + unix_inflight(scm->fp->user, scm->fp->fp[i]); return max_level; } @@ -1765,7 +1781,12 @@ restart_locked: goto out_unlock; } - if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) { + /* other == sk && unix_peer(other) != sk if + * - unix_peer(sk) == NULL, destination address bound to sk + * - unix_peer(sk) == sk by time of get but disconnected before lock + */ + if (other != sk && + unlikely(unix_peer(other) != sk && unix_recvq_full(other))) { if (timeo) { timeo = unix_wait_for_peer(other, timeo); @@ -2254,13 +2275,15 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state) size_t size = state->size; unsigned int last_len; - err = -EINVAL; - if (sk->sk_state != TCP_ESTABLISHED) + if (unlikely(sk->sk_state != TCP_ESTABLISHED)) { + err = -EINVAL; goto out; + } - err = -EOPNOTSUPP; - if (flags & MSG_OOB) + if (unlikely(flags & MSG_OOB)) { + err = -EOPNOTSUPP; goto out; + } target = sock_rcvlowat(sk, flags & MSG_WAITALL, size); timeo = sock_rcvtimeo(sk, noblock); @@ -2306,9 +2329,11 @@ again: goto 
unlock; unix_state_unlock(sk); - err = -EAGAIN; - if (!timeo) + if (!timeo) { + err = -EAGAIN; break; + } + mutex_unlock(&u->readlock); timeo = unix_stream_data_wait(sk, timeo, last, @@ -2316,6 +2341,7 @@ again: if (signal_pending(current)) { err = sock_intr_errno(timeo); + scm_destroy(&scm); goto out; } diff --git a/net/unix/diag.c b/net/unix/diag.c index c512f64d5287..4d9679701a6d 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -220,7 +220,7 @@ done: return skb->len; } -static struct sock *unix_lookup_by_ino(int ino) +static struct sock *unix_lookup_by_ino(unsigned int ino) { int i; struct sock *sk; diff --git a/net/unix/garbage.c b/net/unix/garbage.c index a73a226f2d33..6a0d48525fcf 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -116,15 +116,15 @@ struct sock *unix_get_socket(struct file *filp) * descriptor if it is for an AF_UNIX socket. */ -void unix_inflight(struct file *fp) +void unix_inflight(struct user_struct *user, struct file *fp) { struct sock *s = unix_get_socket(fp); + spin_lock(&unix_gc_lock); + if (s) { struct unix_sock *u = unix_sk(s); - spin_lock(&unix_gc_lock); - if (atomic_long_inc_return(&u->inflight) == 1) { BUG_ON(!list_empty(&u->link)); list_add_tail(&u->link, &gc_inflight_list); @@ -132,25 +132,28 @@ void unix_inflight(struct file *fp) BUG_ON(list_empty(&u->link)); } unix_tot_inflight++; - spin_unlock(&unix_gc_lock); } + user->unix_inflight++; + spin_unlock(&unix_gc_lock); } -void unix_notinflight(struct file *fp) +void unix_notinflight(struct user_struct *user, struct file *fp) { struct sock *s = unix_get_socket(fp); + spin_lock(&unix_gc_lock); + if (s) { struct unix_sock *u = unix_sk(s); - spin_lock(&unix_gc_lock); BUG_ON(list_empty(&u->link)); if (atomic_long_dec_and_test(&u->inflight)) list_del_init(&u->link); unix_tot_inflight--; - spin_unlock(&unix_gc_lock); } + user->unix_inflight--; + spin_unlock(&unix_gc_lock); } static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), diff --git 
a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 7fd1220fbfa0..9b5bd6d142dc 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1794,27 +1794,8 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, else if (sk->sk_shutdown & RCV_SHUTDOWN) err = 0; - if (copied > 0) { - /* We only do these additional bookkeeping/notification steps - * if we actually copied something out of the queue pair - * instead of just peeking ahead. - */ - - if (!(flags & MSG_PEEK)) { - /* If the other side has shutdown for sending and there - * is nothing more to read, then modify the socket - * state. - */ - if (vsk->peer_shutdown & SEND_SHUTDOWN) { - if (vsock_stream_has_data(vsk) <= 0) { - sk->sk_state = SS_UNCONNECTED; - sock_set_flag(sk, SOCK_DONE); - sk->sk_state_change(sk); - } - } - } + if (copied > 0) err = copied; - } out_wait: finish_wait(sk_sleep(sk), &wait); diff --git a/net/wireless/core.c b/net/wireless/core.c index b0915515640e..8f0bac7e03c4 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1147,6 +1147,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, return NOTIFY_DONE; } + wireless_nlevent_flush(); + return NOTIFY_OK; } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 75b0d23ee882..5d89f13a98db 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -13161,7 +13161,7 @@ static int nl80211_netlink_notify(struct notifier_block * nb, struct wireless_dev *wdev; struct cfg80211_beacon_registration *reg, *tmp; - if (state != NETLINK_URELEASE) + if (state != NETLINK_URELEASE || notify->protocol != NETLINK_GENERIC) return NOTIFY_DONE; rcu_read_lock(); diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index c8717c1d082e..c753211cb83f 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -342,6 +342,40 @@ static const int compat_event_type_size[] = { /* IW event code */ +void wireless_nlevent_flush(void) +{ + struct sk_buff *skb; + 
struct net *net; + + ASSERT_RTNL(); + + for_each_net(net) { + while ((skb = skb_dequeue(&net->wext_nlevents))) + rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, + GFP_KERNEL); + } +} +EXPORT_SYMBOL_GPL(wireless_nlevent_flush); + +static int wext_netdev_notifier_call(struct notifier_block *nb, + unsigned long state, void *ptr) +{ + /* + * When a netdev changes state in any way, flush all pending messages + * to avoid them going out in a strange order, e.g. RTM_NEWLINK after + * RTM_DELLINK, or with IFF_UP after without IFF_UP during dev_close() + * or similar - all of which could otherwise happen due to delays from + * schedule_work(). + */ + wireless_nlevent_flush(); + + return NOTIFY_OK; +} + +static struct notifier_block wext_netdev_notifier = { + .notifier_call = wext_netdev_notifier_call, +}; + static int __net_init wext_pernet_init(struct net *net) { skb_queue_head_init(&net->wext_nlevents); @@ -360,7 +394,12 @@ static struct pernet_operations wext_pernet_ops = { static int __init wireless_nlevent_init(void) { - return register_pernet_subsys(&wext_pernet_ops); + int err = register_pernet_subsys(&wext_pernet_ops); + + if (err) + return err; + + return register_netdevice_notifier(&wext_netdev_notifier); } subsys_initcall(wireless_nlevent_init); @@ -368,17 +407,8 @@ subsys_initcall(wireless_nlevent_init); /* Process events generated by the wireless layer or the driver. 
*/ static void wireless_nlevent_process(struct work_struct *work) { - struct sk_buff *skb; - struct net *net; - rtnl_lock(); - - for_each_net(net) { - while ((skb = skb_dequeue(&net->wext_nlevents))) - rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, - GFP_KERNEL); - } - + wireless_nlevent_flush(); rtnl_unlock(); } @@ -925,8 +955,29 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr, return private(dev, iwr, cmd, info, handler); } /* Old driver API : call driver ioctl handler */ - if (dev->netdev_ops->ndo_do_ioctl) - return dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd); + if (dev->netdev_ops->ndo_do_ioctl) { +#ifdef CONFIG_COMPAT + if (info->flags & IW_REQUEST_FLAG_COMPAT) { + int ret = 0; + struct iwreq iwr_lcl; + struct compat_iw_point *iwp_compat = (void *) &iwr->u.data; + + memcpy(&iwr_lcl, iwr, sizeof(struct iwreq)); + iwr_lcl.u.data.pointer = compat_ptr(iwp_compat->pointer); + iwr_lcl.u.data.length = iwp_compat->length; + iwr_lcl.u.data.flags = iwp_compat->flags; + + ret = dev->netdev_ops->ndo_do_ioctl(dev, (void *) &iwr_lcl, cmd); + + iwp_compat->pointer = ptr_to_compat(iwr_lcl.u.data.pointer); + iwp_compat->length = iwr_lcl.u.data.length; + iwp_compat->flags = iwr_lcl.u.data.flags; + + return ret; + } else +#endif + return dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd); + } return -EOPNOTSUPP; } diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c index 7ecd04c21360..997ff7b2509b 100644 --- a/net/x25/x25_facilities.c +++ b/net/x25/x25_facilities.c @@ -277,6 +277,7 @@ int x25_negotiate_facilities(struct sk_buff *skb, struct sock *sk, memset(&theirs, 0, sizeof(theirs)); memcpy(new, ours, sizeof(*new)); + memset(dte, 0, sizeof(*dte)); len = x25_parse_facilities(skb, &theirs, dte, &x25->vc_facil_mask); if (len < 0) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index ad7f5b3f9b61..1c4ad477ce93 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -292,12 +292,15 @@ int xfrm_input(struct sk_buff *skb, int 
nexthdr, __be32 spi, int encap_type) XFRM_SKB_CB(skb)->seq.input.hi = seq_hi; skb_dst_force(skb); + dev_hold(skb->dev); nexthdr = x->type->input(x, skb); if (nexthdr == -EINPROGRESS) return 0; resume: + dev_put(skb->dev); + spin_lock(&x->lock); if (nexthdr <= 0) { if (nexthdr == -EBADMSG) { diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index cc3676eb6239..ff4a91fcab9f 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -167,6 +167,8 @@ static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb { struct sk_buff *segs; + BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_SGO_CB_OFFSET); + BUILD_BUG_ON(sizeof(*IP6CB(skb)) > SKB_SGO_CB_OFFSET); segs = skb_gso_segment(skb, 0); kfree_skb(skb); if (IS_ERR(segs)) |