summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/6lowpan/nhc.c2
-rw-r--r--net/802/garp.c14
-rw-r--r--net/802/mrp.c14
-rw-r--r--net/8021q/vlan.h1
-rw-r--r--net/8021q/vlan_dev.c3
-rw-r--r--net/8021q/vlan_netlink.c19
-rw-r--r--net/9p/trans_fd.c26
-rw-r--r--net/9p/trans_virtio.c4
-rw-r--r--net/Kconfig21
-rw-r--r--net/Makefile2
-rw-r--r--net/appletalk/aarp.c15
-rw-r--r--net/appletalk/ddp.c54
-rw-r--r--net/atm/lec.c6
-rw-r--r--net/ax25/af_ax25.c20
-rw-r--r--net/batman-adv/bat_iv_ogm.c119
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c305
-rw-r--r--net/batman-adv/bridge_loop_avoidance.h4
-rw-r--r--net/batman-adv/debugfs.c41
-rw-r--r--net/batman-adv/debugfs.h11
-rw-r--r--net/batman-adv/distributed-arp-table.c19
-rw-r--r--net/batman-adv/fragmentation.c55
-rw-r--r--net/batman-adv/gateway_client.c24
-rw-r--r--net/batman-adv/hard-interface.c92
-rw-r--r--net/batman-adv/hard-interface.h6
-rw-r--r--net/batman-adv/main.c54
-rw-r--r--net/batman-adv/multicast.c31
-rw-r--r--net/batman-adv/multicast.h15
-rw-r--r--net/batman-adv/network-coding.c46
-rw-r--r--net/batman-adv/originator.c26
-rw-r--r--net/batman-adv/originator.h4
-rw-r--r--net/batman-adv/routing.c115
-rw-r--r--net/batman-adv/send.c4
-rw-r--r--net/batman-adv/soft-interface.c40
-rw-r--r--net/batman-adv/translation-table.c262
-rw-r--r--net/batman-adv/types.h23
-rw-r--r--net/bluetooth/6lowpan.c5
-rw-r--r--net/bluetooth/a2mp.c23
-rw-r--r--net/bluetooth/amp.c3
-rw-r--r--net/bluetooth/cmtp/cmtp.h2
-rw-r--r--net/bluetooth/cmtp/core.c9
-rw-r--r--net/bluetooth/hci_conn.c17
-rw-r--r--net/bluetooth/hci_core.c66
-rw-r--r--net/bluetooth/hci_event.c148
-rw-r--r--net/bluetooth/hci_sock.c52
-rw-r--r--net/bluetooth/hci_sysfs.c3
-rw-r--r--net/bluetooth/hidp/core.c5
-rw-r--r--net/bluetooth/l2cap_core.c37
-rw-r--r--net/bluetooth/l2cap_sock.c35
-rw-r--r--net/bluetooth/mgmt.c10
-rw-r--r--net/bluetooth/rfcomm/tty.c4
-rw-r--r--net/bluetooth/sco.c35
-rw-r--r--net/bluetooth/smp.c9
-rw-r--r--net/bridge/br_device.c7
-rw-r--r--net/bridge/br_fdb.c52
-rw-r--r--net/bridge/br_input.c7
-rw-r--r--net/bridge/br_netfilter_hooks.c17
-rw-r--r--net/bridge/br_vlan.c4
-rw-r--r--net/bridge/netfilter/ebt_limit.c1
-rw-r--r--net/bridge/netfilter/ebtables.c37
-rw-r--r--net/bridge/netfilter/nft_reject_bridge.c6
-rw-r--r--net/caif/caif_dev.c13
-rw-r--r--net/caif/caif_socket.c3
-rw-r--r--net/caif/caif_usb.c14
-rw-r--r--net/caif/cfcnfg.c16
-rw-r--r--net/caif/cfserl.c5
-rw-r--r--net/caif/chnl_net.c19
-rw-r--r--net/can/bcm.c91
-rw-r--r--net/can/gw.c3
-rw-r--r--net/can/raw.c82
-rw-r--r--net/ceph/messenger.c5
-rw-r--r--net/compat.c6
-rw-r--r--net/core/Makefile2
-rw-r--r--net/core/dev.c132
-rw-r--r--net/core/drop_monitor.c11
-rw-r--r--net/core/ethtool.c55
-rw-r--r--net/core/fib_rules.c4
-rw-r--r--net/core/filter.c1698
-rw-r--r--net/core/flow_dissector.c17
-rw-r--r--net/core/neighbour.c26
-rw-r--r--net/core/net-procfs.c38
-rw-r--r--net/core/net-sysfs.c2
-rw-r--r--net/core/net_namespace.c13
-rw-r--r--net/core/netclassid_cgroup.c11
-rw-r--r--net/core/netpoll.c24
-rw-r--r--net/core/netprio_cgroup.c21
-rw-r--r--net/core/pktgen.c2
-rw-r--r--net/core/rtnetlink.c7
-rw-r--r--net/core/scm.c4
-rw-r--r--net/core/skbuff.c83
-rw-r--r--net/core/sock.c104
-rw-r--r--net/core/sock_diag.c2
-rw-r--r--net/core/sock_reuseport.c269
-rw-r--r--net/core/stream.c3
-rw-r--r--net/core/sysctl_net_core.c80
-rw-r--r--net/core/utils.c20
-rw-r--r--net/dcb/dcbnl.c2
-rw-r--r--net/dccp/dccp.h6
-rw-r--r--net/dccp/feat.c7
-rw-r--r--net/dccp/ipv4.c6
-rw-r--r--net/dccp/ipv6.c23
-rw-r--r--net/dccp/minisocks.c2
-rw-r--r--net/decnet/af_decnet.c27
-rw-r--r--net/decnet/dn_dev.c2
-rw-r--r--net/dsa/slave.c7
-rw-r--r--net/dsa/tag_brcm.c2
-rw-r--r--net/ethernet/eth.c7
-rw-r--r--net/hsr/hsr_device.c2
-rw-r--r--net/hsr/hsr_framereg.c13
-rw-r--r--net/hsr/hsr_netlink.c74
-rw-r--r--net/hsr/hsr_slave.c10
-rw-r--r--net/ieee802154/6lowpan/reassembly.c2
-rw-r--r--net/ieee802154/nl-mac.c11
-rw-r--r--net/ieee802154/nl-phy.c4
-rw-r--r--net/ieee802154/nl802154.c52
-rw-r--r--net/ieee802154/nl_policy.c6
-rw-r--r--net/ieee802154/socket.c24
-rw-r--r--net/ipv4/Kconfig1
-rw-r--r--net/ipv4/af_inet.c35
-rw-r--r--net/ipv4/arp.c8
-rw-r--r--net/ipv4/cipso_ipv4.c14
-rw-r--r--net/ipv4/devinet.c37
-rw-r--r--net/ipv4/fib_frontend.c5
-rw-r--r--net/ipv4/fib_semantics.c10
-rw-r--r--net/ipv4/fib_trie.c9
-rw-r--r--net/ipv4/fou.c6
-rw-r--r--net/ipv4/gre_offload.c8
-rw-r--r--net/ipv4/icmp.c45
-rw-r--r--net/ipv4/igmp.c2
-rw-r--r--net/ipv4/inet_connection_sock.c72
-rw-r--r--net/ipv4/inet_diag.c6
-rw-r--r--net/ipv4/inet_hashtables.c67
-rw-r--r--net/ipv4/ip_gre.c57
-rw-r--r--net/ipv4/ip_input.c25
-rw-r--r--net/ipv4/ip_output.c63
-rw-r--r--net/ipv4/ip_sockglue.c7
-rw-r--r--net/ipv4/ip_tunnel.c34
-rw-r--r--net/ipv4/ip_tunnel_core.c6
-rw-r--r--net/ipv4/ip_vti.c114
-rw-r--r--net/ipv4/ipconfig.c13
-rw-r--r--net/ipv4/netfilter/arp_tables.c29
-rw-r--r--net/ipv4/netfilter/ip_tables.c23
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c1
-rw-r--r--net/ipv4/netfilter/ipt_rpfilter.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_pptp.c7
-rw-r--r--net/ipv4/netfilter/nft_dup_ipv4.c6
-rw-r--r--net/ipv4/ping.c21
-rw-r--r--net/ipv4/raw.c15
-rw-r--r--net/ipv4/route.c112
-rw-r--r--net/ipv4/syncookies.c11
-rw-r--r--net/ipv4/tcp.c16
-rw-r--r--net/ipv4/tcp_cong.c2
-rw-r--r--net/ipv4/tcp_cubic.c7
-rw-r--r--net/ipv4/tcp_input.c42
-rw-r--r--net/ipv4/tcp_ipv4.c34
-rw-r--r--net/ipv4/tcp_output.c36
-rw-r--r--net/ipv4/tcp_timer.c2
-rw-r--r--net/ipv4/udp.c405
-rw-r--r--net/ipv4/udp_diag.c30
-rw-r--r--net/ipv4/udp_offload.c2
-rw-r--r--net/ipv4/udplite.c1
-rw-r--r--net/ipv4/xfrm4_output.c2
-rw-r--r--net/ipv6/addrconf.c81
-rw-r--r--net/ipv6/addrconf_core.c11
-rw-r--r--net/ipv6/af_inet6.c37
-rw-r--r--net/ipv6/ah6.c3
-rw-r--r--net/ipv6/anycast.c17
-rw-r--r--net/ipv6/datagram.c8
-rw-r--r--net/ipv6/esp6.c4
-rw-r--r--net/ipv6/icmp.c13
-rw-r--r--net/ipv6/inet6_connection_sock.c14
-rw-r--r--net/ipv6/inet6_hashtables.c80
-rw-r--r--net/ipv6/ip6_checksum.c7
-rw-r--r--net/ipv6/ip6_fib.c7
-rw-r--r--net/ipv6/ip6_gre.c23
-rw-r--r--net/ipv6/ip6_input.c20
-rw-r--r--net/ipv6/ip6_output.c24
-rw-r--r--net/ipv6/ip6_tunnel.c43
-rw-r--r--net/ipv6/ip6_udp_tunnel.c6
-rw-r--r--net/ipv6/ip6_vti.c42
-rw-r--r--net/ipv6/ip6mr.c13
-rw-r--r--net/ipv6/ipv6_sockglue.c9
-rw-r--r--net/ipv6/mcast.c4
-rw-r--r--net/ipv6/ndisc.c9
-rw-r--r--net/ipv6/netfilter/ip6_tables.c24
-rw-r--r--net/ipv6/netfilter/ip6t_NPT.c2
-rw-r--r--net/ipv6/netfilter/nf_reject_ipv6.c2
-rw-r--r--net/ipv6/netfilter/nft_dup_ipv6.c6
-rw-r--r--net/ipv6/output_core.c28
-rw-r--r--net/ipv6/ping.c2
-rw-r--r--net/ipv6/raw.c16
-rw-r--r--net/ipv6/reassembly.c2
-rw-r--r--net/ipv6/route.c64
-rw-r--r--net/ipv6/sit.c12
-rw-r--r--net/ipv6/syncookies.c14
-rw-r--r--net/ipv6/tcp_ipv6.c52
-rw-r--r--net/ipv6/udp.c242
-rw-r--r--net/ipv6/udplite.c1
-rw-r--r--net/ipv6/xfrm6_input.c15
-rw-r--r--net/ipv6/xfrm6_output.c4
-rw-r--r--net/ipv6/xfrm6_tunnel.c2
-rw-r--r--net/irda/af_irda.c5
-rw-r--r--net/iucv/af_iucv.c34
-rw-r--r--net/key/af_key.c13
-rw-r--r--net/l2tp/l2tp_core.c289
-rw-r--r--net/l2tp/l2tp_core.h49
-rw-r--r--net/l2tp/l2tp_eth.c226
-rw-r--r--net/l2tp/l2tp_ip.c114
-rw-r--r--net/l2tp/l2tp_ip6.c137
-rw-r--r--net/l2tp/l2tp_netlink.c124
-rw-r--r--net/l2tp/l2tp_ppp.c403
-rw-r--r--net/lapb/lapb_out.c3
-rw-r--r--net/llc/af_llc.c54
-rw-r--r--net/llc/llc_conn.c35
-rw-r--r--net/llc/llc_if.c12
-rw-r--r--net/llc/llc_s_ac.c2
-rw-r--r--net/llc/llc_station.c4
-rw-r--r--net/mac80211/agg-tx.c2
-rw-r--r--net/mac80211/cfg.c74
-rw-r--r--net/mac80211/debugfs.c1
-rw-r--r--net/mac80211/debugfs_netdev.c12
-rw-r--r--net/mac80211/driver-ops.c5
-rw-r--r--net/mac80211/ibss.c12
-rw-r--r--net/mac80211/ieee80211_i.h75
-rw-r--r--net/mac80211/iface.c17
-rw-r--r--net/mac80211/key.c7
-rw-r--r--net/mac80211/key.h2
-rw-r--r--net/mac80211/main.c26
-rw-r--r--net/mac80211/mesh.c21
-rw-r--r--net/mac80211/mesh.h3
-rw-r--r--net/mac80211/mesh_hwmp.c3
-rw-r--r--net/mac80211/mesh_plink.c42
-rw-r--r--net/mac80211/mlme.c21
-rw-r--r--net/mac80211/offchannel.c5
-rw-r--r--net/mac80211/rate.c5
-rw-r--r--net/mac80211/rc80211_minstrel.c33
-rw-r--r--net/mac80211/rc80211_minstrel.h1
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c6
-rw-r--r--net/mac80211/rx.c209
-rw-r--r--net/mac80211/scan.c12
-rw-r--r--net/mac80211/spectmgmt.c4
-rw-r--r--net/mac80211/sta_info.c65
-rw-r--r--net/mac80211/sta_info.h31
-rw-r--r--net/mac80211/status.c5
-rw-r--r--net/mac80211/tdls.c61
-rw-r--r--net/mac80211/trace.h6
-rw-r--r--net/mac80211/tx.c49
-rw-r--r--net/mac80211/util.c40
-rw-r--r--net/mac80211/vht.c34
-rw-r--r--net/mac80211/wpa.c18
-rw-r--r--net/mac802154/llsec.c2
-rw-r--r--net/mpls/af_mpls.c7
-rw-r--r--net/netfilter/Kconfig18
-rw-r--r--net/netfilter/Makefile1
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_gen.h4
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ip.c6
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ipmac.c6
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_port.c6
-rw-r--r--net/netfilter/ipset/ip_set_core.c5
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h24
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c14
-rw-r--r--net/netfilter/nf_conntrack_core.c6
-rw-r--r--net/netfilter/nf_conntrack_h323_main.c1
-rw-r--r--net/netfilter/nf_conntrack_netlink.c5
-rw-r--r--net/netfilter/nf_conntrack_pptp.c62
-rw-r--r--net/netfilter/nf_nat_ftp.c2
-rw-r--r--net/netfilter/nf_nat_irc.c2
-rw-r--r--net/netfilter/nf_synproxy_core.c5
-rw-r--r--net/netfilter/nf_tables_api.c4
-rw-r--r--net/netfilter/nf_tables_core.c2
-rw-r--r--net/netfilter/nfnetlink.c6
-rw-r--r--net/netfilter/nfnetlink_cthelper.c10
-rw-r--r--net/netfilter/nfnetlink_queue.c2
-rw-r--r--net/netfilter/nft_dynset.c7
-rw-r--r--net/netfilter/nft_exthdr.c3
-rw-r--r--net/netfilter/nft_meta.c2
-rw-r--r--net/netfilter/nft_nat.c8
-rw-r--r--net/netfilter/nft_payload.c4
-rw-r--r--net/netfilter/x_tables.c73
-rw-r--r--net/netfilter/xt_CT.c3
-rw-r--r--net/netfilter/xt_IDLETIMER.c1
-rw-r--r--net/netfilter/xt_LED.c1
-rw-r--r--net/netfilter/xt_RATEEST.c4
-rw-r--r--net/netfilter/xt_TEE.c2
-rw-r--r--net/netfilter/xt_TPROXY.c31
-rw-r--r--net/netfilter/xt_bpf.c113
-rw-r--r--net/netfilter/xt_cgroup.c3
-rw-r--r--net/netfilter/xt_connlimit.c1
-rw-r--r--net/netfilter/xt_hashlimit.c2
-rw-r--r--net/netfilter/xt_limit.c1
-rw-r--r--net/netfilter/xt_nfacct.c1
-rw-r--r--net/netfilter/xt_qtaguid.c3015
-rw-r--r--net/netfilter/xt_qtaguid_internal.h350
-rw-r--r--net/netfilter/xt_qtaguid_print.c566
-rw-r--r--net/netfilter/xt_qtaguid_print.h120
-rw-r--r--net/netfilter/xt_quota.c1
-rw-r--r--net/netfilter/xt_quota2.c25
-rw-r--r--net/netfilter/xt_rateest.c1
-rw-r--r--net/netfilter/xt_recent.c12
-rw-r--r--net/netfilter/xt_socket.c32
-rw-r--r--net/netfilter/xt_statistic.c1
-rw-r--r--net/netfilter/xt_string.c1
-rw-r--r--net/netlabel/netlabel_cipso_v4.c12
-rw-r--r--net/netlabel/netlabel_kapi.c6
-rw-r--r--net/netlabel/netlabel_mgmt.c19
-rw-r--r--net/netlabel/netlabel_unlabeled.c17
-rw-r--r--net/netlink/af_netlink.c34
-rw-r--r--net/netlink/genetlink.c52
-rw-r--r--net/netrom/nr_route.c1
-rw-r--r--net/netrom/nr_timer.c20
-rw-r--r--net/nfc/af_nfc.c3
-rw-r--r--net/nfc/core.c32
-rw-r--r--net/nfc/digital_core.c9
-rw-r--r--net/nfc/digital_dep.c2
-rw-r--r--net/nfc/digital_technology.c8
-rw-r--r--net/nfc/hci/core.c19
-rw-r--r--net/nfc/llcp_sock.c21
-rw-r--r--net/nfc/nci/core.c37
-rw-r--r--net/nfc/nci/hci.c5
-rw-r--r--net/nfc/nci/rsp.c2
-rw-r--r--net/nfc/nci/uart.c2
-rw-r--r--net/nfc/netlink.c18
-rw-r--r--net/nfc/rawsock.c9
-rw-r--r--net/openvswitch/actions.c30
-rw-r--r--net/openvswitch/datapath.c17
-rw-r--r--net/openvswitch/flow.h2
-rw-r--r--net/packet/af_packet.c36
-rw-r--r--net/phonet/pep.c3
-rw-r--r--net/phonet/socket.c6
-rw-r--r--net/rds/ib_stats.c2
-rw-r--r--net/rds/recv.c5
-rw-r--r--net/rds/tcp.c2
-rw-r--r--net/rds/tcp_listen.c40
-rw-r--r--net/rfkill/core.c7
-rw-r--r--net/rose/rose_loopback.c17
-rw-r--r--net/rxrpc/ar-key.c22
-rw-r--r--net/rxrpc/ar-output.c2
-rw-r--r--net/rxrpc/ar-recvmsg.c2
-rw-r--r--net/sched/act_bpf.c9
-rw-r--r--net/sched/cls_bpf.c22
-rw-r--r--net/sched/cls_flower.c29
-rw-r--r--net/sched/cls_route.c4
-rw-r--r--net/sched/cls_rsvp.h6
-rw-r--r--net/sched/cls_tcindex.c45
-rw-r--r--net/sched/ematch.c5
-rw-r--r--net/sched/sch_api.c3
-rw-r--r--net/sched/sch_choke.c10
-rw-r--r--net/sched/sch_drr.c4
-rw-r--r--net/sched/sch_dsmark.c3
-rw-r--r--net/sched/sch_fifo.c3
-rw-r--r--net/sched/sch_fq.c3
-rw-r--r--net/sched/sch_fq_codel.c2
-rw-r--r--net/sched/sch_generic.c20
-rw-r--r--net/sched/sch_gred.c2
-rw-r--r--net/sched/sch_hfsc.c12
-rw-r--r--net/sched/sch_mq.c2
-rw-r--r--net/sched/sch_mqprio.c3
-rw-r--r--net/sched/sch_multiq.c2
-rw-r--r--net/sched/sch_prio.c6
-rw-r--r--net/sched/sch_qfq.c9
-rw-r--r--net/sched/sch_red.c7
-rw-r--r--net/sched/sch_sfb.c3
-rw-r--r--net/sched/sch_sfq.c11
-rw-r--r--net/sched/sch_tbf.c4
-rw-r--r--net/sched/sch_teql.c3
-rw-r--r--net/sctp/associola.c7
-rw-r--r--net/sctp/auth.c1
-rw-r--r--net/sctp/bind_addr.c21
-rw-r--r--net/sctp/input.c15
-rw-r--r--net/sctp/ipv6.c31
-rw-r--r--net/sctp/protocol.c43
-rw-r--r--net/sctp/sm_make_chunk.c50
-rw-r--r--net/sctp/sm_sideeffect.c32
-rw-r--r--net/sctp/sm_statefuns.c43
-rw-r--r--net/sctp/socket.c56
-rw-r--r--net/sctp/transport.c4
-rw-r--r--net/socket.c58
-rw-r--r--net/sunrpc/addr.c6
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c30
-rw-r--r--net/sunrpc/auth_gss/auth_gss_internal.h45
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_mech.c31
-rw-r--r--net/sunrpc/auth_gss/gss_mech_switch.c12
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c35
-rw-r--r--net/sunrpc/cache.c6
-rw-r--r--net/sunrpc/clnt.c8
-rw-r--r--net/sunrpc/rpc_pipe.c9
-rw-r--r--net/sunrpc/rpcb_clnt.c4
-rw-r--r--net/sunrpc/sched.c12
-rw-r--r--net/sunrpc/svc.c4
-rw-r--r--net/sunrpc/svc_xprt.c23
-rw-r--r--net/sunrpc/xdr.c4
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c22
-rw-r--r--net/sunrpc/xprtrdma/transport.c3
-rw-r--r--net/sunrpc/xprtrdma/verbs.c41
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h2
-rw-r--r--net/tipc/core.c34
-rw-r--r--net/tipc/link.c2
-rw-r--r--net/tipc/msg.c14
-rw-r--r--net/tipc/netlink_compat.c22
-rw-r--r--net/tipc/socket.c14
-rw-r--r--net/tipc/sysctl.c8
-rw-r--r--net/tipc/udp_media.c16
-rw-r--r--net/unix/Kconfig5
-rw-r--r--net/unix/Makefile2
-rw-r--r--net/unix/af_unix.c169
-rw-r--r--net/unix/garbage.c82
-rw-r--r--net/unix/scm.c163
-rw-r--r--net/unix/scm.h10
-rw-r--r--net/vmw_vsock/af_vsock.c22
-rw-r--r--net/vmw_vsock/virtio_transport_common.c4
-rw-r--r--net/vmw_vsock/vmci_transport.c3
-rw-r--r--net/wireless/chan.c2
-rw-r--r--net/wireless/core.c8
-rw-r--r--net/wireless/debugfs.c4
-rw-r--r--net/wireless/ethtool.c8
-rw-r--r--net/wireless/ibss.c11
-rw-r--r--net/wireless/mesh.c4
-rw-r--r--net/wireless/mlme.c2
-rw-r--r--net/wireless/nl80211.c110
-rw-r--r--net/wireless/rdev-ops.h4
-rw-r--r--net/wireless/reg.c35
-rw-r--r--net/wireless/reg.h2
-rw-r--r--net/wireless/scan.c18
-rw-r--r--net/wireless/sme.c42
-rw-r--r--net/wireless/trace.h8
-rw-r--r--net/wireless/util.c104
-rw-r--r--net/wireless/wext-compat.c14
-rw-r--r--net/wireless/wext-core.c8
-rw-r--r--net/wireless/wext-sme.c2
-rw-r--r--net/wireless/wext-spy.c14
-rw-r--r--net/x25/af_x25.c35
-rw-r--r--net/x25/x25_dev.c4
-rw-r--r--net/x25/x25_subr.c6
-rw-r--r--net/xfrm/Kconfig11
-rw-r--r--net/xfrm/Makefile1
-rw-r--r--net/xfrm/xfrm_compat.c685
-rw-r--r--net/xfrm/xfrm_input.c18
-rw-r--r--net/xfrm/xfrm_output.c3
-rw-r--r--net/xfrm/xfrm_policy.c9
-rw-r--r--net/xfrm/xfrm_state.c105
-rw-r--r--net/xfrm/xfrm_user.c159
442 files changed, 9966 insertions, 8130 deletions
diff --git a/net/6lowpan/nhc.c b/net/6lowpan/nhc.c
index 7008d53e455c..e61679bf0908 100644
--- a/net/6lowpan/nhc.c
+++ b/net/6lowpan/nhc.c
@@ -18,7 +18,7 @@
#include "nhc.h"
static struct rb_root rb_root = RB_ROOT;
-static struct lowpan_nhc *lowpan_nexthdr_nhcs[NEXTHDR_MAX];
+static struct lowpan_nhc *lowpan_nexthdr_nhcs[NEXTHDR_MAX + 1];
static DEFINE_SPINLOCK(lowpan_nhc_lock);
static int lowpan_nhc_insert(struct lowpan_nhc *nhc)
diff --git a/net/802/garp.c b/net/802/garp.c
index b38ee6dcba45..5239b8f244e7 100644
--- a/net/802/garp.c
+++ b/net/802/garp.c
@@ -206,6 +206,19 @@ static void garp_attr_destroy(struct garp_applicant *app, struct garp_attr *attr
kfree(attr);
}
+static void garp_attr_destroy_all(struct garp_applicant *app)
+{
+ struct rb_node *node, *next;
+ struct garp_attr *attr;
+
+ for (node = rb_first(&app->gid);
+ next = node ? rb_next(node) : NULL, node != NULL;
+ node = next) {
+ attr = rb_entry(node, struct garp_attr, node);
+ garp_attr_destroy(app, attr);
+ }
+}
+
static int garp_pdu_init(struct garp_applicant *app)
{
struct sk_buff *skb;
@@ -612,6 +625,7 @@ void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl
spin_lock_bh(&app->lock);
garp_gid_event(app, GARP_EVENT_TRANSMIT_PDU);
+ garp_attr_destroy_all(app);
garp_pdu_queue(app);
spin_unlock_bh(&app->lock);
diff --git a/net/802/mrp.c b/net/802/mrp.c
index 72db2785ef2c..4ee3af3d400b 100644
--- a/net/802/mrp.c
+++ b/net/802/mrp.c
@@ -295,6 +295,19 @@ static void mrp_attr_destroy(struct mrp_applicant *app, struct mrp_attr *attr)
kfree(attr);
}
+static void mrp_attr_destroy_all(struct mrp_applicant *app)
+{
+ struct rb_node *node, *next;
+ struct mrp_attr *attr;
+
+ for (node = rb_first(&app->mad);
+ next = node ? rb_next(node) : NULL, node != NULL;
+ node = next) {
+ attr = rb_entry(node, struct mrp_attr, node);
+ mrp_attr_destroy(app, attr);
+ }
+}
+
static int mrp_pdu_init(struct mrp_applicant *app)
{
struct sk_buff *skb;
@@ -900,6 +913,7 @@ void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl)
spin_lock_bh(&app->lock);
mrp_mad_event(app, MRP_EVENT_TX);
+ mrp_attr_destroy_all(app);
mrp_pdu_queue(app);
spin_unlock_bh(&app->lock);
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index cc1557978066..ecdfeaafba9c 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -109,6 +109,7 @@ int vlan_check_real_dev(struct net_device *real_dev,
void vlan_setup(struct net_device *dev);
int register_vlan_dev(struct net_device *dev);
void unregister_vlan_dev(struct net_device *dev, struct list_head *head);
+void vlan_dev_uninit(struct net_device *dev);
bool vlan_dev_inherit_address(struct net_device *dev,
struct net_device *real_dev);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 5139c4ebb96b..22f4e5292278 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -606,7 +606,8 @@ static int vlan_dev_init(struct net_device *dev)
return 0;
}
-static void vlan_dev_uninit(struct net_device *dev)
+/* Note: this function might be called multiple times for the same device. */
+void vlan_dev_uninit(struct net_device *dev)
{
struct vlan_priority_tci_mapping *pm;
struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index c92b52f37d38..7c95a16c1cef 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -92,11 +92,13 @@ static int vlan_changelink(struct net_device *dev,
struct ifla_vlan_flags *flags;
struct ifla_vlan_qos_mapping *m;
struct nlattr *attr;
- int rem;
+ int rem, err;
if (data[IFLA_VLAN_FLAGS]) {
flags = nla_data(data[IFLA_VLAN_FLAGS]);
- vlan_dev_change_flags(dev, flags->flags, flags->mask);
+ err = vlan_dev_change_flags(dev, flags->flags, flags->mask);
+ if (err)
+ return err;
}
if (data[IFLA_VLAN_INGRESS_QOS]) {
nla_for_each_nested(attr, data[IFLA_VLAN_INGRESS_QOS], rem) {
@@ -107,7 +109,9 @@ static int vlan_changelink(struct net_device *dev,
if (data[IFLA_VLAN_EGRESS_QOS]) {
nla_for_each_nested(attr, data[IFLA_VLAN_EGRESS_QOS], rem) {
m = nla_data(attr);
- vlan_dev_set_egress_priority(dev, m->from, m->to);
+ err = vlan_dev_set_egress_priority(dev, m->from, m->to);
+ if (err)
+ return err;
}
}
return 0;
@@ -150,10 +154,11 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
return -EINVAL;
err = vlan_changelink(dev, tb, data);
- if (err < 0)
- return err;
-
- return register_vlan_dev(dev);
+ if (!err)
+ err = register_vlan_dev(dev);
+ if (err)
+ vlan_dev_uninit(dev);
+ return err;
}
static inline size_t vlan_qos_map_size(unsigned int n)
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 2f68ffda3715..6f8e84844bb2 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -793,20 +793,28 @@ static int p9_fd_open(struct p9_client *client, int rfd, int wfd)
return -ENOMEM;
ts->rd = fget(rfd);
+ if (!ts->rd)
+ goto out_free_ts;
+ if (!(ts->rd->f_mode & FMODE_READ))
+ goto out_put_rd;
ts->wr = fget(wfd);
- if (!ts->rd || !ts->wr) {
- if (ts->rd)
- fput(ts->rd);
- if (ts->wr)
- fput(ts->wr);
- kfree(ts);
- return -EIO;
- }
+ if (!ts->wr)
+ goto out_put_rd;
+ if (!(ts->wr->f_mode & FMODE_WRITE))
+ goto out_put_wr;
client->trans = ts;
client->status = Connected;
return 0;
+
+out_put_wr:
+ fput(ts->wr);
+out_put_rd:
+ fput(ts->rd);
+out_free_ts:
+ kfree(ts);
+ return -EIO;
}
static int p9_socket_open(struct p9_client *client, struct socket *csocket)
@@ -983,7 +991,7 @@ p9_fd_create_unix(struct p9_client *client, const char *addr, char *args)
csocket = NULL;
- if (addr == NULL)
+ if (!addr || !strlen(addr))
return -EINVAL;
if (strlen(addr) >= UNIX_PATH_MAX) {
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 5892bd1457d4..252a4c22898e 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -605,7 +605,7 @@ static int p9_virtio_probe(struct virtio_device *vdev)
chan->vc_wq = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL);
if (!chan->vc_wq) {
err = -ENOMEM;
- goto out_free_tag;
+ goto out_remove_file;
}
init_waitqueue_head(chan->vc_wq);
chan->ring_bufs_avail = 1;
@@ -623,6 +623,8 @@ static int p9_virtio_probe(struct virtio_device *vdev)
return 0;
+out_remove_file:
+ sysfs_remove_file(&vdev->dev.kobj, &dev_attr_mount_tag.attr);
out_free_tag:
kfree(tag);
out_free_vq:
diff --git a/net/Kconfig b/net/Kconfig
index d9da78da8cd9..539a7a01383f 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -270,6 +270,7 @@ config XPS
config CGROUP_NET_PRIO
bool "Network priority cgroup"
depends on CGROUPS
+ select SOCK_CGROUP_DATA
---help---
Cgroup subsystem for use in assigning processes to network priorities on
a per-interface basis.
@@ -277,6 +278,7 @@ config CGROUP_NET_PRIO
config CGROUP_NET_CLASSID
bool "Network classid cgroup"
depends on CGROUPS
+ select SOCK_CGROUP_DATA
---help---
Cgroup subsystem for use as general purpose socket classid marker that is
being used in cls_cgroup and for netfilter matching.
@@ -293,14 +295,17 @@ config BQL
config BPF_JIT
bool "enable BPF Just In Time compiler"
- depends on HAVE_BPF_JIT
+ depends on HAVE_CBPF_JIT || HAVE_EBPF_JIT
depends on MODULES
---help---
Berkeley Packet Filter filtering capabilities are normally handled
by an interpreter. This option allows kernel to generate a native
code when filter is loaded in memory. This should speedup
- packet sniffing (libpcap/tcpdump). Note : Admin should enable
- this feature changing /proc/sys/net/core/bpf_jit_enable
+ packet sniffing (libpcap/tcpdump).
+
+ Note, admin should enable this feature changing:
+ /proc/sys/net/core/bpf_jit_enable
+ /proc/sys/net/core/bpf_jit_harden (optional)
config NET_FLOW_LIMIT
bool
@@ -417,9 +422,15 @@ config DST_CACHE
endif # if NET
-# Used by archs to tell that they support BPF_JIT
-config HAVE_BPF_JIT
+# Used by archs to tell that they support BPF JIT compiler plus which flavour.
+# Only one of the two can be selected for a specific arch since eBPF JIT supersedes
+# the cBPF JIT.
+
+# Classic BPF JIT (cBPF)
+config HAVE_CBPF_JIT
bool
+# Extended BPF JIT (eBPF)
config HAVE_EBPF_JIT
bool
+
diff --git a/net/Makefile b/net/Makefile
index e700aa62b1af..8b139678bb0b 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -16,7 +16,7 @@ obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/
obj-$(CONFIG_NETFILTER) += netfilter/
obj-$(CONFIG_INET) += ipv4/
obj-$(CONFIG_XFRM) += xfrm/
-obj-$(CONFIG_UNIX) += unix/
+obj-$(CONFIG_UNIX_SCM) += unix/
obj-$(CONFIG_NET) += ipv6/
obj-$(CONFIG_PACKET) += packet/
obj-$(CONFIG_NET_KEY) += key/
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index 8ad3ec2610b6..b9e85a4751a6 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -879,15 +879,24 @@ static struct notifier_block aarp_notifier = {
static unsigned char aarp_snap_id[] = { 0x00, 0x00, 0x00, 0x80, 0xF3 };
-void __init aarp_proto_init(void)
+int __init aarp_proto_init(void)
{
+ int rc;
+
aarp_dl = register_snap_client(aarp_snap_id, aarp_rcv);
- if (!aarp_dl)
+ if (!aarp_dl) {
printk(KERN_CRIT "Unable to register AARP with SNAP.\n");
+ return -ENOMEM;
+ }
setup_timer(&aarp_timer, aarp_expire_timeout, 0);
aarp_timer.expires = jiffies + sysctl_aarp_expiry_time;
add_timer(&aarp_timer);
- register_netdevice_notifier(&aarp_notifier);
+ rc = register_netdevice_notifier(&aarp_notifier);
+ if (rc) {
+ del_timer_sync(&aarp_timer);
+ unregister_snap_client(aarp_dl);
+ }
+ return rc;
}
/* Remove the AARP entries associated with a device. */
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index e23bf739492c..1048cddcc9a3 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1575,8 +1575,8 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
struct sk_buff *skb;
struct net_device *dev;
struct ddpehdr *ddp;
- int size;
- struct atalk_route *rt;
+ int size, hard_header_len;
+ struct atalk_route *rt, *rt_lo = NULL;
int err;
if (flags & ~(MSG_DONTWAIT|MSG_CMSG_COMPAT))
@@ -1639,7 +1639,22 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
SOCK_DEBUG(sk, "SK %p: Size needed %d, device %s\n",
sk, size, dev->name);
- size += dev->hard_header_len;
+ hard_header_len = dev->hard_header_len;
+ /* Leave room for loopback hardware header if necessary */
+ if (usat->sat_addr.s_node == ATADDR_BCAST &&
+ (dev->flags & IFF_LOOPBACK || !(rt->flags & RTF_GATEWAY))) {
+ struct atalk_addr at_lo;
+
+ at_lo.s_node = 0;
+ at_lo.s_net = 0;
+
+ rt_lo = atrtr_find(&at_lo);
+
+ if (rt_lo && rt_lo->dev->hard_header_len > hard_header_len)
+ hard_header_len = rt_lo->dev->hard_header_len;
+ }
+
+ size += hard_header_len;
release_sock(sk);
skb = sock_alloc_send_skb(sk, size, (flags & MSG_DONTWAIT), &err);
lock_sock(sk);
@@ -1647,7 +1662,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
goto out;
skb_reserve(skb, ddp_dl->header_length);
- skb_reserve(skb, dev->hard_header_len);
+ skb_reserve(skb, hard_header_len);
skb->dev = dev;
SOCK_DEBUG(sk, "SK %p: Begin build.\n", sk);
@@ -1698,18 +1713,12 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
/* loop back */
skb_orphan(skb);
if (ddp->deh_dnode == ATADDR_BCAST) {
- struct atalk_addr at_lo;
-
- at_lo.s_node = 0;
- at_lo.s_net = 0;
-
- rt = atrtr_find(&at_lo);
- if (!rt) {
+ if (!rt_lo) {
kfree_skb(skb);
err = -ENETUNREACH;
goto out;
}
- dev = rt->dev;
+ dev = rt_lo->dev;
skb->dev = dev;
}
ddp_dl->request(ddp_dl, skb, dev->dev_addr);
@@ -1911,9 +1920,6 @@ static unsigned char ddp_snap_id[] = { 0x08, 0x00, 0x07, 0x80, 0x9B };
EXPORT_SYMBOL(atrtr_get_dev);
EXPORT_SYMBOL(atalk_find_dev_addr);
-static const char atalk_err_snap[] __initconst =
- KERN_CRIT "Unable to register DDP with SNAP.\n";
-
/* Called by proto.c on kernel start up */
static int __init atalk_init(void)
{
@@ -1928,17 +1934,23 @@ static int __init atalk_init(void)
goto out_proto;
ddp_dl = register_snap_client(ddp_snap_id, atalk_rcv);
- if (!ddp_dl)
- printk(atalk_err_snap);
+ if (!ddp_dl) {
+ pr_crit("Unable to register DDP with SNAP.\n");
+ rc = -ENOMEM;
+ goto out_sock;
+ }
dev_add_pack(&ltalk_packet_type);
dev_add_pack(&ppptalk_packet_type);
rc = register_netdevice_notifier(&ddp_notifier);
if (rc)
- goto out_sock;
+ goto out_snap;
+
+ rc = aarp_proto_init();
+ if (rc)
+ goto out_dev;
- aarp_proto_init();
rc = atalk_proc_init();
if (rc)
goto out_aarp;
@@ -1952,11 +1964,13 @@ out_proc:
atalk_proc_exit();
out_aarp:
aarp_cleanup_module();
+out_dev:
unregister_netdevice_notifier(&ddp_notifier);
-out_sock:
+out_snap:
dev_remove_pack(&ppptalk_packet_type);
dev_remove_pack(&ltalk_packet_type);
unregister_snap_client(ddp_dl);
+out_sock:
sock_unregister(PF_APPLETALK);
out_proto:
proto_unregister(&ddp_proto);
diff --git a/net/atm/lec.c b/net/atm/lec.c
index e4afac94ff15..a38680e19443 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -1290,6 +1290,12 @@ static void lec_arp_clear_vccs(struct lec_arp_table *entry)
entry->vcc = NULL;
}
if (entry->recv_vcc) {
+ struct atm_vcc *vcc = entry->recv_vcc;
+ struct lec_vcc_priv *vpriv = LEC_VCC_PRIV(vcc);
+
+ kfree(vpriv);
+ vcc->user_back = NULL;
+
entry->recv_vcc->push = entry->old_recv_push;
vcc_release_async(entry->recv_vcc, -EPIPE);
entry->recv_vcc = NULL;
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index de55a3f001dc..f4c8567e91b3 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -88,8 +88,10 @@ static void ax25_kill_by_device(struct net_device *dev)
again:
ax25_for_each(s, &ax25_list) {
if (s->ax25_dev == ax25_dev) {
- s->ax25_dev = NULL;
spin_unlock_bh(&ax25_list_lock);
+ lock_sock(s->sk);
+ s->ax25_dev = NULL;
+ release_sock(s->sk);
ax25_disconnect(s, ENETUNREACH);
spin_lock_bh(&ax25_list_lock);
@@ -639,8 +641,10 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname,
break;
case SO_BINDTODEVICE:
- if (optlen > IFNAMSIZ)
- optlen = IFNAMSIZ;
+ if (optlen > IFNAMSIZ - 1)
+ optlen = IFNAMSIZ - 1;
+
+ memset(devname, 0, sizeof(devname));
if (copy_from_user(devname, optval, optlen)) {
res = -EFAULT;
@@ -1189,7 +1193,10 @@ static int __must_check ax25_connect(struct socket *sock,
if (addr_len > sizeof(struct sockaddr_ax25) &&
fsa->fsa_ax25.sax25_ndigis != 0) {
/* Valid number of digipeaters ? */
- if (fsa->fsa_ax25.sax25_ndigis < 1 || fsa->fsa_ax25.sax25_ndigis > AX25_MAX_DIGIS) {
+ if (fsa->fsa_ax25.sax25_ndigis < 1 ||
+ fsa->fsa_ax25.sax25_ndigis > AX25_MAX_DIGIS ||
+ addr_len < sizeof(struct sockaddr_ax25) +
+ sizeof(ax25_address) * fsa->fsa_ax25.sax25_ndigis) {
err = -EINVAL;
goto out_release;
}
@@ -1508,7 +1515,10 @@ static int ax25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)usax;
/* Valid number of digipeaters ? */
- if (usax->sax25_ndigis < 1 || usax->sax25_ndigis > AX25_MAX_DIGIS) {
+ if (usax->sax25_ndigis < 1 ||
+ usax->sax25_ndigis > AX25_MAX_DIGIS ||
+ addr_len < sizeof(struct sockaddr_ax25) +
+ sizeof(ax25_address) * usax->sax25_ndigis) {
err = -EINVAL;
goto out;
}
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 912d9c36fb1c..6f8d2fe114f6 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -135,7 +135,7 @@ static void batadv_iv_ogm_orig_free(struct batadv_orig_node *orig_node)
* Returns 0 on success, a negative error code otherwise.
*/
static int batadv_iv_ogm_orig_add_if(struct batadv_orig_node *orig_node,
- int max_if_num)
+ unsigned int max_if_num)
{
void *data_ptr;
size_t old_size;
@@ -155,10 +155,8 @@ static int batadv_iv_ogm_orig_add_if(struct batadv_orig_node *orig_node,
orig_node->bat_iv.bcast_own = data_ptr;
data_ptr = kmalloc_array(max_if_num, sizeof(u8), GFP_ATOMIC);
- if (!data_ptr) {
- kfree(orig_node->bat_iv.bcast_own);
+ if (!data_ptr)
goto unlock;
- }
memcpy(data_ptr, orig_node->bat_iv.bcast_own_sum,
(max_if_num - 1) * sizeof(u8));
@@ -183,9 +181,11 @@ unlock:
* Returns 0 on success, a negative error code otherwise.
*/
static int batadv_iv_ogm_orig_del_if(struct batadv_orig_node *orig_node,
- int max_if_num, int del_if_num)
+ unsigned int max_if_num,
+ unsigned int del_if_num)
{
- int chunk_size, ret = -ENOMEM, if_offset;
+ int ret = -ENOMEM;
+ size_t chunk_size, if_offset;
void *data_ptr = NULL;
spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
@@ -203,8 +203,9 @@ static int batadv_iv_ogm_orig_del_if(struct batadv_orig_node *orig_node,
memcpy(data_ptr, orig_node->bat_iv.bcast_own, del_if_num * chunk_size);
/* copy second part */
+ if_offset = (del_if_num + 1) * chunk_size;
memcpy((char *)data_ptr + del_if_num * chunk_size,
- orig_node->bat_iv.bcast_own + ((del_if_num + 1) * chunk_size),
+ (uint8_t *)orig_node->bat_iv.bcast_own + if_offset,
(max_if_num - del_if_num) * chunk_size);
free_bcast_own:
@@ -252,7 +253,8 @@ static struct batadv_orig_node *
batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const u8 *addr)
{
struct batadv_orig_node *orig_node;
- int size, hash_added;
+ int hash_added;
+ size_t size;
orig_node = batadv_orig_hash_find(bat_priv, addr);
if (orig_node)
@@ -314,14 +316,18 @@ static int batadv_iv_ogm_iface_enable(struct batadv_hard_iface *hard_iface)
unsigned char *ogm_buff;
u32 random_seqno;
+ mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex);
+
/* randomize initial seqno to avoid collision */
get_random_bytes(&random_seqno, sizeof(random_seqno));
atomic_set(&hard_iface->bat_iv.ogm_seqno, random_seqno);
hard_iface->bat_iv.ogm_buff_len = BATADV_OGM_HLEN;
ogm_buff = kmalloc(hard_iface->bat_iv.ogm_buff_len, GFP_ATOMIC);
- if (!ogm_buff)
+ if (!ogm_buff) {
+ mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex);
return -ENOMEM;
+ }
hard_iface->bat_iv.ogm_buff = ogm_buff;
@@ -333,36 +339,60 @@ static int batadv_iv_ogm_iface_enable(struct batadv_hard_iface *hard_iface)
batadv_ogm_packet->reserved = 0;
batadv_ogm_packet->tq = BATADV_TQ_MAX_VALUE;
+ mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex);
+
return 0;
}
static void batadv_iv_ogm_iface_disable(struct batadv_hard_iface *hard_iface)
{
+ mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex);
+
kfree(hard_iface->bat_iv.ogm_buff);
hard_iface->bat_iv.ogm_buff = NULL;
+
+ mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex);
}
static void batadv_iv_ogm_iface_update_mac(struct batadv_hard_iface *hard_iface)
{
struct batadv_ogm_packet *batadv_ogm_packet;
- unsigned char *ogm_buff = hard_iface->bat_iv.ogm_buff;
+ void *ogm_buff;
- batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff;
+ mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex);
+
+ ogm_buff = hard_iface->bat_iv.ogm_buff;
+ if (!ogm_buff)
+ goto unlock;
+
+ batadv_ogm_packet = ogm_buff;
ether_addr_copy(batadv_ogm_packet->orig,
hard_iface->net_dev->dev_addr);
ether_addr_copy(batadv_ogm_packet->prev_sender,
hard_iface->net_dev->dev_addr);
+
+unlock:
+ mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex);
}
static void
batadv_iv_ogm_primary_iface_set(struct batadv_hard_iface *hard_iface)
{
struct batadv_ogm_packet *batadv_ogm_packet;
- unsigned char *ogm_buff = hard_iface->bat_iv.ogm_buff;
+ void *ogm_buff;
- batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff;
+ mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex);
+
+ ogm_buff = hard_iface->bat_iv.ogm_buff;
+ if (!ogm_buff)
+ goto unlock;
+
+ batadv_ogm_packet = ogm_buff;
batadv_ogm_packet->flags = BATADV_PRIMARIES_FIRST_HOP;
batadv_ogm_packet->ttl = BATADV_TTL;
+
+unlock:
+ mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex);
}
/* when do we schedule our own ogm to be sent */
@@ -395,14 +425,19 @@ static u8 batadv_hop_penalty(u8 tq, const struct batadv_priv *bat_priv)
return new_tq;
}
-/* is there another aggregated packet here? */
-static bool batadv_iv_ogm_aggr_packet(int buff_pos, int packet_len,
- __be16 tvlv_len)
+static bool
+batadv_iv_ogm_aggr_packet(int buff_pos, int packet_len,
+ const struct batadv_ogm_packet *ogm_packet)
{
int next_buff_pos = 0;
- next_buff_pos += buff_pos + BATADV_OGM_HLEN;
- next_buff_pos += ntohs(tvlv_len);
+ /* check if there is enough space for the header */
+ next_buff_pos += buff_pos + sizeof(*ogm_packet);
+ if (next_buff_pos > packet_len)
+ return false;
+
+ /* check if there is enough space for the optional TVLV */
+ next_buff_pos += ntohs(ogm_packet->tvlv_len);
return (next_buff_pos <= packet_len) &&
(next_buff_pos <= BATADV_MAX_AGGREGATION_BYTES);
@@ -430,7 +465,7 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet,
/* adjust all flags and log packets */
while (batadv_iv_ogm_aggr_packet(buff_pos, forw_packet->packet_len,
- batadv_ogm_packet->tvlv_len)) {
+ batadv_ogm_packet)) {
/* we might have aggregated direct link packets with an
* ordinary base packet
*/
@@ -491,8 +526,10 @@ static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet)
if (WARN_ON(!forw_packet->if_outgoing))
goto out;
- if (WARN_ON(forw_packet->if_outgoing->soft_iface != soft_iface))
+ if (forw_packet->if_outgoing->soft_iface != soft_iface) {
+ pr_warn("%s: soft interface switch for queued OGM\n", __func__);
goto out;
+ }
if (forw_packet->if_incoming->if_status != BATADV_IF_ACTIVE)
goto out;
@@ -871,7 +908,7 @@ batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface)
u32 i;
size_t word_index;
u8 *w;
- int if_num;
+ unsigned int if_num;
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
@@ -892,7 +929,11 @@ batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface)
}
}
-static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
+/**
+ * batadv_iv_ogm_schedule_buff() - schedule submission of hardif ogm buffer
+ * @hard_iface: interface whose ogm buffer should be transmitted
+ */
+static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface)
{
struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
unsigned char **ogm_buff = &hard_iface->bat_iv.ogm_buff;
@@ -903,6 +944,12 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
u16 tvlv_len = 0;
unsigned long send_time;
+ lockdep_assert_held(&hard_iface->bat_iv.ogm_buff_mutex);
+
+ /* interface already disabled by batadv_iv_ogm_iface_disable */
+ if (!*ogm_buff)
+ return;
+
primary_if = batadv_primary_if_get_selected(bat_priv);
if (hard_iface == primary_if) {
@@ -954,6 +1001,17 @@ out:
batadv_hardif_free_ref(primary_if);
}
+static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
+{
+ if (hard_iface->if_status == BATADV_IF_NOT_IN_USE ||
+ hard_iface->if_status == BATADV_IF_TO_BE_REMOVED)
+ return;
+
+ mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex);
+ batadv_iv_ogm_schedule_buff(hard_iface);
+ mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex);
+}
+
/**
* batadv_iv_ogm_orig_update - use OGM to update corresponding data in an
* originator
@@ -982,7 +1040,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
struct batadv_neigh_node *tmp_neigh_node = NULL;
struct batadv_neigh_node *router = NULL;
struct batadv_orig_node *orig_node_tmp;
- int if_num;
+ unsigned int if_num;
u8 sum_orig, sum_neigh;
u8 *neigh_addr;
u8 tq_avg;
@@ -1140,9 +1198,10 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
u8 total_count;
u8 orig_eq_count, neigh_rq_count, neigh_rq_inv, tq_own;
unsigned int neigh_rq_inv_cube, neigh_rq_max_cube;
- int tq_asym_penalty, inv_asym_penalty, if_num, ret = 0;
+ int if_num, ret = 0;
+ unsigned int tq_asym_penalty, inv_asym_penalty;
unsigned int combined_tq;
- int tq_iface_penalty;
+ unsigned int tq_iface_penalty;
/* find corresponding one hop neighbor */
rcu_read_lock();
@@ -1179,7 +1238,7 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
orig_node->last_seen = jiffies;
/* find packet count of corresponding one hop neighbor */
- spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+ spin_lock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock);
if_num = if_incoming->if_num;
orig_eq_count = orig_neigh_node->bat_iv.bcast_own_sum[if_num];
neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, if_outgoing);
@@ -1189,7 +1248,7 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
} else {
neigh_rq_count = 0;
}
- spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+ spin_unlock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock);
/* pay attention to not get a value bigger than 100 % */
if (orig_eq_count > neigh_rq_count)
@@ -1646,9 +1705,9 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset,
if (is_my_orig) {
unsigned long *word;
- int offset;
+ size_t offset;
s32 bit_pos;
- s16 if_num;
+ unsigned int if_num;
u8 *weight;
orig_neigh_node = batadv_iv_ogm_orig_get(bat_priv,
@@ -1748,7 +1807,7 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb,
/* unpack the aggregated packets and process them one by one */
while (batadv_iv_ogm_aggr_packet(ogm_offset, skb_headlen(skb),
- ogm_packet->tvlv_len)) {
+ ogm_packet)) {
batadv_iv_ogm_process(skb, ogm_offset, if_incoming);
ogm_offset += BATADV_OGM_HLEN;
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index c5208136e3fc..355a18d373e6 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -73,11 +73,12 @@ static inline u32 batadv_choose_claim(const void *data, u32 size)
/* return the index of the backbone gateway */
static inline u32 batadv_choose_backbone_gw(const void *data, u32 size)
{
- const struct batadv_bla_claim *claim = (struct batadv_bla_claim *)data;
+ const struct batadv_bla_backbone_gw *gw;
u32 hash = 0;
- hash = jhash(&claim->addr, sizeof(claim->addr), hash);
- hash = jhash(&claim->vid, sizeof(claim->vid), hash);
+ gw = (struct batadv_bla_backbone_gw *)data;
+ hash = jhash(&gw->orig, sizeof(gw->orig), hash);
+ hash = jhash(&gw->vid, sizeof(gw->vid), hash);
return hash % size;
}
@@ -129,7 +130,19 @@ batadv_backbone_gw_free_ref(struct batadv_bla_backbone_gw *backbone_gw)
/* finally deinitialize the claim */
static void batadv_claim_release(struct batadv_bla_claim *claim)
{
- batadv_backbone_gw_free_ref(claim->backbone_gw);
+ struct batadv_bla_backbone_gw *old_backbone_gw;
+
+ spin_lock_bh(&claim->backbone_lock);
+ old_backbone_gw = claim->backbone_gw;
+ claim->backbone_gw = NULL;
+ spin_unlock_bh(&claim->backbone_lock);
+
+ spin_lock_bh(&old_backbone_gw->crc_lock);
+ old_backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN);
+ spin_unlock_bh(&old_backbone_gw->crc_lock);
+
+ batadv_backbone_gw_free_ref(old_backbone_gw);
+
kfree_rcu(claim, rcu);
}
@@ -256,7 +269,9 @@ batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw)
}
/* all claims gone, initialize CRC */
+ spin_lock_bh(&backbone_gw->crc_lock);
backbone_gw->crc = BATADV_BLA_CRC_INIT;
+ spin_unlock_bh(&backbone_gw->crc_lock);
}
/**
@@ -352,9 +367,12 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac,
break;
}
- if (vid & BATADV_VLAN_HAS_TAG)
+ if (vid & BATADV_VLAN_HAS_TAG) {
skb = vlan_insert_tag(skb, htons(ETH_P_8021Q),
vid & VLAN_VID_MASK);
+ if (!skb)
+ goto out;
+ }
skb_reset_mac_header(skb);
skb->protocol = eth_type_trans(skb, soft_iface);
@@ -363,7 +381,10 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac,
skb->len + ETH_HLEN);
soft_iface->last_rx = jiffies;
- netif_rx(skb);
+ if (in_interrupt())
+ netif_rx(skb);
+ else
+ netif_rx_ni(skb);
out:
if (primary_if)
batadv_hardif_free_ref(primary_if);
@@ -404,6 +425,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig,
entry->lasttime = jiffies;
entry->crc = BATADV_BLA_CRC_INIT;
entry->bat_priv = bat_priv;
+ spin_lock_init(&entry->crc_lock);
atomic_set(&entry->request_sent, 0);
atomic_set(&entry->wait_periods, 0);
ether_addr_copy(entry->orig, orig);
@@ -553,7 +575,9 @@ static void batadv_bla_send_announce(struct batadv_priv *bat_priv,
__be16 crc;
memcpy(mac, batadv_announce_mac, 4);
+ spin_lock_bh(&backbone_gw->crc_lock);
crc = htons(backbone_gw->crc);
+ spin_unlock_bh(&backbone_gw->crc_lock);
memcpy(&mac[4], &crc, 2);
batadv_bla_send_claim(bat_priv, mac, backbone_gw->vid,
@@ -571,8 +595,10 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
const u8 *mac, const unsigned short vid,
struct batadv_bla_backbone_gw *backbone_gw)
{
+ struct batadv_bla_backbone_gw *old_backbone_gw;
struct batadv_bla_claim *claim;
struct batadv_bla_claim search_claim;
+ bool remove_crc = false;
int hash_added;
ether_addr_copy(search_claim.addr, mac);
@@ -586,8 +612,10 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
return;
ether_addr_copy(claim->addr, mac);
+ spin_lock_init(&claim->backbone_lock);
claim->vid = vid;
claim->lasttime = jiffies;
+ atomic_inc(&backbone_gw->refcount);
claim->backbone_gw = backbone_gw;
atomic_set(&claim->refcount, 2);
@@ -614,20 +642,55 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
"bla_add_claim(): changing ownership for %pM, vid %d\n",
mac, BATADV_PRINT_VID(vid));
- claim->backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN);
- batadv_backbone_gw_free_ref(claim->backbone_gw);
+ remove_crc = true;
}
- /* set (new) backbone gw */
+
+ /* replace backbone_gw atomically and adjust reference counters */
+ spin_lock_bh(&claim->backbone_lock);
+ old_backbone_gw = claim->backbone_gw;
atomic_inc(&backbone_gw->refcount);
claim->backbone_gw = backbone_gw;
+ spin_unlock_bh(&claim->backbone_lock);
+
+ if (remove_crc) {
+ /* remove claim address from old backbone_gw */
+ spin_lock_bh(&old_backbone_gw->crc_lock);
+ old_backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN);
+ spin_unlock_bh(&old_backbone_gw->crc_lock);
+ }
+
+ batadv_backbone_gw_free_ref(old_backbone_gw);
+ /* add claim address to new backbone_gw */
+ spin_lock_bh(&backbone_gw->crc_lock);
backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN);
+ spin_unlock_bh(&backbone_gw->crc_lock);
backbone_gw->lasttime = jiffies;
claim_free_ref:
batadv_claim_free_ref(claim);
}
+/**
+ * batadv_bla_claim_get_backbone_gw - Get valid reference for backbone_gw of
+ * claim
+ * @claim: claim whose backbone_gw should be returned
+ *
+ * Return: valid reference to claim::backbone_gw
+ */
+static struct batadv_bla_backbone_gw *
+batadv_bla_claim_get_backbone_gw(struct batadv_bla_claim *claim)
+{
+ struct batadv_bla_backbone_gw *backbone_gw;
+
+ spin_lock_bh(&claim->backbone_lock);
+ backbone_gw = claim->backbone_gw;
+ atomic_inc(&backbone_gw->refcount);
+ spin_unlock_bh(&claim->backbone_lock);
+
+ return backbone_gw;
+}
+
/* Delete a claim from the claim hash which has the
* given mac address and vid.
*/
@@ -635,6 +698,8 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv,
const u8 *mac, const unsigned short vid)
{
struct batadv_bla_claim search_claim, *claim;
+ struct batadv_bla_claim *claim_removed_entry;
+ struct hlist_node *claim_removed_node;
ether_addr_copy(search_claim.addr, mac);
search_claim.vid = vid;
@@ -645,12 +710,18 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv,
batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_del_claim(): %pM, vid %d\n",
mac, BATADV_PRINT_VID(vid));
- batadv_hash_remove(bat_priv->bla.claim_hash, batadv_compare_claim,
- batadv_choose_claim, claim);
- batadv_claim_free_ref(claim); /* reference from the hash is gone */
+ claim_removed_node = batadv_hash_remove(bat_priv->bla.claim_hash,
+ batadv_compare_claim,
+ batadv_choose_claim, claim);
+ if (!claim_removed_node)
+ goto free_claim;
- claim->backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN);
+ /* reference from the hash is gone */
+ claim_removed_entry = hlist_entry(claim_removed_node,
+ struct batadv_bla_claim, hash_entry);
+ batadv_claim_free_ref(claim_removed_entry);
+free_claim:
/* don't need the reference from hash_find() anymore */
batadv_claim_free_ref(claim);
}
@@ -660,7 +731,7 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr,
u8 *backbone_addr, unsigned short vid)
{
struct batadv_bla_backbone_gw *backbone_gw;
- u16 crc;
+ u16 backbone_crc, crc;
if (memcmp(an_addr, batadv_announce_mac, 4) != 0)
return 0;
@@ -679,12 +750,16 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr,
"handle_announce(): ANNOUNCE vid %d (sent by %pM)... CRC = %#.4x\n",
BATADV_PRINT_VID(vid), backbone_gw->orig, crc);
- if (backbone_gw->crc != crc) {
+ spin_lock_bh(&backbone_gw->crc_lock);
+ backbone_crc = backbone_gw->crc;
+ spin_unlock_bh(&backbone_gw->crc_lock);
+
+ if (backbone_crc != crc) {
batadv_dbg(BATADV_DBG_BLA, backbone_gw->bat_priv,
"handle_announce(): CRC FAILED for %pM/%d (my = %#.4x, sent = %#.4x)\n",
backbone_gw->orig,
BATADV_PRINT_VID(backbone_gw->vid),
- backbone_gw->crc, crc);
+ backbone_crc, crc);
batadv_bla_send_request(backbone_gw);
} else {
@@ -1056,6 +1131,7 @@ static void batadv_bla_purge_claims(struct batadv_priv *bat_priv,
struct batadv_hard_iface *primary_if,
int now)
{
+ struct batadv_bla_backbone_gw *backbone_gw;
struct batadv_bla_claim *claim;
struct hlist_head *head;
struct batadv_hashtable *hash;
@@ -1070,14 +1146,17 @@ static void batadv_bla_purge_claims(struct batadv_priv *bat_priv,
rcu_read_lock();
hlist_for_each_entry_rcu(claim, head, hash_entry) {
+ backbone_gw = batadv_bla_claim_get_backbone_gw(claim);
if (now)
goto purge_now;
- if (!batadv_compare_eth(claim->backbone_gw->orig,
+
+ if (!batadv_compare_eth(backbone_gw->orig,
primary_if->net_dev->dev_addr))
- continue;
+ goto skip;
+
if (!batadv_has_timed_out(claim->lasttime,
BATADV_BLA_CLAIM_TIMEOUT))
- continue;
+ goto skip;
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_purge_claims(): %pM, vid %d, time out\n",
@@ -1085,8 +1164,10 @@ static void batadv_bla_purge_claims(struct batadv_priv *bat_priv,
purge_now:
batadv_handle_unclaim(bat_priv, primary_if,
- claim->backbone_gw->orig,
+ backbone_gw->orig,
claim->addr, claim->vid);
+skip:
+ batadv_backbone_gw_free_ref(backbone_gw);
}
rcu_read_unlock();
}
@@ -1265,10 +1346,14 @@ int batadv_bla_init(struct batadv_priv *bat_priv)
return 0;
bat_priv->bla.claim_hash = batadv_hash_new(128);
- bat_priv->bla.backbone_hash = batadv_hash_new(32);
+ if (!bat_priv->bla.claim_hash)
+ return -ENOMEM;
- if (!bat_priv->bla.claim_hash || !bat_priv->bla.backbone_hash)
+ bat_priv->bla.backbone_hash = batadv_hash_new(32);
+ if (!bat_priv->bla.backbone_hash) {
+ batadv_hash_destroy(bat_priv->bla.claim_hash);
return -ENOMEM;
+ }
batadv_hash_set_lock_class(bat_priv->bla.claim_hash,
&batadv_claim_hash_lock_class_key);
@@ -1285,31 +1370,32 @@ int batadv_bla_init(struct batadv_priv *bat_priv)
}
/**
- * batadv_bla_check_bcast_duplist
+ * batadv_bla_check_duplist() - Check if a frame is in the broadcast dup.
* @bat_priv: the bat priv with all the soft interface information
- * @skb: contains the bcast_packet to be checked
+ * @skb: contains the multicast packet to be checked
+ * @payload_ptr: pointer to position inside the head buffer of the skb
+ * marking the start of the data to be CRC'ed
+ * @orig: originator mac address, NULL if unknown
*
- * check if it is on our broadcast list. Another gateway might
- * have sent the same packet because it is connected to the same backbone,
- * so we have to remove this duplicate.
+ * Check if it is on our broadcast list. Another gateway might have sent the
+ * same packet because it is connected to the same backbone, so we have to
+ * remove this duplicate.
*
* This is performed by checking the CRC, which will tell us
* with a good chance that it is the same packet. If it is furthermore
* sent by another host, drop it. We allow equal packets from
* the same host however as this might be intended.
*/
-int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
- struct sk_buff *skb)
+static int batadv_bla_check_duplist(struct batadv_priv *bat_priv,
+ struct sk_buff *skb, u8 *payload_ptr,
+ const u8 *orig)
{
int i, curr, ret = 0;
__be32 crc;
- struct batadv_bcast_packet *bcast_packet;
struct batadv_bcast_duplist_entry *entry;
- bcast_packet = (struct batadv_bcast_packet *)skb->data;
-
/* calculate the crc ... */
- crc = batadv_skb_crc32(skb, (u8 *)(bcast_packet + 1));
+ crc = batadv_skb_crc32(skb, payload_ptr);
spin_lock_bh(&bat_priv->bla.bcast_duplist_lock);
@@ -1328,8 +1414,21 @@ int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
if (entry->crc != crc)
continue;
- if (batadv_compare_eth(entry->orig, bcast_packet->orig))
- continue;
+ /* are the originators both known and not anonymous? */
+ if (orig && !is_zero_ether_addr(orig) &&
+ !is_zero_ether_addr(entry->orig)) {
+ /* If known, check if the new frame came from
+ * the same originator:
+ * We are safe to take identical frames from the
+ * same orig, if known, as multiplications in
+ * the mesh are detected via the (orig, seqno) pair.
+ * So we can be a bit more liberal here and allow
+ * identical frames from the same orig which the source
+ * host might have sent multiple times on purpose.
+ */
+ if (batadv_compare_eth(entry->orig, orig))
+ continue;
+ }
/* this entry seems to match: same crc, not too old,
* and from another gw. therefore return 1 to forbid it.
@@ -1345,7 +1444,14 @@ int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
entry = &bat_priv->bla.bcast_duplist[curr];
entry->crc = crc;
entry->entrytime = jiffies;
- ether_addr_copy(entry->orig, bcast_packet->orig);
+
+ /* known originator */
+ if (orig)
+ ether_addr_copy(entry->orig, orig);
+ /* anonymous originator */
+ else
+ eth_zero_addr(entry->orig);
+
bat_priv->bla.bcast_duplist_curr = curr;
out:
@@ -1355,6 +1461,48 @@ out:
}
/**
+ * batadv_bla_check_ucast_duplist() - Check if a frame is in the broadcast dup.
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: contains the multicast packet to be checked, decapsulated from a
+ * unicast_packet
+ *
+ * Check if it is on our broadcast list. Another gateway might have sent the
+ * same packet because it is connected to the same backbone, so we have to
+ * remove this duplicate.
+ *
+ * Return: true if a packet is in the duplicate list, false otherwise.
+ */
+static bool batadv_bla_check_ucast_duplist(struct batadv_priv *bat_priv,
+ struct sk_buff *skb)
+{
+ return batadv_bla_check_duplist(bat_priv, skb, (u8 *)skb->data, NULL);
+}
+
+/**
+ * batadv_bla_check_bcast_duplist() - Check if a frame is in the broadcast dup.
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: contains the bcast_packet to be checked
+ *
+ * Check if it is on our broadcast list. Another gateway might have sent the
+ * same packet because it is connected to the same backbone, so we have to
+ * remove this duplicate.
+ *
+ * Return: true if a packet is in the duplicate list, false otherwise.
+ */
+int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
+ struct sk_buff *skb)
+{
+ struct batadv_bcast_packet *bcast_packet;
+ u8 *payload_ptr;
+
+ bcast_packet = (struct batadv_bcast_packet *)skb->data;
+ payload_ptr = (u8 *)(bcast_packet + 1);
+
+ return batadv_bla_check_duplist(bat_priv, skb, payload_ptr,
+ bcast_packet->orig);
+}
+
+/**
* batadv_bla_is_backbone_gw_orig
* @bat_priv: the bat priv with all the soft interface information
* @orig: originator mac address
@@ -1457,7 +1605,7 @@ void batadv_bla_free(struct batadv_priv *bat_priv)
* @bat_priv: the bat priv with all the soft interface information
* @skb: the frame to be checked
* @vid: the VLAN ID of the frame
- * @is_bcast: the packet came in a broadcast packet type.
+ * @packet_type: the batman packet type this frame came in
*
* bla_rx avoidance checks if:
* * we have to race for a claim
@@ -1468,11 +1616,13 @@ void batadv_bla_free(struct batadv_priv *bat_priv)
* process the skb.
*/
int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
- unsigned short vid, bool is_bcast)
+ unsigned short vid, int packet_type)
{
+ struct batadv_bla_backbone_gw *backbone_gw;
struct ethhdr *ethhdr;
struct batadv_bla_claim search_claim, *claim = NULL;
struct batadv_hard_iface *primary_if;
+ bool own_claim;
int ret;
ethhdr = eth_hdr(skb);
@@ -1485,9 +1635,32 @@ int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
goto allow;
if (unlikely(atomic_read(&bat_priv->bla.num_requests)))
- /* don't allow broadcasts while requests are in flight */
- if (is_multicast_ether_addr(ethhdr->h_dest) && is_bcast)
- goto handled;
+ /* don't allow multicast packets while requests are in flight */
+ if (is_multicast_ether_addr(ethhdr->h_dest))
+ /* Both broadcast flooding or multicast-via-unicasts
+ * delivery might send to multiple backbone gateways
+ * sharing the same LAN and therefore need to coordinate
+ * which backbone gateway forwards into the LAN,
+ * by claiming the payload source address.
+ *
+ * Broadcast flooding and multicast-via-unicasts
+ * delivery use the following two batman packet types.
+ * Note: explicitly exclude BATADV_UNICAST_4ADDR,
+ * as the DHCP gateway feature will send explicitly
+ * to only one BLA gateway, so the claiming process
+ * should be avoided there.
+ */
+ if (packet_type == BATADV_BCAST ||
+ packet_type == BATADV_UNICAST)
+ goto handled;
+
+ /* potential duplicates from foreign BLA backbone gateways via
+ * multicast-in-unicast packets
+ */
+ if (is_multicast_ether_addr(ethhdr->h_dest) &&
+ packet_type == BATADV_UNICAST &&
+ batadv_bla_check_ucast_duplist(bat_priv, skb))
+ goto handled;
ether_addr_copy(search_claim.addr, ethhdr->h_source);
search_claim.vid = vid;
@@ -1504,20 +1677,25 @@ int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
}
/* if it is our own claim ... */
- if (batadv_compare_eth(claim->backbone_gw->orig,
- primary_if->net_dev->dev_addr)) {
+ backbone_gw = batadv_bla_claim_get_backbone_gw(claim);
+ own_claim = batadv_compare_eth(backbone_gw->orig,
+ primary_if->net_dev->dev_addr);
+ batadv_backbone_gw_free_ref(backbone_gw);
+
+ if (own_claim) {
/* ... allow it in any case */
claim->lasttime = jiffies;
goto allow;
}
- /* if it is a broadcast ... */
- if (is_multicast_ether_addr(ethhdr->h_dest) && is_bcast) {
+ /* if it is a multicast ... */
+ if (is_multicast_ether_addr(ethhdr->h_dest) &&
+ (packet_type == BATADV_BCAST || packet_type == BATADV_UNICAST)) {
/* ... drop it. the responsible gateway is in charge.
*
- * We need to check is_bcast because with the gateway
+ * We need to check packet type because with the gateway
* feature, broadcasts (like DHCP requests) may be sent
- * using a unicast packet type.
+ * using a unicast 4 address packet type. See comment above.
*/
goto handled;
} else {
@@ -1568,7 +1746,9 @@ int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb,
{
struct ethhdr *ethhdr;
struct batadv_bla_claim search_claim, *claim = NULL;
+ struct batadv_bla_backbone_gw *backbone_gw;
struct batadv_hard_iface *primary_if;
+ bool client_roamed;
int ret = 0;
primary_if = batadv_primary_if_get_selected(bat_priv);
@@ -1598,8 +1778,12 @@ int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb,
goto allow;
/* check if we are responsible. */
- if (batadv_compare_eth(claim->backbone_gw->orig,
- primary_if->net_dev->dev_addr)) {
+ backbone_gw = batadv_bla_claim_get_backbone_gw(claim);
+ client_roamed = batadv_compare_eth(backbone_gw->orig,
+ primary_if->net_dev->dev_addr);
+ batadv_backbone_gw_free_ref(backbone_gw);
+
+ if (client_roamed) {
/* if yes, the client has roamed and we have
* to unclaim it.
*/
@@ -1652,9 +1836,11 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset)
struct net_device *net_dev = (struct net_device *)seq->private;
struct batadv_priv *bat_priv = netdev_priv(net_dev);
struct batadv_hashtable *hash = bat_priv->bla.claim_hash;
+ struct batadv_bla_backbone_gw *backbone_gw;
struct batadv_bla_claim *claim;
struct batadv_hard_iface *primary_if;
struct hlist_head *head;
+ u16 backbone_crc;
u32 i;
bool is_own;
u8 *primary_addr;
@@ -1675,13 +1861,21 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset)
rcu_read_lock();
hlist_for_each_entry_rcu(claim, head, hash_entry) {
- is_own = batadv_compare_eth(claim->backbone_gw->orig,
+ backbone_gw = batadv_bla_claim_get_backbone_gw(claim);
+
+ is_own = batadv_compare_eth(backbone_gw->orig,
primary_addr);
+
+ spin_lock_bh(&backbone_gw->crc_lock);
+ backbone_crc = backbone_gw->crc;
+ spin_unlock_bh(&backbone_gw->crc_lock);
seq_printf(seq, " * %pM on %5d by %pM [%c] (%#.4x)\n",
claim->addr, BATADV_PRINT_VID(claim->vid),
- claim->backbone_gw->orig,
+ backbone_gw->orig,
(is_own ? 'x' : ' '),
- claim->backbone_gw->crc);
+ backbone_crc);
+
+ batadv_backbone_gw_free_ref(backbone_gw);
}
rcu_read_unlock();
}
@@ -1700,6 +1894,7 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset)
struct batadv_hard_iface *primary_if;
struct hlist_head *head;
int secs, msecs;
+ u16 backbone_crc;
u32 i;
bool is_own;
u8 *primary_addr;
@@ -1730,10 +1925,14 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset)
if (is_own)
continue;
+ spin_lock_bh(&backbone_gw->crc_lock);
+ backbone_crc = backbone_gw->crc;
+ spin_unlock_bh(&backbone_gw->crc_lock);
+
seq_printf(seq, " * %pM on %5d %4i.%03is (%#.4x)\n",
backbone_gw->orig,
BATADV_PRINT_VID(backbone_gw->vid), secs,
- msecs, backbone_gw->crc);
+ msecs, backbone_crc);
}
rcu_read_unlock();
}
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index 025152b34282..d1553c46df8c 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -27,7 +27,7 @@ struct sk_buff;
#ifdef CONFIG_BATMAN_ADV_BLA
int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
- unsigned short vid, bool is_bcast);
+ unsigned short vid, int packet_type);
int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb,
unsigned short vid);
int batadv_bla_is_backbone_gw(struct sk_buff *skb,
@@ -50,7 +50,7 @@ void batadv_bla_free(struct batadv_priv *bat_priv);
static inline int batadv_bla_rx(struct batadv_priv *bat_priv,
struct sk_buff *skb, unsigned short vid,
- bool is_bcast)
+ int packet_type)
{
return 0;
}
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index c4c1e8030ba0..b905763dc2e7 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -19,6 +19,7 @@
#include "main.h"
#include <linux/compiler.h>
+#include <linux/dcache.h>
#include <linux/debugfs.h>
#include <linux/device.h>
#include <linux/errno.h>
@@ -213,6 +214,7 @@ static const struct file_operations batadv_log_fops = {
.read = batadv_log_read,
.poll = batadv_log_poll,
.llseek = no_llseek,
+ .owner = THIS_MODULE,
};
static int batadv_debug_log_setup(struct batadv_priv *bat_priv)
@@ -507,6 +509,25 @@ out:
}
/**
+ * batadv_debugfs_rename_hardif() - Fix debugfs path for renamed hardif
+ * @hard_iface: hard interface which was renamed
+ */
+void batadv_debugfs_rename_hardif(struct batadv_hard_iface *hard_iface)
+{
+ const char *name = hard_iface->net_dev->name;
+ struct dentry *dir;
+ struct dentry *d;
+
+ dir = hard_iface->debug_dir;
+ if (!dir)
+ return;
+
+ d = debugfs_rename(dir->d_parent, dir, dir->d_parent, name);
+ if (!d)
+ pr_err("Can't rename debugfs dir to %s\n", name);
+}
+
+/**
* batadv_debugfs_del_hardif - delete the base directory for a hard interface
* in debugfs.
* @hard_iface: hard interface which is deleted.
@@ -561,6 +582,26 @@ out:
return -ENOMEM;
}
+/**
+ * batadv_debugfs_rename_meshif() - Fix debugfs path for renamed softif
+ * @dev: net_device which was renamed
+ */
+void batadv_debugfs_rename_meshif(struct net_device *dev)
+{
+ struct batadv_priv *bat_priv = netdev_priv(dev);
+ const char *name = dev->name;
+ struct dentry *dir;
+ struct dentry *d;
+
+ dir = bat_priv->debug_dir;
+ if (!dir)
+ return;
+
+ d = debugfs_rename(dir->d_parent, dir, dir->d_parent, name);
+ if (!d)
+ pr_err("Can't rename debugfs dir to %s\n", name);
+}
+
void batadv_debugfs_del_meshif(struct net_device *dev)
{
struct batadv_priv *bat_priv = netdev_priv(dev);
diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h
index 80ab8d6f0ab3..347f793a18b2 100644
--- a/net/batman-adv/debugfs.h
+++ b/net/batman-adv/debugfs.h
@@ -31,8 +31,10 @@ struct net_device;
void batadv_debugfs_init(void);
void batadv_debugfs_destroy(void);
int batadv_debugfs_add_meshif(struct net_device *dev);
+void batadv_debugfs_rename_meshif(struct net_device *dev);
void batadv_debugfs_del_meshif(struct net_device *dev);
int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface);
+void batadv_debugfs_rename_hardif(struct batadv_hard_iface *hard_iface);
void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface);
#else
@@ -50,6 +52,10 @@ static inline int batadv_debugfs_add_meshif(struct net_device *dev)
return 0;
}
+static inline void batadv_debugfs_rename_meshif(struct net_device *dev)
+{
+}
+
static inline void batadv_debugfs_del_meshif(struct net_device *dev)
{
}
@@ -61,6 +67,11 @@ int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface)
}
static inline
+void batadv_debugfs_rename_hardif(struct batadv_hard_iface *hard_iface)
+{
+}
+
+static inline
void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface)
{
}
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index c2dff7c6e960..769683da8d9d 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -226,6 +226,7 @@ static u32 batadv_hash_dat(const void *data, u32 size)
u32 hash = 0;
const struct batadv_dat_entry *dat = data;
const unsigned char *key;
+ __be16 vid;
u32 i;
key = (const unsigned char *)&dat->ip;
@@ -235,7 +236,8 @@ static u32 batadv_hash_dat(const void *data, u32 size)
hash ^= (hash >> 6);
}
- key = (const unsigned char *)&dat->vid;
+ vid = htons(dat->vid);
+ key = (__force const unsigned char *)&vid;
for (i = 0; i < sizeof(dat->vid); i++) {
hash += key[i];
hash += (hash << 10);
@@ -991,15 +993,19 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
if (!skb_new)
goto out;
- if (vid & BATADV_VLAN_HAS_TAG)
+ if (vid & BATADV_VLAN_HAS_TAG) {
skb_new = vlan_insert_tag(skb_new, htons(ETH_P_8021Q),
vid & VLAN_VID_MASK);
+ if (!skb_new)
+ goto out;
+ }
skb_reset_mac_header(skb_new);
skb_new->protocol = eth_type_trans(skb_new,
bat_priv->soft_iface);
- bat_priv->stats.rx_packets++;
- bat_priv->stats.rx_bytes += skb->len + ETH_HLEN + hdr_size;
+ batadv_inc_counter(bat_priv, BATADV_CNT_RX);
+ batadv_add_counter(bat_priv, BATADV_CNT_RX_BYTES,
+ skb->len + ETH_HLEN + hdr_size);
bat_priv->soft_iface->last_rx = jiffies;
netif_rx(skb_new);
@@ -1071,9 +1077,12 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
*/
skb_reset_mac_header(skb_new);
- if (vid & BATADV_VLAN_HAS_TAG)
+ if (vid & BATADV_VLAN_HAS_TAG) {
skb_new = vlan_insert_tag(skb_new, htons(ETH_P_8021Q),
vid & VLAN_VID_MASK);
+ if (!skb_new)
+ goto out;
+ }
/* To preserve backwards compatibility, the node has to choose the outgoing
* format based on the incoming request packet type. The assumption is
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index d50c3b003dc9..371f50804fc2 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -233,8 +233,10 @@ err_unlock:
spin_unlock_bh(&chain->lock);
err:
- if (!ret)
+ if (!ret) {
kfree(frag_entry_new);
+ kfree_skb(skb);
+ }
return ret;
}
@@ -329,9 +331,9 @@ bool batadv_frag_skb_buffer(struct sk_buff **skb,
goto out_err;
out:
- *skb = skb_out;
ret = true;
out_err:
+ *skb = skb_out;
return ret;
}
@@ -392,9 +394,10 @@ out:
/**
* batadv_frag_create - create a fragment from skb
+ * @net_dev: outgoing device for fragment
* @skb: skb to create fragment from
* @frag_head: header to use in new fragment
- * @mtu: size of new fragment
+ * @fragment_size: size of new fragment
*
* Split the passed skb into two fragments: A new one with size matching the
* passed mtu and the old one with the rest. The new skb contains data from the
@@ -402,22 +405,25 @@ out:
*
* Returns the new fragment, NULL on error.
*/
-static struct sk_buff *batadv_frag_create(struct sk_buff *skb,
+static struct sk_buff *batadv_frag_create(struct net_device *net_dev,
+ struct sk_buff *skb,
struct batadv_frag_packet *frag_head,
- unsigned int mtu)
+ unsigned int fragment_size)
{
+ unsigned int ll_reserved = LL_RESERVED_SPACE(net_dev);
+ unsigned int tailroom = net_dev->needed_tailroom;
struct sk_buff *skb_fragment;
unsigned header_size = sizeof(*frag_head);
- unsigned fragment_size = mtu - header_size;
+ unsigned mtu = fragment_size + header_size;
- skb_fragment = netdev_alloc_skb(NULL, mtu + ETH_HLEN);
+ skb_fragment = dev_alloc_skb(ll_reserved + mtu + tailroom);
if (!skb_fragment)
goto err;
skb->priority = TC_PRIO_CONTROL;
/* Eat the last mtu-bytes of the skb */
- skb_reserve(skb_fragment, header_size + ETH_HLEN);
+ skb_reserve(skb_fragment, ll_reserved + header_size);
skb_split(skb, skb_fragment, skb->len - fragment_size);
/* Add the header */
@@ -440,13 +446,14 @@ bool batadv_frag_send_packet(struct sk_buff *skb,
struct batadv_orig_node *orig_node,
struct batadv_neigh_node *neigh_node)
{
+ struct net_device *net_dev = neigh_node->if_incoming->net_dev;
struct batadv_priv *bat_priv;
struct batadv_hard_iface *primary_if = NULL;
struct batadv_frag_packet frag_header;
struct sk_buff *skb_fragment;
- unsigned mtu = neigh_node->if_incoming->net_dev->mtu;
+ unsigned mtu = net_dev->mtu;
unsigned header_size = sizeof(frag_header);
- unsigned max_fragment_size, max_packet_size;
+ unsigned max_fragment_size, num_fragments;
bool ret = false;
/* To avoid merge and refragmentation at next-hops we never send
@@ -454,10 +461,15 @@ bool batadv_frag_send_packet(struct sk_buff *skb,
*/
mtu = min_t(unsigned, mtu, BATADV_FRAG_MAX_FRAG_SIZE);
max_fragment_size = mtu - header_size;
- max_packet_size = max_fragment_size * BATADV_FRAG_MAX_FRAGMENTS;
+
+ if (skb->len == 0 || max_fragment_size == 0)
+ goto out_err;
+
+ num_fragments = (skb->len - 1) / max_fragment_size + 1;
+ max_fragment_size = (skb->len - 1) / num_fragments + 1;
/* Don't even try to fragment, if we need more than 16 fragments */
- if (skb->len > max_packet_size)
+ if (num_fragments > BATADV_FRAG_MAX_FRAGMENTS)
goto out_err;
bat_priv = orig_node->bat_priv;
@@ -478,7 +490,12 @@ bool batadv_frag_send_packet(struct sk_buff *skb,
/* Eat and send fragments from the tail of skb */
while (skb->len > max_fragment_size) {
- skb_fragment = batadv_frag_create(skb, &frag_header, mtu);
+ /* The initial check in this function should cover this case */
+ if (frag_header.no == BATADV_FRAG_MAX_FRAGMENTS - 1)
+ goto out_err;
+
+ skb_fragment = batadv_frag_create(net_dev, skb, &frag_header,
+ max_fragment_size);
if (!skb_fragment)
goto out_err;
@@ -488,17 +505,15 @@ bool batadv_frag_send_packet(struct sk_buff *skb,
batadv_send_skb_packet(skb_fragment, neigh_node->if_incoming,
neigh_node->addr);
frag_header.no++;
-
- /* The initial check in this function should cover this case */
- if (frag_header.no == BATADV_FRAG_MAX_FRAGMENTS - 1)
- goto out_err;
}
- /* Make room for the fragment header. */
- if (batadv_skb_head_push(skb, header_size) < 0 ||
- pskb_expand_head(skb, header_size + ETH_HLEN, 0, GFP_ATOMIC) < 0)
+ /* make sure that there is at least enough head for the fragmentation
+ * and ethernet headers
+ */
+ if (skb_cow_head(skb, ETH_HLEN + header_size) < 0)
goto out_err;
+ skb_push(skb, header_size);
memcpy(skb->data, &frag_header, header_size);
/* Send the last fragment */
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 6abfba1e227f..5fdb88f72b68 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -29,6 +29,7 @@
#include <linux/ipv6.h>
#include <linux/kernel.h>
#include <linux/list.h>
+#include <linux/lockdep.h>
#include <linux/netdevice.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
@@ -413,6 +414,9 @@ out:
* @bat_priv: the bat priv with all the soft interface information
* @orig_node: originator announcing gateway capabilities
* @gateway: announced bandwidth information
+ *
+ * Has to be called with the appropriate locks being acquired
+ * (gw.list_lock).
*/
static void batadv_gw_node_add(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
@@ -420,6 +424,8 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv,
{
struct batadv_gw_node *gw_node;
+ lockdep_assert_held(&bat_priv->gw.list_lock);
+
if (gateway->bandwidth_down == 0)
return;
@@ -438,9 +444,7 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv,
gw_node->bandwidth_up = ntohl(gateway->bandwidth_up);
atomic_set(&gw_node->refcount, 1);
- spin_lock_bh(&bat_priv->gw.list_lock);
hlist_add_head_rcu(&gw_node->list, &bat_priv->gw.list);
- spin_unlock_bh(&bat_priv->gw.list_lock);
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"Found new gateway %pM -> gw bandwidth: %u.%u/%u.%u MBit\n",
@@ -493,11 +497,14 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv,
{
struct batadv_gw_node *gw_node, *curr_gw = NULL;
+ spin_lock_bh(&bat_priv->gw.list_lock);
gw_node = batadv_gw_node_get(bat_priv, orig_node);
if (!gw_node) {
batadv_gw_node_add(bat_priv, orig_node, gateway);
+ spin_unlock_bh(&bat_priv->gw.list_lock);
goto out;
}
+ spin_unlock_bh(&bat_priv->gw.list_lock);
if ((gw_node->bandwidth_down == ntohl(gateway->bandwidth_down)) &&
(gw_node->bandwidth_up == ntohl(gateway->bandwidth_up)))
@@ -527,11 +534,12 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv,
* gets dereferenced.
*/
spin_lock_bh(&bat_priv->gw.list_lock);
- hlist_del_init_rcu(&gw_node->list);
+ if (!hlist_unhashed(&gw_node->list)) {
+ hlist_del_init_rcu(&gw_node->list);
+ batadv_gw_node_free_ref(gw_node);
+ }
spin_unlock_bh(&bat_priv->gw.list_lock);
- batadv_gw_node_free_ref(gw_node);
-
curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
if (gw_node == curr_gw)
batadv_gw_reselect(bat_priv);
@@ -749,8 +757,10 @@ batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len,
chaddr_offset = *header_len + BATADV_DHCP_CHADDR_OFFSET;
/* store the client address if the message is going to a client */
- if (ret == BATADV_DHCP_TO_CLIENT &&
- pskb_may_pull(skb, chaddr_offset + ETH_ALEN)) {
+ if (ret == BATADV_DHCP_TO_CLIENT) {
+ if (!pskb_may_pull(skb, chaddr_offset + ETH_ALEN))
+ return BATADV_DHCP_NO;
+
/* check if the DHCP packet carries an Ethernet DHCP */
p = skb->data + *header_len + BATADV_DHCP_HTYPE_OFFSET;
if (*p != BATADV_DHCP_HTYPE_ETHERNET)
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 3c8d8142e8c6..0bd7c9e6c9a0 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -26,6 +26,7 @@
#include <linux/if.h>
#include <linux/kernel.h>
#include <linux/list.h>
+#include <linux/mutex.h>
#include <linux/netdevice.h>
#include <linux/printk.h>
#include <linux/rculist.h>
@@ -45,13 +46,16 @@
#include "sysfs.h"
#include "translation-table.h"
-void batadv_hardif_free_rcu(struct rcu_head *rcu)
+/**
+ * batadv_hardif_release - release hard interface from lists and queue for
+ * free after rcu grace period
+ * @hard_iface: the hard interface to free
+ */
+void batadv_hardif_release(struct batadv_hard_iface *hard_iface)
{
- struct batadv_hard_iface *hard_iface;
-
- hard_iface = container_of(rcu, struct batadv_hard_iface, rcu);
dev_put(hard_iface->net_dev);
- kfree(hard_iface);
+
+ kfree_rcu(hard_iface, rcu);
}
struct batadv_hard_iface *
@@ -74,6 +78,28 @@ out:
}
/**
+ * batadv_mutual_parents - check if two devices are each other's parent
+ * @dev1: 1st net_device
+ * @dev2: 2nd net_device
+ *
+ * veth devices come in pairs and each is the parent of the other!
+ *
+ * Return: true if the devices are each other's parent, otherwise false
+ */
+static bool batadv_mutual_parents(const struct net_device *dev1,
+ const struct net_device *dev2)
+{
+ int dev1_parent_iflink = dev_get_iflink(dev1);
+ int dev2_parent_iflink = dev_get_iflink(dev2);
+
+ if (!dev1_parent_iflink || !dev2_parent_iflink)
+ return false;
+
+ return (dev1_parent_iflink == dev2->ifindex) &&
+ (dev2_parent_iflink == dev1->ifindex);
+}
+
+/**
* batadv_is_on_batman_iface - check if a device is a batman iface descendant
* @net_dev: the device to check
*
@@ -108,6 +134,9 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev)
return false;
}
+ if (batadv_mutual_parents(net_dev, parent_dev))
+ return false;
+
ret = batadv_is_on_batman_iface(parent_dev);
return ret;
@@ -287,6 +316,9 @@ static void batadv_hardif_recalc_extra_skbroom(struct net_device *soft_iface)
needed_headroom = lower_headroom + (lower_header_len - ETH_HLEN);
needed_headroom += batadv_max_header_len();
+ /* fragmentation headers don't strip the unicast/... header */
+ needed_headroom += sizeof(struct batadv_frag_packet);
+
soft_iface->needed_headroom = needed_headroom;
soft_iface->needed_tailroom = lower_tailroom;
}
@@ -465,6 +497,11 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
hard_iface->soft_iface = soft_iface;
bat_priv = netdev_priv(hard_iface->soft_iface);
+ if (bat_priv->num_ifaces >= UINT_MAX) {
+ ret = -ENOSPC;
+ goto err_dev;
+ }
+
ret = netdev_master_upper_dev_link(hard_iface->net_dev, soft_iface);
if (ret)
goto err_dev;
@@ -537,8 +574,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
struct batadv_hard_iface *primary_if = NULL;
- if (hard_iface->if_status == BATADV_IF_ACTIVE)
- batadv_hardif_deactivate_interface(hard_iface);
+ batadv_hardif_deactivate_interface(hard_iface);
if (hard_iface->if_status != BATADV_IF_INACTIVE)
goto out;
@@ -573,7 +609,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
batadv_hardif_recalc_extra_skbroom(hard_iface->soft_iface);
/* nobody uses this interface anymore */
- if (!bat_priv->num_ifaces) {
+ if (bat_priv->num_ifaces == 0) {
batadv_gw_check_client_stop(bat_priv);
if (autodel == BATADV_IF_CLEANUP_AUTO)
@@ -629,7 +665,7 @@ batadv_hardif_add_interface(struct net_device *net_dev)
if (ret)
goto free_if;
- hard_iface->if_num = -1;
+ hard_iface->if_num = 0;
hard_iface->net_dev = net_dev;
hard_iface->soft_iface = NULL;
hard_iface->if_status = BATADV_IF_NOT_IN_USE;
@@ -639,6 +675,7 @@ batadv_hardif_add_interface(struct net_device *net_dev)
goto free_sysfs;
INIT_LIST_HEAD(&hard_iface->list);
+ mutex_init(&hard_iface->bat_iv.ogm_buff_mutex);
INIT_WORK(&hard_iface->cleanup_work,
batadv_hardif_remove_interface_finish);
@@ -693,6 +730,32 @@ void batadv_hardif_remove_interfaces(void)
rtnl_unlock();
}
+/**
+ * batadv_hard_if_event_softif() - Handle events for soft interfaces
+ * @event: NETDEV_* event to handle
+ * @net_dev: net_device which generated an event
+ *
+ * Return: NOTIFY_* result
+ */
+static int batadv_hard_if_event_softif(unsigned long event,
+ struct net_device *net_dev)
+{
+ struct batadv_priv *bat_priv;
+
+ switch (event) {
+ case NETDEV_REGISTER:
+ batadv_sysfs_add_meshif(net_dev);
+ bat_priv = netdev_priv(net_dev);
+ batadv_softif_create_vlan(bat_priv, BATADV_NO_FLAGS);
+ break;
+ case NETDEV_CHANGENAME:
+ batadv_debugfs_rename_meshif(net_dev);
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
static int batadv_hard_if_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
@@ -701,12 +764,8 @@ static int batadv_hard_if_event(struct notifier_block *this,
struct batadv_hard_iface *primary_if = NULL;
struct batadv_priv *bat_priv;
- if (batadv_softif_is_valid(net_dev) && event == NETDEV_REGISTER) {
- batadv_sysfs_add_meshif(net_dev);
- bat_priv = netdev_priv(net_dev);
- batadv_softif_create_vlan(bat_priv, BATADV_NO_FLAGS);
- return NOTIFY_DONE;
- }
+ if (batadv_softif_is_valid(net_dev))
+ return batadv_hard_if_event_softif(event, net_dev);
hard_iface = batadv_hardif_get_by_netdev(net_dev);
if (!hard_iface && event == NETDEV_REGISTER)
@@ -748,6 +807,9 @@ static int batadv_hard_if_event(struct notifier_block *this,
if (hard_iface == primary_if)
batadv_primary_if_update_addr(bat_priv, NULL);
break;
+ case NETDEV_CHANGENAME:
+ batadv_debugfs_rename_hardif(hard_iface);
+ break;
default:
break;
}
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index 7b12ea8ea29d..4d74c0415911 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -61,18 +61,18 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
void batadv_hardif_remove_interfaces(void);
int batadv_hardif_min_mtu(struct net_device *soft_iface);
void batadv_update_min_mtu(struct net_device *soft_iface);
-void batadv_hardif_free_rcu(struct rcu_head *rcu);
+void batadv_hardif_release(struct batadv_hard_iface *hard_iface);
/**
* batadv_hardif_free_ref - decrement the hard interface refcounter and
- * possibly free it
+ * possibly release it
* @hard_iface: the hard interface to free
*/
static inline void
batadv_hardif_free_ref(struct batadv_hard_iface *hard_iface)
{
if (atomic_dec_and_test(&hard_iface->refcount))
- call_rcu(&hard_iface->rcu, batadv_hardif_free_rcu);
+ batadv_hardif_release(hard_iface);
}
static inline struct batadv_hard_iface *
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index d7f17c1aa4a4..8ba7b86579d4 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -159,24 +159,34 @@ int batadv_mesh_init(struct net_device *soft_iface)
INIT_HLIST_HEAD(&bat_priv->softif_vlan_list);
ret = batadv_originator_init(bat_priv);
- if (ret < 0)
- goto err;
+ if (ret < 0) {
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+ goto err_orig;
+ }
ret = batadv_tt_init(bat_priv);
- if (ret < 0)
- goto err;
+ if (ret < 0) {
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+ goto err_tt;
+ }
ret = batadv_bla_init(bat_priv);
- if (ret < 0)
- goto err;
+ if (ret < 0) {
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+ goto err_bla;
+ }
ret = batadv_dat_init(bat_priv);
- if (ret < 0)
- goto err;
+ if (ret < 0) {
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+ goto err_dat;
+ }
ret = batadv_nc_mesh_init(bat_priv);
- if (ret < 0)
- goto err;
+ if (ret < 0) {
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+ goto err_nc;
+ }
batadv_gw_init(bat_priv);
batadv_mcast_init(bat_priv);
@@ -186,8 +196,18 @@ int batadv_mesh_init(struct net_device *soft_iface)
return 0;
-err:
- batadv_mesh_free(soft_iface);
+err_nc:
+ batadv_dat_free(bat_priv);
+err_dat:
+ batadv_bla_free(bat_priv);
+err_bla:
+ batadv_tt_free(bat_priv);
+err_tt:
+ batadv_originator_free(bat_priv);
+err_orig:
+ batadv_purge_outstanding_packets(bat_priv, NULL);
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE);
+
return ret;
}
@@ -747,7 +767,7 @@ static u16 batadv_tvlv_container_list_size(struct batadv_priv *bat_priv)
static void batadv_tvlv_container_remove(struct batadv_priv *bat_priv,
struct batadv_tvlv_container *tvlv)
{
- lockdep_assert_held(&bat_priv->tvlv.handler_list_lock);
+ lockdep_assert_held(&bat_priv->tvlv.container_list_lock);
if (!tvlv)
return;
@@ -1079,15 +1099,20 @@ void batadv_tvlv_handler_register(struct batadv_priv *bat_priv,
{
struct batadv_tvlv_handler *tvlv_handler;
+ spin_lock_bh(&bat_priv->tvlv.handler_list_lock);
+
tvlv_handler = batadv_tvlv_handler_get(bat_priv, type, version);
if (tvlv_handler) {
+ spin_unlock_bh(&bat_priv->tvlv.handler_list_lock);
batadv_tvlv_handler_free_ref(tvlv_handler);
return;
}
tvlv_handler = kzalloc(sizeof(*tvlv_handler), GFP_ATOMIC);
- if (!tvlv_handler)
+ if (!tvlv_handler) {
+ spin_unlock_bh(&bat_priv->tvlv.handler_list_lock);
return;
+ }
tvlv_handler->ogm_handler = optr;
tvlv_handler->unicast_handler = uptr;
@@ -1097,7 +1122,6 @@ void batadv_tvlv_handler_register(struct batadv_priv *bat_priv,
atomic_set(&tvlv_handler->refcount, 1);
INIT_HLIST_NODE(&tvlv_handler->list);
- spin_lock_bh(&bat_priv->tvlv.handler_list_lock);
hlist_add_head_rcu(&tvlv_handler->list, &bat_priv->tvlv.handler_list);
spin_unlock_bh(&bat_priv->tvlv.handler_list_lock);
}
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 8aa2d65df86f..44965f71ad73 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -44,7 +44,9 @@
#include <net/addrconf.h>
#include <net/ipv6.h>
+#include "bridge_loop_avoidance.h"
#include "packet.h"
+#include "send.h"
#include "translation-table.h"
/**
@@ -806,6 +808,35 @@ void batadv_mcast_free(struct batadv_priv *bat_priv)
}
/**
+ * batadv_mcast_forw_send_orig() - send a multicast packet to an originator
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the multicast packet to send
+ * @vid: the vlan identifier
+ * @orig_node: the originator to send the packet to
+ *
+ * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise.
+ */
+int batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ unsigned short vid,
+ struct batadv_orig_node *orig_node)
+{
+ /* Avoid sending multicast-in-unicast packets to other BLA
+ * gateways - they already got the frame from the LAN side
+ * we share with them.
+ * TODO: Refactor to take BLA into account earlier, to avoid
+ * reducing the mcast_fanout count.
+ */
+ if (batadv_bla_is_backbone_gw_orig(bat_priv, orig_node->orig, vid)) {
+ dev_kfree_skb(skb);
+ return NET_XMIT_SUCCESS;
+ }
+
+ return batadv_send_skb_unicast(bat_priv, skb, BATADV_UNICAST, 0,
+ orig_node, vid);
+}
+
+/**
* batadv_mcast_purge_orig - reset originator global mcast state modifications
* @orig: the originator which is going to get purged
*/
diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h
index 8f3cb04b9f13..dd83ef07e2f2 100644
--- a/net/batman-adv/multicast.h
+++ b/net/batman-adv/multicast.h
@@ -44,6 +44,11 @@ enum batadv_forw_mode
batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
struct batadv_orig_node **mcast_single_orig);
+int batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ unsigned short vid,
+ struct batadv_orig_node *orig_node);
+
void batadv_mcast_init(struct batadv_priv *bat_priv);
void batadv_mcast_free(struct batadv_priv *bat_priv);
@@ -68,6 +73,16 @@ static inline int batadv_mcast_init(struct batadv_priv *bat_priv)
return 0;
}
+static inline int
+batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ unsigned short vid,
+ struct batadv_orig_node *orig_node)
+{
+ kfree_skb(skb);
+ return NET_XMIT_DROP;
+}
+
static inline void batadv_mcast_free(struct batadv_priv *bat_priv)
{
}
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index d0956f726547..9317d872b9c0 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -159,8 +159,10 @@ int batadv_nc_mesh_init(struct batadv_priv *bat_priv)
&batadv_nc_coding_hash_lock_class_key);
bat_priv->nc.decoding_hash = batadv_hash_new(128);
- if (!bat_priv->nc.decoding_hash)
+ if (!bat_priv->nc.decoding_hash) {
+ batadv_hash_destroy(bat_priv->nc.coding_hash);
goto err;
+ }
batadv_hash_set_lock_class(bat_priv->nc.decoding_hash,
&batadv_nc_decoding_hash_lock_class_key);
@@ -828,19 +830,29 @@ static struct batadv_nc_node
spinlock_t *lock; /* Used to lock list selected by "int in_coding" */
struct list_head *list;
+ /* Select ingoing or outgoing coding node */
+ if (in_coding) {
+ lock = &orig_neigh_node->in_coding_list_lock;
+ list = &orig_neigh_node->in_coding_list;
+ } else {
+ lock = &orig_neigh_node->out_coding_list_lock;
+ list = &orig_neigh_node->out_coding_list;
+ }
+
+ spin_lock_bh(lock);
+
/* Check if nc_node is already added */
nc_node = batadv_nc_find_nc_node(orig_node, orig_neigh_node, in_coding);
/* Node found */
if (nc_node)
- return nc_node;
+ goto unlock;
nc_node = kzalloc(sizeof(*nc_node), GFP_ATOMIC);
if (!nc_node)
- return NULL;
+ goto unlock;
- if (!atomic_inc_not_zero(&orig_neigh_node->refcount))
- goto free;
+ atomic_inc(&orig_neigh_node->refcount);
/* Initialize nc_node */
INIT_LIST_HEAD(&nc_node->list);
@@ -848,28 +860,15 @@ static struct batadv_nc_node
nc_node->orig_node = orig_neigh_node;
atomic_set(&nc_node->refcount, 2);
- /* Select ingoing or outgoing coding node */
- if (in_coding) {
- lock = &orig_neigh_node->in_coding_list_lock;
- list = &orig_neigh_node->in_coding_list;
- } else {
- lock = &orig_neigh_node->out_coding_list_lock;
- list = &orig_neigh_node->out_coding_list;
- }
-
batadv_dbg(BATADV_DBG_NC, bat_priv, "Adding nc_node %pM -> %pM\n",
nc_node->addr, nc_node->orig_node->orig);
/* Add nc_node to orig_node */
- spin_lock_bh(lock);
list_add_tail_rcu(&nc_node->list, list);
+unlock:
spin_unlock_bh(lock);
return nc_node;
-
-free:
- kfree(nc_node);
- return NULL;
}
/**
@@ -994,15 +993,8 @@ static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv,
*/
static u8 batadv_nc_random_weight_tq(u8 tq)
{
- u8 rand_val, rand_tq;
-
- get_random_bytes(&rand_val, sizeof(rand_val));
-
/* randomize the estimated packet loss (max TQ - estimated TQ) */
- rand_tq = rand_val * (BATADV_TQ_MAX_VALUE - tq);
-
- /* normalize the randomized packet loss */
- rand_tq /= BATADV_TQ_MAX_VALUE;
+ u8 rand_tq = prandom_u32_max(BATADV_TQ_MAX_VALUE + 1 - tq);
/* convert to (randomized) estimated tq again */
return BATADV_TQ_MAX_VALUE - rand_tq;
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 6282f021ddfb..b3013fbc417e 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -462,6 +462,8 @@ batadv_neigh_node_new(struct batadv_orig_node *orig_node,
{
struct batadv_neigh_node *neigh_node;
+ spin_lock_bh(&orig_node->neigh_list_lock);
+
neigh_node = batadv_neigh_node_get(orig_node, hard_iface, neigh_addr);
if (neigh_node)
goto out;
@@ -483,19 +485,20 @@ batadv_neigh_node_new(struct batadv_orig_node *orig_node,
ether_addr_copy(neigh_node->addr, neigh_addr);
neigh_node->if_incoming = hard_iface;
neigh_node->orig_node = orig_node;
+ neigh_node->last_seen = jiffies;
/* extra reference for return */
atomic_set(&neigh_node->refcount, 2);
- spin_lock_bh(&orig_node->neigh_list_lock);
hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list);
- spin_unlock_bh(&orig_node->neigh_list_lock);
batadv_dbg(BATADV_DBG_BATMAN, orig_node->bat_priv,
"Creating new neighbor %pM for orig_node %pM on interface %s\n",
neigh_addr, orig_node->orig, hard_iface->net_dev->name);
out:
+ spin_unlock_bh(&orig_node->neigh_list_lock);
+
return neigh_node;
}
@@ -561,6 +564,8 @@ static void batadv_orig_node_release(struct batadv_orig_node *orig_node)
struct hlist_node *node_tmp;
struct batadv_neigh_node *neigh_node;
struct batadv_orig_ifinfo *orig_ifinfo;
+ struct batadv_orig_node_vlan *vlan;
+ struct batadv_orig_ifinfo *last_candidate;
spin_lock_bh(&orig_node->neigh_list_lock);
@@ -576,8 +581,21 @@ static void batadv_orig_node_release(struct batadv_orig_node *orig_node)
hlist_del_rcu(&orig_ifinfo->list);
batadv_orig_ifinfo_free_ref(orig_ifinfo);
}
+
+ last_candidate = orig_node->last_bonding_candidate;
+ orig_node->last_bonding_candidate = NULL;
spin_unlock_bh(&orig_node->neigh_list_lock);
+ if (last_candidate)
+ batadv_orig_ifinfo_free_ref(last_candidate);
+
+ spin_lock_bh(&orig_node->vlan_list_lock);
+ hlist_for_each_entry_safe(vlan, node_tmp, &orig_node->vlan_list, list) {
+ hlist_del_rcu(&vlan->list);
+ batadv_orig_node_vlan_free_ref(vlan);
+ }
+ spin_unlock_bh(&orig_node->vlan_list_lock);
+
/* Free nc_nodes */
batadv_nc_purge_orig(orig_node->bat_priv, orig_node, NULL);
@@ -1085,7 +1103,7 @@ out:
}
int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
- int max_if_num)
+ unsigned int max_if_num)
{
struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
struct batadv_algo_ops *bao = bat_priv->bat_algo_ops;
@@ -1121,7 +1139,7 @@ err:
}
int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface,
- int max_if_num)
+ unsigned int max_if_num)
{
struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
struct batadv_hashtable *hash = bat_priv->orig_hash;
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index a5c37882b409..65824d892a6a 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -67,9 +67,9 @@ void batadv_orig_ifinfo_free_ref(struct batadv_orig_ifinfo *orig_ifinfo);
int batadv_orig_seq_print_text(struct seq_file *seq, void *offset);
int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset);
int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
- int max_if_num);
+ unsigned int max_if_num);
int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface,
- int max_if_num);
+ unsigned int max_if_num);
struct batadv_orig_node_vlan *
batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node,
unsigned short vid);
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index d8a2f33e60e5..e470410abb44 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -359,6 +359,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
if (skb_cow(skb, ETH_HLEN) < 0)
goto out;
+ ethhdr = eth_hdr(skb);
icmph = (struct batadv_icmp_header *)skb->data;
icmp_packet_rr = (struct batadv_icmp_packet_rr *)icmph;
if (icmp_packet_rr->rr_cur >= BATADV_RR_LEN)
@@ -439,6 +440,52 @@ static int batadv_check_unicast_packet(struct batadv_priv *bat_priv,
}
/**
+ * batadv_last_bonding_get - Get last_bonding_candidate of orig_node
+ * @orig_node: originator node whose last bonding candidate should be retrieved
+ *
+ * Return: last bonding candidate of router or NULL if not found
+ *
+ * The object is returned with refcounter increased by 1.
+ */
+static struct batadv_orig_ifinfo *
+batadv_last_bonding_get(struct batadv_orig_node *orig_node)
+{
+ struct batadv_orig_ifinfo *last_bonding_candidate;
+
+ spin_lock_bh(&orig_node->neigh_list_lock);
+ last_bonding_candidate = orig_node->last_bonding_candidate;
+
+ if (last_bonding_candidate)
+ atomic_inc(&last_bonding_candidate->refcount);
+ spin_unlock_bh(&orig_node->neigh_list_lock);
+
+ return last_bonding_candidate;
+}
+
+/**
+ * batadv_last_bonding_replace - Replace last_bonding_candidate of orig_node
+ * @orig_node: originator node whose bonding candidates should be replaced
+ * @new_candidate: new bonding candidate or NULL
+ */
+static void
+batadv_last_bonding_replace(struct batadv_orig_node *orig_node,
+ struct batadv_orig_ifinfo *new_candidate)
+{
+ struct batadv_orig_ifinfo *old_candidate;
+
+ spin_lock_bh(&orig_node->neigh_list_lock);
+ old_candidate = orig_node->last_bonding_candidate;
+
+ if (new_candidate)
+ atomic_inc(&new_candidate->refcount);
+ orig_node->last_bonding_candidate = new_candidate;
+ spin_unlock_bh(&orig_node->neigh_list_lock);
+
+ if (old_candidate)
+ batadv_orig_ifinfo_free_ref(old_candidate);
+}
+
+/**
* batadv_find_router - find a suitable router for this originator
* @bat_priv: the bat priv with all the soft interface information
* @orig_node: the destination node
@@ -485,7 +532,7 @@ batadv_find_router(struct batadv_priv *bat_priv,
* router - obviously there are no other candidates.
*/
rcu_read_lock();
- last_candidate = orig_node->last_bonding_candidate;
+ last_candidate = batadv_last_bonding_get(orig_node);
if (last_candidate)
last_cand_router = rcu_dereference(last_candidate->router);
@@ -545,10 +592,6 @@ next:
}
rcu_read_unlock();
- /* last_bonding_candidate is reset below, remove the old reference. */
- if (orig_node->last_bonding_candidate)
- batadv_orig_ifinfo_free_ref(orig_node->last_bonding_candidate);
-
/* After finding candidates, handle the three cases:
* 1) there is a next candidate, use that
* 2) there is no next candidate, use the first of the list
@@ -557,23 +600,33 @@ next:
if (next_candidate) {
batadv_neigh_node_free_ref(router);
- /* remove references to first candidate, we don't need it. */
- if (first_candidate) {
- batadv_neigh_node_free_ref(first_candidate_router);
- batadv_orig_ifinfo_free_ref(first_candidate);
- }
+ atomic_inc(&next_candidate_router->refcount);
router = next_candidate_router;
- orig_node->last_bonding_candidate = next_candidate;
+ batadv_last_bonding_replace(orig_node, next_candidate);
} else if (first_candidate) {
batadv_neigh_node_free_ref(router);
- /* refcounting has already been done in the loop above. */
+ atomic_inc(&first_candidate_router->refcount);
router = first_candidate_router;
- orig_node->last_bonding_candidate = first_candidate;
+ batadv_last_bonding_replace(orig_node, first_candidate);
} else {
- orig_node->last_bonding_candidate = NULL;
+ batadv_last_bonding_replace(orig_node, NULL);
+ }
+
+ /* cleanup of candidates */
+ if (first_candidate) {
+ batadv_neigh_node_free_ref(first_candidate_router);
+ batadv_orig_ifinfo_free_ref(first_candidate);
}
+ if (next_candidate) {
+ batadv_neigh_node_free_ref(next_candidate_router);
+ batadv_orig_ifinfo_free_ref(next_candidate);
+ }
+
+ if (last_candidate)
+ batadv_orig_ifinfo_free_ref(last_candidate);
+
return router;
}
@@ -585,6 +638,7 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
struct batadv_unicast_packet *unicast_packet;
struct ethhdr *ethhdr = eth_hdr(skb);
int res, hdr_len, ret = NET_RX_DROP;
+ unsigned int len;
unicast_packet = (struct batadv_unicast_packet *)skb->data;
@@ -625,6 +679,7 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
if (hdr_len > 0)
batadv_skb_set_priority(skb, hdr_len);
+ len = skb->len;
res = batadv_send_skb_to_orig(skb, orig_node, recv_if);
/* translate transmit result into receive result */
@@ -632,7 +687,7 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
/* skb was transmitted and consumed */
batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD);
batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES,
- skb->len + ETH_HLEN);
+ len + ETH_HLEN);
ret = NET_RX_SUCCESS;
} else if (res == NET_XMIT_POLICED) {
@@ -649,6 +704,7 @@ out:
/**
* batadv_reroute_unicast_packet - update the unicast header for re-routing
* @bat_priv: the bat priv with all the soft interface information
+ * @skb: unicast packet to process
* @unicast_packet: the unicast header to be updated
* @dst_addr: the payload destination
* @vid: VLAN identifier
@@ -660,7 +716,7 @@ out:
* Returns true if the packet header has been updated, false otherwise
*/
static bool
-batadv_reroute_unicast_packet(struct batadv_priv *bat_priv,
+batadv_reroute_unicast_packet(struct batadv_priv *bat_priv, struct sk_buff *skb,
struct batadv_unicast_packet *unicast_packet,
u8 *dst_addr, unsigned short vid)
{
@@ -689,8 +745,10 @@ batadv_reroute_unicast_packet(struct batadv_priv *bat_priv,
}
/* update the packet header */
+ skb_postpull_rcsum(skb, unicast_packet, sizeof(*unicast_packet));
ether_addr_copy(unicast_packet->dest, orig_addr);
unicast_packet->ttvn = orig_ttvn;
+ skb_postpush_rcsum(skb, unicast_packet, sizeof(*unicast_packet));
ret = true;
out:
@@ -724,13 +782,17 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
vid = batadv_get_vid(skb, hdr_len);
ethhdr = (struct ethhdr *)(skb->data + hdr_len);
+ /* do not reroute multicast frames in a unicast header */
+ if (is_multicast_ether_addr(ethhdr->h_dest))
+ return true;
+
/* check if the destination client was served by this node and it is now
* roaming. In this case, it means that the node has got a ROAM_ADV
* message and that it knows the new destination in the mesh to re-route
* the packet to
*/
if (batadv_tt_local_client_is_roaming(bat_priv, ethhdr->h_dest, vid)) {
- if (batadv_reroute_unicast_packet(bat_priv, unicast_packet,
+ if (batadv_reroute_unicast_packet(bat_priv, skb, unicast_packet,
ethhdr->h_dest, vid))
batadv_dbg_ratelimited(BATADV_DBG_TT,
bat_priv,
@@ -776,7 +838,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
* destination can possibly be updated and forwarded towards the new
* target host
*/
- if (batadv_reroute_unicast_packet(bat_priv, unicast_packet,
+ if (batadv_reroute_unicast_packet(bat_priv, skb, unicast_packet,
ethhdr->h_dest, vid)) {
batadv_dbg_ratelimited(BATADV_DBG_TT, bat_priv,
"Rerouting unicast packet to %pM (dst=%pM): TTVN mismatch old_ttvn=%u new_ttvn=%u\n",
@@ -799,12 +861,14 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
if (!primary_if)
return 0;
+ /* update the packet header */
+ skb_postpull_rcsum(skb, unicast_packet, sizeof(*unicast_packet));
ether_addr_copy(unicast_packet->dest, primary_if->net_dev->dev_addr);
+ unicast_packet->ttvn = curr_ttvn;
+ skb_postpush_rcsum(skb, unicast_packet, sizeof(*unicast_packet));
batadv_hardif_free_ref(primary_if);
- unicast_packet->ttvn = curr_ttvn;
-
return 1;
}
@@ -849,7 +913,6 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
bool is4addr;
unicast_packet = (struct batadv_unicast_packet *)skb->data;
- unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data;
is4addr = unicast_packet->packet_type == BATADV_UNICAST_4ADDR;
/* the caller function should have already pulled 2 bytes */
@@ -870,9 +933,13 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
if (!batadv_check_unicast_ttvn(bat_priv, skb, hdr_size))
return NET_RX_DROP;
+ unicast_packet = (struct batadv_unicast_packet *)skb->data;
+
/* packet for me */
if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) {
if (is4addr) {
+ unicast_4addr_packet =
+ (struct batadv_unicast_4addr_packet *)skb->data;
subtype = unicast_4addr_packet->subtype;
batadv_dat_inc_counter(bat_priv, subtype);
@@ -998,6 +1065,12 @@ int batadv_recv_frag_packet(struct sk_buff *skb,
batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_RX);
batadv_add_counter(bat_priv, BATADV_CNT_FRAG_RX_BYTES, skb->len);
+ /* batadv_frag_skb_buffer will always consume the skb and
+ * the caller should therefore never try to free the
+ * skb after this point
+ */
+ ret = NET_RX_SUCCESS;
+
/* Add fragment to buffer and merge if possible. */
if (!batadv_frag_skb_buffer(&skb, orig_node_src))
goto out;
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 0e0c3b8ed927..11fbfb222c49 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -381,8 +381,8 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb,
struct batadv_orig_node *orig_node;
orig_node = batadv_gw_get_selected_orig(bat_priv);
- return batadv_send_skb_unicast(bat_priv, skb, BATADV_UNICAST, 0,
- orig_node, vid);
+ return batadv_send_skb_unicast(bat_priv, skb, BATADV_UNICAST_4ADDR,
+ BATADV_P_DATA, orig_node, vid);
}
void batadv_schedule_bat_ogm(struct batadv_hard_iface *hard_iface)
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 4812123e0a2c..5105e860d3aa 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -353,9 +353,8 @@ send:
goto dropped;
ret = batadv_send_skb_via_gw(bat_priv, skb, vid);
} else if (mcast_single_orig) {
- ret = batadv_send_skb_unicast(bat_priv, skb,
- BATADV_UNICAST, 0,
- mcast_single_orig, vid);
+ ret = batadv_mcast_forw_send_orig(bat_priv, skb, vid,
+ mcast_single_orig);
} else {
if (batadv_dat_snoop_outgoing_arp_request(bat_priv,
skb))
@@ -394,10 +393,10 @@ void batadv_interface_rx(struct net_device *soft_iface,
struct vlan_ethhdr *vhdr;
struct ethhdr *ethhdr;
unsigned short vid;
- bool is_bcast;
+ int packet_type;
batadv_bcast_packet = (struct batadv_bcast_packet *)skb->data;
- is_bcast = (batadv_bcast_packet->packet_type == BATADV_BCAST);
+ packet_type = batadv_bcast_packet->packet_type;
/* check if enough space is available for pulling, and pull */
if (!pskb_may_pull(skb, hdr_size))
@@ -445,7 +444,7 @@ void batadv_interface_rx(struct net_device *soft_iface,
/* Let the bridge loop avoidance check the packet. If will
* not handle it, we can safely push it up.
*/
- if (batadv_bla_rx(bat_priv, skb, vid, is_bcast))
+ if (batadv_bla_rx(bat_priv, skb, vid, packet_type))
goto out;
if (orig_node)
@@ -539,15 +538,20 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid)
struct batadv_softif_vlan *vlan;
int err;
+ spin_lock_bh(&bat_priv->softif_vlan_list_lock);
+
vlan = batadv_softif_vlan_get(bat_priv, vid);
if (vlan) {
batadv_softif_vlan_free_ref(vlan);
+ spin_unlock_bh(&bat_priv->softif_vlan_list_lock);
return -EEXIST;
}
vlan = kzalloc(sizeof(*vlan), GFP_ATOMIC);
- if (!vlan)
+ if (!vlan) {
+ spin_unlock_bh(&bat_priv->softif_vlan_list_lock);
return -ENOMEM;
+ }
vlan->bat_priv = bat_priv;
vlan->vid = vid;
@@ -555,16 +559,19 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid)
atomic_set(&vlan->ap_isolation, 0);
+ hlist_add_head_rcu(&vlan->list, &bat_priv->softif_vlan_list);
+ spin_unlock_bh(&bat_priv->softif_vlan_list_lock);
+
+ /* batadv_sysfs_add_vlan cannot be in the spinlock section due to the
+ * sleeping behavior of the sysfs functions and the fs_reclaim lock
+ */
err = batadv_sysfs_add_vlan(bat_priv->soft_iface, vlan);
if (err) {
- kfree(vlan);
+ /* ref for the list */
+ batadv_softif_vlan_free_ref(vlan);
return err;
}
- spin_lock_bh(&bat_priv->softif_vlan_list_lock);
- hlist_add_head_rcu(&vlan->list, &bat_priv->softif_vlan_list);
- spin_unlock_bh(&bat_priv->softif_vlan_list_lock);
-
/* add a new TT local entry. This one will be marked with the NOPURGE
* flag
*/
@@ -1000,7 +1007,9 @@ void batadv_softif_destroy_sysfs(struct net_device *soft_iface)
static void batadv_softif_destroy_netlink(struct net_device *soft_iface,
struct list_head *head)
{
+ struct batadv_priv *bat_priv = netdev_priv(soft_iface);
struct batadv_hard_iface *hard_iface;
+ struct batadv_softif_vlan *vlan;
list_for_each_entry(hard_iface, &batadv_hardif_list, list) {
if (hard_iface->soft_iface == soft_iface)
@@ -1008,6 +1017,13 @@ static void batadv_softif_destroy_netlink(struct net_device *soft_iface,
BATADV_IF_CLEANUP_KEEP);
}
+ /* destroy the "untagged" VLAN */
+ vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS);
+ if (vlan) {
+ batadv_softif_destroy_vlan(bat_priv, vlan);
+ batadv_softif_vlan_free_ref(vlan);
+ }
+
batadv_sysfs_del_meshif(soft_iface);
unregister_netdevice_queue(soft_iface, head);
}
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index ffd49b40e76a..1e71e0c9b47b 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -197,8 +197,11 @@ batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const u8 *addr,
static void
batadv_tt_local_entry_free_ref(struct batadv_tt_local_entry *tt_local_entry)
{
- if (atomic_dec_and_test(&tt_local_entry->common.refcount))
+ if (atomic_dec_and_test(&tt_local_entry->common.refcount)) {
+ batadv_softif_vlan_free_ref(tt_local_entry->vlan);
+
kfree_rcu(tt_local_entry, common.rcu);
+ }
}
/**
@@ -303,9 +306,11 @@ static void batadv_tt_global_size_mod(struct batadv_orig_node *orig_node,
if (atomic_add_return(v, &vlan->tt.num_entries) == 0) {
spin_lock_bh(&orig_node->vlan_list_lock);
- hlist_del_init_rcu(&vlan->list);
+ if (!hlist_unhashed(&vlan->list)) {
+ hlist_del_init_rcu(&vlan->list);
+ batadv_orig_node_vlan_free_ref(vlan);
+ }
spin_unlock_bh(&orig_node->vlan_list_lock);
- batadv_orig_node_vlan_free_ref(vlan);
}
batadv_orig_node_vlan_free_ref(vlan);
@@ -503,14 +508,26 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv,
struct batadv_tt_global_entry *tt_global,
const char *message)
{
+ struct batadv_tt_global_entry *tt_removed_entry;
+ struct hlist_node *tt_removed_node;
+
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Deleting global tt entry %pM (vid: %d): %s\n",
tt_global->common.addr,
BATADV_PRINT_VID(tt_global->common.vid), message);
- batadv_hash_remove(bat_priv->tt.global_hash, batadv_compare_tt,
- batadv_choose_tt, &tt_global->common);
- batadv_tt_global_entry_free_ref(tt_global);
+ tt_removed_node = batadv_hash_remove(bat_priv->tt.global_hash,
+ batadv_compare_tt,
+ batadv_choose_tt,
+ &tt_global->common);
+ if (!tt_removed_node)
+ return;
+
+ /* drop reference of remove hash entry */
+ tt_removed_entry = hlist_entry(tt_removed_node,
+ struct batadv_tt_global_entry,
+ common.hash_entry);
+ batadv_tt_global_entry_free_ref(tt_removed_entry);
}
/**
@@ -597,8 +614,10 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
/* increase the refcounter of the related vlan */
vlan = batadv_softif_vlan_get(bat_priv, vid);
- if (WARN(!vlan, "adding TT local entry %pM to non-existent VLAN %d",
- addr, BATADV_PRINT_VID(vid))) {
+ if (!vlan) {
+ net_ratelimited_function(batadv_info, soft_iface,
+ "adding TT local entry %pM to non-existent VLAN %d\n",
+ addr, BATADV_PRINT_VID(vid));
kfree(tt_local);
tt_local = NULL;
goto out;
@@ -636,7 +655,6 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
if (unlikely(hash_added != 0)) {
/* remove the reference for the hash */
batadv_tt_local_entry_free_ref(tt_local);
- batadv_softif_vlan_free_ref(vlan);
goto out;
}
@@ -740,7 +758,7 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node,
struct batadv_orig_node_vlan *vlan;
u8 *tt_change_ptr;
- rcu_read_lock();
+ spin_lock_bh(&orig_node->vlan_list_lock);
hlist_for_each_entry_rcu(vlan, &orig_node->vlan_list, list) {
num_vlan++;
num_entries += atomic_read(&vlan->tt.num_entries);
@@ -778,7 +796,7 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node,
*tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr;
out:
- rcu_read_unlock();
+ spin_unlock_bh(&orig_node->vlan_list_lock);
return tvlv_len;
}
@@ -809,15 +827,20 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv,
struct batadv_tvlv_tt_vlan_data *tt_vlan;
struct batadv_softif_vlan *vlan;
u16 num_vlan = 0;
- u16 num_entries = 0;
+ u16 vlan_entries = 0;
+ u16 total_entries = 0;
u16 tvlv_len;
u8 *tt_change_ptr;
int change_offset;
- rcu_read_lock();
+ spin_lock_bh(&bat_priv->softif_vlan_list_lock);
hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) {
+ vlan_entries = atomic_read(&vlan->tt.num_entries);
+ if (vlan_entries < 1)
+ continue;
+
num_vlan++;
- num_entries += atomic_read(&vlan->tt.num_entries);
+ total_entries += vlan_entries;
}
change_offset = sizeof(**tt_data);
@@ -825,7 +848,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv,
/* if tt_len is negative, allocate the space needed by the full table */
if (*tt_len < 0)
- *tt_len = batadv_tt_len(num_entries);
+ *tt_len = batadv_tt_len(total_entries);
tvlv_len = *tt_len;
tvlv_len += change_offset;
@@ -842,8 +865,13 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv,
tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(*tt_data + 1);
hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) {
+ vlan_entries = atomic_read(&vlan->tt.num_entries);
+ if (vlan_entries < 1)
+ continue;
+
tt_vlan->vid = htons(vlan->vid);
tt_vlan->crc = htonl(vlan->tt.crc);
+ tt_vlan->reserved = 0;
tt_vlan++;
}
@@ -852,7 +880,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv,
*tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr;
out:
- rcu_read_unlock();
+ spin_unlock_bh(&bat_priv->softif_vlan_list_lock);
return tvlv_len;
}
@@ -940,7 +968,6 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
struct batadv_tt_common_entry *tt_common_entry;
struct batadv_tt_local_entry *tt_local;
struct batadv_hard_iface *primary_if;
- struct batadv_softif_vlan *vlan;
struct hlist_head *head;
unsigned short vid;
u32 i;
@@ -977,13 +1004,6 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
no_purge = tt_common_entry->flags & np_flag;
- vlan = batadv_softif_vlan_get(bat_priv, vid);
- if (!vlan) {
- seq_printf(seq, "Cannot retrieve VLAN %d\n",
- BATADV_PRINT_VID(vid));
- continue;
- }
-
seq_printf(seq,
" * %pM %4i [%c%c%c%c%c%c] %3u.%03u (%#.8x)\n",
tt_common_entry->addr,
@@ -1001,9 +1021,7 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
BATADV_TT_CLIENT_ISOLA) ? 'I' : '.'),
no_purge ? 0 : last_seen_secs,
no_purge ? 0 : last_seen_msecs,
- vlan->tt.crc);
-
- batadv_softif_vlan_free_ref(vlan);
+ tt_local->vlan->tt.crc);
}
rcu_read_unlock();
}
@@ -1046,10 +1064,10 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr,
unsigned short vid, const char *message,
bool roaming)
{
+ struct batadv_tt_local_entry *tt_removed_entry;
struct batadv_tt_local_entry *tt_local_entry;
u16 flags, curr_flags = BATADV_NO_FLAGS;
- struct batadv_softif_vlan *vlan;
- void *tt_entry_exists;
+ struct hlist_node *tt_removed_node;
tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid);
if (!tt_local_entry)
@@ -1078,23 +1096,18 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr,
*/
batadv_tt_local_event(bat_priv, tt_local_entry, BATADV_TT_CLIENT_DEL);
- tt_entry_exists = batadv_hash_remove(bat_priv->tt.local_hash,
+ tt_removed_node = batadv_hash_remove(bat_priv->tt.local_hash,
batadv_compare_tt,
batadv_choose_tt,
&tt_local_entry->common);
- if (!tt_entry_exists)
- goto out;
-
- /* extra call to free the local tt entry */
- batadv_tt_local_entry_free_ref(tt_local_entry);
-
- /* decrease the reference held for this vlan */
- vlan = batadv_softif_vlan_get(bat_priv, vid);
- if (!vlan)
+ if (!tt_removed_node)
goto out;
- batadv_softif_vlan_free_ref(vlan);
- batadv_softif_vlan_free_ref(vlan);
+ /* drop reference of remove hash entry */
+ tt_removed_entry = hlist_entry(tt_removed_node,
+ struct batadv_tt_local_entry,
+ common.hash_entry);
+ batadv_tt_local_entry_free_ref(tt_removed_entry);
out:
if (tt_local_entry)
@@ -1168,7 +1181,6 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv)
spinlock_t *list_lock; /* protects write access to the hash lists */
struct batadv_tt_common_entry *tt_common_entry;
struct batadv_tt_local_entry *tt_local;
- struct batadv_softif_vlan *vlan;
struct hlist_node *node_tmp;
struct hlist_head *head;
u32 i;
@@ -1190,14 +1202,6 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv)
struct batadv_tt_local_entry,
common);
- /* decrease the reference held for this vlan */
- vlan = batadv_softif_vlan_get(bat_priv,
- tt_common_entry->vid);
- if (vlan) {
- batadv_softif_vlan_free_ref(vlan);
- batadv_softif_vlan_free_ref(vlan);
- }
-
batadv_tt_local_entry_free_ref(tt_local);
}
spin_unlock_bh(list_lock);
@@ -1273,7 +1277,8 @@ batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry,
*/
static bool
batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry,
- const struct batadv_orig_node *orig_node)
+ const struct batadv_orig_node *orig_node,
+ u8 *flags)
{
struct batadv_tt_orig_list_entry *orig_entry;
bool found = false;
@@ -1281,25 +1286,64 @@ batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry,
orig_entry = batadv_tt_global_orig_entry_find(entry, orig_node);
if (orig_entry) {
found = true;
+
+ if (flags)
+ *flags = orig_entry->flags;
+
batadv_tt_orig_list_entry_free_ref(orig_entry);
}
return found;
}
+/**
+ * batadv_tt_global_sync_flags - update TT sync flags
+ * @tt_global: the TT global entry to update sync flags in
+ *
+ * Updates the sync flag bits in the tt_global flag attribute with a logical
+ * OR of all sync flags from any of its TT orig entries.
+ */
+static void
+batadv_tt_global_sync_flags(struct batadv_tt_global_entry *tt_global)
+{
+ struct batadv_tt_orig_list_entry *orig_entry;
+ const struct hlist_head *head;
+ u16 flags = BATADV_NO_FLAGS;
+
+ rcu_read_lock();
+ head = &tt_global->orig_list;
+ hlist_for_each_entry_rcu(orig_entry, head, list)
+ flags |= orig_entry->flags;
+ rcu_read_unlock();
+
+ flags |= tt_global->common.flags & (~BATADV_TT_SYNC_MASK);
+ tt_global->common.flags = flags;
+}
+
+/**
+ * batadv_tt_global_orig_entry_add - add or update a TT orig entry
+ * @tt_global: the TT global entry to add an orig entry in
+ * @orig_node: the originator to add an orig entry for
+ * @ttvn: translation table version number of this changeset
+ * @flags: TT sync flags
+ */
static void
batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global,
- struct batadv_orig_node *orig_node, int ttvn)
+ struct batadv_orig_node *orig_node, int ttvn,
+ u8 flags)
{
struct batadv_tt_orig_list_entry *orig_entry;
+ spin_lock_bh(&tt_global->list_lock);
+
orig_entry = batadv_tt_global_orig_entry_find(tt_global, orig_node);
if (orig_entry) {
/* refresh the ttvn: the current value could be a bogus one that
* was added during a "temporary client detection"
*/
orig_entry->ttvn = ttvn;
- goto out;
+ orig_entry->flags = flags;
+ goto sync_flags;
}
orig_entry = kzalloc(sizeof(*orig_entry), GFP_ATOMIC);
@@ -1311,17 +1355,20 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global,
batadv_tt_global_size_inc(orig_node, tt_global->common.vid);
orig_entry->orig_node = orig_node;
orig_entry->ttvn = ttvn;
+ orig_entry->flags = flags;
atomic_set(&orig_entry->refcount, 2);
- spin_lock_bh(&tt_global->list_lock);
hlist_add_head_rcu(&orig_entry->list,
&tt_global->orig_list);
- spin_unlock_bh(&tt_global->list_lock);
atomic_inc(&tt_global->orig_list_count);
+sync_flags:
+ batadv_tt_global_sync_flags(tt_global);
out:
if (orig_entry)
batadv_tt_orig_list_entry_free_ref(orig_entry);
+
+ spin_unlock_bh(&tt_global->list_lock);
}
/**
@@ -1379,7 +1426,9 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
ether_addr_copy(common->addr, tt_addr);
common->vid = vid;
- common->flags = flags;
+ if (!is_multicast_ether_addr(common->addr))
+ common->flags = flags & (~BATADV_TT_SYNC_MASK);
+
tt_global_entry->roam_at = 0;
/* node must store current time in case of roaming. This is
* needed to purge this entry out on timeout (if nobody claims
@@ -1420,7 +1469,7 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
if (!(common->flags & BATADV_TT_CLIENT_TEMP))
goto out;
if (batadv_tt_global_entry_has_orig(tt_global_entry,
- orig_node))
+ orig_node, NULL))
goto out_remove;
batadv_tt_global_del_orig_list(tt_global_entry);
goto add_orig_entry;
@@ -1441,7 +1490,8 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
* TT_CLIENT_WIFI, therefore they have to be copied in the
* client entry
*/
- tt_global_entry->common.flags |= flags;
+ if (!is_multicast_ether_addr(common->addr))
+ tt_global_entry->common.flags |= flags & (~BATADV_TT_SYNC_MASK);
/* If there is the BATADV_TT_CLIENT_ROAM flag set, there is only
* one originator left in the list and we previously received a
@@ -1458,7 +1508,8 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
}
add_orig_entry:
/* add the new orig_entry (if needed) or update it */
- batadv_tt_global_orig_entry_add(tt_global_entry, orig_node, ttvn);
+ batadv_tt_global_orig_entry_add(tt_global_entry, orig_node, ttvn,
+ flags & BATADV_TT_SYNC_MASK);
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Creating new global tt entry: %pM (vid: %d, via %pM)\n",
@@ -2111,6 +2162,7 @@ static u32 batadv_tt_global_crc(struct batadv_priv *bat_priv,
unsigned short vid)
{
struct batadv_hashtable *hash = bat_priv->tt.global_hash;
+ struct batadv_tt_orig_list_entry *tt_orig;
struct batadv_tt_common_entry *tt_common;
struct batadv_tt_global_entry *tt_global;
struct hlist_head *head;
@@ -2149,8 +2201,9 @@ static u32 batadv_tt_global_crc(struct batadv_priv *bat_priv,
/* find out if this global entry is announced by this
* originator
*/
- if (!batadv_tt_global_entry_has_orig(tt_global,
- orig_node))
+ tt_orig = batadv_tt_global_orig_entry_find(tt_global,
+ orig_node);
+ if (!tt_orig)
continue;
/* use network order to read the VID: this ensures that
@@ -2162,10 +2215,12 @@ static u32 batadv_tt_global_crc(struct batadv_priv *bat_priv,
/* compute the CRC on flags that have to be kept in sync
* among nodes
*/
- flags = tt_common->flags & BATADV_TT_SYNC_MASK;
+ flags = tt_orig->flags;
crc_tmp = crc32c(crc_tmp, &flags, sizeof(flags));
crc ^= crc32c(crc_tmp, tt_common->addr, ETH_ALEN);
+
+ batadv_tt_orig_list_entry_free_ref(tt_orig);
}
rcu_read_unlock();
}
@@ -2230,6 +2285,29 @@ static u32 batadv_tt_local_crc(struct batadv_priv *bat_priv,
return crc;
}
+/**
+ * batadv_tt_req_node_release - free tt_req node entry
+ * @ref: kref pointer of the tt req_node entry
+ */
+static void batadv_tt_req_node_release(struct kref *ref)
+{
+ struct batadv_tt_req_node *tt_req_node;
+
+ tt_req_node = container_of(ref, struct batadv_tt_req_node, refcount);
+
+ kfree(tt_req_node);
+}
+
+/**
+ * batadv_tt_req_node_put - decrement the tt_req_node refcounter and
+ * possibly release it
+ * @tt_req_node: tt_req_node to be free'd
+ */
+static void batadv_tt_req_node_put(struct batadv_tt_req_node *tt_req_node)
+{
+ kref_put(&tt_req_node->refcount, batadv_tt_req_node_release);
+}
+
static void batadv_tt_req_list_free(struct batadv_priv *bat_priv)
{
struct batadv_tt_req_node *node;
@@ -2239,7 +2317,7 @@ static void batadv_tt_req_list_free(struct batadv_priv *bat_priv)
hlist_for_each_entry_safe(node, safe, &bat_priv->tt.req_list, list) {
hlist_del_init(&node->list);
- kfree(node);
+ batadv_tt_req_node_put(node);
}
spin_unlock_bh(&bat_priv->tt.req_list_lock);
@@ -2276,7 +2354,7 @@ static void batadv_tt_req_purge(struct batadv_priv *bat_priv)
if (batadv_has_timed_out(node->issued_at,
BATADV_TT_REQUEST_TIMEOUT)) {
hlist_del_init(&node->list);
- kfree(node);
+ batadv_tt_req_node_put(node);
}
}
spin_unlock_bh(&bat_priv->tt.req_list_lock);
@@ -2308,9 +2386,11 @@ batadv_tt_req_node_new(struct batadv_priv *bat_priv,
if (!tt_req_node)
goto unlock;
+ kref_init(&tt_req_node->refcount);
ether_addr_copy(tt_req_node->addr, orig_node->orig);
tt_req_node->issued_at = jiffies;
+ kref_get(&tt_req_node->refcount);
hlist_add_head(&tt_req_node->list, &bat_priv->tt.req_list);
unlock:
spin_unlock_bh(&bat_priv->tt.req_list_lock);
@@ -2324,17 +2404,24 @@ unlock:
*
* Returns 1 if the entry is a valid, 0 otherwise.
*/
-static int batadv_tt_local_valid(const void *entry_ptr, const void *data_ptr)
+static int batadv_tt_local_valid(const void *entry_ptr,
+ const void *data_ptr,
+ u8 *flags)
{
const struct batadv_tt_common_entry *tt_common_entry = entry_ptr;
if (tt_common_entry->flags & BATADV_TT_CLIENT_NEW)
return 0;
+
+ if (flags)
+ *flags = tt_common_entry->flags;
+
return 1;
}
static int batadv_tt_global_valid(const void *entry_ptr,
- const void *data_ptr)
+ const void *data_ptr,
+ u8 *flags)
{
const struct batadv_tt_common_entry *tt_common_entry = entry_ptr;
const struct batadv_tt_global_entry *tt_global_entry;
@@ -2348,7 +2435,8 @@ static int batadv_tt_global_valid(const void *entry_ptr,
struct batadv_tt_global_entry,
common);
- return batadv_tt_global_entry_has_orig(tt_global_entry, orig_node);
+ return batadv_tt_global_entry_has_orig(tt_global_entry, orig_node,
+ flags);
}
/**
@@ -2364,18 +2452,25 @@ static int batadv_tt_global_valid(const void *entry_ptr,
static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv,
struct batadv_hashtable *hash,
void *tvlv_buff, u16 tt_len,
- int (*valid_cb)(const void *, const void *),
+ int (*valid_cb)(const void *,
+ const void *,
+ u8 *flags),
void *cb_data)
{
struct batadv_tt_common_entry *tt_common_entry;
struct batadv_tvlv_tt_change *tt_change;
struct hlist_head *head;
u16 tt_tot, tt_num_entries = 0;
+ u8 flags;
+ bool ret;
u32 i;
tt_tot = batadv_tt_entries(tt_len);
tt_change = (struct batadv_tvlv_tt_change *)tvlv_buff;
+ if (!valid_cb)
+ return;
+
rcu_read_lock();
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
@@ -2385,11 +2480,12 @@ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv,
if (tt_tot == tt_num_entries)
break;
- if ((valid_cb) && (!valid_cb(tt_common_entry, cb_data)))
+ ret = valid_cb(tt_common_entry, cb_data, &flags);
+ if (!ret)
continue;
ether_addr_copy(tt_change->addr, tt_common_entry->addr);
- tt_change->flags = tt_common_entry->flags;
+ tt_change->flags = flags;
tt_change->vid = htons(tt_common_entry->vid);
memset(tt_change->reserved, 0,
sizeof(tt_change->reserved));
@@ -2560,13 +2656,19 @@ static int batadv_send_tt_request(struct batadv_priv *bat_priv,
out:
if (primary_if)
batadv_hardif_free_ref(primary_if);
+
if (ret && tt_req_node) {
spin_lock_bh(&bat_priv->tt.req_list_lock);
- /* hlist_del_init() verifies tt_req_node still is in the list */
- hlist_del_init(&tt_req_node->list);
+ if (!hlist_unhashed(&tt_req_node->list)) {
+ hlist_del_init(&tt_req_node->list);
+ batadv_tt_req_node_put(tt_req_node);
+ }
spin_unlock_bh(&bat_priv->tt.req_list_lock);
- kfree(tt_req_node);
}
+
+ if (tt_req_node)
+ batadv_tt_req_node_put(tt_req_node);
+
kfree(tvlv_tt_data);
return ret;
}
@@ -3002,7 +3104,7 @@ static void batadv_handle_tt_response(struct batadv_priv *bat_priv,
if (!batadv_compare_eth(node->addr, resp_src))
continue;
hlist_del_init(&node->list);
- kfree(node);
+ batadv_tt_req_node_put(node);
}
spin_unlock_bh(&bat_priv->tt.req_list_lock);
@@ -3227,7 +3329,6 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv)
struct batadv_hashtable *hash = bat_priv->tt.local_hash;
struct batadv_tt_common_entry *tt_common;
struct batadv_tt_local_entry *tt_local;
- struct batadv_softif_vlan *vlan;
struct hlist_node *node_tmp;
struct hlist_head *head;
spinlock_t *list_lock; /* protects write access to the hash lists */
@@ -3257,13 +3358,6 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv)
struct batadv_tt_local_entry,
common);
- /* decrease the reference held for this vlan */
- vlan = batadv_softif_vlan_get(bat_priv, tt_common->vid);
- if (vlan) {
- batadv_softif_vlan_free_ref(vlan);
- batadv_softif_vlan_free_ref(vlan);
- }
-
batadv_tt_local_entry_free_ref(tt_local);
}
spin_unlock_bh(list_lock);
@@ -3741,8 +3835,10 @@ int batadv_tt_init(struct batadv_priv *bat_priv)
return ret;
ret = batadv_tt_global_init(bat_priv);
- if (ret < 0)
+ if (ret < 0) {
+ batadv_tt_local_table_free(bat_priv);
return ret;
+ }
batadv_tvlv_handler_register(bat_priv, batadv_tt_tvlv_ogm_handler_v1,
batadv_tt_tvlv_unicast_handler_v1,
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index cbd347c2e4a5..8fce1241ad6d 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -77,11 +77,13 @@ enum batadv_dhcp_recipient {
* @ogm_buff: buffer holding the OGM packet
* @ogm_buff_len: length of the OGM packet buffer
* @ogm_seqno: OGM sequence number - used to identify each OGM
+ * @ogm_buff_mutex: lock protecting ogm_buff and ogm_buff_len
*/
struct batadv_hard_iface_bat_iv {
unsigned char *ogm_buff;
int ogm_buff_len;
atomic_t ogm_seqno;
+ struct mutex ogm_buff_mutex;
};
/**
@@ -103,7 +105,7 @@ struct batadv_hard_iface_bat_iv {
*/
struct batadv_hard_iface {
struct list_head list;
- s16 if_num;
+ unsigned int if_num;
char if_status;
struct net_device *net_dev;
u8 num_bcasts;
@@ -287,7 +289,9 @@ struct batadv_orig_node {
DECLARE_BITMAP(bcast_bits, BATADV_TQ_LOCAL_WINDOW_SIZE);
u32 last_bcast_seqno;
struct hlist_head neigh_list;
- /* neigh_list_lock protects: neigh_list and router */
+ /* neigh_list_lock protects: neigh_list, ifinfo_list,
+ * last_bonding_candidate and router
+ */
spinlock_t neigh_list_lock;
struct hlist_node hash_entry;
struct batadv_priv *bat_priv;
@@ -806,7 +810,7 @@ struct batadv_priv {
atomic_t bcast_seqno;
atomic_t bcast_queue_left;
atomic_t batman_queue_left;
- char num_ifaces;
+ unsigned int num_ifaces;
struct kobject *mesh_obj;
struct dentry *debug_dir;
struct hlist_head forw_bat_list;
@@ -884,6 +888,7 @@ struct batadv_socket_packet {
* backbone gateway - no bcast traffic is formwared until the situation was
* resolved
* @crc: crc16 checksum over all claims
+ * @crc_lock: lock protecting crc
* @refcount: number of contexts the object is used
* @rcu: struct used for freeing in an RCU-safe manner
*/
@@ -897,6 +902,7 @@ struct batadv_bla_backbone_gw {
atomic_t wait_periods;
atomic_t request_sent;
u16 crc;
+ spinlock_t crc_lock; /* protects crc */
atomic_t refcount;
struct rcu_head rcu;
};
@@ -915,6 +921,7 @@ struct batadv_bla_claim {
u8 addr[ETH_ALEN];
unsigned short vid;
struct batadv_bla_backbone_gw *backbone_gw;
+ spinlock_t backbone_lock; /* protects backbone_gw */
unsigned long lasttime;
struct hlist_node hash_entry;
struct rcu_head rcu;
@@ -947,10 +954,12 @@ struct batadv_tt_common_entry {
* struct batadv_tt_local_entry - translation table local entry data
* @common: general translation table data
* @last_seen: timestamp used for purging stale tt local entries
+ * @vlan: soft-interface vlan of the entry
*/
struct batadv_tt_local_entry {
struct batadv_tt_common_entry common;
unsigned long last_seen;
+ struct batadv_softif_vlan *vlan;
};
/**
@@ -973,6 +982,7 @@ struct batadv_tt_global_entry {
* struct batadv_tt_orig_list_entry - orig node announcing a non-mesh client
* @orig_node: pointer to orig node announcing this non-mesh client
* @ttvn: translation table version number which added the non-mesh client
+ * @flags: per orig entry TT sync flags
* @list: list node for batadv_tt_global_entry::orig_list
* @refcount: number of contexts the object is used
* @rcu: struct used for freeing in an RCU-safe manner
@@ -980,6 +990,7 @@ struct batadv_tt_global_entry {
struct batadv_tt_orig_list_entry {
struct batadv_orig_node *orig_node;
u8 ttvn;
+ u8 flags;
struct hlist_node list;
atomic_t refcount;
struct rcu_head rcu;
@@ -999,11 +1010,13 @@ struct batadv_tt_change_node {
* struct batadv_tt_req_node - data to keep track of the tt requests in flight
* @addr: mac address address of the originator this request was sent to
* @issued_at: timestamp used for purging stale tt requests
+ * @refcount: number of contexts the object is used by
* @list: list node for batadv_priv_tt::req_list
*/
struct batadv_tt_req_node {
u8 addr[ETH_ALEN];
unsigned long issued_at;
+ struct kref refcount;
struct hlist_node list;
};
@@ -1168,9 +1181,9 @@ struct batadv_algo_ops {
struct batadv_hard_iface *hard_iface);
void (*bat_orig_free)(struct batadv_orig_node *orig_node);
int (*bat_orig_add_if)(struct batadv_orig_node *orig_node,
- int max_if_num);
+ unsigned int max_if_num);
int (*bat_orig_del_if)(struct batadv_orig_node *orig_node,
- int max_if_num, int del_if_num);
+ unsigned int max_if_num, unsigned int del_if_num);
};
/**
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 4cd6b8d811ff..11602902884b 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -57,6 +57,7 @@ static bool enable_6lowpan;
/* We are listening incoming connections via this channel
*/
static struct l2cap_chan *listen_chan;
+static DEFINE_MUTEX(set_lock);
struct lowpan_peer {
struct list_head list;
@@ -1195,12 +1196,14 @@ static void do_enable_set(struct work_struct *work)
enable_6lowpan = set_enable->flag;
+ mutex_lock(&set_lock);
if (listen_chan) {
l2cap_chan_close(listen_chan, 0);
l2cap_chan_put(listen_chan);
}
listen_chan = bt_6lowpan_listen();
+ mutex_unlock(&set_lock);
kfree(set_enable);
}
@@ -1252,11 +1255,13 @@ static ssize_t lowpan_control_write(struct file *fp,
if (ret == -EINVAL)
return ret;
+ mutex_lock(&set_lock);
if (listen_chan) {
l2cap_chan_close(listen_chan, 0);
l2cap_chan_put(listen_chan);
listen_chan = NULL;
}
+ mutex_unlock(&set_lock);
if (conn) {
struct lowpan_peer *peer;
diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c
index 5f123c3320a7..fcd819ffda10 100644
--- a/net/bluetooth/a2mp.c
+++ b/net/bluetooth/a2mp.c
@@ -233,6 +233,9 @@ static int a2mp_discover_rsp(struct amp_mgr *mgr, struct sk_buff *skb,
struct a2mp_info_req req;
found = true;
+
+ memset(&req, 0, sizeof(req));
+
req.id = cl->id;
a2mp_send(mgr, A2MP_GETINFO_REQ, __next_ident(mgr),
sizeof(req), &req);
@@ -312,6 +315,8 @@ static int a2mp_getinfo_req(struct amp_mgr *mgr, struct sk_buff *skb,
if (!hdev || hdev->dev_type != HCI_AMP) {
struct a2mp_info_rsp rsp;
+ memset(&rsp, 0, sizeof(rsp));
+
rsp.id = req->id;
rsp.status = A2MP_STATUS_INVALID_CTRL_ID;
@@ -355,6 +360,8 @@ static int a2mp_getinfo_rsp(struct amp_mgr *mgr, struct sk_buff *skb,
if (!ctrl)
return -ENOMEM;
+ memset(&req, 0, sizeof(req));
+
req.id = rsp->id;
a2mp_send(mgr, A2MP_GETAMPASSOC_REQ, __next_ident(mgr), sizeof(req),
&req);
@@ -381,6 +388,8 @@ static int a2mp_getampassoc_req(struct amp_mgr *mgr, struct sk_buff *skb,
hdev = hci_dev_get(req->id);
if (!hdev || hdev->amp_type == AMP_TYPE_BREDR || tmp) {
struct a2mp_amp_assoc_rsp rsp;
+
+ memset(&rsp, 0, sizeof(rsp));
rsp.id = req->id;
if (tmp) {
@@ -471,7 +480,6 @@ static int a2mp_createphyslink_req(struct amp_mgr *mgr, struct sk_buff *skb,
struct a2mp_cmd *hdr)
{
struct a2mp_physlink_req *req = (void *) skb->data;
-
struct a2mp_physlink_rsp rsp;
struct hci_dev *hdev;
struct hci_conn *hcon;
@@ -482,6 +490,8 @@ static int a2mp_createphyslink_req(struct amp_mgr *mgr, struct sk_buff *skb,
BT_DBG("local_id %d, remote_id %d", req->local_id, req->remote_id);
+ memset(&rsp, 0, sizeof(rsp));
+
rsp.local_id = req->remote_id;
rsp.remote_id = req->local_id;
@@ -509,6 +519,7 @@ static int a2mp_createphyslink_req(struct amp_mgr *mgr, struct sk_buff *skb,
assoc = kmemdup(req->amp_assoc, assoc_len, GFP_KERNEL);
if (!assoc) {
amp_ctrl_put(ctrl);
+ hci_dev_put(hdev);
return -ENOMEM;
}
@@ -560,6 +571,8 @@ static int a2mp_discphyslink_req(struct amp_mgr *mgr, struct sk_buff *skb,
BT_DBG("local_id %d remote_id %d", req->local_id, req->remote_id);
+ memset(&rsp, 0, sizeof(rsp));
+
rsp.local_id = req->remote_id;
rsp.remote_id = req->local_id;
rsp.status = A2MP_STATUS_SUCCESS;
@@ -682,6 +695,8 @@ static int a2mp_chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
if (err) {
struct a2mp_cmd_rej rej;
+ memset(&rej, 0, sizeof(rej));
+
rej.reason = cpu_to_le16(0);
hdr = (void *) skb->data;
@@ -905,6 +920,8 @@ void a2mp_send_getinfo_rsp(struct hci_dev *hdev)
BT_DBG("%s mgr %p", hdev->name, mgr);
+ memset(&rsp, 0, sizeof(rsp));
+
rsp.id = hdev->id;
rsp.status = A2MP_STATUS_INVALID_CTRL_ID;
@@ -1002,6 +1019,8 @@ void a2mp_send_create_phy_link_rsp(struct hci_dev *hdev, u8 status)
if (!mgr)
return;
+ memset(&rsp, 0, sizeof(rsp));
+
hs_hcon = hci_conn_hash_lookup_state(hdev, AMP_LINK, BT_CONNECT);
if (!hs_hcon) {
rsp.status = A2MP_STATUS_UNABLE_START_LINK_CREATION;
@@ -1034,6 +1053,8 @@ void a2mp_discover_amp(struct l2cap_chan *chan)
mgr->bredr_chan = chan;
+ memset(&req, 0, sizeof(req));
+
req.mtu = cpu_to_le16(L2CAP_A2MP_DEFAULT_MTU);
req.ext_feat = 0;
a2mp_send(mgr, A2MP_DISCOVER_REQ, 1, sizeof(req), &req);
diff --git a/net/bluetooth/amp.c b/net/bluetooth/amp.c
index e32f34189007..b01b43ab6f83 100644
--- a/net/bluetooth/amp.c
+++ b/net/bluetooth/amp.c
@@ -305,6 +305,9 @@ void amp_read_loc_assoc_final_data(struct hci_dev *hdev,
struct hci_request req;
int err = 0;
+ if (!mgr)
+ return;
+
cp.phy_handle = hcon->handle;
cp.len_so_far = cpu_to_le16(0);
cp.max_len = cpu_to_le16(hdev->amp_assoc_size);
diff --git a/net/bluetooth/cmtp/cmtp.h b/net/bluetooth/cmtp/cmtp.h
index c32638dddbf9..f6b9dc4e408f 100644
--- a/net/bluetooth/cmtp/cmtp.h
+++ b/net/bluetooth/cmtp/cmtp.h
@@ -26,7 +26,7 @@
#include <linux/types.h>
#include <net/bluetooth/bluetooth.h>
-#define BTNAMSIZ 18
+#define BTNAMSIZ 21
/* CMTP ioctl defines */
#define CMTPCONNADD _IOW('C', 200, int)
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index 77f73bfa840b..2133b53eb152 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -392,6 +392,11 @@ int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock)
if (!(session->flags & BIT(CMTP_LOOPBACK))) {
err = cmtp_attach_device(session);
if (err < 0) {
+ /* Caller will call fput in case of failure, and so
+ * will cmtp_session kthread.
+ */
+ get_file(session->sock->file);
+
atomic_inc(&session->terminate);
wake_up_interruptible(sk_sleep(session->sock->sk));
up_write(&cmtp_session_sem);
@@ -495,9 +500,7 @@ static int __init cmtp_init(void)
{
BT_INFO("CMTP (CAPI Emulation) ver %s", VERSION);
- cmtp_init_sockets();
-
- return 0;
+ return cmtp_init_sockets();
}
static void __exit cmtp_exit(void)
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 211c2599ab96..6e5316031916 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -1173,6 +1173,23 @@ int hci_conn_check_link_mode(struct hci_conn *conn)
return 0;
}
+ /* AES encryption is required for Level 4:
+ *
+ * BLUETOOTH CORE SPECIFICATION Version 5.2 | Vol 3, Part C
+ * page 1319:
+ *
+ * 128-bit equivalent strength for link and encryption keys
+ * required using FIPS approved algorithms (E0 not allowed,
+ * SAFER+ not allowed, and P-192 not allowed; encryption key
+ * not shortened)
+ */
+ if (conn->sec_level == BT_SECURITY_FIPS &&
+ !test_bit(HCI_CONN_AES_CCM, &conn->flags)) {
+ bt_dev_err(conn->hdev,
+ "Invalid security: Missing AES-CCM usage");
+ return 0;
+ }
+
if (hci_conn_ssp_enabled(conn) &&
!test_bit(HCI_CONN_ENCRYPT, &conn->flags))
return 0;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 5b95477c3453..f88076f55ce9 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -371,12 +371,17 @@ static int hci_req_sync(struct hci_dev *hdev,
{
int ret;
- if (!test_bit(HCI_UP, &hdev->flags))
- return -ENETDOWN;
-
/* Serialize all requests */
hci_req_lock(hdev);
- ret = __hci_req_sync(hdev, req, opt, timeout);
+ /* check the state after obtaining the lock to protect the HCI_UP
+ * against any races from hci_dev_do_close when the controller
+ * gets removed.
+ */
+ if (test_bit(HCI_UP, &hdev->flags))
+ ret = __hci_req_sync(hdev, req, opt, timeout);
+ else
+ ret = -ENETDOWN;
+
hci_req_unlock(hdev);
return ret;
@@ -1352,6 +1357,12 @@ int hci_inquiry(void __user *arg)
goto done;
}
+ /* Restrict maximum inquiry length to 60 seconds */
+ if (ir.length > 60) {
+ err = -EINVAL;
+ goto done;
+ }
+
hci_dev_lock(hdev);
if (inquiry_cache_age(hdev) > INQUIRY_CACHE_AGE_MAX ||
inquiry_cache_empty(hdev) || ir.flags & IREQ_CACHE_FLUSH) {
@@ -1372,8 +1383,10 @@ int hci_inquiry(void __user *arg)
* cleared). If it is interrupted by a signal, return -EINTR.
*/
if (wait_on_bit(&hdev->flags, HCI_INQUIRY,
- TASK_INTERRUPTIBLE))
- return -EINTR;
+ TASK_INTERRUPTIBLE)) {
+ err = -EINTR;
+ goto done;
+ }
}
/* for unlimited number of responses we will use buffer with
@@ -1548,8 +1561,13 @@ static int hci_dev_do_open(struct hci_dev *hdev)
} else {
/* Init failed, cleanup */
flush_work(&hdev->tx_work);
- flush_work(&hdev->cmd_work);
+
+ /* Since hci_rx_work() may wake up new cmd_work, rx_work
+ * should be flushed first to avoid an unexpected call of
+ * hci_cmd_work()
+ */
flush_work(&hdev->rx_work);
+ flush_work(&hdev->cmd_work);
skb_queue_purge(&hdev->cmd_q);
skb_queue_purge(&hdev->rx_q);
@@ -1667,6 +1685,14 @@ int hci_dev_do_close(struct hci_dev *hdev)
hci_req_cancel(hdev, ENODEV);
hci_req_lock(hdev);
+ if (!hci_dev_test_flag(hdev, HCI_UNREGISTER) &&
+ !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
+ test_bit(HCI_UP, &hdev->flags)) {
+ /* Execute vendor specific shutdown routine */
+ if (hdev->shutdown)
+ hdev->shutdown(hdev);
+ }
+
if (!test_and_clear_bit(HCI_UP, &hdev->flags)) {
cancel_delayed_work_sync(&hdev->cmd_timer);
hci_req_unlock(hdev);
@@ -3433,6 +3459,7 @@ int hci_register_dev(struct hci_dev *hdev)
return id;
err_wqueue:
+ debugfs_remove_recursive(hdev->debugfs);
destroy_workqueue(hdev->workqueue);
destroy_workqueue(hdev->req_workqueue);
err:
@@ -3445,14 +3472,10 @@ EXPORT_SYMBOL(hci_register_dev);
/* Unregister HCI device */
void hci_unregister_dev(struct hci_dev *hdev)
{
- int id;
-
BT_DBG("%pK name %s bus %d", hdev, hdev->name, hdev->bus);
hci_dev_set_flag(hdev, HCI_UNREGISTER);
- id = hdev->id;
-
write_lock(&hci_dev_list_lock);
list_del(&hdev->list);
write_unlock(&hci_dev_list_lock);
@@ -3481,7 +3504,14 @@ void hci_unregister_dev(struct hci_dev *hdev)
}
device_del(&hdev->dev);
+ /* Actual cleanup is deferred until hci_cleanup_dev(). */
+ hci_dev_put(hdev);
+}
+EXPORT_SYMBOL(hci_unregister_dev);
+/* Cleanup HCI device */
+void hci_cleanup_dev(struct hci_dev *hdev)
+{
debugfs_remove_recursive(hdev->debugfs);
destroy_workqueue(hdev->workqueue);
@@ -3501,11 +3531,8 @@ void hci_unregister_dev(struct hci_dev *hdev)
hci_discovery_filter_clear(hdev);
hci_dev_unlock(hdev);
- hci_dev_put(hdev);
-
- ida_simple_remove(&hci_index_ida, id);
+ ida_simple_remove(&hci_index_ida, hdev->id);
}
-EXPORT_SYMBOL(hci_unregister_dev);
/* Suspend HCI device */
int hci_suspend_dev(struct hci_dev *hdev)
@@ -4459,7 +4486,14 @@ static void hci_rx_work(struct work_struct *work)
hci_send_to_sock(hdev, skb);
}
- if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) {
+ /* If the device has been opened in HCI_USER_CHANNEL,
+ * the userspace has exclusive access to device.
+ * When device is HCI_INIT, we still need to process
+ * the data packets to the driver in order
+ * to complete its setup().
+ */
+ if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
+ !test_bit(HCI_INIT, &hdev->flags)) {
kfree_skb(skb);
continue;
}
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index cc1b7488861b..60cddf0b36e6 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -41,12 +41,27 @@
/* Handle HCI Event packets */
-static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb)
+static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb,
+ u8 *new_status)
{
__u8 status = *((__u8 *) skb->data);
BT_DBG("%s status 0x%2.2x", hdev->name, status);
+ /* It is possible that we receive Inquiry Complete event right
+ * before we receive Inquiry Cancel Command Complete event, in
+ * which case the latter event should have status of Command
+ * Disallowed (0x0c). This should not be treated as error, since
+ * we actually achieve what Inquiry Cancel wants to achieve,
+ * which is to end the last Inquiry session.
+ */
+ if (status == 0x0c && !test_bit(HCI_INQUIRY, &hdev->flags)) {
+ bt_dev_warn(hdev, "Ignoring error of Inquiry Cancel command");
+ status = 0x00;
+ }
+
+ *new_status = status;
+
if (status)
return;
@@ -1118,6 +1133,9 @@ static void store_pending_adv_report(struct hci_dev *hdev, bdaddr_t *bdaddr,
{
struct discovery_state *d = &hdev->discovery;
+ if (len > HCI_MAX_AD_LENGTH)
+ return;
+
bacpy(&d->last_adv_addr, bdaddr);
d->last_adv_addr_type = bdaddr_type;
d->last_adv_rssi = rssi;
@@ -2094,7 +2112,7 @@ static void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb)
BT_DBG("%s num_rsp %d", hdev->name, num_rsp);
- if (!num_rsp)
+ if (!num_rsp || skb->len < num_rsp * sizeof(*info) + 1)
return;
if (hci_dev_test_flag(hdev, HCI_PERIODIC_INQ))
@@ -2464,7 +2482,7 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
&cp);
} else {
clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags);
- hci_encrypt_cfm(conn, ev->status, 0x00);
+ hci_encrypt_cfm(conn, ev->status);
}
}
@@ -2550,22 +2568,7 @@ static void read_enc_key_size_complete(struct hci_dev *hdev, u8 status,
conn->enc_key_size = rp->key_size;
}
- if (conn->state == BT_CONFIG) {
- conn->state = BT_CONNECTED;
- hci_connect_cfm(conn, 0);
- hci_conn_drop(conn);
- } else {
- u8 encrypt;
-
- if (!test_bit(HCI_CONN_ENCRYPT, &conn->flags))
- encrypt = 0x00;
- else if (test_bit(HCI_CONN_AES_CCM, &conn->flags))
- encrypt = 0x02;
- else
- encrypt = 0x01;
-
- hci_encrypt_cfm(conn, 0, encrypt);
- }
+ hci_encrypt_cfm(conn, 0);
unlock:
hci_dev_unlock(hdev);
@@ -2612,24 +2615,20 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags);
+ /* Check link security requirements are met */
+ if (!hci_conn_check_link_mode(conn))
+ ev->status = HCI_ERROR_AUTH_FAILURE;
+
if (ev->status && conn->state == BT_CONNECTED) {
+ /* Notify upper layers so they can cleanup before
+ * disconnecting.
+ */
+ hci_encrypt_cfm(conn, ev->status);
hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE);
hci_conn_drop(conn);
goto unlock;
}
- /* In Secure Connections Only mode, do not allow any connections
- * that are not encrypted with AES-CCM using a P-256 authenticated
- * combination key.
- */
- if (hci_dev_test_flag(hdev, HCI_SC_ONLY) &&
- (!test_bit(HCI_CONN_AES_CCM, &conn->flags) ||
- conn->key_type != HCI_LK_AUTH_COMBINATION_P256)) {
- hci_connect_cfm(conn, HCI_ERROR_AUTH_FAILURE);
- hci_conn_drop(conn);
- goto unlock;
- }
-
/* Try reading the encryption key size for encrypted ACL links */
if (!ev->status && ev->encrypt && conn->type == ACL_LINK) {
struct hci_cp_read_enc_key_size cp;
@@ -2659,14 +2658,7 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
}
notify:
- if (conn->state == BT_CONFIG) {
- if (!ev->status)
- conn->state = BT_CONNECTED;
-
- hci_connect_cfm(conn, ev->status);
- hci_conn_drop(conn);
- } else
- hci_encrypt_cfm(conn, ev->status, ev->encrypt);
+ hci_encrypt_cfm(conn, ev->status);
unlock:
hci_dev_unlock(hdev);
@@ -2758,7 +2750,7 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb,
switch (*opcode) {
case HCI_OP_INQUIRY_CANCEL:
- hci_cc_inquiry_cancel(hdev, skb);
+ hci_cc_inquiry_cancel(hdev, skb, status);
break;
case HCI_OP_PERIODIC_INQ:
@@ -3609,6 +3601,9 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev,
struct inquiry_info_with_rssi_and_pscan_mode *info;
info = (void *) (skb->data + 1);
+ if (skb->len < num_rsp * sizeof(*info) + 1)
+ goto unlock;
+
for (; num_rsp; num_rsp--, info++) {
u32 flags;
@@ -3630,6 +3625,9 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev,
} else {
struct inquiry_info_with_rssi *info = (void *) (skb->data + 1);
+ if (skb->len < num_rsp * sizeof(*info) + 1)
+ goto unlock;
+
for (; num_rsp; num_rsp--, info++) {
u32 flags;
@@ -3650,6 +3648,7 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev,
}
}
+unlock:
hci_dev_unlock(hdev);
}
@@ -3748,6 +3747,21 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev,
switch (ev->status) {
case 0x00:
+ /* The synchronous connection complete event should only be
+ * sent once per new connection. Receiving a successful
+ * complete event when the connection status is already
+ * BT_CONNECTED means that the device is misbehaving and sent
+ * multiple complete event packets for the same new connection.
+ *
+ * Registering the device more than once can corrupt kernel
+ * memory, hence upon detecting this invalid event, we report
+ * an error and ignore the packet.
+ */
+ if (conn->state == BT_CONNECTED) {
+ bt_dev_err(hdev, "Ignoring connect complete event for existing connection");
+ goto unlock;
+ }
+
conn->handle = __le16_to_cpu(ev->handle);
conn->state = BT_CONNECTED;
conn->type = ev->link_type;
@@ -3761,6 +3775,7 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev,
case 0x11: /* Unsupported Feature or Parameter Value */
case 0x1c: /* SCO interval rejected */
case 0x1a: /* Unsupported Remote Feature */
+ case 0x1e: /* Invalid LMP Parameters */
case 0x1f: /* Unspecified error */
case 0x20: /* Unsupported LMP Parameter value */
if (conn->out) {
@@ -3811,7 +3826,7 @@ static void hci_extended_inquiry_result_evt(struct hci_dev *hdev,
BT_DBG("%s num_rsp %d", hdev->name, num_rsp);
- if (!num_rsp)
+ if (!num_rsp || skb->len < num_rsp * sizeof(*info) + 1)
return;
if (hci_dev_test_flag(hdev, HCI_PERIODIC_INQ))
@@ -4336,6 +4351,11 @@ static void hci_phy_link_complete_evt(struct hci_dev *hdev,
return;
}
+ if (!hcon->amp_mgr) {
+ hci_dev_unlock(hdev);
+ return;
+ }
+
if (ev->status) {
hci_conn_del(hcon);
hci_dev_unlock(hdev);
@@ -4380,6 +4400,7 @@ static void hci_loglink_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
return;
hchan->handle = le16_to_cpu(ev->handle);
+ hchan->amp = true;
BT_DBG("hcon %pK mgr %pK hchan %pK", hcon, hcon->amp_mgr, hchan);
@@ -4412,7 +4433,7 @@ static void hci_disconn_loglink_complete_evt(struct hci_dev *hdev,
hci_dev_lock(hdev);
hchan = hci_chan_lookup_handle(hdev, le16_to_cpu(ev->handle));
- if (!hchan)
+ if (!hchan || !hchan->amp)
goto unlock;
amp_destroy_logical_link(hchan, ev->reason);
@@ -4729,6 +4750,11 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
u32 flags;
u8 *ptr, real_len;
+ if (len > HCI_MAX_AD_LENGTH) {
+ pr_err_ratelimited("legacy adv larger than 31 bytes");
+ return;
+ }
+
/* Find the end of the data in case the report contains padded zero
* bytes at the end causing an invalid length value.
*
@@ -4789,7 +4815,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
*/
conn = check_pending_le_conn(hdev, bdaddr, bdaddr_type, type,
direct_addr);
- if (conn && type == LE_ADV_IND) {
+ if (conn && type == LE_ADV_IND && len <= HCI_MAX_AD_LENGTH) {
/* Store report for later inclusion by
* mgmt_device_connected
*/
@@ -4914,10 +4940,20 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb)
struct hci_ev_le_advertising_info *ev = ptr;
s8 rssi;
- rssi = ev->data[ev->length];
- process_adv_report(hdev, ev->evt_type, &ev->bdaddr,
- ev->bdaddr_type, NULL, 0, rssi,
- ev->data, ev->length);
+ if (ptr > (void *)skb_tail_pointer(skb) - sizeof(*ev)) {
+ bt_dev_err(hdev, "Malicious advertising data.");
+ break;
+ }
+
+ if (ev->length <= HCI_MAX_AD_LENGTH &&
+ ev->data + ev->length <= skb_tail_pointer(skb)) {
+ rssi = ev->data[ev->length];
+ process_adv_report(hdev, ev->evt_type, &ev->bdaddr,
+ ev->bdaddr_type, NULL, 0, rssi,
+ ev->data, ev->length);
+ } else {
+ bt_dev_err(hdev, "Dropping invalid advertising data");
+ }
ptr += sizeof(*ev) + ev->length + 1;
}
@@ -5105,20 +5141,18 @@ static void hci_le_direct_adv_report_evt(struct hci_dev *hdev,
struct sk_buff *skb)
{
u8 num_reports = skb->data[0];
- void *ptr = &skb->data[1];
+ struct hci_ev_le_direct_adv_info *ev = (void *)&skb->data[1];
- hci_dev_lock(hdev);
+ if (!num_reports || skb->len < num_reports * sizeof(*ev) + 1)
+ return;
- while (num_reports--) {
- struct hci_ev_le_direct_adv_info *ev = ptr;
+ hci_dev_lock(hdev);
+ for (; num_reports; num_reports--, ev++)
process_adv_report(hdev, ev->evt_type, &ev->bdaddr,
ev->bdaddr_type, &ev->direct_addr,
ev->direct_addr_type, ev->rssi, NULL, 0);
- ptr += sizeof(*ev);
- }
-
hci_dev_unlock(hdev);
}
@@ -5222,6 +5256,11 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
u8 status = 0, event = hdr->evt, req_evt = 0;
u16 opcode = HCI_OP_NOP;
+ if (!event) {
+ bt_dev_warn(hdev, "Received unexpected HCI Event 00000000");
+ goto done;
+ }
+
if (hdev->sent_cmd && bt_cb(hdev->sent_cmd)->hci.req_event == event) {
struct hci_command_hdr *cmd_hdr = (void *) hdev->sent_cmd->data;
opcode = __le16_to_cpu(cmd_hdr->opcode);
@@ -5433,6 +5472,7 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
req_complete_skb(hdev, status, opcode, orig_skb);
}
+done:
kfree_skb(orig_skb);
kfree_skb(skb);
hdev->stat.evt_rx++;
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 4f6f5d89278b..6ab9dda306b9 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -53,6 +53,17 @@ struct hci_pinfo {
unsigned long flags;
};
+static struct hci_dev *hci_hdev_from_sock(struct sock *sk)
+{
+ struct hci_dev *hdev = hci_pi(sk)->hdev;
+
+ if (!hdev)
+ return ERR_PTR(-EBADFD);
+ if (hci_dev_test_flag(hdev, HCI_UNREGISTER))
+ return ERR_PTR(-EPIPE);
+ return hdev;
+}
+
void hci_sock_set_flag(struct sock *sk, int nr)
{
set_bit(nr, &hci_pi(sk)->flags);
@@ -322,7 +333,8 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event)
ni->type = hdev->dev_type;
ni->bus = hdev->bus;
bacpy(&ni->bdaddr, &hdev->bdaddr);
- memcpy(ni->name, hdev->name, 8);
+ memcpy_and_pad(ni->name, sizeof(ni->name), hdev->name,
+ strnlen(hdev->name, sizeof(ni->name)), '\0');
opcode = cpu_to_le16(HCI_MON_NEW_INDEX);
break;
@@ -480,19 +492,13 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event)
if (event == HCI_DEV_UNREG) {
struct sock *sk;
- /* Detach sockets from device */
+ /* Wake up sockets using this dead device */
read_lock(&hci_sk_list.lock);
sk_for_each(sk, &hci_sk_list.head) {
- bh_lock_sock_nested(sk);
if (hci_pi(sk)->hdev == hdev) {
- hci_pi(sk)->hdev = NULL;
sk->sk_err = EPIPE;
- sk->sk_state = BT_OPEN;
sk->sk_state_change(sk);
-
- hci_dev_put(hdev);
}
- bh_unlock_sock(sk);
}
read_unlock(&hci_sk_list.lock);
}
@@ -631,10 +637,10 @@ static int hci_sock_blacklist_del(struct hci_dev *hdev, void __user *arg)
static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd,
unsigned long arg)
{
- struct hci_dev *hdev = hci_pi(sk)->hdev;
+ struct hci_dev *hdev = hci_hdev_from_sock(sk);
- if (!hdev)
- return -EBADFD;
+ if (IS_ERR(hdev))
+ return PTR_ERR(hdev);
if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL))
return -EBUSY;
@@ -766,6 +772,18 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr,
lock_sock(sk);
+ /* Allow detaching from dead device and attaching to alive device, if
+ * the caller wants to re-bind (instead of close) this socket in
+ * response to hci_sock_dev_event(HCI_DEV_UNREG) notification.
+ */
+ hdev = hci_pi(sk)->hdev;
+ if (hdev && hci_dev_test_flag(hdev, HCI_UNREGISTER)) {
+ hci_pi(sk)->hdev = NULL;
+ sk->sk_state = BT_OPEN;
+ hci_dev_put(hdev);
+ }
+ hdev = NULL;
+
if (sk->sk_state == BT_BOUND) {
err = -EALREADY;
goto done;
@@ -937,9 +955,9 @@ static int hci_sock_getname(struct socket *sock, struct sockaddr *addr,
lock_sock(sk);
- hdev = hci_pi(sk)->hdev;
- if (!hdev) {
- err = -EBADFD;
+ hdev = hci_hdev_from_sock(sk);
+ if (IS_ERR(hdev)) {
+ err = PTR_ERR(hdev);
goto done;
}
@@ -1191,9 +1209,9 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg,
goto done;
}
- hdev = hci_pi(sk)->hdev;
- if (!hdev) {
- err = -EBADFD;
+ hdev = hci_hdev_from_sock(sk);
+ if (IS_ERR(hdev)) {
+ err = PTR_ERR(hdev);
goto done;
}
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 4f78b28686ff..a76b1371a7fc 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -180,6 +180,9 @@ ATTRIBUTE_GROUPS(bt_host);
static void bt_host_release(struct device *dev)
{
struct hci_dev *hdev = to_hci_dev(dev);
+
+ if (hci_dev_test_flag(hdev, HCI_UNREGISTER))
+ hci_cleanup_dev(hdev);
kfree(hdev);
module_put(THIS_MODULE);
}
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index e614940c4e98..8e47a5392129 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -735,7 +735,7 @@ static void hidp_stop(struct hid_device *hid)
hid->claimed = 0;
}
-static struct hid_ll_driver hidp_hid_driver = {
+struct hid_ll_driver hidp_hid_driver = {
.parse = hidp_parse,
.start = hidp_start,
.stop = hidp_stop,
@@ -744,6 +744,7 @@ static struct hid_ll_driver hidp_hid_driver = {
.raw_request = hidp_raw_request,
.output_report = hidp_output_report,
};
+EXPORT_SYMBOL_GPL(hidp_hid_driver);
/* This function sets up the hid device. It does not add it
to the HID system. That is done in hidp_add_connection(). */
@@ -1283,7 +1284,7 @@ static int hidp_session_thread(void *arg)
/* cleanup runtime environment */
remove_wait_queue(sk_sleep(session->intr_sock->sk), &intr_wait);
- remove_wait_queue(sk_sleep(session->intr_sock->sk), &ctrl_wait);
+ remove_wait_queue(sk_sleep(session->ctrl_sock->sk), &ctrl_wait);
wake_up_interruptible(&session->report_queue);
hidp_del_timer(session);
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 9497acbf9154..820c78945e16 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -403,6 +403,9 @@ static void l2cap_chan_timeout(struct work_struct *work)
BT_DBG("chan %pK state %s", chan, state_to_string(chan->state));
mutex_lock(&conn->chan_lock);
+ /* __set_chan_timer() calls l2cap_chan_hold(chan) while scheduling
+ * this work. No need to call l2cap_chan_hold(chan) here again.
+ */
l2cap_chan_lock(chan);
if (chan->state == BT_CONNECTED || chan->state == BT_CONFIG)
@@ -415,12 +418,12 @@ static void l2cap_chan_timeout(struct work_struct *work)
l2cap_chan_close(chan, reason);
- l2cap_chan_unlock(chan);
-
chan->ops->close(chan);
- mutex_unlock(&conn->chan_lock);
+ l2cap_chan_unlock(chan);
l2cap_chan_put(chan);
+
+ mutex_unlock(&conn->chan_lock);
}
struct l2cap_chan *l2cap_chan_create(void)
@@ -431,6 +434,8 @@ struct l2cap_chan *l2cap_chan_create(void)
if (!chan)
return NULL;
+ skb_queue_head_init(&chan->tx_q);
+ skb_queue_head_init(&chan->srej_q);
mutex_init(&chan->lock);
/* Set default lock nesting level */
@@ -496,7 +501,9 @@ void l2cap_chan_set_defaults(struct l2cap_chan *chan)
chan->flush_to = L2CAP_DEFAULT_FLUSH_TO;
chan->retrans_timeout = L2CAP_DEFAULT_RETRANS_TO;
chan->monitor_timeout = L2CAP_DEFAULT_MONITOR_TO;
+
chan->conf_state = 0;
+ set_bit(CONF_NOT_COMPLETE, &chan->conf_state);
set_bit(FLAG_FORCE_ACTIVE, &chan->flags);
}
@@ -1714,9 +1721,9 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err)
l2cap_chan_del(chan, err);
- l2cap_chan_unlock(chan);
-
chan->ops->close(chan);
+
+ l2cap_chan_unlock(chan);
l2cap_chan_put(chan);
}
@@ -4093,7 +4100,8 @@ static inline int l2cap_config_req(struct l2cap_conn *conn,
return 0;
}
- if (chan->state != BT_CONFIG && chan->state != BT_CONNECT2) {
+ if (chan->state != BT_CONFIG && chan->state != BT_CONNECT2 &&
+ chan->state != BT_CONNECTED) {
cmd_reject_invalid_cid(conn, cmd->ident, chan->scid,
chan->dcid);
goto unlock;
@@ -4316,6 +4324,7 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn,
return 0;
}
+ l2cap_chan_hold(chan);
l2cap_chan_lock(chan);
rsp.dcid = cpu_to_le16(chan->scid);
@@ -4324,12 +4333,11 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn,
chan->ops->set_shutdown(chan);
- l2cap_chan_hold(chan);
l2cap_chan_del(chan, ECONNRESET);
- l2cap_chan_unlock(chan);
-
chan->ops->close(chan);
+
+ l2cap_chan_unlock(chan);
l2cap_chan_put(chan);
mutex_unlock(&conn->chan_lock);
@@ -4361,20 +4369,21 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn,
return 0;
}
+ l2cap_chan_hold(chan);
l2cap_chan_lock(chan);
if (chan->state != BT_DISCONN) {
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
mutex_unlock(&conn->chan_lock);
return 0;
}
- l2cap_chan_hold(chan);
l2cap_chan_del(chan, 0);
- l2cap_chan_unlock(chan);
-
chan->ops->close(chan);
+
+ l2cap_chan_unlock(chan);
l2cap_chan_put(chan);
mutex_unlock(&conn->chan_lock);
@@ -4898,10 +4907,8 @@ void __l2cap_physical_cfm(struct l2cap_chan *chan, int result)
BT_DBG("chan %pK, result %d, local_amp_id %d, remote_amp_id %d",
chan, result, local_amp_id, remote_amp_id);
- if (chan->state == BT_DISCONN || chan->state == BT_CLOSED) {
- l2cap_chan_unlock(chan);
+ if (chan->state == BT_DISCONN || chan->state == BT_CLOSED)
return;
- }
if (chan->state != BT_CONNECTED) {
l2cap_do_create(chan, result, local_amp_id, remote_amp_id);
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index bd3c021932be..6c127f4ac3a2 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1038,7 +1038,7 @@ done:
}
/* Kill socket (only if zapped and orphan)
- * Must be called on unlocked socket.
+ * Must be called on unlocked socket, with l2cap channel lock.
*/
static void l2cap_sock_kill(struct sock *sk)
{
@@ -1189,6 +1189,7 @@ static int l2cap_sock_release(struct socket *sock)
{
struct sock *sk = sock->sk;
int err;
+ struct l2cap_chan *chan;
BT_DBG("sock %pK, sk %pK", sock, sk);
@@ -1198,9 +1199,17 @@ static int l2cap_sock_release(struct socket *sock)
bt_sock_unlink(&l2cap_sk_list, sk);
err = l2cap_sock_shutdown(sock, 2);
+ chan = l2cap_pi(sk)->chan;
+
+ l2cap_chan_hold(chan);
+ l2cap_chan_lock(chan);
sock_orphan(sk);
l2cap_sock_kill(sk);
+
+ l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
+
return err;
}
@@ -1218,12 +1227,15 @@ static void l2cap_sock_cleanup_listen(struct sock *parent)
BT_DBG("child chan %pK state %s", chan,
state_to_string(chan->state));
+ l2cap_chan_hold(chan);
l2cap_chan_lock(chan);
+
__clear_chan_timer(chan);
l2cap_chan_close(chan, ECONNRESET);
- l2cap_chan_unlock(chan);
-
l2cap_sock_kill(sk);
+
+ l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
}
}
@@ -1297,6 +1309,9 @@ static void l2cap_sock_close_cb(struct l2cap_chan *chan)
{
struct sock *sk = chan->data;
+ if (!sk)
+ return;
+
l2cap_sock_kill(sk);
}
@@ -1305,6 +1320,9 @@ static void l2cap_sock_teardown_cb(struct l2cap_chan *chan, int err)
struct sock *sk = chan->data;
struct sock *parent;
+ if (!sk)
+ return;
+
BT_DBG("chan %pK state %s", chan, state_to_string(chan->state));
/* This callback can be called both for server (BT_LISTEN)
@@ -1318,8 +1336,6 @@ static void l2cap_sock_teardown_cb(struct l2cap_chan *chan, int err)
parent = bt_sk(sk)->parent;
- sock_set_flag(sk, SOCK_ZAPPED);
-
switch (chan->state) {
case BT_OPEN:
case BT_BOUND:
@@ -1346,8 +1362,11 @@ static void l2cap_sock_teardown_cb(struct l2cap_chan *chan, int err)
break;
}
-
release_sock(sk);
+
+ /* Only zap after cleanup to avoid use after free race */
+ sock_set_flag(sk, SOCK_ZAPPED);
+
}
static void l2cap_sock_state_change_cb(struct l2cap_chan *chan, int state,
@@ -1473,8 +1492,10 @@ static void l2cap_sock_destruct(struct sock *sk)
{
BT_DBG("sk %pK", sk);
- if (l2cap_pi(sk)->chan)
+ if (l2cap_pi(sk)->chan) {
+ l2cap_pi(sk)->chan->data = NULL;
l2cap_chan_put(l2cap_pi(sk)->chan);
+ }
if (l2cap_pi(sk)->rx_busy_skb) {
kfree_skb(l2cap_pi(sk)->rx_busy_skb);
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 1f4a4a790df6..9ffabc81057c 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -212,12 +212,15 @@ static u8 mgmt_status_table[] = {
MGMT_STATUS_TIMEOUT, /* Instant Passed */
MGMT_STATUS_NOT_SUPPORTED, /* Pairing Not Supported */
MGMT_STATUS_FAILED, /* Transaction Collision */
+ MGMT_STATUS_FAILED, /* Reserved for future use */
MGMT_STATUS_INVALID_PARAMS, /* Unacceptable Parameter */
MGMT_STATUS_REJECTED, /* QoS Rejected */
MGMT_STATUS_NOT_SUPPORTED, /* Classification Not Supported */
MGMT_STATUS_REJECTED, /* Insufficient Security */
MGMT_STATUS_INVALID_PARAMS, /* Parameter Out Of Range */
+ MGMT_STATUS_FAILED, /* Reserved for future use */
MGMT_STATUS_BUSY, /* Role Switch Pending */
+ MGMT_STATUS_FAILED, /* Reserved for future use */
MGMT_STATUS_FAILED, /* Slot Violation */
MGMT_STATUS_FAILED, /* Role Switch Failed */
MGMT_STATUS_INVALID_PARAMS, /* EIR Too Large */
@@ -628,7 +631,8 @@ static u32 get_supported_settings(struct hci_dev *hdev)
if (lmp_ssp_capable(hdev)) {
settings |= MGMT_SETTING_SSP;
- settings |= MGMT_SETTING_HS;
+ if (IS_ENABLED(CONFIG_BT_HS))
+ settings |= MGMT_SETTING_HS;
}
if (lmp_sc_capable(hdev))
@@ -2430,6 +2434,10 @@ static int set_hs(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
BT_DBG("request for %s", hdev->name);
+ if (!IS_ENABLED(CONFIG_BT_HS))
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_HS,
+ MGMT_STATUS_NOT_SUPPORTED);
+
status = mgmt_bredr_support(hdev);
if (status)
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_HS, status);
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 71f8126be12b..d3f29174caa5 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -413,10 +413,8 @@ static int __rfcomm_create_dev(struct sock *sk, void __user *arg)
dlc = rfcomm_dlc_exists(&req.src, &req.dst, req.channel);
if (IS_ERR(dlc))
return PTR_ERR(dlc);
- else if (dlc) {
- rfcomm_dlc_put(dlc);
+ if (dlc)
return -EBUSY;
- }
dlc = rfcomm_dlc_alloc(GFP_KERNEL);
if (!dlc)
return -ENOMEM;
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 46c3f086a261..4c20ceaf3089 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -83,7 +83,6 @@ static void sco_sock_timeout(unsigned long arg)
sk->sk_state_change(sk);
bh_unlock_sock(sk);
- sco_sock_kill(sk);
sock_put(sk);
}
@@ -175,7 +174,6 @@ static void sco_conn_del(struct hci_conn *hcon, int err)
sco_sock_clear_timer(sk);
sco_chan_del(sk, err);
bh_unlock_sock(sk);
- sco_sock_kill(sk);
sock_put(sk);
}
@@ -271,7 +269,8 @@ done:
return err;
}
-static int sco_send_frame(struct sock *sk, struct msghdr *msg, int len)
+static int sco_send_frame(struct sock *sk, void *buf, int len,
+ unsigned int msg_flags)
{
struct sco_conn *conn = sco_pi(sk)->conn;
struct sk_buff *skb;
@@ -283,15 +282,11 @@ static int sco_send_frame(struct sock *sk, struct msghdr *msg, int len)
BT_DBG("sk %pK len %d", sk, len);
- skb = bt_skb_send_alloc(sk, len, msg->msg_flags & MSG_DONTWAIT, &err);
+ skb = bt_skb_send_alloc(sk, len, msg_flags & MSG_DONTWAIT, &err);
if (!skb)
return err;
- if (memcpy_from_msg(skb_put(skb, len), msg, len)) {
- kfree_skb(skb);
- return -EFAULT;
- }
-
+ memcpy(skb_put(skb, len), buf, len);
hci_send_sco(conn->hcon, skb);
return len;
@@ -392,8 +387,7 @@ static void sco_sock_cleanup_listen(struct sock *parent)
*/
static void sco_sock_kill(struct sock *sk)
{
- if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket ||
- sock_flag(sk, SOCK_DEAD))
+ if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket)
return;
BT_DBG("sk %pK state %d", sk, sk->sk_state);
@@ -445,7 +439,6 @@ static void sco_sock_close(struct sock *sk)
lock_sock(sk);
__sco_sock_close(sk);
release_sock(sk);
- sco_sock_kill(sk);
}
static void sco_sock_init(struct sock *sk, struct sock *parent)
@@ -704,6 +697,7 @@ static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg,
size_t len)
{
struct sock *sk = sock->sk;
+ void *buf;
int err;
BT_DBG("sock %pK, sk %pK", sock, sk);
@@ -715,14 +709,24 @@ static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg,
if (msg->msg_flags & MSG_OOB)
return -EOPNOTSUPP;
+ buf = kmalloc(len, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ if (memcpy_from_msg(buf, msg, len)) {
+ kfree(buf);
+ return -EFAULT;
+ }
+
lock_sock(sk);
if (sk->sk_state == BT_CONNECTED)
- err = sco_send_frame(sk, msg, len);
+ err = sco_send_frame(sk, buf, len, msg->msg_flags);
else
err = -ENOTCONN;
release_sock(sk);
+ kfree(buf);
return err;
}
@@ -763,6 +767,11 @@ static void sco_conn_defer_accept(struct hci_conn *conn, u16 setting)
cp.max_latency = cpu_to_le16(0xffff);
cp.retrans_effort = 0xff;
break;
+ default:
+ /* use CVSD settings as fallback */
+ cp.max_latency = cpu_to_le16(0xffff);
+ cp.retrans_effort = 0xff;
+ break;
}
hci_send_cmd(hdev, HCI_OP_ACCEPT_SYNC_CONN_REQ,
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index d3114dfd8adf..1d9c517dd3d3 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -2654,6 +2654,15 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb)
if (skb->len < sizeof(*key))
return SMP_INVALID_PARAMS;
+ /* Check if remote and local public keys are the same and debug key is
+ * not in use.
+ */
+ if (!test_bit(SMP_FLAG_DEBUG_KEY, &smp->flags) &&
+ !crypto_memneq(key, smp->local_pk, 64)) {
+ bt_dev_err(hdev, "Remote and local public keys are identical");
+ return SMP_UNSPECIFIED;
+ }
+
memcpy(smp->remote_pk, key, 64);
if (test_bit(SMP_FLAG_REMOTE_OOB, &smp->flags)) {
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 0346c215ff6a..49c29fa7fd30 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -167,6 +167,7 @@ static struct rtnl_link_stats64 *br_get_stats64(struct net_device *dev,
sum.rx_packets += tmp.rx_packets;
}
+ netdev_stats_to_stats64(stats, &dev->stats);
stats->tx_bytes = sum.tx_bytes;
stats->tx_packets = sum.tx_packets;
stats->rx_bytes = sum.rx_bytes;
@@ -200,6 +201,12 @@ static int br_set_mac_address(struct net_device *dev, void *p)
if (!is_valid_ether_addr(addr->sa_data))
return -EADDRNOTAVAIL;
+ /* dev_set_mac_addr() can be called by a master device on bridge's
+ * NETDEV_UNREGISTER, but since it's being destroyed do nothing
+ */
+ if (dev->reg_state != NETREG_REGISTERED)
+ return -EBUSY;
+
spin_lock_bh(&br->lock);
if (!ether_addr_equal(dev->dev_addr, addr->sa_data)) {
/* Mac address will be changed in br_stp_change_bridge_id(). */
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 09442e0f7f67..1aa1d3d4979f 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -266,7 +266,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
/* If old entry was unassociated with any port, then delete it. */
f = __br_fdb_get(br, br->dev->dev_addr, 0);
- if (f && f->is_local && !f->dst)
+ if (f && f->is_local && !f->dst && !f->added_by_user)
fdb_delete_local(br, NULL, f);
fdb_insert(br, NULL, newaddr, 0);
@@ -281,7 +281,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
if (!br_vlan_should_use(v))
continue;
f = __br_fdb_get(br, br->dev->dev_addr, v->vid);
- if (f && f->is_local && !f->dst)
+ if (f && f->is_local && !f->dst && !f->added_by_user)
fdb_delete_local(br, NULL, f);
fdb_insert(br, NULL, newaddr, v->vid);
}
@@ -758,20 +758,25 @@ out:
}
/* Update (create or replace) forwarding database entry */
-static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
- __u16 state, __u16 flags, __u16 vid)
+static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
+ const __u8 *addr, __u16 state, __u16 flags, __u16 vid)
{
- struct net_bridge *br = source->br;
struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
struct net_bridge_fdb_entry *fdb;
bool modified = false;
/* If the port cannot learn allow only local and static entries */
- if (!(state & NUD_PERMANENT) && !(state & NUD_NOARP) &&
+ if (source && !(state & NUD_PERMANENT) && !(state & NUD_NOARP) &&
!(source->state == BR_STATE_LEARNING ||
source->state == BR_STATE_FORWARDING))
return -EPERM;
+ if (!source && !(state & NUD_PERMANENT)) {
+ pr_info("bridge: RTM_NEWNEIGH %s without NUD_PERMANENT\n",
+ br->dev->name);
+ return -EINVAL;
+ }
+
fdb = fdb_find(head, addr, vid);
if (fdb == NULL) {
if (!(flags & NLM_F_CREATE))
@@ -826,22 +831,28 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
return 0;
}
-static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge_port *p,
- const unsigned char *addr, u16 nlh_flags, u16 vid)
+static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
+ struct net_bridge_port *p, const unsigned char *addr,
+ u16 nlh_flags, u16 vid)
{
int err = 0;
if (ndm->ndm_flags & NTF_USE) {
+ if (!p) {
+ pr_info("bridge: RTM_NEWNEIGH %s with NTF_USE is not supported\n",
+ br->dev->name);
+ return -EINVAL;
+ }
local_bh_disable();
rcu_read_lock();
- br_fdb_update(p->br, p, addr, vid, true);
+ br_fdb_update(br, p, addr, vid, true);
rcu_read_unlock();
local_bh_enable();
} else {
- spin_lock_bh(&p->br->hash_lock);
- err = fdb_add_entry(p, addr, ndm->ndm_state,
+ spin_lock_bh(&br->hash_lock);
+ err = fdb_add_entry(br, p, addr, ndm->ndm_state,
nlh_flags, vid);
- spin_unlock_bh(&p->br->hash_lock);
+ spin_unlock_bh(&br->hash_lock);
}
return err;
@@ -878,6 +889,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
dev->name);
return -EINVAL;
}
+ br = p->br;
vg = nbp_vlan_group(p);
}
@@ -889,15 +901,9 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
}
/* VID was specified, so use it. */
- if (dev->priv_flags & IFF_EBRIDGE)
- err = br_fdb_insert(br, NULL, addr, vid);
- else
- err = __br_fdb_add(ndm, p, addr, nlh_flags, vid);
+ err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid);
} else {
- if (dev->priv_flags & IFF_EBRIDGE)
- err = br_fdb_insert(br, NULL, addr, 0);
- else
- err = __br_fdb_add(ndm, p, addr, nlh_flags, 0);
+ err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0);
if (err || !vg || !vg->num_vlans)
goto out;
@@ -908,11 +914,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
list_for_each_entry(v, &vg->vlan_list, vlist) {
if (!br_vlan_should_use(v))
continue;
- if (dev->priv_flags & IFF_EBRIDGE)
- err = br_fdb_insert(br, NULL, addr, v->vid);
- else
- err = __br_fdb_add(ndm, p, addr, nlh_flags,
- v->vid);
+ err = __br_fdb_add(ndm, br, p, addr, nlh_flags, v->vid);
if (err)
goto out;
}
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index e24754a0e052..920b7c0f1e2d 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -78,13 +78,10 @@ static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br,
BR_INPUT_SKB_CB(skb)->proxyarp_replied = false;
- if (dev->flags & IFF_NOARP)
+ if ((dev->flags & IFF_NOARP) ||
+ !pskb_may_pull(skb, arp_hdr_len(dev)))
return;
- if (!pskb_may_pull(skb, arp_hdr_len(dev))) {
- dev->stats.tx_dropped++;
- return;
- }
parp = arp_hdr(skb);
if (parp->ar_pro != htons(ETH_P_IP) ||
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 2ae0451fd634..f580dbaac5a9 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -638,6 +638,9 @@ static unsigned int br_nf_forward_arp(void *priv,
nf_bridge_pull_encap_header(skb);
}
+ if (unlikely(!pskb_may_pull(skb, sizeof(struct arphdr))))
+ return NF_DROP;
+
if (arp_hdr(skb)->ar_pln != 4) {
if (IS_VLAN_ARP(skb))
nf_bridge_push_encap_header(skb);
@@ -708,9 +711,17 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff
mtu_reserved = nf_bridge_mtu_reduction(skb);
mtu = skb->dev->mtu;
+ if (nf_bridge->pkt_otherhost) {
+ skb->pkt_type = PACKET_OTHERHOST;
+ nf_bridge->pkt_otherhost = false;
+ }
+
if (nf_bridge->frag_max_size && nf_bridge->frag_max_size < mtu)
mtu = nf_bridge->frag_max_size;
+ nf_bridge_update_protocol(skb);
+ nf_bridge_push_encap_header(skb);
+
if (skb_is_gso(skb) || skb->len + mtu_reserved <= mtu) {
nf_bridge_info_free(skb);
return br_dev_queue_push_xmit(net, sk, skb);
@@ -728,8 +739,6 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff
IPCB(skb)->frag_max_size = nf_bridge->frag_max_size;
- nf_bridge_update_protocol(skb);
-
data = this_cpu_ptr(&brnf_frag_data_storage);
data->vlan_tci = skb->vlan_tci;
@@ -752,8 +761,6 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff
IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size;
- nf_bridge_update_protocol(skb);
-
data = this_cpu_ptr(&brnf_frag_data_storage);
data->encap_size = nf_bridge_encap_header_len(skb);
data->size = ETH_HLEN + data->encap_size;
@@ -801,8 +808,6 @@ static unsigned int br_nf_post_routing(void *priv,
else
return NF_ACCEPT;
- /* We assume any code from br_dev_queue_push_xmit onwards doesn't care
- * about the value of skb->pkt_type. */
if (skb->pkt_type == PACKET_OTHERHOST) {
skb->pkt_type = PACKET_HOST;
nf_bridge->pkt_otherhost = true;
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index a7953962112a..1b848a45047b 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -225,8 +225,10 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags)
}
masterv = br_vlan_get_master(br, v->vid);
- if (!masterv)
+ if (!masterv) {
+ err = -ENOMEM;
goto out_filt;
+ }
v->brvlan = masterv;
}
diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c
index 517e78befcb2..61a9f1be1263 100644
--- a/net/bridge/netfilter/ebt_limit.c
+++ b/net/bridge/netfilter/ebt_limit.c
@@ -105,6 +105,7 @@ static struct xt_match ebt_limit_mt_reg __read_mostly = {
.match = ebt_limit_mt,
.checkentry = ebt_limit_mt_check,
.matchsize = sizeof(struct ebt_limit_info),
+ .usersize = offsetof(struct ebt_limit_info, prev),
#ifdef CONFIG_COMPAT
.compatsize = sizeof(struct ebt_compat_limit_info),
#endif
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index d9471e3ef216..e7c170949b21 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1883,7 +1883,7 @@ static int ebt_buf_count(struct ebt_entries_buf_state *state, unsigned int sz)
}
static int ebt_buf_add(struct ebt_entries_buf_state *state,
- void *data, unsigned int sz)
+ const void *data, unsigned int sz)
{
if (state->buf_kern_start == NULL)
goto count_only;
@@ -1917,7 +1917,7 @@ enum compat_mwt {
EBT_COMPAT_TARGET,
};
-static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt,
+static int compat_mtw_from_user(const struct compat_ebt_entry_mwt *mwt,
enum compat_mwt compat_mwt,
struct ebt_entries_buf_state *state,
const unsigned char *base)
@@ -1994,22 +1994,23 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt,
* return size of all matches, watchers or target, including necessary
* alignment and padding.
*/
-static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32,
+static int ebt_size_mwt(const struct compat_ebt_entry_mwt *match32,
unsigned int size_left, enum compat_mwt type,
struct ebt_entries_buf_state *state, const void *base)
{
+ const char *buf = (const char *)match32;
int growth = 0;
- char *buf;
if (size_left == 0)
return 0;
- buf = (char *) match32;
-
- while (size_left >= sizeof(*match32)) {
+ do {
struct ebt_entry_match *match_kern;
int ret;
+ if (size_left < sizeof(*match32))
+ return -EINVAL;
+
match_kern = (struct ebt_entry_match *) state->buf_kern_start;
if (match_kern) {
char *tmp;
@@ -2046,22 +2047,18 @@ static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32,
if (match_kern)
match_kern->match_size = ret;
- /* rule should have no remaining data after target */
- if (type == EBT_COMPAT_TARGET && size_left)
- return -EINVAL;
-
match32 = (struct compat_ebt_entry_mwt *) buf;
- }
+ } while (size_left);
return growth;
}
/* called for all ebt_entry structures. */
-static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base,
+static int size_entry_mwt(const struct ebt_entry *entry, const unsigned char *base,
unsigned int *total,
struct ebt_entries_buf_state *state)
{
- unsigned int i, j, startoff, new_offset = 0;
+ unsigned int i, j, startoff, next_expected_off, new_offset = 0;
/* stores match/watchers/targets & offset of next struct ebt_entry: */
unsigned int offsets[4];
unsigned int *offsets_update = NULL;
@@ -2149,11 +2146,13 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base,
return ret;
}
- startoff = state->buf_user_offset - startoff;
+ next_expected_off = state->buf_user_offset - startoff;
+ if (next_expected_off != entry->next_offset)
+ return -EINVAL;
- if (WARN_ON(*total < startoff))
+ if (*total < entry->next_offset)
return -EINVAL;
- *total -= startoff;
+ *total -= entry->next_offset;
return 0;
}
@@ -2175,7 +2174,9 @@ static int compat_copy_entries(unsigned char *data, unsigned int size_user,
if (ret < 0)
return ret;
- WARN_ON(size_remaining);
+ if (size_remaining)
+ return -EINVAL;
+
return state->buf_kern_offset;
}
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index 6e48aa69fa24..d7af67a3f19c 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -35,6 +35,12 @@ static void nft_reject_br_push_etherhdr(struct sk_buff *oldskb,
ether_addr_copy(eth->h_dest, eth_hdr(oldskb)->h_source);
eth->h_proto = eth_hdr(oldskb)->h_proto;
skb_pull(nskb, ETH_HLEN);
+
+ if (skb_vlan_tag_present(oldskb)) {
+ u16 vid = skb_vlan_tag_get(oldskb);
+
+ __vlan_hwaccel_put_tag(nskb, oldskb->vlan_proto, vid);
+ }
}
/* We cannot use oldskb->dev, it can be either bridge device (NF_BRIDGE INPUT)
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index a0443d40d677..a28ffbbf7450 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -303,7 +303,7 @@ static void dev_flowctrl(struct net_device *dev, int on)
caifd_put(caifd);
}
-void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
+int caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
struct cflayer *link_support, int head_room,
struct cflayer **layer,
int (**rcv_func)(struct sk_buff *, struct net_device *,
@@ -314,11 +314,12 @@ void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
enum cfcnfg_phy_preference pref;
struct cfcnfg *cfg = get_cfcnfg(dev_net(dev));
struct caif_device_entry_list *caifdevs;
+ int res;
caifdevs = caif_device_list(dev_net(dev));
caifd = caif_device_alloc(dev);
if (!caifd)
- return;
+ return -ENOMEM;
*layer = &caifd->layer;
spin_lock_init(&caifd->flow_lock);
@@ -340,7 +341,7 @@ void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
sizeof(caifd->layer.name) - 1);
caifd->layer.name[sizeof(caifd->layer.name) - 1] = 0;
caifd->layer.transmit = transmit;
- cfcnfg_add_phy_layer(cfg,
+ res = cfcnfg_add_phy_layer(cfg,
dev,
&caifd->layer,
pref,
@@ -350,6 +351,7 @@ void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
mutex_unlock(&caifdevs->lock);
if (rcv_func)
*rcv_func = receive;
+ return res;
}
EXPORT_SYMBOL(caif_enroll_dev);
@@ -364,6 +366,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
struct cflayer *layer, *link_support;
int head_room = 0;
struct caif_device_entry_list *caifdevs;
+ int res;
cfg = get_cfcnfg(dev_net(dev));
caifdevs = caif_device_list(dev_net(dev));
@@ -389,8 +392,10 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
break;
}
}
- caif_enroll_dev(dev, caifdev, link_support, head_room,
+ res = caif_enroll_dev(dev, caifdev, link_support, head_room,
&layer, NULL);
+ if (res)
+ cfserl_release(link_support);
caifdev->flowctrl = dev_flowctrl;
break;
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index aa209b1066c9..3cfd413aa2c8 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -539,7 +539,8 @@ static int caif_seqpkt_sendmsg(struct socket *sock, struct msghdr *msg,
goto err;
ret = -EINVAL;
- if (unlikely(msg->msg_iter.iov->iov_base == NULL))
+ if (unlikely(msg->msg_iter.nr_segs == 0) ||
+ unlikely(msg->msg_iter.iov->iov_base == NULL))
goto err;
noblock = msg->msg_flags & MSG_DONTWAIT;
diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c
index 5cd44f001f64..485dde566c1a 100644
--- a/net/caif/caif_usb.c
+++ b/net/caif/caif_usb.c
@@ -116,6 +116,11 @@ static struct cflayer *cfusbl_create(int phyid, u8 ethaddr[ETH_ALEN],
return (struct cflayer *) this;
}
+static void cfusbl_release(struct cflayer *layer)
+{
+ kfree(layer);
+}
+
static struct packet_type caif_usb_type __read_mostly = {
.type = cpu_to_be16(ETH_P_802_EX1),
};
@@ -128,6 +133,7 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what,
struct cflayer *layer, *link_support;
struct usbnet *usbnet;
struct usb_device *usbdev;
+ int res;
/* Check whether we have a NCM device, and find its VID/PID. */
if (!(dev->dev.parent && dev->dev.parent->driver &&
@@ -170,8 +176,11 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what,
if (dev->num_tx_queues > 1)
pr_warn("USB device uses more than one tx queue\n");
- caif_enroll_dev(dev, &common, link_support, CFUSB_MAX_HEADLEN,
+ res = caif_enroll_dev(dev, &common, link_support, CFUSB_MAX_HEADLEN,
&layer, &caif_usb_type.func);
+ if (res)
+ goto err;
+
if (!pack_added)
dev_add_pack(&caif_usb_type);
pack_added = true;
@@ -181,6 +190,9 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what,
layer->name[sizeof(layer->name) - 1] = 0;
return 0;
+err:
+ cfusbl_release(link_support);
+ return res;
}
static struct notifier_block caif_device_notifier = {
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index fa39fc298708..c45b531a6cd5 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -455,7 +455,7 @@ unlock:
rcu_read_unlock();
}
-void
+int
cfcnfg_add_phy_layer(struct cfcnfg *cnfg,
struct net_device *dev, struct cflayer *phy_layer,
enum cfcnfg_phy_preference pref,
@@ -464,7 +464,7 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg,
{
struct cflayer *frml;
struct cfcnfg_phyinfo *phyinfo = NULL;
- int i;
+ int i, res = 0;
u8 phyid;
mutex_lock(&cnfg->lock);
@@ -478,12 +478,15 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg,
goto got_phyid;
}
pr_warn("Too many CAIF Link Layers (max 6)\n");
+ res = -EEXIST;
goto out;
got_phyid:
phyinfo = kzalloc(sizeof(struct cfcnfg_phyinfo), GFP_ATOMIC);
- if (!phyinfo)
+ if (!phyinfo) {
+ res = -ENOMEM;
goto out_err;
+ }
phy_layer->id = phyid;
phyinfo->pref = pref;
@@ -497,8 +500,10 @@ got_phyid:
frml = cffrml_create(phyid, fcs);
- if (!frml)
+ if (!frml) {
+ res = -ENOMEM;
goto out_err;
+ }
phyinfo->frm_layer = frml;
layer_set_up(frml, cnfg->mux);
@@ -516,11 +521,12 @@ got_phyid:
list_add_rcu(&phyinfo->node, &cnfg->phys);
out:
mutex_unlock(&cnfg->lock);
- return;
+ return res;
out_err:
kfree(phyinfo);
mutex_unlock(&cnfg->lock);
+ return res;
}
EXPORT_SYMBOL(cfcnfg_add_phy_layer);
diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c
index ce60f06d76de..af1e1e36dc90 100644
--- a/net/caif/cfserl.c
+++ b/net/caif/cfserl.c
@@ -31,6 +31,11 @@ static int cfserl_transmit(struct cflayer *layr, struct cfpkt *pkt);
static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
int phyid);
+void cfserl_release(struct cflayer *layer)
+{
+ kfree(layer);
+}
+
struct cflayer *cfserl_create(int instance, bool use_stx)
{
struct cfserl *this = kzalloc(sizeof(struct cfserl), GFP_ATOMIC);
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 67a4a36febd1..40f032f62029 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -56,20 +56,6 @@ struct chnl_net {
enum caif_states state;
};
-static void robust_list_del(struct list_head *delete_node)
-{
- struct list_head *list_node;
- struct list_head *n;
- ASSERT_RTNL();
- list_for_each_safe(list_node, n, &chnl_net_list) {
- if (list_node == delete_node) {
- list_del(list_node);
- return;
- }
- }
- WARN_ON(1);
-}
-
static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt)
{
struct sk_buff *skb;
@@ -371,6 +357,7 @@ static int chnl_net_init(struct net_device *dev)
ASSERT_RTNL();
priv = netdev_priv(dev);
strncpy(priv->name, dev->name, sizeof(priv->name));
+ INIT_LIST_HEAD(&priv->list_field);
return 0;
}
@@ -379,7 +366,7 @@ static void chnl_net_uninit(struct net_device *dev)
struct chnl_net *priv;
ASSERT_RTNL();
priv = netdev_priv(dev);
- robust_list_del(&priv->list_field);
+ list_del_init(&priv->list_field);
}
static const struct net_device_ops netdev_ops = {
@@ -542,7 +529,7 @@ static void __exit chnl_exit_module(void)
rtnl_lock();
list_for_each_safe(list_node, _tmp, &chnl_net_list) {
dev = list_entry(list_node, struct chnl_net, list_field);
- list_del(list_node);
+ list_del_init(list_node);
delete_device(dev);
}
rtnl_unlock();
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 1f15622d3c65..549ee0de456f 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -121,7 +121,7 @@ struct bcm_sock {
struct sock sk;
int bound;
int ifindex;
- struct notifier_block notifier;
+ struct list_head notifier;
struct list_head rx_ops;
struct list_head tx_ops;
unsigned long dropped_usr_msgs;
@@ -129,6 +129,10 @@ struct bcm_sock {
char procname [32]; /* inode number in decimal with \0 */
};
+static LIST_HEAD(bcm_notifier_list);
+static DEFINE_SPINLOCK(bcm_notifier_lock);
+static struct bcm_sock *bcm_busy_notifier;
+
static inline struct bcm_sock *bcm_sk(const struct sock *sk)
{
return (struct bcm_sock *)sk;
@@ -392,6 +396,7 @@ static void bcm_tx_timeout_tsklet(unsigned long data)
if (!op->count && (op->flags & TX_COUNTEVT)) {
/* create notification to user */
+ memset(&msg_head, 0, sizeof(msg_head));
msg_head.opcode = TX_EXPIRED;
msg_head.flags = op->flags;
msg_head.count = op->count;
@@ -439,6 +444,7 @@ static void bcm_rx_changed(struct bcm_op *op, struct can_frame *data)
/* this element is not throttled anymore */
data->can_dlc &= (BCM_CAN_DLC_MASK|RX_RECV);
+ memset(&head, 0, sizeof(head));
head.opcode = RX_CHANGED;
head.flags = op->flags;
head.count = op->count;
@@ -550,6 +556,7 @@ static void bcm_rx_timeout_tsklet(unsigned long data)
struct bcm_msg_head msg_head;
/* create notification to user */
+ memset(&msg_head, 0, sizeof(msg_head));
msg_head.opcode = RX_TIMEOUT;
msg_head.flags = op->flags;
msg_head.count = op->count;
@@ -730,21 +737,21 @@ static struct bcm_op *bcm_find_op(struct list_head *ops, canid_t can_id,
static void bcm_remove_op(struct bcm_op *op)
{
if (op->tsklet.func) {
- while (test_bit(TASKLET_STATE_SCHED, &op->tsklet.state) ||
- test_bit(TASKLET_STATE_RUN, &op->tsklet.state) ||
- hrtimer_active(&op->timer)) {
- hrtimer_cancel(&op->timer);
+ do {
tasklet_kill(&op->tsklet);
- }
+ hrtimer_cancel(&op->timer);
+ } while (test_bit(TASKLET_STATE_SCHED, &op->tsklet.state) ||
+ test_bit(TASKLET_STATE_RUN, &op->tsklet.state) ||
+ hrtimer_active(&op->timer));
}
if (op->thrtsklet.func) {
- while (test_bit(TASKLET_STATE_SCHED, &op->thrtsklet.state) ||
- test_bit(TASKLET_STATE_RUN, &op->thrtsklet.state) ||
- hrtimer_active(&op->thrtimer)) {
- hrtimer_cancel(&op->thrtimer);
+ do {
tasklet_kill(&op->thrtsklet);
- }
+ hrtimer_cancel(&op->thrtimer);
+ } while (test_bit(TASKLET_STATE_SCHED, &op->thrtsklet.state) ||
+ test_bit(TASKLET_STATE_RUN, &op->thrtsklet.state) ||
+ hrtimer_active(&op->thrtimer));
}
if ((op->frames) && (op->frames != &op->sframe))
@@ -806,6 +813,7 @@ static int bcm_delete_rx_op(struct list_head *ops, canid_t can_id, int ifindex)
bcm_rx_handler, op);
list_del(&op->list);
+ synchronize_rcu();
bcm_remove_op(op);
return 1; /* done */
}
@@ -1385,20 +1393,15 @@ static int bcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
/*
* notification handler for netdevice status changes
*/
-static int bcm_notifier(struct notifier_block *nb, unsigned long msg,
- void *ptr)
+static void bcm_notify(struct bcm_sock *bo, unsigned long msg,
+ struct net_device *dev)
{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct bcm_sock *bo = container_of(nb, struct bcm_sock, notifier);
struct sock *sk = &bo->sk;
struct bcm_op *op;
int notify_enodev = 0;
if (!net_eq(dev_net(dev), &init_net))
- return NOTIFY_DONE;
-
- if (dev->type != ARPHRD_CAN)
- return NOTIFY_DONE;
+ return;
switch (msg) {
@@ -1433,7 +1436,28 @@ static int bcm_notifier(struct notifier_block *nb, unsigned long msg,
sk->sk_error_report(sk);
}
}
+}
+static int bcm_notifier(struct notifier_block *nb, unsigned long msg,
+ void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+ if (dev->type != ARPHRD_CAN)
+ return NOTIFY_DONE;
+ if (msg != NETDEV_UNREGISTER && msg != NETDEV_DOWN)
+ return NOTIFY_DONE;
+ if (unlikely(bcm_busy_notifier)) /* Check for reentrant bug. */
+ return NOTIFY_DONE;
+
+ spin_lock(&bcm_notifier_lock);
+ list_for_each_entry(bcm_busy_notifier, &bcm_notifier_list, notifier) {
+ spin_unlock(&bcm_notifier_lock);
+ bcm_notify(bcm_busy_notifier, msg, dev);
+ spin_lock(&bcm_notifier_lock);
+ }
+ bcm_busy_notifier = NULL;
+ spin_unlock(&bcm_notifier_lock);
return NOTIFY_DONE;
}
@@ -1453,9 +1477,9 @@ static int bcm_init(struct sock *sk)
INIT_LIST_HEAD(&bo->rx_ops);
/* set notifier */
- bo->notifier.notifier_call = bcm_notifier;
-
- register_netdevice_notifier(&bo->notifier);
+ spin_lock(&bcm_notifier_lock);
+ list_add_tail(&bo->notifier, &bcm_notifier_list);
+ spin_unlock(&bcm_notifier_lock);
return 0;
}
@@ -1476,7 +1500,14 @@ static int bcm_release(struct socket *sock)
/* remove bcm_ops, timer, rx_unregister(), etc. */
- unregister_netdevice_notifier(&bo->notifier);
+ spin_lock(&bcm_notifier_lock);
+ while (bcm_busy_notifier == bo) {
+ spin_unlock(&bcm_notifier_lock);
+ schedule_timeout_uninterruptible(1);
+ spin_lock(&bcm_notifier_lock);
+ }
+ list_del(&bo->notifier);
+ spin_unlock(&bcm_notifier_lock);
lock_sock(sk);
@@ -1508,9 +1539,13 @@ static int bcm_release(struct socket *sock)
REGMASK(op->can_id),
bcm_rx_handler, op);
- bcm_remove_op(op);
}
+ synchronize_rcu();
+
+ list_for_each_entry_safe(op, next, &bo->rx_ops, list)
+ bcm_remove_op(op);
+
/* remove procfs entry */
if (proc_dir && bo->bcm_proc_read)
remove_proc_entry(bo->procname, proc_dir);
@@ -1662,6 +1697,10 @@ static const struct can_proto bcm_can_proto = {
.prot = &bcm_proto,
};
+static struct notifier_block canbcm_notifier = {
+ .notifier_call = bcm_notifier
+};
+
static int __init bcm_module_init(void)
{
int err;
@@ -1676,6 +1715,8 @@ static int __init bcm_module_init(void)
/* create /proc/net/can-bcm directory */
proc_dir = proc_mkdir("can-bcm", init_net.proc_net);
+ register_netdevice_notifier(&canbcm_notifier);
+
return 0;
}
@@ -1685,6 +1726,8 @@ static void __exit bcm_module_exit(void)
if (proc_dir)
remove_proc_entry("can-bcm", init_net.proc_net);
+
+ unregister_netdevice_notifier(&canbcm_notifier);
}
module_init(bcm_module_init);
diff --git a/net/can/gw.c b/net/can/gw.c
index 81650affa3fa..1867000f8a65 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -497,6 +497,7 @@ static int cgw_notifier(struct notifier_block *nb,
if (gwj->src.dev == dev || gwj->dst.dev == dev) {
hlist_del(&gwj->list);
cgw_unregister_filter(gwj);
+ synchronize_rcu();
kmem_cache_free(cgw_cache, gwj);
}
}
@@ -941,6 +942,7 @@ static void cgw_remove_all_jobs(void)
hlist_for_each_entry_safe(gwj, nx, &cgw_list, list) {
hlist_del(&gwj->list);
cgw_unregister_filter(gwj);
+ synchronize_rcu();
kmem_cache_free(cgw_cache, gwj);
}
}
@@ -1008,6 +1010,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh)
hlist_del(&gwj->list);
cgw_unregister_filter(gwj);
+ synchronize_rcu();
kmem_cache_free(cgw_cache, gwj);
err = 0;
break;
diff --git a/net/can/raw.c b/net/can/raw.c
index e9403a26a1d5..1c2bf97ca168 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -84,7 +84,7 @@ struct raw_sock {
struct sock sk;
int bound;
int ifindex;
- struct notifier_block notifier;
+ struct list_head notifier;
int loopback;
int recv_own_msgs;
int fd_frames;
@@ -96,6 +96,10 @@ struct raw_sock {
struct uniqframe __percpu *uniq;
};
+static LIST_HEAD(raw_notifier_list);
+static DEFINE_SPINLOCK(raw_notifier_lock);
+static struct raw_sock *raw_busy_notifier;
+
/*
* Return pointer to store the extra msg flags for raw_recvmsg().
* We use the space of one unsigned int beyond the 'struct sockaddr_can'
@@ -260,21 +264,16 @@ static int raw_enable_allfilters(struct net_device *dev, struct sock *sk)
return err;
}
-static int raw_notifier(struct notifier_block *nb,
- unsigned long msg, void *ptr)
+static void raw_notify(struct raw_sock *ro, unsigned long msg,
+ struct net_device *dev)
{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct raw_sock *ro = container_of(nb, struct raw_sock, notifier);
struct sock *sk = &ro->sk;
if (!net_eq(dev_net(dev), &init_net))
- return NOTIFY_DONE;
-
- if (dev->type != ARPHRD_CAN)
- return NOTIFY_DONE;
+ return;
if (ro->ifindex != dev->ifindex)
- return NOTIFY_DONE;
+ return;
switch (msg) {
@@ -303,7 +302,28 @@ static int raw_notifier(struct notifier_block *nb,
sk->sk_error_report(sk);
break;
}
+}
+
+static int raw_notifier(struct notifier_block *nb, unsigned long msg,
+ void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+ if (dev->type != ARPHRD_CAN)
+ return NOTIFY_DONE;
+ if (msg != NETDEV_UNREGISTER && msg != NETDEV_DOWN)
+ return NOTIFY_DONE;
+ if (unlikely(raw_busy_notifier)) /* Check for reentrant bug. */
+ return NOTIFY_DONE;
+ spin_lock(&raw_notifier_lock);
+ list_for_each_entry(raw_busy_notifier, &raw_notifier_list, notifier) {
+ spin_unlock(&raw_notifier_lock);
+ raw_notify(raw_busy_notifier, msg, dev);
+ spin_lock(&raw_notifier_lock);
+ }
+ raw_busy_notifier = NULL;
+ spin_unlock(&raw_notifier_lock);
return NOTIFY_DONE;
}
@@ -332,9 +352,9 @@ static int raw_init(struct sock *sk)
return -ENOMEM;
/* set notifier */
- ro->notifier.notifier_call = raw_notifier;
-
- register_netdevice_notifier(&ro->notifier);
+ spin_lock(&raw_notifier_lock);
+ list_add_tail(&ro->notifier, &raw_notifier_list);
+ spin_unlock(&raw_notifier_lock);
return 0;
}
@@ -349,7 +369,14 @@ static int raw_release(struct socket *sock)
ro = raw_sk(sk);
- unregister_netdevice_notifier(&ro->notifier);
+ spin_lock(&raw_notifier_lock);
+ while (raw_busy_notifier == ro) {
+ spin_unlock(&raw_notifier_lock);
+ schedule_timeout_uninterruptible(1);
+ spin_lock(&raw_notifier_lock);
+ }
+ list_del(&ro->notifier);
+ spin_unlock(&raw_notifier_lock);
lock_sock(sk);
@@ -514,10 +541,18 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
return -EFAULT;
}
+ rtnl_lock();
lock_sock(sk);
- if (ro->bound && ro->ifindex)
+ if (ro->bound && ro->ifindex) {
dev = dev_get_by_index(&init_net, ro->ifindex);
+ if (!dev) {
+ if (count > 1)
+ kfree(filter);
+ err = -ENODEV;
+ goto out_fil;
+ }
+ }
if (ro->bound) {
/* (try to) register the new filters */
@@ -554,6 +589,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
dev_put(dev);
release_sock(sk);
+ rtnl_unlock();
break;
@@ -566,10 +602,16 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
err_mask &= CAN_ERR_MASK;
+ rtnl_lock();
lock_sock(sk);
- if (ro->bound && ro->ifindex)
+ if (ro->bound && ro->ifindex) {
dev = dev_get_by_index(&init_net, ro->ifindex);
+ if (!dev) {
+ err = -ENODEV;
+ goto out_err;
+ }
+ }
/* remove current error mask */
if (ro->bound) {
@@ -591,6 +633,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
dev_put(dev);
release_sock(sk);
+ rtnl_unlock();
break;
@@ -857,6 +900,10 @@ static const struct can_proto raw_can_proto = {
.prot = &raw_proto,
};
+static struct notifier_block canraw_notifier = {
+ .notifier_call = raw_notifier
+};
+
static __init int raw_module_init(void)
{
int err;
@@ -866,6 +913,8 @@ static __init int raw_module_init(void)
err = can_proto_register(&raw_can_proto);
if (err < 0)
printk(KERN_ERR "can: registration of raw protocol failed\n");
+ else
+ register_netdevice_notifier(&canraw_notifier);
return err;
}
@@ -873,6 +922,7 @@ static __init int raw_module_init(void)
static __exit void raw_module_exit(void)
{
can_proto_unregister(&raw_can_proto);
+ unregister_netdevice_notifier(&canraw_notifier);
}
module_init(raw_module_init);
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 3ed2796d008b..3fbc312e43ce 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -2976,6 +2976,11 @@ static void con_fault(struct ceph_connection *con)
ceph_msg_put(con->in_msg);
con->in_msg = NULL;
}
+ if (con->out_msg) {
+ BUG_ON(con->out_msg->con != con);
+ ceph_msg_put(con->out_msg);
+ con->out_msg = NULL;
+ }
/* Requeue anything that hasn't been acked */
list_splice_init(&con->out_sent, &con->out_queue);
diff --git a/net/compat.c b/net/compat.c
index d67684010455..53e58bfe0dc6 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -159,7 +159,7 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk,
if (kcmlen > stackbuf_size)
kcmsg_base = kcmsg = sock_kmalloc(sk, kcmlen, GFP_KERNEL);
if (kcmsg == NULL)
- return -ENOBUFS;
+ return -ENOMEM;
/* Now copy them over neatly. */
memset(kcmsg, 0, kcmlen);
@@ -284,6 +284,7 @@ void scm_detach_fds_compat(struct msghdr *kmsg, struct scm_cookie *scm)
break;
}
/* Bump the usage count and install the file. */
+ __receive_sock(fp[i]);
fd_install(new_fd, get_file(fp[i]));
}
@@ -355,7 +356,8 @@ static int do_set_sock_timeout(struct socket *sock, int level,
static int compat_sock_setsockopt(struct socket *sock, int level, int optname,
char __user *optval, unsigned int optlen)
{
- if (optname == SO_ATTACH_FILTER)
+ if (optname == SO_ATTACH_FILTER ||
+ optname == SO_ATTACH_REUSEPORT_CBPF)
return do_set_attach_filter(sock, level, optname,
optval, optlen);
if (!COMPAT_USE_64BIT_TIME &&
diff --git a/net/core/Makefile b/net/core/Makefile
index 0d35bba614a9..5e3c5fe87379 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -9,7 +9,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
- sock_diag.o dev_ioctl.o tso.o
+ sock_diag.o dev_ioctl.o tso.o sock_reuseport.o
obj-$(CONFIG_XFRM) += flow.o
obj-y += net-sysfs.o
diff --git a/net/core/dev.c b/net/core/dev.c
index 1aa1261a5934..79f7b6fe9ce1 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -82,6 +82,7 @@
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/mutex.h>
+#include <linux/rwsem.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
@@ -94,6 +95,7 @@
#include <linux/ethtool.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
+#include <linux/bpf.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
@@ -187,7 +189,7 @@ static DEFINE_SPINLOCK(napi_hash_lock);
static unsigned int napi_gen_id = NR_CPUS;
static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8);
-static seqcount_t devnet_rename_seq;
+static DECLARE_RWSEM(devnet_rename_sem);
static inline void dev_base_seq_inc(struct net *net)
{
@@ -864,33 +866,28 @@ EXPORT_SYMBOL(dev_get_by_index);
* @net: network namespace
* @name: a pointer to the buffer where the name will be stored.
* @ifindex: the ifindex of the interface to get the name from.
- *
- * The use of raw_seqcount_begin() and cond_resched() before
- * retrying is required as we want to give the writers a chance
- * to complete when CONFIG_PREEMPT is not set.
*/
int netdev_get_name(struct net *net, char *name, int ifindex)
{
struct net_device *dev;
- unsigned int seq;
+ int ret;
-retry:
- seq = raw_seqcount_begin(&devnet_rename_seq);
+ down_read(&devnet_rename_sem);
rcu_read_lock();
+
dev = dev_get_by_index_rcu(net, ifindex);
if (!dev) {
- rcu_read_unlock();
- return -ENODEV;
+ ret = -ENODEV;
+ goto out;
}
strcpy(name, dev->name);
- rcu_read_unlock();
- if (read_seqcount_retry(&devnet_rename_seq, seq)) {
- cond_resched();
- goto retry;
- }
- return 0;
+ ret = 0;
+out:
+ rcu_read_unlock();
+ up_read(&devnet_rename_sem);
+ return ret;
}
/**
@@ -1155,10 +1152,10 @@ int dev_change_name(struct net_device *dev, const char *newname)
if (dev->flags & IFF_UP)
return -EBUSY;
- write_seqcount_begin(&devnet_rename_seq);
+ down_write(&devnet_rename_sem);
if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
- write_seqcount_end(&devnet_rename_seq);
+ up_write(&devnet_rename_sem);
return 0;
}
@@ -1166,7 +1163,7 @@ int dev_change_name(struct net_device *dev, const char *newname)
err = dev_get_valid_name(net, dev, newname);
if (err < 0) {
- write_seqcount_end(&devnet_rename_seq);
+ up_write(&devnet_rename_sem);
return err;
}
@@ -1181,11 +1178,11 @@ rollback:
if (ret) {
memcpy(dev->name, oldname, IFNAMSIZ);
dev->name_assign_type = old_assign_type;
- write_seqcount_end(&devnet_rename_seq);
+ up_write(&devnet_rename_sem);
return ret;
}
- write_seqcount_end(&devnet_rename_seq);
+ up_write(&devnet_rename_sem);
netdev_adjacent_rename_links(dev, oldname);
@@ -1206,7 +1203,7 @@ rollback:
/* err >= 0 after dev_alloc_name() or stores the first errno */
if (err >= 0) {
err = ret;
- write_seqcount_begin(&devnet_rename_seq);
+ down_write(&devnet_rename_sem);
memcpy(dev->name, oldname, IFNAMSIZ);
memcpy(oldname, newname, IFNAMSIZ);
dev->name_assign_type = old_assign_type;
@@ -1794,19 +1791,14 @@ EXPORT_SYMBOL_GPL(is_skb_forwardable);
int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
{
- if (skb_orphan_frags(skb, GFP_ATOMIC) ||
- unlikely(!is_skb_forwardable(dev, skb))) {
- atomic_long_inc(&dev->rx_dropped);
- kfree_skb(skb);
- return NET_RX_DROP;
- }
+ int ret = ____dev_forward_skb(dev, skb);
- skb_scrub_packet(skb, true);
- skb->priority = 0;
- skb->protocol = eth_type_trans(skb, dev);
- skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+ if (likely(!ret)) {
+ skb->protocol = eth_type_trans(skb, dev);
+ skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+ }
- return 0;
+ return ret;
}
EXPORT_SYMBOL_GPL(__dev_forward_skb);
@@ -3014,7 +3006,8 @@ static void skb_update_prio(struct sk_buff *skb)
struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
if (!skb->priority && skb->sk && map) {
- unsigned int prioidx = skb->sk->sk_cgrp_prioidx;
+ unsigned int prioidx =
+ sock_cgroup_prioidx(&skb->sk->sk_cgrp_data);
if (prioidx < map->priomap_len)
skb->priority = map->priomap[prioidx];
@@ -3027,8 +3020,6 @@ static void skb_update_prio(struct sk_buff *skb)
DEFINE_PER_CPU(int, xmit_recursion);
EXPORT_SYMBOL(xmit_recursion);
-#define RECURSION_LIMIT 10
-
/**
* dev_loopback_xmit - loop back @skb
* @net: network namespace this loopback is happening in
@@ -3220,8 +3211,8 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
int cpu = smp_processor_id(); /* ok because BHs are off */
if (txq->xmit_lock_owner != cpu) {
-
- if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
+ if (unlikely(__this_cpu_read(xmit_recursion) >
+ XMIT_RECURSION_LIMIT))
goto recursion_alert;
skb = validate_xmit_skb(skb, dev);
@@ -4328,6 +4319,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
NAPI_GRO_CB(skb)->free = 0;
NAPI_GRO_CB(skb)->encap_mark = 0;
NAPI_GRO_CB(skb)->recursion_counter = 0;
+ NAPI_GRO_CB(skb)->is_fou = 0;
NAPI_GRO_CB(skb)->gro_remcsum_start = 0;
/* Setup for GRO checksum validation */
@@ -4747,11 +4739,18 @@ EXPORT_SYMBOL(__napi_schedule);
* __napi_schedule_irqoff - schedule for receive
* @n: entry to schedule
*
- * Variant of __napi_schedule() assuming hard irqs are masked
+ * Variant of __napi_schedule() assuming hard irqs are masked.
+ *
+ * On PREEMPT_RT enabled kernels this maps to __napi_schedule()
+ * because the interrupt disabled assumption might not be true
+ * due to force-threaded interrupts and spinlock substitution.
*/
void __napi_schedule_irqoff(struct napi_struct *n)
{
- ____napi_schedule(this_cpu_ptr(&softnet_data), n);
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ ____napi_schedule(this_cpu_ptr(&softnet_data), n);
+ else
+ __napi_schedule(n);
}
EXPORT_SYMBOL(__napi_schedule_irqoff);
@@ -4876,13 +4875,14 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
pr_err_once("netif_napi_add() called with weight %d on device %s\n",
weight, dev->name);
napi->weight = weight;
- list_add(&napi->dev_list, &dev->napi_list);
napi->dev = dev;
#ifdef CONFIG_NETPOLL
spin_lock_init(&napi->poll_lock);
napi->poll_owner = -1;
#endif
set_bit(NAPI_STATE_SCHED, &napi->state);
+ set_bit(NAPI_STATE_NPSVC, &napi->state);
+ list_add_rcu(&napi->dev_list, &dev->napi_list);
}
EXPORT_SYMBOL(netif_napi_add);
@@ -5820,7 +5820,7 @@ EXPORT_SYMBOL(netdev_lower_dev_get_private);
int dev_get_nest_level(struct net_device *dev,
- bool (*type_check)(struct net_device *dev))
+ bool (*type_check)(const struct net_device *dev))
{
struct net_device *lower = NULL;
struct list_head *iter;
@@ -6162,7 +6162,8 @@ static int __dev_set_mtu(struct net_device *dev, int new_mtu)
if (ops->ndo_change_mtu)
return ops->ndo_change_mtu(dev, new_mtu);
- dev->mtu = new_mtu;
+ /* Pairs with all the lockless reads of dev->mtu in the stack */
+ WRITE_ONCE(dev->mtu, new_mtu);
return 0;
}
@@ -6327,6 +6328,38 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
EXPORT_SYMBOL(dev_change_proto_down);
/**
+ * dev_change_xdp_fd - set or clear a bpf program for a device rx path
+ * @dev: device
+ * @fd: new program fd or negative value to clear
+ *
+ * Set or clear a bpf program for a device
+ */
+int dev_change_xdp_fd(struct net_device *dev, int fd)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+ struct bpf_prog *prog = NULL;
+ struct netdev_xdp xdp = {};
+ int err;
+
+ if (!ops->ndo_xdp)
+ return -EOPNOTSUPP;
+ if (fd >= 0) {
+ prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+ }
+
+ xdp.command = XDP_SETUP_PROG;
+ xdp.prog = prog;
+ err = ops->ndo_xdp(dev, &xdp);
+ if (err < 0 && prog)
+ bpf_prog_put(prog);
+
+ return err;
+}
+EXPORT_SYMBOL(dev_change_xdp_fd);
+
+/**
* dev_new_index - allocate an ifindex
* @net: the applicable net namespace
*
@@ -6483,11 +6516,13 @@ static void netdev_sync_lower_features(struct net_device *upper,
netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n",
&feature, lower->name);
lower->wanted_features &= ~feature;
- netdev_update_features(lower);
+ __netdev_update_features(lower);
if (unlikely(lower->features & feature))
netdev_WARN(upper, "failed to disable %pNF on %s!\n",
&feature, lower->name);
+ else
+ netdev_features_change(lower);
}
}
}
@@ -6876,6 +6911,13 @@ int register_netdevice(struct net_device *dev)
rcu_barrier();
dev->reg_state = NETREG_UNREGISTERED;
+ /* We should put the kobject that is held in
+ * netdev_unregister_kobject(), otherwise
+ * the net device cannot be freed when the
+ * driver calls free_netdev(), because the
+ * kobject is still being held.
+ */
+ kobject_put(&dev->dev.kobj);
}
/*
* Prevent userspace races by waiting until the network
@@ -7808,7 +7850,7 @@ static void __net_exit default_device_exit(struct net *net)
continue;
/* Leave virtual devices for the generic cleanup */
- if (dev->rtnl_link_ops)
+ if (dev->rtnl_link_ops && !dev->rtnl_link_ops->netns_refund)
continue;
/* Push remaining network devices to init_net */
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index a2270188b864..9bcc6fdade3e 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -159,6 +159,7 @@ static void sched_send_work(unsigned long _data)
static void trace_drop_common(struct sk_buff *skb, void *location)
{
struct net_dm_alert_msg *msg;
+ struct net_dm_drop_point *point;
struct nlmsghdr *nlh;
struct nlattr *nla;
int i;
@@ -177,11 +178,13 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
nlh = (struct nlmsghdr *)dskb->data;
nla = genlmsg_data(nlmsg_data(nlh));
msg = nla_data(nla);
+ point = msg->points;
for (i = 0; i < msg->entries; i++) {
- if (!memcmp(&location, msg->points[i].pc, sizeof(void *))) {
- msg->points[i].count++;
+ if (!memcmp(&location, &point->pc, sizeof(void *))) {
+ point->count++;
goto out;
}
+ point++;
}
if (msg->entries == dm_hit_limit)
goto out;
@@ -190,8 +193,8 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
*/
__nla_reserve_nohdr(dskb, sizeof(struct net_dm_drop_point));
nla->nla_len += NLA_ALIGN(sizeof(struct net_dm_drop_point));
- memcpy(msg->points[msg->entries].pc, &location, sizeof(void *));
- msg->points[msg->entries].count = 1;
+ memcpy(point->pc, &location, sizeof(void *));
+ point->count = 1;
msg->entries++;
if (!timer_pending(&data->send_timer)) {
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 7e4e7deb2542..b0c4440e8514 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -613,6 +613,37 @@ void netdev_rss_key_fill(void *buffer, size_t len)
}
EXPORT_SYMBOL(netdev_rss_key_fill);
+static int ethtool_get_max_rxfh_channel(struct net_device *dev, u32 *max)
+{
+ u32 dev_size, current_max = 0;
+ u32 *indir;
+ int ret;
+
+ if (!dev->ethtool_ops->get_rxfh_indir_size ||
+ !dev->ethtool_ops->get_rxfh)
+ return -EOPNOTSUPP;
+ dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev);
+ if (dev_size == 0)
+ return -EOPNOTSUPP;
+
+ indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER);
+ if (!indir)
+ return -ENOMEM;
+
+ ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL, NULL);
+ if (ret)
+ goto out;
+
+ while (dev_size--)
+ current_max = max(current_max, indir[dev_size]);
+
+ *max = current_max;
+
+out:
+ kfree(indir);
+ return ret;
+}
+
static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
void __user *useraddr)
{
@@ -709,6 +740,14 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
}
ret = ops->set_rxfh(dev, indir, NULL, ETH_RSS_HASH_NO_CHANGE);
+ if (ret)
+ goto out;
+
+ /* indicate whether rxfh was set to default */
+ if (user_size == 0)
+ dev->priv_flags &= ~IFF_RXFH_CONFIGURED;
+ else
+ dev->priv_flags |= IFF_RXFH_CONFIGURED;
out:
kfree(indir);
@@ -868,6 +907,14 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
}
ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc);
+ if (ret)
+ goto out;
+
+ /* indicate whether rxfh was set to default */
+ if (rxfh.indir_size == 0)
+ dev->priv_flags &= ~IFF_RXFH_CONFIGURED;
+ else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE)
+ dev->priv_flags |= IFF_RXFH_CONFIGURED;
out:
kfree(rss_config);
@@ -1210,6 +1257,7 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev,
void __user *useraddr)
{
struct ethtool_channels channels;
+ u32 max_rx_in_use = 0;
if (!dev->ethtool_ops->set_channels)
return -EOPNOTSUPP;
@@ -1217,6 +1265,13 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev,
if (copy_from_user(&channels, useraddr, sizeof(channels)))
return -EFAULT;
+ /* ensure the new Rx count fits within the configured Rx flow
+ * indirection table settings */
+ if (netif_is_rxfh_configured(dev) &&
+ !ethtool_get_max_rxfh_channel(dev, &max_rx_in_use) &&
+ (channels.combined_count + channels.rx_count) <= max_rx_in_use)
+ return -EINVAL;
+
return dev->ethtool_ops->set_channels(dev, &channels);
}
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index cb744a352167..dcd40a44b93d 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -638,7 +638,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
frh = nlmsg_data(nlh);
frh->family = ops->family;
- frh->table = rule->table;
+ frh->table = rule->table < 256 ? rule->table : RT_TABLE_COMPAT;
if (nla_put_u32(skb, FRA_TABLE, rule->table))
goto nla_put_failure;
if (nla_put_u32(skb, FRA_SUPPRESS_PREFIXLEN, rule->suppress_prefixlen))
@@ -765,7 +765,7 @@ static void notify_rule_change(int event, struct fib_rule *rule,
{
struct net *net;
struct sk_buff *skb;
- int err = -ENOBUFS;
+ int err = -ENOMEM;
net = ops->fro_net;
skb = nlmsg_new(fib_rule_nlmsg_size(ops, rule), GFP_KERNEL);
diff --git a/net/core/filter.c b/net/core/filter.c
index 3c5f51198c41..573af321ee93 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -26,6 +26,7 @@
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
+#include <linux/sock_diag.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
@@ -50,6 +51,7 @@
#include <net/cls_cgroup.h>
#include <net/dst_metadata.h>
#include <net/dst.h>
+#include <net/sock_reuseport.h>
/**
* sk_filter_trim_cap - run a packet through a socket filter
@@ -77,6 +79,10 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC))
return -ENOMEM;
+ err = BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb);
+ if (err)
+ return err;
+
err = security_sock_rcv_skb(sk, skb);
if (err)
return err;
@@ -84,7 +90,12 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
rcu_read_lock();
filter = rcu_dereference(sk->sk_filter);
if (filter) {
- unsigned int pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
+ struct sock *save_sk = skb->sk;
+ unsigned int pkt_len;
+
+ skb->sk = sk;
+ pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
+ skb->sk = save_sk;
err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM;
}
rcu_read_unlock();
@@ -93,14 +104,13 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
}
EXPORT_SYMBOL(sk_filter_trim_cap);
-static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
+BPF_CALL_1(__skb_get_pay_offset, struct sk_buff *, skb)
{
- return skb_get_poff((struct sk_buff *)(unsigned long) ctx);
+ return skb_get_poff(skb);
}
-static u64 __skb_get_nlattr(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
+BPF_CALL_3(__skb_get_nlattr, struct sk_buff *, skb, u32, a, u32, x)
{
- struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx;
struct nlattr *nla;
if (skb_is_nonlinear(skb))
@@ -119,9 +129,8 @@ static u64 __skb_get_nlattr(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
return 0;
}
-static u64 __skb_get_nlattr_nest(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
+BPF_CALL_3(__skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x)
{
- struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx;
struct nlattr *nla;
if (skb_is_nonlinear(skb))
@@ -144,11 +153,17 @@ static u64 __skb_get_nlattr_nest(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
return 0;
}
-static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
+BPF_CALL_0(__get_raw_cpu_id)
{
return raw_smp_processor_id();
}
+static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = {
+ .func = __get_raw_cpu_id,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+};
+
static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
struct bpf_insn *insn_buf)
{
@@ -226,9 +241,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
case SKF_AD_OFF + SKF_AD_HATYPE:
BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);
- BUILD_BUG_ON(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)) < 0);
- *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)),
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
BPF_REG_TMP, BPF_REG_CTX,
offsetof(struct sk_buff, dev));
/* if (tmp != 0) goto pc + 1 */
@@ -348,12 +362,6 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
* jump offsets, 2nd pass remapping:
* new_prog = kmalloc(sizeof(struct bpf_insn) * new_len);
* bpf_convert_filter(old_prog, old_len, new_prog, &new_len);
- *
- * User BPF's register A is mapped to our BPF register 6, user BPF
- * register X is mapped to BPF register 7; frame pointer is always
- * register 10; Context 'void *ctx' is stored in register 1, that is,
- * for socket filters: ctx == 'struct sk_buff *', for seccomp:
- * ctx == 'struct seccomp_data *'.
*/
static int bpf_convert_filter(struct sock_filter *prog, int len,
struct bpf_insn *new_prog, int *new_len)
@@ -381,9 +389,22 @@ do_pass:
new_insn = new_prog;
fp = prog;
- if (new_insn)
- *new_insn = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1);
- new_insn++;
+ /* Classic BPF related prologue emission. */
+ if (new_insn) {
+ /* Classic BPF expects A and X to be reset first. These need
+ * to be guaranteed to be the first two instructions.
+ */
+ *new_insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
+ *new_insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_X, BPF_REG_X);
+
+ /* All programs must keep CTX in callee saved BPF_REG_CTX.
+ * In the eBPF case it's done by the compiler; here we need to
+ * do this ourselves. Initial CTX is present in BPF_REG_ARG1.
+ */
+ *new_insn++ = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1);
+ } else {
+ new_insn += 3;
+ }
for (i = 0; i < len; fp++, i++) {
struct bpf_insn tmp_insns[6] = { };
@@ -491,14 +512,27 @@ do_pass:
break;
}
- /* Convert JEQ into JNE when 'jump_true' is next insn. */
- if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) {
- insn->code = BPF_JMP | BPF_JNE | bpf_src;
+ /* Convert some jumps when 'jump_true' is next insn. */
+ if (fp->jt == 0) {
+ switch (BPF_OP(fp->code)) {
+ case BPF_JEQ:
+ insn->code = BPF_JMP | BPF_JNE | bpf_src;
+ break;
+ case BPF_JGT:
+ insn->code = BPF_JMP | BPF_JLE | bpf_src;
+ break;
+ case BPF_JGE:
+ insn->code = BPF_JMP | BPF_JLT | bpf_src;
+ break;
+ default:
+ goto jmp_rest;
+ }
+
target = i + fp->jf + 1;
BPF_EMIT_JMP;
break;
}
-
+jmp_rest:
/* Other jumps are mapped into two insns: Jxx and JA. */
target = i + fp->jt + 1;
insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
@@ -526,12 +560,14 @@ do_pass:
*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
break;
- /* RET_K, RET_A are remaped into 2 insns. */
+ /* RET_K is remapped into 2 insns. RET_A case doesn't need an
+ * extra mov as BPF_REG_0 is already mapped into BPF_REG_A.
+ */
case BPF_RET | BPF_A:
case BPF_RET | BPF_K:
- *insn++ = BPF_MOV32_RAW(BPF_RVAL(fp->code) == BPF_K ?
- BPF_K : BPF_X, BPF_REG_0,
- BPF_REG_A, fp->k);
+ if (BPF_RVAL(fp->code) == BPF_K)
+ *insn++ = BPF_MOV32_RAW(BPF_K, BPF_REG_0,
+ 0, fp->k);
*insn = BPF_EXIT_INSN();
break;
@@ -996,7 +1032,7 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
*/
goto out_err_free;
- err = bpf_prog_select_runtime(fp);
+ fp = bpf_prog_select_runtime(fp, &err);
if (err)
goto out_err_free;
@@ -1153,8 +1189,7 @@ void bpf_prog_destroy(struct bpf_prog *fp)
}
EXPORT_SYMBOL_GPL(bpf_prog_destroy);
-static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk,
- bool locked)
+static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
{
struct sk_filter *fp, *old_fp;
@@ -1170,45 +1205,61 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk,
return -ENOMEM;
}
- old_fp = rcu_dereference_protected(sk->sk_filter, locked);
+ old_fp = rcu_dereference_protected(sk->sk_filter,
+ lockdep_sock_is_held(sk));
rcu_assign_pointer(sk->sk_filter, fp);
+
if (old_fp)
sk_filter_uncharge(sk, old_fp);
return 0;
}
-/**
- * sk_attach_filter - attach a socket filter
- * @fprog: the filter program
- * @sk: the socket to use
- *
- * Attach the user's filter code. We first run some sanity checks on
- * it to make sure it does not explode on us later. If an error
- * occurs or there is insufficient memory for the filter a negative
- * errno code is returned. On success the return is zero.
- */
-int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk,
- bool locked)
+static int __reuseport_attach_prog(struct bpf_prog *prog, struct sock *sk)
+{
+ struct bpf_prog *old_prog;
+ int err;
+
+ if (bpf_prog_size(prog->len) > sysctl_optmem_max)
+ return -ENOMEM;
+
+ if (sk_unhashed(sk) && sk->sk_reuseport) {
+ err = reuseport_alloc(sk);
+ if (err)
+ return err;
+ } else if (!rcu_access_pointer(sk->sk_reuseport_cb)) {
+ /* The socket wasn't bound with SO_REUSEPORT */
+ return -EINVAL;
+ }
+
+ old_prog = reuseport_attach_prog(sk, prog);
+ if (old_prog)
+ bpf_prog_destroy(old_prog);
+
+ return 0;
+}
+
+static
+struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk)
{
unsigned int fsize = bpf_classic_proglen(fprog);
struct bpf_prog *prog;
int err;
if (sock_flag(sk, SOCK_FILTER_LOCKED))
- return -EPERM;
+ return ERR_PTR(-EPERM);
/* Make sure new filter is there and in the right amounts. */
if (!bpf_check_basics_ok(fprog->filter, fprog->len))
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
prog = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
if (!prog)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
if (copy_from_user(prog->insns, fprog->filter, fsize)) {
__bpf_prog_free(prog);
- return -EFAULT;
+ return ERR_PTR(-EFAULT);
}
prog->len = fprog->len;
@@ -1216,17 +1267,34 @@ int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk,
err = bpf_prog_store_orig_filter(prog, fprog);
if (err) {
__bpf_prog_free(prog);
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
}
/* bpf_prepare_filter() already takes care of freeing
* memory in case something goes wrong.
*/
- prog = bpf_prepare_filter(prog, NULL);
+ return bpf_prepare_filter(prog, NULL);
+}
+
+/**
+ * sk_attach_filter - attach a socket filter
+ * @fprog: the filter program
+ * @sk: the socket to use
+ *
+ * Attach the user's filter code. We first run some sanity checks on
+ * it to make sure it does not explode on us later. If an error
+ * occurs or there is insufficient memory for the filter a negative
+ * errno code is returned. On success the return is zero.
+ */
+int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
+{
+ struct bpf_prog *prog = __get_filter(fprog, sk);
+ int err;
+
if (IS_ERR(prog))
return PTR_ERR(prog);
- err = __sk_attach_prog(prog, sk, locked);
+ err = __sk_attach_prog(prog, sk);
if (err < 0) {
__bpf_prog_release(prog);
return err;
@@ -1234,31 +1302,59 @@ int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk,
return 0;
}
-EXPORT_SYMBOL_GPL(__sk_attach_filter);
+EXPORT_SYMBOL_GPL(sk_attach_filter);
-int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
+int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
+{
+ struct bpf_prog *prog = __get_filter(fprog, sk);
+ int err;
+
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
+ err = __reuseport_attach_prog(prog, sk);
+ if (err < 0) {
+ __bpf_prog_release(prog);
+ return err;
+ }
+
+ return 0;
+}
+
+static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk)
{
- return __sk_attach_filter(fprog, sk, sock_owned_by_user(sk));
+ if (sock_flag(sk, SOCK_FILTER_LOCKED))
+ return ERR_PTR(-EPERM);
+
+ return bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
}
int sk_attach_bpf(u32 ufd, struct sock *sk)
{
- struct bpf_prog *prog;
+ struct bpf_prog *prog = __get_bpf(ufd, sk);
int err;
- if (sock_flag(sk, SOCK_FILTER_LOCKED))
- return -EPERM;
-
- prog = bpf_prog_get(ufd);
if (IS_ERR(prog))
return PTR_ERR(prog);
- if (prog->type != BPF_PROG_TYPE_SOCKET_FILTER) {
+ err = __sk_attach_prog(prog, sk);
+ if (err < 0) {
bpf_prog_put(prog);
- return -EINVAL;
+ return err;
}
- err = __sk_attach_prog(prog, sk, sock_owned_by_user(sk));
+ return 0;
+}
+
+int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
+{
+ struct bpf_prog *prog = __get_bpf(ufd, sk);
+ int err;
+
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
+ err = __reuseport_attach_prog(prog, sk);
if (err < 0) {
bpf_prog_put(prog);
return err;
@@ -1267,49 +1363,74 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
return 0;
}
-#define BPF_RECOMPUTE_CSUM(flags) ((flags) & 1)
+struct bpf_scratchpad {
+ union {
+ __be32 diff[MAX_BPF_STACK / sizeof(__be32)];
+ u8 buff[MAX_BPF_STACK];
+ };
+};
+
+static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp);
+
+static inline int __bpf_try_make_writable(struct sk_buff *skb,
+ unsigned int write_len)
+{
+ return skb_ensure_writable(skb, write_len);
+}
+
+static inline int bpf_try_make_writable(struct sk_buff *skb,
+ unsigned int write_len)
+{
+ int err = __bpf_try_make_writable(skb, write_len);
+
+ bpf_compute_data_end(skb);
+ return err;
+}
+
+static int bpf_try_make_head_writable(struct sk_buff *skb)
+{
+ return bpf_try_make_writable(skb, skb_headlen(skb));
+}
+
+static inline void bpf_push_mac_rcsum(struct sk_buff *skb)
+{
+ if (skb_at_tc_ingress(skb))
+ skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len);
+}
-static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
+static inline void bpf_pull_mac_rcsum(struct sk_buff *skb)
+{
+ if (skb_at_tc_ingress(skb))
+ skb_postpull_rcsum(skb, skb_mac_header(skb), skb->mac_len);
+}
+
+BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset,
+ const void *, from, u32, len, u64, flags)
{
- struct sk_buff *skb = (struct sk_buff *) (long) r1;
- int offset = (int) r2;
- void *from = (void *) (long) r3;
- unsigned int len = (unsigned int) r4;
- char buf[16];
void *ptr;
- /* bpf verifier guarantees that:
- * 'from' pointer points to bpf program stack
- * 'len' bytes of it were initialized
- * 'len' > 0
- * 'skb' is a valid pointer to 'struct sk_buff'
- *
- * so check for invalid 'offset' and too large 'len'
- */
- if (unlikely((u32) offset > 0xffff || len > sizeof(buf)))
+ if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH)))
+ return -EINVAL;
+ if (unlikely(offset > INT_MAX))
return -EFAULT;
- if (unlikely(skb_try_make_writable(skb, offset + len)))
+ if (unlikely(bpf_try_make_writable(skb, offset + len)))
return -EFAULT;
- ptr = skb_header_pointer(skb, offset, len, buf);
- if (unlikely(!ptr))
- return -EFAULT;
-
- if (BPF_RECOMPUTE_CSUM(flags))
- skb_postpull_rcsum(skb, ptr, len);
+ ptr = skb->data + offset;
+ if (flags & BPF_F_RECOMPUTE_CSUM)
+ __skb_postpull_rcsum(skb, ptr, len, offset);
memcpy(ptr, from, len);
- if (ptr == buf)
- /* skb_store_bits cannot return -EFAULT here */
- skb_store_bits(skb, offset, ptr, len);
+ if (flags & BPF_F_RECOMPUTE_CSUM)
+ __skb_postpush_rcsum(skb, ptr, len, offset);
+ if (flags & BPF_F_INVALIDATE_HASH)
+ skb_clear_hash(skb);
- if (BPF_RECOMPUTE_CSUM(flags) && skb->ip_summed == CHECKSUM_COMPLETE)
- skb->csum = csum_add(skb->csum, csum_partial(ptr, len, 0));
return 0;
}
-const struct bpf_func_proto bpf_skb_store_bytes_proto = {
+static const struct bpf_func_proto bpf_skb_store_bytes_proto = {
.func = bpf_skb_store_bytes,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -1320,26 +1441,78 @@ const struct bpf_func_proto bpf_skb_store_bytes_proto = {
.arg5_type = ARG_ANYTHING,
};
-#define BPF_HEADER_FIELD_SIZE(flags) ((flags) & 0x0f)
-#define BPF_IS_PSEUDO_HEADER(flags) ((flags) & 0x10)
+BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset,
+ void *, to, u32, len)
+{
+ void *ptr;
+
+ if (unlikely(offset > INT_MAX))
+ goto err_clear;
+
+ ptr = skb_header_pointer(skb, offset, len, to);
+ if (unlikely(!ptr))
+ goto err_clear;
+ if (ptr != to)
+ memcpy(to, ptr, len);
+
+ return 0;
+err_clear:
+ memset(to, 0, len);
+ return -EFAULT;
+}
+
+static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
+ .func = bpf_skb_load_bytes,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_PTR_TO_RAW_STACK,
+ .arg4_type = ARG_CONST_STACK_SIZE,
+};
+
+BPF_CALL_2(bpf_skb_pull_data, struct sk_buff *, skb, u32, len)
+{
+ /* Idea is the following: should the needed direct read/write
+ * test fail during runtime, we can pull in more data and redo
+ * again, since implicitly, we invalidate previous checks here.
+ *
+ * Or, since we know how much we need to make read/writeable,
+ * this can be done once at the program beginning for direct
+ * access case. By this we overcome limitations of only current
+ * headroom being accessible.
+ */
+ return bpf_try_make_writable(skb, len ? : skb_headlen(skb));
+}
+
+static const struct bpf_func_proto bpf_skb_pull_data_proto = {
+ .func = bpf_skb_pull_data,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
-static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
+BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset,
+ u64, from, u64, to, u64, flags)
{
- struct sk_buff *skb = (struct sk_buff *) (long) r1;
- int offset = (int) r2;
- __sum16 sum, *ptr;
+ __sum16 *ptr;
- if (unlikely((u32) offset > 0xffff))
+ if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK)))
+ return -EINVAL;
+ if (unlikely(offset > 0xffff || offset & 1))
return -EFAULT;
-
- if (unlikely(skb_try_make_writable(skb, offset + sizeof(sum))))
+ if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
return -EFAULT;
- ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
- if (unlikely(!ptr))
- return -EFAULT;
+ ptr = (__sum16 *)(skb->data + offset);
+ switch (flags & BPF_F_HDR_FIELD_MASK) {
+ case 0:
+ if (unlikely(from != 0))
+ return -EINVAL;
- switch (BPF_HEADER_FIELD_SIZE(flags)) {
+ csum_replace_by_diff(ptr, to);
+ break;
case 2:
csum_replace2(ptr, from, to);
break;
@@ -1350,14 +1523,10 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
return -EINVAL;
}
- if (ptr == &sum)
- /* skb_store_bits guaranteed to not return -EFAULT here */
- skb_store_bits(skb, offset, ptr, sizeof(sum));
-
return 0;
}
-const struct bpf_func_proto bpf_l3_csum_replace_proto = {
+static const struct bpf_func_proto bpf_l3_csum_replace_proto = {
.func = bpf_l3_csum_replace,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -1368,23 +1537,32 @@ const struct bpf_func_proto bpf_l3_csum_replace_proto = {
.arg5_type = ARG_ANYTHING,
};
-static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
+BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset,
+ u64, from, u64, to, u64, flags)
{
- struct sk_buff *skb = (struct sk_buff *) (long) r1;
- bool is_pseudo = !!BPF_IS_PSEUDO_HEADER(flags);
- int offset = (int) r2;
- __sum16 sum, *ptr;
+ bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
+ bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
+ __sum16 *ptr;
- if (unlikely((u32) offset > 0xffff))
+ if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_PSEUDO_HDR |
+ BPF_F_HDR_FIELD_MASK)))
+ return -EINVAL;
+ if (unlikely(offset > 0xffff || offset & 1))
return -EFAULT;
- if (unlikely(skb_try_make_writable(skb, offset + sizeof(sum))))
+ if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
return -EFAULT;
- ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
- if (unlikely(!ptr))
- return -EFAULT;
+ ptr = (__sum16 *)(skb->data + offset);
+ if (is_mmzero && !*ptr)
+ return 0;
- switch (BPF_HEADER_FIELD_SIZE(flags)) {
+ switch (flags & BPF_F_HDR_FIELD_MASK) {
+ case 0:
+ if (unlikely(from != 0))
+ return -EINVAL;
+
+ inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo);
+ break;
case 2:
inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo);
break;
@@ -1395,14 +1573,12 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
return -EINVAL;
}
- if (ptr == &sum)
- /* skb_store_bits guaranteed to not return -EFAULT here */
- skb_store_bits(skb, offset, ptr, sizeof(sum));
-
+ if (is_mmzero && !*ptr)
+ *ptr = CSUM_MANGLED_0;
return 0;
}
-const struct bpf_func_proto bpf_l4_csum_replace_proto = {
+static const struct bpf_func_proto bpf_l4_csum_replace_proto = {
.func = bpf_l4_csum_replace,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -1413,30 +1589,182 @@ const struct bpf_func_proto bpf_l4_csum_replace_proto = {
.arg5_type = ARG_ANYTHING,
};
-#define BPF_IS_REDIRECT_INGRESS(flags) ((flags) & 1)
+BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size,
+ __be32 *, to, u32, to_size, __wsum, seed)
+{
+ struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
+ u32 diff_size = from_size + to_size;
+ int i, j = 0;
+
+ /* This is quite flexible, some examples:
+ *
+ * from_size == 0, to_size > 0, seed := csum --> pushing data
+ * from_size > 0, to_size == 0, seed := csum --> pulling data
+ * from_size > 0, to_size > 0, seed := 0 --> diffing data
+ *
+ * Even for diffing, from_size and to_size don't need to be equal.
+ */
+ if (unlikely(((from_size | to_size) & (sizeof(__be32) - 1)) ||
+ diff_size > sizeof(sp->diff)))
+ return -EINVAL;
+
+ for (i = 0; i < from_size / sizeof(__be32); i++, j++)
+ sp->diff[j] = ~from[i];
+ for (i = 0; i < to_size / sizeof(__be32); i++, j++)
+ sp->diff[j] = to[i];
-static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)
+ return csum_partial(sp->diff, diff_size, seed);
+}
+
+static const struct bpf_func_proto bpf_csum_diff_proto = {
+ .func = bpf_csum_diff,
+ .gpl_only = false,
+ .pkt_access = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_STACK,
+ .arg2_type = ARG_CONST_STACK_SIZE_OR_ZERO,
+ .arg3_type = ARG_PTR_TO_STACK,
+ .arg4_type = ARG_CONST_STACK_SIZE_OR_ZERO,
+ .arg5_type = ARG_ANYTHING,
+};
+
+BPF_CALL_2(bpf_csum_update, struct sk_buff *, skb, __wsum, csum)
+{
+ /* The interface is to be used in combination with bpf_csum_diff()
+ * for direct packet writes. csum rotation for alignment as well
+ * as emulating csum_sub() can be done from the eBPF program.
+ */
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ return (skb->csum = csum_add(skb->csum, csum));
+
+ return -ENOTSUPP;
+}
+
+static const struct bpf_func_proto bpf_csum_update_proto = {
+ .func = bpf_csum_update,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+
+static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
+{
+ return dev_forward_skb(dev, skb);
+}
+
+static inline int __bpf_rx_skb_no_mac(struct net_device *dev,
+ struct sk_buff *skb)
+{
+ int ret = ____dev_forward_skb(dev, skb);
+
+ if (likely(!ret)) {
+ skb->dev = dev;
+ ret = netif_rx(skb);
+ }
+
+ return ret;
+}
+
+static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
+{
+ int ret;
+
+ if (unlikely(__this_cpu_read(xmit_recursion) > XMIT_RECURSION_LIMIT)) {
+ net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
+ kfree_skb(skb);
+ return -ENETDOWN;
+ }
+
+ skb->dev = dev;
+
+ __this_cpu_inc(xmit_recursion);
+ ret = dev_queue_xmit(skb);
+ __this_cpu_dec(xmit_recursion);
+
+ return ret;
+}
+
+static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev,
+ u32 flags)
+{
+ /* skb->mac_len is not set on normal egress */
+ unsigned int mlen = skb->network_header - skb->mac_header;
+
+ __skb_pull(skb, mlen);
+
+ /* At ingress, the mac header has already been pulled once.
+ * At egress, skb_postpull_rcsum has to be done in case that
+ * the skb is originated from ingress (i.e. a forwarded skb)
+ * to ensure that rcsum starts at net header.
+ */
+ if (!skb_at_tc_ingress(skb))
+ skb_postpull_rcsum(skb, skb_mac_header(skb), mlen);
+ skb_pop_mac_header(skb);
+ skb_reset_mac_len(skb);
+ return flags & BPF_F_INGRESS ?
+ __bpf_rx_skb_no_mac(dev, skb) : __bpf_tx_skb(dev, skb);
+}
+
+static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev,
+ u32 flags)
+{
+ bpf_push_mac_rcsum(skb);
+ return flags & BPF_F_INGRESS ?
+ __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
+}
+
+static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev,
+ u32 flags)
+{
+ switch (dev->type) {
+ case ARPHRD_TUNNEL:
+ case ARPHRD_TUNNEL6:
+ case ARPHRD_SIT:
+ case ARPHRD_IPGRE:
+ case ARPHRD_VOID:
+ case ARPHRD_NONE:
+#ifdef ARPHRD_RAWIP
+ case ARPHRD_RAWIP:
+#endif
+ return __bpf_redirect_no_mac(skb, dev, flags);
+ default:
+ return __bpf_redirect_common(skb, dev, flags);
+ }
+}
+
+BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
{
- struct sk_buff *skb = (struct sk_buff *) (long) r1, *skb2;
struct net_device *dev;
+ struct sk_buff *clone;
+ int ret;
+
+ if (unlikely(flags & ~(BPF_F_INGRESS)))
+ return -EINVAL;
dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex);
if (unlikely(!dev))
return -EINVAL;
- skb2 = skb_clone(skb, GFP_ATOMIC);
- if (unlikely(!skb2))
+ clone = skb_clone(skb, GFP_ATOMIC);
+ if (unlikely(!clone))
return -ENOMEM;
- if (BPF_IS_REDIRECT_INGRESS(flags))
- return dev_forward_skb(dev, skb2);
+ /* For direct write, we need to keep the invariant that the skbs
+ * we're dealing with need to be uncloned. Should uncloning fail
+ * here, we need to free the just generated clone to unclone once
+ * again.
+ */
+ ret = bpf_try_make_head_writable(skb);
+ if (unlikely(ret)) {
+ kfree_skb(clone);
+ return -ENOMEM;
+ }
- skb2->dev = dev;
- skb_sender_cpu_clear(skb2);
- return dev_queue_xmit(skb2);
+ return __bpf_redirect(clone, dev, flags);
}
-const struct bpf_func_proto bpf_clone_redirect_proto = {
+static const struct bpf_func_proto bpf_clone_redirect_proto = {
.func = bpf_clone_redirect,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -1451,12 +1779,17 @@ struct redirect_info {
};
static DEFINE_PER_CPU(struct redirect_info, redirect_info);
-static u64 bpf_redirect(u64 ifindex, u64 flags, u64 r3, u64 r4, u64 r5)
+
+BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
{
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+ if (unlikely(flags & ~(BPF_F_INGRESS)))
+ return TC_ACT_SHOT;
+
ri->ifindex = ifindex;
ri->flags = flags;
+
return TC_ACT_REDIRECT;
}
@@ -1472,15 +1805,10 @@ int skb_do_redirect(struct sk_buff *skb)
return -EINVAL;
}
- if (BPF_IS_REDIRECT_INGRESS(ri->flags))
- return dev_forward_skb(dev, skb);
-
- skb->dev = dev;
- skb_sender_cpu_clear(skb);
- return dev_queue_xmit(skb);
+ return __bpf_redirect(skb, dev, ri->flags);
}
-const struct bpf_func_proto bpf_redirect_proto = {
+static const struct bpf_func_proto bpf_redirect_proto = {
.func = bpf_redirect,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -1488,9 +1816,9 @@ const struct bpf_func_proto bpf_redirect_proto = {
.arg2_type = ARG_ANYTHING,
};
-static u64 bpf_get_cgroup_classid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
{
- return task_get_classid((struct sk_buff *) (unsigned long) r1);
+ return task_get_classid(skb);
}
static const struct bpf_func_proto bpf_get_cgroup_classid_proto = {
@@ -1500,16 +1828,9 @@ static const struct bpf_func_proto bpf_get_cgroup_classid_proto = {
.arg1_type = ARG_PTR_TO_CTX,
};
-static u64 bpf_get_route_realm(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_1(bpf_get_route_realm, const struct sk_buff *, skb)
{
-#ifdef CONFIG_IP_ROUTE_CLASSID
- const struct dst_entry *dst;
-
- dst = skb_dst((struct sk_buff *) (unsigned long) r1);
- if (dst)
- return dst->tclassid;
-#endif
- return 0;
+ return dst_tclassid(skb);
}
static const struct bpf_func_proto bpf_get_route_realm_proto = {
@@ -1519,16 +1840,54 @@ static const struct bpf_func_proto bpf_get_route_realm_proto = {
.arg1_type = ARG_PTR_TO_CTX,
};
-static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5)
+BPF_CALL_1(bpf_get_hash_recalc, struct sk_buff *, skb)
{
- struct sk_buff *skb = (struct sk_buff *) (long) r1;
- __be16 vlan_proto = (__force __be16) r2;
+ /* If skb_clear_hash() was called due to mangling, we can
+ * trigger SW recalculation here. Later access to hash
+ * can then use the inline skb->hash via context directly
+ * instead of calling this helper again.
+ */
+ return skb_get_hash(skb);
+}
+
+static const struct bpf_func_proto bpf_get_hash_recalc_proto = {
+ .func = bpf_get_hash_recalc,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
+BPF_CALL_1(bpf_set_hash_invalid, struct sk_buff *, skb)
+{
+ /* After all direct packet writes, this can be used once for
+ * triggering a lazy recalc on next skb_get_hash() invocation.
+ */
+ skb_clear_hash(skb);
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_set_hash_invalid_proto = {
+ .func = bpf_set_hash_invalid,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
+BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto,
+ u16, vlan_tci)
+{
+ int ret;
if (unlikely(vlan_proto != htons(ETH_P_8021Q) &&
vlan_proto != htons(ETH_P_8021AD)))
vlan_proto = htons(ETH_P_8021Q);
- return skb_vlan_push(skb, vlan_proto, vlan_tci);
+ bpf_push_mac_rcsum(skb);
+ ret = skb_vlan_push(skb, vlan_proto, vlan_tci);
+ bpf_pull_mac_rcsum(skb);
+
+ bpf_compute_data_end(skb);
+ return ret;
}
const struct bpf_func_proto bpf_skb_vlan_push_proto = {
@@ -1541,11 +1900,16 @@ const struct bpf_func_proto bpf_skb_vlan_push_proto = {
};
EXPORT_SYMBOL_GPL(bpf_skb_vlan_push_proto);
-static u64 bpf_skb_vlan_pop(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb)
{
- struct sk_buff *skb = (struct sk_buff *) (long) r1;
+ int ret;
+
+ bpf_push_mac_rcsum(skb);
+ ret = skb_vlan_pop(skb);
+ bpf_pull_mac_rcsum(skb);
- return skb_vlan_pop(skb);
+ bpf_compute_data_end(skb);
+ return ret;
}
const struct bpf_func_proto bpf_skb_vlan_pop_proto = {
@@ -1556,59 +1920,487 @@ const struct bpf_func_proto bpf_skb_vlan_pop_proto = {
};
EXPORT_SYMBOL_GPL(bpf_skb_vlan_pop_proto);
+static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len)
+{
+ /* Caller already did skb_cow() with len as headroom,
+ * so no need to do it here.
+ */
+ skb_push(skb, len);
+ memmove(skb->data, skb->data + len, off);
+ memset(skb->data + off, 0, len);
+
+ /* No skb_postpush_rcsum(skb, skb->data + off, len)
+ * needed here as it does not change the skb->csum
+ * result for checksum complete when summing over
+ * zeroed blocks.
+ */
+ return 0;
+}
+
+static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len)
+{
+ /* skb_ensure_writable() is not needed here, as we're
+ * already working on an uncloned skb.
+ */
+ if (unlikely(!pskb_may_pull(skb, off + len)))
+ return -ENOMEM;
+
+ skb_postpull_rcsum(skb, skb->data + off, len);
+ memmove(skb->data + len, skb->data, off);
+ __skb_pull(skb, len);
+
+ return 0;
+}
+
+static int bpf_skb_net_hdr_push(struct sk_buff *skb, u32 off, u32 len)
+{
+ bool trans_same = skb->transport_header == skb->network_header;
+ int ret;
+
+ /* There's no need for __skb_push()/__skb_pull() pair to
+ * get to the start of the mac header as we're guaranteed
+ * to always start from here under eBPF.
+ */
+ ret = bpf_skb_generic_push(skb, off, len);
+ if (likely(!ret)) {
+ skb->mac_header -= len;
+ skb->network_header -= len;
+ if (trans_same)
+ skb->transport_header = skb->network_header;
+ }
+
+ return ret;
+}
+
+static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len)
+{
+ bool trans_same = skb->transport_header == skb->network_header;
+ int ret;
+
+ /* Same here, __skb_push()/__skb_pull() pair not needed. */
+ ret = bpf_skb_generic_pop(skb, off, len);
+ if (likely(!ret)) {
+ skb->mac_header += len;
+ skb->network_header += len;
+ if (trans_same)
+ skb->transport_header = skb->network_header;
+ }
+
+ return ret;
+}
+
+static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
+{
+ const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
+ u32 off = skb->network_header - skb->mac_header;
+ int ret;
+
+ ret = skb_cow(skb, len_diff);
+ if (unlikely(ret < 0))
+ return ret;
+
+ ret = bpf_skb_net_hdr_push(skb, off, len_diff);
+ if (unlikely(ret < 0))
+ return ret;
+
+ if (skb_is_gso(skb)) {
+ /* SKB_GSO_UDP stays as is. SKB_GSO_TCPV4 needs to
+ * be changed into SKB_GSO_TCPV6.
+ */
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
+ skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV4;
+ skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;
+ }
+
+ /* Header must be checked, and gso_segs recomputed. */
+ skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
+ skb_shinfo(skb)->gso_segs = 0;
+ }
+
+ skb->protocol = htons(ETH_P_IPV6);
+ skb_clear_hash(skb);
+
+ return 0;
+}
+
+static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
+{
+ const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
+ u32 off = skb->network_header - skb->mac_header;
+ int ret;
+
+ ret = skb_unclone(skb, GFP_ATOMIC);
+ if (unlikely(ret < 0))
+ return ret;
+
+ ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
+ if (unlikely(ret < 0))
+ return ret;
+
+ if (skb_is_gso(skb)) {
+ /* SKB_GSO_UDP stays as is. SKB_GSO_TCPV6 needs to
+ * be changed into SKB_GSO_TCPV4.
+ */
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) {
+ skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV6;
+ skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
+ }
+
+ /* Header must be checked, and gso_segs recomputed. */
+ skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
+ skb_shinfo(skb)->gso_segs = 0;
+ }
+
+ skb->protocol = htons(ETH_P_IP);
+ skb_clear_hash(skb);
+
+ return 0;
+}
+
+static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto)
+{
+ __be16 from_proto = skb->protocol;
+
+ if (from_proto == htons(ETH_P_IP) &&
+ to_proto == htons(ETH_P_IPV6))
+ return bpf_skb_proto_4_to_6(skb);
+
+ if (from_proto == htons(ETH_P_IPV6) &&
+ to_proto == htons(ETH_P_IP))
+ return bpf_skb_proto_6_to_4(skb);
+
+ return -ENOTSUPP;
+}
+
+BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,
+ u64, flags)
+{
+ int ret;
+
+ if (unlikely(flags))
+ return -EINVAL;
+
+ /* General idea is that this helper does the basic groundwork
+ * needed for changing the protocol, and eBPF program fills the
+ * rest through bpf_skb_store_bytes(), bpf_lX_csum_replace()
+ * and other helpers, rather than passing a raw buffer here.
+ *
+ * The rationale is to keep this minimal and without a need to
+ * deal with raw packet data. F.e. even if we would pass buffers
+ * here, the program still needs to call the bpf_lX_csum_replace()
+ * helpers anyway. Plus, this way we keep also separation of
+ * concerns, since f.e. bpf_skb_store_bytes() should only take
+ * care of stores.
+ *
+ * Currently, additional options and extension header space are
+ * not supported, but flags register is reserved so we can adapt
+ * that. For offloads, we mark packet as dodgy, so that headers
+ * need to be verified first.
+ */
+ ret = bpf_skb_proto_xlat(skb, proto);
+ bpf_compute_data_end(skb);
+ return ret;
+}
+
+static const struct bpf_func_proto bpf_skb_change_proto_proto = {
+ .func = bpf_skb_change_proto,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+};
+
+BPF_CALL_2(bpf_skb_change_type, struct sk_buff *, skb, u32, pkt_type)
+{
+ /* We only allow a restricted subset to be changed for now. */
+ if (unlikely(!skb_pkt_type_ok(skb->pkt_type) ||
+ !skb_pkt_type_ok(pkt_type)))
+ return -EINVAL;
+
+ skb->pkt_type = pkt_type;
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_skb_change_type_proto = {
+ .func = bpf_skb_change_type,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+
+static u32 __bpf_skb_min_len(const struct sk_buff *skb)
+{
+ u32 min_len = skb_network_offset(skb);
+
+ if (skb_transport_header_was_set(skb))
+ min_len = skb_transport_offset(skb);
+ if (skb->ip_summed == CHECKSUM_PARTIAL)
+ min_len = skb_checksum_start_offset(skb) +
+ skb->csum_offset + sizeof(__sum16);
+ return min_len;
+}
+
+#define BPF_SKB_MAX_LEN SKB_MAX_ALLOC
+
+static int bpf_skb_grow_rcsum(struct sk_buff *skb, unsigned int new_len)
+{
+ unsigned int old_len = skb->len;
+ int ret;
+
+ ret = __skb_grow_rcsum(skb, new_len);
+ if (!ret)
+ memset(skb->data + old_len, 0, new_len - old_len);
+ return ret;
+}
+
+static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len)
+{
+ return __skb_trim_rcsum(skb, new_len);
+}
+
+BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
+ u64, flags)
+{
+ u32 max_len = BPF_SKB_MAX_LEN;
+ u32 min_len = __bpf_skb_min_len(skb);
+ int ret;
+
+ if (unlikely(flags || new_len > max_len || new_len < min_len))
+ return -EINVAL;
+ if (skb->encapsulation)
+ return -ENOTSUPP;
+
+ /* The basic idea of this helper is that it's performing the
+ * needed work to either grow or trim an skb, and eBPF program
+ * rewrites the rest via helpers like bpf_skb_store_bytes(),
+ * bpf_lX_csum_replace() and others rather than passing a raw
+ * buffer here. This one is a slow path helper and intended
+ * for replies with control messages.
+ *
+ * Like in bpf_skb_change_proto(), we want to keep this rather
+ * minimal and without protocol specifics so that we are able
+ * to separate concerns as in bpf_skb_store_bytes() should only
+ * be the one responsible for writing buffers.
+ *
+ * It's really expected to be a slow path operation here for
+ * control message replies, so we're implicitly linearizing,
+ * uncloning and dropping offloads from the skb by this.
+ */
+ ret = __bpf_try_make_writable(skb, skb->len);
+ if (!ret) {
+ if (new_len > skb->len)
+ ret = bpf_skb_grow_rcsum(skb, new_len);
+ else if (new_len < skb->len)
+ ret = bpf_skb_trim_rcsum(skb, new_len);
+ if (!ret && skb_is_gso(skb))
+ skb_gso_reset(skb);
+ }
+
+ bpf_compute_data_end(skb);
+ return ret;
+}
+
+static const struct bpf_func_proto bpf_skb_change_tail_proto = {
+ .func = bpf_skb_change_tail,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+};
+
bool bpf_helper_changes_skb_data(void *func)
{
- if (func == bpf_skb_vlan_push)
- return true;
- if (func == bpf_skb_vlan_pop)
- return true;
- if (func == bpf_skb_store_bytes)
- return true;
- if (func == bpf_l3_csum_replace)
- return true;
- if (func == bpf_l4_csum_replace)
+ if (func == bpf_skb_vlan_push ||
+ func == bpf_skb_vlan_pop ||
+ func == bpf_skb_store_bytes ||
+ func == bpf_skb_change_proto ||
+ func == bpf_skb_change_tail ||
+ func == bpf_skb_pull_data ||
+ func == bpf_clone_redirect ||
+ func == bpf_l3_csum_replace ||
+ func == bpf_l4_csum_replace)
return true;
return false;
}
-static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
+static unsigned long bpf_skb_copy(void *dst_buff, const void *skb,
+ unsigned long off, unsigned long len)
{
- struct sk_buff *skb = (struct sk_buff *) (long) r1;
- struct bpf_tunnel_key *to = (struct bpf_tunnel_key *) (long) r2;
- struct ip_tunnel_info *info = skb_tunnel_info(skb);
+ void *ptr = skb_header_pointer(skb, off, len, dst_buff);
- if (unlikely(size != sizeof(struct bpf_tunnel_key) || flags || !info))
- return -EINVAL;
- if (ip_tunnel_info_af(info) != AF_INET)
+ if (unlikely(!ptr))
+ return len;
+ if (ptr != dst_buff)
+ memcpy(dst_buff, ptr, len);
+
+ return 0;
+}
+
+BPF_CALL_5(bpf_skb_event_output, struct sk_buff *, skb, struct bpf_map *, map,
+ u64, flags, void *, meta, u64, meta_size)
+{
+ u64 skb_size = (flags & BPF_F_CTXLEN_MASK) >> 32;
+
+ if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
return -EINVAL;
+ if (unlikely(skb_size > skb->len))
+ return -EFAULT;
+
+ return bpf_event_output(map, flags, meta, meta_size, skb, skb_size,
+ bpf_skb_copy);
+}
+
+static const struct bpf_func_proto bpf_skb_event_output_proto = {
+ .func = bpf_skb_event_output,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_CONST_MAP_PTR,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_STACK,
+ .arg5_type = ARG_CONST_STACK_SIZE,
+};
+
+static unsigned short bpf_tunnel_key_af(u64 flags)
+{
+ return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET;
+}
+
+BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key *, to,
+ u32, size, u64, flags)
+{
+ const struct ip_tunnel_info *info = skb_tunnel_info(skb);
+ u8 compat[sizeof(struct bpf_tunnel_key)];
+ void *to_orig = to;
+ int err;
+
+ if (unlikely(!info || (flags & ~(BPF_F_TUNINFO_IPV6)))) {
+ err = -EINVAL;
+ goto err_clear;
+ }
+ if (ip_tunnel_info_af(info) != bpf_tunnel_key_af(flags)) {
+ err = -EPROTO;
+ goto err_clear;
+ }
+ if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
+ err = -EINVAL;
+ switch (size) {
+ case offsetof(struct bpf_tunnel_key, tunnel_label):
+ case offsetof(struct bpf_tunnel_key, tunnel_ext):
+ goto set_compat;
+ case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
+ /* Fixup deprecated structure layouts here, so we have
+ * a common path later on.
+ */
+ if (ip_tunnel_info_af(info) != AF_INET)
+ goto err_clear;
+set_compat:
+ to = (struct bpf_tunnel_key *)compat;
+ break;
+ default:
+ goto err_clear;
+ }
+ }
to->tunnel_id = be64_to_cpu(info->key.tun_id);
- to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src);
+ to->tunnel_tos = info->key.tos;
+ to->tunnel_ttl = info->key.ttl;
+
+ if (flags & BPF_F_TUNINFO_IPV6) {
+ memcpy(to->remote_ipv6, &info->key.u.ipv6.src,
+ sizeof(to->remote_ipv6));
+ to->tunnel_label = be32_to_cpu(info->key.label);
+ } else {
+ to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src);
+ }
+
+ if (unlikely(size != sizeof(struct bpf_tunnel_key)))
+ memcpy(to_orig, to, size);
return 0;
+err_clear:
+ memset(to_orig, 0, size);
+ return err;
}
-const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
+static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
.func = bpf_skb_get_tunnel_key,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
- .arg2_type = ARG_PTR_TO_STACK,
+ .arg2_type = ARG_PTR_TO_RAW_STACK,
.arg3_type = ARG_CONST_STACK_SIZE,
.arg4_type = ARG_ANYTHING,
};
+BPF_CALL_3(bpf_skb_get_tunnel_opt, struct sk_buff *, skb, u8 *, to, u32, size)
+{
+ const struct ip_tunnel_info *info = skb_tunnel_info(skb);
+ int err;
+
+ if (unlikely(!info ||
+ !(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT))) {
+ err = -ENOENT;
+ goto err_clear;
+ }
+ if (unlikely(size < info->options_len)) {
+ err = -ENOMEM;
+ goto err_clear;
+ }
+
+ ip_tunnel_info_opts_get(to, info);
+ if (size > info->options_len)
+ memset(to + info->options_len, 0, size - info->options_len);
+
+ return info->options_len;
+err_clear:
+ memset(to, 0, size);
+ return err;
+}
+
+static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = {
+ .func = bpf_skb_get_tunnel_opt,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_RAW_STACK,
+ .arg3_type = ARG_CONST_STACK_SIZE,
+};
+
static struct metadata_dst __percpu *md_dst;
-static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
+BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
+ const struct bpf_tunnel_key *, from, u32, size, u64, flags)
{
- struct sk_buff *skb = (struct sk_buff *) (long) r1;
- struct bpf_tunnel_key *from = (struct bpf_tunnel_key *) (long) r2;
struct metadata_dst *md = this_cpu_ptr(md_dst);
+ u8 compat[sizeof(struct bpf_tunnel_key)];
struct ip_tunnel_info *info;
- if (unlikely(size != sizeof(struct bpf_tunnel_key) || flags))
+ if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
+ BPF_F_DONT_FRAGMENT)))
+ return -EINVAL;
+ if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
+ switch (size) {
+ case offsetof(struct bpf_tunnel_key, tunnel_label):
+ case offsetof(struct bpf_tunnel_key, tunnel_ext):
+ case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
+ /* Fixup deprecated structure layouts here, so we have
+ * a common path later on.
+ */
+ memcpy(compat, from, size);
+ memset(compat + size, 0, sizeof(compat) - size);
+ from = (const struct bpf_tunnel_key *) compat;
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+ if (unlikely((!(flags & BPF_F_TUNINFO_IPV6) && from->tunnel_label) ||
+ from->tunnel_ext))
return -EINVAL;
skb_dst_drop(skb);
@@ -1617,14 +2409,31 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
info = &md->u.tun_info;
info->mode = IP_TUNNEL_INFO_TX;
- info->key.tun_flags = TUNNEL_KEY;
+
+ info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM;
+ if (flags & BPF_F_DONT_FRAGMENT)
+ info->key.tun_flags |= TUNNEL_DONT_FRAGMENT;
+
info->key.tun_id = cpu_to_be64(from->tunnel_id);
- info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
+ info->key.tos = from->tunnel_tos;
+ info->key.ttl = from->tunnel_ttl;
+
+ if (flags & BPF_F_TUNINFO_IPV6) {
+ info->mode |= IP_TUNNEL_INFO_IPV6;
+ memcpy(&info->key.u.ipv6.dst, from->remote_ipv6,
+ sizeof(from->remote_ipv6));
+ info->key.label = cpu_to_be32(from->tunnel_label) &
+ IPV6_FLOWLABEL_MASK;
+ } else {
+ info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
+ if (flags & BPF_F_ZERO_CSUM_TX)
+ info->key.tun_flags &= ~TUNNEL_CSUM;
+ }
return 0;
}
-const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
+static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
.func = bpf_skb_set_tunnel_key,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -1634,19 +2443,145 @@ const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
.arg4_type = ARG_ANYTHING,
};
-static const struct bpf_func_proto *bpf_get_skb_set_tunnel_key_proto(void)
+BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb,
+ const u8 *, from, u32, size)
+{
+ struct ip_tunnel_info *info = skb_tunnel_info(skb);
+ const struct metadata_dst *md = this_cpu_ptr(md_dst);
+
+ if (unlikely(info != &md->u.tun_info || (size & (sizeof(u32) - 1))))
+ return -EINVAL;
+ if (unlikely(size > IP_TUNNEL_OPTS_MAX))
+ return -ENOMEM;
+
+ ip_tunnel_info_opts_set(info, from, size);
+
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = {
+ .func = bpf_skb_set_tunnel_opt,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_STACK,
+ .arg3_type = ARG_CONST_STACK_SIZE,
+};
+
+static const struct bpf_func_proto *
+bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
{
if (!md_dst) {
- /* race is not possible, since it's called from
- * verifier that is holding verifier mutex
+ /* Race is not possible, since it's called from verifier
+ * that is holding verifier mutex.
*/
- md_dst = metadata_dst_alloc_percpu(0, GFP_KERNEL);
+ md_dst = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX,
+ GFP_KERNEL);
if (!md_dst)
return NULL;
}
- return &bpf_skb_set_tunnel_key_proto;
+
+ switch (which) {
+ case BPF_FUNC_skb_set_tunnel_key:
+ return &bpf_skb_set_tunnel_key_proto;
+ case BPF_FUNC_skb_set_tunnel_opt:
+ return &bpf_skb_set_tunnel_opt_proto;
+ default:
+ return NULL;
+ }
+}
+
+BPF_CALL_3(bpf_skb_under_cgroup, struct sk_buff *, skb, struct bpf_map *, map,
+ u32, idx)
+{
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
+ struct cgroup *cgrp;
+ struct sock *sk;
+
+ sk = skb_to_full_sk(skb);
+ if (!sk || !sk_fullsock(sk))
+ return -ENOENT;
+ if (unlikely(idx >= array->map.max_entries))
+ return -E2BIG;
+
+ cgrp = READ_ONCE(array->ptrs[idx]);
+ if (unlikely(!cgrp))
+ return -EAGAIN;
+
+ return sk_under_cgroup_hierarchy(sk, cgrp);
+}
+
+static const struct bpf_func_proto bpf_skb_under_cgroup_proto = {
+ .func = bpf_skb_under_cgroup,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_CONST_MAP_PTR,
+ .arg3_type = ARG_ANYTHING,
+};
+
+static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
+ unsigned long off, unsigned long len)
+{
+ memcpy(dst_buff, src_buff + off, len);
+ return 0;
+}
+
+BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map,
+ u64, flags, void *, meta, u64, meta_size)
+{
+ u64 xdp_size = (flags & BPF_F_CTXLEN_MASK) >> 32;
+
+ if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
+ return -EINVAL;
+ if (unlikely(xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
+ return -EFAULT;
+
+ return bpf_event_output(map, flags, meta, meta_size, xdp, xdp_size,
+ bpf_xdp_copy);
+}
+
+static const struct bpf_func_proto bpf_xdp_event_output_proto = {
+ .func = bpf_xdp_event_output,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_CONST_MAP_PTR,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_STACK,
+ .arg5_type = ARG_CONST_STACK_SIZE,
+};
+
+BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb)
+{
+ return skb->sk ? sock_gen_cookie(skb->sk) : 0;
+}
+
+static const struct bpf_func_proto bpf_get_socket_cookie_proto = {
+ .func = bpf_get_socket_cookie,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
+BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb)
+{
+ struct sock *sk = sk_to_full_sk(skb->sk);
+ kuid_t kuid;
+
+ if (!sk || !sk_fullsock(sk))
+ return overflowuid;
+ kuid = sock_net_uid(sock_net(sk), sk);
+ return from_kuid_munged(sock_net(sk)->user_ns, kuid);
}
+static const struct bpf_func_proto bpf_get_socket_uid_proto = {
+ .func = bpf_get_socket_uid,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
static const struct bpf_func_proto *
sk_filter_func_proto(enum bpf_func_id func_id)
{
@@ -1660,7 +2595,7 @@ sk_filter_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_get_prandom_u32:
return &bpf_get_prandom_u32_proto;
case BPF_FUNC_get_smp_processor_id:
- return &bpf_get_smp_processor_id_proto;
+ return &bpf_get_raw_smp_processor_id_proto;
case BPF_FUNC_tail_call:
return &bpf_tail_call_proto;
case BPF_FUNC_ktime_get_ns:
@@ -1668,6 +2603,12 @@ sk_filter_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_trace_printk:
if (capable(CAP_SYS_ADMIN))
return bpf_get_trace_printk_proto();
+ case BPF_FUNC_get_socket_cookie:
+ return &bpf_get_socket_cookie_proto;
+ case BPF_FUNC_get_socket_uid:
+ return &bpf_get_socket_uid_proto;
+ case BPF_FUNC_get_current_uid_gid:
+ return &bpf_get_current_uid_gid_proto;
default:
return NULL;
}
@@ -1679,6 +2620,14 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
switch (func_id) {
case BPF_FUNC_skb_store_bytes:
return &bpf_skb_store_bytes_proto;
+ case BPF_FUNC_skb_load_bytes:
+ return &bpf_skb_load_bytes_proto;
+ case BPF_FUNC_skb_pull_data:
+ return &bpf_skb_pull_data_proto;
+ case BPF_FUNC_csum_diff:
+ return &bpf_csum_diff_proto;
+ case BPF_FUNC_csum_update:
+ return &bpf_csum_update_proto;
case BPF_FUNC_l3_csum_replace:
return &bpf_l3_csum_replace_proto;
case BPF_FUNC_l4_csum_replace:
@@ -1691,14 +2640,58 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
return &bpf_skb_vlan_push_proto;
case BPF_FUNC_skb_vlan_pop:
return &bpf_skb_vlan_pop_proto;
+ case BPF_FUNC_skb_change_proto:
+ return &bpf_skb_change_proto_proto;
+ case BPF_FUNC_skb_change_type:
+ return &bpf_skb_change_type_proto;
+ case BPF_FUNC_skb_change_tail:
+ return &bpf_skb_change_tail_proto;
case BPF_FUNC_skb_get_tunnel_key:
return &bpf_skb_get_tunnel_key_proto;
case BPF_FUNC_skb_set_tunnel_key:
- return bpf_get_skb_set_tunnel_key_proto();
+ return bpf_get_skb_set_tunnel_proto(func_id);
+ case BPF_FUNC_skb_get_tunnel_opt:
+ return &bpf_skb_get_tunnel_opt_proto;
+ case BPF_FUNC_skb_set_tunnel_opt:
+ return bpf_get_skb_set_tunnel_proto(func_id);
case BPF_FUNC_redirect:
return &bpf_redirect_proto;
case BPF_FUNC_get_route_realm:
return &bpf_get_route_realm_proto;
+ case BPF_FUNC_get_hash_recalc:
+ return &bpf_get_hash_recalc_proto;
+ case BPF_FUNC_set_hash_invalid:
+ return &bpf_set_hash_invalid_proto;
+ case BPF_FUNC_perf_event_output:
+ return &bpf_skb_event_output_proto;
+ case BPF_FUNC_get_smp_processor_id:
+ return &bpf_get_smp_processor_id_proto;
+ case BPF_FUNC_skb_under_cgroup:
+ return &bpf_skb_under_cgroup_proto;
+ default:
+ return sk_filter_func_proto(func_id);
+ }
+}
+
+static const struct bpf_func_proto *
+xdp_func_proto(enum bpf_func_id func_id)
+{
+ switch (func_id) {
+ case BPF_FUNC_perf_event_output:
+ return &bpf_xdp_event_output_proto;
+ case BPF_FUNC_get_smp_processor_id:
+ return &bpf_get_smp_processor_id_proto;
+ default:
+ return sk_filter_func_proto(func_id);
+ }
+}
+
+static const struct bpf_func_proto *
+cg_skb_func_proto(enum bpf_func_id func_id)
+{
+ switch (func_id) {
+ case BPF_FUNC_skb_load_bytes:
+ return &bpf_skb_load_bytes_proto;
default:
return sk_filter_func_proto(func_id);
}
@@ -1706,31 +2699,32 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
static bool __is_valid_access(int off, int size, enum bpf_access_type type)
{
- /* check bounds */
if (off < 0 || off >= sizeof(struct __sk_buff))
return false;
-
- /* disallow misaligned access */
+ /* The verifier guarantees that size > 0. */
if (off % size != 0)
return false;
-
- /* all __sk_buff fields are __u32 */
- if (size != 4)
+ if (size != sizeof(__u32))
return false;
return true;
}
static bool sk_filter_is_valid_access(int off, int size,
- enum bpf_access_type type)
+ enum bpf_access_type type,
+ enum bpf_reg_type *reg_type)
{
- if (off == offsetof(struct __sk_buff, tc_classid))
+ switch (off) {
+ case offsetof(struct __sk_buff, tc_classid):
+ case offsetof(struct __sk_buff, data):
+ case offsetof(struct __sk_buff, data_end):
return false;
+ }
if (type == BPF_WRITE) {
switch (off) {
case offsetof(struct __sk_buff, cb[0]) ...
- offsetof(struct __sk_buff, cb[4]):
+ offsetof(struct __sk_buff, cb[4]):
break;
default:
return false;
@@ -1740,31 +2734,140 @@ static bool sk_filter_is_valid_access(int off, int size,
return __is_valid_access(off, size, type);
}
-static bool tc_cls_act_is_valid_access(int off, int size,
- enum bpf_access_type type)
+static bool sock_filter_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ enum bpf_reg_type *reg_type)
+{
+ if (type == BPF_WRITE) {
+ switch (off) {
+ case offsetof(struct bpf_sock, bound_dev_if):
+ break;
+ default:
+ return false;
+ }
+ }
+
+ if (off < 0 || off + size > sizeof(struct bpf_sock))
+ return false;
+
+ /* The verifier guarantees that size > 0. */
+ if (off % size != 0)
+ return false;
+
+ return true;
+}
+
+static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
+ const struct bpf_prog *prog)
{
- if (off == offsetof(struct __sk_buff, tc_classid))
- return type == BPF_WRITE ? true : false;
+ struct bpf_insn *insn = insn_buf;
+
+ if (!direct_write)
+ return 0;
+
+ /* if (!skb->cloned)
+ * goto start;
+ *
+ * (Fast-path, otherwise approximation that we might be
+ * a clone, do the rest in helper.)
+ */
+ *insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_6, BPF_REG_1, CLONED_OFFSET());
+ *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_6, CLONED_MASK);
+ *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 7);
+
+ /* ret = bpf_skb_pull_data(skb, 0); */
+ *insn++ = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
+ *insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_2, BPF_REG_2);
+ *insn++ = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_pull_data);
+ /* if (!ret)
+ * goto restore;
+ * return TC_ACT_SHOT;
+ */
+ *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2);
+ *insn++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, TC_ACT_SHOT);
+ *insn++ = BPF_EXIT_INSN();
+ /* restore: */
+ *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
+ /* start: */
+ *insn++ = prog->insnsi[0];
+
+ return insn - insn_buf;
+}
+
+static bool tc_cls_act_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ enum bpf_reg_type *reg_type)
+{
if (type == BPF_WRITE) {
switch (off) {
case offsetof(struct __sk_buff, mark):
case offsetof(struct __sk_buff, tc_index):
case offsetof(struct __sk_buff, priority):
case offsetof(struct __sk_buff, cb[0]) ...
- offsetof(struct __sk_buff, cb[4]):
+ offsetof(struct __sk_buff, cb[4]):
+ case offsetof(struct __sk_buff, tc_classid):
break;
default:
return false;
}
}
+
+ switch (off) {
+ case offsetof(struct __sk_buff, data):
+ *reg_type = PTR_TO_PACKET;
+ break;
+ case offsetof(struct __sk_buff, data_end):
+ *reg_type = PTR_TO_PACKET_END;
+ break;
+ }
+
return __is_valid_access(off, size, type);
}
-static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
- int src_reg, int ctx_off,
- struct bpf_insn *insn_buf,
- struct bpf_prog *prog)
+static bool __is_valid_xdp_access(int off, int size,
+ enum bpf_access_type type)
+{
+ if (off < 0 || off >= sizeof(struct xdp_md))
+ return false;
+ if (off % size != 0)
+ return false;
+ if (size != sizeof(__u32))
+ return false;
+
+ return true;
+}
+
+static bool xdp_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ enum bpf_reg_type *reg_type)
+{
+ if (type == BPF_WRITE)
+ return false;
+
+ switch (off) {
+ case offsetof(struct xdp_md, data):
+ *reg_type = PTR_TO_PACKET;
+ break;
+ case offsetof(struct xdp_md, data_end):
+ *reg_type = PTR_TO_PACKET_END;
+ break;
+ }
+
+ return __is_valid_xdp_access(off, size, type);
+}
+
+void bpf_warn_invalid_xdp_action(u32 act)
+{
+ WARN_ONCE(1, "Illegal XDP return value %u, expect packet loss\n", act);
+}
+EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
+
+static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg,
+ int src_reg, int ctx_off,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog)
{
struct bpf_insn *insn = insn_buf;
@@ -1811,7 +2914,7 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
case offsetof(struct __sk_buff, ifindex):
BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
- *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)),
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
dst_reg, src_reg,
offsetof(struct sk_buff, dev));
*insn++ = BPF_JMP_IMM(BPF_JEQ, dst_reg, 0, 1);
@@ -1852,7 +2955,7 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
dst_reg, src_reg, insn);
case offsetof(struct __sk_buff, cb[0]) ...
- offsetof(struct __sk_buff, cb[4]):
+ offsetof(struct __sk_buff, cb[4]):
BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20);
prog->cb_access = 1;
@@ -1869,8 +2972,24 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
ctx_off -= offsetof(struct __sk_buff, tc_classid);
ctx_off += offsetof(struct sk_buff, cb);
ctx_off += offsetof(struct qdisc_skb_cb, tc_classid);
- WARN_ON(type != BPF_WRITE);
- *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg, ctx_off);
+ if (type == BPF_WRITE)
+ *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg, ctx_off);
+ else
+ *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, ctx_off);
+ break;
+
+ case offsetof(struct __sk_buff, data):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
+ dst_reg, src_reg,
+ offsetof(struct sk_buff, data));
+ break;
+
+ case offsetof(struct __sk_buff, data_end):
+ ctx_off -= offsetof(struct __sk_buff, data_end);
+ ctx_off += offsetof(struct sk_buff, cb);
+ ctx_off += offsetof(struct bpf_skb_data_end, data_end);
+ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), dst_reg, src_reg,
+ ctx_off);
break;
case offsetof(struct __sk_buff, tc_index):
@@ -1896,31 +3015,137 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
return insn - insn_buf;
}
+static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
+ int dst_reg, int src_reg,
+ int ctx_off,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ switch (ctx_off) {
+ case offsetof(struct bpf_sock, bound_dev_if):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_bound_dev_if) != 4);
+
+ if (type == BPF_WRITE)
+ *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg,
+ offsetof(struct sock, sk_bound_dev_if));
+ else
+ *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+ offsetof(struct sock, sk_bound_dev_if));
+ break;
+ }
+
+ return insn - insn_buf;
+}
+
+static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type, int dst_reg,
+ int src_reg, int ctx_off,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ switch (ctx_off) {
+ case offsetof(struct __sk_buff, ifindex):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
+
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
+ dst_reg, src_reg,
+ offsetof(struct sk_buff, dev));
+ *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, dst_reg,
+ offsetof(struct net_device, ifindex));
+ break;
+ default:
+ return sk_filter_convert_ctx_access(type, dst_reg, src_reg,
+ ctx_off, insn_buf, prog);
+ }
+
+ return insn - insn_buf;
+}
+
+static u32 xdp_convert_ctx_access(enum bpf_access_type type, int dst_reg,
+ int src_reg, int ctx_off,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ switch (ctx_off) {
+ case offsetof(struct xdp_md, data):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data),
+ dst_reg, src_reg,
+ offsetof(struct xdp_buff, data));
+ break;
+ case offsetof(struct xdp_md, data_end):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end),
+ dst_reg, src_reg,
+ offsetof(struct xdp_buff, data_end));
+ break;
+ }
+
+ return insn - insn_buf;
+}
+
static const struct bpf_verifier_ops sk_filter_ops = {
- .get_func_proto = sk_filter_func_proto,
- .is_valid_access = sk_filter_is_valid_access,
- .convert_ctx_access = bpf_net_convert_ctx_access,
+ .get_func_proto = sk_filter_func_proto,
+ .is_valid_access = sk_filter_is_valid_access,
+ .convert_ctx_access = sk_filter_convert_ctx_access,
};
static const struct bpf_verifier_ops tc_cls_act_ops = {
- .get_func_proto = tc_cls_act_func_proto,
- .is_valid_access = tc_cls_act_is_valid_access,
- .convert_ctx_access = bpf_net_convert_ctx_access,
+ .get_func_proto = tc_cls_act_func_proto,
+ .is_valid_access = tc_cls_act_is_valid_access,
+ .convert_ctx_access = tc_cls_act_convert_ctx_access,
+ .gen_prologue = tc_cls_act_prologue,
+};
+
+static const struct bpf_verifier_ops xdp_ops = {
+ .get_func_proto = xdp_func_proto,
+ .is_valid_access = xdp_is_valid_access,
+ .convert_ctx_access = xdp_convert_ctx_access,
+};
+
+static const struct bpf_verifier_ops cg_skb_ops = {
+ .get_func_proto = cg_skb_func_proto,
+ .is_valid_access = sk_filter_is_valid_access,
+ .convert_ctx_access = sk_filter_convert_ctx_access,
+};
+
+static const struct bpf_verifier_ops cg_sock_ops = {
+ .get_func_proto = sk_filter_func_proto,
+ .is_valid_access = sock_filter_is_valid_access,
+ .convert_ctx_access = sock_filter_convert_ctx_access,
};
static struct bpf_prog_type_list sk_filter_type __read_mostly = {
- .ops = &sk_filter_ops,
- .type = BPF_PROG_TYPE_SOCKET_FILTER,
+ .ops = &sk_filter_ops,
+ .type = BPF_PROG_TYPE_SOCKET_FILTER,
};
static struct bpf_prog_type_list sched_cls_type __read_mostly = {
- .ops = &tc_cls_act_ops,
- .type = BPF_PROG_TYPE_SCHED_CLS,
+ .ops = &tc_cls_act_ops,
+ .type = BPF_PROG_TYPE_SCHED_CLS,
};
static struct bpf_prog_type_list sched_act_type __read_mostly = {
- .ops = &tc_cls_act_ops,
- .type = BPF_PROG_TYPE_SCHED_ACT,
+ .ops = &tc_cls_act_ops,
+ .type = BPF_PROG_TYPE_SCHED_ACT,
+};
+
+static struct bpf_prog_type_list xdp_type __read_mostly = {
+ .ops = &xdp_ops,
+ .type = BPF_PROG_TYPE_XDP,
+};
+
+static struct bpf_prog_type_list cg_skb_type __read_mostly = {
+ .ops = &cg_skb_ops,
+ .type = BPF_PROG_TYPE_CGROUP_SKB,
+};
+
+static struct bpf_prog_type_list cg_sock_type __read_mostly = {
+ .ops = &cg_sock_ops,
+ .type = BPF_PROG_TYPE_CGROUP_SOCK
};
static int __init register_sk_filter_ops(void)
@@ -1928,12 +3153,15 @@ static int __init register_sk_filter_ops(void)
bpf_register_prog_type(&sk_filter_type);
bpf_register_prog_type(&sched_cls_type);
bpf_register_prog_type(&sched_act_type);
+ bpf_register_prog_type(&xdp_type);
+ bpf_register_prog_type(&cg_skb_type);
+ bpf_register_prog_type(&cg_sock_type);
return 0;
}
late_initcall(register_sk_filter_ops);
-int __sk_detach_filter(struct sock *sk, bool locked)
+int sk_detach_filter(struct sock *sk)
{
int ret = -ENOENT;
struct sk_filter *filter;
@@ -1941,7 +3169,8 @@ int __sk_detach_filter(struct sock *sk, bool locked)
if (sock_flag(sk, SOCK_FILTER_LOCKED))
return -EPERM;
- filter = rcu_dereference_protected(sk->sk_filter, locked);
+ filter = rcu_dereference_protected(sk->sk_filter,
+ lockdep_sock_is_held(sk));
if (filter) {
RCU_INIT_POINTER(sk->sk_filter, NULL);
sk_filter_uncharge(sk, filter);
@@ -1950,12 +3179,7 @@ int __sk_detach_filter(struct sock *sk, bool locked)
return ret;
}
-EXPORT_SYMBOL_GPL(__sk_detach_filter);
-
-int sk_detach_filter(struct sock *sk)
-{
- return __sk_detach_filter(sk, sock_owned_by_user(sk));
-}
+EXPORT_SYMBOL_GPL(sk_detach_filter);
int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
unsigned int len)
@@ -1966,7 +3190,7 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
lock_sock(sk);
filter = rcu_dereference_protected(sk->sk_filter,
- sock_owned_by_user(sk));
+ lockdep_sock_is_held(sk));
if (!filter)
goto out;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index b4f2c30e8313..9c31d7cde80b 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -180,15 +180,16 @@ ip:
ip_proto = iph->protocol;
- if (!dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_IPV4_ADDRS))
- break;
+ if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
+ key_addrs = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ target_container);
- key_addrs = skb_flow_dissector_target(flow_dissector,
- FLOW_DISSECTOR_KEY_IPV4_ADDRS, target_container);
- memcpy(&key_addrs->v4addrs, &iph->saddr,
- sizeof(key_addrs->v4addrs));
- key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ memcpy(&key_addrs->v4addrs, &iph->saddr,
+ sizeof(key_addrs->v4addrs));
+ key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ }
if (ip_is_fragment(iph)) {
key_control->flags |= FLOW_DIS_IS_FRAGMENT;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index bfd16b515ab9..31393e30b6b4 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -18,6 +18,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/slab.h>
+#include <linux/kmemleak.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
@@ -325,12 +326,14 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
if (!ret)
return NULL;
- if (size <= PAGE_SIZE)
+ if (size <= PAGE_SIZE) {
buckets = kzalloc(size, GFP_ATOMIC);
- else
+ } else {
buckets = (struct neighbour __rcu **)
__get_free_pages(GFP_ATOMIC | __GFP_ZERO,
get_order(size));
+ kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
+ }
if (!buckets) {
kfree(ret);
return NULL;
@@ -350,10 +353,12 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
struct neighbour __rcu **buckets = nht->hash_buckets;
- if (size <= PAGE_SIZE)
+ if (size <= PAGE_SIZE) {
kfree(buckets);
- else
+ } else {
+ kmemleak_free(buckets);
free_pages((unsigned long)buckets, get_order(size));
+ }
kfree(nht);
}
@@ -592,7 +597,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
ASSERT_RTNL();
- n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
+ n = kzalloc(sizeof(*n) + key_len, GFP_KERNEL);
if (!n)
goto out;
@@ -1053,7 +1058,7 @@ static void neigh_update_hhs(struct neighbour *neigh)
if (update) {
hh = &neigh->hh;
- if (hh->hh_len) {
+ if (READ_ONCE(hh->hh_len)) {
write_seqlock_bh(&hh->hh_lock);
update(hh, neigh->dev, neigh->ha);
write_sequnlock_bh(&hh->hh_lock);
@@ -1229,7 +1234,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
* we can reinject the packet there.
*/
n2 = NULL;
- if (dst) {
+ if (dst && dst->obsolete != DST_OBSOLETE_DEAD) {
n2 = dst_neigh_lookup_skb(dst, skb);
if (n2)
n1 = n2;
@@ -1318,7 +1323,7 @@ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
struct net_device *dev = neigh->dev;
unsigned int seq;
- if (dev->header_ops->cache && !neigh->hh.hh_len)
+ if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len))
neigh_hh_init(neigh);
do {
@@ -1832,8 +1837,8 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
goto nla_put_failure;
{
unsigned long now = jiffies;
- unsigned int flush_delta = now - tbl->last_flush;
- unsigned int rand_delta = now - tbl->last_rand;
+ long flush_delta = now - tbl->last_flush;
+ long rand_delta = now - tbl->last_rand;
struct neigh_hash_table *nht;
struct ndt_config ndc = {
.ndtc_key_len = tbl->key_len,
@@ -2793,6 +2798,7 @@ static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
*pos = cpu+1;
return per_cpu_ptr(tbl->stats, cpu);
}
+ (*pos)++;
return NULL;
}
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 77969b71a50a..d6161fba15c3 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -208,12 +208,23 @@ static const struct file_operations softnet_seq_fops = {
.release = seq_release,
};
-static void *ptype_get_idx(loff_t pos)
+static void *ptype_get_idx(struct seq_file *seq, loff_t pos)
{
+ struct list_head *ptype_list = NULL;
struct packet_type *pt = NULL;
+ struct net_device *dev;
loff_t i = 0;
int t;
+ for_each_netdev_rcu(seq_file_net(seq), dev) {
+ ptype_list = &dev->ptype_all;
+ list_for_each_entry_rcu(pt, ptype_list, list) {
+ if (i == pos)
+ return pt;
+ ++i;
+ }
+ }
+
list_for_each_entry_rcu(pt, &ptype_all, list) {
if (i == pos)
return pt;
@@ -234,22 +245,40 @@ static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(RCU)
{
rcu_read_lock();
- return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
+ return *pos ? ptype_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}
static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
+ struct net_device *dev;
struct packet_type *pt;
struct list_head *nxt;
int hash;
++*pos;
if (v == SEQ_START_TOKEN)
- return ptype_get_idx(0);
+ return ptype_get_idx(seq, 0);
pt = v;
nxt = pt->list.next;
+ if (pt->dev) {
+ if (nxt != &pt->dev->ptype_all)
+ goto found;
+
+ dev = pt->dev;
+ for_each_netdev_continue_rcu(seq_file_net(seq), dev) {
+ if (!list_empty(&dev->ptype_all)) {
+ nxt = dev->ptype_all.next;
+ goto found;
+ }
+ }
+
+ nxt = ptype_all.next;
+ goto ptype_all;
+ }
+
if (pt->type == htons(ETH_P_ALL)) {
+ptype_all:
if (nxt != &ptype_all)
goto found;
hash = 0;
@@ -278,7 +307,8 @@ static int ptype_seq_show(struct seq_file *seq, void *v)
if (v == SEQ_START_TOKEN)
seq_puts(seq, "Type Device Function\n");
- else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
+ else if ((!pt->af_packet_net || net_eq(pt->af_packet_net, seq_file_net(seq))) &&
+ (!pt->dev || net_eq(dev_net(pt->dev), seq_file_net(seq)))) {
if (pt->type == htons(ETH_P_ALL))
seq_puts(seq, "ALL ");
else
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 579d351f6ddd..c976fd132c3b 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -999,7 +999,7 @@ static ssize_t show_trans_timeout(struct netdev_queue *queue,
trans_timeout = queue->trans_timeout;
spin_unlock_irq(&queue->_xmit_lock);
- return sprintf(buf, "%lu", trans_timeout);
+ return sprintf(buf, fmt_ulong, trans_timeout);
}
#ifdef CONFIG_XPS
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 087ce1598b74..3283b97cff8d 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -130,8 +130,10 @@ static void ops_exit_list(const struct pernet_operations *ops,
{
struct net *net;
if (ops->exit) {
- list_for_each_entry(net, net_exit_list, exit_list)
+ list_for_each_entry(net, net_exit_list, exit_list) {
ops->exit(net);
+ cond_resched();
+ }
}
if (ops->exit_batch)
ops->exit_batch(net_exit_list);
@@ -778,7 +780,8 @@ static int __init net_ns_init(void)
mutex_unlock(&net_mutex);
- register_pernet_subsys(&net_ns_ops);
+ if (register_pernet_subsys(&net_ns_ops))
+ panic("Could not register network namespace subsystems");
rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, NULL);
rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid,
@@ -1018,11 +1021,17 @@ static int netns_install(struct nsproxy *nsproxy, struct ns_common *ns)
return 0;
}
+static struct user_namespace *netns_owner(struct ns_common *ns)
+{
+ return to_net_ns(ns)->user_ns;
+}
+
const struct proc_ns_operations netns_operations = {
.name = "net",
.type = CLONE_NEWNET,
.get = netns_get,
.put = netns_put,
.install = netns_install,
+ .owner = netns_owner,
};
#endif
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index d9ee8d08a3a6..0260c84ed83c 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -61,9 +61,12 @@ static int update_classid_sock(const void *v, struct file *file, unsigned n)
int err;
struct socket *sock = sock_from_file(file, &err);
- if (sock)
- sock->sk->sk_classid = (u32)(unsigned long)v;
-
+ if (sock) {
+ spin_lock(&cgroup_sk_update_lock);
+ sock_cgroup_set_classid(&sock->sk->sk_cgrp_data,
+ (unsigned long)v);
+ spin_unlock(&cgroup_sk_update_lock);
+ }
return 0;
}
@@ -100,6 +103,8 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
{
struct cgroup_cls_state *cs = css_cls_state(css);
+ cgroup_sk_alloc_disable();
+
cs->classid = (u32)value;
update_classid(css, (void *)(unsigned long)cs->classid);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 8f445f6e8328..8d612cefb5f3 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -28,6 +28,7 @@
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/if_vlan.h>
+#include <net/dsa.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <net/addrconf.h>
@@ -178,7 +179,7 @@ static void poll_napi(struct net_device *dev)
{
struct napi_struct *napi;
- list_for_each_entry(napi, &dev->napi_list, dev_list) {
+ list_for_each_entry_rcu(napi, &dev->napi_list, dev_list) {
if (napi->poll_owner != smp_processor_id() &&
spin_trylock(&napi->poll_lock)) {
poll_one_napi(napi);
@@ -661,15 +662,15 @@ EXPORT_SYMBOL_GPL(__netpoll_setup);
int netpoll_setup(struct netpoll *np)
{
- struct net_device *ndev = NULL;
+ struct net_device *ndev = NULL, *dev = NULL;
+ struct net *net = current->nsproxy->net_ns;
struct in_device *in_dev;
int err;
rtnl_lock();
- if (np->dev_name[0]) {
- struct net *net = current->nsproxy->net_ns;
+ if (np->dev_name[0])
ndev = __dev_get_by_name(net, np->dev_name);
- }
+
if (!ndev) {
np_err(np, "%s doesn't exist, aborting\n", np->dev_name);
err = -ENODEV;
@@ -677,6 +678,19 @@ int netpoll_setup(struct netpoll *np)
}
dev_hold(ndev);
+ /* bring up DSA management network devices up first */
+ for_each_netdev(net, dev) {
+ if (!netdev_uses_dsa(dev))
+ continue;
+
+ err = dev_change_flags(dev, dev->flags | IFF_UP);
+ if (err < 0) {
+ np_err(np, "%s failed to open %s\n",
+ np->dev_name, dev->name);
+ goto put;
+ }
+ }
+
if (netdev_master_upper_dev_get(ndev)) {
np_err(np, "%s is a slave device, aborting\n", np->dev_name);
err = -EBUSY;
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 40fd09fe06ae..3c1d9a1a5f7d 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -27,6 +27,12 @@
#include <linux/fdtable.h>
+/*
+ * netprio allocates per-net_device priomap array which is indexed by
+ * css->id. Limiting css ID to 16bits doesn't lose anything.
+ */
+#define NETPRIO_ID_MAX USHRT_MAX
+
#define PRIOMAP_MIN_SZ 128
/*
@@ -144,6 +150,9 @@ static int cgrp_css_online(struct cgroup_subsys_state *css)
struct net_device *dev;
int ret = 0;
+ if (css->id > NETPRIO_ID_MAX)
+ return -ENOSPC;
+
if (!parent_css)
return 0;
@@ -200,6 +209,8 @@ static ssize_t write_priomap(struct kernfs_open_file *of,
if (!dev)
return -ENODEV;
+ cgroup_sk_alloc_disable();
+
rtnl_lock();
ret = netprio_set_prio(of_css(of), dev, prio);
@@ -213,8 +224,12 @@ static int update_netprio(const void *v, struct file *file, unsigned n)
{
int err;
struct socket *sock = sock_from_file(file, &err);
- if (sock)
- sock->sk->sk_cgrp_prioidx = (u32)(unsigned long)v;
+ if (sock) {
+ spin_lock(&cgroup_sk_update_lock);
+ sock_cgroup_set_prioidx(&sock->sk->sk_cgrp_data,
+ (unsigned long)v);
+ spin_unlock(&cgroup_sk_update_lock);
+ }
return 0;
}
@@ -223,6 +238,8 @@ static void net_prio_attach(struct cgroup_taskset *tset)
struct task_struct *p;
struct cgroup_subsys_state *css;
+ cgroup_sk_alloc_disable();
+
cgroup_taskset_for_each(p, css, tset) {
void *v = (void *)(unsigned long)css->cgroup->id;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 4ea957c1e7ee..5d0759e2102e 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3519,7 +3519,7 @@ static int pktgen_thread_worker(void *arg)
struct pktgen_dev *pkt_dev = NULL;
int cpu = t->cpu;
- BUG_ON(smp_processor_id() != cpu);
+ WARN_ON(smp_processor_id() != cpu);
init_waitqueue_head(&t->queue);
complete(&t->start_done);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index a9da58204afa..7d6fe9ba9a24 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -253,6 +253,7 @@ int rtnl_unregister(int protocol, int msgtype)
rtnl_msg_handlers[protocol][msgindex].doit = NULL;
rtnl_msg_handlers[protocol][msgindex].dumpit = NULL;
+ rtnl_msg_handlers[protocol][msgindex].calcit = NULL;
return 0;
}
@@ -2104,7 +2105,7 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
}
if (dev->rtnl_link_state == RTNL_LINK_INITIALIZED) {
- __dev_notify_flags(dev, old_flags, 0U);
+ __dev_notify_flags(dev, old_flags, (old_flags ^ dev->flags));
} else {
dev->rtnl_link_state = RTNL_LINK_INITIALIZED;
__dev_notify_flags(dev, old_flags, ~0U);
@@ -3239,6 +3240,10 @@ static int rtnl_bridge_notify(struct net_device *dev)
if (err < 0)
goto errout;
+ /* Notification info is only filled for bridge ports, not the bridge
+ * device itself. Therefore, a zero notification length is valid and
+ * should not result in an error.
+ */
if (!skb->len)
goto errout;
diff --git a/net/core/scm.c b/net/core/scm.c
index dce0acb929f1..2696aefdc148 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -295,8 +295,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
/* Bump the usage count and install the file. */
sock = sock_from_file(fp[i], &err);
if (sock) {
- sock_update_netprioidx(sock->sk);
- sock_update_classid(sock->sk);
+ sock_update_netprioidx(&sock->sk->sk_cgrp_data);
+ sock_update_classid(&sock->sk->sk_cgrp_data);
}
fd_install(new_fd, get_file(fp[i]));
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 47520c651632..15a7a2667e1e 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -429,7 +429,11 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
if (IS_ENABLED(CONFIG_FORCE_ALLOC_FROM_DMA_ZONE))
gfp_mask |= GFP_DMA;
- if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
+ /* If requested length is either too small or too big,
+ * we use kmalloc() for skb->head allocation.
+ */
+ if (len <= SKB_WITH_OVERHEAD(1024) ||
+ len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
(gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
if (!skb)
@@ -506,13 +510,17 @@ EXPORT_SYMBOL(__netdev_alloc_skb);
struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
gfp_t gfp_mask)
{
- struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+ struct page_frag_cache *nc;
struct sk_buff *skb;
void *data;
len += NET_SKB_PAD + NET_IP_ALIGN;
- if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
+ /* If requested length is either too small or too big,
+ * we use kmalloc() for skb->head allocation.
+ */
+ if (len <= SKB_WITH_OVERHEAD(1024) ||
+ len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
(gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
if (!skb)
@@ -520,6 +528,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
goto skb_success;
}
+ nc = this_cpu_ptr(&napi_alloc_cache);
len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
len = SKB_DATA_ALIGN(len);
@@ -1542,6 +1551,12 @@ int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len)
skb->csum = csum_block_sub(skb->csum,
skb_checksum(skb, len, delta, 0),
len);
+ } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ int hdlen = (len > skb_headlen(skb)) ? skb_headlen(skb) : len;
+ int offset = skb_checksum_start_offset(skb) + skb->csum_offset;
+
+ if (offset + sizeof(__sum16) > hdlen)
+ return -EINVAL;
}
return __pskb_trim(skb, len);
}
@@ -2254,8 +2269,11 @@ skb_zerocopy_headlen(const struct sk_buff *from)
if (!from->head_frag ||
skb_headlen(from) < L1_CACHE_BYTES ||
- skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS)
+ skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) {
hlen = skb_headlen(from);
+ if (!hlen)
+ hlen = from->len;
+ }
if (skb_has_frag_list(from))
hlen = from->len;
@@ -2639,7 +2657,19 @@ EXPORT_SYMBOL(skb_split);
*/
static int skb_prepare_for_shift(struct sk_buff *skb)
{
- return skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+ int ret = 0;
+
+ if (skb_cloned(skb)) {
+ /* Save and restore truesize: pskb_expand_head() may reallocate
+ * memory where ksize(kmalloc(S)) != ksize(kmalloc(S)), but we
+ * cannot change truesize at this point.
+ */
+ unsigned int save_truesize = skb->truesize;
+
+ ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+ skb->truesize = save_truesize;
+ }
+ return ret;
}
/**
@@ -3075,6 +3105,25 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
int pos;
int dummy;
+ if (list_skb && !list_skb->head_frag && skb_headlen(list_skb) &&
+ (skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY)) {
+ /* gso_size is untrusted, and we have a frag_list with a linear
+ * non head_frag head.
+ *
+ * (we assume checking the first list_skb member suffices;
+ * i.e if either of the list_skb members have non head_frag
+ * head, then the first one has too).
+ *
+ * If head_skb's headlen does not fit requested gso_size, it
+ * means that the frag_list members do NOT terminate on exact
+ * gso_size boundaries. Hence we cannot perform skb_frag_t page
+ * sharing. Therefore we must fallback to copying the frag_list
+ * skbs; we do so by disabling SG.
+ */
+ if (mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb))
+ features &= ~NETIF_F_SG;
+ }
+
__skb_push(head_skb, doffset);
proto = skb_network_protocol(head_skb, &dummy);
if (unlikely(!proto))
@@ -3092,9 +3141,13 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
int hsize;
int size;
- len = head_skb->len - offset;
- if (len > mss)
- len = mss;
+ if (unlikely(mss == GSO_BY_FRAGS)) {
+ len = list_skb->len;
+ } else {
+ len = head_skb->len - offset;
+ if (len > mss)
+ len = mss;
+ }
hsize = skb_headlen(head_skb) - offset;
if (hsize < 0)
@@ -4396,8 +4449,8 @@ struct sk_buff *skb_vlan_untag(struct sk_buff *skb)
skb = skb_share_check(skb, GFP_ATOMIC);
if (unlikely(!skb))
goto err_free;
-
- if (unlikely(!pskb_may_pull(skb, VLAN_HLEN)))
+ /* We may access the two bytes after vlan_hdr in vlan_set_encap_proto(). */
+ if (unlikely(!pskb_may_pull(skb, VLAN_HLEN + sizeof(unsigned short))))
goto err_free;
vhdr = (struct vlan_hdr *)skb->data;
@@ -4477,9 +4530,8 @@ int skb_vlan_pop(struct sk_buff *skb)
if (likely(skb_vlan_tag_present(skb))) {
skb->vlan_tci = 0;
} else {
- if (unlikely((skb->protocol != htons(ETH_P_8021Q) &&
- skb->protocol != htons(ETH_P_8021AD)) ||
- skb->len < VLAN_ETH_HLEN))
+ if (unlikely(skb->protocol != htons(ETH_P_8021Q) &&
+ skb->protocol != htons(ETH_P_8021AD)))
return 0;
err = __skb_vlan_pop(skb, &vlan_tci);
@@ -4487,9 +4539,8 @@ int skb_vlan_pop(struct sk_buff *skb)
return err;
}
/* move next vlan tag to hw accel tag */
- if (likely((skb->protocol != htons(ETH_P_8021Q) &&
- skb->protocol != htons(ETH_P_8021AD)) ||
- skb->len < VLAN_ETH_HLEN))
+ if (likely(skb->protocol != htons(ETH_P_8021Q) &&
+ skb->protocol != htons(ETH_P_8021AD)))
return 0;
vlan_proto = skb->protocol;
diff --git a/net/core/sock.c b/net/core/sock.c
index 14dcc631f922..0ac6e6d306f7 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -134,6 +134,7 @@
#include <linux/sock_diag.h>
#include <linux/filter.h>
+#include <net/sock_reuseport.h>
#include <trace/events/sock.h>
@@ -484,11 +485,12 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(sock_queue_rcv_skb);
-int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
+int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
+ const int nested, unsigned int trim_cap)
{
int rc = NET_RX_SUCCESS;
- if (sk_filter(sk, skb))
+ if (sk_filter_trim_cap(sk, skb, trim_cap))
goto discard_and_relse;
skb->dev = NULL;
@@ -524,7 +526,7 @@ discard_and_relse:
kfree_skb(skb);
goto out;
}
-EXPORT_SYMBOL(sk_receive_skb);
+EXPORT_SYMBOL(__sk_receive_skb);
struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
@@ -933,6 +935,32 @@ set_rcvbuf:
}
break;
+ case SO_ATTACH_REUSEPORT_CBPF:
+ ret = -EINVAL;
+ if (optlen == sizeof(struct sock_fprog)) {
+ struct sock_fprog fprog;
+
+ ret = -EFAULT;
+ if (copy_from_user(&fprog, optval, sizeof(fprog)))
+ break;
+
+ ret = sk_reuseport_attach_filter(&fprog, sk);
+ }
+ break;
+
+ case SO_ATTACH_REUSEPORT_EBPF:
+ ret = -EINVAL;
+ if (optlen == sizeof(u32)) {
+ u32 ufd;
+
+ ret = -EFAULT;
+ if (copy_from_user(&ufd, optval, sizeof(ufd)))
+ break;
+
+ ret = sk_reuseport_attach_bpf(ufd, sk);
+ }
+ break;
+
case SO_DETACH_FILTER:
ret = sk_detach_filter(sk);
break;
@@ -1013,7 +1041,6 @@ set_rcvbuf:
}
EXPORT_SYMBOL(sock_setsockopt);
-
static void cred_to_ucred(struct pid *pid, const struct cred *cred,
struct ucred *ucred)
{
@@ -1034,6 +1061,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
union {
int val;
+ u64 val64;
struct linger ling;
struct timeval tm;
} v;
@@ -1173,7 +1201,11 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
struct ucred peercred;
if (len > sizeof(peercred))
len = sizeof(peercred);
+
+ spin_lock(&sk->sk_peer_lock);
cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
+ spin_unlock(&sk->sk_peer_lock);
+
if (copy_to_user(optval, &peercred, len))
return -EFAULT;
goto lenout;
@@ -1264,6 +1296,13 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
v.val = sk->sk_incoming_cpu;
break;
+
+ case SO_COOKIE:
+ lv = sizeof(u64);
+ if (len < lv)
+ return -EINVAL;
+ v.val64 = sock_gen_cookie(sk);
+ break;
default:
/* We implement the SO_SNDLOWAT etc to not be settable
* (1003.1g 7).
@@ -1385,6 +1424,7 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
owner = prot->owner;
slab = prot->slab;
+ cgroup_sk_free(&sk->sk_cgrp_data);
security_sk_free(sk);
if (slab != NULL)
kmem_cache_free(slab, sk);
@@ -1393,17 +1433,6 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
module_put(owner);
}
-#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
-void sock_update_netprioidx(struct sock *sk)
-{
- if (in_interrupt())
- return;
-
- sk->sk_cgrp_prioidx = task_netprioidx(current);
-}
-EXPORT_SYMBOL_GPL(sock_update_netprioidx);
-#endif
-
/**
* sk_alloc - All socket objects are allocated here
* @net: the applicable net namespace
@@ -1432,8 +1461,10 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
sock_net_set(sk, net);
atomic_set(&sk->sk_wmem_alloc, 1);
- sock_update_classid(sk);
- sock_update_netprioidx(sk);
+ cgroup_sk_alloc(&sk->sk_cgrp_data);
+ sock_update_classid(&sk->sk_cgrp_data);
+ sock_update_netprioidx(&sk->sk_cgrp_data);
+ sk_tx_queue_clear(sk);
}
return sk;
@@ -1457,6 +1488,8 @@ static void __sk_destruct(struct rcu_head *head)
sk_filter_uncharge(sk, filter);
RCU_INIT_POINTER(sk->sk_filter, NULL);
}
+ if (rcu_access_pointer(sk->sk_reuseport_cb))
+ reuseport_detach_sock(sk);
sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
@@ -1469,9 +1502,10 @@ static void __sk_destruct(struct rcu_head *head)
sk->sk_frag.page = NULL;
}
- if (sk->sk_peer_cred)
- put_cred(sk->sk_peer_cred);
+ /* We do not need to acquire sk->sk_peer_lock, we are the last user. */
+ put_cred(sk->sk_peer_cred);
put_pid(sk->sk_peer_pid);
+
if (likely(sk->sk_net_refcnt))
put_net(sock_net(sk));
sk_prot_free(sk->sk_prot_creator, sk);
@@ -1561,6 +1595,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
sock_reset_flag(newsk, SOCK_DONE);
+ cgroup_sk_clone(&newsk->sk_cgrp_data);
+
skb_queue_head_init(&newsk->sk_error_queue);
filter = rcu_dereference_protected(newsk->sk_filter, 1);
@@ -1586,12 +1622,16 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
newsk = NULL;
goto out;
}
+ RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
newsk->sk_err = 0;
newsk->sk_err_soft = 0;
newsk->sk_priority = 0;
newsk->sk_incoming_cpu = raw_smp_processor_id();
atomic64_set(&newsk->sk_cookie, 0);
+
+ cgroup_sk_alloc(&newsk->sk_cgrp_data);
+
/*
* Before updating sk_refcnt, we must commit prior changes to memory
* (Documentation/RCU/rculist_nulls.txt for details)
@@ -1612,6 +1652,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
*/
sk_refcnt_debug_inc(newsk);
sk_set_socket(newsk, NULL);
+ sk_tx_queue_clear(newsk);
newsk->sk_wq = NULL;
sk_update_clone(sk, newsk);
@@ -2134,7 +2175,7 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
}
if (sk_has_memory_pressure(sk)) {
- int alloc;
+ u64 alloc;
if (!sk_under_memory_pressure(sk))
return 1;
@@ -2284,6 +2325,27 @@ int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *
}
EXPORT_SYMBOL(sock_no_mmap);
+/*
+ * When a file is received (via SCM_RIGHTS, etc), we must bump the
+ * various sock-based usage counts.
+ *
+ * @file: the file that was just received. If sock_from_file() resolves it
+ * to a socket, that socket's cgroup data is handed to
+ * sock_update_netprioidx() and sock_update_classid(); any other kind of
+ * file is left untouched. Nothing is returned to the caller.
+ */
+void __receive_sock(struct file *file)
+{
+ struct socket *sock;
+ int error;
+
+ /*
+ * The resulting value of "error" is ignored here since we only
+ * need to take action when the file is a socket and testing
+ * "sock" for NULL is sufficient.
+ */
+ sock = sock_from_file(file, &error);
+ if (sock) {
+ sock_update_netprioidx(&sock->sk->sk_cgrp_data);
+ sock_update_classid(&sock->sk->sk_cgrp_data);
+ }
+}
+
ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
{
ssize_t res;
@@ -2433,6 +2495,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_peer_pid = NULL;
sk->sk_peer_cred = NULL;
+ spin_lock_init(&sk->sk_peer_lock);
+
sk->sk_write_pending = 0;
sk->sk_rcvlowat = 1;
sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 9653798da293..e25c72918b96 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -19,7 +19,7 @@ static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh);
static DEFINE_MUTEX(sock_diag_table_mutex);
static struct workqueue_struct *broadcast_wq;
-static u64 sock_gen_cookie(struct sock *sk)
+u64 sock_gen_cookie(struct sock *sk)
{
while (1) {
u64 res = atomic64_read(&sk->sk_cookie);
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
new file mode 100644
index 000000000000..2ba3ae7720a8
--- /dev/null
+++ b/net/core/sock_reuseport.c
@@ -0,0 +1,269 @@
+/*
+ * To speed up listener socket lookup, create an array to store all sockets
+ * listening on the same port. This allows a decision to be made after finding
+ * the first socket. An optional BPF program can also be configured for
+ * selecting the socket index from the array of available sockets.
+ */
+
+#include <net/sock_reuseport.h>
+#include <linux/bpf.h>
+#include <linux/rcupdate.h>
+
+#define INIT_SOCKS 128
+
+static DEFINE_SPINLOCK(reuseport_lock);
+/*
+ * Allocate a zeroed struct sock_reuseport followed by room for @max_socks
+ * socket pointers. Only max_socks and prog are initialised here; the
+ * caller fills in socks[] and num_socks. GFP_ATOMIC is used because the
+ * callers in this file invoke it while holding reuseport_lock.
+ *
+ * Returns the new group, or NULL if kzalloc() fails.
+ */
+static struct sock_reuseport *__reuseport_alloc(u16 max_socks)
+{
+ size_t size = sizeof(struct sock_reuseport) +
+ sizeof(struct sock *) * max_socks;
+ struct sock_reuseport *reuse = kzalloc(size, GFP_ATOMIC);
+
+ if (!reuse)
+ return NULL;
+
+ reuse->max_socks = max_socks;
+
+ RCU_INIT_POINTER(reuse->prog, NULL);
+ return reuse;
+}
+/*
+ * Create a reuseport group of INIT_SOCKS slots whose only member is @sk
+ * and publish it through sk->sk_reuseport_cb. If @sk already has a group
+ * when the lock is taken, nothing is changed.
+ *
+ * Returns 0 on success (including the "already has a group" case) or
+ * -ENOMEM if the group cannot be allocated.
+ */
+int reuseport_alloc(struct sock *sk)
+{
+ struct sock_reuseport *reuse;
+
+ /* bh lock used since this function call may precede hlist lock in
+ * soft irq of receive path or setsockopt from process context
+ */
+ spin_lock_bh(&reuseport_lock);
+
+ /* Allocation attempts can occur concurrently via the setsockopt path
+ * and the bind/hash path. Nothing to do when we lose the race.
+ */
+ if (rcu_dereference_protected(sk->sk_reuseport_cb,
+ lockdep_is_held(&reuseport_lock)))
+ goto out;
+
+ reuse = __reuseport_alloc(INIT_SOCKS);
+ if (!reuse) {
+ spin_unlock_bh(&reuseport_lock);
+ return -ENOMEM;
+ }
+
+ reuse->socks[0] = sk;
+ reuse->num_socks = 1;
+ rcu_assign_pointer(sk->sk_reuseport_cb, reuse); /* publish only once fully set up */
+
+out:
+ spin_unlock_bh(&reuseport_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL(reuseport_alloc);
+/*
+ * Replace @reuse with a group of twice its capacity. The socket count,
+ * the prog pointer and the socket array are copied over, every member
+ * socket's sk_reuseport_cb is repointed at the new group, and the old
+ * group is released with kfree_rcu().
+ *
+ * Returns the new group, or NULL if the doubled capacity would exceed
+ * U16_MAX or the allocation fails; in both failure cases @reuse is left
+ * as it was. The only caller in this file, reuseport_add_sock(), calls
+ * this with reuseport_lock held.
+ */
+static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
+{
+ struct sock_reuseport *more_reuse;
+ u32 more_socks_size, i;
+
+ more_socks_size = reuse->max_socks * 2U;
+ if (more_socks_size > U16_MAX)
+ return NULL;
+
+ more_reuse = __reuseport_alloc(more_socks_size);
+ if (!more_reuse)
+ return NULL;
+
+ more_reuse->max_socks = more_socks_size; /* __reuseport_alloc() already stored this value */
+ more_reuse->num_socks = reuse->num_socks;
+ more_reuse->prog = reuse->prog;
+
+ memcpy(more_reuse->socks, reuse->socks,
+ reuse->num_socks * sizeof(struct sock *));
+
+ for (i = 0; i < reuse->num_socks; ++i)
+ rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb,
+ more_reuse);
+
+ /* Note: we use kfree_rcu here instead of reuseport_free_rcu so
+ * that reuse and more_reuse can temporarily share a reference
+ * to prog.
+ */
+ kfree_rcu(reuse, rcu);
+ return more_reuse;
+}
+/*
+ * RCU callback that tears down a reuseport group: recovers the group from
+ * its embedded rcu head, destroys the attached BPF program if there is
+ * one, and frees the group itself.
+ */
+static void reuseport_free_rcu(struct rcu_head *head)
+{
+ struct sock_reuseport *reuse;
+
+ reuse = container_of(head, struct sock_reuseport, rcu);
+ if (reuse->prog)
+ bpf_prog_destroy(reuse->prog);
+ kfree(reuse);
+}
+
+/**
+ * reuseport_add_sock - Add a socket to the reuseport group of another.
+ * @sk: New socket to add to the group.
+ * @sk2: Socket belonging to the existing reuseport group.
+ * May return ENOMEM and not add socket to group under memory pressure.
+ *
+ * If @sk2 has no group yet, one is created for it first. If @sk already
+ * has a group of its own, that group must hold exactly one socket; it is
+ * released through call_rcu() once @sk has joined @sk2's group. When the
+ * target group is full it is doubled via reuseport_grow().
+ *
+ * Return: 0 on success, -EBUSY if @sk's existing group does not hold
+ * exactly one socket, -ENOMEM if the group cannot be grown, or the error
+ * from reuseport_alloc() when creating @sk2's group fails.
+ */
+int reuseport_add_sock(struct sock *sk, struct sock *sk2)
+{
+ struct sock_reuseport *old_reuse, *reuse;
+
+ if (!rcu_access_pointer(sk2->sk_reuseport_cb)) {
+ int err = reuseport_alloc(sk2);
+
+ if (err)
+ return err;
+ }
+
+ spin_lock_bh(&reuseport_lock);
+ reuse = rcu_dereference_protected(sk2->sk_reuseport_cb,
+ lockdep_is_held(&reuseport_lock));
+ old_reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
+ lockdep_is_held(&reuseport_lock));
+ if (old_reuse && old_reuse->num_socks != 1) {
+ spin_unlock_bh(&reuseport_lock);
+ return -EBUSY;
+ }
+
+ if (reuse->num_socks == reuse->max_socks) {
+ reuse = reuseport_grow(reuse);
+ if (!reuse) {
+ spin_unlock_bh(&reuseport_lock);
+ return -ENOMEM;
+ }
+ }
+
+ reuse->socks[reuse->num_socks] = sk;
+ /* paired with smp_rmb() in reuseport_select_sock() */
+ smp_wmb();
+ reuse->num_socks++; /* count is raised only after the slot has been written */
+ rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
+
+ spin_unlock_bh(&reuseport_lock);
+
+ if (old_reuse)
+ call_rcu(&old_reuse->rcu, reuseport_free_rcu); /* frees sk's previous single-socket group */
+ return 0;
+}
+EXPORT_SYMBOL(reuseport_add_sock);
+/*
+ * Remove @sk from its reuseport group. sk->sk_reuseport_cb is cleared,
+ * the slot that held @sk is overwritten with the last socket in the
+ * array, and the count is decremented. When the group becomes empty it
+ * is handed to call_rcu() for freeing.
+ *
+ * NOTE(review): the group pointer is dereferenced without a NULL check,
+ * so this must only be called for a socket that currently has a group.
+ */
+void reuseport_detach_sock(struct sock *sk)
+{
+ struct sock_reuseport *reuse;
+ int i;
+
+ spin_lock_bh(&reuseport_lock);
+ reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
+ lockdep_is_held(&reuseport_lock));
+ rcu_assign_pointer(sk->sk_reuseport_cb, NULL);
+
+ for (i = 0; i < reuse->num_socks; i++) {
+ if (reuse->socks[i] == sk) {
+ reuse->socks[i] = reuse->socks[reuse->num_socks - 1]; /* fill the hole with the last entry */
+ reuse->num_socks--;
+ if (reuse->num_socks == 0)
+ call_rcu(&reuse->rcu, reuseport_free_rcu);
+ break;
+ }
+ }
+ spin_unlock_bh(&reuseport_lock);
+}
+EXPORT_SYMBOL(reuseport_detach_sock);
+/*
+ * Run @prog on @skb and use its return value as an index into
+ * reuse->socks. The program sees the skb with its data pointer advanced
+ * by @hdr_len; the pull is undone with __skb_push() afterwards. A shared
+ * skb is cloned first and the clone is used (and released) instead.
+ *
+ * Returns the selected socket, or NULL if cloning fails, the header
+ * cannot be pulled, or the returned index is not below @socks.
+ */
+static struct sock *run_bpf(struct sock_reuseport *reuse, u16 socks,
+ struct bpf_prog *prog, struct sk_buff *skb,
+ int hdr_len)
+{
+ struct sk_buff *nskb = NULL; /* stays NULL unless a clone is made below */
+ u32 index;
+
+ if (skb_shared(skb)) {
+ nskb = skb_clone(skb, GFP_ATOMIC);
+ if (!nskb)
+ return NULL;
+ skb = nskb;
+ }
+
+ /* temporarily advance data past protocol header */
+ if (!pskb_pull(skb, hdr_len)) {
+ kfree_skb(nskb);
+ return NULL;
+ }
+ index = bpf_prog_run_save_cb(prog, skb);
+ __skb_push(skb, hdr_len);
+
+ consume_skb(nskb);
+
+ if (index >= socks)
+ return NULL;
+
+ return reuse->socks[index];
+}
+
+/**
+ * reuseport_select_sock - Select a socket from an SO_REUSEPORT group.
+ * @sk: First socket in the group.
+ * @hash: When no BPF filter is available, use this hash to select.
+ * @skb: skb to run through BPF filter.
+ * @hdr_len: BPF filter expects skb data pointer at payload data. If
+ * the skb does not yet point at the payload, this parameter represents
+ * how far the pointer needs to advance to reach the payload.
+ * Returns a socket that should receive the packet (or NULL on error).
+ *
+ * NULL is also returned when @sk has no reuseport group, when the group's
+ * socket count reads as zero, or when the BPF program yields an index
+ * outside the group. The BPF program is used only when both a program and
+ * @skb are present; otherwise the socket is picked by scaling @hash to
+ * the socket count. The whole lookup runs inside rcu_read_lock().
+ */
+struct sock *reuseport_select_sock(struct sock *sk,
+ u32 hash,
+ struct sk_buff *skb,
+ int hdr_len)
+{
+ struct sock_reuseport *reuse;
+ struct bpf_prog *prog;
+ struct sock *sk2 = NULL;
+ u16 socks;
+
+ rcu_read_lock();
+ reuse = rcu_dereference(sk->sk_reuseport_cb);
+
+ /* if memory allocation failed or add call is not yet complete */
+ if (!reuse)
+ goto out;
+
+ prog = rcu_dereference(reuse->prog);
+ socks = READ_ONCE(reuse->num_socks); /* one snapshot of the count is used for the whole selection */
+ if (likely(socks)) {
+ /* paired with smp_wmb() in reuseport_add_sock() */
+ smp_rmb();
+
+ if (prog && skb)
+ sk2 = run_bpf(reuse, socks, prog, skb, hdr_len);
+ else
+ sk2 = reuse->socks[reciprocal_scale(hash, socks)];
+ }
+
+out:
+ rcu_read_unlock();
+ return sk2;
+}
+EXPORT_SYMBOL(reuseport_select_sock);
+/*
+ * Install @prog as the BPF selection program of @sk's reuseport group,
+ * under reuseport_lock.
+ *
+ * Returns the program that was attached before (possibly NULL). This
+ * function does not release it; that is left to the caller.
+ *
+ * NOTE(review): the group pointer is dereferenced unconditionally, so
+ * @sk must already have a reuseport group when this is called.
+ */
+struct bpf_prog *
+reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog)
+{
+ struct sock_reuseport *reuse;
+ struct bpf_prog *old_prog;
+
+ spin_lock_bh(&reuseport_lock);
+ reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
+ lockdep_is_held(&reuseport_lock));
+ old_prog = rcu_dereference_protected(reuse->prog,
+ lockdep_is_held(&reuseport_lock));
+ rcu_assign_pointer(reuse->prog, prog);
+ spin_unlock_bh(&reuseport_lock);
+
+ return old_prog;
+}
+EXPORT_SYMBOL(reuseport_attach_prog);
diff --git a/net/core/stream.c b/net/core/stream.c
index 3089b014bb53..2c50c71cb806 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -194,9 +194,6 @@ void sk_stream_kill_queues(struct sock *sk)
/* First the read buffer. */
__skb_queue_purge(&sk->sk_receive_queue);
- /* Next, the error queue. */
- __skb_queue_purge(&sk->sk_error_queue);
-
/* Next, the write queue. */
WARN_ON(!skb_queue_empty(&sk->sk_write_queue));
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 32898247d8bf..f62e177267c3 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -24,9 +24,12 @@
static int zero = 0;
static int one = 1;
+static int two __maybe_unused = 2;
static int min_sndbuf = SOCK_MIN_SNDBUF;
static int min_rcvbuf = SOCK_MIN_RCVBUF;
static int max_skb_frags = MAX_SKB_FRAGS;
+static long long_one __maybe_unused = 1;
+static long long_max __maybe_unused = LONG_MAX;
static int net_msg_warn; /* Unused, but still a sysctl */
@@ -231,6 +234,52 @@ static int proc_do_rss_key(struct ctl_table *table, int write,
return proc_dostring(&fake_table, write, buffer, lenp, ppos);
}
+#ifdef CONFIG_BPF_JIT
+static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{ /* int sysctl handler: writes need CAP_SYS_ADMIN, reads are not restricted here */
+ int ret, jit_enable = *(int *)table->data; /* local copy of the current value */
+ struct ctl_table tmp = *table;
+
+ if (write && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ tmp.data = &jit_enable; /* proc_dointvec_minmax() operates on the copy, not the live variable */
+ ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+ if (write && !ret) { /* store back only after a successful write */
+ *(int *)table->data = jit_enable;
+ if (jit_enable == 2)
+ pr_warn("bpf_jit_enable = 2 was set! NEVER use this in production, only for JIT debugging!\n");
+ }
+ return ret; /* result of proc_dointvec_minmax() */
+}
+
+# ifdef CONFIG_HAVE_EBPF_JIT
+static int
+proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{ /* int sysctl handler that refuses both reads and writes without CAP_SYS_ADMIN */
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ return proc_dointvec_minmax(table, write, buffer, lenp, ppos); /* then the usual bounded int handling */
+}
+# endif /* CONFIG_HAVE_EBPF_JIT */
+/*
+ * sysctl handler for a long-sized value: any access, read or write, is
+ * refused with -EPERM unless the caller has CAP_SYS_ADMIN; otherwise the
+ * request is passed unchanged to proc_doulongvec_minmax() and its result
+ * is returned.
+ */
+static int
+proc_dolongvec_minmax_bpf_restricted(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
+}
+#endif
+
static struct ctl_table net_core_table[] = {
#ifdef CONFIG_NET
{
@@ -292,13 +341,34 @@ static struct ctl_table net_core_table[] = {
.data = &bpf_jit_enable,
.maxlen = sizeof(int),
.mode = 0644,
-#ifndef CONFIG_BPF_JIT_ALWAYS_ON
- .proc_handler = proc_dointvec
-#else
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_dointvec_minmax_bpf_enable,
+# ifdef CONFIG_BPF_JIT_ALWAYS_ON
.extra1 = &one,
.extra2 = &one,
-#endif
+# else
+ .extra1 = &zero,
+ .extra2 = &two,
+# endif
+ },
+# ifdef CONFIG_HAVE_EBPF_JIT
+ {
+ .procname = "bpf_jit_harden",
+ .data = &bpf_jit_harden,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax_bpf_restricted,
+ .extra1 = &zero,
+ .extra2 = &two,
+ },
+# endif
+ {
+ .procname = "bpf_jit_limit",
+ .data = &bpf_jit_limit,
+ .maxlen = sizeof(long),
+ .mode = 0600,
+ .proc_handler = proc_dolongvec_minmax_bpf_restricted,
+ .extra1 = &long_one,
+ .extra2 = &bpf_jit_limit_max,
},
#endif
{
diff --git a/net/core/utils.c b/net/core/utils.c
index 3d17ca8b4744..13eb3552de07 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -316,6 +316,23 @@ void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
}
EXPORT_SYMBOL(inet_proto_csum_replace4);
+/**
+ * inet_proto_csum_replace16 - update layer 4 header checksum field
+ * @sum: Layer 4 header checksum field
+ * @skb: sk_buff for the packet
+ * @from: old IPv6 address
+ * @to: new IPv6 address
+ * @pseudohdr: True if layer 4 header checksum includes pseudoheader
+ *
+ * Update layer 4 header as per the update in IPv6 src/dst address.
+ *
+ * There is no need to update skb->csum in this function, because update in two
+ * fields a.) IPv6 src/dst address and b.) L4 header checksum cancels each other
+ * for skb->csum calculation. Whereas inet_proto_csum_replace4 function needs to
+ * update skb->csum, because update in 3 fields a.) IPv4 src/dst address,
+ * b.) IPv4 Header checksum and c.) L4 header checksum results in same diff as
+ * L4 Header checksum for skb->csum calculation.
+ */
void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
const __be32 *from, const __be32 *to,
bool pseudohdr)
@@ -327,9 +344,6 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
if (skb->ip_summed != CHECKSUM_PARTIAL) {
*sum = csum_fold(csum_partial(diff, sizeof(diff),
~csum_unfold(*sum)));
- if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
- skb->csum = ~csum_partial(diff, sizeof(diff),
- ~skb->csum);
} else if (pseudohdr)
*sum = ~csum_fold(csum_partial(diff, sizeof(diff),
csum_unfold(*sum)));
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 6fe2b615518c..426c30f9fdb0 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1725,6 +1725,8 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
fn = &reply_funcs[dcb->cmd];
if (!fn->cb)
return -EOPNOTSUPP;
+ if (fn->type == RTM_SETDCB && !netlink_capable(skb, CAP_NET_ADMIN))
+ return -EPERM;
if (!tb[DCB_ATTR_IFNAME])
return -EINVAL;
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index b0e28d24e1a7..e50fc19690c8 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -44,9 +44,9 @@ extern bool dccp_debug;
#define dccp_pr_debug_cat(format, a...) DCCP_PRINTK(dccp_debug, format, ##a)
#define dccp_debug(fmt, a...) dccp_pr_debug_cat(KERN_DEBUG fmt, ##a)
#else
-#define dccp_pr_debug(format, a...)
-#define dccp_pr_debug_cat(format, a...)
-#define dccp_debug(format, a...)
+#define dccp_pr_debug(format, a...) do {} while (0)
+#define dccp_pr_debug_cat(format, a...) do {} while (0)
+#define dccp_debug(format, a...) do {} while (0)
#endif
extern struct inet_hashinfo dccp_hashinfo;
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index f227f002c73d..db87d9f58019 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -738,7 +738,12 @@ static int __feat_register_sp(struct list_head *fn, u8 feat, u8 is_local,
if (dccp_feat_clone_sp_val(&fval, sp_val, sp_len))
return -ENOMEM;
- return dccp_feat_push_change(fn, feat, is_local, mandatory, &fval);
+ if (dccp_feat_push_change(fn, feat, is_local, mandatory, &fval)) {
+ kfree(fval.sp.vec);
+ return -ENOMEM;
+ }
+
+ return 0;
}
/**
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index ef4c44d46293..11d79958a767 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -465,7 +465,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
.fl4_dport = dccp_hdr(skb)->dccph_sport,
};
- security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
+ security_skb_classify_flow(skb, flowi4_to_flowi_common(&fl4));
rt = ip_route_output_flow(net, &fl4, sk);
if (IS_ERR(rt)) {
IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
@@ -804,7 +804,7 @@ static int dccp_v4_rcv(struct sk_buff *skb)
}
lookup:
- sk = __inet_lookup_skb(&dccp_hashinfo, skb,
+ sk = __inet_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
dh->dccph_sport, dh->dccph_dport);
if (!sk) {
dccp_pr_debug("failed to look up flow ID in table and "
@@ -868,7 +868,7 @@ lookup:
goto discard_and_relse;
nf_reset(skb);
- return sk_receive_skb(sk, skb, 1);
+ return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4);
no_dccp_socket:
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index d2caa4d69159..64f0e88fe0e8 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -202,14 +202,14 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req
fl6.flowi6_oif = ireq->ir_iif;
fl6.fl6_dport = ireq->ir_rmt_port;
fl6.fl6_sport = htons(ireq->ir_num);
- security_req_classify_flow(req, flowi6_to_flowi(&fl6));
+ security_req_classify_flow(req, flowi6_to_flowi_common(&fl6));
rcu_read_lock();
final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final);
rcu_read_unlock();
- dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
+ dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
dst = NULL;
@@ -273,10 +273,10 @@ static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb)
fl6.flowi6_oif = inet6_iif(rxskb);
fl6.fl6_dport = dccp_hdr(skb)->dccph_dport;
fl6.fl6_sport = dccp_hdr(skb)->dccph_sport;
- security_skb_classify_flow(rxskb, flowi6_to_flowi(&fl6));
+ security_skb_classify_flow(rxskb, flowi6_to_flowi_common(&fl6));
/* sk = NULL, but it is safe for now. RST socket required. */
- dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
+ dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
if (!IS_ERR(dst)) {
skb_dst_set(skb, dst);
ip6_xmit(ctl_sk, skb, &fl6, NULL, 0);
@@ -313,6 +313,11 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (!ipv6_unicast_destination(skb))
return 0; /* discard, don't send a reset here */
+ if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
+ IP6_INC_STATS_BH(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
+ return 0;
+ }
+
if (dccp_bad_service_code(sk, service)) {
dcb->dccpd_reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE;
goto drop;
@@ -679,7 +684,7 @@ static int dccp_v6_rcv(struct sk_buff *skb)
DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb);
lookup:
- sk = __inet6_lookup_skb(&dccp_hashinfo, skb,
+ sk = __inet6_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
dh->dccph_sport, dh->dccph_dport,
inet6_iif(skb));
if (!sk) {
@@ -741,7 +746,7 @@ lookup:
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_and_relse;
- return sk_receive_skb(sk, skb, 1) ? -1 : 0;
+ return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4) ? -1 : 0;
no_dccp_socket:
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
@@ -874,12 +879,12 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
fl6.flowi6_oif = sk->sk_bound_dev_if;
fl6.fl6_dport = usin->sin6_port;
fl6.fl6_sport = inet->inet_sport;
- security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+ security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
final_p = fl6_update_dst(&fl6, opt, &final);
- dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
+ dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
goto failure;
@@ -1005,7 +1010,7 @@ static struct proto dccp_v6_prot = {
.sendmsg = dccp_sendmsg,
.recvmsg = dccp_recvmsg,
.backlog_rcv = dccp_v6_do_rcv,
- .hash = inet_hash,
+ .hash = inet6_hash,
.unhash = inet_unhash,
.accept = inet_csk_accept,
.get_port = inet_csk_get_port,
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 68eed344b471..1f03a590288d 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -92,6 +92,8 @@ struct sock *dccp_create_openreq_child(const struct sock *sk,
newdp->dccps_role = DCCP_ROLE_SERVER;
newdp->dccps_hc_rx_ackvec = NULL;
newdp->dccps_service_list = NULL;
+ newdp->dccps_hc_rx_ccid = NULL;
+ newdp->dccps_hc_tx_ccid = NULL;
newdp->dccps_service = dreq->dreq_service;
newdp->dccps_timestamp_echo = dreq->dreq_timestamp_echo;
newdp->dccps_timestamp_time = dreq->dreq_timestamp_time;
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 9d8fcdefefc0..ee297964fcd2 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -823,7 +823,7 @@ static int dn_auto_bind(struct socket *sock)
static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation)
{
struct dn_scp *scp = DN_SK(sk);
- DEFINE_WAIT(wait);
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
int err;
if (scp->state != DN_CR)
@@ -833,11 +833,11 @@ static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation)
scp->segsize_loc = dst_metric_advmss(__sk_dst_get(sk));
dn_send_conn_conf(sk, allocation);
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+ add_wait_queue(sk_sleep(sk), &wait);
for(;;) {
release_sock(sk);
if (scp->state == DN_CC)
- *timeo = schedule_timeout(*timeo);
+ *timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo);
lock_sock(sk);
err = 0;
if (scp->state == DN_RUN)
@@ -851,9 +851,8 @@ static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation)
err = -EAGAIN;
if (!*timeo)
break;
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
}
- finish_wait(sk_sleep(sk), &wait);
+ remove_wait_queue(sk_sleep(sk), &wait);
if (err == 0) {
sk->sk_socket->state = SS_CONNECTED;
} else if (scp->state != DN_CC) {
@@ -865,7 +864,7 @@ static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation)
static int dn_wait_run(struct sock *sk, long *timeo)
{
struct dn_scp *scp = DN_SK(sk);
- DEFINE_WAIT(wait);
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
int err = 0;
if (scp->state == DN_RUN)
@@ -874,11 +873,11 @@ static int dn_wait_run(struct sock *sk, long *timeo)
if (!*timeo)
return -EALREADY;
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+ add_wait_queue(sk_sleep(sk), &wait);
for(;;) {
release_sock(sk);
if (scp->state == DN_CI || scp->state == DN_CC)
- *timeo = schedule_timeout(*timeo);
+ *timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo);
lock_sock(sk);
err = 0;
if (scp->state == DN_RUN)
@@ -892,9 +891,8 @@ static int dn_wait_run(struct sock *sk, long *timeo)
err = -ETIMEDOUT;
if (!*timeo)
break;
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
}
- finish_wait(sk_sleep(sk), &wait);
+ remove_wait_queue(sk_sleep(sk), &wait);
out:
if (err == 0) {
sk->sk_socket->state = SS_CONNECTED;
@@ -1039,16 +1037,16 @@ static void dn_user_copy(struct sk_buff *skb, struct optdata_dn *opt)
static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo)
{
- DEFINE_WAIT(wait);
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct sk_buff *skb = NULL;
int err = 0;
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+ add_wait_queue(sk_sleep(sk), &wait);
for(;;) {
release_sock(sk);
skb = skb_dequeue(&sk->sk_receive_queue);
if (skb == NULL) {
- *timeo = schedule_timeout(*timeo);
+ *timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo);
skb = skb_dequeue(&sk->sk_receive_queue);
}
lock_sock(sk);
@@ -1063,9 +1061,8 @@ static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo)
err = -EAGAIN;
if (!*timeo)
break;
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
}
- finish_wait(sk_sleep(sk), &wait);
+ remove_wait_queue(sk_sleep(sk), &wait);
return skb == NULL ? ERR_PTR(err) : skb;
}
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index b2c26b081134..80554e7e9a0f 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -55,7 +55,7 @@
#include <net/dn_neigh.h>
#include <net/dn_fib.h>
-#define DN_IFREQ_SIZE (sizeof(struct ifreq) - sizeof(struct sockaddr) + sizeof(struct sockaddr_dn))
+#define DN_IFREQ_SIZE (offsetof(struct ifreq, ifr_ifru) + sizeof(struct sockaddr_dn))
static char dn_rt_all_end_mcast[ETH_ALEN] = {0xAB,0x00,0x00,0x04,0x00,0x00};
static char dn_rt_all_rt_mcast[ETH_ALEN] = {0xAB,0x00,0x00,0x03,0x00,0x00};
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 4256ac95a141..061c3939f93b 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1031,7 +1031,7 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
p->phy_interface = mode;
phy_dn = of_parse_phandle(port_dn, "phy-handle", 0);
- if (of_phy_is_fixed_link(port_dn)) {
+ if (!phy_dn && of_phy_is_fixed_link(port_dn)) {
/* In the case of a fixed PHY, the DT node associated
* to the fixed PHY is the Port DT node
*/
@@ -1041,7 +1041,7 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
return ret;
}
phy_is_fixed = true;
- phy_dn = port_dn;
+ phy_dn = of_node_get(port_dn);
}
if (ds->drv->get_phy_flags)
@@ -1060,6 +1060,7 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
ret = dsa_slave_phy_connect(p, slave_dev, phy_id);
if (ret) {
netdev_err(slave_dev, "failed to connect to phy%d: %d\n", phy_id, ret);
+ of_node_put(phy_dn);
return ret;
}
} else {
@@ -1068,6 +1069,8 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
phy_flags,
p->phy_interface);
}
+
+ of_node_put(phy_dn);
}
if (p->phy && phy_is_fixed)
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index e2aadb73111d..657f7b1af315 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -84,6 +84,8 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev
brcm_tag[2] = BRCM_IG_DSTMAP2_MASK;
brcm_tag[3] = (1 << p->port) & BRCM_IG_DSTMAP1_MASK;
+ skb->offload_fwd_mark = 1;
+
return skb;
out_free:
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 52dcd414c2af..3f51b4e590b1 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -235,7 +235,12 @@ int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh, __be16
eth->h_proto = type;
memcpy(eth->h_source, dev->dev_addr, ETH_ALEN);
memcpy(eth->h_dest, neigh->ha, ETH_ALEN);
- hh->hh_len = ETH_HLEN;
+
+ /* Pairs with READ_ONCE() in neigh_resolve_output(),
+ * neigh_hh_output() and neigh_update_hhs().
+ */
+ smp_store_release(&hh->hh_len, ETH_HLEN);
+
return 0;
}
EXPORT_SYMBOL(eth_header_cache);
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 943378d6e4c3..8dd239214a14 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -289,6 +289,8 @@ static void send_hsr_supervision_frame(struct hsr_port *master, u8 type)
skb->dev->dev_addr, skb->len) <= 0)
goto out;
skb_reset_mac_header(skb);
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
hsr_stag = (typeof(hsr_stag)) skb_put(skb, sizeof(*hsr_stag));
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 46415839e67e..afcde16a94e2 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -297,7 +297,8 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb,
node_dst = find_node_by_AddrA(&port->hsr->node_db, eth_hdr(skb)->h_dest);
if (!node_dst) {
- WARN_ONCE(1, "%s: Unknown node\n", __func__);
+ if (net_ratelimit())
+ netdev_err(skb->dev, "%s: Unknown node\n", __func__);
return;
}
if (port->type != node_dst->AddrB_port)
@@ -455,13 +456,9 @@ int hsr_get_node_data(struct hsr_priv *hsr,
struct hsr_port *port;
unsigned long tdiff;
-
- rcu_read_lock();
node = find_node_by_AddrA(&hsr->node_db, addr);
- if (!node) {
- rcu_read_unlock();
- return -ENOENT; /* No such entry */
- }
+ if (!node)
+ return -ENOENT;
ether_addr_copy(addr_b, node->MacAddressB);
@@ -496,7 +493,5 @@ int hsr_get_node_data(struct hsr_priv *hsr,
*addr_b_ifindex = -1;
}
- rcu_read_unlock();
-
return 0;
}
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
index a2c7e4c0ac1e..0a9a178f221a 100644
--- a/net/hsr/hsr_netlink.c
+++ b/net/hsr/hsr_netlink.c
@@ -132,6 +132,7 @@ static struct genl_family hsr_genl_family = {
.name = "HSR",
.version = 1,
.maxattr = HSR_A_MAX,
+ .netnsok = true,
};
static const struct genl_multicast_group hsr_mcgrps[] = {
@@ -259,17 +260,16 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info)
if (!na)
goto invalid;
- hsr_dev = __dev_get_by_index(genl_info_net(info),
- nla_get_u32(info->attrs[HSR_A_IFINDEX]));
+ rcu_read_lock();
+ hsr_dev = dev_get_by_index_rcu(genl_info_net(info),
+ nla_get_u32(info->attrs[HSR_A_IFINDEX]));
if (!hsr_dev)
- goto invalid;
+ goto rcu_unlock;
if (!is_hsr_master(hsr_dev))
- goto invalid;
-
+ goto rcu_unlock;
/* Send reply */
-
- skb_out = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ skb_out = genlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
if (!skb_out) {
res = -ENOMEM;
goto fail;
@@ -321,12 +321,10 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info)
res = nla_put_u16(skb_out, HSR_A_IF1_SEQ, hsr_node_if1_seq);
if (res < 0)
goto nla_put_failure;
- rcu_read_lock();
port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_A);
if (port)
res = nla_put_u32(skb_out, HSR_A_IF1_IFINDEX,
port->dev->ifindex);
- rcu_read_unlock();
if (res < 0)
goto nla_put_failure;
@@ -336,20 +334,22 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info)
res = nla_put_u16(skb_out, HSR_A_IF2_SEQ, hsr_node_if2_seq);
if (res < 0)
goto nla_put_failure;
- rcu_read_lock();
port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B);
if (port)
res = nla_put_u32(skb_out, HSR_A_IF2_IFINDEX,
port->dev->ifindex);
- rcu_read_unlock();
if (res < 0)
goto nla_put_failure;
+ rcu_read_unlock();
+
genlmsg_end(skb_out, msg_head);
genlmsg_unicast(genl_info_net(info), skb_out, info->snd_portid);
return 0;
+rcu_unlock:
+ rcu_read_unlock();
invalid:
netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL);
return 0;
@@ -359,6 +359,7 @@ nla_put_failure:
/* Fall through */
fail:
+ rcu_read_unlock();
return res;
}
@@ -366,16 +367,14 @@ fail:
*/
static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info)
{
- /* For receiving */
- struct nlattr *na;
+ unsigned char addr[ETH_ALEN];
struct net_device *hsr_dev;
-
- /* For sending */
struct sk_buff *skb_out;
- void *msg_head;
struct hsr_priv *hsr;
- void *pos;
- unsigned char addr[ETH_ALEN];
+ bool restart = false;
+ struct nlattr *na;
+ void *pos = NULL;
+ void *msg_head;
int res;
if (!info)
@@ -385,17 +384,17 @@ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info)
if (!na)
goto invalid;
- hsr_dev = __dev_get_by_index(genl_info_net(info),
- nla_get_u32(info->attrs[HSR_A_IFINDEX]));
+ rcu_read_lock();
+ hsr_dev = dev_get_by_index_rcu(genl_info_net(info),
+ nla_get_u32(info->attrs[HSR_A_IFINDEX]));
if (!hsr_dev)
- goto invalid;
+ goto rcu_unlock;
if (!is_hsr_master(hsr_dev))
- goto invalid;
-
+ goto rcu_unlock;
+restart:
/* Send reply */
-
- skb_out = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ skb_out = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_ATOMIC);
if (!skb_out) {
res = -ENOMEM;
goto fail;
@@ -409,18 +408,26 @@ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info)
goto nla_put_failure;
}
- res = nla_put_u32(skb_out, HSR_A_IFINDEX, hsr_dev->ifindex);
- if (res < 0)
- goto nla_put_failure;
+ if (!restart) {
+ res = nla_put_u32(skb_out, HSR_A_IFINDEX, hsr_dev->ifindex);
+ if (res < 0)
+ goto nla_put_failure;
+ }
hsr = netdev_priv(hsr_dev);
- rcu_read_lock();
- pos = hsr_get_next_node(hsr, NULL, addr);
+ if (!pos)
+ pos = hsr_get_next_node(hsr, NULL, addr);
while (pos) {
res = nla_put(skb_out, HSR_A_NODE_ADDR, ETH_ALEN, addr);
if (res < 0) {
- rcu_read_unlock();
+ if (res == -EMSGSIZE) {
+ genlmsg_end(skb_out, msg_head);
+ genlmsg_unicast(genl_info_net(info), skb_out,
+ info->snd_portid);
+ restart = true;
+ goto restart;
+ }
goto nla_put_failure;
}
pos = hsr_get_next_node(hsr, pos, addr);
@@ -432,15 +439,18 @@ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info)
return 0;
+rcu_unlock:
+ rcu_read_unlock();
invalid:
netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL);
return 0;
nla_put_failure:
- kfree_skb(skb_out);
+ nlmsg_free(skb_out);
/* Fall through */
fail:
+ rcu_read_unlock();
return res;
}
diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c
index 7d37366cc695..db14b452adfa 100644
--- a/net/hsr/hsr_slave.c
+++ b/net/hsr/hsr_slave.c
@@ -30,6 +30,8 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb)
rcu_read_lock(); /* hsr->node_db, hsr->ports */
port = hsr_port_get_rcu(skb->dev);
+ if (!port)
+ goto finish_pass;
if (hsr_addr_is_self(port->hsr, eth_hdr(skb)->h_source)) {
/* Directly kill frames sent by ourselves */
@@ -147,16 +149,16 @@ int hsr_add_port(struct hsr_priv *hsr, struct net_device *dev,
if (port == NULL)
return -ENOMEM;
+ port->hsr = hsr;
+ port->dev = dev;
+ port->type = type;
+
if (type != HSR_PT_MASTER) {
res = hsr_portdev_setup(dev, port);
if (res)
goto fail_dev_setup;
}
- port->hsr = hsr;
- port->dev = dev;
- port->type = type;
-
list_add_tail_rcu(&port->port_list, &hsr->ports);
synchronize_rcu();
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index 6183730d38db..e728dae467c3 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -634,7 +634,7 @@ err_sysctl:
void lowpan_net_frag_exit(void)
{
- inet_frags_fini(&lowpan_frags);
lowpan_frags_sysctl_unregister();
unregister_pernet_subsys(&lowpan_frags_ops);
+ inet_frags_fini(&lowpan_frags);
}
diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c
index 3503c38954f9..fe31df8dc804 100644
--- a/net/ieee802154/nl-mac.c
+++ b/net/ieee802154/nl-mac.c
@@ -557,9 +557,7 @@ ieee802154_llsec_parse_key_id(struct genl_info *info,
desc->mode = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_KEY_MODE]);
if (desc->mode == IEEE802154_SCF_KEY_IMPLICIT) {
- if (!info->attrs[IEEE802154_ATTR_PAN_ID] &&
- !(info->attrs[IEEE802154_ATTR_SHORT_ADDR] ||
- info->attrs[IEEE802154_ATTR_HW_ADDR]))
+ if (!info->attrs[IEEE802154_ATTR_PAN_ID])
return -EINVAL;
desc->device_addr.pan_id = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_PAN_ID]);
@@ -568,6 +566,9 @@ ieee802154_llsec_parse_key_id(struct genl_info *info,
desc->device_addr.mode = IEEE802154_ADDR_SHORT;
desc->device_addr.short_addr = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_SHORT_ADDR]);
} else {
+ if (!info->attrs[IEEE802154_ATTR_HW_ADDR])
+ return -EINVAL;
+
desc->device_addr.mode = IEEE802154_ADDR_LONG;
desc->device_addr.extended_addr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]);
}
@@ -684,8 +685,10 @@ int ieee802154_llsec_getparams(struct sk_buff *skb, struct genl_info *info)
nla_put_u8(msg, IEEE802154_ATTR_LLSEC_SECLEVEL, params.out_level) ||
nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER,
be32_to_cpu(params.frame_counter)) ||
- ieee802154_llsec_fill_key_id(msg, &params.out_key))
+ ieee802154_llsec_fill_key_id(msg, &params.out_key)) {
+ rc = -ENOBUFS;
goto out_free;
+ }
dev_put(dev);
diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c
index 77d73014bde3..11f53dc0c1c0 100644
--- a/net/ieee802154/nl-phy.c
+++ b/net/ieee802154/nl-phy.c
@@ -249,8 +249,10 @@ int ieee802154_add_iface(struct sk_buff *skb, struct genl_info *info)
}
if (nla_put_string(msg, IEEE802154_ATTR_PHY_NAME, wpan_phy_name(phy)) ||
- nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name))
+ nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name)) {
+ rc = -EMSGSIZE;
goto nla_put_failure;
+ }
dev_put(dev);
wpan_phy_put(phy);
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index 16ef0d9f566e..b2ff2f7329c3 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -843,8 +843,13 @@ nl802154_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags,
goto nla_put_failure;
#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
+ if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR)
+ goto out;
+
if (nl802154_get_llsec_params(msg, rdev, wpan_dev) < 0)
goto nla_put_failure;
+
+out:
#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */
genlmsg_end(msg, hdr);
@@ -1367,6 +1372,9 @@ static int nl802154_set_llsec_params(struct sk_buff *skb,
u32 changed = 0;
int ret;
+ if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR)
+ return -EOPNOTSUPP;
+
if (info->attrs[NL802154_ATTR_SEC_ENABLED]) {
u8 enabled;
@@ -1473,6 +1481,11 @@ nl802154_dump_llsec_key(struct sk_buff *skb, struct netlink_callback *cb)
if (err)
return err;
+ if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR) {
+ err = skb->len;
+ goto out_err;
+ }
+
if (!wpan_dev->netdev) {
err = -EINVAL;
goto out_err;
@@ -1527,7 +1540,8 @@ static int nl802154_add_llsec_key(struct sk_buff *skb, struct genl_info *info)
struct ieee802154_llsec_key_id id = { };
u32 commands[NL802154_CMD_FRAME_NR_IDS / 32] = { };
- if (nla_parse_nested(attrs, NL802154_KEY_ATTR_MAX,
+ if (!info->attrs[NL802154_ATTR_SEC_KEY] ||
+ nla_parse_nested(attrs, NL802154_KEY_ATTR_MAX,
info->attrs[NL802154_ATTR_SEC_KEY],
nl802154_key_policy))
return -EINVAL;
@@ -1577,7 +1591,8 @@ static int nl802154_del_llsec_key(struct sk_buff *skb, struct genl_info *info)
struct nlattr *attrs[NL802154_KEY_ATTR_MAX + 1];
struct ieee802154_llsec_key_id id;
- if (nla_parse_nested(attrs, NL802154_KEY_ATTR_MAX,
+ if (!info->attrs[NL802154_ATTR_SEC_KEY] ||
+ nla_parse_nested(attrs, NL802154_KEY_ATTR_MAX,
info->attrs[NL802154_ATTR_SEC_KEY],
nl802154_key_policy))
return -EINVAL;
@@ -1643,6 +1658,11 @@ nl802154_dump_llsec_dev(struct sk_buff *skb, struct netlink_callback *cb)
if (err)
return err;
+ if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR) {
+ err = skb->len;
+ goto out_err;
+ }
+
if (!wpan_dev->netdev) {
err = -EINVAL;
goto out_err;
@@ -1730,6 +1750,9 @@ static int nl802154_add_llsec_dev(struct sk_buff *skb, struct genl_info *info)
struct wpan_dev *wpan_dev = dev->ieee802154_ptr;
struct ieee802154_llsec_device dev_desc;
+ if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR)
+ return -EOPNOTSUPP;
+
if (ieee802154_llsec_parse_device(info->attrs[NL802154_ATTR_SEC_DEVICE],
&dev_desc) < 0)
return -EINVAL;
@@ -1745,7 +1768,8 @@ static int nl802154_del_llsec_dev(struct sk_buff *skb, struct genl_info *info)
struct nlattr *attrs[NL802154_DEV_ATTR_MAX + 1];
__le64 extended_addr;
- if (nla_parse_nested(attrs, NL802154_DEV_ATTR_MAX,
+ if (!info->attrs[NL802154_ATTR_SEC_DEVICE] ||
+ nla_parse_nested(attrs, NL802154_DEV_ATTR_MAX,
info->attrs[NL802154_ATTR_SEC_DEVICE],
nl802154_dev_policy))
return -EINVAL;
@@ -1815,6 +1839,11 @@ nl802154_dump_llsec_devkey(struct sk_buff *skb, struct netlink_callback *cb)
if (err)
return err;
+ if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR) {
+ err = skb->len;
+ goto out_err;
+ }
+
if (!wpan_dev->netdev) {
err = -EINVAL;
goto out_err;
@@ -1872,6 +1901,9 @@ static int nl802154_add_llsec_devkey(struct sk_buff *skb, struct genl_info *info
struct ieee802154_llsec_device_key key;
__le64 extended_addr;
+ if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR)
+ return -EOPNOTSUPP;
+
if (!info->attrs[NL802154_ATTR_SEC_DEVKEY] ||
nla_parse_nested(attrs, NL802154_DEVKEY_ATTR_MAX,
info->attrs[NL802154_ATTR_SEC_DEVKEY],
@@ -1905,7 +1937,8 @@ static int nl802154_del_llsec_devkey(struct sk_buff *skb, struct genl_info *info
struct ieee802154_llsec_device_key key;
__le64 extended_addr;
- if (nla_parse_nested(attrs, NL802154_DEVKEY_ATTR_MAX,
+ if (!info->attrs[NL802154_ATTR_SEC_DEVKEY] ||
+ nla_parse_nested(attrs, NL802154_DEVKEY_ATTR_MAX,
info->attrs[NL802154_ATTR_SEC_DEVKEY],
nl802154_devkey_policy))
return -EINVAL;
@@ -1980,6 +2013,11 @@ nl802154_dump_llsec_seclevel(struct sk_buff *skb, struct netlink_callback *cb)
if (err)
return err;
+ if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR) {
+ err = skb->len;
+ goto out_err;
+ }
+
if (!wpan_dev->netdev) {
err = -EINVAL;
goto out_err;
@@ -2065,6 +2103,9 @@ static int nl802154_add_llsec_seclevel(struct sk_buff *skb,
struct wpan_dev *wpan_dev = dev->ieee802154_ptr;
struct ieee802154_llsec_seclevel sl;
+ if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR)
+ return -EOPNOTSUPP;
+
if (llsec_parse_seclevel(info->attrs[NL802154_ATTR_SEC_LEVEL],
&sl) < 0)
return -EINVAL;
@@ -2080,6 +2121,9 @@ static int nl802154_del_llsec_seclevel(struct sk_buff *skb,
struct wpan_dev *wpan_dev = dev->ieee802154_ptr;
struct ieee802154_llsec_seclevel sl;
+ if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR)
+ return -EOPNOTSUPP;
+
if (!info->attrs[NL802154_ATTR_SEC_LEVEL] ||
llsec_parse_seclevel(info->attrs[NL802154_ATTR_SEC_LEVEL],
&sl) < 0)
diff --git a/net/ieee802154/nl_policy.c b/net/ieee802154/nl_policy.c
index 35c432668454..040983fc15da 100644
--- a/net/ieee802154/nl_policy.c
+++ b/net/ieee802154/nl_policy.c
@@ -30,7 +30,13 @@ const struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = {
[IEEE802154_ATTR_HW_ADDR] = { .type = NLA_HW_ADDR, },
[IEEE802154_ATTR_PAN_ID] = { .type = NLA_U16, },
[IEEE802154_ATTR_CHANNEL] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_BCN_ORD] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_SF_ORD] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_PAN_COORD] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_BAT_EXT] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_COORD_REALIGN] = { .type = NLA_U8, },
[IEEE802154_ATTR_PAGE] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_DEV_TYPE] = { .type = NLA_U8, },
[IEEE802154_ATTR_COORD_SHORT_ADDR] = { .type = NLA_U16, },
[IEEE802154_ATTR_COORD_HW_ADDR] = { .type = NLA_HW_ADDR, },
[IEEE802154_ATTR_COORD_PAN_ID] = { .type = NLA_U16, },
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index cb6c0772ea36..6383627b783e 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -182,12 +182,14 @@ static int ieee802154_sock_ioctl(struct socket *sock, unsigned int cmd,
static HLIST_HEAD(raw_head);
static DEFINE_RWLOCK(raw_lock);
-static void raw_hash(struct sock *sk)
+static int raw_hash(struct sock *sk)
{
write_lock_bh(&raw_lock);
sk_add_node(sk, &raw_head);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
write_unlock_bh(&raw_lock);
+
+ return 0;
}
static void raw_unhash(struct sock *sk)
@@ -462,12 +464,14 @@ static inline struct dgram_sock *dgram_sk(const struct sock *sk)
return container_of(sk, struct dgram_sock, sk);
}
-static void dgram_hash(struct sock *sk)
+static int dgram_hash(struct sock *sk)
{
write_lock_bh(&dgram_lock);
sk_add_node(sk, &dgram_head);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
write_unlock_bh(&dgram_lock);
+
+ return 0;
}
static void dgram_unhash(struct sock *sk)
@@ -983,6 +987,11 @@ static const struct proto_ops ieee802154_dgram_ops = {
#endif
};
+static void ieee802154_sock_destruct(struct sock *sk)
+{
+ skb_queue_purge(&sk->sk_receive_queue);
+}
+
/* Create a socket. Initialise the socket, blank the addresses
* set the state.
*/
@@ -1023,14 +1032,19 @@ static int ieee802154_create(struct net *net, struct socket *sock,
sock->ops = ops;
sock_init_data(sock, sk);
- /* FIXME: sk->sk_destruct */
+ sk->sk_destruct = ieee802154_sock_destruct;
sk->sk_family = PF_IEEE802154;
/* Checksums on by default */
sock_set_flag(sk, SOCK_ZAPPED);
- if (sk->sk_prot->hash)
- sk->sk_prot->hash(sk);
+ if (sk->sk_prot->hash) {
+ rc = sk->sk_prot->hash(sk);
+ if (rc) {
+ sk_common_release(sk);
+ goto out;
+ }
+ }
if (sk->sk_prot->init) {
rc = sk->sk_prot->init(sk);
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 395d82754626..daad05c6f25b 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -298,6 +298,7 @@ config SYN_COOKIES
config NET_IPVTI
tristate "Virtual (secure) IP: tunneling"
+ depends on IPV6 || IPV6=n
select INET_TUNNEL
select NET_IP_TUNNEL
depends on INET_XFRM_MODE_TUNNEL
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 55eff963d1fe..a35c252235ae 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -89,7 +89,6 @@
#include <linux/netfilter_ipv4.h>
#include <linux/random.h>
#include <linux/slab.h>
-#include <linux/netfilter/xt_qtaguid.h>
#include <asm/uaccess.h>
@@ -388,13 +387,27 @@ lookup_protocol:
*/
inet->inet_sport = htons(inet->inet_num);
/* Add to protocol hash chains. */
- sk->sk_prot->hash(sk);
+ err = sk->sk_prot->hash(sk);
+ if (err) {
+ sk_common_release(sk);
+ goto out;
+ }
}
if (sk->sk_prot->init) {
err = sk->sk_prot->init(sk);
- if (err)
+ if (err) {
sk_common_release(sk);
+ goto out;
+ }
+ }
+
+ if (!kern) {
+ err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
+ if (err) {
+ sk_common_release(sk);
+ goto out;
+ }
}
out:
return err;
@@ -416,9 +429,6 @@ int inet_release(struct socket *sock)
if (sk) {
long timeout;
-#ifdef CONFIG_NETFILTER_XT_MATCH_QTAGUID
- qtaguid_untag(sock, true);
-#endif
/* Applications forget to leave groups before exiting */
ip_mc_drop_socket(sk);
@@ -1163,8 +1173,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
* Besides that, it does not check for connection
* uniqueness. Wait for troubles.
*/
- __sk_prot_rehash(sk);
- return 0;
+ return __sk_prot_rehash(sk);
}
int inet_sk_rebuild_header(struct sock *sk)
@@ -1802,6 +1811,10 @@ static int __init inet_init(void)
tcp_v4_init();
+ /* Initialise per-cpu ipv4 mibs */
+ if (init_ipv4_mibs())
+ panic("%s: Cannot init ipv4 mibs\n", __func__);
+
/* Setup TCP slab cache for open requests. */
tcp_init();
@@ -1830,12 +1843,6 @@ static int __init inet_init(void)
if (init_inet_pernet_ops())
pr_crit("%s: Cannot init ipv4 inet pernet ops\n", __func__);
- /*
- * Initialise per-cpu ipv4 mibs
- */
-
- if (init_ipv4_mibs())
- pr_crit("%s: Cannot init ipv4 mibs\n", __func__);
ipv4_proc_init();
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index bfa79831873f..f1e30ff3cfd7 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -743,6 +743,14 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
goto out;
/*
+ * For some 802.11 wireless deployments (and possibly other networks),
+ * there will be an ARP proxy and gratuitous ARP frames are attacks
+ * and thus should not be accepted.
+ */
+ if (sip == tip && IN_DEV_ORCONF(in_dev, DROP_GRATUITOUS_ARP))
+ goto out;
+
+/*
* Special case: We must set Frame Relay source Q.922 address
*/
if (dev_type == ARPHRD_DLCI)
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 7fe643062013..e798e27b3c7d 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -557,6 +557,7 @@ void cipso_v4_doi_free(struct cipso_v4_doi *doi_def)
kfree(doi_def->map.std->lvl.local);
kfree(doi_def->map.std->cat.cipso);
kfree(doi_def->map.std->cat.local);
+ kfree(doi_def->map.std);
break;
}
kfree(doi_def);
@@ -1343,7 +1344,8 @@ static int cipso_v4_parsetag_rbm(const struct cipso_v4_doi *doi_def,
return ret_val;
}
- secattr->flags |= NETLBL_SECATTR_MLS_CAT;
+ if (secattr->attr.mls.cat)
+ secattr->flags |= NETLBL_SECATTR_MLS_CAT;
}
return 0;
@@ -1524,7 +1526,8 @@ static int cipso_v4_parsetag_rng(const struct cipso_v4_doi *doi_def,
return ret_val;
}
- secattr->flags |= NETLBL_SECATTR_MLS_CAT;
+ if (secattr->attr.mls.cat)
+ secattr->flags |= NETLBL_SECATTR_MLS_CAT;
}
return 0;
@@ -1809,6 +1812,7 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
{
unsigned char optbuf[sizeof(struct ip_options) + 40];
struct ip_options *opt = (struct ip_options *)optbuf;
+ int res;
if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES)
return;
@@ -1820,7 +1824,11 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
memset(opt, 0, sizeof(struct ip_options));
opt->optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr);
- if (__ip_options_compile(dev_net(skb->dev), opt, skb, NULL))
+ rcu_read_lock();
+ res = __ip_options_compile(dev_net(skb->dev), opt, skb, NULL);
+ rcu_read_unlock();
+
+ if (res)
return;
if (gateway)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index e9df88f8579c..7accf1ebe947 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -262,6 +262,7 @@ static struct in_device *inetdev_init(struct net_device *dev)
err = devinet_sysctl_register(in_dev);
if (err) {
in_dev->dead = 1;
+ neigh_parms_release(&arp_tbl, in_dev->arp_parms);
in_dev_put(in_dev);
in_dev = NULL;
goto out;
@@ -560,12 +561,15 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
return NULL;
}
-static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
+static int ip_mc_autojoin_config(struct net *net, bool join,
+ const struct in_ifaddr *ifa)
{
+#if defined(CONFIG_IP_MULTICAST)
struct ip_mreqn mreq = {
.imr_multiaddr.s_addr = ifa->ifa_address,
.imr_ifindex = ifa->ifa_dev->dev->ifindex,
};
+ struct sock *sk = net->ipv4.mc_autojoin_sk;
int ret;
ASSERT_RTNL();
@@ -578,6 +582,9 @@ static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
release_sock(sk);
return ret;
+#else
+ return -EOPNOTSUPP;
+#endif
}
static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
@@ -617,7 +624,7 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
continue;
if (ipv4_is_multicast(ifa->ifa_address))
- ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
+ ip_mc_autojoin_config(net, false, ifa);
__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
return 0;
}
@@ -873,8 +880,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
*/
set_ifa_lifetime(ifa, valid_lft, prefered_lft);
if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
- int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
- true, ifa);
+ int ret = ip_mc_autojoin_config(net, true, ifa);
if (ret < 0) {
inet_free_ifa(ifa);
@@ -1364,11 +1370,6 @@ skip:
}
}
-static bool inetdev_valid_mtu(unsigned int mtu)
-{
- return mtu >= IPV4_MIN_MTU;
-}
-
static void inetdev_send_gratuitous_arp(struct net_device *dev,
struct in_device *in_dev)
@@ -1814,7 +1815,7 @@ void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
struct sk_buff *skb;
int err = -ENOBUFS;
- skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
+ skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
if (!skb)
goto errout;
@@ -1826,7 +1827,7 @@ void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
kfree_skb(skb);
goto errout;
}
- rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
+ rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
return;
errout:
if (err < 0)
@@ -1883,7 +1884,7 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
}
err = -ENOBUFS;
- skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
+ skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_KERNEL);
if (!skb)
goto errout;
@@ -2007,16 +2008,16 @@ static void inet_forward_change(struct net *net)
for_each_netdev(net, dev) {
struct in_device *in_dev;
+
if (on)
dev_disable_lro(dev);
- rcu_read_lock();
- in_dev = __in_dev_get_rcu(dev);
+
+ in_dev = __in_dev_get_rtnl(dev);
if (in_dev) {
IN_DEV_CONF_SET(in_dev, FORWARDING, on);
inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
dev->ifindex, &in_dev->cnf);
}
- rcu_read_unlock();
}
}
@@ -2197,6 +2198,8 @@ static struct devinet_sysctl_table {
"igmpv3_unsolicited_report_interval"),
DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
"ignore_routes_with_linkdown"),
+ DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
+ "drop_gratuitous_arp"),
DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
@@ -2204,6 +2207,8 @@ static struct devinet_sysctl_table {
"promote_secondaries"),
DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
"route_localnet"),
+ DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
+ "drop_unicast_in_l2_multicast"),
DEVINET_SYSCTL_RW_ENTRY(NF_IPV4_DEFRAG_SKIP,
"nf_ipv4_defrag_skip"),
},
@@ -2238,7 +2243,7 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name,
free:
kfree(t);
out:
- return -ENOBUFS;
+ return -ENOMEM;
}
static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 8dc9073d4a76..c01149331f46 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -299,7 +299,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
.flowi4_iif = LOOPBACK_IFINDEX,
.flowi4_oif = l3mdev_master_ifindex_rcu(dev),
.daddr = ip_hdr(skb)->saddr,
- .flowi4_tos = RT_TOS(ip_hdr(skb)->tos),
+ .flowi4_tos = ip_hdr(skb)->tos & IPTOS_RT_MASK,
.flowi4_scope = scope,
.flowi4_mark = vmark ? skb->mark : 0,
};
@@ -509,6 +509,7 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
if (!dev)
return -ENODEV;
cfg->fc_oif = dev->ifindex;
+ cfg->fc_table = l3mdev_fib_table(dev);
if (colon) {
struct in_ifaddr *ifa;
struct in_device *in_dev = __in_dev_get_rtnl(dev);
@@ -1035,7 +1036,7 @@ no_promotions:
* First of all, we scan fib_info list searching
* for stray nexthop entries, then ignite fib_flush.
*/
- if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
+ if (fib_sync_down_addr(dev, ifa->ifa_local))
fib_flush(dev_net(dev));
}
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 3109b9bb95d2..3c1e42d49520 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -776,7 +776,7 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
if (fl4.flowi4_scope < RT_SCOPE_LINK)
fl4.flowi4_scope = RT_SCOPE_LINK;
- if (cfg->fc_table)
+ if (cfg->fc_table && cfg->fc_table != RT_TABLE_MAIN)
tbl = fib_get_table(net, cfg->fc_table);
if (tbl)
@@ -1069,6 +1069,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
fi->fib_priority = cfg->fc_priority;
fi->fib_prefsrc = cfg->fc_prefsrc;
fi->fib_type = cfg->fc_type;
+ fi->fib_tb_id = cfg->fc_table;
fi->fib_nhs = nhs;
change_nexthops(fi) {
@@ -1352,18 +1353,21 @@ nla_put_failure:
* referring to it.
* - device went down -> we must shutdown all nexthops going via it.
*/
-int fib_sync_down_addr(struct net *net, __be32 local)
+int fib_sync_down_addr(struct net_device *dev, __be32 local)
{
int ret = 0;
unsigned int hash = fib_laddr_hashfn(local);
struct hlist_head *head = &fib_info_laddrhash[hash];
+ int tb_id = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
+ struct net *net = dev_net(dev);
struct fib_info *fi;
if (!fib_info_laddrhash || local == 0)
return 0;
hlist_for_each_entry(fi, head, fib_lhash) {
- if (!net_eq(fi->fib_net, net))
+ if (!net_eq(fi->fib_net, net) ||
+ fi->fib_tb_id != tb_id)
continue;
if (fi->fib_prefsrc == local) {
fi->fib_flags |= RTNH_F_DEAD;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 9b14f8958dcc..e3fdd7ac20bf 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1696,7 +1696,7 @@ struct fib_table *fib_trie_unmerge(struct fib_table *oldtb)
while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
struct key_vector *local_l = NULL, *local_tp = NULL;
- hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
+ hlist_for_each_entry(fa, &l->leaf, fa_list) {
struct fib_alias *new_fa;
if (local_tb->tb_id != fa->tb_id)
@@ -1714,8 +1714,10 @@ struct fib_table *fib_trie_unmerge(struct fib_table *oldtb)
local_l = fib_find_node(lt, &local_tp, l->key);
if (fib_insert_alias(lt, local_tp, local_l, new_fa,
- NULL, l->key))
+ NULL, l->key)) {
+ kmem_cache_free(fn_alias_kmem, new_fa);
goto out;
+ }
}
/* stop loop if key wrapped back to 0 */
@@ -2230,6 +2232,7 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v)
" %Zd bytes, size of tnode: %Zd bytes.\n",
LEAF_SIZE, TNODE_SIZE(0));
+ rcu_read_lock();
for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
struct hlist_head *head = &net->ipv4.fib_table_hash[h];
struct fib_table *tb;
@@ -2249,7 +2252,9 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v)
trie_show_usage(seq, t->stats);
#endif
}
+ cond_resched_rcu();
}
+ rcu_read_unlock();
return 0;
}
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index b5a137338e50..7ac370505e44 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -205,6 +205,9 @@ static struct sk_buff **fou_gro_receive(struct sk_buff **head,
*/
NAPI_GRO_CB(skb)->encap_mark = 0;
+ /* Flag this frame as already having an outer encap header */
+ NAPI_GRO_CB(skb)->is_fou = 1;
+
rcu_read_lock();
offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
ops = rcu_dereference(offloads[proto]);
@@ -372,6 +375,9 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head,
*/
NAPI_GRO_CB(skb)->encap_mark = 0;
+ /* Flag this frame as already having an outer encap header */
+ NAPI_GRO_CB(skb)->is_fou = 1;
+
rcu_read_lock();
offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
ops = rcu_dereference(offloads[guehdr->proto_ctype]);
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 79ae0d7becbf..d9268af2ea44 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -151,6 +151,14 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head,
if ((greh->flags & ~(GRE_KEY|GRE_CSUM)) != 0)
goto out;
+ /* We can only support GRE_CSUM if we can track the location of
+ * the GRE header. In the case of FOU/GUE we cannot because the
+ * outer UDP header displaces the GRE header leaving us in a state
+ * of limbo.
+ */
+ if ((greh->flags & GRE_CSUM) && NAPI_GRO_CB(skb)->is_fou)
+ goto out;
+
type = greh->protocol;
rcu_read_lock();
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index a51f0dd6a49e..2c14d607a683 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -246,7 +246,7 @@ static struct {
/**
* icmp_global_allow - Are we allowed to send one more ICMP message ?
*
- * Uses a token bucket to limit our ICMP messages to sysctl_icmp_msgs_per_sec.
+ * Uses a token bucket to limit our ICMP messages to ~sysctl_icmp_msgs_per_sec.
* Returns false if we reached the limit and can not send another packet.
* Note: called with BH disabled
*/
@@ -256,10 +256,11 @@ bool icmp_global_allow(void)
bool rc = false;
/* Check if token bucket is empty and cannot be refilled
- * without taking the spinlock.
+ * without taking the spinlock. The READ_ONCE() are paired
+ * with the following WRITE_ONCE() in this same function.
*/
- if (!icmp_global.credit) {
- delta = min_t(u32, now - icmp_global.stamp, HZ);
+ if (!READ_ONCE(icmp_global.credit)) {
+ delta = min_t(u32, now - READ_ONCE(icmp_global.stamp), HZ);
if (delta < HZ / 50)
return false;
}
@@ -269,14 +270,17 @@ bool icmp_global_allow(void)
if (delta >= HZ / 50) {
incr = sysctl_icmp_msgs_per_sec * delta / HZ ;
if (incr)
- icmp_global.stamp = now;
+ WRITE_ONCE(icmp_global.stamp, now);
}
credit = min_t(u32, icmp_global.credit + incr, sysctl_icmp_msgs_burst);
if (credit) {
- credit--;
+ /* We want to use a credit of one in average, but need to randomize
+ * it for security reasons.
+ */
+ credit = max_t(int, credit - prandom_u32_max(3), 0);
rc = true;
}
- icmp_global.credit = credit;
+ WRITE_ONCE(icmp_global.credit, credit);
spin_unlock(&icmp_global.lock);
return rc;
}
@@ -429,7 +433,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
fl4.flowi4_proto = IPPROTO_ICMP;
fl4.flowi4_oif = l3mdev_master_ifindex(skb->dev);
- security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
+ security_skb_classify_flow(skb, flowi4_to_flowi_common(&fl4));
rt = ip_route_output_key(net, &fl4);
if (IS_ERR(rt))
goto out_unlock;
@@ -457,6 +461,23 @@ static int icmp_multipath_hash_skb(const struct sk_buff *skb)
#endif
+/*
+ * The device used for looking up which routing table to use for sending an ICMP
+ * error is preferably the source whenever it is set, which should ensure the
+ * icmp error can be sent to the source host, else lookup using the routing
+ * table of the destination device, else use the main routing table (index 0).
+ */
+static struct net_device *icmp_get_route_lookup_dev(struct sk_buff *skb)
+{
+ struct net_device *route_lookup_dev = NULL;
+
+ if (skb->dev)
+ route_lookup_dev = skb->dev;
+ else if (skb_dst(skb))
+ route_lookup_dev = skb_dst(skb)->dev;
+ return route_lookup_dev;
+}
+
static struct rtable *icmp_route_lookup(struct net *net,
struct flowi4 *fl4,
struct sk_buff *skb_in,
@@ -465,6 +486,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
int type, int code,
struct icmp_bxm *param)
{
+ struct net_device *route_lookup_dev;
struct rtable *rt, *rt2;
struct flowi4 fl4_dec;
int err;
@@ -479,9 +501,10 @@ static struct rtable *icmp_route_lookup(struct net *net,
fl4->flowi4_proto = IPPROTO_ICMP;
fl4->fl4_icmp_type = type;
fl4->fl4_icmp_code = code;
- fl4->flowi4_oif = l3mdev_master_ifindex(skb_in->dev);
+ route_lookup_dev = icmp_get_route_lookup_dev(skb_in);
+ fl4->flowi4_oif = l3mdev_master_ifindex(route_lookup_dev);
- security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
+ security_skb_classify_flow(skb_in, flowi4_to_flowi_common(fl4));
rt = __ip_route_output_key_hash(net, fl4,
icmp_multipath_hash_skb(skb_in));
if (IS_ERR(rt))
@@ -504,7 +527,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
if (err)
goto relookup_failed;
- if (inet_addr_type_dev_table(net, skb_in->dev,
+ if (inet_addr_type_dev_table(net, route_lookup_dev,
fl4_dec.saddr) == RTN_LOCAL) {
rt2 = __ip_route_output_key(net, &fl4_dec);
if (IS_ERR(rt2))
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index c67efa3e79dd..7b0bbda676b3 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2631,6 +2631,7 @@ int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u
rv = 1;
} else if (im) {
if (src_addr) {
+ spin_lock_bh(&im->lock);
for (psf = im->sources; psf; psf = psf->sf_next) {
if (psf->sf_inaddr == src_addr)
break;
@@ -2641,6 +2642,7 @@ int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u
im->sfcount[MCAST_EXCLUDE];
else
rv = im->sfcount[MCAST_EXCLUDE] != 0;
+ spin_unlock_bh(&im->lock);
} else
rv = 1; /* unspecified source; tentatively allow */
}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 0b6a72428bc2..fa9df2e6d330 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -24,6 +24,7 @@
#include <net/tcp_states.h>
#include <net/xfrm.h>
#include <net/tcp.h>
+#include <net/sock_reuseport.h>
#ifdef INET_CSK_DEBUG
const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
@@ -67,7 +68,8 @@ int inet_csk_bind_conflict(const struct sock *sk,
if ((!reuse || !sk2->sk_reuse ||
sk2->sk_state == TCP_LISTEN) &&
(!reuseport || !sk2->sk_reuseport ||
- (sk2->sk_state != TCP_TIME_WAIT &&
+ rcu_access_pointer(sk->sk_reuseport_cb) ||
+ (sk2->sk_state != TCP_TIME_WAIT &&
!uid_eq(uid, sock_i_uid(sk2))))) {
if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr ||
@@ -87,6 +89,31 @@ int inet_csk_bind_conflict(const struct sock *sk,
}
EXPORT_SYMBOL_GPL(inet_csk_bind_conflict);
+void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
+ struct sock *sk)
+{
+ kuid_t uid = sock_i_uid(sk);
+
+ if (hlist_empty(&tb->owners)) {
+ if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
+ tb->fastreuse = 1;
+ else
+ tb->fastreuse = 0;
+ if (sk->sk_reuseport) {
+ tb->fastreuseport = 1;
+ tb->fastuid = uid;
+ } else
+ tb->fastreuseport = 0;
+ } else {
+ if (tb->fastreuse &&
+ (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
+ tb->fastreuse = 0;
+ if (tb->fastreuseport &&
+ (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid)))
+ tb->fastreuseport = 0;
+ }
+}
+
/* Obtain a reference to a local port for the given sock,
* if snum is zero it means select any available local port.
*/
@@ -132,6 +159,7 @@ again:
sk->sk_state != TCP_LISTEN) ||
(tb->fastreuseport > 0 &&
sk->sk_reuseport &&
+ !rcu_access_pointer(sk->sk_reuseport_cb) &&
uid_eq(tb->fastuid, uid))) &&
(tb->num_owners < smallest_size || smallest_size == -1)) {
smallest_size = tb->num_owners;
@@ -200,15 +228,18 @@ tb_found:
if (((tb->fastreuse > 0 &&
sk->sk_reuse && sk->sk_state != TCP_LISTEN) ||
(tb->fastreuseport > 0 &&
- sk->sk_reuseport && uid_eq(tb->fastuid, uid))) &&
- smallest_size == -1) {
+ sk->sk_reuseport &&
+ !rcu_access_pointer(sk->sk_reuseport_cb) &&
+ uid_eq(tb->fastuid, uid))) && smallest_size == -1) {
goto success;
} else {
ret = 1;
if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) {
if (((sk->sk_reuse && sk->sk_state != TCP_LISTEN) ||
(tb->fastreuseport > 0 &&
- sk->sk_reuseport && uid_eq(tb->fastuid, uid))) &&
+ sk->sk_reuseport &&
+ !rcu_access_pointer(sk->sk_reuseport_cb) &&
+ uid_eq(tb->fastuid, uid))) &&
smallest_size != -1 && --attempts >= 0) {
spin_unlock(&head->lock);
goto again;
@@ -223,24 +254,9 @@ tb_not_found:
if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep,
net, head, snum)) == NULL)
goto fail_unlock;
- if (hlist_empty(&tb->owners)) {
- if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
- tb->fastreuse = 1;
- else
- tb->fastreuse = 0;
- if (sk->sk_reuseport) {
- tb->fastreuseport = 1;
- tb->fastuid = uid;
- } else
- tb->fastreuseport = 0;
- } else {
- if (tb->fastreuse &&
- (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
- tb->fastreuse = 0;
- if (tb->fastreuseport &&
- (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid)))
- tb->fastreuseport = 0;
- }
+
+ inet_csk_update_fastreuse(tb, sk);
+
success:
if (!inet_csk(sk)->icsk_bind_hash)
inet_bind_hash(sk, tb, snum);
@@ -430,7 +446,7 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk,
(opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
ireq->ir_loc_addr, ireq->ir_rmt_port,
htons(ireq->ir_num), sk->sk_uid);
- security_req_classify_flow(req, flowi4_to_flowi(fl4));
+ security_req_classify_flow(req, flowi4_to_flowi_common(fl4));
rt = ip_route_output_flow(net, fl4, sk);
if (IS_ERR(rt))
goto no_route;
@@ -466,7 +482,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
(opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
ireq->ir_loc_addr, ireq->ir_rmt_port,
htons(ireq->ir_num), sk->sk_uid);
- security_req_classify_flow(req, flowi4_to_flowi(fl4));
+ security_req_classify_flow(req, flowi4_to_flowi_common(fl4));
rt = ip_route_output_flow(net, fl4, sk);
if (IS_ERR(rt))
goto no_route;
@@ -745,6 +761,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct inet_sock *inet = inet_sk(sk);
+ int err = -EADDRINUSE;
reqsk_queue_alloc(&icsk->icsk_accept_queue);
@@ -762,13 +779,14 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
inet->inet_sport = htons(inet->inet_num);
sk_dst_reset(sk);
- sk->sk_prot->hash(sk);
+ err = sk->sk_prot->hash(sk);
- return 0;
+ if (likely(!err))
+ return 0;
}
sk->sk_state = TCP_CLOSE;
- return -EADDRINUSE;
+ return err;
}
EXPORT_SYMBOL_GPL(inet_csk_listen_start);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index fcb83b2a61f0..1582fe5b04c3 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -370,18 +370,18 @@ struct sock *inet_diag_find_one_icsk(struct net *net,
struct sock *sk;
if (req->sdiag_family == AF_INET)
- sk = inet_lookup(net, hashinfo, req->id.idiag_dst[0],
+ sk = inet_lookup(net, hashinfo, NULL, 0, req->id.idiag_dst[0],
req->id.idiag_dport, req->id.idiag_src[0],
req->id.idiag_sport, req->id.idiag_if);
#if IS_ENABLED(CONFIG_IPV6)
else if (req->sdiag_family == AF_INET6) {
if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) &&
ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src))
- sk = inet_lookup(net, hashinfo, req->id.idiag_dst[3],
+ sk = inet_lookup(net, hashinfo, NULL, 0, req->id.idiag_dst[3],
req->id.idiag_dport, req->id.idiag_src[3],
req->id.idiag_sport, req->id.idiag_if);
else
- sk = inet6_lookup(net, hashinfo,
+ sk = inet6_lookup(net, hashinfo, NULL, 0,
(struct in6_addr *)req->id.idiag_dst,
req->id.idiag_dport,
(struct in6_addr *)req->id.idiag_src,
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index ccc5980797fc..b947e4c9be18 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -20,10 +20,12 @@
#include <linux/wait.h>
#include <linux/vmalloc.h>
+#include <net/addrconf.h>
#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
#include <net/secure_seq.h>
#include <net/ip.h>
+#include <net/sock_reuseport.h>
static u32 inet_ehashfn(const struct net *net, const __be32 laddr,
const __u16 lport, const __be32 faddr,
@@ -160,6 +162,7 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child)
return -ENOMEM;
}
}
+ inet_csk_update_fastreuse(tb, child);
}
inet_bind_hash(child, tb, port);
spin_unlock(&head->lock);
@@ -205,6 +208,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
struct sock *__inet_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
+ struct sk_buff *skb, int doff,
const __be32 saddr, __be16 sport,
const __be32 daddr, const unsigned short hnum,
const int dif)
@@ -214,6 +218,7 @@ struct sock *__inet_lookup_listener(struct net *net,
unsigned int hash = inet_lhashfn(net, hnum);
struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
int score, hiscore, matches = 0, reuseport = 0;
+ bool select_ok = true;
u32 phash = 0;
rcu_read_lock();
@@ -229,6 +234,15 @@ begin:
if (reuseport) {
phash = inet_ehashfn(net, daddr, hnum,
saddr, sport);
+ if (select_ok) {
+ struct sock *sk2;
+ sk2 = reuseport_select_sock(sk, phash,
+ skb, doff);
+ if (sk2) {
+ result = sk2;
+ goto found;
+ }
+ }
matches = 1;
}
} else if (score == hiscore && reuseport) {
@@ -246,11 +260,13 @@ begin:
if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE)
goto begin;
if (result) {
+found:
if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
result = NULL;
else if (unlikely(compute_score(result, net, hnum, daddr,
dif) < hiscore)) {
sock_put(result);
+ select_ok = false;
goto begin;
}
}
@@ -449,32 +465,73 @@ bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
}
EXPORT_SYMBOL_GPL(inet_ehash_nolisten);
-void __inet_hash(struct sock *sk, struct sock *osk)
+static int inet_reuseport_add_sock(struct sock *sk,
+ struct inet_listen_hashbucket *ilb,
+ int (*saddr_same)(const struct sock *sk1,
+ const struct sock *sk2,
+ bool match_wildcard))
+{
+ struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash;
+ struct sock *sk2;
+ struct hlist_nulls_node *node;
+ kuid_t uid = sock_i_uid(sk);
+
+ sk_nulls_for_each_rcu(sk2, node, &ilb->head) {
+ if (sk2 != sk &&
+ sk2->sk_family == sk->sk_family &&
+ ipv6_only_sock(sk2) == ipv6_only_sock(sk) &&
+ sk2->sk_bound_dev_if == sk->sk_bound_dev_if &&
+ inet_csk(sk2)->icsk_bind_hash == tb &&
+ sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) &&
+ saddr_same(sk, sk2, false))
+ return reuseport_add_sock(sk, sk2);
+ }
+
+ return reuseport_alloc(sk);
+}
+
+int __inet_hash(struct sock *sk, struct sock *osk,
+ int (*saddr_same)(const struct sock *sk1,
+ const struct sock *sk2,
+ bool match_wildcard))
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
struct inet_listen_hashbucket *ilb;
+ int err = 0;
if (sk->sk_state != TCP_LISTEN) {
inet_ehash_nolisten(sk, osk);
- return;
+ return 0;
}
WARN_ON(!sk_unhashed(sk));
ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
spin_lock(&ilb->lock);
+ if (sk->sk_reuseport) {
+ err = inet_reuseport_add_sock(sk, ilb, saddr_same);
+ if (err)
+ goto unlock;
+ }
__sk_nulls_add_node_rcu(sk, &ilb->head);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+unlock:
spin_unlock(&ilb->lock);
+
+ return err;
}
EXPORT_SYMBOL(__inet_hash);
-void inet_hash(struct sock *sk)
+int inet_hash(struct sock *sk)
{
+ int err = 0;
+
if (sk->sk_state != TCP_CLOSE) {
local_bh_disable();
- __inet_hash(sk, NULL);
+ err = __inet_hash(sk, NULL, ipv4_rcv_saddr_equal);
local_bh_enable();
}
+
+ return err;
}
EXPORT_SYMBOL_GPL(inet_hash);
@@ -493,6 +550,8 @@ void inet_unhash(struct sock *sk)
lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
spin_lock_bh(lock);
+ if (rcu_access_pointer(sk->sk_reuseport_cb))
+ reuseport_detach_sock(sk);
done = __sk_nulls_del_node_init_rcu(sk);
if (done)
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 3e4184088082..63f7bacf628a 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -399,7 +399,10 @@ static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
iph->saddr, iph->daddr, tpi->key);
if (tunnel) {
- skb_pop_mac_header(skb);
+ if (tunnel->dev->type != ARPHRD_NONE)
+ skb_pop_mac_header(skb);
+ else
+ skb_reset_mac_header(skb);
if (tunnel->collect_md) {
__be16 flags;
__be64 tun_id;
@@ -499,6 +502,10 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
static struct sk_buff *gre_handle_offloads(struct sk_buff *skb,
bool csum)
{
+ unsigned char *skb_checksum_start = skb->head + skb->csum_start;
+
+ if (csum && skb_checksum_start < skb->data)
+ return ERR_PTR(-EINVAL);
return iptunnel_handle_offloads(skb, csum,
csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
}
@@ -520,7 +527,8 @@ static struct rtable *gre_get_rt(struct sk_buff *skb,
return ip_route_output_key(net, fl);
}
-static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
+static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
+ __be16 proto)
{
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
@@ -563,7 +571,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
}
flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
- build_header(skb, tunnel_hlen, flags, htons(ETH_P_TEB),
+ build_header(skb, tunnel_hlen, flags, proto,
tunnel_id_to_key(tun_info->key.tun_id), 0);
df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
@@ -605,7 +613,7 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
const struct iphdr *tnl_params;
if (tunnel->collect_md) {
- gre_fb_xmit(skb, dev);
+ gre_fb_xmit(skb, dev, skb->protocol);
return NETDEV_TX_OK;
}
@@ -649,7 +657,7 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
struct ip_tunnel *tunnel = netdev_priv(dev);
if (tunnel->collect_md) {
- gre_fb_xmit(skb, dev);
+ gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
return NETDEV_TX_OK;
}
@@ -851,9 +859,16 @@ static void __gre_tunnel_init(struct net_device *dev)
dev->hw_features |= GRE_FEATURES;
if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
- /* TCP offload with GRE SEQ is not supported. */
- dev->features |= NETIF_F_GSO_SOFTWARE;
- dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+ /* TCP offload with GRE SEQ is not supported, nor
+ * can we support 2 levels of outer headers requiring
+ * an update.
+ */
+ if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
+ (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
+ dev->features |= NETIF_F_GSO_SOFTWARE;
+ dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+ }
+
/* Can use a lockless transmit, unless we generate
* output sequences
*/
@@ -875,7 +890,7 @@ static int ipgre_tunnel_init(struct net_device *dev)
netif_keep_dst(dev);
dev->addr_len = 4;
- if (iph->daddr) {
+ if (iph->daddr && !tunnel->collect_md) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
if (ipv4_is_multicast(iph->daddr)) {
if (!iph->saddr)
@@ -884,8 +899,9 @@ static int ipgre_tunnel_init(struct net_device *dev)
dev->header_ops = &ipgre_header_ops;
}
#endif
- } else
+ } else if (!tunnel->collect_md) {
dev->header_ops = &ipgre_header_ops;
+ }
return ip_tunnel_init(dev);
}
@@ -928,6 +944,11 @@ static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
if (flags & (GRE_VERSION|GRE_ROUTING))
return -EINVAL;
+ if (data[IFLA_GRE_COLLECT_METADATA] &&
+ data[IFLA_GRE_ENCAP_TYPE] &&
+ nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
+ return -EINVAL;
+
return 0;
}
@@ -1001,6 +1022,8 @@ static void ipgre_netlink_parms(struct net_device *dev,
struct ip_tunnel *t = netdev_priv(dev);
t->collect_md = true;
+ if (dev->type == ARPHRD_IPGRE)
+ dev->type = ARPHRD_NONE;
}
}
@@ -1230,6 +1253,7 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
{
struct nlattr *tb[IFLA_MAX + 1];
struct net_device *dev;
+ LIST_HEAD(list_kill);
struct ip_tunnel *t;
int err;
@@ -1245,8 +1269,10 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
t->collect_md = true;
err = ipgre_newlink(net, dev, tb, NULL);
- if (err < 0)
- goto out;
+ if (err < 0) {
+ free_netdev(dev);
+ return ERR_PTR(err);
+ }
/* openvswitch users expect packet sizes to be unrestricted,
* so set the largest MTU we can.
@@ -1255,9 +1281,14 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
if (err)
goto out;
+ err = rtnl_configure_link(dev, NULL);
+ if (err < 0)
+ goto out;
+
return dev;
out:
- free_netdev(dev);
+ ip_tunnel_dellink(dev, &list_kill);
+ unregister_netdevice_many(&list_kill);
return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index eb1834f2682f..317c77eae4a3 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -359,8 +359,31 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
rt = skb_rtable(skb);
if (rt->rt_type == RTN_MULTICAST) {
IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INMCAST, skb->len);
- } else if (rt->rt_type == RTN_BROADCAST)
+ } else if (rt->rt_type == RTN_BROADCAST) {
IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INBCAST, skb->len);
+ } else if (skb->pkt_type == PACKET_BROADCAST ||
+ skb->pkt_type == PACKET_MULTICAST) {
+ struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
+
+ /* RFC 1122 3.3.6:
+ *
+ * When a host sends a datagram to a link-layer broadcast
+ * address, the IP destination address MUST be a legal IP
+ * broadcast or IP multicast address.
+ *
+ * A host SHOULD silently discard a datagram that is received
+ * via a link-layer broadcast (see Section 2.4) but does not
+ * specify an IP multicast or broadcast destination address.
+ *
+ * This doesn't explicitly say L2 *broadcast*, but broadcast is
+ * in a way a form of multicast and the most common use case for
+ * this is 802.11 protecting against cross-station spoofing (the
+ * so-called "hole-196" attack) so do it for both.
+ */
+ if (in_dev &&
+ IN_DEV_ORCONF(in_dev, DROP_UNICAST_IN_L2_MULTICAST))
+ goto drop;
+ }
return dst_input(skb);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index c2380bb1fdab..a0739ef47227 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -73,6 +73,8 @@
#include <net/icmp.h>
#include <net/checksum.h>
#include <net/inetpeer.h>
+#include <net/inet_ecn.h>
+#include <linux/bpf-cgroup.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_bridge.h>
@@ -154,12 +156,19 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk,
iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr);
iph->saddr = saddr;
iph->protocol = sk->sk_protocol;
- if (ip_dont_fragment(sk, &rt->dst)) {
+ /* Do not bother generating IPID for small packets (eg SYNACK) */
+ if (skb->len <= IPV4_MIN_MTU || ip_dont_fragment(sk, &rt->dst)) {
iph->frag_off = htons(IP_DF);
iph->id = 0;
} else {
iph->frag_off = 0;
- __ip_select_ident(net, iph, 1);
+ /* TCP packets here are SYNACK with fat IPv4/TCP options.
+ * Avoid using the hashed IP ident generator.
+ */
+ if (sk->sk_protocol == IPPROTO_TCP)
+ iph->id = (__force __be16)prandom_u32();
+ else
+ __ip_select_ident(net, iph, 1);
}
if (opt && opt->opt.optlen) {
@@ -270,6 +279,13 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk,
static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
unsigned int mtu;
+ int ret;
+
+ ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
+ if (ret) {
+ kfree_skb(skb);
+ return ret;
+ }
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
/* Policy lookup after SNAT yielded a new policy */
@@ -282,12 +298,26 @@ static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *sk
if (skb_is_gso(skb))
return ip_finish_output_gso(net, sk, skb, mtu);
- if (skb->len > mtu || (IPCB(skb)->flags & IPSKB_FRAG_PMTU))
+ if (skb->len > mtu || IPCB(skb)->frag_max_size)
return ip_fragment(net, sk, skb, mtu, ip_finish_output2);
return ip_finish_output2(net, sk, skb);
}
+static int ip_mc_finish_output(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ int ret;
+
+ ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
+ if (ret) {
+ kfree_skb(skb);
+ return ret;
+ }
+
+ return dev_loopback_xmit(net, sk, skb);
+}
+
int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct rtable *rt = skb_rtable(skb);
@@ -325,7 +355,7 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (newskb)
NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING,
net, sk, newskb, NULL, newskb->dev,
- dev_loopback_xmit);
+ ip_mc_finish_output);
}
/* Multicasts with ttl 0 must not go beyond the host */
@@ -341,7 +371,7 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (newskb)
NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING,
net, sk, newskb, NULL, newskb->dev,
- dev_loopback_xmit);
+ ip_mc_finish_output);
}
return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
@@ -375,8 +405,9 @@ static void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4)
{
BUILD_BUG_ON(offsetof(typeof(*fl4), daddr) !=
offsetof(typeof(*fl4), saddr) + sizeof(fl4->saddr));
- memcpy(&iph->saddr, &fl4->saddr,
- sizeof(fl4->saddr) + sizeof(fl4->daddr));
+
+ iph->saddr = fl4->saddr;
+ iph->daddr = fl4->daddr;
}
/* Note: skb->sk can be different from sk, in case of tunnels */
@@ -1145,13 +1176,17 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
rt = *rtp;
if (unlikely(!rt))
return -EFAULT;
- /*
- * We steal reference to this route, caller should not release it
- */
- *rtp = NULL;
+
cork->fragsize = ip_sk_use_pmtu(sk) ?
- dst_mtu(&rt->dst) : rt->dst.dev->mtu;
+ dst_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu);
+
+ if (!inetdev_valid_mtu(cork->fragsize))
+ return -ENETUNREACH;
+
cork->dst = &rt->dst;
+ /* We stole this route, caller should not release it. */
+ *rtp = NULL;
+
cork->length = 0;
cork->ttl = ipc->ttl;
cork->tos = ipc->tos;
@@ -1589,12 +1624,12 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
daddr, saddr,
tcp_hdr(skb)->source, tcp_hdr(skb)->dest,
arg->uid);
- security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
+ security_skb_classify_flow(skb, flowi4_to_flowi_common(&fl4));
rt = ip_route_output_key(net, &fl4);
if (IS_ERR(rt))
return;
- inet_sk(sk)->tos = arg->tos;
+ inet_sk(sk)->tos = arg->tos & ~INET_ECN_MASK;
sk->sk_priority = skb->priority;
sk->sk_protocol = ip_hdr(skb)->protocol;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 1ea36bf778e6..9a7b60d6c670 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -279,9 +279,12 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc,
ipc->ttl = val;
break;
case IP_TOS:
- if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
+ if (cmsg->cmsg_len == CMSG_LEN(sizeof(int)))
+ val = *(int *)CMSG_DATA(cmsg);
+ else if (cmsg->cmsg_len == CMSG_LEN(sizeof(u8)))
+ val = *(u8 *)CMSG_DATA(cmsg);
+ else
return -EINVAL;
- val = *(int *)CMSG_DATA(cmsg);
if (val < 0 || val > 255)
return -EINVAL;
ipc->tos = val;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 91ae061d46ac..dc92780f9e8c 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -98,9 +98,10 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
__be32 remote, __be32 local,
__be32 key)
{
- unsigned int hash;
struct ip_tunnel *t, *cand = NULL;
struct hlist_head *head;
+ struct net_device *ndev;
+ unsigned int hash;
hash = ip_tunnel_hash(key, remote);
head = &itn->tunnels[hash];
@@ -155,11 +156,8 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
cand = t;
}
- if (flags & TUNNEL_NO_KEY)
- goto skip_key_lookup;
-
hlist_for_each_entry_rcu(t, head, hash_node) {
- if (t->parms.i_key != key ||
+ if ((!(flags & TUNNEL_NO_KEY) && t->parms.i_key != key) ||
t->parms.iph.saddr != 0 ||
t->parms.iph.daddr != 0 ||
!(t->dev->flags & IFF_UP))
@@ -171,7 +169,6 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
cand = t;
}
-skip_key_lookup:
if (cand)
return cand;
@@ -179,8 +176,9 @@ skip_key_lookup:
if (t)
return t;
- if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
- return netdev_priv(itn->fb_tunnel_dev);
+ ndev = READ_ONCE(itn->fb_tunnel_dev);
+ if (ndev && ndev->flags & IFF_UP)
+ return netdev_priv(ndev);
return NULL;
}
@@ -710,7 +708,11 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
goto tx_error;
}
- if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) {
+ df = tnl_params->frag_off;
+ if (skb->protocol == htons(ETH_P_IP))
+ df |= (inner_iph->frag_off & htons(IP_DF));
+
+ if (tnl_update_pmtu(dev, skb, rt, df, inner_iph)) {
ip_rt_put(rt);
goto tx_error;
}
@@ -738,10 +740,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
ttl = ip4_dst_hoplimit(&rt->dst);
}
- df = tnl_params->frag_off;
- if (skb->protocol == htons(ETH_P_IP))
- df |= (inner_iph->frag_off&htons(IP_DF));
-
max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
if (max_headroom > dev->needed_headroom)
@@ -1151,10 +1149,8 @@ int ip_tunnel_init(struct net_device *dev)
iph->version = 4;
iph->ihl = 5;
- if (tunnel->collect_md) {
- dev->features |= NETIF_F_NETNS_LOCAL;
+ if (tunnel->collect_md)
netif_keep_dst(dev);
- }
return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);
@@ -1166,9 +1162,9 @@ void ip_tunnel_uninit(struct net_device *dev)
struct ip_tunnel_net *itn;
itn = net_generic(net, tunnel->ip_tnl_net_id);
- /* fb_tunnel_dev will be unregisted in net-exit call. */
- if (itn->fb_tunnel_dev != dev)
- ip_tunnel_del(itn, netdev_priv(dev));
+ ip_tunnel_del(itn, netdev_priv(dev));
+ if (itn->fb_tunnel_dev == dev)
+ WRITE_ONCE(itn->fb_tunnel_dev, NULL);
dst_cache_reset(&tunnel->dst_cache);
}
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 4916d1857b75..4d64aa76d285 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -407,6 +407,12 @@ static const struct lwtunnel_encap_ops ip6_tun_lwt_ops = {
void __init ip_tunnel_core_init(void)
{
+ /* If you land here, check whether increasing ip_tunnel_info's
+ * options_len is a reasonable choice given its usage in front ends
+ * (e.g., it's part of flow keys, etc).
+ */
+ BUILD_BUG_ON(IP_TUNNEL_OPTS_MAX != 255);
+
lwtunnel_encap_add_ops(&ip_tun_lwt_ops, LWTUNNEL_ENCAP_IP);
lwtunnel_encap_add_ops(&ip6_tun_lwt_ops, LWTUNNEL_ENCAP_IP6);
}
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index bbcbbc1cc2cc..abcf431376a0 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -51,7 +51,7 @@ static int vti_net_id __read_mostly;
static int vti_tunnel_init(struct net_device *dev);
static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi,
- int encap_type)
+ int encap_type, bool update_skb_dev)
{
struct ip_tunnel *tunnel;
const struct iphdr *iph = ip_hdr(skb);
@@ -66,6 +66,9 @@ static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi,
XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel;
+ if (update_skb_dev)
+ skb->dev = tunnel->dev;
+
return xfrm_input(skb, nexthdr, spi, encap_type);
}
@@ -75,25 +78,43 @@ drop:
return 0;
}
-static int vti_input_ipip(struct sk_buff *skb, int nexthdr, __be32 spi,
- int encap_type)
+static int vti_input_proto(struct sk_buff *skb, int nexthdr, __be32 spi,
+ int encap_type)
{
- struct ip_tunnel *tunnel;
+ return vti_input(skb, nexthdr, spi, encap_type, false);
+}
+
+static int vti_rcv(struct sk_buff *skb, __be32 spi, bool update_skb_dev)
+{
+ XFRM_SPI_SKB_CB(skb)->family = AF_INET;
+ XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
+
+ return vti_input(skb, ip_hdr(skb)->protocol, spi, 0, update_skb_dev);
+}
+
+static int vti_rcv_proto(struct sk_buff *skb)
+{
+ return vti_rcv(skb, 0, false);
+}
+
+static int vti_rcv_tunnel(struct sk_buff *skb)
+{
+ struct ip_tunnel_net *itn = net_generic(dev_net(skb->dev), vti_net_id);
const struct iphdr *iph = ip_hdr(skb);
- struct net *net = dev_net(skb->dev);
- struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
+ struct ip_tunnel *tunnel;
tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
iph->saddr, iph->daddr, 0);
if (tunnel) {
+ struct tnl_ptk_info tpi = {
+ .proto = htons(ETH_P_IP),
+ };
+
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto drop;
-
- XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel;
-
- skb->dev = tunnel->dev;
-
- return xfrm_input(skb, nexthdr, spi, encap_type);
+ if (iptunnel_pull_header(skb, 0, tpi.proto))
+ goto drop;
+ return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, false);
}
return -EINVAL;
@@ -102,22 +123,6 @@ drop:
return 0;
}
-static int vti_rcv(struct sk_buff *skb)
-{
- XFRM_SPI_SKB_CB(skb)->family = AF_INET;
- XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
-
- return vti_input(skb, ip_hdr(skb)->protocol, 0, 0);
-}
-
-static int vti_rcv_ipip(struct sk_buff *skb)
-{
- XFRM_SPI_SKB_CB(skb)->family = AF_INET;
- XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
-
- return vti_input_ipip(skb, ip_hdr(skb)->protocol, ip_hdr(skb)->saddr, 0);
-}
-
static int vti_rcv_cb(struct sk_buff *skb, int err)
{
unsigned short family;
@@ -195,8 +200,39 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
int err;
if (!dst) {
- dev->stats.tx_carrier_errors++;
- goto tx_error_icmp;
+ switch (skb->protocol) {
+ case htons(ETH_P_IP): {
+ struct rtable *rt;
+
+ fl->u.ip4.flowi4_oif = dev->ifindex;
+ fl->u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC;
+ rt = __ip_route_output_key(dev_net(dev), &fl->u.ip4);
+ if (IS_ERR(rt)) {
+ dev->stats.tx_carrier_errors++;
+ goto tx_error_icmp;
+ }
+ dst = &rt->dst;
+ skb_dst_set(skb, dst);
+ break;
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ case htons(ETH_P_IPV6):
+ fl->u.ip6.flowi6_oif = dev->ifindex;
+ fl->u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC;
+ dst = ip6_route_output(dev_net(dev), NULL, &fl->u.ip6);
+ if (dst->error) {
+ dst_release(dst);
+ dst = NULL;
+ dev->stats.tx_carrier_errors++;
+ goto tx_error_icmp;
+ }
+ skb_dst_set(skb, dst);
+ break;
+#endif
+ default:
+ dev->stats.tx_carrier_errors++;
+ goto tx_error_icmp;
+ }
}
dst_hold(dst);
@@ -421,31 +457,31 @@ static void __net_init vti_fb_tunnel_init(struct net_device *dev)
}
static struct xfrm4_protocol vti_esp4_protocol __read_mostly = {
- .handler = vti_rcv,
- .input_handler = vti_input,
+ .handler = vti_rcv_proto,
+ .input_handler = vti_input_proto,
.cb_handler = vti_rcv_cb,
.err_handler = vti4_err,
.priority = 100,
};
static struct xfrm4_protocol vti_ah4_protocol __read_mostly = {
- .handler = vti_rcv,
- .input_handler = vti_input,
+ .handler = vti_rcv_proto,
+ .input_handler = vti_input_proto,
.cb_handler = vti_rcv_cb,
.err_handler = vti4_err,
.priority = 100,
};
static struct xfrm4_protocol vti_ipcomp4_protocol __read_mostly = {
- .handler = vti_rcv,
- .input_handler = vti_input,
+ .handler = vti_rcv_proto,
+ .input_handler = vti_input_proto,
.cb_handler = vti_rcv_cb,
.err_handler = vti4_err,
.priority = 100,
};
static struct xfrm_tunnel ipip_handler __read_mostly = {
- .handler = vti_rcv_ipip,
+ .handler = vti_rcv_tunnel,
.err_handler = vti4_err,
.priority = 0,
};
@@ -635,10 +671,8 @@ static int __init vti_init(void)
msg = "ipip tunnel";
err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
- if (err < 0) {
- pr_info("%s: cant't register tunnel\n",__func__);
+ if (err < 0)
goto xfrm_tunnel_failed;
- }
msg = "netlink interface";
err = rtnl_link_register(&vti_link_ops);
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 60f564db25a3..173777aa5add 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -890,7 +890,7 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d
/*
- * Copy BOOTP-supplied string if not already set.
+ * Copy BOOTP-supplied string
*/
static int __init ic_bootp_string(char *dest, char *src, int len, int max)
{
@@ -941,12 +941,15 @@ static void __init ic_do_bootp_ext(u8 *ext)
}
break;
case 12: /* Host name */
- ic_bootp_string(utsname()->nodename, ext+1, *ext,
- __NEW_UTS_LEN);
- ic_host_name_set = 1;
+ if (!ic_host_name_set) {
+ ic_bootp_string(utsname()->nodename, ext+1, *ext,
+ __NEW_UTS_LEN);
+ ic_host_name_set = 1;
+ }
break;
case 15: /* Domain name (DNS) */
- ic_bootp_string(ic_domain, ext+1, *ext, sizeof(ic_domain));
+ if (!ic_domain[0])
+ ic_bootp_string(ic_domain, ext+1, *ext, sizeof(ic_domain));
break;
case 17: /* Root path */
if (!root_server_path[0])
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index cbe630aab44a..ea164fd61a7c 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -488,11 +488,12 @@ next:
return 1;
}
-static inline int check_target(struct arpt_entry *e, const char *name)
+static int check_target(struct arpt_entry *e, struct net *net, const char *name)
{
struct xt_entry_target *t = arpt_get_target(e);
int ret;
struct xt_tgchk_param par = {
+ .net = net,
.table = name,
.entryinfo = e,
.target = t->u.kernel.target,
@@ -510,8 +511,9 @@ static inline int check_target(struct arpt_entry *e, const char *name)
return 0;
}
-static inline int
-find_check_entry(struct arpt_entry *e, const char *name, unsigned int size,
+static int
+find_check_entry(struct arpt_entry *e, struct net *net, const char *name,
+ unsigned int size,
struct xt_percpu_counter_alloc_state *alloc_state)
{
struct xt_entry_target *t;
@@ -531,7 +533,7 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size,
}
t->u.kernel.target = target;
- ret = check_target(e, name);
+ ret = check_target(e, net, name);
if (ret)
goto err;
return 0;
@@ -632,7 +634,9 @@ static inline void cleanup_entry(struct arpt_entry *e)
/* Checks and translates the user-supplied table segment (held in
* newinfo).
*/
-static int translate_table(struct xt_table_info *newinfo, void *entry0,
+static int translate_table(struct net *net,
+ struct xt_table_info *newinfo,
+ void *entry0,
const struct arpt_replace *repl)
{
struct xt_percpu_counter_alloc_state alloc_state = { 0 };
@@ -709,7 +713,7 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
/* Finally, each sanity check must pass */
i = 0;
xt_entry_foreach(iter, entry0, newinfo->size) {
- ret = find_check_entry(iter, repl->name, repl->size,
+ ret = find_check_entry(iter, net, repl->name, repl->size,
&alloc_state);
if (ret != 0)
break;
@@ -1114,7 +1118,7 @@ static int do_replace(struct net *net, const void __user *user,
goto free_newinfo;
}
- ret = translate_table(newinfo, loc_cpu_entry, &tmp);
+ ret = translate_table(net, newinfo, loc_cpu_entry, &tmp);
if (ret != 0)
goto free_newinfo;
@@ -1301,7 +1305,8 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr,
}
}
-static int translate_compat_table(struct xt_table_info **pinfo,
+static int translate_compat_table(struct net *net,
+ struct xt_table_info **pinfo,
void **pentry0,
const struct compat_arpt_replace *compatr)
{
@@ -1344,6 +1349,8 @@ static int translate_compat_table(struct xt_table_info **pinfo,
if (!newinfo)
goto out_unlock;
+ memset(newinfo->entries, 0, size);
+
newinfo->number = compatr->num_entries;
for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
newinfo->hook_entry[i] = compatr->hook_entry[i];
@@ -1371,7 +1378,7 @@ static int translate_compat_table(struct xt_table_info **pinfo,
repl.num_counters = 0;
repl.counters = NULL;
repl.size = newinfo->size;
- ret = translate_table(newinfo, entry1, &repl);
+ ret = translate_table(net, newinfo, entry1, &repl);
if (ret)
goto free_newinfo;
@@ -1426,7 +1433,7 @@ static int compat_do_replace(struct net *net, void __user *user,
goto free_newinfo;
}
- ret = translate_compat_table(&newinfo, &loc_cpu_entry, &tmp);
+ ret = translate_compat_table(net, &newinfo, &loc_cpu_entry, &tmp);
if (ret != 0)
goto free_newinfo;
@@ -1696,7 +1703,7 @@ struct xt_table *arpt_register_table(struct net *net,
loc_cpu_entry = newinfo->entries;
memcpy(loc_cpu_entry, repl->entries, repl->size);
- ret = translate_table(newinfo, loc_cpu_entry, repl);
+ ret = translate_table(net, newinfo, loc_cpu_entry, repl);
duprintf("arpt_register_table: translate table gives %d\n", ret);
if (ret != 0)
goto out_free;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 53d664a7774c..73b1d8e64658 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -964,10 +964,6 @@ copy_entries_to_user(unsigned int total_size,
return PTR_ERR(counters);
loc_cpu_entry = private->entries;
- if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
- ret = -EFAULT;
- goto free_counters;
- }
/* FIXME: use iterator macros --RR */
/* ... then go back and fix counters and names */
@@ -977,6 +973,10 @@ copy_entries_to_user(unsigned int total_size,
const struct xt_entry_target *t;
e = (struct ipt_entry *)(loc_cpu_entry + off);
+ if (copy_to_user(userptr + off, e, sizeof(*e))) {
+ ret = -EFAULT;
+ goto free_counters;
+ }
if (copy_to_user(userptr + off
+ offsetof(struct ipt_entry, counters),
&counters[num],
@@ -990,23 +990,14 @@ copy_entries_to_user(unsigned int total_size,
i += m->u.match_size) {
m = (void *)e + i;
- if (copy_to_user(userptr + off + i
- + offsetof(struct xt_entry_match,
- u.user.name),
- m->u.kernel.match->name,
- strlen(m->u.kernel.match->name)+1)
- != 0) {
+ if (xt_match_to_user(m, userptr + off + i)) {
ret = -EFAULT;
goto free_counters;
}
}
t = ipt_get_target_c(e);
- if (copy_to_user(userptr + off + e->target_offset
- + offsetof(struct xt_entry_target,
- u.user.name),
- t->u.kernel.target->name,
- strlen(t->u.kernel.target->name)+1) != 0) {
+ if (xt_target_to_user(t, userptr + off + e->target_offset)) {
ret = -EFAULT;
goto free_counters;
}
@@ -1610,6 +1601,8 @@ translate_compat_table(struct net *net,
if (!newinfo)
goto out_unlock;
+ memset(newinfo->entries, 0, size);
+
newinfo->number = compatr->num_entries;
for (i = 0; i < NF_INET_NUMHOOKS; i++) {
newinfo->hook_entry[i] = compatr->hook_entry[i];
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 16599bae11dd..28bcde0a2749 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -478,6 +478,7 @@ static struct xt_target clusterip_tg_reg __read_mostly = {
.checkentry = clusterip_tg_check,
.destroy = clusterip_tg_destroy,
.targetsize = sizeof(struct ipt_clusterip_tgt_info),
+ .usersize = offsetof(struct ipt_clusterip_tgt_info, config),
#ifdef CONFIG_COMPAT
.compatsize = sizeof(struct compat_ipt_clusterip_tgt_info),
#endif /* CONFIG_COMPAT */
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index 78cc64eddfc1..32a363465e0a 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -92,7 +92,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
flow.saddr = rpfilter_get_saddr(iph->daddr);
flow.flowi4_oif = 0;
flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
- flow.flowi4_tos = RT_TOS(iph->tos);
+ flow.flowi4_tos = iph->tos & IPTOS_RT_MASK;
flow.flowi4_scope = RT_SCOPE_UNIVERSE;
return rpfilter_lookup_reverse(par->net, &flow, par->in, info->flags) ^ invert;
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index b3ca21b2ba9b..ddbf93e70069 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -156,8 +156,7 @@ pptp_outbound_pkt(struct sk_buff *skb,
break;
default:
pr_debug("unknown outbound packet 0x%04x:%s\n", msg,
- msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] :
- pptp_msg_name[0]);
+ pptp_msg_name(msg));
/* fall through */
case PPTP_SET_LINK_INFO:
/* only need to NAT in case PAC is behind NAT box */
@@ -250,9 +249,7 @@ pptp_inbound_pkt(struct sk_buff *skb,
pcid_off = offsetof(union pptp_ctrl_union, setlink.peersCallID);
break;
default:
- pr_debug("unknown inbound packet %s\n",
- msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] :
- pptp_msg_name[0]);
+ pr_debug("unknown inbound packet %s\n", pptp_msg_name(msg));
/* fall through */
case PPTP_START_SESSION_REQUEST:
case PPTP_START_SESSION_REPLY:
diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c
index bf855e64fc45..0c01a270bf9f 100644
--- a/net/ipv4/netfilter/nft_dup_ipv4.c
+++ b/net/ipv4/netfilter/nft_dup_ipv4.c
@@ -28,7 +28,7 @@ static void nft_dup_ipv4_eval(const struct nft_expr *expr,
struct in_addr gw = {
.s_addr = (__force __be32)regs->data[priv->sreg_addr],
};
- int oif = regs->data[priv->sreg_dev];
+ int oif = priv->sreg_dev ? regs->data[priv->sreg_dev] : -1;
nf_dup_ipv4(pkt->net, pkt->skb, pkt->hook, &gw, oif);
}
@@ -59,7 +59,9 @@ static int nft_dup_ipv4_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
struct nft_dup_ipv4 *priv = nft_expr_priv(expr);
- if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr) ||
+ if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr))
+ goto nla_put_failure;
+ if (priv->sreg_dev &&
nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev))
goto nla_put_failure;
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index c0b633ee6c1e..cc04672d4d8c 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -145,10 +145,12 @@ fail:
}
EXPORT_SYMBOL_GPL(ping_get_port);
-void ping_hash(struct sock *sk)
+int ping_hash(struct sock *sk)
{
pr_debug("ping_hash(sk->port=%u)\n", inet_sk(sk)->inet_num);
BUG(); /* "Please do not press this button again." */
+
+ return 0;
}
void ping_unhash(struct sock *sk)
@@ -803,7 +805,10 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
inet_sk_flowi_flags(sk), faddr, saddr, 0, 0,
sk->sk_uid);
- security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
+ fl4.fl4_icmp_type = user_icmph.type;
+ fl4.fl4_icmp_code = user_icmph.code;
+
+ security_sk_classify_flow(sk, flowi4_to_flowi_common(&fl4));
rt = ip_route_output_flow(net, &fl4, sk);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
@@ -976,6 +981,7 @@ bool ping_rcv(struct sk_buff *skb)
struct sock *sk;
struct net *net = dev_net(skb->dev);
struct icmphdr *icmph = icmp_hdr(skb);
+ bool rc = false;
/* We assume the packet has already been checked by icmp_rcv */
@@ -990,14 +996,15 @@ bool ping_rcv(struct sk_buff *skb)
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
pr_debug("rcv on socket %p\n", sk);
- if (skb2)
- ping_queue_rcv_skb(sk, skb2);
+ if (skb2 && !ping_queue_rcv_skb(sk, skb2))
+ rc = true;
sock_put(sk);
- return true;
}
- pr_debug("no socket, dropping\n");
- return false;
+ if (!rc)
+ pr_debug("no socket, dropping\n");
+
+ return rc;
}
EXPORT_SYMBOL_GPL(ping_rcv);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 01f8d24f3af7..fc2fb170171b 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -93,7 +93,7 @@ static struct raw_hashinfo raw_v4_hashinfo = {
.lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock),
};
-void raw_hash_sk(struct sock *sk)
+int raw_hash_sk(struct sock *sk)
{
struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;
struct hlist_head *head;
@@ -104,6 +104,8 @@ void raw_hash_sk(struct sock *sk)
sk_add_node(sk, head);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
write_unlock_bh(&h->lock);
+
+ return 0;
}
EXPORT_SYMBOL_GPL(raw_hash_sk);
@@ -507,9 +509,11 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
goto out;
/* hdrincl should be READ_ONCE(inet->hdrincl)
- * but READ_ONCE() doesn't work with bit fields
+ * but READ_ONCE() doesn't work with bit fields.
+ * Doing this indirectly yields the same result.
*/
hdrincl = inet->hdrincl;
+ hdrincl = READ_ONCE(hdrincl);
/*
* Check the flags.
*/
@@ -626,7 +630,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
goto done;
}
- security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
+ security_sk_classify_flow(sk, flowi4_to_flowi_common(&fl4));
rt = ip_route_output_flow(net, &fl4, sk);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
@@ -707,6 +711,7 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
int ret = -EINVAL;
int chk_addr_ret;
+ lock_sock(sk);
if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in))
goto out;
chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
@@ -719,7 +724,9 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_saddr = 0; /* Use device */
sk_dst_reset(sk);
ret = 0;
-out: return ret;
+out:
+ release_sock(sk);
+ return ret;
}
/*
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6e17149b0983..f79f9a6dd046 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -70,6 +70,7 @@
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
+#include <linux/bootmem.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
@@ -131,8 +132,6 @@ static int ip_rt_min_advmss __read_mostly = 256;
static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
-static int ip_min_valid_pmtu __read_mostly = IPV4_MIN_MTU;
-
/*
* Interface to generic destination cache.
*/
@@ -273,6 +272,7 @@ static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
*pos = cpu+1;
return &per_cpu(rt_cache_stat, cpu);
}
+ (*pos)++;
return NULL;
}
@@ -464,8 +464,10 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
return neigh_create(&arp_tbl, pkey, dev);
}
-#define IP_IDENTS_SZ 2048u
-
+/* Hash tables of size 2048..262144 depending on RAM size.
+ * Each bucket uses 8 bytes.
+ */
+static u32 ip_idents_mask __read_mostly;
static atomic_t *ip_idents __read_mostly;
static u32 *ip_tstamps __read_mostly;
@@ -475,15 +477,23 @@ static u32 *ip_tstamps __read_mostly;
*/
u32 ip_idents_reserve(u32 hash, int segs)
{
- u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ;
- atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
- u32 old = ACCESS_ONCE(*p_tstamp);
- u32 now = (u32)jiffies;
+ u32 bucket, old, now = (u32)jiffies;
+ atomic_t *p_id;
+ u32 *p_tstamp;
u32 delta = 0;
+ bucket = hash & ip_idents_mask;
+ p_tstamp = ip_tstamps + bucket;
+ p_id = ip_idents + bucket;
+ old = ACCESS_ONCE(*p_tstamp);
+
if (old != now && cmpxchg(p_tstamp, old, now) == old)
delta = prandom_u32_max(now - old);
+ /* If UBSAN reports an error here, please make sure your compiler
+ * supports -fno-strict-overflow before reporting it: that was a bug
+ * in UBSAN, and it has been fixed in GCC-8.
+ */
return atomic_add_return(segs + delta, p_id) - segs;
}
EXPORT_SYMBOL(ip_idents_reserve);
@@ -590,18 +600,25 @@ static void fnhe_flush_routes(struct fib_nh_exception *fnhe)
}
}
-static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
+static void fnhe_remove_oldest(struct fnhe_hash_bucket *hash)
{
- struct fib_nh_exception *fnhe, *oldest;
+ struct fib_nh_exception __rcu **fnhe_p, **oldest_p;
+ struct fib_nh_exception *fnhe, *oldest = NULL;
- oldest = rcu_dereference(hash->chain);
- for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
- fnhe = rcu_dereference(fnhe->fnhe_next)) {
- if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
+ for (fnhe_p = &hash->chain; ; fnhe_p = &fnhe->fnhe_next) {
+ fnhe = rcu_dereference_protected(*fnhe_p,
+ lockdep_is_held(&fnhe_lock));
+ if (!fnhe)
+ break;
+ if (!oldest ||
+ time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) {
oldest = fnhe;
+ oldest_p = fnhe_p;
+ }
}
fnhe_flush_routes(oldest);
- return oldest;
+ *oldest_p = oldest->fnhe_next;
+ kfree_rcu(oldest, rcu);
}
static inline u32 fnhe_hashfun(__be32 daddr)
@@ -678,16 +695,21 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
if (rt)
fill_route_from_fnhe(rt, fnhe);
} else {
- if (depth > FNHE_RECLAIM_DEPTH)
- fnhe = fnhe_oldest(hash);
- else {
- fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
- if (!fnhe)
- goto out_unlock;
-
- fnhe->fnhe_next = hash->chain;
- rcu_assign_pointer(hash->chain, fnhe);
+ /* Randomize max depth to avoid some side channels attacks. */
+ int max_depth = FNHE_RECLAIM_DEPTH +
+ prandom_u32_max(FNHE_RECLAIM_DEPTH);
+
+ while (depth > max_depth) {
+ fnhe_remove_oldest(hash);
+ depth--;
}
+
+ fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
+ if (!fnhe)
+ goto out_unlock;
+
+ fnhe->fnhe_next = hash->chain;
+
fnhe->fnhe_genid = genid;
fnhe->fnhe_daddr = daddr;
fnhe->fnhe_gw = gw;
@@ -695,6 +717,8 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
fnhe->fnhe_mtu_locked = lock;
fnhe->fnhe_expires = expires;
+ rcu_assign_pointer(hash->chain, fnhe);
+
/* Exception created; mark the cached routes for the nexthop
* stale, so anyone caching it rechecks if this exception
* applies to them.
@@ -898,7 +922,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
/* Check for load limit; set rate_last to the latest sent
* redirect.
*/
- if (peer->rate_tokens == 0 ||
+ if (peer->n_redirects == 0 ||
time_after(jiffies,
(peer->rate_last +
(ip_rt_redirect_load << peer->n_redirects)))) {
@@ -989,21 +1013,22 @@ out: kfree_skb(skb);
static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
{
struct dst_entry *dst = &rt->dst;
+ u32 old_mtu = ipv4_mtu(dst);
struct fib_result res;
bool lock = false;
if (ip_mtu_locked(dst))
return;
- if (ipv4_mtu(dst) < mtu)
+ if (old_mtu < mtu)
return;
if (mtu < ip_rt_min_pmtu) {
lock = true;
- mtu = ip_rt_min_pmtu;
+ mtu = min(old_mtu, ip_rt_min_pmtu);
}
- if (rt->rt_pmtu == mtu &&
+ if (rt->rt_pmtu == mtu && !lock &&
time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
return;
@@ -1501,9 +1526,9 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
#endif
}
-static struct rtable *rt_dst_alloc(struct net_device *dev,
- unsigned int flags, u16 type,
- bool nopolicy, bool noxfrm, bool will_cache)
+struct rtable *rt_dst_alloc(struct net_device *dev,
+ unsigned int flags, u16 type,
+ bool nopolicy, bool noxfrm, bool will_cache)
{
struct rtable *rt;
@@ -1532,6 +1557,7 @@ static struct rtable *rt_dst_alloc(struct net_device *dev,
return rt;
}
+EXPORT_SYMBOL(rt_dst_alloc);
/* called in rcu_read_lock() section */
static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
@@ -2721,6 +2747,7 @@ void ip_rt_multicast_event(struct in_device *in_dev)
static int ip_rt_gc_interval __read_mostly = 60 * HZ;
static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
static int ip_rt_gc_elasticity __read_mostly = 8;
+static int ip_min_valid_pmtu __read_mostly = IPV4_MIN_MTU;
static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write,
void __user *buffer,
@@ -2947,18 +2974,27 @@ struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
int __init ip_rt_init(void)
{
+ void *idents_hash;
int rc = 0;
int cpu;
- ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL);
- if (!ip_idents)
- panic("IP: failed to allocate ip_idents\n");
+ /* For modern hosts, this will use 2 MB of memory */
+ idents_hash = alloc_large_system_hash("IP idents",
+ sizeof(*ip_idents) + sizeof(*ip_tstamps),
+ 0,
+ 16, /* one bucket per 64 KB */
+ 0,
+ NULL,
+ &ip_idents_mask,
+ 2048,
+ 256*1024);
+
+ ip_idents = idents_hash;
- prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));
+ prandom_bytes(ip_idents, (ip_idents_mask + 1) * sizeof(*ip_idents));
- ip_tstamps = kcalloc(IP_IDENTS_SZ, sizeof(*ip_tstamps), GFP_KERNEL);
- if (!ip_tstamps)
- panic("IP: failed to allocate ip_tstamps\n");
+ ip_tstamps = idents_hash + (ip_idents_mask + 1) * sizeof(*ip_idents);
+ memset(ip_tstamps, 0, (ip_idents_mask + 1) * sizeof(*ip_tstamps));
for_each_possible_cpu(cpu) {
struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 4cebe913a0b3..6acc73d3c83d 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -307,7 +307,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
__u32 cookie = ntohl(th->ack_seq) - 1;
struct sock *ret = sk;
struct request_sock *req;
- int mss;
+ int full_space, mss;
struct rtable *rt;
__u8 rcv_wscale;
struct flowi4 fl4;
@@ -382,7 +382,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
inet_sk_flowi_flags(sk),
opt->srr ? opt->faddr : ireq->ir_rmt_addr,
ireq->ir_loc_addr, th->source, th->dest, sk->sk_uid);
- security_req_classify_flow(req, flowi4_to_flowi(&fl4));
+ security_req_classify_flow(req, flowi4_to_flowi_common(&fl4));
rt = ip_route_output_key(sock_net(sk), &fl4);
if (IS_ERR(rt)) {
reqsk_free(req);
@@ -391,8 +391,13 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
/* Try to redo what tcp_v4_send_synack did. */
req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
+ /* limit the window selection if the user enforces a smaller rx buffer */
+ full_space = tcp_full_space(sk);
+ if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
+ (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0))
+ req->rsk_window_clamp = full_space;
- tcp_select_initial_window(tcp_full_space(sk), req->mss,
+ tcp_select_initial_window(full_space, req->mss,
&req->rsk_rcv_wnd, &req->rsk_window_clamp,
ireq->wscale_ok, &rcv_wscale,
dst_metric(&rt->dst, RTAX_INITRWND));
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index abae27d60f67..f5cfc467c920 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2268,8 +2268,12 @@ int tcp_disconnect(struct sock *sk, int flags)
tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
tp->snd_cwnd_cnt = 0;
tp->window_clamp = 0;
+ if (icsk->icsk_ca_ops->release)
+ icsk->icsk_ca_ops->release(sk);
+ memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
tcp_set_ca_state(sk, TCP_CA_Open);
tcp_clear_retrans(tp);
+ tp->total_retrans = 0;
inet_csk_delack_init(sk);
/* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0
* issue in __tcp_select_window()
@@ -2281,6 +2285,8 @@ int tcp_disconnect(struct sock *sk, int flags)
dst_release(sk->sk_rx_dst);
sk->sk_rx_dst = NULL;
tcp_saved_syn_free(tp);
+ tp->segs_in = 0;
+ tp->segs_out = 0;
tp->bytes_acked = 0;
tp->bytes_received = 0;
@@ -2599,10 +2605,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
#ifdef CONFIG_TCP_MD5SIG
case TCP_MD5SIG:
- if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))
- err = tp->af_specific->md5_parse(sk, optval, optlen);
- else
- err = -EINVAL;
+ err = tp->af_specific->md5_parse(sk, optval, optlen);
break;
#endif
case TCP_USER_TIMEOUT:
@@ -3099,9 +3102,12 @@ EXPORT_SYMBOL(tcp_md5_hash_skb_data);
int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *key)
{
+ u8 keylen = READ_ONCE(key->keylen); /* snapshot once; paired with WRITE_ONCE() in tcp_md5_do_add */
struct scatterlist sg;
- sg_init_one(&sg, key->key, key->keylen);
+ sg_init_one(&sg, key->key, keylen);
+
+ /* tcp_md5_do_add() might change key->key under us; hash exactly the keylen bytes sg was sized with */
- return crypto_hash_update(&hp->md5_desc, &sg, key->keylen);
+ return crypto_hash_update(&hp->md5_desc, &sg, keylen);
}
EXPORT_SYMBOL(tcp_md5_hash_key);
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index aafe68134763..f6b64efc22e8 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -201,7 +201,7 @@ static void tcp_reinit_congestion_control(struct sock *sk,
icsk->icsk_ca_ops = ca;
icsk->icsk_ca_setsockopt = 1;
- if (sk->sk_state != TCP_CLOSE)
+ if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
tcp_init_congestion_control(sk);
}
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 448c2615fece..e0b3b194b604 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -342,8 +342,6 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
return;
if (tcp_in_slow_start(tp)) {
- if (hystart && after(ack, ca->end_seq))
- bictcp_hystart_reset(sk);
acked = tcp_slow_start(tp, acked);
if (!acked)
return;
@@ -394,6 +392,9 @@ static void hystart_update(struct sock *sk, u32 delay)
if (ca->found & hystart_detect)
return;
+ if (after(tp->snd_una, ca->end_seq))
+ bictcp_hystart_reset(sk);
+
if (hystart_detect & HYSTART_ACK_TRAIN) {
u32 now = bictcp_clock();
@@ -414,6 +415,8 @@ static void hystart_update(struct sock *sk, u32 delay)
if (hystart_detect & HYSTART_DELAY) {
/* obtain the minimum delay of more than sampling packets */
+ if (ca->curr_rtt > delay)
+ ca->curr_rtt = delay;
if (ca->sample_cnt < HYSTART_MIN_SAMPLES) {
if (ca->curr_rtt == 0 || ca->curr_rtt > delay)
ca->curr_rtt = delay;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2f4896377fbc..14ed25f50ed5 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -896,9 +896,10 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
/* This must be called before lost_out is incremented */
static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
{
- if (!tp->retransmit_skb_hint ||
- before(TCP_SKB_CB(skb)->seq,
- TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
+ if ((!tp->retransmit_skb_hint && tp->retrans_out >= tp->lost_out) ||
+ (tp->retransmit_skb_hint &&
+ before(TCP_SKB_CB(skb)->seq,
+ TCP_SKB_CB(tp->retransmit_skb_hint)->seq)))
tp->retransmit_skb_hint = skb;
if (!tp->lost_out ||
@@ -1686,8 +1687,11 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
}
/* Ignore very old stuff early */
- if (!after(sp[used_sacks].end_seq, prior_snd_una))
+ if (!after(sp[used_sacks].end_seq, prior_snd_una)) {
+ if (i == 0)
+ first_sack_index = -1;
continue;
+ }
used_sacks++;
}
@@ -2923,7 +2927,10 @@ static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us)
{
const u32 now = tcp_time_stamp, wlen = sysctl_tcp_min_rtt_wlen * HZ;
struct rtt_meas *m = tcp_sk(sk)->rtt_min;
- struct rtt_meas rttm = { .rtt = (rtt_us ? : 1), .ts = now };
+ struct rtt_meas rttm = {
+ .rtt = likely(rtt_us) ? rtt_us : jiffies_to_usecs(1),
+ .ts = now,
+ };
u32 elapsed;
/* Check if the new measurement updates the 1st, 2nd, or 3rd choices */
@@ -3511,10 +3518,8 @@ static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
}
}
-/* This routine deals with acks during a TLP episode.
- * We mark the end of a TLP episode on receiving TLP dupack or when
- * ack is after tlp_high_seq.
- * Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe.
+/* This routine deals with acks during a TLP episode and ends an episode by
+ * resetting tlp_high_seq. Ref: TLP algorithm in draft-ietf-tcpm-rack
*/
static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
{
@@ -3523,7 +3528,10 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
if (before(ack, tp->tlp_high_seq))
return;
- if (flag & FLAG_DSACKING_ACK) {
+ if (!tp->tlp_retrans) {
+ /* TLP of new data has been acknowledged */
+ tp->tlp_high_seq = 0;
+ } else if (flag & FLAG_DSACKING_ACK) {
/* This DSACK means original and TLP probe arrived; no loss */
tp->tlp_high_seq = 0;
} else if (after(ack, tp->tlp_high_seq)) {
@@ -4459,7 +4467,11 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
if (tcp_ooo_try_coalesce(sk, tp->ooo_last_skb,
skb, &fragstolen)) {
coalesce_done:
- tcp_grow_window(sk, skb);
+ /* For non sack flows, do not grow window to force DUPACK
+ * and trigger fast retransmit.
+ */
+ if (tcp_is_sack(tp))
+ tcp_grow_window(sk, skb);
kfree_skb_partial(skb, fragstolen);
skb = NULL;
goto add_sack;
@@ -4539,7 +4551,11 @@ add_sack:
tcp_sack_new_ofo_skb(sk, seq, end_seq);
end:
if (skb) {
- tcp_grow_window(sk, skb);
+ /* For non sack flows, do not grow window to force DUPACK
+ * and trigger fast retransmit.
+ */
+ if (tcp_is_sack(tp))
+ tcp_grow_window(sk, skb);
skb_set_owner_r(skb, sk);
}
}
@@ -5517,6 +5533,8 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
tcp_data_snd_check(sk);
if (!inet_csk_ack_scheduled(sk))
goto no_ack;
+ } else {
+ tcp_update_wl(tp, TCP_SKB_CB(skb)->seq);
}
__tcp_ack_snd_check(sk, 0);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 1763e02103a3..4a9d8c117794 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -277,7 +277,7 @@ void tcp_v4_mtu_reduced(struct sock *sk)
if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
return;
- mtu = tcp_sk(sk)->mtu_info;
+ mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
dst = inet_csk_update_pmtu(sk, mtu);
if (!dst)
return;
@@ -444,7 +444,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
if (sk->sk_state == TCP_LISTEN)
goto out;
- tp->mtu_info = info;
+ WRITE_ONCE(tp->mtu_info, info);
if (!sock_owned_by_user(sk)) {
tcp_v4_mtu_reduced(sk);
} else {
@@ -643,8 +643,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
* Incoming packet is checked with md5 hash with finding key,
* no RST generated if md5 hash doesn't match.
*/
- sk1 = __inet_lookup_listener(net,
- &tcp_hashinfo, ip_hdr(skb)->saddr,
+ sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
+ ip_hdr(skb)->saddr,
th->source, ip_hdr(skb)->daddr,
ntohs(th->source), inet_iif(skb));
/* don't send rst if it can't find key */
@@ -935,9 +935,18 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
key = tcp_md5_do_lookup(sk, addr, family);
if (key) {
- /* Pre-existing entry - just update that one. */
+ /* Pre-existing entry - just update that one.
+ * Note that the key might be used concurrently.
+ */
memcpy(key->key, newkey, newkeylen);
- key->keylen = newkeylen;
+
+ /* Pairs with READ_ONCE() in tcp_md5_hash_key().
+ * Also note that a reader could catch new key->keylen value
+ * but old key->key[], this is the reason we use __GFP_ZERO
+ * at sock_kmalloc() time below these lines.
+ */
+ WRITE_ONCE(key->keylen, newkeylen);
+
return 0;
}
@@ -954,7 +963,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
rcu_assign_pointer(tp->md5sig_info, md5sig);
}
- key = sock_kmalloc(sk, sizeof(*key), gfp);
+ key = sock_kmalloc(sk, sizeof(*key), gfp | __GFP_ZERO);
if (!key)
return -ENOMEM;
if (!tcp_alloc_md5sig_pool()) {
@@ -1622,7 +1631,8 @@ int tcp_v4_rcv(struct sk_buff *skb)
TCP_SKB_CB(skb)->sacked = 0;
lookup:
- sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
+ sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
+ th->dest);
if (!sk)
goto no_tcp_socket;
@@ -1745,7 +1755,8 @@ do_time_wait:
switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
case TCP_TW_SYN: {
struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
- &tcp_hashinfo,
+ &tcp_hashinfo, skb,
+ __tcp_hdrlen(th),
iph->saddr, th->source,
iph->daddr, th->dest,
inet_iif(skb));
@@ -2035,6 +2046,7 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
static void *tcp_seek_last_pos(struct seq_file *seq)
{
struct tcp_iter_state *st = seq->private;
+ int bucket = st->bucket;
int offset = st->offset;
int orig_num = st->num;
void *rc = NULL;
@@ -2045,7 +2057,7 @@ static void *tcp_seek_last_pos(struct seq_file *seq)
break;
st->state = TCP_SEQ_STATE_LISTENING;
rc = listening_get_next(seq, NULL);
- while (offset-- && rc)
+ while (offset-- && rc && bucket == st->bucket)
rc = listening_get_next(seq, rc);
if (rc)
break;
@@ -2056,7 +2068,7 @@ static void *tcp_seek_last_pos(struct seq_file *seq)
if (st->bucket > tcp_hashinfo.ehash_mask)
break;
rc = established_get_first(seq);
- while (offset-- && rc)
+ while (offset-- && rc && bucket == st->bucket)
rc = established_get_next(seq, rc);
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e65c211d3f4b..17388adedc7d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -710,8 +710,9 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
min_t(unsigned int, eff_sacks,
(remaining - TCPOLEN_SACK_BASE_ALIGNED) /
TCPOLEN_SACK_PERBLOCK);
- size += TCPOLEN_SACK_BASE_ALIGNED +
- opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
+ if (likely(opts->num_sack_blocks))
+ size += TCPOLEN_SACK_BASE_ALIGNED +
+ opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
}
return size;
@@ -1352,6 +1353,7 @@ int tcp_mtu_to_mss(struct sock *sk, int pmtu)
return __tcp_mtu_to_mss(sk, pmtu) -
(tcp_sk(sk)->tcp_header_len - sizeof(struct tcphdr));
}
+EXPORT_SYMBOL(tcp_mtu_to_mss);
/* Inverse of above */
int tcp_mss_to_mtu(struct sock *sk, int mss)
@@ -1500,7 +1502,8 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
* window, and remember whether we were cwnd-limited then.
*/
if (!before(tp->snd_una, tp->max_packets_seq) ||
- tp->packets_out > tp->max_packets_out) {
+ tp->packets_out > tp->max_packets_out ||
+ is_cwnd_limited) {
tp->max_packets_out = tp->packets_out;
tp->max_packets_seq = tp->snd_nxt;
tp->is_cwnd_limited = is_cwnd_limited;
@@ -2147,6 +2150,14 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
break;
}
+ /* Argh, we hit an empty skb(), presumably a thread
+ * is sleeping in sendmsg()/sk_stream_wait_memory().
+ * We do not want to send a pure-ack packet and have
+ * a strange looking rtx queue with empty packet(s).
+ */
+ if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq)
+ break;
+
if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
break;
@@ -2163,6 +2174,10 @@ repair:
break;
}
+ is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd);
+ if (likely(sent_pkts || is_cwnd_limited))
+ tcp_cwnd_validate(sk, is_cwnd_limited);
+
if (likely(sent_pkts)) {
if (tcp_in_cwnd_reduction(sk))
tp->prr_out += sent_pkts;
@@ -2170,8 +2185,6 @@ repair:
/* Send one loss probe per tail loss episode. */
if (push_one != 2)
tcp_schedule_loss_probe(sk);
- is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd);
- tcp_cwnd_validate(sk, is_cwnd_limited);
return false;
}
return !tp->packets_out && tcp_send_head(sk);
@@ -2262,6 +2275,11 @@ void tcp_send_loss_probe(struct sock *sk)
int pcount;
int mss = tcp_current_mss(sk);
+ /* At most one outstanding TLP */
+ if (tp->tlp_high_seq)
+ goto rearm_timer;
+
+ tp->tlp_retrans = 0;
skb = tcp_send_head(sk);
if (skb) {
if (tcp_snd_wnd_test(tp, skb, mss)) {
@@ -2284,10 +2302,6 @@ void tcp_send_loss_probe(struct sock *sk)
return;
}
- /* At most one outstanding TLP retransmission. */
- if (tp->tlp_high_seq)
- goto rearm_timer;
-
if (skb_still_in_host_queue(sk, skb))
goto rearm_timer;
@@ -2308,10 +2322,12 @@ void tcp_send_loss_probe(struct sock *sk)
if (__tcp_retransmit_skb(sk, skb))
goto rearm_timer;
+ tp->tlp_retrans = 1;
+
+probe_sent:
/* Record snd_nxt for loss detection. */
tp->tlp_high_seq = tp->snd_nxt;
-probe_sent:
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSPROBES);
/* Reset s.t. tcp_rearm_rto will restart timer from now */
inet_csk(sk)->icsk_pending = 0;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 28fed4ade750..f336d87fe188 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -370,7 +370,7 @@ static void tcp_probe_timer(struct sock *sk)
return;
}
- if (icsk->icsk_probes_out > max_probes) {
+ if (icsk->icsk_probes_out >= max_probes) {
abort: tcp_write_err(sk);
} else {
/* Only send another probe if we didn't close things up. */
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index ce90a12b1e6e..1cf00a9d23f5 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -113,6 +113,7 @@
#include <trace/events/skb.h>
#include <net/busy_poll.h>
#include "udp_impl.h"
+#include <net/sock_reuseport.h>
struct udp_table udp_table __read_mostly;
EXPORT_SYMBOL(udp_table);
@@ -137,14 +138,14 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
unsigned long *bitmap,
struct sock *sk,
int (*saddr_comp)(const struct sock *sk1,
- const struct sock *sk2),
+ const struct sock *sk2,
+ bool match_wildcard),
unsigned int log)
{
struct sock *sk2;
- struct hlist_nulls_node *node;
kuid_t uid = sock_i_uid(sk);
- sk_nulls_for_each(sk2, node, &hslot->head) {
+ sk_for_each(sk2, &hslot->head) {
if (net_eq(sock_net(sk2), net) &&
sk2 != sk &&
(bitmap || udp_sk(sk2)->udp_port_hash == num) &&
@@ -152,8 +153,9 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
(!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if ||
sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
(!sk2->sk_reuseport || !sk->sk_reuseport ||
+ rcu_access_pointer(sk->sk_reuseport_cb) ||
!uid_eq(uid, sock_i_uid(sk2))) &&
- saddr_comp(sk, sk2)) {
+ saddr_comp(sk, sk2, true)) {
if (!bitmap)
return 1;
__set_bit(udp_sk(sk2)->udp_port_hash >> log, bitmap);
@@ -170,15 +172,15 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num,
struct udp_hslot *hslot2,
struct sock *sk,
int (*saddr_comp)(const struct sock *sk1,
- const struct sock *sk2))
+ const struct sock *sk2,
+ bool match_wildcard))
{
struct sock *sk2;
- struct hlist_nulls_node *node;
kuid_t uid = sock_i_uid(sk);
int res = 0;
spin_lock(&hslot2->lock);
- udp_portaddr_for_each_entry(sk2, node, &hslot2->head) {
+ udp_portaddr_for_each_entry(sk2, &hslot2->head) {
if (net_eq(sock_net(sk2), net) &&
sk2 != sk &&
(udp_sk(sk2)->udp_port_hash == num) &&
@@ -186,8 +188,9 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num,
(!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if ||
sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
(!sk2->sk_reuseport || !sk->sk_reuseport ||
+ rcu_access_pointer(sk->sk_reuseport_cb) ||
!uid_eq(uid, sock_i_uid(sk2))) &&
- saddr_comp(sk, sk2)) {
+ saddr_comp(sk, sk2, true)) {
res = 1;
break;
}
@@ -196,6 +199,31 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num,
return res;
}
+static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot,
+ int (*saddr_same)(const struct sock *sk1,
+ const struct sock *sk2,
+ bool match_wildcard))
+{
+ struct net *net = sock_net(sk);
+ kuid_t uid = sock_i_uid(sk);
+ struct sock *sk2;
+
+ sk_for_each(sk2, &hslot->head) {
+ if (net_eq(sock_net(sk2), net) &&
+ sk2 != sk &&
+ sk2->sk_family == sk->sk_family &&
+ ipv6_only_sock(sk2) == ipv6_only_sock(sk) &&
+ (udp_sk(sk2)->udp_port_hash == udp_sk(sk)->udp_port_hash) &&
+ (sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
+ sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) &&
+ (*saddr_same)(sk, sk2, false)) {
+ return reuseport_add_sock(sk, sk2);
+ }
+ }
+
+ return reuseport_alloc(sk);
+}
+
/**
* udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6
*
@@ -207,7 +235,8 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num,
*/
int udp_lib_get_port(struct sock *sk, unsigned short snum,
int (*saddr_comp)(const struct sock *sk1,
- const struct sock *sk2),
+ const struct sock *sk2,
+ bool match_wildcard),
unsigned int hash2_nulladdr)
{
struct udp_hslot *hslot, *hslot2;
@@ -295,17 +324,31 @@ found:
udp_sk(sk)->udp_port_hash = snum;
udp_sk(sk)->udp_portaddr_hash ^= snum;
if (sk_unhashed(sk)) {
- sk_nulls_add_node_rcu(sk, &hslot->head);
+ if (sk->sk_reuseport &&
+ udp_reuseport_add_sock(sk, hslot, saddr_comp)) {
+ inet_sk(sk)->inet_num = 0;
+ udp_sk(sk)->udp_port_hash = 0;
+ udp_sk(sk)->udp_portaddr_hash ^= snum;
+ goto fail_unlock;
+ }
+
+ sk_add_node_rcu(sk, &hslot->head);
hslot->count++;
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
spin_lock(&hslot2->lock);
- hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
- &hslot2->head);
+ if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
+ sk->sk_family == AF_INET6)
+ hlist_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
+ &hslot2->head);
+ else
+ hlist_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
+ &hslot2->head);
hslot2->count++;
spin_unlock(&hslot2->lock);
}
+ sock_set_flag(sk, SOCK_RCU_FREE);
error = 0;
fail_unlock:
spin_unlock_bh(&hslot->lock);
@@ -314,13 +357,22 @@ fail:
}
EXPORT_SYMBOL(udp_lib_get_port);
-static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
+/* match_wildcard == true: 0.0.0.0 equals to any IPv4 addresses
+ * match_wildcard == false: addresses must be exactly the same, i.e.
+ * 0.0.0.0 only equals to 0.0.0.0
+ */
+int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2,
+ bool match_wildcard)
{
struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
- return (!ipv6_only_sock(sk2) &&
- (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr ||
- inet1->inet_rcv_saddr == inet2->inet_rcv_saddr));
+ if (!ipv6_only_sock(sk2)) {
+ if (inet1->inet_rcv_saddr == inet2->inet_rcv_saddr)
+ return 1;
+ if (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr)
+ return match_wildcard;
+ }
+ return 0;
}
static u32 udp4_portaddr_hash(const struct net *net, __be32 saddr,
@@ -446,28 +498,31 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
static struct sock *udp4_lib_lookup2(struct net *net,
__be32 saddr, __be16 sport,
__be32 daddr, unsigned int hnum, int dif,
- struct udp_hslot *hslot2, unsigned int slot2)
+ struct udp_hslot *hslot2, unsigned int slot2,
+ struct sk_buff *skb)
{
struct sock *sk, *result;
- struct hlist_nulls_node *node;
int score, badness, matches = 0, reuseport = 0;
u32 hash = 0;
-begin:
result = NULL;
badness = 0;
- udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) {
+ udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
score = compute_score2(sk, net, saddr, sport,
daddr, hnum, dif);
if (score > badness) {
- result = sk;
- badness = score;
reuseport = sk->sk_reuseport;
if (reuseport) {
hash = udp_ehashfn(net, daddr, hnum,
saddr, sport);
+ result = reuseport_select_sock(sk, hash, skb,
+ sizeof(struct udphdr));
+ if (result)
+ return result;
matches = 1;
}
+ badness = score;
+ result = sk;
} else if (score == badness && reuseport) {
matches++;
if (reciprocal_scale(hash, matches) == 0)
@@ -475,22 +530,6 @@ begin:
hash = next_pseudo_random32(hash);
}
}
- /*
- * if the nulls value we got at the end of this lookup is
- * not the expected one, we must restart lookup.
- * We probably met an item that was moved to another chain.
- */
- if (get_nulls_value(node) != slot2)
- goto begin;
- if (result) {
- if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
- result = NULL;
- else if (unlikely(compute_score2(result, net, saddr, sport,
- daddr, hnum, dif) < badness)) {
- sock_put(result);
- goto begin;
- }
- }
return result;
}
@@ -499,17 +538,15 @@ begin:
*/
struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
__be16 sport, __be32 daddr, __be16 dport,
- int dif, struct udp_table *udptable)
+ int dif, struct udp_table *udptable, struct sk_buff *skb)
{
struct sock *sk, *result;
- struct hlist_nulls_node *node;
unsigned short hnum = ntohs(dport);
unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
int score, badness, matches = 0, reuseport = 0;
u32 hash = 0;
- rcu_read_lock();
if (hslot->count > 10) {
hash2 = udp4_portaddr_hash(net, daddr, hnum);
slot2 = hash2 & udptable->mask;
@@ -519,7 +556,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
result = udp4_lib_lookup2(net, saddr, sport,
daddr, hnum, dif,
- hslot2, slot2);
+ hslot2, slot2, skb);
if (!result) {
hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
slot2 = hash2 & udptable->mask;
@@ -529,26 +566,29 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
result = udp4_lib_lookup2(net, saddr, sport,
htonl(INADDR_ANY), hnum, dif,
- hslot2, slot2);
+ hslot2, slot2, skb);
}
- rcu_read_unlock();
return result;
}
begin:
result = NULL;
badness = 0;
- sk_nulls_for_each_rcu(sk, node, &hslot->head) {
+ sk_for_each_rcu(sk, &hslot->head) {
score = compute_score(sk, net, saddr, hnum, sport,
daddr, dport, dif);
if (score > badness) {
- result = sk;
- badness = score;
reuseport = sk->sk_reuseport;
if (reuseport) {
hash = udp_ehashfn(net, daddr, hnum,
saddr, sport);
+ result = reuseport_select_sock(sk, hash, skb,
+ sizeof(struct udphdr));
+ if (result)
+ return result;
matches = 1;
}
+ result = sk;
+ badness = score;
} else if (score == badness && reuseport) {
matches++;
if (reciprocal_scale(hash, matches) == 0)
@@ -556,24 +596,6 @@ begin:
hash = next_pseudo_random32(hash);
}
}
- /*
- * if the nulls value we got at the end of this lookup is
- * not the expected one, we must restart lookup.
- * We probably met an item that was moved to another chain.
- */
- if (get_nulls_value(node) != slot)
- goto begin;
-
- if (result) {
- if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
- result = NULL;
- else if (unlikely(compute_score(result, net, saddr, hnum, sport,
- daddr, dport, dif) < badness)) {
- sock_put(result);
- goto begin;
- }
- }
- rcu_read_unlock();
return result;
}
EXPORT_SYMBOL_GPL(__udp4_lib_lookup);
@@ -586,15 +608,27 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
return __udp4_lib_lookup(dev_net(skb_dst(skb)->dev), iph->saddr, sport,
iph->daddr, dport, inet_iif(skb),
- udptable);
+ udptable, skb);
}
+/* Must be called under rcu_read_lock().
+ * Does increment socket refcount.
+ */
+#if IS_ENABLED(CONFIG_NETFILTER_XT_MATCH_SOCKET) || \
+ IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TPROXY)
struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
__be32 daddr, __be16 dport, int dif)
{
- return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table);
+ struct sock *sk;
+
+ sk = __udp4_lib_lookup(net, saddr, sport, daddr, dport,
+ dif, &udp_table, NULL);
+ if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+ sk = NULL;
+ return sk;
}
EXPORT_SYMBOL_GPL(udp4_lib_lookup);
+#endif
static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk,
__be16 loc_port, __be32 loc_addr,
@@ -640,7 +674,8 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
struct net *net = dev_net(skb->dev);
sk = __udp4_lib_lookup(net, iph->daddr, uh->dest,
- iph->saddr, uh->source, skb->dev->ifindex, udptable);
+ iph->saddr, uh->source, skb->dev->ifindex, udptable,
+ NULL);
if (!sk) {
ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
return; /* No socket for error */
@@ -695,7 +730,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
sk->sk_err = err;
sk->sk_error_report(sk);
out:
- sock_put(sk);
+ return;
}
void udp_err(struct sk_buff *skb, u32 info)
@@ -1041,7 +1076,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
goto out;
}
- security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
+ security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4));
rt = ip_route_output_flow(net, fl4, sk);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
@@ -1411,13 +1446,15 @@ void udp_lib_unhash(struct sock *sk)
hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
spin_lock_bh(&hslot->lock);
- if (sk_nulls_del_node_init_rcu(sk)) {
+ if (rcu_access_pointer(sk->sk_reuseport_cb))
+ reuseport_detach_sock(sk);
+ if (sk_del_node_init_rcu(sk)) {
hslot->count--;
inet_sk(sk)->inet_num = 0;
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
spin_lock(&hslot2->lock);
- hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
+ hlist_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
hslot2->count--;
spin_unlock(&hslot2->lock);
}
@@ -1438,22 +1475,28 @@ void udp_lib_rehash(struct sock *sk, u16 newhash)
hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
nhslot2 = udp_hashslot2(udptable, newhash);
udp_sk(sk)->udp_portaddr_hash = newhash;
- if (hslot2 != nhslot2) {
+
+ if (hslot2 != nhslot2 ||
+ rcu_access_pointer(sk->sk_reuseport_cb)) {
hslot = udp_hashslot(udptable, sock_net(sk),
udp_sk(sk)->udp_port_hash);
/* we must lock primary chain too */
spin_lock_bh(&hslot->lock);
-
- spin_lock(&hslot2->lock);
- hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
- hslot2->count--;
- spin_unlock(&hslot2->lock);
-
- spin_lock(&nhslot2->lock);
- hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
- &nhslot2->head);
- nhslot2->count++;
- spin_unlock(&nhslot2->lock);
+ if (rcu_access_pointer(sk->sk_reuseport_cb))
+ reuseport_detach_sock(sk);
+
+ if (hslot2 != nhslot2) {
+ spin_lock(&hslot2->lock);
+ hlist_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
+ hslot2->count--;
+ spin_unlock(&hslot2->lock);
+
+ spin_lock(&nhslot2->lock);
+ hlist_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
+ &nhslot2->head);
+ nhslot2->count++;
+ spin_unlock(&nhslot2->lock);
+ }
spin_unlock_bh(&hslot->lock);
}
@@ -1564,7 +1607,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
/*
* UDP-Lite specific tests, ignored on UDP sockets
*/
- if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {
+ if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {
/*
* MIB statistics other than incrementing the error count are
@@ -1595,8 +1638,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
}
}
- if (rcu_access_pointer(sk->sk_filter) &&
- udp_lib_checksum_complete(skb))
+ if (udp_lib_checksum_complete(skb))
goto csum_error;
if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
@@ -1628,35 +1670,6 @@ drop:
return -1;
}
-static void flush_stack(struct sock **stack, unsigned int count,
- struct sk_buff *skb, unsigned int final)
-{
- unsigned int i;
- struct sk_buff *skb1 = NULL;
- struct sock *sk;
-
- for (i = 0; i < count; i++) {
- sk = stack[i];
- if (likely(!skb1))
- skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC);
-
- if (!skb1) {
- atomic_inc(&sk->sk_drops);
- UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS,
- IS_UDPLITE(sk));
- UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS,
- IS_UDPLITE(sk));
- }
-
- if (skb1 && udp_queue_rcv_skb(sk, skb1) <= 0)
- skb1 = NULL;
-
- sock_put(sk);
- }
- if (unlikely(skb1))
- kfree_skb(skb1);
-}
-
/* For TCP sockets, sk_rx_dst is protected by socket lock
* For UDP, we use xchg() to guard against concurrent changes.
*/
@@ -1680,41 +1693,46 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
struct udp_table *udptable,
int proto)
{
- struct sock *sk, *stack[256 / sizeof(struct sock *)];
- struct hlist_nulls_node *node;
+ struct sock *sk, *first = NULL;
unsigned short hnum = ntohs(uh->dest);
struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum);
- int dif = skb->dev->ifindex;
- unsigned int count = 0, offset = offsetof(typeof(*sk), sk_nulls_node);
unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10);
- bool inner_flushed = false;
+ unsigned int offset = offsetof(typeof(*sk), sk_node);
+ int dif = skb->dev->ifindex;
+ struct hlist_node *node;
+ struct sk_buff *nskb;
if (use_hash2) {
hash2_any = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum) &
- udp_table.mask;
- hash2 = udp4_portaddr_hash(net, daddr, hnum) & udp_table.mask;
+ udptable->mask;
+ hash2 = udp4_portaddr_hash(net, daddr, hnum) & udptable->mask;
start_lookup:
- hslot = &udp_table.hash2[hash2];
+ hslot = &udptable->hash2[hash2];
offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
}
- spin_lock(&hslot->lock);
- sk_nulls_for_each_entry_offset(sk, node, &hslot->head, offset) {
- if (__udp_is_mcast_sock(net, sk,
- uh->dest, daddr,
- uh->source, saddr,
- dif, hnum)) {
- if (unlikely(count == ARRAY_SIZE(stack))) {
- flush_stack(stack, count, skb, ~0);
- inner_flushed = true;
- count = 0;
- }
- stack[count++] = sk;
- sock_hold(sk);
+ sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) {
+ if (!__udp_is_mcast_sock(net, sk, uh->dest, daddr,
+ uh->source, saddr, dif, hnum))
+ continue;
+
+ if (!first) {
+ first = sk;
+ continue;
}
- }
+ nskb = skb_clone(skb, GFP_ATOMIC);
- spin_unlock(&hslot->lock);
+ if (unlikely(!nskb)) {
+ atomic_inc(&sk->sk_drops);
+ UDP_INC_STATS_BH(net, UDP_MIB_RCVBUFERRORS,
+ IS_UDPLITE(sk));
+ UDP_INC_STATS_BH(net, UDP_MIB_INERRORS,
+ IS_UDPLITE(sk));
+ continue;
+ }
+ if (udp_queue_rcv_skb(sk, nskb) > 0)
+ consume_skb(nskb);
+ }
/* Also lookup *:port if we are using hash2 and haven't done so yet. */
if (use_hash2 && hash2 != hash2_any) {
@@ -1722,16 +1740,13 @@ start_lookup:
goto start_lookup;
}
- /*
- * do the slow work with no lock held
- */
- if (count) {
- flush_stack(stack, count, skb, count - 1);
+ if (first) {
+ if (udp_queue_rcv_skb(first, skb) > 0)
+ consume_skb(skb);
} else {
- if (!inner_flushed)
- UDP_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI,
- proto == IPPROTO_UDPLITE);
- consume_skb(skb);
+ kfree_skb(skb);
+ UDP_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI,
+ proto == IPPROTO_UDPLITE);
}
return 0;
}
@@ -1760,8 +1775,11 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
}
}
- return skb_checksum_init_zero_check(skb, proto, uh->check,
- inet_compute_pseudo);
+ /* Note, we are only interested in != 0 or == 0, thus the
+ * force to int.
+ */
+ return (__force int)skb_checksum_init_zero_check(skb, proto, uh->check,
+ inet_compute_pseudo);
}
/*
@@ -1833,7 +1851,6 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
inet_compute_pseudo);
ret = udp_queue_rcv_skb(sk, skb);
- sock_put(sk);
/* a return value > 0 means to resubmit the input, but
* it wants the return to be -protocol, or 0
@@ -1894,49 +1911,24 @@ static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net,
int dif)
{
struct sock *sk, *result;
- struct hlist_nulls_node *node;
unsigned short hnum = ntohs(loc_port);
- unsigned int count, slot = udp_hashfn(net, hnum, udp_table.mask);
+ unsigned int slot = udp_hashfn(net, hnum, udp_table.mask);
struct udp_hslot *hslot = &udp_table.hash[slot];
/* Do not bother scanning a too big list */
if (hslot->count > 10)
return NULL;
- rcu_read_lock();
-begin:
- count = 0;
result = NULL;
- sk_nulls_for_each_rcu(sk, node, &hslot->head) {
- if (__udp_is_mcast_sock(net, sk,
- loc_port, loc_addr,
- rmt_port, rmt_addr,
- dif, hnum)) {
+ sk_for_each_rcu(sk, &hslot->head) {
+ if (__udp_is_mcast_sock(net, sk, loc_port, loc_addr,
+ rmt_port, rmt_addr, dif, hnum)) {
+ if (result)
+ return NULL;
result = sk;
- ++count;
- }
- }
- /*
- * if the nulls value we got at the end of this lookup is
- * not the expected one, we must restart lookup.
- * We probably met an item that was moved to another chain.
- */
- if (get_nulls_value(node) != slot)
- goto begin;
-
- if (result) {
- if (count != 1 ||
- unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
- result = NULL;
- else if (unlikely(!__udp_is_mcast_sock(net, result,
- loc_port, loc_addr,
- rmt_port, rmt_addr,
- dif, hnum))) {
- sock_put(result);
- result = NULL;
}
}
- rcu_read_unlock();
+
return result;
}
@@ -1949,37 +1941,22 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
__be16 rmt_port, __be32 rmt_addr,
int dif)
{
- struct sock *sk, *result;
- struct hlist_nulls_node *node;
unsigned short hnum = ntohs(loc_port);
unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum);
unsigned int slot2 = hash2 & udp_table.mask;
struct udp_hslot *hslot2 = &udp_table.hash2[slot2];
INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr);
const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum);
+ struct sock *sk;
- rcu_read_lock();
- result = NULL;
- udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) {
- if (INET_MATCH(sk, net, acookie,
- rmt_addr, loc_addr, ports, dif))
- result = sk;
+ udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
+ if (INET_MATCH(sk, net, acookie, rmt_addr,
+ loc_addr, ports, dif))
+ return sk;
/* Only check first socket in chain */
break;
}
-
- if (result) {
- if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
- result = NULL;
- else if (unlikely(!INET_MATCH(sk, net, acookie,
- rmt_addr, loc_addr,
- ports, dif))) {
- sock_put(result);
- result = NULL;
- }
- }
- rcu_read_unlock();
- return result;
+ return NULL;
}
void udp_v4_early_demux(struct sk_buff *skb)
@@ -1987,7 +1964,7 @@ void udp_v4_early_demux(struct sk_buff *skb)
struct net *net = dev_net(skb->dev);
const struct iphdr *iph;
const struct udphdr *uh;
- struct sock *sk;
+ struct sock *sk = NULL;
struct dst_entry *dst;
int dif = skb->dev->ifindex;
int ours;
@@ -2019,11 +1996,9 @@ void udp_v4_early_demux(struct sk_buff *skb)
} else if (skb->pkt_type == PACKET_HOST) {
sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr,
uh->source, iph->saddr, dif);
- } else {
- return;
}
- if (!sk)
+ if (!sk || !atomic_inc_not_zero_hint(&sk->sk_refcnt, 2))
return;
skb->sk = sk;
@@ -2316,7 +2291,6 @@ struct proto udp_prot = {
.sysctl_wmem = &sysctl_udp_wmem_min,
.sysctl_rmem = &sysctl_udp_rmem_min,
.obj_size = sizeof(struct udp_sock),
- .slab_flags = SLAB_DESTROY_BY_RCU,
.h.udp_table = &udp_table,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_udp_setsockopt,
@@ -2338,14 +2312,13 @@ static struct sock *udp_get_first(struct seq_file *seq, int start)
for (state->bucket = start; state->bucket <= state->udp_table->mask;
++state->bucket) {
- struct hlist_nulls_node *node;
struct udp_hslot *hslot = &state->udp_table->hash[state->bucket];
- if (hlist_nulls_empty(&hslot->head))
+ if (hlist_empty(&hslot->head))
continue;
spin_lock_bh(&hslot->lock);
- sk_nulls_for_each(sk, node, &hslot->head) {
+ sk_for_each(sk, &hslot->head) {
if (!net_eq(sock_net(sk), net))
continue;
if (sk->sk_family == state->family)
@@ -2364,7 +2337,7 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
struct net *net = seq_file_net(seq);
do {
- sk = sk_nulls_next(sk);
+ sk = sk_next(sk);
} while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family));
if (!sk) {
@@ -2488,7 +2461,7 @@ int udp4_seq_show(struct seq_file *seq, void *v)
{
seq_setwidth(seq, 127);
if (v == SEQ_START_TOKEN)
- seq_puts(seq, " sl local_address rem_address st tx_queue "
+ seq_puts(seq, " sl local_address rem_address st tx_queue "
"rx_queue tr tm->when retrnsmt uid timeout "
"inode ref pointer drops");
else {
@@ -2579,12 +2552,12 @@ void __init udp_table_init(struct udp_table *table, const char *name)
table->hash2 = table->hash + (table->mask + 1);
for (i = 0; i <= table->mask; i++) {
- INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i);
+ INIT_HLIST_HEAD(&table->hash[i].head);
table->hash[i].count = 0;
spin_lock_init(&table->hash[i].lock);
}
for (i = 0; i <= table->mask; i++) {
- INIT_HLIST_NULLS_HEAD(&table->hash2[i].head, i);
+ INIT_HLIST_HEAD(&table->hash2[i].head);
table->hash2[i].count = 0;
spin_lock_init(&table->hash2[i].lock);
}
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index 092aa60e8b92..9a89c10a55f0 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -36,15 +36,16 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
const struct inet_diag_req_v2 *req)
{
int err = -EINVAL;
- struct sock *sk;
+ struct sock *sk = NULL;
struct sk_buff *rep;
struct net *net = sock_net(in_skb->sk);
+ rcu_read_lock();
if (req->sdiag_family == AF_INET)
sk = __udp4_lib_lookup(net,
req->id.idiag_src[0], req->id.idiag_sport,
req->id.idiag_dst[0], req->id.idiag_dport,
- req->id.idiag_if, tbl);
+ req->id.idiag_if, tbl, NULL);
#if IS_ENABLED(CONFIG_IPV6)
else if (req->sdiag_family == AF_INET6)
sk = __udp6_lib_lookup(net,
@@ -52,11 +53,11 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
req->id.idiag_sport,
(struct in6_addr *)req->id.idiag_dst,
req->id.idiag_dport,
- req->id.idiag_if, tbl);
+ req->id.idiag_if, tbl, NULL);
#endif
- else
- goto out_nosk;
-
+ if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+ sk = NULL;
+ rcu_read_unlock();
err = -ENOENT;
if (!sk)
goto out_nosk;
@@ -97,25 +98,24 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb,
struct netlink_callback *cb,
const struct inet_diag_req_v2 *r, struct nlattr *bc)
{
- int num, s_num, slot, s_slot;
- struct net *net = sock_net(skb->sk);
bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
+ struct net *net = sock_net(skb->sk);
+ int num, s_num, slot, s_slot;
s_slot = cb->args[0];
num = s_num = cb->args[1];
for (slot = s_slot; slot <= table->mask; s_num = 0, slot++) {
- struct sock *sk;
- struct hlist_nulls_node *node;
struct udp_hslot *hslot = &table->hash[slot];
+ struct sock *sk;
num = 0;
- if (hlist_nulls_empty(&hslot->head))
+ if (hlist_empty(&hslot->head))
continue;
spin_lock_bh(&hslot->lock);
- sk_nulls_for_each(sk, node, &hslot->head) {
+ sk_for_each(sk, &hslot->head) {
struct inet_sock *inet = inet_sk(sk);
if (!net_eq(sock_net(sk), net))
@@ -182,7 +182,7 @@ static int __udp_diag_destroy(struct sk_buff *in_skb,
sk = __udp4_lib_lookup(net,
req->id.idiag_dst[0], req->id.idiag_dport,
req->id.idiag_src[0], req->id.idiag_sport,
- req->id.idiag_if, tbl);
+ req->id.idiag_if, tbl, NULL);
#if IS_ENABLED(CONFIG_IPV6)
else if (req->sdiag_family == AF_INET6) {
if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) &&
@@ -190,7 +190,7 @@ static int __udp_diag_destroy(struct sk_buff *in_skb,
sk = __udp4_lib_lookup(net,
req->id.idiag_dst[3], req->id.idiag_dport,
req->id.idiag_src[3], req->id.idiag_sport,
- req->id.idiag_if, tbl);
+ req->id.idiag_if, tbl, NULL);
else
sk = __udp6_lib_lookup(net,
@@ -198,7 +198,7 @@ static int __udp_diag_destroy(struct sk_buff *in_skb,
req->id.idiag_dport,
(struct in6_addr *)req->id.idiag_src,
req->id.idiag_sport,
- req->id.idiag_if, tbl);
+ req->id.idiag_if, tbl, NULL);
}
#endif
else {
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 6dfc3daf7c21..2a37f367dc04 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -300,7 +300,7 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
int flush = 1;
if (NAPI_GRO_CB(skb)->encap_mark ||
- (skb->ip_summed != CHECKSUM_PARTIAL &&
+ (uh->check && skb->ip_summed != CHECKSUM_PARTIAL &&
NAPI_GRO_CB(skb)->csum_cnt == 0 &&
!NAPI_GRO_CB(skb)->csum_valid))
goto out;
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 78766b32b78b..705d9fbf0bcf 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -55,7 +55,6 @@ struct proto udplite_prot = {
.unhash = udp_lib_unhash,
.get_port = udp_v4_get_port,
.obj_size = sizeof(struct udp_sock),
- .slab_flags = SLAB_DESTROY_BY_RCU,
.h.udp_table = &udplite_table,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_udp_setsockopt,
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 7ee6518afa86..73705a2368d9 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -75,9 +75,7 @@ int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb)
{
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
-#ifdef CONFIG_NETFILTER
IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
-#endif
return xfrm_output(sk, skb);
}
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index e1ee6515cd4e..55c1cbfdf1ff 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -569,7 +569,7 @@ void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex,
struct sk_buff *skb;
int err = -ENOBUFS;
- skb = nlmsg_new(inet6_netconf_msgsize_devconf(type), GFP_ATOMIC);
+ skb = nlmsg_new(inet6_netconf_msgsize_devconf(type), GFP_KERNEL);
if (!skb)
goto errout;
@@ -581,7 +581,7 @@ void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex,
kfree_skb(skb);
goto errout;
}
- rtnl_notify(skb, net, 0, RTNLGRP_IPV6_NETCONF, NULL, GFP_ATOMIC);
+ rtnl_notify(skb, net, 0, RTNLGRP_IPV6_NETCONF, NULL, GFP_KERNEL);
return;
errout:
rtnl_set_sk_err(net, RTNLGRP_IPV6_NETCONF, err);
@@ -800,7 +800,14 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf)
}
if (p == &net->ipv6.devconf_all->forwarding) {
+ int old_dflt = net->ipv6.devconf_dflt->forwarding;
+
net->ipv6.devconf_dflt->forwarding = newf;
+ if ((!newf) ^ (!old_dflt))
+ inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+ NETCONFA_IFINDEX_DEFAULT,
+ net->ipv6.devconf_dflt);
+
addrconf_forward_change(net, newf);
if ((!newf) ^ (!old))
inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING,
@@ -2295,6 +2302,7 @@ static void addrconf_add_mroute(struct net_device *dev)
.fc_dst_len = 8,
.fc_flags = RTF_UP,
.fc_nlinfo.nl_net = dev_net(dev),
+ .fc_protocol = RTPROT_KERNEL,
};
ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0);
@@ -2389,6 +2397,12 @@ static void manage_tempaddrs(struct inet6_dev *idev,
}
}
+static bool is_addr_mode_generate_stable(struct inet6_dev *idev)
+{
+ return idev->addr_gen_mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY ||
+ idev->addr_gen_mode == IN6_ADDR_GEN_MODE_RANDOM;
+}
+
void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
{
struct prefix_info *pinfo;
@@ -2505,8 +2519,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
in6_dev->token.s6_addr + 8, 8);
read_unlock_bh(&in6_dev->lock);
tokenized = true;
- } else if (in6_dev->addr_gen_mode ==
- IN6_ADDR_GEN_MODE_STABLE_PRIVACY &&
+ } else if (is_addr_mode_generate_stable(in6_dev) &&
!ipv6_generate_stable_address(&addr, 0,
in6_dev)) {
addr_flags |= IFA_F_STABLE_PRIVACY;
@@ -3106,6 +3119,17 @@ retry:
return 0;
}
+static void ipv6_gen_mode_random_init(struct inet6_dev *idev)
+{
+ struct ipv6_stable_secret *s = &idev->cnf.stable_secret;
+
+ if (s->initialized)
+ return;
+ s = &idev->cnf.stable_secret;
+ get_random_bytes(&s->secret, sizeof(s->secret));
+ s->initialized = true;
+}
+
static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route)
{
struct in6_addr addr;
@@ -3116,13 +3140,18 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route)
ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
- if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY) {
+ switch (idev->addr_gen_mode) {
+ case IN6_ADDR_GEN_MODE_RANDOM:
+ ipv6_gen_mode_random_init(idev);
+ /* fallthrough */
+ case IN6_ADDR_GEN_MODE_STABLE_PRIVACY:
if (!ipv6_generate_stable_address(&addr, 0, idev))
addrconf_add_linklocal(idev, &addr,
IFA_F_STABLE_PRIVACY);
else if (prefix_route)
addrconf_prefix_route(&addr, 64, idev->dev, 0, 0);
- } else if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64) {
+ break;
+ case IN6_ADDR_GEN_MODE_EUI64:
/* addrconf_add_linklocal also adds a prefix_route and we
* only need to care about prefix routes if ipv6_generate_eui64
* couldn't generate one.
@@ -3131,6 +3160,11 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route)
addrconf_add_linklocal(idev, &addr, 0);
else if (prefix_route)
addrconf_prefix_route(&addr, 64, idev->dev, 0, 0);
+ break;
+ case IN6_ADDR_GEN_MODE_NONE:
+ default:
+ /* will not add any link local address */
+ break;
}
}
@@ -3148,6 +3182,7 @@ static void addrconf_dev_config(struct net_device *dev)
(dev->type != ARPHRD_IEEE1394) &&
(dev->type != ARPHRD_TUNNEL6) &&
(dev->type != ARPHRD_6LOWPAN) &&
+ (dev->type != ARPHRD_NONE) &&
(dev->type != ARPHRD_RAWIP) &&
(dev->type != ARPHRD_INFINIBAND)) {
/* Alas, we support only Ethernet autoconfiguration. */
@@ -3158,6 +3193,11 @@ static void addrconf_dev_config(struct net_device *dev)
if (IS_ERR(idev))
return;
+ /* this device type has no EUI support */
+ if (dev->type == ARPHRD_NONE &&
+ idev->addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64)
+ idev->addr_gen_mode = IN6_ADDR_GEN_MODE_RANDOM;
+
addrconf_addr_gen(idev, false);
}
@@ -3215,6 +3255,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct netdev_notifier_changeupper_info *info;
struct inet6_dev *idev = __in6_dev_get(dev);
struct net *net = dev_net(dev);
int run_pending = 0;
@@ -3376,6 +3417,15 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
case NETDEV_POST_TYPE_CHANGE:
addrconf_type_change(dev, event);
break;
+
+ case NETDEV_CHANGEUPPER:
+ info = ptr;
+
+ /* flush all routes if dev is linked to or unlinked from
+ * an L3 master device (e.g., VRF)
+ */
+ if (info->upper_dev && netif_is_l3_master(info->upper_dev))
+ addrconf_ifdown(dev, 0);
}
return NOTIFY_OK;
@@ -4772,6 +4822,8 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN] = cnf->ignore_routes_with_linkdown;
/* we omit DEVCONF_STABLE_SECRET for now */
array[DEVCONF_USE_OIF_ADDRS_ONLY] = cnf->use_oif_addrs_only;
+ array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = cnf->drop_unicast_in_l2_multicast;
+ array[DEVCONF_DROP_UNSOLICITED_NA] = cnf->drop_unsolicited_na;
}
static inline size_t inet6_ifla6_size(void)
@@ -5022,7 +5074,8 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla)
if (mode != IN6_ADDR_GEN_MODE_EUI64 &&
mode != IN6_ADDR_GEN_MODE_NONE &&
- mode != IN6_ADDR_GEN_MODE_STABLE_PRIVACY)
+ mode != IN6_ADDR_GEN_MODE_STABLE_PRIVACY &&
+ mode != IN6_ADDR_GEN_MODE_RANDOM)
return -EINVAL;
if (mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY &&
@@ -5859,6 +5912,20 @@ static struct addrconf_sysctl_table
.proc_handler = addrconf_sysctl_ignore_routes_with_linkdown,
},
{
+ .procname = "drop_unicast_in_l2_multicast",
+ .data = &ipv6_devconf.drop_unicast_in_l2_multicast,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "drop_unsolicited_na",
+ .data = &ipv6_devconf.drop_unsolicited_na,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
/* sentinel */
}
},
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index bfa941fc1165..129324b36fb6 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -107,15 +107,16 @@ int inet6addr_notifier_call_chain(unsigned long val, void *v)
}
EXPORT_SYMBOL(inet6addr_notifier_call_chain);
-static int eafnosupport_ipv6_dst_lookup(struct net *net, struct sock *u1,
- struct dst_entry **u2,
- struct flowi6 *u3)
+static struct dst_entry *eafnosupport_ipv6_dst_lookup_flow(struct net *net,
+ const struct sock *sk,
+ struct flowi6 *fl6,
+ const struct in6_addr *final_dst)
{
- return -EAFNOSUPPORT;
+ return ERR_PTR(-EAFNOSUPPORT);
}
const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
- .ipv6_dst_lookup = eafnosupport_ipv6_dst_lookup,
+ .ipv6_dst_lookup_flow = eafnosupport_ipv6_dst_lookup_flow,
};
EXPORT_SYMBOL_GPL(ipv6_stub);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 28e03fce9e89..091a93e30e2b 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -250,7 +250,11 @@ lookup_protocol:
* creation time automatically shares.
*/
inet->inet_sport = htons(inet->inet_num);
- sk->sk_prot->hash(sk);
+ err = sk->sk_prot->hash(sk);
+ if (err) {
+ sk_common_release(sk);
+ goto out;
+ }
}
if (sk->sk_prot->init) {
err = sk->sk_prot->init(sk);
@@ -259,6 +263,14 @@ lookup_protocol:
goto out;
}
}
+
+ if (!kern) {
+ err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
+ if (err) {
+ sk_common_release(sk);
+ goto out;
+ }
+ }
out:
return err;
out_rcu_unlock:
@@ -277,6 +289,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
struct net *net = sock_net(sk);
__be32 v4addr = 0;
unsigned short snum;
+ bool saved_ipv6only;
int addr_type = 0;
int err = 0;
@@ -396,19 +409,21 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (!(addr_type & IPV6_ADDR_MULTICAST))
np->saddr = addr->sin6_addr;
+ saved_ipv6only = sk->sk_ipv6only;
+ if (addr_type != IPV6_ADDR_ANY && addr_type != IPV6_ADDR_MAPPED)
+ sk->sk_ipv6only = 1;
+
/* Make sure we are allowed to bind here. */
if ((snum || !inet->bind_address_no_port) &&
sk->sk_prot->get_port(sk, snum)) {
+ sk->sk_ipv6only = saved_ipv6only;
inet_reset_saddr(sk);
err = -EADDRINUSE;
goto out;
}
- if (addr_type != IPV6_ADDR_ANY) {
+ if (addr_type != IPV6_ADDR_ANY)
sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
- if (addr_type != IPV6_ADDR_MAPPED)
- sk->sk_ipv6only = 1;
- }
if (snum)
sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
inet->inet_sport = htons(inet->inet_num);
@@ -693,14 +708,14 @@ int inet6_sk_rebuild_header(struct sock *sk)
fl6.fl6_dport = inet->inet_dport;
fl6.fl6_sport = inet->inet_sport;
fl6.flowi6_uid = sk->sk_uid;
- security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+ security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
rcu_read_lock();
final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt),
&final);
rcu_read_unlock();
- dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
+ dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
if (IS_ERR(dst)) {
sk->sk_route_caps = 0;
sk->sk_err_soft = -PTR_ERR(dst);
@@ -858,7 +873,7 @@ static struct pernet_operations inet6_net_ops = {
static const struct ipv6_stub ipv6_stub_impl = {
.ipv6_sock_mc_join = ipv6_sock_mc_join,
.ipv6_sock_mc_drop = ipv6_sock_mc_drop,
- .ipv6_dst_lookup = ip6_dst_lookup,
+ .ipv6_dst_lookup_flow = ip6_dst_lookup_flow,
.udpv6_encap_enable = udpv6_encap_enable,
.ndisc_send_na = ndisc_send_na,
.nd_tbl = &nd_tbl,
@@ -1046,11 +1061,11 @@ netfilter_fail:
igmp_fail:
ndisc_cleanup();
ndisc_fail:
- ip6_mr_cleanup();
+ icmpv6_cleanup();
icmp_fail:
- unregister_pernet_subsys(&inet6_net_ops);
+ ip6_mr_cleanup();
ipmr_fail:
- icmpv6_cleanup();
+ unregister_pernet_subsys(&inet6_net_ops);
register_pernet_fail:
sock_unregister(PF_INET6);
rtnl_unregister_all(PF_INET6);
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index e742c4deb13d..98d253d7bed3 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -595,7 +595,8 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
memset(ah->auth_data, 0, ahp->icv_trunc_len);
- if (ipv6_clear_mutable_options(ip6h, hdr_len, XFRM_POLICY_IN))
+ err = ipv6_clear_mutable_options(ip6h, hdr_len, XFRM_POLICY_IN);
+ if (err)
goto out_free;
ip6h->priority = 0;
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 514ac259f543..b831e9b2e906 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -170,7 +170,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
return 0;
}
-void ipv6_sock_ac_close(struct sock *sk)
+void __ipv6_sock_ac_close(struct sock *sk)
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct net_device *dev = NULL;
@@ -178,10 +178,7 @@ void ipv6_sock_ac_close(struct sock *sk)
struct net *net = sock_net(sk);
int prev_index;
- if (!np->ipv6_ac_list)
- return;
-
- rtnl_lock();
+ ASSERT_RTNL();
pac = np->ipv6_ac_list;
np->ipv6_ac_list = NULL;
@@ -198,6 +195,16 @@ void ipv6_sock_ac_close(struct sock *sk)
sock_kfree_s(sk, pac, sizeof(*pac));
pac = next;
}
+}
+
+void ipv6_sock_ac_close(struct sock *sk)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ if (!np->ipv6_ac_list)
+ return;
+ rtnl_lock();
+ __ipv6_sock_ac_close(sk);
rtnl_unlock();
}
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 674fc3861e41..7e966fe1ffd6 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -40,7 +40,8 @@ static bool ipv6_mapped_addr_any(const struct in6_addr *a)
return ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0);
}
-static int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr,
+ int addr_len)
{
struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
struct inet_sock *inet = inet_sk(sk);
@@ -173,14 +174,14 @@ ipv4_connected:
if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST))
fl6.flowi6_oif = np->mcast_oif;
- security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+ security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
rcu_read_lock();
opt = flowlabel ? flowlabel->opt : rcu_dereference(np->opt);
final_p = fl6_update_dst(&fl6, opt, &final);
rcu_read_unlock();
- dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
+ dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
err = 0;
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
@@ -219,6 +220,7 @@ out:
fl6_sock_release(flowlabel);
return err;
}
+EXPORT_SYMBOL_GPL(__ip6_datagram_connect);
int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 44a2010e2076..37268a37312c 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -426,8 +426,10 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
sg_init_table(sg, nfrags);
ret = skb_to_sgvec(skb, sg, 0, skb->len);
- if (unlikely(ret < 0))
+ if (unlikely(ret < 0)) {
+ kfree(tmp);
goto out;
+ }
aead_request_set_crypt(req, sg, sg, elen + ivlen, iv);
aead_request_set_ad(req, assoclen);
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 3ae2fbe07b25..4e6755e99318 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -446,6 +446,10 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
if (__ipv6_addr_needs_scope_id(addr_type))
iif = skb->dev->ifindex;
+ else {
+ dst = skb_dst(skb);
+ iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
+ }
/*
* Must not send error if the source does not uniquely
@@ -480,7 +484,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
fl6.fl6_icmp_type = type;
fl6.fl6_icmp_code = code;
fl6.flowi6_uid = sock_net_uid(net, NULL);
- security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
+ security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
sk = icmpv6_xmit_lock(net);
if (!sk)
@@ -501,9 +505,6 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
else if (!fl6.flowi6_oif)
fl6.flowi6_oif = np->ucast_oif;
- if (!fl6.flowi6_oif)
- fl6.flowi6_oif = l3mdev_master_ifindex(skb->dev);
-
dst = icmpv6_route_lookup(net, skb, sk, &fl6);
if (IS_ERR(dst))
goto out;
@@ -588,7 +589,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
fl6.flowi6_mark = mark;
fl6.flowi6_uid = sock_net_uid(net, NULL);
- security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
+ security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
sk = icmpv6_xmit_lock(net);
if (!sk)
@@ -834,7 +835,7 @@ void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
fl6->fl6_icmp_type = type;
fl6->fl6_icmp_code = 0;
fl6->flowi6_oif = oif;
- security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
+ security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
}
static int __net_init icmpv6_sk_init(struct net *net)
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index dc79ebc14189..ceeb3d221db5 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -26,6 +26,7 @@
#include <net/ip6_route.h>
#include <net/sock.h>
#include <net/inet6_connection_sock.h>
+#include <net/sock_reuseport.h>
int inet6_csk_bind_conflict(const struct sock *sk,
const struct inet_bind_bucket *tb, bool relax)
@@ -48,15 +49,16 @@ int inet6_csk_bind_conflict(const struct sock *sk,
if ((!reuse || !sk2->sk_reuse ||
sk2->sk_state == TCP_LISTEN) &&
(!reuseport || !sk2->sk_reuseport ||
+ rcu_access_pointer(sk->sk_reuseport_cb) ||
(sk2->sk_state != TCP_TIME_WAIT &&
!uid_eq(uid,
sock_i_uid((struct sock *)sk2))))) {
- if (ipv6_rcv_saddr_equal(sk, sk2))
+ if (ipv6_rcv_saddr_equal(sk, sk2, true))
break;
}
if (!relax && reuse && sk2->sk_reuse &&
sk2->sk_state != TCP_LISTEN &&
- ipv6_rcv_saddr_equal(sk, sk2))
+ ipv6_rcv_saddr_equal(sk, sk2, true))
break;
}
}
@@ -87,9 +89,9 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk,
fl6->fl6_dport = ireq->ir_rmt_port;
fl6->fl6_sport = htons(ireq->ir_num);
fl6->flowi6_uid = sk->sk_uid;
- security_req_classify_flow(req, flowi6_to_flowi(fl6));
+ security_req_classify_flow(req, flowi6_to_flowi_common(fl6));
- dst = ip6_dst_lookup_flow(sk, fl6, final_p);
+ dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p);
if (IS_ERR(dst))
return NULL;
@@ -136,7 +138,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk,
fl6->fl6_sport = inet->inet_sport;
fl6->fl6_dport = inet->inet_dport;
fl6->flowi6_uid = sk->sk_uid;
- security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
+ security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
rcu_read_lock();
final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
@@ -144,7 +146,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk,
dst = __inet6_csk_dst_check(sk, np->dst_cookie);
if (!dst) {
- dst = ip6_dst_lookup_flow(sk, fl6, final_p);
+ dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p);
if (!IS_ERR(dst))
ip6_dst_store(sk, dst, NULL, NULL);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 21ace5a2bf7c..c27207563d10 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -17,11 +17,13 @@
#include <linux/module.h>
#include <linux/random.h>
+#include <net/addrconf.h>
#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
#include <net/secure_seq.h>
#include <net/ip.h>
+#include <net/sock_reuseport.h>
u32 inet6_ehashfn(const struct net *net,
const struct in6_addr *laddr, const u16 lport,
@@ -121,7 +123,9 @@ static inline int compute_score(struct sock *sk, struct net *net,
}
struct sock *inet6_lookup_listener(struct net *net,
- struct inet_hashinfo *hashinfo, const struct in6_addr *saddr,
+ struct inet_hashinfo *hashinfo,
+ struct sk_buff *skb, int doff,
+ const struct in6_addr *saddr,
const __be16 sport, const struct in6_addr *daddr,
const unsigned short hnum, const int dif)
{
@@ -129,6 +133,7 @@ struct sock *inet6_lookup_listener(struct net *net,
const struct hlist_nulls_node *node;
struct sock *result;
int score, hiscore, matches = 0, reuseport = 0;
+ bool select_ok = true;
u32 phash = 0;
unsigned int hash = inet_lhashfn(net, hnum);
struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
@@ -146,6 +151,15 @@ begin:
if (reuseport) {
phash = inet6_ehashfn(net, daddr, hnum,
saddr, sport);
+ if (select_ok) {
+ struct sock *sk2;
+ sk2 = reuseport_select_sock(sk, phash,
+ skb, doff);
+ if (sk2) {
+ result = sk2;
+ goto found;
+ }
+ }
matches = 1;
}
} else if (score == hiscore && reuseport) {
@@ -163,11 +177,13 @@ begin:
if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE)
goto begin;
if (result) {
+found:
if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
result = NULL;
else if (unlikely(compute_score(result, net, hnum, daddr,
dif) < hiscore)) {
sock_put(result);
+ select_ok = false;
goto begin;
}
}
@@ -177,6 +193,7 @@ begin:
EXPORT_SYMBOL_GPL(inet6_lookup_listener);
struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
+ struct sk_buff *skb, int doff,
const struct in6_addr *saddr, const __be16 sport,
const struct in6_addr *daddr, const __be16 dport,
const int dif)
@@ -184,7 +201,8 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
struct sock *sk;
local_bh_disable();
- sk = __inet6_lookup(net, hashinfo, saddr, sport, daddr, ntohs(dport), dif);
+ sk = __inet6_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
+ ntohs(dport), dif);
local_bh_enable();
return sk;
@@ -274,3 +292,61 @@ int inet6_hash_connect(struct inet_timewait_death_row *death_row,
__inet6_check_established);
}
EXPORT_SYMBOL_GPL(inet6_hash_connect);
+
+int inet6_hash(struct sock *sk)
+{
+ int err = 0;
+
+ if (sk->sk_state != TCP_CLOSE) {
+ local_bh_disable();
+ err = __inet_hash(sk, NULL, ipv6_rcv_saddr_equal);
+ local_bh_enable();
+ }
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(inet6_hash);
+
+/* match_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6
+ * only, and any IPv4 addresses if not IPv6 only
+ * match_wildcard == false: addresses must be exactly the same, i.e.
+ * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
+ * and 0.0.0.0 equals to 0.0.0.0 only
+ */
+int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
+ bool match_wildcard)
+{
+ const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
+ int sk2_ipv6only = inet_v6_ipv6only(sk2);
+ int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
+ int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
+
+ /* if both are mapped, treat as IPv4 */
+ if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
+ if (!sk2_ipv6only) {
+ if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr)
+ return 1;
+ if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr)
+ return match_wildcard;
+ }
+ return 0;
+ }
+
+ if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
+ return 1;
+
+ if (addr_type2 == IPV6_ADDR_ANY && match_wildcard &&
+ !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
+ return 1;
+
+ if (addr_type == IPV6_ADDR_ANY && match_wildcard &&
+ !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED))
+ return 1;
+
+ if (sk2_rcv_saddr6 &&
+ ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6))
+ return 1;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ipv6_rcv_saddr_equal);
diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c
index 391a8fedb27e..1132624edee9 100644
--- a/net/ipv6/ip6_checksum.c
+++ b/net/ipv6/ip6_checksum.c
@@ -84,9 +84,12 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto)
* we accept a checksum of zero here. When we find the socket
* for the UDP packet we'll check if that socket allows zero checksum
* for IPv6 (set by socket option).
+ *
+ * Note, we are only interested in != 0 or == 0, thus the
+ * force to int.
*/
- return skb_checksum_init_zero_check(skb, proto, uh->check,
- ip6_compute_pseudo);
+ return (__force int)skb_checksum_init_zero_check(skb, proto, uh->check,
+ ip6_compute_pseudo);
}
EXPORT_SYMBOL(udp6_csum_init);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index bf3824b59597..24e91d438f21 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -780,8 +780,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
found++;
break;
}
- if (rt_can_ecmp)
- fallback_ins = fallback_ins ?: ins;
+ fallback_ins = fallback_ins ?: ins;
goto next_iter;
}
@@ -821,7 +820,9 @@ next_iter:
}
if (fallback_ins && !found) {
- /* No ECMP-able route found, replace first non-ECMP one */
+ /* No matching route with same ecmp-able-ness found, replace
+ * first matching route
+ */
ins = fallback_ins;
iter = *ins;
found++;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 6e496c3dd8ef..17eee5083548 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -125,6 +125,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
ARPHRD_ETHER : ARPHRD_IP6GRE;
int score, cand_score = 4;
+ struct net_device *ndev;
for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) {
if (!ipv6_addr_equal(local, &t->parms.laddr) ||
@@ -227,9 +228,9 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
if (cand)
return cand;
- dev = ign->fb_tunnel_dev;
- if (dev->flags & IFF_UP)
- return netdev_priv(dev);
+ ndev = READ_ONCE(ign->fb_tunnel_dev);
+ if (ndev && ndev->flags & IFF_UP)
+ return netdev_priv(ndev);
return NULL;
}
@@ -349,7 +350,6 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
if (!(nt->parms.o_flags & GRE_SEQ))
dev->features |= NETIF_F_LLTX;
- dev_hold(dev);
ip6gre_tunnel_link(ign, nt);
return nt;
@@ -364,6 +364,8 @@ static void ip6gre_tunnel_uninit(struct net_device *dev)
struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id);
ip6gre_tunnel_unlink(ign, t);
+ if (ign->fb_tunnel_dev == dev)
+ WRITE_ONCE(ign->fb_tunnel_dev, NULL);
dst_cache_reset(&t->dst_cache);
dev_put(dev);
}
@@ -1311,8 +1313,6 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev)
strcpy(tunnel->parms.name, dev->name);
tunnel->hlen = sizeof(struct ipv6hdr) + 4;
-
- dev_hold(dev);
}
@@ -1356,15 +1356,16 @@ static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head)
static int __net_init ip6gre_init_net(struct net *net)
{
struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+ struct net_device *ndev;
int err;
- ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0",
- NET_NAME_UNKNOWN,
- ip6gre_tunnel_setup);
- if (!ign->fb_tunnel_dev) {
+ ndev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0",
+ NET_NAME_UNKNOWN, ip6gre_tunnel_setup);
+ if (!ndev) {
err = -ENOMEM;
goto err_alloc_dev;
}
+ ign->fb_tunnel_dev = ndev;
dev_net_set(ign->fb_tunnel_dev, net);
/* FB netdevice is special: we have one, and only one per netns.
* Allowing to move it to another netns is clearly unsafe.
@@ -1384,7 +1385,7 @@ static int __net_init ip6gre_init_net(struct net *net)
return 0;
err_reg_dev:
- ip6gre_dev_free(ign->fb_tunnel_dev);
+ ip6gre_dev_free(ndev);
err_alloc_dev:
return err;
}
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index c83c0faf5ae9..31ac3c56da4b 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -134,6 +134,16 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 1)
goto err;
+ /* If enabled, drop unicast packets that were encapsulated in link-layer
+ * multicast or broadcast to protected against the so-called "hole-196"
+ * attack in 802.11 wireless.
+ */
+ if (!ipv6_addr_is_multicast(&hdr->daddr) &&
+ (skb->pkt_type == PACKET_BROADCAST ||
+ skb->pkt_type == PACKET_MULTICAST) &&
+ idev->cnf.drop_unicast_in_l2_multicast)
+ goto err;
+
/* RFC4291 2.7
* Nodes must not originate a packet to a multicast address whose scope
* field contains the reserved value 0; if such a packet is received, it
@@ -151,16 +161,6 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
if (ipv6_addr_is_multicast(&hdr->saddr))
goto err;
- /* While RFC4291 is not explicit about v4mapped addresses
- * in IPv6 headers, it seems clear linux dual-stack
- * model can not deal properly with these.
- * Security models could be fooled by ::ffff:127.0.0.1 for example.
- *
- * https://tools.ietf.org/html/draft-itojun-v6ops-v4mapped-harmful-02
- */
- if (ipv6_addr_v4mapped(&hdr->saddr))
- goto err;
-
skb->transport_header = skb->network_header + sizeof(*hdr);
IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index e39dc94486b2..b824985c714c 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -39,6 +39,7 @@
#include <linux/module.h>
#include <linux/slab.h>
+#include <linux/bpf-cgroup.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>
@@ -65,9 +66,6 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
struct in6_addr *nexthop;
int ret;
- skb->protocol = htons(ETH_P_IPV6);
- skb->dev = dev;
-
if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
@@ -123,6 +121,14 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
+ int ret;
+
+ ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
+ if (ret) {
+ kfree_skb(skb);
+ return ret;
+ }
+
if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
dst_allfrag(skb_dst(skb)) ||
(IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
@@ -136,6 +142,9 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
struct net_device *dev = skb_dst(skb)->dev;
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
+ skb->protocol = htons(ETH_P_IPV6);
+ skb->dev = dev;
+
if (unlikely(idev->cnf.disable_ipv6)) {
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
kfree_skb(skb);
@@ -851,7 +860,6 @@ fail_toobig:
if (skb->sk && dst_allfrag(skb_dst(skb)))
sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
- skb->dev = skb_dst(skb)->dev;
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
err = -EMSGSIZE;
@@ -1057,13 +1065,13 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup);
* It returns a valid dst pointer on success, or a pointer encoded
* error code.
*/
-struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
+struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
const struct in6_addr *final_dst)
{
struct dst_entry *dst = NULL;
int err;
- err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
+ err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
if (err)
return ERR_PTR(err);
if (final_dst)
@@ -1071,7 +1079,7 @@ struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
if (!fl6->flowi6_oif)
fl6->flowi6_oif = l3mdev_fib_oif(dst->dev);
- return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
+ return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
@@ -1096,7 +1104,7 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
dst = ip6_sk_dst_check(sk, dst, fl6);
if (!dst)
- dst = ip6_dst_lookup_flow(sk, fl6, final_dst);
+ dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
return dst;
}
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 40cf35c9a2f3..842bdb2d3d8c 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -261,7 +261,6 @@ static int ip6_tnl_create2(struct net_device *dev)
strcpy(t->parms.name, dev->name);
- dev_hold(dev);
ip6_tnl_link(ip6n, t);
return 0;
@@ -918,12 +917,12 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
ldev = dev_get_by_index_rcu(net, p->link);
if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0)))
- pr_warn("%s xmit: Local address not yet configured!\n",
- p->name);
+ pr_warn_ratelimited("%s xmit: Local address not yet configured!\n",
+ p->name);
else if (!ipv6_addr_is_multicast(raddr) &&
unlikely(ipv6_chk_addr(net, raddr, NULL, 0)))
- pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
- p->name);
+ pr_warn_ratelimited("%s xmit: Routing loop! Remote address found on this node!\n",
+ p->name);
else
ret = 1;
rcu_read_unlock();
@@ -972,26 +971,28 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
/* NBMA tunnel */
if (ipv6_addr_any(&t->parms.raddr)) {
- struct in6_addr *addr6;
- struct neighbour *neigh;
- int addr_type;
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ struct in6_addr *addr6;
+ struct neighbour *neigh;
+ int addr_type;
- if (!skb_dst(skb))
- goto tx_err_link_failure;
+ if (!skb_dst(skb))
+ goto tx_err_link_failure;
- neigh = dst_neigh_lookup(skb_dst(skb),
- &ipv6_hdr(skb)->daddr);
- if (!neigh)
- goto tx_err_link_failure;
+ neigh = dst_neigh_lookup(skb_dst(skb),
+ &ipv6_hdr(skb)->daddr);
+ if (!neigh)
+ goto tx_err_link_failure;
- addr6 = (struct in6_addr *)&neigh->primary_key;
- addr_type = ipv6_addr_type(addr6);
+ addr6 = (struct in6_addr *)&neigh->primary_key;
+ addr_type = ipv6_addr_type(addr6);
- if (addr_type == IPV6_ADDR_ANY)
- addr6 = &ipv6_hdr(skb)->daddr;
+ if (addr_type == IPV6_ADDR_ANY)
+ addr6 = &ipv6_hdr(skb)->daddr;
- memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
- neigh_release(neigh);
+ memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
+ neigh_release(neigh);
+ }
} else if (!fl6->flowi6_mark)
dst = dst_cache_get(&t->dst_cache);
@@ -1582,6 +1583,7 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
return ret;
}
+ dev_hold(dev);
return 0;
}
@@ -1615,7 +1617,6 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev)
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
t->parms.proto = IPPROTO_IPV6;
- dev_hold(dev);
rcu_assign_pointer(ip6n->tnls_wc[0], t);
return 0;
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
index 30b03d8e321a..7ec737d85ccf 100644
--- a/net/ipv6/ip6_udp_tunnel.c
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -74,8 +74,8 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb,
struct net_device *dev, struct in6_addr *saddr,
struct in6_addr *daddr,
- __u8 prio, __u8 ttl, __be16 src_port,
- __be16 dst_port, bool nocheck)
+ __u8 prio, __u8 ttl, __be32 label,
+ __be16 src_port, __be16 dst_port, bool nocheck)
{
struct udphdr *uh;
struct ipv6hdr *ip6h;
@@ -99,7 +99,7 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
__skb_push(skb, sizeof(*ip6h));
skb_reset_network_header(skb);
ip6h = ipv6_hdr(skb);
- ip6_flow_hdr(ip6h, prio, htonl(0));
+ ip6_flow_hdr(ip6h, prio, label);
ip6h->payload_len = htons(skb->len);
ip6h->nexthdr = IPPROTO_UDP;
ip6h->hop_limit = ttl;
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 5dd544c5cfe2..9df1947e79eb 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -196,7 +196,6 @@ static int vti6_tnl_create2(struct net_device *dev)
strcpy(t->parms.name, dev->name);
- dev_hold(dev);
vti6_tnl_link(ip6n, t);
return 0;
@@ -315,7 +314,7 @@ static int vti6_rcv(struct sk_buff *skb)
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
rcu_read_unlock();
- return 0;
+ goto discard;
}
if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) {
@@ -324,11 +323,9 @@ static int vti6_rcv(struct sk_buff *skb)
goto discard;
}
- XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t;
-
rcu_read_unlock();
- return xfrm6_rcv(skb);
+ return xfrm6_rcv_tnl(skb, t);
}
rcu_read_unlock();
return -EINVAL;
@@ -441,8 +438,35 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
int err = -1;
int mtu;
- if (!dst)
- goto tx_err_link_failure;
+ if (!dst) {
+ switch (skb->protocol) {
+ case htons(ETH_P_IP): {
+ struct rtable *rt;
+
+ fl->u.ip4.flowi4_oif = dev->ifindex;
+ fl->u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC;
+ rt = __ip_route_output_key(dev_net(dev), &fl->u.ip4);
+ if (IS_ERR(rt))
+ goto tx_err_link_failure;
+ dst = &rt->dst;
+ skb_dst_set(skb, dst);
+ break;
+ }
+ case htons(ETH_P_IPV6):
+ fl->u.ip6.flowi6_oif = dev->ifindex;
+ fl->u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC;
+ dst = ip6_route_output(dev_net(dev), NULL, &fl->u.ip6);
+ if (dst->error) {
+ dst_release(dst);
+ dst = NULL;
+ goto tx_err_link_failure;
+ }
+ skb_dst_set(skb, dst);
+ break;
+ default:
+ goto tx_err_link_failure;
+ }
+ }
dst_hold(dst);
dst = xfrm_lookup(t->net, dst, fl, NULL, 0);
@@ -736,6 +760,8 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
struct net *net = dev_net(dev);
struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+ memset(&p1, 0, sizeof(p1));
+
switch (cmd) {
case SIOCGETTUNNEL:
if (dev == ip6n->fb_tnl_dev) {
@@ -876,6 +902,7 @@ static inline int vti6_dev_init_gen(struct net_device *dev)
dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
+ dev_hold(dev);
return 0;
}
@@ -907,7 +934,6 @@ static int __net_init vti6_fb_tnl_dev_init(struct net_device *dev)
struct vti6_net *ip6n = net_generic(net, vti6_net_id);
t->parms.proto = IPPROTO_IPV6;
- dev_hold(dev);
rcu_assign_pointer(ip6n->tnls_wc[0], t);
return 0;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 91f16e679f63..20812e8b24dd 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1594,14 +1594,15 @@ static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
if (likely(mrt->mroute6_sk == NULL)) {
mrt->mroute6_sk = sk;
net->ipv6.devconf_all->mc_forwarding++;
- inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
- NETCONFA_IFINDEX_ALL,
- net->ipv6.devconf_all);
- }
- else
+ } else {
err = -EADDRINUSE;
+ }
write_unlock_bh(&mrt_lock);
+ if (!err)
+ inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
+ NETCONFA_IFINDEX_ALL,
+ net->ipv6.devconf_all);
rtnl_unlock();
return err;
@@ -1619,11 +1620,11 @@ int ip6mr_sk_done(struct sock *sk)
write_lock_bh(&mrt_lock);
mrt->mroute6_sk = NULL;
net->ipv6.devconf_all->mc_forwarding--;
+ write_unlock_bh(&mrt_lock);
inet6_netconf_notify_devconf(net,
NETCONFA_MC_FORWARDING,
NETCONFA_IFINDEX_ALL,
net->ipv6.devconf_all);
- write_unlock_bh(&mrt_lock);
mroute_clean_tables(mrt, false);
err = 0;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 71263754b19b..4a75013a2ede 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -185,8 +185,14 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
retv = -EBUSY;
break;
}
- } else if (sk->sk_protocol != IPPROTO_TCP)
+ } else if (sk->sk_protocol == IPPROTO_TCP) {
+ if (sk->sk_prot != &tcpv6_prot) {
+ retv = -EBUSY;
+ break;
+ }
+ } else {
break;
+ }
if (sk->sk_state != TCP_ESTABLISHED) {
retv = -ENOTCONN;
@@ -201,6 +207,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
fl6_free_socklist(sk);
__ipv6_sock_mc_close(sk);
+ __ipv6_sock_ac_close(sk);
/*
* Sock is moving from IPv6 to IPv4 (sk_prot), so
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 976c8133a281..636425999aac 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1573,10 +1573,7 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
IPV6_TLV_PADN, 0 };
/* we assume size > sizeof(ra) here */
- /* limit our allocations to order-0 page */
- size = min_t(int, size, SKB_MAX_ORDER(0, 0));
skb = sock_alloc_send_skb(sk, size, 1, &err);
-
if (!skb)
return NULL;
@@ -2580,6 +2577,7 @@ void ipv6_mc_destroy_dev(struct inet6_dev *idev)
write_unlock_bh(&idev->lock);
igmp6_group_dropped(i);
+ ip6_mc_clear_src(i);
ma_put(i);
write_lock_bh(&idev->lock);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index e16a05ca4879..963ac8e2a448 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -884,6 +884,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
offsetof(struct nd_msg, opt));
struct ndisc_options ndopts;
struct net_device *dev = skb->dev;
+ struct inet6_dev *idev = __in6_dev_get(dev);
struct inet6_ifaddr *ifp;
struct neighbour *neigh;
@@ -903,6 +904,14 @@ static void ndisc_recv_na(struct sk_buff *skb)
return;
}
+ /* For some 802.11 wireless deployments (and possibly other networks),
+ * there will be a NA proxy and unsolicitd packets are attacks
+ * and thus should not be accepted.
+ */
+ if (!msg->icmph.icmp6_solicited && idev &&
+ idev->cnf.drop_unsolicited_na)
+ return;
+
if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
ND_PRINTK(2, warn, "NS: invalid ND option\n");
return;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 2393e1e09d69..f57b72771e17 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -343,6 +343,7 @@ ip6t_do_table(struct sk_buff *skb,
* things we don't know, ie. tcp syn flag or ports). If the
* rule is also a fragment-specific rule, non-fragments won't
* match it. */
+ acpar.fragoff = 0;
acpar.hotdrop = false;
acpar.net = state->net;
acpar.in = state->in;
@@ -979,10 +980,6 @@ copy_entries_to_user(unsigned int total_size,
return PTR_ERR(counters);
loc_cpu_entry = private->entries;
- if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
- ret = -EFAULT;
- goto free_counters;
- }
/* FIXME: use iterator macros --RR */
/* ... then go back and fix counters and names */
@@ -992,6 +989,10 @@ copy_entries_to_user(unsigned int total_size,
const struct xt_entry_target *t;
e = (struct ip6t_entry *)(loc_cpu_entry + off);
+ if (copy_to_user(userptr + off, e, sizeof(*e))) {
+ ret = -EFAULT;
+ goto free_counters;
+ }
if (copy_to_user(userptr + off
+ offsetof(struct ip6t_entry, counters),
&counters[num],
@@ -1005,23 +1006,14 @@ copy_entries_to_user(unsigned int total_size,
i += m->u.match_size) {
m = (void *)e + i;
- if (copy_to_user(userptr + off + i
- + offsetof(struct xt_entry_match,
- u.user.name),
- m->u.kernel.match->name,
- strlen(m->u.kernel.match->name)+1)
- != 0) {
+ if (xt_match_to_user(m, userptr + off + i)) {
ret = -EFAULT;
goto free_counters;
}
}
t = ip6t_get_target_c(e);
- if (copy_to_user(userptr + off + e->target_offset
- + offsetof(struct xt_entry_target,
- u.user.name),
- t->u.kernel.target->name,
- strlen(t->u.kernel.target->name)+1) != 0) {
+ if (xt_target_to_user(t, userptr + off + e->target_offset)) {
ret = -EFAULT;
goto free_counters;
}
@@ -1621,6 +1613,8 @@ translate_compat_table(struct net *net,
if (!newinfo)
goto out_unlock;
+ memset(newinfo->entries, 0, size);
+
newinfo->number = compatr->num_entries;
for (i = 0; i < NF_INET_NUMHOOKS; i++) {
newinfo->hook_entry[i] = compatr->hook_entry[i];
diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c
index 590f767db5d4..a379d2f79b19 100644
--- a/net/ipv6/netfilter/ip6t_NPT.c
+++ b/net/ipv6/netfilter/ip6t_NPT.c
@@ -112,6 +112,7 @@ static struct xt_target ip6t_npt_target_reg[] __read_mostly = {
.table = "mangle",
.target = ip6t_snpt_tg,
.targetsize = sizeof(struct ip6t_npt_tginfo),
+ .usersize = offsetof(struct ip6t_npt_tginfo, adjustment),
.checkentry = ip6t_npt_checkentry,
.family = NFPROTO_IPV6,
.hooks = (1 << NF_INET_LOCAL_IN) |
@@ -123,6 +124,7 @@ static struct xt_target ip6t_npt_target_reg[] __read_mostly = {
.table = "mangle",
.target = ip6t_dnpt_tg,
.targetsize = sizeof(struct ip6t_npt_tginfo),
+ .usersize = offsetof(struct ip6t_npt_tginfo, adjustment),
.checkentry = ip6t_npt_checkentry,
.family = NFPROTO_IPV6,
.hooks = (1 << NF_INET_PRE_ROUTING) |
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index 7117e5bef412..96e91bbc9329 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -158,7 +158,7 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
fl6.fl6_sport = otcph->dest;
fl6.fl6_dport = otcph->source;
fl6.flowi6_mark = IP6_REPLY_MARK(net, oldskb->mark);
- security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6));
+ security_skb_classify_flow(oldskb, flowi6_to_flowi_common(&fl6));
dst = ip6_route_output(net, NULL, &fl6);
if (dst == NULL || dst->error) {
dst_release(dst);
diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c
index 8bfd470cbe72..831f86e1ec08 100644
--- a/net/ipv6/netfilter/nft_dup_ipv6.c
+++ b/net/ipv6/netfilter/nft_dup_ipv6.c
@@ -26,7 +26,7 @@ static void nft_dup_ipv6_eval(const struct nft_expr *expr,
{
struct nft_dup_ipv6 *priv = nft_expr_priv(expr);
struct in6_addr *gw = (struct in6_addr *)&regs->data[priv->sreg_addr];
- int oif = regs->data[priv->sreg_dev];
+ int oif = priv->sreg_dev ? regs->data[priv->sreg_dev] : -1;
nf_dup_ipv6(pkt->net, pkt->skb, pkt->hook, gw, oif);
}
@@ -57,7 +57,9 @@ static int nft_dup_ipv6_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
struct nft_dup_ipv6 *priv = nft_expr_priv(expr);
- if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr) ||
+ if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr))
+ goto nla_put_failure;
+ if (priv->sreg_dev &&
nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev))
goto nla_put_failure;
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index 6b896cc9604e..e2de4b0479f6 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -14,29 +14,11 @@ static u32 __ipv6_select_ident(struct net *net,
const struct in6_addr *dst,
const struct in6_addr *src)
{
- const struct {
- struct in6_addr dst;
- struct in6_addr src;
- } __aligned(SIPHASH_ALIGNMENT) combined = {
- .dst = *dst,
- .src = *src,
- };
- u32 hash, id;
-
- /* Note the following code is not safe, but this is okay. */
- if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key)))
- get_random_bytes(&net->ipv4.ip_id_key,
- sizeof(net->ipv4.ip_id_key));
-
- hash = siphash(&combined, sizeof(combined), &net->ipv4.ip_id_key);
-
- /* Treat id of 0 as unset and if we get 0 back from ip_idents_reserve,
- * set the hight order instead thus minimizing possible future
- * collisions.
- */
- id = ip_idents_reserve(hash, 1);
- if (unlikely(!id))
- id = 1 << 31;
+ u32 id;
+
+ do {
+ id = prandom_u32();
+ } while (!id);
return id;
}
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 40b835720722..9fced723d7e6 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -144,7 +144,7 @@ int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl6.flowi6_uid = sk->sk_uid;
fl6.fl6_icmp_type = user_icmph.icmp6_type;
fl6.fl6_icmp_code = user_icmph.icmp6_code;
- security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+ security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr);
if (IS_ERR(dst))
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 30db0167bd19..ebd64b99dc98 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -757,6 +757,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int hlimit = -1;
int tclass = -1;
int dontfrag = -1;
+ int hdrincl;
u16 proto;
int err;
@@ -770,6 +771,13 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (msg->msg_flags & MSG_OOB)
return -EOPNOTSUPP;
+ /* hdrincl should be READ_ONCE(inet->hdrincl)
+ * but READ_ONCE() doesn't work with bit fields.
+ * Doing this indirectly yields the same result.
+ */
+ hdrincl = inet->hdrincl;
+ hdrincl = READ_ONCE(hdrincl);
+
/*
* Get and verify the address.
*/
@@ -877,12 +885,12 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl6.flowi6_oif = np->mcast_oif;
else if (!fl6.flowi6_oif)
fl6.flowi6_oif = np->ucast_oif;
- security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+ security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
- if (inet->hdrincl)
+ if (hdrincl)
fl6.flowi6_flags |= FLOWI_FLAG_KNOWN_NH;
- dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
+ dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
goto out;
@@ -900,7 +908,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
goto do_confirm;
back_from_confirm:
- if (inet->hdrincl)
+ if (hdrincl)
err = rawv6_send_hdrinc(sk, msg, len, &fl6, &dst, msg->msg_flags);
else {
lock_sock(sk);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index f1514e3d89db..7b88fac606e1 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -770,8 +770,8 @@ err_protocol:
void ipv6_frag_exit(void)
{
- inet_frags_fini(&ip6_frags);
ip6_frags_sysctl_unregister();
unregister_pernet_subsys(&ip6_frags_ops);
inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
+ inet_frags_fini(&ip6_frags);
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 11f67eaae1c1..decb94219a70 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -336,9 +336,9 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net,
return rt;
}
-static struct rt6_info *ip6_dst_alloc(struct net *net,
- struct net_device *dev,
- int flags)
+struct rt6_info *ip6_dst_alloc(struct net *net,
+ struct net_device *dev,
+ int flags)
{
struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
@@ -362,6 +362,7 @@ static struct rt6_info *ip6_dst_alloc(struct net *net,
return rt;
}
+EXPORT_SYMBOL(ip6_dst_alloc);
static void ip6_dst_destroy(struct dst_entry *dst)
{
@@ -1763,6 +1764,37 @@ static int ip6_convert_metrics(struct mx6_config *mxc,
return -EINVAL;
}
+static struct rt6_info *ip6_nh_lookup_table(struct net *net,
+ struct fib6_config *cfg,
+ const struct in6_addr *gw_addr)
+{
+ struct flowi6 fl6 = {
+ .flowi6_oif = cfg->fc_ifindex,
+ .daddr = *gw_addr,
+ .saddr = cfg->fc_prefsrc,
+ };
+ struct fib6_table *table;
+ struct rt6_info *rt;
+ int flags = 0;
+
+ table = fib6_get_table(net, cfg->fc_table);
+ if (!table)
+ return NULL;
+
+ if (!ipv6_addr_any(&cfg->fc_prefsrc))
+ flags |= RT6_LOOKUP_F_HAS_SADDR;
+
+ rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
+
+ /* if table lookup failed, fall back to full lookup */
+ if (rt == net->ipv6.ip6_null_entry) {
+ ip6_rt_put(rt);
+ rt = NULL;
+ }
+
+ return rt;
+}
+
static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
{
struct net *net = cfg->fc_nlinfo.nl_net;
@@ -1938,7 +1970,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
rt->rt6i_gateway = *gw_addr;
if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
- struct rt6_info *grt;
+ struct rt6_info *grt = NULL;
/* IPv6 strictly inhibits using not link-local
addresses as nexthop address.
@@ -1950,7 +1982,12 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
if (!(gwa_type & IPV6_ADDR_UNICAST))
goto out;
- grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
+ if (cfg->fc_table)
+ grt = ip6_nh_lookup_table(net, cfg, gw_addr);
+
+ if (!grt)
+ grt = rt6_lookup(net, gw_addr, NULL,
+ cfg->fc_ifindex, 1);
err = -EHOSTUNREACH;
if (!grt)
@@ -2273,12 +2310,12 @@ static struct rt6_info *rt6_get_route_info(struct net_device *dev,
const struct in6_addr *prefix, int prefixlen,
const struct in6_addr *gwaddr)
{
+ u32 tb_id = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_INFO);
struct fib6_node *fn;
struct rt6_info *rt = NULL;
struct fib6_table *table;
- table = fib6_get_table(dev_net(dev),
- addrconf_rt_table(dev, RT6_TABLE_INFO));
+ table = fib6_get_table(dev_net(dev), tb_id);
if (!table)
return NULL;
@@ -2317,7 +2354,7 @@ static struct rt6_info *rt6_add_route_info(struct net_device *dev,
.fc_nlinfo.nl_net = dev_net(dev),
};
- cfg.fc_table = l3mdev_fib_table_by_index(dev_net(dev), dev->ifindex) ? : addrconf_rt_table(dev, RT6_TABLE_INFO);
+ cfg.fc_table = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_INFO);
cfg.fc_dst = *prefix;
cfg.fc_gateway = *gwaddr;
@@ -2333,11 +2370,11 @@ static struct rt6_info *rt6_add_route_info(struct net_device *dev,
struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
{
+ u32 tb_id = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_DFLT);
struct rt6_info *rt;
struct fib6_table *table;
- table = fib6_get_table(dev_net(dev),
- addrconf_rt_table(dev, RT6_TABLE_MAIN));
+ table = fib6_get_table(dev_net(dev), tb_id);
if (!table)
return NULL;
@@ -2940,8 +2977,11 @@ static int ip6_route_multipath_add(struct fib6_config *cfg)
* nexthops have been replaced by first new, the rest should
* be added to it.
*/
- cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
- NLM_F_REPLACE);
+ if (cfg->fc_nlinfo.nlh) {
+ cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
+ NLM_F_REPLACE);
+ cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_CREATE;
+ }
nhn++;
}
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 5039486c4f86..6a5442fdbd51 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -209,8 +209,6 @@ static int ipip6_tunnel_create(struct net_device *dev)
dev->rtnl_link_ops = &sit_link_ops;
- dev_hold(dev);
-
ipip6_tunnel_link(sitn, t);
return 0;
@@ -1079,7 +1077,6 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
if (tdev && !netif_is_l3_master(tdev)) {
int t_hlen = tunnel->hlen + sizeof(struct iphdr);
- dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
dev->mtu = tdev->mtu - t_hlen;
if (dev->mtu < IPV6_MIN_MTU)
dev->mtu = IPV6_MIN_MTU;
@@ -1371,7 +1368,6 @@ static void ipip6_tunnel_setup(struct net_device *dev)
dev->destructor = ipip6_dev_free;
dev->type = ARPHRD_SIT;
- dev->hard_header_len = LL_MAX_HEADER + t_hlen;
dev->mtu = ETH_DATA_LEN - t_hlen;
dev->flags = IFF_NOARP;
netif_keep_dst(dev);
@@ -1401,7 +1397,7 @@ static int ipip6_tunnel_init(struct net_device *dev)
dev->tstats = NULL;
return err;
}
-
+ dev_hold(dev);
return 0;
}
@@ -1417,7 +1413,6 @@ static void __net_init ipip6_fb_tunnel_init(struct net_device *dev)
iph->ihl = 5;
iph->ttl = 64;
- dev_hold(dev);
rcu_assign_pointer(sitn->tunnels_wc[0], tunnel);
}
@@ -1586,8 +1581,11 @@ static int ipip6_newlink(struct net *src_net, struct net_device *dev,
}
#ifdef CONFIG_IPV6_SIT_6RD
- if (ipip6_netlink_6rd_parms(data, &ip6rd))
+ if (ipip6_netlink_6rd_parms(data, &ip6rd)) {
err = ipip6_tunnel_update_6rd(nt, &ip6rd);
+ if (err < 0)
+ unregister_netdevice_queue(dev, NULL);
+ }
#endif
return err;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 7f3667635431..bc4f37ef9e1d 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -144,7 +144,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
__u32 cookie = ntohl(th->ack_seq) - 1;
struct sock *ret = sk;
struct request_sock *req;
- int mss;
+ int full_space, mss;
struct dst_entry *dst;
__u8 rcv_wscale;
@@ -230,15 +230,21 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
fl6.fl6_dport = ireq->ir_rmt_port;
fl6.fl6_sport = inet_sk(sk)->inet_sport;
fl6.flowi6_uid = sk->sk_uid;
- security_req_classify_flow(req, flowi6_to_flowi(&fl6));
+ security_req_classify_flow(req, flowi6_to_flowi_common(&fl6));
- dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
+ dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
if (IS_ERR(dst))
goto out_free;
}
req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
- tcp_select_initial_window(tcp_full_space(sk), req->mss,
+ /* limit the window selection if the user enforce a smaller rx buffer */
+ full_space = tcp_full_space(sk);
+ if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
+ (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0))
+ req->rsk_window_clamp = full_space;
+
+ tcp_select_initial_window(full_space, req->mss,
&req->rsk_rcv_wnd, &req->rsk_window_clamp,
ireq->wscale_ok, &rcv_wscale,
dst_metric(dst, RTAX_INITRWND));
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 27d5bc29188a..9de573f89ea7 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -244,9 +244,9 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
final_p = fl6_update_dst(&fl6, opt, &final);
- security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+ security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
- dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
+ dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
goto failure;
@@ -309,11 +309,20 @@ failure:
static void tcp_v6_mtu_reduced(struct sock *sk)
{
struct dst_entry *dst;
+ u32 mtu;
if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
return;
- dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
+ mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
+
+ /* Drop requests trying to increase our current mss.
+ * Check done in __ip6_rt_update_pmtu() is too late.
+ */
+ if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
+ return;
+
+ dst = inet6_csk_update_pmtu(sk, mtu);
if (!dst)
return;
@@ -392,6 +401,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
}
if (type == ICMPV6_PKT_TOOBIG) {
+ u32 mtu = ntohl(info);
+
/* We are not interested in TCP_LISTEN and open_requests
* (SYN-ACKs send out by Linux are always <576bytes so
* they should go through unfragmented).
@@ -402,7 +413,11 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (!ip6_sk_accept_pmtu(sk))
goto out;
- tp->mtu_info = ntohl(info);
+ if (mtu < IPV6_MIN_MTU)
+ goto out;
+
+ WRITE_ONCE(tp->mtu_info, mtu);
+
if (!sock_owned_by_user(sk))
tcp_v6_mtu_reduced(sk);
else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
@@ -816,19 +831,24 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
fl6.flowi6_proto = IPPROTO_TCP;
if (rt6_need_strict(&fl6.daddr) && !oif)
fl6.flowi6_oif = tcp_v6_iif(skb);
- else
+ else {
+ if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
+ oif = skb->skb_iif;
+
fl6.flowi6_oif = oif;
+ }
+
fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
fl6.fl6_dport = t1->dest;
fl6.fl6_sport = t1->source;
fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
- security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
+ security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
/* Pass a socket to ip6_dst_lookup either it is for RST
* Underlying function will use this to retrieve the network
* namespace
*/
- dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
+ dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
if (!IS_ERR(dst)) {
skb_dst_set(buff, dst);
ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass);
@@ -875,7 +895,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
* no RST generated if md5 hash doesn't match.
*/
sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
- &tcp_hashinfo, &ipv6h->saddr,
+ &tcp_hashinfo, NULL, 0,
+ &ipv6h->saddr,
th->source, &ipv6h->daddr,
ntohs(th->source), tcp_v6_iif(skb));
if (!sk1)
@@ -975,6 +996,11 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (!ipv6_unicast_destination(skb))
goto drop;
+ if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
+ IP6_INC_STATS_BH(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
+ return 0;
+ }
+
return tcp_conn_request(&tcp6_request_sock_ops,
&tcp_request_sock_ipv6_ops, sk, skb);
@@ -1227,9 +1253,6 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
if (skb->protocol == htons(ETH_P_IP))
return tcp_v4_do_rcv(sk, skb);
- if (tcp_filter(sk, skb))
- goto discard;
-
/*
* socket locking is here for SMP purposes as backlog rcv
* is currently called with bh processing disabled.
@@ -1398,8 +1421,8 @@ static int tcp_v6_rcv(struct sk_buff *skb)
hdr = ipv6_hdr(skb);
lookup:
- sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest,
- inet6_iif(skb));
+ sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
+ th->source, th->dest, inet6_iif(skb));
if (!sk)
goto no_tcp_socket;
@@ -1530,6 +1553,7 @@ do_time_wait:
struct sock *sk2;
sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
+ skb, __tcp_hdrlen(th),
&ipv6_hdr(skb)->saddr, th->source,
&ipv6_hdr(skb)->daddr,
ntohs(th->dest), tcp_v6_iif(skb));
@@ -1899,7 +1923,7 @@ struct proto tcpv6_prot = {
.sendpage = tcp_sendpage,
.backlog_rcv = tcp_v6_do_rcv,
.release_cb = tcp_release_cb,
- .hash = inet_hash,
+ .hash = inet6_hash,
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
.enter_memory_pressure = tcp_enter_memory_pressure,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index e9db0ff606b9..e670c45829bd 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -37,6 +37,7 @@
#include <linux/slab.h>
#include <asm/uaccess.h>
+#include <net/addrconf.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/transp_v6.h>
@@ -48,6 +49,7 @@
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
#include <net/busy_poll.h>
+#include <net/sock_reuseport.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
@@ -77,34 +79,6 @@ static u32 udp6_ehashfn(const struct net *net,
udp_ipv6_hash_secret + net_hash_mix(net));
}
-int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
-{
- const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
- int sk2_ipv6only = inet_v6_ipv6only(sk2);
- int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
- int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
-
- /* if both are mapped, treat as IPv4 */
- if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED)
- return (!sk2_ipv6only &&
- (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr ||
- sk->sk_rcv_saddr == sk2->sk_rcv_saddr));
-
- if (addr_type2 == IPV6_ADDR_ANY &&
- !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
- return 1;
-
- if (addr_type == IPV6_ADDR_ANY &&
- !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED))
- return 1;
-
- if (sk2_rcv_saddr6 &&
- ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6))
- return 1;
-
- return 0;
-}
-
static u32 udp6_portaddr_hash(const struct net *net,
const struct in6_addr *addr6,
unsigned int port)
@@ -236,28 +210,32 @@ static inline int compute_score2(struct sock *sk, struct net *net,
static struct sock *udp6_lib_lookup2(struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, unsigned int hnum, int dif,
- struct udp_hslot *hslot2, unsigned int slot2)
+ struct udp_hslot *hslot2, unsigned int slot2,
+ struct sk_buff *skb)
{
struct sock *sk, *result;
- struct hlist_nulls_node *node;
int score, badness, matches = 0, reuseport = 0;
u32 hash = 0;
-begin:
result = NULL;
badness = -1;
- udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) {
+ udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
score = compute_score2(sk, net, saddr, sport,
daddr, hnum, dif);
if (score > badness) {
- result = sk;
- badness = score;
reuseport = sk->sk_reuseport;
if (reuseport) {
hash = udp6_ehashfn(net, daddr, hnum,
saddr, sport);
+
+ result = reuseport_select_sock(sk, hash, skb,
+ sizeof(struct udphdr));
+ if (result)
+ return result;
matches = 1;
}
+ result = sk;
+ badness = score;
} else if (score == badness && reuseport) {
matches++;
if (reciprocal_scale(hash, matches) == 0)
@@ -265,40 +243,23 @@ begin:
hash = next_pseudo_random32(hash);
}
}
- /*
- * if the nulls value we got at the end of this lookup is
- * not the expected one, we must restart lookup.
- * We probably met an item that was moved to another chain.
- */
- if (get_nulls_value(node) != slot2)
- goto begin;
-
- if (result) {
- if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
- result = NULL;
- else if (unlikely(compute_score2(result, net, saddr, sport,
- daddr, hnum, dif) < badness)) {
- sock_put(result);
- goto begin;
- }
- }
return result;
}
+/* rcu_read_lock() must be held */
struct sock *__udp6_lib_lookup(struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, __be16 dport,
- int dif, struct udp_table *udptable)
+ int dif, struct udp_table *udptable,
+ struct sk_buff *skb)
{
struct sock *sk, *result;
- struct hlist_nulls_node *node;
unsigned short hnum = ntohs(dport);
unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
int score, badness, matches = 0, reuseport = 0;
u32 hash = 0;
- rcu_read_lock();
if (hslot->count > 10) {
hash2 = udp6_portaddr_hash(net, daddr, hnum);
slot2 = hash2 & udptable->mask;
@@ -308,7 +269,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
result = udp6_lib_lookup2(net, saddr, sport,
daddr, hnum, dif,
- hslot2, slot2);
+ hslot2, slot2, skb);
if (!result) {
hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum);
slot2 = hash2 & udptable->mask;
@@ -318,25 +279,28 @@ struct sock *__udp6_lib_lookup(struct net *net,
result = udp6_lib_lookup2(net, saddr, sport,
&in6addr_any, hnum, dif,
- hslot2, slot2);
+ hslot2, slot2, skb);
}
- rcu_read_unlock();
return result;
}
begin:
result = NULL;
badness = -1;
- sk_nulls_for_each_rcu(sk, node, &hslot->head) {
+ sk_for_each_rcu(sk, &hslot->head) {
score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif);
if (score > badness) {
- result = sk;
- badness = score;
reuseport = sk->sk_reuseport;
if (reuseport) {
hash = udp6_ehashfn(net, daddr, hnum,
saddr, sport);
+ result = reuseport_select_sock(sk, hash, skb,
+ sizeof(struct udphdr));
+ if (result)
+ return result;
matches = 1;
}
+ result = sk;
+ badness = score;
} else if (score == badness && reuseport) {
matches++;
if (reciprocal_scale(hash, matches) == 0)
@@ -344,24 +308,6 @@ begin:
hash = next_pseudo_random32(hash);
}
}
- /*
- * if the nulls value we got at the end of this lookup is
- * not the expected one, we must restart lookup.
- * We probably met an item that was moved to another chain.
- */
- if (get_nulls_value(node) != slot)
- goto begin;
-
- if (result) {
- if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
- result = NULL;
- else if (unlikely(compute_score(result, net, hnum, saddr, sport,
- daddr, dport, dif) < badness)) {
- sock_put(result);
- goto begin;
- }
- }
- rcu_read_unlock();
return result;
}
EXPORT_SYMBOL_GPL(__udp6_lib_lookup);
@@ -378,15 +324,27 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
return sk;
return __udp6_lib_lookup(dev_net(skb_dst(skb)->dev), &iph->saddr, sport,
&iph->daddr, dport, inet6_iif(skb),
- udptable);
+ udptable, skb);
}
+/* Must be called under rcu_read_lock().
+ * Does increment socket refcount.
+ */
+#if IS_ENABLED(CONFIG_NETFILTER_XT_MATCH_SOCKET) || \
+ IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TPROXY)
struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, __be16 dport, int dif)
{
- return __udp6_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table);
+ struct sock *sk;
+
+ sk = __udp6_lib_lookup(net, saddr, sport, daddr, dport,
+ dif, &udp_table, NULL);
+ if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+ sk = NULL;
+ return sk;
}
EXPORT_SYMBOL_GPL(udp6_lib_lookup);
+#endif
/*
* This should be easy, if there is something there we
@@ -551,8 +509,8 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
int err;
struct net *net = dev_net(skb->dev);
- sk = __udp6_lib_lookup(net, daddr, uh->dest,
- saddr, uh->source, inet6_iif(skb), udptable);
+ sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
+ inet6_iif(skb), udptable, NULL);
if (!sk) {
ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
ICMP6_MIB_INERRORS);
@@ -583,7 +541,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
sk->sk_err = err;
sk->sk_error_report(sk);
out:
- sock_put(sk);
+ return;
}
int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
@@ -673,7 +631,7 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
/*
* UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c).
*/
- if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {
+ if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {
if (up->pcrlen == 0) { /* full coverage was set */
net_dbg_ratelimited("UDPLITE6: partial coverage %d while full coverage %d requested\n",
@@ -687,10 +645,8 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
}
}
- if (rcu_access_pointer(sk->sk_filter)) {
- if (udp_lib_checksum_complete(skb))
- goto csum_error;
- }
+ if (udp_lib_checksum_complete(skb))
+ goto csum_error;
if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
UDP6_INC_STATS_BH(sock_net(sk),
@@ -745,33 +701,6 @@ static bool __udp_v6_is_mcast_sock(struct net *net, struct sock *sk,
return true;
}
-static void flush_stack(struct sock **stack, unsigned int count,
- struct sk_buff *skb, unsigned int final)
-{
- struct sk_buff *skb1 = NULL;
- struct sock *sk;
- unsigned int i;
-
- for (i = 0; i < count; i++) {
- sk = stack[i];
- if (likely(!skb1))
- skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC);
- if (!skb1) {
- atomic_inc(&sk->sk_drops);
- UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS,
- IS_UDPLITE(sk));
- UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS,
- IS_UDPLITE(sk));
- }
-
- if (skb1 && udpv6_queue_rcv_skb(sk, skb1) <= 0)
- skb1 = NULL;
- sock_put(sk);
- }
- if (unlikely(skb1))
- kfree_skb(skb1);
-}
-
static void udp6_csum_zero_error(struct sk_buff *skb)
{
/* RFC 2460 section 8.1 says that we SHOULD log
@@ -790,46 +719,51 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
const struct in6_addr *saddr, const struct in6_addr *daddr,
struct udp_table *udptable, int proto)
{
- struct sock *sk, *stack[256 / sizeof(struct sock *)];
+ struct sock *sk, *first = NULL;
const struct udphdr *uh = udp_hdr(skb);
- struct hlist_nulls_node *node;
unsigned short hnum = ntohs(uh->dest);
struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum);
- int dif = inet6_iif(skb);
- unsigned int count = 0, offset = offsetof(typeof(*sk), sk_nulls_node);
+ unsigned int offset = offsetof(typeof(*sk), sk_node);
unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10);
- bool inner_flushed = false;
+ int dif = inet6_iif(skb);
+ struct hlist_node *node;
+ struct sk_buff *nskb;
if (use_hash2) {
hash2_any = udp6_portaddr_hash(net, &in6addr_any, hnum) &
- udp_table.mask;
- hash2 = udp6_portaddr_hash(net, daddr, hnum) & udp_table.mask;
+ udptable->mask;
+ hash2 = udp6_portaddr_hash(net, daddr, hnum) & udptable->mask;
start_lookup:
- hslot = &udp_table.hash2[hash2];
+ hslot = &udptable->hash2[hash2];
offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
}
- spin_lock(&hslot->lock);
- sk_nulls_for_each_entry_offset(sk, node, &hslot->head, offset) {
- if (__udp_v6_is_mcast_sock(net, sk,
- uh->dest, daddr,
- uh->source, saddr,
- dif, hnum) &&
- /* If zero checksum and no_check is not on for
- * the socket then skip it.
- */
- (uh->check || udp_sk(sk)->no_check6_rx)) {
- if (unlikely(count == ARRAY_SIZE(stack))) {
- flush_stack(stack, count, skb, ~0);
- inner_flushed = true;
- count = 0;
- }
- stack[count++] = sk;
- sock_hold(sk);
+ sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) {
+ if (!__udp_v6_is_mcast_sock(net, sk, uh->dest, daddr,
+ uh->source, saddr, dif, hnum))
+ continue;
+ /* If zero checksum and no_check is not on for
+ * the socket then skip it.
+ */
+ if (!uh->check && !udp_sk(sk)->no_check6_rx)
+ continue;
+ if (!first) {
+ first = sk;
+ continue;
+ }
+ nskb = skb_clone(skb, GFP_ATOMIC);
+ if (unlikely(!nskb)) {
+ atomic_inc(&sk->sk_drops);
+ UDP6_INC_STATS_BH(net, UDP_MIB_RCVBUFERRORS,
+ IS_UDPLITE(sk));
+ UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS,
+ IS_UDPLITE(sk));
+ continue;
}
- }
- spin_unlock(&hslot->lock);
+ if (udpv6_queue_rcv_skb(sk, nskb) > 0)
+ consume_skb(nskb);
+ }
/* Also lookup *:port if we are using hash2 and haven't done so yet. */
if (use_hash2 && hash2 != hash2_any) {
@@ -837,13 +771,13 @@ start_lookup:
goto start_lookup;
}
- if (count) {
- flush_stack(stack, count, skb, count - 1);
+ if (first) {
+ if (udpv6_queue_rcv_skb(first, skb) > 0)
+ consume_skb(skb);
} else {
- if (!inner_flushed)
- UDP6_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI,
- proto == IPPROTO_UDPLITE);
- consume_skb(skb);
+ kfree_skb(skb);
+ UDP6_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI,
+ proto == IPPROTO_UDPLITE);
}
return 0;
}
@@ -851,10 +785,10 @@ start_lookup:
int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
int proto)
{
+ const struct in6_addr *saddr, *daddr;
struct net *net = dev_net(skb->dev);
- struct sock *sk;
struct udphdr *uh;
- const struct in6_addr *saddr, *daddr;
+ struct sock *sk;
u32 ulen = 0;
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
@@ -908,7 +842,6 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
int ret;
if (!uh->check && !udp_sk(sk)->no_check6_rx) {
- sock_put(sk);
udp6_csum_zero_error(skb);
goto csum_error;
}
@@ -918,7 +851,6 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
ip6_compute_pseudo);
ret = udpv6_queue_rcv_skb(sk, skb);
- sock_put(sk);
/* a return value > 0 means to resubmit the input */
if (ret > 0)
@@ -965,7 +897,6 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
int dif)
{
struct sock *sk;
- struct hlist_nulls_node *hnode;
unsigned short hnum = ntohs(loc_port);
unsigned int hash2 = udp6_portaddr_hash(net, loc_addr, hnum);
unsigned int slot2 = hash2 & udp_table.mask;
@@ -973,7 +904,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum);
- udp_portaddr_for_each_entry_rcu(sk, hnode, &hslot2->head) {
+ udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
if (sk->sk_state == TCP_ESTABLISHED &&
INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif))
return sk;
@@ -1364,7 +1295,7 @@ do_udp_sendmsg:
} else if (!fl6.flowi6_oif)
fl6.flowi6_oif = np->ucast_oif;
- security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+ security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p);
if (IS_ERR(dst)) {
@@ -1618,7 +1549,6 @@ struct proto udpv6_prot = {
.sysctl_wmem = &sysctl_udp_wmem_min,
.sysctl_rmem = &sysctl_udp_rmem_min,
.obj_size = sizeof(struct udp6_sock),
- .slab_flags = SLAB_DESTROY_BY_RCU,
.h.udp_table = &udp_table,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_udpv6_setsockopt,
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index d1eaeeaa34d2..af2895c77ed6 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -50,7 +50,6 @@ struct proto udplitev6_prot = {
.unhash = udp_lib_unhash,
.get_port = udp_v6_get_port,
.obj_size = sizeof(struct udp6_sock),
- .slab_flags = SLAB_DESTROY_BY_RCU,
.h.udp_table = &udplite_table,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_udpv6_setsockopt,
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 0eaab1fa6be5..b5789562aded 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -21,8 +21,10 @@ int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb)
return xfrm6_extract_header(skb);
}
-int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi)
+int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
+ struct ip6_tnl *t)
{
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t;
XFRM_SPI_SKB_CB(skb)->family = AF_INET6;
XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr);
return xfrm_input(skb, nexthdr, spi, 0);
@@ -48,13 +50,18 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
return -1;
}
-int xfrm6_rcv(struct sk_buff *skb)
+int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t)
{
return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff],
- 0);
+ 0, t);
}
-EXPORT_SYMBOL(xfrm6_rcv);
+EXPORT_SYMBOL(xfrm6_rcv_tnl);
+int xfrm6_rcv(struct sk_buff *skb)
+{
+ return xfrm6_rcv_tnl(skb, NULL);
+}
+EXPORT_SYMBOL(xfrm6_rcv);
int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
xfrm_address_t *saddr, u8 proto)
{
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 64862c5084ee..ef6cc9eb0e45 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -125,9 +125,7 @@ int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb)
{
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
-#ifdef CONFIG_NETFILTER
IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
-#endif
return xfrm_output(sk, skb);
}
@@ -143,7 +141,7 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct xfrm_state *x = dst->xfrm;
- int mtu;
+ unsigned int mtu;
bool toobig;
#ifdef CONFIG_NETFILTER
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index f9d493c59d6c..07b7b2540579 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -239,7 +239,7 @@ static int xfrm6_tunnel_rcv(struct sk_buff *skb)
__be32 spi;
spi = xfrm6_tunnel_spi_lookup(net, (const xfrm_address_t *)&iph->saddr);
- return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi);
+ return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi, NULL);
}
static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 7cc9db38e1b6..0e8f8a3f7b23 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -839,7 +839,7 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
struct sock *sk = sock->sk;
struct irda_sock *new, *self = irda_sk(sk);
struct sock *newsk;
- struct sk_buff *skb;
+ struct sk_buff *skb = NULL;
int err;
err = irda_create(sock_net(sk), newsock, sk->sk_protocol, 0);
@@ -907,7 +907,6 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
err = -EPERM; /* value does not seem to make sense. -arnd */
if (!new->tsap) {
pr_debug("%s(), dup failed!\n", __func__);
- kfree_skb(skb);
goto out;
}
@@ -926,7 +925,6 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
/* Clean up the original one to keep it in listen state */
irttp_listen(self->tsap);
- kfree_skb(skb);
sk->sk_ack_backlog--;
newsock->state = SS_CONNECTED;
@@ -934,6 +932,7 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
irda_connect_response(new);
err = 0;
out:
+ kfree_skb(skb);
release_sock(sk);
return err;
}
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 5984cc35d508..78a4b9dd6167 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1513,7 +1513,8 @@ static int iucv_sock_shutdown(struct socket *sock, int how)
break;
}
- if (how == SEND_SHUTDOWN || how == SHUTDOWN_MASK) {
+ if ((how == SEND_SHUTDOWN || how == SHUTDOWN_MASK) &&
+ sk->sk_state == IUCV_CONNECTED) {
if (iucv->transport == AF_IUCV_TRANS_IUCV) {
txmsg.class = 0;
txmsg.tag = 0;
@@ -1723,7 +1724,7 @@ static int iucv_callback_connreq(struct iucv_path *path,
}
/* Create the new socket */
- nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC, 0);
+ nsk = iucv_sock_alloc(NULL, sk->sk_protocol, GFP_ATOMIC, 0);
if (!nsk) {
err = pr_iucv->path_sever(path, user_data);
iucv_path_free(path);
@@ -1933,7 +1934,7 @@ static int afiucv_hs_callback_syn(struct sock *sk, struct sk_buff *skb)
goto out;
}
- nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC, 0);
+ nsk = iucv_sock_alloc(NULL, sk->sk_protocol, GFP_ATOMIC, 0);
bh_lock_sock(sk);
if ((sk->sk_state != IUCV_LISTEN) ||
sk_acceptq_is_full(sk) ||
@@ -2392,6 +2393,13 @@ out:
return err;
}
+static void afiucv_iucv_exit(void)
+{
+ device_unregister(af_iucv_dev);
+ driver_unregister(&af_iucv_driver);
+ pr_iucv->iucv_unregister(&af_iucv_handler, 0);
+}
+
static int __init afiucv_init(void)
{
int err;
@@ -2425,11 +2433,18 @@ static int __init afiucv_init(void)
err = afiucv_iucv_init();
if (err)
goto out_sock;
- } else
- register_netdevice_notifier(&afiucv_netdev_notifier);
+ }
+
+ err = register_netdevice_notifier(&afiucv_netdev_notifier);
+ if (err)
+ goto out_notifier;
+
dev_add_pack(&iucv_packet_type);
return 0;
+out_notifier:
+ if (pr_iucv)
+ afiucv_iucv_exit();
out_sock:
sock_unregister(PF_IUCV);
out_proto:
@@ -2443,12 +2458,11 @@ out:
static void __exit afiucv_exit(void)
{
if (pr_iucv) {
- device_unregister(af_iucv_dev);
- driver_unregister(&af_iucv_driver);
- pr_iucv->iucv_unregister(&af_iucv_handler, 0);
+ afiucv_iucv_exit();
symbol_put(iucv_if);
- } else
- unregister_netdevice_notifier(&afiucv_netdev_notifier);
+ }
+
+ unregister_netdevice_notifier(&afiucv_netdev_notifier);
dev_remove_pack(&iucv_packet_type);
sock_unregister(PF_IUCV);
proto_unregister(&iucv_proto);
diff --git a/net/key/af_key.c b/net/key/af_key.c
index b400a4cc265b..52ab3c3c11d8 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1863,6 +1863,13 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms
if (ext_hdrs[SADB_X_EXT_FILTER - 1]) {
struct sadb_x_filter *xfilter = ext_hdrs[SADB_X_EXT_FILTER - 1];
+ if ((xfilter->sadb_x_filter_splen >=
+ (sizeof(xfrm_address_t) << 3)) ||
+ (xfilter->sadb_x_filter_dplen >=
+ (sizeof(xfrm_address_t) << 3))) {
+ mutex_unlock(&pfk->dump_lock);
+ return -EINVAL;
+ }
filter = kmalloc(sizeof(*filter), GFP_KERNEL);
if (filter == NULL) {
mutex_unlock(&pfk->dump_lock);
@@ -2916,7 +2923,7 @@ static int count_ah_combs(const struct xfrm_tmpl *t)
break;
if (!aalg->pfkey_supported)
continue;
- if (aalg_tmpl_set(t, aalg) && aalg->available)
+ if (aalg_tmpl_set(t, aalg))
sz += sizeof(struct sadb_comb);
}
return sz + sizeof(struct sadb_prop);
@@ -2934,7 +2941,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t)
if (!ealg->pfkey_supported)
continue;
- if (!(ealg_tmpl_set(t, ealg) && ealg->available))
+ if (!(ealg_tmpl_set(t, ealg)))
continue;
for (k = 1; ; k++) {
@@ -2945,7 +2952,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t)
if (!aalg->pfkey_supported)
continue;
- if (aalg_tmpl_set(t, aalg) && aalg->available)
+ if (aalg_tmpl_set(t, aalg))
sz += sizeof(struct sadb_comb);
}
}
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 429dbb064240..9a85b0133991 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -112,53 +112,19 @@ struct l2tp_net {
spinlock_t l2tp_session_hlist_lock;
};
-static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel);
static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk)
{
return sk->sk_user_data;
}
-static inline struct l2tp_net *l2tp_pernet(struct net *net)
+static inline struct l2tp_net *l2tp_pernet(const struct net *net)
{
BUG_ON(!net);
return net_generic(net, l2tp_net_id);
}
-/* Tunnel reference counts. Incremented per session that is added to
- * the tunnel.
- */
-static inline void l2tp_tunnel_inc_refcount_1(struct l2tp_tunnel *tunnel)
-{
- atomic_inc(&tunnel->ref_count);
-}
-
-static inline void l2tp_tunnel_dec_refcount_1(struct l2tp_tunnel *tunnel)
-{
- if (atomic_dec_and_test(&tunnel->ref_count))
- l2tp_tunnel_free(tunnel);
-}
-#ifdef L2TP_REFCNT_DEBUG
-#define l2tp_tunnel_inc_refcount(_t) \
-do { \
- pr_debug("l2tp_tunnel_inc_refcount: %s:%d %s: cnt=%d\n", \
- __func__, __LINE__, (_t)->name, \
- atomic_read(&_t->ref_count)); \
- l2tp_tunnel_inc_refcount_1(_t); \
-} while (0)
-#define l2tp_tunnel_dec_refcount(_t) \
-do { \
- pr_debug("l2tp_tunnel_dec_refcount: %s:%d %s: cnt=%d\n", \
- __func__, __LINE__, (_t)->name, \
- atomic_read(&_t->ref_count)); \
- l2tp_tunnel_dec_refcount_1(_t); \
-} while (0)
-#else
-#define l2tp_tunnel_inc_refcount(t) l2tp_tunnel_inc_refcount_1(t)
-#define l2tp_tunnel_dec_refcount(t) l2tp_tunnel_dec_refcount_1(t)
-#endif
-
/* Session hash global list for L2TPv3.
* The session_id SHOULD be random according to RFC3931, but several
* L2TP implementations use incrementing session_ids. So we do a real
@@ -216,27 +182,6 @@ static void l2tp_tunnel_sock_put(struct sock *sk)
sock_put(sk);
}
-/* Lookup a session by id in the global session list
- */
-static struct l2tp_session *l2tp_session_find_2(struct net *net, u32 session_id)
-{
- struct l2tp_net *pn = l2tp_pernet(net);
- struct hlist_head *session_list =
- l2tp_session_id_hash_2(pn, session_id);
- struct l2tp_session *session;
-
- rcu_read_lock_bh();
- hlist_for_each_entry_rcu(session, session_list, global_hlist) {
- if (session->session_id == session_id) {
- rcu_read_unlock_bh();
- return session;
- }
- }
- rcu_read_unlock_bh();
-
- return NULL;
-}
-
/* Session hash list.
* The session_id SHOULD be random according to RFC2661, but several
* L2TP implementations (Cisco and Microsoft) use incrementing
@@ -249,25 +194,67 @@ l2tp_session_id_hash(struct l2tp_tunnel *tunnel, u32 session_id)
return &tunnel->session_hlist[hash_32(session_id, L2TP_HASH_BITS)];
}
-/* Lookup a session by id
+/* Lookup a tunnel. A new reference is held on the returned tunnel. */
+struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id)
+{
+ const struct l2tp_net *pn = l2tp_pernet(net);
+ struct l2tp_tunnel *tunnel;
+
+ rcu_read_lock_bh();
+ list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
+ if (tunnel->tunnel_id == tunnel_id) {
+ l2tp_tunnel_inc_refcount(tunnel);
+ rcu_read_unlock_bh();
+
+ return tunnel;
+ }
+ }
+ rcu_read_unlock_bh();
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(l2tp_tunnel_get);
+
+/* Lookup a session. A new reference is held on the returned session.
+ * Optionally calls session->ref() too if do_ref is true.
*/
-struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id)
+struct l2tp_session *l2tp_session_get(const struct net *net,
+ struct l2tp_tunnel *tunnel,
+ u32 session_id, bool do_ref)
{
struct hlist_head *session_list;
struct l2tp_session *session;
- /* In L2TPv3, session_ids are unique over all tunnels and we
- * sometimes need to look them up before we know the
- * tunnel.
- */
- if (tunnel == NULL)
- return l2tp_session_find_2(net, session_id);
+ if (!tunnel) {
+ struct l2tp_net *pn = l2tp_pernet(net);
+
+ session_list = l2tp_session_id_hash_2(pn, session_id);
+
+ rcu_read_lock_bh();
+ hlist_for_each_entry_rcu(session, session_list, global_hlist) {
+ if (session->session_id == session_id) {
+ l2tp_session_inc_refcount(session);
+ if (do_ref && session->ref)
+ session->ref(session);
+ rcu_read_unlock_bh();
+
+ return session;
+ }
+ }
+ rcu_read_unlock_bh();
+
+ return NULL;
+ }
session_list = l2tp_session_id_hash(tunnel, session_id);
read_lock_bh(&tunnel->hlist_lock);
hlist_for_each_entry(session, session_list, hlist) {
if (session->session_id == session_id) {
+ l2tp_session_inc_refcount(session);
+ if (do_ref && session->ref)
+ session->ref(session);
read_unlock_bh(&tunnel->hlist_lock);
+
return session;
}
}
@@ -275,7 +262,7 @@ struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunn
return NULL;
}
-EXPORT_SYMBOL_GPL(l2tp_session_find);
+EXPORT_SYMBOL_GPL(l2tp_session_get);
struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth,
bool do_ref)
@@ -306,7 +293,9 @@ EXPORT_SYMBOL_GPL(l2tp_session_get_nth);
/* Lookup a session by interface name.
* This is very inefficient but is only used by management interfaces.
*/
-struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname)
+struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
+ const char *ifname,
+ bool do_ref)
{
struct l2tp_net *pn = l2tp_pernet(net);
int hash;
@@ -316,7 +305,11 @@ struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname)
for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) {
hlist_for_each_entry_rcu(session, &pn->l2tp_session_hlist[hash], global_hlist) {
if (!strcmp(session->ifname, ifname)) {
+ l2tp_session_inc_refcount(session);
+ if (do_ref && session->ref)
+ session->ref(session);
rcu_read_unlock_bh();
+
return session;
}
}
@@ -326,11 +319,80 @@ struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname)
return NULL;
}
-EXPORT_SYMBOL_GPL(l2tp_session_find_by_ifname);
+EXPORT_SYMBOL_GPL(l2tp_session_get_by_ifname);
+
+int l2tp_session_register(struct l2tp_session *session,
+ struct l2tp_tunnel *tunnel)
+{
+ struct l2tp_session *session_walk;
+ struct hlist_head *g_head;
+ struct hlist_head *head;
+ struct l2tp_net *pn;
+ int err;
+
+ head = l2tp_session_id_hash(tunnel, session->session_id);
+
+ write_lock_bh(&tunnel->hlist_lock);
+ if (!tunnel->acpt_newsess) {
+ err = -ENODEV;
+ goto err_tlock;
+ }
+
+ hlist_for_each_entry(session_walk, head, hlist)
+ if (session_walk->session_id == session->session_id) {
+ err = -EEXIST;
+ goto err_tlock;
+ }
+
+ if (tunnel->version == L2TP_HDR_VER_3) {
+ pn = l2tp_pernet(tunnel->l2tp_net);
+ g_head = l2tp_session_id_hash_2(l2tp_pernet(tunnel->l2tp_net),
+ session->session_id);
+
+ spin_lock_bh(&pn->l2tp_session_hlist_lock);
+
+ /* IP encap expects session IDs to be globally unique, while
+ * UDP encap doesn't.
+ */
+ hlist_for_each_entry(session_walk, g_head, global_hlist)
+ if (session_walk->session_id == session->session_id &&
+ (session_walk->tunnel->encap == L2TP_ENCAPTYPE_IP ||
+ tunnel->encap == L2TP_ENCAPTYPE_IP)) {
+ err = -EEXIST;
+ goto err_tlock_pnlock;
+ }
+
+ l2tp_tunnel_inc_refcount(tunnel);
+ sock_hold(tunnel->sock);
+ hlist_add_head_rcu(&session->global_hlist, g_head);
+
+ spin_unlock_bh(&pn->l2tp_session_hlist_lock);
+ } else {
+ l2tp_tunnel_inc_refcount(tunnel);
+ sock_hold(tunnel->sock);
+ }
+
+ hlist_add_head(&session->hlist, head);
+ write_unlock_bh(&tunnel->hlist_lock);
+
+ /* Ignore management session in session count value */
+ if (session->session_id != 0)
+ atomic_inc(&l2tp_session_count);
+
+ return 0;
+
+err_tlock_pnlock:
+ spin_unlock_bh(&pn->l2tp_session_hlist_lock);
+err_tlock:
+ write_unlock_bh(&tunnel->hlist_lock);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(l2tp_session_register);
/* Lookup a tunnel by id
*/
-struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id)
+struct l2tp_tunnel *l2tp_tunnel_find(const struct net *net, u32 tunnel_id)
{
struct l2tp_tunnel *tunnel;
struct l2tp_net *pn = l2tp_pernet(net);
@@ -348,7 +410,7 @@ struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id)
}
EXPORT_SYMBOL_GPL(l2tp_tunnel_find);
-struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth)
+struct l2tp_tunnel *l2tp_tunnel_find_nth(const struct net *net, int nth)
{
struct l2tp_net *pn = l2tp_pernet(net);
struct l2tp_tunnel *tunnel;
@@ -636,6 +698,9 @@ discard:
* a data (not control) frame before coming here. Fields up to the
* session-id have already been parsed and ptr points to the data
* after the session-id.
+ *
+ * session->ref() must have been called prior to l2tp_recv_common().
+ * session->deref() will be called automatically after skb is processed.
*/
void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
unsigned char *ptr, unsigned char *optr, u16 hdrflags,
@@ -645,14 +710,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
int offset;
u32 ns, nr;
- /* The ref count is increased since we now hold a pointer to
- * the session. Take care to decrement the refcnt when exiting
- * this function from now on...
- */
- l2tp_session_inc_refcount(session);
- if (session->ref)
- (*session->ref)(session);
-
/* Parse and check optional cookie */
if (session->peer_cookie_len > 0) {
if (memcmp(ptr, &session->peer_cookie[0], session->peer_cookie_len)) {
@@ -803,8 +860,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
/* Try to dequeue as many skbs from reorder_q as we can. */
l2tp_recv_dequeue(session);
- l2tp_session_dec_refcount(session);
-
return;
discard:
@@ -813,8 +868,6 @@ discard:
if (session->deref)
(*session->deref)(session);
-
- l2tp_session_dec_refcount(session);
}
EXPORT_SYMBOL(l2tp_recv_common);
@@ -921,8 +974,14 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
}
/* Find the session context */
- session = l2tp_session_find(tunnel->l2tp_net, tunnel, session_id);
+ session = l2tp_session_get(tunnel->l2tp_net, tunnel, session_id, true);
if (!session || !session->recv_skb) {
+ if (session) {
+ if (session->deref)
+ session->deref(session);
+ l2tp_session_dec_refcount(session);
+ }
+
/* Not found? Pass to userspace to deal with */
l2tp_info(tunnel, L2TP_MSG_DATA,
"%s: no session found (%u/%u). Passing up.\n",
@@ -931,10 +990,13 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
}
if (tunnel->version == L2TP_HDR_VER_3 &&
- l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr))
+ l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) {
+ l2tp_session_dec_refcount(session);
goto error;
+ }
l2tp_recv_common(session, skb, ptr, optr, hdrflags, length, payload_hook);
+ l2tp_session_dec_refcount(session);
return 0;
@@ -1079,6 +1141,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
/* Queue the packet to IP for output */
skb->ignore_df = 1;
+ skb_dst_drop(skb);
#if IS_ENABLED(CONFIG_IPV6)
if (tunnel->sock->sk_family == PF_INET6 && !tunnel->v4mapped)
error = inet6_csk_xmit(tunnel->sock, skb, NULL);
@@ -1143,10 +1206,6 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
goto out_unlock;
}
- /* Get routing info from the tunnel socket */
- skb_dst_drop(skb);
- skb_dst_set(skb, sk_dst_check(sk, 0));
-
inet = inet_sk(sk);
fl = &inet->cork.fl;
switch (tunnel->encap) {
@@ -1218,7 +1277,6 @@ static void l2tp_tunnel_destruct(struct sock *sk)
/* Remove hooks into tunnel socket */
sk->sk_destruct = tunnel->old_sk_destruct;
sk->sk_user_data = NULL;
- tunnel->sock = NULL;
/* Remove the tunnel struct from the tunnel list */
pn = l2tp_pernet(tunnel->l2tp_net);
@@ -1228,6 +1286,8 @@ static void l2tp_tunnel_destruct(struct sock *sk)
atomic_dec(&l2tp_tunnel_count);
l2tp_tunnel_closeall(tunnel);
+
+ tunnel->sock = NULL;
l2tp_tunnel_dec_refcount(tunnel);
/* Call the original destructor */
@@ -1252,6 +1312,7 @@ void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel)
tunnel->name);
write_lock_bh(&tunnel->hlist_lock);
+ tunnel->acpt_newsess = false;
for (hash = 0; hash < L2TP_HASH_SIZE; hash++) {
again:
hlist_for_each_safe(walk, tmp, &tunnel->session_hlist[hash]) {
@@ -1262,6 +1323,9 @@ again:
hlist_del_init(&session->hlist);
+ if (test_and_set_bit(0, &session->dead))
+ goto again;
+
if (session->ref != NULL)
(*session->ref)(session);
@@ -1302,17 +1366,6 @@ static void l2tp_udp_encap_destroy(struct sock *sk)
}
}
-/* Really kill the tunnel.
- * Come here only when all sessions have been cleared from the tunnel.
- */
-static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
-{
- BUG_ON(atomic_read(&tunnel->ref_count) != 0);
- BUG_ON(tunnel->sock != NULL);
- l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: free...\n", tunnel->name);
- kfree_rcu(tunnel, rcu);
-}
-
/* Workqueue tunnel deletion function */
static void l2tp_tunnel_del_work(struct work_struct *work)
{
@@ -1524,6 +1577,8 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
tunnel_id, fd);
goto err;
}
+ if (sk->sk_family != PF_INET && sk->sk_family != PF_INET6)
+ goto err;
switch (encap) {
case L2TP_ENCAPTYPE_UDP:
if (sk->sk_protocol != IPPROTO_UDP) {
@@ -1563,6 +1618,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
tunnel->magic = L2TP_TUNNEL_MAGIC;
sprintf(&tunnel->name[0], "tunl %u", tunnel_id);
rwlock_init(&tunnel->hlist_lock);
+ tunnel->acpt_newsess = true;
/* The net we belong to */
tunnel->l2tp_net = net;
@@ -1710,6 +1766,9 @@ EXPORT_SYMBOL_GPL(__l2tp_session_unhash);
*/
int l2tp_session_delete(struct l2tp_session *session)
{
+ if (test_and_set_bit(0, &session->dead))
+ return 0;
+
if (session->ref)
(*session->ref)(session);
__l2tp_session_unhash(session);
@@ -1800,37 +1859,12 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
l2tp_session_set_header_len(session, tunnel->version);
- /* Bump the reference count. The session context is deleted
- * only when this drops to zero.
- */
l2tp_session_inc_refcount(session);
- l2tp_tunnel_inc_refcount(tunnel);
-
- /* Ensure tunnel socket isn't deleted */
- sock_hold(tunnel->sock);
- /* Add session to the tunnel's hash list */
- write_lock_bh(&tunnel->hlist_lock);
- hlist_add_head(&session->hlist,
- l2tp_session_id_hash(tunnel, session_id));
- write_unlock_bh(&tunnel->hlist_lock);
-
- /* And to the global session list if L2TPv3 */
- if (tunnel->version != L2TP_HDR_VER_2) {
- struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
-
- spin_lock_bh(&pn->l2tp_session_hlist_lock);
- hlist_add_head_rcu(&session->global_hlist,
- l2tp_session_id_hash_2(pn, session_id));
- spin_unlock_bh(&pn->l2tp_session_hlist_lock);
- }
-
- /* Ignore management session in session count value */
- if (session->session_id != 0)
- atomic_inc(&l2tp_session_count);
+ return session;
}
- return session;
+ return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL_GPL(l2tp_session_create);
@@ -1864,6 +1898,9 @@ static __net_exit void l2tp_exit_net(struct net *net)
l2tp_tunnel_delete(tunnel);
}
rcu_read_unlock_bh();
+
+ flush_workqueue(l2tp_wq);
+ rcu_barrier();
}
static struct pernet_operations l2tp_net_ops = {
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index fad47e9d74bc..57da0f1d62dd 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -23,16 +23,6 @@
#define L2TP_HASH_BITS_2 8
#define L2TP_HASH_SIZE_2 (1 << L2TP_HASH_BITS_2)
-/* Debug message categories for the DEBUG socket option */
-enum {
- L2TP_MSG_DEBUG = (1 << 0), /* verbose debug (if
- * compiled in) */
- L2TP_MSG_CONTROL = (1 << 1), /* userspace - kernel
- * interface */
- L2TP_MSG_SEQ = (1 << 2), /* sequence numbers */
- L2TP_MSG_DATA = (1 << 3), /* data packets */
-};
-
struct sk_buff;
struct l2tp_stats {
@@ -85,6 +75,7 @@ struct l2tp_session_cfg {
struct l2tp_session {
int magic; /* should be
* L2TP_SESSION_MAGIC */
+ long dead;
struct l2tp_tunnel *tunnel; /* back pointer to tunnel
* context */
@@ -174,6 +165,10 @@ struct l2tp_tunnel {
struct rcu_head rcu;
rwlock_t hlist_lock; /* protect session_hlist */
+ bool acpt_newsess; /* Indicates whether this
+ * tunnel accepts new sessions.
+ * Protected by hlist_lock.
+ */
struct hlist_head session_hlist[L2TP_HASH_SIZE];
/* hashed list of sessions,
* hashed by id */
@@ -209,7 +204,9 @@ struct l2tp_tunnel {
};
struct l2tp_nl_cmd_ops {
- int (*session_create)(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg);
+ int (*session_create)(struct net *net, struct l2tp_tunnel *tunnel,
+ u32 session_id, u32 peer_session_id,
+ struct l2tp_session_cfg *cfg);
int (*session_delete)(struct l2tp_session *session);
};
@@ -243,14 +240,18 @@ out:
return tunnel;
}
-struct l2tp_session *l2tp_session_find(struct net *net,
- struct l2tp_tunnel *tunnel,
- u32 session_id);
+struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id);
+
+struct l2tp_session *l2tp_session_get(const struct net *net,
+ struct l2tp_tunnel *tunnel,
+ u32 session_id, bool do_ref);
struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth,
bool do_ref);
-struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname);
-struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id);
-struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth);
+struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
+ const char *ifname,
+ bool do_ref);
+struct l2tp_tunnel *l2tp_tunnel_find(const struct net *net, u32 tunnel_id);
+struct l2tp_tunnel *l2tp_tunnel_find_nth(const struct net *net, int nth);
int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id,
u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg,
@@ -261,6 +262,9 @@ struct l2tp_session *l2tp_session_create(int priv_size,
struct l2tp_tunnel *tunnel,
u32 session_id, u32 peer_session_id,
struct l2tp_session_cfg *cfg);
+int l2tp_session_register(struct l2tp_session *session,
+ struct l2tp_tunnel *tunnel);
+
void __l2tp_session_unhash(struct l2tp_session *session);
int l2tp_session_delete(struct l2tp_session *session);
void l2tp_session_free(struct l2tp_session *session);
@@ -279,6 +283,17 @@ int l2tp_nl_register_ops(enum l2tp_pwtype pw_type,
void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type);
int l2tp_ioctl(struct sock *sk, int cmd, unsigned long arg);
+static inline void l2tp_tunnel_inc_refcount(struct l2tp_tunnel *tunnel)
+{
+ atomic_inc(&tunnel->ref_count);
+}
+
+static inline void l2tp_tunnel_dec_refcount(struct l2tp_tunnel *tunnel)
+{
+ if (atomic_dec_and_test(&tunnel->ref_count))
+ kfree_rcu(tunnel, rcu);
+}
+
/* Session reference counts. Incremented when code obtains a reference
* to a session.
*/
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index e253c26f31ac..e0a65ee1e830 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -30,6 +30,9 @@
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/udp.h>
#include "l2tp_core.h"
@@ -41,7 +44,6 @@ struct l2tp_eth {
struct net_device *dev;
struct sock *tunnel_sock;
struct l2tp_session *session;
- struct list_head list;
atomic_long_t tx_bytes;
atomic_long_t tx_packets;
atomic_long_t tx_dropped;
@@ -52,20 +54,9 @@ struct l2tp_eth {
/* via l2tp_session_priv() */
struct l2tp_eth_sess {
- struct net_device *dev;
-};
-
-/* per-net private data for this module */
-static unsigned int l2tp_eth_net_id;
-struct l2tp_eth_net {
- struct list_head l2tp_eth_dev_list;
- spinlock_t l2tp_eth_lock;
+ struct net_device __rcu *dev;
};
-static inline struct l2tp_eth_net *l2tp_eth_pernet(struct net *net)
-{
- return net_generic(net, l2tp_eth_net_id);
-}
static struct lock_class_key l2tp_eth_tx_busylock;
static int l2tp_eth_dev_init(struct net_device *dev)
@@ -82,12 +73,13 @@ static int l2tp_eth_dev_init(struct net_device *dev)
static void l2tp_eth_dev_uninit(struct net_device *dev)
{
struct l2tp_eth *priv = netdev_priv(dev);
- struct l2tp_eth_net *pn = l2tp_eth_pernet(dev_net(dev));
+ struct l2tp_eth_sess *spriv;
- spin_lock(&pn->l2tp_eth_lock);
- list_del_init(&priv->list);
- spin_unlock(&pn->l2tp_eth_lock);
- dev_put(dev);
+ spriv = l2tp_session_priv(priv->session);
+ RCU_INIT_POINTER(spriv->dev, NULL);
+ /* No need for synchronize_net() here. We're called by
+ * unregister_netdev*(), which does the synchronisation for us.
+ */
}
static int l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -141,8 +133,8 @@ static void l2tp_eth_dev_setup(struct net_device *dev)
static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, int data_len)
{
struct l2tp_eth_sess *spriv = l2tp_session_priv(session);
- struct net_device *dev = spriv->dev;
- struct l2tp_eth *priv = netdev_priv(dev);
+ struct net_device *dev;
+ struct l2tp_eth *priv;
if (session->debug & L2TP_MSG_DATA) {
unsigned int length;
@@ -166,16 +158,25 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb,
skb_dst_drop(skb);
nf_reset(skb);
+ rcu_read_lock();
+ dev = rcu_dereference(spriv->dev);
+ if (!dev)
+ goto error_rcu;
+
+ priv = netdev_priv(dev);
if (dev_forward_skb(dev, skb) == NET_RX_SUCCESS) {
atomic_long_inc(&priv->rx_packets);
atomic_long_add(data_len, &priv->rx_bytes);
} else {
atomic_long_inc(&priv->rx_errors);
}
+ rcu_read_unlock();
+
return;
+error_rcu:
+ rcu_read_unlock();
error:
- atomic_long_inc(&priv->rx_errors);
kfree_skb(skb);
}
@@ -186,11 +187,15 @@ static void l2tp_eth_delete(struct l2tp_session *session)
if (session) {
spriv = l2tp_session_priv(session);
- dev = spriv->dev;
+
+ rtnl_lock();
+ dev = rtnl_dereference(spriv->dev);
if (dev) {
- unregister_netdev(dev);
- spriv->dev = NULL;
+ unregister_netdevice(dev);
+ rtnl_unlock();
module_put(THIS_MODULE);
+ } else {
+ rtnl_unlock();
}
}
}
@@ -200,41 +205,89 @@ static void l2tp_eth_show(struct seq_file *m, void *arg)
{
struct l2tp_session *session = arg;
struct l2tp_eth_sess *spriv = l2tp_session_priv(session);
- struct net_device *dev = spriv->dev;
+ struct net_device *dev;
+
+ rcu_read_lock();
+ dev = rcu_dereference(spriv->dev);
+ if (!dev) {
+ rcu_read_unlock();
+ return;
+ }
+ dev_hold(dev);
+ rcu_read_unlock();
seq_printf(m, " interface %s\n", dev->name);
+
+ dev_put(dev);
}
#endif
-static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
+static void l2tp_eth_adjust_mtu(struct l2tp_tunnel *tunnel,
+ struct l2tp_session *session,
+ struct net_device *dev)
+{
+ unsigned int overhead = 0;
+ struct dst_entry *dst;
+ u32 l3_overhead = 0;
+
+ /* if the encap is UDP, account for UDP header size */
+ if (tunnel->encap == L2TP_ENCAPTYPE_UDP) {
+ overhead += sizeof(struct udphdr);
+ dev->needed_headroom += sizeof(struct udphdr);
+ }
+ if (session->mtu != 0) {
+ dev->mtu = session->mtu;
+ dev->needed_headroom += session->hdr_len;
+ return;
+ }
+ lock_sock(tunnel->sock);
+ l3_overhead = kernel_sock_ip_overhead(tunnel->sock);
+ release_sock(tunnel->sock);
+ if (l3_overhead == 0) {
+ /* L3 Overhead couldn't be identified, this could be
+ * because tunnel->sock was NULL or the socket's
+ * address family was not IPv4 or IPv6,
+ * dev mtu stays at 1500.
+ */
+ return;
+ }
+ /* Adjust MTU, factor overhead - underlay L3, overlay L2 hdr
+ * UDP overhead, if any, was already factored in above.
+ */
+ overhead += session->hdr_len + ETH_HLEN + l3_overhead;
+
+ /* If PMTU discovery was enabled, use discovered MTU on L2TP device */
+ dst = sk_dst_get(tunnel->sock);
+ if (dst) {
+ /* dst_mtu will use PMTU if found, else fallback to intf MTU */
+ u32 pmtu = dst_mtu(dst);
+
+ if (pmtu != 0)
+ dev->mtu = pmtu;
+ dst_release(dst);
+ }
+ session->mtu = dev->mtu - overhead;
+ dev->mtu = session->mtu;
+ dev->needed_headroom += session->hdr_len;
+}
+
+static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel,
+ u32 session_id, u32 peer_session_id,
+ struct l2tp_session_cfg *cfg)
{
struct net_device *dev;
char name[IFNAMSIZ];
- struct l2tp_tunnel *tunnel;
struct l2tp_session *session;
struct l2tp_eth *priv;
struct l2tp_eth_sess *spriv;
int rc;
- struct l2tp_eth_net *pn;
-
- tunnel = l2tp_tunnel_find(net, tunnel_id);
- if (!tunnel) {
- rc = -ENODEV;
- goto out;
- }
-
- session = l2tp_session_find(net, tunnel, session_id);
- if (session) {
- rc = -EEXIST;
- goto out;
- }
if (cfg->ifname) {
dev = dev_get_by_name(net, cfg->ifname);
if (dev) {
dev_put(dev);
rc = -EEXIST;
- goto out;
+ goto err;
}
strlcpy(name, cfg->ifname, IFNAMSIZ);
} else
@@ -242,28 +295,24 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p
session = l2tp_session_create(sizeof(*spriv), tunnel, session_id,
peer_session_id, cfg);
- if (!session) {
- rc = -ENOMEM;
- goto out;
+ if (IS_ERR(session)) {
+ rc = PTR_ERR(session);
+ goto err;
}
dev = alloc_netdev(sizeof(*priv), name, NET_NAME_UNKNOWN,
l2tp_eth_dev_setup);
if (!dev) {
rc = -ENOMEM;
- goto out_del_session;
+ goto err_sess;
}
dev_net_set(dev, net);
- if (session->mtu == 0)
- session->mtu = dev->mtu - session->hdr_len;
- dev->mtu = session->mtu;
- dev->needed_headroom += session->hdr_len;
+ l2tp_eth_adjust_mtu(tunnel, session, dev);
priv = netdev_priv(dev);
priv->dev = dev;
priv->session = session;
- INIT_LIST_HEAD(&priv->list);
priv->tunnel_sock = tunnel->sock;
session->recv_skb = l2tp_eth_dev_recv;
@@ -273,48 +322,50 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p
#endif
spriv = l2tp_session_priv(session);
- spriv->dev = dev;
- rc = register_netdev(dev);
- if (rc < 0)
- goto out_del_dev;
+ l2tp_session_inc_refcount(session);
- __module_get(THIS_MODULE);
- /* Must be done after register_netdev() */
- strlcpy(session->ifname, dev->name, IFNAMSIZ);
+ rtnl_lock();
- dev_hold(dev);
- pn = l2tp_eth_pernet(dev_net(dev));
- spin_lock(&pn->l2tp_eth_lock);
- list_add(&priv->list, &pn->l2tp_eth_dev_list);
- spin_unlock(&pn->l2tp_eth_lock);
+ /* Register both device and session while holding the rtnl lock. This
+ * ensures that l2tp_eth_delete() will see that there's a device to
+ * unregister, even if it happened to run before we assign spriv->dev.
+ */
+ rc = l2tp_session_register(session, tunnel);
+ if (rc < 0) {
+ rtnl_unlock();
+ goto err_sess_dev;
+ }
- return 0;
+ rc = register_netdevice(dev);
+ if (rc < 0) {
+ rtnl_unlock();
+ l2tp_session_delete(session);
+ l2tp_session_dec_refcount(session);
+ free_netdev(dev);
-out_del_dev:
- free_netdev(dev);
- spriv->dev = NULL;
-out_del_session:
- l2tp_session_delete(session);
-out:
- return rc;
-}
+ return rc;
+ }
-static __net_init int l2tp_eth_init_net(struct net *net)
-{
- struct l2tp_eth_net *pn = net_generic(net, l2tp_eth_net_id);
+ strlcpy(session->ifname, dev->name, IFNAMSIZ);
+ rcu_assign_pointer(spriv->dev, dev);
- INIT_LIST_HEAD(&pn->l2tp_eth_dev_list);
- spin_lock_init(&pn->l2tp_eth_lock);
+ rtnl_unlock();
+
+ l2tp_session_dec_refcount(session);
+
+ __module_get(THIS_MODULE);
return 0;
-}
-static struct pernet_operations l2tp_eth_net_ops = {
- .init = l2tp_eth_init_net,
- .id = &l2tp_eth_net_id,
- .size = sizeof(struct l2tp_eth_net),
-};
+err_sess_dev:
+ l2tp_session_dec_refcount(session);
+ free_netdev(dev);
+err_sess:
+ kfree(session);
+err:
+ return rc;
+}
static const struct l2tp_nl_cmd_ops l2tp_eth_nl_cmd_ops = {
@@ -329,25 +380,18 @@ static int __init l2tp_eth_init(void)
err = l2tp_nl_register_ops(L2TP_PWTYPE_ETH, &l2tp_eth_nl_cmd_ops);
if (err)
- goto out;
-
- err = register_pernet_device(&l2tp_eth_net_ops);
- if (err)
- goto out_unreg;
+ goto err;
pr_info("L2TP ethernet pseudowire support (L2TPv3)\n");
return 0;
-out_unreg:
- l2tp_nl_unregister_ops(L2TP_PWTYPE_ETH);
-out:
+err:
return err;
}
static void __exit l2tp_eth_exit(void)
{
- unregister_pernet_device(&l2tp_eth_net_ops);
l2tp_nl_unregister_ops(L2TP_PWTYPE_ETH);
}
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 7efb3cadc152..3a31370d568a 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -24,7 +24,6 @@
#include <net/icmp.h>
#include <net/udp.h>
#include <net/inet_common.h>
-#include <net/inet_hashtables.h>
#include <net/tcp_states.h>
#include <net/protocol.h>
#include <net/xfrm.h>
@@ -122,6 +121,7 @@ static int l2tp_ip_recv(struct sk_buff *skb)
unsigned char *ptr, *optr;
struct l2tp_session *session;
struct l2tp_tunnel *tunnel = NULL;
+ struct iphdr *iph;
int length;
if (!pskb_may_pull(skb, 4))
@@ -142,19 +142,19 @@ static int l2tp_ip_recv(struct sk_buff *skb)
}
/* Ok, this is a data packet. Lookup the session. */
- session = l2tp_session_find(net, NULL, session_id);
- if (session == NULL)
+ session = l2tp_session_get(net, NULL, session_id, true);
+ if (!session)
goto discard;
tunnel = session->tunnel;
- if (tunnel == NULL)
- goto discard;
+ if (!tunnel)
+ goto discard_sess;
/* Trace packet contents, if enabled */
if (tunnel->debug & L2TP_MSG_DATA) {
length = min(32u, skb->len);
if (!pskb_may_pull(skb, length))
- goto discard;
+ goto discard_sess;
/* Point to L2TP header */
optr = ptr = skb->data;
@@ -167,6 +167,7 @@ static int l2tp_ip_recv(struct sk_buff *skb)
goto discard;
l2tp_recv_common(session, skb, ptr, optr, 0, skb->len, tunnel->recv_payload_hook);
+ l2tp_session_dec_refcount(session);
return 0;
@@ -179,23 +180,16 @@ pass_up:
goto discard;
tunnel_id = ntohl(*(__be32 *) &skb->data[4]);
- tunnel = l2tp_tunnel_find(net, tunnel_id);
- if (tunnel) {
- sk = tunnel->sock;
- sock_hold(sk);
- } else {
- struct iphdr *iph = (struct iphdr *) skb_network_header(skb);
+ iph = (struct iphdr *)skb_network_header(skb);
- read_lock_bh(&l2tp_ip_lock);
- sk = __l2tp_ip_bind_lookup(net, iph->daddr, 0, tunnel_id);
- if (!sk) {
- read_unlock_bh(&l2tp_ip_lock);
- goto discard;
- }
-
- sock_hold(sk);
+ read_lock_bh(&l2tp_ip_lock);
+ sk = __l2tp_ip_bind_lookup(net, iph->daddr, 0, tunnel_id);
+ if (!sk) {
read_unlock_bh(&l2tp_ip_lock);
+ goto discard;
}
+ sock_hold(sk);
+ read_unlock_bh(&l2tp_ip_lock);
if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_put;
@@ -204,6 +198,12 @@ pass_up:
return sk_receive_skb(sk, skb, 1);
+discard_sess:
+ if (session->deref)
+ session->deref(session);
+ l2tp_session_dec_refcount(session);
+ goto discard;
+
discard_put:
sock_put(sk);
@@ -212,15 +212,31 @@ discard:
return 0;
}
-static int l2tp_ip_open(struct sock *sk)
+static int l2tp_ip_hash(struct sock *sk)
{
- /* Prevent autobind. We don't have ports. */
- inet_sk(sk)->inet_num = IPPROTO_L2TP;
+ if (sk_unhashed(sk)) {
+ write_lock_bh(&l2tp_ip_lock);
+ sk_add_node(sk, &l2tp_ip_table);
+ write_unlock_bh(&l2tp_ip_lock);
+ }
+ return 0;
+}
+static void l2tp_ip_unhash(struct sock *sk)
+{
+ if (sk_unhashed(sk))
+ return;
write_lock_bh(&l2tp_ip_lock);
- sk_add_node(sk, &l2tp_ip_table);
+ sk_del_node_init(sk);
write_unlock_bh(&l2tp_ip_lock);
+}
+static int l2tp_ip_open(struct sock *sk)
+{
+ /* Prevent autobind. We don't have ports. */
+ inet_sk(sk)->inet_num = IPPROTO_L2TP;
+
+ l2tp_ip_hash(sk);
return 0;
}
@@ -262,15 +278,9 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (addr->l2tp_family != AF_INET)
return -EINVAL;
- ret = -EADDRINUSE;
- read_lock_bh(&l2tp_ip_lock);
- if (__l2tp_ip_bind_lookup(net, addr->l2tp_addr.s_addr,
- sk->sk_bound_dev_if, addr->l2tp_conn_id))
- goto out_in_use;
-
- read_unlock_bh(&l2tp_ip_lock);
-
lock_sock(sk);
+
+ ret = -EINVAL;
if (!sock_flag(sk, SOCK_ZAPPED))
goto out;
@@ -287,14 +297,22 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_rcv_saddr = inet->inet_saddr = addr->l2tp_addr.s_addr;
if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
inet->inet_saddr = 0; /* Use device */
- sk_dst_reset(sk);
+ write_lock_bh(&l2tp_ip_lock);
+ if (__l2tp_ip_bind_lookup(net, addr->l2tp_addr.s_addr,
+ sk->sk_bound_dev_if, addr->l2tp_conn_id)) {
+ write_unlock_bh(&l2tp_ip_lock);
+ ret = -EADDRINUSE;
+ goto out;
+ }
+
+ sk_dst_reset(sk);
l2tp_ip_sk(sk)->conn_id = addr->l2tp_conn_id;
- write_lock_bh(&l2tp_ip_lock);
sk_add_bind_node(sk, &l2tp_ip_bind_table);
sk_del_node_init(sk);
write_unlock_bh(&l2tp_ip_lock);
+
ret = 0;
sock_reset_flag(sk, SOCK_ZAPPED);
@@ -302,11 +320,6 @@ out:
release_sock(sk);
return ret;
-
-out_in_use:
- read_unlock_bh(&l2tp_ip_lock);
-
- return ret;
}
static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
@@ -314,21 +327,24 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
struct sockaddr_l2tpip *lsa = (struct sockaddr_l2tpip *) uaddr;
int rc;
- if (sock_flag(sk, SOCK_ZAPPED)) /* Must bind first - autobinding does not work */
- return -EINVAL;
-
if (addr_len < sizeof(*lsa))
return -EINVAL;
if (ipv4_is_multicast(lsa->l2tp_addr.s_addr))
return -EINVAL;
- rc = ip4_datagram_connect(sk, uaddr, addr_len);
- if (rc < 0)
- return rc;
-
lock_sock(sk);
+ /* Must bind first - autobinding does not work */
+ if (sock_flag(sk, SOCK_ZAPPED)) {
+ rc = -EINVAL;
+ goto out_sk;
+ }
+
+ rc = __ip4_datagram_connect(sk, uaddr, addr_len);
+ if (rc < 0)
+ goto out_sk;
+
l2tp_ip_sk(sk)->peer_conn_id = lsa->l2tp_conn_id;
write_lock_bh(&l2tp_ip_lock);
@@ -336,7 +352,9 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
sk_add_bind_node(sk, &l2tp_ip_bind_table);
write_unlock_bh(&l2tp_ip_lock);
+out_sk:
release_sock(sk);
+
return rc;
}
@@ -600,8 +618,8 @@ static struct proto l2tp_ip_prot = {
.sendmsg = l2tp_ip_sendmsg,
.recvmsg = l2tp_ip_recvmsg,
.backlog_rcv = l2tp_ip_backlog_recv,
- .hash = inet_hash,
- .unhash = inet_unhash,
+ .hash = l2tp_ip_hash,
+ .unhash = l2tp_ip_unhash,
.obj_size = sizeof(struct l2tp_ip_sock),
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_ip_setsockopt,
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index c125478981ac..1a8a4e451a5f 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -24,7 +24,6 @@
#include <net/icmp.h>
#include <net/udp.h>
#include <net/inet_common.h>
-#include <net/inet_hashtables.h>
#include <net/tcp_states.h>
#include <net/protocol.h>
#include <net/xfrm.h>
@@ -127,12 +126,14 @@ static inline struct sock *l2tp_ip6_bind_lookup(struct net *net,
*/
static int l2tp_ip6_recv(struct sk_buff *skb)
{
+ struct net *net = dev_net(skb->dev);
struct sock *sk;
u32 session_id;
u32 tunnel_id;
unsigned char *ptr, *optr;
struct l2tp_session *session;
struct l2tp_tunnel *tunnel = NULL;
+ struct ipv6hdr *iph;
int length;
if (!pskb_may_pull(skb, 4))
@@ -153,19 +154,19 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
}
/* Ok, this is a data packet. Lookup the session. */
- session = l2tp_session_find(&init_net, NULL, session_id);
- if (session == NULL)
+ session = l2tp_session_get(net, NULL, session_id, true);
+ if (!session)
goto discard;
tunnel = session->tunnel;
- if (tunnel == NULL)
- goto discard;
+ if (!tunnel)
+ goto discard_sess;
/* Trace packet contents, if enabled */
if (tunnel->debug & L2TP_MSG_DATA) {
length = min(32u, skb->len);
if (!pskb_may_pull(skb, length))
- goto discard;
+ goto discard_sess;
/* Point to L2TP header */
optr = ptr = skb->data;
@@ -179,6 +180,8 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
l2tp_recv_common(session, skb, ptr, optr, 0, skb->len,
tunnel->recv_payload_hook);
+ l2tp_session_dec_refcount(session);
+
return 0;
pass_up:
@@ -190,24 +193,16 @@ pass_up:
goto discard;
tunnel_id = ntohl(*(__be32 *) &skb->data[4]);
- tunnel = l2tp_tunnel_find(&init_net, tunnel_id);
- if (tunnel) {
- sk = tunnel->sock;
- sock_hold(sk);
- } else {
- struct ipv6hdr *iph = ipv6_hdr(skb);
-
- read_lock_bh(&l2tp_ip6_lock);
- sk = __l2tp_ip6_bind_lookup(&init_net, &iph->daddr,
- 0, tunnel_id);
- if (!sk) {
- read_unlock_bh(&l2tp_ip6_lock);
- goto discard;
- }
+ iph = ipv6_hdr(skb);
- sock_hold(sk);
+ read_lock_bh(&l2tp_ip6_lock);
+ sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, 0, tunnel_id);
+ if (!sk) {
read_unlock_bh(&l2tp_ip6_lock);
+ goto discard;
}
+ sock_hold(sk);
+ read_unlock_bh(&l2tp_ip6_lock);
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_put;
@@ -216,6 +211,12 @@ pass_up:
return sk_receive_skb(sk, skb, 1);
+discard_sess:
+ if (session->deref)
+ session->deref(session);
+ l2tp_session_dec_refcount(session);
+ goto discard;
+
discard_put:
sock_put(sk);
@@ -224,15 +225,31 @@ discard:
return 0;
}
-static int l2tp_ip6_open(struct sock *sk)
+static int l2tp_ip6_hash(struct sock *sk)
{
- /* Prevent autobind. We don't have ports. */
- inet_sk(sk)->inet_num = IPPROTO_L2TP;
+ if (sk_unhashed(sk)) {
+ write_lock_bh(&l2tp_ip6_lock);
+ sk_add_node(sk, &l2tp_ip6_table);
+ write_unlock_bh(&l2tp_ip6_lock);
+ }
+ return 0;
+}
+static void l2tp_ip6_unhash(struct sock *sk)
+{
+ if (sk_unhashed(sk))
+ return;
write_lock_bh(&l2tp_ip6_lock);
- sk_add_node(sk, &l2tp_ip6_table);
+ sk_del_node_init(sk);
write_unlock_bh(&l2tp_ip6_lock);
+}
+
+static int l2tp_ip6_open(struct sock *sk)
+{
+ /* Prevent autobind. We don't have ports. */
+ inet_sk(sk)->inet_num = IPPROTO_L2TP;
+ l2tp_ip6_hash(sk);
return 0;
}
@@ -267,7 +284,9 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
struct sockaddr_l2tpip6 *addr = (struct sockaddr_l2tpip6 *) uaddr;
+ struct net *net = sock_net(sk);
__be32 v4addr = 0;
+ int bound_dev_if;
int addr_type;
int err;
@@ -286,13 +305,6 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (addr_type & IPV6_ADDR_MULTICAST)
return -EADDRNOTAVAIL;
- err = -EADDRINUSE;
- read_lock_bh(&l2tp_ip6_lock);
- if (__l2tp_ip6_bind_lookup(&init_net, &addr->l2tp_addr,
- sk->sk_bound_dev_if, addr->l2tp_conn_id))
- goto out_in_use;
- read_unlock_bh(&l2tp_ip6_lock);
-
lock_sock(sk);
err = -EINVAL;
@@ -302,28 +314,25 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (sk->sk_state != TCP_CLOSE)
goto out_unlock;
+ bound_dev_if = sk->sk_bound_dev_if;
+
/* Check if the address belongs to the host. */
rcu_read_lock();
if (addr_type != IPV6_ADDR_ANY) {
struct net_device *dev = NULL;
if (addr_type & IPV6_ADDR_LINKLOCAL) {
- if (addr_len >= sizeof(struct sockaddr_in6) &&
- addr->l2tp_scope_id) {
- /* Override any existing binding, if another
- * one is supplied by user.
- */
- sk->sk_bound_dev_if = addr->l2tp_scope_id;
- }
+ if (addr->l2tp_scope_id)
+ bound_dev_if = addr->l2tp_scope_id;
/* Binding to link-local address requires an
- interface */
- if (!sk->sk_bound_dev_if)
+ * interface.
+ */
+ if (!bound_dev_if)
goto out_unlock_rcu;
err = -ENODEV;
- dev = dev_get_by_index_rcu(sock_net(sk),
- sk->sk_bound_dev_if);
+ dev = dev_get_by_index_rcu(sock_net(sk), bound_dev_if);
if (!dev)
goto out_unlock_rcu;
}
@@ -338,13 +347,22 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
}
rcu_read_unlock();
- inet->inet_rcv_saddr = inet->inet_saddr = v4addr;
+ write_lock_bh(&l2tp_ip6_lock);
+ if (__l2tp_ip6_bind_lookup(net, &addr->l2tp_addr, bound_dev_if,
+ addr->l2tp_conn_id)) {
+ write_unlock_bh(&l2tp_ip6_lock);
+ err = -EADDRINUSE;
+ goto out_unlock;
+ }
+
+ inet->inet_saddr = v4addr;
+ inet->inet_rcv_saddr = v4addr;
+ sk->sk_bound_dev_if = bound_dev_if;
sk->sk_v6_rcv_saddr = addr->l2tp_addr;
np->saddr = addr->l2tp_addr;
l2tp_ip6_sk(sk)->conn_id = addr->l2tp_conn_id;
- write_lock_bh(&l2tp_ip6_lock);
sk_add_bind_node(sk, &l2tp_ip6_bind_table);
sk_del_node_init(sk);
write_unlock_bh(&l2tp_ip6_lock);
@@ -357,10 +375,7 @@ out_unlock_rcu:
rcu_read_unlock();
out_unlock:
release_sock(sk);
- return err;
-out_in_use:
- read_unlock_bh(&l2tp_ip6_lock);
return err;
}
@@ -373,9 +388,6 @@ static int l2tp_ip6_connect(struct sock *sk, struct sockaddr *uaddr,
int addr_type;
int rc;
- if (sock_flag(sk, SOCK_ZAPPED)) /* Must bind first - autobinding does not work */
- return -EINVAL;
-
if (addr_len < sizeof(*lsa))
return -EINVAL;
@@ -392,10 +404,18 @@ static int l2tp_ip6_connect(struct sock *sk, struct sockaddr *uaddr,
return -EINVAL;
}
- rc = ip6_datagram_connect(sk, uaddr, addr_len);
-
lock_sock(sk);
+ /* Must bind first - autobinding does not work */
+ if (sock_flag(sk, SOCK_ZAPPED)) {
+ rc = -EINVAL;
+ goto out_sk;
+ }
+
+ rc = __ip6_datagram_connect(sk, uaddr, addr_len);
+ if (rc < 0)
+ goto out_sk;
+
l2tp_ip6_sk(sk)->peer_conn_id = lsa->l2tp_conn_id;
write_lock_bh(&l2tp_ip6_lock);
@@ -403,6 +423,7 @@ static int l2tp_ip6_connect(struct sock *sk, struct sockaddr *uaddr,
sk_add_bind_node(sk, &l2tp_ip6_bind_table);
write_unlock_bh(&l2tp_ip6_lock);
+out_sk:
release_sock(sk);
return rc;
@@ -461,7 +482,7 @@ static int l2tp_ip6_backlog_recv(struct sock *sk, struct sk_buff *skb)
return 0;
drop:
- IP_INC_STATS(&init_net, IPSTATS_MIB_INDISCARDS);
+ IP_INC_STATS(sock_net(sk), IPSTATS_MIB_INDISCARDS);
kfree_skb(skb);
return -1;
}
@@ -608,9 +629,9 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
else if (!fl6.flowi6_oif)
fl6.flowi6_oif = np->ucast_oif;
- security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+ security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
- dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
+ dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
goto out;
@@ -725,8 +746,8 @@ static struct proto l2tp_ip6_prot = {
.sendmsg = l2tp_ip6_sendmsg,
.recvmsg = l2tp_ip6_recvmsg,
.backlog_rcv = l2tp_ip6_backlog_recv,
- .hash = inet_hash,
- .unhash = inet_unhash,
+ .hash = l2tp_ip6_hash,
+ .unhash = l2tp_ip6_unhash,
.obj_size = sizeof(struct l2tp_ip6_sock),
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_ipv6_setsockopt,
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index fb3248ff8b48..d3a84a181348 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -55,7 +55,8 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq,
/* Accessed under genl lock */
static const struct l2tp_nl_cmd_ops *l2tp_nl_cmd_ops[__L2TP_PWTYPE_MAX];
-static struct l2tp_session *l2tp_nl_session_find(struct genl_info *info)
+static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info,
+ bool do_ref)
{
u32 tunnel_id;
u32 session_id;
@@ -66,14 +67,17 @@ static struct l2tp_session *l2tp_nl_session_find(struct genl_info *info)
if (info->attrs[L2TP_ATTR_IFNAME]) {
ifname = nla_data(info->attrs[L2TP_ATTR_IFNAME]);
- session = l2tp_session_find_by_ifname(net, ifname);
+ session = l2tp_session_get_by_ifname(net, ifname, do_ref);
} else if ((info->attrs[L2TP_ATTR_SESSION_ID]) &&
(info->attrs[L2TP_ATTR_CONN_ID])) {
tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]);
- tunnel = l2tp_tunnel_find(net, tunnel_id);
- if (tunnel)
- session = l2tp_session_find(net, tunnel, session_id);
+ tunnel = l2tp_tunnel_get(net, tunnel_id);
+ if (tunnel) {
+ session = l2tp_session_get(net, tunnel, session_id,
+ do_ref);
+ l2tp_tunnel_dec_refcount(tunnel);
+ }
}
return session;
@@ -276,8 +280,8 @@ static int l2tp_nl_cmd_tunnel_delete(struct sk_buff *skb, struct genl_info *info
}
tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
- tunnel = l2tp_tunnel_find(net, tunnel_id);
- if (tunnel == NULL) {
+ tunnel = l2tp_tunnel_get(net, tunnel_id);
+ if (!tunnel) {
ret = -ENODEV;
goto out;
}
@@ -287,6 +291,8 @@ static int l2tp_nl_cmd_tunnel_delete(struct sk_buff *skb, struct genl_info *info
l2tp_tunnel_delete(tunnel);
+ l2tp_tunnel_dec_refcount(tunnel);
+
out:
return ret;
}
@@ -304,8 +310,8 @@ static int l2tp_nl_cmd_tunnel_modify(struct sk_buff *skb, struct genl_info *info
}
tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
- tunnel = l2tp_tunnel_find(net, tunnel_id);
- if (tunnel == NULL) {
+ tunnel = l2tp_tunnel_get(net, tunnel_id);
+ if (!tunnel) {
ret = -ENODEV;
goto out;
}
@@ -316,6 +322,8 @@ static int l2tp_nl_cmd_tunnel_modify(struct sk_buff *skb, struct genl_info *info
ret = l2tp_tunnel_notify(&l2tp_nl_family, info,
tunnel, L2TP_CMD_TUNNEL_MODIFY);
+ l2tp_tunnel_dec_refcount(tunnel);
+
out:
return ret;
}
@@ -420,34 +428,37 @@ static int l2tp_nl_cmd_tunnel_get(struct sk_buff *skb, struct genl_info *info)
if (!info->attrs[L2TP_ATTR_CONN_ID]) {
ret = -EINVAL;
- goto out;
+ goto err;
}
tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
- tunnel = l2tp_tunnel_find(net, tunnel_id);
- if (tunnel == NULL) {
- ret = -ENODEV;
- goto out;
- }
-
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg) {
ret = -ENOMEM;
- goto out;
+ goto err;
+ }
+
+ tunnel = l2tp_tunnel_get(net, tunnel_id);
+ if (!tunnel) {
+ ret = -ENODEV;
+ goto err_nlmsg;
}
ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq,
NLM_F_ACK, tunnel, L2TP_CMD_TUNNEL_GET);
if (ret < 0)
- goto err_out;
+ goto err_nlmsg_tunnel;
+
+ l2tp_tunnel_dec_refcount(tunnel);
return genlmsg_unicast(net, msg, info->snd_portid);
-err_out:
+err_nlmsg_tunnel:
+ l2tp_tunnel_dec_refcount(tunnel);
+err_nlmsg:
nlmsg_free(msg);
-
-out:
+err:
return ret;
}
@@ -491,8 +502,9 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
ret = -EINVAL;
goto out;
}
+
tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
- tunnel = l2tp_tunnel_find(net, tunnel_id);
+ tunnel = l2tp_tunnel_get(net, tunnel_id);
if (!tunnel) {
ret = -ENODEV;
goto out;
@@ -500,29 +512,24 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
if (!info->attrs[L2TP_ATTR_SESSION_ID]) {
ret = -EINVAL;
- goto out;
+ goto out_tunnel;
}
session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]);
- session = l2tp_session_find(net, tunnel, session_id);
- if (session) {
- ret = -EEXIST;
- goto out;
- }
if (!info->attrs[L2TP_ATTR_PEER_SESSION_ID]) {
ret = -EINVAL;
- goto out;
+ goto out_tunnel;
}
peer_session_id = nla_get_u32(info->attrs[L2TP_ATTR_PEER_SESSION_ID]);
if (!info->attrs[L2TP_ATTR_PW_TYPE]) {
ret = -EINVAL;
- goto out;
+ goto out_tunnel;
}
cfg.pw_type = nla_get_u16(info->attrs[L2TP_ATTR_PW_TYPE]);
if (cfg.pw_type >= __L2TP_PWTYPE_MAX) {
ret = -EINVAL;
- goto out;
+ goto out_tunnel;
}
if (tunnel->version > 2) {
@@ -544,7 +551,7 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
u16 len = nla_len(info->attrs[L2TP_ATTR_COOKIE]);
if (len > 8) {
ret = -EINVAL;
- goto out;
+ goto out_tunnel;
}
cfg.cookie_len = len;
memcpy(&cfg.cookie[0], nla_data(info->attrs[L2TP_ATTR_COOKIE]), len);
@@ -553,7 +560,7 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
u16 len = nla_len(info->attrs[L2TP_ATTR_PEER_COOKIE]);
if (len > 8) {
ret = -EINVAL;
- goto out;
+ goto out_tunnel;
}
cfg.peer_cookie_len = len;
memcpy(&cfg.peer_cookie[0], nla_data(info->attrs[L2TP_ATTR_PEER_COOKIE]), len);
@@ -596,7 +603,7 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
if ((l2tp_nl_cmd_ops[cfg.pw_type] == NULL) ||
(l2tp_nl_cmd_ops[cfg.pw_type]->session_create == NULL)) {
ret = -EPROTONOSUPPORT;
- goto out;
+ goto out_tunnel;
}
/* Check that pseudowire-specific params are present */
@@ -606,7 +613,7 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
case L2TP_PWTYPE_ETH_VLAN:
if (!info->attrs[L2TP_ATTR_VLAN_ID]) {
ret = -EINVAL;
- goto out;
+ goto out_tunnel;
}
break;
case L2TP_PWTYPE_ETH:
@@ -620,18 +627,22 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
break;
}
- ret = -EPROTONOSUPPORT;
- if (l2tp_nl_cmd_ops[cfg.pw_type]->session_create)
- ret = (*l2tp_nl_cmd_ops[cfg.pw_type]->session_create)(net, tunnel_id,
- session_id, peer_session_id, &cfg);
+ ret = l2tp_nl_cmd_ops[cfg.pw_type]->session_create(net, tunnel,
+ session_id,
+ peer_session_id,
+ &cfg);
if (ret >= 0) {
- session = l2tp_session_find(net, tunnel, session_id);
- if (session)
+ session = l2tp_session_get(net, tunnel, session_id, false);
+ if (session) {
ret = l2tp_session_notify(&l2tp_nl_family, info, session,
L2TP_CMD_SESSION_CREATE);
+ l2tp_session_dec_refcount(session);
+ }
}
+out_tunnel:
+ l2tp_tunnel_dec_refcount(tunnel);
out:
return ret;
}
@@ -642,7 +653,7 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf
struct l2tp_session *session;
u16 pw_type;
- session = l2tp_nl_session_find(info);
+ session = l2tp_nl_session_get(info, true);
if (session == NULL) {
ret = -ENODEV;
goto out;
@@ -656,6 +667,10 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf
if (l2tp_nl_cmd_ops[pw_type] && l2tp_nl_cmd_ops[pw_type]->session_delete)
ret = (*l2tp_nl_cmd_ops[pw_type]->session_delete)(session);
+ if (session->deref)
+ session->deref(session);
+ l2tp_session_dec_refcount(session);
+
out:
return ret;
}
@@ -665,7 +680,7 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf
int ret = 0;
struct l2tp_session *session;
- session = l2tp_nl_session_find(info);
+ session = l2tp_nl_session_get(info, false);
if (session == NULL) {
ret = -ENODEV;
goto out;
@@ -700,6 +715,8 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf
ret = l2tp_session_notify(&l2tp_nl_family, info,
session, L2TP_CMD_SESSION_MODIFY);
+ l2tp_session_dec_refcount(session);
+
out:
return ret;
}
@@ -786,29 +803,34 @@ static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info)
struct sk_buff *msg;
int ret;
- session = l2tp_nl_session_find(info);
+ session = l2tp_nl_session_get(info, false);
if (session == NULL) {
ret = -ENODEV;
- goto out;
+ goto err;
}
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg) {
ret = -ENOMEM;
- goto out;
+ goto err_ref;
}
ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq,
0, session, L2TP_CMD_SESSION_GET);
if (ret < 0)
- goto err_out;
+ goto err_ref_msg;
- return genlmsg_unicast(genl_info_net(info), msg, info->snd_portid);
+ ret = genlmsg_unicast(genl_info_net(info), msg, info->snd_portid);
-err_out:
- nlmsg_free(msg);
+ l2tp_session_dec_refcount(session);
-out:
+ return ret;
+
+err_ref_msg:
+ nlmsg_free(msg);
+err_ref:
+ l2tp_session_dec_refcount(session);
+err:
return ret;
}
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index d3f1222c1a8c..8ff5352bb0e3 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -122,8 +122,11 @@
struct pppol2tp_session {
int owner; /* pid that opened the socket */
- struct sock *sock; /* Pointer to the session
+ struct mutex sk_lock; /* Protects .sk */
+ struct sock __rcu *sk; /* Pointer to the session
* PPPoX socket */
+ struct sock *__sk; /* Copy of .sk, for cleanup */
+ struct rcu_head rcu; /* For asynchronous release */
struct sock *tunnel_sock; /* Pointer to the tunnel UDP
* socket */
int flags; /* accessed by PPPIOCGFLAGS.
@@ -138,6 +141,24 @@ static const struct ppp_channel_ops pppol2tp_chan_ops = {
static const struct proto_ops pppol2tp_ops;
+/* Retrieves the pppol2tp socket associated to a session.
+ * A reference is held on the returned socket, so this function must be paired
+ * with sock_put().
+ */
+static struct sock *pppol2tp_session_get_sock(struct l2tp_session *session)
+{
+ struct pppol2tp_session *ps = l2tp_session_priv(session);
+ struct sock *sk;
+
+ rcu_read_lock();
+ sk = rcu_dereference(ps->sk);
+ if (sk)
+ sock_hold(sk);
+ rcu_read_unlock();
+
+ return sk;
+}
+
/* Helpers to obtain tunnel/session contexts from sockets.
*/
static inline struct l2tp_session *pppol2tp_sock_to_session(struct sock *sk)
@@ -177,7 +198,7 @@ static int pppol2tp_recv_payload_hook(struct sk_buff *skb)
if (!pskb_may_pull(skb, 2))
return 1;
- if ((skb->data[0] == 0xff) && (skb->data[1] == 0x03))
+ if ((skb->data[0] == PPP_ALLSTATIONS) && (skb->data[1] == PPP_UI))
skb_pull(skb, 2);
return 0;
@@ -224,13 +245,14 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int
/* If the socket is bound, send it in to PPP's input queue. Otherwise
* queue it on the session socket.
*/
- sk = ps->sock;
+ rcu_read_lock();
+ sk = rcu_dereference(ps->sk);
if (sk == NULL)
goto no_sock;
if (sk->sk_state & PPPOX_BOUND) {
struct pppox_sock *po;
- l2tp_dbg(session, PPPOL2TP_MSG_DATA,
+ l2tp_dbg(session, L2TP_MSG_DATA,
"%s: recv %d byte data frame, passing to ppp\n",
session->name, data_len);
@@ -253,7 +275,7 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int
po = pppox_sk(sk);
ppp_input(&po->chan, skb);
} else {
- l2tp_dbg(session, PPPOL2TP_MSG_DATA,
+ l2tp_dbg(session, L2TP_MSG_DATA,
"%s: recv %d byte data frame, passing to L2TP socket\n",
session->name, data_len);
@@ -262,30 +284,16 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int
kfree_skb(skb);
}
}
+ rcu_read_unlock();
return;
no_sock:
- l2tp_info(session, PPPOL2TP_MSG_DATA, "%s: no socket\n", session->name);
+ rcu_read_unlock();
+ l2tp_info(session, L2TP_MSG_DATA, "%s: no socket\n", session->name);
kfree_skb(skb);
}
-static void pppol2tp_session_sock_hold(struct l2tp_session *session)
-{
- struct pppol2tp_session *ps = l2tp_session_priv(session);
-
- if (ps->sock)
- sock_hold(ps->sock);
-}
-
-static void pppol2tp_session_sock_put(struct l2tp_session *session)
-{
- struct pppol2tp_session *ps = l2tp_session_priv(session);
-
- if (ps->sock)
- sock_put(ps->sock);
-}
-
/************************************************************************
* Transmit handling
***********************************************************************/
@@ -297,7 +305,6 @@ static void pppol2tp_session_sock_put(struct l2tp_session *session)
static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
size_t total_len)
{
- static const unsigned char ppph[2] = { 0xff, 0x03 };
struct sock *sk = sock->sk;
struct sk_buff *skb;
int error;
@@ -327,7 +334,7 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
error = -ENOMEM;
skb = sock_wmalloc(sk, NET_SKB_PAD + sizeof(struct iphdr) +
uhlen + session->hdr_len +
- sizeof(ppph) + total_len,
+ 2 + total_len, /* 2 bytes for PPP_ALLSTATIONS & PPP_UI */
0, GFP_KERNEL);
if (!skb)
goto error_put_sess_tun;
@@ -340,8 +347,8 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
skb_reserve(skb, uhlen);
/* Add PPP header */
- skb->data[0] = ppph[0];
- skb->data[1] = ppph[1];
+ skb->data[0] = PPP_ALLSTATIONS;
+ skb->data[1] = PPP_UI;
skb_put(skb, 2);
/* Copy user data into skb */
@@ -384,7 +391,6 @@ error:
*/
static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
{
- static const u8 ppph[2] = { 0xff, 0x03 };
struct sock *sk = (struct sock *) chan->private;
struct sock *sk_tun;
struct l2tp_session *session;
@@ -413,14 +419,14 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
sizeof(struct iphdr) + /* IP header */
uhlen + /* UDP header (if L2TP_ENCAPTYPE_UDP) */
session->hdr_len + /* L2TP header */
- sizeof(ppph); /* PPP header */
+ 2; /* 2 bytes for PPP_ALLSTATIONS & PPP_UI */
if (skb_cow_head(skb, headroom))
goto abort_put_sess_tun;
/* Setup PPP header */
- __skb_push(skb, sizeof(ppph));
- skb->data[0] = ppph[0];
- skb->data[1] = ppph[1];
+ __skb_push(skb, 2);
+ skb->data[0] = PPP_ALLSTATIONS;
+ skb->data[1] = PPP_UI;
local_bh_disable();
l2tp_xmit_skb(session, skb, session->hdr_len);
@@ -448,16 +454,15 @@ abort:
*/
static void pppol2tp_session_close(struct l2tp_session *session)
{
- struct pppol2tp_session *ps = l2tp_session_priv(session);
- struct sock *sk = ps->sock;
- struct socket *sock = sk->sk_socket;
+ struct sock *sk;
BUG_ON(session->magic != L2TP_SESSION_MAGIC);
- if (sock) {
- inet_shutdown(sock, 2);
- /* Don't let the session go away before our socket does */
- l2tp_session_inc_refcount(session);
+ sk = pppol2tp_session_get_sock(session);
+ if (sk) {
+ if (sk->sk_socket)
+ inet_shutdown(sk->sk_socket, SEND_SHUTDOWN);
+ sock_put(sk);
}
}
@@ -478,6 +483,14 @@ static void pppol2tp_session_destruct(struct sock *sk)
}
}
+static void pppol2tp_put_sk(struct rcu_head *head)
+{
+ struct pppol2tp_session *ps;
+
+ ps = container_of(head, typeof(*ps), rcu);
+ sock_put(ps->__sk);
+}
+
/* Called when the PPPoX socket (session) is closed.
*/
static int pppol2tp_release(struct socket *sock)
@@ -503,11 +516,23 @@ static int pppol2tp_release(struct socket *sock)
session = pppol2tp_sock_to_session(sk);
- /* Purge any queued data */
if (session != NULL) {
- __l2tp_session_unhash(session);
- l2tp_session_queue_purge(session);
- sock_put(sk);
+ struct pppol2tp_session *ps;
+
+ l2tp_session_delete(session);
+
+ ps = l2tp_session_priv(session);
+ mutex_lock(&ps->sk_lock);
+ ps->__sk = rcu_dereference_protected(ps->sk,
+ lockdep_is_held(&ps->sk_lock));
+ RCU_INIT_POINTER(ps->sk, NULL);
+ mutex_unlock(&ps->sk_lock);
+ call_rcu(&ps->rcu, pppol2tp_put_sk);
+
+ /* Rely on the sock_put() call at the end of the function for
+ * dropping the reference held by pppol2tp_sock_to_session().
+ * The last reference will be dropped by pppol2tp_put_sk().
+ */
}
release_sock(sk);
@@ -574,16 +599,47 @@ out:
static void pppol2tp_show(struct seq_file *m, void *arg)
{
struct l2tp_session *session = arg;
- struct pppol2tp_session *ps = l2tp_session_priv(session);
+ struct sock *sk;
+
+ sk = pppol2tp_session_get_sock(session);
+ if (sk) {
+ struct pppox_sock *po = pppox_sk(sk);
- if (ps) {
- struct pppox_sock *po = pppox_sk(ps->sock);
- if (po)
- seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan));
+ seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan));
+ sock_put(sk);
}
}
#endif
+static void pppol2tp_session_init(struct l2tp_session *session)
+{
+ struct pppol2tp_session *ps;
+ struct dst_entry *dst;
+
+ session->recv_skb = pppol2tp_recv;
+ session->session_close = pppol2tp_session_close;
+#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE)
+ session->show = pppol2tp_show;
+#endif
+
+ ps = l2tp_session_priv(session);
+ mutex_init(&ps->sk_lock);
+ ps->tunnel_sock = session->tunnel->sock;
+ ps->owner = current->pid;
+
+ /* If PMTU discovery was enabled, use the MTU that was discovered */
+ dst = sk_dst_get(session->tunnel->sock);
+ if (dst) {
+ u32 pmtu = dst_mtu(dst);
+
+ if (pmtu) {
+ session->mtu = pmtu - PPPOL2TP_HEADER_OVERHEAD;
+ session->mru = pmtu - PPPOL2TP_HEADER_OVERHEAD;
+ }
+ dst_release(dst);
+ }
+}
+
/* connect() handler. Attach a PPPoX socket to a tunnel UDP socket
*/
static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
@@ -595,11 +651,11 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
struct l2tp_session *session = NULL;
struct l2tp_tunnel *tunnel;
struct pppol2tp_session *ps;
- struct dst_entry *dst;
struct l2tp_session_cfg cfg = { 0, };
int error = 0;
u32 tunnel_id, peer_tunnel_id;
u32 session_id, peer_session_id;
+ bool drop_refcnt = false;
int ver = 2;
int fd;
@@ -708,65 +764,53 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
if (tunnel->peer_tunnel_id == 0)
tunnel->peer_tunnel_id = peer_tunnel_id;
- /* Create session if it doesn't already exist. We handle the
- * case where a session was previously created by the netlink
- * interface by checking that the session doesn't already have
- * a socket and its tunnel socket are what we expect. If any
- * of those checks fail, return EEXIST to the caller.
- */
- session = l2tp_session_find(sock_net(sk), tunnel, session_id);
- if (session == NULL) {
- /* Default MTU must allow space for UDP/L2TP/PPP
- * headers.
+ session = l2tp_session_get(sock_net(sk), tunnel, session_id, false);
+ if (session) {
+ drop_refcnt = true;
+ ps = l2tp_session_priv(session);
+
+ /* Using a pre-existing session is fine as long as it hasn't
+ * been connected yet.
*/
- cfg.mtu = cfg.mru = 1500 - PPPOL2TP_HEADER_OVERHEAD;
+ mutex_lock(&ps->sk_lock);
+ if (rcu_dereference_protected(ps->sk,
+ lockdep_is_held(&ps->sk_lock))) {
+ mutex_unlock(&ps->sk_lock);
+ error = -EEXIST;
+ goto end;
+ }
+
+ /* consistency checks */
+ if (ps->tunnel_sock != tunnel->sock) {
+ mutex_unlock(&ps->sk_lock);
+ error = -EEXIST;
+ goto end;
+ }
+ } else {
+ /* Default MTU must allow space for UDP/L2TP/PPP headers */
+ cfg.mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD;
+ cfg.mru = cfg.mtu;
- /* Allocate and initialize a new session context. */
session = l2tp_session_create(sizeof(struct pppol2tp_session),
tunnel, session_id,
peer_session_id, &cfg);
- if (session == NULL) {
- error = -ENOMEM;
+ if (IS_ERR(session)) {
+ error = PTR_ERR(session);
goto end;
}
- } else {
+
+ pppol2tp_session_init(session);
ps = l2tp_session_priv(session);
- error = -EEXIST;
- if (ps->sock != NULL)
- goto end;
+ l2tp_session_inc_refcount(session);
- /* consistency checks */
- if (ps->tunnel_sock != tunnel->sock)
+ mutex_lock(&ps->sk_lock);
+ error = l2tp_session_register(session, tunnel);
+ if (error < 0) {
+ mutex_unlock(&ps->sk_lock);
+ kfree(session);
goto end;
- }
-
- /* Associate session with its PPPoL2TP socket */
- ps = l2tp_session_priv(session);
- ps->owner = current->pid;
- ps->sock = sk;
- ps->tunnel_sock = tunnel->sock;
-
- session->recv_skb = pppol2tp_recv;
- session->session_close = pppol2tp_session_close;
-#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE)
- session->show = pppol2tp_show;
-#endif
-
- /* We need to know each time a skb is dropped from the reorder
- * queue.
- */
- session->ref = pppol2tp_session_sock_hold;
- session->deref = pppol2tp_session_sock_put;
-
- /* If PMTU discovery was enabled, use the MTU that was discovered */
- dst = sk_dst_get(tunnel->sock);
- if (dst != NULL) {
- u32 pmtu = dst_mtu(dst);
-
- if (pmtu != 0)
- session->mtu = session->mru = pmtu -
- PPPOL2TP_HEADER_OVERHEAD;
- dst_release(dst);
+ }
+ drop_refcnt = true;
}
/* Special case: if source & dest session_id == 0x0000, this
@@ -791,17 +835,30 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
po->chan.mtu = session->mtu;
error = ppp_register_net_channel(sock_net(sk), &po->chan);
- if (error)
+ if (error) {
+ mutex_unlock(&ps->sk_lock);
goto end;
+ }
out_no_ppp:
/* This is how we get the session context from the socket. */
sk->sk_user_data = session;
+ rcu_assign_pointer(ps->sk, sk);
+ mutex_unlock(&ps->sk_lock);
+
+ /* Keep the reference we've grabbed on the session: sk doesn't expect
+ * the session to disappear. pppol2tp_session_destruct() is responsible
+ * for dropping it.
+ */
+ drop_refcnt = false;
+
sk->sk_state = PPPOX_CONNECTED;
- l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: created\n",
+ l2tp_info(session, L2TP_MSG_CONTROL, "%s: created\n",
session->name);
end:
+ if (drop_refcnt)
+ l2tp_session_dec_refcount(session);
release_sock(sk);
return error;
@@ -809,31 +866,19 @@ end:
#ifdef CONFIG_L2TP_V3
-/* Called when creating sessions via the netlink interface.
- */
-static int pppol2tp_session_create(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
+/* Called when creating sessions via the netlink interface. */
+static int pppol2tp_session_create(struct net *net, struct l2tp_tunnel *tunnel,
+ u32 session_id, u32 peer_session_id,
+ struct l2tp_session_cfg *cfg)
{
int error;
- struct l2tp_tunnel *tunnel;
struct l2tp_session *session;
- struct pppol2tp_session *ps;
-
- tunnel = l2tp_tunnel_find(net, tunnel_id);
-
- /* Error if we can't find the tunnel */
- error = -ENOENT;
- if (tunnel == NULL)
- goto out;
/* Error if tunnel socket is not prepped */
- if (tunnel->sock == NULL)
- goto out;
-
- /* Check that this session doesn't already exist */
- error = -EEXIST;
- session = l2tp_session_find(net, tunnel, session_id);
- if (session != NULL)
- goto out;
+ if (!tunnel->sock) {
+ error = -ENOENT;
+ goto err;
+ }
/* Default MTU values. */
if (cfg->mtu == 0)
@@ -842,22 +887,25 @@ static int pppol2tp_session_create(struct net *net, u32 tunnel_id, u32 session_i
cfg->mru = cfg->mtu;
/* Allocate and initialize a new session context. */
- error = -ENOMEM;
session = l2tp_session_create(sizeof(struct pppol2tp_session),
tunnel, session_id,
peer_session_id, cfg);
- if (session == NULL)
- goto out;
+ if (IS_ERR(session)) {
+ error = PTR_ERR(session);
+ goto err;
+ }
- ps = l2tp_session_priv(session);
- ps->tunnel_sock = tunnel->sock;
+ pppol2tp_session_init(session);
- l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: created\n",
- session->name);
+ error = l2tp_session_register(session, tunnel);
+ if (error < 0)
+ goto err_sess;
- error = 0;
+ return 0;
-out:
+err_sess:
+ kfree(session);
+err:
return error;
}
@@ -889,10 +937,8 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
pls = l2tp_session_priv(session);
tunnel = l2tp_sock_to_tunnel(pls->tunnel_sock);
- if (tunnel == NULL) {
- error = -EBADF;
+ if (tunnel == NULL)
goto end_put_sess;
- }
inet = inet_sk(tunnel->sock);
if ((tunnel->version == 2) && (tunnel->sock->sk_family == AF_INET)) {
@@ -970,12 +1016,11 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
}
*usockaddr_len = len;
+ error = 0;
sock_put(pls->tunnel_sock);
end_put_sess:
sock_put(sk);
- error = 0;
-
end:
return error;
}
@@ -1017,16 +1062,14 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session,
struct l2tp_tunnel *tunnel = session->tunnel;
struct pppol2tp_ioc_stats stats;
- l2tp_dbg(session, PPPOL2TP_MSG_CONTROL,
+ l2tp_dbg(session, L2TP_MSG_CONTROL,
"%s: pppol2tp_session_ioctl(cmd=%#x, arg=%#lx)\n",
session->name, cmd, arg);
- sk = ps->sock;
+ sk = pppol2tp_session_get_sock(session);
if (!sk)
return -EBADR;
- sock_hold(sk);
-
switch (cmd) {
case SIOCGIFMTU:
err = -ENXIO;
@@ -1040,7 +1083,7 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session,
if (copy_to_user((void __user *) arg, &ifr, sizeof(struct ifreq)))
break;
- l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: get mtu=%d\n",
+ l2tp_info(session, L2TP_MSG_CONTROL, "%s: get mtu=%d\n",
session->name, session->mtu);
err = 0;
break;
@@ -1056,7 +1099,7 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session,
session->mtu = ifr.ifr_mtu;
- l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: set mtu=%d\n",
+ l2tp_info(session, L2TP_MSG_CONTROL, "%s: set mtu=%d\n",
session->name, session->mtu);
err = 0;
break;
@@ -1070,7 +1113,7 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session,
if (put_user(session->mru, (int __user *) arg))
break;
- l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: get mru=%d\n",
+ l2tp_info(session, L2TP_MSG_CONTROL, "%s: get mru=%d\n",
session->name, session->mru);
err = 0;
break;
@@ -1085,7 +1128,7 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session,
break;
session->mru = val;
- l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: set mru=%d\n",
+ l2tp_info(session, L2TP_MSG_CONTROL, "%s: set mru=%d\n",
session->name, session->mru);
err = 0;
break;
@@ -1095,7 +1138,7 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session,
if (put_user(ps->flags, (int __user *) arg))
break;
- l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: get flags=%d\n",
+ l2tp_info(session, L2TP_MSG_CONTROL, "%s: get flags=%d\n",
session->name, ps->flags);
err = 0;
break;
@@ -1105,7 +1148,7 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session,
if (get_user(val, (int __user *) arg))
break;
ps->flags = val;
- l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: set flags=%d\n",
+ l2tp_info(session, L2TP_MSG_CONTROL, "%s: set flags=%d\n",
session->name, ps->flags);
err = 0;
break;
@@ -1122,7 +1165,7 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session,
if (copy_to_user((void __user *) arg, &stats,
sizeof(stats)))
break;
- l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: get L2TP stats\n",
+ l2tp_info(session, L2TP_MSG_CONTROL, "%s: get L2TP stats\n",
session->name);
err = 0;
break;
@@ -1150,7 +1193,7 @@ static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel,
struct sock *sk;
struct pppol2tp_ioc_stats stats;
- l2tp_dbg(tunnel, PPPOL2TP_MSG_CONTROL,
+ l2tp_dbg(tunnel, L2TP_MSG_CONTROL,
"%s: pppol2tp_tunnel_ioctl(cmd=%#x, arg=%#lx)\n",
tunnel->name, cmd, arg);
@@ -1171,11 +1214,18 @@ static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel,
if (stats.session_id != 0) {
/* resend to session ioctl handler */
struct l2tp_session *session =
- l2tp_session_find(sock_net(sk), tunnel, stats.session_id);
- if (session != NULL)
- err = pppol2tp_session_ioctl(session, cmd, arg);
- else
+ l2tp_session_get(sock_net(sk), tunnel,
+ stats.session_id, true);
+
+ if (session) {
+ err = pppol2tp_session_ioctl(session, cmd,
+ arg);
+ if (session->deref)
+ session->deref(session);
+ l2tp_session_dec_refcount(session);
+ } else {
err = -EBADR;
+ }
break;
}
#ifdef CONFIG_XFRM
@@ -1186,7 +1236,7 @@ static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel,
err = -EFAULT;
break;
}
- l2tp_info(tunnel, PPPOL2TP_MSG_CONTROL, "%s: get L2TP stats\n",
+ l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: get L2TP stats\n",
tunnel->name);
err = 0;
break;
@@ -1276,7 +1326,7 @@ static int pppol2tp_tunnel_setsockopt(struct sock *sk,
switch (optname) {
case PPPOL2TP_SO_DEBUG:
tunnel->debug = val;
- l2tp_info(tunnel, PPPOL2TP_MSG_CONTROL, "%s: set debug=%x\n",
+ l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: set debug=%x\n",
tunnel->name, tunnel->debug);
break;
@@ -1295,7 +1345,6 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
int optname, int val)
{
int err = 0;
- struct pppol2tp_session *ps = l2tp_session_priv(session);
switch (optname) {
case PPPOL2TP_SO_RECVSEQ:
@@ -1304,7 +1353,7 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
break;
}
session->recv_seq = val ? -1 : 0;
- l2tp_info(session, PPPOL2TP_MSG_CONTROL,
+ l2tp_info(session, L2TP_MSG_CONTROL,
"%s: set recv_seq=%d\n",
session->name, session->recv_seq);
break;
@@ -1316,13 +1365,13 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
}
session->send_seq = val ? -1 : 0;
{
- struct sock *ssk = ps->sock;
- struct pppox_sock *po = pppox_sk(ssk);
+ struct pppox_sock *po = pppox_sk(sk);
+
po->chan.hdrlen = val ? PPPOL2TP_L2TP_HDR_SIZE_SEQ :
PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
}
l2tp_session_set_header_len(session, session->tunnel->version);
- l2tp_info(session, PPPOL2TP_MSG_CONTROL,
+ l2tp_info(session, L2TP_MSG_CONTROL,
"%s: set send_seq=%d\n",
session->name, session->send_seq);
break;
@@ -1333,20 +1382,20 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
break;
}
session->lns_mode = val ? -1 : 0;
- l2tp_info(session, PPPOL2TP_MSG_CONTROL,
+ l2tp_info(session, L2TP_MSG_CONTROL,
"%s: set lns_mode=%d\n",
session->name, session->lns_mode);
break;
case PPPOL2TP_SO_DEBUG:
session->debug = val;
- l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: set debug=%x\n",
+ l2tp_info(session, L2TP_MSG_CONTROL, "%s: set debug=%x\n",
session->name, session->debug);
break;
case PPPOL2TP_SO_REORDERTO:
session->reorder_timeout = msecs_to_jiffies(val);
- l2tp_info(session, PPPOL2TP_MSG_CONTROL,
+ l2tp_info(session, L2TP_MSG_CONTROL,
"%s: set reorder_timeout=%d\n",
session->name, session->reorder_timeout);
break;
@@ -1427,7 +1476,7 @@ static int pppol2tp_tunnel_getsockopt(struct sock *sk,
switch (optname) {
case PPPOL2TP_SO_DEBUG:
*val = tunnel->debug;
- l2tp_info(tunnel, PPPOL2TP_MSG_CONTROL, "%s: get debug=%x\n",
+ l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: get debug=%x\n",
tunnel->name, tunnel->debug);
break;
@@ -1450,31 +1499,31 @@ static int pppol2tp_session_getsockopt(struct sock *sk,
switch (optname) {
case PPPOL2TP_SO_RECVSEQ:
*val = session->recv_seq;
- l2tp_info(session, PPPOL2TP_MSG_CONTROL,
+ l2tp_info(session, L2TP_MSG_CONTROL,
"%s: get recv_seq=%d\n", session->name, *val);
break;
case PPPOL2TP_SO_SENDSEQ:
*val = session->send_seq;
- l2tp_info(session, PPPOL2TP_MSG_CONTROL,
+ l2tp_info(session, L2TP_MSG_CONTROL,
"%s: get send_seq=%d\n", session->name, *val);
break;
case PPPOL2TP_SO_LNSMODE:
*val = session->lns_mode;
- l2tp_info(session, PPPOL2TP_MSG_CONTROL,
+ l2tp_info(session, L2TP_MSG_CONTROL,
"%s: get lns_mode=%d\n", session->name, *val);
break;
case PPPOL2TP_SO_DEBUG:
*val = session->debug;
- l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: get debug=%d\n",
+ l2tp_info(session, L2TP_MSG_CONTROL, "%s: get debug=%d\n",
session->name, *val);
break;
case PPPOL2TP_SO_REORDERTO:
*val = (int) jiffies_to_msecs(session->reorder_timeout);
- l2tp_info(session, PPPOL2TP_MSG_CONTROL,
+ l2tp_info(session, L2TP_MSG_CONTROL,
"%s: get reorder_timeout=%d\n", session->name, *val);
break;
@@ -1653,8 +1702,9 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
{
struct l2tp_session *session = v;
struct l2tp_tunnel *tunnel = session->tunnel;
- struct pppol2tp_session *ps = l2tp_session_priv(session);
- struct pppox_sock *po = pppox_sk(ps->sock);
+ unsigned char state;
+ char user_data_ok;
+ struct sock *sk;
u32 ip = 0;
u16 port = 0;
@@ -1664,6 +1714,15 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
port = ntohs(inet->inet_sport);
}
+ sk = pppol2tp_session_get_sock(session);
+ if (sk) {
+ state = sk->sk_state;
+ user_data_ok = (session == sk->sk_user_data) ? 'Y' : 'N';
+ } else {
+ state = 0;
+ user_data_ok = 'N';
+ }
+
seq_printf(m, " SESSION '%s' %08X/%d %04X/%04X -> "
"%04X/%04X %d %c\n",
session->name, ip, port,
@@ -1671,9 +1730,7 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
session->session_id,
tunnel->peer_tunnel_id,
session->peer_session_id,
- ps->sock->sk_state,
- (session == ps->sock->sk_user_data) ?
- 'Y' : 'N');
+ state, user_data_ok);
seq_printf(m, " %d/%d/%c/%c/%s %08x %u\n",
session->mtu, session->mru,
session->recv_seq ? 'R' : '-',
@@ -1690,8 +1747,12 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
atomic_long_read(&session->stats.rx_bytes),
atomic_long_read(&session->stats.rx_errors));
- if (po)
+ if (sk) {
+ struct pppox_sock *po = pppox_sk(sk);
+
seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan));
+ sock_put(sk);
+ }
}
static int pppol2tp_seq_show(struct seq_file *m, void *v)
diff --git a/net/lapb/lapb_out.c b/net/lapb/lapb_out.c
index ba4d015bd1a6..7cbb77b7479a 100644
--- a/net/lapb/lapb_out.c
+++ b/net/lapb/lapb_out.c
@@ -87,7 +87,8 @@ void lapb_kick(struct lapb_cb *lapb)
skb = skb_dequeue(&lapb->write_queue);
do {
- if ((skbn = skb_clone(skb, GFP_ATOMIC)) == NULL) {
+ skbn = skb_copy(skb, GFP_ATOMIC);
+ if (!skbn) {
skb_queue_head(&lapb->write_queue, skb);
break;
}
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index c153fc2883a8..82b07bc43071 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -96,8 +96,16 @@ static inline u8 llc_ui_header_len(struct sock *sk, struct sockaddr_llc *addr)
{
u8 rc = LLC_PDU_LEN_U;
- if (addr->sllc_test || addr->sllc_xid)
+ if (addr->sllc_test)
rc = LLC_PDU_LEN_U;
+ else if (addr->sllc_xid)
+ /* We need to expand header to sizeof(struct llc_xid_info)
+ * since llc_pdu_init_as_xid_cmd() sets 4,5,6 bytes of LLC header
+ * as XID PDU. In llc_ui_sendmsg() we reserved header size and then
+ * filled all other space with user data. If we won't reserve this
+ * bytes, llc_pdu_init_as_xid_cmd() will overwrite user data
+ */
+ rc = LLC_PDU_LEN_U_XID;
else if (sk->sk_type == SOCK_STREAM)
rc = LLC_PDU_LEN_I;
return rc;
@@ -111,22 +119,26 @@ static inline u8 llc_ui_header_len(struct sock *sk, struct sockaddr_llc *addr)
*
* Send data via reliable llc2 connection.
* Returns 0 upon success, non-zero if action did not succeed.
+ *
+ * This function always consumes a reference to the skb.
*/
static int llc_ui_send_data(struct sock* sk, struct sk_buff *skb, int noblock)
{
struct llc_sock* llc = llc_sk(sk);
- int rc = 0;
if (unlikely(llc_data_accept_state(llc->state) ||
llc->remote_busy_flag ||
llc->p_flag)) {
long timeout = sock_sndtimeo(sk, noblock);
+ int rc;
rc = llc_ui_wait_for_busy_core(sk, timeout);
+ if (rc) {
+ kfree_skb(skb);
+ return rc;
+ }
}
- if (unlikely(!rc))
- rc = llc_build_and_send_pkt(sk, skb);
- return rc;
+ return llc_build_and_send_pkt(sk, skb);
}
static void llc_ui_sk_init(struct socket *sock, struct sock *sk)
@@ -267,6 +279,10 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr)
if (!sock_flag(sk, SOCK_ZAPPED))
goto out;
+ if (!addr->sllc_arphrd)
+ addr->sllc_arphrd = ARPHRD_ETHER;
+ if (addr->sllc_arphrd != ARPHRD_ETHER)
+ goto out;
rc = -ENODEV;
if (sk->sk_bound_dev_if) {
llc->dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if);
@@ -324,15 +340,15 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
if (unlikely(!sock_flag(sk, SOCK_ZAPPED) || addrlen != sizeof(*addr)))
goto out;
rc = -EAFNOSUPPORT;
- if (unlikely(addr->sllc_family != AF_LLC))
+ if (!addr->sllc_arphrd)
+ addr->sllc_arphrd = ARPHRD_ETHER;
+ if (unlikely(addr->sllc_family != AF_LLC || addr->sllc_arphrd != ARPHRD_ETHER))
goto out;
rc = -ENODEV;
rcu_read_lock();
if (sk->sk_bound_dev_if) {
llc->dev = dev_get_by_index_rcu(&init_net, sk->sk_bound_dev_if);
if (llc->dev) {
- if (!addr->sllc_arphrd)
- addr->sllc_arphrd = llc->dev->type;
if (is_zero_ether_addr(addr->sllc_mac))
memcpy(addr->sllc_mac, llc->dev->dev_addr,
IFHWADDRLEN);
@@ -896,7 +912,7 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
DECLARE_SOCKADDR(struct sockaddr_llc *, addr, msg->msg_name);
int flags = msg->msg_flags;
int noblock = flags & MSG_DONTWAIT;
- struct sk_buff *skb;
+ struct sk_buff *skb = NULL;
size_t size = 0;
int rc = -EINVAL, copied = 0, hdrlen;
@@ -905,10 +921,10 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
lock_sock(sk);
if (addr) {
if (msg->msg_namelen < sizeof(*addr))
- goto release;
+ goto out;
} else {
if (llc_ui_addr_null(&llc->addr))
- goto release;
+ goto out;
addr = &llc->addr;
}
/* must bind connection to sap if user hasn't done it. */
@@ -916,7 +932,7 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
/* bind to sap with null dev, exclusive. */
rc = llc_ui_autobind(sock, addr);
if (rc)
- goto release;
+ goto out;
}
hdrlen = llc->dev->hard_header_len + llc_ui_header_len(sk, addr);
size = hdrlen + len;
@@ -925,12 +941,12 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
copied = size - hdrlen;
rc = -EINVAL;
if (copied < 0)
- goto release;
+ goto out;
release_sock(sk);
skb = sock_alloc_send_skb(sk, size, noblock, &rc);
lock_sock(sk);
if (!skb)
- goto release;
+ goto out;
skb->dev = llc->dev;
skb->protocol = llc_proto_type(addr->sllc_arphrd);
skb_reserve(skb, hdrlen);
@@ -940,29 +956,31 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
if (sk->sk_type == SOCK_DGRAM || addr->sllc_ua) {
llc_build_and_send_ui_pkt(llc->sap, skb, addr->sllc_mac,
addr->sllc_sap);
+ skb = NULL;
goto out;
}
if (addr->sllc_test) {
llc_build_and_send_test_pkt(llc->sap, skb, addr->sllc_mac,
addr->sllc_sap);
+ skb = NULL;
goto out;
}
if (addr->sllc_xid) {
llc_build_and_send_xid_pkt(llc->sap, skb, addr->sllc_mac,
addr->sllc_sap);
+ skb = NULL;
goto out;
}
rc = -ENOPROTOOPT;
if (!(sk->sk_type == SOCK_STREAM && !addr->sllc_ua))
goto out;
rc = llc_ui_send_data(sk, skb, noblock);
+ skb = NULL;
out:
- if (rc) {
- kfree_skb(skb);
-release:
+ kfree_skb(skb);
+ if (rc)
dprintk("%s: failed sending from %02X to %02X: %d\n",
__func__, llc->laddr.lsap, llc->daddr.lsap, rc);
- }
release_sock(sk);
return rc ? : copied;
}
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index d861b74ad068..3b002ab68b29 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -55,6 +55,8 @@ int sysctl_llc2_busy_timeout = LLC2_BUSY_TIME * HZ;
* (executing it's actions and changing state), upper layer will be
* indicated or confirmed, if needed. Returns 0 for success, 1 for
* failure. The socket lock has to be held before calling this function.
+ *
+ * This function always consumes a reference to the skb.
*/
int llc_conn_state_process(struct sock *sk, struct sk_buff *skb)
{
@@ -62,12 +64,6 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb)
struct llc_sock *llc = llc_sk(skb->sk);
struct llc_conn_state_ev *ev = llc_conn_ev(skb);
- /*
- * We have to hold the skb, because llc_conn_service will kfree it in
- * the sending path and we need to look at the skb->cb, where we encode
- * llc_conn_state_ev.
- */
- skb_get(skb);
ev->ind_prim = ev->cfm_prim = 0;
/*
* Send event to state machine
@@ -75,21 +71,12 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb)
rc = llc_conn_service(skb->sk, skb);
if (unlikely(rc != 0)) {
printk(KERN_ERR "%s: llc_conn_service failed\n", __func__);
- goto out_kfree_skb;
- }
-
- if (unlikely(!ev->ind_prim && !ev->cfm_prim)) {
- /* indicate or confirm not required */
- if (!skb->next)
- goto out_kfree_skb;
goto out_skb_put;
}
- if (unlikely(ev->ind_prim && ev->cfm_prim)) /* Paranoia */
- skb_get(skb);
-
switch (ev->ind_prim) {
case LLC_DATA_PRIM:
+ skb_get(skb);
llc_save_primitive(sk, skb, LLC_DATA_PRIM);
if (unlikely(sock_queue_rcv_skb(sk, skb))) {
/*
@@ -106,6 +93,7 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb)
* skb->sk pointing to the newly created struct sock in
* llc_conn_handler. -acme
*/
+ skb_get(skb);
skb_queue_tail(&sk->sk_receive_queue, skb);
sk->sk_state_change(sk);
break;
@@ -121,7 +109,6 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb)
sk->sk_state_change(sk);
}
}
- kfree_skb(skb);
sock_put(sk);
break;
case LLC_RESET_PRIM:
@@ -130,14 +117,11 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb)
* RESET is not being notified to upper layers for now
*/
printk(KERN_INFO "%s: received a reset ind!\n", __func__);
- kfree_skb(skb);
break;
default:
- if (ev->ind_prim) {
+ if (ev->ind_prim)
printk(KERN_INFO "%s: received unknown %d prim!\n",
__func__, ev->ind_prim);
- kfree_skb(skb);
- }
/* No indication */
break;
}
@@ -179,15 +163,12 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb)
printk(KERN_INFO "%s: received a reset conf!\n", __func__);
break;
default:
- if (ev->cfm_prim) {
+ if (ev->cfm_prim)
printk(KERN_INFO "%s: received unknown %d prim!\n",
__func__, ev->cfm_prim);
- break;
- }
- goto out_skb_put; /* No confirmation */
+ /* No confirmation */
+ break;
}
-out_kfree_skb:
- kfree_skb(skb);
out_skb_put:
kfree_skb(skb);
return rc;
diff --git a/net/llc/llc_if.c b/net/llc/llc_if.c
index 6daf391b3e84..fc4d2bd8816f 100644
--- a/net/llc/llc_if.c
+++ b/net/llc/llc_if.c
@@ -38,6 +38,8 @@
* closed and -EBUSY when sending data is not permitted in this state or
* LLC has send an I pdu with p bit set to 1 and is waiting for it's
* response.
+ *
+ * This function always consumes a reference to the skb.
*/
int llc_build_and_send_pkt(struct sock *sk, struct sk_buff *skb)
{
@@ -46,20 +48,22 @@ int llc_build_and_send_pkt(struct sock *sk, struct sk_buff *skb)
struct llc_sock *llc = llc_sk(sk);
if (unlikely(llc->state == LLC_CONN_STATE_ADM))
- goto out;
+ goto out_free;
rc = -EBUSY;
if (unlikely(llc_data_accept_state(llc->state) || /* data_conn_refuse */
llc->p_flag)) {
llc->failed_data_req = 1;
- goto out;
+ goto out_free;
}
ev = llc_conn_ev(skb);
ev->type = LLC_CONN_EV_TYPE_PRIM;
ev->prim = LLC_DATA_PRIM;
ev->prim_type = LLC_PRIM_TYPE_REQ;
skb->dev = llc->dev;
- rc = llc_conn_state_process(sk, skb);
-out:
+ return llc_conn_state_process(sk, skb);
+
+out_free:
+ kfree_skb(skb);
return rc;
}
diff --git a/net/llc/llc_s_ac.c b/net/llc/llc_s_ac.c
index 7ae4cc684d3a..9fa3342c7a82 100644
--- a/net/llc/llc_s_ac.c
+++ b/net/llc/llc_s_ac.c
@@ -79,7 +79,7 @@ int llc_sap_action_send_xid_c(struct llc_sap *sap, struct sk_buff *skb)
struct llc_sap_state_ev *ev = llc_sap_ev(skb);
int rc;
- llc_pdu_header_init(skb, LLC_PDU_TYPE_U, ev->saddr.lsap,
+ llc_pdu_header_init(skb, LLC_PDU_TYPE_U_XID, ev->saddr.lsap,
ev->daddr.lsap, LLC_PDU_CMD);
llc_pdu_init_as_xid_cmd(skb, LLC_XID_NULL_CLASS_2, 0);
rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac);
diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c
index 204a8351efff..c29170e767a8 100644
--- a/net/llc/llc_station.c
+++ b/net/llc/llc_station.c
@@ -32,7 +32,7 @@ static int llc_stat_ev_rx_null_dsap_xid_c(struct sk_buff *skb)
return LLC_PDU_IS_CMD(pdu) && /* command PDU */
LLC_PDU_TYPE_IS_U(pdu) && /* U type PDU */
LLC_U_PDU_CMD(pdu) == LLC_1_PDU_CMD_XID &&
- !pdu->dsap ? 0 : 1; /* NULL DSAP value */
+ !pdu->dsap; /* NULL DSAP value */
}
static int llc_stat_ev_rx_null_dsap_test_c(struct sk_buff *skb)
@@ -42,7 +42,7 @@ static int llc_stat_ev_rx_null_dsap_test_c(struct sk_buff *skb)
return LLC_PDU_IS_CMD(pdu) && /* command PDU */
LLC_PDU_TYPE_IS_U(pdu) && /* U type PDU */
LLC_U_PDU_CMD(pdu) == LLC_1_PDU_CMD_TEST &&
- !pdu->dsap ? 0 : 1; /* NULL DSAP */
+ !pdu->dsap; /* NULL DSAP */
}
static int llc_station_ac_send_xid_r(struct sk_buff *skb)
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 4932e9f243a2..3d49ffe8a34d 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -109,7 +109,7 @@ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata,
mgmt->u.action.u.addba_req.start_seq_num =
cpu_to_le16(start_seq_num << 4);
- ieee80211_tx_skb(sdata, skb);
+ ieee80211_tx_skb_tid(sdata, skb, tid);
}
void ieee80211_send_bar(struct ieee80211_vif *vif, u8 *ra, u16 tid, u16 ssn)
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 1999a7eaa692..c5be6bf2f00d 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -875,50 +875,6 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev)
return 0;
}
-/* Layer 2 Update frame (802.2 Type 1 LLC XID Update response) */
-struct iapp_layer2_update {
- u8 da[ETH_ALEN]; /* broadcast */
- u8 sa[ETH_ALEN]; /* STA addr */
- __be16 len; /* 6 */
- u8 dsap; /* 0 */
- u8 ssap; /* 0 */
- u8 control;
- u8 xid_info[3];
-} __packed;
-
-static void ieee80211_send_layer2_update(struct sta_info *sta)
-{
- struct iapp_layer2_update *msg;
- struct sk_buff *skb;
-
- /* Send Level 2 Update Frame to update forwarding tables in layer 2
- * bridge devices */
-
- skb = dev_alloc_skb(sizeof(*msg));
- if (!skb)
- return;
- msg = (struct iapp_layer2_update *)skb_put(skb, sizeof(*msg));
-
- /* 802.2 Type 1 Logical Link Control (LLC) Exchange Identifier (XID)
- * Update response frame; IEEE Std 802.2-1998, 5.4.1.2.1 */
-
- eth_broadcast_addr(msg->da);
- memcpy(msg->sa, sta->sta.addr, ETH_ALEN);
- msg->len = htons(6);
- msg->dsap = 0;
- msg->ssap = 0x01; /* NULL LSAP, CR Bit: Response */
- msg->control = 0xaf; /* XID response lsb.1111F101.
- * F=0 (no poll command; unsolicited frame) */
- msg->xid_info[0] = 0x81; /* XID format identifier */
- msg->xid_info[1] = 1; /* LLC types/classes: Type 1 LLC */
- msg->xid_info[2] = 0; /* XID sender's receive window size (RW) */
-
- skb->dev = sta->sdata->dev;
- skb->protocol = eth_type_trans(skb, sta->sdata->dev);
- memset(skb->cb, 0, sizeof(skb->cb));
- netif_rx_ni(skb);
-}
-
static int sta_apply_auth_flags(struct ieee80211_local *local,
struct sta_info *sta,
u32 mask, u32 set)
@@ -1045,7 +1001,7 @@ static int sta_apply_parameters(struct ieee80211_local *local,
int ret = 0;
struct ieee80211_supported_band *sband;
struct ieee80211_sub_if_data *sdata = sta->sdata;
- enum ieee80211_band band = ieee80211_get_sdata_band(sdata);
+ enum nl80211_band band = ieee80211_get_sdata_band(sdata);
u32 mask, set;
sband = local->hw.wiphy->bands[band];
@@ -1194,7 +1150,6 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
struct sta_info *sta;
struct ieee80211_sub_if_data *sdata;
int err;
- int layer2_update;
if (params->vlan) {
sdata = IEEE80211_DEV_TO_SUB_IF(params->vlan);
@@ -1248,18 +1203,12 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
test_sta_flag(sta, WLAN_STA_ASSOC))
rate_control_rate_init(sta);
- layer2_update = sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
- sdata->vif.type == NL80211_IFTYPE_AP;
-
err = sta_info_insert_rcu(sta);
if (err) {
rcu_read_unlock();
return err;
}
- if (layer2_update)
- ieee80211_send_layer2_update(sta);
-
rcu_read_unlock();
return 0;
@@ -1367,7 +1316,9 @@ static int ieee80211_change_station(struct wiphy *wiphy,
atomic_inc(&sta->sdata->bss->num_mcast_sta);
}
- ieee80211_send_layer2_update(sta);
+ if (sta->sta_state == IEEE80211_STA_AUTHORIZED)
+ cfg80211_send_layer2_update(sta->sdata->dev,
+ sta->sta.addr);
}
err = sta_apply_parameters(local, sta, params);
@@ -1815,6 +1766,7 @@ static int ieee80211_leave_mesh(struct wiphy *wiphy, struct net_device *dev)
ieee80211_stop_mesh(sdata);
mutex_lock(&sdata->local->mtx);
ieee80211_vif_release_channel(sdata);
+ kfree(sdata->u.mesh.ie);
mutex_unlock(&sdata->local->mtx);
return 0;
@@ -1826,7 +1778,7 @@ static int ieee80211_change_bss(struct wiphy *wiphy,
struct bss_parameters *params)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
- enum ieee80211_band band;
+ enum nl80211_band band;
u32 changed = 0;
if (!sdata_dereference(sdata->u.ap.beacon, sdata))
@@ -1845,7 +1797,7 @@ static int ieee80211_change_bss(struct wiphy *wiphy,
}
if (!sdata->vif.bss_conf.use_short_slot &&
- band == IEEE80211_BAND_5GHZ) {
+ band == NL80211_BAND_5GHZ) {
sdata->vif.bss_conf.use_short_slot = true;
changed |= BSS_CHANGED_ERP_SLOT;
}
@@ -2069,12 +2021,12 @@ static int ieee80211_leave_ocb(struct wiphy *wiphy, struct net_device *dev)
}
static int ieee80211_set_mcast_rate(struct wiphy *wiphy, struct net_device *dev,
- int rate[IEEE80211_NUM_BANDS])
+ int rate[NUM_NL80211_BANDS])
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
memcpy(sdata->vif.bss_conf.mcast_rate, rate,
- sizeof(int) * IEEE80211_NUM_BANDS);
+ sizeof(int) * NUM_NL80211_BANDS);
return 0;
}
@@ -2479,7 +2431,7 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,
return ret;
}
- for (i = 0; i < IEEE80211_NUM_BANDS; i++) {
+ for (i = 0; i < NUM_NL80211_BANDS; i++) {
struct ieee80211_supported_band *sband = wiphy->bands[i];
int j;
@@ -2496,14 +2448,14 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,
continue;
for (j = 0; j < IEEE80211_HT_MCS_MASK_LEN; j++) {
- if (~sdata->rc_rateidx_mcs_mask[i][j]) {
+ if (sdata->rc_rateidx_mcs_mask[i][j] != 0xff) {
sdata->rc_has_mcs_mask[i] = true;
break;
}
}
for (j = 0; j < NL80211_VHT_NSS_MAX; j++) {
- if (~sdata->rc_rateidx_vht_mcs_mask[i][j]) {
+ if (sdata->rc_rateidx_vht_mcs_mask[i][j] != 0xffff) {
sdata->rc_has_vht_mcs_mask[i] = true;
break;
}
@@ -3580,7 +3532,7 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev,
struct ieee80211_tx_info *info;
struct sta_info *sta;
struct ieee80211_chanctx_conf *chanctx_conf;
- enum ieee80211_band band;
+ enum nl80211_band band;
int ret;
/* the lock is needed to assign the cookie later */
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index e546a987a9d3..3e24d0ddb51b 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -125,6 +125,7 @@ static const char *hw_flag_names[] = {
FLAG(TDLS_WIDER_BW),
FLAG(SUPPORTS_AMSDU_IN_AMPDU),
FLAG(BEACON_TX_STATUS),
+ FLAG(NEEDS_UNIQUE_STA_ADDR),
#undef FLAG
};
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 37ea30e0754c..a5ba739cd2a7 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -169,21 +169,21 @@ static ssize_t ieee80211_if_write_##name(struct file *file, \
IEEE80211_IF_FILE_R(name)
/* common attributes */
-IEEE80211_IF_FILE(rc_rateidx_mask_2ghz, rc_rateidx_mask[IEEE80211_BAND_2GHZ],
+IEEE80211_IF_FILE(rc_rateidx_mask_2ghz, rc_rateidx_mask[NL80211_BAND_2GHZ],
HEX);
-IEEE80211_IF_FILE(rc_rateidx_mask_5ghz, rc_rateidx_mask[IEEE80211_BAND_5GHZ],
+IEEE80211_IF_FILE(rc_rateidx_mask_5ghz, rc_rateidx_mask[NL80211_BAND_5GHZ],
HEX);
IEEE80211_IF_FILE(rc_rateidx_mcs_mask_2ghz,
- rc_rateidx_mcs_mask[IEEE80211_BAND_2GHZ], HEXARRAY);
+ rc_rateidx_mcs_mask[NL80211_BAND_2GHZ], HEXARRAY);
IEEE80211_IF_FILE(rc_rateidx_mcs_mask_5ghz,
- rc_rateidx_mcs_mask[IEEE80211_BAND_5GHZ], HEXARRAY);
+ rc_rateidx_mcs_mask[NL80211_BAND_5GHZ], HEXARRAY);
static ssize_t ieee80211_if_fmt_rc_rateidx_vht_mcs_mask_2ghz(
const struct ieee80211_sub_if_data *sdata,
char *buf, int buflen)
{
int i, len = 0;
- const u16 *mask = sdata->rc_rateidx_vht_mcs_mask[IEEE80211_BAND_2GHZ];
+ const u16 *mask = sdata->rc_rateidx_vht_mcs_mask[NL80211_BAND_2GHZ];
for (i = 0; i < NL80211_VHT_NSS_MAX; i++)
len += scnprintf(buf + len, buflen - len, "%04x ", mask[i]);
@@ -199,7 +199,7 @@ static ssize_t ieee80211_if_fmt_rc_rateidx_vht_mcs_mask_5ghz(
char *buf, int buflen)
{
int i, len = 0;
- const u16 *mask = sdata->rc_rateidx_vht_mcs_mask[IEEE80211_BAND_5GHZ];
+ const u16 *mask = sdata->rc_rateidx_vht_mcs_mask[NL80211_BAND_5GHZ];
for (i = 0; i < NL80211_VHT_NSS_MAX; i++)
len += scnprintf(buf + len, buflen - len, "%04x ", mask[i]);
diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c
index df2e4e311217..5d097ae26b70 100644
--- a/net/mac80211/driver-ops.c
+++ b/net/mac80211/driver-ops.c
@@ -128,8 +128,11 @@ int drv_sta_state(struct ieee80211_local *local,
} else if (old_state == IEEE80211_STA_AUTH &&
new_state == IEEE80211_STA_ASSOC) {
ret = drv_sta_add(local, sdata, &sta->sta);
- if (ret == 0)
+ if (ret == 0) {
sta->uploaded = true;
+ if (rcu_access_pointer(sta->sta.rates))
+ drv_sta_rate_tbl_update(local, sdata, &sta->sta);
+ }
} else if (old_state == IEEE80211_STA_ASSOC &&
new_state == IEEE80211_STA_AUTH) {
drv_sta_remove(local, sdata, &sta->sta);
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 0a35dd68f1a1..95fcf57e6567 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -128,7 +128,7 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata,
}
}
- if (sband->band == IEEE80211_BAND_2GHZ) {
+ if (sband->band == NL80211_BAND_2GHZ) {
*pos++ = WLAN_EID_DS_PARAMS;
*pos++ = 1;
*pos++ = ieee80211_frequency_to_channel(
@@ -350,11 +350,11 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
*
* HT follows these specifications (IEEE 802.11-2012 20.3.18)
*/
- sdata->vif.bss_conf.use_short_slot = chan->band == IEEE80211_BAND_5GHZ;
+ sdata->vif.bss_conf.use_short_slot = chan->band == NL80211_BAND_5GHZ;
bss_change |= BSS_CHANGED_ERP_SLOT;
/* cf. IEEE 802.11 9.2.12 */
- if (chan->band == IEEE80211_BAND_2GHZ && have_higher_than_11mbit)
+ if (chan->band == NL80211_BAND_2GHZ && have_higher_than_11mbit)
sdata->flags |= IEEE80211_SDATA_OPERATING_GMODE;
else
sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE;
@@ -991,7 +991,7 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata,
struct ieee80211_channel *channel)
{
struct sta_info *sta;
- enum ieee80211_band band = rx_status->band;
+ enum nl80211_band band = rx_status->band;
enum nl80211_bss_scan_width scan_width;
struct ieee80211_local *local = sdata->local;
struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band];
@@ -1113,7 +1113,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
struct ieee80211_channel *channel;
u64 beacon_timestamp, rx_timestamp;
u32 supp_rates = 0;
- enum ieee80211_band band = rx_status->band;
+ enum nl80211_band band = rx_status->band;
channel = ieee80211_get_channel(local->hw.wiphy, rx_status->freq);
if (!channel)
@@ -1860,6 +1860,8 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata)
/* remove beacon */
kfree(sdata->u.ibss.ie);
+ sdata->u.ibss.ie = NULL;
+ sdata->u.ibss.ie_len = 0;
/* on the next join, re-program HT parameters */
memset(&ifibss->ht_capa, 0, sizeof(ifibss->ht_capa));
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 2527294f96c8..3a91f32d1eda 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -51,12 +51,6 @@ struct ieee80211_local;
#define IEEE80211_ENCRYPT_HEADROOM 8
#define IEEE80211_ENCRYPT_TAILROOM 18
-/* IEEE 802.11 (Ch. 9.5 Defragmentation) requires support for concurrent
- * reception of at least three fragmented frames. This limit can be increased
- * by changing this define, at the cost of slower frame reassembly and
- * increased memory use (about 2 kB of RAM per entry). */
-#define IEEE80211_FRAGMENT_MAX 4
-
/* power level hasn't been configured (or set to automatic) */
#define IEEE80211_UNSET_POWER_LEVEL INT_MIN
@@ -85,18 +79,6 @@ struct ieee80211_local;
#define IEEE80211_DEAUTH_FRAME_LEN (24 /* hdr */ + 2 /* reason */)
-struct ieee80211_fragment_entry {
- struct sk_buff_head skb_list;
- unsigned long first_frag_time;
- u16 seq;
- u16 extra_len;
- u16 last_frag;
- u8 rx_queue;
- bool check_sequential_pn; /* needed for CCMP/GCMP */
- u8 last_pn[6]; /* PN of the last fragment if CCMP was used */
-};
-
-
struct ieee80211_bss {
u32 device_ts_beacon, device_ts_presp;
@@ -236,8 +218,15 @@ struct ieee80211_rx_data {
*/
int security_idx;
- u32 tkip_iv32;
- u16 tkip_iv16;
+ union {
+ struct {
+ u32 iv32;
+ u16 iv16;
+ } tkip;
+ struct {
+ u8 pn[IEEE80211_CCMP_PN_LEN];
+ } ccm_gcm;
+ };
};
struct ieee80211_csa_settings {
@@ -835,9 +824,7 @@ struct ieee80211_sub_if_data {
char name[IFNAMSIZ];
- /* Fragment table for host-based reassembly */
- struct ieee80211_fragment_entry fragments[IEEE80211_FRAGMENT_MAX];
- unsigned int fragment_next;
+ struct ieee80211_fragment_cache frags;
/* TID bitmap for NoAck policy */
u16 noack_map;
@@ -895,13 +882,13 @@ struct ieee80211_sub_if_data {
struct ieee80211_if_ap *bss;
/* bitmap of allowed (non-MCS) rate indexes for rate control */
- u32 rc_rateidx_mask[IEEE80211_NUM_BANDS];
+ u32 rc_rateidx_mask[NUM_NL80211_BANDS];
- bool rc_has_mcs_mask[IEEE80211_NUM_BANDS];
- u8 rc_rateidx_mcs_mask[IEEE80211_NUM_BANDS][IEEE80211_HT_MCS_MASK_LEN];
+ bool rc_has_mcs_mask[NUM_NL80211_BANDS];
+ u8 rc_rateidx_mcs_mask[NUM_NL80211_BANDS][IEEE80211_HT_MCS_MASK_LEN];
- bool rc_has_vht_mcs_mask[IEEE80211_NUM_BANDS];
- u16 rc_rateidx_vht_mcs_mask[IEEE80211_NUM_BANDS][NL80211_VHT_NSS_MAX];
+ bool rc_has_vht_mcs_mask[NUM_NL80211_BANDS];
+ u16 rc_rateidx_vht_mcs_mask[NUM_NL80211_BANDS][NL80211_VHT_NSS_MAX];
union {
struct ieee80211_if_ap ap;
@@ -956,10 +943,10 @@ sdata_assert_lock(struct ieee80211_sub_if_data *sdata)
lockdep_assert_held(&sdata->wdev.mtx);
}
-static inline enum ieee80211_band
+static inline enum nl80211_band
ieee80211_get_sdata_band(struct ieee80211_sub_if_data *sdata)
{
- enum ieee80211_band band = IEEE80211_BAND_2GHZ;
+ enum nl80211_band band = NL80211_BAND_2GHZ;
struct ieee80211_chanctx_conf *chanctx_conf;
rcu_read_lock();
@@ -1028,6 +1015,7 @@ enum queue_stop_reason {
IEEE80211_QUEUE_STOP_REASON_FLUSH,
IEEE80211_QUEUE_STOP_REASON_TDLS_TEARDOWN,
IEEE80211_QUEUE_STOP_REASON_RESERVE_TID,
+ IEEE80211_QUEUE_STOP_REASON_IFTYPE_CHANGE,
IEEE80211_QUEUE_STOP_REASONS,
};
@@ -1230,7 +1218,7 @@ struct ieee80211_local {
struct cfg80211_scan_request __rcu *scan_req;
struct ieee80211_scan_request *hw_scan_req;
struct cfg80211_chan_def scan_chandef;
- enum ieee80211_band hw_scan_band;
+ enum nl80211_band hw_scan_band;
int scan_channel_idx;
int scan_ies_len;
int hw_scan_ies_bufsize;
@@ -1707,12 +1695,16 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct sta_info *sta);
enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta);
void ieee80211_sta_set_rx_nss(struct sta_info *sta);
+enum ieee80211_sta_rx_bandwidth
+ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width);
+enum nl80211_chan_width ieee80211_sta_cap_chan_bw(struct sta_info *sta);
+void ieee80211_sta_set_rx_nss(struct sta_info *sta);
u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta, u8 opmode,
- enum ieee80211_band band);
+ enum nl80211_band band);
void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta, u8 opmode,
- enum ieee80211_band band);
+ enum nl80211_band band);
void ieee80211_apply_vhtcap_overrides(struct ieee80211_sub_if_data *sdata,
struct ieee80211_sta_vht_cap *vht_cap);
void ieee80211_get_vht_mask_from_cap(__le16 vht_cap,
@@ -1740,7 +1732,7 @@ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
*/
int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
struct ieee802_11_elems *elems,
- enum ieee80211_band current_band,
+ enum nl80211_band current_band,
u32 sta_flags, u8 *bssid,
struct ieee80211_csa_ie *csa_ie);
@@ -1765,7 +1757,7 @@ static inline int __ieee80211_resume(struct ieee80211_hw *hw)
/* utility functions/constants */
extern const void *const mac80211_wiphy_privid; /* for wiphy privid */
-int ieee80211_frame_duration(enum ieee80211_band band, size_t len,
+int ieee80211_frame_duration(enum nl80211_band band, size_t len,
int rate, int erp, int short_preamble,
int shift);
void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata,
@@ -1775,12 +1767,12 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata,
void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb, int tid,
- enum ieee80211_band band);
+ enum nl80211_band band);
static inline void
ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb, int tid,
- enum ieee80211_band band)
+ enum nl80211_band band)
{
rcu_read_lock();
__ieee80211_tx_skb_tid_band(sdata, skb, tid, band);
@@ -1949,7 +1941,7 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata,
u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata,
struct ieee802_11_elems *elems,
- enum ieee80211_band band, u32 *basic_rates);
+ enum nl80211_band band, u32 *basic_rates);
int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata,
enum ieee80211_smps_mode smps_mode);
int __ieee80211_request_smps_ap(struct ieee80211_sub_if_data *sdata,
@@ -1972,10 +1964,10 @@ int ieee80211_parse_bitrates(struct cfg80211_chan_def *chandef,
const u8 *srates, int srates_len, u32 *rates);
int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb, bool need_basic,
- enum ieee80211_band band);
+ enum nl80211_band band);
int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb, bool need_basic,
- enum ieee80211_band band);
+ enum nl80211_band band);
u8 *ieee80211_add_wmm_info_ie(u8 *buf, u8 qosinfo);
/* channel management */
@@ -2072,4 +2064,7 @@ extern const struct ethtool_ops ieee80211_ethtool_ops;
#define debug_noinline
#endif
+void ieee80211_init_frag_cache(struct ieee80211_fragment_cache *cache);
+void ieee80211_destroy_frag_cache(struct ieee80211_fragment_cache *cache);
+
#endif /* IEEE80211_I_H */
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 3678f2d5fcfe..dceaad91c1e0 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1083,16 +1083,12 @@ static void ieee80211_set_multicast_list(struct net_device *dev)
*/
static void ieee80211_teardown_sdata(struct ieee80211_sub_if_data *sdata)
{
- int i;
-
/* free extra data */
ieee80211_free_keys(sdata, false);
ieee80211_debugfs_remove_netdev(sdata);
- for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++)
- __skb_queue_purge(&sdata->fragments[i].skb_list);
- sdata->fragment_next = 0;
+ ieee80211_destroy_frag_cache(&sdata->frags);
if (ieee80211_vif_is_mesh(&sdata->vif))
mesh_rmc_free(sdata);
@@ -1508,6 +1504,10 @@ static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata,
if (ret)
return ret;
+ ieee80211_stop_vif_queues(local, sdata,
+ IEEE80211_QUEUE_STOP_REASON_IFTYPE_CHANGE);
+ synchronize_net();
+
ieee80211_do_stop(sdata, false);
ieee80211_teardown_sdata(sdata);
@@ -1528,6 +1528,8 @@ static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata,
err = ieee80211_do_open(&sdata->wdev, false);
WARN(err, "type change: do_open returned %d", err);
+ ieee80211_wake_vif_queues(local, sdata,
+ IEEE80211_QUEUE_STOP_REASON_IFTYPE_CHANGE);
return ret;
}
@@ -1782,8 +1784,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
sdata->wdev.wiphy = local->hw.wiphy;
sdata->local = local;
- for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++)
- skb_queue_head_init(&sdata->fragments[i].skb_list);
+ ieee80211_init_frag_cache(&sdata->frags);
INIT_LIST_HEAD(&sdata->key_list);
@@ -1792,7 +1793,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
INIT_DELAYED_WORK(&sdata->dec_tailroom_needed_wk,
ieee80211_delayed_tailroom_dec);
- for (i = 0; i < IEEE80211_NUM_BANDS; i++) {
+ for (i = 0; i < NUM_NL80211_BANDS; i++) {
struct ieee80211_supported_band *sband;
sband = local->hw.wiphy->bands[i];
sdata->rc_rateidx_mask[i] =
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 91a4e606edcd..a2050d5776ce 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -646,6 +646,7 @@ int ieee80211_key_link(struct ieee80211_key *key,
struct sta_info *sta)
{
struct ieee80211_local *local = sdata->local;
+ static atomic_t key_color = ATOMIC_INIT(0);
struct ieee80211_key *old_key;
int idx = key->conf.keyidx;
bool pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE;
@@ -680,6 +681,12 @@ int ieee80211_key_link(struct ieee80211_key *key,
key->sdata = sdata;
key->sta = sta;
+ /*
+ * Assign a unique ID to every key so we can easily prevent mixed
+ * key and fragment cache attacks.
+ */
+ key->color = atomic_inc_return(&key_color);
+
increment_tailroom_need_count(sdata);
ieee80211_key_replace(sdata, sta, pairwise, old_key, key);
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index 9951ef06323e..9ac5c00dbe80 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -123,6 +123,8 @@ struct ieee80211_key {
} debugfs;
#endif
+ unsigned int color;
+
/*
* key config, must be last because it contains key
* material as variable length member
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 15d23aeea634..3752e43ef41b 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -808,7 +808,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
{
struct ieee80211_local *local = hw_to_local(hw);
int result, i;
- enum ieee80211_band band;
+ enum nl80211_band band;
int channels, max_bitrates;
bool supp_ht, supp_vht;
netdev_features_t feature_whitelist;
@@ -881,7 +881,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
max_bitrates = 0;
supp_ht = false;
supp_vht = false;
- for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
+ for (band = 0; band < NUM_NL80211_BANDS; band++) {
struct ieee80211_supported_band *sband;
sband = local->hw.wiphy->bands[band];
@@ -889,8 +889,19 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
continue;
if (!dflt_chandef.chan) {
+ /*
+ * Assign the first enabled channel to dflt_chandef
+ * from the list of channels
+ */
+ for (i = 0; i < sband->n_channels; i++)
+ if (!(sband->channels[i].flags &
+ IEEE80211_CHAN_DISABLED))
+ break;
+ /* if none found then use the first anyway */
+ if (i == sband->n_channels)
+ i = 0;
cfg80211_chandef_create(&dflt_chandef,
- &sband->channels[0],
+ &sband->channels[i],
NL80211_CHAN_NO_HT);
/* init channel we're on */
if (!local->use_chanctx && !local->_oper_chandef.chan) {
@@ -948,7 +959,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
if (!local->int_scan_req)
return -ENOMEM;
- for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
+ for (band = 0; band < NUM_NL80211_BANDS; band++) {
if (!local->hw.wiphy->bands[band])
continue;
local->int_scan_req->rates[band] = (u32) -1;
@@ -1008,8 +1019,11 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
if (local->hw.wiphy->max_scan_ie_len)
local->hw.wiphy->max_scan_ie_len -= local->scan_ies_len;
- WARN_ON(!ieee80211_cs_list_valid(local->hw.cipher_schemes,
- local->hw.n_cipher_schemes));
+ if (WARN_ON(!ieee80211_cs_list_valid(local->hw.cipher_schemes,
+ local->hw.n_cipher_schemes))) {
+ result = -EINVAL;
+ goto fail_workqueue;
+ }
result = ieee80211_init_cipher_suites(local);
if (result < 0)
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 4bd8f3f056d8..8d33125518de 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -416,7 +416,7 @@ int mesh_add_ht_cap_ie(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb)
{
struct ieee80211_local *local = sdata->local;
- enum ieee80211_band band = ieee80211_get_sdata_band(sdata);
+ enum nl80211_band band = ieee80211_get_sdata_band(sdata);
struct ieee80211_supported_band *sband;
u8 *pos;
@@ -479,7 +479,7 @@ int mesh_add_vht_cap_ie(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb)
{
struct ieee80211_local *local = sdata->local;
- enum ieee80211_band band = ieee80211_get_sdata_band(sdata);
+ enum nl80211_band band = ieee80211_get_sdata_band(sdata);
struct ieee80211_supported_band *sband;
u8 *pos;
@@ -681,7 +681,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
struct ieee80211_mgmt *mgmt;
struct ieee80211_chanctx_conf *chanctx_conf;
struct mesh_csa_settings *csa;
- enum ieee80211_band band;
+ enum nl80211_band band;
u8 *pos;
struct ieee80211_sub_if_data *sdata;
int hdr_len = offsetof(struct ieee80211_mgmt, u.beacon) +
@@ -930,7 +930,7 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata,
struct cfg80211_csa_settings params;
struct ieee80211_csa_ie csa_ie;
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
- enum ieee80211_band band = ieee80211_get_sdata_band(sdata);
+ enum nl80211_band band = ieee80211_get_sdata_band(sdata);
int err;
u32 sta_flags;
@@ -1084,7 +1084,7 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
struct ieee80211_channel *channel;
size_t baselen;
int freq;
- enum ieee80211_band band = rx_status->band;
+ enum nl80211_band band = rx_status->band;
/* ignore ProbeResp to foreign address */
if (stype == IEEE80211_STYPE_PROBE_RESP &&
@@ -1114,8 +1114,15 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
if (!channel || channel->flags & IEEE80211_CHAN_DISABLED)
return;
- if (mesh_matches_local(sdata, &elems))
- mesh_neighbour_update(sdata, mgmt->sa, &elems);
+ if (mesh_matches_local(sdata, &elems)) {
+ mpl_dbg(sdata, "rssi_threshold=%d,rx_status->signal=%d\n",
+ sdata->u.mesh.mshcfg.rssi_threshold, rx_status->signal);
+ if (!sdata->u.mesh.user_mpm ||
+ sdata->u.mesh.mshcfg.rssi_threshold == 0 ||
+ sdata->u.mesh.mshcfg.rssi_threshold < rx_status->signal)
+ mesh_neighbour_update(sdata, mgmt->sa, &elems,
+ rx_status);
+ }
if (ifmsh->sync_ops)
ifmsh->sync_ops->rx_bcn_presp(sdata,
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 4a8019f79fb2..7274e6719e8b 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -289,7 +289,8 @@ int mesh_gate_num(struct ieee80211_sub_if_data *sdata);
/* Mesh plinks */
void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata,
- u8 *hw_addr, struct ieee802_11_elems *ie);
+ u8 *hw_addr, struct ieee802_11_elems *ie,
+ struct ieee80211_rx_status *rx_status);
bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie);
u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata);
void mesh_plink_broken(struct sta_info *sta);
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 466922f09d04..43edcba6d67b 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -1112,7 +1112,8 @@ int mesh_nexthop_resolve(struct ieee80211_sub_if_data *sdata,
}
}
- if (!(mpath->flags & MESH_PATH_RESOLVING))
+ if (!(mpath->flags & MESH_PATH_RESOLVING) &&
+ mesh_path_sel_is_hwmp(sdata))
mesh_queue_preq(mpath, PREQ_Q_F_START);
if (skb_queue_len(&mpath->frame_queue) >= MESH_FRAME_QUEUE_LEN)
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 9f02e54ad2a5..51b5d98f6c5d 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -93,18 +93,18 @@ static inline void mesh_plink_fsm_restart(struct sta_info *sta)
static u32 mesh_set_short_slot_time(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_local *local = sdata->local;
- enum ieee80211_band band = ieee80211_get_sdata_band(sdata);
+ enum nl80211_band band = ieee80211_get_sdata_band(sdata);
struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band];
struct sta_info *sta;
u32 erp_rates = 0, changed = 0;
int i;
bool short_slot = false;
- if (band == IEEE80211_BAND_5GHZ) {
+ if (band == NL80211_BAND_5GHZ) {
/* (IEEE 802.11-2012 19.4.5) */
short_slot = true;
goto out;
- } else if (band != IEEE80211_BAND_2GHZ)
+ } else if (band != NL80211_BAND_2GHZ)
goto out;
for (i = 0; i < sband->n_bitrates; i++)
@@ -247,7 +247,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
mgmt->u.action.u.self_prot.action_code = action;
if (action != WLAN_SP_MESH_PEERING_CLOSE) {
- enum ieee80211_band band = ieee80211_get_sdata_band(sdata);
+ enum nl80211_band band = ieee80211_get_sdata_band(sdata);
/* capability info */
pos = skb_put(skb, 2);
@@ -383,7 +383,7 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata,
struct ieee802_11_elems *elems, bool insert)
{
struct ieee80211_local *local = sdata->local;
- enum ieee80211_band band = ieee80211_get_sdata_band(sdata);
+ enum nl80211_band band = ieee80211_get_sdata_band(sdata);
struct ieee80211_supported_band *sband;
u32 rates, basic_rates = 0, changed = 0;
enum ieee80211_sta_rx_bandwidth bw = sta->sta.bandwidth;
@@ -489,7 +489,8 @@ __mesh_sta_info_alloc(struct ieee80211_sub_if_data *sdata, u8 *hw_addr)
static struct sta_info *
mesh_sta_info_alloc(struct ieee80211_sub_if_data *sdata, u8 *addr,
- struct ieee802_11_elems *elems)
+ struct ieee802_11_elems *elems,
+ struct ieee80211_rx_status *rx_status)
{
struct sta_info *sta = NULL;
@@ -497,11 +498,17 @@ mesh_sta_info_alloc(struct ieee80211_sub_if_data *sdata, u8 *addr,
if (sdata->u.mesh.user_mpm ||
sdata->u.mesh.security & IEEE80211_MESH_SEC_AUTHED) {
if (mesh_peer_accepts_plinks(elems) &&
- mesh_plink_availables(sdata))
+ mesh_plink_availables(sdata)) {
+ int sig = 0;
+
+ if (ieee80211_hw_check(&sdata->local->hw, SIGNAL_DBM))
+ sig = rx_status->signal;
+
cfg80211_notify_new_peer_candidate(sdata->dev, addr,
elems->ie_start,
elems->total_len,
- GFP_KERNEL);
+ sig, GFP_KERNEL);
+ }
} else
sta = __mesh_sta_info_alloc(sdata, addr);
@@ -514,13 +521,15 @@ mesh_sta_info_alloc(struct ieee80211_sub_if_data *sdata, u8 *addr,
* @sdata: local meshif
* @addr: peer's address
* @elems: IEs from beacon or mesh peering frame.
+ * @rx_status: rx status for the frame for signal reporting
*
* Return existing or newly allocated sta_info under RCU read lock.
* (re)initialize with given IEs.
*/
static struct sta_info *
mesh_sta_info_get(struct ieee80211_sub_if_data *sdata,
- u8 *addr, struct ieee802_11_elems *elems) __acquires(RCU)
+ u8 *addr, struct ieee802_11_elems *elems,
+ struct ieee80211_rx_status *rx_status) __acquires(RCU)
{
struct sta_info *sta = NULL;
@@ -531,7 +540,7 @@ mesh_sta_info_get(struct ieee80211_sub_if_data *sdata,
} else {
rcu_read_unlock();
/* can't run atomic */
- sta = mesh_sta_info_alloc(sdata, addr, elems);
+ sta = mesh_sta_info_alloc(sdata, addr, elems, rx_status);
if (!sta) {
rcu_read_lock();
return NULL;
@@ -552,17 +561,19 @@ mesh_sta_info_get(struct ieee80211_sub_if_data *sdata,
* @sdata: local meshif
* @addr: peer's address
* @elems: IEs from beacon or mesh peering frame
+ * @rx_status: rx status for the frame for signal reporting
*
* Initiates peering if appropriate.
*/
void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata,
u8 *hw_addr,
- struct ieee802_11_elems *elems)
+ struct ieee802_11_elems *elems,
+ struct ieee80211_rx_status *rx_status)
{
struct sta_info *sta;
u32 changed = 0;
- sta = mesh_sta_info_get(sdata, hw_addr, elems);
+ sta = mesh_sta_info_get(sdata, hw_addr, elems, rx_status);
if (!sta)
goto out;
@@ -1044,7 +1055,8 @@ out:
static void
mesh_process_plink_frame(struct ieee80211_sub_if_data *sdata,
struct ieee80211_mgmt *mgmt,
- struct ieee802_11_elems *elems)
+ struct ieee802_11_elems *elems,
+ struct ieee80211_rx_status *rx_status)
{
struct sta_info *sta;
@@ -1109,7 +1121,7 @@ mesh_process_plink_frame(struct ieee80211_sub_if_data *sdata,
if (event == OPN_ACPT) {
rcu_read_unlock();
/* allocate sta entry if necessary and update info */
- sta = mesh_sta_info_get(sdata, mgmt->sa, elems);
+ sta = mesh_sta_info_get(sdata, mgmt->sa, elems, rx_status);
if (!sta) {
mpl_dbg(sdata, "Mesh plink: failed to init peer!\n");
goto unlock_rcu;
@@ -1175,5 +1187,5 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata,
return;
}
ieee802_11_parse_elems(baseaddr, len - baselen, true, &elems);
- mesh_process_plink_frame(sdata, mgmt, &elems);
+ mesh_process_plink_frame(sdata, mgmt, &elems, rx_status);
}
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 1e6dd3de116b..bae631cf549a 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -693,7 +693,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
capab = WLAN_CAPABILITY_ESS;
- if (sband->band == IEEE80211_BAND_2GHZ) {
+ if (sband->band == NL80211_BAND_2GHZ) {
capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME;
capab |= WLAN_CAPABILITY_SHORT_PREAMBLE;
}
@@ -1133,6 +1133,11 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_sub_if_data *sdata)
sdata->vif.csa_active = false;
ifmgd->csa_waiting_bcn = false;
+ /*
+ * If the CSA IE is still present on the beacon after the switch,
+ * we need to consider it as a new CSA (possibly to self).
+ */
+ ifmgd->beacon_crc_valid = false;
ret = drv_post_channel_switch(sdata);
if (ret) {
@@ -1182,7 +1187,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
struct cfg80211_bss *cbss = ifmgd->associated;
struct ieee80211_chanctx_conf *conf;
struct ieee80211_chanctx *chanctx;
- enum ieee80211_band current_band;
+ enum nl80211_band current_band;
struct ieee80211_csa_ie csa_ie;
struct ieee80211_channel_switch ch_switch;
int res;
@@ -1339,11 +1344,11 @@ ieee80211_find_80211h_pwr_constr(struct ieee80211_sub_if_data *sdata,
default:
WARN_ON_ONCE(1);
/* fall through */
- case IEEE80211_BAND_2GHZ:
+ case NL80211_BAND_2GHZ:
case IEEE80211_BAND_60GHZ:
chan_increment = 1;
break;
- case IEEE80211_BAND_5GHZ:
+ case NL80211_BAND_5GHZ:
chan_increment = 4;
break;
}
@@ -1955,7 +1960,7 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata,
}
use_short_slot = !!(capab & WLAN_CAPABILITY_SHORT_SLOT_TIME);
- if (ieee80211_get_sdata_band(sdata) == IEEE80211_BAND_5GHZ)
+ if (ieee80211_get_sdata_band(sdata) == NL80211_BAND_5GHZ)
use_short_slot = true;
if (use_protection != bss_conf->use_cts_prot) {
@@ -2283,7 +2288,7 @@ void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata,
if (!ieee80211_is_data(hdr->frame_control))
return;
- if (ieee80211_is_nullfunc(hdr->frame_control) &&
+ if (ieee80211_is_any_nullfunc(hdr->frame_control) &&
sdata->u.mgd.probe_send_count > 0) {
if (ack)
ieee80211_sta_reset_conn_monitor(sdata);
@@ -4426,7 +4431,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
if (new_sta) {
u32 rates = 0, basic_rates = 0;
- bool have_higher_than_11mbit;
+ bool have_higher_than_11mbit = false;
int min_rate = INT_MAX, min_rate_index = -1;
struct ieee80211_chanctx_conf *chanctx_conf;
const struct cfg80211_bss_ies *ies;
@@ -4468,7 +4473,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
sdata->vif.bss_conf.basic_rates = basic_rates;
/* cf. IEEE 802.11 9.2.12 */
- if (cbss->channel->band == IEEE80211_BAND_2GHZ &&
+ if (cbss->channel->band == NL80211_BAND_2GHZ &&
have_higher_than_11mbit)
sdata->flags |= IEEE80211_SDATA_OPERATING_GMODE;
else
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index b6be51940ead..af489405d5b3 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -308,11 +308,10 @@ void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc, bool free)
/* was never transmitted */
if (roc->frame) {
- cfg80211_mgmt_tx_status(&roc->sdata->wdev,
- (unsigned long)roc->frame,
+ cfg80211_mgmt_tx_status(&roc->sdata->wdev, roc->mgmt_tx_cookie,
roc->frame->data, roc->frame->len,
false, GFP_KERNEL);
- kfree_skb(roc->frame);
+ ieee80211_free_txskb(&roc->sdata->local->hw, roc->frame);
}
if (!roc->mgmt_tx_cookie)
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 24033c81f3d0..d9756da625f0 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -289,7 +289,7 @@ static void __rate_control_send_low(struct ieee80211_hw *hw,
u32 rate_flags =
ieee80211_chandef_rate_flags(&hw->conf.chandef);
- if ((sband->band == IEEE80211_BAND_2GHZ) &&
+ if ((sband->band == NL80211_BAND_2GHZ) &&
(info->flags & IEEE80211_TX_CTL_NO_CCK_RATE))
rate_flags |= IEEE80211_RATE_ERP_G;
@@ -890,7 +890,8 @@ int rate_control_set_rates(struct ieee80211_hw *hw,
if (old)
kfree_rcu(old, rcu_head);
- drv_sta_rate_tbl_update(hw_to_local(hw), sta->sdata, pubsta);
+ if (sta->uploaded)
+ drv_sta_rate_tbl_update(hw_to_local(hw), sta->sdata, pubsta);
return 0;
}
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index b54f398cda5d..e91386919399 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -274,7 +274,7 @@ minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband,
success = !!(info->flags & IEEE80211_TX_STAT_ACK);
for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
- if (ar[i].idx < 0)
+ if (ar[i].idx < 0 || !ar[i].count)
break;
ndx = rix_to_ndx(mi, ar[i].idx);
@@ -287,12 +287,6 @@ minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband,
mi->r[ndx].stats.success += success;
}
- if ((info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) && (i >= 0))
- mi->sample_packets++;
-
- if (mi->sample_deferred > 0)
- mi->sample_deferred--;
-
if (time_after(jiffies, mi->last_stats_update +
(mp->update_interval * HZ) / 1000))
minstrel_update_stats(mp, mi);
@@ -366,7 +360,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
#endif
delta = (mi->total_packets * sampling_ratio / 100) -
- (mi->sample_packets + mi->sample_deferred / 2);
+ mi->sample_packets;
/* delta < 0: no sampling required */
prev_sample = mi->prev_sample;
@@ -375,7 +369,6 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
return;
if (mi->total_packets >= 10000) {
- mi->sample_deferred = 0;
mi->sample_packets = 0;
mi->total_packets = 0;
} else if (delta > mi->n_rates * 2) {
@@ -400,19 +393,8 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
* rate sampling method should be used.
* Respect such rates that are not sampled for 20 interations.
*/
- if (mrr_capable &&
- msr->perfect_tx_time > mr->perfect_tx_time &&
- msr->stats.sample_skipped < 20) {
- /* Only use IEEE80211_TX_CTL_RATE_CTRL_PROBE to mark
- * packets that have the sampling rate deferred to the
- * second MRR stage. Increase the sample counter only
- * if the deferred sample rate was actually used.
- * Use the sample_deferred counter to make sure that
- * the sampling is not done in large bursts */
- info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE;
- rate++;
- mi->sample_deferred++;
- } else {
+ if (msr->perfect_tx_time < mr->perfect_tx_time ||
+ msr->stats.sample_skipped >= 20) {
if (!msr->sample_limit)
return;
@@ -432,11 +414,12 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
rate->idx = mi->r[ndx].rix;
rate->count = minstrel_get_retry_count(&mi->r[ndx], info);
+ info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE;
}
static void
-calc_rate_durations(enum ieee80211_band band,
+calc_rate_durations(enum nl80211_band band,
struct minstrel_rate *d,
struct ieee80211_rate *rate,
struct cfg80211_chan_def *chandef)
@@ -579,7 +562,7 @@ minstrel_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp)
if (!mi)
return NULL;
- for (i = 0; i < IEEE80211_NUM_BANDS; i++) {
+ for (i = 0; i < NUM_NL80211_BANDS; i++) {
sband = hw->wiphy->bands[i];
if (sband && sband->n_bitrates > max_rates)
max_rates = sband->n_bitrates;
@@ -621,7 +604,7 @@ minstrel_init_cck_rates(struct minstrel_priv *mp)
u32 rate_flags = ieee80211_chandef_rate_flags(&mp->hw->conf.chandef);
int i, j;
- sband = mp->hw->wiphy->bands[IEEE80211_BAND_2GHZ];
+ sband = mp->hw->wiphy->bands[NL80211_BAND_2GHZ];
if (!sband)
return;
diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h
index c230bbe93262..5a9e44f4fba4 100644
--- a/net/mac80211/rc80211_minstrel.h
+++ b/net/mac80211/rc80211_minstrel.h
@@ -105,7 +105,6 @@ struct minstrel_sta_info {
u8 max_prob_rate;
unsigned int total_packets;
unsigned int sample_packets;
- int sample_deferred;
unsigned int sample_row;
unsigned int sample_column;
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index ff3b28e7dbce..c15427895d64 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -546,7 +546,7 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
/* (re)Initialize group rate indexes */
for(j = 0; j < MAX_THR_RATES; j++)
- tmp_group_tp_rate[j] = group;
+ tmp_group_tp_rate[j] = MCS_GROUP_RATES * group;
for (i = 0; i < MCS_GROUP_RATES; i++) {
if (!(mg->supported & BIT(i)))
@@ -1073,7 +1073,7 @@ minstrel_ht_update_cck(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
{
int i;
- if (sband->band != IEEE80211_BAND_2GHZ)
+ if (sband->band != NL80211_BAND_2GHZ)
return;
if (!ieee80211_hw_check(mp->hw, SUPPORTS_HT_CCK_RATES))
@@ -1271,7 +1271,7 @@ minstrel_ht_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp)
int max_rates = 0;
int i;
- for (i = 0; i < IEEE80211_NUM_BANDS; i++) {
+ for (i = 0; i < NUM_NL80211_BANDS; i++) {
sband = hw->wiphy->bands[i];
if (sband && sband->n_bitrates > max_rates)
max_rates = sband->n_bitrates;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 6676b413d049..7c05981bdee9 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -321,7 +321,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
else if (status->flag & RX_FLAG_5MHZ)
channel_flags |= IEEE80211_CHAN_QUARTER;
- if (status->band == IEEE80211_BAND_5GHZ)
+ if (status->band == NL80211_BAND_5GHZ)
channel_flags |= IEEE80211_CHAN_OFDM | IEEE80211_CHAN_5GHZ;
else if (status->flag & (RX_FLAG_HT | RX_FLAG_VHT))
channel_flags |= IEEE80211_CHAN_DYN | IEEE80211_CHAN_2GHZ;
@@ -1111,8 +1111,7 @@ ieee80211_rx_h_check_dup(struct ieee80211_rx_data *rx)
return RX_CONTINUE;
if (ieee80211_is_ctl(hdr->frame_control) ||
- ieee80211_is_nullfunc(hdr->frame_control) ||
- ieee80211_is_qos_nullfunc(hdr->frame_control) ||
+ ieee80211_is_any_nullfunc(hdr->frame_control) ||
is_multicast_ether_addr(hdr->addr1))
return RX_CONTINUE;
@@ -1488,8 +1487,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
* Drop (qos-)data::nullfunc frames silently, since they
* are used only to control station power saving mode.
*/
- if (ieee80211_is_nullfunc(hdr->frame_control) ||
- ieee80211_is_qos_nullfunc(hdr->frame_control)) {
+ if (ieee80211_is_any_nullfunc(hdr->frame_control)) {
I802_DEBUG_INC(rx->local->rx_handlers_drop_nullfunc);
/*
@@ -1741,19 +1739,34 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
return result;
}
+void ieee80211_init_frag_cache(struct ieee80211_fragment_cache *cache)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(cache->entries); i++)
+ skb_queue_head_init(&cache->entries[i].skb_list);
+}
+
+void ieee80211_destroy_frag_cache(struct ieee80211_fragment_cache *cache)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(cache->entries); i++)
+ __skb_queue_purge(&cache->entries[i].skb_list);
+}
+
static inline struct ieee80211_fragment_entry *
-ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata,
+ieee80211_reassemble_add(struct ieee80211_fragment_cache *cache,
unsigned int frag, unsigned int seq, int rx_queue,
struct sk_buff **skb)
{
struct ieee80211_fragment_entry *entry;
- entry = &sdata->fragments[sdata->fragment_next++];
- if (sdata->fragment_next >= IEEE80211_FRAGMENT_MAX)
- sdata->fragment_next = 0;
+ entry = &cache->entries[cache->next++];
+ if (cache->next >= IEEE80211_FRAGMENT_MAX)
+ cache->next = 0;
- if (!skb_queue_empty(&entry->skb_list))
- __skb_queue_purge(&entry->skb_list);
+ __skb_queue_purge(&entry->skb_list);
__skb_queue_tail(&entry->skb_list, *skb); /* no need for locking */
*skb = NULL;
@@ -1768,14 +1781,14 @@ ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata,
}
static inline struct ieee80211_fragment_entry *
-ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata,
+ieee80211_reassemble_find(struct ieee80211_fragment_cache *cache,
unsigned int frag, unsigned int seq,
int rx_queue, struct ieee80211_hdr *hdr)
{
struct ieee80211_fragment_entry *entry;
int i, idx;
- idx = sdata->fragment_next;
+ idx = cache->next;
for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) {
struct ieee80211_hdr *f_hdr;
@@ -1783,7 +1796,7 @@ ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata,
if (idx < 0)
idx = IEEE80211_FRAGMENT_MAX - 1;
- entry = &sdata->fragments[idx];
+ entry = &cache->entries[idx];
if (skb_queue_empty(&entry->skb_list) || entry->seq != seq ||
entry->rx_queue != rx_queue ||
entry->last_frag + 1 != frag)
@@ -1810,16 +1823,27 @@ ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata,
return NULL;
}
+static bool requires_sequential_pn(struct ieee80211_rx_data *rx, __le16 fc)
+{
+ return rx->key &&
+ (rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP ||
+ rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP_256 ||
+ rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP ||
+ rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP_256) &&
+ ieee80211_has_protected(fc);
+}
+
static ieee80211_rx_result debug_noinline
ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
{
+ struct ieee80211_fragment_cache *cache = &rx->sdata->frags;
struct ieee80211_hdr *hdr;
u16 sc;
__le16 fc;
unsigned int frag, seq;
struct ieee80211_fragment_entry *entry;
struct sk_buff *skb;
- struct ieee80211_rx_status *status;
+ struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
hdr = (struct ieee80211_hdr *)rx->skb->data;
fc = hdr->frame_control;
@@ -1830,14 +1854,15 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
sc = le16_to_cpu(hdr->seq_ctrl);
frag = sc & IEEE80211_SCTL_FRAG;
- if (is_multicast_ether_addr(hdr->addr1)) {
- I802_DEBUG_INC(rx->local->dot11MulticastReceivedFrameCount);
- goto out_no_led;
- }
+ if (rx->sta)
+ cache = &rx->sta->frags;
if (likely(!ieee80211_has_morefrags(fc) && frag == 0))
goto out;
+ if (is_multicast_ether_addr(hdr->addr1))
+ return RX_DROP_MONITOR;
+
I802_DEBUG_INC(rx->local->rx_handlers_fragments);
if (skb_linearize(rx->skb))
@@ -1853,20 +1878,17 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
if (frag == 0) {
/* This is the first fragment of a new frame. */
- entry = ieee80211_reassemble_add(rx->sdata, frag, seq,
+ entry = ieee80211_reassemble_add(cache, frag, seq,
rx->seqno_idx, &(rx->skb));
- if (rx->key &&
- (rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP ||
- rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP_256 ||
- rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP ||
- rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP_256) &&
- ieee80211_has_protected(fc)) {
+ if (requires_sequential_pn(rx, fc)) {
int queue = rx->security_idx;
/* Store CCMP/GCMP PN so that we can verify that the
* next fragment has a sequential PN value.
*/
entry->check_sequential_pn = true;
+ entry->is_protected = true;
+ entry->key_color = rx->key->color;
memcpy(entry->last_pn,
rx->key->u.ccmp.rx_pn[queue],
IEEE80211_CCMP_PN_LEN);
@@ -1878,6 +1900,11 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
sizeof(rx->key->u.gcmp.rx_pn[queue]));
BUILD_BUG_ON(IEEE80211_CCMP_PN_LEN !=
IEEE80211_GCMP_PN_LEN);
+ } else if (rx->key &&
+ (ieee80211_has_protected(fc) ||
+ (status->flag & RX_FLAG_DECRYPTED))) {
+ entry->is_protected = true;
+ entry->key_color = rx->key->color;
}
return RX_QUEUED;
}
@@ -1885,7 +1912,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
/* This is a fragment for a frame that should already be pending in
* fragment cache. Add this fragment to the end of the pending entry.
*/
- entry = ieee80211_reassemble_find(rx->sdata, frag, seq,
+ entry = ieee80211_reassemble_find(cache, frag, seq,
rx->seqno_idx, hdr);
if (!entry) {
I802_DEBUG_INC(rx->local->rx_handlers_drop_defrag);
@@ -1900,25 +1927,39 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
if (entry->check_sequential_pn) {
int i;
u8 pn[IEEE80211_CCMP_PN_LEN], *rpn;
- int queue;
- if (!rx->key ||
- (rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP &&
- rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP_256 &&
- rx->key->conf.cipher != WLAN_CIPHER_SUITE_GCMP &&
- rx->key->conf.cipher != WLAN_CIPHER_SUITE_GCMP_256))
+ if (!requires_sequential_pn(rx, fc))
+ return RX_DROP_UNUSABLE;
+
+ /* Prevent mixed key and fragment cache attacks */
+ if (entry->key_color != rx->key->color)
return RX_DROP_UNUSABLE;
+
memcpy(pn, entry->last_pn, IEEE80211_CCMP_PN_LEN);
for (i = IEEE80211_CCMP_PN_LEN - 1; i >= 0; i--) {
pn[i]++;
if (pn[i])
break;
}
- queue = rx->security_idx;
- rpn = rx->key->u.ccmp.rx_pn[queue];
+
+ rpn = rx->ccm_gcm.pn;
if (memcmp(pn, rpn, IEEE80211_CCMP_PN_LEN))
return RX_DROP_UNUSABLE;
memcpy(entry->last_pn, pn, IEEE80211_CCMP_PN_LEN);
+ } else if (entry->is_protected &&
+ (!rx->key ||
+ (!ieee80211_has_protected(fc) &&
+ !(status->flag & RX_FLAG_DECRYPTED)) ||
+ rx->key->color != entry->key_color)) {
+ /* Drop this as a mixed key or fragment cache attack, even
+ * if for TKIP Michael MIC should protect us, and WEP is a
+ * lost cause anyway.
+ */
+ return RX_DROP_UNUSABLE;
+ } else if (entry->is_protected && rx->key &&
+ entry->key_color != rx->key->color &&
+ (status->flag & RX_FLAG_DECRYPTED)) {
+ return RX_DROP_UNUSABLE;
}
skb_pull(rx->skb, ieee80211_hdrlen(fc));
@@ -1950,7 +1991,6 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
out:
ieee80211_led_rx(rx->local);
- out_no_led:
if (rx->sta)
rx->sta->rx_stats.packets++;
return RX_CONTINUE;
@@ -1966,6 +2006,7 @@ static int ieee80211_802_1x_port_control(struct ieee80211_rx_data *rx)
static int ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx, __le16 fc)
{
+ struct ieee80211_hdr *hdr = (void *)rx->skb->data;
struct sk_buff *skb = rx->skb;
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
@@ -1976,9 +2017,34 @@ static int ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx, __le16 fc)
if (status->flag & RX_FLAG_DECRYPTED)
return 0;
+ /* check mesh EAPOL frames first */
+ if (unlikely(rx->sta && ieee80211_vif_is_mesh(&rx->sdata->vif) &&
+ ieee80211_is_data(fc))) {
+ struct ieee80211s_hdr *mesh_hdr;
+ u16 hdr_len = ieee80211_hdrlen(fc);
+ u16 ethertype_offset;
+ __be16 ethertype;
+
+ if (!ether_addr_equal(hdr->addr1, rx->sdata->vif.addr))
+ goto drop_check;
+
+ /* make sure fixed part of mesh header is there, also checks skb len */
+ if (!pskb_may_pull(rx->skb, hdr_len + 6))
+ goto drop_check;
+
+ mesh_hdr = (struct ieee80211s_hdr *)(skb->data + hdr_len);
+ ethertype_offset = hdr_len + ieee80211_get_mesh_hdrlen(mesh_hdr) +
+ sizeof(rfc1042_header);
+
+ if (skb_copy_bits(rx->skb, ethertype_offset, &ethertype, 2) == 0 &&
+ ethertype == rx->sdata->control_port_protocol)
+ return 0;
+ }
+
+drop_check:
/* Drop unencrypted frames if key is set. */
if (unlikely(!ieee80211_has_protected(fc) &&
- !ieee80211_is_nullfunc(fc) &&
+ !ieee80211_is_any_nullfunc(fc) &&
ieee80211_is_data(fc) && rx->key))
return -EACCES;
@@ -2081,13 +2147,13 @@ static bool ieee80211_frame_allowed(struct ieee80211_rx_data *rx, __le16 fc)
struct ethhdr *ehdr = (struct ethhdr *) rx->skb->data;
/*
- * Allow EAPOL frames to us/the PAE group address regardless
- * of whether the frame was encrypted or not.
+ * Allow EAPOL frames to us/the PAE group address regardless of
+ * whether the frame was encrypted or not, and always disallow
+ * all other destination addresses for them.
*/
- if (ehdr->h_proto == rx->sdata->control_port_protocol &&
- (ether_addr_equal(ehdr->h_dest, rx->sdata->vif.addr) ||
- ether_addr_equal(ehdr->h_dest, pae_group_addr)))
- return true;
+ if (unlikely(ehdr->h_proto == rx->sdata->control_port_protocol))
+ return ether_addr_equal(ehdr->h_dest, rx->sdata->vif.addr) ||
+ ether_addr_equal(ehdr->h_dest, pae_group_addr);
if (ieee80211_802_1x_port_control(rx) ||
ieee80211_drop_unencrypted(rx, fc))
@@ -2116,6 +2182,7 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
if ((sdata->vif.type == NL80211_IFTYPE_AP ||
sdata->vif.type == NL80211_IFTYPE_AP_VLAN) &&
!(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) &&
+ ehdr->h_proto != rx->sdata->control_port_protocol &&
(sdata->vif.type != NL80211_IFTYPE_AP_VLAN || !sdata->u.vlan.sta)) {
if (is_multicast_ether_addr(ehdr->h_dest)) {
/*
@@ -2168,9 +2235,30 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
#endif
if (skb) {
+ struct ethhdr *ehdr = (struct ethhdr *)skb->data;
+
/* deliver to local stack */
skb->protocol = eth_type_trans(skb, dev);
memset(skb->cb, 0, sizeof(skb->cb));
+
+ /*
+ * 802.1X over 802.11 requires that the authenticator address
+ * be used for EAPOL frames. However, 802.1X allows the use of
+ * the PAE group address instead. If the interface is part of
+ * a bridge and we pass the frame with the PAE group address,
+ * then the bridge will forward it to the network (even if the
+ * client was not associated yet), which isn't supposed to
+ * happen.
+ * To avoid that, rewrite the destination address to our own
+ * address, so that the authenticator (e.g. hostapd) will see
+ * the frame, but bridge won't forward it anywhere else. Note
+ * that due to earlier filtering, the only other address can
+ * be the PAE group address.
+ */
+ if (unlikely(skb->protocol == sdata->control_port_protocol &&
+ !ether_addr_equal(ehdr->h_dest, sdata->vif.addr)))
+ ether_addr_copy(ehdr->h_dest, sdata->vif.addr);
+
if (rx->napi)
napi_gro_receive(rx->napi, skb);
else
@@ -2234,6 +2322,23 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
if (skb_linearize(skb))
return RX_DROP_UNUSABLE;
+ if (rx->key) {
+ /*
+ * We should not receive A-MSDUs on pre-HT connections,
+ * and HT connections cannot use old ciphers. Thus drop
+ * them, as in those cases we couldn't even have SPP
+ * A-MSDUs or such.
+ */
+ switch (rx->key->conf.cipher) {
+ case WLAN_CIPHER_SUITE_WEP40:
+ case WLAN_CIPHER_SUITE_WEP104:
+ case WLAN_CIPHER_SUITE_TKIP:
+ return RX_DROP_UNUSABLE;
+ default:
+ break;
+ }
+ }
+
ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr,
rx->sdata->vif.type,
rx->local->hw.extra_tx_headroom, true);
@@ -2817,7 +2922,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
switch (mgmt->u.action.u.measurement.action_code) {
case WLAN_ACTION_SPCT_MSR_REQ:
- if (status->band != IEEE80211_BAND_5GHZ)
+ if (status->band != NL80211_BAND_5GHZ)
break;
if (len < (IEEE80211_MIN_ACTION_SIZE +
@@ -3041,9 +3146,18 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx)
case cpu_to_le16(IEEE80211_STYPE_PROBE_RESP):
/* process for all: mesh, mlme, ibss */
break;
+ case cpu_to_le16(IEEE80211_STYPE_DEAUTH):
+ if (is_multicast_ether_addr(mgmt->da) &&
+ !is_broadcast_ether_addr(mgmt->da))
+ return RX_DROP_MONITOR;
+
+ /* process only for station/IBSS */
+ if (sdata->vif.type != NL80211_IFTYPE_STATION &&
+ sdata->vif.type != NL80211_IFTYPE_ADHOC)
+ return RX_DROP_MONITOR;
+ break;
case cpu_to_le16(IEEE80211_STYPE_ASSOC_RESP):
case cpu_to_le16(IEEE80211_STYPE_REASSOC_RESP):
- case cpu_to_le16(IEEE80211_STYPE_DEAUTH):
case cpu_to_le16(IEEE80211_STYPE_DISASSOC):
if (is_multicast_ether_addr(mgmt->da) &&
!is_broadcast_ether_addr(mgmt->da))
@@ -3334,7 +3448,8 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
if (!bssid)
return false;
if (ether_addr_equal(sdata->vif.addr, hdr->addr2) ||
- ether_addr_equal(sdata->u.ibss.bssid, hdr->addr2))
+ ether_addr_equal(sdata->u.ibss.bssid, hdr->addr2) ||
+ !is_valid_ether_addr(hdr->addr2))
return false;
if (ieee80211_is_beacon(hdr->frame_control))
return true;
@@ -3623,7 +3738,7 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
WARN_ON_ONCE(softirq_count() == 0);
- if (WARN_ON(status->band >= IEEE80211_NUM_BANDS))
+ if (WARN_ON(status->band >= NUM_NL80211_BANDS))
goto drop;
sband = local->hw.wiphy->bands[status->band];
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index acbe182b75d1..efc28f6b25c3 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -270,7 +270,7 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local)
n_chans = req->n_channels;
} else {
do {
- if (local->hw_scan_band == IEEE80211_NUM_BANDS)
+ if (local->hw_scan_band == NUM_NL80211_BANDS)
return false;
n_chans = 0;
@@ -482,7 +482,7 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local,
int i;
struct ieee80211_sub_if_data *sdata;
struct cfg80211_scan_request *scan_req;
- enum ieee80211_band band = local->hw.conf.chandef.chan->band;
+ enum nl80211_band band = local->hw.conf.chandef.chan->band;
u32 tx_flags;
scan_req = rcu_dereference_protected(local->scan_req,
@@ -949,7 +949,7 @@ int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata,
{
struct ieee80211_local *local = sdata->local;
int ret = -EBUSY, i, n_ch = 0;
- enum ieee80211_band band;
+ enum nl80211_band band;
mutex_lock(&local->mtx);
@@ -961,7 +961,7 @@ int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata,
if (!channels) {
int max_n;
- for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
+ for (band = 0; band < NUM_NL80211_BANDS; band++) {
if (!local->hw.wiphy->bands[band])
continue;
@@ -1081,7 +1081,7 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
struct ieee80211_scan_ies sched_scan_ies = {};
struct cfg80211_chan_def chandef;
int ret, i, iebufsz, num_bands = 0;
- u32 rate_masks[IEEE80211_NUM_BANDS] = {};
+ u32 rate_masks[NUM_NL80211_BANDS] = {};
u8 bands_used = 0;
u8 *ie;
size_t len;
@@ -1093,7 +1093,7 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
if (!local->ops->sched_scan_start)
return -ENOTSUPP;
- for (i = 0; i < IEEE80211_NUM_BANDS; i++) {
+ for (i = 0; i < NUM_NL80211_BANDS; i++) {
if (local->hw.wiphy->bands[i]) {
bands_used |= BIT(i);
rate_masks[i] = (u32) -1;
diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c
index 06e6ac8cc693..2ddc661f0988 100644
--- a/net/mac80211/spectmgmt.c
+++ b/net/mac80211/spectmgmt.c
@@ -23,11 +23,11 @@
int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
struct ieee802_11_elems *elems,
- enum ieee80211_band current_band,
+ enum nl80211_band current_band,
u32 sta_flags, u8 *bssid,
struct ieee80211_csa_ie *csa_ie)
{
- enum ieee80211_band new_band;
+ enum nl80211_band new_band;
int new_freq;
u8 new_chan_no;
struct ieee80211_channel *new_chan;
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index a450be6812b8..5f2c8aeb9bd3 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -2,6 +2,7 @@
* Copyright 2002-2005, Instant802 Networks, Inc.
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2013-2014 Intel Mobile Communications GmbH
+ * Copyright (C) 2018-2020 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -242,6 +243,24 @@ struct sta_info *sta_info_get_by_idx(struct ieee80211_sub_if_data *sdata,
*/
void sta_info_free(struct ieee80211_local *local, struct sta_info *sta)
{
+ /*
+ * If we had used sta_info_pre_move_state() then we might not
+ * have gone through the state transitions down again, so do
+ * it here now (and warn if it's inserted).
+ *
+ * This will clear state such as fast TX/RX that may have been
+ * allocated during state transitions.
+ */
+ while (sta->sta_state > IEEE80211_STA_NONE) {
+ int ret;
+
+ WARN_ON_ONCE(test_sta_flag(sta, WLAN_STA_INSERTED));
+
+ ret = sta_info_move_state(sta, sta->sta_state - 1);
+ if (WARN_ONCE(ret, "sta_info_move_state() returned %d\n", ret))
+ break;
+ }
+
if (sta->rate_ctrl)
rate_control_free_sta(sta);
@@ -337,6 +356,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
sta->sdata = sdata;
sta->rx_stats.last_rx = jiffies;
+ ieee80211_init_frag_cache(&sta->frags);
+
sta->sta_state = IEEE80211_STA_NONE;
/* Mark TID as unreserved */
@@ -439,6 +460,19 @@ static int sta_info_insert_check(struct sta_info *sta)
is_multicast_ether_addr(sta->sta.addr)))
return -EINVAL;
+ /* Strictly speaking this isn't necessary as we hold the mutex, but
+ * the rhashtable code can't really deal with that distinction. We
+ * do require the mutex for correctness though.
+ */
+ rcu_read_lock();
+ lockdep_assert_held(&sdata->local->sta_mtx);
+ if (ieee80211_hw_check(&sdata->local->hw, NEEDS_UNIQUE_STA_ADDR) &&
+ ieee80211_find_sta_by_ifaddr(&sdata->local->hw, sta->addr, NULL)) {
+ rcu_read_unlock();
+ return -ENOTUNIQ;
+ }
+ rcu_read_unlock();
+
return 0;
}
@@ -552,9 +586,10 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
out_drop_sta:
local->num_sta--;
synchronize_net();
- __cleanup_single_sta(sta);
+ cleanup_single_sta(sta);
out_err:
mutex_unlock(&local->sta_mtx);
+ kfree(sinfo);
rcu_read_lock();
return err;
}
@@ -566,22 +601,17 @@ int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU)
might_sleep();
+ mutex_lock(&local->sta_mtx);
+
err = sta_info_insert_check(sta);
if (err) {
+ sta_info_free(local, sta);
+ mutex_unlock(&local->sta_mtx);
rcu_read_lock();
- goto out_free;
+ return err;
}
- mutex_lock(&local->sta_mtx);
-
- err = sta_info_insert_finish(sta);
- if (err)
- goto out_free;
-
- return 0;
- out_free:
- sta_info_free(local, sta);
- return err;
+ return sta_info_insert_finish(sta);
}
int sta_info_insert(struct sta_info *sta)
@@ -905,6 +935,11 @@ static void __sta_info_destroy_part2(struct sta_info *sta)
might_sleep();
lockdep_assert_held(&local->sta_mtx);
+ if (sta->sta_state == IEEE80211_STA_AUTHORIZED) {
+ ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC);
+ WARN_ON_ONCE(ret);
+ }
+
/* now keys can no longer be reached */
ieee80211_free_sta_keys(local, sta);
@@ -942,6 +977,8 @@ static void __sta_info_destroy_part2(struct sta_info *sta)
ieee80211_sta_debugfs_remove(sta);
ieee80211_recalc_min_chandef(sdata);
+ ieee80211_destroy_frag_cache(&sta->frags);
+
cleanup_single_sta(sta);
}
@@ -1776,6 +1813,10 @@ int sta_info_move_state(struct sta_info *sta,
set_bit(WLAN_STA_AUTHORIZED, &sta->_flags);
ieee80211_check_fast_xmit(sta);
}
+ if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
+ sta->sdata->vif.type == NL80211_IFTYPE_AP)
+ cfg80211_send_layer2_update(sta->sdata->dev,
+ sta->sta.addr);
break;
default:
break;
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 15b0150283b6..b2e5928b1f7b 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -324,6 +324,34 @@ struct mesh_sta {
DECLARE_EWMA(signal, 1024, 8)
+/*
+ * IEEE 802.11-2016 (10.6 "Defragmentation") recommends support for "concurrent
+ * reception of at least one MSDU per access category per associated STA"
+ * on APs, or "at least one MSDU per access category" on other interface types.
+ *
+ * This limit can be increased by changing this define, at the cost of slower
+ * frame reassembly and increased memory use while fragments are pending.
+ */
+#define IEEE80211_FRAGMENT_MAX 4
+
+struct ieee80211_fragment_entry {
+ struct sk_buff_head skb_list;
+ unsigned long first_frag_time;
+ u16 seq;
+ u16 extra_len;
+ u16 last_frag;
+ u8 rx_queue;
+ u8 check_sequential_pn:1, /* needed for CCMP/GCMP */
+ is_protected:1;
+ u8 last_pn[6]; /* PN of the last fragment if CCMP was used */
+ unsigned int key_color;
+};
+
+struct ieee80211_fragment_cache {
+ struct ieee80211_fragment_entry entries[IEEE80211_FRAGMENT_MAX];
+ unsigned int next;
+};
+
/**
* struct sta_info - STA information
*
@@ -384,6 +412,7 @@ DECLARE_EWMA(signal, 1024, 8)
* @tx_stats: TX statistics
* @rx_stats: RX statistics
* @status_stats: TX status statistics
+ * @frags: fragment cache
*/
struct sta_info {
/* General information, mostly static */
@@ -493,6 +522,8 @@ struct sta_info {
struct cfg80211_chan_def tdls_chandef;
+ struct ieee80211_fragment_cache frags;
+
/* keep last! */
struct ieee80211_sta sta;
};
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index d221300e59e5..618479e0d648 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -474,8 +474,7 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local,
rcu_read_lock();
sdata = ieee80211_sdata_from_skb(local, skb);
if (sdata) {
- if (ieee80211_is_nullfunc(hdr->frame_control) ||
- ieee80211_is_qos_nullfunc(hdr->frame_control))
+ if (ieee80211_is_any_nullfunc(hdr->frame_control))
cfg80211_probe_status(sdata->dev, hdr->addr1,
cookie, acked,
GFP_ATOMIC);
@@ -905,7 +904,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
I802_DEBUG_INC(local->dot11FailedCount);
}
- if (ieee80211_is_nullfunc(fc) && ieee80211_has_pm(fc) &&
+ if (ieee80211_is_any_nullfunc(fc) && ieee80211_has_pm(fc) &&
ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS) &&
!(info->flags & IEEE80211_TX_CTL_INJECTED) &&
local->ps_sdata && !(local->scanning)) {
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index ce2ece424384..9eeacf4db494 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -4,7 +4,7 @@
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2014, Intel Corporation
* Copyright 2014 Intel Mobile Communications GmbH
- * Copyright 2015 Intel Deutschland GmbH
+ * Copyright 2015 - 2016 Intel Deutschland GmbH
*
* This file is GPLv2 as found in COPYING.
*/
@@ -15,6 +15,7 @@
#include <linux/rtnetlink.h>
#include "ieee80211_i.h"
#include "driver-ops.h"
+#include "rate.h"
/* give usermode some time for retries in setting up the TDLS session */
#define TDLS_PEER_SETUP_TIMEOUT (15 * HZ)
@@ -46,7 +47,7 @@ static void ieee80211_tdls_add_ext_capab(struct ieee80211_sub_if_data *sdata,
NL80211_FEATURE_TDLS_CHANNEL_SWITCH;
bool wider_band = ieee80211_hw_check(&local->hw, TDLS_WIDER_BW) &&
!ifmgd->tdls_wider_bw_prohibited;
- enum ieee80211_band band = ieee80211_get_sdata_band(sdata);
+ enum nl80211_band band = ieee80211_get_sdata_band(sdata);
struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band];
bool vht = sband && sband->vht_cap.vht_supported;
u8 *pos = (void *)skb_put(skb, 10);
@@ -183,7 +184,7 @@ static u16 ieee80211_get_tdls_sta_capab(struct ieee80211_sub_if_data *sdata,
if (status_code != 0)
return 0;
- if (ieee80211_get_sdata_band(sdata) == IEEE80211_BAND_2GHZ) {
+ if (ieee80211_get_sdata_band(sdata) == NL80211_BAND_2GHZ) {
return WLAN_CAPABILITY_SHORT_SLOT_TIME |
WLAN_CAPABILITY_SHORT_PREAMBLE;
}
@@ -302,7 +303,7 @@ ieee80211_tdls_chandef_vht_upgrade(struct ieee80211_sub_if_data *sdata,
/* IEEE802.11ac-2013 Table E-4 */
u16 centers_80mhz[] = { 5210, 5290, 5530, 5610, 5690, 5775 };
struct cfg80211_chan_def uc = sta->tdls_chandef;
- enum nl80211_chan_width max_width = ieee80211_get_sta_bw(&sta->sta);
+ enum nl80211_chan_width max_width = ieee80211_sta_cap_chan_bw(sta);
int i;
/* only support upgrading non-narrow channels up to 80Mhz */
@@ -313,7 +314,7 @@ ieee80211_tdls_chandef_vht_upgrade(struct ieee80211_sub_if_data *sdata,
if (max_width > NL80211_CHAN_WIDTH_80)
max_width = NL80211_CHAN_WIDTH_80;
- if (uc.width == max_width)
+ if (uc.width >= max_width)
return;
/*
* Channel usage constrains in the IEEE802.11ac-2013 specification only
@@ -324,6 +325,7 @@ ieee80211_tdls_chandef_vht_upgrade(struct ieee80211_sub_if_data *sdata,
for (i = 0; i < ARRAY_SIZE(centers_80mhz); i++)
if (abs(uc.chan->center_freq - centers_80mhz[i]) <= 30) {
uc.center_freq1 = centers_80mhz[i];
+ uc.center_freq2 = 0;
uc.width = NL80211_CHAN_WIDTH_80;
break;
}
@@ -332,7 +334,7 @@ ieee80211_tdls_chandef_vht_upgrade(struct ieee80211_sub_if_data *sdata,
return;
/* proceed to downgrade the chandef until usable or the same */
- while (uc.width > max_width &&
+ while (uc.width > max_width ||
!cfg80211_reg_can_beacon_relax(sdata->local->hw.wiphy, &uc,
sdata->wdev.iftype))
ieee80211_chandef_downgrade(&uc);
@@ -355,7 +357,7 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata,
u8 action_code, bool initiator,
const u8 *extra_ies, size_t extra_ies_len)
{
- enum ieee80211_band band = ieee80211_get_sdata_band(sdata);
+ enum nl80211_band band = ieee80211_get_sdata_band(sdata);
struct ieee80211_local *local = sdata->local;
struct ieee80211_supported_band *sband;
struct ieee80211_sta_ht_cap ht_cap;
@@ -542,7 +544,7 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata,
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
size_t offset = 0, noffset;
struct sta_info *sta, *ap_sta;
- enum ieee80211_band band = ieee80211_get_sdata_band(sdata);
+ enum nl80211_band band = ieee80211_get_sdata_band(sdata);
u8 *pos;
mutex_lock(&local->sta_mtx);
@@ -609,7 +611,7 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata,
ieee80211_tdls_add_link_ie(sdata, skb, peer, initiator);
/* only include VHT-operation if not on the 2.4GHz band */
- if (band != IEEE80211_BAND_2GHZ && sta->sta.vht_cap.vht_supported) {
+ if (band != NL80211_BAND_2GHZ && sta->sta.vht_cap.vht_supported) {
/*
* if both peers support WIDER_BW, we can expand the chandef to
* a wider compatible one, up to 80MHz
@@ -1242,18 +1244,44 @@ int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev,
return ret;
}
-static void iee80211_tdls_recalc_chanctx(struct ieee80211_sub_if_data *sdata)
+static void iee80211_tdls_recalc_chanctx(struct ieee80211_sub_if_data *sdata,
+ struct sta_info *sta)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_chanctx_conf *conf;
struct ieee80211_chanctx *ctx;
+ enum nl80211_chan_width width;
+ struct ieee80211_supported_band *sband;
mutex_lock(&local->chanctx_mtx);
conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
lockdep_is_held(&local->chanctx_mtx));
if (conf) {
+ width = conf->def.width;
+ sband = local->hw.wiphy->bands[conf->def.chan->band];
ctx = container_of(conf, struct ieee80211_chanctx, conf);
ieee80211_recalc_chanctx_chantype(local, ctx);
+
+ /* if width changed and a peer is given, update its BW */
+ if (width != conf->def.width && sta &&
+ test_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW)) {
+ enum ieee80211_sta_rx_bandwidth bw;
+
+ bw = ieee80211_chan_width_to_rx_bw(conf->def.width);
+ bw = min(bw, ieee80211_sta_cap_rx_bw(sta));
+ if (bw != sta->sta.bandwidth) {
+ sta->sta.bandwidth = bw;
+ rate_control_rate_update(local, sband, sta,
+ IEEE80211_RC_BW_CHANGED);
+ /*
+ * if a TDLS peer BW was updated, we need to
+ * recalc the chandef width again, to get the
+ * correct chanctx min_def
+ */
+ ieee80211_recalc_chanctx_chantype(local, ctx);
+ }
+ }
+
}
mutex_unlock(&local->chanctx_mtx);
}
@@ -1350,8 +1378,6 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
break;
}
- iee80211_tdls_recalc_chanctx(sdata);
-
mutex_lock(&local->sta_mtx);
sta = sta_info_get(sdata, peer);
if (!sta) {
@@ -1360,6 +1386,7 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
break;
}
+ iee80211_tdls_recalc_chanctx(sdata, sta);
iee80211_tdls_recalc_ht_protection(sdata, sta);
set_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH);
@@ -1390,7 +1417,7 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
iee80211_tdls_recalc_ht_protection(sdata, NULL);
mutex_unlock(&local->sta_mtx);
- iee80211_tdls_recalc_chanctx(sdata);
+ iee80211_tdls_recalc_chanctx(sdata, NULL);
break;
default:
ret = -ENOTSUPP;
@@ -1746,7 +1773,7 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata,
u8 target_channel, oper_class;
bool local_initiator;
struct sta_info *sta;
- enum ieee80211_band band;
+ enum nl80211_band band;
struct ieee80211_tdls_data *tf = (void *)skb->data;
struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb);
int baselen = offsetof(typeof(*tf), u.chan_switch_req.variable);
@@ -1778,10 +1805,10 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata,
if ((oper_class == 112 || oper_class == 2 || oper_class == 3 ||
oper_class == 4 || oper_class == 5 || oper_class == 6) &&
target_channel < 14)
- band = IEEE80211_BAND_5GHZ;
+ band = NL80211_BAND_5GHZ;
else
- band = target_channel < 14 ? IEEE80211_BAND_2GHZ :
- IEEE80211_BAND_5GHZ;
+ band = target_channel < 14 ? NL80211_BAND_2GHZ :
+ NL80211_BAND_5GHZ;
freq = ieee80211_channel_to_frequency(target_channel, band);
if (freq == 0) {
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 913e959b03cf..8000864ddfc6 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -396,7 +396,7 @@ TRACE_EVENT(drv_bss_info_changed,
__field(u32, sync_device_ts)
__field(u8, sync_dtim_count)
__field(u32, basic_rates)
- __array(int, mcast_rate, IEEE80211_NUM_BANDS)
+ __array(int, mcast_rate, NUM_NL80211_BANDS)
__field(u16, ht_operation_mode)
__field(s32, cqm_rssi_thold);
__field(s32, cqm_rssi_hyst);
@@ -1253,8 +1253,8 @@ TRACE_EVENT(drv_set_bitrate_mask,
TP_fast_assign(
LOCAL_ASSIGN;
VIF_ASSIGN;
- __entry->legacy_2g = mask->control[IEEE80211_BAND_2GHZ].legacy;
- __entry->legacy_5g = mask->control[IEEE80211_BAND_5GHZ].legacy;
+ __entry->legacy_2g = mask->control[NL80211_BAND_2GHZ].legacy;
+ __entry->legacy_5g = mask->control[NL80211_BAND_5GHZ].legacy;
),
TP_printk(
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 1e43c597db17..89eb87474fdf 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -150,7 +150,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
rate = DIV_ROUND_UP(r->bitrate, 1 << shift);
switch (sband->band) {
- case IEEE80211_BAND_2GHZ: {
+ case NL80211_BAND_2GHZ: {
u32 flag;
if (tx->sdata->flags & IEEE80211_SDATA_OPERATING_GMODE)
flag = IEEE80211_RATE_MANDATORY_G;
@@ -160,13 +160,13 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
mrate = r->bitrate;
break;
}
- case IEEE80211_BAND_5GHZ:
+ case NL80211_BAND_5GHZ:
if (r->flags & IEEE80211_RATE_MANDATORY_A)
mrate = r->bitrate;
break;
case IEEE80211_BAND_60GHZ:
/* TODO, for now fall through */
- case IEEE80211_NUM_BANDS:
+ case NUM_NL80211_BANDS:
WARN_ON(1);
break;
}
@@ -291,7 +291,7 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx)
if (unlikely(test_bit(SCAN_SW_SCANNING, &tx->local->scanning)) &&
test_bit(SDATA_STATE_OFFCHANNEL, &tx->sdata->state) &&
!ieee80211_is_probe_req(hdr->frame_control) &&
- !ieee80211_is_nullfunc(hdr->frame_control))
+ !ieee80211_is_any_nullfunc(hdr->frame_control))
/*
* When software scanning only nullfunc frames (to notify
* the sleep state to the AP) and probe requests (for the
@@ -1600,19 +1600,24 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata,
/* device xmit handlers */
+enum ieee80211_encrypt {
+ ENCRYPT_NO,
+ ENCRYPT_MGMT,
+ ENCRYPT_DATA,
+};
+
static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb,
- int head_need, bool may_encrypt)
+ int head_need,
+ enum ieee80211_encrypt encrypt)
{
struct ieee80211_local *local = sdata->local;
- struct ieee80211_hdr *hdr;
bool enc_tailroom;
int tail_need = 0;
- hdr = (struct ieee80211_hdr *) skb->data;
- enc_tailroom = may_encrypt &&
- (sdata->crypto_tx_tailroom_needed_cnt ||
- ieee80211_is_mgmt(hdr->frame_control));
+ enc_tailroom = encrypt == ENCRYPT_MGMT ||
+ (encrypt == ENCRYPT_DATA &&
+ sdata->crypto_tx_tailroom_needed_cnt);
if (enc_tailroom) {
tail_need = IEEE80211_ENCRYPT_TAILROOM;
@@ -1645,21 +1650,27 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata,
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
int headroom;
- bool may_encrypt;
+ enum ieee80211_encrypt encrypt;
- may_encrypt = !(info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT);
+ if (info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT)
+ encrypt = ENCRYPT_NO;
+ else if (ieee80211_is_mgmt(hdr->frame_control))
+ encrypt = ENCRYPT_MGMT;
+ else
+ encrypt = ENCRYPT_DATA;
headroom = local->tx_headroom;
- if (may_encrypt)
+ if (encrypt != ENCRYPT_NO)
headroom += sdata->encrypt_headroom;
headroom -= skb_headroom(skb);
headroom = max_t(int, 0, headroom);
- if (ieee80211_skb_resize(sdata, skb, headroom, may_encrypt)) {
+ if (ieee80211_skb_resize(sdata, skb, headroom, encrypt)) {
ieee80211_free_txskb(&local->hw, skb);
return;
}
+ /* reload after potential resize */
hdr = (struct ieee80211_hdr *) skb->data;
info->control.vif = &sdata->vif;
@@ -2031,7 +2042,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
u16 info_id = 0;
struct ieee80211_chanctx_conf *chanctx_conf;
struct ieee80211_sub_if_data *ap_sdata;
- enum ieee80211_band band;
+ enum nl80211_band band;
int ret;
if (IS_ERR(sta))
@@ -2352,7 +2363,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
head_need += sdata->encrypt_headroom;
head_need += local->tx_headroom;
head_need = max_t(int, 0, head_need);
- if (ieee80211_skb_resize(sdata, skb, head_need, true)) {
+ if (ieee80211_skb_resize(sdata, skb, head_need, ENCRYPT_DATA)) {
ieee80211_free_txskb(&local->hw, skb);
skb = NULL;
return ERR_PTR(-ENOMEM);
@@ -2762,7 +2773,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
if (unlikely(ieee80211_skb_resize(sdata, skb,
max_t(int, extra_head + hw_headroom -
skb_headroom(skb), 0),
- false))) {
+ ENCRYPT_NO))) {
kfree_skb(skb);
return true;
}
@@ -3336,7 +3347,7 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
struct sk_buff *skb = NULL;
struct ieee80211_tx_info *info;
struct ieee80211_sub_if_data *sdata = NULL;
- enum ieee80211_band band;
+ enum nl80211_band band;
struct ieee80211_tx_rate_control txrc;
struct ieee80211_chanctx_conf *chanctx_conf;
int csa_off_base = 0;
@@ -3904,7 +3915,7 @@ EXPORT_SYMBOL(ieee80211_unreserve_tid);
void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb, int tid,
- enum ieee80211_band band)
+ enum nl80211_band band)
{
int ac = ieee802_1d_to_ac[tid & 7];
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index c8cc9bd7cac1..391de2fa08a8 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -59,7 +59,7 @@ void ieee80211_tx_set_protected(struct ieee80211_tx_data *tx)
}
}
-int ieee80211_frame_duration(enum ieee80211_band band, size_t len,
+int ieee80211_frame_duration(enum nl80211_band band, size_t len,
int rate, int erp, int short_preamble,
int shift)
{
@@ -77,7 +77,7 @@ int ieee80211_frame_duration(enum ieee80211_band band, size_t len,
* is assumed to be 0 otherwise.
*/
- if (band == IEEE80211_BAND_5GHZ || erp) {
+ if (band == NL80211_BAND_5GHZ || erp) {
/*
* OFDM:
*
@@ -129,7 +129,7 @@ int ieee80211_frame_duration(enum ieee80211_band band, size_t len,
/* Exported duration function for driver use */
__le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
- enum ieee80211_band band,
+ enum nl80211_band band,
size_t frame_len,
struct ieee80211_rate *rate)
{
@@ -939,16 +939,22 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
elem_parse_failed = true;
break;
case WLAN_EID_VHT_OPERATION:
- if (elen >= sizeof(struct ieee80211_vht_operation))
+ if (elen >= sizeof(struct ieee80211_vht_operation)) {
elems->vht_operation = (void *)pos;
- else
- elem_parse_failed = true;
+ if (calc_crc)
+ crc = crc32_be(crc, pos - 2, elen + 2);
+ break;
+ }
+ elem_parse_failed = true;
break;
case WLAN_EID_OPMODE_NOTIF:
- if (elen > 0)
+ if (elen > 0) {
elems->opmode_notif = pos;
- else
- elem_parse_failed = true;
+ if (calc_crc)
+ crc = crc32_be(crc, pos - 2, elen + 2);
+ break;
+ }
+ elem_parse_failed = true;
break;
case WLAN_EID_MESH_ID:
elems->mesh_id = pos;
@@ -1126,7 +1132,7 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata,
rcu_read_lock();
chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
use_11b = (chanctx_conf &&
- chanctx_conf->def.chan->band == IEEE80211_BAND_2GHZ) &&
+ chanctx_conf->def.chan->band == NL80211_BAND_2GHZ) &&
!(sdata->flags & IEEE80211_SDATA_OPERATING_GMODE);
rcu_read_unlock();
@@ -1298,7 +1304,7 @@ void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
static int ieee80211_build_preq_ies_band(struct ieee80211_local *local,
u8 *buffer, size_t buffer_len,
const u8 *ie, size_t ie_len,
- enum ieee80211_band band,
+ enum nl80211_band band,
u32 rate_mask,
struct cfg80211_chan_def *chandef,
size_t *offset)
@@ -1372,7 +1378,7 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local,
pos += ext_rates_len;
}
- if (chandef->chan && sband->band == IEEE80211_BAND_2GHZ) {
+ if (chandef->chan && sband->band == NL80211_BAND_2GHZ) {
if (end - pos < 3)
goto out_err;
*pos++ = WLAN_EID_DS_PARAMS;
@@ -1476,7 +1482,7 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
memset(ie_desc, 0, sizeof(*ie_desc));
- for (i = 0; i < IEEE80211_NUM_BANDS; i++) {
+ for (i = 0; i < NUM_NL80211_BANDS; i++) {
if (bands_used & BIT(i)) {
pos += ieee80211_build_preq_ies_band(local,
buffer + pos,
@@ -1519,7 +1525,7 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb;
struct ieee80211_mgmt *mgmt;
int ies_len;
- u32 rate_masks[IEEE80211_NUM_BANDS] = {};
+ u32 rate_masks[NUM_NL80211_BANDS] = {};
struct ieee80211_scan_ies dummy_ie_desc;
/*
@@ -1579,7 +1585,7 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata,
u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata,
struct ieee802_11_elems *elems,
- enum ieee80211_band band, u32 *basic_rates)
+ enum nl80211_band band, u32 *basic_rates)
{
struct ieee80211_supported_band *sband;
size_t num_rates;
@@ -2472,7 +2478,7 @@ int ieee80211_parse_bitrates(struct cfg80211_chan_def *chandef,
int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb, bool need_basic,
- enum ieee80211_band band)
+ enum nl80211_band band)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_supported_band *sband;
@@ -2517,7 +2523,7 @@ int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata,
int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb, bool need_basic,
- enum ieee80211_band band)
+ enum nl80211_band band)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_supported_band *sband;
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index c38b2f07a919..47f708cd3e5b 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -299,7 +299,30 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct sta_info *sta)
return IEEE80211_STA_RX_BW_80;
}
-static enum ieee80211_sta_rx_bandwidth
+enum nl80211_chan_width ieee80211_sta_cap_chan_bw(struct sta_info *sta)
+{
+ struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap;
+ u32 cap_width;
+
+ if (!vht_cap->vht_supported) {
+ if (!sta->sta.ht_cap.ht_supported)
+ return NL80211_CHAN_WIDTH_20_NOHT;
+
+ return sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ?
+ NL80211_CHAN_WIDTH_40 : NL80211_CHAN_WIDTH_20;
+ }
+
+ cap_width = vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK;
+
+ if (cap_width == IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ)
+ return NL80211_CHAN_WIDTH_160;
+ else if (cap_width == IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ)
+ return NL80211_CHAN_WIDTH_80P80;
+
+ return NL80211_CHAN_WIDTH_80;
+}
+
+enum ieee80211_sta_rx_bandwidth
ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width)
{
switch (width) {
@@ -327,10 +350,7 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta)
bw = ieee80211_sta_cap_rx_bw(sta);
bw = min(bw, sta->cur_max_bandwidth);
-
- /* do not cap the BW of TDLS WIDER_BW peers by the bss */
- if (!test_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW))
- bw = min(bw, ieee80211_chan_width_to_rx_bw(bss_width));
+ bw = min(bw, ieee80211_chan_width_to_rx_bw(bss_width));
return bw;
}
@@ -378,7 +398,7 @@ void ieee80211_sta_set_rx_nss(struct sta_info *sta)
u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta, u8 opmode,
- enum ieee80211_band band)
+ enum nl80211_band band)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_supported_band *sband;
@@ -427,7 +447,7 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta, u8 opmode,
- enum ieee80211_band band)
+ enum nl80211_band band)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band];
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index cb439e06919f..3884bb1a59dd 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -161,8 +161,8 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx)
update_iv:
/* update IV in key information to be able to detect replays */
- rx->key->u.tkip.rx[rx->security_idx].iv32 = rx->tkip_iv32;
- rx->key->u.tkip.rx[rx->security_idx].iv16 = rx->tkip_iv16;
+ rx->key->u.tkip.rx[rx->security_idx].iv32 = rx->tkip.iv32;
+ rx->key->u.tkip.rx[rx->security_idx].iv16 = rx->tkip.iv16;
return RX_CONTINUE;
@@ -292,8 +292,8 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx)
key, skb->data + hdrlen,
skb->len - hdrlen, rx->sta->sta.addr,
hdr->addr1, hwaccel, rx->security_idx,
- &rx->tkip_iv32,
- &rx->tkip_iv16);
+ &rx->tkip.iv32,
+ &rx->tkip.iv16);
if (res != TKIP_DECRYPT_OK)
return RX_DROP_UNUSABLE;
@@ -519,6 +519,9 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx,
return RX_DROP_UNUSABLE;
}
+ /* reload hdr - skb might have been reallocated */
+ hdr = (void *)rx->skb->data;
+
data_len = skb->len - hdrlen - IEEE80211_CCMP_HDR_LEN - mic_len;
if (!rx->sta || data_len < 0)
return RX_DROP_UNUSABLE;
@@ -553,6 +556,8 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx,
}
memcpy(key->u.ccmp.rx_pn[queue], pn, IEEE80211_CCMP_PN_LEN);
+ if (unlikely(ieee80211_is_frag(hdr)))
+ memcpy(rx->ccm_gcm.pn, pn, IEEE80211_CCMP_PN_LEN);
}
/* Remove CCMP header and MIC */
@@ -749,6 +754,9 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx)
return RX_DROP_UNUSABLE;
}
+ /* reload hdr - skb might have been reallocated */
+ hdr = (void *)rx->skb->data;
+
data_len = skb->len - hdrlen - IEEE80211_GCMP_HDR_LEN - mic_len;
if (!rx->sta || data_len < 0)
return RX_DROP_UNUSABLE;
@@ -784,6 +792,8 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx)
}
memcpy(key->u.gcmp.rx_pn[queue], pn, IEEE80211_GCMP_PN_LEN);
+ if (unlikely(ieee80211_is_frag(hdr)))
+ memcpy(rx->ccm_gcm.pn, pn, IEEE80211_CCMP_PN_LEN);
}
/* Remove GCMP header and MIC */
diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c
index a13d02b7cee4..55ed8a97b33f 100644
--- a/net/mac802154/llsec.c
+++ b/net/mac802154/llsec.c
@@ -158,7 +158,7 @@ err_tfm0:
crypto_free_blkcipher(key->tfm0);
err_tfm:
for (i = 0; i < ARRAY_SIZE(key->tfm); i++)
- if (key->tfm[i])
+ if (!IS_ERR_OR_NULL(key->tfm[i]))
crypto_free_aead(key->tfm[i]);
kzfree(key);
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index c2ce7dec5198..50d9138b2a1c 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -470,16 +470,15 @@ static struct net_device *inet6_fib_lookup_dev(struct net *net,
struct net_device *dev;
struct dst_entry *dst;
struct flowi6 fl6;
- int err;
if (!ipv6_stub)
return ERR_PTR(-EAFNOSUPPORT);
memset(&fl6, 0, sizeof(fl6));
memcpy(&fl6.daddr, addr, sizeof(struct in6_addr));
- err = ipv6_stub->ipv6_dst_lookup(net, NULL, &dst, &fl6);
- if (err)
- return ERR_PTR(err);
+ dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
+ if (IS_ERR(dst))
+ return ERR_CAST(dst);
dev = dst->dev;
dev_hold(dev);
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index f83c255d7da2..c577907e3715 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -71,7 +71,7 @@ config NF_CONNTRACK_MARK
config NF_CONNTRACK_SECMARK
bool 'Connection tracking security mark support'
depends on NETWORK_SECMARK
- default m if NETFILTER_ADVANCED=n
+ default y if NETFILTER_ADVANCED=n
help
This option enables security markings to be applied to
connections. Typically they are copied to connections from
@@ -1327,22 +1327,6 @@ config NETFILTER_XT_MATCH_PKTTYPE
To compile it as a module, choose M here. If unsure, say N.
-config NETFILTER_XT_MATCH_QTAGUID
- bool '"quota, tag, owner" match and stats support'
- depends on NETFILTER_XT_MATCH_SOCKET
- depends on NETFILTER_XT_MATCH_OWNER=n
- help
- This option replaces the `owner' match. In addition to matching
- on uid, it keeps stats based on a tag assigned to a socket.
- The full tag is comprised of a UID and an accounting tag.
- The tags are assignable to sockets from user space (e.g. a download
- manager can assign the socket to another UID for accounting).
- Stats and control are done via /proc/net/xt_qtaguid/.
- It replaces owner as it takes the same arguments, but should
- really be recognized by the iptables tool.
-
- If unsure, say `N'.
-
config NETFILTER_XT_MATCH_QUOTA
tristate '"quota" match support'
depends on NETFILTER_ADVANCED
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index fcdc061130d7..6c8d78f4f248 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -157,7 +157,6 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_CGROUP) += xt_cgroup.o
obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o
obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o
obj-$(CONFIG_NETFILTER_XT_MATCH_POLICY) += xt_policy.o
-obj-$(CONFIG_NETFILTER_XT_MATCH_QTAGUID) += xt_qtaguid_print.o xt_qtaguid.o
obj-$(CONFIG_NETFILTER_XT_MATCH_QUOTA) += xt_quota.o
obj-$(CONFIG_NETFILTER_XT_MATCH_QUOTA2) += xt_quota2.o
obj-$(CONFIG_NETFILTER_XT_MATCH_RATEEST) += xt_rateest.o
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index b0bc475f641e..7d08a170ac27 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -66,9 +66,9 @@ mtype_destroy(struct ip_set *set)
if (SET_WITH_TIMEOUT(set))
del_timer_sync(&map->gc);
- ip_set_free(map->members);
if (set->dsize && set->extensions & IPSET_EXT_DESTROY)
mtype_ext_cleanup(set);
+ ip_set_free(map->members);
ip_set_free(map);
set->data = NULL;
@@ -81,7 +81,7 @@ mtype_flush(struct ip_set *set)
if (set->extensions & IPSET_EXT_DESTROY)
mtype_ext_cleanup(set);
- memset(map->members, 0, map->memsize);
+ bitmap_zero(map->members, map->elements);
}
static int
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index 4783efff0bde..a4c104a4977f 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -40,7 +40,7 @@ MODULE_ALIAS("ip_set_bitmap:ip");
/* Type structure */
struct bitmap_ip {
- void *members; /* the set members */
+ unsigned long *members; /* the set members */
u32 first_ip; /* host byte order, included in range */
u32 last_ip; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -222,7 +222,7 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
u32 first_ip, u32 last_ip,
u32 elements, u32 hosts, u8 netmask)
{
- map->members = ip_set_alloc(map->memsize);
+ map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_ip = first_ip;
@@ -315,7 +315,7 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
if (!map)
return -ENOMEM;
- map->memsize = bitmap_bytes(0, elements - 1);
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_ip;
if (!init_map_ip(set, map, first_ip, last_ip,
elements, hosts, netmask)) {
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 29dde208381d..0e961690510d 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -46,7 +46,7 @@ enum {
/* Type structure */
struct bitmap_ipmac {
- void *members; /* the set members */
+ unsigned long *members; /* the set members */
u32 first_ip; /* host byte order, included in range */
u32 last_ip; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -297,7 +297,7 @@ static bool
init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
u32 first_ip, u32 last_ip, u32 elements)
{
- map->members = ip_set_alloc(map->memsize);
+ map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_ip = first_ip;
@@ -361,7 +361,7 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
if (!map)
return -ENOMEM;
- map->memsize = bitmap_bytes(0, elements - 1);
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_ipmac;
if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
kfree(map);
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 7f0c733358a4..6771b362a123 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -34,7 +34,7 @@ MODULE_ALIAS("ip_set_bitmap:port");
/* Type structure */
struct bitmap_port {
- void *members; /* the set members */
+ unsigned long *members; /* the set members */
u16 first_port; /* host byte order, included in range */
u16 last_port; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -207,7 +207,7 @@ static bool
init_map_port(struct ip_set *set, struct bitmap_port *map,
u16 first_port, u16 last_port)
{
- map->members = ip_set_alloc(map->memsize);
+ map->members = bitmap_zalloc(map->elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_port = first_port;
@@ -250,7 +250,7 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
return -ENOMEM;
map->elements = elements;
- map->memsize = bitmap_bytes(0, map->elements);
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_port;
if (!init_map_port(set, map, first_port, last_port)) {
kfree(map);
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index caa26184f7e3..3231030a73ed 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -379,6 +379,8 @@ ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len,
for (id = 0; id < IPSET_EXT_ID_MAX; id++) {
if (!add_extension(id, cadt_flags, tb))
continue;
+ if (align < ip_set_extensions[id].align)
+ align = ip_set_extensions[id].align;
len = ALIGN(len, ip_set_extensions[id].align);
set->offset[id] = len;
set->extensions |= ip_set_extensions[id].type;
@@ -1619,6 +1621,7 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
struct ip_set *set;
struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {};
int ret = 0;
+ u32 lineno;
if (unlikely(protocol_failed(attr) ||
!attr[IPSET_ATTR_SETNAME] ||
@@ -1635,7 +1638,7 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
return -IPSET_ERR_PROTOCOL;
rcu_read_lock_bh();
- ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0, 0);
+ ret = set->variant->uadt(set, tb, IPSET_TEST, &lineno, 0, 0);
rcu_read_unlock_bh();
/* Userspace can't trigger element to be re-added */
if (ret == -EAGAIN)
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index e5336ab36d67..7b69d1ad8f3e 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -102,31 +102,17 @@ htable_size(u8 hbits)
{
size_t hsize;
- /* We must fit both into u32 in jhash and size_t */
+ /* We must fit both into u32 in jhash and INT_MAX in kvmalloc_node() */
if (hbits > 31)
return 0;
hsize = jhash_size(hbits);
- if ((((size_t)-1) - sizeof(struct htable)) / sizeof(struct hbucket *)
+ if ((INT_MAX - sizeof(struct htable)) / sizeof(struct hbucket *)
< hsize)
return 0;
return hsize * sizeof(struct hbucket *) + sizeof(struct htable);
}
-/* Compute htable_bits from the user input parameter hashsize */
-static u8
-htable_bits(u32 hashsize)
-{
- /* Assume that hashsize == 2^htable_bits */
- u8 bits = fls(hashsize - 1);
-
- if (jhash_size(bits) != hashsize)
- /* Round up to the first 2^n value */
- bits = fls(hashsize);
-
- return bits;
-}
-
#ifdef IP_SET_HASH_WITH_NETS
#if IPSET_NET_COUNT > 1
#define __CIDR(cidr, i) (cidr[i])
@@ -1309,7 +1295,11 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
get_random_bytes(&h->initval, sizeof(h->initval));
set->timeout = IPSET_NO_TIMEOUT;
- hbits = htable_bits(hashsize);
+ /* Compute htable_bits from the user input parameter hashsize.
+ * Assume that hashsize == 2^htable_bits,
+ * otherwise round up to the first 2^n value.
+ */
+ hbits = fls(hashsize - 1);
hsize = htable_size(hbits);
if (hsize == 0) {
kfree(h);
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index bbede95c9f68..085711b35a99 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -60,7 +60,7 @@ list_set_ktest(struct ip_set *set, const struct sk_buff *skb,
/* Don't lookup sub-counters at all */
opt->cmdflags &= ~IPSET_FLAG_MATCH_COUNTERS;
if (opt->cmdflags & IPSET_FLAG_SKIP_SUBCOUNTER_UPDATE)
- opt->cmdflags &= ~IPSET_FLAG_SKIP_COUNTER_UPDATE;
+ opt->cmdflags |= IPSET_FLAG_SKIP_COUNTER_UPDATE;
list_for_each_entry_rcu(e, &map->members, list) {
if (SET_WITH_TIMEOUT(set) &&
ip_set_timeout_expired(ext_timeout(e, set)))
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 85ca189bdc3d..de196dd95dcd 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -1368,6 +1368,10 @@ int __init ip_vs_conn_init(void)
int idx;
/* Compute size and mask */
+ if (ip_vs_conn_tab_bits < 8 || ip_vs_conn_tab_bits > 20) {
+ pr_info("conn_tab_bits not in [8, 20]. Using default value\n");
+ ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS;
+ }
ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits;
ip_vs_conn_tab_mask = ip_vs_conn_tab_size - 1;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 79f4ffe7291a..7065410b13ad 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1226,7 +1226,7 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
svc->port = u->port;
svc->fwmark = u->fwmark;
- svc->flags = u->flags;
+ svc->flags = u->flags & ~IP_VS_SVC_F_HASHED;
svc->timeout = u->timeout * HZ;
svc->netmask = u->netmask;
svc->ipvs = ipvs;
@@ -2383,6 +2383,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
/* Set timeout values for (tcp tcpfin udp) */
ret = ip_vs_set_timeout(ipvs, (struct ip_vs_timeout_user *)arg);
goto out_unlock;
+ } else if (!len) {
+ /* No more commands with len == 0 below */
+ ret = -EINVAL;
+ goto out_unlock;
}
usvc_compat = (struct ip_vs_service_user *)arg;
@@ -2459,9 +2463,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
break;
case IP_VS_SO_SET_DELDEST:
ret = ip_vs_del_dest(svc, &udest);
- break;
- default:
- ret = -EINVAL;
}
out_unlock:
@@ -3921,6 +3922,11 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode;
tbl[idx++].data = &ipvs->sysctl_schedule_icmp;
tbl[idx++].data = &ipvs->sysctl_ignore_tunneled;
+#ifdef CONFIG_IP_VS_DEBUG
+ /* Global sysctls must be ro in non-init netns */
+ if (!net_eq(net, &init_net))
+ tbl[idx++].mode = 0444;
+#endif
ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
if (ipvs->sysctl_hdr == NULL) {
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index d637d151d0c7..d32a3fd4528e 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -898,9 +898,9 @@ __nf_conntrack_alloc(struct net *net,
/* Don't set timer yet: wait for confirmation */
setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct);
write_pnet(&ct->ct_net, net);
- memset(&ct->__nfct_init_offset[0], 0,
+ memset(&ct->__nfct_init_offset, 0,
offsetof(struct nf_conn, proto) -
- offsetof(struct nf_conn, __nfct_init_offset[0]));
+ offsetof(struct nf_conn, __nfct_init_offset));
if (zone && nf_ct_zone_add(ct, GFP_ATOMIC, zone) < 0)
goto out_free;
@@ -1600,7 +1600,7 @@ void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
}
EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable);
-int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
+int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp)
{
int i, bucket, rc;
unsigned int hashsize, old_size;
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 9511af04dc81..15495b956855 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -1225,6 +1225,7 @@ static struct nf_conntrack_helper nf_conntrack_helper_q931[] __read_mostly = {
{
.name = "Q.931",
.me = THIS_MODULE,
+ .data_len = sizeof(struct nf_ct_h323_master),
.tuple.src.l3num = AF_INET6,
.tuple.src.u.tcp.port = cpu_to_be16(Q931_PORT),
.tuple.dst.protonum = IPPROTO_TCP,
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 3a24c01cb909..b349b8410ec8 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1022,6 +1022,8 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[],
if (!tb[CTA_TUPLE_IP])
return -EINVAL;
+ if (l3num != NFPROTO_IPV4 && l3num != NFPROTO_IPV6)
+ return -EOPNOTSUPP;
tuple->src.l3num = l3num;
err = ctnetlink_parse_tuple_ip(tb[CTA_TUPLE_IP], tuple);
@@ -3390,6 +3392,9 @@ static void __net_exit ctnetlink_net_exit_batch(struct list_head *net_exit_list)
list_for_each_entry(net, net_exit_list, exit_list)
ctnetlink_net_exit(net);
+
+ /* wait for other cpus until they are done with ctnl_notifiers */
+ synchronize_rcu();
}
static struct pernet_operations ctnetlink_net_ops = {
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 5588c7ae1ac2..9f9f92d637ad 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -71,24 +71,32 @@ EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_expectfn);
#if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG)
/* PptpControlMessageType names */
-const char *const pptp_msg_name[] = {
- "UNKNOWN_MESSAGE",
- "START_SESSION_REQUEST",
- "START_SESSION_REPLY",
- "STOP_SESSION_REQUEST",
- "STOP_SESSION_REPLY",
- "ECHO_REQUEST",
- "ECHO_REPLY",
- "OUT_CALL_REQUEST",
- "OUT_CALL_REPLY",
- "IN_CALL_REQUEST",
- "IN_CALL_REPLY",
- "IN_CALL_CONNECT",
- "CALL_CLEAR_REQUEST",
- "CALL_DISCONNECT_NOTIFY",
- "WAN_ERROR_NOTIFY",
- "SET_LINK_INFO"
+static const char *const pptp_msg_name_array[PPTP_MSG_MAX + 1] = {
+ [0] = "UNKNOWN_MESSAGE",
+ [PPTP_START_SESSION_REQUEST] = "START_SESSION_REQUEST",
+ [PPTP_START_SESSION_REPLY] = "START_SESSION_REPLY",
+ [PPTP_STOP_SESSION_REQUEST] = "STOP_SESSION_REQUEST",
+ [PPTP_STOP_SESSION_REPLY] = "STOP_SESSION_REPLY",
+ [PPTP_ECHO_REQUEST] = "ECHO_REQUEST",
+ [PPTP_ECHO_REPLY] = "ECHO_REPLY",
+ [PPTP_OUT_CALL_REQUEST] = "OUT_CALL_REQUEST",
+ [PPTP_OUT_CALL_REPLY] = "OUT_CALL_REPLY",
+ [PPTP_IN_CALL_REQUEST] = "IN_CALL_REQUEST",
+ [PPTP_IN_CALL_REPLY] = "IN_CALL_REPLY",
+ [PPTP_IN_CALL_CONNECT] = "IN_CALL_CONNECT",
+ [PPTP_CALL_CLEAR_REQUEST] = "CALL_CLEAR_REQUEST",
+ [PPTP_CALL_DISCONNECT_NOTIFY] = "CALL_DISCONNECT_NOTIFY",
+ [PPTP_WAN_ERROR_NOTIFY] = "WAN_ERROR_NOTIFY",
+ [PPTP_SET_LINK_INFO] = "SET_LINK_INFO"
};
+
+const char *pptp_msg_name(u_int16_t msg)
+{
+ if (msg > PPTP_MSG_MAX)
+ return pptp_msg_name_array[0];
+
+ return pptp_msg_name_array[msg];
+}
EXPORT_SYMBOL(pptp_msg_name);
#endif
@@ -278,7 +286,7 @@ pptp_inbound_pkt(struct sk_buff *skb, unsigned int protoff,
typeof(nf_nat_pptp_hook_inbound) nf_nat_pptp_inbound;
msg = ntohs(ctlh->messageType);
- pr_debug("inbound control message %s\n", pptp_msg_name[msg]);
+ pr_debug("inbound control message %s\n", pptp_msg_name(msg));
switch (msg) {
case PPTP_START_SESSION_REPLY:
@@ -313,7 +321,7 @@ pptp_inbound_pkt(struct sk_buff *skb, unsigned int protoff,
pcid = pptpReq->ocack.peersCallID;
if (info->pns_call_id != pcid)
goto invalid;
- pr_debug("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg],
+ pr_debug("%s, CID=%X, PCID=%X\n", pptp_msg_name(msg),
ntohs(cid), ntohs(pcid));
if (pptpReq->ocack.resultCode == PPTP_OUTCALL_CONNECT) {
@@ -330,7 +338,7 @@ pptp_inbound_pkt(struct sk_buff *skb, unsigned int protoff,
goto invalid;
cid = pptpReq->icreq.callID;
- pr_debug("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
+ pr_debug("%s, CID=%X\n", pptp_msg_name(msg), ntohs(cid));
info->cstate = PPTP_CALL_IN_REQ;
info->pac_call_id = cid;
break;
@@ -349,7 +357,7 @@ pptp_inbound_pkt(struct sk_buff *skb, unsigned int protoff,
if (info->pns_call_id != pcid)
goto invalid;
- pr_debug("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(pcid));
+ pr_debug("%s, PCID=%X\n", pptp_msg_name(msg), ntohs(pcid));
info->cstate = PPTP_CALL_IN_CONF;
/* we expect a GRE connection from PAC to PNS */
@@ -359,7 +367,7 @@ pptp_inbound_pkt(struct sk_buff *skb, unsigned int protoff,
case PPTP_CALL_DISCONNECT_NOTIFY:
/* server confirms disconnect */
cid = pptpReq->disc.callID;
- pr_debug("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
+ pr_debug("%s, CID=%X\n", pptp_msg_name(msg), ntohs(cid));
info->cstate = PPTP_CALL_NONE;
/* untrack this call id, unexpect GRE packets */
@@ -386,7 +394,7 @@ pptp_inbound_pkt(struct sk_buff *skb, unsigned int protoff,
invalid:
pr_debug("invalid %s: type=%d cid=%u pcid=%u "
"cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n",
- msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0],
+ pptp_msg_name(msg),
msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate,
ntohs(info->pns_call_id), ntohs(info->pac_call_id));
return NF_ACCEPT;
@@ -406,7 +414,7 @@ pptp_outbound_pkt(struct sk_buff *skb, unsigned int protoff,
typeof(nf_nat_pptp_hook_outbound) nf_nat_pptp_outbound;
msg = ntohs(ctlh->messageType);
- pr_debug("outbound control message %s\n", pptp_msg_name[msg]);
+ pr_debug("outbound control message %s\n", pptp_msg_name(msg));
switch (msg) {
case PPTP_START_SESSION_REQUEST:
@@ -428,7 +436,7 @@ pptp_outbound_pkt(struct sk_buff *skb, unsigned int protoff,
info->cstate = PPTP_CALL_OUT_REQ;
/* track PNS call id */
cid = pptpReq->ocreq.callID;
- pr_debug("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
+ pr_debug("%s, CID=%X\n", pptp_msg_name(msg), ntohs(cid));
info->pns_call_id = cid;
break;
@@ -442,7 +450,7 @@ pptp_outbound_pkt(struct sk_buff *skb, unsigned int protoff,
pcid = pptpReq->icack.peersCallID;
if (info->pac_call_id != pcid)
goto invalid;
- pr_debug("%s, CID=%X PCID=%X\n", pptp_msg_name[msg],
+ pr_debug("%s, CID=%X PCID=%X\n", pptp_msg_name(msg),
ntohs(cid), ntohs(pcid));
if (pptpReq->icack.resultCode == PPTP_INCALL_ACCEPT) {
@@ -482,7 +490,7 @@ pptp_outbound_pkt(struct sk_buff *skb, unsigned int protoff,
invalid:
pr_debug("invalid %s: type=%d cid=%u pcid=%u "
"cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n",
- msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0],
+ pptp_msg_name(msg),
msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate,
ntohs(info->pns_call_id), ntohs(info->pac_call_id));
return NF_ACCEPT;
diff --git a/net/netfilter/nf_nat_ftp.c b/net/netfilter/nf_nat_ftp.c
index e84a578dbe35..d76afafdc699 100644
--- a/net/netfilter/nf_nat_ftp.c
+++ b/net/netfilter/nf_nat_ftp.c
@@ -134,7 +134,7 @@ static int __init nf_nat_ftp_init(void)
}
/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
-static int warn_set(const char *val, struct kernel_param *kp)
+static int warn_set(const char *val, const struct kernel_param *kp)
{
printk(KERN_INFO KBUILD_MODNAME
": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
diff --git a/net/netfilter/nf_nat_irc.c b/net/netfilter/nf_nat_irc.c
index 1fb2258c3535..8039bcd60758 100644
--- a/net/netfilter/nf_nat_irc.c
+++ b/net/netfilter/nf_nat_irc.c
@@ -107,7 +107,7 @@ static int __init nf_nat_irc_init(void)
}
/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
-static int warn_set(const char *val, struct kernel_param *kp)
+static int warn_set(const char *val, const struct kernel_param *kp)
{
printk(KERN_INFO KBUILD_MODNAME
": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index c8a4a48bced9..8be604eb6961 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -34,6 +34,9 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
int length = (th->doff * 4) - sizeof(*th);
u8 buf[40], *ptr;
+ if (unlikely(length < 0))
+ return false;
+
ptr = skb_header_pointer(skb, doff + sizeof(*th), length, buf);
if (ptr == NULL)
return false;
@@ -50,6 +53,8 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
length--;
continue;
default:
+ if (length < 2)
+ return true;
opsize = *ptr++;
if (opsize < 2)
return true;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index a7967af0da82..6203995003a5 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2849,12 +2849,14 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set);
if (err < 0)
- goto err2;
+ goto err3;
list_add_tail_rcu(&set->list, &table->sets);
table->use++;
return 0;
+err3:
+ ops->destroy(set);
err2:
kfree(set);
err1:
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 99bc2f87a974..204be9374657 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -130,7 +130,7 @@ next_rule:
list_for_each_entry_continue_rcu(rule, &chain->rules, list) {
/* This rule is not active, skip. */
- if (unlikely(rule->genmask & (1 << gencursor)))
+ if (unlikely(rule->genmask & gencursor))
continue;
rulenum++;
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 044559c10e98..f01764b94b34 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -309,14 +309,14 @@ replay:
#endif
{
nfnl_unlock(subsys_id);
- netlink_ack(skb, nlh, -EOPNOTSUPP);
+ netlink_ack(oskb, nlh, -EOPNOTSUPP);
return kfree_skb(skb);
}
}
if (!ss->commit || !ss->abort) {
nfnl_unlock(subsys_id);
- netlink_ack(skb, nlh, -EOPNOTSUPP);
+ netlink_ack(oskb, nlh, -EOPNOTSUPP);
return kfree_skb(skb);
}
@@ -406,7 +406,7 @@ ack:
* pointing to the batch header.
*/
nfnl_err_reset(&err_list);
- netlink_ack(skb, nlmsg_hdr(oskb), -ENOMEM);
+ netlink_ack(oskb, nlmsg_hdr(oskb), -ENOMEM);
status |= NFNL_BATCH_FAILURE;
goto done;
}
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index ac143ae4f7b6..63a9d5fd00c0 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -355,10 +355,14 @@ static int
nfnl_cthelper_update(const struct nlattr * const tb[],
struct nf_conntrack_helper *helper)
{
+ u32 size;
int ret;
- if (tb[NFCTH_PRIV_DATA_LEN])
- return -EBUSY;
+ if (tb[NFCTH_PRIV_DATA_LEN]) {
+ size = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN]));
+ if (size != helper->data_len)
+ return -EBUSY;
+ }
if (tb[NFCTH_POLICY]) {
ret = nfnl_cthelper_update_policy(helper, tb[NFCTH_POLICY]);
@@ -711,6 +715,8 @@ static const struct nla_policy nfnl_cthelper_policy[NFCTH_MAX+1] = {
[NFCTH_NAME] = { .type = NLA_NUL_STRING,
.len = NF_CT_HELPER_NAME_LEN-1 },
[NFCTH_QUEUE_NUM] = { .type = NLA_U32, },
+ [NFCTH_PRIV_DATA_LEN] = { .type = NLA_U32, },
+ [NFCTH_STATUS] = { .type = NLA_U32, },
};
static const struct nfnl_callback nfnl_cthelper_cb[NFNL_MSG_CTHELPER_MAX] = {
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 54cde78c2718..ebce25080f7f 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -486,7 +486,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
goto nla_put_failure;
if (indev && entskb->dev &&
- entskb->mac_header != entskb->network_header) {
+ skb_mac_header_was_set(entskb)) {
struct nfqnl_msg_packet_hw phw;
int len;
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 0a5df0cbaa28..d6fcfc995420 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -121,6 +121,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
return PTR_ERR(set);
}
+ if (set->ops->update == NULL)
+ return -EOPNOTSUPP;
+
if (set->flags & NFT_SET_CONSTANT)
return -EBUSY;
@@ -186,8 +189,10 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_EXPR,
priv->expr->ops->size);
if (set->flags & NFT_SET_TIMEOUT) {
- if (timeout || set->timeout)
+ if (timeout || set->timeout) {
+ nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_TIMEOUT);
nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_EXPIRATION);
+ }
}
priv->timeout = timeout;
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index ba7aed13e174..a81f6bf42d1f 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -34,6 +34,9 @@ static void nft_exthdr_eval(const struct nft_expr *expr,
unsigned int offset = 0;
int err;
+ if (pkt->skb->protocol != htons(ETH_P_IPV6))
+ goto err;
+
err = ipv6_find_hdr(pkt->skb, &offset, priv->type, NULL, NULL);
if (err < 0)
goto err;
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index a97a5bf716be..8a69c6fdced5 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -200,7 +200,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
sk = skb_to_full_sk(skb);
if (!sk || !sk_fullsock(sk))
goto err;
- *dest = sk->sk_classid;
+ *dest = sock_cgroup_classid(&sk->sk_cgrp_data);
break;
#endif
default:
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index ee2d71753746..182704b980d1 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -135,7 +135,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
priv->type = NF_NAT_MANIP_DST;
break;
default:
- return -EINVAL;
+ return -EOPNOTSUPP;
}
err = nft_nat_validate(ctx, expr, NULL);
@@ -157,7 +157,9 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
alen = FIELD_SIZEOF(struct nf_nat_range, min_addr.ip6);
break;
default:
- return -EAFNOSUPPORT;
+ if (tb[NFTA_NAT_REG_ADDR_MIN])
+ return -EAFNOSUPPORT;
+ break;
}
priv->family = family;
@@ -206,7 +208,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
if (tb[NFTA_NAT_FLAGS]) {
priv->flags = ntohl(nla_get_be32(tb[NFTA_NAT_FLAGS]));
if (priv->flags & ~NF_NAT_RANGE_MASK)
- return -EINVAL;
+ return -EOPNOTSUPP;
}
return 0;
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 09b4b07eb676..ab3e7b14de09 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -74,7 +74,9 @@ static void nft_payload_eval(const struct nft_expr *expr,
u32 *dest = &regs->data[priv->dreg];
int offset;
- dest[priv->len / NFT_REG32_SIZE] = 0;
+ if (priv->len % NFT_REG32_SIZE)
+ dest[priv->len / NFT_REG32_SIZE] = 0;
+
switch (priv->base) {
case NFT_PAYLOAD_LL_HEADER:
if (!skb_mac_header_was_set(skb))
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index cdafbd38a456..2909c34dda7a 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -266,11 +266,66 @@ struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision)
}
EXPORT_SYMBOL_GPL(xt_request_find_target);
+
+static int xt_obj_to_user(u16 __user *psize, u16 size,
+ void __user *pname, const char *name,
+ u8 __user *prev, u8 rev)
+{
+ if (put_user(size, psize))
+ return -EFAULT;
+ if (copy_to_user(pname, name, strlen(name) + 1))
+ return -EFAULT;
+ if (put_user(rev, prev))
+ return -EFAULT;
+
+ return 0;
+}
+
+#define XT_OBJ_TO_USER(U, K, TYPE, C_SIZE) \
+ xt_obj_to_user(&U->u.TYPE##_size, C_SIZE ? : K->u.TYPE##_size, \
+ U->u.user.name, K->u.kernel.TYPE->name, \
+ &U->u.user.revision, K->u.kernel.TYPE->revision)
+
+int xt_data_to_user(void __user *dst, const void *src,
+ int usersize, int size)
+{
+ usersize = usersize ? : size;
+ if (copy_to_user(dst, src, usersize))
+ return -EFAULT;
+ if (usersize != size && clear_user(dst + usersize, size - usersize))
+ return -EFAULT;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xt_data_to_user);
+
+#define XT_DATA_TO_USER(U, K, TYPE, C_SIZE) \
+ xt_data_to_user(U->data, K->data, \
+ K->u.kernel.TYPE->usersize, \
+ C_SIZE ? : K->u.kernel.TYPE->TYPE##size)
+
+int xt_match_to_user(const struct xt_entry_match *m,
+ struct xt_entry_match __user *u)
+{
+ return XT_OBJ_TO_USER(u, m, match, 0) ||
+ XT_DATA_TO_USER(u, m, match, 0);
+}
+EXPORT_SYMBOL_GPL(xt_match_to_user);
+
+int xt_target_to_user(const struct xt_entry_target *t,
+ struct xt_entry_target __user *u)
+{
+ return XT_OBJ_TO_USER(u, t, target, 0) ||
+ XT_DATA_TO_USER(u, t, target, 0);
+}
+EXPORT_SYMBOL_GPL(xt_target_to_user);
+
static int match_revfn(u8 af, const char *name, u8 revision, int *bestp)
{
const struct xt_match *m;
int have_rev = 0;
+ mutex_lock(&xt[af].mutex);
list_for_each_entry(m, &xt[af].match, list) {
if (strcmp(m->name, name) == 0) {
if (m->revision > *bestp)
@@ -279,6 +334,7 @@ static int match_revfn(u8 af, const char *name, u8 revision, int *bestp)
have_rev = 1;
}
}
+ mutex_unlock(&xt[af].mutex);
if (af != NFPROTO_UNSPEC && !have_rev)
return match_revfn(NFPROTO_UNSPEC, name, revision, bestp);
@@ -291,6 +347,7 @@ static int target_revfn(u8 af, const char *name, u8 revision, int *bestp)
const struct xt_target *t;
int have_rev = 0;
+ mutex_lock(&xt[af].mutex);
list_for_each_entry(t, &xt[af].target, list) {
if (strcmp(t->name, name) == 0) {
if (t->revision > *bestp)
@@ -299,6 +356,7 @@ static int target_revfn(u8 af, const char *name, u8 revision, int *bestp)
have_rev = 1;
}
}
+ mutex_unlock(&xt[af].mutex);
if (af != NFPROTO_UNSPEC && !have_rev)
return target_revfn(NFPROTO_UNSPEC, name, revision, bestp);
@@ -312,12 +370,10 @@ int xt_find_revision(u8 af, const char *name, u8 revision, int target,
{
int have_rev, best = -1;
- mutex_lock(&xt[af].mutex);
if (target == 1)
have_rev = target_revfn(af, name, revision, &best);
else
have_rev = match_revfn(af, name, revision, &best);
- mutex_unlock(&xt[af].mutex);
/* Nothing at all? Return 0 to try loading module. */
if (best == -1) {
@@ -566,7 +622,7 @@ void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
{
const struct xt_match *match = m->u.kernel.match;
struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m;
- int pad, off = xt_compat_match_offset(match);
+ int off = xt_compat_match_offset(match);
u_int16_t msize = cm->u.user.match_size;
char name[sizeof(m->u.user.name)];
@@ -576,9 +632,6 @@ void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
match->compat_from_user(m->data, cm->data);
else
memcpy(m->data, cm->data, msize - sizeof(*cm));
- pad = XT_ALIGN(match->matchsize) - match->matchsize;
- if (pad > 0)
- memset(m->data + match->matchsize, 0, pad);
msize += off;
m->u.user.match_size = msize;
@@ -924,7 +977,7 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
{
const struct xt_target *target = t->u.kernel.target;
struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t;
- int pad, off = xt_compat_target_offset(target);
+ int off = xt_compat_target_offset(target);
u_int16_t tsize = ct->u.user.target_size;
char name[sizeof(t->u.user.name)];
@@ -934,9 +987,6 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
target->compat_from_user(t->data, ct->data);
else
memcpy(t->data, ct->data, tsize - sizeof(*ct));
- pad = XT_ALIGN(target->targetsize) - target->targetsize;
- if (pad > 0)
- memset(t->data + target->targetsize, 0, pad);
tsize += off;
t->u.user.target_size = tsize;
@@ -1144,6 +1194,9 @@ xt_replace_table(struct xt_table *table,
smp_wmb();
table->private = newinfo;
+ /* make sure all cpus see new ->private value */
+ smp_mb();
+
/*
* Even though table entries have now been swapped, other CPU's
* may still be using the old entries. This is okay, because
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index febcfac7e3df..f2fe60ce286f 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -380,6 +380,7 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = {
.name = "CT",
.family = NFPROTO_UNSPEC,
.targetsize = sizeof(struct xt_ct_target_info),
+ .usersize = offsetof(struct xt_ct_target_info, ct),
.checkentry = xt_ct_tg_check_v0,
.destroy = xt_ct_tg_destroy_v0,
.target = xt_ct_target_v0,
@@ -391,6 +392,7 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = {
.family = NFPROTO_UNSPEC,
.revision = 1,
.targetsize = sizeof(struct xt_ct_target_info_v1),
+ .usersize = offsetof(struct xt_ct_target_info, ct),
.checkentry = xt_ct_tg_check_v1,
.destroy = xt_ct_tg_destroy_v1,
.target = xt_ct_target_v1,
@@ -402,6 +404,7 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = {
.family = NFPROTO_UNSPEC,
.revision = 2,
.targetsize = sizeof(struct xt_ct_target_info_v1),
+ .usersize = offsetof(struct xt_ct_target_info, ct),
.checkentry = xt_ct_tg_check_v2,
.destroy = xt_ct_tg_destroy_v1,
.target = xt_ct_target_v1,
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index b0f4f1bca61f..18e4fd8aa166 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -505,6 +505,7 @@ static struct xt_target idletimer_tg __read_mostly = {
.family = NFPROTO_UNSPEC,
.target = idletimer_tg_target,
.targetsize = sizeof(struct idletimer_tg_info),
+ .usersize = offsetof(struct idletimer_tg_info, timer),
.checkentry = idletimer_tg_checkentry,
.destroy = idletimer_tg_destroy,
.me = THIS_MODULE,
diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c
index 0858fe17e14a..2d1c5c169a26 100644
--- a/net/netfilter/xt_LED.c
+++ b/net/netfilter/xt_LED.c
@@ -198,6 +198,7 @@ static struct xt_target led_tg_reg __read_mostly = {
.family = NFPROTO_UNSPEC,
.target = led_tg,
.targetsize = sizeof(struct xt_led_info),
+ .usersize = offsetof(struct xt_led_info, internal_data),
.checkentry = led_tg_check,
.destroy = led_tg_destroy,
.me = THIS_MODULE,
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index 0be96f8475f7..6768d4d2ffd0 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -107,6 +107,9 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
} cfg;
int ret;
+ if (strnlen(info->name, sizeof(est->name)) >= sizeof(est->name))
+ return -ENAMETOOLONG;
+
if (unlikely(!rnd_inited)) {
get_random_bytes(&jhash_rnd, sizeof(jhash_rnd));
rnd_inited = true;
@@ -178,6 +181,7 @@ static struct xt_target xt_rateest_tg_reg __read_mostly = {
.checkentry = xt_rateest_tg_checkentry,
.destroy = xt_rateest_tg_destroy,
.targetsize = sizeof(struct xt_rateest_target_info),
+ .usersize = offsetof(struct xt_rateest_target_info, est),
.me = THIS_MODULE,
};
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 3eff7b67cdf2..d597b504a82e 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -127,6 +127,7 @@ static struct xt_target tee_tg_reg[] __read_mostly = {
.family = NFPROTO_IPV4,
.target = tee_tg4,
.targetsize = sizeof(struct xt_tee_tginfo),
+ .usersize = offsetof(struct xt_tee_tginfo, priv),
.checkentry = tee_tg_check,
.destroy = tee_tg_destroy,
.me = THIS_MODULE,
@@ -138,6 +139,7 @@ static struct xt_target tee_tg_reg[] __read_mostly = {
.family = NFPROTO_IPV6,
.target = tee_tg6,
.targetsize = sizeof(struct xt_tee_tginfo),
+ .usersize = offsetof(struct xt_tee_tginfo, priv),
.checkentry = tee_tg_check,
.destroy = tee_tg_destroy,
.me = THIS_MODULE,
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 3ab591e73ec0..7f4414d26a66 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -105,19 +105,24 @@ tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
* belonging to established connections going through that one.
*/
static inline struct sock *
-nf_tproxy_get_sock_v4(struct net *net, const u8 protocol,
+nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
+ const u8 protocol,
const __be32 saddr, const __be32 daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in,
const enum nf_tproxy_lookup_t lookup_type)
{
struct sock *sk;
+ struct tcphdr *tcph;
switch (protocol) {
case IPPROTO_TCP:
switch (lookup_type) {
case NFT_LOOKUP_LISTENER:
- sk = inet_lookup_listener(net, &tcp_hashinfo,
+ tcph = hp;
+ sk = inet_lookup_listener(net, &tcp_hashinfo, skb,
+ ip_hdrlen(skb) +
+ __tcp_hdrlen(tcph),
saddr, sport,
daddr, dport,
in->ifindex);
@@ -169,19 +174,23 @@ nf_tproxy_get_sock_v4(struct net *net, const u8 protocol,
#ifdef XT_TPROXY_HAVE_IPV6
static inline struct sock *
-nf_tproxy_get_sock_v6(struct net *net, const u8 protocol,
+nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
+ const u8 protocol,
const struct in6_addr *saddr, const struct in6_addr *daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in,
const enum nf_tproxy_lookup_t lookup_type)
{
struct sock *sk;
+ struct tcphdr *tcph;
switch (protocol) {
case IPPROTO_TCP:
switch (lookup_type) {
case NFT_LOOKUP_LISTENER:
- sk = inet6_lookup_listener(net, &tcp_hashinfo,
+ tcph = hp;
+ sk = inet6_lookup_listener(net, &tcp_hashinfo, skb,
+ thoff + __tcp_hdrlen(tcph),
saddr, sport,
daddr, ntohs(dport),
in->ifindex);
@@ -267,7 +276,7 @@ tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb,
* to a listener socket if there's one */
struct sock *sk2;
- sk2 = nf_tproxy_get_sock_v4(net, iph->protocol,
+ sk2 = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
iph->saddr, laddr ? laddr : iph->daddr,
hp->source, lport ? lport : hp->dest,
skb->dev, NFT_LOOKUP_LISTENER);
@@ -305,7 +314,7 @@ tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport,
* addresses, this happens if the redirect already happened
* and the current packet belongs to an already established
* connection */
- sk = nf_tproxy_get_sock_v4(net, iph->protocol,
+ sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
iph->saddr, iph->daddr,
hp->source, hp->dest,
skb->dev, NFT_LOOKUP_ESTABLISHED);
@@ -321,7 +330,7 @@ tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport,
else if (!sk)
/* no, there's no established connection, check if
* there's a listener on the redirected addr/port */
- sk = nf_tproxy_get_sock_v4(net, iph->protocol,
+ sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
iph->saddr, laddr,
hp->source, lport,
skb->dev, NFT_LOOKUP_LISTENER);
@@ -429,7 +438,7 @@ tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
* to a listener socket if there's one */
struct sock *sk2;
- sk2 = nf_tproxy_get_sock_v6(par->net, tproto,
+ sk2 = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, tproto,
&iph->saddr,
tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr),
hp->source,
@@ -472,7 +481,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
* addresses, this happens if the redirect already happened
* and the current packet belongs to an already established
* connection */
- sk = nf_tproxy_get_sock_v6(par->net, tproto,
+ sk = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, tproto,
&iph->saddr, &iph->daddr,
hp->source, hp->dest,
par->in, NFT_LOOKUP_ESTABLISHED);
@@ -487,8 +496,8 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
else if (!sk)
/* no there's no established connection, check if
* there's a listener on the redirected addr/port */
- sk = nf_tproxy_get_sock_v6(par->net, tproto,
- &iph->saddr, laddr,
+ sk = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp,
+ tproto, &iph->saddr, laddr,
hp->source, lport,
par->in, NFT_LOOKUP_LISTENER);
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index dffee9d47ec4..7e7746cc45d6 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c
@@ -8,8 +8,10 @@
*/
#include <linux/module.h>
+#include <linux/syscalls.h>
#include <linux/skbuff.h>
#include <linux/filter.h>
+#include <linux/bpf.h>
#include <linux/netfilter/xt_bpf.h>
#include <linux/netfilter/x_tables.h>
@@ -20,15 +22,18 @@ MODULE_LICENSE("GPL");
MODULE_ALIAS("ipt_bpf");
MODULE_ALIAS("ip6t_bpf");
-static int bpf_mt_check(const struct xt_mtchk_param *par)
+static int __bpf_mt_check_bytecode(struct sock_filter *insns, __u16 len,
+ struct bpf_prog **ret)
{
- struct xt_bpf_info *info = par->matchinfo;
struct sock_fprog_kern program;
- program.len = info->bpf_program_num_elem;
- program.filter = info->bpf_program;
+ if (len > XT_BPF_MAX_NUM_INSTR)
+ return -EINVAL;
- if (bpf_prog_create(&info->filter, &program)) {
+ program.len = len;
+ program.filter = insns;
+
+ if (bpf_prog_create(ret, &program)) {
pr_info("bpf: check failed: parse error\n");
return -EINVAL;
}
@@ -36,6 +41,53 @@ static int bpf_mt_check(const struct xt_mtchk_param *par)
return 0;
}
+static int __bpf_mt_check_fd(int fd, struct bpf_prog **ret)
+{
+ struct bpf_prog *prog;
+
+ prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
+ *ret = prog;
+ return 0;
+}
+
+static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret)
+{
+ if (strnlen(path, XT_BPF_PATH_MAX) == XT_BPF_PATH_MAX)
+ return -EINVAL;
+
+ *ret = bpf_prog_get_type_path(path, BPF_PROG_TYPE_SOCKET_FILTER);
+ return PTR_ERR_OR_ZERO(*ret);
+
+}
+
+static int bpf_mt_check(const struct xt_mtchk_param *par)
+{
+ struct xt_bpf_info *info = par->matchinfo;
+
+ return __bpf_mt_check_bytecode(info->bpf_program,
+ info->bpf_program_num_elem,
+ &info->filter);
+}
+
+static int bpf_mt_check_v1(const struct xt_mtchk_param *par)
+{
+ struct xt_bpf_info_v1 *info = par->matchinfo;
+
+ if (info->mode == XT_BPF_MODE_BYTECODE)
+ return __bpf_mt_check_bytecode(info->bpf_program,
+ info->bpf_program_num_elem,
+ &info->filter);
+ else if (info->mode == XT_BPF_MODE_FD_ELF)
+ return __bpf_mt_check_fd(info->fd, &info->filter);
+ else if (info->mode == XT_BPF_MODE_PATH_PINNED)
+ return __bpf_mt_check_path(info->path, &info->filter);
+ else
+ return -EINVAL;
+}
+
static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_bpf_info *info = par->matchinfo;
@@ -43,31 +95,60 @@ static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par)
return BPF_PROG_RUN(info->filter, skb);
}
+static bool bpf_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_bpf_info_v1 *info = par->matchinfo;
+
+ return !!bpf_prog_run_save_cb(info->filter, (struct sk_buff *) skb);
+}
+
static void bpf_mt_destroy(const struct xt_mtdtor_param *par)
{
const struct xt_bpf_info *info = par->matchinfo;
+
+ bpf_prog_destroy(info->filter);
+}
+
+static void bpf_mt_destroy_v1(const struct xt_mtdtor_param *par)
+{
+ const struct xt_bpf_info_v1 *info = par->matchinfo;
+
bpf_prog_destroy(info->filter);
}
-static struct xt_match bpf_mt_reg __read_mostly = {
- .name = "bpf",
- .revision = 0,
- .family = NFPROTO_UNSPEC,
- .checkentry = bpf_mt_check,
- .match = bpf_mt,
- .destroy = bpf_mt_destroy,
- .matchsize = sizeof(struct xt_bpf_info),
- .me = THIS_MODULE,
+static struct xt_match bpf_mt_reg[] __read_mostly = {
+ {
+ .name = "bpf",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = bpf_mt_check,
+ .match = bpf_mt,
+ .destroy = bpf_mt_destroy,
+ .matchsize = sizeof(struct xt_bpf_info),
+ .usersize = offsetof(struct xt_bpf_info, filter),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "bpf",
+ .revision = 1,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = bpf_mt_check_v1,
+ .match = bpf_mt_v1,
+ .destroy = bpf_mt_destroy_v1,
+ .matchsize = sizeof(struct xt_bpf_info_v1),
+ .usersize = offsetof(struct xt_bpf_info_v1, filter),
+ .me = THIS_MODULE,
+ },
};
static int __init bpf_mt_init(void)
{
- return xt_register_match(&bpf_mt_reg);
+ return xt_register_matches(bpf_mt_reg, ARRAY_SIZE(bpf_mt_reg));
}
static void __exit bpf_mt_exit(void)
{
- xt_unregister_match(&bpf_mt_reg);
+ xt_unregister_matches(bpf_mt_reg, ARRAY_SIZE(bpf_mt_reg));
}
module_init(bpf_mt_init);
diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c
index a1d126f29463..54eaeb45ce99 100644
--- a/net/netfilter/xt_cgroup.c
+++ b/net/netfilter/xt_cgroup.c
@@ -42,7 +42,8 @@ cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par)
if (skb->sk == NULL || !sk_fullsock(skb->sk))
return false;
- return (info->id == skb->sk->sk_classid) ^ info->invert;
+ return (info->id == sock_cgroup_classid(&skb->sk->sk_cgrp_data)) ^
+ info->invert;
}
static struct xt_match cgroup_mt_reg __read_mostly = {
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 99bbc829868d..aab11f7ab547 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -437,6 +437,7 @@ static struct xt_match connlimit_mt_reg __read_mostly = {
.checkentry = connlimit_mt_check,
.match = connlimit_mt,
.matchsize = sizeof(struct xt_connlimit_info),
+ .usersize = offsetof(struct xt_connlimit_info, data),
.destroy = connlimit_mt_destroy,
.me = THIS_MODULE,
};
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 7381be0cdcdf..d893cc133de4 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -726,6 +726,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = {
.family = NFPROTO_IPV4,
.match = hashlimit_mt,
.matchsize = sizeof(struct xt_hashlimit_mtinfo1),
+ .usersize = offsetof(struct xt_hashlimit_mtinfo1, hinfo),
.checkentry = hashlimit_mt_check,
.destroy = hashlimit_mt_destroy,
.me = THIS_MODULE,
@@ -737,6 +738,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = {
.family = NFPROTO_IPV6,
.match = hashlimit_mt,
.matchsize = sizeof(struct xt_hashlimit_mtinfo1),
+ .usersize = offsetof(struct xt_hashlimit_mtinfo1, hinfo),
.checkentry = hashlimit_mt_check,
.destroy = hashlimit_mt_destroy,
.me = THIS_MODULE,
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index bef850596558..e84de7656289 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -193,6 +193,7 @@ static struct xt_match limit_mt_reg __read_mostly = {
.compat_from_user = limit_mt_compat_from_user,
.compat_to_user = limit_mt_compat_to_user,
#endif
+ .usersize = offsetof(struct xt_rateinfo, prev),
.me = THIS_MODULE,
};
diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c
index 3048a7e3a90a..e9adf6ebca30 100644
--- a/net/netfilter/xt_nfacct.c
+++ b/net/netfilter/xt_nfacct.c
@@ -62,6 +62,7 @@ static struct xt_match nfacct_mt_reg __read_mostly = {
.match = nfacct_mt,
.destroy = nfacct_mt_destroy,
.matchsize = sizeof(struct xt_nfacct_match_info),
+ .usersize = offsetof(struct xt_nfacct_match_info, nfacct),
.me = THIS_MODULE,
};
diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c
deleted file mode 100644
index e82524b21d07..000000000000
--- a/net/netfilter/xt_qtaguid.c
+++ /dev/null
@@ -1,3015 +0,0 @@
-/*
- * Kernel iptables module to track stats for packets based on user tags.
- *
- * (C) 2011 Google, Inc
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-/*
- * There are run-time debug flags enabled via the debug_mask module param, or
- * via the DEFAULT_DEBUG_MASK. See xt_qtaguid_internal.h.
- */
-#define DEBUG
-
-#include <linux/file.h>
-#include <linux/inetdevice.h>
-#include <linux/module.h>
-#include <linux/miscdevice.h>
-#include <linux/netfilter/x_tables.h>
-#include <linux/netfilter/xt_qtaguid.h>
-#include <linux/ratelimit.h>
-#include <linux/seq_file.h>
-#include <linux/skbuff.h>
-#include <linux/workqueue.h>
-#include <net/addrconf.h>
-#include <net/sock.h>
-#include <net/tcp.h>
-#include <net/udp.h>
-
-#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
-#include <linux/netfilter_ipv6/ip6_tables.h>
-#endif
-
-#include <linux/netfilter/xt_socket.h>
-#include "xt_qtaguid_internal.h"
-#include "xt_qtaguid_print.h"
-#include "../../fs/proc/internal.h"
-
-/*
- * We only use the xt_socket funcs within a similar context to avoid unexpected
- * return values.
- */
-#define XT_SOCKET_SUPPORTED_HOOKS \
- ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN))
-
-
-static const char *module_procdirname = "xt_qtaguid";
-static struct proc_dir_entry *xt_qtaguid_procdir;
-
-static unsigned int proc_iface_perms = S_IRUGO;
-module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR);
-
-static struct proc_dir_entry *xt_qtaguid_stats_file;
-static unsigned int proc_stats_perms = S_IRUGO;
-module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR);
-
-static struct proc_dir_entry *xt_qtaguid_ctrl_file;
-
-/* Everybody can write. But proc_ctrl_write_limited is true by default which
- * limits what can be controlled. See the can_*() functions.
- */
-static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO;
-module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR);
-
-/* Limited by default, so the gid of the ctrl and stats proc entries
- * will limit what can be done. See the can_*() functions.
- */
-static bool proc_stats_readall_limited = true;
-static bool proc_ctrl_write_limited = true;
-
-module_param_named(stats_readall_limited, proc_stats_readall_limited, bool,
- S_IRUGO | S_IWUSR);
-module_param_named(ctrl_write_limited, proc_ctrl_write_limited, bool,
- S_IRUGO | S_IWUSR);
-
-/*
- * Limit the number of active tags (via socket tags) for a given UID.
- * Multiple processes could share the UID.
- */
-static int max_sock_tags = DEFAULT_MAX_SOCK_TAGS;
-module_param(max_sock_tags, int, S_IRUGO | S_IWUSR);
-
-/*
- * After the kernel has initiallized this module, it is still possible
- * to make it passive.
- * Setting passive to Y:
- * - the iface stats handling will not act on notifications.
- * - iptables matches will never match.
- * - ctrl commands silently succeed.
- * - stats are always empty.
- * This is mostly usefull when a bug is suspected.
- */
-static bool module_passive;
-module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR);
-
-/*
- * Control how qtaguid data is tracked per proc/uid.
- * Setting tag_tracking_passive to Y:
- * - don't create proc specific structs to track tags
- * - don't check that active tag stats exceed some limits.
- * - don't clean up socket tags on process exits.
- * This is mostly usefull when a bug is suspected.
- */
-static bool qtu_proc_handling_passive;
-module_param_named(tag_tracking_passive, qtu_proc_handling_passive, bool,
- S_IRUGO | S_IWUSR);
-
-#define QTU_DEV_NAME "xt_qtaguid"
-
-uint qtaguid_debug_mask = DEFAULT_DEBUG_MASK;
-module_param_named(debug_mask, qtaguid_debug_mask, uint, S_IRUGO | S_IWUSR);
-
-/*---------------------------------------------------------------------------*/
-static const char *iface_stat_procdirname = "iface_stat";
-static struct proc_dir_entry *iface_stat_procdir;
-/*
- * The iface_stat_all* will go away once userspace gets use to the new fields
- * that have a format line.
- */
-static const char *iface_stat_all_procfilename = "iface_stat_all";
-static struct proc_dir_entry *iface_stat_all_procfile;
-static const char *iface_stat_fmt_procfilename = "iface_stat_fmt";
-static struct proc_dir_entry *iface_stat_fmt_procfile;
-
-
-static LIST_HEAD(iface_stat_list);
-static DEFINE_SPINLOCK(iface_stat_list_lock);
-
-static struct rb_root sock_tag_tree = RB_ROOT;
-static DEFINE_SPINLOCK(sock_tag_list_lock);
-
-static struct rb_root tag_counter_set_tree = RB_ROOT;
-static DEFINE_SPINLOCK(tag_counter_set_list_lock);
-
-static struct rb_root uid_tag_data_tree = RB_ROOT;
-static DEFINE_SPINLOCK(uid_tag_data_tree_lock);
-
-static struct rb_root proc_qtu_data_tree = RB_ROOT;
-/* No proc_qtu_data_tree_lock; use uid_tag_data_tree_lock */
-
-static struct qtaguid_event_counts qtu_events;
-/*----------------------------------------------*/
-static bool can_manipulate_uids(void)
-{
- /* root pwnd */
- return in_egroup_p(xt_qtaguid_ctrl_file->gid)
- || unlikely(!from_kuid(&init_user_ns, current_fsuid())) || unlikely(!proc_ctrl_write_limited)
- || unlikely(uid_eq(current_fsuid(), xt_qtaguid_ctrl_file->uid));
-}
-
-static bool can_impersonate_uid(kuid_t uid)
-{
- return uid_eq(uid, current_fsuid()) || can_manipulate_uids();
-}
-
-static bool can_read_other_uid_stats(kuid_t uid)
-{
- /* root pwnd */
- return in_egroup_p(xt_qtaguid_stats_file->gid)
- || unlikely(!from_kuid(&init_user_ns, current_fsuid())) || uid_eq(uid, current_fsuid())
- || unlikely(!proc_stats_readall_limited)
- || unlikely(uid_eq(current_fsuid(), xt_qtaguid_ctrl_file->uid));
-}
-
-static inline void dc_add_byte_packets(struct data_counters *counters, int set,
- enum ifs_tx_rx direction,
- enum ifs_proto ifs_proto,
- int bytes,
- int packets)
-{
- counters->bpc[set][direction][ifs_proto].bytes += bytes;
- counters->bpc[set][direction][ifs_proto].packets += packets;
-}
-
-static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag)
-{
- struct rb_node *node = root->rb_node;
-
- while (node) {
- struct tag_node *data = rb_entry(node, struct tag_node, node);
- int result;
- RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
- " node=%p data=%p\n", tag, node, data);
- result = tag_compare(tag, data->tag);
- RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
- " data.tag=0x%llx (uid=%u) res=%d\n",
- tag, data->tag, get_uid_from_tag(data->tag), result);
- if (result < 0)
- node = node->rb_left;
- else if (result > 0)
- node = node->rb_right;
- else
- return data;
- }
- return NULL;
-}
-
-static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root)
-{
- struct rb_node **new = &(root->rb_node), *parent = NULL;
-
- /* Figure out where to put new node */
- while (*new) {
- struct tag_node *this = rb_entry(*new, struct tag_node,
- node);
- int result = tag_compare(data->tag, this->tag);
- RB_DEBUG("qtaguid: %s(): tag=0x%llx"
- " (uid=%u)\n", __func__,
- this->tag,
- get_uid_from_tag(this->tag));
- parent = *new;
- if (result < 0)
- new = &((*new)->rb_left);
- else if (result > 0)
- new = &((*new)->rb_right);
- else
- BUG();
- }
-
- /* Add new node and rebalance tree. */
- rb_link_node(&data->node, parent, new);
- rb_insert_color(&data->node, root);
-}
-
-static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root)
-{
- tag_node_tree_insert(&data->tn, root);
-}
-
-static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag)
-{
- struct tag_node *node = tag_node_tree_search(root, tag);
- if (!node)
- return NULL;
- return rb_entry(&node->node, struct tag_stat, tn.node);
-}
-
-static void tag_counter_set_tree_insert(struct tag_counter_set *data,
- struct rb_root *root)
-{
- tag_node_tree_insert(&data->tn, root);
-}
-
-static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root,
- tag_t tag)
-{
- struct tag_node *node = tag_node_tree_search(root, tag);
- if (!node)
- return NULL;
- return rb_entry(&node->node, struct tag_counter_set, tn.node);
-
-}
-
-static void tag_ref_tree_insert(struct tag_ref *data, struct rb_root *root)
-{
- tag_node_tree_insert(&data->tn, root);
-}
-
-static struct tag_ref *tag_ref_tree_search(struct rb_root *root, tag_t tag)
-{
- struct tag_node *node = tag_node_tree_search(root, tag);
- if (!node)
- return NULL;
- return rb_entry(&node->node, struct tag_ref, tn.node);
-}
-
-static struct sock_tag *sock_tag_tree_search(struct rb_root *root,
- const struct sock *sk)
-{
- struct rb_node *node = root->rb_node;
-
- while (node) {
- struct sock_tag *data = rb_entry(node, struct sock_tag,
- sock_node);
- if (sk < data->sk)
- node = node->rb_left;
- else if (sk > data->sk)
- node = node->rb_right;
- else
- return data;
- }
- return NULL;
-}
-
-static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root)
-{
- struct rb_node **new = &(root->rb_node), *parent = NULL;
-
- /* Figure out where to put new node */
- while (*new) {
- struct sock_tag *this = rb_entry(*new, struct sock_tag,
- sock_node);
- parent = *new;
- if (data->sk < this->sk)
- new = &((*new)->rb_left);
- else if (data->sk > this->sk)
- new = &((*new)->rb_right);
- else
- BUG();
- }
-
- /* Add new node and rebalance tree. */
- rb_link_node(&data->sock_node, parent, new);
- rb_insert_color(&data->sock_node, root);
-}
-
-static void sock_tag_tree_erase(struct rb_root *st_to_free_tree)
-{
- struct rb_node *node;
- struct sock_tag *st_entry;
-
- node = rb_first(st_to_free_tree);
- while (node) {
- st_entry = rb_entry(node, struct sock_tag, sock_node);
- node = rb_next(node);
- CT_DEBUG("qtaguid: %s(): "
- "erase st: sk=%p tag=0x%llx (uid=%u)\n", __func__,
- st_entry->sk,
- st_entry->tag,
- get_uid_from_tag(st_entry->tag));
- rb_erase(&st_entry->sock_node, st_to_free_tree);
- sock_put(st_entry->sk);
- kfree(st_entry);
- }
-}
-
-static struct proc_qtu_data *proc_qtu_data_tree_search(struct rb_root *root,
- const pid_t pid)
-{
- struct rb_node *node = root->rb_node;
-
- while (node) {
- struct proc_qtu_data *data = rb_entry(node,
- struct proc_qtu_data,
- node);
- if (pid < data->pid)
- node = node->rb_left;
- else if (pid > data->pid)
- node = node->rb_right;
- else
- return data;
- }
- return NULL;
-}
-
-static void proc_qtu_data_tree_insert(struct proc_qtu_data *data,
- struct rb_root *root)
-{
- struct rb_node **new = &(root->rb_node), *parent = NULL;
-
- /* Figure out where to put new node */
- while (*new) {
- struct proc_qtu_data *this = rb_entry(*new,
- struct proc_qtu_data,
- node);
- parent = *new;
- if (data->pid < this->pid)
- new = &((*new)->rb_left);
- else if (data->pid > this->pid)
- new = &((*new)->rb_right);
- else
- BUG();
- }
-
- /* Add new node and rebalance tree. */
- rb_link_node(&data->node, parent, new);
- rb_insert_color(&data->node, root);
-}
-
-static void uid_tag_data_tree_insert(struct uid_tag_data *data,
- struct rb_root *root)
-{
- struct rb_node **new = &(root->rb_node), *parent = NULL;
-
- /* Figure out where to put new node */
- while (*new) {
- struct uid_tag_data *this = rb_entry(*new,
- struct uid_tag_data,
- node);
- parent = *new;
- if (data->uid < this->uid)
- new = &((*new)->rb_left);
- else if (data->uid > this->uid)
- new = &((*new)->rb_right);
- else
- BUG();
- }
-
- /* Add new node and rebalance tree. */
- rb_link_node(&data->node, parent, new);
- rb_insert_color(&data->node, root);
-}
-
-static struct uid_tag_data *uid_tag_data_tree_search(struct rb_root *root,
- uid_t uid)
-{
- struct rb_node *node = root->rb_node;
-
- while (node) {
- struct uid_tag_data *data = rb_entry(node,
- struct uid_tag_data,
- node);
- if (uid < data->uid)
- node = node->rb_left;
- else if (uid > data->uid)
- node = node->rb_right;
- else
- return data;
- }
- return NULL;
-}
-
-/*
- * Allocates a new uid_tag_data struct if needed.
- * Returns a pointer to the found or allocated uid_tag_data.
- * Returns a PTR_ERR on failures, and lock is not held.
- * If found is not NULL:
- * sets *found to true if not allocated.
- * sets *found to false if allocated.
- */
-struct uid_tag_data *get_uid_data(uid_t uid, bool *found_res)
-{
- struct uid_tag_data *utd_entry;
-
- /* Look for top level uid_tag_data for the UID */
- utd_entry = uid_tag_data_tree_search(&uid_tag_data_tree, uid);
- DR_DEBUG("qtaguid: get_uid_data(%u) utd=%p\n", uid, utd_entry);
-
- if (found_res)
- *found_res = utd_entry;
- if (utd_entry)
- return utd_entry;
-
- utd_entry = kzalloc(sizeof(*utd_entry), GFP_ATOMIC);
- if (!utd_entry) {
- pr_err("qtaguid: get_uid_data(%u): "
- "tag data alloc failed\n", uid);
- return ERR_PTR(-ENOMEM);
- }
-
- utd_entry->uid = uid;
- utd_entry->tag_ref_tree = RB_ROOT;
- uid_tag_data_tree_insert(utd_entry, &uid_tag_data_tree);
- DR_DEBUG("qtaguid: get_uid_data(%u) new utd=%p\n", uid, utd_entry);
- return utd_entry;
-}
-
-/* Never returns NULL. Either PTR_ERR or a valid ptr. */
-static struct tag_ref *new_tag_ref(tag_t new_tag,
- struct uid_tag_data *utd_entry)
-{
- struct tag_ref *tr_entry;
- int res;
-
- if (utd_entry->num_active_tags + 1 > max_sock_tags) {
- pr_info("qtaguid: new_tag_ref(0x%llx): "
- "tag ref alloc quota exceeded. max=%d\n",
- new_tag, max_sock_tags);
- res = -EMFILE;
- goto err_res;
-
- }
-
- tr_entry = kzalloc(sizeof(*tr_entry), GFP_ATOMIC);
- if (!tr_entry) {
- pr_err("qtaguid: new_tag_ref(0x%llx): "
- "tag ref alloc failed\n",
- new_tag);
- res = -ENOMEM;
- goto err_res;
- }
- tr_entry->tn.tag = new_tag;
- /* tr_entry->num_sock_tags handled by caller */
- utd_entry->num_active_tags++;
- tag_ref_tree_insert(tr_entry, &utd_entry->tag_ref_tree);
- DR_DEBUG("qtaguid: new_tag_ref(0x%llx): "
- " inserted new tag ref %p\n",
- new_tag, tr_entry);
- return tr_entry;
-
-err_res:
- return ERR_PTR(res);
-}
-
-static struct tag_ref *lookup_tag_ref(tag_t full_tag,
- struct uid_tag_data **utd_res)
-{
- struct uid_tag_data *utd_entry;
- struct tag_ref *tr_entry;
- bool found_utd;
- uid_t uid = get_uid_from_tag(full_tag);
-
- DR_DEBUG("qtaguid: lookup_tag_ref(tag=0x%llx (uid=%u))\n",
- full_tag, uid);
-
- utd_entry = get_uid_data(uid, &found_utd);
- if (IS_ERR_OR_NULL(utd_entry)) {
- if (utd_res)
- *utd_res = utd_entry;
- return NULL;
- }
-
- tr_entry = tag_ref_tree_search(&utd_entry->tag_ref_tree, full_tag);
- if (utd_res)
- *utd_res = utd_entry;
- DR_DEBUG("qtaguid: lookup_tag_ref(0x%llx) utd_entry=%p tr_entry=%p\n",
- full_tag, utd_entry, tr_entry);
- return tr_entry;
-}
-
-/* Never returns NULL. Either PTR_ERR or a valid ptr. */
-static struct tag_ref *get_tag_ref(tag_t full_tag,
- struct uid_tag_data **utd_res)
-{
- struct uid_tag_data *utd_entry;
- struct tag_ref *tr_entry;
-
- DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n",
- full_tag);
- tr_entry = lookup_tag_ref(full_tag, &utd_entry);
- BUG_ON(IS_ERR_OR_NULL(utd_entry));
- if (!tr_entry)
- tr_entry = new_tag_ref(full_tag, utd_entry);
-
- if (utd_res)
- *utd_res = utd_entry;
- DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n",
- full_tag, utd_entry, tr_entry);
- return tr_entry;
-}
-
-/* Checks and maybe frees the UID Tag Data entry */
-static void put_utd_entry(struct uid_tag_data *utd_entry)
-{
- /* Are we done with the UID tag data entry? */
- if (RB_EMPTY_ROOT(&utd_entry->tag_ref_tree) &&
- !utd_entry->num_pqd) {
- DR_DEBUG("qtaguid: %s(): "
- "erase utd_entry=%p uid=%u "
- "by pid=%u tgid=%u uid=%u\n", __func__,
- utd_entry, utd_entry->uid,
- current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
- BUG_ON(utd_entry->num_active_tags);
- rb_erase(&utd_entry->node, &uid_tag_data_tree);
- kfree(utd_entry);
- } else {
- DR_DEBUG("qtaguid: %s(): "
- "utd_entry=%p still has %d tags %d proc_qtu_data\n",
- __func__, utd_entry, utd_entry->num_active_tags,
- utd_entry->num_pqd);
- BUG_ON(!(utd_entry->num_active_tags ||
- utd_entry->num_pqd));
- }
-}
-
-/*
- * If no sock_tags are using this tag_ref,
- * decrements refcount of utd_entry, removes tr_entry
- * from utd_entry->tag_ref_tree and frees.
- */
-static void free_tag_ref_from_utd_entry(struct tag_ref *tr_entry,
- struct uid_tag_data *utd_entry)
-{
- DR_DEBUG("qtaguid: %s(): %p tag=0x%llx (uid=%u)\n", __func__,
- tr_entry, tr_entry->tn.tag,
- get_uid_from_tag(tr_entry->tn.tag));
- if (!tr_entry->num_sock_tags) {
- BUG_ON(!utd_entry->num_active_tags);
- utd_entry->num_active_tags--;
- rb_erase(&tr_entry->tn.node, &utd_entry->tag_ref_tree);
- DR_DEBUG("qtaguid: %s(): erased %p\n", __func__, tr_entry);
- kfree(tr_entry);
- }
-}
-
-static void put_tag_ref_tree(tag_t full_tag, struct uid_tag_data *utd_entry)
-{
- struct rb_node *node;
- struct tag_ref *tr_entry;
- tag_t acct_tag;
-
- DR_DEBUG("qtaguid: %s(tag=0x%llx (uid=%u))\n", __func__,
- full_tag, get_uid_from_tag(full_tag));
- acct_tag = get_atag_from_tag(full_tag);
- node = rb_first(&utd_entry->tag_ref_tree);
- while (node) {
- tr_entry = rb_entry(node, struct tag_ref, tn.node);
- node = rb_next(node);
- if (!acct_tag || tr_entry->tn.tag == full_tag)
- free_tag_ref_from_utd_entry(tr_entry, utd_entry);
- }
-}
-
-static ssize_t read_proc_u64(struct file *file, char __user *buf,
- size_t size, loff_t *ppos)
-{
- uint64_t *valuep = PDE_DATA(file_inode(file));
- char tmp[24];
- size_t tmp_size;
-
- tmp_size = scnprintf(tmp, sizeof(tmp), "%llu\n", *valuep);
- return simple_read_from_buffer(buf, size, ppos, tmp, tmp_size);
-}
-
-static ssize_t read_proc_bool(struct file *file, char __user *buf,
- size_t size, loff_t *ppos)
-{
- bool *valuep = PDE_DATA(file_inode(file));
- char tmp[24];
- size_t tmp_size;
-
- tmp_size = scnprintf(tmp, sizeof(tmp), "%u\n", *valuep);
- return simple_read_from_buffer(buf, size, ppos, tmp, tmp_size);
-}
-
-static int get_active_counter_set(tag_t tag)
-{
- int active_set = 0;
- struct tag_counter_set *tcs;
-
- MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)"
- " (uid=%u)\n",
- tag, get_uid_from_tag(tag));
- /* For now we only handle UID tags for active sets */
- tag = get_utag_from_tag(tag);
- spin_lock_bh(&tag_counter_set_list_lock);
- tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
- if (tcs)
- active_set = tcs->active_set;
- spin_unlock_bh(&tag_counter_set_list_lock);
- return active_set;
-}
-
-/*
- * Find the entry for tracking the specified interface.
- * Caller must hold iface_stat_list_lock
- */
-static struct iface_stat *get_iface_entry(const char *ifname)
-{
- struct iface_stat *iface_entry;
-
- /* Find the entry for tracking the specified tag within the interface */
- if (ifname == NULL) {
- pr_info("qtaguid: iface_stat: get() NULL device name\n");
- return NULL;
- }
-
- /* Iterate over interfaces */
- list_for_each_entry(iface_entry, &iface_stat_list, list) {
- if (!strcmp(ifname, iface_entry->ifname))
- goto done;
- }
- iface_entry = NULL;
-done:
- return iface_entry;
-}
-
-/* This is for fmt2 only */
-static void pp_iface_stat_header(struct seq_file *m)
-{
- seq_puts(m,
- "ifname "
- "total_skb_rx_bytes total_skb_rx_packets "
- "total_skb_tx_bytes total_skb_tx_packets "
- "rx_tcp_bytes rx_tcp_packets "
- "rx_udp_bytes rx_udp_packets "
- "rx_other_bytes rx_other_packets "
- "tx_tcp_bytes tx_tcp_packets "
- "tx_udp_bytes tx_udp_packets "
- "tx_other_bytes tx_other_packets\n"
- );
-}
-
-static void pp_iface_stat_line(struct seq_file *m,
- struct iface_stat *iface_entry)
-{
- struct data_counters *cnts;
- int cnt_set = 0; /* We only use one set for the device */
- cnts = &iface_entry->totals_via_skb;
- seq_printf(m, "%s %llu %llu %llu %llu %llu %llu %llu %llu "
- "%llu %llu %llu %llu %llu %llu %llu %llu\n",
- iface_entry->ifname,
- dc_sum_bytes(cnts, cnt_set, IFS_RX),
- dc_sum_packets(cnts, cnt_set, IFS_RX),
- dc_sum_bytes(cnts, cnt_set, IFS_TX),
- dc_sum_packets(cnts, cnt_set, IFS_TX),
- cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
- cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
- cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
- cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
- cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
- cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
- cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
- cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
- cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
- cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
- cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
- cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
-}
-
-struct proc_iface_stat_fmt_info {
- int fmt;
-};
-
-static void *iface_stat_fmt_proc_start(struct seq_file *m, loff_t *pos)
-{
- struct proc_iface_stat_fmt_info *p = m->private;
- loff_t n = *pos;
-
- /*
- * This lock will prevent iface_stat_update() from changing active,
- * and in turn prevent an interface from unregistering itself.
- */
- spin_lock_bh(&iface_stat_list_lock);
-
- if (unlikely(module_passive))
- return NULL;
-
- if (!n && p->fmt == 2)
- pp_iface_stat_header(m);
-
- return seq_list_start(&iface_stat_list, n);
-}
-
-static void *iface_stat_fmt_proc_next(struct seq_file *m, void *p, loff_t *pos)
-{
- return seq_list_next(p, &iface_stat_list, pos);
-}
-
-static void iface_stat_fmt_proc_stop(struct seq_file *m, void *p)
-{
- spin_unlock_bh(&iface_stat_list_lock);
-}
-
-static int iface_stat_fmt_proc_show(struct seq_file *m, void *v)
-{
- struct proc_iface_stat_fmt_info *p = m->private;
- struct iface_stat *iface_entry;
- struct rtnl_link_stats64 dev_stats, *stats;
- struct rtnl_link_stats64 no_dev_stats = {0};
-
-
- CT_DEBUG("qtaguid:proc iface_stat_fmt pid=%u tgid=%u uid=%u\n",
- current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
-
- iface_entry = list_entry(v, struct iface_stat, list);
-
- if (iface_entry->active) {
- stats = dev_get_stats(iface_entry->net_dev,
- &dev_stats);
- } else {
- stats = &no_dev_stats;
- }
- /*
- * If the meaning of the data changes, then update the fmtX
- * string.
- */
- if (p->fmt == 1) {
- seq_printf(m, "%s %d %llu %llu %llu %llu %llu %llu %llu %llu\n",
- iface_entry->ifname,
- iface_entry->active,
- iface_entry->totals_via_dev[IFS_RX].bytes,
- iface_entry->totals_via_dev[IFS_RX].packets,
- iface_entry->totals_via_dev[IFS_TX].bytes,
- iface_entry->totals_via_dev[IFS_TX].packets,
- stats->rx_bytes, stats->rx_packets,
- stats->tx_bytes, stats->tx_packets
- );
- } else {
- pp_iface_stat_line(m, iface_entry);
- }
- return 0;
-}
-
-static const struct file_operations read_u64_fops = {
- .read = read_proc_u64,
- .llseek = default_llseek,
-};
-
-static const struct file_operations read_bool_fops = {
- .read = read_proc_bool,
- .llseek = default_llseek,
-};
-
-static void iface_create_proc_worker(struct work_struct *work)
-{
- struct proc_dir_entry *proc_entry;
- struct iface_stat_work *isw = container_of(work, struct iface_stat_work,
- iface_work);
- struct iface_stat *new_iface = isw->iface_entry;
-
- /* iface_entries are not deleted, so safe to manipulate. */
- proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir);
- if (IS_ERR_OR_NULL(proc_entry)) {
- pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n");
- kfree(isw);
- return;
- }
-
- new_iface->proc_ptr = proc_entry;
-
- proc_create_data("tx_bytes", proc_iface_perms, proc_entry,
- &read_u64_fops,
- &new_iface->totals_via_dev[IFS_TX].bytes);
- proc_create_data("rx_bytes", proc_iface_perms, proc_entry,
- &read_u64_fops,
- &new_iface->totals_via_dev[IFS_RX].bytes);
- proc_create_data("tx_packets", proc_iface_perms, proc_entry,
- &read_u64_fops,
- &new_iface->totals_via_dev[IFS_TX].packets);
- proc_create_data("rx_packets", proc_iface_perms, proc_entry,
- &read_u64_fops,
- &new_iface->totals_via_dev[IFS_RX].packets);
- proc_create_data("active", proc_iface_perms, proc_entry,
- &read_bool_fops, &new_iface->active);
-
- IF_DEBUG("qtaguid: iface_stat: create_proc(): done "
- "entry=%p dev=%s\n", new_iface, new_iface->ifname);
- kfree(isw);
-}
-
-/*
- * Will set the entry's active state, and
- * update the net_dev accordingly also.
- */
-static void _iface_stat_set_active(struct iface_stat *entry,
- struct net_device *net_dev,
- bool activate)
-{
- if (activate) {
- entry->net_dev = net_dev;
- entry->active = true;
- IF_DEBUG("qtaguid: %s(%s): "
- "enable tracking. rfcnt=%d\n", __func__,
- entry->ifname,
- __this_cpu_read(*net_dev->pcpu_refcnt));
- } else {
- entry->active = false;
- entry->net_dev = NULL;
- IF_DEBUG("qtaguid: %s(%s): "
- "disable tracking. rfcnt=%d\n", __func__,
- entry->ifname,
- __this_cpu_read(*net_dev->pcpu_refcnt));
-
- }
-}
-
-/* Caller must hold iface_stat_list_lock */
-static struct iface_stat *iface_alloc(struct net_device *net_dev)
-{
- struct iface_stat *new_iface;
- struct iface_stat_work *isw;
-
- new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC);
- if (new_iface == NULL) {
- pr_err("qtaguid: iface_stat: create(%s): "
- "iface_stat alloc failed\n", net_dev->name);
- return NULL;
- }
- new_iface->ifname = kstrdup(net_dev->name, GFP_ATOMIC);
- if (new_iface->ifname == NULL) {
- pr_err("qtaguid: iface_stat: create(%s): "
- "ifname alloc failed\n", net_dev->name);
- kfree(new_iface);
- return NULL;
- }
- spin_lock_init(&new_iface->tag_stat_list_lock);
- new_iface->tag_stat_tree = RB_ROOT;
- _iface_stat_set_active(new_iface, net_dev, true);
-
- /*
- * ipv6 notifier chains are atomic :(
- * No create_proc_read_entry() for you!
- */
- isw = kmalloc(sizeof(*isw), GFP_ATOMIC);
- if (!isw) {
- pr_err("qtaguid: iface_stat: create(%s): "
- "work alloc failed\n", new_iface->ifname);
- _iface_stat_set_active(new_iface, net_dev, false);
- kfree(new_iface->ifname);
- kfree(new_iface);
- return NULL;
- }
- isw->iface_entry = new_iface;
- INIT_WORK(&isw->iface_work, iface_create_proc_worker);
- schedule_work(&isw->iface_work);
- list_add(&new_iface->list, &iface_stat_list);
- return new_iface;
-}
-
-static void iface_check_stats_reset_and_adjust(struct net_device *net_dev,
- struct iface_stat *iface)
-{
- struct rtnl_link_stats64 dev_stats, *stats;
- bool stats_rewound;
-
- stats = dev_get_stats(net_dev, &dev_stats);
- /* No empty packets */
- stats_rewound =
- (stats->rx_bytes < iface->last_known[IFS_RX].bytes)
- || (stats->tx_bytes < iface->last_known[IFS_TX].bytes);
-
- IF_DEBUG("qtaguid: %s(%s): iface=%p netdev=%p "
- "bytes rx/tx=%llu/%llu "
- "active=%d last_known=%d "
- "stats_rewound=%d\n", __func__,
- net_dev ? net_dev->name : "?",
- iface, net_dev,
- stats->rx_bytes, stats->tx_bytes,
- iface->active, iface->last_known_valid, stats_rewound);
-
- if (iface->active && iface->last_known_valid && stats_rewound) {
- pr_warn_once("qtaguid: iface_stat: %s(%s): "
- "iface reset its stats unexpectedly\n", __func__,
- net_dev->name);
-
- iface->totals_via_dev[IFS_TX].bytes +=
- iface->last_known[IFS_TX].bytes;
- iface->totals_via_dev[IFS_TX].packets +=
- iface->last_known[IFS_TX].packets;
- iface->totals_via_dev[IFS_RX].bytes +=
- iface->last_known[IFS_RX].bytes;
- iface->totals_via_dev[IFS_RX].packets +=
- iface->last_known[IFS_RX].packets;
- iface->last_known_valid = false;
- IF_DEBUG("qtaguid: %s(%s): iface=%p "
- "used last known bytes rx/tx=%llu/%llu\n", __func__,
- iface->ifname, iface, iface->last_known[IFS_RX].bytes,
- iface->last_known[IFS_TX].bytes);
- }
-}
-
-/*
- * Create a new entry for tracking the specified interface.
- * Do nothing if the entry already exists.
- * Called when an interface is configured with a valid IP address.
- */
-static void iface_stat_create(struct net_device *net_dev,
- struct in_ifaddr *ifa)
-{
- struct in_device *in_dev = NULL;
- const char *ifname;
- struct iface_stat *entry;
- __be32 ipaddr = 0;
- struct iface_stat *new_iface;
-
- IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n",
- net_dev ? net_dev->name : "?",
- ifa, net_dev);
- if (!net_dev) {
- pr_err("qtaguid: iface_stat: create(): no net dev\n");
- return;
- }
-
- ifname = net_dev->name;
- if (!ifa) {
- in_dev = in_dev_get(net_dev);
- if (!in_dev) {
- pr_err("qtaguid: iface_stat: create(%s): no inet dev\n",
- ifname);
- return;
- }
- IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n",
- ifname, in_dev);
- for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
- IF_DEBUG("qtaguid: iface_stat: create(%s): "
- "ifa=%p ifa_label=%s\n",
- ifname, ifa,
- ifa->ifa_label);
- if (!strcmp(ifname, ifa->ifa_label))
- break;
- }
- }
-
- if (!ifa) {
- IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n",
- ifname);
- goto done_put;
- }
- ipaddr = ifa->ifa_local;
-
- spin_lock_bh(&iface_stat_list_lock);
- entry = get_iface_entry(ifname);
- if (entry != NULL) {
- IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n",
- ifname, entry);
- iface_check_stats_reset_and_adjust(net_dev, entry);
- _iface_stat_set_active(entry, net_dev, true);
- IF_DEBUG("qtaguid: %s(%s): "
- "tracking now %d on ip=%pI4\n", __func__,
- entry->ifname, true, &ipaddr);
- goto done_unlock_put;
- }
-
- new_iface = iface_alloc(net_dev);
- IF_DEBUG("qtaguid: iface_stat: create(%s): done "
- "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr);
-done_unlock_put:
- spin_unlock_bh(&iface_stat_list_lock);
-done_put:
- if (in_dev)
- in_dev_put(in_dev);
-}
-
-static void iface_stat_create_ipv6(struct net_device *net_dev,
- struct inet6_ifaddr *ifa)
-{
- struct in_device *in_dev;
- const char *ifname;
- struct iface_stat *entry;
- struct iface_stat *new_iface;
- int addr_type;
-
- IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n",
- ifa, net_dev, net_dev ? net_dev->name : "");
- if (!net_dev) {
- pr_err("qtaguid: iface_stat: create6(): no net dev!\n");
- return;
- }
- ifname = net_dev->name;
-
- in_dev = in_dev_get(net_dev);
- if (!in_dev) {
- pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n",
- ifname);
- return;
- }
-
- IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n",
- ifname, in_dev);
-
- if (!ifa) {
- IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n",
- ifname);
- goto done_put;
- }
- addr_type = ipv6_addr_type(&ifa->addr);
-
- spin_lock_bh(&iface_stat_list_lock);
- entry = get_iface_entry(ifname);
- if (entry != NULL) {
- IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
- ifname, entry);
- iface_check_stats_reset_and_adjust(net_dev, entry);
- _iface_stat_set_active(entry, net_dev, true);
- IF_DEBUG("qtaguid: %s(%s): "
- "tracking now %d on ip=%pI6c\n", __func__,
- entry->ifname, true, &ifa->addr);
- goto done_unlock_put;
- }
-
- new_iface = iface_alloc(net_dev);
- IF_DEBUG("qtaguid: iface_stat: create6(%s): done "
- "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr);
-
-done_unlock_put:
- spin_unlock_bh(&iface_stat_list_lock);
-done_put:
- in_dev_put(in_dev);
-}
-
-static struct sock_tag *get_sock_stat_nl(const struct sock *sk)
-{
- MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk);
- return sock_tag_tree_search(&sock_tag_tree, sk);
-}
-
-static struct sock_tag *get_sock_stat(const struct sock *sk)
-{
- struct sock_tag *sock_tag_entry;
- MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk);
- if (!sk)
- return NULL;
- spin_lock_bh(&sock_tag_list_lock);
- sock_tag_entry = get_sock_stat_nl(sk);
- spin_unlock_bh(&sock_tag_list_lock);
- return sock_tag_entry;
-}
-
-static int ipx_proto(const struct sk_buff *skb,
- struct xt_action_param *par)
-{
- int thoff = 0, tproto;
-
- switch (par->family) {
- case NFPROTO_IPV6:
- tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
- if (tproto < 0)
- MT_DEBUG("%s(): transport header not found in ipv6"
- " skb=%p\n", __func__, skb);
- break;
- case NFPROTO_IPV4:
- tproto = ip_hdr(skb)->protocol;
- break;
- default:
- tproto = IPPROTO_RAW;
- }
- return tproto;
-}
-
-static void
-data_counters_update(struct data_counters *dc, int set,
- enum ifs_tx_rx direction, int proto, int bytes)
-{
- switch (proto) {
- case IPPROTO_TCP:
- dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1);
- break;
- case IPPROTO_UDP:
- dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1);
- break;
- case IPPROTO_IP:
- default:
- dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes,
- 1);
- break;
- }
-}
-
-/*
- * Update stats for the specified interface. Do nothing if the entry
- * does not exist (when a device was never configured with an IP address).
- * Called when an device is being unregistered.
- */
-static void iface_stat_update(struct net_device *net_dev, bool stash_only)
-{
- struct rtnl_link_stats64 dev_stats, *stats;
- struct iface_stat *entry;
-
- stats = dev_get_stats(net_dev, &dev_stats);
- spin_lock_bh(&iface_stat_list_lock);
- entry = get_iface_entry(net_dev->name);
- if (entry == NULL) {
- IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n",
- net_dev->name);
- spin_unlock_bh(&iface_stat_list_lock);
- return;
- }
-
- IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
- net_dev->name, entry);
- if (!entry->active) {
- IF_DEBUG("qtaguid: %s(%s): already disabled\n", __func__,
- net_dev->name);
- spin_unlock_bh(&iface_stat_list_lock);
- return;
- }
-
- if (stash_only) {
- entry->last_known[IFS_TX].bytes = stats->tx_bytes;
- entry->last_known[IFS_TX].packets = stats->tx_packets;
- entry->last_known[IFS_RX].bytes = stats->rx_bytes;
- entry->last_known[IFS_RX].packets = stats->rx_packets;
- entry->last_known_valid = true;
- IF_DEBUG("qtaguid: %s(%s): "
- "dev stats stashed rx/tx=%llu/%llu\n", __func__,
- net_dev->name, stats->rx_bytes, stats->tx_bytes);
- spin_unlock_bh(&iface_stat_list_lock);
- return;
- }
- entry->totals_via_dev[IFS_TX].bytes += stats->tx_bytes;
- entry->totals_via_dev[IFS_TX].packets += stats->tx_packets;
- entry->totals_via_dev[IFS_RX].bytes += stats->rx_bytes;
- entry->totals_via_dev[IFS_RX].packets += stats->rx_packets;
- /* We don't need the last_known[] anymore */
- entry->last_known_valid = false;
- _iface_stat_set_active(entry, net_dev, false);
- IF_DEBUG("qtaguid: %s(%s): "
- "disable tracking. rx/tx=%llu/%llu\n", __func__,
- net_dev->name, stats->rx_bytes, stats->tx_bytes);
- spin_unlock_bh(&iface_stat_list_lock);
-}
-
-/* Guarantied to return a net_device that has a name */
-static void get_dev_and_dir(const struct sk_buff *skb,
- struct xt_action_param *par,
- enum ifs_tx_rx *direction,
- const struct net_device **el_dev)
-{
- BUG_ON(!direction || !el_dev);
-
- if (par->in) {
- *el_dev = par->in;
- *direction = IFS_RX;
- } else if (par->out) {
- *el_dev = par->out;
- *direction = IFS_TX;
- } else {
- pr_err("qtaguid[%d]: %s(): no par->in/out?!!\n",
- par->hooknum, __func__);
- BUG();
- }
- if (skb->dev && *el_dev != skb->dev) {
- MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs par->%s=%p %s\n",
- par->hooknum, skb->dev, skb->dev->name,
- *direction == IFS_RX ? "in" : "out", *el_dev,
- (*el_dev)->name);
- }
-}
-
-/*
- * Update stats for the specified interface from the skb.
- * Do nothing if the entry
- * does not exist (when a device was never configured with an IP address).
- * Called on each sk.
- */
-static void iface_stat_update_from_skb(const struct sk_buff *skb,
- struct xt_action_param *par)
-{
- struct iface_stat *entry;
- const struct net_device *el_dev;
- enum ifs_tx_rx direction;
- int bytes = skb->len;
- int proto;
-
- get_dev_and_dir(skb, par, &direction, &el_dev);
- proto = ipx_proto(skb, par);
- MT_DEBUG("qtaguid[%d]: iface_stat: %s(%s): "
- "type=%d fam=%d proto=%d dir=%d\n",
- par->hooknum, __func__, el_dev->name, el_dev->type,
- par->family, proto, direction);
-
- spin_lock_bh(&iface_stat_list_lock);
- entry = get_iface_entry(el_dev->name);
- if (entry == NULL) {
- IF_DEBUG("qtaguid[%d]: iface_stat: %s(%s): not tracked\n",
- par->hooknum, __func__, el_dev->name);
- spin_unlock_bh(&iface_stat_list_lock);
- return;
- }
-
- IF_DEBUG("qtaguid[%d]: %s(%s): entry=%p\n", par->hooknum, __func__,
- el_dev->name, entry);
-
- data_counters_update(&entry->totals_via_skb, 0, direction, proto,
- bytes);
- spin_unlock_bh(&iface_stat_list_lock);
-}
-
-static void tag_stat_update(struct tag_stat *tag_entry,
- enum ifs_tx_rx direction, int proto, int bytes)
-{
- int active_set;
- active_set = get_active_counter_set(tag_entry->tn.tag);
- MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d "
- "dir=%d proto=%d bytes=%d)\n",
- tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag),
- active_set, direction, proto, bytes);
- data_counters_update(&tag_entry->counters, active_set, direction,
- proto, bytes);
- if (tag_entry->parent_counters)
- data_counters_update(tag_entry->parent_counters, active_set,
- direction, proto, bytes);
-}
-
-/*
- * Create a new entry for tracking the specified {acct_tag,uid_tag} within
- * the interface.
- * iface_entry->tag_stat_list_lock should be held.
- */
-static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry,
- tag_t tag)
-{
- struct tag_stat *new_tag_stat_entry = NULL;
- IF_DEBUG("qtaguid: iface_stat: %s(): ife=%p tag=0x%llx"
- " (uid=%u)\n", __func__,
- iface_entry, tag, get_uid_from_tag(tag));
- new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC);
- if (!new_tag_stat_entry) {
- pr_err("qtaguid: iface_stat: tag stat alloc failed\n");
- goto done;
- }
- new_tag_stat_entry->tn.tag = tag;
- tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree);
-done:
- return new_tag_stat_entry;
-}
-
-static void if_tag_stat_update(const char *ifname, uid_t uid,
- const struct sock *sk, enum ifs_tx_rx direction,
- int proto, int bytes)
-{
- struct tag_stat *tag_stat_entry;
- tag_t tag, acct_tag;
- tag_t uid_tag;
- struct data_counters *uid_tag_counters;
- struct sock_tag *sock_tag_entry;
- struct iface_stat *iface_entry;
- struct tag_stat *new_tag_stat = NULL;
- MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s "
- "uid=%u sk=%p dir=%d proto=%d bytes=%d)\n",
- ifname, uid, sk, direction, proto, bytes);
-
- spin_lock_bh(&iface_stat_list_lock);
- iface_entry = get_iface_entry(ifname);
- if (!iface_entry) {
- pr_err_ratelimited("qtaguid: tag_stat: stat_update() "
- "%s not found\n", ifname);
- spin_unlock_bh(&iface_stat_list_lock);
- return;
- }
- /* It is ok to process data when an iface_entry is inactive */
-
- MT_DEBUG("qtaguid: tag_stat: stat_update() dev=%s entry=%p\n",
- ifname, iface_entry);
-
- /*
- * Look for a tagged sock.
- * It will have an acct_uid.
- */
- sock_tag_entry = get_sock_stat(sk);
- if (sock_tag_entry) {
- tag = sock_tag_entry->tag;
- acct_tag = get_atag_from_tag(tag);
- uid_tag = get_utag_from_tag(tag);
- } else {
- acct_tag = make_atag_from_value(0);
- tag = combine_atag_with_uid(acct_tag, uid);
- uid_tag = make_tag_from_uid(uid);
- }
- MT_DEBUG("qtaguid: tag_stat: stat_update(): "
- " looking for tag=0x%llx (uid=%u) in ife=%p\n",
- tag, get_uid_from_tag(tag), iface_entry);
- /* Loop over tag list under this interface for {acct_tag,uid_tag} */
- spin_lock_bh(&iface_entry->tag_stat_list_lock);
-
- tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
- tag);
- if (tag_stat_entry) {
- /*
- * Updating the {acct_tag, uid_tag} entry handles both stats:
- * {0, uid_tag} will also get updated.
- */
- tag_stat_update(tag_stat_entry, direction, proto, bytes);
- goto unlock;
- }
-
- /* Loop over tag list under this interface for {0,uid_tag} */
- tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
- uid_tag);
- if (!tag_stat_entry) {
- /* Here: the base uid_tag did not exist */
- /*
- * No parent counters. So
- * - No {0, uid_tag} stats and no {acc_tag, uid_tag} stats.
- */
- new_tag_stat = create_if_tag_stat(iface_entry, uid_tag);
- if (!new_tag_stat)
- goto unlock;
- uid_tag_counters = &new_tag_stat->counters;
- } else {
- uid_tag_counters = &tag_stat_entry->counters;
- }
-
- if (acct_tag) {
- /* Create the child {acct_tag, uid_tag} and hook up parent. */
- new_tag_stat = create_if_tag_stat(iface_entry, tag);
- if (!new_tag_stat)
- goto unlock;
- new_tag_stat->parent_counters = uid_tag_counters;
- } else {
- /*
- * For new_tag_stat to be still NULL here would require:
- * {0, uid_tag} exists
- * and {acct_tag, uid_tag} doesn't exist
- * AND acct_tag == 0.
- * Impossible. This reassures us that new_tag_stat
- * below will always be assigned.
- */
- BUG_ON(!new_tag_stat);
- }
- tag_stat_update(new_tag_stat, direction, proto, bytes);
-unlock:
- spin_unlock_bh(&iface_entry->tag_stat_list_lock);
- spin_unlock_bh(&iface_stat_list_lock);
-}
-
-static int iface_netdev_event_handler(struct notifier_block *nb,
- unsigned long event, void *ptr) {
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-
- if (unlikely(module_passive))
- return NOTIFY_DONE;
-
- IF_DEBUG("qtaguid: iface_stat: netdev_event(): "
- "ev=0x%lx/%s netdev=%p->name=%s\n",
- event, netdev_evt_str(event), dev, dev ? dev->name : "");
-
- switch (event) {
- case NETDEV_UP:
- iface_stat_create(dev, NULL);
- atomic64_inc(&qtu_events.iface_events);
- break;
- case NETDEV_DOWN:
- case NETDEV_UNREGISTER:
- iface_stat_update(dev, event == NETDEV_DOWN);
- atomic64_inc(&qtu_events.iface_events);
- break;
- }
- return NOTIFY_DONE;
-}
-
-static int iface_inet6addr_event_handler(struct notifier_block *nb,
- unsigned long event, void *ptr)
-{
- struct inet6_ifaddr *ifa = ptr;
- struct net_device *dev;
-
- if (unlikely(module_passive))
- return NOTIFY_DONE;
-
- IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): "
- "ev=0x%lx/%s ifa=%p\n",
- event, netdev_evt_str(event), ifa);
-
- switch (event) {
- case NETDEV_UP:
- BUG_ON(!ifa || !ifa->idev);
- dev = (struct net_device *)ifa->idev->dev;
- iface_stat_create_ipv6(dev, ifa);
- atomic64_inc(&qtu_events.iface_events);
- break;
- case NETDEV_DOWN:
- case NETDEV_UNREGISTER:
- BUG_ON(!ifa || !ifa->idev);
- dev = (struct net_device *)ifa->idev->dev;
- iface_stat_update(dev, event == NETDEV_DOWN);
- atomic64_inc(&qtu_events.iface_events);
- break;
- }
- return NOTIFY_DONE;
-}
-
-static int iface_inetaddr_event_handler(struct notifier_block *nb,
- unsigned long event, void *ptr)
-{
- struct in_ifaddr *ifa = ptr;
- struct net_device *dev;
-
- if (unlikely(module_passive))
- return NOTIFY_DONE;
-
- IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): "
- "ev=0x%lx/%s ifa=%p\n",
- event, netdev_evt_str(event), ifa);
-
- switch (event) {
- case NETDEV_UP:
- BUG_ON(!ifa || !ifa->ifa_dev);
- dev = ifa->ifa_dev->dev;
- iface_stat_create(dev, ifa);
- atomic64_inc(&qtu_events.iface_events);
- break;
- case NETDEV_DOWN:
- case NETDEV_UNREGISTER:
- BUG_ON(!ifa || !ifa->ifa_dev);
- dev = ifa->ifa_dev->dev;
- iface_stat_update(dev, event == NETDEV_DOWN);
- atomic64_inc(&qtu_events.iface_events);
- break;
- }
- return NOTIFY_DONE;
-}
-
-static struct notifier_block iface_netdev_notifier_blk = {
- .notifier_call = iface_netdev_event_handler,
-};
-
-static struct notifier_block iface_inetaddr_notifier_blk = {
- .notifier_call = iface_inetaddr_event_handler,
-};
-
-static struct notifier_block iface_inet6addr_notifier_blk = {
- .notifier_call = iface_inet6addr_event_handler,
-};
-
-static const struct seq_operations iface_stat_fmt_proc_seq_ops = {
- .start = iface_stat_fmt_proc_start,
- .next = iface_stat_fmt_proc_next,
- .stop = iface_stat_fmt_proc_stop,
- .show = iface_stat_fmt_proc_show,
-};
-
-static int proc_iface_stat_fmt_open(struct inode *inode, struct file *file)
-{
- struct proc_iface_stat_fmt_info *s;
-
- s = __seq_open_private(file, &iface_stat_fmt_proc_seq_ops,
- sizeof(struct proc_iface_stat_fmt_info));
- if (!s)
- return -ENOMEM;
-
- s->fmt = (uintptr_t)PDE_DATA(inode);
- return 0;
-}
-
-static const struct file_operations proc_iface_stat_fmt_fops = {
- .open = proc_iface_stat_fmt_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_private,
-};
-
-static int __init iface_stat_init(struct proc_dir_entry *parent_procdir)
-{
- int err;
-
- iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir);
- if (!iface_stat_procdir) {
- pr_err("qtaguid: iface_stat: init failed to create proc entry\n");
- err = -1;
- goto err;
- }
-
- iface_stat_all_procfile = proc_create_data(iface_stat_all_procfilename,
- proc_iface_perms,
- parent_procdir,
- &proc_iface_stat_fmt_fops,
- (void *)1 /* fmt1 */);
- if (!iface_stat_all_procfile) {
- pr_err("qtaguid: iface_stat: init "
- " failed to create stat_old proc entry\n");
- err = -1;
- goto err_zap_entry;
- }
-
- iface_stat_fmt_procfile = proc_create_data(iface_stat_fmt_procfilename,
- proc_iface_perms,
- parent_procdir,
- &proc_iface_stat_fmt_fops,
- (void *)2 /* fmt2 */);
- if (!iface_stat_fmt_procfile) {
- pr_err("qtaguid: iface_stat: init "
- " failed to create stat_all proc entry\n");
- err = -1;
- goto err_zap_all_stats_entry;
- }
-
-
- err = register_netdevice_notifier(&iface_netdev_notifier_blk);
- if (err) {
- pr_err("qtaguid: iface_stat: init "
- "failed to register dev event handler\n");
- goto err_zap_all_stats_entries;
- }
- err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk);
- if (err) {
- pr_err("qtaguid: iface_stat: init "
- "failed to register ipv4 dev event handler\n");
- goto err_unreg_nd;
- }
-
- err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk);
- if (err) {
- pr_err("qtaguid: iface_stat: init "
- "failed to register ipv6 dev event handler\n");
- goto err_unreg_ip4_addr;
- }
- return 0;
-
-err_unreg_ip4_addr:
- unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk);
-err_unreg_nd:
- unregister_netdevice_notifier(&iface_netdev_notifier_blk);
-err_zap_all_stats_entries:
- remove_proc_entry(iface_stat_fmt_procfilename, parent_procdir);
-err_zap_all_stats_entry:
- remove_proc_entry(iface_stat_all_procfilename, parent_procdir);
-err_zap_entry:
- remove_proc_entry(iface_stat_procdirname, parent_procdir);
-err:
- return err;
-}
-
-static struct sock *qtaguid_find_sk(const struct sk_buff *skb,
- struct xt_action_param *par)
-{
- struct sock *sk;
- unsigned int hook_mask = (1 << par->hooknum);
-
- MT_DEBUG("qtaguid[%d]: find_sk(skb=%p) family=%d\n",
- par->hooknum, skb, par->family);
-
- /*
- * Let's not abuse the the xt_socket_get*_sk(), or else it will
- * return garbage SKs.
- */
- if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS))
- return NULL;
-
- switch (par->family) {
- case NFPROTO_IPV6:
- sk = xt_socket_lookup_slow_v6(dev_net(skb->dev), skb, par->in);
- break;
- case NFPROTO_IPV4:
- sk = xt_socket_lookup_slow_v4(dev_net(skb->dev), skb, par->in);
- break;
- default:
- return NULL;
- }
-
- if (sk) {
- MT_DEBUG("qtaguid[%d]: %p->sk_proto=%u->sk_state=%d\n",
- par->hooknum, sk, sk->sk_protocol, sk->sk_state);
- /*
- * When in TCP_TIME_WAIT the sk is not a "struct sock" but
- * "struct inet_timewait_sock" which is missing fields.
- */
- if (!sk_fullsock(sk) || sk->sk_state == TCP_TIME_WAIT) {
- sock_gen_put(sk);
- sk = NULL;
- }
- }
- return sk;
-}
-
-static void account_for_uid(const struct sk_buff *skb,
- const struct sock *alternate_sk, uid_t uid,
- struct xt_action_param *par)
-{
- const struct net_device *el_dev;
- enum ifs_tx_rx direction;
- int proto;
-
- get_dev_and_dir(skb, par, &direction, &el_dev);
- proto = ipx_proto(skb, par);
- MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d dir=%d\n",
- par->hooknum, el_dev->name, el_dev->type,
- par->family, proto, direction);
-
- if_tag_stat_update(el_dev->name, uid,
- skb->sk ? skb->sk : alternate_sk,
- direction,
- proto, skb->len);
-}
-
-static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)
-{
- const struct xt_qtaguid_match_info *info = par->matchinfo;
- const struct file *filp;
- bool got_sock = false;
- struct sock *sk;
- kuid_t sock_uid;
- bool res;
- bool set_sk_callback_lock = false;
- /*
- * TODO: unhack how to force just accounting.
- * For now we only do tag stats when the uid-owner is not requested
- */
- bool do_tag_stat = !(info->match & XT_QTAGUID_UID);
-
- if (unlikely(module_passive))
- return (info->match ^ info->invert) == 0;
-
- MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n",
- par->hooknum, skb, par->in, par->out, par->family);
-
- atomic64_inc(&qtu_events.match_calls);
- if (skb == NULL) {
- res = (info->match ^ info->invert) == 0;
- goto ret_res;
- }
-
- switch (par->hooknum) {
- case NF_INET_PRE_ROUTING:
- case NF_INET_POST_ROUTING:
- atomic64_inc(&qtu_events.match_calls_prepost);
- iface_stat_update_from_skb(skb, par);
- /*
- * We are done in pre/post. The skb will get processed
- * further alter.
- */
- res = (info->match ^ info->invert);
- goto ret_res;
- break;
- /* default: Fall through and do UID releated work */
- }
-
- sk = skb_to_full_sk(skb);
- /*
- * When in TCP_TIME_WAIT the sk is not a "struct sock" but
- * "struct inet_timewait_sock" which is missing fields.
- * So we ignore it.
- */
- if (sk && sk->sk_state == TCP_TIME_WAIT)
- sk = NULL;
- if (sk == NULL) {
- /*
- * A missing sk->sk_socket happens when packets are in-flight
- * and the matching socket is already closed and gone.
- */
- sk = qtaguid_find_sk(skb, par);
- /*
- * If we got the socket from the find_sk(), we will need to put
- * it back, as nf_tproxy_get_sock_v4() got it.
- */
- got_sock = sk;
- if (sk)
- atomic64_inc(&qtu_events.match_found_sk_in_ct);
- else
- atomic64_inc(&qtu_events.match_found_no_sk_in_ct);
- } else {
- atomic64_inc(&qtu_events.match_found_sk);
- }
- MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d fam=%d proto=%d\n",
- par->hooknum, sk, got_sock, par->family, ipx_proto(skb, par));
-
- if (!sk) {
- /*
- * Here, the qtaguid_find_sk() using connection tracking
- * couldn't find the owner, so for now we just count them
- * against the system.
- */
- if (do_tag_stat)
- account_for_uid(skb, sk, 0, par);
- MT_DEBUG("qtaguid[%d]: leaving (sk=NULL)\n", par->hooknum);
- res = (info->match ^ info->invert) == 0;
- atomic64_inc(&qtu_events.match_no_sk);
- goto put_sock_ret_res;
- } else if (info->match & info->invert & XT_QTAGUID_SOCKET) {
- res = false;
- goto put_sock_ret_res;
- }
- sock_uid = sk->sk_uid;
- if (do_tag_stat)
- account_for_uid(skb, sk, from_kuid(&init_user_ns, sock_uid),
- par);
-
- /*
- * The following two tests fail the match when:
- * id not in range AND no inverted condition requested
- * or id in range AND inverted condition requested
- * Thus (!a && b) || (a && !b) == a ^ b
- */
- if (info->match & XT_QTAGUID_UID) {
- kuid_t uid_min = make_kuid(&init_user_ns, info->uid_min);
- kuid_t uid_max = make_kuid(&init_user_ns, info->uid_max);
-
- if ((uid_gte(sock_uid, uid_min) &&
- uid_lte(sock_uid, uid_max)) ^
- !(info->invert & XT_QTAGUID_UID)) {
- MT_DEBUG("qtaguid[%d]: leaving uid not matching\n",
- par->hooknum);
- res = false;
- goto put_sock_ret_res;
- }
- }
- if (info->match & XT_QTAGUID_GID) {
- kgid_t gid_min = make_kgid(&init_user_ns, info->gid_min);
- kgid_t gid_max = make_kgid(&init_user_ns, info->gid_max);
- set_sk_callback_lock = true;
- read_lock_bh(&sk->sk_callback_lock);
- MT_DEBUG("qtaguid[%d]: sk=%pK->sk_socket=%pK->file=%pK\n",
- par->hooknum, sk, sk->sk_socket,
- sk->sk_socket ? sk->sk_socket->file : (void *)-1LL);
- filp = sk->sk_socket ? sk->sk_socket->file : NULL;
- if (!filp) {
- res = ((info->match ^ info->invert) &
- XT_QTAGUID_GID) == 0;
- atomic64_inc(&qtu_events.match_no_sk_gid);
- goto put_sock_ret_res;
- }
- MT_DEBUG("qtaguid[%d]: filp...uid=%u\n",
- par->hooknum, filp ?
- from_kuid(&init_user_ns, filp->f_cred->fsuid) : -1);
- if ((gid_gte(filp->f_cred->fsgid, gid_min) &&
- gid_lte(filp->f_cred->fsgid, gid_max)) ^
- !(info->invert & XT_QTAGUID_GID)) {
- MT_DEBUG("qtaguid[%d]: leaving gid not matching\n",
- par->hooknum);
- res = false;
- goto put_sock_ret_res;
- }
- }
- MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum);
- res = true;
-
-put_sock_ret_res:
- if (got_sock)
- sock_gen_put(sk);
- if (set_sk_callback_lock)
- read_unlock_bh(&sk->sk_callback_lock);
-ret_res:
- MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res);
- return res;
-}
-
-#ifdef DDEBUG
-/*
- * This function is not in xt_qtaguid_print.c because of locks visibility.
- * The lock of sock_tag_list must be aquired before calling this function
- */
-static void prdebug_full_state_locked(int indent_level, const char *fmt, ...)
-{
- va_list args;
- char *fmt_buff;
- char *buff;
-
- if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
- return;
-
- fmt_buff = kasprintf(GFP_ATOMIC,
- "qtaguid: %s(): %s {\n", __func__, fmt);
- BUG_ON(!fmt_buff);
- va_start(args, fmt);
- buff = kvasprintf(GFP_ATOMIC,
- fmt_buff, args);
- BUG_ON(!buff);
- pr_debug("%s", buff);
- kfree(fmt_buff);
- kfree(buff);
- va_end(args);
-
- prdebug_sock_tag_tree(indent_level, &sock_tag_tree);
-
- spin_lock_bh(&uid_tag_data_tree_lock);
- prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree);
- prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree);
- spin_unlock_bh(&uid_tag_data_tree_lock);
-
- spin_lock_bh(&iface_stat_list_lock);
- prdebug_iface_stat_list(indent_level, &iface_stat_list);
- spin_unlock_bh(&iface_stat_list_lock);
-
- pr_debug("qtaguid: %s(): }\n", __func__);
-}
-#else
-static void prdebug_full_state_locked(int indent_level, const char *fmt, ...) {}
-#endif
-
-struct proc_ctrl_print_info {
- struct sock *sk; /* socket found by reading to sk_pos */
- loff_t sk_pos;
-};
-
-static void *qtaguid_ctrl_proc_next(struct seq_file *m, void *v, loff_t *pos)
-{
- struct proc_ctrl_print_info *pcpi = m->private;
- struct sock_tag *sock_tag_entry = v;
- struct rb_node *node;
-
- (*pos)++;
-
- if (!v || v == SEQ_START_TOKEN)
- return NULL;
-
- node = rb_next(&sock_tag_entry->sock_node);
- if (!node) {
- pcpi->sk = NULL;
- sock_tag_entry = SEQ_START_TOKEN;
- } else {
- sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
- pcpi->sk = sock_tag_entry->sk;
- }
- pcpi->sk_pos = *pos;
- return sock_tag_entry;
-}
-
-static void *qtaguid_ctrl_proc_start(struct seq_file *m, loff_t *pos)
-{
- struct proc_ctrl_print_info *pcpi = m->private;
- struct sock_tag *sock_tag_entry;
- struct rb_node *node;
-
- spin_lock_bh(&sock_tag_list_lock);
-
- if (unlikely(module_passive))
- return NULL;
-
- if (*pos == 0) {
- pcpi->sk_pos = 0;
- node = rb_first(&sock_tag_tree);
- if (!node) {
- pcpi->sk = NULL;
- return SEQ_START_TOKEN;
- }
- sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
- pcpi->sk = sock_tag_entry->sk;
- } else {
- sock_tag_entry = (pcpi->sk ? get_sock_stat_nl(pcpi->sk) :
- NULL) ?: SEQ_START_TOKEN;
- if (*pos != pcpi->sk_pos) {
- /* seq_read skipped a next call */
- *pos = pcpi->sk_pos;
- return qtaguid_ctrl_proc_next(m, sock_tag_entry, pos);
- }
- }
- return sock_tag_entry;
-}
-
-static void qtaguid_ctrl_proc_stop(struct seq_file *m, void *v)
-{
- spin_unlock_bh(&sock_tag_list_lock);
-}
-
-/*
- * Procfs reader to get all active socket tags using style "1)" as described in
- * fs/proc/generic.c
- */
-static int qtaguid_ctrl_proc_show(struct seq_file *m, void *v)
-{
- struct sock_tag *sock_tag_entry = v;
- uid_t uid;
-
- CT_DEBUG("qtaguid: proc ctrl pid=%u tgid=%u uid=%u\n",
- current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
-
- if (sock_tag_entry != SEQ_START_TOKEN) {
- int sk_ref_count;
- uid = get_uid_from_tag(sock_tag_entry->tag);
- CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u) "
- "pid=%u\n",
- sock_tag_entry->sk,
- sock_tag_entry->tag,
- uid,
- sock_tag_entry->pid
- );
- sk_ref_count = atomic_read(
- &sock_tag_entry->sk->sk_refcnt);
- seq_printf(m, "sock=%pK tag=0x%llx (uid=%u) pid=%u "
- "f_count=%d\n",
- sock_tag_entry->sk,
- sock_tag_entry->tag, uid,
- sock_tag_entry->pid, sk_ref_count);
- } else {
- seq_printf(m, "events: sockets_tagged=%llu "
- "sockets_untagged=%llu "
- "counter_set_changes=%llu "
- "delete_cmds=%llu "
- "iface_events=%llu "
- "match_calls=%llu "
- "match_calls_prepost=%llu "
- "match_found_sk=%llu "
- "match_found_sk_in_ct=%llu "
- "match_found_no_sk_in_ct=%llu "
- "match_no_sk=%llu "
- "match_no_sk_gid=%llu\n",
- (u64)atomic64_read(&qtu_events.sockets_tagged),
- (u64)atomic64_read(&qtu_events.sockets_untagged),
- (u64)atomic64_read(&qtu_events.counter_set_changes),
- (u64)atomic64_read(&qtu_events.delete_cmds),
- (u64)atomic64_read(&qtu_events.iface_events),
- (u64)atomic64_read(&qtu_events.match_calls),
- (u64)atomic64_read(&qtu_events.match_calls_prepost),
- (u64)atomic64_read(&qtu_events.match_found_sk),
- (u64)atomic64_read(&qtu_events.match_found_sk_in_ct),
- (u64)atomic64_read(&qtu_events.match_found_no_sk_in_ct),
- (u64)atomic64_read(&qtu_events.match_no_sk),
- (u64)atomic64_read(&qtu_events.match_no_sk_gid));
-
- /* Count the following as part of the last item_index. No need
- * to lock the sock_tag_list here since it is already locked when
- * starting the seq_file operation
- */
- prdebug_full_state_locked(0, "proc ctrl");
- }
-
- return 0;
-}
-
-/*
- * Delete socket tags, and stat tags associated with a given
- * accouting tag and uid.
- */
-static int ctrl_cmd_delete(const char *input)
-{
- char cmd;
- int uid_int;
- kuid_t uid;
- uid_t entry_uid;
- tag_t acct_tag;
- tag_t tag;
- int res, argc;
- struct iface_stat *iface_entry;
- struct rb_node *node;
- struct sock_tag *st_entry;
- struct rb_root st_to_free_tree = RB_ROOT;
- struct tag_stat *ts_entry;
- struct tag_counter_set *tcs_entry;
- struct tag_ref *tr_entry;
- struct uid_tag_data *utd_entry;
-
- argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid_int);
- uid = make_kuid(&init_user_ns, uid_int);
- CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c "
- "user_tag=0x%llx uid=%u\n", input, argc, cmd,
- acct_tag, uid_int);
- if (argc < 2) {
- res = -EINVAL;
- goto err;
- }
- if (!valid_atag(acct_tag)) {
- pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input);
- res = -EINVAL;
- goto err;
- }
- if (argc < 3) {
- uid = current_fsuid();
- uid_int = from_kuid(&init_user_ns, uid);
- } else if (!can_impersonate_uid(uid)) {
- pr_info("qtaguid: ctrl_delete(%s): "
- "insufficient priv from pid=%u tgid=%u uid=%u\n",
- input, current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
- res = -EPERM;
- goto err;
- }
-
- tag = combine_atag_with_uid(acct_tag, uid_int);
- CT_DEBUG("qtaguid: ctrl_delete(%s): "
- "looking for tag=0x%llx (uid=%u)\n",
- input, tag, uid_int);
-
- /* Delete socket tags */
- spin_lock_bh(&sock_tag_list_lock);
- spin_lock_bh(&uid_tag_data_tree_lock);
- node = rb_first(&sock_tag_tree);
- while (node) {
- st_entry = rb_entry(node, struct sock_tag, sock_node);
- entry_uid = get_uid_from_tag(st_entry->tag);
- node = rb_next(node);
- if (entry_uid != uid_int)
- continue;
-
- CT_DEBUG("qtaguid: ctrl_delete(%s): st tag=0x%llx (uid=%u)\n",
- input, st_entry->tag, entry_uid);
-
- if (!acct_tag || st_entry->tag == tag) {
- rb_erase(&st_entry->sock_node, &sock_tag_tree);
- /* Can't sockfd_put() within spinlock, do it later. */
- sock_tag_tree_insert(st_entry, &st_to_free_tree);
- tr_entry = lookup_tag_ref(st_entry->tag, NULL);
- BUG_ON(tr_entry->num_sock_tags <= 0);
- tr_entry->num_sock_tags--;
- /*
- * TODO: remove if, and start failing.
- * This is a hack to work around the fact that in some
- * places we have "if (IS_ERR_OR_NULL(pqd_entry))"
- * and are trying to work around apps
- * that didn't open the /dev/xt_qtaguid.
- */
- if (st_entry->list.next && st_entry->list.prev)
- list_del(&st_entry->list);
- }
- }
- spin_unlock_bh(&uid_tag_data_tree_lock);
- spin_unlock_bh(&sock_tag_list_lock);
-
- sock_tag_tree_erase(&st_to_free_tree);
-
- /* Delete tag counter-sets */
- spin_lock_bh(&tag_counter_set_list_lock);
- /* Counter sets are only on the uid tag, not full tag */
- tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
- if (tcs_entry) {
- CT_DEBUG("qtaguid: ctrl_delete(%s): "
- "erase tcs: tag=0x%llx (uid=%u) set=%d\n",
- input,
- tcs_entry->tn.tag,
- get_uid_from_tag(tcs_entry->tn.tag),
- tcs_entry->active_set);
- rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree);
- kfree(tcs_entry);
- }
- spin_unlock_bh(&tag_counter_set_list_lock);
-
- /*
- * If acct_tag is 0, then all entries belonging to uid are
- * erased.
- */
- spin_lock_bh(&iface_stat_list_lock);
- list_for_each_entry(iface_entry, &iface_stat_list, list) {
- spin_lock_bh(&iface_entry->tag_stat_list_lock);
- node = rb_first(&iface_entry->tag_stat_tree);
- while (node) {
- ts_entry = rb_entry(node, struct tag_stat, tn.node);
- entry_uid = get_uid_from_tag(ts_entry->tn.tag);
- node = rb_next(node);
-
- CT_DEBUG("qtaguid: ctrl_delete(%s): "
- "ts tag=0x%llx (uid=%u)\n",
- input, ts_entry->tn.tag, entry_uid);
-
- if (entry_uid != uid_int)
- continue;
- if (!acct_tag || ts_entry->tn.tag == tag) {
- CT_DEBUG("qtaguid: ctrl_delete(%s): "
- "erase ts: %s 0x%llx %u\n",
- input, iface_entry->ifname,
- get_atag_from_tag(ts_entry->tn.tag),
- entry_uid);
- rb_erase(&ts_entry->tn.node,
- &iface_entry->tag_stat_tree);
- kfree(ts_entry);
- }
- }
- spin_unlock_bh(&iface_entry->tag_stat_list_lock);
- }
- spin_unlock_bh(&iface_stat_list_lock);
-
- /* Cleanup the uid_tag_data */
- spin_lock_bh(&uid_tag_data_tree_lock);
- node = rb_first(&uid_tag_data_tree);
- while (node) {
- utd_entry = rb_entry(node, struct uid_tag_data, node);
- entry_uid = utd_entry->uid;
- node = rb_next(node);
-
- CT_DEBUG("qtaguid: ctrl_delete(%s): "
- "utd uid=%u\n",
- input, entry_uid);
-
- if (entry_uid != uid_int)
- continue;
- /*
- * Go over the tag_refs, and those that don't have
- * sock_tags using them are freed.
- */
- put_tag_ref_tree(tag, utd_entry);
- put_utd_entry(utd_entry);
- }
- spin_unlock_bh(&uid_tag_data_tree_lock);
-
- atomic64_inc(&qtu_events.delete_cmds);
- res = 0;
-
-err:
- return res;
-}
-
-static int ctrl_cmd_counter_set(const char *input)
-{
- char cmd;
- uid_t uid = 0;
- tag_t tag;
- int res, argc;
- struct tag_counter_set *tcs;
- int counter_set;
-
- argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid);
- CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c "
- "set=%d uid=%u\n", input, argc, cmd,
- counter_set, uid);
- if (argc != 3) {
- res = -EINVAL;
- goto err;
- }
- if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) {
- pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n",
- input);
- res = -EINVAL;
- goto err;
- }
- if (!can_manipulate_uids()) {
- pr_info("qtaguid: ctrl_counterset(%s): "
- "insufficient priv from pid=%u tgid=%u uid=%u\n",
- input, current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
- res = -EPERM;
- goto err;
- }
-
- tag = make_tag_from_uid(uid);
- spin_lock_bh(&tag_counter_set_list_lock);
- tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
- if (!tcs) {
- tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC);
- if (!tcs) {
- spin_unlock_bh(&tag_counter_set_list_lock);
- pr_err("qtaguid: ctrl_counterset(%s): "
- "failed to alloc counter set\n",
- input);
- res = -ENOMEM;
- goto err;
- }
- tcs->tn.tag = tag;
- tag_counter_set_tree_insert(tcs, &tag_counter_set_tree);
- CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx "
- "(uid=%u) set=%d\n",
- input, tag, get_uid_from_tag(tag), counter_set);
- }
- tcs->active_set = counter_set;
- spin_unlock_bh(&tag_counter_set_list_lock);
- atomic64_inc(&qtu_events.counter_set_changes);
- res = 0;
-
-err:
- return res;
-}
-
-static int ctrl_cmd_tag(const char *input)
-{
- char cmd;
- int sock_fd = 0;
- kuid_t uid;
- unsigned int uid_int = 0;
- tag_t acct_tag = make_atag_from_value(0);
- tag_t full_tag;
- struct socket *el_socket;
- int res, argc;
- struct sock_tag *sock_tag_entry;
- struct tag_ref *tag_ref_entry;
- struct uid_tag_data *uid_tag_data_entry;
- struct proc_qtu_data *pqd_entry;
-
- /* Unassigned args will get defaulted later. */
- argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid_int);
- uid = make_kuid(&init_user_ns, uid_int);
- CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d "
- "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd,
- acct_tag, uid_int);
- if (argc < 2) {
- res = -EINVAL;
- goto err;
- }
- el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */
- if (!el_socket) {
- pr_info("qtaguid: ctrl_tag(%s): failed to lookup"
- " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
- input, sock_fd, res, current->pid, current->tgid,
- from_kuid(&init_user_ns, current_fsuid()));
- goto err;
- }
- CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->sk_refcnt=%d ->sk=%p\n",
- input, atomic_read(&el_socket->sk->sk_refcnt),
- el_socket->sk);
- if (argc < 3) {
- acct_tag = make_atag_from_value(0);
- } else if (!valid_atag(acct_tag)) {
- pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input);
- res = -EINVAL;
- goto err_put;
- }
- CT_DEBUG("qtaguid: ctrl_tag(%s): "
- "pid=%u tgid=%u uid=%u euid=%u fsuid=%u "
- "ctrl.gid=%u in_group()=%d in_egroup()=%d\n",
- input, current->pid, current->tgid,
- from_kuid(&init_user_ns, current_uid()),
- from_kuid(&init_user_ns, current_euid()),
- from_kuid(&init_user_ns, current_fsuid()),
- from_kgid(&init_user_ns, xt_qtaguid_ctrl_file->gid),
- in_group_p(xt_qtaguid_ctrl_file->gid),
- in_egroup_p(xt_qtaguid_ctrl_file->gid));
- if (argc < 4) {
- uid = current_fsuid();
- uid_int = from_kuid(&init_user_ns, uid);
- } else if (!can_impersonate_uid(uid)) {
- pr_info("qtaguid: ctrl_tag(%s): "
- "insufficient priv from pid=%u tgid=%u uid=%u\n",
- input, current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
- res = -EPERM;
- goto err_put;
- }
- full_tag = combine_atag_with_uid(acct_tag, uid_int);
-
- spin_lock_bh(&sock_tag_list_lock);
- spin_lock_bh(&uid_tag_data_tree_lock);
- sock_tag_entry = get_sock_stat_nl(el_socket->sk);
- tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry);
- if (IS_ERR(tag_ref_entry)) {
- res = PTR_ERR(tag_ref_entry);
- spin_unlock_bh(&uid_tag_data_tree_lock);
- spin_unlock_bh(&sock_tag_list_lock);
- goto err_put;
- }
- tag_ref_entry->num_sock_tags++;
- if (sock_tag_entry) {
- struct tag_ref *prev_tag_ref_entry;
-
- CT_DEBUG("qtaguid: ctrl_tag(%s): retag for sk=%p "
- "st@%p ...->sk_refcnt=%d\n",
- input, el_socket->sk, sock_tag_entry,
- atomic_read(&el_socket->sk->sk_refcnt));
- prev_tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag,
- &uid_tag_data_entry);
- BUG_ON(IS_ERR_OR_NULL(prev_tag_ref_entry));
- BUG_ON(prev_tag_ref_entry->num_sock_tags <= 0);
- prev_tag_ref_entry->num_sock_tags--;
- sock_tag_entry->tag = full_tag;
- } else {
- CT_DEBUG("qtaguid: ctrl_tag(%s): newtag for sk=%p\n",
- input, el_socket->sk);
- sock_tag_entry = kzalloc(sizeof(*sock_tag_entry),
- GFP_ATOMIC);
- if (!sock_tag_entry) {
- pr_err("qtaguid: ctrl_tag(%s): "
- "socket tag alloc failed\n",
- input);
- BUG_ON(tag_ref_entry->num_sock_tags <= 0);
- tag_ref_entry->num_sock_tags--;
- free_tag_ref_from_utd_entry(tag_ref_entry,
- uid_tag_data_entry);
- spin_unlock_bh(&uid_tag_data_tree_lock);
- spin_unlock_bh(&sock_tag_list_lock);
- res = -ENOMEM;
- goto err_put;
- }
- /*
- * Hold the sk refcount here to make sure the sk pointer cannot
- * be freed and reused
- */
- sock_hold(el_socket->sk);
- sock_tag_entry->sk = el_socket->sk;
- sock_tag_entry->pid = current->tgid;
- sock_tag_entry->tag = combine_atag_with_uid(acct_tag, uid_int);
- pqd_entry = proc_qtu_data_tree_search(
- &proc_qtu_data_tree, current->tgid);
- /*
- * TODO: remove if, and start failing.
- * At first, we want to catch user-space code that is not
- * opening the /dev/xt_qtaguid.
- */
- if (IS_ERR_OR_NULL(pqd_entry))
- pr_warn_once(
- "qtaguid: %s(): "
- "User space forgot to open /dev/xt_qtaguid? "
- "pid=%u tgid=%u uid=%u\n", __func__,
- current->pid, current->tgid,
- from_kuid(&init_user_ns, current_fsuid()));
- else
- list_add(&sock_tag_entry->list,
- &pqd_entry->sock_tag_list);
-
- sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree);
- atomic64_inc(&qtu_events.sockets_tagged);
- }
- spin_unlock_bh(&uid_tag_data_tree_lock);
- spin_unlock_bh(&sock_tag_list_lock);
- /* We keep the ref to the sk until it is untagged */
- CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->sk_refcnt=%d\n",
- input, sock_tag_entry,
- atomic_read(&el_socket->sk->sk_refcnt));
- sockfd_put(el_socket);
- return 0;
-
-err_put:
- CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->sk_refcnt=%d\n",
- input, atomic_read(&el_socket->sk->sk_refcnt) - 1);
- /* Release the sock_fd that was grabbed by sockfd_lookup(). */
- sockfd_put(el_socket);
- return res;
-
-err:
- CT_DEBUG("qtaguid: ctrl_tag(%s): done.\n", input);
- return res;
-}
-
-static int ctrl_cmd_untag(const char *input)
-{
- char cmd;
- int sock_fd = 0;
- struct socket *el_socket;
- int res, argc;
-
- argc = sscanf(input, "%c %d", &cmd, &sock_fd);
- CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n",
- input, argc, cmd, sock_fd);
- if (argc < 2) {
- res = -EINVAL;
- return res;
- }
- el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */
- if (!el_socket) {
- pr_info("qtaguid: ctrl_untag(%s): failed to lookup"
- " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
- input, sock_fd, res, current->pid, current->tgid,
- from_kuid(&init_user_ns, current_fsuid()));
- return res;
- }
- CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n",
- input, atomic_long_read(&el_socket->file->f_count),
- el_socket->sk);
- res = qtaguid_untag(el_socket, false);
- sockfd_put(el_socket);
- return res;
-}
-
-int qtaguid_untag(struct socket *el_socket, bool kernel)
-{
- int res;
- pid_t pid;
- struct sock_tag *sock_tag_entry;
- struct tag_ref *tag_ref_entry;
- struct uid_tag_data *utd_entry;
- struct proc_qtu_data *pqd_entry;
-
- spin_lock_bh(&sock_tag_list_lock);
- sock_tag_entry = get_sock_stat_nl(el_socket->sk);
- if (!sock_tag_entry) {
- spin_unlock_bh(&sock_tag_list_lock);
- res = -EINVAL;
- return res;
- }
- /*
- * The socket already belongs to the current process
- * so it can do whatever it wants to it.
- */
- rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree);
-
- tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, &utd_entry);
- BUG_ON(!tag_ref_entry);
- BUG_ON(tag_ref_entry->num_sock_tags <= 0);
- spin_lock_bh(&uid_tag_data_tree_lock);
- if (kernel)
- pid = sock_tag_entry->pid;
- else
- pid = current->tgid;
- pqd_entry = proc_qtu_data_tree_search(
- &proc_qtu_data_tree, pid);
- /*
- * TODO: remove if, and start failing.
- * At first, we want to catch user-space code that is not
- * opening the /dev/xt_qtaguid.
- */
- if (IS_ERR_OR_NULL(pqd_entry) || !sock_tag_entry->list.next) {
- pr_warn_once("qtaguid: %s(): "
- "User space forgot to open /dev/xt_qtaguid? "
- "pid=%u tgid=%u sk_pid=%u, uid=%u\n", __func__,
- current->pid, current->tgid, sock_tag_entry->pid,
- from_kuid(&init_user_ns, current_fsuid()));
- } else {
- list_del(&sock_tag_entry->list);
- }
- spin_unlock_bh(&uid_tag_data_tree_lock);
- /*
- * We don't free tag_ref from the utd_entry here,
- * only during a cmd_delete().
- */
- tag_ref_entry->num_sock_tags--;
- spin_unlock_bh(&sock_tag_list_lock);
- /*
- * Release the sock_fd that was grabbed at tag time.
- */
- sock_put(sock_tag_entry->sk);
- CT_DEBUG("qtaguid: done. st@%p ...->sk_refcnt=%d\n",
- sock_tag_entry,
- atomic_read(&el_socket->sk->sk_refcnt));
-
- kfree(sock_tag_entry);
- atomic64_inc(&qtu_events.sockets_untagged);
-
- return 0;
-}
-
-static ssize_t qtaguid_ctrl_parse(const char *input, size_t count)
-{
- char cmd;
- ssize_t res;
-
- CT_DEBUG("qtaguid: ctrl(%s): pid=%u tgid=%u uid=%u\n",
- input, current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
-
- cmd = input[0];
- /* Collect params for commands */
- switch (cmd) {
- case 'd':
- res = ctrl_cmd_delete(input);
- break;
-
- case 's':
- res = ctrl_cmd_counter_set(input);
- break;
-
- case 't':
- res = ctrl_cmd_tag(input);
- break;
-
- case 'u':
- res = ctrl_cmd_untag(input);
- break;
-
- default:
- res = -EINVAL;
- goto err;
- }
- if (!res)
- res = count;
-err:
- CT_DEBUG("qtaguid: ctrl(%s): res=%zd\n", input, res);
- return res;
-}
-
-#define MAX_QTAGUID_CTRL_INPUT_LEN 255
-static ssize_t qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer,
- size_t count, loff_t *offp)
-{
- char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN];
-
- if (unlikely(module_passive))
- return count;
-
- if (count >= MAX_QTAGUID_CTRL_INPUT_LEN)
- return -EINVAL;
-
- if (copy_from_user(input_buf, buffer, count))
- return -EFAULT;
-
- input_buf[count] = '\0';
- return qtaguid_ctrl_parse(input_buf, count);
-}
-
-struct proc_print_info {
- struct iface_stat *iface_entry;
- int item_index;
- tag_t tag; /* tag found by reading to tag_pos */
- off_t tag_pos;
- int tag_item_index;
-};
-
-static void pp_stats_header(struct seq_file *m)
-{
- seq_puts(m,
- "idx iface acct_tag_hex uid_tag_int cnt_set "
- "rx_bytes rx_packets "
- "tx_bytes tx_packets "
- "rx_tcp_bytes rx_tcp_packets "
- "rx_udp_bytes rx_udp_packets "
- "rx_other_bytes rx_other_packets "
- "tx_tcp_bytes tx_tcp_packets "
- "tx_udp_bytes tx_udp_packets "
- "tx_other_bytes tx_other_packets\n");
-}
-
-static int pp_stats_line(struct seq_file *m, struct tag_stat *ts_entry,
- int cnt_set)
-{
- struct data_counters *cnts;
- tag_t tag = ts_entry->tn.tag;
- uid_t stat_uid = get_uid_from_tag(tag);
- struct proc_print_info *ppi = m->private;
- /* Detailed tags are not available to everybody */
- if (!can_read_other_uid_stats(make_kuid(&init_user_ns,stat_uid))) {
- CT_DEBUG("qtaguid: stats line: "
- "%s 0x%llx %u: insufficient priv "
- "from pid=%u tgid=%u uid=%u stats.gid=%u\n",
- ppi->iface_entry->ifname,
- get_atag_from_tag(tag), stat_uid,
- current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()),
- from_kgid(&init_user_ns,xt_qtaguid_stats_file->gid));
- return 0;
- }
- ppi->item_index++;
- cnts = &ts_entry->counters;
- seq_printf(m, "%d %s 0x%llx %u %u "
- "%llu %llu "
- "%llu %llu "
- "%llu %llu "
- "%llu %llu "
- "%llu %llu "
- "%llu %llu "
- "%llu %llu "
- "%llu %llu\n",
- ppi->item_index,
- ppi->iface_entry->ifname,
- get_atag_from_tag(tag),
- stat_uid,
- cnt_set,
- dc_sum_bytes(cnts, cnt_set, IFS_RX),
- dc_sum_packets(cnts, cnt_set, IFS_RX),
- dc_sum_bytes(cnts, cnt_set, IFS_TX),
- dc_sum_packets(cnts, cnt_set, IFS_TX),
- cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
- cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
- cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
- cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
- cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
- cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
- cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
- cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
- cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
- cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
- cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
- cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
- return seq_has_overflowed(m) ? -ENOSPC : 1;
-}
-
-static bool pp_sets(struct seq_file *m, struct tag_stat *ts_entry)
-{
- int ret;
- int counter_set;
- for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS;
- counter_set++) {
- ret = pp_stats_line(m, ts_entry, counter_set);
- if (ret < 0)
- return false;
- }
- return true;
-}
-
-static int qtaguid_stats_proc_iface_stat_ptr_valid(struct iface_stat *ptr)
-{
- struct iface_stat *iface_entry;
-
- if (!ptr)
- return false;
-
- list_for_each_entry(iface_entry, &iface_stat_list, list)
- if (iface_entry == ptr)
- return true;
- return false;
-}
-
-static void qtaguid_stats_proc_next_iface_entry(struct proc_print_info *ppi)
-{
- spin_unlock_bh(&ppi->iface_entry->tag_stat_list_lock);
- list_for_each_entry_continue(ppi->iface_entry, &iface_stat_list, list) {
- spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock);
- return;
- }
- ppi->iface_entry = NULL;
-}
-
-static void *qtaguid_stats_proc_next(struct seq_file *m, void *v, loff_t *pos)
-{
- struct proc_print_info *ppi = m->private;
- struct tag_stat *ts_entry;
- struct rb_node *node;
-
- if (!v) {
- pr_err("qtaguid: %s(): unexpected v: NULL\n", __func__);
- return NULL;
- }
-
- (*pos)++;
-
- if (!ppi->iface_entry || unlikely(module_passive))
- return NULL;
-
- if (v == SEQ_START_TOKEN)
- node = rb_first(&ppi->iface_entry->tag_stat_tree);
- else
- node = rb_next(&((struct tag_stat *)v)->tn.node);
-
- while (!node) {
- qtaguid_stats_proc_next_iface_entry(ppi);
- if (!ppi->iface_entry)
- return NULL;
- node = rb_first(&ppi->iface_entry->tag_stat_tree);
- }
-
- ts_entry = rb_entry(node, struct tag_stat, tn.node);
- ppi->tag = ts_entry->tn.tag;
- ppi->tag_pos = *pos;
- ppi->tag_item_index = ppi->item_index;
- return ts_entry;
-}
-
-static void *qtaguid_stats_proc_start(struct seq_file *m, loff_t *pos)
-{
- struct proc_print_info *ppi = m->private;
- struct tag_stat *ts_entry = NULL;
-
- spin_lock_bh(&iface_stat_list_lock);
-
- if (*pos == 0) {
- ppi->item_index = 1;
- ppi->tag_pos = 0;
- if (list_empty(&iface_stat_list)) {
- ppi->iface_entry = NULL;
- } else {
- ppi->iface_entry = list_first_entry(&iface_stat_list,
- struct iface_stat,
- list);
- spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock);
- }
- return SEQ_START_TOKEN;
- }
- if (!qtaguid_stats_proc_iface_stat_ptr_valid(ppi->iface_entry)) {
- if (ppi->iface_entry) {
- pr_err("qtaguid: %s(): iface_entry %p not found\n",
- __func__, ppi->iface_entry);
- ppi->iface_entry = NULL;
- }
- return NULL;
- }
-
- spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock);
-
- if (!ppi->tag_pos) {
- /* seq_read skipped first next call */
- ts_entry = SEQ_START_TOKEN;
- } else {
- ts_entry = tag_stat_tree_search(
- &ppi->iface_entry->tag_stat_tree, ppi->tag);
- if (!ts_entry) {
- pr_info("qtaguid: %s(): tag_stat.tag 0x%llx not found. Abort.\n",
- __func__, ppi->tag);
- return NULL;
- }
- }
-
- if (*pos == ppi->tag_pos) { /* normal resume */
- ppi->item_index = ppi->tag_item_index;
- } else {
- /* seq_read skipped a next call */
- *pos = ppi->tag_pos;
- ts_entry = qtaguid_stats_proc_next(m, ts_entry, pos);
- }
-
- return ts_entry;
-}
-
-static void qtaguid_stats_proc_stop(struct seq_file *m, void *v)
-{
- struct proc_print_info *ppi = m->private;
- if (ppi->iface_entry)
- spin_unlock_bh(&ppi->iface_entry->tag_stat_list_lock);
- spin_unlock_bh(&iface_stat_list_lock);
-}
-
-/*
- * Procfs reader to get all tag stats using style "1)" as described in
- * fs/proc/generic.c
- * Groups all protocols tx/rx bytes.
- */
-static int qtaguid_stats_proc_show(struct seq_file *m, void *v)
-{
- struct tag_stat *ts_entry = v;
-
- if (v == SEQ_START_TOKEN)
- pp_stats_header(m);
- else
- pp_sets(m, ts_entry);
-
- return 0;
-}
-
-/*------------------------------------------*/
-static int qtudev_open(struct inode *inode, struct file *file)
-{
- struct uid_tag_data *utd_entry;
- struct proc_qtu_data *pqd_entry;
- struct proc_qtu_data *new_pqd_entry;
- int res;
- bool utd_entry_found;
-
- if (unlikely(qtu_proc_handling_passive))
- return 0;
-
- DR_DEBUG("qtaguid: qtudev_open(): pid=%u tgid=%u uid=%u\n",
- current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
-
- spin_lock_bh(&uid_tag_data_tree_lock);
-
- /* Look for existing uid data, or alloc one. */
- utd_entry = get_uid_data(from_kuid(&init_user_ns, current_fsuid()), &utd_entry_found);
- if (IS_ERR_OR_NULL(utd_entry)) {
- res = PTR_ERR(utd_entry);
- goto err_unlock;
- }
-
- /* Look for existing PID based proc_data */
- pqd_entry = proc_qtu_data_tree_search(&proc_qtu_data_tree,
- current->tgid);
- if (pqd_entry) {
- pr_err("qtaguid: qtudev_open(): %u/%u %u "
- "%s already opened\n",
- current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()),
- QTU_DEV_NAME);
- res = -EBUSY;
- goto err_unlock_free_utd;
- }
-
- new_pqd_entry = kzalloc(sizeof(*new_pqd_entry), GFP_ATOMIC);
- if (!new_pqd_entry) {
- pr_err("qtaguid: qtudev_open(): %u/%u %u: "
- "proc data alloc failed\n",
- current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
- res = -ENOMEM;
- goto err_unlock_free_utd;
- }
- new_pqd_entry->pid = current->tgid;
- INIT_LIST_HEAD(&new_pqd_entry->sock_tag_list);
- new_pqd_entry->parent_tag_data = utd_entry;
- utd_entry->num_pqd++;
-
- proc_qtu_data_tree_insert(new_pqd_entry,
- &proc_qtu_data_tree);
-
- spin_unlock_bh(&uid_tag_data_tree_lock);
- DR_DEBUG("qtaguid: tracking data for uid=%u in pqd=%p\n",
- from_kuid(&init_user_ns, current_fsuid()), new_pqd_entry);
- file->private_data = new_pqd_entry;
- return 0;
-
-err_unlock_free_utd:
- if (!utd_entry_found) {
- rb_erase(&utd_entry->node, &uid_tag_data_tree);
- kfree(utd_entry);
- }
-err_unlock:
- spin_unlock_bh(&uid_tag_data_tree_lock);
- return res;
-}
-
-static int qtudev_release(struct inode *inode, struct file *file)
-{
- struct proc_qtu_data *pqd_entry = file->private_data;
- struct uid_tag_data *utd_entry = pqd_entry->parent_tag_data;
- struct sock_tag *st_entry;
- struct rb_root st_to_free_tree = RB_ROOT;
- struct list_head *entry, *next;
- struct tag_ref *tr;
-
- if (unlikely(qtu_proc_handling_passive))
- return 0;
-
- /*
- * Do not trust the current->pid, it might just be a kworker cleaning
- * up after a dead proc.
- */
- DR_DEBUG("qtaguid: qtudev_release(): "
- "pid=%u tgid=%u uid=%u "
- "pqd_entry=%p->pid=%u utd_entry=%p->active_tags=%d\n",
- current->pid, current->tgid, pqd_entry->parent_tag_data->uid,
- pqd_entry, pqd_entry->pid, utd_entry,
- utd_entry->num_active_tags);
-
- spin_lock_bh(&sock_tag_list_lock);
- spin_lock_bh(&uid_tag_data_tree_lock);
-
- list_for_each_safe(entry, next, &pqd_entry->sock_tag_list) {
- st_entry = list_entry(entry, struct sock_tag, list);
- DR_DEBUG("qtaguid: %s(): "
- "erase sock_tag=%p->sk=%p pid=%u tgid=%u uid=%u\n",
- __func__,
- st_entry, st_entry->sk,
- current->pid, current->tgid,
- pqd_entry->parent_tag_data->uid);
-
- utd_entry = uid_tag_data_tree_search(
- &uid_tag_data_tree,
- get_uid_from_tag(st_entry->tag));
- BUG_ON(IS_ERR_OR_NULL(utd_entry));
- DR_DEBUG("qtaguid: %s(): "
- "looking for tag=0x%llx in utd_entry=%p\n", __func__,
- st_entry->tag, utd_entry);
- tr = tag_ref_tree_search(&utd_entry->tag_ref_tree,
- st_entry->tag);
- BUG_ON(!tr);
- BUG_ON(tr->num_sock_tags <= 0);
- tr->num_sock_tags--;
- free_tag_ref_from_utd_entry(tr, utd_entry);
-
- rb_erase(&st_entry->sock_node, &sock_tag_tree);
- list_del(&st_entry->list);
- /* Can't sockfd_put() within spinlock, do it later. */
- sock_tag_tree_insert(st_entry, &st_to_free_tree);
-
- /*
- * Try to free the utd_entry if no other proc_qtu_data is
- * using it (num_pqd is 0) and it doesn't have active tags
- * (num_active_tags is 0).
- */
- put_utd_entry(utd_entry);
- }
-
- rb_erase(&pqd_entry->node, &proc_qtu_data_tree);
- BUG_ON(pqd_entry->parent_tag_data->num_pqd < 1);
- pqd_entry->parent_tag_data->num_pqd--;
- put_utd_entry(pqd_entry->parent_tag_data);
- kfree(pqd_entry);
- file->private_data = NULL;
-
- spin_unlock_bh(&uid_tag_data_tree_lock);
- spin_unlock_bh(&sock_tag_list_lock);
-
-
- sock_tag_tree_erase(&st_to_free_tree);
-
- spin_lock_bh(&sock_tag_list_lock);
- prdebug_full_state_locked(0, "%s(): pid=%u tgid=%u", __func__,
- current->pid, current->tgid);
- spin_unlock_bh(&sock_tag_list_lock);
- return 0;
-}
-
-/*------------------------------------------*/
-static const struct file_operations qtudev_fops = {
- .owner = THIS_MODULE,
- .open = qtudev_open,
- .release = qtudev_release,
-};
-
-static struct miscdevice qtu_device = {
- .minor = MISC_DYNAMIC_MINOR,
- .name = QTU_DEV_NAME,
- .fops = &qtudev_fops,
- /* How sad it doesn't allow for defaults: .mode = S_IRUGO | S_IWUSR */
-};
-
-static const struct seq_operations proc_qtaguid_ctrl_seqops = {
- .start = qtaguid_ctrl_proc_start,
- .next = qtaguid_ctrl_proc_next,
- .stop = qtaguid_ctrl_proc_stop,
- .show = qtaguid_ctrl_proc_show,
-};
-
-static int proc_qtaguid_ctrl_open(struct inode *inode, struct file *file)
-{
- return seq_open_private(file, &proc_qtaguid_ctrl_seqops,
- sizeof(struct proc_ctrl_print_info));
-}
-
-static const struct file_operations proc_qtaguid_ctrl_fops = {
- .open = proc_qtaguid_ctrl_open,
- .read = seq_read,
- .write = qtaguid_ctrl_proc_write,
- .llseek = seq_lseek,
- .release = seq_release_private,
-};
-
-static const struct seq_operations proc_qtaguid_stats_seqops = {
- .start = qtaguid_stats_proc_start,
- .next = qtaguid_stats_proc_next,
- .stop = qtaguid_stats_proc_stop,
- .show = qtaguid_stats_proc_show,
-};
-
-static int proc_qtaguid_stats_open(struct inode *inode, struct file *file)
-{
- return seq_open_private(file, &proc_qtaguid_stats_seqops,
- sizeof(struct proc_print_info));
-}
-
-static const struct file_operations proc_qtaguid_stats_fops = {
- .open = proc_qtaguid_stats_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_private,
-};
-
-/*------------------------------------------*/
-static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir)
-{
- int ret;
- *res_procdir = proc_mkdir(module_procdirname, init_net.proc_net);
- if (!*res_procdir) {
- pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n");
- ret = -ENOMEM;
- goto no_dir;
- }
-
- xt_qtaguid_ctrl_file = proc_create_data("ctrl", proc_ctrl_perms,
- *res_procdir,
- &proc_qtaguid_ctrl_fops,
- NULL);
- if (!xt_qtaguid_ctrl_file) {
- pr_err("qtaguid: failed to create xt_qtaguid/ctrl "
- " file\n");
- ret = -ENOMEM;
- goto no_ctrl_entry;
- }
-
- xt_qtaguid_stats_file = proc_create_data("stats", proc_stats_perms,
- *res_procdir,
- &proc_qtaguid_stats_fops,
- NULL);
- if (!xt_qtaguid_stats_file) {
- pr_err("qtaguid: failed to create xt_qtaguid/stats "
- "file\n");
- ret = -ENOMEM;
- goto no_stats_entry;
- }
- /*
- * TODO: add support counter hacking
- * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write;
- */
- return 0;
-
-no_stats_entry:
- remove_proc_entry("ctrl", *res_procdir);
-no_ctrl_entry:
- remove_proc_entry("xt_qtaguid", NULL);
-no_dir:
- return ret;
-}
-
-static struct xt_match qtaguid_mt_reg __read_mostly = {
- /*
- * This module masquerades as the "owner" module so that iptables
- * tools can deal with it.
- */
- .name = "owner",
- .revision = 1,
- .family = NFPROTO_UNSPEC,
- .match = qtaguid_mt,
- .matchsize = sizeof(struct xt_qtaguid_match_info),
- .me = THIS_MODULE,
-};
-
-static int __init qtaguid_mt_init(void)
-{
- if (qtaguid_proc_register(&xt_qtaguid_procdir)
- || iface_stat_init(xt_qtaguid_procdir)
- || xt_register_match(&qtaguid_mt_reg)
- || misc_register(&qtu_device))
- return -1;
- return 0;
-}
-
-/*
- * TODO: allow unloading of the module.
- * For now stats are permanent.
- * Kconfig forces'y/n' and never an 'm'.
- */
-
-module_init(qtaguid_mt_init);
-MODULE_AUTHOR("jpa <jpa@google.com>");
-MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("ipt_owner");
-MODULE_ALIAS("ip6t_owner");
-MODULE_ALIAS("ipt_qtaguid");
-MODULE_ALIAS("ip6t_qtaguid");
diff --git a/net/netfilter/xt_qtaguid_internal.h b/net/netfilter/xt_qtaguid_internal.h
deleted file mode 100644
index c7052707a6a4..000000000000
--- a/net/netfilter/xt_qtaguid_internal.h
+++ /dev/null
@@ -1,350 +0,0 @@
-/*
- * Kernel iptables module to track stats for packets based on user tags.
- *
- * (C) 2011 Google, Inc
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#ifndef __XT_QTAGUID_INTERNAL_H__
-#define __XT_QTAGUID_INTERNAL_H__
-
-#include <linux/types.h>
-#include <linux/rbtree.h>
-#include <linux/spinlock_types.h>
-#include <linux/workqueue.h>
-
-/* Iface handling */
-#define IDEBUG_MASK (1<<0)
-/* Iptable Matching. Per packet. */
-#define MDEBUG_MASK (1<<1)
-/* Red-black tree handling. Per packet. */
-#define RDEBUG_MASK (1<<2)
-/* procfs ctrl/stats handling */
-#define CDEBUG_MASK (1<<3)
-/* dev and resource tracking */
-#define DDEBUG_MASK (1<<4)
-
-/* E.g (IDEBUG_MASK | CDEBUG_MASK | DDEBUG_MASK) */
-#define DEFAULT_DEBUG_MASK 0
-
-/*
- * (Un)Define these *DEBUG to compile out/in the pr_debug calls.
- * All undef: text size ~ 0x3030; all def: ~ 0x4404.
- */
-#define IDEBUG
-#define MDEBUG
-#define RDEBUG
-#define CDEBUG
-#define DDEBUG
-
-#define MSK_DEBUG(mask, ...) do { \
- if (unlikely(qtaguid_debug_mask & (mask))) \
- pr_debug(__VA_ARGS__); \
- } while (0)
-#ifdef IDEBUG
-#define IF_DEBUG(...) MSK_DEBUG(IDEBUG_MASK, __VA_ARGS__)
-#else
-#define IF_DEBUG(...) no_printk(__VA_ARGS__)
-#endif
-#ifdef MDEBUG
-#define MT_DEBUG(...) MSK_DEBUG(MDEBUG_MASK, __VA_ARGS__)
-#else
-#define MT_DEBUG(...) no_printk(__VA_ARGS__)
-#endif
-#ifdef RDEBUG
-#define RB_DEBUG(...) MSK_DEBUG(RDEBUG_MASK, __VA_ARGS__)
-#else
-#define RB_DEBUG(...) no_printk(__VA_ARGS__)
-#endif
-#ifdef CDEBUG
-#define CT_DEBUG(...) MSK_DEBUG(CDEBUG_MASK, __VA_ARGS__)
-#else
-#define CT_DEBUG(...) no_printk(__VA_ARGS__)
-#endif
-#ifdef DDEBUG
-#define DR_DEBUG(...) MSK_DEBUG(DDEBUG_MASK, __VA_ARGS__)
-#else
-#define DR_DEBUG(...) no_printk(__VA_ARGS__)
-#endif
-
-extern uint qtaguid_debug_mask;
-
-/*---------------------------------------------------------------------------*/
-/*
- * Tags:
- *
- * They represent what the data usage counters will be tracked against.
- * By default a tag is just based on the UID.
- * The UID is used as the base for policing, and can not be ignored.
- * So a tag will always at least represent a UID (uid_tag).
- *
- * A tag can be augmented with an "accounting tag" which is associated
- * with a UID.
- * User space can set the acct_tag portion of the tag which is then used
- * with sockets: all data belonging to that socket will be counted against the
- * tag. The policing is then based on the tag's uid_tag portion,
- * and stats are collected for the acct_tag portion separately.
- *
- * There could be
- * a: {acct_tag=1, uid_tag=10003}
- * b: {acct_tag=2, uid_tag=10003}
- * c: {acct_tag=3, uid_tag=10003}
- * d: {acct_tag=0, uid_tag=10003}
- * a, b, and c represent tags associated with specific sockets.
- * d is for the totals for that uid, including all untagged traffic.
- * Typically d is used with policing/quota rules.
- *
- * We want tag_t big enough to distinguish uid_t and acct_tag.
- * It might become a struct if needed.
- * Nothing should be using it as an int.
- */
-typedef uint64_t tag_t; /* Only used via accessors */
-
-#define TAG_UID_MASK 0xFFFFFFFFULL
-#define TAG_ACCT_MASK (~0xFFFFFFFFULL)
-
-static inline int tag_compare(tag_t t1, tag_t t2)
-{
- return t1 < t2 ? -1 : t1 == t2 ? 0 : 1;
-}
-
-static inline tag_t combine_atag_with_uid(tag_t acct_tag, uid_t uid)
-{
- return acct_tag | uid;
-}
-static inline tag_t make_tag_from_uid(uid_t uid)
-{
- return uid;
-}
-static inline uid_t get_uid_from_tag(tag_t tag)
-{
- return tag & TAG_UID_MASK;
-}
-static inline tag_t get_utag_from_tag(tag_t tag)
-{
- return tag & TAG_UID_MASK;
-}
-static inline tag_t get_atag_from_tag(tag_t tag)
-{
- return tag & TAG_ACCT_MASK;
-}
-
-static inline bool valid_atag(tag_t tag)
-{
- return !(tag & TAG_UID_MASK);
-}
-static inline tag_t make_atag_from_value(uint32_t value)
-{
- return (uint64_t)value << 32;
-}
-/*---------------------------------------------------------------------------*/
-
-/*
- * Maximum number of socket tags that a UID is allowed to have active.
- * Multiple processes belonging to the same UID contribute towards this limit.
- * Special UIDs that can impersonate a UID also contribute (e.g. download
- * manager, ...)
- */
-#define DEFAULT_MAX_SOCK_TAGS 1024
-
-/*
- * For now we only track 2 sets of counters.
- * The default set is 0.
- * Userspace can activate another set for a given uid being tracked.
- */
-#define IFS_MAX_COUNTER_SETS 2
-
-enum ifs_tx_rx {
- IFS_TX,
- IFS_RX,
- IFS_MAX_DIRECTIONS
-};
-
-/* For now, TCP, UDP, the rest */
-enum ifs_proto {
- IFS_TCP,
- IFS_UDP,
- IFS_PROTO_OTHER,
- IFS_MAX_PROTOS
-};
-
-struct byte_packet_counters {
- uint64_t bytes;
- uint64_t packets;
-};
-
-struct data_counters {
- struct byte_packet_counters bpc[IFS_MAX_COUNTER_SETS][IFS_MAX_DIRECTIONS][IFS_MAX_PROTOS];
-};
-
-static inline uint64_t dc_sum_bytes(struct data_counters *counters,
- int set,
- enum ifs_tx_rx direction)
-{
- return counters->bpc[set][direction][IFS_TCP].bytes
- + counters->bpc[set][direction][IFS_UDP].bytes
- + counters->bpc[set][direction][IFS_PROTO_OTHER].bytes;
-}
-
-static inline uint64_t dc_sum_packets(struct data_counters *counters,
- int set,
- enum ifs_tx_rx direction)
-{
- return counters->bpc[set][direction][IFS_TCP].packets
- + counters->bpc[set][direction][IFS_UDP].packets
- + counters->bpc[set][direction][IFS_PROTO_OTHER].packets;
-}
-
-
-/* Generic X based nodes used as a base for rb_tree ops */
-struct tag_node {
- struct rb_node node;
- tag_t tag;
-};
-
-struct tag_stat {
- struct tag_node tn;
- struct data_counters counters;
- /*
- * If this tag is acct_tag based, we need to count against the
- * matching parent uid_tag.
- */
- struct data_counters *parent_counters;
-};
-
-struct iface_stat {
- struct list_head list; /* in iface_stat_list */
- char *ifname;
- bool active;
- /* net_dev is only valid for active iface_stat */
- struct net_device *net_dev;
-
- struct byte_packet_counters totals_via_dev[IFS_MAX_DIRECTIONS];
- struct data_counters totals_via_skb;
- /*
- * We keep the last_known, because some devices reset their counters
- * just before NETDEV_UP, while some will reset just before
- * NETDEV_REGISTER (which is more normal).
- * So now, if the device didn't do a NETDEV_UNREGISTER and we see
- * its current dev stats smaller that what was previously known, we
- * assume an UNREGISTER and just use the last_known.
- */
- struct byte_packet_counters last_known[IFS_MAX_DIRECTIONS];
- /* last_known is usable when last_known_valid is true */
- bool last_known_valid;
-
- struct proc_dir_entry *proc_ptr;
-
- struct rb_root tag_stat_tree;
- spinlock_t tag_stat_list_lock;
-};
-
-/* This is needed to create proc_dir_entries from atomic context. */
-struct iface_stat_work {
- struct work_struct iface_work;
- struct iface_stat *iface_entry;
-};
-
-/*
- * Track tag that this socket is transferring data for, and not necessarily
- * the uid that owns the socket.
- * This is the tag against which tag_stat.counters will be billed.
- * These structs need to be looked up by sock and pid.
- */
-struct sock_tag {
- struct rb_node sock_node;
- struct sock *sk; /* Only used as a number, never dereferenced */
- /* Used to associate with a given pid */
- struct list_head list; /* in proc_qtu_data.sock_tag_list */
- pid_t pid;
-
- tag_t tag;
-};
-
-struct qtaguid_event_counts {
- /* Various successful events */
- atomic64_t sockets_tagged;
- atomic64_t sockets_untagged;
- atomic64_t counter_set_changes;
- atomic64_t delete_cmds;
- atomic64_t iface_events; /* Number of NETDEV_* events handled */
-
- atomic64_t match_calls; /* Number of times iptables called mt */
- /* Number of times iptables called mt from pre or post routing hooks */
- atomic64_t match_calls_prepost;
- /*
- * match_found_sk_*: numbers related to the netfilter matching
- * function finding a sock for the sk_buff.
- * Total skbs processed is sum(match_found*).
- */
- atomic64_t match_found_sk; /* An sk was already in the sk_buff. */
- /* The connection tracker had or didn't have the sk. */
- atomic64_t match_found_sk_in_ct;
- atomic64_t match_found_no_sk_in_ct;
- /*
- * No sk could be found. No apparent owner. Could happen with
- * unsolicited traffic.
- */
- atomic64_t match_no_sk;
- /*
- * The file ptr in the sk_socket wasn't there and we couldn't get GID.
- * This might happen for traffic while the socket is being closed.
- */
- atomic64_t match_no_sk_gid;
-};
-
-/* Track the set active_set for the given tag. */
-struct tag_counter_set {
- struct tag_node tn;
- int active_set;
-};
-
-/*----------------------------------------------*/
-/*
- * The qtu uid data is used to track resources that are created directly or
- * indirectly by processes (uid tracked).
- * It is shared by the processes with the same uid.
- * Some of the resource will be counted to prevent further rogue allocations,
- * some will need freeing once the owner process (uid) exits.
- */
-struct uid_tag_data {
- struct rb_node node;
- uid_t uid;
-
- /*
- * For the uid, how many accounting tags have been set.
- */
- int num_active_tags;
- /* Track the number of proc_qtu_data that reference it */
- int num_pqd;
- struct rb_root tag_ref_tree;
- /* No tag_node_tree_lock; use uid_tag_data_tree_lock */
-};
-
-struct tag_ref {
- struct tag_node tn;
-
- /*
- * This tracks the number of active sockets that have a tag on them
- * which matches this tag_ref.tn.tag.
- * A tag ref can live on after the sockets are untagged.
- * A tag ref can only be removed during a tag delete command.
- */
- int num_sock_tags;
-};
-
-struct proc_qtu_data {
- struct rb_node node;
- pid_t pid;
-
- struct uid_tag_data *parent_tag_data;
-
- /* Tracks the sock_tags that need freeing upon this proc's death */
- struct list_head sock_tag_list;
- /* No spinlock_t sock_tag_list_lock; use the global one. */
-};
-
-/*----------------------------------------------*/
-#endif /* ifndef __XT_QTAGUID_INTERNAL_H__ */
diff --git a/net/netfilter/xt_qtaguid_print.c b/net/netfilter/xt_qtaguid_print.c
deleted file mode 100644
index 2a7190d285e6..000000000000
--- a/net/netfilter/xt_qtaguid_print.c
+++ /dev/null
@@ -1,566 +0,0 @@
-/*
- * Pretty printing Support for iptables xt_qtaguid module.
- *
- * (C) 2011 Google, Inc
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-/*
- * Most of the functions in this file just waste time if DEBUG is not defined.
- * The matching xt_qtaguid_print.h will static inline empty funcs if the needed
- * debug flags ore not defined.
- * Those funcs that fail to allocate memory will panic as there is no need to
- * hobble allong just pretending to do the requested work.
- */
-
-#define DEBUG
-
-#include <linux/fs.h>
-#include <linux/gfp.h>
-#include <linux/net.h>
-#include <linux/rbtree.h>
-#include <linux/slab.h>
-#include <linux/spinlock_types.h>
-#include <net/sock.h>
-
-#include "xt_qtaguid_internal.h"
-#include "xt_qtaguid_print.h"
-
-#ifdef DDEBUG
-
-static void _bug_on_err_or_null(void *ptr)
-{
- if (IS_ERR_OR_NULL(ptr)) {
- pr_err("qtaguid: kmalloc failed\n");
- BUG();
- }
-}
-
-char *pp_tag_t(tag_t *tag)
-{
- char *res;
-
- if (!tag)
- res = kasprintf(GFP_ATOMIC, "tag_t@null{}");
- else
- res = kasprintf(GFP_ATOMIC,
- "tag_t@%p{tag=0x%llx, uid=%u}",
- tag, *tag, get_uid_from_tag(*tag));
- _bug_on_err_or_null(res);
- return res;
-}
-
-char *pp_data_counters(struct data_counters *dc, bool showValues)
-{
- char *res;
-
- if (!dc)
- res = kasprintf(GFP_ATOMIC, "data_counters@null{}");
- else if (showValues)
- res = kasprintf(
- GFP_ATOMIC, "data_counters@%p{"
- "set0{"
- "rx{"
- "tcp{b=%llu, p=%llu}, "
- "udp{b=%llu, p=%llu},"
- "other{b=%llu, p=%llu}}, "
- "tx{"
- "tcp{b=%llu, p=%llu}, "
- "udp{b=%llu, p=%llu},"
- "other{b=%llu, p=%llu}}}, "
- "set1{"
- "rx{"
- "tcp{b=%llu, p=%llu}, "
- "udp{b=%llu, p=%llu},"
- "other{b=%llu, p=%llu}}, "
- "tx{"
- "tcp{b=%llu, p=%llu}, "
- "udp{b=%llu, p=%llu},"
- "other{b=%llu, p=%llu}}}}",
- dc,
- dc->bpc[0][IFS_RX][IFS_TCP].bytes,
- dc->bpc[0][IFS_RX][IFS_TCP].packets,
- dc->bpc[0][IFS_RX][IFS_UDP].bytes,
- dc->bpc[0][IFS_RX][IFS_UDP].packets,
- dc->bpc[0][IFS_RX][IFS_PROTO_OTHER].bytes,
- dc->bpc[0][IFS_RX][IFS_PROTO_OTHER].packets,
- dc->bpc[0][IFS_TX][IFS_TCP].bytes,
- dc->bpc[0][IFS_TX][IFS_TCP].packets,
- dc->bpc[0][IFS_TX][IFS_UDP].bytes,
- dc->bpc[0][IFS_TX][IFS_UDP].packets,
- dc->bpc[0][IFS_TX][IFS_PROTO_OTHER].bytes,
- dc->bpc[0][IFS_TX][IFS_PROTO_OTHER].packets,
- dc->bpc[1][IFS_RX][IFS_TCP].bytes,
- dc->bpc[1][IFS_RX][IFS_TCP].packets,
- dc->bpc[1][IFS_RX][IFS_UDP].bytes,
- dc->bpc[1][IFS_RX][IFS_UDP].packets,
- dc->bpc[1][IFS_RX][IFS_PROTO_OTHER].bytes,
- dc->bpc[1][IFS_RX][IFS_PROTO_OTHER].packets,
- dc->bpc[1][IFS_TX][IFS_TCP].bytes,
- dc->bpc[1][IFS_TX][IFS_TCP].packets,
- dc->bpc[1][IFS_TX][IFS_UDP].bytes,
- dc->bpc[1][IFS_TX][IFS_UDP].packets,
- dc->bpc[1][IFS_TX][IFS_PROTO_OTHER].bytes,
- dc->bpc[1][IFS_TX][IFS_PROTO_OTHER].packets);
- else
- res = kasprintf(GFP_ATOMIC, "data_counters@%p{...}", dc);
- _bug_on_err_or_null(res);
- return res;
-}
-
-char *pp_tag_node(struct tag_node *tn)
-{
- char *tag_str;
- char *res;
-
- if (!tn) {
- res = kasprintf(GFP_ATOMIC, "tag_node@null{}");
- _bug_on_err_or_null(res);
- return res;
- }
- tag_str = pp_tag_t(&tn->tag);
- res = kasprintf(GFP_ATOMIC,
- "tag_node@%p{tag=%s}",
- tn, tag_str);
- _bug_on_err_or_null(res);
- kfree(tag_str);
- return res;
-}
-
-char *pp_tag_ref(struct tag_ref *tr)
-{
- char *tn_str;
- char *res;
-
- if (!tr) {
- res = kasprintf(GFP_ATOMIC, "tag_ref@null{}");
- _bug_on_err_or_null(res);
- return res;
- }
- tn_str = pp_tag_node(&tr->tn);
- res = kasprintf(GFP_ATOMIC,
- "tag_ref@%p{%s, num_sock_tags=%d}",
- tr, tn_str, tr->num_sock_tags);
- _bug_on_err_or_null(res);
- kfree(tn_str);
- return res;
-}
-
-char *pp_tag_stat(struct tag_stat *ts)
-{
- char *tn_str;
- char *counters_str;
- char *parent_counters_str;
- char *res;
-
- if (!ts) {
- res = kasprintf(GFP_ATOMIC, "tag_stat@null{}");
- _bug_on_err_or_null(res);
- return res;
- }
- tn_str = pp_tag_node(&ts->tn);
- counters_str = pp_data_counters(&ts->counters, true);
- parent_counters_str = pp_data_counters(ts->parent_counters, false);
- res = kasprintf(GFP_ATOMIC,
- "tag_stat@%p{%s, counters=%s, parent_counters=%s}",
- ts, tn_str, counters_str, parent_counters_str);
- _bug_on_err_or_null(res);
- kfree(tn_str);
- kfree(counters_str);
- kfree(parent_counters_str);
- return res;
-}
-
-char *pp_iface_stat(struct iface_stat *is)
-{
- char *res;
- if (!is) {
- res = kasprintf(GFP_ATOMIC, "iface_stat@null{}");
- } else {
- struct data_counters *cnts = &is->totals_via_skb;
- res = kasprintf(GFP_ATOMIC, "iface_stat@%p{"
- "list=list_head{...}, "
- "ifname=%s, "
- "total_dev={rx={bytes=%llu, "
- "packets=%llu}, "
- "tx={bytes=%llu, "
- "packets=%llu}}, "
- "total_skb={rx={bytes=%llu, "
- "packets=%llu}, "
- "tx={bytes=%llu, "
- "packets=%llu}}, "
- "last_known_valid=%d, "
- "last_known={rx={bytes=%llu, "
- "packets=%llu}, "
- "tx={bytes=%llu, "
- "packets=%llu}}, "
- "active=%d, "
- "net_dev=%p, "
- "proc_ptr=%p, "
- "tag_stat_tree=rb_root{...}}",
- is,
- is->ifname,
- is->totals_via_dev[IFS_RX].bytes,
- is->totals_via_dev[IFS_RX].packets,
- is->totals_via_dev[IFS_TX].bytes,
- is->totals_via_dev[IFS_TX].packets,
- dc_sum_bytes(cnts, 0, IFS_RX),
- dc_sum_packets(cnts, 0, IFS_RX),
- dc_sum_bytes(cnts, 0, IFS_TX),
- dc_sum_packets(cnts, 0, IFS_TX),
- is->last_known_valid,
- is->last_known[IFS_RX].bytes,
- is->last_known[IFS_RX].packets,
- is->last_known[IFS_TX].bytes,
- is->last_known[IFS_TX].packets,
- is->active,
- is->net_dev,
- is->proc_ptr);
- }
- _bug_on_err_or_null(res);
- return res;
-}
-
-char *pp_sock_tag(struct sock_tag *st)
-{
- char *tag_str;
- char *res;
-
- if (!st) {
- res = kasprintf(GFP_ATOMIC, "sock_tag@null{}");
- _bug_on_err_or_null(res);
- return res;
- }
- tag_str = pp_tag_t(&st->tag);
- res = kasprintf(GFP_ATOMIC, "sock_tag@%p{"
- "sock_node=rb_node{...}, "
- "sk=%p (f_count=%d), list=list_head{...}, "
- "pid=%u, tag=%s}",
- st, st->sk, atomic_read(
- &st->sk->sk_refcnt),
- st->pid, tag_str);
- _bug_on_err_or_null(res);
- kfree(tag_str);
- return res;
-}
-
-char *pp_uid_tag_data(struct uid_tag_data *utd)
-{
- char *res;
-
- if (!utd)
- res = kasprintf(GFP_ATOMIC, "uid_tag_data@null{}");
- else
- res = kasprintf(GFP_ATOMIC, "uid_tag_data@%p{"
- "uid=%u, num_active_acct_tags=%d, "
- "num_pqd=%d, "
- "tag_node_tree=rb_root{...}, "
- "proc_qtu_data_tree=rb_root{...}}",
- utd, utd->uid,
- utd->num_active_tags, utd->num_pqd);
- _bug_on_err_or_null(res);
- return res;
-}
-
-char *pp_proc_qtu_data(struct proc_qtu_data *pqd)
-{
- char *parent_tag_data_str;
- char *res;
-
- if (!pqd) {
- res = kasprintf(GFP_ATOMIC, "proc_qtu_data@null{}");
- _bug_on_err_or_null(res);
- return res;
- }
- parent_tag_data_str = pp_uid_tag_data(pqd->parent_tag_data);
- res = kasprintf(GFP_ATOMIC, "proc_qtu_data@%p{"
- "node=rb_node{...}, pid=%u, "
- "parent_tag_data=%s, "
- "sock_tag_list=list_head{...}}",
- pqd, pqd->pid, parent_tag_data_str
- );
- _bug_on_err_or_null(res);
- kfree(parent_tag_data_str);
- return res;
-}
-
-/*------------------------------------------*/
-void prdebug_sock_tag_tree(int indent_level,
- struct rb_root *sock_tag_tree)
-{
- struct rb_node *node;
- struct sock_tag *sock_tag_entry;
- char *str;
-
- if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
- return;
-
- if (RB_EMPTY_ROOT(sock_tag_tree)) {
- str = "sock_tag_tree=rb_root{}";
- pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
- return;
- }
-
- str = "sock_tag_tree=rb_root{";
- pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
- indent_level++;
- for (node = rb_first(sock_tag_tree);
- node;
- node = rb_next(node)) {
- sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
- str = pp_sock_tag(sock_tag_entry);
- pr_debug("%*d: %s,\n", indent_level*2, indent_level, str);
- kfree(str);
- }
- indent_level--;
- str = "}";
- pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
-}
-
-void prdebug_sock_tag_list(int indent_level,
- struct list_head *sock_tag_list)
-{
- struct sock_tag *sock_tag_entry;
- char *str;
-
- if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
- return;
-
- if (list_empty(sock_tag_list)) {
- str = "sock_tag_list=list_head{}";
- pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
- return;
- }
-
- str = "sock_tag_list=list_head{";
- pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
- indent_level++;
- list_for_each_entry(sock_tag_entry, sock_tag_list, list) {
- str = pp_sock_tag(sock_tag_entry);
- pr_debug("%*d: %s,\n", indent_level*2, indent_level, str);
- kfree(str);
- }
- indent_level--;
- str = "}";
- pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
-}
-
-void prdebug_proc_qtu_data_tree(int indent_level,
- struct rb_root *proc_qtu_data_tree)
-{
- char *str;
- struct rb_node *node;
- struct proc_qtu_data *proc_qtu_data_entry;
-
- if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
- return;
-
- if (RB_EMPTY_ROOT(proc_qtu_data_tree)) {
- str = "proc_qtu_data_tree=rb_root{}";
- pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
- return;
- }
-
- str = "proc_qtu_data_tree=rb_root{";
- pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
- indent_level++;
- for (node = rb_first(proc_qtu_data_tree);
- node;
- node = rb_next(node)) {
- proc_qtu_data_entry = rb_entry(node,
- struct proc_qtu_data,
- node);
- str = pp_proc_qtu_data(proc_qtu_data_entry);
- pr_debug("%*d: %s,\n", indent_level*2, indent_level,
- str);
- kfree(str);
- indent_level++;
- prdebug_sock_tag_list(indent_level,
- &proc_qtu_data_entry->sock_tag_list);
- indent_level--;
-
- }
- indent_level--;
- str = "}";
- pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
-}
-
-void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree)
-{
- char *str;
- struct rb_node *node;
- struct tag_ref *tag_ref_entry;
-
- if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
- return;
-
- if (RB_EMPTY_ROOT(tag_ref_tree)) {
- str = "tag_ref_tree{}";
- pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
- return;
- }
-
- str = "tag_ref_tree{";
- pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
- indent_level++;
- for (node = rb_first(tag_ref_tree);
- node;
- node = rb_next(node)) {
- tag_ref_entry = rb_entry(node,
- struct tag_ref,
- tn.node);
- str = pp_tag_ref(tag_ref_entry);
- pr_debug("%*d: %s,\n", indent_level*2, indent_level,
- str);
- kfree(str);
- }
- indent_level--;
- str = "}";
- pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
-}
-
-void prdebug_uid_tag_data_tree(int indent_level,
- struct rb_root *uid_tag_data_tree)
-{
- char *str;
- struct rb_node *node;
- struct uid_tag_data *uid_tag_data_entry;
-
- if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
- return;
-
- if (RB_EMPTY_ROOT(uid_tag_data_tree)) {
- str = "uid_tag_data_tree=rb_root{}";
- pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
- return;
- }
-
- str = "uid_tag_data_tree=rb_root{";
- pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
- indent_level++;
- for (node = rb_first(uid_tag_data_tree);
- node;
- node = rb_next(node)) {
- uid_tag_data_entry = rb_entry(node, struct uid_tag_data,
- node);
- str = pp_uid_tag_data(uid_tag_data_entry);
- pr_debug("%*d: %s,\n", indent_level*2, indent_level, str);
- kfree(str);
- if (!RB_EMPTY_ROOT(&uid_tag_data_entry->tag_ref_tree)) {
- indent_level++;
- prdebug_tag_ref_tree(indent_level,
- &uid_tag_data_entry->tag_ref_tree);
- indent_level--;
- }
- }
- indent_level--;
- str = "}";
- pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
-}
-
-void prdebug_tag_stat_tree(int indent_level,
- struct rb_root *tag_stat_tree)
-{
- char *str;
- struct rb_node *node;
- struct tag_stat *ts_entry;
-
- if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
- return;
-
- if (RB_EMPTY_ROOT(tag_stat_tree)) {
- str = "tag_stat_tree{}";
- pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
- return;
- }
-
- str = "tag_stat_tree{";
- pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
- indent_level++;
- for (node = rb_first(tag_stat_tree);
- node;
- node = rb_next(node)) {
- ts_entry = rb_entry(node, struct tag_stat, tn.node);
- str = pp_tag_stat(ts_entry);
- pr_debug("%*d: %s\n", indent_level*2, indent_level,
- str);
- kfree(str);
- }
- indent_level--;
- str = "}";
- pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
-}
-
-void prdebug_iface_stat_list(int indent_level,
- struct list_head *iface_stat_list)
-{
- char *str;
- struct iface_stat *iface_entry;
-
- if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
- return;
-
- if (list_empty(iface_stat_list)) {
- str = "iface_stat_list=list_head{}";
- pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
- return;
- }
-
- str = "iface_stat_list=list_head{";
- pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
- indent_level++;
- list_for_each_entry(iface_entry, iface_stat_list, list) {
- str = pp_iface_stat(iface_entry);
- pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
- kfree(str);
-
- spin_lock_bh(&iface_entry->tag_stat_list_lock);
- if (!RB_EMPTY_ROOT(&iface_entry->tag_stat_tree)) {
- indent_level++;
- prdebug_tag_stat_tree(indent_level,
- &iface_entry->tag_stat_tree);
- indent_level--;
- }
- spin_unlock_bh(&iface_entry->tag_stat_list_lock);
- }
- indent_level--;
- str = "}";
- pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
-}
-
-#endif /* ifdef DDEBUG */
-/*------------------------------------------*/
-static const char * const netdev_event_strings[] = {
- "netdev_unknown",
- "NETDEV_UP",
- "NETDEV_DOWN",
- "NETDEV_REBOOT",
- "NETDEV_CHANGE",
- "NETDEV_REGISTER",
- "NETDEV_UNREGISTER",
- "NETDEV_CHANGEMTU",
- "NETDEV_CHANGEADDR",
- "NETDEV_GOING_DOWN",
- "NETDEV_CHANGENAME",
- "NETDEV_FEAT_CHANGE",
- "NETDEV_BONDING_FAILOVER",
- "NETDEV_PRE_UP",
- "NETDEV_PRE_TYPE_CHANGE",
- "NETDEV_POST_TYPE_CHANGE",
- "NETDEV_POST_INIT",
- "NETDEV_UNREGISTER_BATCH",
- "NETDEV_RELEASE",
- "NETDEV_NOTIFY_PEERS",
- "NETDEV_JOIN",
-};
-
-const char *netdev_evt_str(int netdev_event)
-{
- if (netdev_event < 0
- || netdev_event >= ARRAY_SIZE(netdev_event_strings))
- return "bad event num";
- return netdev_event_strings[netdev_event];
-}
diff --git a/net/netfilter/xt_qtaguid_print.h b/net/netfilter/xt_qtaguid_print.h
deleted file mode 100644
index b63871a0be5a..000000000000
--- a/net/netfilter/xt_qtaguid_print.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Pretty printing Support for iptables xt_qtaguid module.
- *
- * (C) 2011 Google, Inc
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#ifndef __XT_QTAGUID_PRINT_H__
-#define __XT_QTAGUID_PRINT_H__
-
-#include "xt_qtaguid_internal.h"
-
-#ifdef DDEBUG
-
-char *pp_tag_t(tag_t *tag);
-char *pp_data_counters(struct data_counters *dc, bool showValues);
-char *pp_tag_node(struct tag_node *tn);
-char *pp_tag_ref(struct tag_ref *tr);
-char *pp_tag_stat(struct tag_stat *ts);
-char *pp_iface_stat(struct iface_stat *is);
-char *pp_sock_tag(struct sock_tag *st);
-char *pp_uid_tag_data(struct uid_tag_data *qtd);
-char *pp_proc_qtu_data(struct proc_qtu_data *pqd);
-
-/*------------------------------------------*/
-void prdebug_sock_tag_list(int indent_level,
- struct list_head *sock_tag_list);
-void prdebug_sock_tag_tree(int indent_level,
- struct rb_root *sock_tag_tree);
-void prdebug_proc_qtu_data_tree(int indent_level,
- struct rb_root *proc_qtu_data_tree);
-void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree);
-void prdebug_uid_tag_data_tree(int indent_level,
- struct rb_root *uid_tag_data_tree);
-void prdebug_tag_stat_tree(int indent_level,
- struct rb_root *tag_stat_tree);
-void prdebug_iface_stat_list(int indent_level,
- struct list_head *iface_stat_list);
-
-#else
-
-/*------------------------------------------*/
-static inline char *pp_tag_t(tag_t *tag)
-{
- return NULL;
-}
-static inline char *pp_data_counters(struct data_counters *dc, bool showValues)
-{
- return NULL;
-}
-static inline char *pp_tag_node(struct tag_node *tn)
-{
- return NULL;
-}
-static inline char *pp_tag_ref(struct tag_ref *tr)
-{
- return NULL;
-}
-static inline char *pp_tag_stat(struct tag_stat *ts)
-{
- return NULL;
-}
-static inline char *pp_iface_stat(struct iface_stat *is)
-{
- return NULL;
-}
-static inline char *pp_sock_tag(struct sock_tag *st)
-{
- return NULL;
-}
-static inline char *pp_uid_tag_data(struct uid_tag_data *qtd)
-{
- return NULL;
-}
-static inline char *pp_proc_qtu_data(struct proc_qtu_data *pqd)
-{
- return NULL;
-}
-
-/*------------------------------------------*/
-static inline
-void prdebug_sock_tag_list(int indent_level,
- struct list_head *sock_tag_list)
-{
-}
-static inline
-void prdebug_sock_tag_tree(int indent_level,
- struct rb_root *sock_tag_tree)
-{
-}
-static inline
-void prdebug_proc_qtu_data_tree(int indent_level,
- struct rb_root *proc_qtu_data_tree)
-{
-}
-static inline
-void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree)
-{
-}
-static inline
-void prdebug_uid_tag_data_tree(int indent_level,
- struct rb_root *uid_tag_data_tree)
-{
-}
-static inline
-void prdebug_tag_stat_tree(int indent_level,
- struct rb_root *tag_stat_tree)
-{
-}
-static inline
-void prdebug_iface_stat_list(int indent_level,
- struct list_head *iface_stat_list)
-{
-}
-#endif
-/*------------------------------------------*/
-const char *netdev_evt_str(int netdev_event);
-#endif /* ifndef __XT_QTAGUID_PRINT_H__ */
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index 44c8eb4c9d66..10d61a6eed71 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -73,6 +73,7 @@ static struct xt_match quota_mt_reg __read_mostly = {
.checkentry = quota_mt_check,
.destroy = quota_mt_destroy,
.matchsize = sizeof(struct xt_quota_info),
+ .usersize = offsetof(struct xt_quota_info, master),
.me = THIS_MODULE,
};
diff --git a/net/netfilter/xt_quota2.c b/net/netfilter/xt_quota2.c
index 8c481e359d94..cb7b53f6dade 100644
--- a/net/netfilter/xt_quota2.c
+++ b/net/netfilter/xt_quota2.c
@@ -136,6 +136,8 @@ static ssize_t quota_proc_write(struct file *file, const char __user *input,
if (copy_from_user(buf, input, size) != 0)
return -EFAULT;
buf[sizeof(buf)-1] = '\0';
+ if (size < sizeof(buf))
+ buf[size] = '\0';
spin_lock_bh(&e->lock);
e->quota = simple_strtoull(buf, NULL, 0);
@@ -283,6 +285,8 @@ quota_mt2(const struct sk_buff *skb, struct xt_action_param *par)
{
struct xt_quota_mtinfo2 *q = (void *)par->matchinfo;
struct xt_quota_counter *e = q->master;
+ int charge = (q->flags & XT_QUOTA_PACKET) ? 1 : skb->len;
+ bool no_change = q->flags & XT_QUOTA_NO_CHANGE;
bool ret = q->flags & XT_QUOTA_INVERT;
spin_lock_bh(&e->lock);
@@ -291,20 +295,17 @@ quota_mt2(const struct sk_buff *skb, struct xt_action_param *par)
* While no_change is pointless in "grow" mode, we will
* implement it here simply to have a consistent behavior.
*/
- if (!(q->flags & XT_QUOTA_NO_CHANGE)) {
- e->quota += (q->flags & XT_QUOTA_PACKET) ? 1 : skb->len;
- }
- ret = true;
+ if (!no_change)
+ e->quota += charge;
+ ret = true; /* note: does not respect inversion (bug??) */
} else {
- if (e->quota >= skb->len) {
- if (!(q->flags & XT_QUOTA_NO_CHANGE))
- e->quota -= (q->flags & XT_QUOTA_PACKET) ? 1 : skb->len;
+ if (e->quota > charge) {
+ if (!no_change)
+ e->quota -= charge;
ret = !ret;
- } else {
+ } else if (e->quota) {
/* We are transitioning, log that fact. */
- if (e->quota) {
- quota2_log(par->in, par->out, e, q->name);
- }
+ quota2_log(par->in, par->out, e, q->name);
/* we do not allow even small packets from now on */
e->quota = 0;
}
@@ -322,6 +323,7 @@ static struct xt_match quota_mt2_reg[] __read_mostly = {
.match = quota_mt2,
.destroy = quota_mt2_destroy,
.matchsize = sizeof(struct xt_quota_mtinfo2),
+ .usersize = offsetof(struct xt_quota_mtinfo2, master),
.me = THIS_MODULE,
},
{
@@ -332,6 +334,7 @@ static struct xt_match quota_mt2_reg[] __read_mostly = {
.match = quota_mt2,
.destroy = quota_mt2_destroy,
.matchsize = sizeof(struct xt_quota_mtinfo2),
+ .usersize = offsetof(struct xt_quota_mtinfo2, master),
.me = THIS_MODULE,
},
};
diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c
index 7720b036d76a..d46dc9ff591f 100644
--- a/net/netfilter/xt_rateest.c
+++ b/net/netfilter/xt_rateest.c
@@ -135,6 +135,7 @@ static struct xt_match xt_rateest_mt_reg __read_mostly = {
.checkentry = xt_rateest_mt_checkentry,
.destroy = xt_rateest_mt_destroy,
.matchsize = sizeof(struct xt_rateest_match_info),
+ .usersize = offsetof(struct xt_rateest_match_info, est1),
.me = THIS_MODULE,
};
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index cd53b861a15c..ffe673c6a248 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -156,7 +156,8 @@ static void recent_entry_remove(struct recent_table *t, struct recent_entry *e)
/*
* Drop entries with timestamps older then 'time'.
*/
-static void recent_entry_reap(struct recent_table *t, unsigned long time)
+static void recent_entry_reap(struct recent_table *t, unsigned long time,
+ struct recent_entry *working, bool update)
{
struct recent_entry *e;
@@ -166,6 +167,12 @@ static void recent_entry_reap(struct recent_table *t, unsigned long time)
e = list_entry(t->lru_list.next, struct recent_entry, lru_list);
/*
+ * Do not reap the entry which are going to be updated.
+ */
+ if (e == working && update)
+ return;
+
+ /*
* The last time stamp is the most recent.
*/
if (time_after(time, e->stamps[e->index-1]))
@@ -307,7 +314,8 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par)
/* info->seconds must be non-zero */
if (info->check_set & XT_RECENT_REAP)
- recent_entry_reap(t, time);
+ recent_entry_reap(t, time, e,
+ info->check_set & XT_RECENT_UPDATE && ret);
}
if (info->check_set & XT_RECENT_SET ||
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 939821821fcb..c5d930670cf0 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -112,14 +112,15 @@ extract_icmp4_fields(const struct sk_buff *skb,
* box.
*/
static struct sock *
-xt_socket_get_sock_v4(struct net *net, const u8 protocol,
+xt_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff,
+ const u8 protocol,
const __be32 saddr, const __be32 daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in)
{
switch (protocol) {
case IPPROTO_TCP:
- return __inet_lookup(net, &tcp_hashinfo,
+ return __inet_lookup(net, &tcp_hashinfo, skb, doff,
saddr, sport, daddr, dport,
in->ifindex);
case IPPROTO_UDP:
@@ -148,6 +149,8 @@ struct sock *xt_socket_lookup_slow_v4(struct net *net,
const struct net_device *indev)
{
const struct iphdr *iph = ip_hdr(skb);
+ struct sk_buff *data_skb = NULL;
+ int doff = 0;
struct sock *sk = skb->sk;
__be32 uninitialized_var(daddr), uninitialized_var(saddr);
__be16 uninitialized_var(dport), uninitialized_var(sport);
@@ -173,6 +176,10 @@ struct sock *xt_socket_lookup_slow_v4(struct net *net,
sport = hp->source;
daddr = iph->daddr;
dport = hp->dest;
+ data_skb = (struct sk_buff *)skb;
+ doff = iph->protocol == IPPROTO_TCP ?
+ ip_hdrlen(skb) + __tcp_hdrlen((struct tcphdr *)hp) :
+ ip_hdrlen(skb) + sizeof(*hp);
} else if (iph->protocol == IPPROTO_ICMP) {
if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr,
@@ -205,9 +212,9 @@ struct sock *xt_socket_lookup_slow_v4(struct net *net,
if (sk)
atomic_inc(&sk->sk_refcnt);
else
- sk = xt_socket_get_sock_v4(dev_net(skb->dev), protocol,
- saddr, daddr, sport, dport,
- indev);
+ sk = xt_socket_get_sock_v4(dev_net(skb->dev), data_skb, doff,
+ protocol, saddr, daddr, sport,
+ dport, indev);
return sk;
}
@@ -328,14 +335,15 @@ extract_icmp6_fields(const struct sk_buff *skb,
}
static struct sock *
-xt_socket_get_sock_v6(struct net *net, const u8 protocol,
+xt_socket_get_sock_v6(struct net *net, struct sk_buff *skb, int doff,
+ const u8 protocol,
const struct in6_addr *saddr, const struct in6_addr *daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in)
{
switch (protocol) {
case IPPROTO_TCP:
- return inet6_lookup(net, &tcp_hashinfo,
+ return inet6_lookup(net, &tcp_hashinfo, skb, doff,
saddr, sport, daddr, dport,
in->ifindex);
case IPPROTO_UDP:
@@ -354,6 +362,8 @@ struct sock *xt_socket_lookup_slow_v6(struct net *net,
__be16 uninitialized_var(dport), uninitialized_var(sport);
const struct in6_addr *daddr = NULL, *saddr = NULL;
struct ipv6hdr *iph = ipv6_hdr(skb);
+ struct sk_buff *data_skb = NULL;
+ int doff = 0;
int thoff = 0, tproto;
tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
@@ -375,6 +385,10 @@ struct sock *xt_socket_lookup_slow_v6(struct net *net,
sport = hp->source;
daddr = &iph->daddr;
dport = hp->dest;
+ data_skb = (struct sk_buff *)skb;
+ doff = tproto == IPPROTO_TCP ?
+ thoff + __tcp_hdrlen((struct tcphdr *)hp) :
+ thoff + sizeof(*hp);
} else if (tproto == IPPROTO_ICMPV6) {
struct ipv6hdr ipv6_var;
@@ -389,8 +403,8 @@ struct sock *xt_socket_lookup_slow_v6(struct net *net,
if (sk)
atomic_inc(&sk->sk_refcnt);
else
- sk = xt_socket_get_sock_v6(dev_net(skb->dev), tproto,
- saddr, daddr, sport, dport,
+ sk = xt_socket_get_sock_v6(dev_net(skb->dev), data_skb, doff,
+ tproto, saddr, daddr, sport, dport,
indev);
return sk;
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index 11de55e7a868..8710fdba2ae2 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -84,6 +84,7 @@ static struct xt_match xt_statistic_mt_reg __read_mostly = {
.checkentry = statistic_mt_check,
.destroy = statistic_mt_destroy,
.matchsize = sizeof(struct xt_statistic_info),
+ .usersize = offsetof(struct xt_statistic_info, master),
.me = THIS_MODULE,
};
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index 0bc3460319c8..423293ee57c2 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -77,6 +77,7 @@ static struct xt_match xt_string_mt_reg __read_mostly = {
.match = string_mt,
.destroy = string_mt_destroy,
.matchsize = sizeof(struct xt_string_info),
+ .usersize = offsetof(struct xt_string_info, config),
.me = THIS_MODULE,
};
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index 7fd1104ba900..422fac2a4a3c 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -163,8 +163,8 @@ static int netlbl_cipsov4_add_std(struct genl_info *info,
return -ENOMEM;
doi_def->map.std = kzalloc(sizeof(*doi_def->map.std), GFP_KERNEL);
if (doi_def->map.std == NULL) {
- ret_val = -ENOMEM;
- goto add_std_failure;
+ kfree(doi_def);
+ return -ENOMEM;
}
doi_def->type = CIPSO_V4_MAP_TRANS;
@@ -205,14 +205,14 @@ static int netlbl_cipsov4_add_std(struct genl_info *info,
}
doi_def->map.std->lvl.local = kcalloc(doi_def->map.std->lvl.local_size,
sizeof(u32),
- GFP_KERNEL);
+ GFP_KERNEL | __GFP_NOWARN);
if (doi_def->map.std->lvl.local == NULL) {
ret_val = -ENOMEM;
goto add_std_failure;
}
doi_def->map.std->lvl.cipso = kcalloc(doi_def->map.std->lvl.cipso_size,
sizeof(u32),
- GFP_KERNEL);
+ GFP_KERNEL | __GFP_NOWARN);
if (doi_def->map.std->lvl.cipso == NULL) {
ret_val = -ENOMEM;
goto add_std_failure;
@@ -279,7 +279,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info,
doi_def->map.std->cat.local = kcalloc(
doi_def->map.std->cat.local_size,
sizeof(u32),
- GFP_KERNEL);
+ GFP_KERNEL | __GFP_NOWARN);
if (doi_def->map.std->cat.local == NULL) {
ret_val = -ENOMEM;
goto add_std_failure;
@@ -287,7 +287,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info,
doi_def->map.std->cat.cipso = kcalloc(
doi_def->map.std->cat.cipso_size,
sizeof(u32),
- GFP_KERNEL);
+ GFP_KERNEL | __GFP_NOWARN);
if (doi_def->map.std->cat.cipso == NULL) {
ret_val = -ENOMEM;
goto add_std_failure;
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index bfa2b6d5b5cf..25ab12e25e05 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -605,6 +605,12 @@ int netlbl_catmap_getlong(struct netlbl_lsm_catmap *catmap,
if ((off & (BITS_PER_LONG - 1)) != 0)
return -EINVAL;
+ /* a null catmap is equivalent to an empty one */
+ if (!catmap) {
+ *offset = (u32)-1;
+ return 0;
+ }
+
if (off < catmap->startbit) {
off = catmap->startbit;
*offset = off;
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index 13f777f20995..5f1218dc9162 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -92,6 +92,7 @@ static const struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = {
static int netlbl_mgmt_add_common(struct genl_info *info,
struct netlbl_audit *audit_info)
{
+ void *pmap = NULL;
int ret_val = -EINVAL;
struct netlbl_domaddr_map *addrmap = NULL;
struct cipso_v4_doi *cipsov4 = NULL;
@@ -165,6 +166,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info,
ret_val = -ENOMEM;
goto add_free_addrmap;
}
+ pmap = map;
map->list.addr = addr->s_addr & mask->s_addr;
map->list.mask = mask->s_addr;
map->list.valid = 1;
@@ -173,10 +175,8 @@ static int netlbl_mgmt_add_common(struct genl_info *info,
map->def.cipso = cipsov4;
ret_val = netlbl_af4list_add(&map->list, &addrmap->list4);
- if (ret_val != 0) {
- kfree(map);
- goto add_free_addrmap;
- }
+ if (ret_val != 0)
+ goto add_free_map;
entry->def.type = NETLBL_NLTYPE_ADDRSELECT;
entry->def.addrsel = addrmap;
@@ -212,6 +212,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info,
ret_val = -ENOMEM;
goto add_free_addrmap;
}
+ pmap = map;
map->list.addr = *addr;
map->list.addr.s6_addr32[0] &= mask->s6_addr32[0];
map->list.addr.s6_addr32[1] &= mask->s6_addr32[1];
@@ -222,10 +223,8 @@ static int netlbl_mgmt_add_common(struct genl_info *info,
map->def.type = entry->def.type;
ret_val = netlbl_af6list_add(&map->list, &addrmap->list6);
- if (ret_val != 0) {
- kfree(map);
- goto add_free_addrmap;
- }
+ if (ret_val != 0)
+ goto add_free_map;
entry->def.type = NETLBL_NLTYPE_ADDRSELECT;
entry->def.addrsel = addrmap;
@@ -234,10 +233,12 @@ static int netlbl_mgmt_add_common(struct genl_info *info,
ret_val = netlbl_domhsh_add(entry, audit_info);
if (ret_val != 0)
- goto add_free_addrmap;
+ goto add_free_map;
return 0;
+add_free_map:
+ kfree(pmap);
add_free_addrmap:
kfree(addrmap);
add_doi_put_def:
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 9f4ec16abfcf..5210f5546e3e 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -1185,12 +1185,13 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
struct netlbl_unlhsh_walk_arg cb_arg;
u32 skip_bkt = cb->args[0];
u32 skip_chain = cb->args[1];
- u32 iter_bkt;
- u32 iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0;
+ u32 skip_addr4 = cb->args[2];
+ u32 iter_bkt, iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0;
struct netlbl_unlhsh_iface *iface;
struct list_head *iter_list;
struct netlbl_af4list *addr4;
#if IS_ENABLED(CONFIG_IPV6)
+ u32 skip_addr6 = cb->args[3];
struct netlbl_af6list *addr6;
#endif
@@ -1201,7 +1202,7 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
rcu_read_lock();
for (iter_bkt = skip_bkt;
iter_bkt < rcu_dereference(netlbl_unlhsh)->size;
- iter_bkt++, iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0) {
+ iter_bkt++) {
iter_list = &rcu_dereference(netlbl_unlhsh)->tbl[iter_bkt];
list_for_each_entry_rcu(iface, iter_list, list) {
if (!iface->valid ||
@@ -1209,7 +1210,7 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
continue;
netlbl_af4list_foreach_rcu(addr4,
&iface->addr4_list) {
- if (iter_addr4++ < cb->args[2])
+ if (iter_addr4++ < skip_addr4)
continue;
if (netlbl_unlabel_staticlist_gen(
NLBL_UNLABEL_C_STATICLIST,
@@ -1222,10 +1223,12 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
goto unlabel_staticlist_return;
}
}
+ iter_addr4 = 0;
+ skip_addr4 = 0;
#if IS_ENABLED(CONFIG_IPV6)
netlbl_af6list_foreach_rcu(addr6,
&iface->addr6_list) {
- if (iter_addr6++ < cb->args[3])
+ if (iter_addr6++ < skip_addr6)
continue;
if (netlbl_unlabel_staticlist_gen(
NLBL_UNLABEL_C_STATICLIST,
@@ -1238,8 +1241,12 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
goto unlabel_staticlist_return;
}
}
+ iter_addr6 = 0;
+ skip_addr6 = 0;
#endif /* IPv6 */
}
+ iter_chain = 0;
+ skip_chain = 0;
}
unlabel_staticlist_return:
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 6e5c14309fb8..98d8453eee27 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -419,11 +419,13 @@ void netlink_table_ungrab(void)
static inline void
netlink_lock_table(void)
{
+ unsigned long flags;
+
/* read_lock() synchronizes us to netlink_table_grab */
- read_lock(&nl_table_lock);
+ read_lock_irqsave(&nl_table_lock, flags);
atomic_inc(&nl_table_users);
- read_unlock(&nl_table_lock);
+ read_unlock_irqrestore(&nl_table_lock, flags);
}
static inline void
@@ -556,7 +558,10 @@ static int netlink_insert(struct sock *sk, u32 portid)
/* We need to ensure that the socket is hashed and visible. */
smp_wmb();
- nlk_sk(sk)->bound = portid;
+ /* Paired with lockless reads from netlink_bind(),
+ * netlink_connect() and netlink_sendmsg().
+ */
+ WRITE_ONCE(nlk_sk(sk)->bound, portid);
err:
release_sock(sk);
@@ -971,7 +976,8 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
else if (nlk->ngroups < 8*sizeof(groups))
groups &= (1UL << nlk->ngroups) - 1;
- bound = nlk->bound;
+ /* Paired with WRITE_ONCE() in netlink_insert() */
+ bound = READ_ONCE(nlk->bound);
if (bound) {
/* Ensure nlk->portid is up-to-date. */
smp_rmb();
@@ -983,7 +989,8 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
if (nlk->netlink_bind && groups) {
int group;
- for (group = 0; group < nlk->ngroups; group++) {
+ /* nl_groups is a u32, so cap the maximum groups we can bind */
+ for (group = 0; group < BITS_PER_TYPE(u32); group++) {
if (!test_bit(group, &groups))
continue;
err = nlk->netlink_bind(net, group + 1);
@@ -1002,7 +1009,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
netlink_insert(sk, nladdr->nl_pid) :
netlink_autobind(sock);
if (err) {
- netlink_undo_bind(nlk->ngroups, groups, sk);
+ netlink_undo_bind(BITS_PER_TYPE(u32), groups, sk);
return err;
}
}
@@ -1050,8 +1057,9 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
/* No need for barriers here as we return to user-space without
* using any of the bound attributes.
+ * Paired with WRITE_ONCE() in netlink_insert().
*/
- if (!nlk->bound)
+ if (!READ_ONCE(nlk->bound))
err = netlink_autobind(sock);
if (err == 0) {
@@ -1776,6 +1784,11 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
if (msg->msg_flags&MSG_OOB)
return -EOPNOTSUPP;
+ if (len == 0) {
+ pr_warn_once("Zero length message leads to an empty skb\n");
+ return -ENODATA;
+ }
+
err = scm_send(sock, msg, &scm, true);
if (err < 0)
return err;
@@ -1798,7 +1811,8 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
dst_group = nlk->dst_group;
}
- if (!nlk->bound) {
+ /* Paired with WRITE_ONCE() in netlink_insert() */
+ if (!READ_ONCE(nlk->bound)) {
err = netlink_autobind(sock);
if (err)
goto out;
@@ -2382,13 +2396,15 @@ int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid,
/* errors reported via destination sk->sk_err, but propagate
* delivery errors if NETLINK_BROADCAST_ERROR flag is set */
err = nlmsg_multicast(sk, skb, exclude_portid, group, flags);
+ if (err == -ESRCH)
+ err = 0;
}
if (report) {
int err2;
err2 = nlmsg_unicast(sk, skb, portid);
- if (!err || err == -ESRCH)
+ if (!err)
err = err2;
}
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 8336c9607a70..3fc00c320a9f 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -1007,63 +1007,11 @@ static struct genl_multicast_group genl_ctrl_groups[] = {
{ .name = "notify", },
};
-static int genl_bind(struct net *net, int group)
-{
- int i, err = 0;
-
- down_read(&cb_lock);
- for (i = 0; i < GENL_FAM_TAB_SIZE; i++) {
- struct genl_family *f;
-
- list_for_each_entry(f, genl_family_chain(i), family_list) {
- if (group >= f->mcgrp_offset &&
- group < f->mcgrp_offset + f->n_mcgrps) {
- int fam_grp = group - f->mcgrp_offset;
-
- if (!f->netnsok && net != &init_net)
- err = -ENOENT;
- else if (f->mcast_bind)
- err = f->mcast_bind(net, fam_grp);
- else
- err = 0;
- break;
- }
- }
- }
- up_read(&cb_lock);
-
- return err;
-}
-
-static void genl_unbind(struct net *net, int group)
-{
- int i;
-
- down_read(&cb_lock);
- for (i = 0; i < GENL_FAM_TAB_SIZE; i++) {
- struct genl_family *f;
-
- list_for_each_entry(f, genl_family_chain(i), family_list) {
- if (group >= f->mcgrp_offset &&
- group < f->mcgrp_offset + f->n_mcgrps) {
- int fam_grp = group - f->mcgrp_offset;
-
- if (f->mcast_unbind)
- f->mcast_unbind(net, fam_grp);
- break;
- }
- }
- }
- up_read(&cb_lock);
-}
-
static int __net_init genl_pernet_init(struct net *net)
{
struct netlink_kernel_cfg cfg = {
.input = genl_rcv,
.flags = NL_CFG_F_NONROOT_RECV,
- .bind = genl_bind,
- .unbind = genl_unbind,
};
/* we'll bump the group number right afterwards */
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index d72a4f1558f2..ef6a3d586591 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -199,6 +199,7 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic,
/* refcount initialized at 1 */
spin_unlock_bh(&nr_node_list_lock);
+ nr_neigh_put(nr_neigh);
return 0;
}
nr_node_lock(nr_node);
diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c
index f0ecaec1ff3d..d1a0b7056743 100644
--- a/net/netrom/nr_timer.c
+++ b/net/netrom/nr_timer.c
@@ -125,11 +125,9 @@ static void nr_heartbeat_expiry(unsigned long param)
is accepted() it isn't 'dead' so doesn't get removed. */
if (sock_flag(sk, SOCK_DESTROY) ||
(sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) {
- sock_hold(sk);
bh_unlock_sock(sk);
nr_destroy_socket(sk);
- sock_put(sk);
- return;
+ goto out;
}
break;
@@ -150,6 +148,8 @@ static void nr_heartbeat_expiry(unsigned long param)
nr_start_heartbeat(sk);
bh_unlock_sock(sk);
+out:
+ sock_put(sk);
}
static void nr_t2timer_expiry(unsigned long param)
@@ -163,6 +163,7 @@ static void nr_t2timer_expiry(unsigned long param)
nr_enquiry_response(sk);
}
bh_unlock_sock(sk);
+ sock_put(sk);
}
static void nr_t4timer_expiry(unsigned long param)
@@ -172,6 +173,7 @@ static void nr_t4timer_expiry(unsigned long param)
bh_lock_sock(sk);
nr_sk(sk)->condition &= ~NR_COND_PEER_RX_BUSY;
bh_unlock_sock(sk);
+ sock_put(sk);
}
static void nr_idletimer_expiry(unsigned long param)
@@ -200,6 +202,7 @@ static void nr_idletimer_expiry(unsigned long param)
sock_set_flag(sk, SOCK_DEAD);
}
bh_unlock_sock(sk);
+ sock_put(sk);
}
static void nr_t1timer_expiry(unsigned long param)
@@ -212,8 +215,7 @@ static void nr_t1timer_expiry(unsigned long param)
case NR_STATE_1:
if (nr->n2count == nr->n2) {
nr_disconnect(sk, ETIMEDOUT);
- bh_unlock_sock(sk);
- return;
+ goto out;
} else {
nr->n2count++;
nr_write_internal(sk, NR_CONNREQ);
@@ -223,8 +225,7 @@ static void nr_t1timer_expiry(unsigned long param)
case NR_STATE_2:
if (nr->n2count == nr->n2) {
nr_disconnect(sk, ETIMEDOUT);
- bh_unlock_sock(sk);
- return;
+ goto out;
} else {
nr->n2count++;
nr_write_internal(sk, NR_DISCREQ);
@@ -234,8 +235,7 @@ static void nr_t1timer_expiry(unsigned long param)
case NR_STATE_3:
if (nr->n2count == nr->n2) {
nr_disconnect(sk, ETIMEDOUT);
- bh_unlock_sock(sk);
- return;
+ goto out;
} else {
nr->n2count++;
nr_requeue_frames(sk);
@@ -244,5 +244,7 @@ static void nr_t1timer_expiry(unsigned long param)
}
nr_start_t1timer(sk);
+out:
bh_unlock_sock(sk);
+ sock_put(sk);
}
diff --git a/net/nfc/af_nfc.c b/net/nfc/af_nfc.c
index 54e40fa47822..1859b8e98ded 100644
--- a/net/nfc/af_nfc.c
+++ b/net/nfc/af_nfc.c
@@ -72,6 +72,9 @@ int nfc_proto_register(const struct nfc_protocol *nfc_proto)
proto_tab[nfc_proto->id] = nfc_proto;
write_unlock(&proto_tab_lock);
+ if (rc)
+ proto_unregister(nfc_proto->proto);
+
return rc;
}
EXPORT_SYMBOL(nfc_proto_register);
diff --git a/net/nfc/core.c b/net/nfc/core.c
index 1471e4b0aa2c..8c7f221e1d12 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -106,13 +106,13 @@ int nfc_dev_up(struct nfc_dev *dev)
device_lock(&dev->dev);
- if (dev->rfkill && rfkill_blocked(dev->rfkill)) {
- rc = -ERFKILL;
+ if (!device_is_registered(&dev->dev)) {
+ rc = -ENODEV;
goto error;
}
- if (!device_is_registered(&dev->dev)) {
- rc = -ENODEV;
+ if (dev->rfkill && rfkill_blocked(dev->rfkill)) {
+ rc = -ERFKILL;
goto error;
}
@@ -1120,11 +1120,7 @@ int nfc_register_device(struct nfc_dev *dev)
if (rc)
pr_err("Could not register llcp device\n");
- rc = nfc_genl_device_added(dev);
- if (rc)
- pr_debug("The userspace won't be notified that the device %s was added\n",
- dev_name(&dev->dev));
-
+ device_lock(&dev->dev);
dev->rfkill = rfkill_alloc(dev_name(&dev->dev), &dev->dev,
RFKILL_TYPE_NFC, &nfc_rfkill_ops, dev);
if (dev->rfkill) {
@@ -1133,6 +1129,12 @@ int nfc_register_device(struct nfc_dev *dev)
dev->rfkill = NULL;
}
}
+ device_unlock(&dev->dev);
+
+ rc = nfc_genl_device_added(dev);
+ if (rc)
+ pr_debug("The userspace won't be notified that the device %s was added\n",
+ dev_name(&dev->dev));
return 0;
}
@@ -1149,10 +1151,17 @@ void nfc_unregister_device(struct nfc_dev *dev)
pr_debug("dev_name=%s\n", dev_name(&dev->dev));
+ rc = nfc_genl_device_removed(dev);
+ if (rc)
+ pr_debug("The userspace won't be notified that the device %s "
+ "was removed\n", dev_name(&dev->dev));
+
+ device_lock(&dev->dev);
if (dev->rfkill) {
rfkill_unregister(dev->rfkill);
rfkill_destroy(dev->rfkill);
}
+ device_unlock(&dev->dev);
if (dev->ops->check_presence) {
device_lock(&dev->dev);
@@ -1162,11 +1171,6 @@ void nfc_unregister_device(struct nfc_dev *dev)
cancel_work_sync(&dev->check_pres_work);
}
- rc = nfc_genl_device_removed(dev);
- if (rc)
- pr_debug("The userspace won't be notified that the device %s "
- "was removed\n", dev_name(&dev->dev));
-
nfc_llcp_unregister_device(dev);
mutex_lock(&nfc_devlist_mutex);
diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c
index 23c2a118ac9f..28c60e291c7e 100644
--- a/net/nfc/digital_core.c
+++ b/net/nfc/digital_core.c
@@ -280,6 +280,7 @@ int digital_tg_configure_hw(struct nfc_digital_dev *ddev, int type, int param)
static int digital_tg_listen_mdaa(struct nfc_digital_dev *ddev, u8 rf_tech)
{
struct digital_tg_mdaa_params *params;
+ int rc;
params = kzalloc(sizeof(struct digital_tg_mdaa_params), GFP_KERNEL);
if (!params)
@@ -294,8 +295,12 @@ static int digital_tg_listen_mdaa(struct nfc_digital_dev *ddev, u8 rf_tech)
get_random_bytes(params->nfcid2 + 2, NFC_NFCID2_MAXSIZE - 2);
params->sc = DIGITAL_SENSF_FELICA_SC;
- return digital_send_cmd(ddev, DIGITAL_CMD_TG_LISTEN_MDAA, NULL, params,
- 500, digital_tg_recv_atr_req, NULL);
+ rc = digital_send_cmd(ddev, DIGITAL_CMD_TG_LISTEN_MDAA, NULL, params,
+ 500, digital_tg_recv_atr_req, NULL);
+ if (rc)
+ kfree(params);
+
+ return rc;
}
static int digital_tg_listen_md(struct nfc_digital_dev *ddev, u8 rf_tech)
diff --git a/net/nfc/digital_dep.c b/net/nfc/digital_dep.c
index f72be7433df3..46375ff214c0 100644
--- a/net/nfc/digital_dep.c
+++ b/net/nfc/digital_dep.c
@@ -1187,6 +1187,8 @@ static void digital_tg_recv_dep_req(struct nfc_digital_dev *ddev, void *arg,
}
rc = nfc_tm_data_received(ddev->nfc_dev, resp);
+ if (rc)
+ resp = NULL;
exit:
kfree_skb(ddev->chaining_skb);
diff --git a/net/nfc/digital_technology.c b/net/nfc/digital_technology.c
index fb58ed2dd41d..082dd95f6ef3 100644
--- a/net/nfc/digital_technology.c
+++ b/net/nfc/digital_technology.c
@@ -473,8 +473,12 @@ static int digital_in_send_sdd_req(struct nfc_digital_dev *ddev,
*skb_put(skb, sizeof(u8)) = sel_cmd;
*skb_put(skb, sizeof(u8)) = DIGITAL_SDD_REQ_SEL_PAR;
- return digital_in_send_cmd(ddev, skb, 30, digital_in_recv_sdd_res,
- target);
+ rc = digital_in_send_cmd(ddev, skb, 30, digital_in_recv_sdd_res,
+ target);
+ if (rc)
+ kfree_skb(skb);
+
+ return rc;
}
static void digital_in_recv_sens_res(struct nfc_digital_dev *ddev, void *arg,
diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index 5a58f9f38095..291f24fef19a 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -193,13 +193,20 @@ exit:
void nfc_hci_cmd_received(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd,
struct sk_buff *skb)
{
- u8 gate = hdev->pipes[pipe].gate;
u8 status = NFC_HCI_ANY_OK;
struct hci_create_pipe_resp *create_info;
struct hci_delete_pipe_noti *delete_info;
struct hci_all_pipe_cleared_noti *cleared_info;
+ u8 gate;
- pr_debug("from gate %x pipe %x cmd %x\n", gate, pipe, cmd);
+ pr_debug("from pipe %x cmd %x\n", pipe, cmd);
+
+ if (pipe >= NFC_HCI_MAX_PIPES) {
+ status = NFC_HCI_ANY_E_NOK;
+ goto exit;
+ }
+
+ gate = hdev->pipes[pipe].gate;
switch (cmd) {
case NFC_HCI_ADM_NOTIFY_PIPE_CREATED:
@@ -387,8 +394,14 @@ void nfc_hci_event_received(struct nfc_hci_dev *hdev, u8 pipe, u8 event,
struct sk_buff *skb)
{
int r = 0;
- u8 gate = hdev->pipes[pipe].gate;
+ u8 gate;
+
+ if (pipe >= NFC_HCI_MAX_PIPES) {
+ pr_err("Discarded event %x to invalid pipe %x\n", event, pipe);
+ goto exit;
+ }
+ gate = hdev->pipes[pipe].gate;
if (gate == NFC_HCI_INVALID_GATE) {
pr_err("Discarded event %x to unopened pipe %x\n", event, pipe);
goto exit;
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 44d6b8355eab..1d61a08eafaf 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -119,13 +119,19 @@ static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
llcp_sock->service_name_len,
GFP_KERNEL);
if (!llcp_sock->service_name) {
+ nfc_llcp_local_put(llcp_sock->local);
+ llcp_sock->local = NULL;
+ llcp_sock->dev = NULL;
ret = -ENOMEM;
goto put_dev;
}
llcp_sock->ssap = nfc_llcp_get_sdp_ssap(local, llcp_sock);
if (llcp_sock->ssap == LLCP_SAP_MAX) {
+ nfc_llcp_local_put(llcp_sock->local);
+ llcp_sock->local = NULL;
kfree(llcp_sock->service_name);
llcp_sock->service_name = NULL;
+ llcp_sock->dev = NULL;
ret = -EADDRINUSE;
goto put_dev;
}
@@ -677,6 +683,10 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr,
ret = -EISCONN;
goto error;
}
+ if (sk->sk_state == LLCP_CONNECTING) {
+ ret = -EINPROGRESS;
+ goto error;
+ }
dev = nfc_get_device(addr->dev_idx);
if (dev == NULL) {
@@ -708,6 +718,8 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr,
llcp_sock->local = nfc_llcp_local_get(local);
llcp_sock->ssap = nfc_llcp_get_local_ssap(local);
if (llcp_sock->ssap == LLCP_SAP_MAX) {
+ nfc_llcp_local_put(llcp_sock->local);
+ llcp_sock->local = NULL;
ret = -ENOMEM;
goto put_dev;
}
@@ -745,8 +757,12 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr,
sock_unlink:
nfc_llcp_put_ssap(local, llcp_sock->ssap);
+ nfc_llcp_local_put(llcp_sock->local);
+ llcp_sock->local = NULL;
nfc_llcp_sock_unlink(&local->connecting_sockets, sk);
+ kfree(llcp_sock->service_name);
+ llcp_sock->service_name = NULL;
put_dev:
nfc_put_device(dev);
@@ -774,6 +790,11 @@ static int llcp_sock_sendmsg(struct socket *sock, struct msghdr *msg,
lock_sock(sk);
+ if (!llcp_sock->local) {
+ release_sock(sk);
+ return -ENODEV;
+ }
+
if (sk->sk_type == SOCK_DGRAM) {
DECLARE_SOCKADDR(struct sockaddr_nfc_llcp *, addr,
msg->msg_name);
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 67583ad7f610..d5d215776980 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -149,12 +149,15 @@ inline int nci_request(struct nci_dev *ndev,
{
int rc;
- if (!test_bit(NCI_UP, &ndev->flags))
- return -ENETDOWN;
-
/* Serialize all requests */
mutex_lock(&ndev->req_lock);
- rc = __nci_request(ndev, req, opt, timeout);
+ /* check the state after obtaing the lock against any races
+ * from nci_close_device when the device gets removed.
+ */
+ if (test_bit(NCI_UP, &ndev->flags))
+ rc = __nci_request(ndev, req, opt, timeout);
+ else
+ rc = -ENETDOWN;
mutex_unlock(&ndev->req_lock);
return rc;
@@ -398,6 +401,11 @@ static int nci_open_device(struct nci_dev *ndev)
mutex_lock(&ndev->req_lock);
+ if (test_bit(NCI_UNREG, &ndev->flags)) {
+ rc = -ENODEV;
+ goto done;
+ }
+
if (test_bit(NCI_UP, &ndev->flags)) {
rc = -EALREADY;
goto done;
@@ -461,6 +469,10 @@ done:
static int nci_close_device(struct nci_dev *ndev)
{
nci_req_cancel(ndev, ENODEV);
+
+ /* This mutex needs to be held as a barrier for
+ * caller nci_unregister_device
+ */
mutex_lock(&ndev->req_lock);
if (!test_and_clear_bit(NCI_UP, &ndev->flags)) {
@@ -498,8 +510,8 @@ static int nci_close_device(struct nci_dev *ndev)
/* Flush cmd wq */
flush_workqueue(ndev->cmd_wq);
- /* Clear flags */
- ndev->flags = 0;
+ /* Clear flags except NCI_UNREG */
+ ndev->flags &= BIT(NCI_UNREG);
mutex_unlock(&ndev->req_lock);
@@ -610,14 +622,14 @@ int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type,
struct nci_core_conn_create_cmd *cmd;
struct core_conn_create_data data;
+ if (!number_destination_params)
+ return -EINVAL;
+
data.length = params_len + sizeof(struct nci_core_conn_create_cmd);
cmd = kzalloc(data.length, GFP_KERNEL);
if (!cmd)
return -ENOMEM;
- if (!number_destination_params)
- return -EINVAL;
-
cmd->destination_type = destination_type;
cmd->number_destination_params = number_destination_params;
memcpy(cmd->params, params, params_len);
@@ -1099,6 +1111,7 @@ EXPORT_SYMBOL(nci_allocate_device);
void nci_free_device(struct nci_dev *ndev)
{
nfc_free_device(ndev->nfc_dev);
+ nci_hci_deallocate(ndev);
kfree(ndev);
}
EXPORT_SYMBOL(nci_free_device);
@@ -1178,6 +1191,12 @@ void nci_unregister_device(struct nci_dev *ndev)
{
struct nci_conn_info *conn_info, *n;
+ /* This set_bit is not protected with specialized barrier,
+ * However, it is fine because the mutex_lock(&ndev->req_lock);
+ * in nci_close_device() will help to emit one.
+ */
+ set_bit(NCI_UNREG, &ndev->flags);
+
nci_close_device(ndev);
destroy_workqueue(ndev->cmd_wq);
diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c
index 2aedac15cb59..309e8cebed55 100644
--- a/net/nfc/nci/hci.c
+++ b/net/nfc/nci/hci.c
@@ -798,3 +798,8 @@ struct nci_hci_dev *nci_hci_allocate(struct nci_dev *ndev)
return hdev;
}
+
+void nci_hci_deallocate(struct nci_dev *ndev)
+{
+ kfree(ndev->hci_dev);
+}
diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c
index 9b6eb913d801..74e4d5e8c275 100644
--- a/net/nfc/nci/rsp.c
+++ b/net/nfc/nci/rsp.c
@@ -274,6 +274,8 @@ static void nci_core_conn_close_rsp_packet(struct nci_dev *ndev,
conn_info = nci_get_conn_info_by_conn_id(ndev, ndev->cur_id);
if (conn_info) {
list_del(&conn_info->list);
+ if (conn_info == ndev->rf_conn_info)
+ ndev->rf_conn_info = NULL;
devm_kfree(&ndev->nfc_dev->dev, conn_info);
}
}
diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c
index 21d8875673a4..c3f2faa0210e 100644
--- a/net/nfc/nci/uart.c
+++ b/net/nfc/nci/uart.c
@@ -355,7 +355,7 @@ static int nci_uart_default_recv_buf(struct nci_uart *nu, const u8 *data,
nu->rx_packet_len = -1;
nu->rx_skb = nci_skb_alloc(nu->ndev,
NCI_MAX_PACKET_SIZE,
- GFP_KERNEL);
+ GFP_ATOMIC);
if (!nu->rx_skb)
return -ENOMEM;
}
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 04d4c388a7a8..4286b900a306 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -62,7 +62,10 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = {
[NFC_ATTR_LLC_SDP] = { .type = NLA_NESTED },
[NFC_ATTR_FIRMWARE_NAME] = { .type = NLA_STRING,
.len = NFC_FIRMWARE_NAME_MAXSIZE },
+ [NFC_ATTR_SE_INDEX] = { .type = NLA_U32 },
[NFC_ATTR_SE_APDU] = { .type = NLA_BINARY },
+ [NFC_ATTR_VENDOR_ID] = { .type = NLA_U32 },
+ [NFC_ATTR_VENDOR_SUBCMD] = { .type = NLA_U32 },
[NFC_ATTR_VENDOR_DATA] = { .type = NLA_BINARY },
};
@@ -629,8 +632,10 @@ static int nfc_genl_dump_devices_done(struct netlink_callback *cb)
{
struct class_dev_iter *iter = (struct class_dev_iter *) cb->args[0];
- nfc_device_iter_exit(iter);
- kfree(iter);
+ if (iter) {
+ nfc_device_iter_exit(iter);
+ kfree(iter);
+ }
return 0;
}
@@ -847,6 +852,7 @@ static int nfc_genl_stop_poll(struct sk_buff *skb, struct genl_info *info)
if (!dev->polling) {
device_unlock(&dev->dev);
+ nfc_put_device(dev);
return -EINVAL;
}
@@ -1187,7 +1193,7 @@ static int nfc_genl_fw_download(struct sk_buff *skb, struct genl_info *info)
u32 idx;
char firmware_name[NFC_FIRMWARE_NAME_MAXSIZE + 1];
- if (!info->attrs[NFC_ATTR_DEVICE_INDEX])
+ if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_FIRMWARE_NAME])
return -EINVAL;
idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
@@ -1362,8 +1368,10 @@ static int nfc_genl_dump_ses_done(struct netlink_callback *cb)
{
struct class_dev_iter *iter = (struct class_dev_iter *) cb->args[0];
- nfc_device_iter_exit(iter);
- kfree(iter);
+ if (iter) {
+ nfc_device_iter_exit(iter);
+ kfree(iter);
+ }
return 0;
}
diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index e386e6c90b17..2fba626a0125 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -117,7 +117,7 @@ static int rawsock_connect(struct socket *sock, struct sockaddr *_addr,
if (addr->target_idx > dev->target_next_idx - 1 ||
addr->target_idx < dev->target_next_idx - dev->n_targets) {
rc = -EINVAL;
- goto error;
+ goto put_dev;
}
rc = nfc_activate_target(dev, addr->target_idx, addr->nfc_protocol);
@@ -344,10 +344,13 @@ static int rawsock_create(struct net *net, struct socket *sock,
if ((sock->type != SOCK_SEQPACKET) && (sock->type != SOCK_RAW))
return -ESOCKTNOSUPPORT;
- if (sock->type == SOCK_RAW)
+ if (sock->type == SOCK_RAW) {
+ if (!ns_capable(net->user_ns, CAP_NET_RAW))
+ return -EPERM;
sock->ops = &rawsock_raw_ops;
- else
+ } else {
sock->ops = &rawsock_ops;
+ }
sk = sk_alloc(net, PF_NFC, GFP_ATOMIC, nfc_proto->proto, kern);
if (!sk)
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 7cb8184ac165..b8f2ee52e64e 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -137,11 +137,22 @@ static bool is_flow_key_valid(const struct sw_flow_key *key)
return !!key->eth.type;
}
+static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr,
+ __be16 ethertype)
+{
+ if (skb->ip_summed == CHECKSUM_COMPLETE) {
+ __be16 diff[] = { ~(hdr->h_proto), ethertype };
+
+ skb->csum = csum_partial((char *)diff, sizeof(diff), skb->csum);
+ }
+
+ hdr->h_proto = ethertype;
+}
+
static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
const struct ovs_action_push_mpls *mpls)
{
__be32 *new_mpls_lse;
- struct ethhdr *hdr;
/* Networking stack do not allow simultaneous Tunnel and MPLS GSO. */
if (skb->encapsulation)
@@ -160,9 +171,7 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN);
- hdr = eth_hdr(skb);
- hdr->h_proto = mpls->mpls_ethertype;
-
+ update_ethertype(skb, eth_hdr(skb), mpls->mpls_ethertype);
if (!skb->inner_protocol)
skb_set_inner_protocol(skb, skb->protocol);
skb->protocol = mpls->mpls_ethertype;
@@ -193,7 +202,7 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
* field correctly in the presence of VLAN tags.
*/
hdr = (struct ethhdr *)(skb_mpls_header(skb) - ETH_HLEN);
- hdr->h_proto = ethertype;
+ update_ethertype(skb, hdr, ethertype);
if (eth_p_mpls(skb->protocol))
skb->protocol = ethertype;
@@ -217,8 +226,7 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key,
if (skb->ip_summed == CHECKSUM_COMPLETE) {
__be32 diff[] = { ~(*stack), lse };
- skb->csum = ~csum_partial((char *)diff, sizeof(diff),
- ~skb->csum);
+ skb->csum = csum_partial((char *)diff, sizeof(diff), skb->csum);
}
*stack = lse;
@@ -686,16 +694,16 @@ static void ovs_fragment(struct net *net, struct vport *vport,
}
if (ethertype == htons(ETH_P_IP)) {
- struct dst_entry ovs_dst;
+ struct rtable ovs_rt = { 0 };
unsigned long orig_dst;
prepare_frag(vport, skb);
- dst_init(&ovs_dst, &ovs_dst_ops, NULL, 1,
+ dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1,
DST_OBSOLETE_NONE, DST_NOCOUNT);
- ovs_dst.dev = vport->dev;
+ ovs_rt.dst.dev = vport->dev;
orig_dst = skb->_skb_refdst;
- skb_dst_set_noref(skb, &ovs_dst);
+ skb_dst_set_noref(skb, &ovs_rt.dst);
IPCB(skb)->frag_max_size = mru;
ip_do_fragment(net, skb->sk, skb, ovs_vport_output);
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index caa23ee913f0..82ca7fe7a163 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -725,9 +725,13 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
{
size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));
- /* OVS_FLOW_ATTR_UFID */
+ /* OVS_FLOW_ATTR_UFID, or unmasked flow key as fallback
+ * see ovs_nla_put_identifier()
+ */
if (sfid && ovs_identifier_is_ufid(sfid))
len += nla_total_size(sfid->ufid_len);
+ else
+ len += nla_total_size(ovs_key_attr_size());
/* OVS_FLOW_ATTR_KEY */
if (!sfid || should_fill_key(sfid, ufid_flags))
@@ -900,7 +904,10 @@ static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
info->snd_portid, info->snd_seq, 0,
cmd, ufid_flags);
- BUG_ON(retval < 0);
+ if (WARN_ON_ONCE(retval < 0)) {
+ kfree_skb(skb);
+ skb = ERR_PTR(retval);
+ }
return skb;
}
@@ -1318,7 +1325,10 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
OVS_FLOW_CMD_DEL,
ufid_flags);
rcu_read_unlock();
- BUG_ON(err < 0);
+ if (WARN_ON_ONCE(err < 0)) {
+ kfree_skb(reply);
+ goto out_free;
+ }
ovs_notify(&dp_flow_genl_family, reply, info);
} else {
@@ -1326,6 +1336,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
}
}
+out_free:
ovs_flow_free(flow, true);
return 0;
unlock:
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 1d055c559eaf..03378e75a67c 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -55,7 +55,7 @@ struct ovs_tunnel_info {
FIELD_SIZEOF(struct sw_flow_key, recirc_id))
struct sw_flow_key {
- u8 tun_opts[255];
+ u8 tun_opts[IP_TUNNEL_OPTS_MAX];
u8 tun_opts_len;
struct ip_tunnel_key tun_key; /* Encapsulating tunnel key. */
struct {
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 0dd9fc3f57e8..46bf6f84e0e6 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -587,7 +587,8 @@ static int prb_calc_retire_blk_tmo(struct packet_sock *po,
msec = 1;
div = speed / 1000;
}
- }
+ } else
+ return DEFAULT_PRB_RETIRE_TOV;
mbits = (blk_size_in_bytes * 8) / (1024 * 1024);
@@ -1331,15 +1332,21 @@ static void packet_sock_destruct(struct sock *sk)
static bool fanout_flow_is_huge(struct packet_sock *po, struct sk_buff *skb)
{
- u32 rxhash;
+ u32 *history = po->rollover->history;
+ u32 victim, rxhash;
int i, count = 0;
rxhash = skb_get_hash(skb);
for (i = 0; i < ROLLOVER_HLEN; i++)
- if (po->rollover->history[i] == rxhash)
+ if (READ_ONCE(history[i]) == rxhash)
count++;
- po->rollover->history[prandom_u32() % ROLLOVER_HLEN] = rxhash;
+ victim = prandom_u32() % ROLLOVER_HLEN;
+
+ /* Avoid dirtying the cache line if possible */
+ if (READ_ONCE(history[victim]) != rxhash)
+ WRITE_ONCE(history[victim], rxhash);
+
return count > (ROLLOVER_HLEN >> 1);
}
@@ -1598,13 +1605,9 @@ static int fanout_set_data_ebpf(struct packet_sock *po, char __user *data,
if (copy_from_user(&fd, data, len))
return -EFAULT;
- new = bpf_prog_get(fd);
+ new = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
if (IS_ERR(new))
return PTR_ERR(new);
- if (new->type != BPF_PROG_TYPE_SOCKET_FILTER) {
- bpf_prog_put(new);
- return -EINVAL;
- }
__fanout_set_data_bpf(po->fanout, new);
return 0;
@@ -1702,6 +1705,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
match->prot_hook.dev = po->prot_hook.dev;
match->prot_hook.func = packet_rcv_fanout;
match->prot_hook.af_packet_priv = match;
+ match->prot_hook.af_packet_net = read_pnet(&match->net);
match->prot_hook.id_match = match_fanout_group;
list_add(&match->list, &fanout_list);
}
@@ -3160,6 +3164,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
po->prot_hook.func = packet_rcv_spkt;
po->prot_hook.af_packet_priv = sk;
+ po->prot_hook.af_packet_net = sock_net(sk);
if (proto) {
po->prot_hook.type = proto;
@@ -3308,20 +3313,29 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
sock_recv_ts_and_drops(msg, sk, skb);
if (msg->msg_name) {
+ int copy_len;
+
/* If the address length field is there to be filled
* in, we fill it in now.
*/
if (sock->type == SOCK_PACKET) {
__sockaddr_check_size(sizeof(struct sockaddr_pkt));
msg->msg_namelen = sizeof(struct sockaddr_pkt);
+ copy_len = msg->msg_namelen;
} else {
struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll;
msg->msg_namelen = sll->sll_halen +
offsetof(struct sockaddr_ll, sll_addr);
+ copy_len = msg->msg_namelen;
+ if (msg->msg_namelen < sizeof(struct sockaddr_ll)) {
+ memset(msg->msg_name +
+ offsetof(struct sockaddr_ll, sll_addr),
+ 0, sizeof(sll->sll_addr));
+ msg->msg_namelen = sizeof(struct sockaddr_ll);
+ }
}
- memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
- msg->msg_namelen);
+ memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len);
}
if (pkt_sk(sk)->auxdata) {
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index f6aa532bcbf6..a734d47c5eb1 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -878,6 +878,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp)
err = pep_accept_conn(newsk, skb);
if (err) {
+ __sock_put(sk);
sock_put(newsk);
newsk = NULL;
goto drop;
@@ -956,6 +957,8 @@ static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg)
ret = -EBUSY;
else if (sk->sk_state == TCP_ESTABLISHED)
ret = -EISCONN;
+ else if (!pn->pn_sk.sobject)
+ ret = -EADDRNOTAVAIL;
else
ret = pep_sock_enable(sk, NULL, 0);
release_sock(sk);
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index d575ef4e9aa6..ffd5f2297584 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -140,13 +140,15 @@ void pn_deliver_sock_broadcast(struct net *net, struct sk_buff *skb)
rcu_read_unlock();
}
-void pn_sock_hash(struct sock *sk)
+int pn_sock_hash(struct sock *sk)
{
struct hlist_head *hlist = pn_hash_list(pn_sk(sk)->sobject);
mutex_lock(&pnsocks.lock);
sk_add_node_rcu(sk, hlist);
mutex_unlock(&pnsocks.lock);
+
+ return 0;
}
EXPORT_SYMBOL(pn_sock_hash);
@@ -200,7 +202,7 @@ static int pn_socket_bind(struct socket *sock, struct sockaddr *addr, int len)
pn->resource = spn->spn_resource;
/* Enable RX on the socket */
- sk->sk_prot->hash(sk);
+ err = sk->sk_prot->hash(sk);
out_port:
mutex_unlock(&port_mutex);
out:
diff --git a/net/rds/ib_stats.c b/net/rds/ib_stats.c
index d77e04473056..a88460058185 100644
--- a/net/rds/ib_stats.c
+++ b/net/rds/ib_stats.c
@@ -42,7 +42,7 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_ib_statistics, rds_ib_stats);
static const char *const rds_ib_stat_names[] = {
"ib_connect_raced",
"ib_listen_closed_stale",
- "s_ib_evt_handler_call",
+ "ib_evt_handler_call",
"ib_tasklet_call",
"ib_tx_cq_event",
"ib_tx_ring_full",
diff --git a/net/rds/recv.c b/net/rds/recv.c
index 6275de19689c..1ff4bc3237f0 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -301,12 +301,13 @@ static int rds_still_queued(struct rds_sock *rs, struct rds_incoming *inc,
int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msghdr)
{
struct rds_notifier *notifier;
- struct rds_rdma_notify cmsg = { 0 }; /* fill holes with zero */
+ struct rds_rdma_notify cmsg;
unsigned int count = 0, max_messages = ~0U;
unsigned long flags;
LIST_HEAD(copy);
int err = 0;
+ memset(&cmsg, 0, sizeof(cmsg)); /* fill holes with zero */
/* put_cmsg copies to user space and thus may sleep. We can't do this
* with rs_lock held, so first grab as many notifications as we can stuff
@@ -481,7 +482,7 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
if (rds_cmsg_recv(inc, msg)) {
ret = -EFAULT;
- goto out;
+ break;
}
rds_stats_inc(s_recv_delivered);
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index c10622a9321c..465756fe7958 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -110,7 +110,7 @@ void rds_tcp_restore_callbacks(struct socket *sock,
/*
* This is the only path that sets tc->t_sock. Send and receive trust that
- * it is set. The RDS_CONN_CONNECTED bit protects those paths from being
+ * it is set. The RDS_CONN_UP bit protects those paths from being
* called while it isn't set.
*/
void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn)
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index e353e3255206..5213cd781c24 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -115,24 +115,32 @@ int rds_tcp_accept_one(struct socket *sock)
* rds_tcp_state_change() will do that cleanup
*/
rs_tcp = (struct rds_tcp_connection *)conn->c_transport_data;
- if (rs_tcp->t_sock &&
- ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr)) {
- struct sock *nsk = new_sock->sk;
-
- nsk->sk_user_data = NULL;
- nsk->sk_prot->disconnect(nsk, 0);
- tcp_done(nsk);
- new_sock = NULL;
- ret = 0;
- goto out;
- } else if (rs_tcp->t_sock) {
- rds_tcp_restore_callbacks(rs_tcp->t_sock, rs_tcp);
- conn->c_outgoing = 0;
- }
-
rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING);
+ if (rs_tcp->t_sock) {
+ /* Need to resolve a duelling SYN between peers.
+ * We have an outstanding SYN to this peer, which may
+ * potentially have transitioned to the RDS_CONN_UP state,
+ * so we must quiesce any send threads before resetting
+ * c_transport_data.
+ */
+ wait_event(conn->c_waitq,
+ !test_bit(RDS_IN_XMIT, &conn->c_flags));
+ if (ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr)) {
+ struct sock *nsk = new_sock->sk;
+
+ nsk->sk_user_data = NULL;
+ nsk->sk_prot->disconnect(nsk, 0);
+ tcp_done(nsk);
+ new_sock = NULL;
+ ret = 0;
+ goto out;
+ } else if (rs_tcp->t_sock) {
+ rds_tcp_restore_callbacks(rs_tcp->t_sock, rs_tcp);
+ conn->c_outgoing = 0;
+ }
+ }
rds_tcp_set_callbacks(new_sock, conn);
- rds_connect_complete(conn);
+ rds_connect_complete(conn); /* marks RDS_CONN_UP */
new_sock = NULL;
ret = 0;
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 71e1f0def5a5..8662a976f666 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -936,10 +936,13 @@ static void rfkill_sync_work(struct work_struct *work)
int __must_check rfkill_register(struct rfkill *rfkill)
{
static unsigned long rfkill_no;
- struct device *dev = &rfkill->dev;
+ struct device *dev;
int error;
- BUG_ON(!rfkill);
+ if (!rfkill)
+ return -EINVAL;
+
+ dev = &rfkill->dev;
mutex_lock(&rfkill_global_mutex);
diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c
index 344456206b70..0f371e50d9c4 100644
--- a/net/rose/rose_loopback.c
+++ b/net/rose/rose_loopback.c
@@ -99,10 +99,19 @@ static void rose_loopback_timer(unsigned long param)
}
if (frametype == ROSE_CALL_REQUEST) {
- if ((dev = rose_dev_get(dest)) != NULL) {
- if (rose_rx_call_request(skb, dev, rose_loopback_neigh, lci_o) == 0)
- kfree_skb(skb);
- } else {
+ if (!rose_loopback_neigh->dev) {
+ kfree_skb(skb);
+ continue;
+ }
+
+ dev = rose_dev_get(dest);
+ if (!dev) {
+ kfree_skb(skb);
+ continue;
+ }
+
+ if (rose_rx_call_request(skb, dev, rose_loopback_neigh, lci_o) == 0) {
+ dev_put(dev);
kfree_skb(skb);
}
} else {
diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c
index 91d43ab3a961..f4ad63d6e540 100644
--- a/net/rxrpc/ar-key.c
+++ b/net/rxrpc/ar-key.c
@@ -897,7 +897,7 @@ int rxrpc_request_key(struct rxrpc_sock *rx, char __user *optval, int optlen)
_enter("");
- if (optlen <= 0 || optlen > PAGE_SIZE - 1)
+ if (optlen <= 0 || optlen > PAGE_SIZE - 1 || rx->securities)
return -EINVAL;
description = kmalloc(optlen + 1, GFP_KERNEL);
@@ -1114,8 +1114,9 @@ static long rxrpc_read(const struct key *key,
break;
default: /* we have a ticket we can't encode */
- BUG();
- continue;
+ pr_err("Unsupported key token type (%u)\n",
+ token->security_index);
+ return -ENOPKG;
}
_debug("token[%u]: toksize=%u", ntoks, toksize);
@@ -1149,6 +1150,14 @@ static long rxrpc_read(const struct key *key,
goto fault; \
xdr += (_l + 3) >> 2; \
} while(0)
+#define ENCODE_BYTES(l, s) \
+ do { \
+ u32 _l = (l); \
+ memcpy(xdr, (s), _l); \
+ if (_l & 3) \
+ memcpy((u8 *)xdr + _l, &zero, 4 - (_l & 3)); \
+ xdr += (_l + 3) >> 2; \
+ } while(0)
#define ENCODE64(x) \
do { \
__be64 y = cpu_to_be64(x); \
@@ -1177,7 +1186,7 @@ static long rxrpc_read(const struct key *key,
case RXRPC_SECURITY_RXKAD:
ENCODE(token->kad->vice_id);
ENCODE(token->kad->kvno);
- ENCODE_DATA(8, token->kad->session_key);
+ ENCODE_BYTES(8, token->kad->session_key);
ENCODE(token->kad->start);
ENCODE(token->kad->expiry);
ENCODE(token->kad->primary_flag);
@@ -1227,8 +1236,9 @@ static long rxrpc_read(const struct key *key,
break;
default:
- BUG();
- break;
+ pr_err("Unsupported key token type (%u)\n",
+ token->security_index);
+ return -ENOPKG;
}
ASSERTCMP((unsigned long)xdr - (unsigned long)oldxdr, ==,
diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c
index 14c4e12c47b0..a1997f9a447a 100644
--- a/net/rxrpc/ar-output.c
+++ b/net/rxrpc/ar-output.c
@@ -533,7 +533,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
/* this should be in poll */
sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
- if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
+ if (sk->sk_shutdown & SEND_SHUTDOWN)
return -EPIPE;
more = msg->msg_flags & MSG_MORE;
diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c
index b92beded7459..cb9affdb1e87 100644
--- a/net/rxrpc/ar-recvmsg.c
+++ b/net/rxrpc/ar-recvmsg.c
@@ -78,7 +78,7 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
release_sock(&rx->sk);
if (continue_call)
rxrpc_put_call(continue_call);
- return -ENODATA;
+ return -EAGAIN;
}
}
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index bd155e59be1c..143093b95bc3 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -51,9 +51,11 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
filter = rcu_dereference(prog->filter);
if (at_ingress) {
__skb_push(skb, skb->mac_len);
+ bpf_compute_data_end(skb);
filter_res = BPF_PROG_RUN(filter, skb);
__skb_pull(skb, skb->mac_len);
} else {
+ bpf_compute_data_end(skb);
filter_res = BPF_PROG_RUN(filter, skb);
}
rcu_read_unlock();
@@ -220,15 +222,10 @@ static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg)
bpf_fd = nla_get_u32(tb[TCA_ACT_BPF_FD]);
- fp = bpf_prog_get(bpf_fd);
+ fp = bpf_prog_get_type(bpf_fd, BPF_PROG_TYPE_SCHED_ACT);
if (IS_ERR(fp))
return PTR_ERR(fp);
- if (fp->type != BPF_PROG_TYPE_SCHED_ACT) {
- bpf_prog_put(fp);
- return -EINVAL;
- }
-
if (tb[TCA_ACT_BPF_NAME]) {
name = kmemdup(nla_data(tb[TCA_ACT_BPF_NAME]),
nla_len(tb[TCA_ACT_BPF_NAME]),
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 3eef0215e53f..f674d330ebd1 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -100,15 +100,18 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
if (at_ingress) {
/* It is safe to push/pull even if skb_shared() */
__skb_push(skb, skb->mac_len);
+ bpf_compute_data_end(skb);
filter_res = BPF_PROG_RUN(prog->filter, skb);
__skb_pull(skb, skb->mac_len);
} else {
+ bpf_compute_data_end(skb);
filter_res = BPF_PROG_RUN(prog->filter, skb);
}
if (prog->exts_integrated) {
- res->class = prog->res.class;
- res->classid = qdisc_skb_cb(skb)->tc_classid;
+ res->class = 0;
+ res->classid = TC_H_MAJ(prog->res.classid) |
+ qdisc_skb_cb(skb)->tc_classid;
ret = cls_bpf_exec_opcode(filter_res);
if (ret == TC_ACT_UNSPEC)
@@ -118,10 +121,12 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
if (filter_res == 0)
continue;
-
- *res = prog->res;
- if (filter_res != -1)
+ if (filter_res != -1) {
+ res->class = 0;
res->classid = filter_res;
+ } else {
+ *res = prog->res;
+ }
ret = tcf_exts_exec(skb, &prog->exts, res);
if (ret < 0)
@@ -267,15 +272,10 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
bpf_fd = nla_get_u32(tb[TCA_BPF_FD]);
- fp = bpf_prog_get(bpf_fd);
+ fp = bpf_prog_get_type(bpf_fd, BPF_PROG_TYPE_SCHED_CLS);
if (IS_ERR(fp))
return PTR_ERR(fp);
- if (fp->type != BPF_PROG_TYPE_SCHED_CLS) {
- bpf_prog_put(fp);
- return -EINVAL;
- }
-
if (tb[TCA_BPF_NAME]) {
name = kmemdup(nla_data(tb[TCA_BPF_NAME]),
nla_len(tb[TCA_BPF_NAME]),
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index e5a58c82728a..a97096a7f801 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -127,6 +127,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct fl_flow_key skb_key;
struct fl_flow_key skb_mkey;
+ flow_dissector_init_keys(&skb_key.control, &skb_key.basic);
fl_clear_masked_range(&skb_key, &head->mask);
skb_key.indev_ifindex = skb->skb_iif;
/* skb_flow_dissect() does not set n_proto in case an unknown protocol,
@@ -350,12 +351,10 @@ static int fl_init_hashtable(struct cls_fl_head *head,
#define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member)
#define FL_KEY_MEMBER_SIZE(member) (sizeof(((struct fl_flow_key *) 0)->member))
-#define FL_KEY_MEMBER_END_OFFSET(member) \
- (FL_KEY_MEMBER_OFFSET(member) + FL_KEY_MEMBER_SIZE(member))
-#define FL_KEY_IN_RANGE(mask, member) \
- (FL_KEY_MEMBER_OFFSET(member) <= (mask)->range.end && \
- FL_KEY_MEMBER_END_OFFSET(member) >= (mask)->range.start)
+#define FL_KEY_IS_MASKED(mask, member) \
+ memchr_inv(((char *)mask) + FL_KEY_MEMBER_OFFSET(member), \
+ 0, FL_KEY_MEMBER_SIZE(member)) \
#define FL_KEY_SET(keys, cnt, id, member) \
do { \
@@ -364,9 +363,9 @@ static int fl_init_hashtable(struct cls_fl_head *head,
cnt++; \
} while(0);
-#define FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, id, member) \
+#define FL_KEY_SET_IF_MASKED(mask, keys, cnt, id, member) \
do { \
- if (FL_KEY_IN_RANGE(mask, member)) \
+ if (FL_KEY_IS_MASKED(mask, member)) \
FL_KEY_SET(keys, cnt, id, member); \
} while(0);
@@ -378,14 +377,14 @@ static void fl_init_dissector(struct cls_fl_head *head,
FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_CONTROL, control);
FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_BASIC, basic);
- FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
- FLOW_DISSECTOR_KEY_ETH_ADDRS, eth);
- FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
- FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
- FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
- FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
- FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
- FLOW_DISSECTOR_KEY_PORTS, tp);
+ FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FLOW_DISSECTOR_KEY_ETH_ADDRS, eth);
+ FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
+ FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
+ FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FLOW_DISSECTOR_KEY_PORTS, tp);
skb_flow_dissector_init(&head->dissector, keys, cnt);
}
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 08a3b0a6f5ab..4f13c771f36d 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -540,8 +540,8 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
fp = &b->ht[h];
for (pfp = rtnl_dereference(*fp); pfp;
fp = &pfp->next, pfp = rtnl_dereference(*fp)) {
- if (pfp == f) {
- *fp = f->next;
+ if (pfp == fold) {
+ rcu_assign_pointer(*fp, fold->next);
break;
}
}
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 9992dfac6938..7317a64fdb79 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -455,10 +455,8 @@ static u32 gen_tunnel(struct rsvp_head *data)
static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
[TCA_RSVP_CLASSID] = { .type = NLA_U32 },
- [TCA_RSVP_DST] = { .type = NLA_BINARY,
- .len = RSVP_DST_LEN * sizeof(u32) },
- [TCA_RSVP_SRC] = { .type = NLA_BINARY,
- .len = RSVP_DST_LEN * sizeof(u32) },
+ [TCA_RSVP_DST] = { .len = RSVP_DST_LEN * sizeof(u32) },
+ [TCA_RSVP_SRC] = { .len = RSVP_DST_LEN * sizeof(u32) },
[TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) },
};
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 040d853f48b9..3d891b11c077 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -267,6 +267,29 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
cp->fall_through = p->fall_through;
cp->tp = tp;
+ if (tb[TCA_TCINDEX_HASH])
+ cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
+
+ if (tb[TCA_TCINDEX_MASK])
+ cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]);
+
+ if (tb[TCA_TCINDEX_SHIFT]) {
+ cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]);
+ if (cp->shift > 16) {
+ err = -EINVAL;
+ goto errout;
+ }
+ }
+ if (!cp->hash) {
+ /* Hash not specified, use perfect hash if the upper limit
+ * of the hashing index is below the threshold.
+ */
+ if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD)
+ cp->hash = (cp->mask >> cp->shift) + 1;
+ else
+ cp->hash = DEFAULT_HASH_SIZE;
+ }
+
if (p->perfect) {
int i;
@@ -274,7 +297,8 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
sizeof(*r) * cp->hash, GFP_KERNEL);
if (!cp->perfect)
goto errout;
- for (i = 0; i < cp->hash; i++)
+ cp->alloc_hash = cp->hash;
+ for (i = 0; i < min(cp->hash, p->hash); i++)
tcf_exts_init(&cp->perfect[i].exts,
TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
balloc = 1;
@@ -286,15 +310,6 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
if (old_r)
cr.res = r->res;
- if (tb[TCA_TCINDEX_HASH])
- cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
-
- if (tb[TCA_TCINDEX_MASK])
- cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]);
-
- if (tb[TCA_TCINDEX_SHIFT])
- cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]);
-
err = -EBUSY;
/* Hash already allocated, make sure that we still meet the
@@ -312,16 +327,6 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
if (tb[TCA_TCINDEX_FALL_THROUGH])
cp->fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]);
- if (!cp->hash) {
- /* Hash not specified, use perfect hash if the upper limit
- * of the hashing index is below the threshold.
- */
- if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD)
- cp->hash = (cp->mask >> cp->shift) + 1;
- else
- cp->hash = DEFAULT_HASH_SIZE;
- }
-
if (!cp->perfect && !cp->h)
cp->alloc_hash = cp->hash;
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index fbb7ebfc58c6..d4d6f9c91e8c 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -242,6 +242,9 @@ static int tcf_em_validate(struct tcf_proto *tp,
goto errout;
if (em->ops->change) {
+ err = -EINVAL;
+ if (em_hdr->flags & TCF_EM_SIMPLE)
+ goto errout;
err = em->ops->change(net, data, data_len, em);
if (err < 0)
goto errout;
@@ -267,12 +270,12 @@ static int tcf_em_validate(struct tcf_proto *tp,
}
em->data = (unsigned long) v;
}
+ em->datalen = data_len;
}
}
em->matchid = em_hdr->matchid;
em->flags = em_hdr->flags;
- em->datalen = data_len;
em->net = net;
err = 0;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index a4b492bb7fe5..4ee2e9a3d12e 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -391,7 +391,8 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *ta
{
struct qdisc_rate_table *rtab;
- if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
+ if (tab == NULL || r->rate == 0 ||
+ r->cell_log == 0 || r->cell_log >= 32 ||
nla_len(tab) != TC_RTAB_SIZE)
return NULL;
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index e8dcf94a23c8..fee59e25929c 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -396,7 +396,8 @@ static void choke_reset(struct Qdisc *sch)
qdisc_drop(skb, sch);
}
- memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
+ if (q->tab)
+ memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
q->head = q->tail = 0;
red_restart(&q->vars);
}
@@ -422,6 +423,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
struct sk_buff **old = NULL;
unsigned int mask;
u32 max_P;
+ u8 *stab;
if (opt == NULL)
return -EINVAL;
@@ -437,8 +439,8 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
max_P = tb[TCA_CHOKE_MAX_P] ? nla_get_u32(tb[TCA_CHOKE_MAX_P]) : 0;
ctl = nla_data(tb[TCA_CHOKE_PARMS]);
-
- if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
+ stab = nla_data(tb[TCA_CHOKE_STAB]);
+ if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log, stab))
return -EINVAL;
if (ctl->limit > CHOKE_MAX_QUEUE)
@@ -491,7 +493,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
ctl->Plog, ctl->Scell_log,
- nla_data(tb[TCA_CHOKE_STAB]),
+ stab,
max_P);
red_set_vars(&q->vars);
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index d6e3ad43cecb..06e42727590a 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -375,6 +375,7 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch)
cl->deficit = cl->quantum;
}
+ qdisc_qstats_backlog_inc(sch, skb);
sch->q.qlen++;
return err;
}
@@ -405,6 +406,7 @@ static struct sk_buff *drr_dequeue(struct Qdisc *sch)
bstats_update(&cl->bstats, skb);
qdisc_bstats_update(sch, skb);
+ qdisc_qstats_backlog_dec(sch, skb);
sch->q.qlen--;
return skb;
}
@@ -426,6 +428,7 @@ static unsigned int drr_drop(struct Qdisc *sch)
if (cl->qdisc->ops->drop) {
len = cl->qdisc->ops->drop(cl->qdisc);
if (len > 0) {
+ sch->qstats.backlog -= len;
sch->q.qlen--;
if (cl->qdisc->q.qlen == 0)
list_del(&cl->alist);
@@ -461,6 +464,7 @@ static void drr_reset_qdisc(struct Qdisc *sch)
qdisc_reset(cl->qdisc);
}
}
+ sch->qstats.backlog = 0;
sch->q.qlen = 0;
}
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 5f8f6d94336c..f5d2c32dae24 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -404,7 +404,8 @@ static void dsmark_reset(struct Qdisc *sch)
struct dsmark_qdisc_data *p = qdisc_priv(sch);
pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
- qdisc_reset(p->q);
+ if (p->q)
+ qdisc_reset(p->q);
sch->qstats.backlog = 0;
sch->q.qlen = 0;
}
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 2e4bd2c0a50c..6c99b833f665 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -151,6 +151,9 @@ int fifo_set_limit(struct Qdisc *q, unsigned int limit)
if (strncmp(q->ops->id + 1, "fifo", 4) != 0)
return 0;
+ if (!q->ops->change)
+ return 0;
+
nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
if (nla) {
nla->nla_type = RTM_NEWQDISC;
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 117ed90c5f21..eb814ffc0902 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -668,6 +668,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
[TCA_FQ_FLOW_MAX_RATE] = { .type = NLA_U32 },
[TCA_FQ_BUCKETS_LOG] = { .type = NLA_U32 },
[TCA_FQ_FLOW_REFILL_DELAY] = { .type = NLA_U32 },
+ [TCA_FQ_ORPHAN_MASK] = { .type = NLA_U32 },
};
static int fq_change(struct Qdisc *sch, struct nlattr *opt)
@@ -706,7 +707,7 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
if (tb[TCA_FQ_QUANTUM]) {
u32 quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]);
- if (quantum > 0)
+ if (quantum > 0 && quantum <= (1 << 20))
q->quantum = quantum;
else
err = -EINVAL;
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 1800f7977595..70e0dfd21f04 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -588,7 +588,7 @@ static int fq_codel_dump_class_stats(struct Qdisc *sch, unsigned long cl,
qs.backlog = q->backlogs[idx];
qs.drops = flow->dropped;
}
- if (gnet_stats_copy_queue(d, NULL, &qs, 0) < 0)
+ if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0)
return -1;
if (idx < q->flows_cnt)
return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index eec6dc2d3152..357e52455be6 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -49,6 +49,7 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
{
q->gso_skb = skb;
q->qstats.requeues++;
+ qdisc_qstats_backlog_inc(q, skb);
q->q.qlen++; /* it's still part of the queue */
__netif_schedule(q);
@@ -92,6 +93,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
txq = skb_get_tx_queue(txq->dev, skb);
if (!netif_xmit_frozen_or_stopped(txq)) {
q->gso_skb = NULL;
+ qdisc_qstats_backlog_dec(q, skb);
q->q.qlen--;
} else
skb = NULL;
@@ -301,7 +303,12 @@ static void dev_watchdog(unsigned long arg)
}
}
- if (some_queue_timedout) {
+ /* The noise is pissing off our CI and upstream doesn't
+ * move on the bug report:
+ *
+ * https://bugzilla.kernel.org/show_bug.cgi?id=196399
+ */
+ if (some_queue_timedout && 0) {
WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n",
dev->name, netdev_drivername(dev), i);
dev->netdev_ops->ndo_tx_timeout(dev);
@@ -327,6 +334,7 @@ void __netdev_watchdog_up(struct net_device *dev)
dev_hold(dev);
}
}
+EXPORT_SYMBOL_GPL(__netdev_watchdog_up);
static void dev_watchdog_up(struct net_device *dev)
{
@@ -624,18 +632,19 @@ struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
struct Qdisc *sch;
if (!try_module_get(ops->owner))
- goto errout;
+ return NULL;
sch = qdisc_alloc(dev_queue, ops);
- if (IS_ERR(sch))
- goto errout;
+ if (IS_ERR(sch)) {
+ module_put(ops->owner);
+ return NULL;
+ }
sch->parent = parentid;
if (!ops->init || ops->init(sch, NULL) == 0)
return sch;
qdisc_destroy(sch);
-errout:
return NULL;
}
EXPORT_SYMBOL(qdisc_create_dflt);
@@ -962,6 +971,7 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r,
{
memset(r, 0, sizeof(*r));
r->overhead = conf->overhead;
+ r->mpu = conf->mpu;
r->rate_bytes_ps = max_t(u64, conf->rate, rate64);
r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK);
r->mult = 1;
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index a5745cb2d014..2f73232031c6 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -389,7 +389,7 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp,
struct gred_sched *table = qdisc_priv(sch);
struct gred_sched_data *q = table->tab[dp];
- if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
+ if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log, stab))
return -EINVAL;
if (!q) {
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index d783d7cc3348..1ac9f9f03fe3 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1529,6 +1529,7 @@ hfsc_reset_qdisc(struct Qdisc *sch)
q->eligible = RB_ROOT;
INIT_LIST_HEAD(&q->droplist);
qdisc_watchdog_cancel(&q->watchdog);
+ sch->qstats.backlog = 0;
sch->q.qlen = 0;
}
@@ -1559,14 +1560,6 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb)
struct hfsc_sched *q = qdisc_priv(sch);
unsigned char *b = skb_tail_pointer(skb);
struct tc_hfsc_qopt qopt;
- struct hfsc_class *cl;
- unsigned int i;
-
- sch->qstats.backlog = 0;
- for (i = 0; i < q->clhash.hashsize; i++) {
- hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode)
- sch->qstats.backlog += cl->qdisc->qstats.backlog;
- }
qopt.defcls = q->defcls;
if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt))
@@ -1604,6 +1597,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (cl->qdisc->q.qlen == 1)
set_active(cl, qdisc_pkt_len(skb));
+ qdisc_qstats_backlog_inc(sch, skb);
sch->q.qlen++;
return NET_XMIT_SUCCESS;
@@ -1672,6 +1666,7 @@ hfsc_dequeue(struct Qdisc *sch)
qdisc_unthrottled(sch);
qdisc_bstats_update(sch, skb);
+ qdisc_qstats_backlog_dec(sch, skb);
sch->q.qlen--;
return skb;
@@ -1695,6 +1690,7 @@ hfsc_drop(struct Qdisc *sch)
}
cl->qstats.drops++;
qdisc_qstats_drop(sch);
+ sch->qstats.backlog -= len;
sch->q.qlen--;
return len;
}
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index d9c84328e7eb..b34992808879 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -195,7 +195,7 @@ static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
sch = dev_queue->qdisc_sleeping;
- if (gnet_stats_copy_basic(d, NULL, &sch->bstats) < 0 ||
+ if (gnet_stats_copy_basic(d, sch->cpu_bstats, &sch->bstats) < 0 ||
gnet_stats_copy_queue(d, NULL, &sch->qstats, sch->q.qlen) < 0)
return -1;
return 0;
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 66bccc5ff4ea..03c19adb2e61 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -355,7 +355,8 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
sch = dev_queue->qdisc_sleeping;
- if (gnet_stats_copy_basic(d, NULL, &sch->bstats) < 0 ||
+ if (gnet_stats_copy_basic(d, sch->cpu_bstats,
+ &sch->bstats) < 0 ||
gnet_stats_copy_queue(d, NULL,
&sch->qstats, sch->q.qlen) < 0)
return -1;
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index cef36ad691dd..be3d78357fbb 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -351,7 +351,7 @@ static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
struct Qdisc *cl_q;
cl_q = q->queues[cl - 1];
- if (gnet_stats_copy_basic(d, NULL, &cl_q->bstats) < 0 ||
+ if (gnet_stats_copy_basic(d, cl_q->cpu_bstats, &cl_q->bstats) < 0 ||
gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0)
return -1;
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 0d4630b155fe..81ea3a87bc04 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -87,6 +87,7 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch)
ret = qdisc_enqueue(skb, qdisc);
if (ret == NET_XMIT_SUCCESS) {
+ qdisc_qstats_backlog_inc(sch, skb);
sch->q.qlen++;
return NET_XMIT_SUCCESS;
}
@@ -125,6 +126,7 @@ static struct sk_buff *prio_dequeue(struct Qdisc *sch)
struct sk_buff *skb = qdisc_dequeue_peeked(qdisc);
if (skb) {
qdisc_bstats_update(sch, skb);
+ qdisc_qstats_backlog_dec(sch, skb);
sch->q.qlen--;
return skb;
}
@@ -143,6 +145,7 @@ static unsigned int prio_drop(struct Qdisc *sch)
for (prio = q->bands-1; prio >= 0; prio--) {
qdisc = q->queues[prio];
if (qdisc->ops->drop && (len = qdisc->ops->drop(qdisc)) != 0) {
+ sch->qstats.backlog -= len;
sch->q.qlen--;
return len;
}
@@ -159,6 +162,7 @@ prio_reset(struct Qdisc *sch)
for (prio = 0; prio < q->bands; prio++)
qdisc_reset(q->queues[prio]);
+ sch->qstats.backlog = 0;
sch->q.qlen = 0;
q->enable_flow = 1;
}
@@ -341,7 +345,7 @@ static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
struct Qdisc *cl_q;
cl_q = q->queues[cl - 1];
- if (gnet_stats_copy_basic(d, NULL, &cl_q->bstats) < 0 ||
+ if (gnet_stats_copy_basic(d, cl_q->cpu_bstats, &cl_q->bstats) < 0 ||
gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0)
return -1;
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 8d2d8d953432..d466fab84261 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -1150,6 +1150,7 @@ static struct sk_buff *qfq_dequeue(struct Qdisc *sch)
if (!skb)
return NULL;
+ qdisc_qstats_backlog_dec(sch, skb);
sch->q.qlen--;
qdisc_bstats_update(sch, skb);
@@ -1250,6 +1251,7 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
}
bstats_update(&cl->bstats, skb);
+ qdisc_qstats_backlog_inc(sch, skb);
++sch->q.qlen;
agg = cl->agg;
@@ -1477,10 +1479,8 @@ static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
if (err < 0)
return err;
- if (qdisc_dev(sch)->tx_queue_len + 1 > QFQ_MAX_AGG_CLASSES)
- max_classes = QFQ_MAX_AGG_CLASSES;
- else
- max_classes = qdisc_dev(sch)->tx_queue_len + 1;
+ max_classes = min_t(u64, (u64)qdisc_dev(sch)->tx_queue_len + 1,
+ QFQ_MAX_AGG_CLASSES);
/* max_cl_shift = floor(log_2(max_classes)) */
max_cl_shift = __fls(max_classes);
q->max_agg_classes = 1<<max_cl_shift;
@@ -1516,6 +1516,7 @@ static void qfq_reset_qdisc(struct Qdisc *sch)
qdisc_reset(cl->qdisc);
}
}
+ sch->qstats.backlog = 0;
sch->q.qlen = 0;
}
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 4bf2b599ef98..ac85792038c4 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -188,6 +188,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
struct Qdisc *child = NULL;
int err;
u32 max_P;
+ u8 *stab;
if (opt == NULL)
return -EINVAL;
@@ -203,7 +204,9 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;
ctl = nla_data(tb[TCA_RED_PARMS]);
- if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
+ stab = nla_data(tb[TCA_RED_STAB]);
+ if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog,
+ ctl->Scell_log, stab))
return -EINVAL;
if (ctl->limit > 0) {
@@ -225,7 +228,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
red_set_parms(&q->parms,
ctl->qth_min, ctl->qth_max, ctl->Wlog,
ctl->Plog, ctl->Scell_log,
- nla_data(tb[TCA_RED_STAB]),
+ stab,
max_P);
red_set_vars(&q->vars);
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 10c0b184cdbe..624b5e6fa52f 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -400,6 +400,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
enqueue:
ret = qdisc_enqueue(skb, child);
if (likely(ret == NET_XMIT_SUCCESS)) {
+ qdisc_qstats_backlog_inc(sch, skb);
sch->q.qlen++;
increment_qlen(skb, q);
} else if (net_xmit_drop_count(ret)) {
@@ -428,6 +429,7 @@ static struct sk_buff *sfb_dequeue(struct Qdisc *sch)
if (skb) {
qdisc_bstats_update(sch, skb);
+ qdisc_qstats_backlog_dec(sch, skb);
sch->q.qlen--;
decrement_qlen(skb, q);
}
@@ -450,6 +452,7 @@ static void sfb_reset(struct Qdisc *sch)
struct sfb_sched_data *q = qdisc_priv(sch);
qdisc_reset(q->qdisc);
+ sch->qstats.backlog = 0;
sch->q.qlen = 0;
q->slot = 0;
q->double_buffering = false;
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index e2e4ebc0c4c3..0b27487fd07d 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -635,8 +635,17 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
if (ctl->divisor &&
(!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
return -EINVAL;
+
+ /* slot->allot is a short, make sure quantum is not too big. */
+ if (ctl->quantum) {
+ unsigned int scaled = SFQ_ALLOT_SIZE(ctl->quantum);
+
+ if (scaled <= 0 || scaled > SHRT_MAX)
+ return -EINVAL;
+ }
+
if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max,
- ctl_v1->Wlog))
+ ctl_v1->Wlog, ctl_v1->Scell_log, NULL))
return -EINVAL;
if (ctl_v1 && ctl_v1->qth_min) {
p = kmalloc(sizeof(*p), GFP_KERNEL);
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 05c7a66f64da..87dee4deb66e 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -197,6 +197,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return ret;
}
+ qdisc_qstats_backlog_inc(sch, skb);
sch->q.qlen++;
return NET_XMIT_SUCCESS;
}
@@ -207,6 +208,7 @@ static unsigned int tbf_drop(struct Qdisc *sch)
unsigned int len = 0;
if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) {
+ sch->qstats.backlog -= len;
sch->q.qlen--;
qdisc_qstats_drop(sch);
}
@@ -253,6 +255,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
q->t_c = now;
q->tokens = toks;
q->ptokens = ptoks;
+ qdisc_qstats_backlog_dec(sch, skb);
sch->q.qlen--;
qdisc_unthrottled(sch);
qdisc_bstats_update(sch, skb);
@@ -284,6 +287,7 @@ static void tbf_reset(struct Qdisc *sch)
struct tbf_sched_data *q = qdisc_priv(sch);
qdisc_reset(q->qdisc);
+ sch->qstats.backlog = 0;
sch->q.qlen = 0;
q->t_c = ktime_get_ns();
q->tokens = q->buffer;
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index e02687185a59..a7ecf626e998 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -138,6 +138,9 @@ teql_destroy(struct Qdisc *sch)
struct teql_sched_data *dat = qdisc_priv(sch);
struct teql_master *master = dat->m;
+ if (!master)
+ return;
+
prev = master->slaves;
if (prev) {
do {
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index f085b01b6603..aa38578fdfcd 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1290,7 +1290,7 @@ static struct sctp_transport *sctp_trans_elect_best(struct sctp_transport *curr,
if (score_curr > score_best)
return curr;
else if (score_curr == score_best)
- return sctp_trans_elect_tie(curr, best);
+ return sctp_trans_elect_tie(best, curr);
else
return best;
}
@@ -1575,12 +1575,15 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len)
int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc,
sctp_scope_t scope, gfp_t gfp)
{
+ struct sock *sk = asoc->base.sk;
int flags;
/* Use scoping rules to determine the subset of addresses from
* the endpoint.
*/
- flags = (PF_INET6 == asoc->base.sk->sk_family) ? SCTP_ADDR6_ALLOWED : 0;
+ flags = (PF_INET6 == sk->sk_family) ? SCTP_ADDR6_ALLOWED : 0;
+ if (!inet_v6_ipv6only(sk))
+ flags |= SCTP_ADDR4_ALLOWED;
if (asoc->peer.ipv4_address)
flags |= SCTP_ADDR4_PEERSUPP;
if (asoc->peer.ipv6_address)
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 1543e39f47c3..04cd87d26ed1 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -496,6 +496,7 @@ int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp)
out_err:
/* Clean up any successful allocations */
sctp_auth_destroy_hmacs(ep->auth_hmacs);
+ ep->auth_hmacs = NULL;
return -ENOMEM;
}
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 871cdf9567e6..40fd399a1035 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -284,19 +284,15 @@ int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list,
rawaddr = (union sctp_addr_param *)raw_addr_list;
af = sctp_get_af_specific(param_type2af(param->type));
- if (unlikely(!af)) {
+ if (unlikely(!af) ||
+ !af->from_addr_param(&addr, rawaddr, htons(port), 0)) {
retval = -EINVAL;
- sctp_bind_addr_clean(bp);
- break;
+ goto out_err;
}
- af->from_addr_param(&addr, rawaddr, htons(port), 0);
retval = sctp_add_bind_addr(bp, &addr, SCTP_ADDR_SRC, gfp);
- if (retval) {
- /* Can't finish building the list, clean up. */
- sctp_bind_addr_clean(bp);
- break;
- }
+ if (retval)
+ goto out_err;
len = ntohs(param->length);
addrs_len -= len;
@@ -304,6 +300,12 @@ int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list,
}
return retval;
+
+out_err:
+ if (retval)
+ sctp_bind_addr_clean(bp);
+
+ return retval;
}
/********************************************************************
@@ -449,6 +451,7 @@ static int sctp_copy_one_addr(struct net *net, struct sctp_bind_addr *dest,
* well as the remote peer.
*/
if ((((AF_INET == addr->sa.sa_family) &&
+ (flags & SCTP_ADDR4_ALLOWED) &&
(flags & SCTP_ADDR4_PEERSUPP))) ||
(((AF_INET6 == addr->sa.sa_family) &&
(flags & SCTP_ADDR6_ALLOWED) &&
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 71c2ef84c5b0..3f0b8aafc21a 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -448,7 +448,7 @@ void sctp_icmp_proto_unreachable(struct sock *sk,
else {
if (!mod_timer(&t->proto_unreach_timer,
jiffies + (HZ/20)))
- sctp_association_hold(asoc);
+ sctp_transport_hold(t);
}
} else {
struct net *net = sock_net(sk);
@@ -457,7 +457,7 @@ void sctp_icmp_proto_unreachable(struct sock *sk,
"encountered!\n", __func__);
if (del_timer(&t->proto_unreach_timer))
- sctp_association_put(asoc);
+ sctp_transport_put(t);
sctp_do_sm(net, SCTP_EVENT_T_OTHER,
SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH),
@@ -972,7 +972,8 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net,
if (!af)
continue;
- af->from_addr_param(paddr, params.addr, sh->source, 0);
+ if (!af->from_addr_param(paddr, params.addr, sh->source, 0))
+ continue;
asoc = __sctp_lookup_association(net, laddr, paddr, &transport);
if (asoc)
@@ -1008,6 +1009,9 @@ static struct sctp_association *__sctp_rcv_asconf_lookup(
union sctp_addr_param *param;
union sctp_addr paddr;
+ if (ntohs(ch->length) < sizeof(*asconf) + sizeof(struct sctp_paramhdr))
+ return NULL;
+
/* Skip over the ADDIP header and find the Address parameter */
param = (union sctp_addr_param *)(asconf + 1);
@@ -1015,7 +1019,8 @@ static struct sctp_association *__sctp_rcv_asconf_lookup(
if (unlikely(!af))
return NULL;
- af->from_addr_param(&paddr, param, peer_port, 0);
+ if (!af->from_addr_param(&paddr, param, peer_port, 0))
+ return NULL;
return __sctp_lookup_association(net, laddr, &paddr, transportp);
}
@@ -1086,7 +1091,7 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net,
ch = (sctp_chunkhdr_t *) ch_end;
chunk_num++;
- } while (ch_end < skb_tail_pointer(skb));
+ } while (ch_end + sizeof(*ch) < skb_tail_pointer(skb));
return asoc;
}
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index ae619cffc3a9..62c729402a04 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -234,7 +234,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
{
struct sctp_association *asoc = t->asoc;
struct dst_entry *dst = NULL;
- struct flowi6 *fl6 = &fl->u.ip6;
+ struct flowi _fl;
+ struct flowi6 *fl6 = &_fl.u.ip6;
struct sctp_bind_addr *bp;
struct ipv6_pinfo *np = inet6_sk(sk);
struct sctp_sockaddr_entry *laddr;
@@ -244,7 +245,7 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
__u8 matchlen = 0;
sctp_scope_t scope;
- memset(fl6, 0, sizeof(struct flowi6));
+ memset(&_fl, 0, sizeof(_fl));
fl6->daddr = daddr->v6.sin6_addr;
fl6->fl6_dport = daddr->v6.sin6_port;
fl6->flowi6_proto = IPPROTO_SCTP;
@@ -267,9 +268,12 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
rcu_read_unlock();
- dst = ip6_dst_lookup_flow(sk, fl6, final_p);
- if (!asoc || saddr)
+ dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p);
+ if (!asoc || saddr) {
+ t->dst = dst;
+ memcpy(fl, &_fl, sizeof(_fl));
goto out;
+ }
bp = &asoc->base.bind_addr;
scope = sctp_scope(daddr);
@@ -292,6 +296,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
if ((laddr->a.sa.sa_family == AF_INET6) &&
(sctp_v6_cmp_addr(&dst_saddr, &laddr->a))) {
rcu_read_unlock();
+ t->dst = dst;
+ memcpy(fl, &_fl, sizeof(_fl));
goto out;
}
}
@@ -320,7 +326,7 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
fl6->saddr = laddr->a.v6.sin6_addr;
fl6->fl6_sport = laddr->a.v6.sin6_port;
final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
- bdst = ip6_dst_lookup_flow(sk, fl6, final_p);
+ bdst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p);
if (IS_ERR(bdst))
continue;
@@ -330,6 +336,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
if (!IS_ERR_OR_NULL(dst))
dst_release(dst);
dst = bdst;
+ t->dst = dst;
+ memcpy(fl, &_fl, sizeof(_fl));
break;
}
@@ -343,6 +351,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
dst_release(dst);
dst = bdst;
matchlen = bmatchlen;
+ t->dst = dst;
+ memcpy(fl, &_fl, sizeof(_fl));
}
rcu_read_unlock();
@@ -351,14 +361,12 @@ out:
struct rt6_info *rt;
rt = (struct rt6_info *)dst;
- t->dst = dst;
t->dst_cookie = rt6_get_cookie(rt);
pr_debug("rt6_dst:%pI6/%d rt6_src:%pI6\n",
&rt->rt6i_dst.addr, rt->rt6i_dst.plen,
- &fl6->saddr);
+ &fl->u.ip6.saddr);
} else {
t->dst = NULL;
-
pr_debug("no route\n");
}
}
@@ -480,15 +488,20 @@ static void sctp_v6_to_sk_daddr(union sctp_addr *addr, struct sock *sk)
}
/* Initialize a sctp_addr from an address parameter. */
-static void sctp_v6_from_addr_param(union sctp_addr *addr,
+static bool sctp_v6_from_addr_param(union sctp_addr *addr,
union sctp_addr_param *param,
__be16 port, int iif)
{
+ if (ntohs(param->v6.param_hdr.length) < sizeof(struct sctp_ipv6addr_param))
+ return false;
+
addr->v6.sin6_family = AF_INET6;
addr->v6.sin6_port = port;
addr->v6.sin6_flowinfo = 0; /* BUG */
addr->v6.sin6_addr = param->v6.addr;
addr->v6.sin6_scope_id = iif;
+
+ return true;
}
/* Initialize an address parameter from a sctp_addr and return the length
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 8816e49fd88b..510b805aab2d 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -210,6 +210,7 @@ int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp,
* sock as well as the remote peer.
*/
if ((((AF_INET == addr->a.sa.sa_family) &&
+ (copy_flags & SCTP_ADDR4_ALLOWED) &&
(copy_flags & SCTP_ADDR4_PEERSUPP))) ||
(((AF_INET6 == addr->a.sa.sa_family) &&
(copy_flags & SCTP_ADDR6_ALLOWED) &&
@@ -255,6 +256,7 @@ static void sctp_v4_from_sk(union sctp_addr *addr, struct sock *sk)
addr->v4.sin_family = AF_INET;
addr->v4.sin_port = 0;
addr->v4.sin_addr.s_addr = inet_sk(sk)->inet_rcv_saddr;
+ memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero));
}
/* Initialize sk->sk_rcv_saddr from sctp_addr. */
@@ -270,13 +272,19 @@ static void sctp_v4_to_sk_daddr(union sctp_addr *addr, struct sock *sk)
}
/* Initialize a sctp_addr from an address parameter. */
-static void sctp_v4_from_addr_param(union sctp_addr *addr,
+static bool sctp_v4_from_addr_param(union sctp_addr *addr,
union sctp_addr_param *param,
__be16 port, int iif)
{
+ if (ntohs(param->v4.param_hdr.length) < sizeof(struct sctp_ipv4addr_param))
+ return false;
+
addr->v4.sin_family = AF_INET;
addr->v4.sin_port = port;
addr->v4.sin_addr.s_addr = param->v4.addr.s_addr;
+ memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero));
+
+ return true;
}
/* Initialize an address parameter from a sctp_addr and return the length
@@ -301,6 +309,7 @@ static void sctp_v4_dst_saddr(union sctp_addr *saddr, struct flowi4 *fl4,
saddr->v4.sin_family = AF_INET;
saddr->v4.sin_port = port;
saddr->v4.sin_addr.s_addr = fl4->saddr;
+ memset(saddr->v4.sin_zero, 0, sizeof(saddr->v4.sin_zero));
}
/* Compare two addresses exactly. */
@@ -323,6 +332,7 @@ static void sctp_v4_inaddr_any(union sctp_addr *addr, __be16 port)
addr->v4.sin_family = AF_INET;
addr->v4.sin_addr.s_addr = htonl(INADDR_ANY);
addr->v4.sin_port = port;
+ memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero));
}
/* Is this a wildcard address? */
@@ -406,7 +416,8 @@ static sctp_scope_t sctp_v4_scope(union sctp_addr *addr)
retval = SCTP_SCOPE_LINK;
} else if (ipv4_is_private_10(addr->v4.sin_addr.s_addr) ||
ipv4_is_private_172(addr->v4.sin_addr.s_addr) ||
- ipv4_is_private_192(addr->v4.sin_addr.s_addr)) {
+ ipv4_is_private_192(addr->v4.sin_addr.s_addr) ||
+ ipv4_is_test_198(addr->v4.sin_addr.s_addr)) {
retval = SCTP_SCOPE_PRIVATE;
} else {
retval = SCTP_SCOPE_GLOBAL;
@@ -424,14 +435,15 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
{
struct sctp_association *asoc = t->asoc;
struct rtable *rt;
- struct flowi4 *fl4 = &fl->u.ip4;
+ struct flowi _fl;
+ struct flowi4 *fl4 = &_fl.u.ip4;
struct sctp_bind_addr *bp;
struct sctp_sockaddr_entry *laddr;
struct dst_entry *dst = NULL;
union sctp_addr *daddr = &t->ipaddr;
union sctp_addr dst_saddr;
- memset(fl4, 0x0, sizeof(struct flowi4));
+ memset(&_fl, 0x0, sizeof(_fl));
fl4->daddr = daddr->v4.sin_addr.s_addr;
fl4->fl4_dport = daddr->v4.sin_port;
fl4->flowi4_proto = IPPROTO_SCTP;
@@ -449,8 +461,11 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
&fl4->saddr);
rt = ip_route_output_key(sock_net(sk), fl4);
- if (!IS_ERR(rt))
+ if (!IS_ERR(rt)) {
dst = &rt->dst;
+ t->dst = dst;
+ memcpy(fl, &_fl, sizeof(_fl));
+ }
/* If there is no association or if a source address is passed, no
* more validation is required.
@@ -513,27 +528,33 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
odev = __ip_dev_find(sock_net(sk), laddr->a.v4.sin_addr.s_addr,
false);
if (!odev || odev->ifindex != fl4->flowi4_oif) {
- if (!dst)
+ if (!dst) {
dst = &rt->dst;
- else
+ t->dst = dst;
+ memcpy(fl, &_fl, sizeof(_fl));
+ } else {
dst_release(&rt->dst);
+ }
continue;
}
dst_release(dst);
dst = &rt->dst;
+ t->dst = dst;
+ memcpy(fl, &_fl, sizeof(_fl));
break;
}
out_unlock:
rcu_read_unlock();
out:
- t->dst = dst;
- if (dst)
+ if (dst) {
pr_debug("rt_dst:%pI4, rt_src:%pI4\n",
- &fl4->daddr, &fl4->saddr);
- else
+ &fl->u.ip4.daddr, &fl->u.ip4.saddr);
+ } else {
+ t->dst = NULL;
pr_debug("no route\n");
+ }
}
/* For v4, the source address is cached in the route entry(dst). So no need
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 509e9426a056..d31e0d6c641b 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -857,7 +857,11 @@ struct sctp_chunk *sctp_make_shutdown(const struct sctp_association *asoc,
sctp_shutdownhdr_t shut;
__u32 ctsn;
- ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map);
+ if (chunk && chunk->asoc)
+ ctsn = sctp_tsnmap_get_ctsn(&chunk->asoc->peer.tsn_map);
+ else
+ ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map);
+
shut.cum_tsn_ack = htonl(ctsn);
retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN, 0,
@@ -2142,9 +2146,16 @@ static sctp_ierror_t sctp_verify_param(struct net *net,
break;
case SCTP_PARAM_SET_PRIMARY:
- if (net->sctp.addip_enable)
- break;
- goto fallthrough;
+ if (!net->sctp.addip_enable)
+ goto fallthrough;
+
+ if (ntohs(param.p->length) < sizeof(struct sctp_addip_param) +
+ sizeof(struct sctp_paramhdr)) {
+ sctp_process_inv_paramlength(asoc, param.p,
+ chunk, err_chunk);
+ retval = SCTP_IERROR_ABORT;
+ }
+ break;
case SCTP_PARAM_HOST_NAME_ADDRESS:
/* Tell the peer, we won't support this param. */
@@ -2322,11 +2333,13 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk,
/* Process the initialization parameters. */
sctp_walk_params(param, peer_init, init_hdr.params) {
- if (!src_match && (param.p->type == SCTP_PARAM_IPV4_ADDRESS ||
- param.p->type == SCTP_PARAM_IPV6_ADDRESS)) {
+ if (!src_match &&
+ (param.p->type == SCTP_PARAM_IPV4_ADDRESS ||
+ param.p->type == SCTP_PARAM_IPV6_ADDRESS)) {
af = sctp_get_af_specific(param_type2af(param.p->type));
- af->from_addr_param(&addr, param.addr,
- chunk->sctp_hdr->source, 0);
+ if (!af->from_addr_param(&addr, param.addr,
+ chunk->sctp_hdr->source, 0))
+ continue;
if (sctp_cmp_addr_exact(sctp_source(chunk), &addr))
src_match = 1;
}
@@ -2520,7 +2533,8 @@ static int sctp_process_param(struct sctp_association *asoc,
break;
do_addr_param:
af = sctp_get_af_specific(param_type2af(param.p->type));
- af->from_addr_param(&addr, param.addr, htons(asoc->peer.port), 0);
+ if (!af->from_addr_param(&addr, param.addr, htons(asoc->peer.port), 0))
+ break;
scope = sctp_scope(peer_addr);
if (sctp_in_scope(net, &addr, scope))
if (!sctp_assoc_add_peer(asoc, &addr, gfp, SCTP_UNCONFIRMED))
@@ -2613,15 +2627,13 @@ do_addr_param:
addr_param = param.v + sizeof(sctp_addip_param_t);
af = sctp_get_af_specific(param_type2af(addr_param->p.type));
- if (af == NULL)
+ if (!af)
break;
- af->from_addr_param(&addr, addr_param,
- htons(asoc->peer.port), 0);
+ if (!af->from_addr_param(&addr, addr_param,
+ htons(asoc->peer.port), 0))
+ break;
- /* if the address is invalid, we can't process it.
- * XXX: see spec for what to do.
- */
if (!af->addr_valid(&addr, NULL, NULL))
break;
@@ -3031,7 +3043,8 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
if (unlikely(!af))
return SCTP_ERROR_DNS_FAILED;
- af->from_addr_param(&addr, addr_param, htons(asoc->peer.port), 0);
+ if (!af->from_addr_param(&addr, addr_param, htons(asoc->peer.port), 0))
+ return SCTP_ERROR_DNS_FAILED;
/* ADDIP 4.2.1 This parameter MUST NOT contain a broadcast
* or multicast address.
@@ -3115,7 +3128,7 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
* primary.
*/
if (af->is_any(&addr))
- memcpy(&addr.v4, sctp_source(asconf), sizeof(addr));
+ memcpy(&addr, sctp_source(asconf), sizeof(addr));
peer = sctp_assoc_lookup_paddr(asoc, &addr);
if (!peer)
@@ -3297,7 +3310,8 @@ static void sctp_asconf_param_success(struct sctp_association *asoc,
/* We have checked the packet before, so we do not check again. */
af = sctp_get_af_specific(param_type2af(addr_param->p.type));
- af->from_addr_param(&addr, addr_param, htons(bp->port), 0);
+ if (!af->from_addr_param(&addr, addr_param, htons(bp->port), 0))
+ return;
switch (asconf_param->param_hdr.type) {
case SCTP_PARAM_ADD_IP:
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 7c220e905168..fc3d8ca21f6e 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -416,7 +416,7 @@ void sctp_generate_proto_unreach_event(unsigned long data)
/* Try again later. */
if (!mod_timer(&transport->proto_unreach_timer,
jiffies + (HZ/20)))
- sctp_association_hold(asoc);
+ sctp_transport_hold(transport);
goto out_unlock;
}
@@ -432,7 +432,7 @@ void sctp_generate_proto_unreach_event(unsigned long data)
out_unlock:
bh_unlock_sock(sk);
- sctp_association_put(asoc);
+ sctp_transport_put(transport);
}
@@ -1333,8 +1333,10 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
/* Generate an INIT ACK chunk. */
new_obj = sctp_make_init_ack(asoc, chunk, GFP_ATOMIC,
0);
- if (!new_obj)
- goto nomem;
+ if (!new_obj) {
+ error = -ENOMEM;
+ break;
+ }
sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
SCTP_CHUNK(new_obj));
@@ -1356,7 +1358,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
if (!new_obj) {
if (cmd->obj.chunk)
sctp_chunk_free(cmd->obj.chunk);
- goto nomem;
+ error = -ENOMEM;
+ break;
}
sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
SCTP_CHUNK(new_obj));
@@ -1403,8 +1406,10 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
/* Generate a SHUTDOWN chunk. */
new_obj = sctp_make_shutdown(asoc, chunk);
- if (!new_obj)
- goto nomem;
+ if (!new_obj) {
+ error = -ENOMEM;
+ break;
+ }
sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
SCTP_CHUNK(new_obj));
break;
@@ -1733,11 +1738,17 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
break;
}
- if (error)
+ if (error) {
+ cmd = sctp_next_cmd(commands);
+ while (cmd) {
+ if (cmd->verb == SCTP_CMD_REPLY)
+ sctp_chunk_free(cmd->obj.chunk);
+ cmd = sctp_next_cmd(commands);
+ }
break;
+ }
}
-out:
/* If this is in response to a received chunk, wait until
* we are done with the packet to open the queue so that we don't
* send multiple packets in response to a single request.
@@ -1748,8 +1759,5 @@ out:
} else if (local_cork)
error = sctp_outq_uncork(&asoc->outqueue);
return error;
-nomem:
- error = -ENOMEM;
- goto out;
}
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index df9ac3746c1b..53bb631ec490 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -177,6 +177,16 @@ sctp_chunk_length_valid(struct sctp_chunk *chunk,
return 1;
}
+/* Check for format error in an ABORT chunk */
+static inline bool sctp_err_chunk_valid(struct sctp_chunk *chunk)
+{
+ struct sctp_errhdr *err;
+
+ sctp_walk_errors(err, chunk->chunk_hdr);
+
+ return (void *)err == (void *)chunk->chunk_end;
+}
+
/**********************************************************
* These are the state functions for handling chunk events.
**********************************************************/
@@ -1783,12 +1793,13 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(struct net *net,
/* Update the content of current association. */
sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc));
sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
- if (sctp_state(asoc, SHUTDOWN_PENDING) &&
+ if ((sctp_state(asoc, SHUTDOWN_PENDING) ||
+ sctp_state(asoc, SHUTDOWN_SENT)) &&
(sctp_sstate(asoc->base.sk, CLOSING) ||
sock_flag(asoc->base.sk, SOCK_DEAD))) {
- /* if were currently in SHUTDOWN_PENDING, but the socket
- * has been closed by user, don't transition to ESTABLISHED.
- * Instead trigger SHUTDOWN bundled with COOKIE_ACK.
+ /* If the socket has been closed by user, don't
+ * transition to ESTABLISHED. Instead trigger SHUTDOWN
+ * bundled with COOKIE_ACK.
*/
sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
return sctp_sf_do_9_2_start_shutdown(net, ep, asoc,
@@ -1840,7 +1851,8 @@ static sctp_disposition_t sctp_sf_do_dupcook_b(struct net *net,
sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc));
sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
SCTP_STATE(SCTP_STATE_ESTABLISHED));
- SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB);
+ if (asoc->state < SCTP_STATE_ESTABLISHED)
+ SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB);
sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL());
repl = sctp_make_cookie_ack(new_asoc, chunk);
@@ -2159,6 +2171,9 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort(
sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
+ if (!sctp_err_chunk_valid(chunk))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands);
}
@@ -2201,6 +2216,9 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(struct net *net,
sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
+ if (!sctp_err_chunk_valid(chunk))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
/* Stop the T2-shutdown timer. */
sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN));
@@ -2466,6 +2484,9 @@ sctp_disposition_t sctp_sf_do_9_1_abort(struct net *net,
sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
+ if (!sctp_err_chunk_valid(chunk))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands);
}
@@ -2482,15 +2503,9 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net,
/* See if we have an error cause code in the chunk. */
len = ntohs(chunk->chunk_hdr->length);
- if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) {
-
- sctp_errhdr_t *err;
- sctp_walk_errors(err, chunk->chunk_hdr);
- if ((void *)err != (void *)chunk->chunk_end)
- return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+ if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr))
error = ((sctp_errhdr_t *)chunk->skb->data)->cause;
- }
sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNRESET));
/* ASSOC_FAILED will DELETE_TCB. */
@@ -4322,6 +4337,9 @@ sctp_disposition_t sctp_sf_violation(struct net *net,
{
struct sctp_chunk *chunk = arg;
+ if (!sctp_vtag_verify(chunk, asoc))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
/* Make sure that the chunk has a valid length. */
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
@@ -6012,6 +6030,7 @@ static struct sctp_packet *sctp_ootb_pkt_new(struct net *net,
* yet.
*/
switch (chunk->chunk_hdr->type) {
+ case SCTP_CID_INIT:
case SCTP_CID_INIT_ACK:
{
sctp_initack_chunk_t *initack;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 2b6c88b9a038..4d60f1e42f42 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -185,13 +185,13 @@ static void sctp_for_each_tx_datachunk(struct sctp_association *asoc,
list_for_each_entry(chunk, &t->transmitted, transmitted_list)
cb(chunk);
- list_for_each_entry(chunk, &q->retransmit, list)
+ list_for_each_entry(chunk, &q->retransmit, transmitted_list)
cb(chunk);
- list_for_each_entry(chunk, &q->sacked, list)
+ list_for_each_entry(chunk, &q->sacked, transmitted_list)
cb(chunk);
- list_for_each_entry(chunk, &q->abandoned, list)
+ list_for_each_entry(chunk, &q->abandoned, transmitted_list)
cb(chunk);
list_for_each_entry(chunk, &q->out_chunk_list, list)
@@ -352,6 +352,18 @@ static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt,
return af;
}
+static void sctp_auto_asconf_init(struct sctp_sock *sp)
+{
+ struct net *net = sock_net(&sp->inet.sk);
+
+ if (net->sctp.default_auto_asconf) {
+ spin_lock(&net->sctp.addr_wq_lock);
+ list_add_tail(&sp->auto_asconf_list, &net->sctp.auto_asconf_splist);
+ spin_unlock(&net->sctp.addr_wq_lock);
+ sp->do_auto_asconf = 1;
+ }
+}
+
/* Bind a local address either to an endpoint or to an association. */
static int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len)
{
@@ -414,8 +426,10 @@ static int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len)
}
/* Refresh ephemeral port. */
- if (!bp->port)
+ if (!bp->port) {
bp->port = inet_sk(sk)->inet_num;
+ sctp_auto_asconf_init(sp);
+ }
/* Add the address to the bind address list.
* Use GFP_ATOMIC since BHs will be disabled.
@@ -4161,19 +4175,6 @@ static int sctp_init_sock(struct sock *sk)
sk_sockets_allocated_inc(sk);
sock_prot_inuse_add(net, sk->sk_prot, 1);
- /* Nothing can fail after this block, otherwise
- * sctp_destroy_sock() will be called without addr_wq_lock held
- */
- if (net->sctp.default_auto_asconf) {
- spin_lock(&sock_net(sk)->sctp.addr_wq_lock);
- list_add_tail(&sp->auto_asconf_list,
- &net->sctp.auto_asconf_splist);
- sp->do_auto_asconf = 1;
- spin_unlock(&sock_net(sk)->sctp.addr_wq_lock);
- } else {
- sp->do_auto_asconf = 0;
- }
-
local_bh_enable();
return 0;
@@ -6170,9 +6171,10 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
return retval;
}
-static void sctp_hash(struct sock *sk)
+static int sctp_hash(struct sock *sk)
{
/* STUB */
+ return 0;
}
static void sctp_unhash(struct sock *sk)
@@ -6206,8 +6208,6 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
pr_debug("%s: begins, snum:%d\n", __func__, snum);
- local_bh_disable();
-
if (snum == 0) {
/* Search for an available port. */
int low, high, remaining, index;
@@ -6226,20 +6226,21 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
continue;
index = sctp_phashfn(sock_net(sk), rover);
head = &sctp_port_hashtable[index];
- spin_lock(&head->lock);
+ spin_lock_bh(&head->lock);
sctp_for_each_hentry(pp, &head->chain)
if ((pp->port == rover) &&
net_eq(sock_net(sk), pp->net))
goto next;
break;
next:
- spin_unlock(&head->lock);
+ spin_unlock_bh(&head->lock);
+ cond_resched();
} while (--remaining > 0);
/* Exhausted local port range during search? */
ret = 1;
if (remaining <= 0)
- goto fail;
+ return ret;
/* OK, here is the one we will use. HEAD (the port
* hash table list entry) is non-NULL and we hold it's
@@ -6254,7 +6255,7 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
* port iterator, pp being NULL.
*/
head = &sctp_port_hashtable[sctp_phashfn(sock_net(sk), snum)];
- spin_lock(&head->lock);
+ spin_lock_bh(&head->lock);
sctp_for_each_hentry(pp, &head->chain) {
if ((pp->port == snum) && net_eq(pp->net, sock_net(sk)))
goto pp_found;
@@ -6338,10 +6339,7 @@ success:
ret = 0;
fail_unlock:
- spin_unlock(&head->lock);
-
-fail:
- local_bh_enable();
+ spin_unlock_bh(&head->lock);
return ret;
}
@@ -7341,6 +7339,8 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
sctp_bind_addr_dup(&newsp->ep->base.bind_addr,
&oldsp->ep->base.bind_addr, GFP_KERNEL);
+ sctp_auto_asconf_init(newsp);
+
/* Move any messages in the old socket's receive queue that are for the
* peeled off association to the new socket's receive queue.
*/
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index aab9e3f29755..f8041fb03290 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -72,7 +72,7 @@ static struct sctp_transport *sctp_transport_init(struct net *net,
*/
peer->rto = msecs_to_jiffies(net->sctp.rto_initial);
- peer->last_time_heard = ktime_get();
+ peer->last_time_heard = ktime_set(0, 0);
peer->last_time_ecne_reduced = jiffies;
peer->param_flags = SPP_HB_DISABLE |
@@ -148,7 +148,7 @@ void sctp_transport_free(struct sctp_transport *transport)
/* Delete the ICMP proto unreachable timer if it's active. */
if (del_timer(&transport->proto_unreach_timer))
- sctp_association_put(transport->asoc);
+ sctp_transport_put(transport);
sctp_transport_put(transport);
}
diff --git a/net/socket.c b/net/socket.c
index 690b4de0377e..07b1143d03dd 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -469,7 +469,7 @@ static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
if (f.file) {
sock = sock_from_file(f.file, err);
if (likely(sock)) {
- *fput_needed = f.flags;
+ *fput_needed = f.flags & FDPUT_FPUT;
return sock;
}
fdput(f);
@@ -2597,15 +2597,6 @@ out_fs:
core_initcall(sock_init); /* early initcall */
-static int __init jit_init(void)
-{
-#ifdef CONFIG_BPF_JIT_ALWAYS_ON
- bpf_jit_enable = 1;
-#endif
- return 0;
-}
-pure_initcall(jit_init);
-
#ifdef CONFIG_PROC_FS
void socket_seq_show(struct seq_file *seq)
{
@@ -3216,6 +3207,7 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
case SIOCSARP:
case SIOCGARP:
case SIOCDARP:
+ case SIOCOUTQNSD:
case SIOCATMARK:
return sock_do_ioctl(net, sock, cmd, arg);
}
@@ -3388,3 +3380,49 @@ int sockev_unregister_notify(struct notifier_block *nb)
return blocking_notifier_chain_unregister(&sockev_notifier_list, nb);
}
EXPORT_SYMBOL(sockev_unregister_notify);
+
+/* This routine returns the IP overhead imposed by a socket i.e.
+ * the length of the underlying IP header, depending on whether
+ * this is an IPv4 or IPv6 socket and the length from IP options turned
+ * on at the socket. Assumes that the caller has a lock on the socket.
+ */
+u32 kernel_sock_ip_overhead(struct sock *sk)
+{
+ struct inet_sock *inet;
+ struct ip_options_rcu *opt;
+ u32 overhead = 0;
+ bool owned_by_user;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct ipv6_pinfo *np;
+ struct ipv6_txoptions *optv6 = NULL;
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+
+ if (!sk)
+ return overhead;
+
+ owned_by_user = sock_owned_by_user(sk);
+ switch (sk->sk_family) {
+ case AF_INET:
+ inet = inet_sk(sk);
+ overhead += sizeof(struct iphdr);
+ opt = rcu_dereference_protected(inet->inet_opt,
+ owned_by_user);
+ if (opt)
+ overhead += opt->opt.optlen;
+ return overhead;
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6:
+ np = inet6_sk(sk);
+ overhead += sizeof(struct ipv6hdr);
+ if (np)
+ optv6 = rcu_dereference_protected(np->opt,
+ owned_by_user);
+ if (optv6)
+ overhead += (optv6->opt_flen + optv6->opt_nflen);
+ return overhead;
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+ default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
+ return overhead;
+ }
+}
+EXPORT_SYMBOL(kernel_sock_ip_overhead);
diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c
index 2e0a6f92e563..7404f02702a1 100644
--- a/net/sunrpc/addr.c
+++ b/net/sunrpc/addr.c
@@ -81,11 +81,11 @@ static size_t rpc_ntop6(const struct sockaddr *sap,
rc = snprintf(scopebuf, sizeof(scopebuf), "%c%u",
IPV6_SCOPE_DELIMITER, sin6->sin6_scope_id);
- if (unlikely((size_t)rc > sizeof(scopebuf)))
+ if (unlikely((size_t)rc >= sizeof(scopebuf)))
return 0;
len += rc;
- if (unlikely(len > buflen))
+ if (unlikely(len >= buflen))
return 0;
strcat(buf, scopebuf);
@@ -184,7 +184,7 @@ static int rpc_parse_scope_id(struct net *net, const char *buf,
scope_id = dev->ifindex;
dev_put(dev);
} else {
- if (kstrtou32(p, 10, &scope_id) == 0) {
+ if (kstrtou32(p, 10, &scope_id) != 0) {
kfree(p);
return 0;
}
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 62fca77bf3c7..7bde2976307e 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -53,6 +53,7 @@
#include <asm/uaccess.h>
#include <linux/hashtable.h>
+#include "auth_gss_internal.h"
#include "../netns.h"
static const struct rpc_authops authgss_ops;
@@ -147,35 +148,6 @@ gss_cred_set_ctx(struct rpc_cred *cred, struct gss_cl_ctx *ctx)
clear_bit(RPCAUTH_CRED_NEW, &cred->cr_flags);
}
-static const void *
-simple_get_bytes(const void *p, const void *end, void *res, size_t len)
-{
- const void *q = (const void *)((const char *)p + len);
- if (unlikely(q > end || q < p))
- return ERR_PTR(-EFAULT);
- memcpy(res, p, len);
- return q;
-}
-
-static inline const void *
-simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest)
-{
- const void *q;
- unsigned int len;
-
- p = simple_get_bytes(p, end, &len, sizeof(len));
- if (IS_ERR(p))
- return p;
- q = (const void *)((const char *)p + len);
- if (unlikely(q > end || q < p))
- return ERR_PTR(-EFAULT);
- dest->data = kmemdup(p, len, GFP_NOFS);
- if (unlikely(dest->data == NULL))
- return ERR_PTR(-ENOMEM);
- dest->len = len;
- return q;
-}
-
static struct gss_cl_ctx *
gss_cred_get_ctx(struct rpc_cred *cred)
{
diff --git a/net/sunrpc/auth_gss/auth_gss_internal.h b/net/sunrpc/auth_gss/auth_gss_internal.h
new file mode 100644
index 000000000000..f6d9631bd9d0
--- /dev/null
+++ b/net/sunrpc/auth_gss/auth_gss_internal.h
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/*
+ * linux/net/sunrpc/auth_gss/auth_gss_internal.h
+ *
+ * Internal definitions for RPCSEC_GSS client authentication
+ *
+ * Copyright (c) 2000 The Regents of the University of Michigan.
+ * All rights reserved.
+ *
+ */
+#include <linux/err.h>
+#include <linux/string.h>
+#include <linux/sunrpc/xdr.h>
+
+static inline const void *
+simple_get_bytes(const void *p, const void *end, void *res, size_t len)
+{
+ const void *q = (const void *)((const char *)p + len);
+ if (unlikely(q > end || q < p))
+ return ERR_PTR(-EFAULT);
+ memcpy(res, p, len);
+ return q;
+}
+
+static inline const void *
+simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest)
+{
+ const void *q;
+ unsigned int len;
+
+ p = simple_get_bytes(p, end, &len, sizeof(len));
+ if (IS_ERR(p))
+ return p;
+ q = (const void *)((const char *)p + len);
+ if (unlikely(q > end || q < p))
+ return ERR_PTR(-EFAULT);
+ if (len) {
+ dest->data = kmemdup(p, len, GFP_NOFS);
+ if (unlikely(dest->data == NULL))
+ return ERR_PTR(-ENOMEM);
+ } else
+ dest->data = NULL;
+ dest->len = len;
+ return q;
+}
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 28db442a0034..89e616da161f 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -45,6 +45,8 @@
#include <linux/crypto.h>
#include <linux/sunrpc/gss_krb5_enctypes.h>
+#include "auth_gss_internal.h"
+
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
@@ -186,35 +188,6 @@ get_gss_krb5_enctype(int etype)
return NULL;
}
-static const void *
-simple_get_bytes(const void *p, const void *end, void *res, int len)
-{
- const void *q = (const void *)((const char *)p + len);
- if (unlikely(q > end || q < p))
- return ERR_PTR(-EFAULT);
- memcpy(res, p, len);
- return q;
-}
-
-static const void *
-simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)
-{
- const void *q;
- unsigned int len;
-
- p = simple_get_bytes(p, end, &len, sizeof(len));
- if (IS_ERR(p))
- return p;
- q = (const void *)((const char *)p + len);
- if (unlikely(q > end || q < p))
- return ERR_PTR(-EFAULT);
- res->data = kmemdup(p, len, GFP_NOFS);
- if (unlikely(res->data == NULL))
- return ERR_PTR(-ENOMEM);
- res->len = len;
- return q;
-}
-
static inline const void *
get_key(const void *p, const void *end,
struct krb5_ctx *ctx, struct crypto_blkcipher **res)
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index 7063d856a598..e2fd931ddb22 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -61,6 +61,8 @@ gss_mech_free(struct gss_api_mech *gm)
for (i = 0; i < gm->gm_pf_num; i++) {
pf = &gm->gm_pfs[i];
+ if (pf->domain)
+ auth_domain_put(pf->domain);
kfree(pf->auth_domain_name);
pf->auth_domain_name = NULL;
}
@@ -83,6 +85,7 @@ make_auth_domain_name(char *name)
static int
gss_mech_svc_setup(struct gss_api_mech *gm)
{
+ struct auth_domain *dom;
struct pf_desc *pf;
int i, status;
@@ -92,10 +95,13 @@ gss_mech_svc_setup(struct gss_api_mech *gm)
status = -ENOMEM;
if (pf->auth_domain_name == NULL)
goto out;
- status = svcauth_gss_register_pseudoflavor(pf->pseudoflavor,
- pf->auth_domain_name);
- if (status)
+ dom = svcauth_gss_register_pseudoflavor(
+ pf->pseudoflavor, pf->auth_domain_name);
+ if (IS_ERR(dom)) {
+ status = PTR_ERR(dom);
goto out;
+ }
+ pf->domain = dom;
}
return 0;
out:
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index b5291ea54a3d..daf0c1ea3917 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -772,7 +772,7 @@ u32 svcauth_gss_flavor(struct auth_domain *dom)
EXPORT_SYMBOL_GPL(svcauth_gss_flavor);
-int
+struct auth_domain *
svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name)
{
struct gss_domain *new;
@@ -789,21 +789,23 @@ svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name)
new->h.flavour = &svcauthops_gss;
new->pseudoflavor = pseudoflavor;
- stat = 0;
test = auth_domain_lookup(name, &new->h);
- if (test != &new->h) { /* Duplicate registration */
+ if (test != &new->h) {
+ pr_warn("svc: duplicate registration of gss pseudo flavour %s.\n",
+ name);
+ stat = -EADDRINUSE;
auth_domain_put(test);
- kfree(new->h.name);
- goto out_free_dom;
+ goto out_free_name;
}
- return 0;
+ return test;
+out_free_name:
+ kfree(new->h.name);
out_free_dom:
kfree(new);
out:
- return stat;
+ return ERR_PTR(stat);
}
-
EXPORT_SYMBOL_GPL(svcauth_gss_register_pseudoflavor);
static inline int
@@ -1173,6 +1175,7 @@ static int gss_proxy_save_rsc(struct cache_detail *cd,
dprintk("RPC: No creds found!\n");
goto out;
} else {
+ struct timespec64 boot;
/* steal creds */
rsci.cred = ud->creds;
@@ -1193,6 +1196,9 @@ static int gss_proxy_save_rsc(struct cache_detail *cd,
&expiry, GFP_KERNEL);
if (status)
goto out;
+
+ getboottime64(&boot);
+ expiry -= boot.tv_sec;
}
rsci.h.expiry_time = expiry;
@@ -1691,11 +1697,14 @@ static int
svcauth_gss_release(struct svc_rqst *rqstp)
{
struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data;
- struct rpc_gss_wire_cred *gc = &gsd->clcred;
+ struct rpc_gss_wire_cred *gc;
struct xdr_buf *resbuf = &rqstp->rq_res;
int stat = -EINVAL;
struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id);
+ if (!gsd)
+ goto out;
+ gc = &gsd->clcred;
if (gc->gc_proc != RPC_GSS_PROC_DATA)
goto out;
/* Release can be called twice, but we only wrap once. */
@@ -1736,10 +1745,10 @@ out_err:
if (rqstp->rq_cred.cr_group_info)
put_group_info(rqstp->rq_cred.cr_group_info);
rqstp->rq_cred.cr_group_info = NULL;
- if (gsd->rsci)
+ if (gsd && gsd->rsci) {
cache_put(&gsd->rsci->h, sn->rsc_cache);
- gsd->rsci = NULL;
-
+ gsd->rsci = NULL;
+ }
return stat;
}
@@ -1836,7 +1845,7 @@ gss_svc_init_net(struct net *net)
goto out2;
return 0;
out2:
- destroy_use_gss_proxy_proc_entry(net);
+ rsi_cache_destroy_net(net);
out1:
rsc_cache_destroy_net(net);
return rv;
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index a8ab98b53a3a..570832949f91 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -54,9 +54,6 @@ static void cache_init(struct cache_head *h, struct cache_detail *detail)
h->last_refresh = now;
}
-static inline int cache_is_valid(struct cache_head *h);
-static void cache_fresh_locked(struct cache_head *head, time_t expiry,
- struct cache_detail *detail);
static void cache_fresh_unlocked(struct cache_head *head,
struct cache_detail *detail);
@@ -101,9 +98,6 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
if (cache_is_expired(detail, tmp)) {
hlist_del_init(&tmp->cache_list);
detail->entries --;
- if (cache_is_valid(tmp) == -EAGAIN)
- set_bit(CACHE_NEGATIVE, &tmp->flags);
- cache_fresh_locked(tmp, 0, detail);
freeme = tmp;
break;
}
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index f28aeb2cfd32..cc308f7a2c02 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1826,6 +1826,14 @@ call_connect_status(struct rpc_task *task)
task->tk_status = 0;
switch (status) {
case -ECONNREFUSED:
+ /* A positive refusal suggests a rebind is needed. */
+ if (RPC_IS_SOFTCONN(task))
+ break;
+ if (clnt->cl_autobind) {
+ rpc_force_rebind(clnt);
+ task->tk_action = call_bind;
+ return;
+ }
case -ECONNRESET:
case -ECONNABORTED:
case -ENETUNREACH:
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 9103dd15511c..831a8cf231a1 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -1347,6 +1347,7 @@ rpc_gssd_dummy_populate(struct dentry *root, struct rpc_pipe *pipe_data)
q.len = strlen(gssd_dummy_clnt_dir[0].name);
clnt_dentry = d_hash_and_lookup(gssd_dentry, &q);
if (!clnt_dentry) {
+ __rpc_depopulate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1);
pipe_dentry = ERR_PTR(-ENOENT);
goto out;
}
@@ -1387,7 +1388,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
{
struct inode *inode;
struct dentry *root, *gssd_dentry;
- struct net *net = data;
+ struct net *net = get_net(sb->s_fs_info);
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
int err;
@@ -1420,7 +1421,6 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
sb);
if (err)
goto err_depopulate;
- sb->s_fs_info = get_net(net);
mutex_unlock(&sn->pipefs_sb_lock);
return 0;
@@ -1449,7 +1449,8 @@ static struct dentry *
rpc_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
- return mount_ns(fs_type, flags, current->nsproxy->net_ns, rpc_fill_super);
+ struct net *net = current->nsproxy->net_ns;
+ return mount_ns(fs_type, flags, data, net, net->user_ns, rpc_fill_super);
}
static void rpc_kill_sb(struct super_block *sb)
@@ -1469,9 +1470,9 @@ static void rpc_kill_sb(struct super_block *sb)
RPC_PIPEFS_UMOUNT,
sb);
mutex_unlock(&sn->pipefs_sb_lock);
- put_net(net);
out:
kill_litter_super(sb);
+ put_net(net);
}
static struct file_system_type rpc_pipe_fs_type = {
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index c89626b2afff..696381a51634 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -977,8 +977,8 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr,
p = xdr_inline_decode(xdr, len);
if (unlikely(p == NULL))
goto out_fail;
- dprintk("RPC: %5u RPCB_%s reply: %s\n", req->rq_task->tk_pid,
- req->rq_task->tk_msg.rpc_proc->p_name, (char *)p);
+ dprintk("RPC: %5u RPCB_%s reply: %*pE\n", req->rq_task->tk_pid,
+ req->rq_task->tk_msg.rpc_proc->p_name, len, (char *)p);
if (rpc_uaddr2sockaddr(req->rq_xprt->xprt_net, (char *)p, len,
sap, sizeof(address)) == 0)
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 3eed71a2ff2b..737556204566 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -473,10 +473,20 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q
struct rpc_task *task;
/*
+ * Service the privileged queue.
+ */
+ q = &queue->tasks[RPC_NR_PRIORITY - 1];
+ if (queue->maxpriority > RPC_PRIORITY_PRIVILEGED && !list_empty(q)) {
+ task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
+ goto out;
+ }
+
+ /*
* Service a batch of tasks from a single owner.
*/
q = &queue->tasks[queue->priority];
- if (!list_empty(q) && --queue->nr) {
+ if (!list_empty(q) && queue->nr) {
+ queue->nr--;
task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
goto out;
}
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 41f6e964fe91..1a7d930a867c 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -50,7 +50,7 @@ EXPORT_SYMBOL_GPL(svc_pool_map);
static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */
static int
-param_set_pool_mode(const char *val, struct kernel_param *kp)
+param_set_pool_mode(const char *val, const struct kernel_param *kp)
{
int *ip = (int *)kp->arg;
struct svc_pool_map *m = &svc_pool_map;
@@ -80,7 +80,7 @@ out:
}
static int
-param_get_pool_mode(char *buf, struct kernel_param *kp)
+param_get_pool_mode(char *buf, const struct kernel_param *kp)
{
int *ip = (int *)kp->arg;
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 2b8e80c721db..7629982040c4 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -97,8 +97,17 @@ void svc_unreg_xprt_class(struct svc_xprt_class *xcl)
}
EXPORT_SYMBOL_GPL(svc_unreg_xprt_class);
-/*
- * Format the transport list for printing
+/**
+ * svc_print_xprts - Format the transport list for printing
+ * @buf: target buffer for formatted address
+ * @maxlen: length of target buffer
+ *
+ * Fills in @buf with a string containing a list of transport names, each name
+ * terminated with '\n'. If the buffer is too small, some entries may be
+ * missing, but it is guaranteed that all lines in the output buffer are
+ * complete.
+ *
+ * Returns positive length of the filled-in string.
*/
int svc_print_xprts(char *buf, int maxlen)
{
@@ -111,9 +120,9 @@ int svc_print_xprts(char *buf, int maxlen)
list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) {
int slen;
- sprintf(tmpstr, "%s %d\n", xcl->xcl_name, xcl->xcl_max_payload);
- slen = strlen(tmpstr);
- if (len + slen > maxlen)
+ slen = snprintf(tmpstr, sizeof(tmpstr), "%s %d\n",
+ xcl->xcl_name, xcl->xcl_max_payload);
+ if (slen >= sizeof(tmpstr) || len + slen >= maxlen)
break;
len += slen;
strcat(buf, tmpstr);
@@ -1002,7 +1011,7 @@ static int svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, st
struct svc_xprt *xprt;
int ret = 0;
- spin_lock(&serv->sv_lock);
+ spin_lock_bh(&serv->sv_lock);
list_for_each_entry(xprt, xprt_list, xpt_list) {
if (xprt->xpt_net != net)
continue;
@@ -1010,7 +1019,7 @@ static int svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, st
set_bit(XPT_CLOSE, &xprt->xpt_flags);
svc_xprt_enqueue(xprt);
}
- spin_unlock(&serv->sv_lock);
+ spin_unlock_bh(&serv->sv_lock);
return ret;
}
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index ed9bbd383f7d..df7ecf9584f6 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -1031,6 +1031,7 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
base = 0;
} else {
base -= buf->head[0].iov_len;
+ subbuf->head[0].iov_base = buf->head[0].iov_base;
subbuf->head[0].iov_len = 0;
}
@@ -1043,6 +1044,8 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
base = 0;
} else {
base -= buf->page_len;
+ subbuf->pages = buf->pages;
+ subbuf->page_base = 0;
subbuf->page_len = 0;
}
@@ -1054,6 +1057,7 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
base = 0;
} else {
base -= buf->tail[0].iov_len;
+ subbuf->tail[0].iov_base = buf->tail[0].iov_base;
subbuf->tail[0].iov_len = 0;
}
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 2dcb44f69e53..ddd70aec4d88 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -42,8 +42,8 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
size_t size;
req = rpcrdma_create_req(r_xprt);
- if (!req)
- return -ENOMEM;
+ if (IS_ERR(req))
+ return PTR_ERR(req);
req->rl_backchannel = true;
size = RPCRDMA_INLINE_WRITE_THRESHOLD(rqst);
@@ -84,25 +84,13 @@ out_fail:
static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
unsigned int count)
{
- struct rpcrdma_buffer *buffers = &r_xprt->rx_buf;
- struct rpcrdma_rep *rep;
- unsigned long flags;
int rc = 0;
while (count--) {
- rep = rpcrdma_create_rep(r_xprt);
- if (IS_ERR(rep)) {
- pr_err("RPC: %s: reply buffer alloc failed\n",
- __func__);
- rc = PTR_ERR(rep);
+ rc = rpcrdma_create_rep(r_xprt);
+ if (rc)
break;
- }
-
- spin_lock_irqsave(&buffers->rb_lock, flags);
- list_add(&rep->rr_list, &buffers->rb_recv_bufs);
- spin_unlock_irqrestore(&buffers->rb_lock, flags);
}
-
return rc;
}
@@ -341,6 +329,8 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
rqst->rq_reply_bytes_recvd = 0;
rqst->rq_bytes_sent = 0;
rqst->rq_xid = headerp->rm_xid;
+
+ rqst->rq_private_buf.len = size;
set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
buf = &rqst->rq_rcv_buf;
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 8c545f7d7525..740bddcf3488 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -576,6 +576,9 @@ xprt_rdma_free(void *buffer)
rb = container_of(buffer, struct rpcrdma_regbuf, rg_base[0]);
req = rb->rg_owner;
+ if (req->rl_backchannel)
+ return;
+
r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf);
dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index eadd1655145a..b6879a1986a7 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -911,10 +911,17 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
return req;
}
-struct rpcrdma_rep *
-rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
+/**
+ * rpcrdma_create_rep - Allocate an rpcrdma_rep object
+ * @r_xprt: controlling transport
+ *
+ * Returns 0 on success or a negative errno on failure.
+ */
+int
+ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
+ struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
struct rpcrdma_rep *rep;
int rc;
@@ -934,12 +941,18 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
rep->rr_device = ia->ri_device;
rep->rr_rxprt = r_xprt;
INIT_WORK(&rep->rr_work, rpcrdma_receive_worker);
- return rep;
+
+ spin_lock(&buf->rb_lock);
+ list_add(&rep->rr_list, &buf->rb_recv_bufs);
+ spin_unlock(&buf->rb_lock);
+ return 0;
out_free:
kfree(rep);
out:
- return ERR_PTR(rc);
+ dprintk("RPC: %s: reply buffer %d alloc failed\n",
+ __func__, rc);
+ return rc;
}
int
@@ -975,17 +988,10 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
}
INIT_LIST_HEAD(&buf->rb_recv_bufs);
- for (i = 0; i < buf->rb_max_requests + 2; i++) {
- struct rpcrdma_rep *rep;
-
- rep = rpcrdma_create_rep(r_xprt);
- if (IS_ERR(rep)) {
- dprintk("RPC: %s: reply buffer %d alloc failed\n",
- __func__, i);
- rc = PTR_ERR(rep);
+ for (i = 0; i <= buf->rb_max_requests; i++) {
+ rc = rpcrdma_create_rep(r_xprt);
+ if (rc)
goto out;
- }
- list_add(&rep->rr_list, &buf->rb_recv_bufs);
}
return 0;
@@ -1337,15 +1343,14 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count)
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
struct rpcrdma_ep *ep = &r_xprt->rx_ep;
struct rpcrdma_rep *rep;
- unsigned long flags;
int rc;
while (count--) {
- spin_lock_irqsave(&buffers->rb_lock, flags);
+ spin_lock(&buffers->rb_lock);
if (list_empty(&buffers->rb_recv_bufs))
goto out_reqbuf;
rep = rpcrdma_buffer_get_rep_locked(buffers);
- spin_unlock_irqrestore(&buffers->rb_lock, flags);
+ spin_unlock(&buffers->rb_lock);
rc = rpcrdma_ep_post_recv(ia, ep, rep);
if (rc)
@@ -1355,7 +1360,7 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count)
return 0;
out_reqbuf:
- spin_unlock_irqrestore(&buffers->rb_lock, flags);
+ spin_unlock(&buffers->rb_lock);
pr_warn("%s: no extra receive buffers\n", __func__);
return -ENOMEM;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index ac7f8d4f632a..36ec6a602665 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -431,8 +431,8 @@ int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_ep *,
* Buffer calls - xprtrdma/verbs.c
*/
struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *);
-struct rpcrdma_rep *rpcrdma_create_rep(struct rpcrdma_xprt *);
void rpcrdma_destroy_req(struct rpcrdma_ia *, struct rpcrdma_req *);
+int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt);
int rpcrdma_buffer_create(struct rpcrdma_xprt *);
void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 35b376f58f21..758e59a20a6c 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -88,6 +88,11 @@ out_sk_rht:
static void __net_exit tipc_exit_net(struct net *net)
{
tipc_net_stop(net);
+
+ /* Make sure the tipc_net_finalize_work stopped
+ * before releasing the resources.
+ */
+ flush_scheduled_work();
tipc_bcast_stop(net);
tipc_nametbl_stop(net);
tipc_sk_rht_destroy(net);
@@ -117,14 +122,6 @@ static int __init tipc_init(void)
TIPC_CRITICAL_IMPORTANCE;
sysctl_tipc_rmem[2] = TIPC_CONN_OVERLOAD_LIMIT;
- err = tipc_netlink_start();
- if (err)
- goto out_netlink;
-
- err = tipc_netlink_compat_start();
- if (err)
- goto out_netlink_compat;
-
err = tipc_register_sysctl();
if (err)
goto out_sysctl;
@@ -145,8 +142,21 @@ static int __init tipc_init(void)
if (err)
goto out_bearer;
+ err = tipc_netlink_start();
+ if (err)
+ goto out_netlink;
+
+ err = tipc_netlink_compat_start();
+ if (err)
+ goto out_netlink_compat;
+
pr_info("Started in single node mode\n");
return 0;
+
+out_netlink_compat:
+ tipc_netlink_stop();
+out_netlink:
+ tipc_bearer_cleanup();
out_bearer:
unregister_pernet_device(&tipc_topsrv_net_ops);
out_pernet_topsrv:
@@ -156,22 +166,18 @@ out_socket:
out_pernet:
tipc_unregister_sysctl();
out_sysctl:
- tipc_netlink_compat_stop();
-out_netlink_compat:
- tipc_netlink_stop();
-out_netlink:
pr_err("Unable to start in single node mode\n");
return err;
}
static void __exit tipc_exit(void)
{
+ tipc_netlink_compat_stop();
+ tipc_netlink_stop();
tipc_bearer_cleanup();
unregister_pernet_device(&tipc_topsrv_net_ops);
tipc_socket_stop();
unregister_pernet_device(&tipc_net_ops);
- tipc_netlink_stop();
- tipc_netlink_compat_stop();
tipc_unregister_sysctl();
pr_info("Deactivated\n");
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 736fffb28ab6..0080699b7cd1 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -883,7 +883,7 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb,
default:
pr_warn("Dropping received illegal msg type\n");
kfree_skb(skb);
- return false;
+ return true;
};
}
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 67bddcb2ff46..6bac0e6e4643 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -138,18 +138,14 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
if (fragid == FIRST_FRAGMENT) {
if (unlikely(head))
goto err;
- if (unlikely(skb_unclone(frag, GFP_ATOMIC)))
+ *buf = NULL;
+ if (skb_has_frag_list(frag) && __skb_linearize(frag))
+ goto err;
+ frag = skb_unshare(frag, GFP_ATOMIC);
+ if (unlikely(!frag))
goto err;
head = *headbuf = frag;
- *buf = NULL;
TIPC_SKB_CB(head)->tail = NULL;
- if (skb_is_nonlinear(head)) {
- skb_walk_frags(head, tail) {
- TIPC_SKB_CB(head)->tail = tail;
- }
- } else {
- skb_frag_list_init(head);
- }
return 0;
}
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index d2bf92e71150..fb1b5dcf0142 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -250,8 +250,9 @@ err_out:
static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
struct tipc_nl_compat_msg *msg)
{
- int err;
+ struct nlmsghdr *nlh;
struct sk_buff *arg;
+ int err;
if (msg->req_type && (!msg->req_size ||
!TLV_CHECK_TYPE(msg->req, msg->req_type)))
@@ -280,6 +281,15 @@ static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
return -ENOMEM;
}
+ nlh = nlmsg_put(arg, 0, 0, tipc_genl_family.id, 0, NLM_F_MULTI);
+ if (!nlh) {
+ kfree_skb(arg);
+ kfree_skb(msg->rep);
+ msg->rep = NULL;
+ return -EMSGSIZE;
+ }
+ nlmsg_end(arg, nlh);
+
err = __tipc_nl_compat_dumpit(cmd, msg, arg);
if (err) {
kfree_skb(msg->rep);
@@ -516,7 +526,7 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg,
if (len <= 0)
return -EINVAL;
- len = min_t(int, len, TIPC_MAX_BEARER_NAME);
+ len = min_t(int, len, TIPC_MAX_LINK_NAME);
if (!string_is_valid(name, len))
return -EINVAL;
@@ -622,7 +632,7 @@ static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg,
nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], NULL);
- link_info.dest = nla_get_flag(link[TIPC_NLA_LINK_DEST]);
+ link_info.dest = htonl(nla_get_flag(link[TIPC_NLA_LINK_DEST]));
link_info.up = htonl(nla_get_flag(link[TIPC_NLA_LINK_UP]));
nla_strlcpy(link_info.str, link[TIPC_NLA_LINK_NAME],
TIPC_MAX_LINK_NAME);
@@ -791,7 +801,7 @@ static int tipc_nl_compat_link_reset_stats(struct tipc_nl_compat_cmd_doit *cmd,
if (len <= 0)
return -EINVAL;
- len = min_t(int, len, TIPC_MAX_BEARER_NAME);
+ len = min_t(int, len, TIPC_MAX_LINK_NAME);
if (!string_is_valid(name, len))
return -EINVAL;
@@ -926,6 +936,10 @@ static int tipc_nl_compat_publ_dump(struct tipc_nl_compat_msg *msg, u32 sock)
hdr = genlmsg_put(args, 0, 0, &tipc_genl_family, NLM_F_MULTI,
TIPC_NL_PUBL_GET);
+ if (!hdr) {
+ kfree_skb(args);
+ return -EMSGSIZE;
+ }
nest = nla_nest_start(args, TIPC_NLA_SOCK);
if (!nest) {
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 65171f8e8c45..9d380d55ea1c 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -763,6 +763,9 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
spin_lock_bh(&inputq->lock);
if (skb_peek(arrvq) == skb) {
skb_queue_splice_tail_init(&tmpq, inputq);
+ /* Decrease the skb's refcnt as increasing in the
+ * function tipc_skb_peek
+ */
kfree_skb(__skb_dequeue(arrvq));
}
spin_unlock_bh(&inputq->lock);
@@ -1754,7 +1757,7 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb)
static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
u32 dport, struct sk_buff_head *xmitq)
{
- unsigned long time_limit = jiffies + 2;
+ unsigned long time_limit = jiffies + usecs_to_jiffies(20000);
struct sk_buff *skb;
unsigned int lim;
atomic_t *dcnt;
@@ -1984,7 +1987,7 @@ static int tipc_listen(struct socket *sock, int len)
static int tipc_wait_for_accept(struct socket *sock, long timeo)
{
struct sock *sk = sock->sk;
- DEFINE_WAIT(wait);
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
int err;
/* True wake-one mechanism for incoming connections: only
@@ -1993,12 +1996,12 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo)
* anymore, the common case will execute the loop only once.
*/
for (;;) {
- prepare_to_wait_exclusive(sk_sleep(sk), &wait,
- TASK_INTERRUPTIBLE);
if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
+ add_wait_queue(sk_sleep(sk), &wait);
release_sock(sk);
- timeo = schedule_timeout(timeo);
+ timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
lock_sock(sk);
+ remove_wait_queue(sk_sleep(sk), &wait);
}
err = 0;
if (!skb_queue_empty(&sk->sk_receive_queue))
@@ -2013,7 +2016,6 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo)
if (signal_pending(current))
break;
}
- finish_wait(sk_sleep(sk), &wait);
return err;
}
diff --git a/net/tipc/sysctl.c b/net/tipc/sysctl.c
index 1a779b1e8510..40f6d82083d7 100644
--- a/net/tipc/sysctl.c
+++ b/net/tipc/sysctl.c
@@ -37,6 +37,8 @@
#include <linux/sysctl.h>
+static int zero;
+static int one = 1;
static struct ctl_table_header *tipc_ctl_hdr;
static struct ctl_table tipc_table[] = {
@@ -45,14 +47,16 @@ static struct ctl_table tipc_table[] = {
.data = &sysctl_tipc_rmem,
.maxlen = sizeof(sysctl_tipc_rmem),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one,
},
{
.procname = "named_timeout",
.data = &sysctl_tipc_named_timeout,
.maxlen = sizeof(sysctl_tipc_named_timeout),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
},
{}
};
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index 78d6b78de29d..90d54ad86e4e 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -200,14 +200,17 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
.saddr = src->ipv6,
.flowi6_proto = IPPROTO_UDP
};
- err = ipv6_stub->ipv6_dst_lookup(net, ub->ubsock->sk, &ndst,
- &fl6);
- if (err)
+ ndst = ipv6_stub->ipv6_dst_lookup_flow(net,
+ ub->ubsock->sk,
+ &fl6, NULL);
+ if (IS_ERR(ndst)) {
+ err = PTR_ERR(ndst);
goto tx_error;
+ }
ttl = ip6_dst_hoplimit(ndst);
err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb,
ndst->dev, &src->ipv6,
- &dst->ipv6, 0, ttl, src->udp_port,
+ &dst->ipv6, 0, ttl, 0, src->udp_port,
dst->udp_port, false);
#endif
}
@@ -405,10 +408,13 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
tuncfg.encap_destroy = NULL;
setup_udp_tunnel_sock(net, ub->ubsock, &tuncfg);
- if (enable_mcast(ub, remote))
+ err = enable_mcast(ub, remote);
+ if (err)
goto err;
return 0;
err:
+ if (ub->ubsock)
+ udp_tunnel_sock_release(ub->ubsock);
kfree(ub);
return err;
}
diff --git a/net/unix/Kconfig b/net/unix/Kconfig
index 8b31ab85d050..3b9e450656a4 100644
--- a/net/unix/Kconfig
+++ b/net/unix/Kconfig
@@ -19,6 +19,11 @@ config UNIX
Say Y unless you know what you are doing.
+config UNIX_SCM
+ bool
+ depends on UNIX
+ default y
+
config UNIX_DIAG
tristate "UNIX: socket monitoring interface"
depends on UNIX
diff --git a/net/unix/Makefile b/net/unix/Makefile
index b663c607b1c6..dc686c6757fb 100644
--- a/net/unix/Makefile
+++ b/net/unix/Makefile
@@ -9,3 +9,5 @@ unix-$(CONFIG_SYSCTL) += sysctl_net_unix.o
obj-$(CONFIG_UNIX_DIAG) += unix_diag.o
unix_diag-y := diag.o
+
+obj-$(CONFIG_UNIX_SCM) += scm.o
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 4f547eebfd36..2c09eadfa90c 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -118,6 +118,8 @@
#include <linux/security.h>
#include <linux/freezer.h>
+#include "scm.h"
+
struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
DEFINE_SPINLOCK(unix_table_lock);
@@ -191,11 +193,17 @@ static inline int unix_may_send(struct sock *sk, struct sock *osk)
return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}
-static inline int unix_recvq_full(struct sock const *sk)
+static inline int unix_recvq_full(const struct sock *sk)
{
return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}
+static inline int unix_recvq_full_lockless(const struct sock *sk)
+{
+ return skb_queue_len_lockless(&sk->sk_receive_queue) >
+ READ_ONCE(sk->sk_max_ack_backlog);
+}
+
struct sock *unix_peer_get(struct sock *s)
{
struct sock *peer;
@@ -528,12 +536,14 @@ static void unix_release_sock(struct sock *sk, int embrion)
u->path.mnt = NULL;
state = sk->sk_state;
sk->sk_state = TCP_CLOSE;
+
+ skpair = unix_peer(sk);
+ unix_peer(sk) = NULL;
+
unix_state_unlock(sk);
wake_up_interruptible_all(&u->peer_wait);
- skpair = unix_peer(sk);
-
if (skpair != NULL) {
if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
unix_state_lock(skpair);
@@ -548,7 +558,6 @@ static void unix_release_sock(struct sock *sk, int embrion)
unix_dgram_peer_wake_disconnect(sk, skpair);
sock_put(skpair); /* It may now die */
- unix_peer(sk) = NULL;
}
/* Try to flush out this socket. Throw out buffers at least */
@@ -585,20 +594,42 @@ static void unix_release_sock(struct sock *sk, int embrion)
static void init_peercred(struct sock *sk)
{
- put_pid(sk->sk_peer_pid);
- if (sk->sk_peer_cred)
- put_cred(sk->sk_peer_cred);
+ const struct cred *old_cred;
+ struct pid *old_pid;
+
+ spin_lock(&sk->sk_peer_lock);
+ old_pid = sk->sk_peer_pid;
+ old_cred = sk->sk_peer_cred;
sk->sk_peer_pid = get_pid(task_tgid(current));
sk->sk_peer_cred = get_current_cred();
+ spin_unlock(&sk->sk_peer_lock);
+
+ put_pid(old_pid);
+ put_cred(old_cred);
}
static void copy_peercred(struct sock *sk, struct sock *peersk)
{
- put_pid(sk->sk_peer_pid);
- if (sk->sk_peer_cred)
- put_cred(sk->sk_peer_cred);
+ const struct cred *old_cred;
+ struct pid *old_pid;
+
+ if (sk < peersk) {
+ spin_lock(&sk->sk_peer_lock);
+ spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
+ } else {
+ spin_lock(&peersk->sk_peer_lock);
+ spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
+ }
+ old_pid = sk->sk_peer_pid;
+ old_cred = sk->sk_peer_cred;
sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
+
+ spin_unlock(&sk->sk_peer_lock);
+ spin_unlock(&peersk->sk_peer_lock);
+
+ put_pid(old_pid);
+ put_cred(old_cred);
}
static int unix_listen(struct socket *sock, int backlog)
@@ -1497,78 +1528,51 @@ out:
return err;
}
-static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
- int i;
-
- scm->fp = UNIXCB(skb).fp;
- UNIXCB(skb).fp = NULL;
-
- for (i = scm->fp->count-1; i >= 0; i--)
- unix_notinflight(scm->fp->user, scm->fp->fp[i]);
-}
-
-static void unix_destruct_scm(struct sk_buff *skb)
-{
- struct scm_cookie scm;
- memset(&scm, 0, sizeof(scm));
- scm.pid = UNIXCB(skb).pid;
- if (UNIXCB(skb).fp)
- unix_detach_fds(&scm, skb);
-
- /* Alas, it calls VFS */
- /* So fscking what? fput() had been SMP-safe since the last Summer */
- scm_destroy(&scm);
- sock_wfree(skb);
-}
-
-/*
- * The "user->unix_inflight" variable is protected by the garbage
- * collection lock, and we just read it locklessly here. If you go
- * over the limit, there might be a tiny race in actually noticing
- * it across threads. Tough.
- */
-static inline bool too_many_unix_fds(struct task_struct *p)
-{
- struct user_struct *user = current_user();
-
- if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
- return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
- return false;
-}
-
-#define MAX_RECURSION_LEVEL 4
-
-static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
-{
- int i;
- unsigned char max_level = 0;
-
- if (too_many_unix_fds(current))
- return -ETOOMANYREFS;
-
- for (i = scm->fp->count - 1; i >= 0; i--) {
- struct sock *sk = unix_get_socket(scm->fp->fp[i]);
-
- if (sk)
- max_level = max(max_level,
- unix_sk(sk)->recursion_level);
- }
- if (unlikely(max_level > MAX_RECURSION_LEVEL))
- return -ETOOMANYREFS;
+ scm->fp = scm_fp_dup(UNIXCB(skb).fp);
/*
- * Need to duplicate file references for the sake of garbage
- * collection. Otherwise a socket in the fps might become a
- * candidate for GC while the skb is not yet queued.
+ * Garbage collection of unix sockets starts by selecting a set of
+ * candidate sockets which have reference only from being in flight
+ * (total_refs == inflight_refs). This condition is checked once during
+ * the candidate collection phase, and candidates are marked as such, so
+ * that non-candidates can later be ignored. While inflight_refs is
+ * protected by unix_gc_lock, total_refs (file count) is not, hence this
+ * is an instantaneous decision.
+ *
+ * Once a candidate, however, the socket must not be reinstalled into a
+ * file descriptor while the garbage collection is in progress.
+ *
+ * If the above conditions are met, then the directed graph of
+ * candidates (*) does not change while unix_gc_lock is held.
+ *
+ * Any operations that changes the file count through file descriptors
+ * (dup, close, sendmsg) does not change the graph since candidates are
+ * not installed in fds.
+ *
+ * Dequeing a candidate via recvmsg would install it into an fd, but
+ * that takes unix_gc_lock to decrement the inflight count, so it's
+ * serialized with garbage collection.
+ *
+ * MSG_PEEK is special in that it does not change the inflight count,
+ * yet does install the socket into an fd. The following lock/unlock
+ * pair is to ensure serialization with garbage collection. It must be
+ * done between incrementing the file count and installing the file into
+ * an fd.
+ *
+ * If garbage collection starts after the barrier provided by the
+ * lock/unlock, then it will see the elevated refcount and not mark this
+ * as a candidate. If a garbage collection is already in progress
+ * before the file count was incremented, then the lock/unlock pair will
+ * ensure that garbage collection is finished before progressing to
+ * installing the fd.
+ *
+ * (*) A -> B where B is on the queue of A or B is on the queue of C
+ * which is on the queue of listening socket A.
*/
- UNIXCB(skb).fp = scm_fp_dup(scm->fp);
- if (!UNIXCB(skb).fp)
- return -ENOMEM;
-
- for (i = scm->fp->count - 1; i >= 0; i--)
- unix_inflight(scm->fp->user, scm->fp->fp[i]);
- return max_level;
+ spin_lock(&unix_gc_lock);
+ spin_unlock(&unix_gc_lock);
}
static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
@@ -1792,7 +1796,8 @@ restart_locked:
* - unix_peer(sk) == sk by time of get but disconnected before lock
*/
if (other != sk &&
- unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
+ unlikely(unix_peer(other) != sk &&
+ unix_recvq_full_lockless(other))) {
if (timeo) {
timeo = unix_wait_for_peer(other, timeo);
@@ -2195,7 +2200,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
sk_peek_offset_fwd(sk, size);
if (UNIXCB(skb).fp)
- scm.fp = scm_fp_dup(UNIXCB(skb).fp);
+ unix_peek_fds(&scm, skb);
}
err = (flags & MSG_TRUNC) ? skb->len - skip : size;
@@ -2440,7 +2445,7 @@ unlock:
/* It is questionable, see note in unix_dgram_recvmsg.
*/
if (UNIXCB(skb).fp)
- scm.fp = scm_fp_dup(UNIXCB(skb).fp);
+ unix_peek_fds(&scm, skb);
sk_peek_offset_fwd(sk, chunk);
@@ -2717,7 +2722,7 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
other = unix_peer(sk);
if (other && unix_peer(other) != sk &&
- unix_recvq_full(other) &&
+ unix_recvq_full_lockless(other) &&
unix_dgram_peer_wake_me(sk, other))
writable = 0;
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index c36757e72844..4d283e26d816 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -86,77 +86,13 @@
#include <net/scm.h>
#include <net/tcp_states.h>
+#include "scm.h"
+
/* Internal data structures and random procedures: */
-static LIST_HEAD(gc_inflight_list);
static LIST_HEAD(gc_candidates);
-static DEFINE_SPINLOCK(unix_gc_lock);
static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait);
-unsigned int unix_tot_inflight;
-
-struct sock *unix_get_socket(struct file *filp)
-{
- struct sock *u_sock = NULL;
- struct inode *inode = file_inode(filp);
-
- /* Socket ? */
- if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
- struct socket *sock = SOCKET_I(inode);
- struct sock *s = sock->sk;
-
- /* PF_UNIX ? */
- if (s && sock->ops && sock->ops->family == PF_UNIX)
- u_sock = s;
- }
- return u_sock;
-}
-
-/* Keep the number of times in flight count for the file
- * descriptor if it is for an AF_UNIX socket.
- */
-
-void unix_inflight(struct user_struct *user, struct file *fp)
-{
- struct sock *s = unix_get_socket(fp);
-
- spin_lock(&unix_gc_lock);
-
- if (s) {
- struct unix_sock *u = unix_sk(s);
-
- if (atomic_long_inc_return(&u->inflight) == 1) {
- BUG_ON(!list_empty(&u->link));
- list_add_tail(&u->link, &gc_inflight_list);
- } else {
- BUG_ON(list_empty(&u->link));
- }
- unix_tot_inflight++;
- }
- user->unix_inflight++;
- spin_unlock(&unix_gc_lock);
-}
-
-void unix_notinflight(struct user_struct *user, struct file *fp)
-{
- struct sock *s = unix_get_socket(fp);
-
- spin_lock(&unix_gc_lock);
-
- if (s) {
- struct unix_sock *u = unix_sk(s);
-
- BUG_ON(!atomic_long_read(&u->inflight));
- BUG_ON(list_empty(&u->link));
-
- if (atomic_long_dec_and_test(&u->inflight))
- list_del_init(&u->link);
- unix_tot_inflight--;
- }
- user->unix_inflight--;
- spin_unlock(&unix_gc_lock);
-}
-
static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
struct sk_buff_head *hitlist)
{
@@ -261,8 +197,11 @@ void wait_for_unix_gc(void)
{
/* If number of inflight sockets is insane,
* force a garbage collect right now.
+ * Paired with the WRITE_ONCE() in unix_inflight(),
+ * unix_notinflight() and gc_in_progress().
*/
- if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress)
+ if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC &&
+ !READ_ONCE(gc_in_progress))
unix_gc();
wait_event(unix_gc_wait, gc_in_progress == false);
}
@@ -282,7 +221,9 @@ void unix_gc(void)
if (gc_in_progress)
goto out;
- gc_in_progress = true;
+ /* Paired with READ_ONCE() in wait_for_unix_gc(). */
+ WRITE_ONCE(gc_in_progress, true);
+
/* First, select candidates for garbage collection. Only
* in-flight sockets are considered, and from those only ones
* which don't have any external reference.
@@ -368,7 +309,10 @@ void unix_gc(void)
/* All candidates should have been detached by now. */
BUG_ON(!list_empty(&gc_candidates));
- gc_in_progress = false;
+
+ /* Paired with READ_ONCE() in wait_for_unix_gc(). */
+ WRITE_ONCE(gc_in_progress, false);
+
wake_up(&unix_gc_wait);
out:
diff --git a/net/unix/scm.c b/net/unix/scm.c
new file mode 100644
index 000000000000..bf1a8fa8c4f1
--- /dev/null
+++ b/net/unix/scm.c
@@ -0,0 +1,163 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/net.h>
+#include <linux/fs.h>
+#include <net/af_unix.h>
+#include <net/scm.h>
+#include <linux/init.h>
+
+#include "scm.h"
+
+unsigned int unix_tot_inflight;
+EXPORT_SYMBOL(unix_tot_inflight);
+
+LIST_HEAD(gc_inflight_list);
+EXPORT_SYMBOL(gc_inflight_list);
+
+DEFINE_SPINLOCK(unix_gc_lock);
+EXPORT_SYMBOL(unix_gc_lock);
+
+struct sock *unix_get_socket(struct file *filp)
+{
+ struct sock *u_sock = NULL;
+ struct inode *inode = file_inode(filp);
+
+ /* Socket ? */
+ if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
+ struct socket *sock = SOCKET_I(inode);
+ struct sock *s = sock->sk;
+
+ /* PF_UNIX ? */
+ if (s && sock->ops && sock->ops->family == PF_UNIX)
+ u_sock = s;
+ }
+ return u_sock;
+}
+EXPORT_SYMBOL(unix_get_socket);
+
+/* Keep the number of times in flight count for the file
+ * descriptor if it is for an AF_UNIX socket.
+ */
+void unix_inflight(struct user_struct *user, struct file *fp)
+{
+ struct sock *s = unix_get_socket(fp);
+
+ spin_lock(&unix_gc_lock);
+
+ if (s) {
+ struct unix_sock *u = unix_sk(s);
+
+ if (atomic_long_inc_return(&u->inflight) == 1) {
+ BUG_ON(!list_empty(&u->link));
+ list_add_tail(&u->link, &gc_inflight_list);
+ } else {
+ BUG_ON(list_empty(&u->link));
+ }
+ /* Paired with READ_ONCE() in wait_for_unix_gc() */
+ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1);
+ }
+ user->unix_inflight++;
+ spin_unlock(&unix_gc_lock);
+}
+
+void unix_notinflight(struct user_struct *user, struct file *fp)
+{
+ struct sock *s = unix_get_socket(fp);
+
+ spin_lock(&unix_gc_lock);
+
+ if (s) {
+ struct unix_sock *u = unix_sk(s);
+
+ BUG_ON(!atomic_long_read(&u->inflight));
+ BUG_ON(list_empty(&u->link));
+
+ if (atomic_long_dec_and_test(&u->inflight))
+ list_del_init(&u->link);
+ /* Paired with READ_ONCE() in wait_for_unix_gc() */
+ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1);
+ }
+ user->unix_inflight--;
+ spin_unlock(&unix_gc_lock);
+}
+
+/*
+ * The "user->unix_inflight" variable is protected by the garbage
+ * collection lock, and we just read it locklessly here. If you go
+ * over the limit, there might be a tiny race in actually noticing
+ * it across threads. Tough.
+ */
+static inline bool too_many_unix_fds(struct task_struct *p)
+{
+ struct user_struct *user = current_user();
+
+ if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
+ return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
+ return false;
+}
+
+#define MAX_RECURSION_LEVEL 4
+
+int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+{
+ int i;
+ unsigned char max_level = 0;
+
+ if (too_many_unix_fds(current))
+ return -ETOOMANYREFS;
+
+ for (i = scm->fp->count - 1; i >= 0; i--) {
+ struct sock *sk = unix_get_socket(scm->fp->fp[i]);
+
+ if (sk)
+ max_level = max(max_level,
+ unix_sk(sk)->recursion_level);
+ }
+ if (unlikely(max_level > MAX_RECURSION_LEVEL))
+ return -ETOOMANYREFS;
+
+ /*
+ * Need to duplicate file references for the sake of garbage
+ * collection. Otherwise a socket in the fps might become a
+ * candidate for GC while the skb is not yet queued.
+ */
+ UNIXCB(skb).fp = scm_fp_dup(scm->fp);
+ if (!UNIXCB(skb).fp)
+ return -ENOMEM;
+
+ for (i = scm->fp->count - 1; i >= 0; i--)
+ unix_inflight(scm->fp->user, scm->fp->fp[i]);
+ return max_level;
+}
+EXPORT_SYMBOL(unix_attach_fds);
+
+void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+{
+ int i;
+
+ scm->fp = UNIXCB(skb).fp;
+ UNIXCB(skb).fp = NULL;
+
+ for (i = scm->fp->count-1; i >= 0; i--)
+ unix_notinflight(scm->fp->user, scm->fp->fp[i]);
+}
+EXPORT_SYMBOL(unix_detach_fds);
+
+void unix_destruct_scm(struct sk_buff *skb)
+{
+ struct scm_cookie scm;
+
+ memset(&scm, 0, sizeof(scm));
+ scm.pid = UNIXCB(skb).pid;
+ if (UNIXCB(skb).fp)
+ unix_detach_fds(&scm, skb);
+
+ /* Alas, it calls VFS */
+ /* So fscking what? fput() had been SMP-safe since the last Summer */
+ scm_destroy(&scm);
+ sock_wfree(skb);
+}
+EXPORT_SYMBOL(unix_destruct_scm);
diff --git a/net/unix/scm.h b/net/unix/scm.h
new file mode 100644
index 000000000000..5a255a477f16
--- /dev/null
+++ b/net/unix/scm.h
@@ -0,0 +1,10 @@
+#ifndef NET_UNIX_SCM_H
+#define NET_UNIX_SCM_H
+
+extern struct list_head gc_inflight_list;
+extern spinlock_t unix_gc_lock;
+
+int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb);
+void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb);
+
+#endif
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 7566395e526d..baab5f65fbeb 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -97,6 +97,7 @@
#include <linux/mutex.h>
#include <linux/net.h>
#include <linux/poll.h>
+#include <linux/random.h>
#include <linux/skbuff.h>
#include <linux/smp.h>
#include <linux/socket.h>
@@ -501,9 +502,13 @@ out:
static int __vsock_bind_stream(struct vsock_sock *vsk,
struct sockaddr_vm *addr)
{
- static u32 port = LAST_RESERVED_PORT + 1;
+ static u32 port = 0;
struct sockaddr_vm new_addr;
+ if (!port)
+ port = LAST_RESERVED_PORT + 1 +
+ prandom_u32_max(U32_MAX - LAST_RESERVED_PORT);
+
vsock_addr_init(&new_addr, addr->svm_cid, addr->svm_port);
if (addr->svm_port == VMADDR_PORT_ANY) {
@@ -645,8 +650,9 @@ struct sock *__vsock_create(struct net *net,
vsk->trusted = psk->trusted;
vsk->owner = get_cred(psk->owner);
vsk->connect_timeout = psk->connect_timeout;
+ security_sk_clone(parent, sk);
} else {
- vsk->trusted = capable(CAP_NET_ADMIN);
+ vsk->trusted = ns_capable_noaudit(&init_user_ns, CAP_NET_ADMIN);
vsk->owner = get_current_cred();
vsk->connect_timeout = VSOCK_DEFAULT_CONNECT_TIMEOUT;
}
@@ -825,10 +831,12 @@ static int vsock_shutdown(struct socket *sock, int mode)
*/
sk = sock->sk;
+
+ lock_sock(sk);
if (sock->state == SS_UNCONNECTED) {
err = -ENOTCONN;
if (sk->sk_type == SOCK_STREAM)
- return err;
+ goto out;
} else {
sock->state = SS_DISCONNECTING;
err = 0;
@@ -837,10 +845,8 @@ static int vsock_shutdown(struct socket *sock, int mode)
/* Receive and send shutdowns are treated alike. */
mode = mode & (RCV_SHUTDOWN | SEND_SHUTDOWN);
if (mode) {
- lock_sock(sk);
sk->sk_shutdown |= mode;
sk->sk_state_change(sk);
- release_sock(sk);
if (sk->sk_type == SOCK_STREAM) {
sock_reset_flag(sk, SOCK_DONE);
@@ -848,6 +854,8 @@ static int vsock_shutdown(struct socket *sock, int mode)
}
}
+out:
+ release_sock(sk);
return err;
}
@@ -1168,6 +1176,8 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
* non-blocking call.
*/
err = -EALREADY;
+ if (flags & O_NONBLOCK)
+ goto out;
break;
default:
if ((sk->sk_state == VSOCK_SS_LISTEN) ||
@@ -1291,7 +1301,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags)
/* Wait for children sockets to appear; these are the new sockets
* created upon connection establishment.
*/
- timeout = sock_sndtimeo(listener, flags & O_NONBLOCK);
+ timeout = sock_rcvtimeo(listener, flags & O_NONBLOCK);
prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE);
while ((connected = vsock_dequeue_accept(listener)) == NULL &&
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 9c07c76c504d..ddcae46ae408 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -25,6 +25,10 @@
/* How long to wait for graceful shutdown of a connection */
#define VSOCK_CLOSE_TIMEOUT (8 * HZ)
+uint virtio_transport_max_vsock_pkt_buf_size = 64 * 1024;
+module_param(virtio_transport_max_vsock_pkt_buf_size, uint, 0444);
+EXPORT_SYMBOL_GPL(virtio_transport_max_vsock_pkt_buf_size);
+
static const struct virtio_transport *virtio_transport_get_ops(void)
{
const struct vsock_transport *t = vsock_core_get_transport();
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 102bf9194662..c09efcdf72d2 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -593,8 +593,7 @@ vmci_transport_queue_pair_alloc(struct vmci_qp **qpair,
peer, flags, VMCI_NO_PRIVILEGE_FLAGS);
out:
if (err < 0) {
- pr_err("Could not attach to queue pair with %d\n",
- err);
+ pr_err_once("Could not attach to queue pair with %d\n", err);
err = vmci_transport_error_to_vsock_error(err);
}
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index 6b5467ea99db..1efc3f14224c 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -877,7 +877,7 @@ static bool cfg80211_ir_permissive_chan(struct wiphy *wiphy,
if (chan == other_chan)
return true;
- if (chan->band != IEEE80211_BAND_5GHZ)
+ if (chan->band != NL80211_BAND_5GHZ)
continue;
r1 = cfg80211_get_unii(chan->center_freq);
diff --git a/net/wireless/core.c b/net/wireless/core.c
index eafc34b27013..32870deb8464 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -559,7 +559,7 @@ int wiphy_register(struct wiphy *wiphy)
{
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
int res;
- enum ieee80211_band band;
+ enum nl80211_band band;
struct ieee80211_supported_band *sband;
bool have_band = false;
int i;
@@ -649,7 +649,7 @@ int wiphy_register(struct wiphy *wiphy)
return res;
/* sanity check supported bands/channels */
- for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
+ for (band = 0; band < NUM_NL80211_BANDS; band++) {
sband = wiphy->bands[band];
if (!sband)
continue;
@@ -661,7 +661,7 @@ int wiphy_register(struct wiphy *wiphy)
* on 60GHz band, there are no legacy rates, so
* n_bitrates is 0
*/
- if (WARN_ON(band != IEEE80211_BAND_60GHZ &&
+ if (WARN_ON(band != NL80211_BAND_60GHZ &&
!sband->n_bitrates))
return -EINVAL;
@@ -671,7 +671,7 @@ int wiphy_register(struct wiphy *wiphy)
* global structure for that.
*/
if (cfg80211_disable_40mhz_24ghz &&
- band == IEEE80211_BAND_2GHZ &&
+ band == NL80211_BAND_2GHZ &&
sband->ht_cap.ht_supported) {
sband->ht_cap.cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40;
sband->ht_cap.cap &= ~IEEE80211_HT_CAP_SGI_40;
diff --git a/net/wireless/debugfs.c b/net/wireless/debugfs.c
index 454157717efa..5d453916a417 100644
--- a/net/wireless/debugfs.c
+++ b/net/wireless/debugfs.c
@@ -69,7 +69,7 @@ static ssize_t ht40allow_map_read(struct file *file,
struct wiphy *wiphy = file->private_data;
char *buf;
unsigned int offset = 0, buf_size = PAGE_SIZE, i, r;
- enum ieee80211_band band;
+ enum nl80211_band band;
struct ieee80211_supported_band *sband;
buf = kzalloc(buf_size, GFP_KERNEL);
@@ -78,7 +78,7 @@ static ssize_t ht40allow_map_read(struct file *file,
rtnl_lock();
- for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
+ for (band = 0; band < NUM_NL80211_BANDS; band++) {
sband = wiphy->bands[band];
if (!sband)
continue;
diff --git a/net/wireless/ethtool.c b/net/wireless/ethtool.c
index e9e91298c70d..3cedf2c2b60b 100644
--- a/net/wireless/ethtool.c
+++ b/net/wireless/ethtool.c
@@ -6,9 +6,13 @@
void cfg80211_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct device *pdev = wiphy_dev(wdev->wiphy);
- strlcpy(info->driver, wiphy_dev(wdev->wiphy)->driver->name,
- sizeof(info->driver));
+ if (pdev->driver)
+ strlcpy(info->driver, pdev->driver->name,
+ sizeof(info->driver));
+ else
+ strlcpy(info->driver, "N/A", sizeof(info->driver));
strlcpy(info->version, init_utsname()->release, sizeof(info->version));
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index 7da0cd9c5e73..398fa066d249 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -104,7 +104,7 @@ static int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
struct ieee80211_supported_band *sband =
rdev->wiphy.bands[params->chandef.chan->band];
int j;
- u32 flag = params->chandef.chan->band == IEEE80211_BAND_5GHZ ?
+ u32 flag = params->chandef.chan->band == NL80211_BAND_5GHZ ?
IEEE80211_RATE_MANDATORY_A :
IEEE80211_RATE_MANDATORY_B;
@@ -114,6 +114,9 @@ static int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
}
}
+ if (WARN_ON(connkeys && connkeys->def < 0))
+ return -EINVAL;
+
if (WARN_ON(wdev->connect_keys))
kzfree(wdev->connect_keys);
wdev->connect_keys = connkeys;
@@ -237,7 +240,7 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev)
{
struct cfg80211_cached_keys *ck = NULL;
- enum ieee80211_band band;
+ enum nl80211_band band;
int i, err;
ASSERT_WDEV_LOCK(wdev);
@@ -249,7 +252,7 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev,
if (!wdev->wext.ibss.chandef.chan) {
struct ieee80211_channel *new_chan = NULL;
- for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
+ for (band = 0; band < NUM_NL80211_BANDS; band++) {
struct ieee80211_supported_band *sband;
struct ieee80211_channel *chan;
@@ -292,7 +295,7 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev,
wdev->wext.ibss.privacy = wdev->wext.default_key != -1;
- if (wdev->wext.keys) {
+ if (wdev->wext.keys && wdev->wext.keys->def != -1) {
ck = kmemdup(wdev->wext.keys, sizeof(*ck), GFP_KERNEL);
if (!ck)
return -ENOMEM;
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index dda90a39be40..60501476fcd0 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c
@@ -128,9 +128,9 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
if (!setup->chandef.chan) {
/* if we don't have that either, use the first usable channel */
- enum ieee80211_band band;
+ enum nl80211_band band;
- for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
+ for (band = 0; band < NUM_NL80211_BANDS; band++) {
struct ieee80211_supported_band *sband;
struct ieee80211_channel *chan;
int i;
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 22536248bf67..7dd4f49b1cc0 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -774,7 +774,7 @@ void cfg80211_dfs_channels_update_work(struct work_struct *work)
wiphy = &rdev->wiphy;
rtnl_lock();
- for (bandid = 0; bandid < IEEE80211_NUM_BANDS; bandid++) {
+ for (bandid = 0; bandid < NUM_NL80211_BANDS; bandid++) {
sband = wiphy->bands[bandid];
if (!sband)
continue;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index faf2a41282f9..4b61c19a7eb0 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -330,6 +330,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_CONTROL_PORT_ETHERTYPE] = { .type = NLA_U16 },
[NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT] = { .type = NLA_FLAG },
[NL80211_ATTR_PRIVACY] = { .type = NLA_FLAG },
+ [NL80211_ATTR_STATUS_CODE] = { .type = NLA_U16 },
[NL80211_ATTR_CIPHER_SUITE_GROUP] = { .type = NLA_U32 },
[NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 },
[NL80211_ATTR_PID] = { .type = NLA_U32 },
@@ -403,6 +404,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_MDID] = { .type = NLA_U16 },
[NL80211_ATTR_IE_RIC] = { .type = NLA_BINARY,
.len = IEEE80211_MAX_DATA_LEN },
+ [NL80211_ATTR_CRIT_PROT_ID] = { .type = NLA_U16 },
+ [NL80211_ATTR_MAX_CRIT_PROT_DURATION] = { .type = NLA_U16 },
[NL80211_ATTR_PEER_AID] = { .type = NLA_U16 },
[NL80211_ATTR_CH_SWITCH_COUNT] = { .type = NLA_U32 },
[NL80211_ATTR_CH_SWITCH_BLOCK_TX] = { .type = NLA_FLAG },
@@ -428,6 +431,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_USER_PRIO] = { .type = NLA_U8 },
[NL80211_ATTR_ADMITTED_TIME] = { .type = NLA_U16 },
[NL80211_ATTR_SMPS_MODE] = { .type = NLA_U8 },
+ [NL80211_ATTR_OPER_CLASS] = { .type = NLA_U8 },
[NL80211_ATTR_MAC_MASK] = { .len = ETH_ALEN },
[NL80211_ATTR_WIPHY_SELF_MANAGED_REG] = { .type = NLA_FLAG },
[NL80211_ATTR_NETNS_FD] = { .type = NLA_U32 },
@@ -885,6 +889,15 @@ nl80211_parse_connkeys(struct cfg80211_registered_device *rdev,
struct nlattr *key;
struct cfg80211_cached_keys *result;
int rem, err, def = 0;
+ bool have_key = false;
+
+ nla_for_each_nested(key, keys, rem) {
+ have_key = true;
+ break;
+ }
+
+ if (!have_key)
+ return NULL;
result = kzalloc(sizeof(*result), GFP_KERNEL);
if (!result)
@@ -930,6 +943,11 @@ nl80211_parse_connkeys(struct cfg80211_registered_device *rdev,
}
}
+ if (result->def < 0) {
+ err = -EINVAL;
+ goto error;
+ }
+
return result;
error:
kfree(result);
@@ -1364,7 +1382,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
struct nlattr *nl_bands, *nl_band;
struct nlattr *nl_freqs, *nl_freq;
struct nlattr *nl_cmds;
- enum ieee80211_band band;
+ enum nl80211_band band;
struct ieee80211_channel *chan;
int i;
const struct ieee80211_txrx_stypes *mgmt_stypes =
@@ -1497,7 +1515,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
goto nla_put_failure;
for (band = state->band_start;
- band < IEEE80211_NUM_BANDS; band++) {
+ band < NUM_NL80211_BANDS; band++) {
struct ieee80211_supported_band *sband;
sband = rdev->wiphy.bands[band];
@@ -1559,7 +1577,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
}
nla_nest_end(msg, nl_bands);
- if (band < IEEE80211_NUM_BANDS)
+ if (band < NUM_NL80211_BANDS)
state->band_start = band + 1;
else
state->band_start = 0;
@@ -1745,7 +1763,10 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
* case we'll continue with more data in the next round,
* but break unconditionally so unsplit data stops here.
*/
- state->split_start++;
+ if (state->split)
+ state->split_start++;
+ else
+ state->split_start = 0;
break;
case 9:
if (rdev->wiphy.extended_capabilities &&
@@ -3241,6 +3262,9 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info)
if (err)
return err;
+ if (key.idx < 0)
+ return -EINVAL;
+
if (info->attrs[NL80211_ATTR_MAC])
mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
@@ -3509,7 +3533,7 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info,
memset(mask, 0, sizeof(*mask));
/* Default to all rates enabled */
- for (i = 0; i < IEEE80211_NUM_BANDS; i++) {
+ for (i = 0; i < NUM_NL80211_BANDS; i++) {
sband = rdev->wiphy.bands[i];
if (!sband)
@@ -3536,10 +3560,10 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info,
*/
BUILD_BUG_ON(NL80211_MAX_SUPP_HT_RATES > IEEE80211_HT_MCS_MASK_LEN * 8);
nla_for_each_nested(tx_rates, info->attrs[NL80211_ATTR_TX_RATES], rem) {
- enum ieee80211_band band = nla_type(tx_rates);
+ enum nl80211_band band = nla_type(tx_rates);
int err;
- if (band < 0 || band >= IEEE80211_NUM_BANDS)
+ if (band < 0 || band >= NUM_NL80211_BANDS)
return -EINVAL;
sband = rdev->wiphy.bands[band];
if (sband == NULL)
@@ -3924,10 +3948,8 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
if (err)
return err;
- err = validate_beacon_tx_rate(
- rdev,
- (enum nl80211_band)(params.chandef.chan->band),
- &params.beacon_rate);
+ err = validate_beacon_tx_rate(rdev, params.chandef.chan->band,
+ &params.beacon_rate);
if (err)
return err;
}
@@ -3956,7 +3978,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
}
params.pbss = nla_get_flag(info->attrs[NL80211_ATTR_PBSS]);
- if (params.pbss && !rdev->wiphy.bands[IEEE80211_BAND_60GHZ])
+ if (params.pbss && !rdev->wiphy.bands[NL80211_BAND_60GHZ])
return -EOPNOTSUPP;
if (info->attrs[NL80211_ATTR_ACL_POLICY]) {
@@ -5864,7 +5886,7 @@ static int nl80211_update_mesh_config(struct sk_buff *skb,
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct net_device *dev = info->user_ptr[1];
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct mesh_config cfg;
+ struct mesh_config cfg = {};
u32 mask;
int err;
@@ -6256,9 +6278,9 @@ static int validate_scan_freqs(struct nlattr *freqs)
return n_channels;
}
-static bool is_band_valid(struct wiphy *wiphy, enum ieee80211_band b)
+static bool is_band_valid(struct wiphy *wiphy, enum nl80211_band b)
{
- return b < IEEE80211_NUM_BANDS && wiphy->bands[b];
+ return b < NUM_NL80211_BANDS && wiphy->bands[b];
}
static int parse_bss_select(struct nlattr *nla, struct wiphy *wiphy,
@@ -6309,10 +6331,7 @@ static int parse_bss_select(struct nlattr *nla, struct wiphy *wiphy,
bss_select->behaviour = NL80211_BSS_SELECT_ATTR_RSSI_ADJUST;
bss_select->param.adjust.band = adj_param->band;
bss_select->param.adjust.delta = adj_param->delta;
- if (!is_band_valid(
- wiphy,
- ((enum ieee80211_band)(bss_select->param.adjust.band))
- ))
+ if (!is_band_valid(wiphy, bss_select->param.adjust.band))
return -EINVAL;
}
@@ -6456,10 +6475,10 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
i++;
}
} else {
- enum ieee80211_band band;
+ enum nl80211_band band;
/* all channels */
- for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
+ for (band = 0; band < NUM_NL80211_BANDS; band++) {
int j;
if (!wiphy->bands[band])
continue;
@@ -6504,7 +6523,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
request->ie_len);
}
- for (i = 0; i < IEEE80211_NUM_BANDS; i++)
+ for (i = 0; i < NUM_NL80211_BANDS; i++)
if (wiphy->bands[i])
request->rates[i] =
(1 << wiphy->bands[i]->n_bitrates) - 1;
@@ -6513,9 +6532,9 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
nla_for_each_nested(attr,
info->attrs[NL80211_ATTR_SCAN_SUPP_RATES],
tmp) {
- enum ieee80211_band band = nla_type(attr);
+ enum nl80211_band band = nla_type(attr);
- if (band < 0 || band >= IEEE80211_NUM_BANDS) {
+ if (band < 0 || band >= NUM_NL80211_BANDS) {
err = -EINVAL;
goto out_free;
}
@@ -6698,7 +6717,7 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
struct cfg80211_sched_scan_request *request;
struct nlattr *attr;
int err, tmp, n_ssids = 0, n_match_sets = 0, n_channels, i, n_plans = 0;
- enum ieee80211_band band;
+ enum nl80211_band band;
size_t ie_len;
struct nlattr *tb[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1];
s32 default_match_rssi = NL80211_SCAN_RSSI_THOLD_OFF;
@@ -6869,7 +6888,7 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
}
} else {
/* all channels */
- for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
+ for (band = 0; band < NUM_NL80211_BANDS; band++) {
int j;
if (!wiphy->bands[band])
continue;
@@ -7021,10 +7040,7 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
attrs[NL80211_ATTR_SCHED_SCAN_RSSI_ADJUST]);
request->rssi_adjust.band = rssi_adjust->band;
request->rssi_adjust.delta = rssi_adjust->delta;
- if (!is_band_valid(
- wiphy,
- (enum ieee80211_band)(request->rssi_adjust.band)
- )) {
+ if (!is_band_valid(wiphy, request->rssi_adjust.band)) {
err = -EINVAL;
goto out_free;
}
@@ -8051,14 +8067,14 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info)
static bool
nl80211_parse_mcast_rate(struct cfg80211_registered_device *rdev,
- int mcast_rate[IEEE80211_NUM_BANDS],
+ int mcast_rate[NUM_NL80211_BANDS],
int rateval)
{
struct wiphy *wiphy = &rdev->wiphy;
bool found = false;
int band, i;
- for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
+ for (band = 0; band < NUM_NL80211_BANDS; band++) {
struct ieee80211_supported_band *sband;
sband = wiphy->bands[band];
@@ -8240,7 +8256,7 @@ static int nl80211_set_mcast_rate(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct net_device *dev = info->user_ptr[1];
- int mcast_rate[IEEE80211_NUM_BANDS];
+ int mcast_rate[NUM_NL80211_BANDS];
u32 nla_rate;
int err;
@@ -8643,7 +8659,7 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
}
connect.pbss = nla_get_flag(info->attrs[NL80211_ATTR_PBSS]);
- if (connect.pbss && !rdev->wiphy.bands[IEEE80211_BAND_60GHZ]) {
+ if (connect.pbss && !rdev->wiphy.bands[NL80211_BAND_60GHZ]) {
kzfree(connkeys);
return -EOPNOTSUPP;
}
@@ -9549,10 +9565,8 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
if (err)
return err;
- err = validate_beacon_tx_rate(
- rdev,
- (enum nl80211_band)(setup.chandef.chan->band),
- &setup.beacon_rate);
+ err = validate_beacon_tx_rate(rdev, setup.chandef.chan->band,
+ &setup.beacon_rate);
if (err)
return err;
}
@@ -10443,7 +10457,7 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info)
struct net_device *dev = info->user_ptr[1];
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct nlattr *tb[NUM_NL80211_REKEY_DATA];
- struct cfg80211_gtk_rekey_data rekey_data;
+ struct cfg80211_gtk_rekey_data rekey_data = {};
int err;
if (!info->attrs[NL80211_ATTR_REKEY_DATA])
@@ -10802,13 +10816,13 @@ static int nl80211_vendor_cmd(struct sk_buff *skb, struct genl_info *info)
if (!wdev->netdev && !wdev->p2p_started)
return -ENETDOWN;
}
-
- if (!vcmd->doit)
- return -EOPNOTSUPP;
} else {
wdev = NULL;
}
+ if (!vcmd->doit)
+ return -EOPNOTSUPP;
+
if (info->attrs[NL80211_ATTR_VENDOR_DATA]) {
data = nla_data(info->attrs[NL80211_ATTR_VENDOR_DATA]);
len = nla_len(info->attrs[NL80211_ATTR_VENDOR_DATA]);
@@ -11215,7 +11229,7 @@ static int nl80211_tdls_channel_switch(struct sk_buff *skb,
* section 10.22.6.2.1. Disallow 5/10Mhz channels as well for now, the
* specification is not defined for them.
*/
- if (chandef.chan->band == IEEE80211_BAND_2GHZ &&
+ if (chandef.chan->band == NL80211_BAND_2GHZ &&
chandef.width != NL80211_CHAN_WIDTH_20_NOHT &&
chandef.width != NL80211_CHAN_WIDTH_20)
return -EINVAL;
@@ -12802,7 +12816,8 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev,
}
void cfg80211_notify_new_peer_candidate(struct net_device *dev, const u8 *addr,
- const u8* ie, u8 ie_len, gfp_t gfp)
+ const u8 *ie, u8 ie_len,
+ int sig_dbm, gfp_t gfp)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
@@ -12828,7 +12843,9 @@ void cfg80211_notify_new_peer_candidate(struct net_device *dev, const u8 *addr,
nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr) ||
(ie_len && ie &&
- nla_put(msg, NL80211_ATTR_IE, ie_len , ie)))
+ nla_put(msg, NL80211_ATTR_IE, ie_len, ie)) ||
+ (sig_dbm &&
+ nla_put_u32(msg, NL80211_ATTR_RX_SIGNAL_DBM, sig_dbm)))
goto nla_put_failure;
genlmsg_end(msg, hdr);
@@ -13575,7 +13592,8 @@ void cfg80211_ch_switch_notify(struct net_device *dev,
wdev->chandef = *chandef;
wdev->preset_chandef = *chandef;
- if (wdev->iftype == NL80211_IFTYPE_STATION &&
+ if ((wdev->iftype == NL80211_IFTYPE_STATION ||
+ wdev->iftype == NL80211_IFTYPE_P2P_CLIENT) &&
!WARN_ON(!wdev->current_bss))
wdev->current_bss->pub.channel = chandef->chan;
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index c20e37acdc80..a7afd4be29e8 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -537,6 +537,10 @@ static inline int
rdev_set_wiphy_params(struct cfg80211_registered_device *rdev, u32 changed)
{
int ret;
+
+ if (!rdev->ops->set_wiphy_params)
+ return -EOPNOTSUPP;
+
trace_rdev_set_wiphy_params(&rdev->wiphy, changed);
ret = rdev->ops->set_wiphy_params(&rdev->wiphy, changed);
trace_rdev_return_int(&rdev->wiphy, ret);
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 647591c5dadd..98a5959b52a5 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1591,12 +1591,12 @@ static void reg_process_ht_flags_band(struct wiphy *wiphy,
static void reg_process_ht_flags(struct wiphy *wiphy)
{
- enum ieee80211_band band;
+ enum nl80211_band band;
if (!wiphy)
return;
- for (band = 0; band < IEEE80211_NUM_BANDS; band++)
+ for (band = 0; band < NUM_NL80211_BANDS; band++)
reg_process_ht_flags_band(wiphy, wiphy->bands[band]);
}
@@ -1754,7 +1754,7 @@ static void reg_check_channels(void)
static void wiphy_update_regulatory(struct wiphy *wiphy,
enum nl80211_reg_initiator initiator)
{
- enum ieee80211_band band;
+ enum nl80211_band band;
struct regulatory_request *lr = get_last_request();
if (ignore_reg_update(wiphy, initiator)) {
@@ -1773,7 +1773,7 @@ static void wiphy_update_regulatory(struct wiphy *wiphy,
lr->dfs_region = get_cfg80211_regdom()->dfs_region;
- for (band = 0; band < IEEE80211_NUM_BANDS; band++)
+ for (band = 0; band < NUM_NL80211_BANDS; band++)
handle_band(wiphy, initiator, wiphy->bands[band]);
reg_process_beacons(wiphy);
@@ -1898,14 +1898,14 @@ static void handle_band_custom(struct wiphy *wiphy,
void wiphy_apply_custom_regulatory(struct wiphy *wiphy,
const struct ieee80211_regdomain *regd)
{
- enum ieee80211_band band;
+ enum nl80211_band band;
unsigned int bands_set = 0;
WARN(!(wiphy->regulatory_flags & REGULATORY_CUSTOM_REG),
"wiphy should have REGULATORY_CUSTOM_REG\n");
wiphy->regulatory_flags |= REGULATORY_CUSTOM_REG;
- for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
+ for (band = 0; band < NUM_NL80211_BANDS; band++) {
if (!wiphy->bands[band])
continue;
handle_band_custom(wiphy, wiphy->bands[band], regd);
@@ -2337,7 +2337,7 @@ static void reg_process_self_managed_hints(void)
struct wiphy *wiphy;
const struct ieee80211_regdomain *tmp;
const struct ieee80211_regdomain *regd;
- enum ieee80211_band band;
+ enum nl80211_band band;
struct regulatory_request request = {};
list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
@@ -2355,7 +2355,7 @@ static void reg_process_self_managed_hints(void)
rcu_assign_pointer(wiphy->regd, regd);
rcu_free_regdom(tmp);
- for (band = 0; band < IEEE80211_NUM_BANDS; band++)
+ for (band = 0; band < NUM_NL80211_BANDS; band++)
handle_band_custom(wiphy, wiphy->bands[band], regd);
reg_process_ht_flags(wiphy);
@@ -2423,6 +2423,9 @@ int regulatory_hint_user(const char *alpha2,
if (WARN_ON(!alpha2))
return -EINVAL;
+ if (!is_world_regdom(alpha2) && !is_an_alpha2(alpha2))
+ return -EINVAL;
+
request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL);
if (!request)
return -ENOMEM;
@@ -2515,7 +2518,7 @@ int regulatory_hint(struct wiphy *wiphy, const char *alpha2)
}
EXPORT_SYMBOL(regulatory_hint);
-void regulatory_hint_country_ie(struct wiphy *wiphy, enum ieee80211_band band,
+void regulatory_hint_country_ie(struct wiphy *wiphy, enum nl80211_band band,
const u8 *country_ie, u8 country_ie_len)
{
char alpha2[2];
@@ -2615,11 +2618,11 @@ static void restore_alpha2(char *alpha2, bool reset_user)
static void restore_custom_reg_settings(struct wiphy *wiphy)
{
struct ieee80211_supported_band *sband;
- enum ieee80211_band band;
+ enum nl80211_band band;
struct ieee80211_channel *chan;
int i;
- for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
+ for (band = 0; band < NUM_NL80211_BANDS; band++) {
sband = wiphy->bands[band];
if (!sband)
continue;
@@ -2734,9 +2737,9 @@ void regulatory_hint_disconnect(void)
static bool freq_is_chan_12_13_14(u16 freq)
{
- if (freq == ieee80211_channel_to_frequency(12, IEEE80211_BAND_2GHZ) ||
- freq == ieee80211_channel_to_frequency(13, IEEE80211_BAND_2GHZ) ||
- freq == ieee80211_channel_to_frequency(14, IEEE80211_BAND_2GHZ))
+ if (freq == ieee80211_channel_to_frequency(12, NL80211_BAND_2GHZ) ||
+ freq == ieee80211_channel_to_frequency(13, NL80211_BAND_2GHZ) ||
+ freq == ieee80211_channel_to_frequency(14, NL80211_BAND_2GHZ))
return true;
return false;
}
@@ -2761,7 +2764,7 @@ int regulatory_hint_found_beacon(struct wiphy *wiphy,
if (beacon_chan->beacon_found ||
beacon_chan->flags & IEEE80211_CHAN_RADAR ||
- (beacon_chan->band == IEEE80211_BAND_2GHZ &&
+ (beacon_chan->band == NL80211_BAND_2GHZ &&
!freq_is_chan_12_13_14(beacon_chan->center_freq)))
return 0;
@@ -2813,7 +2816,7 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd)
power_rule = &reg_rule->power_rule;
if (reg_rule->flags & NL80211_RRF_AUTO_BW)
- snprintf(bw, sizeof(bw), "%d KHz, %d KHz AUTO",
+ snprintf(bw, sizeof(bw), "%d KHz, %u KHz AUTO",
freq_range->max_bandwidth_khz,
reg_get_max_bandwidth(rd, reg_rule));
else
diff --git a/net/wireless/reg.h b/net/wireless/reg.h
index 7bbe2d138d2a..ff078f093989 100644
--- a/net/wireless/reg.h
+++ b/net/wireless/reg.h
@@ -104,7 +104,7 @@ int regulatory_hint_found_beacon(struct wiphy *wiphy,
* information for a band the BSS is not present in it will be ignored.
*/
void regulatory_hint_country_ie(struct wiphy *wiphy,
- enum ieee80211_band band,
+ enum nl80211_band band,
const u8 *country_ie,
u8 country_ie_len);
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index a6451bf9a717..afe55f6ebb6e 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -582,7 +582,7 @@ static int cmp_bss(struct cfg80211_bss *a,
}
static bool cfg80211_bss_type_match(u16 capability,
- enum ieee80211_band band,
+ enum nl80211_band band,
enum ieee80211_bss_type bss_type)
{
bool ret = true;
@@ -591,7 +591,7 @@ static bool cfg80211_bss_type_match(u16 capability,
if (bss_type == IEEE80211_BSS_TYPE_ANY)
return ret;
- if (band == IEEE80211_BAND_60GHZ) {
+ if (band == NL80211_BAND_60GHZ) {
mask = WLAN_CAPABILITY_DMG_TYPE_MASK;
switch (bss_type) {
case IEEE80211_BSS_TYPE_ESS:
@@ -947,14 +947,14 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev,
* be grouped with this beacon for updates ...
*/
if (!cfg80211_combine_bsses(rdev, new)) {
- kfree(new);
+ bss_ref_put(rdev, new);
goto drop;
}
}
if (rdev->bss_entries >= bss_entries_limit &&
!cfg80211_bss_expire_oldest(rdev)) {
- kfree(new);
+ bss_ref_put(rdev, new);
goto drop;
}
@@ -1075,7 +1075,7 @@ cfg80211_inform_bss_data(struct wiphy *wiphy,
if (!res)
return NULL;
- if (channel->band == IEEE80211_BAND_60GHZ) {
+ if (channel->band == NL80211_BAND_60GHZ) {
bss_type = res->pub.capability & WLAN_CAPABILITY_DMG_TYPE_MASK;
if (bss_type == WLAN_CAPABILITY_DMG_TYPE_AP ||
bss_type == WLAN_CAPABILITY_DMG_TYPE_PBSS)
@@ -1158,7 +1158,7 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
if (!res)
return NULL;
- if (channel->band == IEEE80211_BAND_60GHZ) {
+ if (channel->band == NL80211_BAND_60GHZ) {
bss_type = res->pub.capability & WLAN_CAPABILITY_DMG_TYPE_MASK;
if (bss_type == WLAN_CAPABILITY_DMG_TYPE_AP ||
bss_type == WLAN_CAPABILITY_DMG_TYPE_PBSS)
@@ -1254,7 +1254,7 @@ int cfg80211_wext_siwscan(struct net_device *dev,
struct iw_scan_req *wreq = NULL;
struct cfg80211_scan_request *creq = NULL;
int i, err, n_channels = 0;
- enum ieee80211_band band;
+ enum nl80211_band band;
if (!netif_running(dev))
return -ENETDOWN;
@@ -1298,7 +1298,7 @@ int cfg80211_wext_siwscan(struct net_device *dev,
/* translate "Scan on frequencies" request */
i = 0;
- for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
+ for (band = 0; band < NUM_NL80211_BANDS; band++) {
int j;
if (!wiphy->bands[band])
@@ -1358,7 +1358,7 @@ int cfg80211_wext_siwscan(struct net_device *dev,
creq->n_ssids = 0;
}
- for (i = 0; i < IEEE80211_NUM_BANDS; i++)
+ for (i = 0; i < NUM_NL80211_BANDS; i++)
if (wiphy->bands[i])
creq->rates[i] = (1 << wiphy->bands[i]->n_bitrates) - 1;
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 5527bbb80f71..593258220735 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -106,7 +106,7 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev)
return -ENOMEM;
if (wdev->conn->params.channel) {
- enum ieee80211_band band = wdev->conn->params.channel->band;
+ enum nl80211_band band = wdev->conn->params.channel->band;
struct ieee80211_supported_band *sband =
wdev->wiphy->bands[band];
@@ -118,11 +118,11 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev)
request->rates[band] = (1 << sband->n_bitrates) - 1;
} else {
int i = 0, j;
- enum ieee80211_band band;
+ enum nl80211_band band;
struct ieee80211_supported_band *bands;
struct ieee80211_channel *channel;
- for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
+ for (band = 0; band < NUM_NL80211_BANDS; band++) {
bands = wdev->wiphy->bands[band];
if (!bands)
continue;
@@ -290,6 +290,15 @@ void cfg80211_conn_work(struct work_struct *work)
rtnl_unlock();
}
+static void cfg80211_step_auth_next(struct cfg80211_conn *conn,
+ struct cfg80211_bss *bss)
+{
+ memcpy(conn->bssid, bss->bssid, ETH_ALEN);
+ conn->params.bssid = conn->bssid;
+ conn->params.channel = bss->channel;
+ conn->state = CFG80211_CONN_AUTHENTICATE_NEXT;
+}
+
/* Returned bss is reference counted and must be cleaned up appropriately. */
static struct cfg80211_bss *cfg80211_get_conn_bss(struct wireless_dev *wdev)
{
@@ -307,10 +316,7 @@ static struct cfg80211_bss *cfg80211_get_conn_bss(struct wireless_dev *wdev)
if (!bss)
return NULL;
- memcpy(wdev->conn->bssid, bss->bssid, ETH_ALEN);
- wdev->conn->params.bssid = wdev->conn->bssid;
- wdev->conn->params.channel = bss->channel;
- wdev->conn->state = CFG80211_CONN_AUTHENTICATE_NEXT;
+ cfg80211_step_auth_next(wdev->conn, bss);
schedule_work(&rdev->conn_work);
return bss;
@@ -546,7 +552,7 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev,
if (wdev->current_bss)
return -EALREADY;
- if (WARN_ON(wdev->conn))
+ if (wdev->conn)
return -EINPROGRESS;
wdev->conn = kzalloc(sizeof(*wdev->conn), GFP_KERNEL);
@@ -584,7 +590,12 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev,
wdev->conn->params.ssid_len = wdev->ssid_len;
/* see if we have the bss already */
- bss = cfg80211_get_conn_bss(wdev);
+ bss = cfg80211_get_bss(wdev->wiphy, wdev->conn->params.channel,
+ wdev->conn->params.bssid,
+ wdev->conn->params.ssid,
+ wdev->conn->params.ssid_len,
+ wdev->conn_bss_type,
+ IEEE80211_PRIVACY(wdev->conn->params.privacy));
if (prev_bssid) {
memcpy(wdev->conn->prev_bssid, prev_bssid, ETH_ALEN);
@@ -595,6 +606,7 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev,
if (bss) {
enum nl80211_timeout_reason treason;
+ cfg80211_step_auth_next(wdev->conn, bss);
err = cfg80211_conn_do_work(wdev, &treason);
cfg80211_put_bss(wdev->wiphy, bss);
} else {
@@ -1116,6 +1128,18 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev,
connect->crypto.ciphers_pairwise[0] = cipher;
}
}
+ } else {
+ if (WARN_ON(connkeys))
+ return -EINVAL;
+
+ /* connect can point to wdev->wext.connect which
+ * can hold key data from a previous connection
+ */
+ connect->key = NULL;
+ connect->key_len = 0;
+ connect->key_idx = 0;
+ connect->crypto.cipher_group = 0;
+ connect->crypto.n_ciphers_pairwise = 0;
}
wdev->connect_keys = connkeys;
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 77eb73d3c83f..48a2df99e7f9 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -110,7 +110,7 @@
conf->dot11MeshHWMPconfirmationInterval; \
} while (0)
-#define CHAN_ENTRY __field(enum ieee80211_band, band) \
+#define CHAN_ENTRY __field(enum nl80211_band, band) \
__field(u16, center_freq)
#define CHAN_ASSIGN(chan) \
do { \
@@ -125,7 +125,7 @@
#define CHAN_PR_FMT "band: %d, freq: %u"
#define CHAN_PR_ARG __entry->band, __entry->center_freq
-#define CHAN_DEF_ENTRY __field(enum ieee80211_band, band) \
+#define CHAN_DEF_ENTRY __field(enum nl80211_band, band) \
__field(u32, control_freq) \
__field(u32, width) \
__field(u32, center_freq1) \
@@ -2650,7 +2650,7 @@ TRACE_EVENT(cfg80211_scan_done,
TP_STRUCT__entry(
__field(u32, n_channels)
__dynamic_array(u8, ie, request ? request->ie_len : 0)
- __array(u32, rates, IEEE80211_NUM_BANDS)
+ __array(u32, rates, NUM_NL80211_BANDS)
__field(u32, wdev_id)
MAC_ENTRY(wiphy_mac)
__field(bool, no_cck)
@@ -2661,7 +2661,7 @@ TRACE_EVENT(cfg80211_scan_done,
memcpy(__get_dynamic_array(ie), request->ie,
request->ie_len);
memcpy(__entry->rates, request->rates,
- IEEE80211_NUM_BANDS);
+ NUM_NL80211_BANDS);
__entry->wdev_id = request->wdev ?
request->wdev->identifier : 0;
if (request->wiphy)
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 3e7525d0d8e3..95eab2690f4f 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -48,7 +48,7 @@ u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband,
if (WARN_ON(!sband))
return 1;
- if (sband->band == IEEE80211_BAND_2GHZ) {
+ if (sband->band == NL80211_BAND_2GHZ) {
if (scan_width == NL80211_BSS_CHAN_WIDTH_5 ||
scan_width == NL80211_BSS_CHAN_WIDTH_10)
mandatory_flag = IEEE80211_RATE_MANDATORY_G;
@@ -66,26 +66,26 @@ u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband,
}
EXPORT_SYMBOL(ieee80211_mandatory_rates);
-int ieee80211_channel_to_frequency(int chan, enum ieee80211_band band)
+int ieee80211_channel_to_frequency(int chan, enum nl80211_band band)
{
/* see 802.11 17.3.8.3.2 and Annex J
* there are overlapping channel numbers in 5GHz and 2GHz bands */
if (chan <= 0)
return 0; /* not supported */
switch (band) {
- case IEEE80211_BAND_2GHZ:
+ case NL80211_BAND_2GHZ:
if (chan == 14)
return 2484;
else if (chan < 14)
return 2407 + chan * 5;
break;
- case IEEE80211_BAND_5GHZ:
+ case NL80211_BAND_5GHZ:
if (chan >= 182 && chan <= 196)
return 4000 + chan * 5;
else
return 5000 + chan * 5;
break;
- case IEEE80211_BAND_60GHZ:
+ case NL80211_BAND_60GHZ:
if (chan < 5)
return 56160 + chan * 2160;
break;
@@ -117,11 +117,11 @@ EXPORT_SYMBOL(ieee80211_frequency_to_channel);
struct ieee80211_channel *__ieee80211_get_channel(struct wiphy *wiphy,
int freq)
{
- enum ieee80211_band band;
+ enum nl80211_band band;
struct ieee80211_supported_band *sband;
int i;
- for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
+ for (band = 0; band < NUM_NL80211_BANDS; band++) {
sband = wiphy->bands[band];
if (!sband)
@@ -138,12 +138,12 @@ struct ieee80211_channel *__ieee80211_get_channel(struct wiphy *wiphy,
EXPORT_SYMBOL(__ieee80211_get_channel);
static void set_mandatory_flags_band(struct ieee80211_supported_band *sband,
- enum ieee80211_band band)
+ enum nl80211_band band)
{
int i, want;
switch (band) {
- case IEEE80211_BAND_5GHZ:
+ case NL80211_BAND_5GHZ:
want = 3;
for (i = 0; i < sband->n_bitrates; i++) {
if (sband->bitrates[i].bitrate == 60 ||
@@ -156,7 +156,7 @@ static void set_mandatory_flags_band(struct ieee80211_supported_band *sband,
}
WARN_ON(want);
break;
- case IEEE80211_BAND_2GHZ:
+ case NL80211_BAND_2GHZ:
want = 7;
for (i = 0; i < sband->n_bitrates; i++) {
if (sband->bitrates[i].bitrate == 10) {
@@ -186,12 +186,12 @@ static void set_mandatory_flags_band(struct ieee80211_supported_band *sband,
}
WARN_ON(want != 0 && want != 3 && want != 6);
break;
- case IEEE80211_BAND_60GHZ:
+ case NL80211_BAND_60GHZ:
/* check for mandatory HT MCS 1..4 */
WARN_ON(!sband->ht_cap.ht_supported);
WARN_ON((sband->ht_cap.mcs.rx_mask[0] & 0x1e) != 0x1e);
break;
- case IEEE80211_NUM_BANDS:
+ case NUM_NL80211_BANDS:
WARN_ON(1);
break;
}
@@ -199,9 +199,9 @@ static void set_mandatory_flags_band(struct ieee80211_supported_band *sband,
void ieee80211_set_bitrate_flags(struct wiphy *wiphy)
{
- enum ieee80211_band band;
+ enum nl80211_band band;
- for (band = 0; band < IEEE80211_NUM_BANDS; band++)
+ for (band = 0; band < NUM_NL80211_BANDS; band++)
if (wiphy->bands[band])
set_mandatory_flags_band(wiphy->bands[band], band);
}
@@ -410,8 +410,8 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr)
}
EXPORT_SYMBOL(ieee80211_get_mesh_hdrlen);
-int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
- enum nl80211_iftype iftype)
+static int __ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
+ enum nl80211_iftype iftype, bool is_amsdu)
{
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
u16 hdrlen, ethertype;
@@ -505,7 +505,7 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
payload = skb->data + hdrlen;
ethertype = (payload[6] << 8) | payload[7];
- if (likely((ether_addr_equal(payload, rfc1042_header) &&
+ if (likely((!is_amsdu && ether_addr_equal(payload, rfc1042_header) &&
ethertype != ETH_P_AARP && ethertype != ETH_P_IPX) ||
ether_addr_equal(payload, bridge_tunnel_header))) {
/* remove RFC1042 or Bridge-Tunnel encapsulation and
@@ -526,6 +526,12 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
}
return 0;
}
+
+int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
+ enum nl80211_iftype iftype)
+{
+ return __ieee80211_data_to_8023(skb, addr, iftype, false);
+}
EXPORT_SYMBOL(ieee80211_data_to_8023);
int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr,
@@ -685,6 +691,9 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
/* the last MSDU has no padding */
if (subframe_len > remaining)
goto purge;
+ /* mitigate A-MSDU aggregation injection attacks */
+ if (ether_addr_equal(eth->h_dest, rfc1042_header))
+ goto purge;
skb_pull(skb, sizeof(struct ethhdr));
/* reuse skb for the last subframe */
@@ -950,6 +959,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
switch (otype) {
case NL80211_IFTYPE_AP:
+ case NL80211_IFTYPE_P2P_GO:
cfg80211_stop_ap(rdev, dev, true);
break;
case NL80211_IFTYPE_ADHOC:
@@ -965,6 +975,9 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
case NL80211_IFTYPE_MESH_POINT:
/* mesh should be handled? */
break;
+ case NL80211_IFTYPE_OCB:
+ cfg80211_leave_ocb(rdev, dev);
+ break;
default:
break;
}
@@ -1330,22 +1343,22 @@ size_t ieee80211_ie_split(const u8 *ies, size_t ielen,
EXPORT_SYMBOL(ieee80211_ie_split);
bool ieee80211_operating_class_to_band(u8 operating_class,
- enum ieee80211_band *band)
+ enum nl80211_band *band)
{
switch (operating_class) {
case 112:
case 115 ... 127:
case 128 ... 130:
- *band = IEEE80211_BAND_5GHZ;
+ *band = NL80211_BAND_5GHZ;
return true;
case 81:
case 82:
case 83:
case 84:
- *band = IEEE80211_BAND_2GHZ;
+ *band = NL80211_BAND_2GHZ;
return true;
case 180:
- *band = IEEE80211_BAND_60GHZ;
+ *band = NL80211_BAND_60GHZ;
return true;
}
@@ -1818,10 +1831,10 @@ int ieee80211_get_ratemask(struct ieee80211_supported_band *sband,
unsigned int ieee80211_get_num_supported_channels(struct wiphy *wiphy)
{
- enum ieee80211_band band;
+ enum nl80211_band band;
unsigned int n_channels = 0;
- for (band = 0; band < IEEE80211_NUM_BANDS; band++)
+ for (band = 0; band < NUM_NL80211_BANDS; band++)
if (wiphy->bands[band])
n_channels += wiphy->bands[band]->n_channels;
@@ -1908,3 +1921,48 @@ bool cfg80211_is_gratuitous_arp_unsolicited_na(struct sk_buff *skb)
return false;
}
EXPORT_SYMBOL(cfg80211_is_gratuitous_arp_unsolicited_na);
+
+/* Layer 2 Update frame (802.2 Type 1 LLC XID Update response) */
+struct iapp_layer2_update {
+ u8 da[ETH_ALEN]; /* broadcast */
+ u8 sa[ETH_ALEN]; /* STA addr */
+ __be16 len; /* 6 */
+ u8 dsap; /* 0 */
+ u8 ssap; /* 0 */
+ u8 control;
+ u8 xid_info[3];
+} __packed;
+
+void cfg80211_send_layer2_update(struct net_device *dev, const u8 *addr)
+{
+ struct iapp_layer2_update *msg;
+ struct sk_buff *skb;
+
+ /* Send Level 2 Update Frame to update forwarding tables in layer 2
+ * bridge devices */
+
+ skb = dev_alloc_skb(sizeof(*msg));
+ if (!skb)
+ return;
+ msg = (struct iapp_layer2_update *)skb_put(skb, sizeof(*msg));
+
+ /* 802.2 Type 1 Logical Link Control (LLC) Exchange Identifier (XID)
+ * Update response frame; IEEE Std 802.2-1998, 5.4.1.2.1 */
+
+ eth_broadcast_addr(msg->da);
+ ether_addr_copy(msg->sa, addr);
+ msg->len = htons(6);
+ msg->dsap = 0;
+ msg->ssap = 0x01; /* NULL LSAP, CR Bit: Response */
+ msg->control = 0xaf; /* XID response lsb.1111F101.
+ * F=0 (no poll command; unsolicited frame) */
+ msg->xid_info[0] = 0x81; /* XID format identifier */
+ msg->xid_info[1] = 1; /* LLC types/classes: Type 1 LLC */
+ msg->xid_info[2] = 0; /* XID sender's receive window size (RW) */
+
+ skb->dev = dev;
+ skb->protocol = eth_type_trans(skb, dev);
+ memset(skb->cb, 0, sizeof(skb->cb));
+ netif_rx_ni(skb);
+}
+EXPORT_SYMBOL(cfg80211_send_layer2_update);
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index cd119943612b..7139f9bcaf0c 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -32,13 +32,13 @@ int cfg80211_wext_giwname(struct net_device *dev,
if (!wdev)
return -EOPNOTSUPP;
- sband = wdev->wiphy->bands[IEEE80211_BAND_5GHZ];
+ sband = wdev->wiphy->bands[NL80211_BAND_5GHZ];
if (sband) {
is_a = true;
is_ht |= sband->ht_cap.ht_supported;
}
- sband = wdev->wiphy->bands[IEEE80211_BAND_2GHZ];
+ sband = wdev->wiphy->bands[NL80211_BAND_2GHZ];
if (sband) {
int i;
/* Check for mandatory rates */
@@ -143,7 +143,7 @@ int cfg80211_wext_giwrange(struct net_device *dev,
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct iw_range *range = (struct iw_range *) extra;
- enum ieee80211_band band;
+ enum nl80211_band band;
int i, c = 0;
if (!wdev)
@@ -215,7 +215,7 @@ int cfg80211_wext_giwrange(struct net_device *dev,
}
}
- for (band = 0; band < IEEE80211_NUM_BANDS; band ++) {
+ for (band = 0; band < NUM_NL80211_BANDS; band ++) {
struct ieee80211_supported_band *sband;
sband = wdev->wiphy->bands[band];
@@ -265,11 +265,11 @@ int cfg80211_wext_freq(struct iw_freq *freq)
* -EINVAL for impossible things.
*/
if (freq->e == 0) {
- enum ieee80211_band band = IEEE80211_BAND_2GHZ;
+ enum nl80211_band band = NL80211_BAND_2GHZ;
if (freq->m < 0)
return 0;
if (freq->m > 14)
- band = IEEE80211_BAND_5GHZ;
+ band = NL80211_BAND_5GHZ;
return ieee80211_channel_to_frequency(freq->m, band);
} else {
int i, div = 1000000;
@@ -1245,7 +1245,7 @@ static int cfg80211_wext_siwrate(struct net_device *dev,
maxrate = rate->value / 100000;
}
- for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
+ for (band = 0; band < NUM_NL80211_BANDS; band++) {
sband = wdev->wiphy->bands[band];
if (sband == NULL)
continue;
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index b50ee5d622e1..9a929010ea9d 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -656,7 +656,8 @@ struct iw_statistics *get_wireless_stats(struct net_device *dev)
return NULL;
}
-static int iw_handler_get_iwstats(struct net_device * dev,
+/* noinline to avoid a bogus warning with -O3 */
+static noinline int iw_handler_get_iwstats(struct net_device * dev,
struct iw_request_info * info,
union iwreq_data * wrqu,
char * extra)
@@ -894,8 +895,9 @@ out:
int call_commit_handler(struct net_device *dev)
{
#ifdef CONFIG_WIRELESS_EXT
- if ((netif_running(dev)) &&
- (dev->wireless_handlers->standard[0] != NULL))
+ if (netif_running(dev) &&
+ dev->wireless_handlers &&
+ dev->wireless_handlers->standard[0])
/* Call the commit handler on the driver */
return dev->wireless_handlers->standard[0](dev, NULL,
NULL, NULL);
diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c
index 98ff9d9e1aa9..8cc9a5f406ee 100644
--- a/net/wireless/wext-sme.c
+++ b/net/wireless/wext-sme.c
@@ -43,7 +43,7 @@ int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev,
if (!wdev->wext.connect.ssid_len)
return 0;
- if (wdev->wext.keys) {
+ if (wdev->wext.keys && wdev->wext.keys->def != -1) {
ck = kmemdup(wdev->wext.keys, sizeof(*ck), GFP_KERNEL);
if (!ck)
return -ENOMEM;
diff --git a/net/wireless/wext-spy.c b/net/wireless/wext-spy.c
index 33bef22e44e9..b379a0371653 100644
--- a/net/wireless/wext-spy.c
+++ b/net/wireless/wext-spy.c
@@ -120,8 +120,8 @@ int iw_handler_set_thrspy(struct net_device * dev,
return -EOPNOTSUPP;
/* Just do it */
- memcpy(&(spydata->spy_thr_low), &(threshold->low),
- 2 * sizeof(struct iw_quality));
+ spydata->spy_thr_low = threshold->low;
+ spydata->spy_thr_high = threshold->high;
/* Clear flag */
memset(spydata->spy_thr_under, '\0', sizeof(spydata->spy_thr_under));
@@ -147,8 +147,8 @@ int iw_handler_get_thrspy(struct net_device * dev,
return -EOPNOTSUPP;
/* Just do it */
- memcpy(&(threshold->low), &(spydata->spy_thr_low),
- 2 * sizeof(struct iw_quality));
+ threshold->low = spydata->spy_thr_low;
+ threshold->high = spydata->spy_thr_high;
return 0;
}
@@ -173,10 +173,10 @@ static void iw_send_thrspy_event(struct net_device * dev,
memcpy(threshold.addr.sa_data, address, ETH_ALEN);
threshold.addr.sa_family = ARPHRD_ETHER;
/* Copy stats */
- memcpy(&(threshold.qual), wstats, sizeof(struct iw_quality));
+ threshold.qual = *wstats;
/* Copy also thresholds */
- memcpy(&(threshold.low), &(spydata->spy_thr_low),
- 2 * sizeof(struct iw_quality));
+ threshold.low = spydata->spy_thr_low;
+ threshold.high = spydata->spy_thr_high;
/* Send event to user space */
wireless_send_event(dev, SIOCGIWTHRSPY, &wrqu, (char *) &threshold);
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 5dca42dbc737..156639be7ed0 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -100,7 +100,7 @@ int x25_parse_address_block(struct sk_buff *skb,
}
len = *skb->data;
- needed = 1 + (len >> 4) + (len & 0x0f);
+ needed = 1 + ((len >> 4) + (len & 0x0f) + 1) / 2;
if (!pskb_may_pull(skb, needed)) {
/* packet is too short to hold the addresses it claims
@@ -288,7 +288,7 @@ static struct sock *x25_find_listener(struct x25_address *addr,
sk_for_each(s, &x25_list)
if ((!strcmp(addr->x25_addr,
x25_sk(s)->source_addr.x25_addr) ||
- !strcmp(addr->x25_addr,
+ !strcmp(x25_sk(s)->source_addr.x25_addr,
null_x25_address.x25_addr)) &&
s->sk_state == TCP_LISTEN) {
/*
@@ -550,7 +550,7 @@ static int x25_create(struct net *net, struct socket *sock, int protocol,
if (protocol)
goto out;
- rc = -ENOBUFS;
+ rc = -ENOMEM;
if ((sk = x25_alloc_socket(net, kern)) == NULL)
goto out;
@@ -679,16 +679,21 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
int len, i, rc = 0;
if (addr_len != sizeof(struct sockaddr_x25) ||
- addr->sx25_family != AF_X25) {
+ addr->sx25_family != AF_X25 ||
+ strnlen(addr->sx25_addr.x25_addr, X25_ADDR_LEN) == X25_ADDR_LEN) {
rc = -EINVAL;
goto out;
}
- len = strlen(addr->sx25_addr.x25_addr);
- for (i = 0; i < len; i++) {
- if (!isdigit(addr->sx25_addr.x25_addr[i])) {
- rc = -EINVAL;
- goto out;
+ /* check for the null_x25_address */
+ if (strcmp(addr->sx25_addr.x25_addr, null_x25_address.x25_addr)) {
+
+ len = strlen(addr->sx25_addr.x25_addr);
+ for (i = 0; i < len; i++) {
+ if (!isdigit(addr->sx25_addr.x25_addr[i])) {
+ rc = -EINVAL;
+ goto out;
+ }
}
}
@@ -760,12 +765,17 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr,
if (sk->sk_state == TCP_ESTABLISHED)
goto out;
+ rc = -EALREADY; /* Do nothing if call is already in progress */
+ if (sk->sk_state == TCP_SYN_SENT)
+ goto out;
+
sk->sk_state = TCP_CLOSE;
sock->state = SS_UNCONNECTED;
rc = -EINVAL;
if (addr_len != sizeof(struct sockaddr_x25) ||
- addr->sx25_family != AF_X25)
+ addr->sx25_family != AF_X25 ||
+ strnlen(addr->sx25_addr.x25_addr, X25_ADDR_LEN) == X25_ADDR_LEN)
goto out;
rc = -ENETUNREACH;
@@ -806,7 +816,7 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr,
/* Now the loop */
rc = -EINPROGRESS;
if (sk->sk_state != TCP_ESTABLISHED && (flags & O_NONBLOCK))
- goto out_put_neigh;
+ goto out;
rc = x25_wait_for_connection_establishment(sk);
if (rc)
@@ -815,7 +825,7 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr,
sock->state = SS_CONNECTED;
rc = 0;
out_put_neigh:
- if (rc) {
+ if (rc && x25->neighbour) {
read_lock_bh(&x25_list_lock);
x25_neigh_put(x25->neighbour);
x25->neighbour = NULL;
@@ -1039,6 +1049,7 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb,
makex25->lci = lci;
makex25->dest_addr = dest_addr;
makex25->source_addr = source_addr;
+ x25_neigh_hold(nb);
makex25->neighbour = nb;
makex25->facilities = facilities;
makex25->dte_facilities= dte_facilities;
diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c
index 39231237e1c3..30f71620d4e3 100644
--- a/net/x25/x25_dev.c
+++ b/net/x25/x25_dev.c
@@ -120,8 +120,10 @@ int x25_lapb_receive_frame(struct sk_buff *skb, struct net_device *dev,
goto drop;
}
- if (!pskb_may_pull(skb, 1))
+ if (!pskb_may_pull(skb, 1)) {
+ x25_neigh_put(nb);
return 0;
+ }
switch (skb->data[0]) {
diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c
index 6b5af65f491f..a3163645b5bd 100644
--- a/net/x25/x25_subr.c
+++ b/net/x25/x25_subr.c
@@ -368,6 +368,12 @@ void x25_disconnect(struct sock *sk, int reason, unsigned char cause,
sk->sk_state_change(sk);
sock_set_flag(sk, SOCK_DEAD);
}
+ if (x25->neighbour) {
+ read_lock_bh(&x25_list_lock);
+ x25_neigh_put(x25->neighbour);
+ x25->neighbour = NULL;
+ read_unlock_bh(&x25_list_lock);
+ }
}
/*
diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig
index bda1a13628a8..16cdd2c9221f 100644
--- a/net/xfrm/Kconfig
+++ b/net/xfrm/Kconfig
@@ -20,6 +20,17 @@ config XFRM_USER
If unsure, say Y.
+config XFRM_USER_COMPAT
+ tristate "Compatible ABI support"
+ depends on XFRM_USER && COMPAT_FOR_U64_ALIGNMENT && \
+ HAVE_EFFICIENT_UNALIGNED_ACCESS
+ select WANT_COMPAT_NETLINK_MESSAGES
+ help
+ Transformation(XFRM) user configuration interface like IPsec
+ used by compatible Linux applications.
+
+ If unsure, say N.
+
config XFRM_SUB_POLICY
bool "Transformation sub policy support"
depends on XFRM
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index c0e961983f17..516c78e22e2a 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -8,4 +8,5 @@ obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \
obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o
obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o
obj-$(CONFIG_XFRM_USER) += xfrm_user.o
+obj-$(CONFIG_XFRM_USER_COMPAT) += xfrm_compat.o
obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o
diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c
new file mode 100644
index 000000000000..7158078a71f1
--- /dev/null
+++ b/net/xfrm/xfrm_compat.c
@@ -0,0 +1,685 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * XFRM compat layer
+ * Author: Dmitry Safonov <dima@arista.com>
+ * Based on code and translator idea by: Florian Westphal <fw@strlen.de>
+ */
+#include <linux/compat.h>
+#include <linux/vmalloc.h>
+#include <linux/xfrm.h>
+#include <net/xfrm.h>
+
+/* Compat layout of the lifetime limits; every field is a compat_u64. */
+struct compat_xfrm_lifetime_cfg {
+ compat_u64 soft_byte_limit, hard_byte_limit;
+ compat_u64 soft_packet_limit, hard_packet_limit;
+ compat_u64 soft_add_expires_seconds, hard_add_expires_seconds;
+ compat_u64 soft_use_expires_seconds, hard_use_expires_seconds;
+}; /* same size on 32bit, but only 4 byte alignment required */
+
+/* Compat layout of the current lifetime counters; every field is a compat_u64. */
+struct compat_xfrm_lifetime_cur {
+ compat_u64 bytes, packets, add_time, use_time;
+}; /* same size on 32bit, but only 4 byte alignment required */
+
+/*
+ * Compat layout of the user policy description. It embeds the compat
+ * lifetime structures; per the note below, the native 64-bit structure
+ * carries 4 extra bytes of padding at the end.
+ */
+struct compat_xfrm_userpolicy_info {
+ struct xfrm_selector sel;
+ struct compat_xfrm_lifetime_cfg lft;
+ struct compat_xfrm_lifetime_cur curlft;
+ __u32 priority, index;
+ u8 dir, action, flags, share;
+ /* 4 bytes additional padding on 64bit */
+};
+
+/*
+ * Compat layout of the user SA description. It embeds the compat lifetime
+ * structures; per the note below, the native 64-bit structure carries
+ * 4 extra bytes of padding at the end.
+ */
+struct compat_xfrm_usersa_info {
+ struct xfrm_selector sel;
+ struct xfrm_id id;
+ xfrm_address_t saddr;
+ struct compat_xfrm_lifetime_cfg lft;
+ struct compat_xfrm_lifetime_cur curlft;
+ struct xfrm_stats stats;
+ __u32 seq, reqid;
+ u16 family;
+ u8 mode, replay_window, flags;
+ /* 4 bytes additional padding on 64bit */
+};
+
+/*
+ * Compat layout of the acquire message. The embedded policy is the compat
+ * variant, so the fields after it sit 4 bytes earlier than on 64-bit.
+ */
+struct compat_xfrm_user_acquire {
+ struct xfrm_id id;
+ xfrm_address_t saddr;
+ struct xfrm_selector sel;
+ struct compat_xfrm_userpolicy_info policy;
+ /* 4 bytes additional padding on 64bit */
+ __u32 aalgos, ealgos, calgos, seq;
+};
+
+/* Compat layout of the SPI allocation request: compat SA info plus SPI range. */
+struct compat_xfrm_userspi_info {
+ struct compat_xfrm_usersa_info info;
+ /* 4 bytes additional padding on 64bit */
+ __u32 min, max;
+};
+
+/* Compat layout of the SA expire message: compat SA info plus the hard flag. */
+struct compat_xfrm_user_expire {
+ struct compat_xfrm_usersa_info state;
+ /* 8 bytes additional padding on 64bit */
+ u8 hard;
+};
+
+/* Compat layout of the policy expire message: compat policy plus the hard flag. */
+struct compat_xfrm_user_polexpire {
+ struct compat_xfrm_userpolicy_info pol;
+ /* 8 bytes additional padding on 64bit */
+ u8 hard;
+};
+
+/* Size of a message header structure, given its struct tag. */
+#define XMSGSIZE(type) sizeof(struct type)
+
+/*
+ * Minimum payload size of every XFRM netlink message in compat layout,
+ * indexed by (message type - XFRM_MSG_BASE). Entries that use a compat_*
+ * structure differ in size from the native table; the others use the
+ * native structure unchanged.
+ */
+static const int compat_msg_min[XFRM_NR_MSGTYPES] = {
+ [XFRM_MSG_NEWSA - XFRM_MSG_BASE] = XMSGSIZE(compat_xfrm_usersa_info),
+ [XFRM_MSG_DELSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_id),
+ [XFRM_MSG_GETSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_id),
+ [XFRM_MSG_NEWPOLICY - XFRM_MSG_BASE] = XMSGSIZE(compat_xfrm_userpolicy_info),
+ [XFRM_MSG_DELPOLICY - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_id),
+ [XFRM_MSG_GETPOLICY - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_id),
+ [XFRM_MSG_ALLOCSPI - XFRM_MSG_BASE] = XMSGSIZE(compat_xfrm_userspi_info),
+ [XFRM_MSG_ACQUIRE - XFRM_MSG_BASE] = XMSGSIZE(compat_xfrm_user_acquire),
+ [XFRM_MSG_EXPIRE - XFRM_MSG_BASE] = XMSGSIZE(compat_xfrm_user_expire),
+ [XFRM_MSG_UPDPOLICY - XFRM_MSG_BASE] = XMSGSIZE(compat_xfrm_userpolicy_info),
+ [XFRM_MSG_UPDSA - XFRM_MSG_BASE] = XMSGSIZE(compat_xfrm_usersa_info),
+ [XFRM_MSG_POLEXPIRE - XFRM_MSG_BASE] = XMSGSIZE(compat_xfrm_user_polexpire),
+ [XFRM_MSG_FLUSHSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_flush),
+ [XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = 0,
+ [XFRM_MSG_NEWAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id),
+ [XFRM_MSG_GETAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id),
+ [XFRM_MSG_REPORT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_report),
+ [XFRM_MSG_MIGRATE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_id),
+ [XFRM_MSG_NEWSADINFO - XFRM_MSG_BASE] = sizeof(u32),
+ [XFRM_MSG_GETSADINFO - XFRM_MSG_BASE] = sizeof(u32),
+ [XFRM_MSG_NEWSPDINFO - XFRM_MSG_BASE] = sizeof(u32),
+ [XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = sizeof(u32),
+ [XFRM_MSG_MAPPING - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_mapping)
+};
+
+/*
+ * Attribute policy applied to messages arriving in compat layout.
+ * Only XFRMA_SA and XFRMA_POLICY are sized with compat_* structures here;
+ * all other entries use native structure sizes or scalar netlink types.
+ */
+static const struct nla_policy compat_policy[XFRMA_MAX+1] = {
+ [XFRMA_SA] = { .len = XMSGSIZE(compat_xfrm_usersa_info)},
+ [XFRMA_POLICY] = { .len = XMSGSIZE(compat_xfrm_userpolicy_info)},
+ [XFRMA_LASTUSED] = { .type = NLA_U64},
+ [XFRMA_ALG_AUTH_TRUNC] = { .len = sizeof(struct xfrm_algo_auth)},
+ [XFRMA_ALG_AEAD] = { .len = sizeof(struct xfrm_algo_aead) },
+ [XFRMA_ALG_AUTH] = { .len = sizeof(struct xfrm_algo) },
+ [XFRMA_ALG_CRYPT] = { .len = sizeof(struct xfrm_algo) },
+ [XFRMA_ALG_COMP] = { .len = sizeof(struct xfrm_algo) },
+ [XFRMA_ENCAP] = { .len = sizeof(struct xfrm_encap_tmpl) },
+ [XFRMA_TMPL] = { .len = sizeof(struct xfrm_user_tmpl) },
+ [XFRMA_SEC_CTX] = { .len = sizeof(struct xfrm_sec_ctx) },
+ [XFRMA_LTIME_VAL] = { .len = sizeof(struct xfrm_lifetime_cur) },
+ [XFRMA_REPLAY_VAL] = { .len = sizeof(struct xfrm_replay_state) },
+ [XFRMA_REPLAY_THRESH] = { .type = NLA_U32 },
+ [XFRMA_ETIMER_THRESH] = { .type = NLA_U32 },
+ [XFRMA_SRCADDR] = { .len = sizeof(xfrm_address_t) },
+ [XFRMA_COADDR] = { .len = sizeof(xfrm_address_t) },
+ [XFRMA_POLICY_TYPE] = { .len = sizeof(struct xfrm_userpolicy_type)},
+ [XFRMA_MIGRATE] = { .len = sizeof(struct xfrm_user_migrate) },
+ [XFRMA_KMADDRESS] = { .len = sizeof(struct xfrm_user_kmaddress) },
+ [XFRMA_MARK] = { .len = sizeof(struct xfrm_mark) },
+ [XFRMA_TFCPAD] = { .type = NLA_U32 },
+ [XFRMA_REPLAY_ESN_VAL] = { .len = sizeof(struct xfrm_replay_state_esn) },
+ [XFRMA_SA_EXTRA_FLAGS] = { .type = NLA_U32 },
+ [XFRMA_PROTO] = { .type = NLA_U8 },
+ [XFRMA_ADDRESS_FILTER] = { .len = sizeof(struct xfrm_address_filter) },
+ [XFRMA_OUTPUT_MARK] = { .type = NLA_U32 },
+};
+
+/*
+ * Allocate @size bytes, trying kmalloc() first (with allocation-failure
+ * warnings suppressed) and falling back to __vmalloc() when that fails.
+ */
+static inline void *kvmalloc(size_t size, gfp_t flags)
+{
+ void *mem = kmalloc(size, flags | __GFP_NOWARN);
+
+ return mem ? mem : __vmalloc(size, flags, PAGE_KERNEL);
+}
+
+/*
+ * Tell whether a pad attribute must be emitted before the next attribute.
+ * Never needed when the architecture has efficient unaligned access;
+ * otherwise needed exactly when the skb tail is currently 8-byte aligned.
+ */
+static inline bool nla_need_padding_for_64bit(struct sk_buff *skb)
+{
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ return false;
+#else
+ return IS_ALIGNED((unsigned long)skb_tail_pointer(skb), 8);
+#endif
+}
+
+/*
+ * Aligned size of an attribute carrying @payload bytes, plus room for an
+ * empty pad attribute on architectures without efficient unaligned access.
+ */
+static inline int nla_total_size_64bit(int payload)
+{
+ int total = NLA_ALIGN(nla_attr_size(payload));
+
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ total += NLA_ALIGN(nla_attr_size(0));
+#endif
+ return total;
+}
+
+/*
+ * Emit an empty @padattr attribute when padding is required.
+ * Returns 0 on success or when no padding is needed, -EMSGSIZE when the
+ * pad attribute could not be reserved.
+ */
+static inline int nla_align_64bit(struct sk_buff *skb, int padattr)
+{
+ if (!nla_need_padding_for_64bit(skb))
+ return 0;
+
+ return nla_reserve(skb, padattr, 0) ? 0 : -EMSGSIZE;
+}
+
+/*
+ * Reserve room for an attribute of @attrlen bytes, preceded by a pad
+ * attribute when one is required. No tailroom check is done here and the
+ * result of the padding step is ignored: the caller must have verified
+ * that the skb has enough tailroom.
+ *
+ * File-local like the other netlink helpers in this file, so that it does
+ * not define a global symbol without a prototype.
+ */
+static inline struct nlattr *__nla_reserve_64bit(struct sk_buff *skb, int attrtype,
+ int attrlen, int padattr)
+{
+ nla_align_64bit(skb, padattr);
+
+ return __nla_reserve(skb, attrtype, attrlen);
+}
+
+/*
+ * Append an attribute and copy @attrlen bytes of @data into it, padding
+ * first when required. Performs no tailroom check of its own.
+ */
+static inline void __nla_put_64bit(struct sk_buff *skb, int attrtype,
+ int attrlen, const void *data, int padattr)
+{
+ struct nlattr *attr = __nla_reserve_64bit(skb, attrtype, attrlen, padattr);
+
+ memcpy(nla_data(attr), data, attrlen);
+}
+
+/*
+ * Append an attribute to @skb, inserting a @padattr pad attribute first
+ * when required. Returns 0 on success or -EMSGSIZE when the skb tailroom
+ * is too small for the attribute (including any padding).
+ */
+static inline int nla_put_64bit(struct sk_buff *skb, int attrtype,
+ int attrlen, const void *data, int padattr)
+{
+ size_t needed = nla_need_padding_for_64bit(skb) ?
+ nla_total_size_64bit(attrlen) : nla_total_size(attrlen);
+
+ if (unlikely(skb_tailroom(skb) < needed))
+ return -EMSGSIZE;
+
+ __nla_put_64bit(skb, attrtype, attrlen, data, padattr);
+ return 0;
+}
+
+/*
+ * Start a compat-layout copy of @nlh_src in @skb and translate the fixed
+ * message header structure.
+ *
+ * @type is the message type relative to XFRM_MSG_BASE and is used to index
+ * the size tables; the caller must have range-checked it.
+ *
+ * Returns the new netlink header, or an ERR_PTR():
+ *   -EMSGSIZE   the message does not fit in @skb (or the size tables are
+ *               inconsistent),
+ *   -EOPNOTSUPP the message type has no translation.
+ */
+static struct nlmsghdr *xfrm_nlmsg_put_compat(struct sk_buff *skb,
+ const struct nlmsghdr *nlh_src, u16 type)
+{
+ int payload = compat_msg_min[type];
+ int src_len = xfrm_msg_min[type];
+ struct nlmsghdr *nlh_dst;
+
+ /* Compat messages are shorter or equal to native (+padding) */
+ if (WARN_ON_ONCE(src_len < payload))
+ return ERR_PTR(-EMSGSIZE);
+
+ nlh_dst = nlmsg_put(skb, nlh_src->nlmsg_pid, nlh_src->nlmsg_seq,
+ nlh_src->nlmsg_type, payload, nlh_src->nlmsg_flags);
+ if (!nlh_dst)
+ return ERR_PTR(-EMSGSIZE);
+
+ memset(nlmsg_data(nlh_dst), 0, payload);
+
+ switch (nlh_src->nlmsg_type) {
+ /* Compat message has the same layout as native */
+ case XFRM_MSG_DELSA:
+ case XFRM_MSG_DELPOLICY:
+ case XFRM_MSG_FLUSHSA:
+ case XFRM_MSG_FLUSHPOLICY:
+ case XFRM_MSG_NEWAE:
+ case XFRM_MSG_REPORT:
+ case XFRM_MSG_MIGRATE:
+ case XFRM_MSG_NEWSADINFO:
+ case XFRM_MSG_NEWSPDINFO:
+ case XFRM_MSG_MAPPING:
+ WARN_ON_ONCE(src_len != payload);
+ memcpy(nlmsg_data(nlh_dst), nlmsg_data(nlh_src), src_len);
+ break;
+ /* 4 byte alignment for trailing u64 on native, but not on compat */
+ case XFRM_MSG_NEWSA:
+ case XFRM_MSG_NEWPOLICY:
+ case XFRM_MSG_UPDSA:
+ case XFRM_MSG_UPDPOLICY:
+ WARN_ON_ONCE(src_len != payload + 4);
+ memcpy(nlmsg_data(nlh_dst), nlmsg_data(nlh_src), payload);
+ break;
+ case XFRM_MSG_EXPIRE: {
+ const struct xfrm_user_expire *src_ue = nlmsg_data(nlh_src);
+ struct compat_xfrm_user_expire *dst_ue = nlmsg_data(nlh_dst);
+
+ /* compat_xfrm_user_expire has 4-byte smaller state */
+ memcpy(dst_ue, src_ue, sizeof(dst_ue->state));
+ dst_ue->hard = src_ue->hard;
+ break;
+ }
+ case XFRM_MSG_ACQUIRE: {
+ const struct xfrm_user_acquire *src_ua = nlmsg_data(nlh_src);
+ struct compat_xfrm_user_acquire *dst_ua = nlmsg_data(nlh_dst);
+
+ /* Everything up to aalgos shares the compat layout */
+ memcpy(dst_ua, src_ua, offsetof(struct compat_xfrm_user_acquire, aalgos));
+ dst_ua->aalgos = src_ua->aalgos;
+ dst_ua->ealgos = src_ua->ealgos;
+ dst_ua->calgos = src_ua->calgos;
+ dst_ua->seq = src_ua->seq;
+ break;
+ }
+ case XFRM_MSG_POLEXPIRE: {
+ const struct xfrm_user_polexpire *src_upe = nlmsg_data(nlh_src);
+ struct compat_xfrm_user_polexpire *dst_upe = nlmsg_data(nlh_dst);
+
+ /* compat_xfrm_user_polexpire has 4-byte smaller state */
+ memcpy(dst_upe, src_upe, sizeof(dst_upe->pol));
+ dst_upe->hard = src_upe->hard;
+ break;
+ }
+ case XFRM_MSG_ALLOCSPI: {
+ const struct xfrm_userspi_info *src_usi = nlmsg_data(nlh_src);
+ struct compat_xfrm_userspi_info *dst_usi = nlmsg_data(nlh_dst);
+
+ /* compat_xfrm_userspi_info has 4-byte smaller info: copy only
+ * the compat-sized part, as the expire cases above do.
+ */
+ memcpy(dst_usi, src_usi, sizeof(dst_usi->info));
+ dst_usi->min = src_usi->min;
+ dst_usi->max = src_usi->max;
+ break;
+ }
+ /* Not being sent by kernel */
+ case XFRM_MSG_GETSA:
+ case XFRM_MSG_GETPOLICY:
+ case XFRM_MSG_GETAE:
+ case XFRM_MSG_GETSADINFO:
+ case XFRM_MSG_GETSPDINFO:
+ default:
+ /* Input-dependent, so log once instead of WARN()ing */
+ pr_warn_once("unsupported nlmsg_type %d\n", nlh_src->nlmsg_type);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ return nlh_dst;
+}
+
+/* Append a copy of @src to @dst, keeping its type but using @len as the length. */
+static int xfrm_nla_cpy(struct sk_buff *dst, const struct nlattr *src, int len)
+{
+ const void *payload = nla_data(src);
+ int attrtype = src->nla_type;
+
+ return nla_put(dst, attrtype, len, payload);
+}
+
+/*
+ * Translate one native-layout attribute @src into the compat message being
+ * built in @dst.
+ *
+ * XFRMA_PAD and XFRMA_OFFLOAD_DEV are dropped. XFRMA_SA and XFRMA_POLICY
+ * are truncated to their compat structure sizes. XFRMA_LTIME_VAL and
+ * XFRMA_LASTUSED go through nla_put_64bit(); everything else is copied
+ * verbatim.
+ *
+ * Returns 0 on success, the error from the put helper, or -EOPNOTSUPP for
+ * an attribute type this translator does not know.
+ */
+static int xfrm_xlate64_attr(struct sk_buff *dst, const struct nlattr *src)
+{
+ switch (src->nla_type) {
+ case XFRMA_PAD:
+ case XFRMA_OFFLOAD_DEV:
+ /* Ignore */
+ return 0;
+ case XFRMA_ALG_AUTH:
+ case XFRMA_ALG_CRYPT:
+ case XFRMA_ALG_COMP:
+ case XFRMA_ENCAP:
+ case XFRMA_TMPL:
+ return xfrm_nla_cpy(dst, src, nla_len(src));
+ case XFRMA_SA:
+ return xfrm_nla_cpy(dst, src, XMSGSIZE(compat_xfrm_usersa_info));
+ case XFRMA_POLICY:
+ return xfrm_nla_cpy(dst, src, XMSGSIZE(compat_xfrm_userpolicy_info));
+ case XFRMA_SEC_CTX:
+ return xfrm_nla_cpy(dst, src, nla_len(src));
+ case XFRMA_LTIME_VAL:
+ return nla_put_64bit(dst, src->nla_type, nla_len(src),
+ nla_data(src), XFRMA_PAD);
+ case XFRMA_REPLAY_VAL:
+ case XFRMA_REPLAY_THRESH:
+ case XFRMA_ETIMER_THRESH:
+ case XFRMA_SRCADDR:
+ case XFRMA_COADDR:
+ return xfrm_nla_cpy(dst, src, nla_len(src));
+ case XFRMA_LASTUSED:
+ return nla_put_64bit(dst, src->nla_type, nla_len(src),
+ nla_data(src), XFRMA_PAD);
+ case XFRMA_POLICY_TYPE:
+ case XFRMA_MIGRATE:
+ case XFRMA_ALG_AEAD:
+ case XFRMA_KMADDRESS:
+ case XFRMA_ALG_AUTH_TRUNC:
+ case XFRMA_MARK:
+ case XFRMA_TFCPAD:
+ case XFRMA_REPLAY_ESN_VAL:
+ case XFRMA_SA_EXTRA_FLAGS:
+ case XFRMA_PROTO:
+ case XFRMA_ADDRESS_FILTER:
+ case XFRMA_OUTPUT_MARK:
+ return xfrm_nla_cpy(dst, src, nla_len(src));
+ default:
+ /* Fails the build when a new attribute is added without a case */
+ BUILD_BUG_ON(XFRMA_MAX != XFRMA_OUTPUT_MARK);
+ /* Input-dependent, so log once instead of WARN()ing */
+ pr_warn_once("unsupported nla_type %d\n", src->nla_type);
+ return -EOPNOTSUPP;
+ }
+}
+
+/* Take kernel-built (64bit layout) and create 32bit layout for userspace */
+/*
+ * Translate the native-layout message @nlh_src into a compat-layout message
+ * appended to @dst: header structure first, then every attribute in order.
+ * Returns 0 on success or the first error encountered.
+ */
+static int xfrm_xlate64(struct sk_buff *dst, const struct nlmsghdr *nlh_src)
+{
+ const u16 type = nlh_src->nlmsg_type - XFRM_MSG_BASE;
+ const struct nlattr *attr, *first;
+ struct nlmsghdr *nlh_dst;
+ int attrs_len, rem, err;
+
+ nlh_dst = xfrm_nlmsg_put_compat(dst, nlh_src, type);
+ if (IS_ERR(nlh_dst))
+ return PTR_ERR(nlh_dst);
+
+ /* Attributes start right after the native header structure */
+ first = nlmsg_attrdata(nlh_src, xfrm_msg_min[type]);
+ attrs_len = nlmsg_attrlen(nlh_src, xfrm_msg_min[type]);
+
+ nla_for_each_attr(attr, first, attrs_len, rem) {
+ err = xfrm_xlate64_attr(dst, attr);
+ if (err)
+ return err;
+ }
+
+ nlmsg_end(dst, nlh_dst);
+ return 0;
+}
+
+/*
+ * Attach a compat-layout translation of @nlh_src to @skb.
+ *
+ * The translation is written into the skb found in @skb's frag_list; that
+ * skb is allocated here (GFP_ATOMIC) when the list is empty. When the
+ * translation fails, an skb allocated by this call is freed and detached
+ * again; a pre-existing one is left in place.
+ *
+ * Returns 0 on success, -EOPNOTSUPP for an out-of-range message type,
+ * -ENOMEM when the allocation fails, or the error from the translation.
+ */
+static int xfrm_alloc_compat(struct sk_buff *skb, const struct nlmsghdr *nlh_src)
+{
+ u16 type = nlh_src->nlmsg_type - XFRM_MSG_BASE;
+ struct sk_buff *new = NULL;
+ int err;
+
+ if (type >= ARRAY_SIZE(xfrm_msg_min)) {
+ /* Input-dependent, so log once instead of WARN()ing */
+ pr_warn_once("unsupported nlmsg_type %d\n", nlh_src->nlmsg_type);
+ return -EOPNOTSUPP;
+ }
+
+ if (skb_shinfo(skb)->frag_list == NULL) {
+ new = alloc_skb(skb->len + skb_tailroom(skb), GFP_ATOMIC);
+ if (!new)
+ return -ENOMEM;
+ skb_shinfo(skb)->frag_list = new;
+ }
+
+ err = xfrm_xlate64(skb_shinfo(skb)->frag_list, nlh_src);
+ if (err) {
+ /* Only undo what this call set up */
+ if (new) {
+ kfree_skb(new);
+ skb_shinfo(skb)->frag_list = NULL;
+ }
+ return err;
+ }
+
+ return 0;
+}
+
+/* Calculates len of translated 64-bit message. */
+/*
+ * Payload length the compat message @src will have once translated to the
+ * native layout: the original payload length, plus the growth of the header
+ * structure, plus 4 bytes for each of XFRMA_SA and XFRMA_POLICY if present.
+ */
+static size_t xfrm_user_rcv_calculate_len64(const struct nlmsghdr *src,
+ struct nlattr *attrs[XFRMA_MAX+1])
+{
+ size_t extra = 0;
+
+ switch (src->nlmsg_type) {
+ case XFRM_MSG_NEWSA:
+ case XFRM_MSG_NEWPOLICY:
+ case XFRM_MSG_ALLOCSPI:
+ case XFRM_MSG_ACQUIRE:
+ case XFRM_MSG_UPDPOLICY:
+ case XFRM_MSG_UPDSA:
+ extra = 4;
+ break;
+ case XFRM_MSG_EXPIRE:
+ case XFRM_MSG_POLEXPIRE:
+ extra = 8;
+ break;
+ default:
+ break;
+ }
+
+ if (attrs[XFRMA_SA])
+ extra += 4;
+ if (attrs[XFRMA_POLICY])
+ extra += 4;
+
+ /* XXX: some attrs may need to be realigned
+ * if !CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ */
+
+ return nlmsg_len(src) + extra;
+}
+
+/*
+ * Append attribute @src to the message being assembled in the flat buffer
+ * @dst (which starts with the netlink header).
+ *
+ * @pos:      in/out write offset into @dst
+ * @size:     total size of @dst
+ * @copy_len: payload bytes to take from @src
+ * @payload:  payload length the new attribute is given; bytes beyond
+ *            @copy_len are zero-filled
+ *
+ * Returns 0 on success or -ENOBUFS when the attribute does not fit.
+ */
+static int xfrm_attr_cpy32(void *dst, size_t *pos, const struct nlattr *src,
+ size_t size, int copy_len, int payload)
+{
+ struct nlmsghdr *nlmsg = dst;
+ struct nlattr *nla;
+
+ /* A translated attribute is never expected to shrink */
+ if (WARN_ON_ONCE(copy_len > payload))
+ copy_len = payload;
+
+ if (size - *pos < nla_attr_size(payload))
+ return -ENOBUFS;
+
+ nla = dst + *pos;
+
+ memcpy(nla, src, nla_attr_size(copy_len));
+ nla->nla_len = nla_attr_size(payload);
+ /* Advance only past the copied part, so that the zero padding below
+ * lands inside this attribute and *pos ends up exactly at its end.
+ */
+ *pos += nla_attr_size(copy_len);
+ nlmsg->nlmsg_len += nla->nla_len;
+
+ memset(dst + *pos, 0, payload - copy_len);
+ *pos += payload - copy_len;
+
+ return 0;
+}
+
+/*
+ * Translate one compat-layout attribute @nla into the native message being
+ * assembled in @dst at offset *@pos (@size is the size of @dst).
+ *
+ * Returns 0 on success, -EOPNOTSUPP for an unknown attribute type or an
+ * unexpected length, or the error from xfrm_attr_cpy32().
+ */
+static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla,
+ size_t *pos, size_t size)
+{
+ int type = nla_type(nla);
+ u16 pol_len32, pol_len64;
+ int err;
+
+ if (type > XFRMA_MAX) {
+ BUILD_BUG_ON(XFRMA_MAX != XFRMA_OUTPUT_MARK);
+ return -EOPNOTSUPP;
+ }
+ /* Shorter than the compat policy's length for this type: reject */
+ if (nla_len(nla) < compat_policy[type].len) {
+ return -EOPNOTSUPP;
+ }
+
+ pol_len32 = compat_policy[type].len;
+ pol_len64 = xfrma_policy[type].len;
+
+ /* XFRMA_SA and XFRMA_POLICY - need to know how-to translate */
+ if (pol_len32 != pol_len64) {
+ /* Structures that change size must match the compat size exactly */
+ if (nla_len(nla) != compat_policy[type].len) {
+ return -EOPNOTSUPP;
+ }
+ err = xfrm_attr_cpy32(dst, pos, nla, size, pol_len32, pol_len64);
+ if (err)
+ return err;
+ /* NOTE(review): control falls through to the copy below, which
+ * appends the same attribute a second time with its original
+ * length - confirm this is intended.
+ */
+ }
+
+ return xfrm_attr_cpy32(dst, pos, nla, size, nla_len(nla), nla_len(nla));
+}
+
+/*
+ * Build the native-layout message @dst from the compat-layout message @src.
+ *
+ * @attrs: attributes of @src, indexed by attribute type
+ * @size:  size of the buffer behind @dst
+ * @type:  message type relative to XFRM_MSG_BASE
+ *
+ * The netlink header is copied, the header structure is translated per
+ * message type, then every present attribute except XFRMA_PAD and
+ * XFRMA_OFFLOAD_DEV is appended in ascending type order.
+ *
+ * Returns 0 on success, -EOPNOTSUPP for an unknown message type, or the
+ * error from attribute translation.
+ */
+static int xfrm_xlate32(struct nlmsghdr *dst, const struct nlmsghdr *src,
+ struct nlattr *attrs[XFRMA_MAX+1],
+ size_t size, u8 type)
+{
+ size_t pos;
+ int i;
+
+ memcpy(dst, src, NLMSG_HDRLEN);
+ /* Length restarts at the native header size; attributes add to it */
+ dst->nlmsg_len = NLMSG_HDRLEN + xfrm_msg_min[type];
+ memset(nlmsg_data(dst), 0, xfrm_msg_min[type]);
+
+ switch (src->nlmsg_type) {
+ /* Compat message has the same layout as native */
+ case XFRM_MSG_DELSA:
+ case XFRM_MSG_GETSA:
+ case XFRM_MSG_DELPOLICY:
+ case XFRM_MSG_GETPOLICY:
+ case XFRM_MSG_FLUSHSA:
+ case XFRM_MSG_FLUSHPOLICY:
+ case XFRM_MSG_NEWAE:
+ case XFRM_MSG_GETAE:
+ case XFRM_MSG_REPORT:
+ case XFRM_MSG_MIGRATE:
+ case XFRM_MSG_NEWSADINFO:
+ case XFRM_MSG_GETSADINFO:
+ case XFRM_MSG_NEWSPDINFO:
+ case XFRM_MSG_GETSPDINFO:
+ case XFRM_MSG_MAPPING:
+ memcpy(nlmsg_data(dst), nlmsg_data(src), compat_msg_min[type]);
+ break;
+ /* 4 byte alignment for trailing u64 on native, but not on compat */
+ case XFRM_MSG_NEWSA:
+ case XFRM_MSG_NEWPOLICY:
+ case XFRM_MSG_UPDSA:
+ case XFRM_MSG_UPDPOLICY:
+ memcpy(nlmsg_data(dst), nlmsg_data(src), compat_msg_min[type]);
+ break;
+ case XFRM_MSG_EXPIRE: {
+ const struct compat_xfrm_user_expire *src_ue = nlmsg_data(src);
+ struct xfrm_user_expire *dst_ue = nlmsg_data(dst);
+
+ /* compat_xfrm_user_expire has 4-byte smaller state */
+ memcpy(dst_ue, src_ue, sizeof(src_ue->state));
+ dst_ue->hard = src_ue->hard;
+ break;
+ }
+ case XFRM_MSG_ACQUIRE: {
+ const struct compat_xfrm_user_acquire *src_ua = nlmsg_data(src);
+ struct xfrm_user_acquire *dst_ua = nlmsg_data(dst);
+
+ /* Everything before aalgos is copied as one compat-sized run */
+ memcpy(dst_ua, src_ua, offsetof(struct compat_xfrm_user_acquire, aalgos));
+ dst_ua->aalgos = src_ua->aalgos;
+ dst_ua->ealgos = src_ua->ealgos;
+ dst_ua->calgos = src_ua->calgos;
+ dst_ua->seq = src_ua->seq;
+ break;
+ }
+ case XFRM_MSG_POLEXPIRE: {
+ const struct compat_xfrm_user_polexpire *src_upe = nlmsg_data(src);
+ struct xfrm_user_polexpire *dst_upe = nlmsg_data(dst);
+
+ /* compat_xfrm_user_polexpire has 4-byte smaller state */
+ memcpy(dst_upe, src_upe, sizeof(src_upe->pol));
+ dst_upe->hard = src_upe->hard;
+ break;
+ }
+ case XFRM_MSG_ALLOCSPI: {
+ const struct compat_xfrm_userspi_info *src_usi = nlmsg_data(src);
+ struct xfrm_userspi_info *dst_usi = nlmsg_data(dst);
+
+ /* compat_xfrm_userspi_info has 4-byte smaller info */
+ memcpy(dst_usi, src_usi, sizeof(src_usi->info));
+ dst_usi->min = src_usi->min;
+ dst_usi->max = src_usi->max;
+ break;
+ }
+ default:
+ return -EOPNOTSUPP;
+ }
+ /* Attributes are appended right after the native header structure */
+ pos = dst->nlmsg_len;
+
+ for (i = 1; i < XFRMA_MAX + 1; i++) {
+ int err;
+
+ if (i == XFRMA_PAD || i == XFRMA_OFFLOAD_DEV)
+ continue;
+
+ if (!attrs[i])
+ continue;
+
+ err = xfrm_xlate32_attr(dst, attrs[i], &pos, size);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+/*
+ * Translate a compat-layout message received from userspace into the
+ * native layout.
+ *
+ * @h32:     the received message
+ * @maxtype: highest attribute type to parse; 0 selects XFRMA_MAX
+ * @policy:  attribute policy to parse with; NULL selects compat_policy
+ *
+ * Returns NULL when the message can be used as is (GETSA/GETPOLICY dump
+ * requests, or messages whose size does not change), an ERR_PTR() on
+ * failure, or a buffer from kvmalloc() holding the translated message.
+ * NOTE(review): the caller presumably releases that buffer with kvfree();
+ * confirm against the call site.
+ */
+static struct nlmsghdr *xfrm_user_rcv_msg_compat(const struct nlmsghdr *h32,
+ int maxtype, const struct nla_policy *policy)
+{
+ /* netlink_rcv_skb() checks if a message has full (struct nlmsghdr) */
+ u16 type = h32->nlmsg_type - XFRM_MSG_BASE;
+ /* Zero-initialized: with a @maxtype below XFRMA_MAX the parser is only
+ * asked to handle the first maxtype + 1 entries, yet the length
+ * calculation and xfrm_xlate32() look at all of them.
+ */
+ struct nlattr *attrs[XFRMA_MAX+1] = { NULL };
+ struct nlmsghdr *h64;
+ size_t len;
+ int err;
+
+ BUILD_BUG_ON(ARRAY_SIZE(xfrm_msg_min) != ARRAY_SIZE(compat_msg_min));
+
+ if (type >= ARRAY_SIZE(xfrm_msg_min))
+ return ERR_PTR(-EINVAL);
+
+ /* Don't call parse: the message might have only nlmsg header */
+ if ((h32->nlmsg_type == XFRM_MSG_GETSA ||
+ h32->nlmsg_type == XFRM_MSG_GETPOLICY) &&
+ (h32->nlmsg_flags & NLM_F_DUMP))
+ return NULL;
+
+ err = nlmsg_parse(h32, compat_msg_min[type], attrs,
+ maxtype ? : XFRMA_MAX, policy ? : compat_policy);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ len = xfrm_user_rcv_calculate_len64(h32, attrs);
+ /* The message doesn't need translation */
+ if (len == nlmsg_len(h32))
+ return NULL;
+
+ len += NLMSG_HDRLEN;
+ h64 = kvmalloc(len, GFP_KERNEL | __GFP_ZERO);
+ if (!h64)
+ return ERR_PTR(-ENOMEM);
+
+ err = xfrm_xlate32(h64, h32, attrs, len, type);
+ if (err < 0) {
+ kvfree(h64);
+ return ERR_PTR(err);
+ }
+
+ return h64;
+}
+
+/*
+ * Convert a compat-layout policy blob into the native layout.
+ *
+ * *@pdata32 holds @optlen bytes: a compat policy structure followed by
+ * trailing data. A new buffer 4 bytes larger is allocated, the policy is
+ * copied, 4 zero bytes are inserted after it and the trailing data follows.
+ * On success the old buffer is freed and *@pdata32 points to the new one.
+ *
+ * Returns 0 on success, -EINVAL if @optlen is smaller than the compat
+ * policy structure, -ENOMEM if the allocation fails.
+ */
+static int xfrm_user_policy_compat(u8 **pdata32, int optlen)
+{
+ const size_t hdr32 = sizeof(struct compat_xfrm_userpolicy_info);
+ u8 *data32 = *pdata32;
+ u8 *data64;
+
+ if (optlen < hdr32)
+ return -EINVAL;
+
+ data64 = kmalloc_track_caller(optlen + 4, GFP_USER | __GFP_NOWARN);
+ if (!data64)
+ return -ENOMEM;
+
+ /* Policy structure, then the 4 bytes the native layout adds */
+ memcpy(data64, data32, hdr32);
+ memset(data64 + hdr32, 0, 4);
+
+ /* Whatever follows the policy moves 4 bytes further out */
+ memcpy(data64 + hdr32 + 4, data32 + hdr32, optlen - hdr32);
+
+ kfree(data32);
+ *pdata32 = data64;
+ return 0;
+}
+
+/* Hooks this module registers with XFRM: one entry point per direction. */
+static struct xfrm_translator xfrm_translator = {
+ .owner = THIS_MODULE,
+ .alloc_compat = xfrm_alloc_compat, /* native -> compat */
+ .rcv_msg_compat = xfrm_user_rcv_msg_compat, /* compat -> native */
+ .xlate_user_policy_sockptr = xfrm_user_policy_compat, /* policy blob */
+};
+
+/* Module init: register the translator; returns the registration result. */
+static int __init xfrm_compat_init(void)
+{
+ return xfrm_register_translator(&xfrm_translator);
+}
+
+/* Module exit: unregister the translator registered at init. */
+static void __exit xfrm_compat_exit(void)
+{
+ xfrm_unregister_translator(&xfrm_translator);
+}
+
+module_init(xfrm_compat_init);
+module_exit(xfrm_compat_exit);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Dmitry Safonov");
+MODULE_DESCRIPTION("XFRM 32-bit compatibility layer");
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 1c4ad477ce93..d613bf77cc0f 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -207,15 +207,15 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
family = XFRM_SPI_SKB_CB(skb)->family;
/* if tunnel is present override skb->mark value with tunnel i_key */
- if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4) {
- switch (family) {
- case AF_INET:
+ switch (family) {
+ case AF_INET:
+ if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4)
mark = be32_to_cpu(XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4->parms.i_key);
- break;
- case AF_INET6:
+ break;
+ case AF_INET6:
+ if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6)
mark = be32_to_cpu(XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6->parms.i_key);
- break;
- }
+ break;
}
/* Allocate new secpath or COW existing one. */
@@ -302,7 +302,7 @@ resume:
dev_put(skb->dev);
spin_lock(&x->lock);
- if (nexthdr <= 0) {
+ if (nexthdr < 0) {
if (nexthdr == -EBADMSG) {
xfrm_audit_state_icvfail(x, skb,
x->type->proto);
@@ -315,7 +315,7 @@ resume:
/* only the first xfrm gets the encap type */
encap_type = 0;
- if (async && x->repl->recheck(x, skb, seq)) {
+ if (x->repl->recheck(x, skb, seq)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR);
goto drop_unlock;
}
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 16e828f2540f..6986da7e4ce8 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -240,7 +240,8 @@ void xfrm_local_error(struct sk_buff *skb, int mtu)
if (skb->protocol == htons(ETH_P_IP))
proto = AF_INET;
- else if (skb->protocol == htons(ETH_P_IPV6))
+ else if (skb->protocol == htons(ETH_P_IPV6) &&
+ skb->sk->sk_family == AF_INET6)
proto = AF_INET6;
else
return;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index e098ca928538..2c34636218c0 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -330,7 +330,9 @@ EXPORT_SYMBOL(xfrm_policy_destroy);
static void xfrm_policy_kill(struct xfrm_policy *policy)
{
+ write_lock_bh(&policy->lock);
policy->walk.dead = 1;
+ write_unlock_bh(&policy->lock);
atomic_inc(&policy->genid);
@@ -738,12 +740,7 @@ static void xfrm_policy_requeue(struct xfrm_policy *old,
static bool xfrm_policy_mark_match(struct xfrm_policy *policy,
struct xfrm_policy *pol)
{
- u32 mark = policy->mark.v & policy->mark.m;
-
- if (policy->mark.v == pol->mark.v && policy->mark.m == pol->mark.m)
- return true;
-
- if ((mark & pol->mark.m) == pol->mark.v &&
+ if (policy->mark.v == pol->mark.v &&
policy->priority == pol->priority)
return true;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 7d50a371574c..27ececc3fa37 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -332,6 +332,7 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x)
{
tasklet_hrtimer_cancel(&x->mtimer);
del_timer_sync(&x->rtimer);
+ kfree(x->aead);
kfree(x->aalg);
kfree(x->ealg);
kfree(x->calg);
@@ -741,8 +742,10 @@ static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
*/
if (x->km.state == XFRM_STATE_VALID) {
if ((x->sel.family &&
- !xfrm_selector_match(&x->sel, fl, x->sel.family)) ||
- !security_xfrm_state_pol_flow_match(x, pol, fl))
+ (x->sel.family != family ||
+ !xfrm_selector_match(&x->sel, fl, family))) ||
+ !security_xfrm_state_pol_flow_match(x, pol,
+ &fl->u.__fl_common))
return;
if (!*best ||
@@ -754,8 +757,11 @@ static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
*acq_in_progress = 1;
} else if (x->km.state == XFRM_STATE_ERROR ||
x->km.state == XFRM_STATE_EXPIRED) {
- if (xfrm_selector_match(&x->sel, fl, x->sel.family) &&
- security_xfrm_state_pol_flow_match(x, pol, fl))
+ if ((!x->sel.family ||
+ (x->sel.family == family &&
+ xfrm_selector_match(&x->sel, fl, family))) &&
+ security_xfrm_state_pol_flow_match(x, pol,
+ &fl->u.__fl_common))
*error = -ESRCH;
}
}
@@ -790,7 +796,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
tmpl->mode == x->props.mode &&
tmpl->id.proto == x->id.proto &&
(tmpl->id.spi == x->id.spi || !tmpl->id.spi))
- xfrm_state_look_at(pol, x, fl, encap_family,
+ xfrm_state_look_at(pol, x, fl, family,
&best, &acquire_in_progress, &error);
}
if (best || acquire_in_progress)
@@ -806,7 +812,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
tmpl->mode == x->props.mode &&
tmpl->id.proto == x->id.proto &&
(tmpl->id.spi == x->id.spi || !tmpl->id.spi))
- xfrm_state_look_at(pol, x, fl, encap_family,
+ xfrm_state_look_at(pol, x, fl, family,
&best, &acquire_in_progress, &error);
}
@@ -1206,7 +1212,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig)
x->tfcpad = orig->tfcpad;
x->replay_maxdiff = orig->replay_maxdiff;
x->replay_maxage = orig->replay_maxage;
- x->curlft.add_time = orig->curlft.add_time;
+ memcpy(&x->curlft, &orig->curlft, sizeof(x->curlft));
x->km.state = orig->km.state;
x->km.seq = orig->km.seq;
x->replay = orig->replay;
@@ -1559,6 +1565,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high)
int err = -ENOENT;
__be32 minspi = htonl(low);
__be32 maxspi = htonl(high);
+ __be32 newspi = 0;
u32 mark = x->mark.v & x->mark.m;
spin_lock_bh(&x->lock);
@@ -1577,21 +1584,22 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high)
xfrm_state_put(x0);
goto unlock;
}
- x->id.spi = minspi;
+ newspi = minspi;
} else {
u32 spi = 0;
for (h = 0; h < high-low+1; h++) {
spi = low + prandom_u32()%(high-low+1);
x0 = xfrm_state_lookup(net, mark, &x->id.daddr, htonl(spi), x->id.proto, x->props.family);
if (x0 == NULL) {
- x->id.spi = htonl(spi);
+ newspi = htonl(spi);
break;
}
xfrm_state_put(x0);
}
}
- if (x->id.spi) {
+ if (newspi) {
spin_lock_bh(&net->xfrm.xfrm_state_lock);
+ x->id.spi = newspi;
h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family);
hlist_add_head(&x->byspi, net->xfrm.state_byspi+h);
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
@@ -1850,6 +1858,66 @@ bool km_is_alive(const struct km_event *c)
}
EXPORT_SYMBOL(km_is_alive);
+#if IS_ENABLED(CONFIG_XFRM_USER_COMPAT)
+static DEFINE_SPINLOCK(xfrm_translator_lock);
+static struct xfrm_translator __rcu *xfrm_translator;
+
+struct xfrm_translator *xfrm_get_translator(void)
+{
+ struct xfrm_translator *xtr;
+
+ rcu_read_lock();
+ xtr = rcu_dereference(xfrm_translator);
+ if (unlikely(!xtr))
+ goto out;
+ if (!try_module_get(xtr->owner))
+ xtr = NULL;
+out:
+ rcu_read_unlock();
+ return xtr;
+}
+EXPORT_SYMBOL_GPL(xfrm_get_translator);
+
+void xfrm_put_translator(struct xfrm_translator *xtr)
+{
+ module_put(xtr->owner);
+}
+EXPORT_SYMBOL_GPL(xfrm_put_translator);
+
+int xfrm_register_translator(struct xfrm_translator *xtr)
+{
+ int err = 0;
+
+ spin_lock_bh(&xfrm_translator_lock);
+ if (unlikely(xfrm_translator != NULL))
+ err = -EEXIST;
+ else
+ rcu_assign_pointer(xfrm_translator, xtr);
+ spin_unlock_bh(&xfrm_translator_lock);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(xfrm_register_translator);
+
+int xfrm_unregister_translator(struct xfrm_translator *xtr)
+{
+ int err = 0;
+
+ spin_lock_bh(&xfrm_translator_lock);
+ if (likely(xfrm_translator != NULL)) {
+ if (rcu_access_pointer(xfrm_translator) != xtr)
+ err = -EINVAL;
+ else
+ RCU_INIT_POINTER(xfrm_translator, NULL);
+ }
+ spin_unlock_bh(&xfrm_translator_lock);
+ synchronize_rcu();
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(xfrm_unregister_translator);
+#endif
+
int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
{
int err;
@@ -1875,6 +1943,23 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen
if (copy_from_user(data, optval, optlen))
goto out;
+ /* Use the 64-bit / untranslated format on Android, even for compat */
+ if (!IS_ENABLED(CONFIG_ANDROID) || IS_ENABLED(CONFIG_XFRM_USER_COMPAT)) {
+ if (is_compat_task()) {
+ struct xfrm_translator *xtr = xfrm_get_translator();
+
+ if (!xtr)
+ return -EOPNOTSUPP;
+
+ err = xtr->xlate_user_policy_sockptr(&data, optlen);
+ xfrm_put_translator(xtr);
+ if (err) {
+ kfree(data);
+ return err;
+ }
+ }
+ }
+
err = -EINVAL;
rcu_read_lock();
list_for_each_entry_rcu(km, &xfrm_km_list, list) {
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 69126af39c79..3fd866867ce9 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -109,7 +109,8 @@ static inline int verify_sec_ctx_len(struct nlattr **attrs)
return 0;
uctx = nla_data(rt);
- if (uctx->len != (sizeof(struct xfrm_user_sec_ctx) + uctx->ctx_len))
+ if (uctx->len > nla_len(rt) ||
+ uctx->len != (sizeof(struct xfrm_user_sec_ctx) + uctx->ctx_len))
return -EINVAL;
return 0;
@@ -565,6 +566,20 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
copy_from_user_state(x, p);
+ if (attrs[XFRMA_ENCAP]) {
+ x->encap = kmemdup(nla_data(attrs[XFRMA_ENCAP]),
+ sizeof(*x->encap), GFP_KERNEL);
+ if (x->encap == NULL)
+ goto error;
+ }
+
+ if (attrs[XFRMA_COADDR]) {
+ x->coaddr = kmemdup(nla_data(attrs[XFRMA_COADDR]),
+ sizeof(*x->coaddr), GFP_KERNEL);
+ if (x->coaddr == NULL)
+ goto error;
+ }
+
if (attrs[XFRMA_SA_EXTRA_FLAGS])
x->props.extra_flags = nla_get_u32(attrs[XFRMA_SA_EXTRA_FLAGS]);
@@ -585,23 +600,9 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
attrs[XFRMA_ALG_COMP])))
goto error;
- if (attrs[XFRMA_ENCAP]) {
- x->encap = kmemdup(nla_data(attrs[XFRMA_ENCAP]),
- sizeof(*x->encap), GFP_KERNEL);
- if (x->encap == NULL)
- goto error;
- }
-
if (attrs[XFRMA_TFCPAD])
x->tfcpad = nla_get_u32(attrs[XFRMA_TFCPAD]);
- if (attrs[XFRMA_COADDR]) {
- x->coaddr = kmemdup(nla_data(attrs[XFRMA_COADDR]),
- sizeof(*x->coaddr), GFP_KERNEL);
- if (x->coaddr == NULL)
- goto error;
- }
-
xfrm_mark_get(attrs, &x->mark);
if (attrs[XFRMA_OUTPUT_MARK])
@@ -611,9 +612,12 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
if (err)
goto error;
- if (attrs[XFRMA_SEC_CTX] &&
- security_xfrm_state_alloc(x, nla_data(attrs[XFRMA_SEC_CTX])))
- goto error;
+ if (attrs[XFRMA_SEC_CTX]) {
+ err = security_xfrm_state_alloc(x,
+ nla_data(attrs[XFRMA_SEC_CTX]));
+ if (err)
+ goto error;
+ }
if ((err = xfrm_alloc_replay_state_esn(&x->replay_esn, &x->preplay_esn,
attrs[XFRMA_REPLAY_ESN_VAL])))
@@ -904,6 +908,7 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr)
struct xfrm_dump_info *sp = ptr;
struct sk_buff *in_skb = sp->in_skb;
struct sk_buff *skb = sp->out_skb;
+ struct xfrm_translator *xtr;
struct xfrm_usersa_info *p;
struct nlmsghdr *nlh;
int err;
@@ -921,6 +926,18 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr)
return err;
}
nlmsg_end(skb, nlh);
+
+ xtr = xfrm_get_translator();
+ if (xtr) {
+ err = xtr->alloc_compat(skb, nlh);
+
+ xfrm_put_translator(xtr);
+ if (err) {
+ nlmsg_cancel(skb, nlh);
+ return err;
+ }
+ }
+
return 0;
}
@@ -930,11 +947,11 @@ static int xfrm_dump_sa_done(struct netlink_callback *cb)
struct sock *sk = cb->skb->sk;
struct net *net = sock_net(sk);
- xfrm_state_walk_done(walk, net);
+ if (cb->args[0])
+ xfrm_state_walk_done(walk, net);
return 0;
}
-static const struct nla_policy xfrma_policy[XFRMA_MAX+1];
static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
@@ -955,8 +972,6 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
u8 proto = 0;
int err;
- cb->args[0] = 1;
-
err = nlmsg_parse(cb->nlh, 0, attrs, XFRMA_MAX,
xfrma_policy);
if (err < 0)
@@ -973,6 +988,7 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
proto = nla_get_u8(attrs[XFRMA_PROTO]);
xfrm_state_walk_init(walk, proto, filter);
+ cb->args[0] = 1;
}
(void) xfrm_state_walk(net, walk, dump_one_state, &info);
@@ -1012,12 +1028,24 @@ static inline int xfrm_nlmsg_multicast(struct net *net, struct sk_buff *skb,
u32 pid, unsigned int group)
{
struct sock *nlsk = rcu_dereference(net->xfrm.nlsk);
+ struct xfrm_translator *xtr;
if (!nlsk) {
kfree_skb(skb);
return -EPIPE;
}
+ xtr = xfrm_get_translator();
+ if (xtr) {
+ int err = xtr->alloc_compat(skb, nlmsg_hdr(skb));
+
+ xfrm_put_translator(xtr);
+ if (err) {
+ kfree_skb(skb);
+ return err;
+ }
+ }
+
return nlmsg_multicast(nlsk, skb, pid, group, GFP_ATOMIC);
}
@@ -1235,6 +1263,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net *net = sock_net(skb->sk);
struct xfrm_state *x;
struct xfrm_userspi_info *p;
+ struct xfrm_translator *xtr;
struct sk_buff *resp_skb;
xfrm_address_t *daddr;
int family;
@@ -1280,6 +1309,17 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
goto out;
}
+ xtr = xfrm_get_translator();
+ if (xtr) {
+ err = xtr->alloc_compat(skb, nlmsg_hdr(skb));
+
+ xfrm_put_translator(xtr);
+ if (err) {
+ kfree_skb(resp_skb);
+ goto out;
+ }
+ }
+
err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid);
out:
@@ -1683,6 +1723,7 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr
struct xfrm_userpolicy_info *p;
struct sk_buff *in_skb = sp->in_skb;
struct sk_buff *skb = sp->out_skb;
+ struct xfrm_translator *xtr;
struct nlmsghdr *nlh;
int err;
@@ -1705,6 +1746,18 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr
return err;
}
nlmsg_end(skb, nlh);
+
+ xtr = xfrm_get_translator();
+ if (xtr) {
+ err = xtr->alloc_compat(skb, nlh);
+
+ xfrm_put_translator(xtr);
+ if (err) {
+ nlmsg_cancel(skb, nlh);
+ return err;
+ }
+ }
+
return 0;
}
@@ -2186,6 +2239,9 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh,
err = verify_newpolicy_info(&ua->policy);
if (err)
+ goto free_state;
+ err = verify_sec_ctx_len(attrs);
+ if (err)
goto bad_policy;
/* build an XP */
@@ -2426,7 +2482,7 @@ static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
#define XMSGSIZE(type) sizeof(struct type)
-static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = {
+const int xfrm_msg_min[XFRM_NR_MSGTYPES] = {
[XFRM_MSG_NEWSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_info),
[XFRM_MSG_DELSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_id),
[XFRM_MSG_GETSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_id),
@@ -2449,10 +2505,11 @@ static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = {
[XFRM_MSG_NEWSPDINFO - XFRM_MSG_BASE] = sizeof(u32),
[XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = sizeof(u32),
};
+EXPORT_SYMBOL_GPL(xfrm_msg_min);
#undef XMSGSIZE
-static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
+const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
[XFRMA_SA] = { .len = sizeof(struct xfrm_usersa_info)},
[XFRMA_POLICY] = { .len = sizeof(struct xfrm_userpolicy_info)},
[XFRMA_LASTUSED] = { .type = NLA_U64},
@@ -2481,6 +2538,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
[XFRMA_ADDRESS_FILTER] = { .len = sizeof(struct xfrm_address_filter) },
[XFRMA_OUTPUT_MARK] = { .len = NLA_U32 },
};
+EXPORT_SYMBOL_GPL(xfrma_policy);
static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = {
[XFRMA_SPD_IPV4_HTHRESH] = { .len = sizeof(struct xfrmu_spdhthresh) },
@@ -2529,6 +2587,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
struct net *net = sock_net(skb->sk);
struct nlattr *attrs[XFRMA_MAX+1];
const struct xfrm_link *link;
+ struct nlmsghdr *nlh64 = NULL;
int type, err;
type = nlh->nlmsg_type;
@@ -2542,32 +2601,58 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
if (!netlink_net_capable(skb, CAP_NET_ADMIN))
return -EPERM;
+ /* Use the 64-bit / untranslated format on Android, even for compat */
+ if (!IS_ENABLED(CONFIG_ANDROID) || IS_ENABLED(CONFIG_XFRM_USER_COMPAT)) {
+ if (is_compat_task()) {
+ struct xfrm_translator *xtr = xfrm_get_translator();
+
+ if (!xtr)
+ return -EOPNOTSUPP;
+
+ nlh64 = xtr->rcv_msg_compat(nlh, link->nla_max,
+ link->nla_pol);
+ xfrm_put_translator(xtr);
+ if (IS_ERR(nlh64))
+ return PTR_ERR(nlh64);
+ if (nlh64)
+ nlh = nlh64;
+ }
+ }
+
if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) ||
type == (XFRM_MSG_GETPOLICY - XFRM_MSG_BASE)) &&
(nlh->nlmsg_flags & NLM_F_DUMP)) {
- if (link->dump == NULL)
- return -EINVAL;
+ struct netlink_dump_control c = {
+ .start = link->start,
+ .dump = link->dump,
+ .done = link->done,
+ };
- {
- struct netlink_dump_control c = {
- .start = link->start,
- .dump = link->dump,
- .done = link->done,
- };
- return netlink_dump_start(net->xfrm.nlsk, skb, nlh, &c);
+ if (link->dump == NULL) {
+ err = -EINVAL;
+ goto err;
}
+
+ err = netlink_dump_start(net->xfrm.nlsk, skb, nlh, &c);
+ goto err;
}
err = nlmsg_parse(nlh, xfrm_msg_min[type], attrs,
link->nla_max ? : XFRMA_MAX,
link->nla_pol ? : xfrma_policy);
if (err < 0)
- return err;
+ goto err;
- if (link->doit == NULL)
- return -EINVAL;
+ if (link->doit == NULL) {
+ err = -EINVAL;
+ goto err;
+ }
- return link->doit(skb, nlh, attrs);
+ err = link->doit(skb, nlh, attrs);
+
+err:
+ kvfree(nlh64);
+ return err;
}
static void xfrm_netlink_rcv(struct sk_buff *skb)