diff options
Diffstat (limited to 'net/ipv6')
50 files changed, 1698 insertions, 2405 deletions
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 36d7437ac054..5728695b5449 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -69,7 +69,7 @@ config IPV6_OPTIMISTIC_DAD config INET6_AH tristate "IPv6: AH transformation" - select XFRM + select XFRM_ALGO select CRYPTO select CRYPTO_HMAC select CRYPTO_MD5 @@ -81,7 +81,7 @@ config INET6_AH config INET6_ESP tristate "IPv6: ESP transformation" - select XFRM + select XFRM_ALGO select CRYPTO select CRYPTO_AUTHENC select CRYPTO_HMAC diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 6a3bb6077e19..79181819a24f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -38,6 +38,8 @@ * status etc. */ +#define pr_fmt(fmt) "IPv6: " fmt + #include <linux/errno.h> #include <linux/types.h> #include <linux/kernel.h> @@ -61,11 +63,13 @@ #include <linux/delay.h> #include <linux/notifier.h> #include <linux/string.h> +#include <linux/hash.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/snmp.h> +#include <net/af_ieee802154.h> #include <net/ipv6.h> #include <net/protocol.h> #include <net/ndisc.h> @@ -149,7 +153,7 @@ static void addrconf_type_change(struct net_device *dev, unsigned long event); static int addrconf_ifdown(struct net_device *dev, int how); -static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags); +static void addrconf_dad_start(struct inet6_ifaddr *ifp); static void addrconf_dad_timer(unsigned long data); static void addrconf_dad_completed(struct inet6_ifaddr *ifp); static void addrconf_dad_run(struct inet6_dev *idev); @@ -326,20 +330,19 @@ void in6_dev_finish_destroy(struct inet6_dev *idev) WARN_ON(idev->mc_list != NULL); #ifdef NET_REFCNT_DEBUG - printk(KERN_DEBUG "in6_dev_finish_destroy: %s\n", dev ? dev->name : "NIL"); + pr_debug("%s: %s\n", __func__, dev ? dev->name : "NIL"); #endif dev_put(dev); if (!idev->dead) { - pr_warning("Freeing alive inet6 device %p\n", idev); + pr_warn("Freeing alive inet6 device %p\n", idev); return; } snmp6_free_dev(idev); kfree_rcu(idev, rcu); } - EXPORT_SYMBOL(in6_dev_finish_destroy); -static struct inet6_dev * ipv6_add_dev(struct net_device *dev) +static struct inet6_dev *ipv6_add_dev(struct net_device *dev) { struct inet6_dev *ndev; @@ -372,7 +375,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) if (snmp6_alloc_dev(ndev) < 0) { ADBG((KERN_WARNING - "%s(): cannot allocate memory for statistics; dev=%s.\n", + "%s: cannot allocate memory for statistics; dev=%s.\n", __func__, dev->name)); neigh_parms_release(&nd_tbl, ndev->nd_parms); dev_put(dev); @@ -382,7 +385,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) if (snmp6_register_dev(ndev) < 0) { ADBG((KERN_WARNING - "%s(): cannot create /proc/net/dev_snmp6/%s\n", + "%s: cannot create /proc/net/dev_snmp6/%s\n", __func__, dev->name)); neigh_parms_release(&nd_tbl, ndev->nd_parms); ndev->dead = 1; @@ -400,9 +403,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) #if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) if (dev->type == ARPHRD_SIT && (dev->priv_flags & IFF_ISATAP)) { - printk(KERN_INFO - "%s: Disabled Multicast RS\n", - dev->name); + pr_info("%s: Disabled Multicast RS\n", dev->name); ndev->cnf.rtr_solicits = 0; } #endif @@ -441,7 +442,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) return ndev; } -static struct inet6_dev * ipv6_find_idev(struct net_device *dev) +static struct inet6_dev *ipv6_find_idev(struct net_device *dev) { struct inet6_dev *idev; @@ -542,7 +543,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) WARN_ON(!hlist_unhashed(&ifp->addr_lst)); #ifdef NET_REFCNT_DEBUG - printk(KERN_DEBUG "inet6_ifa_finish_destroy\n"); + pr_debug("%s\n", __func__); #endif in6_dev_put(ifp->idev); @@ -551,7 +552,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) pr_notice("Timer is still running, when freeing ifa=%p\n", ifp); if (ifp->state != INET6_IFADDR_STATE_DEAD) { - pr_warning("Freeing alive inet6 address %p\n", ifp); + pr_warn("Freeing alive inet6 address %p\n", ifp); return; } dst_release(&ifp->rt->dst); @@ -579,15 +580,9 @@ ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp) list_add_tail(&ifp->if_list, p); } -static u32 ipv6_addr_hash(const struct in6_addr *addr) +static u32 inet6_addr_hash(const struct in6_addr *addr) { - /* - * We perform the hash function over the last 64 bits of the address - * This will include the IEEE address token on links that support it. - */ - return jhash_2words((__force u32)addr->s6_addr32[2], - (__force u32)addr->s6_addr32[3], 0) - & (IN6_ADDR_HSIZE - 1); + return hash_32(ipv6_addr_hash(addr), IN6_ADDR_HSIZE_SHIFT); } /* On success it returns ifp with increased reference count */ @@ -662,7 +657,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, in6_ifa_hold(ifa); /* Add to big hash table */ - hash = ipv6_addr_hash(addr); + hash = inet6_addr_hash(addr); hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]); spin_unlock(&addrconf_hash_lock); @@ -803,8 +798,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) ip6_del_rt(rt); rt = NULL; } else if (!(rt->rt6i_flags & RTF_EXPIRES)) { - rt->dst.expires = expires; - rt->rt6i_flags |= RTF_EXPIRES; + rt6_set_expires(rt, expires); } } dst_release(&rt->dst); @@ -842,8 +836,7 @@ retry: in6_dev_hold(idev); if (idev->cnf.use_tempaddr <= 0) { write_unlock(&idev->lock); - printk(KERN_INFO - "ipv6_create_tempaddr(): use_tempaddr is disabled.\n"); + pr_info("%s: use_tempaddr is disabled\n", __func__); in6_dev_put(idev); ret = -1; goto out; @@ -853,8 +846,8 @@ retry: idev->cnf.use_tempaddr = -1; /*XXX*/ spin_unlock_bh(&ifp->lock); write_unlock(&idev->lock); - printk(KERN_WARNING - "ipv6_create_tempaddr(): regeneration time exceeded. disabled temporary address support.\n"); + pr_warn("%s: regeneration time exceeded - disabled temporary address support\n", + __func__); in6_dev_put(idev); ret = -1; goto out; @@ -864,8 +857,8 @@ retry: if (__ipv6_try_regen_rndid(idev, tmpaddr) < 0) { spin_unlock_bh(&ifp->lock); write_unlock(&idev->lock); - printk(KERN_WARNING - "ipv6_create_tempaddr(): regeneration of randomized interface id failed.\n"); + pr_warn("%s: regeneration of randomized interface id failed\n", + __func__); in6_ifa_put(ifp); in6_dev_put(idev); ret = -1; @@ -915,8 +908,7 @@ retry: if (!ift || IS_ERR(ift)) { in6_ifa_put(ifp); in6_dev_put(idev); - printk(KERN_INFO - "ipv6_create_tempaddr(): retry temporary address regeneration.\n"); + pr_info("%s: retry temporary address regeneration\n", __func__); tmpaddr = &addr; write_lock(&idev->lock); goto retry; @@ -930,7 +922,7 @@ retry: ift->tstamp = tmp_tstamp; spin_unlock_bh(&ift->lock); - addrconf_dad_start(ift, 0); + addrconf_dad_start(ift); in6_ifa_put(ift); in6_dev_put(idev); out: @@ -1273,7 +1265,7 @@ int ipv6_chk_addr(struct net *net, const struct in6_addr *addr, { struct inet6_ifaddr *ifp; struct hlist_node *node; - unsigned int hash = ipv6_addr_hash(addr); + unsigned int hash = inet6_addr_hash(addr); rcu_read_lock_bh(); hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) { @@ -1296,7 +1288,7 @@ EXPORT_SYMBOL(ipv6_chk_addr); static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, struct net_device *dev) { - unsigned int hash = ipv6_addr_hash(addr); + unsigned int hash = inet6_addr_hash(addr); struct inet6_ifaddr *ifp; struct hlist_node *node; @@ -1333,14 +1325,13 @@ int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev) rcu_read_unlock(); return onlink; } - EXPORT_SYMBOL(ipv6_chk_prefix); struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *addr, struct net_device *dev, int strict) { struct inet6_ifaddr *ifp, *result = NULL; - unsigned int hash = ipv6_addr_hash(addr); + unsigned int hash = inet6_addr_hash(addr); struct hlist_node *node; rcu_read_lock_bh(); @@ -1417,9 +1408,8 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp) return; } - if (net_ratelimit()) - printk(KERN_INFO "%s: IPv6 duplicate address %pI6c detected!\n", - ifp->idev->dev->name, &ifp->addr); + net_info_ratelimited("%s: IPv6 duplicate address %pI6c detected!\n", + ifp->idev->dev->name, &ifp->addr); if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6) { struct in6_addr addr; @@ -1432,7 +1422,7 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp) /* DAD failed for link-local based on MAC address */ idev->cnf.disable_ipv6 = 1; - printk(KERN_INFO "%s: IPv6 being disabled!\n", + pr_info("%s: IPv6 being disabled!\n", ifp->idev->dev->name); } } @@ -1517,13 +1507,21 @@ static int addrconf_ifid_eui48(u8 *eui, struct net_device *dev) return 0; } +static int addrconf_ifid_eui64(u8 *eui, struct net_device *dev) +{ + if (dev->addr_len != IEEE802154_ADDR_LEN) + return -1; + memcpy(eui, dev->dev_addr, 8); + return 0; +} + static int addrconf_ifid_arcnet(u8 *eui, struct net_device *dev) { /* XXX: inherit EUI-64 from other interface -- yoshfuji */ if (dev->addr_len != ARCNET_ALEN) return -1; memset(eui, 0, 7); - eui[7] = *(u8*)dev->dev_addr; + eui[7] = *(u8 *)dev->dev_addr; return 0; } @@ -1570,7 +1568,6 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev) switch (dev->type) { case ARPHRD_ETHER: case ARPHRD_FDDI: - case ARPHRD_IEEE802_TR: return addrconf_ifid_eui48(eui, dev); case ARPHRD_ARCNET: return addrconf_ifid_arcnet(eui, dev); @@ -1580,6 +1577,8 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev) return addrconf_ifid_sit(eui, dev); case ARPHRD_IPGRE: return addrconf_ifid_gre(eui, dev); + case ARPHRD_IEEE802154: + return addrconf_ifid_eui64(eui, dev); } return -1; } @@ -1653,9 +1652,8 @@ static void ipv6_regen_rndid(unsigned long data) idev->cnf.regen_max_retry * idev->cnf.dad_transmits * idev->nd_parms->retrans_time - idev->cnf.max_desync_factor * HZ; if (time_before(expires, jiffies)) { - printk(KERN_WARNING - "ipv6_regen_rndid(): too short regeneration interval; timer disabled for %s.\n", - idev->dev->name); + pr_warn("%s: too short regeneration interval; timer disabled for %s\n", + __func__, idev->dev->name); goto out; } @@ -1668,7 +1666,8 @@ out: in6_dev_put(idev); } -static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr) { +static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr) +{ int ret = 0; if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0) @@ -1838,16 +1837,15 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) prefered_lft = ntohl(pinfo->prefered); if (prefered_lft > valid_lft) { - if (net_ratelimit()) - printk(KERN_WARNING "addrconf: prefix option has invalid lifetime\n"); + net_warn_ratelimited("addrconf: prefix option has invalid lifetime\n"); return; } in6_dev = in6_dev_get(dev); if (in6_dev == NULL) { - if (net_ratelimit()) - printk(KERN_DEBUG "addrconf: device %s not configured\n", dev->name); + net_dbg_ratelimited("addrconf: device %s not configured\n", + dev->name); return; } @@ -1887,11 +1885,9 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) rt = NULL; } else if (addrconf_finite_timeout(rt_expires)) { /* not infinity */ - rt->dst.expires = jiffies + rt_expires; - rt->rt6i_flags |= RTF_EXPIRES; + rt6_set_expires(rt, jiffies + rt_expires); } else { - rt->rt6i_flags &= ~RTF_EXPIRES; - rt->dst.expires = 0; + rt6_clean_expires(rt); } } else if (valid_lft) { clock_t expires = 0; @@ -1911,7 +1907,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) /* Try to figure out our local address for this prefix */ if (pinfo->autoconf && in6_dev->cnf.autoconf) { - struct inet6_ifaddr * ifp; + struct inet6_ifaddr *ifp; struct in6_addr addr; int create = 0, update_lft = 0; @@ -1924,9 +1920,8 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) } goto ok; } - if (net_ratelimit()) - printk(KERN_DEBUG "IPv6 addrconf: prefix with wrong length %d\n", - pinfo->prefix_len); + net_dbg_ratelimited("IPv6 addrconf: prefix with wrong length %d\n", + pinfo->prefix_len); in6_dev_put(in6_dev); return; @@ -1960,7 +1955,7 @@ ok: update_lft = create = 1; ifp->cstamp = jiffies; - addrconf_dad_start(ifp, RTF_ADDRCONF|RTF_PREFIX_RT); + addrconf_dad_start(ifp); } if (ifp) { @@ -2239,7 +2234,7 @@ static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *p * that the Optimistic flag should not be set for * manually configured addresses */ - addrconf_dad_start(ifp, 0); + addrconf_dad_start(ifp); in6_ifa_put(ifp); addrconf_verify(0); return 0; @@ -2365,9 +2360,9 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) } for_each_netdev(net, dev) { - struct in_device * in_dev = __in_dev_get_rtnl(dev); + struct in_device *in_dev = __in_dev_get_rtnl(dev); if (in_dev && (dev->flags & IFF_UP)) { - struct in_ifaddr * ifa; + struct in_ifaddr *ifa; int flag = scope; @@ -2404,7 +2399,7 @@ static void init_loopback(struct net_device *dev) ASSERT_RTNL(); if ((idev = ipv6_find_idev(dev)) == NULL) { - printk(KERN_DEBUG "init loopback: add_dev failed\n"); + pr_debug("%s: add_dev failed\n", __func__); return; } @@ -2413,7 +2408,7 @@ static void init_loopback(struct net_device *dev) static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr *addr) { - struct inet6_ifaddr * ifp; + struct inet6_ifaddr *ifp; u32 addr_flags = IFA_F_PERMANENT; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD @@ -2426,7 +2421,7 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr ifp = ipv6_add_addr(idev, addr, 64, IFA_LINK, addr_flags); if (!IS_ERR(ifp)) { addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0); - addrconf_dad_start(ifp, 0); + addrconf_dad_start(ifp); in6_ifa_put(ifp); } } @@ -2434,15 +2429,15 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr static void addrconf_dev_config(struct net_device *dev) { struct in6_addr addr; - struct inet6_dev * idev; + struct inet6_dev *idev; ASSERT_RTNL(); if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_FDDI) && - (dev->type != ARPHRD_IEEE802_TR) && (dev->type != ARPHRD_ARCNET) && - (dev->type != ARPHRD_INFINIBAND)) { + (dev->type != ARPHRD_INFINIBAND) && + (dev->type != ARPHRD_IEEE802154)) { /* Alas, we support only Ethernet autoconfiguration. */ return; } @@ -2472,7 +2467,7 @@ static void addrconf_sit_config(struct net_device *dev) */ if ((idev = ipv6_find_idev(dev)) == NULL) { - printk(KERN_DEBUG "init sit: add_dev failed\n"); + pr_debug("%s: add_dev failed\n", __func__); return; } @@ -2502,12 +2497,12 @@ static void addrconf_gre_config(struct net_device *dev) struct inet6_dev *idev; struct in6_addr addr; - pr_info("ipv6: addrconf_gre_config(%s)\n", dev->name); + pr_info("%s(%s)\n", __func__, dev->name); ASSERT_RTNL(); if ((idev = ipv6_find_idev(dev)) == NULL) { - printk(KERN_DEBUG "init gre: add_dev failed\n"); + pr_debug("%s: add_dev failed\n", __func__); return; } @@ -2547,7 +2542,7 @@ static void ip6_tnl_add_linklocal(struct inet6_dev *idev) if (!ipv6_inherit_linklocal(idev, link_dev)) return; } - printk(KERN_DEBUG "init ip6-ip6: add_linklocal failed\n"); + pr_debug("init ip6-ip6: add_linklocal failed\n"); } /* @@ -2563,14 +2558,14 @@ static void addrconf_ip6_tnl_config(struct net_device *dev) idev = addrconf_add_dev(dev); if (IS_ERR(idev)) { - printk(KERN_DEBUG "init ip6-ip6: add_dev failed\n"); + pr_debug("init ip6-ip6: add_dev failed\n"); return; } ip6_tnl_add_linklocal(idev); } static int addrconf_notify(struct notifier_block *this, unsigned long event, - void * data) + void *data) { struct net_device *dev = (struct net_device *) data; struct inet6_dev *idev = __in6_dev_get(dev); @@ -2594,9 +2589,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, if (event == NETDEV_UP) { if (!addrconf_qdisc_ok(dev)) { /* device is not ready yet. */ - printk(KERN_INFO - "ADDRCONF(NETDEV_UP): %s: " - "link is not ready\n", + pr_info("ADDRCONF(NETDEV_UP): %s: link is not ready\n", dev->name); break; } @@ -2621,10 +2614,8 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, idev->if_flags |= IF_READY; } - printk(KERN_INFO - "ADDRCONF(NETDEV_CHANGE): %s: " - "link becomes ready\n", - dev->name); + pr_info("ADDRCONF(NETDEV_CHANGE): %s: link becomes ready\n", + dev->name); run_pending = 1; } @@ -2895,8 +2886,7 @@ static void addrconf_rs_timer(unsigned long data) * Note: we do not support deprecated "all on-link" * assumption any longer. */ - printk(KERN_DEBUG "%s: no IPv6 routers present\n", - idev->dev->name); + pr_debug("%s: no IPv6 routers present\n", idev->dev->name); } out: @@ -2921,7 +2911,7 @@ static void addrconf_dad_kick(struct inet6_ifaddr *ifp) addrconf_mod_timer(ifp, AC_DAD, rand_num); } -static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags) +static void addrconf_dad_start(struct inet6_ifaddr *ifp) { struct inet6_dev *idev = ifp->idev; struct net_device *dev = idev->dev; @@ -3228,7 +3218,7 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr) int ret = 0; struct inet6_ifaddr *ifp = NULL; struct hlist_node *n; - unsigned int hash = ipv6_addr_hash(addr); + unsigned int hash = inet6_addr_hash(addr); rcu_read_lock_bh(); hlist_for_each_entry_rcu_bh(ifp, n, &inet6_addr_lst[hash], addr_lst) { @@ -3794,7 +3784,7 @@ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) return inet6_dump_addr(skb, cb, type); } -static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, +static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(in_skb->sk); @@ -3989,14 +3979,14 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev) struct nlattr *nla; struct ifla_cacheinfo ci; - NLA_PUT_U32(skb, IFLA_INET6_FLAGS, idev->if_flags); - + if (nla_put_u32(skb, IFLA_INET6_FLAGS, idev->if_flags)) + goto nla_put_failure; ci.max_reasm_len = IPV6_MAXPLEN; ci.tstamp = cstamp_delta(idev->tstamp); ci.reachable_time = jiffies_to_msecs(idev->nd_parms->reachable_time); ci.retrans_time = jiffies_to_msecs(idev->nd_parms->retrans_time); - NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci); - + if (nla_put(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci)) + goto nla_put_failure; nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32)); if (nla == NULL) goto nla_put_failure; @@ -4061,15 +4051,13 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, hdr->ifi_flags = dev_get_flags(dev); hdr->ifi_change = 0; - NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name); - - if (dev->addr_len) - NLA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr); - - NLA_PUT_U32(skb, IFLA_MTU, dev->mtu); - if (dev->ifindex != dev->iflink) - NLA_PUT_U32(skb, IFLA_LINK, dev->iflink); - + if (nla_put_string(skb, IFLA_IFNAME, dev->name) || + (dev->addr_len && + nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) || + nla_put_u32(skb, IFLA_MTU, dev->mtu) || + (dev->ifindex != dev->iflink && + nla_put_u32(skb, IFLA_LINK, dev->iflink))) + goto nla_put_failure; protoinfo = nla_nest_start(skb, IFLA_PROTINFO); if (protoinfo == NULL) goto nla_put_failure; @@ -4182,12 +4170,12 @@ static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, if (pinfo->autoconf) pmsg->prefix_flags |= IF_PREFIX_AUTOCONF; - NLA_PUT(skb, PREFIX_ADDRESS, sizeof(pinfo->prefix), &pinfo->prefix); - + if (nla_put(skb, PREFIX_ADDRESS, sizeof(pinfo->prefix), &pinfo->prefix)) + goto nla_put_failure; ci.preferred_time = ntohl(pinfo->prefered); ci.valid_time = ntohl(pinfo->valid); - NLA_PUT(skb, PREFIX_CACHEINFO, sizeof(ci), &ci); - + if (nla_put(skb, PREFIX_CACHEINFO, sizeof(ci), &ci)) + goto nla_put_failure; return nlmsg_end(skb, nlh); nla_put_failure: @@ -4371,7 +4359,6 @@ static struct addrconf_sysctl_table { struct ctl_table_header *sysctl_header; ctl_table addrconf_vars[DEVCONF_MAX+1]; - char *dev_name; } addrconf_sysctl __read_mostly = { .sysctl_header = NULL, .addrconf_vars = { @@ -4600,17 +4587,7 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name, { int i; struct addrconf_sysctl_table *t; - -#define ADDRCONF_CTL_PATH_DEV 3 - - struct ctl_path addrconf_ctl_path[] = { - { .procname = "net", }, - { .procname = "ipv6", }, - { .procname = "conf", }, - { /* to be set */ }, - { }, - }; - + char path[sizeof("net/ipv6/conf/") + IFNAMSIZ]; t = kmemdup(&addrconf_sysctl, sizeof(*t), GFP_KERNEL); if (t == NULL) @@ -4622,27 +4599,15 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name, t->addrconf_vars[i].extra2 = net; } - /* - * Make a copy of dev_name, because '.procname' is regarded as const - * by sysctl and we wouldn't want anyone to change it under our feet - * (see SIOCSIFNAME). - */ - t->dev_name = kstrdup(dev_name, GFP_KERNEL); - if (!t->dev_name) - goto free; + snprintf(path, sizeof(path), "net/ipv6/conf/%s", dev_name); - addrconf_ctl_path[ADDRCONF_CTL_PATH_DEV].procname = t->dev_name; - - t->sysctl_header = register_net_sysctl_table(net, addrconf_ctl_path, - t->addrconf_vars); + t->sysctl_header = register_net_sysctl(net, path, t->addrconf_vars); if (t->sysctl_header == NULL) - goto free_procname; + goto free; p->sysctl = t; return 0; -free_procname: - kfree(t->dev_name); free: kfree(t); out: @@ -4659,7 +4624,6 @@ static void __addrconf_sysctl_unregister(struct ipv6_devconf *p) t = p->sysctl; p->sysctl = NULL; unregister_net_sysctl_table(t->sysctl_header); - kfree(t->dev_name); kfree(t); } @@ -4778,8 +4742,8 @@ int __init addrconf_init(void) err = ipv6_addr_label_init(); if (err < 0) { - printk(KERN_CRIT "IPv6 Addrconf:" - " cannot initialize default policy table: %d.\n", err); + pr_crit("%s: cannot initialize default policy table: %d\n", + __func__, err); goto out; } diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index 399287e595d7..d051e5f4bf34 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -8,9 +8,9 @@ #define IPV6_ADDR_SCOPE_TYPE(scope) ((scope) << 16) -static inline unsigned ipv6_addr_scope2type(unsigned scope) +static inline unsigned int ipv6_addr_scope2type(unsigned int scope) { - switch(scope) { + switch (scope) { case IPV6_ADDR_SCOPE_NODELOCAL: return (IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_NODELOCAL) | IPV6_ADDR_LOOPBACK); diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 2d8ddba9ee58..eb6a63632d3c 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -129,7 +129,7 @@ static void ip6addrlbl_free_rcu(struct rcu_head *h) ip6addrlbl_free(container_of(h, struct ip6addrlbl_entry, rcu)); } -static inline int ip6addrlbl_hold(struct ip6addrlbl_entry *p) +static bool ip6addrlbl_hold(struct ip6addrlbl_entry *p) { return atomic_inc_not_zero(&p->refcnt); } @@ -141,20 +141,20 @@ static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p) } /* Find label */ -static int __ip6addrlbl_match(struct net *net, - struct ip6addrlbl_entry *p, - const struct in6_addr *addr, - int addrtype, int ifindex) +static bool __ip6addrlbl_match(struct net *net, + const struct ip6addrlbl_entry *p, + const struct in6_addr *addr, + int addrtype, int ifindex) { if (!net_eq(ip6addrlbl_net(p), net)) - return 0; + return false; if (p->ifindex && p->ifindex != ifindex) - return 0; + return false; if (p->addrtype && p->addrtype != addrtype) - return 0; + return false; if (!ipv6_prefix_equal(addr, &p->prefix, p->prefixlen)) - return 0; - return 1; + return false; + return true; } static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net, @@ -350,7 +350,7 @@ static int __net_init ip6addrlbl_net_init(struct net *net) int err = 0; int i; - ADDRLABEL(KERN_DEBUG "%s()\n", __func__); + ADDRLABEL(KERN_DEBUG "%s\n", __func__); for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) { int ret = ip6addrlbl_add(net, @@ -456,8 +456,8 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh, return err; } -static inline void ip6addrlbl_putmsg(struct nlmsghdr *nlh, - int prefixlen, int ifindex, u32 lseq) +static void ip6addrlbl_putmsg(struct nlmsghdr *nlh, + int prefixlen, int ifindex, u32 lseq) { struct ifaddrlblmsg *ifal = nlmsg_data(nlh); ifal->ifal_family = AF_INET6; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 8ed1b930e75f..e22e6d88bac6 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -18,6 +18,7 @@ * 2 of the License, or (at your option) any later version. */ +#define pr_fmt(fmt) "IPv6: " fmt #include <linux/module.h> #include <linux/capability.h> @@ -77,7 +78,7 @@ struct ipv6_params ipv6_defaults = { .autoconf = 1, }; -static int disable_ipv6_mod = 0; +static int disable_ipv6_mod; module_param_named(disable, disable_ipv6_mod, int, 0444); MODULE_PARM_DESC(disable, "Disable IPv6 module such that it is non-functional"); @@ -180,7 +181,7 @@ lookup_protocol: err = 0; sk->sk_no_check = answer_no_check; if (INET_PROTOSW_REUSE & answer_flags) - sk->sk_reuse = 1; + sk->sk_reuse = SK_CAN_REUSE; inet = inet_sk(sk); inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0; @@ -256,7 +257,7 @@ out_rcu_unlock: /* bind for INET6 API */ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { - struct sockaddr_in6 *addr=(struct sockaddr_in6 *)uaddr; + struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr; struct sock *sk = sock->sk; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -390,7 +391,6 @@ out_unlock: rcu_read_unlock(); goto out; } - EXPORT_SYMBOL(inet6_bind); int inet6_release(struct socket *sock) @@ -408,7 +408,6 @@ int inet6_release(struct socket *sock) return inet_release(sock); } - EXPORT_SYMBOL(inet6_release); void inet6_destroy_sock(struct sock *sk) @@ -419,10 +418,12 @@ void inet6_destroy_sock(struct sock *sk) /* Release rx options */ - if ((skb = xchg(&np->pktoptions, NULL)) != NULL) + skb = xchg(&np->pktoptions, NULL); + if (skb != NULL) kfree_skb(skb); - if ((skb = xchg(&np->rxpmtu, NULL)) != NULL) + skb = xchg(&np->rxpmtu, NULL); + if (skb != NULL) kfree_skb(skb); /* Free flowlabels */ @@ -430,10 +431,10 @@ void inet6_destroy_sock(struct sock *sk) /* Free tx options */ - if ((opt = xchg(&np->opt, NULL)) != NULL) + opt = xchg(&np->opt, NULL); + if (opt != NULL) sock_kfree_s(sk, opt, opt->tot_len); } - EXPORT_SYMBOL_GPL(inet6_destroy_sock); /* @@ -443,7 +444,7 @@ EXPORT_SYMBOL_GPL(inet6_destroy_sock); int inet6_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer) { - struct sockaddr_in6 *sin=(struct sockaddr_in6 *)uaddr; + struct sockaddr_in6 *sin = (struct sockaddr_in6 *)uaddr; struct sock *sk = sock->sk; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -474,7 +475,6 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, *uaddr_len = sizeof(*sin); return 0; } - EXPORT_SYMBOL(inet6_getname); int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) @@ -482,8 +482,7 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) struct sock *sk = sock->sk; struct net *net = sock_net(sk); - switch(cmd) - { + switch (cmd) { case SIOCGSTAMP: return sock_get_timestamp(sk, (struct timeval __user *)arg); @@ -509,7 +508,6 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) /*NOTREACHED*/ return 0; } - EXPORT_SYMBOL(inet6_ioctl); const struct proto_ops inet6_stream_ops = { @@ -615,25 +613,21 @@ out: return ret; out_permanent: - printk(KERN_ERR "Attempt to override permanent protocol %d.\n", - protocol); + pr_err("Attempt to override permanent protocol %d\n", protocol); goto out; out_illegal: - printk(KERN_ERR - "Ignoring attempt to register invalid socket type %d.\n", + pr_err("Ignoring attempt to register invalid socket type %d\n", p->type); goto out; } - EXPORT_SYMBOL(inet6_register_protosw); void inet6_unregister_protosw(struct inet_protosw *p) { if (INET_PROTOSW_PERMANENT & p->flags) { - printk(KERN_ERR - "Attempt to unregister permanent protocol %d.\n", + pr_err("Attempt to unregister permanent protocol %d\n", p->protocol); } else { spin_lock_bh(&inetsw6_lock); @@ -643,7 +637,6 @@ inet6_unregister_protosw(struct inet_protosw *p) synchronize_net(); } } - EXPORT_SYMBOL(inet6_unregister_protosw); int inet6_sk_rebuild_header(struct sock *sk) @@ -683,13 +676,12 @@ int inet6_sk_rebuild_header(struct sock *sk) return 0; } - EXPORT_SYMBOL_GPL(inet6_sk_rebuild_header); -int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb) +bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb) { - struct ipv6_pinfo *np = inet6_sk(sk); - struct inet6_skb_parm *opt = IP6CB(skb); + const struct ipv6_pinfo *np = inet6_sk(sk); + const struct inet6_skb_parm *opt = IP6CB(skb); if (np->rxopt.all) { if ((opt->hop && (np->rxopt.bits.hopopts || @@ -701,11 +693,10 @@ int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb) np->rxopt.bits.osrcrt)) || ((opt->dst1 || opt->dst0) && (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts))) - return 1; + return true; } - return 0; + return false; } - EXPORT_SYMBOL_GPL(ipv6_opt_accepted); static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto) @@ -1070,13 +1061,11 @@ static int __init inet6_init(void) BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb)); /* Register the socket-side information for inet6_create. */ - for(r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r) + for (r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r) INIT_LIST_HEAD(r); if (disable_ipv6_mod) { - printk(KERN_INFO - "IPv6: Loaded, but administratively disabled, " - "reboot required to enable\n"); + pr_info("Loaded, but administratively disabled, reboot required to enable\n"); goto out; } @@ -1111,11 +1100,6 @@ static int __init inet6_init(void) if (err) goto out_sock_register_fail; -#ifdef CONFIG_SYSCTL - err = ipv6_static_sysctl_register(); - if (err) - goto static_sysctl_fail; -#endif tcpv6_prot.sysctl_mem = init_net.ipv4.sysctl_tcp_mem; /* @@ -1242,10 +1226,6 @@ ipmr_fail: icmp_fail: unregister_pernet_subsys(&inet6_net_ops); register_pernet_fail: -#ifdef CONFIG_SYSCTL - ipv6_static_sysctl_unregister(); -static_sysctl_fail: -#endif sock_unregister(PF_INET6); rtnl_unregister_all(PF_INET6); out_sock_register_fail: @@ -1272,9 +1252,6 @@ static void __exit inet6_exit(void) /* Disallow any further netlink messages */ rtnl_unregister_all(PF_INET6); -#ifdef CONFIG_SYSCTL - ipv6_sysctl_unregister(); -#endif udpv6_exit(); udplitev6_exit(); tcpv6_exit(); @@ -1302,9 +1279,6 @@ static void __exit inet6_exit(void) rawv6_exit(); unregister_pernet_subsys(&inet6_net_ops); -#ifdef CONFIG_SYSCTL - ipv6_static_sysctl_unregister(); -#endif proto_unregister(&rawv6_prot); proto_unregister(&udplitev6_prot); proto_unregister(&udpv6_prot); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 2ae79dbeec2f..7e6139508ee7 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -24,6 +24,8 @@ * This file is derived from net/ipv4/ah.c. */ +#define pr_fmt(fmt) "IPv6: " fmt + #include <crypto/hash.h> #include <linux/module.h> #include <linux/slab.h> @@ -33,6 +35,7 @@ #include <linux/pfkeyv2.h> #include <linux/string.h> #include <linux/scatterlist.h> +#include <net/ip6_route.h> #include <net/icmp.h> #include <net/ipv6.h> #include <net/protocol.h> @@ -111,7 +114,7 @@ static inline struct scatterlist *ah_req_sg(struct crypto_ahash *ahash, __alignof__(struct scatterlist)); } -static int zero_out_mutable_opts(struct ipv6_opt_hdr *opthdr) +static bool zero_out_mutable_opts(struct ipv6_opt_hdr *opthdr) { u8 *opt = (u8 *)opthdr; int len = ipv6_optlen(opthdr); @@ -125,7 +128,7 @@ static int zero_out_mutable_opts(struct ipv6_opt_hdr *opthdr) switch (opt[off]) { - case IPV6_TLV_PAD0: + case IPV6_TLV_PAD1: optlen = 1; break; default: @@ -143,10 +146,10 @@ static int zero_out_mutable_opts(struct ipv6_opt_hdr *opthdr) len -= optlen; } if (len == 0) - return 1; + return true; bad: - return 0; + return false; } #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) @@ -169,7 +172,7 @@ static void ipv6_rearrange_destopt(struct ipv6hdr *iph, struct ipv6_opt_hdr *des switch (opt[off]) { - case IPV6_TLV_PAD0: + case IPV6_TLV_PAD1: optlen = 1; break; default: @@ -189,8 +192,8 @@ static void ipv6_rearrange_destopt(struct ipv6hdr *iph, struct ipv6_opt_hdr *des hao = (struct ipv6_destopt_hao *)&opt[off]; if (hao->length != sizeof(hao->addr)) { - if (net_ratelimit()) - printk(KERN_WARNING "destopt hao: invalid header length: %u\n", hao->length); + net_warn_ratelimited("destopt hao: invalid header length: %u\n", + hao->length); goto bad; } final_addr = hao->addr; @@ -610,16 +613,18 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct xfrm_state *x; if (type != ICMPV6_DEST_UNREACH && - type != ICMPV6_PKT_TOOBIG) + type != ICMPV6_PKT_TOOBIG && + type != NDISC_REDIRECT) return; x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6); if (!x) return; - NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/%pI6\n", - ntohl(ah->spi), &iph->daddr); - + if (type == NDISC_REDIRECT) + ip6_redirect(skb, net, 0, 0); + else + ip6_update_pmtu(skb, net, info, 0, 0); xfrm_state_put(x); } @@ -659,9 +664,9 @@ static int ah6_init_state(struct xfrm_state *x) if (aalg_desc->uinfo.auth.icv_fullbits/8 != crypto_ahash_digestsize(ahash)) { - printk(KERN_INFO "AH: %s digestsize %u != %hu\n", - x->aalg->alg_name, crypto_ahash_digestsize(ahash), - aalg_desc->uinfo.auth.icv_fullbits/8); + pr_info("AH: %s digestsize %u != %hu\n", + x->aalg->alg_name, crypto_ahash_digestsize(ahash), + aalg_desc->uinfo.auth.icv_fullbits/8); goto error; } @@ -727,12 +732,12 @@ static const struct inet6_protocol ah6_protocol = { static int __init ah6_init(void) { if (xfrm_register_type(&ah6_type, AF_INET6) < 0) { - printk(KERN_INFO "ipv6 ah init: can't add xfrm type\n"); + pr_info("%s: can't add xfrm type\n", __func__); return -EAGAIN; } if (inet6_add_protocol(&ah6_protocol, IPPROTO_AH) < 0) { - printk(KERN_INFO "ipv6 ah init: can't add protocol\n"); + pr_info("%s: can't add protocol\n", __func__); xfrm_unregister_type(&ah6_type, AF_INET6); return -EAGAIN; } @@ -743,10 +748,10 @@ static int __init ah6_init(void) static void __exit ah6_fini(void) { if (inet6_del_protocol(&ah6_protocol, IPPROTO_AH) < 0) - printk(KERN_INFO "ipv6 ah close: can't remove protocol\n"); + pr_info("%s: can't remove protocol\n", __func__); if (xfrm_unregister_type(&ah6_type, AF_INET6) < 0) - printk(KERN_INFO "ipv6 ah close: can't remove xfrm type\n"); + pr_info("%s: can't remove xfrm type\n", __func__); } diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index db00d27ffb16..cdf02be5f191 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -342,7 +342,7 @@ static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr) * check if the interface has this anycast address * called with rcu_read_lock() */ -static int ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *addr) +static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *addr) { struct inet6_dev *idev; struct ifacaddr6 *aca; @@ -356,16 +356,16 @@ static int ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *add read_unlock_bh(&idev->lock); return aca != NULL; } - return 0; + return false; } /* * check if given interface (or any, if dev==0) has this anycast address */ -int ipv6_chk_acast_addr(struct net *net, struct net_device *dev, - const struct in6_addr *addr) +bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, + const struct in6_addr *addr) { - int found = 0; + bool found = false; rcu_read_lock(); if (dev) @@ -373,7 +373,7 @@ int ipv6_chk_acast_addr(struct net *net, struct net_device *dev, else for_each_netdev_rcu(net, dev) if (ipv6_chk_acast_dev(dev, addr)) { - found = 1; + found = true; break; } rcu_read_unlock(); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 76832c8dc89d..be2b67d631e5 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -22,6 +22,7 @@ #include <linux/ipv6.h> #include <linux/route.h> #include <linux/slab.h> +#include <linux/export.h> #include <net/ipv6.h> #include <net/ndisc.h> @@ -33,9 +34,9 @@ #include <linux/errqueue.h> #include <asm/uaccess.h> -static inline int ipv6_mapped_addr_any(const struct in6_addr *a) +static bool ipv6_mapped_addr_any(const struct in6_addr *a) { - return (ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0)); + return ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0); } int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) @@ -98,7 +99,7 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) sin.sin_port = usin->sin6_port; err = ip4_datagram_connect(sk, - (struct sockaddr*) &sin, + (struct sockaddr *) &sin, sizeof(sin)); ipv4_connected: @@ -202,6 +203,7 @@ out: fl6_sock_release(flowlabel); return err; } +EXPORT_SYMBOL_GPL(ip6_datagram_connect); void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload) @@ -414,6 +416,7 @@ out_free_skb: out: return err; } +EXPORT_SYMBOL_GPL(ipv6_recv_error); /* * Handle IPV6_RECVPATHMTU @@ -515,10 +518,10 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) u8 nexthdr = ipv6_hdr(skb)->nexthdr; while (off <= opt->lastopt) { - unsigned len; + unsigned int len; u8 *ptr = nh + off; - switch(nexthdr) { + switch (nexthdr) { case IPPROTO_DSTOPTS: nexthdr = ptr[0]; len = (ptr[1] + 1) << 3; @@ -827,9 +830,8 @@ int datagram_send_ctl(struct net *net, struct sock *sk, int tc; err = -EINVAL; - if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) { + if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) goto exit_f; - } tc = *(int *)CMSG_DATA(cmsg); if (tc < -1 || tc > 0xff) @@ -846,9 +848,8 @@ int datagram_send_ctl(struct net *net, struct sock *sk, int df; err = -EINVAL; - if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) { + if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) goto exit_f; - } df = *(int *)CMSG_DATA(cmsg); if (df < 0 || df > 1) @@ -870,3 +871,4 @@ int datagram_send_ctl(struct net *net, struct sock *sk, exit_f: return err; } +EXPORT_SYMBOL_GPL(datagram_send_ctl); diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 1ac7938dd9ec..6dc7fd353ef5 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -24,6 +24,8 @@ * This file is derived from net/ipv4/esp.c */ +#define pr_fmt(fmt) "IPv6: " fmt + #include <crypto/aead.h> #include <crypto/authenc.h> #include <linux/err.h> @@ -37,6 +39,7 @@ #include <linux/random.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <net/ip6_route.h> #include <net/icmp.h> #include <net/ipv6.h> #include <net/protocol.h> @@ -411,19 +414,15 @@ static u32 esp6_get_mtu(struct xfrm_state *x, int mtu) struct esp_data *esp = x->data; u32 blksize = ALIGN(crypto_aead_blocksize(esp->aead), 4); u32 align = max_t(u32, blksize, esp->padlen); - u32 rem; - - mtu -= x->props.header_len + crypto_aead_authsize(esp->aead); - rem = mtu & (align - 1); - mtu &= ~(align - 1); + unsigned int net_adj; - if (x->props.mode != XFRM_MODE_TUNNEL) { - u32 padsize = ((blksize - 1) & 7) + 1; - mtu -= blksize - padsize; - mtu += min_t(u32, blksize - padsize, rem); - } + if (x->props.mode != XFRM_MODE_TUNNEL) + net_adj = sizeof(struct ipv6hdr); + else + net_adj = 0; - return mtu - 2; + return ((mtu - x->props.header_len - crypto_aead_authsize(esp->aead) - + net_adj) & ~(align - 1)) + (net_adj - 2); } static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, @@ -435,15 +434,19 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct xfrm_state *x; if (type != ICMPV6_DEST_UNREACH && - type != ICMPV6_PKT_TOOBIG) + type != ICMPV6_PKT_TOOBIG && + type != NDISC_REDIRECT) return; x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6); if (!x) return; - printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%pI6\n", - ntohl(esph->spi), &iph->daddr); + + if (type == NDISC_REDIRECT) + ip6_redirect(skb, net, 0, 0); + else + ip6_update_pmtu(skb, net, info, 0, 0); xfrm_state_put(x); } @@ -651,11 +654,11 @@ static const struct inet6_protocol esp6_protocol = { static int __init esp6_init(void) { if (xfrm_register_type(&esp6_type, AF_INET6) < 0) { - printk(KERN_INFO "ipv6 esp init: can't add xfrm type\n"); + pr_info("%s: can't add xfrm type\n", __func__); return -EAGAIN; } if (inet6_add_protocol(&esp6_protocol, IPPROTO_ESP) < 0) { - printk(KERN_INFO "ipv6 esp init: can't add protocol\n"); + pr_info("%s: can't add protocol\n", __func__); xfrm_unregister_type(&esp6_type, AF_INET6); return -EAGAIN; } @@ -666,9 +669,9 @@ static int __init esp6_init(void) static void __exit esp6_fini(void) { if (inet6_del_protocol(&esp6_protocol, IPPROTO_ESP) < 0) - printk(KERN_INFO "ipv6 esp close: can't remove protocol\n"); + pr_info("%s: can't remove protocol\n", __func__); if (xfrm_unregister_type(&esp6_type, AF_INET6) < 0) - printk(KERN_INFO "ipv6 esp close: can't remove xfrm type\n"); + pr_info("%s: can't remove xfrm type\n", __func__); } module_init(esp6_init); diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 3d641b6e9b09..fa3d9c328092 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -75,7 +75,7 @@ int ipv6_find_tlv(struct sk_buff *skb, int offset, int type) return offset; switch (opttype) { - case IPV6_TLV_PAD0: + case IPV6_TLV_PAD1: optlen = 1; break; default: @@ -96,14 +96,14 @@ EXPORT_SYMBOL_GPL(ipv6_find_tlv); /* * Parsing tlv encoded headers. * - * Parsing function "func" returns 1, if parsing succeed - * and 0, if it failed. + * Parsing function "func" returns true, if parsing succeed + * and false, if it failed. * It MUST NOT touch skb->h. */ struct tlvtype_proc { int type; - int (*func)(struct sk_buff *skb, int offset); + bool (*func)(struct sk_buff *skb, int offset); }; /********************* @@ -112,11 +112,11 @@ struct tlvtype_proc { /* An unknown option is detected, decide what to do */ -static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff) +static bool ip6_tlvopt_unknown(struct sk_buff *skb, int optoff) { switch ((skb_network_header(skb)[optoff] & 0xC0) >> 6) { case 0: /* ignore */ - return 1; + return true; case 1: /* drop packet */ break; @@ -129,21 +129,22 @@ static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff) break; case 2: /* send ICMP PARM PROB regardless and drop packet */ icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff); - return 0; + return false; } kfree_skb(skb); - return 0; + return false; } /* Parse tlv encoded option header (hop-by-hop or destination) */ -static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb) +static bool ip6_parse_tlv(const struct tlvtype_proc *procs, struct sk_buff *skb) { - struct tlvtype_proc *curr; + const struct tlvtype_proc *curr; const unsigned char *nh = skb_network_header(skb); int off = skb_network_header_len(skb); int len = (skb_transport_header(skb)[1] + 1) << 3; + int padlen = 0; if (skb_transport_offset(skb) + len > skb_headlen(skb)) goto bad; @@ -153,13 +154,33 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb) while (len > 0) { int optlen = nh[off + 1] + 2; + int i; switch (nh[off]) { - case IPV6_TLV_PAD0: + case IPV6_TLV_PAD1: optlen = 1; + padlen++; + if (padlen > 7) + goto bad; break; case IPV6_TLV_PADN: + /* RFC 2460 states that the purpose of PadN is + * to align the containing header to multiples + * of 8. 7 is therefore the highest valid value. + * See also RFC 4942, Section 2.1.9.5. + */ + padlen += optlen; + if (padlen > 7) + goto bad; + /* RFC 4942 recommends receiving hosts to + * actively check PadN payload to contain + * only zeroes. + */ + for (i = 2; i < optlen; i++) { + if (nh[off + i] != 0) + goto bad; + } break; default: /* Other TLV code so scan list */ @@ -170,25 +191,33 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb) /* type specific length/alignment checks will be performed in the func(). */ - if (curr->func(skb, off) == 0) - return 0; + if (curr->func(skb, off) == false) + return false; break; } } if (curr->type < 0) { if (ip6_tlvopt_unknown(skb, off) == 0) - return 0; + return false; } + padlen = 0; break; } off += optlen; len -= optlen; } + /* This case will not be caught by above check since its padding + * length is smaller than 7: + * 1 byte NH + 1 byte Length + 6 bytes Padding + */ + if ((padlen == 6) && ((off - skb_network_header_len(skb)) == 8)) + goto bad; + if (len == 0) - return 1; + return true; bad: kfree_skb(skb); - return 0; + return false; } /***************************** @@ -196,7 +225,7 @@ bad: *****************************/ #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) -static int ipv6_dest_hao(struct sk_buff *skb, int optoff) +static bool ipv6_dest_hao(struct sk_buff *skb, int optoff) { struct ipv6_destopt_hao *hao; struct inet6_skb_parm *opt = IP6CB(skb); @@ -250,15 +279,15 @@ static int ipv6_dest_hao(struct sk_buff *skb, int optoff) if (skb->tstamp.tv64 == 0) __net_timestamp(skb); - return 1; + return true; discard: kfree_skb(skb); - return 0; + return false; } #endif -static struct tlvtype_proc tlvprocdestopt_lst[] = { +static const struct tlvtype_proc tlvprocdestopt_lst[] = { #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) { .type = IPV6_TLV_HAO, @@ -563,23 +592,23 @@ static inline struct net *ipv6_skb_net(struct sk_buff *skb) /* Router Alert as of RFC 2711 */ -static int ipv6_hop_ra(struct sk_buff *skb, int optoff) +static bool ipv6_hop_ra(struct sk_buff *skb, int optoff) { const unsigned char *nh = skb_network_header(skb); if (nh[optoff + 1] == 2) { IP6CB(skb)->ra = optoff; - return 1; + return true; } LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n", nh[optoff + 1]); kfree_skb(skb); - return 0; + return false; } /* Jumbo payload */ -static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff) +static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff) { const unsigned char *nh = skb_network_header(skb); struct net *net = ipv6_skb_net(skb); @@ -598,13 +627,13 @@ static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff) IP6_INC_STATS_BH(net, ipv6_skb_idev(skb), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2); - return 0; + return false; } if (ipv6_hdr(skb)->payload_len) { IP6_INC_STATS_BH(net, ipv6_skb_idev(skb), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff); - return 0; + return false; } if (pkt_len > skb->len - sizeof(struct ipv6hdr)) { @@ -616,14 +645,14 @@ static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff) if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) goto drop; - return 1; + return true; drop: kfree_skb(skb); - return 0; + return false; } -static struct tlvtype_proc tlvprochopopt_lst[] = { +static const struct tlvtype_proc tlvprochopopt_lst[] = { { .type = IPV6_TLV_ROUTERALERT, .func = ipv6_hop_ra, @@ -722,7 +751,6 @@ void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, if (opt->hopopt) ipv6_push_exthdr(skb, proto, NEXTHDR_HOP, opt->hopopt); } - EXPORT_SYMBOL(ipv6_push_nfrag_opts); void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto) @@ -738,20 +766,19 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt) opt2 = sock_kmalloc(sk, opt->tot_len, GFP_ATOMIC); if (opt2) { - long dif = (char*)opt2 - (char*)opt; + long dif = (char *)opt2 - (char *)opt; memcpy(opt2, opt, opt->tot_len); if (opt2->hopopt) - *((char**)&opt2->hopopt) += dif; + *((char **)&opt2->hopopt) += dif; if (opt2->dst0opt) - *((char**)&opt2->dst0opt) += dif; + *((char **)&opt2->dst0opt) += dif; if (opt2->dst1opt) - *((char**)&opt2->dst1opt) += dif; + *((char **)&opt2->dst1opt) += dif; if (opt2->srcrt) - *((char**)&opt2->srcrt) += dif; + *((char **)&opt2->srcrt) += dif; } return opt2; } - EXPORT_SYMBOL_GPL(ipv6_dup_options); static int ipv6_renew_option(void *ohdr, @@ -764,14 +791,14 @@ static int ipv6_renew_option(void *ohdr, if (ohdr) { memcpy(*p, ohdr, ipv6_optlen((struct ipv6_opt_hdr *)ohdr)); *hdr = (struct ipv6_opt_hdr *)*p; - *p += CMSG_ALIGN(ipv6_optlen(*(struct ipv6_opt_hdr **)hdr)); + *p += CMSG_ALIGN(ipv6_optlen(*hdr)); } } else { if (newopt) { if (copy_from_user(*p, newopt, newoptlen)) return -EFAULT; *hdr = (struct ipv6_opt_hdr *)*p; - if (ipv6_optlen(*(struct ipv6_opt_hdr **)hdr) > newoptlen) + if (ipv6_optlen(*hdr) > newoptlen) return -EINVAL; *p += CMSG_ALIGN(newoptlen); } @@ -869,6 +896,7 @@ struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space, return opt; } +EXPORT_SYMBOL_GPL(ipv6_fixup_options); /** * fl6_update_dst - update flowi destination address with info given @@ -892,5 +920,4 @@ struct in6_addr *fl6_update_dst(struct flowi6 *fl6, fl6->daddr = *((struct rt0_hdr *)opt->srcrt)->addr; return orig; } - EXPORT_SYMBOL_GPL(fl6_update_dst); diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index 72957f4a7c6c..f73d59a14131 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -9,7 +9,7 @@ * find out if nexthdr is a well-known extension header or a protocol */ -int ipv6_ext_hdr(u8 nexthdr) +bool ipv6_ext_hdr(u8 nexthdr) { /* * find out if nexthdr is an extension header or a protocol @@ -21,6 +21,7 @@ int ipv6_ext_hdr(u8 nexthdr) (nexthdr == NEXTHDR_NONE) || (nexthdr == NEXTHDR_DEST); } +EXPORT_SYMBOL(ipv6_ext_hdr); /* * Skip any extension headers. This is used by the ICMP module. @@ -109,6 +110,4 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, *nexthdrp = nexthdr; return start; } - -EXPORT_SYMBOL(ipv6_ext_hdr); EXPORT_SYMBOL(ipv6_skip_exthdr); diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index b6c573152067..0ff1cfd55bc4 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -22,8 +22,7 @@ #include <net/ip6_route.h> #include <net/netlink.h> -struct fib6_rule -{ +struct fib6_rule { struct fib_rule common; struct rt6key src; struct rt6key dst; @@ -215,14 +214,13 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb, frh->src_len = rule6->src.plen; frh->tos = rule6->tclass; - if (rule6->dst.plen) - NLA_PUT(skb, FRA_DST, sizeof(struct in6_addr), - &rule6->dst.addr); - - if (rule6->src.plen) - NLA_PUT(skb, FRA_SRC, sizeof(struct in6_addr), - &rule6->src.addr); - + if ((rule6->dst.plen && + nla_put(skb, FRA_DST, sizeof(struct in6_addr), + &rule6->dst.addr)) || + (rule6->src.plen && + nla_put(skb, FRA_SRC, sizeof(struct in6_addr), + &rule6->src.addr))) + goto nla_put_failure; return 0; nla_put_failure: diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 27ac95a63429..24d69dbca4d6 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -29,6 +29,8 @@ * Kazunori MIYAZAWA @USAGI: change output process to use ip6_append_data */ +#define pr_fmt(fmt) "IPv6: " fmt + #include <linux/module.h> #include <linux/errno.h> #include <linux/types.h> @@ -129,7 +131,7 @@ void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos) * --ANK (980726) */ -static int is_ineligible(struct sk_buff *skb) +static bool is_ineligible(const struct sk_buff *skb) { int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data; int len = skb->len - ptr; @@ -137,11 +139,11 @@ static int is_ineligible(struct sk_buff *skb) __be16 frag_off; if (len < 0) - return 1; + return true; ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off); if (ptr < 0) - return 0; + return false; if (nexthdr == IPPROTO_ICMPV6) { u8 _type, *tp; tp = skb_header_pointer(skb, @@ -149,9 +151,9 @@ static int is_ineligible(struct sk_buff *skb) sizeof(_type), &_type); if (tp == NULL || !(*tp & ICMPV6_INFOMSG_MASK)) - return 1; + return true; } - return 0; + return false; } /* @@ -186,14 +188,16 @@ static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type, } else { struct rt6_info *rt = (struct rt6_info *)dst; int tmo = net->ipv6.sysctl.icmpv6_time; + struct inet_peer *peer; /* Give more bandwidth to wider prefixes. */ if (rt->rt6i_dst.plen < 128) tmo >>= ((128 - rt->rt6i_dst.plen)>>5); - if (!rt->rt6i_peer) - rt6_bind_peer(rt, 1); - res = inet_peer_xrlim_allow(rt->rt6i_peer, tmo); + peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); + res = inet_peer_xrlim_allow(peer, tmo); + if (peer) + inet_putpeer(peer); } dst_release(dst); return res; @@ -206,14 +210,14 @@ static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type, * highest-order two bits set to 10 */ -static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset) +static bool opt_unrec(struct sk_buff *skb, __u32 offset) { u8 _optval, *op; offset += skb_network_offset(skb); op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval); if (op == NULL) - return 1; + return true; return (*op & 0xC0) == 0x80; } @@ -498,7 +502,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) err = ip6_append_data(sk, icmpv6_getfrag, &msg, len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr), hlimit, - np->tclass, NULL, &fl6, (struct rt6_info*)dst, + np->tclass, NULL, &fl6, (struct rt6_info *)dst, MSG_DONTWAIT, np->dontfrag); if (err) { ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS); @@ -579,7 +583,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl6, - (struct rt6_info*)dst, MSG_DONTWAIT, + (struct rt6_info *)dst, MSG_DONTWAIT, np->dontfrag); if (err) { @@ -594,13 +598,12 @@ out: icmpv6_xmit_unlock(sk); } -static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) +void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) { const struct inet6_protocol *ipprot; int inner_offset; - int hash; - u8 nexthdr; __be16 frag_off; + u8 nexthdr; if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) return; @@ -627,10 +630,8 @@ static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) --ANK (980726) */ - hash = nexthdr & (MAX_INET_PROTOS - 1); - rcu_read_lock(); - ipprot = rcu_dereference(inet6_protos[hash]); + ipprot = rcu_dereference(inet6_protos[nexthdr]); if (ipprot && ipprot->err_handler) ipprot->err_handler(skb, NULL, type, code, inner_offset, info); rcu_read_unlock(); @@ -647,7 +648,6 @@ static int icmpv6_rcv(struct sk_buff *skb) struct net_device *dev = skb->dev; struct inet6_dev *idev = __in6_dev_get(dev); const struct in6_addr *saddr, *daddr; - const struct ipv6hdr *orig_hdr; struct icmp6hdr *hdr; u8 type; @@ -659,7 +659,7 @@ static int icmpv6_rcv(struct sk_buff *skb) XFRM_STATE_ICMP)) goto drop_no_count; - if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(*orig_hdr))) + if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr))) goto drop_no_count; nh = skb_network_offset(skb); @@ -720,9 +720,6 @@ static int icmpv6_rcv(struct sk_buff *skb) if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto discard_it; hdr = icmp6_hdr(skb); - orig_hdr = (struct ipv6hdr *) (hdr + 1); - rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev, - ntohl(hdr->icmp6_mtu)); /* * Drop through to notify @@ -820,9 +817,7 @@ static int __net_init icmpv6_sk_init(struct net *net) err = inet_ctl_sock_create(&sk, PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, net); if (err < 0) { - printk(KERN_ERR - "Failed to initialize the ICMP6 control socket " - "(err %d).\n", + pr_err("Failed to initialize the ICMP6 control socket (err %d)\n", err); goto fail; } @@ -881,7 +876,7 @@ int __init icmpv6_init(void) return 0; fail: - printk(KERN_ERR "Failed to register ICMP6 protocol\n"); + pr_err("Failed to register ICMP6 protocol\n"); unregister_pernet_subsys(&icmpv6_sk_ops); return err; } @@ -950,7 +945,6 @@ int icmpv6_err_convert(u8 type, u8 code, int *err) return fatal; } - EXPORT_SYMBOL(icmpv6_err_convert); #ifdef CONFIG_SYSCTL diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 02dd203d9eac..0251a6005be8 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -28,7 +28,7 @@ #include <net/inet6_connection_sock.h> int inet6_csk_bind_conflict(const struct sock *sk, - const struct inet_bind_bucket *tb) + const struct inet_bind_bucket *tb, bool relax) { const struct sock *sk2; const struct hlist_node *node; @@ -55,26 +55,26 @@ int inet6_csk_bind_conflict(const struct sock *sk, EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict); struct dst_entry *inet6_csk_route_req(struct sock *sk, + struct flowi6 *fl6, const struct request_sock *req) { struct inet6_request_sock *treq = inet6_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *final_p, final; struct dst_entry *dst; - struct flowi6 fl6; - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_TCP; - fl6.daddr = treq->rmt_addr; - final_p = fl6_update_dst(&fl6, np->opt, &final); - fl6.saddr = treq->loc_addr; - fl6.flowi6_oif = sk->sk_bound_dev_if; - fl6.flowi6_mark = sk->sk_mark; - fl6.fl6_dport = inet_rsk(req)->rmt_port; - fl6.fl6_sport = inet_rsk(req)->loc_port; - security_req_classify_flow(req, flowi6_to_flowi(&fl6)); - - dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); + memset(fl6, 0, sizeof(*fl6)); + fl6->flowi6_proto = IPPROTO_TCP; + fl6->daddr = treq->rmt_addr; + final_p = fl6_update_dst(fl6, np->opt, &final); + fl6->saddr = treq->loc_addr; + fl6->flowi6_oif = treq->iif; + fl6->flowi6_mark = sk->sk_mark; + fl6->fl6_dport = inet_rsk(req)->rmt_port; + fl6->fl6_sport = inet_rsk(req)->loc_port; + security_req_classify_flow(req, flowi6_to_flowi(fl6)); + + dst = ip6_dst_lookup_flow(sk, fl6, final_p, false); if (IS_ERR(dst)) return NULL; @@ -171,7 +171,8 @@ EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr); static inline void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst, - struct in6_addr *daddr, struct in6_addr *saddr) + const struct in6_addr *daddr, + const struct in6_addr *saddr) { __ip6_dst_store(sk, dst, daddr, saddr); @@ -203,43 +204,52 @@ struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie) return dst; } -int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) +static struct dst_entry *inet6_csk_route_socket(struct sock *sk, + struct flowi6 *fl6) { - struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); - struct flowi6 fl6; - struct dst_entry *dst; struct in6_addr *final_p, final; - int res; + struct dst_entry *dst; - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_proto = sk->sk_protocol; - fl6.daddr = np->daddr; - fl6.saddr = np->saddr; - fl6.flowlabel = np->flow_label; - IP6_ECN_flow_xmit(sk, fl6.flowlabel); - fl6.flowi6_oif = sk->sk_bound_dev_if; - fl6.flowi6_mark = sk->sk_mark; - fl6.fl6_sport = inet->inet_sport; - fl6.fl6_dport = inet->inet_dport; - security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); + memset(fl6, 0, sizeof(*fl6)); + fl6->flowi6_proto = sk->sk_protocol; + fl6->daddr = np->daddr; + fl6->saddr = np->saddr; + fl6->flowlabel = np->flow_label; + IP6_ECN_flow_xmit(sk, fl6->flowlabel); + fl6->flowi6_oif = sk->sk_bound_dev_if; + fl6->flowi6_mark = sk->sk_mark; + fl6->fl6_sport = inet->inet_sport; + fl6->fl6_dport = inet->inet_dport; + security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); - final_p = fl6_update_dst(&fl6, np->opt, &final); + final_p = fl6_update_dst(fl6, np->opt, &final); dst = __inet6_csk_dst_check(sk, np->dst_cookie); + if (!dst) { + dst = ip6_dst_lookup_flow(sk, fl6, final_p, false); - if (dst == NULL) { - dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); + if (!IS_ERR(dst)) + __inet6_csk_dst_store(sk, dst, NULL, NULL); + } + return dst; +} - if (IS_ERR(dst)) { - sk->sk_err_soft = -PTR_ERR(dst); - sk->sk_route_caps = 0; - kfree_skb(skb); - return PTR_ERR(dst); - } +int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) +{ + struct sock *sk = skb->sk; + struct ipv6_pinfo *np = inet6_sk(sk); + struct flowi6 fl6; + struct dst_entry *dst; + int res; - __inet6_csk_dst_store(sk, dst, NULL, NULL); + dst = inet6_csk_route_socket(sk, &fl6); + if (IS_ERR(dst)) { + sk->sk_err_soft = -PTR_ERR(dst); + sk->sk_route_caps = 0; + kfree_skb(skb); + return PTR_ERR(dst); } rcu_read_lock(); @@ -253,3 +263,16 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) return res; } EXPORT_SYMBOL_GPL(inet6_csk_xmit); + +struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu) +{ + struct flowi6 fl6; + struct dst_entry *dst = inet6_csk_route_socket(sk, &fl6); + + if (IS_ERR(dst)) + return NULL; + dst->ops->update_pmtu(dst, sk, NULL, mtu); + + return inet6_csk_route_socket(sk, &fl6); +} +EXPORT_SYMBOL_GPL(inet6_csk_update_pmtu); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 5b27fbcae346..13690d650c3e 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -18,6 +18,9 @@ * routing table. * Ville Nuorvala: Fixed routing subtrees. */ + +#define pr_fmt(fmt) "IPv6: " fmt + #include <linux/errno.h> #include <linux/types.h> #include <linux/net.h> @@ -38,7 +41,7 @@ #define RT6_DEBUG 2 #if RT6_DEBUG >= 3 -#define RT6_TRACE(x...) printk(KERN_DEBUG x) +#define RT6_TRACE(x...) pr_debug(x) #else #define RT6_TRACE(x...) do { ; } while (0) #endif @@ -194,6 +197,7 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id) table->tb6_id = id; table->tb6_root.leaf = net->ipv6.ip6_null_entry; table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; + inet_peer_base_init(&table->tb6_peers); } return table; @@ -451,12 +455,10 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr, !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) { if (!allow_create) { if (replace_required) { - pr_warn("IPv6: Can't replace route, " - "no match found\n"); + pr_warn("Can't replace route, no match found\n"); return ERR_PTR(-ENOENT); } - pr_warn("IPv6: NLM_F_CREATE should be set " - "when creating new route\n"); + pr_warn("NLM_F_CREATE should be set when creating new route\n"); } goto insert_above; } @@ -499,11 +501,10 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr, * That would keep IPv6 consistent with IPv4 */ if (replace_required) { - pr_warn("IPv6: Can't replace route, no match found\n"); + pr_warn("Can't replace route, no match found\n"); return ERR_PTR(-ENOENT); } - pr_warn("IPv6: NLM_F_CREATE should be set " - "when creating new route\n"); + pr_warn("NLM_F_CREATE should be set when creating new route\n"); } /* * We walked to the bottom of tree. @@ -673,11 +674,10 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, &rt->rt6i_gateway)) { if (!(iter->rt6i_flags & RTF_EXPIRES)) return -EEXIST; - iter->dst.expires = rt->dst.expires; - if (!(rt->rt6i_flags & RTF_EXPIRES)) { - iter->rt6i_flags &= ~RTF_EXPIRES; - iter->dst.expires = 0; - } + if (!(rt->rt6i_flags & RTF_EXPIRES)) + rt6_clean_expires(iter); + else + rt6_set_expires(iter, rt->dst.expires); return -EEXIST; } } @@ -697,7 +697,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, */ if (!replace) { if (!add) - pr_warn("IPv6: NLM_F_CREATE should be set when creating new route\n"); + pr_warn("NLM_F_CREATE should be set when creating new route\n"); add: rt->dst.rt6_next = iter; @@ -716,7 +716,7 @@ add: if (!found) { if (add) goto add; - pr_warn("IPv6: NLM_F_REPLACE set, but no existing node found!\n"); + pr_warn("NLM_F_REPLACE set, but no existing node found!\n"); return -ENOENT; } *ins = rt; @@ -769,7 +769,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) replace_required = 1; } if (!allow_create && !replace_required) - pr_warn("IPv6: RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n"); + pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n"); fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr), rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst), @@ -1350,8 +1350,8 @@ static int fib6_walk_continue(struct fib6_walker_t *w) if (w->leaf && fn->fn_flags & RTN_RTINFO) { int err; - if (w->count < w->skip) { - w->count++; + if (w->skip) { + w->skip--; continue; } @@ -1421,7 +1421,8 @@ static int fib6_clean_node(struct fib6_walker_t *w) res = fib6_del(rt, &info); if (res) { #if RT6_DEBUG >= 2 - printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res); + pr_debug("%s: del failed: rt=%p@%p err=%d\n", + __func__, rt, rt->rt6i_node, res); #endif continue; } @@ -1561,7 +1562,7 @@ static int fib6_age(struct rt6_info *rt, void *arg) neigh_flags = neigh->flags; neigh_release(neigh); } - if (neigh_flags & NTF_ROUTER) { + if (!(neigh_flags & NTF_ROUTER)) { RT6_TRACE("purging route %p via non-router but gateway\n", rt); return -1; @@ -1633,6 +1634,7 @@ static int __net_init fib6_net_init(struct net *net) net->ipv6.fib6_main_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry; net->ipv6.fib6_main_tbl->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; + inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers); #ifdef CONFIG_IPV6_MULTIPLE_TABLES net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl), @@ -1643,6 +1645,7 @@ static int __net_init fib6_net_init(struct net *net) net->ipv6.fib6_local_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry; net->ipv6.fib6_local_tbl->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; + inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers); #endif fib6_tables_init(net); @@ -1666,8 +1669,10 @@ static void fib6_net_exit(struct net *net) del_timer_sync(&net->ipv6.ip6_fib_timer); #ifdef CONFIG_IPV6_MULTIPLE_TABLES + inetpeer_invalidate_tree(&net->ipv6.fib6_local_tbl->tb6_peers); kfree(net->ipv6.fib6_local_tbl); #endif + inetpeer_invalidate_tree(&net->ipv6.fib6_main_tbl->tb6_peers); kfree(net->ipv6.fib6_main_tbl); kfree(net->ipv6.fib_table_hash); kfree(net->ipv6.rt6_stats); diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index b7867a1215b1..9772fbd8a3f5 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -294,6 +294,7 @@ struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions * opt_space, opt_space->opt_flen = fopt->opt_flen; return opt_space; } +EXPORT_SYMBOL_GPL(fl6_merge_options); static unsigned long check_linger(unsigned long ttl) { @@ -432,32 +433,32 @@ static int mem_check(struct sock *sk) return 0; } -static int ipv6_hdr_cmp(struct ipv6_opt_hdr *h1, struct ipv6_opt_hdr *h2) +static bool ipv6_hdr_cmp(struct ipv6_opt_hdr *h1, struct ipv6_opt_hdr *h2) { if (h1 == h2) - return 0; + return false; if (h1 == NULL || h2 == NULL) - return 1; + return true; if (h1->hdrlen != h2->hdrlen) - return 1; + return true; return memcmp(h1+1, h2+1, ((h1->hdrlen+1)<<3) - sizeof(*h1)); } -static int ipv6_opt_cmp(struct ipv6_txoptions *o1, struct ipv6_txoptions *o2) +static bool ipv6_opt_cmp(struct ipv6_txoptions *o1, struct ipv6_txoptions *o2) { if (o1 == o2) - return 0; + return false; if (o1 == NULL || o2 == NULL) - return 1; + return true; if (o1->opt_nflen != o2->opt_nflen) - return 1; + return true; if (ipv6_hdr_cmp(o1->hopopt, o2->hopopt)) - return 1; + return true; if (ipv6_hdr_cmp(o1->dst0opt, o2->dst0opt)) - return 1; + return true; if (ipv6_hdr_cmp((struct ipv6_opt_hdr *)o1->srcrt, (struct ipv6_opt_hdr *)o2->srcrt)) - return 1; - return 0; + return true; + return false; } static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl, @@ -705,9 +706,9 @@ static int ip6fl_seq_show(struct seq_file *seq, void *v) struct ip6_flowlabel *fl = v; seq_printf(seq, "%05X %-1d %-6d %-6d %-6ld %-8ld %pi6 %-4d\n", - (unsigned)ntohl(fl->label), + (unsigned int)ntohl(fl->label), fl->share, - (unsigned)fl->owner, + (int)fl->owner, atomic_read(&fl->users), fl->linger/HZ, (long)(fl->expires - jiffies)/HZ, diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 1ca5d45a12e8..5ab923e51af3 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -168,12 +168,12 @@ drop: static int ip6_input_finish(struct sk_buff *skb) { + struct net *net = dev_net(skb_dst(skb)->dev); const struct inet6_protocol *ipprot; - unsigned int nhoff; - int nexthdr, raw; - u8 hash; struct inet6_dev *idev; - struct net *net = dev_net(skb_dst(skb)->dev); + unsigned int nhoff; + int nexthdr; + bool raw; /* * Parse extension headers @@ -188,9 +188,7 @@ resubmit: nexthdr = skb_network_header(skb)[nhoff]; raw = raw6_local_deliver(skb, nexthdr); - - hash = nexthdr & (MAX_INET_PROTOS - 1); - if ((ipprot = rcu_dereference(inet6_protos[hash])) != NULL) { + if ((ipprot = rcu_dereference(inet6_protos[nexthdr])) != NULL) { int ret; if (ipprot->flags & INET6_PROTO_FINAL) { @@ -251,7 +249,7 @@ int ip6_input(struct sk_buff *skb) int ip6_mc_input(struct sk_buff *skb) { const struct ipv6hdr *hdr; - int deliver; + bool deliver; IP6_UPD_PO_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INMCAST, @@ -287,7 +285,7 @@ int ip6_mc_input(struct sk_buff *skb) * is for MLD (0x0000). */ if ((ptr[2] | ptr[3]) == 0) { - deliver = 0; + deliver = false; if (!ipv6_ext_hdr(nexthdr)) { /* BUG */ @@ -312,7 +310,7 @@ int ip6_mc_input(struct sk_buff *skb) case ICMPV6_MGM_REPORT: case ICMPV6_MGM_REDUCTION: case ICMPV6_MLD2_REPORT: - deliver = 1; + deliver = true; break; } goto out; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index b7ca46161cb9..5b2d63ed793e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -83,24 +83,12 @@ int ip6_local_out(struct sk_buff *skb) } EXPORT_SYMBOL_GPL(ip6_local_out); -/* dev_loopback_xmit for use with netfilter. */ -static int ip6_dev_loopback_xmit(struct sk_buff *newskb) -{ - skb_reset_mac_header(newskb); - __skb_pull(newskb, skb_network_offset(newskb)); - newskb->pkt_type = PACKET_LOOPBACK; - newskb->ip_summed = CHECKSUM_UNNECESSARY; - WARN_ON(!skb_dst(newskb)); - - netif_rx_ni(newskb); - return 0; -} - static int ip6_finish_output2(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; struct neighbour *neigh; + struct rt6_info *rt; skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; @@ -121,7 +109,7 @@ static int ip6_finish_output2(struct sk_buff *skb) if (newskb) NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, newskb, NULL, newskb->dev, - ip6_dev_loopback_xmit); + dev_loopback_xmit); if (ipv6_hdr(skb)->hop_limit == 0) { IP6_INC_STATS(dev_net(dev), idev, @@ -136,9 +124,10 @@ static int ip6_finish_output2(struct sk_buff *skb) } rcu_read_lock(); - neigh = dst_get_neighbour_noref(dst); + rt = (struct rt6_info *) dst; + neigh = rt->n; if (neigh) { - int res = neigh_output(neigh, skb); + int res = dst_neigh_output(dst, neigh, skb); rcu_read_unlock(); return res; @@ -210,7 +199,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, kfree_skb(skb); return -ENOBUFS; } - kfree_skb(skb); + consume_skb(skb); skb = skb2; skb_set_owner_w(skb, sk); } @@ -252,8 +241,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, dst->dev, dst_output); } - if (net_ratelimit()) - printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n"); + net_dbg_ratelimited("IPv6: sending pkt_too_big to self\n"); skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); @@ -464,6 +452,7 @@ int ip6_forward(struct sk_buff *skb) */ if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) { struct in6_addr *target = NULL; + struct inet_peer *peer; struct rt6_info *rt; /* @@ -477,14 +466,15 @@ int ip6_forward(struct sk_buff *skb) else target = &hdr->daddr; - if (!rt->rt6i_peer) - rt6_bind_peer(rt, 1); + peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); /* Limit redirects both by destination (here) and by source (inside ndisc_send_redirect) */ - if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ)) + if (inet_peer_xrlim_allow(peer, 1*HZ)) ndisc_send_redirect(skb, target); + if (peer) + inet_putpeer(peer); } else { int addrtype = ipv6_addr_type(&hdr->saddr); @@ -527,6 +517,7 @@ int ip6_forward(struct sk_buff *skb) hdr->hop_limit--; IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); + IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len); return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish); @@ -604,12 +595,13 @@ void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) if (rt && !(rt->dst.flags & DST_NOPEER)) { struct inet_peer *peer; + struct net *net; - if (!rt->rt6i_peer) - rt6_bind_peer(rt, 1); - peer = rt->rt6i_peer; + net = dev_net(rt->dst.dev); + peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); if (peer) { fhdr->identification = htonl(inet_getid(peer, 0)); + inet_putpeer(peer); return; } } @@ -644,7 +636,10 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) /* We must not fragment if the socket is set to force MTU discovery * or if the skb it not generated by a local socket. */ - if (!skb->local_df && skb->len > mtu) { + if (unlikely(!skb->local_df && skb->len > mtu)) { + if (skb->sk && dst_allfrag(skb_dst(skb))) + sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK); + skb->dev = skb_dst(skb)->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), @@ -789,6 +784,10 @@ slow_path_clean: } slow_path: + if ((skb->ip_summed == CHECKSUM_PARTIAL) && + skb_checksum_help(skb)) + goto fail; + left = skb->len - hlen; /* Space per frame */ ptr = hlen; /* Where to start from */ @@ -889,7 +888,7 @@ slow_path: } IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGOKS); - kfree_skb(skb); + consume_skb(skb); return err; fail: @@ -953,6 +952,7 @@ static int ip6_dst_lookup_tail(struct sock *sk, struct net *net = sock_net(sk); #ifdef CONFIG_IPV6_OPTIMISTIC_DAD struct neighbour *n; + struct rt6_info *rt; #endif int err; @@ -981,7 +981,8 @@ static int ip6_dst_lookup_tail(struct sock *sk, * dst entry of the nexthop router */ rcu_read_lock(); - n = dst_get_neighbour_noref(*dst); + rt = (struct rt6_info *) *dst; + n = rt->n; if (n && !(n->nud_state & NUD_VALID)) { struct inet6_ifaddr *ifp; struct flowi6 fl_gw6; @@ -1181,6 +1182,29 @@ static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src, return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; } +static void ip6_append_data_mtu(int *mtu, + int *maxfraglen, + unsigned int fragheaderlen, + struct sk_buff *skb, + struct rt6_info *rt) +{ + if (!(rt->dst.flags & DST_XFRM_TUNNEL)) { + if (skb == NULL) { + /* first fragment, reserve header_len */ + *mtu = *mtu - rt->dst.header_len; + + } else { + /* + * this fragment is not first, the headers + * space is regarded as data space. + */ + *mtu = dst_mtu(rt->dst.path); + } + *maxfraglen = ((*mtu - fragheaderlen) & ~7) + + fragheaderlen - sizeof(struct frag_hdr); + } +} + int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, @@ -1190,7 +1214,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct inet_cork *cork; - struct sk_buff *skb; + struct sk_buff *skb, *skb_prev = NULL; unsigned int maxfraglen, fragheaderlen; int exthdrlen; int dst_exthdrlen; @@ -1199,7 +1223,6 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int copy; int err; int offset = 0; - int csummode = CHECKSUM_NONE; __u8 tx_flags = 0; if (flags&MSG_PROBE) @@ -1248,8 +1271,12 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, inet->cork.fl.u.ip6 = *fl6; np->cork.hop_limit = hlimit; np->cork.tclass = tclass; - mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? - rt->dst.dev->mtu : dst_mtu(&rt->dst); + if (rt->dst.flags & DST_XFRM_TUNNEL) + mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? + rt->dst.dev->mtu : dst_mtu(&rt->dst); + else + mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? + rt->dst.dev->mtu : dst_mtu(rt->dst.path); if (np->frag_size < mtu) { if (np->frag_size) mtu = np->frag_size; @@ -1345,25 +1372,27 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, unsigned int fraglen; unsigned int fraggap; unsigned int alloclen; - struct sk_buff *skb_prev; alloc_new_skb: - skb_prev = skb; - /* There's no room in the current skb */ - if (skb_prev) - fraggap = skb_prev->len - maxfraglen; + if (skb) + fraggap = skb->len - maxfraglen; else fraggap = 0; + /* update mtu and maxfraglen if necessary */ + if (skb == NULL || skb_prev == NULL) + ip6_append_data_mtu(&mtu, &maxfraglen, + fragheaderlen, skb, rt); + + skb_prev = skb; /* * If remaining data exceeds the mtu, * we know we need more fragment(s). */ datalen = length + fraggap; - if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) - datalen = maxfraglen - fragheaderlen; - fraglen = datalen + fragheaderlen; + if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) + datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len; if ((flags & MSG_MORE) && !(rt->dst.dev->features&NETIF_F_SG)) alloclen = mtu; @@ -1372,13 +1401,16 @@ alloc_new_skb: alloclen += dst_exthdrlen; - /* - * The last fragment gets additional space at tail. - * Note: we overallocate on fragments with MSG_MODE - * because we have no idea if we're the last one. - */ - if (datalen == length + fraggap) - alloclen += rt->dst.trailer_len; + if (datalen != length + fraggap) { + /* + * this is not the last fragment, the trailer + * space is regarded as data space. + */ + datalen += rt->dst.trailer_len; + } + + alloclen += rt->dst.trailer_len; + fraglen = datalen + fragheaderlen; /* * We just reserve space for fragment header. @@ -1412,7 +1444,7 @@ alloc_new_skb: /* * Fill in the control structures */ - skb->ip_summed = csummode; + skb->ip_summed = CHECKSUM_NONE; skb->csum = 0; /* reserve for fragmentation and ipsec header */ skb_reserve(skb, hh_len + sizeof(struct frag_hdr) + @@ -1455,7 +1487,6 @@ alloc_new_skb: transhdrlen = 0; exthdrlen = 0; dst_exthdrlen = 0; - csummode = CHECKSUM_NONE; /* * Put the packet on the pending queue @@ -1535,6 +1566,7 @@ error: IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); return err; } +EXPORT_SYMBOL_GPL(ip6_append_data); static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np) { @@ -1638,6 +1670,7 @@ error: IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); goto out; } +EXPORT_SYMBOL_GPL(ip6_push_pending_frames); void ip6_flush_pending_frames(struct sock *sk) { @@ -1652,3 +1685,4 @@ void ip6_flush_pending_frames(struct sock *sk) ip6_cork_release(inet_sk(sk), inet6_sk(sk)); } +EXPORT_SYMBOL_GPL(ip6_flush_pending_frames); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index aa21da6a09cd..9a1d5fe6aef8 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -18,6 +18,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/capability.h> #include <linux/errno.h> @@ -38,6 +40,7 @@ #include <linux/rtnetlink.h> #include <linux/netfilter_ipv6.h> #include <linux/slab.h> +#include <linux/hash.h> #include <asm/uaccess.h> #include <linux/atomic.h> @@ -60,7 +63,7 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS_NETDEV("ip6tnl0"); #ifdef IP6_TNL_DEBUG -#define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __func__) +#define IP6_TNL_TRACE(x...) pr_debug("%s:" x "\n", __func__) #else #define IP6_TNL_TRACE(x...) do {;} while(0) #endif @@ -68,11 +71,15 @@ MODULE_ALIAS_NETDEV("ip6tnl0"); #define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK) #define IPV6_TCLASS_SHIFT 20 -#define HASH_SIZE 32 +#define HASH_SIZE_SHIFT 5 +#define HASH_SIZE (1 << HASH_SIZE_SHIFT) + +static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2) +{ + u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2); -#define HASH(addr) ((__force u32)((addr)->s6_addr32[0] ^ (addr)->s6_addr32[1] ^ \ - (addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \ - (HASH_SIZE - 1)) + return hash_32(hash, HASH_SIZE_SHIFT); +} static int ip6_tnl_dev_init(struct net_device *dev); static void ip6_tnl_dev_setup(struct net_device *dev); @@ -164,12 +171,11 @@ static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst) static struct ip6_tnl * ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local) { - unsigned int h0 = HASH(remote); - unsigned int h1 = HASH(local); + unsigned int hash = HASH(remote, local); struct ip6_tnl *t; struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); - for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[h0 ^ h1]) { + for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { if (ipv6_addr_equal(local, &t->parms.laddr) && ipv6_addr_equal(remote, &t->parms.raddr) && (t->dev->flags & IFF_UP)) @@ -198,12 +204,12 @@ ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct ip6_tnl_parm *p) { const struct in6_addr *remote = &p->raddr; const struct in6_addr *local = &p->laddr; - unsigned h = 0; + unsigned int h = 0; int prio = 0; if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) { prio = 1; - h = HASH(remote) ^ HASH(local); + h = HASH(remote, local); } return &ip6n->tnls[prio][h]; } @@ -250,7 +256,7 @@ static void ip6_dev_free(struct net_device *dev) } /** - * ip6_tnl_create() - create a new tunnel + * ip6_tnl_create - create a new tunnel * @p: tunnel parameters * @pt: pointer to new tunnel * @@ -460,19 +466,14 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, struct ipv6_tlv_tnl_enc_lim *tel; __u32 mtu; case ICMPV6_DEST_UNREACH: - if (net_ratelimit()) - printk(KERN_WARNING - "%s: Path to destination invalid " - "or inactive!\n", t->parms.name); + net_warn_ratelimited("%s: Path to destination invalid or inactive!\n", + t->parms.name); rel_msg = 1; break; case ICMPV6_TIME_EXCEED: if ((*code) == ICMPV6_EXC_HOPLIMIT) { - if (net_ratelimit()) - printk(KERN_WARNING - "%s: Too small hop limit or " - "routing loop in tunnel!\n", - t->parms.name); + net_warn_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n", + t->parms.name); rel_msg = 1; } break; @@ -484,17 +485,13 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, if (teli && teli == *info - 2) { tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli]; if (tel->encap_limit == 0) { - if (net_ratelimit()) - printk(KERN_WARNING - "%s: Too small encapsulation " - "limit or routing loop in " - "tunnel!\n", t->parms.name); + net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n", + t->parms.name); rel_msg = 1; } - } else if (net_ratelimit()) { - printk(KERN_WARNING - "%s: Recipient unable to parse tunneled " - "packet!\n ", t->parms.name); + } else { + net_warn_ratelimited("%s: Recipient unable to parse tunneled packet!\n", + t->parms.name); } break; case ICMPV6_PKT_TOOBIG: @@ -557,6 +554,9 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, rel_type = ICMP_DEST_UNREACH; rel_code = ICMP_FRAG_NEEDED; break; + case NDISC_REDIRECT: + rel_type = ICMP_REDIRECT; + rel_code = ICMP_REDIR_HOST; default: return 0; } @@ -613,8 +613,10 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (rel_info > dst_mtu(skb_dst(skb2))) goto out; - skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), rel_info); + skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, rel_info); } + if (rel_type == ICMP_REDIRECT) + skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2); icmp_send(skb2, rel_type, rel_code, htonl(rel_info)); @@ -691,24 +693,50 @@ static void ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t, IP6_ECN_set_ce(ipv6_hdr(skb)); } +static __u32 ip6_tnl_get_cap(struct ip6_tnl *t, + const struct in6_addr *laddr, + const struct in6_addr *raddr) +{ + struct ip6_tnl_parm *p = &t->parms; + int ltype = ipv6_addr_type(laddr); + int rtype = ipv6_addr_type(raddr); + __u32 flags = 0; + + if (ltype == IPV6_ADDR_ANY || rtype == IPV6_ADDR_ANY) { + flags = IP6_TNL_F_CAP_PER_PACKET; + } else if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) && + rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) && + !((ltype|rtype) & IPV6_ADDR_LOOPBACK) && + (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) { + if (ltype&IPV6_ADDR_UNICAST) + flags |= IP6_TNL_F_CAP_XMIT; + if (rtype&IPV6_ADDR_UNICAST) + flags |= IP6_TNL_F_CAP_RCV; + } + return flags; +} + /* called with rcu_read_lock() */ -static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t) +static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t, + const struct in6_addr *laddr, + const struct in6_addr *raddr) { struct ip6_tnl_parm *p = &t->parms; int ret = 0; struct net *net = dev_net(t->dev); - if (p->flags & IP6_TNL_F_CAP_RCV) { + if ((p->flags & IP6_TNL_F_CAP_RCV) || + ((p->flags & IP6_TNL_F_CAP_PER_PACKET) && + (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_RCV))) { struct net_device *ldev = NULL; if (p->link) ldev = dev_get_by_index_rcu(net, p->link); - if ((ipv6_addr_is_multicast(&p->laddr) || - likely(ipv6_chk_addr(net, &p->laddr, ldev, 0))) && - likely(!ipv6_chk_addr(net, &p->raddr, NULL, 0))) + if ((ipv6_addr_is_multicast(laddr) || + likely(ipv6_chk_addr(net, laddr, ldev, 0))) && + likely(!ipv6_chk_addr(net, raddr, NULL, 0))) ret = 1; - } return ret; } @@ -747,7 +775,7 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, goto discard; } - if (!ip6_tnl_rcv_ctl(t)) { + if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) { t->dev->stats.rx_dropped++; rcu_read_unlock(); goto discard; @@ -825,7 +853,7 @@ static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit) * 0 else **/ -static inline int +static inline bool ip6_tnl_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr) { return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr); @@ -845,15 +873,12 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t) ldev = dev_get_by_index_rcu(net, p->link); if (unlikely(!ipv6_chk_addr(net, &p->laddr, ldev, 0))) - printk(KERN_WARNING - "%s xmit: Local address not yet configured!\n", - p->name); + pr_warn("%s xmit: Local address not yet configured!\n", + p->name); else if (!ipv6_addr_is_multicast(&p->raddr) && unlikely(ipv6_chk_addr(net, &p->raddr, NULL, 0))) - printk(KERN_WARNING - "%s xmit: Routing loop! " - "Remote address found on this node!\n", - p->name); + pr_warn("%s xmit: Routing loop! Remote address found on this node!\n", + p->name); else ret = 1; rcu_read_unlock(); @@ -919,10 +944,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, if (tdev == dev) { stats->collisions++; - if (net_ratelimit()) - printk(KERN_WARNING - "%s: Local routing loop detected!\n", - t->parms.name); + net_warn_ratelimited("%s: Local routing loop detected!\n", + t->parms.name); goto tx_err_dst_release; } mtu = dst_mtu(dst) - sizeof (*ipv6h); @@ -933,7 +956,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); if (skb->len > mtu) { *pmtu = mtu; err = -EMSGSIZE; @@ -954,7 +977,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, if (skb->sk) skb_set_owner_w(new_skb, skb->sk); - kfree_skb(skb); + consume_skb(skb); skb = new_skb; } skb_dst_drop(skb); @@ -1126,25 +1149,6 @@ tx_err: return NETDEV_TX_OK; } -static void ip6_tnl_set_cap(struct ip6_tnl *t) -{ - struct ip6_tnl_parm *p = &t->parms; - int ltype = ipv6_addr_type(&p->laddr); - int rtype = ipv6_addr_type(&p->raddr); - - p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV); - - if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) && - rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) && - !((ltype|rtype) & IPV6_ADDR_LOOPBACK) && - (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) { - if (ltype&IPV6_ADDR_UNICAST) - p->flags |= IP6_TNL_F_CAP_XMIT; - if (rtype&IPV6_ADDR_UNICAST) - p->flags |= IP6_TNL_F_CAP_RCV; - } -} - static void ip6_tnl_link_config(struct ip6_tnl *t) { struct net_device *dev = t->dev; @@ -1165,7 +1169,8 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL)) fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo; - ip6_tnl_set_cap(t); + p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET); + p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr); if (p->flags&IP6_TNL_F_CAP_XMIT && p->flags&IP6_TNL_F_CAP_RCV) dev->flags |= IFF_POINTOPOINT; @@ -1450,6 +1455,9 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev) t->parms.proto = IPPROTO_IPV6; dev_hold(dev); + + ip6_tnl_link_config(t); + rcu_assign_pointer(ip6n->tnls_wc[0], t); return 0; } @@ -1553,13 +1561,13 @@ static int __init ip6_tunnel_init(void) err = xfrm6_tunnel_register(&ip4ip6_handler, AF_INET); if (err < 0) { - printk(KERN_ERR "ip6_tunnel init: can't register ip4ip6\n"); + pr_err("%s: can't register ip4ip6\n", __func__); goto out_ip4ip6; } err = xfrm6_tunnel_register(&ip6ip6_handler, AF_INET6); if (err < 0) { - printk(KERN_ERR "ip6_tunnel init: can't register ip6ip6\n"); + pr_err("%s: can't register ip6ip6\n", __func__); goto out_ip6ip6; } @@ -1580,10 +1588,10 @@ out_pernet: static void __exit ip6_tunnel_cleanup(void) { if (xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET)) - printk(KERN_INFO "ip6_tunnel close: can't deregister ip4ip6\n"); + pr_info("%s: can't deregister ip4ip6\n", __func__); if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6)) - printk(KERN_INFO "ip6_tunnel close: can't deregister ip6ip6\n"); + pr_info("%s: can't deregister ip6ip6\n", __func__); unregister_pernet_device(&ip6_tnl_net_ops); } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 8110362e0af5..4532973f0dd4 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1147,8 +1147,7 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt, */ ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb); if (ret < 0) { - if (net_ratelimit()) - printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n"); + net_warn_ratelimited("mroute6: pending queue full, dropping entries\n"); kfree_skb(skb); } @@ -1351,7 +1350,7 @@ int __init ip6_mr_init(void) goto reg_notif_fail; #ifdef CONFIG_IPV6_PIMSM_V2 if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) { - printk(KERN_ERR "ip6_mr_init: can't add PIM protocol\n"); + pr_err("%s: can't add PIM protocol\n", __func__); err = -EAGAIN; goto add_proto_fail; } @@ -1887,6 +1886,8 @@ static inline int ip6mr_forward2_finish(struct sk_buff *skb) { IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTFORWDATAGRAMS); + IP6_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_OUTOCTETS, skb->len); return dst_output(skb); } @@ -2103,8 +2104,9 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, if (c->mf6c_parent >= MAXMIFS) return -ENOENT; - if (MIF_EXISTS(mrt, c->mf6c_parent)) - RTA_PUT(skb, RTA_IIF, 4, &mrt->vif6_table[c->mf6c_parent].dev->ifindex); + if (MIF_EXISTS(mrt, c->mf6c_parent) && + nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0) + return -EMSGSIZE; mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0)); @@ -2215,14 +2217,15 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, rtm->rtm_src_len = 128; rtm->rtm_tos = 0; rtm->rtm_table = mrt->id; - NLA_PUT_U32(skb, RTA_TABLE, mrt->id); + if (nla_put_u32(skb, RTA_TABLE, mrt->id)) + goto nla_put_failure; rtm->rtm_scope = RT_SCOPE_UNIVERSE; rtm->rtm_protocol = RTPROT_UNSPEC; rtm->rtm_flags = 0; - NLA_PUT(skb, RTA_SRC, 16, &c->mf6c_origin); - NLA_PUT(skb, RTA_DST, 16, &c->mf6c_mcastgrp); - + if (nla_put(skb, RTA_SRC, 16, &c->mf6c_origin) || + nla_put(skb, RTA_DST, 16, &c->mf6c_mcastgrp)) + goto nla_put_failure; if (__ip6mr_fill_mroute(mrt, skb, c, rtm) < 0) goto nla_put_failure; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index bba658d9a03c..7af5aee75d98 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -30,6 +30,9 @@ * The decompression of IP datagram MUST be done after the reassembly, * AH/ESP processing. */ + +#define pr_fmt(fmt) "IPv6: " fmt + #include <linux/module.h> #include <net/ip.h> #include <net/xfrm.h> @@ -43,6 +46,7 @@ #include <linux/list.h> #include <linux/vmalloc.h> #include <linux/rtnetlink.h> +#include <net/ip6_route.h> #include <net/icmp.h> #include <net/ipv6.h> #include <net/protocol.h> @@ -60,7 +64,9 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, (struct ip_comp_hdr *)(skb->data + offset); struct xfrm_state *x; - if (type != ICMPV6_DEST_UNREACH && type != ICMPV6_PKT_TOOBIG) + if (type != ICMPV6_DEST_UNREACH && + type != ICMPV6_PKT_TOOBIG && + type != NDISC_REDIRECT) return; spi = htonl(ntohs(ipcomph->cpi)); @@ -69,8 +75,10 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!x) return; - printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%pI6\n", - spi, &iph->daddr); + if (type == NDISC_REDIRECT) + ip6_redirect(skb, net, 0, 0); + else + ip6_update_pmtu(skb, net, info, 0, 0); xfrm_state_put(x); } @@ -190,11 +198,11 @@ static const struct inet6_protocol ipcomp6_protocol = static int __init ipcomp6_init(void) { if (xfrm_register_type(&ipcomp6_type, AF_INET6) < 0) { - printk(KERN_INFO "ipcomp6 init: can't add xfrm type\n"); + pr_info("%s: can't add xfrm type\n", __func__); return -EAGAIN; } if (inet6_add_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0) { - printk(KERN_INFO "ipcomp6 init: can't add protocol\n"); + pr_info("%s: can't add protocol\n", __func__); xfrm_unregister_type(&ipcomp6_type, AF_INET6); return -EAGAIN; } @@ -204,9 +212,9 @@ static int __init ipcomp6_init(void) static void __exit ipcomp6_fini(void) { if (inet6_del_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0) - printk(KERN_INFO "ipv6 ipcomp close: can't remove protocol\n"); + pr_info("%s: can't remove protocol\n", __func__); if (xfrm_unregister_type(&ipcomp6_type, AF_INET6) < 0) - printk(KERN_INFO "ipv6 ipcomp close: can't remove xfrm type\n"); + pr_info("%s: can't remove xfrm type\n", __func__); } module_init(ipcomp6_init); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 63dd1f89ed7d..ba6d13d1f1e1 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -678,7 +678,6 @@ done: } case MCAST_MSFILTER: { - extern int sysctl_mld_max_msf; struct group_filter *gsf; if (optlen < GROUP_FILTER_SIZE(0)) @@ -943,7 +942,7 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt, } static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen, unsigned flags) + char __user *optval, int __user *optlen, unsigned int flags) { struct ipv6_pinfo *np = inet6_sk(sk); int len; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 16c33e308121..92f8e48e4ba4 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -211,6 +211,9 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) struct ipv6_mc_socklist __rcu **lnk; struct net *net = sock_net(sk); + if (!ipv6_addr_is_multicast(addr)) + return -EINVAL; + spin_lock(&ipv6_sk_mc_lock); for (lnk = &np->ipv6_mc_list; (mc_lst = rcu_dereference_protected(*lnk, @@ -606,13 +609,13 @@ done: return err; } -int inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr, - const struct in6_addr *src_addr) +bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr, + const struct in6_addr *src_addr) { struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_mc_socklist *mc; struct ip6_sf_socklist *psl; - int rv = 1; + bool rv = true; rcu_read_lock(); for_each_pmc_rcu(np, mc) { @@ -621,7 +624,7 @@ int inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr, } if (!mc) { rcu_read_unlock(); - return 1; + return true; } read_lock(&mc->sflock); psl = mc->sflist; @@ -635,9 +638,9 @@ int inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr, break; } if (mc->sfmode == MCAST_INCLUDE && i >= psl->sl_count) - rv = 0; + rv = false; if (mc->sfmode == MCAST_EXCLUDE && i < psl->sl_count) - rv = 0; + rv = false; } read_unlock(&mc->sflock); rcu_read_unlock(); @@ -931,15 +934,15 @@ int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr) /* * identify MLD packets for MLD filter exceptions */ -int ipv6_is_mld(struct sk_buff *skb, int nexthdr) +bool ipv6_is_mld(struct sk_buff *skb, int nexthdr) { struct icmp6hdr *pic; if (nexthdr != IPPROTO_ICMPV6) - return 0; + return false; if (!pskb_may_pull(skb, sizeof(struct icmp6hdr))) - return 0; + return false; pic = icmp6_hdr(skb); @@ -948,22 +951,22 @@ int ipv6_is_mld(struct sk_buff *skb, int nexthdr) case ICMPV6_MGM_REPORT: case ICMPV6_MGM_REDUCTION: case ICMPV6_MLD2_REPORT: - return 1; + return true; default: break; } - return 0; + return false; } /* * check if the interface/address pair is valid */ -int ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, - const struct in6_addr *src_addr) +bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, + const struct in6_addr *src_addr) { struct inet6_dev *idev; struct ifmcaddr6 *mc; - int rv = 0; + bool rv = false; rcu_read_lock(); idev = __in6_dev_get(dev); @@ -990,7 +993,7 @@ int ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, rv = mc->mca_sfcount[MCAST_EXCLUDE] !=0; spin_unlock_bh(&mc->mca_lock); } else - rv = 1; /* don't filter unspecified source */ + rv = true; /* don't filter unspecified source */ } read_unlock_bh(&idev->lock); } @@ -1046,8 +1049,8 @@ static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime) } /* mark EXCLUDE-mode sources */ -static int mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs, - const struct in6_addr *srcs) +static bool mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs, + const struct in6_addr *srcs) { struct ip6_sf_list *psf; int i, scount; @@ -1061,7 +1064,7 @@ static int mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs, if (psf->sf_count[MCAST_INCLUDE] || pmc->mca_sfcount[MCAST_EXCLUDE] != psf->sf_count[MCAST_EXCLUDE]) - continue; + break; if (ipv6_addr_equal(&srcs[i], &psf->sf_addr)) { scount++; break; @@ -1070,12 +1073,12 @@ static int mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs, } pmc->mca_flags &= ~MAF_GSQUERY; if (scount == nsrcs) /* all sources excluded */ - return 0; - return 1; + return false; + return true; } -static int mld_marksources(struct ifmcaddr6 *pmc, int nsrcs, - const struct in6_addr *srcs) +static bool mld_marksources(struct ifmcaddr6 *pmc, int nsrcs, + const struct in6_addr *srcs) { struct ip6_sf_list *psf; int i, scount; @@ -1099,10 +1102,10 @@ static int mld_marksources(struct ifmcaddr6 *pmc, int nsrcs, } if (!scount) { pmc->mca_flags &= ~MAF_GSQUERY; - return 0; + return false; } pmc->mca_flags |= MAF_GSQUERY; - return 1; + return true; } /* called with rcu_read_lock() */ @@ -1276,17 +1279,17 @@ int igmp6_event_report(struct sk_buff *skb) return 0; } -static int is_in(struct ifmcaddr6 *pmc, struct ip6_sf_list *psf, int type, - int gdeleted, int sdeleted) +static bool is_in(struct ifmcaddr6 *pmc, struct ip6_sf_list *psf, int type, + int gdeleted, int sdeleted) { switch (type) { case MLD2_MODE_IS_INCLUDE: case MLD2_MODE_IS_EXCLUDE: if (gdeleted || sdeleted) - return 0; + return false; if (!((pmc->mca_flags & MAF_GSQUERY) && !psf->sf_gsresp)) { if (pmc->mca_sfmode == MCAST_INCLUDE) - return 1; + return true; /* don't include if this source is excluded * in all filters */ @@ -1295,29 +1298,29 @@ static int is_in(struct ifmcaddr6 *pmc, struct ip6_sf_list *psf, int type, return pmc->mca_sfcount[MCAST_EXCLUDE] == psf->sf_count[MCAST_EXCLUDE]; } - return 0; + return false; case MLD2_CHANGE_TO_INCLUDE: if (gdeleted || sdeleted) - return 0; + return false; return psf->sf_count[MCAST_INCLUDE] != 0; case MLD2_CHANGE_TO_EXCLUDE: if (gdeleted || sdeleted) - return 0; + return false; if (pmc->mca_sfcount[MCAST_EXCLUDE] == 0 || psf->sf_count[MCAST_INCLUDE]) - return 0; + return false; return pmc->mca_sfcount[MCAST_EXCLUDE] == psf->sf_count[MCAST_EXCLUDE]; case MLD2_ALLOW_NEW_SOURCES: if (gdeleted || !psf->sf_crcount) - return 0; + return false; return (pmc->mca_sfmode == MCAST_INCLUDE) ^ sdeleted; case MLD2_BLOCK_OLD_SOURCES: if (pmc->mca_sfmode == MCAST_INCLUDE) return gdeleted || (psf->sf_crcount && sdeleted); return psf->sf_crcount && !gdeleted && !sdeleted; } - return 0; + return false; } static int @@ -2044,7 +2047,7 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca, if (!delta) pmc->mca_sfcount[sfmode]--; for (j=0; j<i; j++) - (void) ip6_mc_del1_src(pmc, sfmode, &psfsrc[i]); + ip6_mc_del1_src(pmc, sfmode, &psfsrc[j]); } else if (isexclude != (pmc->mca_sfcount[MCAST_EXCLUDE] != 0)) { struct ip6_sf_list *psf; @@ -2627,8 +2630,7 @@ static int __net_init igmp6_net_init(struct net *net) err = inet_ctl_sock_create(&net->ipv6.igmp_sk, PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, net); if (err < 0) { - printk(KERN_ERR - "Failed to initialize the IGMP6 control socket (err %d).\n", + pr_err("Failed to initialize the IGMP6 control socket (err %d)\n", err); goto out; } diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 7e1e0fbfef21..5b087c31d87b 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -22,6 +22,8 @@ * Masahide NAKAMURA @USAGI */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/skbuff.h> #include <linux/time.h> @@ -44,7 +46,7 @@ static inline void *mip6_padn(__u8 *data, __u8 padlen) if (!data) return NULL; if (padlen == 1) { - data[0] = IPV6_TLV_PAD0; + data[0] = IPV6_TLV_PAD1; } else if (padlen > 1) { data[0] = IPV6_TLV_PADN; data[1] = padlen - 2; @@ -307,13 +309,12 @@ static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb, static int mip6_destopt_init_state(struct xfrm_state *x) { if (x->id.spi) { - printk(KERN_INFO "%s: spi is not 0: %u\n", __func__, - x->id.spi); + pr_info("%s: spi is not 0: %u\n", __func__, x->id.spi); return -EINVAL; } if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) { - printk(KERN_INFO "%s: state's mode is not %u: %u\n", - __func__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode); + pr_info("%s: state's mode is not %u: %u\n", + __func__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode); return -EINVAL; } @@ -443,13 +444,12 @@ static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb, static int mip6_rthdr_init_state(struct xfrm_state *x) { if (x->id.spi) { - printk(KERN_INFO "%s: spi is not 0: %u\n", __func__, - x->id.spi); + pr_info("%s: spi is not 0: %u\n", __func__, x->id.spi); return -EINVAL; } if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) { - printk(KERN_INFO "%s: state's mode is not %u: %u\n", - __func__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode); + pr_info("%s: state's mode is not %u: %u\n", + __func__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode); return -EINVAL; } @@ -481,18 +481,18 @@ static const struct xfrm_type mip6_rthdr_type = static int __init mip6_init(void) { - printk(KERN_INFO "Mobile IPv6\n"); + pr_info("Mobile IPv6\n"); if (xfrm_register_type(&mip6_destopt_type, AF_INET6) < 0) { - printk(KERN_INFO "%s: can't add xfrm type(destopt)\n", __func__); + pr_info("%s: can't add xfrm type(destopt)\n", __func__); goto mip6_destopt_xfrm_fail; } if (xfrm_register_type(&mip6_rthdr_type, AF_INET6) < 0) { - printk(KERN_INFO "%s: can't add xfrm type(rthdr)\n", __func__); + pr_info("%s: can't add xfrm type(rthdr)\n", __func__); goto mip6_rthdr_xfrm_fail; } if (rawv6_mh_filter_register(mip6_mh_filter) < 0) { - printk(KERN_INFO "%s: can't add rawv6 mh filter\n", __func__); + pr_info("%s: can't add rawv6 mh filter\n", __func__); goto mip6_rawv6_mh_fail; } @@ -510,11 +510,11 @@ static int __init mip6_init(void) static void __exit mip6_fini(void) { if (rawv6_mh_filter_unregister(mip6_mh_filter) < 0) - printk(KERN_INFO "%s: can't remove rawv6 mh filter\n", __func__); + pr_info("%s: can't remove rawv6 mh filter\n", __func__); if (xfrm_unregister_type(&mip6_rthdr_type, AF_INET6) < 0) - printk(KERN_INFO "%s: can't remove xfrm type(rthdr)\n", __func__); + pr_info("%s: can't remove xfrm type(rthdr)\n", __func__); if (xfrm_unregister_type(&mip6_destopt_type, AF_INET6) < 0) - printk(KERN_INFO "%s: can't remove xfrm type(destopt)\n", __func__); + pr_info("%s: can't remove xfrm type(destopt)\n", __func__); } module_init(mip6_init); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 3dcdb81ec3e8..ff36194a71aa 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -15,6 +15,7 @@ /* * Changes: * + * Alexey I. Froloff : RFC6106 (DNSSL) support * Pierre Ynard : export userland ND options * through netlink (RDNSS support) * Lars Fenneberg : fixed MTU setting on receipt @@ -26,27 +27,7 @@ * YOSHIFUJI Hideaki @USAGI : Verify ND options properly */ -/* Set to 3 to get tracing... */ -#define ND_DEBUG 1 - -#define ND_PRINTK(fmt, args...) do { if (net_ratelimit()) { printk(fmt, ## args); } } while(0) -#define ND_NOPRINTK(x...) do { ; } while(0) -#define ND_PRINTK0 ND_PRINTK -#define ND_PRINTK1 ND_NOPRINTK -#define ND_PRINTK2 ND_NOPRINTK -#define ND_PRINTK3 ND_NOPRINTK -#if ND_DEBUG >= 1 -#undef ND_PRINTK1 -#define ND_PRINTK1 ND_PRINTK -#endif -#if ND_DEBUG >= 2 -#undef ND_PRINTK2 -#define ND_PRINTK2 ND_PRINTK -#endif -#if ND_DEBUG >= 3 -#undef ND_PRINTK3 -#define ND_PRINTK3 ND_PRINTK -#endif +#define pr_fmt(fmt) "ICMPv6: " fmt #include <linux/module.h> #include <linux/errno.h> @@ -91,6 +72,15 @@ #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> +/* Set to 3 to get tracing... */ +#define ND_DEBUG 1 + +#define ND_PRINTK(val, level, fmt, ...) \ +do { \ + if (val <= ND_DEBUG) \ + net_##level##_ratelimited(fmt, ##__VA_ARGS__); \ +} while (0) + static u32 ndisc_hash(const void *pkey, const struct net_device *dev, __u32 *hash_rnd); @@ -153,40 +143,6 @@ struct neigh_table nd_tbl = { .gc_thresh3 = 1024, }; -/* ND options */ -struct ndisc_options { - struct nd_opt_hdr *nd_opt_array[__ND_OPT_ARRAY_MAX]; -#ifdef CONFIG_IPV6_ROUTE_INFO - struct nd_opt_hdr *nd_opts_ri; - struct nd_opt_hdr *nd_opts_ri_end; -#endif - struct nd_opt_hdr *nd_useropts; - struct nd_opt_hdr *nd_useropts_end; -}; - -#define nd_opts_src_lladdr nd_opt_array[ND_OPT_SOURCE_LL_ADDR] -#define nd_opts_tgt_lladdr nd_opt_array[ND_OPT_TARGET_LL_ADDR] -#define nd_opts_pi nd_opt_array[ND_OPT_PREFIX_INFO] -#define nd_opts_pi_end nd_opt_array[__ND_OPT_PREFIX_INFO_END] -#define nd_opts_rh nd_opt_array[ND_OPT_REDIRECT_HDR] -#define nd_opts_mtu nd_opt_array[ND_OPT_MTU] - -#define NDISC_OPT_SPACE(len) (((len)+2+7)&~7) - -/* - * Return the padding between the option length and the start of the - * link addr. Currently only IP-over-InfiniBand needs this, although - * if RFC 3831 IPv6-over-Fibre Channel is ever implemented it may - * also need a pad of 2. - */ -static int ndisc_addr_option_pad(unsigned short type) -{ - switch (type) { - case ARPHRD_INFINIBAND: return 2; - default: return 0; - } -} - static inline int ndisc_opt_addr_space(struct net_device *dev) { return NDISC_OPT_SPACE(dev->addr_len + ndisc_addr_option_pad(dev->type)); @@ -228,7 +184,8 @@ static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur, static inline int ndisc_is_useropt(struct nd_opt_hdr *opt) { - return opt->nd_opt_type == ND_OPT_RDNSS; + return opt->nd_opt_type == ND_OPT_RDNSS || + opt->nd_opt_type == ND_OPT_DNSSL; } static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur, @@ -242,8 +199,8 @@ static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur, return cur <= end && ndisc_is_useropt(cur) ? cur : NULL; } -static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, - struct ndisc_options *ndopts) +struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, + struct ndisc_options *ndopts) { struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)opt; @@ -263,10 +220,9 @@ static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, case ND_OPT_MTU: case ND_OPT_REDIRECT_HDR: if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) { - ND_PRINTK2(KERN_WARNING - "%s(): duplicated ND6 option found: type=%d\n", - __func__, - nd_opt->nd_opt_type); + ND_PRINTK(2, warn, + "%s: duplicated ND6 option found: type=%d\n", + __func__, nd_opt->nd_opt_type); } else { ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt; } @@ -294,10 +250,11 @@ static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, * to accommodate future extension to the * protocol. */ - ND_PRINTK2(KERN_NOTICE - "%s(): ignored unsupported option; type=%d, len=%d\n", - __func__, - nd_opt->nd_opt_type, nd_opt->nd_opt_len); + ND_PRINTK(2, notice, + "%s: ignored unsupported option; type=%d, len=%d\n", + __func__, + nd_opt->nd_opt_type, + nd_opt->nd_opt_len); } } opt_len -= l; @@ -306,17 +263,6 @@ static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, return ndopts; } -static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p, - struct net_device *dev) -{ - u8 *lladdr = (u8 *)(p + 1); - int lladdrlen = p->nd_opt_len << 3; - int prepad = ndisc_addr_option_pad(dev->type); - if (lladdrlen != NDISC_OPT_SPACE(dev->addr_len + prepad)) - return NULL; - return lladdr + prepad; -} - int ndisc_mc_map(const struct in6_addr *addr, char *buf, struct net_device *dev, int dir) { switch (dev->type) { @@ -325,9 +271,6 @@ int ndisc_mc_map(const struct in6_addr *addr, char *buf, struct net_device *dev, case ARPHRD_FDDI: ipv6_eth_mc_map(addr, buf); return 0; - case ARPHRD_IEEE802_TR: - ipv6_tr_mc_map(addr,buf); - return 0; case ARPHRD_ARCNET: ipv6_arcnet_mc_map(addr, buf); return 0; @@ -360,7 +303,7 @@ static int ndisc_constructor(struct neighbour *neigh) struct net_device *dev = neigh->dev; struct inet6_dev *in6_dev; struct neigh_parms *parms; - int is_multicast = ipv6_addr_is_multicast(addr); + bool is_multicast = ipv6_addr_is_multicast(addr); in6_dev = in6_dev_get(dev); if (in6_dev == NULL) { @@ -456,9 +399,8 @@ struct sk_buff *ndisc_build_skb(struct net_device *dev, len + hlen + tlen), 1, &err); if (!skb) { - ND_PRINTK0(KERN_ERR - "ICMPv6 ND: %s() failed to allocate an skb, err=%d.\n", - __func__, err); + ND_PRINTK(0, err, "ND: %s failed to allocate an skb, err=%d\n", + __func__, err); return NULL; } @@ -694,8 +636,9 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) if ((probes -= neigh->parms->ucast_probes) < 0) { if (!(neigh->nud_state & NUD_VALID)) { - ND_PRINTK1(KERN_DEBUG "%s(): trying to ucast probe in NUD_INVALID: %pI6\n", - __func__, target); + ND_PRINTK(1, dbg, + "%s: trying to ucast probe in NUD_INVALID: %pI6\n", + __func__, target); } ndisc_send_ns(dev, neigh, target, target, saddr); } else if ((probes -= neigh->parms->app_probes) < 0) { @@ -737,12 +680,11 @@ static void ndisc_recv_ns(struct sk_buff *skb) struct inet6_dev *idev = NULL; struct neighbour *neigh; int dad = ipv6_addr_any(saddr); - int inc; + bool inc; int is_router = -1; if (ipv6_addr_is_multicast(&msg->target)) { - ND_PRINTK2(KERN_WARNING - "ICMPv6 NS: multicast target address"); + ND_PRINTK(2, warn, "NS: multicast target address\n"); return; } @@ -755,22 +697,20 @@ static void ndisc_recv_ns(struct sk_buff *skb) daddr->s6_addr32[1] == htonl(0x00000000) && daddr->s6_addr32[2] == htonl(0x00000001) && daddr->s6_addr [12] == 0xff )) { - ND_PRINTK2(KERN_WARNING - "ICMPv6 NS: bad DAD packet (wrong destination)\n"); + ND_PRINTK(2, warn, "NS: bad DAD packet (wrong destination)\n"); return; } if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) { - ND_PRINTK2(KERN_WARNING - "ICMPv6 NS: invalid ND options\n"); + ND_PRINTK(2, warn, "NS: invalid ND options\n"); return; } if (ndopts.nd_opts_src_lladdr) { lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, dev); if (!lladdr) { - ND_PRINTK2(KERN_WARNING - "ICMPv6 NS: invalid link-layer address length\n"); + ND_PRINTK(2, warn, + "NS: invalid link-layer address length\n"); return; } @@ -780,8 +720,8 @@ static void ndisc_recv_ns(struct sk_buff *skb) * in the message. */ if (dad) { - ND_PRINTK2(KERN_WARNING - "ICMPv6 NS: bad DAD packet (link-layer address option)\n"); + ND_PRINTK(2, warn, + "NS: bad DAD packet (link-layer address option)\n"); return; } } @@ -793,20 +733,6 @@ static void ndisc_recv_ns(struct sk_buff *skb) if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) { if (dad) { - if (dev->type == ARPHRD_IEEE802_TR) { - const unsigned char *sadr; - sadr = skb_mac_header(skb); - if (((sadr[8] ^ dev->dev_addr[0]) & 0x7f) == 0 && - sadr[9] == dev->dev_addr[1] && - sadr[10] == dev->dev_addr[2] && - sadr[11] == dev->dev_addr[3] && - sadr[12] == dev->dev_addr[4] && - sadr[13] == dev->dev_addr[5]) { - /* looped-back to us */ - goto out; - } - } - /* * We are colliding with another node * who is doing DAD @@ -913,34 +839,30 @@ static void ndisc_recv_na(struct sk_buff *skb) struct neighbour *neigh; if (skb->len < sizeof(struct nd_msg)) { - ND_PRINTK2(KERN_WARNING - "ICMPv6 NA: packet too short\n"); + ND_PRINTK(2, warn, "NA: packet too short\n"); return; } if (ipv6_addr_is_multicast(&msg->target)) { - ND_PRINTK2(KERN_WARNING - "ICMPv6 NA: target address is multicast.\n"); + ND_PRINTK(2, warn, "NA: target address is multicast\n"); return; } if (ipv6_addr_is_multicast(daddr) && msg->icmph.icmp6_solicited) { - ND_PRINTK2(KERN_WARNING - "ICMPv6 NA: solicited NA is multicasted.\n"); + ND_PRINTK(2, warn, "NA: solicited NA is multicasted\n"); return; } if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) { - ND_PRINTK2(KERN_WARNING - "ICMPv6 NS: invalid ND option\n"); + ND_PRINTK(2, warn, "NS: invalid ND option\n"); return; } if (ndopts.nd_opts_tgt_lladdr) { lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, dev); if (!lladdr) { - ND_PRINTK2(KERN_WARNING - "ICMPv6 NA: invalid link-layer address length\n"); + ND_PRINTK(2, warn, + "NA: invalid link-layer address length\n"); return; } } @@ -961,9 +883,9 @@ static void ndisc_recv_na(struct sk_buff *skb) unsolicited advertisement. */ if (skb->pkt_type != PACKET_LOOPBACK) - ND_PRINTK1(KERN_WARNING - "ICMPv6 NA: someone advertises our address %pI6 on %s!\n", - &ifp->addr, ifp->idev->dev->name); + ND_PRINTK(1, warn, + "NA: someone advertises our address %pI6 on %s!\n", + &ifp->addr, ifp->idev->dev->name); in6_ifa_put(ifp); return; } @@ -1025,8 +947,7 @@ static void ndisc_recv_rs(struct sk_buff *skb) idev = __in6_dev_get(skb->dev); if (!idev) { - if (net_ratelimit()) - ND_PRINTK1("ICMP6 RS: can't find in6 device\n"); + ND_PRINTK(1, err, "RS: can't find in6 device\n"); return; } @@ -1043,8 +964,7 @@ static void ndisc_recv_rs(struct sk_buff *skb) /* Parse ND options */ if (!ndisc_parse_options(rs_msg->opt, ndoptlen, &ndopts)) { - if (net_ratelimit()) - ND_PRINTK2("ICMP6 NS: invalid ND option, ignored\n"); + ND_PRINTK(2, notice, "NS: invalid ND option, ignored\n"); goto out; } @@ -1099,8 +1019,9 @@ static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt) memcpy(ndmsg + 1, opt, opt->nd_opt_len << 3); - NLA_PUT(skb, NDUSEROPT_SRCADDR, sizeof(struct in6_addr), - &ipv6_hdr(ra)->saddr); + if (nla_put(skb, NDUSEROPT_SRCADDR, sizeof(struct in6_addr), + &ipv6_hdr(ra)->saddr)) + goto nla_put_failure; nlmsg_end(skb, nlh); rtnl_notify(skb, net, 0, RTNLGRP_ND_USEROPT, NULL, GFP_ATOMIC); @@ -1141,20 +1062,17 @@ static void ndisc_router_discovery(struct sk_buff *skb) optlen = (skb->tail - skb->transport_header) - sizeof(struct ra_msg); if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) { - ND_PRINTK2(KERN_WARNING - "ICMPv6 RA: source address is not link-local.\n"); + ND_PRINTK(2, warn, "RA: source address is not link-local\n"); return; } if (optlen < 0) { - ND_PRINTK2(KERN_WARNING - "ICMPv6 RA: packet too short\n"); + ND_PRINTK(2, warn, "RA: packet too short\n"); return; } #ifdef CONFIG_IPV6_NDISC_NODETYPE if (skb->ndisc_nodetype == NDISC_NODETYPE_HOST) { - ND_PRINTK2(KERN_WARNING - "ICMPv6 RA: from host or unauthorized router\n"); + ND_PRINTK(2, warn, "RA: from host or unauthorized router\n"); return; } #endif @@ -1165,15 +1083,13 @@ static void ndisc_router_discovery(struct sk_buff *skb) in6_dev = __in6_dev_get(skb->dev); if (in6_dev == NULL) { - ND_PRINTK0(KERN_ERR - "ICMPv6 RA: can't find inet6 device for %s.\n", - skb->dev->name); + ND_PRINTK(0, err, "RA: can't find inet6 device for %s\n", + skb->dev->name); return; } if (!ndisc_parse_options(opt, optlen, &ndopts)) { - ND_PRINTK2(KERN_WARNING - "ICMP6 RA: invalid ND options\n"); + ND_PRINTK(2, warn, "RA: invalid ND options\n"); return; } @@ -1226,9 +1142,9 @@ static void ndisc_router_discovery(struct sk_buff *skb) if (rt) { neigh = dst_neigh_lookup(&rt->dst, &ipv6_hdr(skb)->saddr); if (!neigh) { - ND_PRINTK0(KERN_ERR - "ICMPv6 RA: %s() got default router without neighbour.\n", - __func__); + ND_PRINTK(0, err, + "RA: %s got default router without neighbour\n", + __func__); dst_release(&rt->dst); return; } @@ -1239,22 +1155,21 @@ static void ndisc_router_discovery(struct sk_buff *skb) } if (rt == NULL && lifetime) { - ND_PRINTK3(KERN_DEBUG - "ICMPv6 RA: adding default router.\n"); + ND_PRINTK(3, dbg, "RA: adding default router\n"); rt = rt6_add_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev, pref); if (rt == NULL) { - ND_PRINTK0(KERN_ERR - "ICMPv6 RA: %s() failed to add default route.\n", - __func__); + ND_PRINTK(0, err, + "RA: %s failed to add default route\n", + __func__); return; } neigh = dst_neigh_lookup(&rt->dst, &ipv6_hdr(skb)->saddr); if (neigh == NULL) { - ND_PRINTK0(KERN_ERR - "ICMPv6 RA: %s() got default router without neighbour.\n", - __func__); + ND_PRINTK(0, err, + "RA: %s got default router without neighbour\n", + __func__); dst_release(&rt->dst); return; } @@ -1264,8 +1179,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) } if (rt) - rt->dst.expires = jiffies + (HZ * lifetime); - + rt6_set_expires(rt, jiffies + (HZ * lifetime)); if (ra_msg->icmph.icmp6_hop_limit) { in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit; if (rt) @@ -1323,8 +1237,8 @@ skip_linkparms: lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, skb->dev); if (!lladdr) { - ND_PRINTK2(KERN_WARNING - "ICMPv6 RA: invalid link-layer address length\n"); + ND_PRINTK(2, warn, + "RA: invalid link-layer address length\n"); goto out; } } @@ -1388,9 +1302,7 @@ skip_routeinfo: mtu = ntohl(n); if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) { - ND_PRINTK2(KERN_WARNING - "ICMPv6 RA: invalid mtu: %d\n", - mtu); + ND_PRINTK(2, warn, "RA: invalid mtu: %d\n", mtu); } else if (in6_dev->cnf.mtu6 != mtu) { in6_dev->cnf.mtu6 = mtu; @@ -1411,8 +1323,7 @@ skip_routeinfo: } if (ndopts.nd_opts_tgt_lladdr || ndopts.nd_opts_rh) { - ND_PRINTK2(KERN_WARNING - "ICMPv6 RA: invalid RA options"); + ND_PRINTK(2, warn, "RA: invalid RA options\n"); } out: if (rt) @@ -1423,93 +1334,23 @@ out: static void ndisc_redirect_rcv(struct sk_buff *skb) { - struct inet6_dev *in6_dev; - struct icmp6hdr *icmph; - const struct in6_addr *dest; - const struct in6_addr *target; /* new first hop to destination */ - struct neighbour *neigh; - int on_link = 0; - struct ndisc_options ndopts; - int optlen; - u8 *lladdr = NULL; - #ifdef CONFIG_IPV6_NDISC_NODETYPE switch (skb->ndisc_nodetype) { case NDISC_NODETYPE_HOST: case NDISC_NODETYPE_NODEFAULT: - ND_PRINTK2(KERN_WARNING - "ICMPv6 Redirect: from host or unauthorized router\n"); + ND_PRINTK(2, warn, + "Redirect: from host or unauthorized router\n"); return; } #endif if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) { - ND_PRINTK2(KERN_WARNING - "ICMPv6 Redirect: source address is not link-local.\n"); + ND_PRINTK(2, warn, + "Redirect: source address is not link-local\n"); return; } - optlen = skb->tail - skb->transport_header; - optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr); - - if (optlen < 0) { - ND_PRINTK2(KERN_WARNING - "ICMPv6 Redirect: packet too short\n"); - return; - } - - icmph = icmp6_hdr(skb); - target = (const struct in6_addr *) (icmph + 1); - dest = target + 1; - - if (ipv6_addr_is_multicast(dest)) { - ND_PRINTK2(KERN_WARNING - "ICMPv6 Redirect: destination address is multicast.\n"); - return; - } - - if (ipv6_addr_equal(dest, target)) { - on_link = 1; - } else if (ipv6_addr_type(target) != - (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) { - ND_PRINTK2(KERN_WARNING - "ICMPv6 Redirect: target address is not link-local unicast.\n"); - return; - } - - in6_dev = __in6_dev_get(skb->dev); - if (!in6_dev) - return; - if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) - return; - - /* RFC2461 8.1: - * The IP source address of the Redirect MUST be the same as the current - * first-hop router for the specified ICMP Destination Address. - */ - - if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) { - ND_PRINTK2(KERN_WARNING - "ICMPv6 Redirect: invalid ND options\n"); - return; - } - if (ndopts.nd_opts_tgt_lladdr) { - lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, - skb->dev); - if (!lladdr) { - ND_PRINTK2(KERN_WARNING - "ICMPv6 Redirect: invalid link-layer address length\n"); - return; - } - } - - neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1); - if (neigh) { - rt6_redirect(dest, &ipv6_hdr(skb)->daddr, - &ipv6_hdr(skb)->saddr, neigh, lladdr, - on_link); - neigh_release(neigh); - } + icmpv6_notify(skb, NDISC_REDIRECT, 0, 0); } void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) @@ -1518,6 +1359,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) struct net *net = dev_net(dev); struct sock *sk = net->ipv6.ndisc_sk; int len = sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr); + struct inet_peer *peer; struct sk_buff *buff; struct icmp6hdr *icmph; struct in6_addr saddr_buf; @@ -1531,18 +1373,18 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) int rd_len; int err; u8 ha_buf[MAX_ADDR_LEN], *ha = NULL; + bool ret; if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) { - ND_PRINTK2(KERN_WARNING - "ICMPv6 Redirect: no link-local address on %s\n", - dev->name); + ND_PRINTK(2, warn, "Redirect: no link-local address on %s\n", + dev->name); return; } if (!ipv6_addr_equal(&ipv6_hdr(skb)->daddr, target) && ipv6_addr_type(target) != (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) { - ND_PRINTK2(KERN_WARNING - "ICMPv6 Redirect: target address is not link-local unicast.\n"); + ND_PRINTK(2, warn, + "Redirect: target address is not link-local unicast\n"); return; } @@ -1561,20 +1403,22 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) rt = (struct rt6_info *) dst; if (rt->rt6i_flags & RTF_GATEWAY) { - ND_PRINTK2(KERN_WARNING - "ICMPv6 Redirect: destination is not a neighbour.\n"); + ND_PRINTK(2, warn, + "Redirect: destination is not a neighbour\n"); goto release; } - if (!rt->rt6i_peer) - rt6_bind_peer(rt, 1); - if (!inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ)) + peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); + ret = inet_peer_xrlim_allow(peer, 1*HZ); + if (peer) + inet_putpeer(peer); + if (!ret) goto release; if (dev->addr_len) { struct neighbour *neigh = dst_neigh_lookup(skb_dst(skb), target); if (!neigh) { - ND_PRINTK2(KERN_WARNING - "ICMPv6 Redirect: no neigh for target address\n"); + ND_PRINTK(2, warn, + "Redirect: no neigh for target address\n"); goto release; } @@ -1602,9 +1446,9 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) len + hlen + tlen), 1, &err); if (buff == NULL) { - ND_PRINTK0(KERN_ERR - "ICMPv6 Redirect: %s() failed to allocate an skb, err=%d.\n", - __func__, err); + ND_PRINTK(0, err, + "Redirect: %s failed to allocate an skb, err=%d\n", + __func__, err); goto release; } @@ -1689,16 +1533,14 @@ int ndisc_rcv(struct sk_buff *skb) __skb_push(skb, skb->data - skb_transport_header(skb)); if (ipv6_hdr(skb)->hop_limit != 255) { - ND_PRINTK2(KERN_WARNING - "ICMPv6 NDISC: invalid hop-limit: %d\n", - ipv6_hdr(skb)->hop_limit); + ND_PRINTK(2, warn, "NDISC: invalid hop-limit: %d\n", + ipv6_hdr(skb)->hop_limit); return 0; } if (msg->icmph.icmp6_code != 0) { - ND_PRINTK2(KERN_WARNING - "ICMPv6 NDISC: invalid ICMPv6 code: %d\n", - msg->icmph.icmp6_code); + ND_PRINTK(2, warn, "NDISC: invalid ICMPv6 code: %d\n", + msg->icmph.icmp6_code); return 0; } @@ -1765,11 +1607,7 @@ static void ndisc_warn_deprecated_sysctl(struct ctl_table *ctl, static int warned; if (strcmp(warncomm, current->comm) && warned < 5) { strcpy(warncomm, current->comm); - printk(KERN_WARNING - "process `%s' is using deprecated sysctl (%s) " - "net.ipv6.neigh.%s.%s; " - "Use net.ipv6.neigh.%s.%s_ms " - "instead.\n", + pr_warn("process `%s' is using deprecated sysctl (%s) net.ipv6.neigh.%s.%s - use net.ipv6.neigh.%s.%s_ms instead\n", warncomm, func, dev_name, ctl->procname, dev_name, ctl->procname); @@ -1823,9 +1661,9 @@ static int __net_init ndisc_net_init(struct net *net) err = inet_ctl_sock_create(&sk, PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, net); if (err < 0) { - ND_PRINTK0(KERN_ERR - "ICMPv6 NDISC: Failed to initialize the control socket (err %d).\n", - err); + ND_PRINTK(0, err, + "NDISC: Failed to initialize the control socket (err %d)\n", + err); return err; } diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index d33cddd16fbb..10135342799e 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -25,28 +25,6 @@ config NF_CONNTRACK_IPV6 To compile it as a module, choose M here. If unsure, say N. -config IP6_NF_QUEUE - tristate "IP6 Userspace queueing via NETLINK (OBSOLETE)" - depends on INET && IPV6 && NETFILTER - depends on NETFILTER_ADVANCED - ---help--- - - This option adds a queue handler to the kernel for IPv6 - packets which enables users to receive the filtered packets - with QUEUE target using libipq. - - This option enables the old IPv6-only "ip6_queue" implementation - which has been obsoleted by the new "nfnetlink_queue" code (see - CONFIG_NETFILTER_NETLINK_QUEUE). - - (C) Fernando Anton 2001 - IPv64 Project - Work based in IPv64 draft by Arturo Azcorra. - Universidad Carlos III de Madrid - Universidad Politecnica de Alcala de Henares - email: <fanton@it.uc3m.es>. - - To compile it as a module, choose M here. If unsure, say N. - config IP6_NF_IPTABLES tristate "IP6 tables support (required for filtering)" depends on INET && IPV6 diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index d4dfd0a21097..534d3f216f7b 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -6,7 +6,6 @@ obj-$(CONFIG_IP6_NF_IPTABLES) += ip6_tables.o obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o -obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c deleted file mode 100644 index a34c9e4c792c..000000000000 --- a/net/ipv6/netfilter/ip6_queue.c +++ /dev/null @@ -1,641 +0,0 @@ -/* - * This is a module which is used for queueing IPv6 packets and - * communicating with userspace via netlink. - * - * (C) 2001 Fernando Anton, this code is GPL. - * IPv64 Project - Work based in IPv64 draft by Arturo Azcorra. - * Universidad Carlos III de Madrid - Leganes (Madrid) - Spain - * Universidad Politecnica de Alcala de Henares - Alcala de H. (Madrid) - Spain - * email: fanton@it.uc3m.es - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/init.h> -#include <linux/ipv6.h> -#include <linux/notifier.h> -#include <linux/netdevice.h> -#include <linux/netfilter.h> -#include <linux/netlink.h> -#include <linux/spinlock.h> -#include <linux/sysctl.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/mutex.h> -#include <linux/slab.h> -#include <net/net_namespace.h> -#include <net/sock.h> -#include <net/ipv6.h> -#include <net/ip6_route.h> -#include <net/netfilter/nf_queue.h> -#include <linux/netfilter_ipv4/ip_queue.h> -#include <linux/netfilter_ipv4/ip_tables.h> -#include <linux/netfilter_ipv6/ip6_tables.h> - -#define IPQ_QMAX_DEFAULT 1024 -#define IPQ_PROC_FS_NAME "ip6_queue" -#define NET_IPQ_QMAX_NAME "ip6_queue_maxlen" - -typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long); - -static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE; -static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; -static DEFINE_SPINLOCK(queue_lock); -static int peer_pid __read_mostly; -static unsigned int copy_range __read_mostly; -static unsigned int queue_total; -static unsigned int queue_dropped = 0; -static unsigned int queue_user_dropped = 0; -static struct sock *ipqnl __read_mostly; -static LIST_HEAD(queue_list); -static DEFINE_MUTEX(ipqnl_mutex); - -static inline void -__ipq_enqueue_entry(struct nf_queue_entry *entry) -{ - list_add_tail(&entry->list, &queue_list); - queue_total++; -} - -static inline int -__ipq_set_mode(unsigned char mode, unsigned int range) -{ - int status = 0; - - switch(mode) { - case IPQ_COPY_NONE: - case IPQ_COPY_META: - copy_mode = mode; - copy_range = 0; - break; - - case IPQ_COPY_PACKET: - if (range > 0xFFFF) - range = 0xFFFF; - copy_range = range; - copy_mode = mode; - break; - - default: - status = -EINVAL; - - } - return status; -} - -static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data); - -static inline void -__ipq_reset(void) -{ - peer_pid = 0; - net_disable_timestamp(); - __ipq_set_mode(IPQ_COPY_NONE, 0); - __ipq_flush(NULL, 0); -} - -static struct nf_queue_entry * -ipq_find_dequeue_entry(unsigned long id) -{ - struct nf_queue_entry *entry = NULL, *i; - - spin_lock_bh(&queue_lock); - - list_for_each_entry(i, &queue_list, list) { - if ((unsigned long)i == id) { - entry = i; - break; - } - } - - if (entry) { - list_del(&entry->list); - queue_total--; - } - - spin_unlock_bh(&queue_lock); - return entry; -} - -static void -__ipq_flush(ipq_cmpfn cmpfn, unsigned long data) -{ - struct nf_queue_entry *entry, *next; - - list_for_each_entry_safe(entry, next, &queue_list, list) { - if (!cmpfn || cmpfn(entry, data)) { - list_del(&entry->list); - queue_total--; - nf_reinject(entry, NF_DROP); - } - } -} - -static void -ipq_flush(ipq_cmpfn cmpfn, unsigned long data) -{ - spin_lock_bh(&queue_lock); - __ipq_flush(cmpfn, data); - spin_unlock_bh(&queue_lock); -} - -static struct sk_buff * -ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) -{ - sk_buff_data_t old_tail; - size_t size = 0; - size_t data_len = 0; - struct sk_buff *skb; - struct ipq_packet_msg *pmsg; - struct nlmsghdr *nlh; - struct timeval tv; - - switch (ACCESS_ONCE(copy_mode)) { - case IPQ_COPY_META: - case IPQ_COPY_NONE: - size = NLMSG_SPACE(sizeof(*pmsg)); - break; - - case IPQ_COPY_PACKET: - if (entry->skb->ip_summed == CHECKSUM_PARTIAL && - (*errp = skb_checksum_help(entry->skb))) - return NULL; - - data_len = ACCESS_ONCE(copy_range); - if (data_len == 0 || data_len > entry->skb->len) - data_len = entry->skb->len; - - size = NLMSG_SPACE(sizeof(*pmsg) + data_len); - break; - - default: - *errp = -EINVAL; - return NULL; - } - - skb = alloc_skb(size, GFP_ATOMIC); - if (!skb) - goto nlmsg_failure; - - old_tail = skb->tail; - nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh)); - pmsg = NLMSG_DATA(nlh); - memset(pmsg, 0, sizeof(*pmsg)); - - pmsg->packet_id = (unsigned long )entry; - pmsg->data_len = data_len; - tv = ktime_to_timeval(entry->skb->tstamp); - pmsg->timestamp_sec = tv.tv_sec; - pmsg->timestamp_usec = tv.tv_usec; - pmsg->mark = entry->skb->mark; - pmsg->hook = entry->hook; - pmsg->hw_protocol = entry->skb->protocol; - - if (entry->indev) - strcpy(pmsg->indev_name, entry->indev->name); - else - pmsg->indev_name[0] = '\0'; - - if (entry->outdev) - strcpy(pmsg->outdev_name, entry->outdev->name); - else - pmsg->outdev_name[0] = '\0'; - - if (entry->indev && entry->skb->dev && - entry->skb->mac_header != entry->skb->network_header) { - pmsg->hw_type = entry->skb->dev->type; - pmsg->hw_addrlen = dev_parse_header(entry->skb, pmsg->hw_addr); - } - - if (data_len) - if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len)) - BUG(); - - nlh->nlmsg_len = skb->tail - old_tail; - return skb; - -nlmsg_failure: - kfree_skb(skb); - *errp = -EINVAL; - printk(KERN_ERR "ip6_queue: error creating packet message\n"); - return NULL; -} - -static int -ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) -{ - int status = -EINVAL; - struct sk_buff *nskb; - - if (copy_mode == IPQ_COPY_NONE) - return -EAGAIN; - - nskb = ipq_build_packet_message(entry, &status); - if (nskb == NULL) - return status; - - spin_lock_bh(&queue_lock); - - if (!peer_pid) - goto err_out_free_nskb; - - if (queue_total >= queue_maxlen) { - queue_dropped++; - status = -ENOSPC; - if (net_ratelimit()) - printk (KERN_WARNING "ip6_queue: fill at %d entries, " - "dropping packet(s). Dropped: %d\n", queue_total, - queue_dropped); - goto err_out_free_nskb; - } - - /* netlink_unicast will either free the nskb or attach it to a socket */ - status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT); - if (status < 0) { - queue_user_dropped++; - goto err_out_unlock; - } - - __ipq_enqueue_entry(entry); - - spin_unlock_bh(&queue_lock); - return status; - -err_out_free_nskb: - kfree_skb(nskb); - -err_out_unlock: - spin_unlock_bh(&queue_lock); - return status; -} - -static int -ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct nf_queue_entry *e) -{ - int diff; - struct ipv6hdr *user_iph = (struct ipv6hdr *)v->payload; - struct sk_buff *nskb; - - if (v->data_len < sizeof(*user_iph)) - return 0; - diff = v->data_len - e->skb->len; - if (diff < 0) { - if (pskb_trim(e->skb, v->data_len)) - return -ENOMEM; - } else if (diff > 0) { - if (v->data_len > 0xFFFF) - return -EINVAL; - if (diff > skb_tailroom(e->skb)) { - nskb = skb_copy_expand(e->skb, skb_headroom(e->skb), - diff, GFP_ATOMIC); - if (!nskb) { - printk(KERN_WARNING "ip6_queue: OOM " - "in mangle, dropping packet\n"); - return -ENOMEM; - } - kfree_skb(e->skb); - e->skb = nskb; - } - skb_put(e->skb, diff); - } - if (!skb_make_writable(e->skb, v->data_len)) - return -ENOMEM; - skb_copy_to_linear_data(e->skb, v->payload, v->data_len); - e->skb->ip_summed = CHECKSUM_NONE; - - return 0; -} - -static int -ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len) -{ - struct nf_queue_entry *entry; - - if (vmsg->value > NF_MAX_VERDICT || vmsg->value == NF_STOLEN) - return -EINVAL; - - entry = ipq_find_dequeue_entry(vmsg->id); - if (entry == NULL) - return -ENOENT; - else { - int verdict = vmsg->value; - - if (vmsg->data_len && vmsg->data_len == len) - if (ipq_mangle_ipv6(vmsg, entry) < 0) - verdict = NF_DROP; - - nf_reinject(entry, verdict); - return 0; - } -} - -static int -ipq_set_mode(unsigned char mode, unsigned int range) -{ - int status; - - spin_lock_bh(&queue_lock); - status = __ipq_set_mode(mode, range); - spin_unlock_bh(&queue_lock); - return status; -} - -static int -ipq_receive_peer(struct ipq_peer_msg *pmsg, - unsigned char type, unsigned int len) -{ - int status = 0; - - if (len < sizeof(*pmsg)) - return -EINVAL; - - switch (type) { - case IPQM_MODE: - status = ipq_set_mode(pmsg->msg.mode.value, - pmsg->msg.mode.range); - break; - - case IPQM_VERDICT: - status = ipq_set_verdict(&pmsg->msg.verdict, - len - sizeof(*pmsg)); - break; - default: - status = -EINVAL; - } - return status; -} - -static int -dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex) -{ - if (entry->indev) - if (entry->indev->ifindex == ifindex) - return 1; - - if (entry->outdev) - if (entry->outdev->ifindex == ifindex) - return 1; -#ifdef CONFIG_BRIDGE_NETFILTER - if (entry->skb->nf_bridge) { - if (entry->skb->nf_bridge->physindev && - entry->skb->nf_bridge->physindev->ifindex == ifindex) - return 1; - if (entry->skb->nf_bridge->physoutdev && - entry->skb->nf_bridge->physoutdev->ifindex == ifindex) - return 1; - } -#endif - return 0; -} - -static void -ipq_dev_drop(int ifindex) -{ - ipq_flush(dev_cmp, ifindex); -} - -#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) - -static inline void -__ipq_rcv_skb(struct sk_buff *skb) -{ - int status, type, pid, flags; - unsigned int nlmsglen, skblen; - struct nlmsghdr *nlh; - bool enable_timestamp = false; - - skblen = skb->len; - if (skblen < sizeof(*nlh)) - return; - - nlh = nlmsg_hdr(skb); - nlmsglen = nlh->nlmsg_len; - if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen) - return; - - pid = nlh->nlmsg_pid; - flags = nlh->nlmsg_flags; - - if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI) - RCV_SKB_FAIL(-EINVAL); - - if (flags & MSG_TRUNC) - RCV_SKB_FAIL(-ECOMM); - - type = nlh->nlmsg_type; - if (type < NLMSG_NOOP || type >= IPQM_MAX) - RCV_SKB_FAIL(-EINVAL); - - if (type <= IPQM_BASE) - return; - - if (!capable(CAP_NET_ADMIN)) - RCV_SKB_FAIL(-EPERM); - - spin_lock_bh(&queue_lock); - - if (peer_pid) { - if (peer_pid != pid) { - spin_unlock_bh(&queue_lock); - RCV_SKB_FAIL(-EBUSY); - } - } else { - enable_timestamp = true; - peer_pid = pid; - } - - spin_unlock_bh(&queue_lock); - if (enable_timestamp) - net_enable_timestamp(); - - status = ipq_receive_peer(NLMSG_DATA(nlh), type, - nlmsglen - NLMSG_LENGTH(0)); - if (status < 0) - RCV_SKB_FAIL(status); - - if (flags & NLM_F_ACK) - netlink_ack(skb, nlh, 0); -} - -static void -ipq_rcv_skb(struct sk_buff *skb) -{ - mutex_lock(&ipqnl_mutex); - __ipq_rcv_skb(skb); - mutex_unlock(&ipqnl_mutex); -} - -static int -ipq_rcv_dev_event(struct notifier_block *this, - unsigned long event, void *ptr) -{ - struct net_device *dev = ptr; - - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; - - /* Drop any packets associated with the downed device */ - if (event == NETDEV_DOWN) - ipq_dev_drop(dev->ifindex); - return NOTIFY_DONE; -} - -static struct notifier_block ipq_dev_notifier = { - .notifier_call = ipq_rcv_dev_event, -}; - -static int -ipq_rcv_nl_event(struct notifier_block *this, - unsigned long event, void *ptr) -{ - struct netlink_notify *n = ptr; - - if (event == NETLINK_URELEASE && n->protocol == NETLINK_IP6_FW) { - spin_lock_bh(&queue_lock); - if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid)) - __ipq_reset(); - spin_unlock_bh(&queue_lock); - } - return NOTIFY_DONE; -} - -static struct notifier_block ipq_nl_notifier = { - .notifier_call = ipq_rcv_nl_event, -}; - -#ifdef CONFIG_SYSCTL -static struct ctl_table_header *ipq_sysctl_header; - -static ctl_table ipq_table[] = { - { - .procname = NET_IPQ_QMAX_NAME, - .data = &queue_maxlen, - .maxlen = sizeof(queue_maxlen), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { } -}; -#endif - -#ifdef CONFIG_PROC_FS -static int ip6_queue_show(struct seq_file *m, void *v) -{ - spin_lock_bh(&queue_lock); - - seq_printf(m, - "Peer PID : %d\n" - "Copy mode : %hu\n" - "Copy range : %u\n" - "Queue length : %u\n" - "Queue max. length : %u\n" - "Queue dropped : %u\n" - "Netfilter dropped : %u\n", - peer_pid, - copy_mode, - copy_range, - queue_total, - queue_maxlen, - queue_dropped, - queue_user_dropped); - - spin_unlock_bh(&queue_lock); - return 0; -} - -static int ip6_queue_open(struct inode *inode, struct file *file) -{ - return single_open(file, ip6_queue_show, NULL); -} - -static const struct file_operations ip6_queue_proc_fops = { - .open = ip6_queue_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .owner = THIS_MODULE, -}; -#endif - -static const struct nf_queue_handler nfqh = { - .name = "ip6_queue", - .outfn = &ipq_enqueue_packet, -}; - -static int __init ip6_queue_init(void) -{ - int status = -ENOMEM; - struct proc_dir_entry *proc __maybe_unused; - - netlink_register_notifier(&ipq_nl_notifier); - ipqnl = netlink_kernel_create(&init_net, NETLINK_IP6_FW, 0, - ipq_rcv_skb, NULL, THIS_MODULE); - if (ipqnl == NULL) { - printk(KERN_ERR "ip6_queue: failed to create netlink socket\n"); - goto cleanup_netlink_notifier; - } - -#ifdef CONFIG_PROC_FS - proc = proc_create(IPQ_PROC_FS_NAME, 0, init_net.proc_net, - &ip6_queue_proc_fops); - if (!proc) { - printk(KERN_ERR "ip6_queue: failed to create proc entry\n"); - goto cleanup_ipqnl; - } -#endif - register_netdevice_notifier(&ipq_dev_notifier); -#ifdef CONFIG_SYSCTL - ipq_sysctl_header = register_sysctl_paths(net_ipv6_ctl_path, ipq_table); -#endif - status = nf_register_queue_handler(NFPROTO_IPV6, &nfqh); - if (status < 0) { - printk(KERN_ERR "ip6_queue: failed to register queue handler\n"); - goto cleanup_sysctl; - } - return status; - -cleanup_sysctl: -#ifdef CONFIG_SYSCTL - unregister_sysctl_table(ipq_sysctl_header); -#endif - unregister_netdevice_notifier(&ipq_dev_notifier); - proc_net_remove(&init_net, IPQ_PROC_FS_NAME); - -cleanup_ipqnl: __maybe_unused - netlink_kernel_release(ipqnl); - mutex_lock(&ipqnl_mutex); - mutex_unlock(&ipqnl_mutex); - -cleanup_netlink_notifier: - netlink_unregister_notifier(&ipq_nl_notifier); - return status; -} - -static void __exit ip6_queue_fini(void) -{ - nf_unregister_queue_handlers(&nfqh); - - ipq_flush(NULL, 0); - -#ifdef CONFIG_SYSCTL - unregister_sysctl_table(ipq_sysctl_header); -#endif - unregister_netdevice_notifier(&ipq_dev_notifier); - proc_net_remove(&init_net, IPQ_PROC_FS_NAME); - - netlink_kernel_release(ipqnl); - mutex_lock(&ipqnl_mutex); - mutex_unlock(&ipqnl_mutex); - - netlink_unregister_notifier(&ipq_nl_notifier); -} - -MODULE_DESCRIPTION("IPv6 packet queue handler"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_IP6_FW); - -module_init(ip6_queue_init); -module_exit(ip6_queue_fini); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 94874b0bdcdc..d7cb04506c3d 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -78,19 +78,6 @@ EXPORT_SYMBOL_GPL(ip6t_alloc_initial_table); Hence the start of any table is given by get_table() below. */ -/* Check for an extension */ -int -ip6t_ext_hdr(u8 nexthdr) -{ - return (nexthdr == IPPROTO_HOPOPTS) || - (nexthdr == IPPROTO_ROUTING) || - (nexthdr == IPPROTO_FRAGMENT) || - (nexthdr == IPPROTO_ESP) || - (nexthdr == IPPROTO_AH) || - (nexthdr == IPPROTO_NONE) || - (nexthdr == IPPROTO_DSTOPTS); -} - /* Returns whether matches rule or not. */ /* Performance critical - called for every packet */ static inline bool @@ -146,7 +133,7 @@ ip6_packet_match(const struct sk_buff *skb, int protohdr; unsigned short _frag_off; - protohdr = ipv6_find_hdr(skb, protoff, -1, &_frag_off); + protohdr = ipv6_find_hdr(skb, protoff, -1, &_frag_off, NULL); if (protohdr < 0) { if (_frag_off == 0) *hotdrop = true; @@ -194,8 +181,7 @@ ip6_checkentry(const struct ip6t_ip6 *ipv6) static unsigned int ip6t_error(struct sk_buff *skb, const struct xt_action_param *par) { - if (net_ratelimit()) - pr_info("error: `%s'\n", (const char *)par->targinfo); + net_info_ratelimited("error: `%s'\n", (const char *)par->targinfo); return NF_DROP; } @@ -375,6 +361,7 @@ ip6t_do_table(struct sk_buff *skb, const struct xt_entry_match *ematch; IP_NF_ASSERT(e); + acpar.thoff = 0; if (!ip6_packet_match(skb, indev, outdev, &e->ipv6, &acpar.thoff, &acpar.fragoff, &acpar.hotdrop)) { no_match: @@ -409,7 +396,7 @@ ip6t_do_table(struct sk_buff *skb, if (v < 0) { /* Pop from stack? */ if (v != XT_RETURN) { - verdict = (unsigned)(-v) - 1; + verdict = (unsigned int)(-v) - 1; break; } if (*stackptr <= origptr) @@ -2291,6 +2278,10 @@ static void __exit ip6_tables_fini(void) * if target < 0. "last header" is transport protocol header, ESP, or * "No next header". * + * Note that *offset is used as input/output parameter. an if it is not zero, + * then it must be a valid offset to an inner IPv6 header. This can be used + * to explore inner IPv6 header, eg. ICMPv6 error messages. + * * If target header is found, its offset is set in *offset and return protocol * number. Otherwise, return -1. * @@ -2302,17 +2293,33 @@ static void __exit ip6_tables_fini(void) * *offset is meaningless and fragment offset is stored in *fragoff if fragoff * isn't NULL. * + * if flags is not NULL and it's a fragment, then the frag flag IP6T_FH_F_FRAG + * will be set. If it's an AH header, the IP6T_FH_F_AUTH flag is set and + * target < 0, then this function will stop at the AH header. */ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, - int target, unsigned short *fragoff) + int target, unsigned short *fragoff, int *flags) { unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr); u8 nexthdr = ipv6_hdr(skb)->nexthdr; - unsigned int len = skb->len - start; + unsigned int len; if (fragoff) *fragoff = 0; + if (*offset) { + struct ipv6hdr _ip6, *ip6; + + ip6 = skb_header_pointer(skb, *offset, sizeof(_ip6), &_ip6); + if (!ip6 || (ip6->version != 6)) { + printk(KERN_ERR "IPv6 header not found\n"); + return -EBADMSG; + } + start = *offset + sizeof(struct ipv6hdr); + nexthdr = ip6->nexthdr; + } + len = skb->len - start; + while (nexthdr != target) { struct ipv6_opt_hdr _hdr, *hp; unsigned int hdrlen; @@ -2329,6 +2336,9 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, if (nexthdr == NEXTHDR_FRAGMENT) { unsigned short _frag_off; __be16 *fp; + + if (flags) /* Indicate that this is a fragment */ + *flags |= IP6T_FH_F_FRAG; fp = skb_header_pointer(skb, start+offsetof(struct frag_hdr, frag_off), @@ -2349,9 +2359,11 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, return -ENOENT; } hdrlen = 8; - } else if (nexthdr == NEXTHDR_AUTH) + } else if (nexthdr == NEXTHDR_AUTH) { + if (flags && (*flags & IP6T_FH_F_AUTH) && (target < 0)) + break; hdrlen = (hp->hdrlen + 2) << 2; - else + } else hdrlen = ipv6_optlen(hp); nexthdr = hp->nexthdr; @@ -2366,7 +2378,6 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, EXPORT_SYMBOL(ip6t_register_table); EXPORT_SYMBOL(ip6t_unregister_table); EXPORT_SYMBOL(ip6t_do_table); -EXPORT_SYMBOL(ip6t_ext_hdr); EXPORT_SYMBOL(ipv6_find_hdr); module_init(ip6_tables_init); diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index aad2fa41cf46..fd4fb34c51c7 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -114,8 +114,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) GFP_ATOMIC); if (!nskb) { - if (net_ratelimit()) - pr_debug("cannot alloc skb\n"); + net_dbg_ratelimited("cannot alloc skb\n"); dst_release(dst); return; } @@ -210,8 +209,7 @@ reject_tg6(struct sk_buff *skb, const struct xt_action_param *par) send_reset(net, skb); break; default: - if (net_ratelimit()) - pr_info("case %u not handled yet\n", reject->with); + net_info_ratelimited("case %u not handled yet\n", reject->with); break; } diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index 89cccc5a9c92..04099ab7d2e3 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -41,11 +41,11 @@ static bool ah_mt6(const struct sk_buff *skb, struct xt_action_param *par) struct ip_auth_hdr _ah; const struct ip_auth_hdr *ah; const struct ip6t_ah *ahinfo = par->matchinfo; - unsigned int ptr; + unsigned int ptr = 0; unsigned int hdrlen = 0; int err; - err = ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH, NULL); + err = ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH, NULL, NULL); if (err < 0) { if (err != -ENOENT) par->hotdrop = true; diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index eda898fda6ca..3b5735e56bfe 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c @@ -40,10 +40,10 @@ frag_mt6(const struct sk_buff *skb, struct xt_action_param *par) struct frag_hdr _frag; const struct frag_hdr *fh; const struct ip6t_frag *fraginfo = par->matchinfo; - unsigned int ptr; + unsigned int ptr = 0; int err; - err = ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL); + err = ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL, NULL); if (err < 0) { if (err != -ENOENT) par->hotdrop = true; diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index 59df051eaef6..01df142bb027 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -50,7 +50,7 @@ hbh_mt6(const struct sk_buff *skb, struct xt_action_param *par) const struct ipv6_opt_hdr *oh; const struct ip6t_opts *optinfo = par->matchinfo; unsigned int temp; - unsigned int ptr; + unsigned int ptr = 0; unsigned int hdrlen = 0; bool ret = false; u8 _opttype; @@ -62,7 +62,7 @@ hbh_mt6(const struct sk_buff *skb, struct xt_action_param *par) err = ipv6_find_hdr(skb, &ptr, (par->match == &hbh_mt6_reg[0]) ? - NEXTHDR_HOP : NEXTHDR_DEST, NULL); + NEXTHDR_HOP : NEXTHDR_DEST, NULL, NULL); if (err < 0) { if (err != -ENOENT) par->hotdrop = true; diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index d8488c50a8e0..2c99b94eeca3 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -42,14 +42,14 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par) const struct ipv6_rt_hdr *rh; const struct ip6t_rt *rtinfo = par->matchinfo; unsigned int temp; - unsigned int ptr; + unsigned int ptr = 0; unsigned int hdrlen = 0; bool ret = false; struct in6_addr _addr; const struct in6_addr *ap; int err; - err = ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING, NULL); + err = ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING, NULL, NULL); if (err < 0) { if (err != -ENOENT) par->hotdrop = true; diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 00d19173db7e..4d782405f125 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -42,8 +42,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out) /* root is playing with raw sockets. */ if (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr)) { - if (net_ratelimit()) - pr_warning("ip6t_hook: happy cracking.\n"); + net_warn_ratelimited("ip6t_hook: happy cracking\n"); return NF_ACCEPT; } #endif diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 4111050a9fc5..4794f96cf2e0 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -143,11 +143,11 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, return NF_ACCEPT; } -static unsigned int ipv6_confirm(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +static unsigned int ipv6_helper(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) { struct nf_conn *ct; const struct nf_conn_help *help; @@ -161,15 +161,15 @@ static unsigned int ipv6_confirm(unsigned int hooknum, /* This is where we call the helper: as the packet goes out. */ ct = nf_ct_get(skb, &ctinfo); if (!ct || ctinfo == IP_CT_RELATED_REPLY) - goto out; + return NF_ACCEPT; help = nfct_help(ct); if (!help) - goto out; + return NF_ACCEPT; /* rcu_read_lock()ed by nf_hook_slow */ helper = rcu_dereference(help->helper); if (!helper) - goto out; + return NF_ACCEPT; protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum, skb->len - extoff); @@ -179,12 +179,19 @@ static unsigned int ipv6_confirm(unsigned int hooknum, } ret = helper->help(skb, protoff, ct, ctinfo); - if (ret != NF_ACCEPT) { + if (ret != NF_ACCEPT && (ret & NF_VERDICT_MASK) != NF_QUEUE) { nf_log_packet(NFPROTO_IPV6, hooknum, skb, in, out, NULL, "nf_ct_%s: dropping packet", helper->name); - return ret; } -out: + return ret; +} + +static unsigned int ipv6_confirm(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ /* We've seen it coming out the other side: confirm it */ return nf_conntrack_confirm(skb); } @@ -232,8 +239,7 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum, { /* root is playing with raw sockets. */ if (skb->len < sizeof(struct ipv6hdr)) { - if (net_ratelimit()) - pr_notice("ipv6_conntrack_local: packet too short\n"); + net_notice_ratelimited("ipv6_conntrack_local: packet too short\n"); return NF_ACCEPT; } return __ipv6_conntrack_in(dev_net(out), hooknum, skb, okfn); @@ -255,6 +261,13 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { .priority = NF_IP6_PRI_CONNTRACK, }, { + .hook = ipv6_helper, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_POST_ROUTING, + .priority = NF_IP6_PRI_CONNTRACK_HELPER, + }, + { .hook = ipv6_confirm, .owner = THIS_MODULE, .pf = NFPROTO_IPV6, @@ -262,6 +275,13 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { .priority = NF_IP6_PRI_LAST, }, { + .hook = ipv6_helper, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP6_PRI_CONNTRACK_HELPER, + }, + { .hook = ipv6_confirm, .owner = THIS_MODULE, .pf = NFPROTO_IPV6, @@ -278,10 +298,11 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { static int ipv6_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple) { - NLA_PUT(skb, CTA_IP_V6_SRC, sizeof(u_int32_t) * 4, - &tuple->src.u3.ip6); - NLA_PUT(skb, CTA_IP_V6_DST, sizeof(u_int32_t) * 4, - &tuple->dst.u3.ip6); + if (nla_put(skb, CTA_IP_V6_SRC, sizeof(u_int32_t) * 4, + &tuple->src.u3.ip6) || + nla_put(skb, CTA_IP_V6_DST, sizeof(u_int32_t) * 4, + &tuple->dst.u3.ip6)) + goto nla_put_failure; return 0; nla_put_failure: @@ -333,37 +354,75 @@ MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6)); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Yasuyuki KOZAKAI @USAGI <yasuyuki.kozakai@toshiba.co.jp>"); -static int __init nf_conntrack_l3proto_ipv6_init(void) +static int ipv6_net_init(struct net *net) { int ret = 0; - need_conntrack(); - nf_defrag_ipv6_enable(); - - ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp6); + ret = nf_conntrack_l4proto_register(net, + &nf_conntrack_l4proto_tcp6); if (ret < 0) { - pr_err("nf_conntrack_ipv6: can't register tcp.\n"); - return ret; + printk(KERN_ERR "nf_conntrack_l4proto_tcp6: protocol register failed\n"); + goto out; } - - ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp6); + ret = nf_conntrack_l4proto_register(net, + &nf_conntrack_l4proto_udp6); if (ret < 0) { - pr_err("nf_conntrack_ipv6: can't register udp.\n"); - goto cleanup_tcp; + printk(KERN_ERR "nf_conntrack_l4proto_udp6: protocol register failed\n"); + goto cleanup_tcp6; } - - ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_icmpv6); + ret = nf_conntrack_l4proto_register(net, + &nf_conntrack_l4proto_icmpv6); if (ret < 0) { - pr_err("nf_conntrack_ipv6: can't register icmpv6.\n"); - goto cleanup_udp; + printk(KERN_ERR "nf_conntrack_l4proto_icmp6: protocol register failed\n"); + goto cleanup_udp6; } - - ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv6); + ret = nf_conntrack_l3proto_register(net, + &nf_conntrack_l3proto_ipv6); if (ret < 0) { - pr_err("nf_conntrack_ipv6: can't register ipv6\n"); + printk(KERN_ERR "nf_conntrack_l3proto_ipv6: protocol register failed\n"); goto cleanup_icmpv6; } + return 0; + cleanup_icmpv6: + nf_conntrack_l4proto_unregister(net, + &nf_conntrack_l4proto_icmpv6); + cleanup_udp6: + nf_conntrack_l4proto_unregister(net, + &nf_conntrack_l4proto_udp6); + cleanup_tcp6: + nf_conntrack_l4proto_unregister(net, + &nf_conntrack_l4proto_tcp6); + out: + return ret; +} +static void ipv6_net_exit(struct net *net) +{ + nf_conntrack_l3proto_unregister(net, + &nf_conntrack_l3proto_ipv6); + nf_conntrack_l4proto_unregister(net, + &nf_conntrack_l4proto_icmpv6); + nf_conntrack_l4proto_unregister(net, + &nf_conntrack_l4proto_udp6); + nf_conntrack_l4proto_unregister(net, + &nf_conntrack_l4proto_tcp6); +} + +static struct pernet_operations ipv6_net_ops = { + .init = ipv6_net_init, + .exit = ipv6_net_exit, +}; + +static int __init nf_conntrack_l3proto_ipv6_init(void) +{ + int ret = 0; + + need_conntrack(); + nf_defrag_ipv6_enable(); + + ret = register_pernet_subsys(&ipv6_net_ops); + if (ret < 0) + goto cleanup_pernet; ret = nf_register_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); if (ret < 0) { @@ -374,13 +433,8 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) return ret; cleanup_ipv6: - nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv6); - cleanup_icmpv6: - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmpv6); - cleanup_udp: - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp6); - cleanup_tcp: - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp6); + unregister_pernet_subsys(&ipv6_net_ops); + cleanup_pernet: return ret; } @@ -388,10 +442,7 @@ static void __exit nf_conntrack_l3proto_ipv6_fini(void) { synchronize_net(); nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); - nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv6); - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmpv6); - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp6); - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp6); + unregister_pernet_subsys(&ipv6_net_ops); } module_init(nf_conntrack_l3proto_ipv6_init); diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 92cc9f2931ae..2d54b2061d68 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -29,6 +29,11 @@ static unsigned int nf_ct_icmpv6_timeout __read_mostly = 30*HZ; +static inline struct nf_icmp_net *icmpv6_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.icmpv6; +} + static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct nf_conntrack_tuple *tuple) @@ -90,7 +95,7 @@ static int icmpv6_print_tuple(struct seq_file *s, static unsigned int *icmpv6_get_timeouts(struct net *net) { - return &nf_ct_icmpv6_timeout; + return &icmpv6_pernet(net)->timeout; } /* Returns verdict for packet, or -1 for invalid. */ @@ -234,10 +239,10 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl, static int icmpv6_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *t) { - NLA_PUT_BE16(skb, CTA_PROTO_ICMPV6_ID, t->src.u.icmp.id); - NLA_PUT_U8(skb, CTA_PROTO_ICMPV6_TYPE, t->dst.u.icmp.type); - NLA_PUT_U8(skb, CTA_PROTO_ICMPV6_CODE, t->dst.u.icmp.code); - + if (nla_put_be16(skb, CTA_PROTO_ICMPV6_ID, t->src.u.icmp.id) || + nla_put_u8(skb, CTA_PROTO_ICMPV6_TYPE, t->dst.u.icmp.type) || + nla_put_u8(skb, CTA_PROTO_ICMPV6_CODE, t->dst.u.icmp.code)) + goto nla_put_failure; return 0; nla_put_failure: @@ -281,16 +286,18 @@ static int icmpv6_nlattr_tuple_size(void) #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_cttimeout.h> -static int icmpv6_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) +static int icmpv6_timeout_nlattr_to_obj(struct nlattr *tb[], + struct net *net, void *data) { unsigned int *timeout = data; + struct nf_icmp_net *in = icmpv6_pernet(net); if (tb[CTA_TIMEOUT_ICMPV6_TIMEOUT]) { *timeout = ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMPV6_TIMEOUT])) * HZ; } else { /* Set default ICMPv6 timeout. */ - *timeout = nf_ct_icmpv6_timeout; + *timeout = in->timeout; } return 0; } @@ -300,8 +307,8 @@ icmpv6_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) { const unsigned int *timeout = data; - NLA_PUT_BE32(skb, CTA_TIMEOUT_ICMPV6_TIMEOUT, htonl(*timeout / HZ)); - + if (nla_put_be32(skb, CTA_TIMEOUT_ICMPV6_TIMEOUT, htonl(*timeout / HZ))) + goto nla_put_failure; return 0; nla_put_failure: @@ -315,11 +322,9 @@ icmpv6_timeout_nla_policy[CTA_TIMEOUT_ICMPV6_MAX+1] = { #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ #ifdef CONFIG_SYSCTL -static struct ctl_table_header *icmpv6_sysctl_header; static struct ctl_table icmpv6_sysctl_table[] = { { .procname = "nf_conntrack_icmpv6_timeout", - .data = &nf_ct_icmpv6_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -328,6 +333,36 @@ static struct ctl_table icmpv6_sysctl_table[] = { }; #endif /* CONFIG_SYSCTL */ +static int icmpv6_kmemdup_sysctl_table(struct nf_proto_net *pn, + struct nf_icmp_net *in) +{ +#ifdef CONFIG_SYSCTL + pn->ctl_table = kmemdup(icmpv6_sysctl_table, + sizeof(icmpv6_sysctl_table), + GFP_KERNEL); + if (!pn->ctl_table) + return -ENOMEM; + + pn->ctl_table[0].data = &in->timeout; +#endif + return 0; +} + +static int icmpv6_init_net(struct net *net, u_int16_t proto) +{ + struct nf_icmp_net *in = icmpv6_pernet(net); + struct nf_proto_net *pn = &in->pn; + + in->timeout = nf_ct_icmpv6_timeout; + + return icmpv6_kmemdup_sysctl_table(pn, in); +} + +static struct nf_proto_net *icmpv6_get_net_proto(struct net *net) +{ + return &net->ct.nf_ct_proto.icmpv6.pn; +} + struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly = { .l3proto = PF_INET6, @@ -355,8 +390,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly = .nla_policy = icmpv6_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ -#ifdef CONFIG_SYSCTL - .ctl_table_header = &icmpv6_sysctl_header, - .ctl_table = icmpv6_sysctl_table, -#endif + .init_net = icmpv6_init_net, + .get_net_proto = icmpv6_get_net_proto, }; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 38f00b0298d3..c9c78c2e666b 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -444,12 +444,11 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) return head; out_oversize: - if (net_ratelimit()) - printk(KERN_DEBUG "nf_ct_frag6_reasm: payload len = %d\n", payload_len); + net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n", + payload_len); goto out_fail; out_oom: - if (net_ratelimit()) - printk(KERN_DEBUG "nf_ct_frag6_reasm: no memory for reassembly\n"); + net_dbg_ratelimited("nf_ct_frag6_reasm: no memory for reassembly\n"); out_fail: return NULL; } @@ -626,8 +625,8 @@ int nf_ct_frag6_init(void) inet_frags_init(&nf_frags); #ifdef CONFIG_SYSCTL - nf_ct_frag6_sysctl_header = register_sysctl_paths(nf_net_netfilter_sysctl_path, - nf_ct_frag6_sysctl_table); + nf_ct_frag6_sysctl_header = register_net_sysctl(&init_net, "net/netfilter", + nf_ct_frag6_sysctl_table); if (!nf_ct_frag6_sysctl_header) { inet_frags_fini(&nf_frags); return -ENOMEM; @@ -640,7 +639,7 @@ int nf_ct_frag6_init(void) void nf_ct_frag6_cleanup(void) { #ifdef CONFIG_SYSCTL - unregister_sysctl_table(nf_ct_frag6_sysctl_header); + unregister_net_sysctl_table(nf_ct_frag6_sysctl_header); nf_ct_frag6_sysctl_header = NULL; #endif inet_frags_fini(&nf_frags); diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c index 9a7978fdc02a..053082dfc93e 100644 --- a/net/ipv6/protocol.c +++ b/net/ipv6/protocol.c @@ -29,9 +29,7 @@ const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly; int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol) { - int hash = protocol & (MAX_INET_PROTOS - 1); - - return !cmpxchg((const struct inet6_protocol **)&inet6_protos[hash], + return !cmpxchg((const struct inet6_protocol **)&inet6_protos[protocol], NULL, prot) ? 0 : -1; } EXPORT_SYMBOL(inet6_add_protocol); @@ -42,9 +40,9 @@ EXPORT_SYMBOL(inet6_add_protocol); int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol) { - int ret, hash = protocol & (MAX_INET_PROTOS - 1); + int ret; - ret = (cmpxchg((const struct inet6_protocol **)&inet6_protos[hash], + ret = (cmpxchg((const struct inet6_protocol **)&inet6_protos[protocol], prot, NULL) == prot) ? 0 : -1; synchronize_net(); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 5bddea778840..ef0579d5bca6 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -72,7 +72,7 @@ static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, const struct in6_addr *rmt_addr, int dif) { struct hlist_node *node; - int is_multicast = ipv6_addr_is_multicast(loc_addr); + bool is_multicast = ipv6_addr_is_multicast(loc_addr); sk_for_each_from(sk, node) if (inet_sk(sk)->inet_num == num) { @@ -153,19 +153,19 @@ EXPORT_SYMBOL(rawv6_mh_filter_unregister); * * Caller owns SKB so we must make clones. */ -static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) +static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) { const struct in6_addr *saddr; const struct in6_addr *daddr; struct sock *sk; - int delivered = 0; + bool delivered = false; __u8 hash; struct net *net; saddr = &ipv6_hdr(skb)->saddr; daddr = saddr + 1; - hash = nexthdr & (MAX_INET_PROTOS - 1); + hash = nexthdr & (RAW_HTABLE_SIZE - 1); read_lock(&raw_v6_hashinfo.lock); sk = sk_head(&raw_v6_hashinfo.ht[hash]); @@ -179,7 +179,7 @@ static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) while (sk) { int filtered; - delivered = 1; + delivered = true; switch (nexthdr) { case IPPROTO_ICMPV6: filtered = icmpv6_filter(sk, skb); @@ -225,11 +225,11 @@ out: return delivered; } -int raw6_local_deliver(struct sk_buff *skb, int nexthdr) +bool raw6_local_deliver(struct sk_buff *skb, int nexthdr) { struct sock *raw_sk; - raw_sk = sk_head(&raw_v6_hashinfo.ht[nexthdr & (MAX_INET_PROTOS - 1)]); + raw_sk = sk_head(&raw_v6_hashinfo.ht[nexthdr & (RAW_HTABLE_SIZE - 1)]); if (raw_sk && !ipv6_raw_deliver(skb, nexthdr)) raw_sk = NULL; @@ -328,9 +328,12 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, return; harderr = icmpv6_err_convert(type, code, &err); - if (type == ICMPV6_PKT_TOOBIG) + if (type == ICMPV6_PKT_TOOBIG) { + ip6_sk_update_pmtu(skb, sk, info); harderr = (np->pmtudisc == IPV6_PMTUDISC_DO); - + } + if (type == NDISC_REDIRECT) + ip6_sk_redirect(skb, sk); if (np->recverr) { u8 *payload = skb->data; if (!inet->hdrincl) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 9447bd69873a..4ff9af628e72 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -134,15 +134,16 @@ static unsigned int ip6_hashfn(struct inet_frag_queue *q) return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr, ip6_frags.rnd); } -int ip6_frag_match(struct inet_frag_queue *q, void *a) +bool ip6_frag_match(struct inet_frag_queue *q, void *a) { struct frag_queue *fq; struct ip6_create_arg *arg = a; fq = container_of(q, struct frag_queue, q); - return (fq->id == arg->id && fq->user == arg->user && - ipv6_addr_equal(&fq->saddr, arg->src) && - ipv6_addr_equal(&fq->daddr, arg->dst)); + return fq->id == arg->id && + fq->user == arg->user && + ipv6_addr_equal(&fq->saddr, arg->src) && + ipv6_addr_equal(&fq->daddr, arg->dst); } EXPORT_SYMBOL(ip6_frag_match); @@ -414,6 +415,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, struct sk_buff *fp, *head = fq->q.fragments; int payload_len; unsigned int nhoff; + int sum_truesize; fq_kill(fq); @@ -433,7 +435,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, skb_morph(head, fq->q.fragments); head->next = fq->q.fragments->next; - kfree_skb(fq->q.fragments); + consume_skb(fq->q.fragments); fq->q.fragments = head; } @@ -483,20 +485,33 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, head->mac_header += sizeof(struct frag_hdr); head->network_header += sizeof(struct frag_hdr); - skb_shinfo(head)->frag_list = head->next; skb_reset_transport_header(head); skb_push(head, head->data - skb_network_header(head)); - for (fp=head->next; fp; fp = fp->next) { - head->data_len += fp->len; - head->len += fp->len; + sum_truesize = head->truesize; + for (fp = head->next; fp;) { + bool headstolen; + int delta; + struct sk_buff *next = fp->next; + + sum_truesize += fp->truesize; if (head->ip_summed != fp->ip_summed) head->ip_summed = CHECKSUM_NONE; else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); - head->truesize += fp->truesize; + + if (skb_try_coalesce(head, fp, &headstolen, &delta)) { + kfree_skb_partial(fp, headstolen); + } else { + if (!skb_shinfo(head)->frag_list) + skb_shinfo(head)->frag_list = fp; + head->data_len += fp->len; + head->len += fp->len; + head->truesize += fp->truesize; + } + fp = next; } - atomic_sub(head->truesize, &fq->q.net->mem); + atomic_sub(sum_truesize, &fq->q.net->mem); head->next = NULL; head->dev = dev; @@ -518,12 +533,10 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, return 1; out_oversize: - if (net_ratelimit()) - printk(KERN_DEBUG "ip6_frag_reasm: payload len = %d\n", payload_len); + net_dbg_ratelimited("ip6_frag_reasm: payload len = %d\n", payload_len); goto out_fail; out_oom: - if (net_ratelimit()) - printk(KERN_DEBUG "ip6_frag_reasm: no memory for reassembly\n"); + net_dbg_ratelimited("ip6_frag_reasm: no memory for reassembly\n"); out_fail: rcu_read_lock(); IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); @@ -646,7 +659,7 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net) table[2].data = &net->ipv6.frags.timeout; } - hdr = register_net_sysctl_table(net, net_ipv6_ctl_path, table); + hdr = register_net_sysctl(net, "net/ipv6", table); if (hdr == NULL) goto err_reg; @@ -674,7 +687,7 @@ static struct ctl_table_header *ip6_ctl_header; static int ip6_frags_sysctl_register(void) { - ip6_ctl_header = register_net_sysctl_rotable(net_ipv6_ctl_path, + ip6_ctl_header = register_net_sysctl(&init_net, "net/ipv6", ip6_frags_ctl_table); return ip6_ctl_header == NULL ? -ENOMEM : 0; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 496b62712fe8..84f6564dd372 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -24,6 +24,8 @@ * Fixed routing subtrees. */ +#define pr_fmt(fmt) "IPv6: " fmt + #include <linux/capability.h> #include <linux/errno.h> #include <linux/export.h> @@ -62,7 +64,7 @@ #include <linux/sysctl.h> #endif -static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort, +static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, const struct in6_addr *dest); static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); static unsigned int ip6_default_advmss(const struct dst_entry *dst); @@ -76,13 +78,16 @@ static int ip6_dst_gc(struct dst_ops *ops); static int ip6_pkt_discard(struct sk_buff *skb); static int ip6_pkt_discard_out(struct sk_buff *skb); static void ip6_link_failure(struct sk_buff *skb); -static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu); +static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu); +static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb); #ifdef CONFIG_IPV6_ROUTE_INFO static struct rt6_info *rt6_add_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, const struct in6_addr *gwaddr, int ifindex, - unsigned pref); + unsigned int pref); static struct rt6_info *rt6_get_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, const struct in6_addr *gwaddr, int ifindex); @@ -97,10 +102,7 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) if (!(rt->dst.flags & DST_HOST)) return NULL; - if (!rt->rt6i_peer) - rt6_bind_peer(rt, 1); - - peer = rt->rt6i_peer; + peer = rt6_get_peer_create(rt); if (peer) { u32 *old_p = __DST_METRICS_PTR(old); unsigned long prev, new; @@ -121,21 +123,27 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) return p; } -static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr) +static inline const void *choose_neigh_daddr(struct rt6_info *rt, + struct sk_buff *skb, + const void *daddr) { struct in6_addr *p = &rt->rt6i_gateway; if (!ipv6_addr_any(p)) return (const void *) p; + else if (skb) + return &ipv6_hdr(skb)->daddr; return daddr; } -static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr) +static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr) { struct rt6_info *rt = (struct rt6_info *) dst; struct neighbour *n; - daddr = choose_neigh_daddr(rt, daddr); + daddr = choose_neigh_daddr(rt, skb, daddr); n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr); if (n) return n; @@ -150,7 +158,7 @@ static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev) if (IS_ERR(n)) return PTR_ERR(n); } - dst_set_neighbour(&rt->dst, n); + rt->n = n; return 0; } @@ -169,6 +177,7 @@ static struct dst_ops ip6_dst_ops_template = { .negative_advice = ip6_negative_advice, .link_failure = ip6_link_failure, .update_pmtu = ip6_rt_update_pmtu, + .redirect = rt6_do_redirect, .local_out = __ip6_local_out, .neigh_lookup = ip6_neigh_lookup, }; @@ -180,7 +189,13 @@ static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst) return mtu ? : dst->dev->mtu; } -static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) +static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) +{ +} + +static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb) { } @@ -198,6 +213,7 @@ static struct dst_ops ip6_dst_blackhole_ops = { .mtu = ip6_blackhole_mtu, .default_advmss = ip6_default_advmss, .update_pmtu = ip6_rt_blackhole_update_pmtu, + .redirect = ip6_rt_blackhole_redirect, .cow_metrics = ip6_rt_blackhole_cow_metrics, .neigh_lookup = ip6_neigh_lookup, }; @@ -259,16 +275,20 @@ static struct rt6_info ip6_blk_hole_entry_template = { #endif /* allocate dst with ip6_dst_ops */ -static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops, +static inline struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev, - int flags) + int flags, + struct fib6_table *table) { - struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags); + struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, + 0, 0, flags); - if (rt) - memset(&rt->rt6i_table, 0, - sizeof(*rt) - sizeof(struct dst_entry)); + if (rt) { + struct dst_entry *dst = &rt->dst; + memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); + rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers); + } return rt; } @@ -276,7 +296,9 @@ static void ip6_dst_destroy(struct dst_entry *dst) { struct rt6_info *rt = (struct rt6_info *)dst; struct inet6_dev *idev = rt->rt6i_idev; - struct inet_peer *peer = rt->rt6i_peer; + + if (rt->n) + neigh_release(rt->n); if (!(rt->dst.flags & DST_HOST)) dst_destroy_metrics_generic(dst); @@ -285,8 +307,12 @@ static void ip6_dst_destroy(struct dst_entry *dst) rt->rt6i_idev = NULL; in6_dev_put(idev); } - if (peer) { - rt->rt6i_peer = NULL; + + if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from) + dst_release(dst->from); + + if (rt6_has_peer(rt)) { + struct inet_peer *peer = rt6_peer_ptr(rt); inet_putpeer(peer); } } @@ -300,13 +326,20 @@ static u32 rt6_peer_genid(void) void rt6_bind_peer(struct rt6_info *rt, int create) { + struct inet_peer_base *base; struct inet_peer *peer; - peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create); - if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL) - inet_putpeer(peer); - else - rt->rt6i_peer_genid = rt6_peer_genid(); + base = inetpeer_base_ptr(rt->_rt6i_peer); + if (!base) + return; + + peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create); + if (peer) { + if (!rt6_set_peer(rt, peer)) + inet_putpeer(peer); + else + rt->rt6i_peer_genid = rt6_peer_genid(); + } } static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, @@ -317,23 +350,39 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, struct net_device *loopback_dev = dev_net(dev)->loopback_dev; - if (dev != loopback_dev && idev && idev->dev == dev) { - struct inet6_dev *loopback_idev = - in6_dev_get(loopback_dev); - if (loopback_idev) { - rt->rt6i_idev = loopback_idev; - in6_dev_put(idev); + if (dev != loopback_dev) { + if (idev && idev->dev == dev) { + struct inet6_dev *loopback_idev = + in6_dev_get(loopback_dev); + if (loopback_idev) { + rt->rt6i_idev = loopback_idev; + in6_dev_put(idev); + } + } + if (rt->n && rt->n->dev == dev) { + rt->n->dev = loopback_dev; + dev_hold(loopback_dev); + dev_put(dev); } } } -static __inline__ int rt6_check_expired(const struct rt6_info *rt) +static bool rt6_check_expired(const struct rt6_info *rt) { - return (rt->rt6i_flags & RTF_EXPIRES) && - time_after(jiffies, rt->dst.expires); + struct rt6_info *ort = NULL; + + if (rt->rt6i_flags & RTF_EXPIRES) { + if (time_after(jiffies, rt->dst.expires)) + return true; + } else if (rt->dst.from) { + ort = (struct rt6_info *) rt->dst.from; + return (ort->rt6i_flags & RTF_EXPIRES) && + time_after(jiffies, ort->dst.expires); + } + return false; } -static inline int rt6_need_strict(const struct in6_addr *daddr) +static bool rt6_need_strict(const struct in6_addr *daddr) { return ipv6_addr_type(daddr) & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); @@ -403,7 +452,7 @@ static void rt6_probe(struct rt6_info *rt) * to no more than one per minute. */ rcu_read_lock(); - neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL; + neigh = rt ? rt->n : NULL; if (!neigh || (neigh->nud_state & NUD_VALID)) goto out; read_lock_bh(&neigh->lock); @@ -450,7 +499,7 @@ static inline int rt6_check_neigh(struct rt6_info *rt) int m; rcu_read_lock(); - neigh = dst_get_neighbour_noref(&rt->dst); + neigh = rt->n; if (rt->rt6i_flags & RTF_NONEXTHOP || !(rt->rt6i_flags & RTF_GATEWAY)) m = 1; @@ -620,12 +669,11 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); if (rt) { - if (!addrconf_finite_timeout(lifetime)) { - rt->rt6i_flags &= ~RTF_EXPIRES; - } else { - rt->dst.expires = jiffies + HZ * lifetime; - rt->rt6i_flags |= RTF_EXPIRES; - } + if (!addrconf_finite_timeout(lifetime)) + rt6_clean_expires(rt); + else + rt6_set_expires(rt, jiffies + HZ * lifetime); + dst_release(&rt->dst); } return 0; @@ -730,7 +778,7 @@ int ip6_ins_rt(struct rt6_info *rt) return __ip6_ins_rt(rt, &info); } -static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort, +static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, const struct in6_addr *daddr, const struct in6_addr *saddr) { @@ -782,9 +830,7 @@ static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort, goto retry; } - if (net_ratelimit()) - printk(KERN_WARNING - "ipv6: Neighbour table overflow.\n"); + net_warn_ratelimited("Neighbour table overflow\n"); dst_free(&rt->dst); return NULL; } @@ -800,7 +846,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, if (rt) { rt->rt6i_flags |= RTF_CACHE; - dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst))); + rt->n = neigh_clone(ort->n); } return rt; } @@ -834,7 +880,7 @@ restart: dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); - if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) + if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP)) nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr); else if (!(rt->dst.flags & DST_HOST)) nrt = rt6_alloc_clone(rt, &fl6->daddr); @@ -881,6 +927,16 @@ static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table * return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags); } +static struct dst_entry *ip6_route_input_lookup(struct net *net, + struct net_device *dev, + struct flowi6 *fl6, int flags) +{ + if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG) + flags |= RT6_LOOKUP_F_IFACE; + + return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input); +} + void ip6_route_input(struct sk_buff *skb) { const struct ipv6hdr *iph = ipv6_hdr(skb); @@ -895,10 +951,7 @@ void ip6_route_input(struct sk_buff *skb) .flowi6_proto = iph->nexthdr, }; - if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG) - flags |= RT6_LOOKUP_F_IFACE; - - skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input)); + skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags)); } static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table, @@ -912,6 +965,8 @@ struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk, { int flags = 0; + fl6->flowi6_iif = net->loopback_dev->ifindex; + if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr)) flags |= RT6_LOOKUP_F_IFACE; @@ -932,10 +987,11 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0); if (rt) { - memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry)); - new = &rt->dst; + memset(new + 1, 0, sizeof(*rt) - sizeof(*new)); + rt6_init_peer(rt, net->ipv6.peers); + new->__use = 1; new->input = dst_discard; new->output = dst_discard; @@ -947,10 +1003,10 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori rt->rt6i_idev = ort->rt6i_idev; if (rt->rt6i_idev) in6_dev_hold(rt->rt6i_idev); - rt->dst.expires = 0; rt->rt6i_gateway = ort->rt6i_gateway; - rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES; + rt->rt6i_flags = ort->rt6i_flags; + rt6_clean_expires(rt); rt->rt6i_metric = 0; memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); @@ -977,7 +1033,7 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) { if (rt->rt6i_peer_genid != rt6_peer_genid()) { - if (!rt->rt6i_peer) + if (!rt6_has_peer(rt)) rt6_bind_peer(rt, 0); rt->rt6i_peer_genid = rt6_peer_genid(); } @@ -1012,19 +1068,22 @@ static void ip6_link_failure(struct sk_buff *skb) rt = (struct rt6_info *) skb_dst(skb); if (rt) { - if (rt->rt6i_flags & RTF_CACHE) { - dst_set_expires(&rt->dst, 0); - rt->rt6i_flags |= RTF_EXPIRES; - } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) + if (rt->rt6i_flags & RTF_CACHE) + rt6_update_expires(rt, 0); + else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) rt->rt6i_node->fn_sernum = -1; } } -static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) +static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { struct rt6_info *rt6 = (struct rt6_info*)dst; + dst_confirm(dst); if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) { + struct net *net = dev_net(dst->dev); + rt6->rt6i_flags |= RTF_MODIFIED; if (mtu < IPV6_MIN_MTU) { u32 features = dst_metric(dst, RTAX_FEATURES); @@ -1033,9 +1092,66 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) dst_metric_set(dst, RTAX_FEATURES, features); } dst_metric_set(dst, RTAX_MTU, mtu); + rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires); } } +void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, + int oif, u32 mark) +{ + const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; + struct dst_entry *dst; + struct flowi6 fl6; + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_oif = oif; + fl6.flowi6_mark = mark; + fl6.flowi6_flags = 0; + fl6.daddr = iph->daddr; + fl6.saddr = iph->saddr; + fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK; + + dst = ip6_route_output(net, NULL, &fl6); + if (!dst->error) + ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu)); + dst_release(dst); +} +EXPORT_SYMBOL_GPL(ip6_update_pmtu); + +void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) +{ + ip6_update_pmtu(skb, sock_net(sk), mtu, + sk->sk_bound_dev_if, sk->sk_mark); +} +EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu); + +void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) +{ + const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; + struct dst_entry *dst; + struct flowi6 fl6; + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_oif = oif; + fl6.flowi6_mark = mark; + fl6.flowi6_flags = 0; + fl6.daddr = iph->daddr; + fl6.saddr = iph->saddr; + fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK; + + dst = ip6_route_output(net, NULL, &fl6); + if (!dst->error) + rt6_do_redirect(dst, NULL, skb); + dst_release(dst); +} +EXPORT_SYMBOL_GPL(ip6_redirect); + +void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk) +{ + ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark); +} +EXPORT_SYMBOL_GPL(ip6_sk_redirect); + static unsigned int ip6_default_advmss(const struct dst_entry *dst) { struct net_device *dev = dst->dev; @@ -1092,7 +1208,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, if (unlikely(!idev)) return ERR_PTR(-ENODEV); - rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0); + rt = ip6_dst_alloc(net, dev, 0, NULL); if (unlikely(!rt)) { in6_dev_put(idev); dst = ERR_PTR(-ENOMEM); @@ -1102,7 +1218,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, if (neigh) neigh_hold(neigh); else { - neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr); + neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr); if (IS_ERR(neigh)) { in6_dev_put(idev); dst_free(&rt->dst); @@ -1112,7 +1228,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, rt->dst.flags |= DST_HOST; rt->dst.output = ip6_output; - dst_set_neighbour(&rt->dst, neigh); + rt->n = neigh; atomic_set(&rt->dst.__refcnt, 1); rt->rt6i_dst.addr = fl6->daddr; rt->rt6i_dst.plen = 128; @@ -1264,7 +1380,7 @@ int ip6_route_add(struct fib6_config *cfg) !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) { table = fib6_get_table(net, cfg->fc_table); if (!table) { - printk(KERN_WARNING "IPv6: NLM_F_CREATE should be specified when creating new route\n"); + pr_warn("NLM_F_CREATE should be specified when creating new route\n"); table = fib6_new_table(net, cfg->fc_table); } } else { @@ -1274,7 +1390,7 @@ int ip6_route_add(struct fib6_config *cfg) if (!table) goto out; - rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT); + rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table); if (!rt) { err = -ENOMEM; @@ -1282,9 +1398,12 @@ int ip6_route_add(struct fib6_config *cfg) } rt->dst.obsolete = -1; - rt->dst.expires = (cfg->fc_flags & RTF_EXPIRES) ? - jiffies + clock_t_to_jiffies(cfg->fc_expires) : - 0; + + if (cfg->fc_flags & RTF_EXPIRES) + rt6_set_expires(rt, jiffies + + clock_t_to_jiffies(cfg->fc_expires)); + else + rt6_clean_expires(rt); if (cfg->fc_protocol == RTPROT_UNSPEC) cfg->fc_protocol = RTPROT_BOOT; @@ -1525,108 +1644,93 @@ static int ip6_route_del(struct fib6_config *cfg) return err; } -/* - * Handle redirects - */ -struct ip6rd_flowi { - struct flowi6 fl6; - struct in6_addr gateway; -}; - -static struct rt6_info *__ip6_route_redirect(struct net *net, - struct fib6_table *table, - struct flowi6 *fl6, - int flags) +static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb) { - struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6; - struct rt6_info *rt; - struct fib6_node *fn; + struct net *net = dev_net(skb->dev); + struct netevent_redirect netevent; + struct rt6_info *rt, *nrt = NULL; + const struct in6_addr *target; + struct ndisc_options ndopts; + const struct in6_addr *dest; + struct neighbour *old_neigh; + struct inet6_dev *in6_dev; + struct neighbour *neigh; + struct icmp6hdr *icmph; + int optlen, on_link; + u8 *lladdr; - /* - * Get the "current" route for this destination and - * check if the redirect has come from approriate router. - * - * RFC 2461 specifies that redirects should only be - * accepted if they come from the nexthop to the target. - * Due to the way the routes are chosen, this notion - * is a bit fuzzy and one might need to check all possible - * routes. - */ + optlen = skb->tail - skb->transport_header; + optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr); - read_lock_bh(&table->tb6_lock); - fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); -restart: - for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { - /* - * Current route is on-link; redirect is always invalid. - * - * Seems, previous statement is not true. It could - * be node, which looks for us as on-link (f.e. proxy ndisc) - * But then router serving it might decide, that we should - * know truth 8)8) --ANK (980726). - */ - if (rt6_check_expired(rt)) - continue; - if (!(rt->rt6i_flags & RTF_GATEWAY)) - continue; - if (fl6->flowi6_oif != rt->dst.dev->ifindex) - continue; - if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) - continue; - break; + if (optlen < 0) { + net_dbg_ratelimited("rt6_do_redirect: packet too short\n"); + return; } - if (!rt) - rt = net->ipv6.ip6_null_entry; - BACKTRACK(net, &fl6->saddr); -out: - dst_hold(&rt->dst); + icmph = icmp6_hdr(skb); + target = (const struct in6_addr *) (icmph + 1); + dest = target + 1; - read_unlock_bh(&table->tb6_lock); + if (ipv6_addr_is_multicast(dest)) { + net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n"); + return; + } - return rt; -}; + on_link = 0; + if (ipv6_addr_equal(dest, target)) { + on_link = 1; + } else if (ipv6_addr_type(target) != + (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) { + net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n"); + return; + } -static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest, - const struct in6_addr *src, - const struct in6_addr *gateway, - struct net_device *dev) -{ - int flags = RT6_LOOKUP_F_HAS_SADDR; - struct net *net = dev_net(dev); - struct ip6rd_flowi rdfl = { - .fl6 = { - .flowi6_oif = dev->ifindex, - .daddr = *dest, - .saddr = *src, - }, - }; + in6_dev = __in6_dev_get(skb->dev); + if (!in6_dev) + return; + if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) + return; - rdfl.gateway = *gateway; + /* RFC2461 8.1: + * The IP source address of the Redirect MUST be the same as the current + * first-hop router for the specified ICMP Destination Address. + */ - if (rt6_need_strict(dest)) - flags |= RT6_LOOKUP_F_IFACE; + if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) { + net_dbg_ratelimited("rt6_redirect: invalid ND options\n"); + return; + } - return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6, - flags, __ip6_route_redirect); -} + lladdr = NULL; + if (ndopts.nd_opts_tgt_lladdr) { + lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, + skb->dev); + if (!lladdr) { + net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n"); + return; + } + } -void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, - const struct in6_addr *saddr, - struct neighbour *neigh, u8 *lladdr, int on_link) -{ - struct rt6_info *rt, *nrt = NULL; - struct netevent_redirect netevent; - struct net *net = dev_net(neigh->dev); + rt = (struct rt6_info *) dst; + if (rt == net->ipv6.ip6_null_entry) { + net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n"); + return; + } - rt = ip6_route_redirect(dest, src, saddr, neigh->dev); + /* Redirect received -> path was valid. + * Look, redirects are sent only in response to data packets, + * so that this nexthop apparently is reachable. --ANK + */ + dst_confirm(&rt->dst); - if (rt == net->ipv6.ip6_null_entry) { - if (net_ratelimit()) - printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop " - "for redirect target\n"); + neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1); + if (!neigh) + return; + + /* Duplicate redirect: silently ignore. */ + old_neigh = rt->n; + if (neigh == old_neigh) goto out; - } /* * We have finally decided to accept it. @@ -1639,17 +1743,6 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, NEIGH_UPDATE_F_ISROUTER)) ); - /* - * Redirect received -> path was valid. - * Look, redirects are sent only in response to data packets, - * so that this nexthop apparently is reachable. --ANK - */ - dst_confirm(&rt->dst); - - /* Duplicate redirect: silently ignore. */ - if (neigh == dst_get_neighbour_noref_raw(&rt->dst)) - goto out; - nrt = ip6_rt_copy(rt, dest); if (!nrt) goto out; @@ -1659,145 +1752,37 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, nrt->rt6i_flags &= ~RTF_GATEWAY; nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key; - dst_set_neighbour(&nrt->dst, neigh_clone(neigh)); + nrt->n = neigh_clone(neigh); if (ip6_ins_rt(nrt)) goto out; netevent.old = &rt->dst; + netevent.old_neigh = old_neigh; netevent.new = &nrt->dst; + netevent.new_neigh = neigh; + netevent.daddr = dest; call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); if (rt->rt6i_flags & RTF_CACHE) { + rt = (struct rt6_info *) dst_clone(&rt->dst); ip6_del_rt(rt); - return; - } - -out: - dst_release(&rt->dst); -} - -/* - * Handle ICMP "packet too big" messages - * i.e. Path MTU discovery - */ - -static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr, - struct net *net, u32 pmtu, int ifindex) -{ - struct rt6_info *rt, *nrt; - int allfrag = 0; -again: - rt = rt6_lookup(net, daddr, saddr, ifindex, 0); - if (!rt) - return; - - if (rt6_check_expired(rt)) { - ip6_del_rt(rt); - goto again; - } - - if (pmtu >= dst_mtu(&rt->dst)) - goto out; - - if (pmtu < IPV6_MIN_MTU) { - /* - * According to RFC2460, PMTU is set to the IPv6 Minimum Link - * MTU (1280) and a fragment header should always be included - * after a node receiving Too Big message reporting PMTU is - * less than the IPv6 Minimum Link MTU. - */ - pmtu = IPV6_MIN_MTU; - allfrag = 1; - } - - /* New mtu received -> path was valid. - They are sent only in response to data packets, - so that this nexthop apparently is reachable. --ANK - */ - dst_confirm(&rt->dst); - - /* Host route. If it is static, it would be better - not to override it, but add new one, so that - when cache entry will expire old pmtu - would return automatically. - */ - if (rt->rt6i_flags & RTF_CACHE) { - dst_metric_set(&rt->dst, RTAX_MTU, pmtu); - if (allfrag) { - u32 features = dst_metric(&rt->dst, RTAX_FEATURES); - features |= RTAX_FEATURE_ALLFRAG; - dst_metric_set(&rt->dst, RTAX_FEATURES, features); - } - dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires); - rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES; - goto out; } - /* Network route. - Two cases are possible: - 1. It is connected route. Action: COW - 2. It is gatewayed route or NONEXTHOP route. Action: clone it. - */ - if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) - nrt = rt6_alloc_cow(rt, daddr, saddr); - else - nrt = rt6_alloc_clone(rt, daddr); - - if (nrt) { - dst_metric_set(&nrt->dst, RTAX_MTU, pmtu); - if (allfrag) { - u32 features = dst_metric(&nrt->dst, RTAX_FEATURES); - features |= RTAX_FEATURE_ALLFRAG; - dst_metric_set(&nrt->dst, RTAX_FEATURES, features); - } - - /* According to RFC 1981, detecting PMTU increase shouldn't be - * happened within 5 mins, the recommended timer is 10 mins. - * Here this route expiration time is set to ip6_rt_mtu_expires - * which is 10 mins. After 10 mins the decreased pmtu is expired - * and detecting PMTU increase will be automatically happened. - */ - dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires); - nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES; - - ip6_ins_rt(nrt); - } out: - dst_release(&rt->dst); -} - -void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr, - struct net_device *dev, u32 pmtu) -{ - struct net *net = dev_net(dev); - - /* - * RFC 1981 states that a node "MUST reduce the size of the packets it - * is sending along the path" that caused the Packet Too Big message. - * Since it's not possible in the general case to determine which - * interface was used to send the original packet, we update the MTU - * on the interface that will be used to send future packets. We also - * update the MTU on the interface that received the Packet Too Big in - * case the original packet was forced out that interface with - * SO_BINDTODEVICE or similar. This is the next best thing to the - * correct behaviour, which would be to update the MTU on all - * interfaces. - */ - rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0); - rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex); + neigh_release(neigh); } /* * Misc support functions */ -static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort, +static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, const struct in6_addr *dest) { struct net *net = dev_net(ort->dst.dev); - struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, - ort->dst.dev, 0); + struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0, + ort->rt6i_table); if (rt) { rt->dst.input = ort->dst.input; @@ -1812,10 +1797,14 @@ static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort, if (rt->rt6i_idev) in6_dev_hold(rt->rt6i_idev); rt->dst.lastuse = jiffies; - rt->dst.expires = 0; rt->rt6i_gateway = ort->rt6i_gateway; - rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES; + rt->rt6i_flags = ort->rt6i_flags; + if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) == + (RTF_DEFAULT | RTF_ADDRCONF)) + rt6_set_from(rt, ort); + else + rt6_clean_expires(rt); rt->rt6i_metric = 0; #ifdef CONFIG_IPV6_SUBTREES @@ -1863,7 +1852,7 @@ out: static struct rt6_info *rt6_add_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, const struct in6_addr *gwaddr, int ifindex, - unsigned pref) + unsigned int pref) { struct fib6_config cfg = { .fc_table = RT6_TABLE_INFO, @@ -2077,14 +2066,11 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, bool anycast) { struct net *net = dev_net(idev->dev); - struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, - net->loopback_dev, 0); + struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL); int err; if (!rt) { - if (net_ratelimit()) - pr_warning("IPv6: Maximum number of routes reached," - " consider increasing route/max_size.\n"); + net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n"); return ERR_PTR(-ENOMEM); } @@ -2193,10 +2179,9 @@ void rt6_ifdown(struct net *net, struct net_device *dev) icmp6_clean_all(fib6_ifdown, &adn); } -struct rt6_mtu_change_arg -{ +struct rt6_mtu_change_arg { struct net_device *dev; - unsigned mtu; + unsigned int mtu; }; static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) @@ -2238,7 +2223,7 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) return 0; } -void rt6_mtu_change(struct net_device *dev, unsigned mtu) +void rt6_mtu_change(struct net_device *dev, unsigned int mtu) { struct rt6_mtu_change_arg arg = { .dev = dev, @@ -2377,13 +2362,11 @@ static int rt6_fill_node(struct net *net, int iif, int type, u32 pid, u32 seq, int prefix, int nowait, unsigned int flags) { - const struct inet_peer *peer; struct rtmsg *rtm; struct nlmsghdr *nlh; long expires; u32 table; struct neighbour *n; - u32 ts, tsage; if (prefix) { /* user wants prefix routes only */ if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { @@ -2406,7 +2389,8 @@ static int rt6_fill_node(struct net *net, else table = RT6_TABLE_UNSPEC; rtm->rtm_table = table; - NLA_PUT_U32(skb, RTA_TABLE, table); + if (nla_put_u32(skb, RTA_TABLE, table)) + goto nla_put_failure; if (rt->rt6i_flags & RTF_REJECT) rtm->rtm_type = RTN_UNREACHABLE; else if (rt->rt6i_flags & RTF_LOCAL) @@ -2420,25 +2404,31 @@ static int rt6_fill_node(struct net *net, rtm->rtm_protocol = rt->rt6i_protocol; if (rt->rt6i_flags & RTF_DYNAMIC) rtm->rtm_protocol = RTPROT_REDIRECT; - else if (rt->rt6i_flags & RTF_ADDRCONF) - rtm->rtm_protocol = RTPROT_KERNEL; - else if (rt->rt6i_flags & RTF_DEFAULT) - rtm->rtm_protocol = RTPROT_RA; + else if (rt->rt6i_flags & RTF_ADDRCONF) { + if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO)) + rtm->rtm_protocol = RTPROT_RA; + else + rtm->rtm_protocol = RTPROT_KERNEL; + } if (rt->rt6i_flags & RTF_CACHE) rtm->rtm_flags |= RTM_F_CLONED; if (dst) { - NLA_PUT(skb, RTA_DST, 16, dst); + if (nla_put(skb, RTA_DST, 16, dst)) + goto nla_put_failure; rtm->rtm_dst_len = 128; } else if (rtm->rtm_dst_len) - NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr); + if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr)) + goto nla_put_failure; #ifdef CONFIG_IPV6_SUBTREES if (src) { - NLA_PUT(skb, RTA_SRC, 16, src); + if (nla_put(skb, RTA_SRC, 16, src)) + goto nla_put_failure; rtm->rtm_src_len = 128; - } else if (rtm->rtm_src_len) - NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr); + } else if (rtm->rtm_src_len && + nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr)) + goto nla_put_failure; #endif if (iif) { #ifdef CONFIG_IPV6_MROUTE @@ -2456,24 +2446,27 @@ static int rt6_fill_node(struct net *net, } } else #endif - NLA_PUT_U32(skb, RTA_IIF, iif); + if (nla_put_u32(skb, RTA_IIF, iif)) + goto nla_put_failure; } else if (dst) { struct in6_addr saddr_buf; - if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0) - NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); + if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 && + nla_put(skb, RTA_PREFSRC, 16, &saddr_buf)) + goto nla_put_failure; } if (rt->rt6i_prefsrc.plen) { struct in6_addr saddr_buf; saddr_buf = rt->rt6i_prefsrc.addr; - NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); + if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf)) + goto nla_put_failure; } if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) goto nla_put_failure; rcu_read_lock(); - n = dst_get_neighbour_noref(&rt->dst); + n = rt->n; if (n) { if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) { rcu_read_unlock(); @@ -2482,11 +2475,11 @@ static int rt6_fill_node(struct net *net, } rcu_read_unlock(); - if (rt->dst.dev) - NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex); - - NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric); - + if (rt->dst.dev && + nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) + goto nla_put_failure; + if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric)) + goto nla_put_failure; if (!(rt->rt6i_flags & RTF_EXPIRES)) expires = 0; else if (rt->dst.expires - jiffies < INT_MAX) @@ -2494,15 +2487,7 @@ static int rt6_fill_node(struct net *net, else expires = INT_MAX; - peer = rt->rt6i_peer; - ts = tsage = 0; - if (peer && peer->tcp_ts_stamp) { - ts = peer->tcp_ts; - tsage = get_seconds() - peer->tcp_ts_stamp; - } - - if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage, - expires, rt->dst.error) < 0) + if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0) goto nla_put_failure; return nlmsg_end(skb, nlh); @@ -2537,7 +2522,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void struct sk_buff *skb; struct rtmsg *rtm; struct flowi6 fl6; - int err, iif = 0; + int err, iif = 0, oif = 0; err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); if (err < 0) @@ -2564,19 +2549,34 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void iif = nla_get_u32(tb[RTA_IIF]); if (tb[RTA_OIF]) - fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]); + oif = nla_get_u32(tb[RTA_OIF]); if (iif) { struct net_device *dev; + int flags = 0; + dev = __dev_get_by_index(net, iif); if (!dev) { err = -ENODEV; goto errout; } + + fl6.flowi6_iif = iif; + + if (!ipv6_addr_any(&fl6.saddr)) + flags |= RT6_LOOKUP_F_HAS_SADDR; + + rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6, + flags); + } else { + fl6.flowi6_oif = oif; + + rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6); } skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) { + dst_release(&rt->dst); err = -ENOBUFS; goto errout; } @@ -2587,7 +2587,6 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void skb_reset_mac_header(skb); skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); - rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6); skb_dst_set(skb, &rt->dst); err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif, @@ -2681,7 +2680,7 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) seq_puts(m, "00000000000000000000000000000000 00 "); #endif rcu_read_lock(); - n = dst_get_neighbour_noref(&rt->dst); + n = rt->n; if (n) { seq_printf(m, "%pi6", n->primary_key); } else { @@ -2916,10 +2915,6 @@ static int __net_init ip6_route_net_init(struct net *net) net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ; net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; -#ifdef CONFIG_PROC_FS - proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops); - proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); -#endif net->ipv6.ip6_rt_gc_expire = 30*HZ; ret = 0; @@ -2940,10 +2935,6 @@ out_ip6_dst_ops: static void __net_exit ip6_route_net_exit(struct net *net) { -#ifdef CONFIG_PROC_FS - proc_net_remove(net, "ipv6_route"); - proc_net_remove(net, "rt6_stats"); -#endif kfree(net->ipv6.ip6_null_entry); #ifdef CONFIG_IPV6_MULTIPLE_TABLES kfree(net->ipv6.ip6_prohibit_entry); @@ -2952,11 +2943,58 @@ static void __net_exit ip6_route_net_exit(struct net *net) dst_entries_destroy(&net->ipv6.ip6_dst_ops); } +static int __net_init ip6_route_net_init_late(struct net *net) +{ +#ifdef CONFIG_PROC_FS + proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops); + proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); +#endif + return 0; +} + +static void __net_exit ip6_route_net_exit_late(struct net *net) +{ +#ifdef CONFIG_PROC_FS + proc_net_remove(net, "ipv6_route"); + proc_net_remove(net, "rt6_stats"); +#endif +} + static struct pernet_operations ip6_route_net_ops = { .init = ip6_route_net_init, .exit = ip6_route_net_exit, }; +static int __net_init ipv6_inetpeer_init(struct net *net) +{ + struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL); + + if (!bp) + return -ENOMEM; + inet_peer_base_init(bp); + net->ipv6.peers = bp; + return 0; +} + +static void __net_exit ipv6_inetpeer_exit(struct net *net) +{ + struct inet_peer_base *bp = net->ipv6.peers; + + net->ipv6.peers = NULL; + inetpeer_invalidate_tree(bp); + kfree(bp); +} + +static struct pernet_operations ipv6_inetpeer_ops = { + .init = ipv6_inetpeer_init, + .exit = ipv6_inetpeer_exit, +}; + +static struct pernet_operations ip6_route_net_late_ops = { + .init = ip6_route_net_init_late, + .exit = ip6_route_net_exit_late, +}; + static struct notifier_block ip6_route_dev_notifier = { .notifier_call = ip6_route_dev_notify, .priority = 0, @@ -2977,10 +3015,14 @@ int __init ip6_route_init(void) if (ret) goto out_kmem_cache; - ret = register_pernet_subsys(&ip6_route_net_ops); + ret = register_pernet_subsys(&ipv6_inetpeer_ops); if (ret) goto out_dst_entries; + ret = register_pernet_subsys(&ip6_route_net_ops); + if (ret) + goto out_register_inetpeer; + ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; /* Registering of the loopback is done before this portion of code, @@ -3006,19 +3048,25 @@ int __init ip6_route_init(void) if (ret) goto xfrm6_init; + ret = register_pernet_subsys(&ip6_route_net_late_ops); + if (ret) + goto fib6_rules_init; + ret = -ENOBUFS; if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) || __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) || __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL)) - goto fib6_rules_init; + goto out_register_late_subsys; ret = register_netdevice_notifier(&ip6_route_dev_notifier); if (ret) - goto fib6_rules_init; + goto out_register_late_subsys; out: return ret; +out_register_late_subsys: + unregister_pernet_subsys(&ip6_route_net_late_ops); fib6_rules_init: fib6_rules_cleanup(); xfrm6_init: @@ -3027,6 +3075,8 @@ out_fib6_init: fib6_gc_cleanup(); out_register_subsys: unregister_pernet_subsys(&ip6_route_net_ops); +out_register_inetpeer: + unregister_pernet_subsys(&ipv6_inetpeer_ops); out_dst_entries: dst_entries_destroy(&ip6_dst_blackhole_ops); out_kmem_cache: @@ -3037,9 +3087,11 @@ out_kmem_cache: void ip6_route_cleanup(void) { unregister_netdevice_notifier(&ip6_route_dev_notifier); + unregister_pernet_subsys(&ip6_route_net_late_ops); fib6_rules_cleanup(); xfrm6_fini(); fib6_gc_cleanup(); + unregister_pernet_subsys(&ipv6_inetpeer_ops); unregister_pernet_subsys(&ip6_route_net_ops); dst_entries_destroy(&ip6_dst_blackhole_ops); kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index c4ffd1743528..3bd1bfc01f85 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -17,6 +17,8 @@ * Fred Templin <fred.l.templin@boeing.com>: isatap support */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/capability.h> #include <linux/errno.h> @@ -87,35 +89,51 @@ struct sit_net { /* often modified stats are per cpu, other are shared (netdev->stats) */ struct pcpu_tstats { - unsigned long rx_packets; - unsigned long rx_bytes; - unsigned long tx_packets; - unsigned long tx_bytes; -} __attribute__((aligned(4*sizeof(unsigned long)))); + u64 rx_packets; + u64 rx_bytes; + u64 tx_packets; + u64 tx_bytes; + struct u64_stats_sync syncp; +}; -static struct net_device_stats *ipip6_get_stats(struct net_device *dev) +static struct rtnl_link_stats64 *ipip6_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *tot) { - struct pcpu_tstats sum = { 0 }; int i; for_each_possible_cpu(i) { const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); - - sum.rx_packets += tstats->rx_packets; - sum.rx_bytes += tstats->rx_bytes; - sum.tx_packets += tstats->tx_packets; - sum.tx_bytes += tstats->tx_bytes; + u64 rx_packets, rx_bytes, tx_packets, tx_bytes; + unsigned int start; + + do { + start = u64_stats_fetch_begin_bh(&tstats->syncp); + rx_packets = tstats->rx_packets; + tx_packets = tstats->tx_packets; + rx_bytes = tstats->rx_bytes; + tx_bytes = tstats->tx_bytes; + } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); + + tot->rx_packets += rx_packets; + tot->tx_packets += tx_packets; + tot->rx_bytes += rx_bytes; + tot->tx_bytes += tx_bytes; } - dev->stats.rx_packets = sum.rx_packets; - dev->stats.rx_bytes = sum.rx_bytes; - dev->stats.tx_packets = sum.tx_packets; - dev->stats.tx_bytes = sum.tx_bytes; - return &dev->stats; + + tot->rx_errors = dev->stats.rx_errors; + tot->tx_fifo_errors = dev->stats.tx_fifo_errors; + tot->tx_carrier_errors = dev->stats.tx_carrier_errors; + tot->tx_dropped = dev->stats.tx_dropped; + tot->tx_aborted_errors = dev->stats.tx_aborted_errors; + tot->tx_errors = dev->stats.tx_errors; + + return tot; } + /* * Must be invoked with rcu_read_lock */ -static struct ip_tunnel * ipip6_tunnel_lookup(struct net *net, +static struct ip_tunnel *ipip6_tunnel_lookup(struct net *net, struct net_device *dev, __be32 remote, __be32 local) { unsigned int h0 = HASH(remote); @@ -509,9 +527,6 @@ static int ipip6_err(struct sk_buff *skb, u32 info) case ICMP_PORT_UNREACH: /* Impossible event. */ return 0; - case ICMP_FRAG_NEEDED: - /* Soft state for pmtu is maintained by IP core. */ - return 0; default: /* All others are translated to HOST_UNREACH. rfc2003 contains "deep thoughts" about NET_UNREACH, @@ -524,6 +539,8 @@ static int ipip6_err(struct sk_buff *skb, u32 info) if (code != ICMP_EXC_TTL) return 0; break; + case ICMP_REDIRECT: + break; } err = -ENOENT; @@ -533,7 +550,23 @@ static int ipip6_err(struct sk_buff *skb, u32 info) skb->dev, iph->daddr, iph->saddr); - if (t == NULL || t->parms.iph.daddr == 0) + if (t == NULL) + goto out; + + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { + ipv4_update_pmtu(skb, dev_net(skb->dev), info, + t->dev->ifindex, 0, IPPROTO_IPV6, 0); + err = 0; + goto out; + } + if (type == ICMP_REDIRECT) { + ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0, + IPPROTO_IPV6, 0); + err = 0; + goto out; + } + + if (t->parms.iph.daddr == 0) goto out; err = 0; @@ -686,12 +719,11 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); if (neigh == NULL) { - if (net_ratelimit()) - printk(KERN_DEBUG "sit: nexthop == NULL\n"); + net_dbg_ratelimited("sit: nexthop == NULL\n"); goto tx_error; } - addr6 = (const struct in6_addr*)&neigh->primary_key; + addr6 = (const struct in6_addr *)&neigh->primary_key; addr_type = ipv6_addr_type(addr6); if ((addr_type & IPV6_ADDR_UNICAST) && @@ -716,12 +748,11 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); if (neigh == NULL) { - if (net_ratelimit()) - printk(KERN_DEBUG "sit: nexthop == NULL\n"); + net_dbg_ratelimited("sit: nexthop == NULL\n"); goto tx_error; } - addr6 = (const struct in6_addr*)&neigh->primary_key; + addr6 = (const struct in6_addr *)&neigh->primary_key; addr_type = ipv6_addr_type(addr6); if (addr_type == IPV6_ADDR_ANY) { @@ -776,7 +807,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, } if (tunnel->parms.iph.daddr && skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); if (skb->len > mtu) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); @@ -1126,7 +1157,7 @@ static const struct net_device_ops ipip6_netdev_ops = { .ndo_start_xmit = ipip6_tunnel_xmit, .ndo_do_ioctl = ipip6_tunnel_ioctl, .ndo_change_mtu = ipip6_tunnel_change_mtu, - .ndo_get_stats = ipip6_get_stats, + .ndo_get_stats64= ipip6_get_stats64, }; static void ipip6_dev_free(struct net_device *dev) @@ -1287,7 +1318,7 @@ static int __init sit_init(void) { int err; - printk(KERN_INFO "IPv6 over IPv4 tunneling driver\n"); + pr_info("IPv6 over IPv4 tunneling driver\n"); err = register_pernet_device(&sit_net_ops); if (err < 0) @@ -1295,7 +1326,7 @@ static int __init sit_init(void) err = xfrm4_tunnel_register(&sit_handler, AF_INET6); if (err < 0) { unregister_pernet_device(&sit_net_ops); - printk(KERN_INFO "sit init: Can't add protocol\n"); + pr_info("%s: can't add protocol\n", __func__); } return err; } diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 8e951d8d3b81..bb46061c813a 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -21,9 +21,6 @@ #include <net/ipv6.h> #include <net/tcp.h> -extern int sysctl_tcp_syncookies; -extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS]; - #define COOKIEBITS 24 /* Upper bits store count */ #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) @@ -180,7 +177,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, &hash_location, 0); + tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL); if (!cookie_check_timestamp(&tcp_opt, &ecn_ok)) goto out; diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 166a57c47d39..e85c48bd404f 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -16,32 +16,8 @@ #include <net/addrconf.h> #include <net/inet_frag.h> -static struct ctl_table empty[1]; - -static ctl_table ipv6_static_skeleton[] = { - { - .procname = "neigh", - .maxlen = 0, - .mode = 0555, - .child = empty, - }, - { } -}; - static ctl_table ipv6_table_template[] = { { - .procname = "route", - .maxlen = 0, - .mode = 0555, - .child = ipv6_route_table_template - }, - { - .procname = "icmp", - .maxlen = 0, - .mode = 0555, - .child = ipv6_icmp_table_template - }, - { .procname = "bindv6only", .data = &init_net.ipv6.sysctl.bindv6only, .maxlen = sizeof(int), @@ -62,13 +38,6 @@ static ctl_table ipv6_rotable[] = { { } }; -struct ctl_path net_ipv6_ctl_path[] = { - { .procname = "net", }, - { .procname = "ipv6", }, - { }, -}; -EXPORT_SYMBOL_GPL(net_ipv6_ctl_path); - static int __net_init ipv6_sysctl_net_init(struct net *net) { struct ctl_table *ipv6_table; @@ -81,28 +50,37 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) GFP_KERNEL); if (!ipv6_table) goto out; + ipv6_table[0].data = &net->ipv6.sysctl.bindv6only; ipv6_route_table = ipv6_route_sysctl_init(net); if (!ipv6_route_table) goto out_ipv6_table; - ipv6_table[0].child = ipv6_route_table; ipv6_icmp_table = ipv6_icmp_sysctl_init(net); if (!ipv6_icmp_table) goto out_ipv6_route_table; - ipv6_table[1].child = ipv6_icmp_table; - ipv6_table[2].data = &net->ipv6.sysctl.bindv6only; - - net->ipv6.sysctl.table = register_net_sysctl_table(net, net_ipv6_ctl_path, - ipv6_table); - if (!net->ipv6.sysctl.table) + net->ipv6.sysctl.hdr = register_net_sysctl(net, "net/ipv6", ipv6_table); + if (!net->ipv6.sysctl.hdr) goto out_ipv6_icmp_table; + net->ipv6.sysctl.route_hdr = + register_net_sysctl(net, "net/ipv6/route", ipv6_route_table); + if (!net->ipv6.sysctl.route_hdr) + goto out_unregister_ipv6_table; + + net->ipv6.sysctl.icmp_hdr = + register_net_sysctl(net, "net/ipv6/icmp", ipv6_icmp_table); + if (!net->ipv6.sysctl.icmp_hdr) + goto out_unregister_route_table; + err = 0; out: return err; - +out_unregister_route_table: + unregister_net_sysctl_table(net->ipv6.sysctl.route_hdr); +out_unregister_ipv6_table: + unregister_net_sysctl_table(net->ipv6.sysctl.hdr); out_ipv6_icmp_table: kfree(ipv6_icmp_table); out_ipv6_route_table: @@ -118,11 +96,13 @@ static void __net_exit ipv6_sysctl_net_exit(struct net *net) struct ctl_table *ipv6_route_table; struct ctl_table *ipv6_icmp_table; - ipv6_table = net->ipv6.sysctl.table->ctl_table_arg; - ipv6_route_table = ipv6_table[0].child; - ipv6_icmp_table = ipv6_table[1].child; + ipv6_table = net->ipv6.sysctl.hdr->ctl_table_arg; + ipv6_route_table = net->ipv6.sysctl.route_hdr->ctl_table_arg; + ipv6_icmp_table = net->ipv6.sysctl.icmp_hdr->ctl_table_arg; - unregister_net_sysctl_table(net->ipv6.sysctl.table); + unregister_net_sysctl_table(net->ipv6.sysctl.icmp_hdr); + unregister_net_sysctl_table(net->ipv6.sysctl.route_hdr); + unregister_net_sysctl_table(net->ipv6.sysctl.hdr); kfree(ipv6_table); kfree(ipv6_route_table); @@ -140,7 +120,7 @@ int ipv6_sysctl_register(void) { int err = -ENOMEM; - ip6_header = register_net_sysctl_rotable(net_ipv6_ctl_path, ipv6_rotable); + ip6_header = register_net_sysctl(&init_net, "net/ipv6", ipv6_rotable); if (ip6_header == NULL) goto out; @@ -160,18 +140,3 @@ void ipv6_sysctl_unregister(void) unregister_net_sysctl_table(ip6_header); unregister_pernet_subsys(&ipv6_sysctl_net_ops); } - -static struct ctl_table_header *ip6_base; - -int ipv6_static_sysctl_register(void) -{ - ip6_base = register_sysctl_paths(net_ipv6_ctl_path, ipv6_static_skeleton); - if (ip6_base == NULL) - return -ENOMEM; - return 0; -} - -void ipv6_static_sysctl_unregister(void) -{ - unregister_net_sysctl_table(ip6_base); -} diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 12c6ece67f39..0302ec3fecfc 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -277,22 +277,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, rt = (struct rt6_info *) dst; if (tcp_death_row.sysctl_tw_recycle && !tp->rx_opt.ts_recent_stamp && - ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr)) { - struct inet_peer *peer = rt6_get_peer(rt); - /* - * VJ's idea. We save last timestamp seen from - * the destination in peer table, when entering state - * TIME-WAIT * and initialize rx_opt.ts_recent from it, - * when trying new connection. - */ - if (peer) { - inet_peer_refcheck(peer); - if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) { - tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; - tp->rx_opt.ts_recent = peer->tcp_ts; - } - } - } + ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr)) + tcp_fetch_timewait_stamp(sk, dst); icsk->icsk_ext_hdr_len = 0; if (np->opt) @@ -377,6 +363,13 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, np = inet6_sk(sk); + if (type == NDISC_REDIRECT) { + struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); + + if (dst) + dst->ops->redirect(dst, sk, skb); + } + if (type == ICMPV6_PKT_TOOBIG) { struct dst_entry *dst; @@ -385,41 +378,14 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) goto out; - /* icmp should have updated the destination cache entry */ - dst = __sk_dst_check(sk, np->dst_cookie); - - if (dst == NULL) { - struct inet_sock *inet = inet_sk(sk); - struct flowi6 fl6; - - /* BUGGG_FUTURE: Again, it is not clear how - to handle rthdr case. Ignore this complexity - for now. - */ - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_TCP; - fl6.daddr = np->daddr; - fl6.saddr = np->saddr; - fl6.flowi6_oif = sk->sk_bound_dev_if; - fl6.flowi6_mark = sk->sk_mark; - fl6.fl6_dport = inet->inet_dport; - fl6.fl6_sport = inet->inet_sport; - security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); - - dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false); - if (IS_ERR(dst)) { - sk->sk_err_soft = -PTR_ERR(dst); - goto out; - } - - } else - dst_hold(dst); + dst = inet6_csk_update_pmtu(sk, ntohl(info)); + if (!dst) + goto out; if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { tcp_sync_mss(sk, dst_mtu(dst)); tcp_simple_retransmit(sk); - } /* else let the usual retransmit timer handle it */ - dst_release(dst); + } goto out; } @@ -475,60 +441,43 @@ out: } -static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, - struct request_values *rvp) +static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, + struct flowi6 *fl6, + struct request_sock *req, + struct request_values *rvp, + u16 queue_mapping) { struct inet6_request_sock *treq = inet6_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff * skb; - struct ipv6_txoptions *opt = NULL; - struct in6_addr * final_p, final; - struct flowi6 fl6; - struct dst_entry *dst; - int err; - - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_TCP; - fl6.daddr = treq->rmt_addr; - fl6.saddr = treq->loc_addr; - fl6.flowlabel = 0; - fl6.flowi6_oif = treq->iif; - fl6.flowi6_mark = sk->sk_mark; - fl6.fl6_dport = inet_rsk(req)->rmt_port; - fl6.fl6_sport = inet_rsk(req)->loc_port; - security_req_classify_flow(req, flowi6_to_flowi(&fl6)); - - opt = np->opt; - final_p = fl6_update_dst(&fl6, opt, &final); + int err = -ENOMEM; - dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); - if (IS_ERR(dst)) { - err = PTR_ERR(dst); - dst = NULL; + /* First, grab a route. */ + if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL) goto done; - } + skb = tcp_make_synack(sk, dst, req, rvp); - err = -ENOMEM; + if (skb) { __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr); - fl6.daddr = treq->rmt_addr; - err = ip6_xmit(sk, skb, &fl6, opt, np->tclass); + fl6->daddr = treq->rmt_addr; + skb_set_queue_mapping(skb, queue_mapping); + err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass); err = net_xmit_eval(err); } done: - if (opt && opt != np->opt) - sock_kfree_s(sk, opt, opt->tot_len); - dst_release(dst); return err; } static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req, struct request_values *rvp) { + struct flowi6 fl6; + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); - return tcp_v6_send_synack(sk, req, rvp); + return tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0); } static void tcp_v6_reqsk_destructor(struct request_sock *req) @@ -723,12 +672,10 @@ static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) NULL, NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) { - if (net_ratelimit()) { - printk(KERN_INFO "MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n", - genhash ? "failed" : "mismatch", - &ip6h->saddr, ntohs(th->source), - &ip6h->daddr, ntohs(th->dest)); - } + net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n", + genhash ? "failed" : "mismatch", + &ip6h->saddr, ntohs(th->source), + &ip6h->daddr, ntohs(th->dest)); return 1; } return 0; @@ -1057,7 +1004,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) struct tcp_sock *tp = tcp_sk(sk); __u32 isn = TCP_SKB_CB(skb)->when; struct dst_entry *dst = NULL; - int want_cookie = 0; + struct flowi6 fl6; + bool want_cookie = false; if (skb->protocol == htons(ETH_P_IP)) return tcp_v4_conn_request(sk, skb); @@ -1085,7 +1033,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, &hash_location, 0); + tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); if (tmp_opt.cookie_plus > 0 && tmp_opt.saw_tstamp && @@ -1118,7 +1066,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) while (l-- > 0) *c++ ^= *hash_location++; - want_cookie = 0; /* not our kind of cookie */ + want_cookie = false; /* not our kind of cookie */ tmp_ext.cookie_out_never = 0; /* false */ tmp_ext.cookie_plus = tmp_opt.cookie_plus; } else if (!tp->rx_opt.cookie_in_always) { @@ -1140,7 +1088,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) treq->rmt_addr = ipv6_hdr(skb)->saddr; treq->loc_addr = ipv6_hdr(skb)->daddr; if (!want_cookie || tmp_opt.tstamp_ok) - TCP_ECN_create_request(req, tcp_hdr(skb)); + TCP_ECN_create_request(req, skb); treq->iif = sk->sk_bound_dev_if; @@ -1150,8 +1098,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) treq->iif = inet6_iif(skb); if (!isn) { - struct inet_peer *peer = NULL; - if (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { @@ -1176,14 +1122,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) */ if (tmp_opt.saw_tstamp && tcp_death_row.sysctl_tw_recycle && - (dst = inet6_csk_route_req(sk, req)) != NULL && - (peer = rt6_get_peer((struct rt6_info *)dst)) != NULL && - ipv6_addr_equal((struct in6_addr *)peer->daddr.addr.a6, - &treq->rmt_addr)) { - inet_peer_refcheck(peer); - if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && - (s32)(peer->tcp_ts - req->ts_recent) > - TCP_PAWS_WINDOW) { + (dst = inet6_csk_route_req(sk, &fl6, req)) != NULL) { + if (!tcp_peer_is_proven(req, dst, true)) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); goto drop_and_release; } @@ -1192,8 +1132,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) else if (!sysctl_tcp_syncookies && (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < (sysctl_max_syn_backlog >> 2)) && - (!peer || !peer->tcp_ts_stamp) && - (!dst || !dst_metric(dst, RTAX_RTT))) { + !tcp_peer_is_proven(req, dst, false)) { /* Without syncookies last quarter of * backlog is filled with destinations, * proven to be alive. @@ -1212,10 +1151,12 @@ have_isn: tcp_rsk(req)->snt_isn = isn; tcp_rsk(req)->snt_synack = tcp_time_stamp; - security_inet_conn_request(sk, skb, req); + if (security_inet_conn_request(sk, skb, req)) + goto drop_and_release; - if (tcp_v6_send_synack(sk, req, - (struct request_values *)&tmp_ext) || + if (tcp_v6_send_synack(sk, dst, &fl6, req, + (struct request_values *)&tmp_ext, + skb_get_queue_mapping(skb)) || want_cookie) goto drop_and_free; @@ -1240,10 +1181,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, struct inet_sock *newinet; struct tcp_sock *newtp; struct sock *newsk; - struct ipv6_txoptions *opt; #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *key; #endif + struct flowi6 fl6; if (skb->protocol == htons(ETH_P_IP)) { /* @@ -1300,13 +1241,12 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, } treq = inet6_rsk(req); - opt = np->opt; if (sk_acceptq_is_full(sk)) goto out_overflow; if (!dst) { - dst = inet6_csk_route_req(sk, req); + dst = inet6_csk_route_req(sk, &fl6, req); if (!dst) goto out; } @@ -1353,7 +1293,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newnp->pktoptions = NULL; if (treq->pktopts != NULL) { newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC); - kfree_skb(treq->pktopts); + consume_skb(treq->pktopts); treq->pktopts = NULL; if (newnp->pktoptions) skb_set_owner_r(newnp->pktoptions, newsk); @@ -1369,11 +1309,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, but we make one more one thing there: reattach optmem to newsk. */ - if (opt) { - newnp->opt = ipv6_dup_options(newsk, opt); - if (opt != np->opt) - sock_kfree_s(sk, opt, opt->tot_len); - } + if (np->opt) + newnp->opt = ipv6_dup_options(newsk, np->opt); inet_csk(newsk)->icsk_ext_hdr_len = 0; if (newnp->opt) @@ -1383,6 +1320,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, tcp_mtup_init(newsk); tcp_sync_mss(newsk, dst_mtu(dst)); newtp->advmss = dst_metric_advmss(dst); + if (tcp_sk(sk)->rx_opt.user_mss && + tcp_sk(sk)->rx_opt.user_mss < newtp->advmss) + newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; + tcp_initialize_rcv_mss(newsk); if (tcp_rsk(req)->snt_synack) tcp_valid_rtt_meas(newsk, @@ -1416,8 +1357,6 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, out_overflow: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); out_nonewsk: - if (opt && opt != np->opt) - sock_kfree_s(sk, opt, opt->tot_len); dst_release(dst); out: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); @@ -1645,7 +1584,7 @@ process: #ifdef CONFIG_NET_DMA struct tcp_sock *tp = tcp_sk(sk); if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) - tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY); + tp->ucopy.dma_chan = net_dma_find_channel(); if (tp->ucopy.dma_chan) ret = tcp_v6_do_rcv(sk, skb); else @@ -1654,7 +1593,8 @@ process: if (!tcp_prequeue(sk, skb)) ret = tcp_v6_do_rcv(sk, skb); } - } else if (unlikely(sk_add_backlog(sk, skb))) { + } else if (unlikely(sk_add_backlog(sk, skb, + sk->sk_rcvbuf + sk->sk_sndbuf))) { bh_unlock_sock(sk); NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); goto discard_and_relse; @@ -1727,42 +1667,10 @@ do_time_wait: goto discard_it; } -static struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it) -{ - struct rt6_info *rt = (struct rt6_info *) __sk_dst_get(sk); - struct ipv6_pinfo *np = inet6_sk(sk); - struct inet_peer *peer; - - if (!rt || - !ipv6_addr_equal(&np->daddr, &rt->rt6i_dst.addr)) { - peer = inet_getpeer_v6(&np->daddr, 1); - *release_it = true; - } else { - if (!rt->rt6i_peer) - rt6_bind_peer(rt, 1); - peer = rt->rt6i_peer; - *release_it = false; - } - - return peer; -} - -static void *tcp_v6_tw_get_peer(struct sock *sk) -{ - const struct inet6_timewait_sock *tw6 = inet6_twsk(sk); - const struct inet_timewait_sock *tw = inet_twsk(sk); - - if (tw->tw_family == AF_INET) - return tcp_v4_tw_get_peer(sk); - - return inet_getpeer_v6(&tw6->tw_v6_daddr, 1); -} - static struct timewait_sock_ops tcp6_timewait_sock_ops = { .twsk_obj_size = sizeof(struct tcp6_timewait_sock), .twsk_unique = tcp_twsk_unique, .twsk_destructor= tcp_twsk_destructor, - .twsk_getpeer = tcp_v6_tw_get_peer, }; static const struct inet_connection_sock_af_ops ipv6_specific = { @@ -1771,8 +1679,8 @@ static const struct inet_connection_sock_af_ops ipv6_specific = { .rebuild_header = inet6_sk_rebuild_header, .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, - .get_peer = tcp_v6_get_peer, .net_header_len = sizeof(struct ipv6hdr), + .net_frag_header_len = sizeof(struct frag_hdr), .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, .addr2sockaddr = inet6_csk_addr2sockaddr, @@ -1802,7 +1710,6 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = { .rebuild_header = inet_sk_rebuild_header, .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, - .get_peer = tcp_v4_get_peer, .net_header_len = sizeof(struct iphdr), .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, @@ -1829,64 +1736,15 @@ static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = { static int tcp_v6_init_sock(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); - struct tcp_sock *tp = tcp_sk(sk); - - skb_queue_head_init(&tp->out_of_order_queue); - tcp_init_xmit_timers(sk); - tcp_prequeue_init(tp); - - icsk->icsk_rto = TCP_TIMEOUT_INIT; - tp->mdev = TCP_TIMEOUT_INIT; - - /* So many TCP implementations out there (incorrectly) count the - * initial SYN frame in their delayed-ACK and congestion control - * algorithms that we must have the following bandaid to talk - * efficiently to them. -DaveM - */ - tp->snd_cwnd = 2; - - /* See draft-stevens-tcpca-spec-01 for discussion of the - * initialization of these values. - */ - tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; - tp->snd_cwnd_clamp = ~0; - tp->mss_cache = TCP_MSS_DEFAULT; - - tp->reordering = sysctl_tcp_reordering; - sk->sk_state = TCP_CLOSE; + tcp_init_sock(sk); icsk->icsk_af_ops = &ipv6_specific; - icsk->icsk_ca_ops = &tcp_init_congestion_ops; - icsk->icsk_sync_mss = tcp_sync_mss; - sk->sk_write_space = sk_stream_write_space; - sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); #ifdef CONFIG_TCP_MD5SIG - tp->af_specific = &tcp_sock_ipv6_specific; + tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific; #endif - /* TCP Cookie Transactions */ - if (sysctl_tcp_cookie_size > 0) { - /* Default, cookies without s_data_payload. */ - tp->cookie_values = - kzalloc(sizeof(*tp->cookie_values), - sk->sk_allocation); - if (tp->cookie_values != NULL) - kref_init(&tp->cookie_values->kref); - } - /* Presumed zeroed, in order of appearance: - * cookie_in_always, cookie_out_never, - * s_data_constant, s_data_in, s_data_out - */ - sk->sk_sndbuf = sysctl_tcp_wmem[1]; - sk->sk_rcvbuf = sysctl_tcp_rmem[1]; - - local_bh_disable(); - sock_update_memcg(sk); - sk_sockets_allocated_inc(sk); - local_bh_enable(); - return 0; } @@ -2090,6 +1948,7 @@ struct proto tcpv6_prot = { .sendmsg = tcp_sendmsg, .sendpage = tcp_sendpage, .backlog_rcv = tcp_v6_do_rcv, + .release_cb = tcp_release_cb, .hash = tcp_v6_hash, .unhash = inet_unhash, .get_port = inet_csk_get_port, diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c index 4f3cec12aa85..4b0f50d9a962 100644 --- a/net/ipv6/tunnel6.c +++ b/net/ipv6/tunnel6.c @@ -19,6 +19,8 @@ * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> */ +#define pr_fmt(fmt) "IPv6: " fmt + #include <linux/icmpv6.h> #include <linux/init.h> #include <linux/module.h> @@ -160,11 +162,11 @@ static const struct inet6_protocol tunnel46_protocol = { static int __init tunnel6_init(void) { if (inet6_add_protocol(&tunnel6_protocol, IPPROTO_IPV6)) { - printk(KERN_ERR "tunnel6 init(): can't add protocol\n"); + pr_err("%s: can't add protocol\n", __func__); return -EAGAIN; } if (inet6_add_protocol(&tunnel46_protocol, IPPROTO_IPIP)) { - printk(KERN_ERR "tunnel6 init(): can't add protocol\n"); + pr_err("%s: can't add protocol\n", __func__); inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6); return -EAGAIN; } @@ -174,9 +176,9 @@ static int __init tunnel6_init(void) static void __exit tunnel6_fini(void) { if (inet6_del_protocol(&tunnel46_protocol, IPPROTO_IPIP)) - printk(KERN_ERR "tunnel6 close: can't remove protocol\n"); + pr_err("%s: can't remove protocol\n", __func__); if (inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6)) - printk(KERN_ERR "tunnel6 close: can't remove protocol\n"); + pr_err("%s: can't remove protocol\n", __func__); } module_init(tunnel6_init); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 37b0699e95e5..99d0077b56b8 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -48,6 +48,7 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <trace/events/skb.h> #include "udp_impl.h" int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) @@ -103,7 +104,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum) { unsigned int hash2_nulladdr = udp6_portaddr_hash(sock_net(sk), &in6addr_any, snum); - unsigned int hash2_partial = + unsigned int hash2_partial = udp6_portaddr_hash(sock_net(sk), &inet6_sk(sk)->rcv_saddr, 0); /* precompute partial secondary hash */ @@ -349,7 +350,7 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, bool slow; if (addr_len) - *addr_len=sizeof(struct sockaddr_in6); + *addr_len = sizeof(struct sockaddr_in6); if (flags & MSG_ERRQUEUE) return ipv6_recv_error(sk, msg, len); @@ -385,15 +386,16 @@ try_again: if (skb_csum_unnecessary(skb)) err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), - msg->msg_iov, copied ); + msg->msg_iov, copied); else { err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); if (err == -EINVAL) goto csum_copy_err; } - if (err) + if (unlikely(err)) { + trace_kfree_skb(skb, udpv6_recvmsg); goto out_free; - + } if (!peeked) { if (is_udp4) UDP_INC_STATS_USER(sock_net(sk), @@ -479,6 +481,11 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sk == NULL) return; + if (type == ICMPV6_PKT_TOOBIG) + ip6_sk_update_pmtu(skb, sk, info); + if (type == NDISC_REDIRECT) + ip6_sk_redirect(skb, sk); + np = inet6_sk(sk); if (!icmpv6_err_convert(type, code, &err) && !np->recverr) @@ -496,6 +503,28 @@ out: sock_put(sk); } +static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + int rc; + + if (!ipv6_addr_any(&inet6_sk(sk)->daddr)) + sock_rps_save_rxhash(sk, skb); + + rc = sock_queue_rcv_skb(sk, skb); + if (rc < 0) { + int is_udplite = IS_UDPLITE(sk); + + /* Note that an ENOMEM error is charged twice */ + if (rc == -ENOMEM) + UDP6_INC_STATS_BH(sock_net(sk), + UDP_MIB_RCVBUFERRORS, is_udplite); + UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + kfree_skb(skb); + return -1; + } + return 0; +} + static __inline__ void udpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info ) @@ -503,18 +532,54 @@ static __inline__ void udpv6_err(struct sk_buff *skb, __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table); } -int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) +static struct static_key udpv6_encap_needed __read_mostly; +void udpv6_encap_enable(void) +{ + if (!static_key_enabled(&udpv6_encap_needed)) + static_key_slow_inc(&udpv6_encap_needed); +} +EXPORT_SYMBOL(udpv6_encap_enable); + +int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { struct udp_sock *up = udp_sk(sk); int rc; int is_udplite = IS_UDPLITE(sk); - if (!ipv6_addr_any(&inet6_sk(sk)->daddr)) - sock_rps_save_rxhash(sk, skb); - if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto drop; + if (static_key_false(&udpv6_encap_needed) && up->encap_type) { + int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); + + /* + * This is an encapsulation socket so pass the skb to + * the socket's udp_encap_rcv() hook. Otherwise, just + * fall through and pass this up the UDP socket. + * up->encap_rcv() returns the following value: + * =0 if skb was successfully passed to the encap + * handler or was discarded by it. + * >0 if skb should be passed on to UDP. + * <0 if skb should be resubmitted as proto -N + */ + + /* if we're overly short, let UDP handle it */ + encap_rcv = ACCESS_ONCE(up->encap_rcv); + if (skb->len > sizeof(struct udphdr) && encap_rcv != NULL) { + int ret; + + ret = encap_rcv(sk, skb); + if (ret <= 0) { + UDP_INC_STATS_BH(sock_net(sk), + UDP_MIB_INDATAGRAMS, + is_udplite); + return -ret; + } + } + + /* FALLTHROUGH -- it's a UDP Packet */ + } + /* * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c). */ @@ -539,21 +604,25 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) goto drop; } + if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) + goto drop; + skb_dst_drop(skb); - rc = sock_queue_rcv_skb(sk, skb); - if (rc < 0) { - /* Note that an ENOMEM error is charged twice */ - if (rc == -ENOMEM) - UDP6_INC_STATS_BH(sock_net(sk), - UDP_MIB_RCVBUFERRORS, is_udplite); - goto drop_no_sk_drops_inc; + + bh_lock_sock(sk); + rc = 0; + if (!sock_owned_by_user(sk)) + rc = __udpv6_queue_rcv_skb(sk, skb); + else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) { + bh_unlock_sock(sk); + goto drop; } + bh_unlock_sock(sk); - return 0; + return rc; drop: - atomic_inc(&sk->sk_drops); -drop_no_sk_drops_inc: UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + atomic_inc(&sk->sk_drops); kfree_skb(skb); return -1; } @@ -602,37 +671,27 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk, static void flush_stack(struct sock **stack, unsigned int count, struct sk_buff *skb, unsigned int final) { - unsigned int i; + struct sk_buff *skb1 = NULL; struct sock *sk; - struct sk_buff *skb1; + unsigned int i; for (i = 0; i < count; i++) { - skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC); - sk = stack[i]; - if (skb1) { - if (sk_rcvqueues_full(sk, skb1)) { - kfree_skb(skb1); - goto drop; - } - bh_lock_sock(sk); - if (!sock_owned_by_user(sk)) - udpv6_queue_rcv_skb(sk, skb1); - else if (sk_add_backlog(sk, skb1)) { - kfree_skb(skb1); - bh_unlock_sock(sk); - goto drop; - } - bh_unlock_sock(sk); - continue; + if (likely(skb1 == NULL)) + skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC); + if (!skb1) { + atomic_inc(&sk->sk_drops); + UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, + IS_UDPLITE(sk)); + UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, + IS_UDPLITE(sk)); } -drop: - atomic_inc(&sk->sk_drops); - UDP6_INC_STATS_BH(sock_net(sk), - UDP_MIB_RCVBUFERRORS, IS_UDPLITE(sk)); - UDP6_INC_STATS_BH(sock_net(sk), - UDP_MIB_INERRORS, IS_UDPLITE(sk)); + + if (skb1 && udpv6_queue_rcv_skb(sk, skb1) <= 0) + skb1 = NULL; } + if (unlikely(skb1)) + kfree_skb(skb1); } /* * Note: called only from the BH handler context, @@ -772,39 +831,29 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, * for sock caches... i'll skip this for now. */ sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable); + if (sk != NULL) { + int ret = udpv6_queue_rcv_skb(sk, skb); + sock_put(sk); - if (sk == NULL) { - if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) - goto discard; - - if (udp_lib_checksum_complete(skb)) - goto discard; - UDP6_INC_STATS_BH(net, UDP_MIB_NOPORTS, - proto == IPPROTO_UDPLITE); - - icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); + /* a return value > 0 means to resubmit the input, but + * it wants the return to be -protocol, or 0 + */ + if (ret > 0) + return -ret; - kfree_skb(skb); return 0; } - /* deliver */ - - if (sk_rcvqueues_full(sk, skb)) { - sock_put(sk); + if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard; - } - bh_lock_sock(sk); - if (!sock_owned_by_user(sk)) - udpv6_queue_rcv_skb(sk, skb); - else if (sk_add_backlog(sk, skb)) { - atomic_inc(&sk->sk_drops); - bh_unlock_sock(sk); - sock_put(sk); + + if (udp_lib_checksum_complete(skb)) goto discard; - } - bh_unlock_sock(sk); - sock_put(sk); + + UDP6_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); + + kfree_skb(skb); return 0; short_packet: @@ -1337,7 +1386,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, * do checksum of UDP packets sent as multiple IP fragments. */ offset = skb_checksum_start_offset(skb); - csum = skb_checksum(skb, offset, skb->len- offset, 0); + csum = skb_checksum(skb, offset, skb->len - offset, 0); offset += skb->csum_offset; *(__sum16 *)(skb->data + offset) = csum_fold(csum); skb->ip_summed = CHECKSUM_NONE; @@ -1471,7 +1520,7 @@ struct proto udpv6_prot = { .getsockopt = udpv6_getsockopt, .sendmsg = udpv6_sendmsg, .recvmsg = udpv6_recvmsg, - .backlog_rcv = udpv6_queue_rcv_skb, + .backlog_rcv = __udpv6_queue_rcv_skb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, .rehash = udp_v6_rehash, diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 8ea65e032733..ef39812107b1 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -99,12 +99,11 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, if (!xdst->u.rt6.rt6i_idev) return -ENODEV; - xdst->u.rt6.rt6i_peer = rt->rt6i_peer; - if (rt->rt6i_peer) - atomic_inc(&rt->rt6i_peer->refcnt); + rt6_transfer_peer(&xdst->u.rt6, rt); /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ + xdst->u.rt6.n = neigh_clone(rt->n); xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST | RTF_LOCAL); xdst->u.rt6.rt6i_metric = rt->rt6i_metric; @@ -208,12 +207,22 @@ static inline int xfrm6_garbage_collect(struct dst_ops *ops) return dst_entries_get_fast(ops) > ops->gc_thresh * 2; } -static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu) +static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct dst_entry *path = xdst->route; - path->ops->update_pmtu(path, mtu); + path->ops->update_pmtu(path, sk, skb, mtu); +} + +static void xfrm6_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb) +{ + struct xfrm_dst *xdst = (struct xfrm_dst *)dst; + struct dst_entry *path = xdst->route; + + path->ops->redirect(path, sk, skb); } static void xfrm6_dst_destroy(struct dst_entry *dst) @@ -223,8 +232,10 @@ static void xfrm6_dst_destroy(struct dst_entry *dst) if (likely(xdst->u.rt6.rt6i_idev)) in6_dev_put(xdst->u.rt6.rt6i_idev); dst_destroy_metrics_generic(dst); - if (likely(xdst->u.rt6.rt6i_peer)) - inet_putpeer(xdst->u.rt6.rt6i_peer); + if (rt6_has_peer(&xdst->u.rt6)) { + struct inet_peer *peer = rt6_peer_ptr(&xdst->u.rt6); + inet_putpeer(peer); + } xfrm_dst_destroy(xdst); } @@ -260,6 +271,7 @@ static struct dst_ops xfrm6_dst_ops = { .protocol = cpu_to_be16(ETH_P_IPV6), .gc = xfrm6_garbage_collect, .update_pmtu = xfrm6_update_pmtu, + .redirect = xfrm6_redirect, .cow_metrics = dst_cow_metrics_generic, .destroy = xfrm6_dst_destroy, .ifdown = xfrm6_dst_ifdown, @@ -334,8 +346,8 @@ int __init xfrm6_init(void) goto out_policy; #ifdef CONFIG_SYSCTL - sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv6_ctl_path, - xfrm6_policy_table); + sysctl_hdr = register_net_sysctl(&init_net, "net/ipv6", + xfrm6_policy_table); #endif out: return ret; diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 4fe1db12d2a3..ee5a7065aacc 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -68,9 +68,9 @@ static DEFINE_SPINLOCK(xfrm6_tunnel_spi_lock); static struct kmem_cache *xfrm6_tunnel_spi_kmem __read_mostly; -static inline unsigned xfrm6_tunnel_spi_hash_byaddr(const xfrm_address_t *addr) +static inline unsigned int xfrm6_tunnel_spi_hash_byaddr(const xfrm_address_t *addr) { - unsigned h; + unsigned int h; h = (__force u32)(addr->a6[0] ^ addr->a6[1] ^ addr->a6[2] ^ addr->a6[3]); h ^= h >> 16; @@ -80,7 +80,7 @@ static inline unsigned xfrm6_tunnel_spi_hash_byaddr(const xfrm_address_t *addr) return h; } -static inline unsigned xfrm6_tunnel_spi_hash_byspi(u32 spi) +static inline unsigned int xfrm6_tunnel_spi_hash_byspi(u32 spi) { return spi % XFRM6_TUNNEL_SPI_BYSPI_HSIZE; } |
