summaryrefslogtreecommitdiff
path: root/net/ipv4/route.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r--net/ipv4/route.c105
1 files changed, 77 insertions, 28 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index b050cf980a57..75406603fa1e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -126,10 +126,13 @@ static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1));
static int ip_rt_error_cost __read_mostly = HZ;
static int ip_rt_error_burst __read_mostly = 5 * HZ;
static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
-static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
+static u32 ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
static int ip_rt_min_advmss __read_mostly = 256;
static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
+
+static int ip_min_valid_pmtu __read_mostly = IPV4_MIN_MTU;
+
/*
* Interface to generic destination cache.
*/
@@ -501,7 +504,8 @@ void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
}
EXPORT_SYMBOL(__ip_select_ident);
-static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk,
+static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
+ const struct sock *sk,
const struct iphdr *iph,
int oif, u8 tos,
u8 prot, u32 mark, int flow_flags)
@@ -517,19 +521,21 @@ static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk,
flowi4_init_output(fl4, oif, mark, tos,
RT_SCOPE_UNIVERSE, prot,
flow_flags,
- iph->daddr, iph->saddr, 0, 0);
+ iph->daddr, iph->saddr, 0, 0,
+ sock_net_uid(net, sk));
}
static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
const struct sock *sk)
{
+ const struct net *net = dev_net(skb->dev);
const struct iphdr *iph = ip_hdr(skb);
int oif = skb->dev->ifindex;
u8 tos = RT_TOS(iph->tos);
u8 prot = iph->protocol;
u32 mark = skb->mark;
- __build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0);
+ __build_flow_key(net, fl4, sk, iph, oif, tos, prot, mark, 0);
}
static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
@@ -546,7 +552,7 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
inet_sk_flowi_flags(sk),
- daddr, inet->inet_saddr, 0, 0);
+ daddr, inet->inet_saddr, 0, 0, sk->sk_uid);
rcu_read_unlock();
}
@@ -609,6 +615,7 @@ static inline u32 fnhe_hashfun(__be32 daddr)
static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
{
rt->rt_pmtu = fnhe->fnhe_pmtu;
+ rt->rt_mtu_locked = fnhe->fnhe_mtu_locked;
rt->dst.expires = fnhe->fnhe_expires;
if (fnhe->fnhe_gw) {
@@ -619,14 +626,17 @@ static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnh
}
static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
- u32 pmtu, unsigned long expires)
+ u32 pmtu, bool lock, unsigned long expires)
{
struct fnhe_hash_bucket *hash;
struct fib_nh_exception *fnhe;
struct rtable *rt;
+ u32 genid, hval;
unsigned int i;
int depth;
- u32 hval = fnhe_hashfun(daddr);
+
+ genid = fnhe_genid(dev_net(nh->nh_dev));
+ hval = fnhe_hashfun(daddr);
spin_lock_bh(&fnhe_lock);
@@ -649,12 +659,15 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
}
if (fnhe) {
+ if (fnhe->fnhe_genid != genid)
+ fnhe->fnhe_genid = genid;
if (gw)
fnhe->fnhe_gw = gw;
if (pmtu) {
fnhe->fnhe_pmtu = pmtu;
- fnhe->fnhe_expires = max(1UL, expires);
+ fnhe->fnhe_mtu_locked = lock;
}
+ fnhe->fnhe_expires = max(1UL, expires);
/* Update all cached dsts too */
rt = rcu_dereference(fnhe->fnhe_rth_input);
if (rt)
@@ -673,10 +686,11 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
fnhe->fnhe_next = hash->chain;
rcu_assign_pointer(hash->chain, fnhe);
}
- fnhe->fnhe_genid = fnhe_genid(dev_net(nh->nh_dev));
+ fnhe->fnhe_genid = genid;
fnhe->fnhe_daddr = daddr;
fnhe->fnhe_gw = gw;
fnhe->fnhe_pmtu = pmtu;
+ fnhe->fnhe_mtu_locked = lock;
fnhe->fnhe_expires = expires;
/* Exception created; mark the cached routes for the nexthop
@@ -747,7 +761,9 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
goto reject_redirect;
}
- n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw);
+ n = __ipv4_neigh_lookup(rt->dst.dev, new_gw);
+ if (!n)
+ n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev);
if (!IS_ERR(n)) {
if (!(n->nud_state & NUD_VALID)) {
neigh_event_send(n, NULL);
@@ -756,7 +772,8 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
struct fib_nh *nh = &FIB_RES_NH(res);
update_or_create_fnhe(nh, fl4->daddr, new_gw,
- 0, jiffies + ip_rt_gc_timeout);
+ 0, false,
+ jiffies + ip_rt_gc_timeout);
}
if (kill_route)
rt->dst.obsolete = DST_OBSOLETE_KILL;
@@ -787,6 +804,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf
struct rtable *rt;
struct flowi4 fl4;
const struct iphdr *iph = (const struct iphdr *) skb->data;
+ struct net *net = dev_net(skb->dev);
int oif = skb->dev->ifindex;
u8 tos = RT_TOS(iph->tos);
u8 prot = iph->protocol;
@@ -794,7 +812,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf
rt = (struct rtable *) dst;
- __build_flow_key(&fl4, sk, iph, oif, tos, prot, mark, 0);
+ __build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0);
__ip_do_redirect(rt, skb, &fl4, true);
}
@@ -968,15 +986,18 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
{
struct dst_entry *dst = &rt->dst;
struct fib_result res;
+ bool lock = false;
- if (dst_metric_locked(dst, RTAX_MTU))
+ if (ip_mtu_locked(dst))
return;
if (ipv4_mtu(dst) < mtu)
return;
- if (mtu < ip_rt_min_pmtu)
+ if (mtu < ip_rt_min_pmtu) {
+ lock = true;
mtu = ip_rt_min_pmtu;
+ }
if (rt->rt_pmtu == mtu &&
time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
@@ -986,7 +1007,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) {
struct fib_nh *nh = &FIB_RES_NH(res);
- update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
+ update_or_create_fnhe(nh, fl4->daddr, 0, mtu, lock,
jiffies + ip_rt_mtu_expires);
}
rcu_read_unlock();
@@ -1012,7 +1033,7 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
if (!mark)
mark = IP4_REPLY_MARK(net, skb->mark);
- __build_flow_key(&fl4, NULL, iph, oif,
+ __build_flow_key(net, &fl4, NULL, iph, oif,
RT_TOS(iph->tos), protocol, mark, flow_flags);
rt = __ip_route_output_key(net, &fl4);
if (!IS_ERR(rt)) {
@@ -1028,7 +1049,7 @@ static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
struct flowi4 fl4;
struct rtable *rt;
- __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
+ __build_flow_key(sock_net(sk), &fl4, sk, iph, 0, 0, 0, 0, 0);
if (!fl4.flowi4_mark)
fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark);
@@ -1047,6 +1068,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
struct rtable *rt;
struct dst_entry *odst = NULL;
bool new = false;
+ struct net *net = sock_net(sk);
bh_lock_sock(sk);
@@ -1060,7 +1082,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
goto out;
}
- __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
+ __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
rt = (struct rtable *)odst;
if (odst->obsolete && !odst->ops->check(odst, 0)) {
@@ -1100,7 +1122,7 @@ void ipv4_redirect(struct sk_buff *skb, struct net *net,
struct flowi4 fl4;
struct rtable *rt;
- __build_flow_key(&fl4, NULL, iph, oif,
+ __build_flow_key(net, &fl4, NULL, iph, oif,
RT_TOS(iph->tos), protocol, mark, flow_flags);
rt = __ip_route_output_key(net, &fl4);
if (!IS_ERR(rt)) {
@@ -1115,9 +1137,10 @@ void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
const struct iphdr *iph = (const struct iphdr *) skb->data;
struct flowi4 fl4;
struct rtable *rt;
+ struct net *net = sock_net(sk);
- __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
- rt = __ip_route_output_key(sock_net(sk), &fl4);
+ __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
+ rt = __ip_route_output_key(net, &fl4);
if (!IS_ERR(rt)) {
__ip_do_redirect(rt, skb, &fl4, false);
ip_rt_put(rt);
@@ -1239,9 +1262,9 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
if (mtu)
return mtu;
- mtu = dst->dev->mtu;
+ mtu = READ_ONCE(dst->dev->mtu);
- if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
+ if (unlikely(ip_mtu_locked(dst))) {
if (rt->rt_uses_gateway && mtu > 576)
mtu = 576;
}
@@ -1354,8 +1377,12 @@ static void rt_add_uncached_list(struct rtable *rt)
static void ipv4_dst_destroy(struct dst_entry *dst)
{
+ struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
struct rtable *rt = (struct rtable *) dst;
+ if (p != &dst_default_metrics && atomic_dec_and_test(&p->refcnt))
+ kfree(p);
+
if (!list_empty(&rt->rt_uncached)) {
struct uncached_list *ul = rt->rt_uncached_list;
@@ -1407,7 +1434,11 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
rt->rt_gateway = nh->nh_gw;
rt->rt_uses_gateway = 1;
}
- dst_init_metrics(&rt->dst, fi->fib_metrics, true);
+ dst_init_metrics(&rt->dst, fi->fib_metrics->metrics, true);
+ if (fi->fib_metrics != &dst_default_metrics) {
+ rt->dst._metrics |= DST_METRICS_REFCOUNTED;
+ atomic_inc(&fi->fib_metrics->refcnt);
+ }
#ifdef CONFIG_IP_ROUTE_CLASSID
rt->dst.tclassid = nh->nh_tclassid;
#endif
@@ -1456,6 +1487,7 @@ static struct rtable *rt_dst_alloc(struct net_device *dev,
rt->rt_is_input = 0;
rt->rt_iif = 0;
rt->rt_pmtu = 0;
+ rt->rt_mtu_locked = 0;
rt->rt_gateway = 0;
rt->rt_uses_gateway = 0;
rt->rt_table_id = 0;
@@ -1956,6 +1988,7 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
{
int res;
+ tos &= IPTOS_RT_MASK;
rcu_read_lock();
/* Multicast recognition logic is moved from route cache to here.
@@ -2375,6 +2408,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
rt->rt_is_input = ort->rt_is_input;
rt->rt_iif = ort->rt_iif;
rt->rt_pmtu = ort->rt_pmtu;
+ rt->rt_mtu_locked = ort->rt_mtu_locked;
rt->rt_genid = rt_genid_ipv4(net);
rt->rt_flags = ort->rt_flags;
@@ -2428,7 +2462,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id,
r->rtm_dst_len = 32;
r->rtm_src_len = 0;
r->rtm_tos = fl4->flowi4_tos;
- r->rtm_table = table_id;
+ r->rtm_table = table_id < 256 ? table_id : RT_TABLE_COMPAT;
if (nla_put_u32(skb, RTA_TABLE, table_id))
goto nla_put_failure;
r->rtm_type = rt->rt_type;
@@ -2477,6 +2511,8 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id,
memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
if (rt->rt_pmtu && expires)
metrics[RTAX_MTU - 1] = rt->rt_pmtu;
+ if (rt->rt_mtu_locked && expires)
+ metrics[RTAX_LOCK - 1] |= BIT(RTAX_MTU);
if (rtnetlink_put_metrics(skb, metrics) < 0)
goto nla_put_failure;
@@ -2484,6 +2520,11 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id,
nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
goto nla_put_failure;
+ if (!uid_eq(fl4->flowi4_uid, INVALID_UID) &&
+ nla_put_u32(skb, RTA_UID,
+ from_kuid_munged(current_user_ns(), fl4->flowi4_uid)))
+ goto nla_put_failure;
+
error = rt->dst.error;
if (rt_is_input_route(rt)) {
@@ -2492,7 +2533,8 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id,
IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
int err = ipmr_get_route(net, skb,
fl4->saddr, fl4->daddr,
- r, nowait);
+ r, nowait, portid);
+
if (err <= 0) {
if (!nowait) {
if (err == 0)
@@ -2535,6 +2577,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
int mark;
struct sk_buff *skb;
u32 table_id = RT_TABLE_MAIN;
+ kuid_t uid;
err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
if (err < 0)
@@ -2555,13 +2598,17 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
skb_reset_network_header(skb);
/* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
- ip_hdr(skb)->protocol = IPPROTO_ICMP;
+ ip_hdr(skb)->protocol = IPPROTO_UDP;
skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;
+ if (tb[RTA_UID])
+ uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID]));
+ else
+ uid = (iif ? INVALID_UID : current_uid());
memset(&fl4, 0, sizeof(fl4));
fl4.daddr = dst;
@@ -2569,6 +2616,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
fl4.flowi4_tos = rtm->rtm_tos;
fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
fl4.flowi4_mark = mark;
+ fl4.flowi4_uid = uid;
if (netif_index_is_l3_master(net, fl4.flowi4_oif))
fl4.flowi4_flags = FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF;
@@ -2749,7 +2797,8 @@ static struct ctl_table ipv4_route_table[] = {
.data = &ip_rt_min_pmtu,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &ip_min_valid_pmtu,
},
{
.procname = "min_adv_mss",