diff options
| author | David S. Miller <davem@davemloft.net> | 2014-05-13 18:35:18 -0400 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2014-05-13 18:35:18 -0400 |
| commit | b6bd26c4de0141d0736a51487e4ca37390fcae03 (patch) | |
| tree | b8040290a249b8242767c1bb7f433deb40b4e6e0 /include | |
| parent | 87e067cda6df60b55cea0239c2f3cee81e9f46df (diff) | |
| parent | 84f39b08d7868ce10eeaf640627cb89777f0ae93 (diff) | |
Merge branch 'inet_fwmark_reflect'
Lorenzo Colitti says:
====================
Make mark-based routing work better with multiple separate networks.
Mark-based routing (ip rule fwmark 17 lookup 100) combined with
either iptables marking (iptables -j MARK --set-mark 17) or
application-based marking (the SO_MARK setsockopt) are a good
way to deal with connecting simultaneously to multiple networks.
Each network can be given a routing table, and ip rules can
be configured to make different fwmarks select different
networks. Applications can select networks them by setting
appropriate socket marks, and iptables rules can be used to
handle non-aware applications, enforce policy, etc.
This patch series improves functionality when mark-based routing
is used in this way. Current behaviour has the following
limitations:
1. Kernel-originated replies that are not associated with a
socket always use a mark of zero. This means that, for
example, when the kernel sends a ping reply or a TCP reset,
it does not send it on the network from which it received the
original packet.
2. Path MTU discovery, which is triggered by incoming packets,
does not always work correctly, because the routing lookups it
uses to clone routes do not take the fwmark into account and
thus can happen in the wrong routing table.
3. Application-based marking works well for outbound connections,
but does not work well for incoming connections. Marking a
listening socket causes that socket to only accept
connections from a given network, and sockets that are
returned by accept() are not marked (and are thus not routed
correctly).
sysctl. This causes route lookups for kernel-generated replies
and PMTUD to use the fwmark of the packet that caused them.
which causes TCP sockets returned by accept() to be marked with
the same mark that sent the intial SYN packet.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
| -rw-r--r-- | include/net/inet_sock.h | 10 | ||||
| -rw-r--r-- | include/net/ip.h | 3 | ||||
| -rw-r--r-- | include/net/ipv6.h | 3 | ||||
| -rw-r--r-- | include/net/netns/ipv4.h | 3 | ||||
| -rw-r--r-- | include/net/netns/ipv6.h | 1 |
5 files changed, 20 insertions, 0 deletions
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 1833c3f389ee..b1edf17bec01 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -90,6 +90,7 @@ struct inet_request_sock { kmemcheck_bitfield_end(flags); struct ip_options_rcu *opt; struct sk_buff *pktopts; + u32 ir_mark; }; static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) @@ -97,6 +98,15 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) return (struct inet_request_sock *)sk; } +static inline u32 inet_request_mark(struct sock *sk, struct sk_buff *skb) +{ + if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept) { + return skb->mark; + } else { + return sk->sk_mark; + } +} + struct inet_cork { unsigned int flags; __be32 addr; diff --git a/include/net/ip.h b/include/net/ip.h index 55752985c144..14c50a1650ef 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -231,6 +231,9 @@ void ipfrag_init(void); void ip_static_sysctl_init(void); +#define IP4_REPLY_MARK(net, mark) \ + ((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0) + static inline bool ip_is_fragment(const struct iphdr *iph) { return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 5b40ad297b8c..ba810d0546bc 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -113,6 +113,9 @@ struct frag_hdr { #define IP6_MF 0x0001 #define IP6_OFFSET 0xFFF8 +#define IP6_REPLY_MARK(net, mark) \ + ((net)->ipv6.sysctl.fwmark_reflect ? (mark) : 0) + #include <net/sock.h> /* sysctls */ diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index b2704fd0ec80..2f0cfad66666 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -77,6 +77,9 @@ struct netns_ipv4 { int sysctl_ip_no_pmtu_disc; int sysctl_ip_fwd_use_pmtu; + int sysctl_fwmark_reflect; + int sysctl_tcp_fwmark_accept; + struct ping_group_range ping_group_range; atomic_t dev_addr_genid; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 21edaf1f7916..19d3446e59d2 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -30,6 +30,7 @@ struct netns_sysctl_ipv6 { int flowlabel_consistency; int icmpv6_time; int anycast_src_echo_reply; + int fwmark_reflect; }; struct netns_ipv6 { |
