summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2015-08-17 21:33:06 -0700
committerDavid S. Miller <davem@davemloft.net>2015-08-17 21:33:06 -0700
commit0b233dc7167884f95f08e796ac6a6767ae7d0d70 (patch)
treee10beeab2d0b925502ef3d2d3fea169ace63b636 /include
parentf376d4adfdd2d860aea3e99aa86e98324621ece7 (diff)
parent65d7ab8de582bc668e3dabb6ff48f750098a6e78 (diff)
Merge branch 'Identifier-Locator-Addressing'
Tom Herbert says: ==================== net: Identifier Locator Addressing - Part I This patch set provides rudimentary support for Identifier Locator Addressing or ILA. The basic concept of ILA is that we split an IPv6 address into a 64 bit locator and 64 bit identifier. The identifier is the identity of an entity in communication ("who"), and the locator expresses the location of the entity ("where"). Applications use an externally visible address that contains the identifier. When a packet is actually sent, a translation is done that overwrites the first 64 bits of the address with a locator. The packet can then be forwarded over the network to the host where the addressed entity is located. At the receiver, the reverse translation is done so that the application sees the original, untranslated address. Presumably an external control plane will provide identifier->locator mappings. v2: - Fix compilation errors when LWT is not configured - Consolidate ILA into a single ila.c v3: - Change pseudohdr argument of inet_proto_csum_replace functions to be a bool v4: - In ila_build_state check locator being in netlink params before allocating tunnel state The data path for ILA is a simple NAT translation that only operates on the upper 64 bits of a destination address in IPv6 packets. The basic process is: 1) Lookup 64 bit identifier (lower 64 bits of destination) 2) If a match is found a) Overwrite locator (upper 64 bits of destination) with the new locator b) Adjust any checksum that has destination address included in pseudo header 3) Send or receive packet ILA is a means to implement tunnels or network virtualization without encapsulation. Since there is no encapsulation involved, we assume that stateless support in the network for IPv6 (e.g. RSS, ECMP, TSO, etc.) just works. Also, since we're minimally changing the packet many of the worries about encapsulation (MTU, checksum, fragmentation) are not relevant. 
The downside is that ILA is not extensible like other encapsulations (GUE for instance) so it might not be appropriate for all use cases. Also, this only makes sense to do in IPv6! A key aspect of ILA is performance. The intent is that ILA would be used in data centers in virtualizing tasks or jobs. In the fullest incarnation all intra data center communications might be targeted to virtual ILA addresses. This is basically adding a new virtualization capability to the existing services in a datacenter, so there is a strong expectation that this does not degrade performance for existing applications. Performance seems to be dependent on how ILA is hooked into the kernel. ILA can be implemented under some different models: - Mechanically it is a form of stateless DNAT - It can be thought of as a type of (source) routing - As a functional replacement of encapsulation In this patch set we hook into the data path using Light Weight Tunnels (LWT) infrastructure. As part of that, we add support in LWT to redirect dst input. iproute will be modified to take a new ila encap type. ILA can be configured like: ip route add 3333:0:0:1:5555:0:2:0/128 \ encap ila 2001:0:0:2 via 2401:db00:20:911a:face:0:27:0 ip -6 addr add 3333:0:0:1:5555:0:1:0/128 dev eth0 ip route add table local local 2001:0:0:1:5555:0:1:0/128 encap ila 3333:0:0:1 dev lo So sending to destination 3333:0:0:1:5555:0:2:0 will have destination of 2001:0:0:2:5555:0:2:0 on the wire. Performance results are below. With ILA we see about a 10% drop in pps compared to non-ILA. Much of this drop can be attributed to the loss of early demux on input (translation occurs after it is attempted). We will address this in the next patch set. Also, IPvlan input path does not work with ILA since the routing is bypassed-- this will be addressed in a future patch. 
Performance testing: Performing netperf TCP_RR with 200 clients: Non-ILA baseline 84.92% CPU utilization 1861922.9 tps 93/163/330 50/90/99% latencies ILA single destination 83.16% CPU utilization 1679683.4 tps 105/180/332 50/90/99% latencies References: Slides from netconf: http://vger.kernel.org/netconf2015Herbert-ILA.pdf Slides from presentation at IETF: https://www.ietf.org/proceedings/92/slides/slides-92-nvo3-1.pdf I-D: https://tools.ietf.org/html/draft-herbert-nvo3-ila-00 ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r--include/net/checksum.h8
-rw-r--r--include/net/lwtunnel.h30
-rw-r--r--include/uapi/linux/ila.h15
-rw-r--r--include/uapi/linux/lwtunnel.h1
4 files changed, 50 insertions, 4 deletions
diff --git a/include/net/checksum.h b/include/net/checksum.h
index 2d1d73cb773e..9fcaedf994ee 100644
--- a/include/net/checksum.h
+++ b/include/net/checksum.h
@@ -140,14 +140,16 @@ static inline void csum_replace2(__sum16 *sum, __be16 old, __be16 new)
struct sk_buff;
void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
- __be32 from, __be32 to, int pseudohdr);
+ __be32 from, __be32 to, bool pseudohdr);
void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
const __be32 *from, const __be32 *to,
- int pseudohdr);
+ bool pseudohdr);
+void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb,
+ __wsum diff, bool pseudohdr);
static inline void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb,
__be16 from, __be16 to,
- int pseudohdr)
+ bool pseudohdr)
{
inet_proto_csum_replace4(sum, skb, (__force __be32)from,
(__force __be32)to, pseudohdr);
diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
index 33bd30963a95..e25b60eb262d 100644
--- a/include/net/lwtunnel.h
+++ b/include/net/lwtunnel.h
@@ -11,12 +11,15 @@
#define LWTUNNEL_HASH_SIZE (1 << LWTUNNEL_HASH_BITS)
/* lw tunnel state flags */
-#define LWTUNNEL_STATE_OUTPUT_REDIRECT 0x1
+#define LWTUNNEL_STATE_OUTPUT_REDIRECT BIT(0)
+#define LWTUNNEL_STATE_INPUT_REDIRECT BIT(1)
struct lwtunnel_state {
__u16 type;
__u16 flags;
atomic_t refcnt;
+ int (*orig_output)(struct sock *sk, struct sk_buff *skb);
+ int (*orig_input)(struct sk_buff *);
int len;
__u8 data[0];
};
@@ -25,6 +28,7 @@ struct lwtunnel_encap_ops {
int (*build_state)(struct net_device *dev, struct nlattr *encap,
struct lwtunnel_state **ts);
int (*output)(struct sock *sk, struct sk_buff *skb);
+ int (*input)(struct sk_buff *skb);
int (*fill_encap)(struct sk_buff *skb,
struct lwtunnel_state *lwtstate);
int (*get_encap_size)(struct lwtunnel_state *lwtstate);
@@ -58,6 +62,13 @@ static inline bool lwtunnel_output_redirect(struct lwtunnel_state *lwtstate)
return false;
}
+static inline bool lwtunnel_input_redirect(struct lwtunnel_state *lwtstate)
+{
+ if (lwtstate && (lwtstate->flags & LWTUNNEL_STATE_INPUT_REDIRECT))
+ return true;
+
+ return false;
+}
int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op,
unsigned int num);
int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op,
@@ -72,6 +83,8 @@ struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len);
int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b);
int lwtunnel_output(struct sock *sk, struct sk_buff *skb);
int lwtunnel_output6(struct sock *sk, struct sk_buff *skb);
+int lwtunnel_input(struct sk_buff *skb);
+int lwtunnel_input6(struct sk_buff *skb);
#else
@@ -90,6 +103,11 @@ static inline bool lwtunnel_output_redirect(struct lwtunnel_state *lwtstate)
return false;
}
+static inline bool lwtunnel_input_redirect(struct lwtunnel_state *lwtstate)
+{
+ return false;
+}
+
static inline int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op,
unsigned int num)
{
@@ -142,6 +160,16 @@ static inline int lwtunnel_output6(struct sock *sk, struct sk_buff *skb)
return -EOPNOTSUPP;
}
+static inline int lwtunnel_input(struct sk_buff *skb)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int lwtunnel_input6(struct sk_buff *skb)
+{
+ return -EOPNOTSUPP;
+}
+
#endif
#endif /* __NET_LWTUNNEL_H */
diff --git a/include/uapi/linux/ila.h b/include/uapi/linux/ila.h
new file mode 100644
index 000000000000..7ed9e670814e
--- /dev/null
+++ b/include/uapi/linux/ila.h
@@ -0,0 +1,15 @@
+/* ila.h - ILA Interface */
+
+#ifndef _UAPI_LINUX_ILA_H
+#define _UAPI_LINUX_ILA_H
+
+enum {
+ ILA_ATTR_UNSPEC,
+ ILA_ATTR_LOCATOR, /* u64 */
+
+ __ILA_ATTR_MAX,
+};
+
+#define ILA_ATTR_MAX (__ILA_ATTR_MAX - 1)
+
+#endif /* _UAPI_LINUX_ILA_H */
diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h
index 3bf223bc2367..aa84ca396bcb 100644
--- a/include/uapi/linux/lwtunnel.h
+++ b/include/uapi/linux/lwtunnel.h
@@ -7,6 +7,7 @@ enum lwtunnel_encap_types {
LWTUNNEL_ENCAP_NONE,
LWTUNNEL_ENCAP_MPLS,
LWTUNNEL_ENCAP_IP,
+ LWTUNNEL_ENCAP_ILA,
__LWTUNNEL_ENCAP_MAX,
};