From 25a91d8d91911f84a03a039339b29537e7f1970d Mon Sep 17 00:00:00 2001 From: Fan Du Date: Sat, 18 Jan 2014 09:54:23 +0800 Subject: skbuff: Introduce skb_to_sgvec_nomark to map skb without mark new end As compared with skb_to_sgvec, skb_to_sgvec_nomark only map skb to given sglist without mark the sg which contain last skb data as the end. So the caller can mannipulate sg list as will when padding new data after the first call without calling sg_unmark_end to expend sg list. Signed-off-by: Fan Du Signed-off-by: Steffen Klassert --- include/linux/skbuff.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f589c9af8cbf..c574bf3bd6f6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -691,6 +691,8 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom); struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom, int newtailroom, gfp_t priority); +int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg, + int offset, int len); int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len); int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer); -- cgit v1.2.3 From ca925cf1534ebcec332c08719a7dee6ee1782ce4 Mon Sep 17 00:00:00 2001 From: Fan Du Date: Sat, 18 Jan 2014 09:55:27 +0800 Subject: flowcache: Make flow cache name space aware Inserting a entry into flowcache, or flushing flowcache should be based on per net scope. The reason to do so is flushing operation from fat netns crammed with flow entries will also making the slim netns with only a few flow cache entries go away in original implementation. Since flowcache is tightly coupled with IPsec, so it would be easier to put flow cache global parameters into xfrm namespace part. And one last thing needs to do is bumping flow cache genid, and flush flow cache should also be made in per net style. Signed-off-by: Fan Du Signed-off-by: Steffen Klassert --- include/net/flow.h | 5 +++-- include/net/flowcache.h | 25 +++++++++++++++++++++++++ include/net/netns/xfrm.h | 11 +++++++++++ 3 files changed, 39 insertions(+), 2 deletions(-) create mode 100644 include/net/flowcache.h (limited to 'include') diff --git a/include/net/flow.h b/include/net/flow.h index d23e7fa2042e..bee3741e5a6f 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -218,9 +218,10 @@ struct flow_cache_object *flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, flow_resolve_t resolver, void *ctx); +int flow_cache_init(struct net *net); -void flow_cache_flush(void); -void flow_cache_flush_deferred(void); +void flow_cache_flush(struct net *net); +void flow_cache_flush_deferred(struct net *net); extern atomic_t flow_cache_genid; #endif diff --git a/include/net/flowcache.h b/include/net/flowcache.h new file mode 100644 index 000000000000..c8f665ec6e0d --- /dev/null +++ b/include/net/flowcache.h @@ -0,0 +1,25 @@ +#ifndef _NET_FLOWCACHE_H +#define _NET_FLOWCACHE_H + +#include +#include +#include +#include + +struct flow_cache_percpu { + struct hlist_head *hash_table; + int hash_count; + u32 hash_rnd; + int hash_rnd_recalc; + struct tasklet_struct flush_tasklet; +}; + +struct flow_cache { + u32 hash_shift; + struct flow_cache_percpu __percpu *percpu; + struct notifier_block hotcpu_notifier; + int low_watermark; + int high_watermark; + struct timer_list rnd_timer; +}; +#endif /* _NET_FLOWCACHE_H */ diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 1006a265beb3..52d0086d55d3 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -6,6 +6,7 @@ #include #include #include +#include struct ctl_table_header; @@ -61,6 +62,16 @@ struct netns_xfrm { spinlock_t xfrm_policy_sk_bundle_lock; rwlock_t xfrm_policy_lock; struct mutex xfrm_cfg_mutex; + + /* flow cache part */ + struct flow_cache flow_cache_global; + struct kmem_cache *flow_cachep; + atomic_t flow_cache_genid; + struct list_head flow_cache_gc_list; + spinlock_t flow_cache_gc_lock; + struct work_struct flow_cache_gc_work; + struct work_struct flow_cache_flush_work; + struct mutex flow_flush_sem; }; #endif -- cgit v1.2.3 From 0f24558e91563888d51e9be5b70981da920c37ac Mon Sep 17 00:00:00 2001 From: Horia Geanta Date: Wed, 12 Feb 2014 16:20:06 +0200 Subject: xfrm: avoid creating temporary SA when there are no listeners In the case when KMs have no listeners, km_query() will fail and temporary SAs are garbage collected immediately after their allocation. This causes strain on memory allocation, leading even to OOM since temporary SA alloc/free cycle is performed for every packet and garbage collection does not keep up the pace. The sane thing to do is to make sure we have audience before temporary SA allocation. Signed-off-by: Horia Geanta Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index afa5730fb3bd..5313ccfdeedf 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -594,6 +594,7 @@ struct xfrm_mgr { const struct xfrm_migrate *m, int num_bundles, const struct xfrm_kmaddress *k); + bool (*is_alive)(const struct km_event *c); }; int xfrm_register_km(struct xfrm_mgr *km); @@ -1646,6 +1647,20 @@ static inline int xfrm_aevent_is_on(struct net *net) rcu_read_unlock(); return ret; } + +static inline int xfrm_acquire_is_on(struct net *net) +{ + struct sock *nlsk; + int ret = 0; + + rcu_read_lock(); + nlsk = rcu_dereference(net->xfrm.nlsk); + if (nlsk) + ret = netlink_has_listeners(nlsk, XFRMNLGRP_ACQUIRE); + rcu_read_unlock(); + + return ret; +} #endif static inline int xfrm_alg_len(const struct xfrm_algo *alg) -- cgit v1.2.3 From d3623099d3509fa68fa28235366049dd3156c63a Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 14 Feb 2014 15:30:36 +0100 Subject: ipsec: add support of limited SA dump The goal of this patch is to allow userland to dump only a part of SA by specifying a filter during the dump. The kernel is in charge to filter SA, this avoids to generate useless netlink traffic (it save also some cpu cycles). This is particularly useful when there is a big number of SA set on the system. Note that I removed the union in struct xfrm_state_walk to fix a problem on arm. struct netlink_callback->args is defined as a array of 6 long and the first long is used in xfrm code to flag the cb as initialized. Hence, we must have: sizeof(struct xfrm_state_walk) <= sizeof(long) * 5. With the union, it was false on arm (sizeof(struct xfrm_state_walk) was sizeof(long) * 7), due to the padding. In fact, whatever the arch is, this union seems useless, there will be always padding after it. Removing it will not increase the size of this struct (and reduce it on arm). Signed-off-by: Nicolas Dichtel Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 10 +++++----- include/uapi/linux/pfkeyv2.h | 15 ++++++++++++++- include/uapi/linux/xfrm.h | 10 ++++++++++ 3 files changed, 29 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 5313ccfdeedf..45332acac022 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -118,11 +118,10 @@ struct xfrm_state_walk { struct list_head all; u8 state; - union { - u8 dying; - u8 proto; - }; + u8 dying; + u8 proto; u32 seq; + struct xfrm_filter *filter; }; /* Full description of state of transformer. */ @@ -1406,7 +1405,8 @@ static inline void xfrm_sysctl_fini(struct net *net) } #endif -void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto); +void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto, + struct xfrm_filter *filter); int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk, int (*func)(struct xfrm_state *, int, void*), void *); void xfrm_state_walk_done(struct xfrm_state_walk *walk, struct net *net); diff --git a/include/uapi/linux/pfkeyv2.h b/include/uapi/linux/pfkeyv2.h index 0b80c806631f..ada7f0171ccc 100644 --- a/include/uapi/linux/pfkeyv2.h +++ b/include/uapi/linux/pfkeyv2.h @@ -235,6 +235,18 @@ struct sadb_x_kmaddress { } __attribute__((packed)); /* sizeof(struct sadb_x_kmaddress) == 8 */ +/* To specify the SA dump filter */ +struct sadb_x_filter { + __u16 sadb_x_filter_len; + __u16 sadb_x_filter_exttype; + __u32 sadb_x_filter_saddr[4]; + __u32 sadb_x_filter_daddr[4]; + __u16 sadb_x_filter_family; + __u8 sadb_x_filter_splen; + __u8 sadb_x_filter_dplen; +} __attribute__((packed)); +/* sizeof(struct sadb_x_filter) == 40 */ + /* Message types */ #define SADB_RESERVED 0 #define SADB_GETSPI 1 @@ -358,7 +370,8 @@ struct sadb_x_kmaddress { #define SADB_X_EXT_SEC_CTX 24 /* Used with MIGRATE to pass @ to IKE for negotiation */ #define SADB_X_EXT_KMADDRESS 25 -#define SADB_EXT_MAX 25 +#define SADB_X_EXT_FILTER 26 +#define SADB_EXT_MAX 26 /* Identity Extension values */ #define SADB_IDENTTYPE_RESERVED 0 diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index a8cd6a4a2970..6550c679584f 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -298,6 +298,8 @@ enum xfrm_attr_type_t { XFRMA_TFCPAD, /* __u32 */ XFRMA_REPLAY_ESN_VAL, /* struct xfrm_replay_esn */ XFRMA_SA_EXTRA_FLAGS, /* __u32 */ + XFRMA_PROTO, /* __u8 */ + XFRMA_FILTER, /* struct xfrm_filter */ __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) @@ -474,6 +476,14 @@ struct xfrm_user_mapping { __be16 new_sport; }; +struct xfrm_filter { + xfrm_address_t saddr; + xfrm_address_t daddr; + __u16 family; + __u8 splen; + __u8 dplen; +}; + #ifndef __KERNEL__ /* backwards compatibility for userspace */ #define XFRMGRP_ACQUIRE 1 -- cgit v1.2.3 From 1a1ccc96abb2ed9b8fbb71018e64b97324caef53 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 19 Feb 2014 10:07:34 +0100 Subject: xfrm: Remove caching of xfrm_policy_sk_bundles We currently cache socket policy bundles at xfrm_policy_sk_bundles. These cached bundles are never used. Instead we create and cache a new one whenever xfrm_lookup() is called on a socket policy. Most protocols cache the used routes to the socket, so let's remove the unused caching of socket policy bundles in xfrm. Signed-off-by: Steffen Klassert --- include/net/netns/xfrm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 52d0086d55d3..51f0dce7b643 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -59,7 +59,6 @@ struct netns_xfrm { struct dst_ops xfrm6_dst_ops; #endif spinlock_t xfrm_state_lock; - spinlock_t xfrm_policy_sk_bundle_lock; rwlock_t xfrm_policy_lock; struct mutex xfrm_cfg_mutex; -- cgit v1.2.3