From 24ea591d2201c3257d666466e8fac50a6cf3c52f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 6 Jul 2015 05:18:03 -0700
Subject: net: sched: extend percpu stats helpers

qdisc_bstats_update_cpu() and other helpers were added to support
percpu stats for qdisc.

We want to add percpu stats for tc action, so this patch add common
helpers.

qdisc_bstats_update_cpu() is renamed to qdisc_bstats_cpu_update()
qdisc_qstats_drop_cpu() is renamed to qdisc_qstats_cpu_drop()

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 31 +++++++++++++++++++++----------
 1 file changed, 21 insertions(+), 10 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 2738f6f87908..2eab08c38e32 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -513,17 +513,20 @@ static inline void bstats_update(struct gnet_stats_basic_packed *bstats,
 	bstats->packets += skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
 }
 
-static inline void qdisc_bstats_update_cpu(struct Qdisc *sch,
-					   const struct sk_buff *skb)
+static inline void bstats_cpu_update(struct gnet_stats_basic_cpu *bstats,
+				     const struct sk_buff *skb)
 {
-	struct gnet_stats_basic_cpu *bstats =
-				this_cpu_ptr(sch->cpu_bstats);
-
 	u64_stats_update_begin(&bstats->syncp);
 	bstats_update(&bstats->bstats, skb);
 	u64_stats_update_end(&bstats->syncp);
 }
 
+static inline void qdisc_bstats_cpu_update(struct Qdisc *sch,
+					   const struct sk_buff *skb)
+{
+	bstats_cpu_update(this_cpu_ptr(sch->cpu_bstats), skb);
+}
+
 static inline void qdisc_bstats_update(struct Qdisc *sch,
 				       const struct sk_buff *skb)
 {
@@ -547,16 +550,24 @@ static inline void __qdisc_qstats_drop(struct Qdisc *sch, int count)
 	sch->qstats.drops += count;
 }
 
-static inline void qdisc_qstats_drop(struct Qdisc *sch)
+static inline void qstats_drop_inc(struct gnet_stats_queue *qstats)
 {
-	sch->qstats.drops++;
+	qstats->drops++;
 }
 
-static inline void qdisc_qstats_drop_cpu(struct Qdisc *sch)
+static inline void qstats_overlimit_inc(struct gnet_stats_queue *qstats)
 {
-	struct gnet_stats_queue *qstats = this_cpu_ptr(sch->cpu_qstats);
+	qstats->overlimits++;
+}
 
-	qstats->drops++;
+static inline void qdisc_qstats_drop(struct Qdisc *sch)
+{
+	qstats_drop_inc(&sch->qstats);
+}
+
+static inline void qdisc_qstats_cpu_drop(struct Qdisc *sch)
+{
+	qstats_drop_inc(this_cpu_ptr(sch->cpu_qstats));
 }
 
 static inline void qdisc_qstats_overlimit(struct Qdisc *sch)
-- 
cgit v1.2.3


From 519c818e8fb646eef1e8bfedd18519bec47bc9a9 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 6 Jul 2015 05:18:04 -0700
Subject: net: sched: add percpu stats to actions

Reuse existing percpu infrastructure John Fastabend added for qdisc.

This patch adds a new cpustats parameter to tcf_hash_create() and all
actions pass false, meaning this patch should have no effect yet.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 3ee4c92afd1b..db2063ffd181 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -21,6 +21,8 @@ struct tcf_common {
 	struct gnet_stats_rate_est64	tcfc_rate_est;
 	spinlock_t			tcfc_lock;
 	struct rcu_head			tcfc_rcu;
+	struct gnet_stats_basic_cpu __percpu *cpu_bstats;
+	struct gnet_stats_queue __percpu *cpu_qstats;
 };
 #define tcf_head	common.tcfc_head
 #define tcf_index	common.tcfc_index
@@ -103,7 +105,7 @@ int tcf_hash_release(struct tc_action *a, int bind);
 u32 tcf_hash_new_index(struct tcf_hashinfo *hinfo);
 int tcf_hash_check(u32 index, struct tc_action *a, int bind);
 int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a,
-		    int size, int bind);
+		    int size, int bind, bool cpustats);
 void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est);
 void tcf_hash_insert(struct tc_action *a);
 
-- 
cgit v1.2.3


From cc6510a9504fd3c03d76bd68d99653148342eecc Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 6 Jul 2015 05:18:06 -0700
Subject: net_sched: act_gact: use a separate packet counters for gact_determ()

Second step for gact RCU operation :

We want to get rid of the spinlock protecting gact operations.
Stats (packets/bytes) will soon be per cpu.

gact_determ() would not work without a central packet counter,
so lets add it for this mode.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tc_act/tc_gact.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h
index 9fc9b578908a..592a6bc02b0b 100644
--- a/include/net/tc_act/tc_gact.h
+++ b/include/net/tc_act/tc_gact.h
@@ -6,9 +6,10 @@
 struct tcf_gact {
 	struct tcf_common	common;
 #ifdef CONFIG_GACT_PROB
-        u16			tcfg_ptype;
-        u16			tcfg_pval;
-        int			tcfg_paction;
+	u16			tcfg_ptype;
+	u16			tcfg_pval;
+	int			tcfg_paction;
+	atomic_t		packets;
 #endif
 };
 #define to_gact(a) \
-- 
cgit v1.2.3


From 56e5d1ca183d8616fab377d7d466c244b4dbb3b9 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 6 Jul 2015 05:18:08 -0700
Subject: net_sched: act_gact: remove spinlock in fast path

Final step for gact RCU operation :

1) Use percpu stats
2) update lastuse only every clock tick to avoid false sharing
3) Remove spinlock acquisition, as it is no longer needed.

Since this is the last contended lock in packet RX when tc gact is used,
this gives impressive gain.

My host with 8 RX queues was handling 5 Mpps before the patch,
and more than 11 Mpps after patch.

Tested:

On receiver :

dev=eth0
tc qdisc del dev $dev ingress 2>/dev/null
tc qdisc add dev $dev ingress
tc filter del dev $dev root pref 10 2>/dev/null
tc filter del dev $dev pref 10 2>/dev/null
tc filter add dev $dev est 1sec 4sec parent ffff: protocol ip prio 1 \
	u32 match ip src 7.0.0.0/8 flowid 1:15 action drop

Sender sends packets flood from 7/8 network

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/net')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index db2063ffd181..8d2a707a9e87 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -70,6 +70,17 @@ static inline void tcf_hashinfo_destroy(struct tcf_hashinfo *hf)
 	kfree(hf->htab);
 }
 
+/* Update lastuse only if needed, to avoid dirtying a cache line.
+ * We use a temp variable to avoid fetching jiffies twice.
+ */
+static inline void tcf_lastuse_update(struct tcf_t *tm)
+{
+	unsigned long now = jiffies;
+
+	if (tm->lastuse != now)
+		tm->lastuse = now;
+}
+
 #ifdef CONFIG_NET_CLS_ACT
 
 #define ACT_P_CREATED 1
-- 
cgit v1.2.3


From 2ee22a90c7afac265bb6f7abea610b938195e2b8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 6 Jul 2015 05:18:09 -0700
Subject: net_sched: act_mirred: remove spinlock in fast path

Like act_gact, act_mirred can be lockless in packet processing

1) Use percpu stats
2) update lastuse only every clock tick to avoid false sharing
3) use rcu to protect tcfm_dev
4) Remove spinlock usage, as it is no longer needed.

Next step : add multi queue capability to ifb device

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: John Fastabend <john.fastabend@gmail.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tc_act/tc_mirred.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h
index 4dd77a1c106b..dae96bae1c19 100644
--- a/include/net/tc_act/tc_mirred.h
+++ b/include/net/tc_act/tc_mirred.h
@@ -8,7 +8,7 @@ struct tcf_mirred {
 	int			tcfm_eaction;
 	int			tcfm_ifindex;
 	int			tcfm_ok_push;
-	struct net_device	*tcfm_dev;
+	struct net_device __rcu	*tcfm_dev;
 	struct list_head	tcfm_list;
 };
 #define to_mirred(a) \
-- 
cgit v1.2.3


From 071d5080e33d6f24139e4213c2d9f97a2c21b602 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Thu, 9 Jul 2015 13:16:29 -0700
Subject: tcp: add tcp_in_slow_start helper

Add a helper to test the slow start condition in various congestion
control modules and other places. This is to prepare a slight improvement
in policy as to exactly when to slow start.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Nandita Dukkipati <nanditad@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 950cfecaad3c..dba22fc1b065 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -989,6 +989,11 @@ static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
 
 #define TCP_INFINITE_SSTHRESH	0x7fffffff
 
+static inline bool tcp_in_slow_start(const struct tcp_sock *tp)
+{
+	return tp->snd_cwnd <= tp->snd_ssthresh;
+}
+
 static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp)
 {
 	return tp->snd_ssthresh >= TCP_INFINITE_SSTHRESH;
@@ -1065,7 +1070,7 @@ static inline bool tcp_is_cwnd_limited(const struct sock *sk)
 	const struct tcp_sock *tp = tcp_sk(sk);
 
 	/* If in slow start, ensure cwnd grows to twice what was ACKed. */
-	if (tp->snd_cwnd <= tp->snd_ssthresh)
+	if (tcp_in_slow_start(tp))
 		return tp->snd_cwnd < 2 * tp->max_packets_out;
 
 	return tp->is_cwnd_limited;
-- 
cgit v1.2.3


From 76174004a0f19785a328f40388e87e982bbf69b9 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Thu, 9 Jul 2015 13:16:30 -0700
Subject: tcp: do not slow start when cwnd equals ssthresh

In the original design slow start is only used to raise cwnd
when cwnd is stricly below ssthresh. It makes little sense
to slow start when cwnd == ssthresh: especially
when hystart has set ssthresh in the initial ramp, or after
recovery when cwnd resets to ssthresh. Not doing so will
also help reduce the buffer bloat slightly.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Nandita Dukkipati <nanditad@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index dba22fc1b065..364426a2be5a 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -991,7 +991,7 @@ static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
 
 static inline bool tcp_in_slow_start(const struct tcp_sock *tp)
 {
-	return tp->snd_cwnd <= tp->snd_ssthresh;
+	return tp->snd_cwnd < tp->snd_ssthresh;
 }
 
 static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp)
-- 
cgit v1.2.3


From 3fd2f1b9d91284afb957ab9899a83279d0e09f29 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 8 Jul 2015 14:28:28 -0700
Subject: inet: remove BUG_ON() in twsk_destructor()

Kernel will crash the same if one of the pointer is NULL anyway.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/timewait_sock.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/timewait_sock.h b/include/net/timewait_sock.h
index 68f0ecad6c6e..1a47946f95ba 100644
--- a/include/net/timewait_sock.h
+++ b/include/net/timewait_sock.h
@@ -33,9 +33,6 @@ static inline int twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
 
 static inline void twsk_destructor(struct sock *sk)
 {
-	BUG_ON(sk == NULL);
-	BUG_ON(sk->sk_prot == NULL);
-	BUG_ON(sk->sk_prot->twsk_prot == NULL);
 	if (sk->sk_prot->twsk_prot->twsk_destructor != NULL)
 		sk->sk_prot->twsk_prot->twsk_destructor(sk);
 }
-- 
cgit v1.2.3


From fc01538f9fb75572c969ca9988176ffc2a8741d6 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 8 Jul 2015 14:28:29 -0700
Subject: inet: simplify timewait refcounting

timewait sockets have a complex refcounting logic.
Once we realize it should be similar to established and
syn_recv sockets, we can use sk_nulls_del_node_init_rcu()
and remove inet_twsk_unhash()

In particular, deferred inet_twsk_put() added in commit
13475a30b66cd ("tcp: connect() race with timewait reuse")
looks unecessary : When removing a timewait socket from
ehash or bhash, caller must own a reference on the socket
anyway.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_hashtables.h    | 4 ++--
 include/net/inet_timewait_sock.h | 6 ++----
 2 files changed, 4 insertions(+), 6 deletions(-)

(limited to 'include/net')

diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index b73c88a19dd4..b07d126694a7 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -205,8 +205,8 @@ void inet_put_port(struct sock *sk);
 
 void inet_hashinfo_init(struct inet_hashinfo *h);
 
-int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw);
-int __inet_hash(struct sock *sk, struct inet_timewait_sock *tw);
+void __inet_hash_nolisten(struct sock *sk, struct sock *osk);
+void __inet_hash(struct sock *sk, struct sock *osk);
 void inet_hash(struct sock *sk);
 void inet_unhash(struct sock *sk);
 
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 360c4802288d..96f52a4711c8 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -100,10 +100,8 @@ static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk)
 void inet_twsk_free(struct inet_timewait_sock *tw);
 void inet_twsk_put(struct inet_timewait_sock *tw);
 
-int inet_twsk_unhash(struct inet_timewait_sock *tw);
-
-int inet_twsk_bind_unhash(struct inet_timewait_sock *tw,
-			  struct inet_hashinfo *hashinfo);
+void inet_twsk_bind_unhash(struct inet_timewait_sock *tw,
+			   struct inet_hashinfo *hashinfo);
 
 struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
 					   struct inet_timewait_death_row *dr,
-- 
cgit v1.2.3


From dbe7faa4045ea83a37b691b12bb02a8f86c2d2e9 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 8 Jul 2015 14:28:30 -0700
Subject: inet: inet_twsk_deschedule factorization

inet_twsk_deschedule() calls are followed by inet_twsk_put().

Only particular case is in inet_twsk_purge() but there is no point
to defer the inet_twsk_put() after re-enabling BH.

Lets rename inet_twsk_deschedule() to inet_twsk_deschedule_put()
and move the inet_twsk_put() inside.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_timewait_sock.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 96f52a4711c8..879d6e5a973b 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -111,7 +111,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
 			   struct inet_hashinfo *hashinfo);
 
 void inet_twsk_schedule(struct inet_timewait_sock *tw, const int timeo);
-void inet_twsk_deschedule(struct inet_timewait_sock *tw);
+void inet_twsk_deschedule_put(struct inet_timewait_sock *tw);
 
 void inet_twsk_purge(struct inet_hashinfo *hashinfo,
 		     struct inet_timewait_death_row *twdr, int family);
-- 
cgit v1.2.3


From 35a256fee52c7c207796302681fa95189c85b408 Mon Sep 17 00:00:00 2001
From: Tom Herbert <tom@herbertland.com>
Date: Wed, 8 Jul 2015 16:58:22 -0700
Subject: ipv6: Nonlocal bind

Add support to allow non-local binds similar to how this was done for IPv4.
Non-local binds are very useful in emulating the Internet in a box, etc.

This add the ip_nonlocal_bind sysctl under ipv6.

Testing:

Set up nonlocal binding and receive routing on a host, e.g.:

ip -6 rule add from ::/0 iif eth0 lookup 200
ip -6 route add local 2001:0:0:1::/64 dev lo proto kernel scope host table 200
sysctl -w net.ipv6.ip_nonlocal_bind=1

Set up routing to 2001:0:0:1::/64 on peer to go to first host

ping6 -I 2001:0:0:1::1 peer-address -- to verify

Signed-off-by: Tom Herbert <tom@herbertland.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv6.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 8d93544a2d2b..c0368db6df54 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -31,6 +31,7 @@ struct netns_sysctl_ipv6 {
 	int auto_flowlabels;
 	int icmpv6_time;
 	int anycast_src_echo_reply;
+	int ip_nonlocal_bind;
 	int fwmark_reflect;
 	int idgen_retries;
 	int idgen_delay;
-- 
cgit v1.2.3


From 085db2c04557d31db61541f361bd8b4de92c9939 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 10 Jul 2015 18:15:06 -0500
Subject: netfilter: Per network namespace netfilter hooks.

- Add a new set of functions for registering and unregistering per
  network namespace hooks.

- Modify the old global namespace hook functions to use the per
  network namespace hooks in their implementation, so their remains a
  single list that needs to be walked for any hook (this is important
  for keeping the hook priority working and for keeping the code
  walking the hooks simple).

- Only allow registering the per netdevice hooks in the network
  namespace where the network device lives.

- Dynamically allocate the structures in the per network namespace
  hook list in nf_register_net_hook, and unregister them in
  nf_unregister_net_hook.

  Dynamic allocate is required somewhere as the number of network
  namespaces are not fixed so we might as well allocate them in the
  registration function.

  The chain of registered hooks on any list is expected to be small so
  the cost of walking that list to find the entry we are unregistering
  should also be small.

  Performing the management of the dynamically allocated list entries
  in the registration and unregistration functions keeps the complexity
  from spreading.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/net/netns/netfilter.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h
index 532e4ba64f49..38aa4983e2a9 100644
--- a/include/net/netns/netfilter.h
+++ b/include/net/netns/netfilter.h
@@ -14,5 +14,6 @@ struct netns_nf {
 #ifdef CONFIG_SYSCTL
 	struct ctl_table_header *nf_log_dir_header;
 #endif
+	struct list_head hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
 };
 #endif
-- 
cgit v1.2.3


From 5c48f1201744233d4f235c7dd916d5196ed20716 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 17 Jun 2015 09:58:06 +0200
Subject: mac80211: remove exposing 'mfp' to drivers

There's no driver using this, so remove it.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 6b1077c2a63f..43dbddfa06c0 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1675,7 +1675,6 @@ struct ieee80211_sta_rates {
  * @tdls: indicates whether the STA is a TDLS peer
  * @tdls_initiator: indicates the STA is an initiator of the TDLS link. Only
  *	valid if the STA is a TDLS peer in the first place.
- * @mfp: indicates whether the STA uses management frame protection or not.
  * @txq: per-TID data TX queues (if driver uses the TXQ abstraction)
  */
 struct ieee80211_sta {
@@ -1693,7 +1692,6 @@ struct ieee80211_sta {
 	struct ieee80211_sta_rates __rcu *rates;
 	bool tdls;
 	bool tdls_initiator;
-	bool mfp;
 
 	struct ieee80211_txq *txq[IEEE80211_NUM_TIDS];
 
-- 
cgit v1.2.3


From af9f9b22beee70aae58651cdbb9d6375e6e51797 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 11 Jun 2015 16:02:32 +0200
Subject: mac80211: don't store napi struct

When introducing multiple RX queues, a single NAPI struct will not
be sufficient. Instead of trying to store multiple, simply change
the API to have the NAPI struct passed to the RX function. This of
course means that drivers using rx_irqsafe() cannot use NAPI, but
that seems a reasonable trade-off, particularly since only two of
all drivers are currently using it at all.

While at it, we can now remove the IEEE80211_RX_REORDER_TIMER flag
again since this code path cannot have a napi struct anyway.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 37 ++++++++++++++++++++++++-------------
 1 file changed, 24 insertions(+), 13 deletions(-)

(limited to 'include/net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 43dbddfa06c0..ff68b8c4ab35 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -3694,20 +3694,28 @@ void ieee80211_free_hw(struct ieee80211_hw *hw);
 void ieee80211_restart_hw(struct ieee80211_hw *hw);
 
 /**
- * ieee80211_napi_add - initialize mac80211 NAPI context
- * @hw: the hardware to initialize the NAPI context on
- * @napi: the NAPI context to initialize
- * @napi_dev: dummy NAPI netdevice, here to not waste the space if the
- *	driver doesn't use NAPI
- * @poll: poll function
- * @weight: default weight
+ * ieee80211_rx_napi - receive frame from NAPI context
  *
- * See also netif_napi_add().
+ * Use this function to hand received frames to mac80211. The receive
+ * buffer in @skb must start with an IEEE 802.11 header. In case of a
+ * paged @skb is used, the driver is recommended to put the ieee80211
+ * header of the frame on the linear part of the @skb to avoid memory
+ * allocation and/or memcpy by the stack.
+ *
+ * This function may not be called in IRQ context. Calls to this function
+ * for a single hardware must be synchronized against each other. Calls to
+ * this function, ieee80211_rx_ni() and ieee80211_rx_irqsafe() may not be
+ * mixed for a single hardware. Must not run concurrently with
+ * ieee80211_tx_status() or ieee80211_tx_status_ni().
+ *
+ * This function must be called with BHs disabled.
+ *
+ * @hw: the hardware this frame came in on
+ * @skb: the buffer to receive, owned by mac80211 after this call
+ * @napi: the NAPI context
  */
-void ieee80211_napi_add(struct ieee80211_hw *hw, struct napi_struct *napi,
-			struct net_device *napi_dev,
-			int (*poll)(struct napi_struct *, int),
-			int weight);
+void ieee80211_rx_napi(struct ieee80211_hw *hw, struct sk_buff *skb,
+		       struct napi_struct *napi);
 
 /**
  * ieee80211_rx - receive frame
@@ -3729,7 +3737,10 @@ void ieee80211_napi_add(struct ieee80211_hw *hw, struct napi_struct *napi,
  * @hw: the hardware this frame came in on
  * @skb: the buffer to receive, owned by mac80211 after this call
  */
-void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb);
+static inline void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb)
+{
+	ieee80211_rx_napi(hw, skb, NULL);
+}
 
 /**
  * ieee80211_rx_irqsafe - receive frame
-- 
cgit v1.2.3


From 0c028b5fd1bd10d5777756e571c6c1971f04062b Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 12 Jun 2015 14:33:54 +0200
Subject: mac80211: remove zero-length A-MPDU subframe reporting

As there's no driver using this capability and reporting zero-length
A-MPDU subframes for radiotap monitoring, remove the capability to
free up two RX flags.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include/net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index ff68b8c4ab35..7417fee18185 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -997,9 +997,6 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info)
  * @RX_FLAG_AMPDU_DETAILS: A-MPDU details are known, in particular the reference
  *	number (@ampdu_reference) must be populated and be a distinct number for
  *	each A-MPDU
- * @RX_FLAG_AMPDU_REPORT_ZEROLEN: driver reports 0-length subframes
- * @RX_FLAG_AMPDU_IS_ZEROLEN: This is a zero-length subframe, for
- *	monitoring purposes only
  * @RX_FLAG_AMPDU_LAST_KNOWN: last subframe is known, should be set on all
  *	subframes of a single A-MPDU
  * @RX_FLAG_AMPDU_IS_LAST: this subframe is the last subframe of the A-MPDU
@@ -1039,8 +1036,7 @@ enum mac80211_rx_flags {
 	RX_FLAG_NO_SIGNAL_VAL		= BIT(12),
 	RX_FLAG_HT_GF			= BIT(13),
 	RX_FLAG_AMPDU_DETAILS		= BIT(14),
-	RX_FLAG_AMPDU_REPORT_ZEROLEN	= BIT(15),
-	RX_FLAG_AMPDU_IS_ZEROLEN	= BIT(16),
+	/* bits 15/16 free */
 	RX_FLAG_AMPDU_LAST_KNOWN	= BIT(17),
 	RX_FLAG_AMPDU_IS_LAST		= BIT(18),
 	RX_FLAG_AMPDU_DELIM_CRC_ERROR	= BIT(19),
-- 
cgit v1.2.3


From 981d94a80174e4f33bd5015fb49051bfc2eb00d2 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 12 Jun 2015 14:39:02 +0200
Subject: mac80211: support device/driver PN check for CCMP/GCMP

When there are multiple RX queues, the PN checks in mac80211 cannot be
used since packets might be processed out of order on different CPUs.

Allow the driver to report that the PN has been checked, drivers that
will use multi-queue RX will have to set this flag.

For now, the flag is only valid when the frame has been decrypted, in
theory that restriction doesn't have to be there, but in practice the
hardware will have decrypted the frame already.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 7417fee18185..4d3d2686f278 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -973,6 +973,10 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info)
  * @RX_FLAG_IV_STRIPPED: The IV/ICV are stripped from this frame.
  *	If this flag is set, the stack cannot do any replay detection
  *	hence the driver or hardware will have to do that.
+ * @RX_FLAG_PN_VALIDATED: Currently only valid for CCMP/GCMP frames, this
+ *	flag indicates that the PN was verified for replay protection.
+ *	Note that this flag is also currently only supported when a frame
+ *	is also decrypted (ie. @RX_FLAG_DECRYPTED must be set)
  * @RX_FLAG_FAILED_FCS_CRC: Set this flag if the FCS check failed on
  *	the frame.
  * @RX_FLAG_FAILED_PLCP_CRC: Set this flag if the PCLP check failed on
@@ -1036,7 +1040,8 @@ enum mac80211_rx_flags {
 	RX_FLAG_NO_SIGNAL_VAL		= BIT(12),
 	RX_FLAG_HT_GF			= BIT(13),
 	RX_FLAG_AMPDU_DETAILS		= BIT(14),
-	/* bits 15/16 free */
+	RX_FLAG_PN_VALIDATED		= BIT(15),
+	/* bit 16 free */
 	RX_FLAG_AMPDU_LAST_KNOWN	= BIT(17),
 	RX_FLAG_AMPDU_IS_LAST		= BIT(18),
 	RX_FLAG_AMPDU_DELIM_CRC_ERROR	= BIT(19),
-- 
cgit v1.2.3


From b98fb44ffceeac717789e8f2fb3497e6b8c5c65b Mon Sep 17 00:00:00 2001
From: Arik Nemtsov <arik@wizery.com>
Date: Wed, 10 Jun 2015 20:42:59 +0300
Subject: mac80211: define TDLS wider BW support bits

Allow a device to specify support for the TDLS wider-bandwidth feature.
Indicate this support during TDLS setup in the ext-capab IE and set an
appropriate station flag when our TDLS peer supports it.
This feature gives TDLS peers the ability to use a wider channel than
the base width of the BSS. For instance VHT capable TDLS peers connected
on a 20MHz channel can extend the channel to 80MHz, if regulatory
considerations allow it.

Do not cap the bandwidth of such stations by the current BSS channel width
in mac80211.

Signed-off-by: Arik Nemtsov <arikx.nemtsov@intel.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 4d3d2686f278..8f61a230c482 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1887,6 +1887,9 @@ struct ieee80211_txq {
  * @IEEE80211_HW_SINGLE_SCAN_ON_ALL_BANDS: The HW supports scanning on all bands
  *	in one command, mac80211 doesn't have to run separate scans per band.
  *
+ * @IEEE80211_HW_TDLS_WIDER_BW: The device/driver supports wider bandwidth
+ *	than then BSS bandwidth for a TDLS link on the base channel.
+ *
  * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
  */
 enum ieee80211_hw_flags {
@@ -1919,6 +1922,7 @@ enum ieee80211_hw_flags {
 	IEEE80211_HW_CHANCTX_STA_CSA,
 	IEEE80211_HW_SUPPORTS_CLONED_SKBS,
 	IEEE80211_HW_SINGLE_SCAN_ON_ALL_BANDS,
+	IEEE80211_HW_TDLS_WIDER_BW,
 
 	/* keep last, obviously */
 	NUM_IEEE80211_HW_FLAGS
-- 
cgit v1.2.3


From f9a060f4b2003eb7350762e60dfc576447e44bad Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 12 Jun 2015 14:55:34 +0200
Subject: mac80211: add pointer for driver use to key

Some drivers may need to store data per key, for example for PN
validation. Allow this by adding a pointer to the struct that
the driver can assign.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 8f61a230c482..484cc14fb947 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1492,8 +1492,10 @@ enum ieee80211_key_flags {
  * 	- Temporal Authenticator Rx MIC Key (64 bits)
  * @icv_len: The ICV length for this key type
  * @iv_len: The IV length for this key type
+ * @drv_priv: pointer for driver use
  */
 struct ieee80211_key_conf {
+	void *drv_priv;
 	atomic64_t tx_pn;
 	u32 cipher;
 	u8 icv_len;
-- 
cgit v1.2.3


From 33d8783c58427683b533664f67f8c4378ed64495 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 23 Jun 2015 17:47:05 +0200
Subject: cfg80211: allow mgmt_frame_register callback to sleep

This callback is currently not allowed to sleep, which makes it more
difficult to implement proper driver methods in mac80211 than it has
to be. Instead of doing asynchronous work here in mac80211, make it
possible for the callback to sleep by doing some asynchronous work
in cfg80211. This also enables improvements to other drivers, like
ath6kl, that would like to sleep in this callback.

While at it, also fix the code to call the driver on the implicit
unregistration when an interface is removed, and do that also when
a P2P-Device wdev is destroyed (otherwise we leak the structs.)

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index a741678f24a2..9a529c48f6ca 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2369,8 +2369,7 @@ struct cfg80211_qos_map {
  *	method returns 0.)
  *
  * @mgmt_frame_register: Notify driver that a management frame type was
- *	registered. Note that this callback may not sleep, and cannot run
- *	concurrently with itself.
+ *	registered. The callback is allowed to sleep.
  *
  * @set_antenna: Set antenna configuration (tx_ant, rx_ant) on the device.
  *	Parameters are bitmaps of allowed antennas to use for TX/RX. Drivers may
-- 
cgit v1.2.3


From b87a173e25d6bf5c26f13d329cdddf57dbd4061a Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 15 Jul 2015 14:21:41 +0200
Subject: cls_cgroup: factor out classid retrieval

Split out retrieving the cgroups net_cls classid retrieval into its
own function, so that it can be reused later on from other parts of
the traffic control subsystem. If there's no skb->sk, then the small
helper returns 0 as well, which in cls_cgroup terms means 'could not
classify'.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/cls_cgroup.h | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

(limited to 'include/net')

diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h
index c15d39456e14..ccd6d8bffa4d 100644
--- a/include/net/cls_cgroup.h
+++ b/include/net/cls_cgroup.h
@@ -49,9 +49,38 @@ static inline void sock_update_classid(struct sock *sk)
 	if (classid != sk->sk_classid)
 		sk->sk_classid = classid;
 }
+
+static inline u32 task_get_classid(const struct sk_buff *skb)
+{
+	u32 classid = task_cls_state(current)->classid;
+
+	/* Due to the nature of the classifier it is required to ignore all
+	 * packets originating from softirq context as accessing `current'
+	 * would lead to false results.
+	 *
+	 * This test assumes that all callers of dev_queue_xmit() explicitly
+	 * disable bh. Knowing this, it is possible to detect softirq based
+	 * calls by looking at the number of nested bh disable calls because
+	 * softirqs always disables bh.
+	 */
+	if (in_serving_softirq()) {
+		/* If there is an sk_classid we'll use that. */
+		if (!skb->sk)
+			return 0;
+
+		classid = skb->sk->sk_classid;
+	}
+
+	return classid;
+}
 #else /* !CONFIG_CGROUP_NET_CLASSID */
 static inline void sock_update_classid(struct sock *sk)
 {
 }
+
+static inline u32 task_get_classid(const struct sk_buff *skb)
+{
+	return 0;
+}
 #endif /* CONFIG_CGROUP_NET_CLASSID */
 #endif  /* _NET_CLS_CGROUP_H */
-- 
cgit v1.2.3


From 1a3b2ec93d4277b121979321b4024b438cb09504 Mon Sep 17 00:00:00 2001
From: Scott Feldman <sfeldma@gmail.com>
Date: Sat, 18 Jul 2015 18:24:50 -0700
Subject: switchdev: add offload_fwd_mark generator helper

skb->offload_fwd_mark and dev->offload_fwd_mark are 32-bit and should be
unique for device and may even be unique for a sub-set of ports within
device, so add switchdev helper function to generate unique marks based on
port's switch ID and group_ifindex.  group_ifindex would typically be the
container dev's ifindex, such as the bridge's ifindex.

The generator uses a global hash table to store offload_fwd_marks hashed by
{switch ID, group_ifindex} key.

Signed-off-by: Scott Feldman <sfeldma@gmail.com>
Acked-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/switchdev.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/net')

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index d5671f118bfc..89da8934519b 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -157,6 +157,9 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
 int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
 			    struct net_device *dev,
 			    struct net_device *filter_dev, int idx);
+void switchdev_port_fwd_mark_set(struct net_device *dev,
+				 struct net_device *group_dev,
+				 bool joining);
 
 #else
 
@@ -271,6 +274,12 @@ static inline int switchdev_port_fdb_dump(struct sk_buff *skb,
 	return -EOPNOTSUPP;
 }
 
+static inline void switchdev_port_fwd_mark_set(struct net_device *dev,
+					       struct net_device *group_dev,
+					       bool joining)
+{
+}
+
 #endif
 
 #endif /* _LINUX_SWITCHDEV_H_ */
-- 
cgit v1.2.3


From 499a24256862714539e902c0499b67da2bb3ab72 Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Tue, 21 Jul 2015 10:43:46 +0200
Subject: lwtunnel: infrastructure for handling light weight tunnels like mpls

Provides infrastructure to parse/dump/store encap information for
light weight tunnels like mpls. Encap information for such tunnels
is associated with fib routes.

This infrastructure is based on previous suggestions from
Eric Biederman to follow the xfrm infrastructure.

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/lwtunnel.h | 132 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 132 insertions(+)
 create mode 100644 include/net/lwtunnel.h

(limited to 'include/net')

diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
new file mode 100644
index 000000000000..df24b3611ff4
--- /dev/null
+++ b/include/net/lwtunnel.h
@@ -0,0 +1,132 @@
+#ifndef __NET_LWTUNNEL_H
+#define __NET_LWTUNNEL_H 1
+
+#include <linux/lwtunnel.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/types.h>
+#include <net/route.h>
+
+#define LWTUNNEL_HASH_BITS   7
+#define LWTUNNEL_HASH_SIZE   (1 << LWTUNNEL_HASH_BITS)
+
+/* lw tunnel state flags */
+#define LWTUNNEL_STATE_OUTPUT_REDIRECT 0x1
+
+struct lwtunnel_state {
+	__u16		type;
+	__u16		flags;
+	atomic_t	refcnt;
+	int             len;
+	__u8            data[0];
+};
+
+struct lwtunnel_encap_ops {
+	int (*build_state)(struct net_device *dev, struct nlattr *encap,
+			   struct lwtunnel_state **ts);
+	int (*output)(struct sock *sk, struct sk_buff *skb);
+	int (*fill_encap)(struct sk_buff *skb,
+			  struct lwtunnel_state *lwtstate);
+	int (*get_encap_size)(struct lwtunnel_state *lwtstate);
+	int (*cmp_encap)(struct lwtunnel_state *a, struct lwtunnel_state *b);
+};
+
+extern const struct lwtunnel_encap_ops __rcu *
+		lwtun_encaps[LWTUNNEL_ENCAP_MAX+1];
+
+#ifdef CONFIG_LWTUNNEL
+static inline void lwtunnel_state_get(struct lwtunnel_state *lws)
+{
+	atomic_inc(&lws->refcnt);
+}
+
+static inline void lwtunnel_state_put(struct lwtunnel_state *lws)
+{
+	if (!lws)
+		return;
+
+	if (atomic_dec_and_test(&lws->refcnt))
+		kfree(lws);
+}
+
+static inline bool lwtunnel_output_redirect(struct lwtunnel_state *lwtstate)
+{
+	if (lwtstate && (lwtstate->flags & LWTUNNEL_STATE_OUTPUT_REDIRECT))
+		return true;
+
+	return false;
+}
+
+int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op,
+			   unsigned int num);
+int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op,
+			   unsigned int num);
+int lwtunnel_build_state(struct net_device *dev, u16 encap_type,
+			 struct nlattr *encap,
+			 struct lwtunnel_state **lws);
+int lwtunnel_fill_encap(struct sk_buff *skb,
+			struct lwtunnel_state *lwtstate);
+int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate);
+struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len);
+int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b);
+
+#else
+
+static inline void lwtunnel_state_get(struct lwtunnel_state *lws)
+{
+}
+
+static inline void lwtunnel_state_put(struct lwtunnel_state *lws)
+{
+}
+
+static inline bool lwtunnel_output_redirect(struct lwtunnel_state *lwtstate)
+{
+	return false;
+}
+
+static inline int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op,
+					 unsigned int num)
+{
+	return -EOPNOTSUPP;
+
+}
+
+static inline int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op,
+					 unsigned int num)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int lwtunnel_build_state(struct net_device *dev, u16 encap_type,
+				       struct nlattr *encap,
+				       struct lwtunnel_state **lws)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int lwtunnel_fill_encap(struct sk_buff *skb,
+				      struct lwtunnel_state *lwtstate)
+{
+	return 0;
+}
+
+static inline int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate)
+{
+	return 0;
+}
+
+static inline struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len)
+{
+	return NULL;
+}
+
+static inline int lwtunnel_cmp_encap(struct lwtunnel_state *a,
+				     struct lwtunnel_state *b)
+{
+	return 0;
+}
+
+#endif
+
+#endif /* __NET_LWTUNNEL_H */
-- 
cgit v1.2.3


From 571e722676fe386bb66f72a75b64a6ebf535c077 Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Tue, 21 Jul 2015 10:43:47 +0200
Subject: ipv4: support for fib route lwtunnel encap attributes

This patch adds support in ipv4 fib functions to parse user
provided encap attributes and attach encap state data to fib_nh
and rtable.

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_fib.h | 5 ++++-
 include/net/route.h  | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 49c142bdf01e..5e0196084f1e 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -44,7 +44,9 @@ struct fib_config {
 	u32			fc_flow;
 	u32			fc_nlflags;
 	struct nl_info		fc_nlinfo;
- };
+	struct nlattr		*fc_encap;
+	u16			fc_encap_type;
+};
 
 struct fib_info;
 struct rtable;
@@ -89,6 +91,7 @@ struct fib_nh {
 	struct rtable __rcu * __percpu *nh_pcpu_rth_output;
 	struct rtable __rcu	*nh_rth_input;
 	struct fnhe_hash_bucket	__rcu *nh_exceptions;
+	struct lwtunnel_state	*nh_lwtstate;
 };
 
 /*
diff --git a/include/net/route.h b/include/net/route.h
index fe22d03afb6a..2d45f419477f 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -66,6 +66,7 @@ struct rtable {
 
 	struct list_head	rt_uncached;
 	struct uncached_list	*rt_uncached_list;
+	struct lwtunnel_state   *rt_lwtstate;
 };
 
 static inline bool rt_is_input_route(const struct rtable *rt)
-- 
cgit v1.2.3


From 19e42e45150672124b6a4341e2bc7982d247f0ac Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Tue, 21 Jul 2015 10:43:48 +0200
Subject: ipv6: support for fib route lwtunnel encap attributes

This patch adds support in ipv6 fib functions to parse Netlink
RTA encap attributes and attach encap state data to rt6_info.

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_fib.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/net')

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 3b76849c190f..276328e3daa6 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -51,6 +51,8 @@ struct fib6_config {
 	struct nlattr	*fc_mp;
 
 	struct nl_info	fc_nlinfo;
+	struct nlattr	*fc_encap;
+	u16		fc_encap_type;
 };
 
 struct fib6_node {
@@ -131,6 +133,7 @@ struct rt6_info {
 	/* more non-fragment space at head required */
 	unsigned short			rt6i_nfheader_len;
 	u8				rt6i_protocol;
+	struct lwtunnel_state		*rt6i_lwtstate;
 };
 
 static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst)
-- 
cgit v1.2.3


From ffce41962ef64b8e685e5b621caf24bf381addd9 Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Tue, 21 Jul 2015 10:43:49 +0200
Subject: lwtunnel: support dst output redirect function

This patch introduces lwtunnel_output function to call corresponding
lwtunnels output function to xmit the packet.

It adds two variants lwtunnel_output and lwtunnel_output6 for ipv4 and
ipv6 respectively today. But this is subject to change when lwtstate will
reside in dst or dst_metadata (as per upstream discussions).

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/lwtunnel.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/net')

diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
index df24b3611ff4..918e03c1dafa 100644
--- a/include/net/lwtunnel.h
+++ b/include/net/lwtunnel.h
@@ -69,6 +69,8 @@ int lwtunnel_fill_encap(struct sk_buff *skb,
 int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate);
 struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len);
 int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b);
+int lwtunnel_output(struct sock *sk, struct sk_buff *skb);
+int lwtunnel_output6(struct sock *sk, struct sk_buff *skb);
 
 #else
 
@@ -127,6 +129,16 @@ static inline int lwtunnel_cmp_encap(struct lwtunnel_state *a,
 	return 0;
 }
 
+static inline int lwtunnel_output(struct sock *sk, struct sk_buff *skb)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int lwtunnel_output6(struct sock *sk, struct sk_buff *skb)
+{
+	return -EOPNOTSUPP;
+}
+
 #endif
 
 #endif /* __NET_LWTUNNEL_H */
-- 
cgit v1.2.3


From e3e4712ec0961ed586a8db340bd994c4ad7f5dba Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Tue, 21 Jul 2015 10:43:53 +0200
Subject: mpls: ip tunnel support

This implementation uses lwtunnel infrastructure to register
hooks for mpls tunnel encaps.

It picks cues from iptunnel_encaps infrastructure and previous
mpls iptunnel RFC patches from Eric W. Biederman and Robert Shearman

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/mpls_iptunnel.h | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 include/net/mpls_iptunnel.h

(limited to 'include/net')

diff --git a/include/net/mpls_iptunnel.h b/include/net/mpls_iptunnel.h
new file mode 100644
index 000000000000..4757997f76ed
--- /dev/null
+++ b/include/net/mpls_iptunnel.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2015 Cumulus Networks, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef _NET_MPLS_IPTUNNEL_H
+#define _NET_MPLS_IPTUNNEL_H 1
+
+#define MAX_NEW_LABELS 2
+
+struct mpls_iptunnel_encap {
+	u32	label[MAX_NEW_LABELS];
+	u32	labels;
+};
+
+static inline struct mpls_iptunnel_encap *mpls_lwtunnel_encap(struct lwtunnel_state *lwtstate)
+{
+	return (struct mpls_iptunnel_encap *)lwtstate->data;
+}
+
+#endif
-- 
cgit v1.2.3


From 1d8fff907342d2339796dbd27ea47d0e76a6a2d0 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 21 Jul 2015 10:43:54 +0200
Subject: ip_tunnel: Make ovs_tunnel_info and ovs_key_ipv4_tunnel generic

Rename the tunnel metadata data structures currently internal to
OVS and make them generic for use by all IP tunnels.

Both structures are kernel internal and will stay that way. Their
members are exposed to user space through individual Netlink
attributes by OVS. It will therefore be possible to extend/modify
these structures without affecting user ABI.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_tunnels.h | 63 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)

(limited to 'include/net')

diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index d8214cb88bbc..6b9d559ce5f5 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -22,6 +22,28 @@
 /* Keep error state on tunnel for 30 sec */
 #define IPTUNNEL_ERR_TIMEO	(30*HZ)
 
+/* Used to memset ip_tunnel padding. */
+#define IP_TUNNEL_KEY_SIZE					\
+	(offsetof(struct ip_tunnel_key, tp_dst) +		\
+	 FIELD_SIZEOF(struct ip_tunnel_key, tp_dst))
+
+struct ip_tunnel_key {
+	__be64			tun_id;
+	__be32			ipv4_src;
+	__be32			ipv4_dst;
+	__be16			tun_flags;
+	__u8			ipv4_tos;
+	__u8			ipv4_ttl;
+	__be16			tp_src;
+	__be16			tp_dst;
+} __packed __aligned(4); /* Minimize padding. */
+
+struct ip_tunnel_info {
+	struct ip_tunnel_key	key;
+	const void		*options;
+	u8			options_len;
+};
+
 /* 6rd prefix/relay information */
 #ifdef CONFIG_IPV6_SIT_6RD
 struct ip_tunnel_6rd_parm {
@@ -136,6 +158,47 @@ int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *op,
 int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *op,
 			    unsigned int num);
 
+static inline void __ip_tunnel_info_init(struct ip_tunnel_info *tun_info,
+					 __be32 saddr, __be32 daddr,
+					 u8 tos, u8 ttl,
+					 __be16 tp_src, __be16 tp_dst,
+					 __be64 tun_id, __be16 tun_flags,
+					 const void *opts, u8 opts_len)
+{
+	tun_info->key.tun_id = tun_id;
+	tun_info->key.ipv4_src = saddr;
+	tun_info->key.ipv4_dst = daddr;
+	tun_info->key.ipv4_tos = tos;
+	tun_info->key.ipv4_ttl = ttl;
+	tun_info->key.tun_flags = tun_flags;
+
+	/* For the tunnel types on the top of IPsec, the tp_src and tp_dst of
+	 * the upper tunnel are used.
+	 * E.g: GRE over IPSEC, the tp_src and tp_port are zero.
+	 */
+	tun_info->key.tp_src = tp_src;
+	tun_info->key.tp_dst = tp_dst;
+
+	/* Clear struct padding. */
+	if (sizeof(tun_info->key) != IP_TUNNEL_KEY_SIZE)
+		memset((unsigned char *)&tun_info->key + IP_TUNNEL_KEY_SIZE,
+		       0, sizeof(tun_info->key) - IP_TUNNEL_KEY_SIZE);
+
+	tun_info->options = opts;
+	tun_info->options_len = opts_len;
+}
+
+static inline void ip_tunnel_info_init(struct ip_tunnel_info *tun_info,
+				       const struct iphdr *iph,
+				       __be16 tp_src, __be16 tp_dst,
+				       __be64 tun_id, __be16 tun_flags,
+				       const void *opts, u8 opts_len)
+{
+	__ip_tunnel_info_init(tun_info, iph->saddr, iph->daddr,
+			      iph->tos, iph->ttl, tp_src, tp_dst,
+			      tun_id, tun_flags, opts, opts_len);
+}
+
 #ifdef CONFIG_INET
 
 int ip_tunnel_init(struct net_device *dev);
-- 
cgit v1.2.3


From f38a9eb1f77b296ff07e000823884a0f64d67b2a Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 21 Jul 2015 10:43:56 +0200
Subject: dst: Metadata destinations

Introduces a new dst_metadata which enables to carry per packet metadata
between forwarding and processing elements via the skb->dst pointer.

The structure is set up to be a union. Thus, each separate type of
metadata requires its own dst instance. If demand arises to carry
multiple types of metadata concurrently, metadata dst entries can be
made stackable.

The metadata dst entry is refcnt'ed as expected for now but a non
reference counted use is possible if the reference is forced before
queueing the skb.

In order to allow allocating dsts with variable length, the existing
dst_alloc() is split into a dst_alloc() and dst_init() function. The
existing dst_init() function to initialize the subsystem is being
renamed to dst_subsys_init() to make it clear what is what.

The check before ip_route_input() is changed to ignore metadata dsts
and drop the dst inside the routing function thus allowing to interpret
metadata in a later commit.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst.h          |  6 +++++-
 include/net/dst_metadata.h | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 1 deletion(-)
 create mode 100644 include/net/dst_metadata.h

(limited to 'include/net')

diff --git a/include/net/dst.h b/include/net/dst.h
index 2bc73f8a00a9..2578811cef51 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -57,6 +57,7 @@ struct dst_entry {
 #define DST_FAKE_RTABLE		0x0040
 #define DST_XFRM_TUNNEL		0x0080
 #define DST_XFRM_QUEUE		0x0100
+#define DST_METADATA		0x0200
 
 	unsigned short		pending_confirm;
 
@@ -356,6 +357,9 @@ static inline int dst_discard(struct sk_buff *skb)
 }
 void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_ref,
 		int initial_obsolete, unsigned short flags);
+void dst_init(struct dst_entry *dst, struct dst_ops *ops,
+	      struct net_device *dev, int initial_ref, int initial_obsolete,
+	      unsigned short flags);
 void __dst_free(struct dst_entry *dst);
 struct dst_entry *dst_destroy(struct dst_entry *dst);
 
@@ -457,7 +461,7 @@ static inline struct dst_entry *dst_check(struct dst_entry *dst, u32 cookie)
 	return dst;
 }
 
-void dst_init(void);
+void dst_subsys_init(void);
 
 /* Flags for xfrm_lookup flags argument. */
 enum {
diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h
new file mode 100644
index 000000000000..4f7694f3c7d0
--- /dev/null
+++ b/include/net/dst_metadata.h
@@ -0,0 +1,32 @@
+#ifndef __NET_DST_METADATA_H
+#define __NET_DST_METADATA_H 1
+
+#include <linux/skbuff.h>
+#include <net/ip_tunnels.h>
+#include <net/dst.h>
+
+struct metadata_dst {
+	struct dst_entry		dst;
+	size_t				opts_len;
+};
+
+static inline struct metadata_dst *skb_metadata_dst(struct sk_buff *skb)
+{
+	struct metadata_dst *md_dst = (struct metadata_dst *) skb_dst(skb);
+
+	if (md_dst && md_dst->dst.flags & DST_METADATA)
+		return md_dst;
+
+	return NULL;
+}
+
+static inline bool skb_valid_dst(const struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb_dst(skb);
+
+	return dst && !(dst->flags & DST_METADATA);
+}
+
+struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags);
+
+#endif /* __NET_DST_METADATA_H */
-- 
cgit v1.2.3


From ee122c79d4227f6ec642157834b6a90fcffa4382 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 21 Jul 2015 10:43:58 +0200
Subject: vxlan: Flow based tunneling

Allows putting a VXLAN device into a new flow-based mode in which
skbs with a ip_tunnel_info dst metadata attached will be encapsulated
according to the instructions stored in there with the VXLAN device
defaults taken into consideration.

Similar on the receive side, if the VXLAN_F_COLLECT_METADATA flag is
set, the packet processing will populate a ip_tunnel_info struct for
each packet received and attach it to the skb using the new metadata
dst.  The metadata structure will contain the outer header and tunnel
header fields which have been stripped off. Layers further up in the
stack such as routing, tc or netfitler can later match on these fields
and perform forwarding. It is the responsibility of upper layers to
ensure that the flag is set if the metadata is needed. The flag limits
the additional cost of metadata collecting based on demand.

This prepares the VXLAN device to be steered by the routing and other
subsystems which allows to support encapsulation for a large number
of tunnel endpoints and tunnel ids through a single net_device which
improves the scalability.

It also allows for OVS to leverage this mode which in turn allows for
the removal of the OVS specific VXLAN code.

Because the skb is currently scrubed in vxlan_rcv(), the attachment of
the new dst metadata is postponed until after scrubing which requires
the temporary addition of a new member to vxlan_metadata. This member
is removed again in a later commit after the indirect VXLAN receive API
has been removed.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst_metadata.h | 13 +++++++++++++
 include/net/ip_tunnels.h   | 14 ++++++++++++++
 include/net/vxlan.h        | 10 +++++++++-
 3 files changed, 36 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h
index 4f7694f3c7d0..e843937fb30a 100644
--- a/include/net/dst_metadata.h
+++ b/include/net/dst_metadata.h
@@ -8,6 +8,9 @@
 struct metadata_dst {
 	struct dst_entry		dst;
 	size_t				opts_len;
+	union {
+		struct ip_tunnel_info	tun_info;
+	} u;
 };
 
 static inline struct metadata_dst *skb_metadata_dst(struct sk_buff *skb)
@@ -20,6 +23,16 @@ static inline struct metadata_dst *skb_metadata_dst(struct sk_buff *skb)
 	return NULL;
 }
 
+static inline struct ip_tunnel_info *skb_tunnel_info(struct sk_buff *skb)
+{
+	struct metadata_dst *md_dst = skb_metadata_dst(skb);
+
+	if (md_dst)
+		return &md_dst->u.tun_info;
+
+	return NULL;
+}
+
 static inline bool skb_valid_dst(const struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 6b9d559ce5f5..d11530f1c1e2 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -38,10 +38,19 @@ struct ip_tunnel_key {
 	__be16			tp_dst;
 } __packed __aligned(4); /* Minimize padding. */
 
+/* Indicates whether the tunnel info structure represents receive
+ * or transmit tunnel parameters.
+ */
+enum {
+	IP_TUNNEL_INFO_RX,
+	IP_TUNNEL_INFO_TX,
+};
+
 struct ip_tunnel_info {
 	struct ip_tunnel_key	key;
 	const void		*options;
 	u8			options_len;
+	u8			mode;
 };
 
 /* 6rd prefix/relay information */
@@ -284,6 +293,11 @@ static inline void iptunnel_xmit_stats(int err,
 	}
 }
 
+static inline void *ip_tunnel_info_opts(struct ip_tunnel_info *info, size_t n)
+{
+	return info + 1;
+}
+
 #endif /* CONFIG_INET */
 
 #endif /* __NET_IP_TUNNELS_H */
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 0082b5d33d7d..80a2da29e088 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -7,6 +7,7 @@
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
 #include <linux/udp.h>
+#include <net/dst_metadata.h>
 
 #define VNI_HASH_BITS	10
 #define VNI_HASH_SIZE	(1<<VNI_HASH_BITS)
@@ -97,6 +98,9 @@ struct vxlanhdr {
 struct vxlan_metadata {
 	__be32		vni;
 	u32		gbp;
+
+	/* Temporary until vxlan_rcv() API is gone */
+	struct metadata_dst *tun_dst;
 };
 
 struct vxlan_sock;
@@ -130,6 +134,8 @@ struct vxlan_sock {
 #define VXLAN_F_REMCSUM_RX		0x400
 #define VXLAN_F_GBP			0x800
 #define VXLAN_F_REMCSUM_NOPARTIAL	0x1000
+#define VXLAN_F_COLLECT_METADATA	0x2000
+#define VXLAN_F_FLOW_BASED		0x4000
 
 /* Flags that are used in the receive path. These flags must match in
  * order for a socket to be shareable
@@ -137,7 +143,9 @@ struct vxlan_sock {
 #define VXLAN_F_RCV_FLAGS		(VXLAN_F_GBP |			\
 					 VXLAN_F_UDP_ZERO_CSUM6_RX |	\
 					 VXLAN_F_REMCSUM_RX |		\
-					 VXLAN_F_REMCSUM_NOPARTIAL)
+					 VXLAN_F_REMCSUM_NOPARTIAL |	\
+					 VXLAN_F_COLLECT_METADATA |	\
+					 VXLAN_F_FLOW_BASED)
 
 struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
 				  vxlan_rcv_t *rcv, void *data,
-- 
cgit v1.2.3


From 1b7179d3adff0ab71f85ee24d7de28ccb7734b89 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 21 Jul 2015 10:43:59 +0200
Subject: route: Extend flow representation with tunnel key

Add a new flowi_tunnel structure which is a subset of ip_tunnel_key to
allow routes to match on tunnel metadata. For now, the tunnel id is
added to flowi_tunnel which allows for routes to be bound to specific
virtual tunnels.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/net')

diff --git a/include/net/flow.h b/include/net/flow.h
index 8109a159d1b3..3098ae33a178 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -19,6 +19,10 @@
 
 #define LOOPBACK_IFINDEX	1
 
+struct flowi_tunnel {
+	__be64			tun_id;
+};
+
 struct flowi_common {
 	int	flowic_oif;
 	int	flowic_iif;
@@ -30,6 +34,7 @@ struct flowi_common {
 #define FLOWI_FLAG_ANYSRC		0x01
 #define FLOWI_FLAG_KNOWN_NH		0x02
 	__u32	flowic_secid;
+	struct flowi_tunnel flowic_tun_key;
 };
 
 union flowi_uli {
@@ -66,6 +71,7 @@ struct flowi4 {
 #define flowi4_proto		__fl_common.flowic_proto
 #define flowi4_flags		__fl_common.flowic_flags
 #define flowi4_secid		__fl_common.flowic_secid
+#define flowi4_tun_key		__fl_common.flowic_tun_key
 
 	/* (saddr,daddr) must be grouped, same order as in IP header */
 	__be32			saddr;
@@ -95,6 +101,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif,
 	fl4->flowi4_proto = proto;
 	fl4->flowi4_flags = flags;
 	fl4->flowi4_secid = 0;
+	fl4->flowi4_tun_key.tun_id = 0;
 	fl4->daddr = daddr;
 	fl4->saddr = saddr;
 	fl4->fl4_dport = dport;
@@ -165,6 +172,7 @@ struct flowi {
 #define flowi_proto	u.__fl_common.flowic_proto
 #define flowi_flags	u.__fl_common.flowic_flags
 #define flowi_secid	u.__fl_common.flowic_secid
+#define flowi_tun_key	u.__fl_common.flowic_tun_key
 } __attribute__((__aligned__(BITS_PER_LONG/8)));
 
 static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4)
-- 
cgit v1.2.3


From 3093fbe7ff4bc7d1571fc217dade1cf80330a714 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 21 Jul 2015 10:44:00 +0200
Subject: route: Per route IP tunnel metadata via lightweight tunnel

This introduces a new IP tunnel lightweight tunnel type which allows
to specify IP tunnel instructions per route. Only IPv4 is supported
at this point.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst_metadata.h | 12 +++++++++++-
 include/net/ip_tunnels.h   |  7 ++++++-
 2 files changed, 17 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h
index e843937fb30a..7b0306894663 100644
--- a/include/net/dst_metadata.h
+++ b/include/net/dst_metadata.h
@@ -23,13 +23,23 @@ static inline struct metadata_dst *skb_metadata_dst(struct sk_buff *skb)
 	return NULL;
 }
 
-static inline struct ip_tunnel_info *skb_tunnel_info(struct sk_buff *skb)
+static inline struct ip_tunnel_info *skb_tunnel_info(struct sk_buff *skb,
+						     int family)
 {
 	struct metadata_dst *md_dst = skb_metadata_dst(skb);
+	struct rtable *rt;
 
 	if (md_dst)
 		return &md_dst->u.tun_info;
 
+	switch (family) {
+	case AF_INET:
+		rt = (struct rtable *)skb_dst(skb);
+		if (rt && rt->rt_lwtstate)
+			return lwt_tun_info(rt->rt_lwtstate);
+		break;
+	}
+
 	return NULL;
 }
 
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index d11530f1c1e2..0b7e18cfa0b4 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -9,9 +9,9 @@
 #include <net/dsfield.h>
 #include <net/gro_cells.h>
 #include <net/inet_ecn.h>
-#include <net/ip.h>
 #include <net/netns/generic.h>
 #include <net/rtnetlink.h>
+#include <net/lwtunnel.h>
 
 #if IS_ENABLED(CONFIG_IPV6)
 #include <net/ipv6.h>
@@ -298,6 +298,11 @@ static inline void *ip_tunnel_info_opts(struct ip_tunnel_info *info, size_t n)
 	return info + 1;
 }
 
+static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate)
+{
+	return (struct ip_tunnel_info *)lwtstate->data;
+}
+
 #endif /* CONFIG_INET */
 
 #endif /* __NET_IP_TUNNELS_H */
-- 
cgit v1.2.3


From e7030878fc8448492b6e5cecd574043f63271298 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 21 Jul 2015 10:44:01 +0200
Subject: fib: Add fib rule match on tunnel id

This add the ability to select a routing table based on the tunnel
id which allows to maintain separate routing tables for each virtual
tunnel network.

ip rule add from all tunnel-id 100 lookup 100
ip rule add from all tunnel-id 200 lookup 200

A new static key controls the collection of metadata at tunnel level
upon demand.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/fib_rules.h  |  1 +
 include/net/ip_tunnels.h | 11 +++++++++++
 2 files changed, 12 insertions(+)

(limited to 'include/net')

diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 903a55efbffe..4e8f804f4589 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -19,6 +19,7 @@ struct fib_rule {
 	u8			action;
 	/* 3 bytes hole, try to use */
 	u32			target;
+	__be64			tun_id;
 	struct fib_rule __rcu	*ctarget;
 	struct net		*fr_net;
 
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 0b7e18cfa0b4..0a5a7763eec2 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -303,6 +303,17 @@ static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstat
 	return (struct ip_tunnel_info *)lwtstate->data;
 }
 
+extern struct static_key ip_tunnel_metadata_cnt;
+
+/* Returns > 0 if metadata should be collected */
+static inline int ip_tunnel_collect_metadata(void)
+{
+	return static_key_false(&ip_tunnel_metadata_cnt);
+}
+
+void ip_tunnel_need_metadata(void);
+void ip_tunnel_unneed_metadata(void);
+
 #endif /* CONFIG_INET */
 
 #endif /* __NET_IP_TUNNELS_H */
-- 
cgit v1.2.3


From 0dfbdf4102b9303d3ddf2177c0220098ff99f6de Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 21 Jul 2015 10:44:02 +0200
Subject: vxlan: Factor out device configuration

This factors out the device configuration out of the RTNL newlink
API which allows for in-kernel creation of VXLAN net_devices.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/vxlan.h | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

(limited to 'include/net')

diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 80a2da29e088..19535f85eb2c 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -95,6 +95,11 @@ struct vxlanhdr {
 #define VXLAN_VNI_MASK  (VXLAN_VID_MASK << 8)
 #define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr))
 
+#define VNI_HASH_BITS	10
+#define VNI_HASH_SIZE	(1<<VNI_HASH_BITS)
+#define FDB_HASH_BITS	8
+#define FDB_HASH_SIZE	(1<<FDB_HASH_BITS)
+
 struct vxlan_metadata {
 	__be32		vni;
 	u32		gbp;
@@ -121,6 +126,57 @@ struct vxlan_sock {
 	u32		  flags;
 };
 
+union vxlan_addr {
+	struct sockaddr_in sin;
+	struct sockaddr_in6 sin6;
+	struct sockaddr sa;
+};
+
+struct vxlan_rdst {
+	union vxlan_addr	 remote_ip;
+	__be16			 remote_port;
+	u32			 remote_vni;
+	u32			 remote_ifindex;
+	struct list_head	 list;
+	struct rcu_head		 rcu;
+};
+
+struct vxlan_config {
+	union vxlan_addr	remote_ip;
+	union vxlan_addr	saddr;
+	u32			vni;
+	int			remote_ifindex;
+	int			mtu;
+	__be16			dst_port;
+	__u16			port_min;
+	__u16			port_max;
+	__u8			tos;
+	__u8			ttl;
+	u32			flags;
+	unsigned long		age_interval;
+	unsigned int		addrmax;
+	bool			no_share;
+};
+
+/* Pseudo network device */
+struct vxlan_dev {
+	struct hlist_node hlist;	/* vni hash table */
+	struct list_head  next;		/* vxlan's per namespace list */
+	struct vxlan_sock *vn_sock;	/* listening socket */
+	struct net_device *dev;
+	struct net	  *net;		/* netns for packet i/o */
+	struct vxlan_rdst default_dst;	/* default destination */
+	u32		  flags;	/* VXLAN_F_* in vxlan.h */
+
+	struct timer_list age_timer;
+	spinlock_t	  hash_lock;
+	unsigned int	  addrcnt;
+
+	struct vxlan_config	cfg;
+
+	struct hlist_head fdb_head[FDB_HASH_SIZE];
+};
+
 #define VXLAN_F_LEARN			0x01
 #define VXLAN_F_PROXY			0x02
 #define VXLAN_F_RSC			0x04
@@ -151,6 +207,9 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
 				  vxlan_rcv_t *rcv, void *data,
 				  bool no_share, u32 flags);
 
+struct net_device *vxlan_dev_create(struct net *net, const char *name,
+				    u8 name_assign_type, struct vxlan_config *conf);
+
 void vxlan_sock_release(struct vxlan_sock *vs);
 
 int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
-- 
cgit v1.2.3


From 614732eaa12dd462c0ab274700bed14f36afea5e Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 21 Jul 2015 10:44:06 +0200
Subject: openvswitch: Use regular VXLAN net_device device

This gets rid of all OVS specific VXLAN code in the receive and
transmit path by using a VXLAN net_device to represent the vport.
Only a small shim layer remains which takes care of handling the
VXLAN specific OVS Netlink configuration.

Unexports vxlan_sock_add(), vxlan_sock_release(), vxlan_xmit_skb()
since they are no longer needed.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/rtnetlink.h |  1 +
 include/net/vxlan.h     | 24 ++++--------------------
 2 files changed, 5 insertions(+), 20 deletions(-)

(limited to 'include/net')

diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index 343d922d15c2..18fdb98185ab 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -141,6 +141,7 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname,
 				    unsigned char name_assign_type,
 				    const struct rtnl_link_ops *ops,
 				    struct nlattr *tb[]);
+int rtnl_delete_link(struct net_device *dev);
 int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm);
 
 int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len);
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 19535f85eb2c..eb8d721cdb67 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -101,22 +101,12 @@ struct vxlanhdr {
 #define FDB_HASH_SIZE	(1<<FDB_HASH_BITS)
 
 struct vxlan_metadata {
-	__be32		vni;
 	u32		gbp;
-
-	/* Temporary until vxlan_rcv() API is gone */
-	struct metadata_dst *tun_dst;
 };
 
-struct vxlan_sock;
-typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb,
-			   struct vxlan_metadata *md);
-
 /* per UDP socket information */
 struct vxlan_sock {
 	struct hlist_node hlist;
-	vxlan_rcv_t	 *rcv;
-	void		 *data;
 	struct work_struct del_work;
 	struct socket	 *sock;
 	struct rcu_head	  rcu;
@@ -203,19 +193,13 @@ struct vxlan_dev {
 					 VXLAN_F_COLLECT_METADATA |	\
 					 VXLAN_F_FLOW_BASED)
 
-struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
-				  vxlan_rcv_t *rcv, void *data,
-				  bool no_share, u32 flags);
-
 struct net_device *vxlan_dev_create(struct net *net, const char *name,
 				    u8 name_assign_type, struct vxlan_config *conf);
 
-void vxlan_sock_release(struct vxlan_sock *vs);
-
-int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
-		   __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
-		   __be16 src_port, __be16 dst_port, struct vxlan_metadata *md,
-		   bool xnet, u32 vxflags);
+static inline __be16 vxlan_dev_dst_port(struct vxlan_dev *vxlan)
+{
+	return inet_sk(vxlan->vn_sock->sock->sk)->inet_sport;
+}
 
 static inline netdev_features_t vxlan_features_check(struct sk_buff *skb,
 						     netdev_features_t features)
-- 
cgit v1.2.3


From e181a5430491f038c198f0eacc3142d6e871c2da Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Sun, 19 Jul 2015 22:21:13 +0200
Subject: net: #ifdefify sk_classid member of struct sock

The sk_classid member is only required when CONFIG_CGROUP_NET_CLASSID is
enabled. #ifdefify it to reduce the size of struct sock on 32 bit
systems, at least.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/sock.h b/include/net/sock.h
index 05a8c1aea251..4353ef70bf48 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -429,7 +429,9 @@ struct sock {
 	void			*sk_security;
 #endif
 	__u32			sk_mark;
+#ifdef CONFIG_CGROUP_NET_CLASSID
 	u32			sk_classid;
+#endif
 	struct cg_proto		*sk_cgrp;
 	void			(*sk_state_change)(struct sock *sk);
 	void			(*sk_data_ready)(struct sock *sk);
-- 
cgit v1.2.3


From 052831879945be0d9fad2216b127147c565ec1b1 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Wed, 22 Jul 2015 14:43:58 +0200
Subject: ip_tunnel: Provide tunnel metadata API for CONFIG_INET=n

Account for the configuration FIB_RULES=y && INET=n as FIB_RULES can
be selected by IPV6 or DECNET without INET.

Fixes: e7030878fc84 ("fib: Add fib rule match on tunnel id")
Fixes: 3093fbe7ff4b ("route: Per route IP tunnel metadata via lightweight tunnel")
Reported-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_tunnels.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/net')

diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 0a5a7763eec2..d975b3ebd6c7 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -314,6 +314,21 @@ static inline int ip_tunnel_collect_metadata(void)
 void ip_tunnel_need_metadata(void);
 void ip_tunnel_unneed_metadata(void);
 
+#else /* CONFIG_INET */
+
+static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate)
+{
+	return NULL;
+}
+
+static inline void ip_tunnel_need_metadata(void)
+{
+}
+
+static inline void ip_tunnel_unneed_metadata(void)
+{
+}
+
 #endif /* CONFIG_INET */
 
 #endif /* __NET_IP_TUNNELS_H */
-- 
cgit v1.2.3


From 045a0fa0c5f5ea0f16c009f924ea579634afbba8 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Thu, 23 Jul 2015 10:08:44 +0200
Subject: ip_tunnel: Call ip_tunnel_core_init() from inet_init()

Convert the module_init() to a invocation from inet_init() since
ip_tunnel_core is part of the INET built-in.

Fixes: 3093fbe7ff4 ("route: Per route IP tunnel metadata via lightweight tunnel")
Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_tunnels.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index d975b3ebd6c7..47984415f5d1 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -311,6 +311,8 @@ static inline int ip_tunnel_collect_metadata(void)
 	return static_key_false(&ip_tunnel_metadata_cnt);
 }
 
+void __init ip_tunnel_core_init(void);
+
 void ip_tunnel_need_metadata(void);
 void ip_tunnel_unneed_metadata(void);
 
-- 
cgit v1.2.3


From a6cb869b3b7c16fd7c3ee766dd9f9a4fdda7edf9 Mon Sep 17 00:00:00 2001
From: Varka Bhadram <varkabhadram@gmail.com>
Date: Wed, 24 Jun 2015 11:36:35 +0200
Subject: cfg802154: add PM hooks

This patch help to implement suspend/resume in mac802154, these
hooks will be run before the device is suspended and after it
resumes.

Signed-off-by: Varka Bhadram <varkab@cdac.in>
Signed-off-by: Alexander Aring <alex.aring@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/cfg802154.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h
index 290a9a69af07..382f94b59f2f 100644
--- a/include/net/cfg802154.h
+++ b/include/net/cfg802154.h
@@ -34,6 +34,8 @@ struct cfg802154_ops {
 							   int type);
 	void	(*del_virtual_intf_deprecated)(struct wpan_phy *wpan_phy,
 					       struct net_device *dev);
+	int	(*suspend)(struct wpan_phy *wpan_phy);
+	int	(*resume)(struct wpan_phy *wpan_phy);
 	int	(*add_virtual_intf)(struct wpan_phy *wpan_phy,
 				    const char *name,
 				    unsigned char name_assign_type,
-- 
cgit v1.2.3


From 729a8989b3fa8ae7965c537dfccbd08512e84d3c Mon Sep 17 00:00:00 2001
From: Varka Bhadram <varkabhadram@gmail.com>
Date: Tue, 7 Jul 2015 10:50:42 +0530
Subject: mac802154: do not export ieee802154_rx()

Right now there are no other users for ieee802154_rx()
in kernel. So lets remove EXPORT_SYMBOL() for this.

Also it moves the function prototype from global header
file to local header file.

Signed-off-by: Varka Bhadram <varkabhadram@gmail.com>
Acked-by: Alexander Aring <alex.aring@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/mac802154.h | 17 -----------------
 1 file changed, 17 deletions(-)

(limited to 'include/net')

diff --git a/include/net/mac802154.h b/include/net/mac802154.h
index f534a46911dc..b7f99615224b 100644
--- a/include/net/mac802154.h
+++ b/include/net/mac802154.h
@@ -320,23 +320,6 @@ int ieee802154_register_hw(struct ieee802154_hw *hw);
  */
 void ieee802154_unregister_hw(struct ieee802154_hw *hw);
 
-/**
- * ieee802154_rx - receive frame
- *
- * Use this function to hand received frames to mac802154. The receive
- * buffer in @skb must start with an IEEE 802.15.4 header. In case of a
- * paged @skb is used, the driver is recommended to put the ieee802154
- * header of the frame on the linear part of the @skb to avoid memory
- * allocation and/or memcpy by the stack.
- *
- * This function may not be called in IRQ context. Calls to this function
- * for a single hardware must be synchronized against each other.
- *
- * @hw: the hardware this frame came in on
- * @skb: the buffer to receive, owned by mac802154 after this call
- */
-void ieee802154_rx(struct ieee802154_hw *hw, struct sk_buff *skb);
-
 /**
  * ieee802154_rx_irqsafe - receive frame
  *
-- 
cgit v1.2.3


From cb02a25583b59ce48267472cd092485d754964f9 Mon Sep 17 00:00:00 2001
From: Dean Jenkins <Dean_Jenkins@mentor.com>
Date: Tue, 23 Jun 2015 17:59:38 +0100
Subject: Bluetooth: __l2cap_wait_ack() use msecs_to_jiffies()

Use msecs_to_jiffies() instead of using HZ so that it
is easier to specify the time in milliseconds.

Also add a #define L2CAP_WAIT_ACK_POLL_PERIOD to specify the 200ms
polling period so that it is defined in a single place.

Signed-off-by: Dean Jenkins <Dean_Jenkins@mentor.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 2239a3753092..3dcad4159b0b 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -55,6 +55,7 @@
 #define L2CAP_INFO_TIMEOUT		msecs_to_jiffies(4000)
 #define L2CAP_MOVE_TIMEOUT		msecs_to_jiffies(4000)
 #define L2CAP_MOVE_ERTX_TIMEOUT		msecs_to_jiffies(60000)
+#define L2CAP_WAIT_ACK_POLL_PERIOD	msecs_to_jiffies(200)
 
 #define L2CAP_A2MP_DEFAULT_MTU		670
 
-- 
cgit v1.2.3


From e432c72c464d2deb6c66d1e2a5f548dc1f0ef4dc Mon Sep 17 00:00:00 2001
From: Dean Jenkins <Dean_Jenkins@mentor.com>
Date: Tue, 23 Jun 2015 17:59:39 +0100
Subject: Bluetooth: __l2cap_wait_ack() add defensive timeout

Add a timeout to prevent the do while loop running in an
infinite loop. This ensures that the channel will be
instructed to close within 10 seconds so prevents
l2cap_sock_shutdown() getting stuck forever.

Returns -ENOLINK when the timeout is reached. The channel
will be subequently closed and not all data will be ACK'ed.

Signed-off-by: Dean Jenkins <Dean_Jenkins@mentor.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 3dcad4159b0b..c98afc08cc26 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -56,6 +56,7 @@
 #define L2CAP_MOVE_TIMEOUT		msecs_to_jiffies(4000)
 #define L2CAP_MOVE_ERTX_TIMEOUT		msecs_to_jiffies(60000)
 #define L2CAP_WAIT_ACK_POLL_PERIOD	msecs_to_jiffies(200)
+#define L2CAP_WAIT_ACK_TIMEOUT		msecs_to_jiffies(10000)
 
 #define L2CAP_A2MP_DEFAULT_MTU		670
 
-- 
cgit v1.2.3


From 8757825b128e71319150219b0745d3ecb87f34aa Mon Sep 17 00:00:00 2001
From: Seungyoun Ju <sy39.ju@samsung.com>
Date: Mon, 13 Jul 2015 17:28:13 +0900
Subject: Bluetooth: hci_check_conn_params() check proper range

Slave latency range has been changed in Core Spec. 4.2 by Erratum 5419
of ESR08_V1.0.0. And it should be applied to Core Spec. 4.0 and 4.1.

Before:
   connSlaveLatency <= ((connSupervisionTimeout / connIntervalMax) - 1)

After:
   connSlaveLatency <= ((connSupervisionTimeout / (connIntervalMax*2)) - 1)

This patch makes hci_check_conn_params() check the allowable slave
latency range using the changed way.

Signed-off-by: Seungyoun Ju <sy39.ju@samsung.com>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci_core.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 3bd618d3e55d..2a6b0919e23f 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1297,7 +1297,7 @@ static inline int hci_check_conn_params(u16 min, u16 max, u16 latency,
 	if (max >= to_multiplier * 8)
 		return -EINVAL;
 
-	max_latency = (to_multiplier * 8 / max) - 1;
+	max_latency = (to_multiplier * 4 / max) - 1;
 	if (latency > 499 || latency > max_latency)
 		return -EINVAL;
 
-- 
cgit v1.2.3


From 5a6228a0b472062646434cd2536d109c102b606e Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Fri, 24 Jul 2015 12:28:36 +0200
Subject: lwtunnel: change prototype of lwtunnel_state_get()

It saves some lines and simplify a bit the code when the state is returning
by this function. It's also useful to handle a NULL entry.

To avoid too long lines, I've also renamed lwtunnel_state_get() and
lwtunnel_state_put() to lwtstate_get() and lwtstate_put().

CC: Thomas Graf <tgraf@suug.ch>
CC: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Acked-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/lwtunnel.h | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'include/net')

diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
index 918e03c1dafa..b02039081b04 100644
--- a/include/net/lwtunnel.h
+++ b/include/net/lwtunnel.h
@@ -35,12 +35,16 @@ extern const struct lwtunnel_encap_ops __rcu *
 		lwtun_encaps[LWTUNNEL_ENCAP_MAX+1];
 
 #ifdef CONFIG_LWTUNNEL
-static inline void lwtunnel_state_get(struct lwtunnel_state *lws)
+static inline struct lwtunnel_state *
+lwtstate_get(struct lwtunnel_state *lws)
 {
-	atomic_inc(&lws->refcnt);
+	if (lws)
+		atomic_inc(&lws->refcnt);
+
+	return lws;
 }
 
-static inline void lwtunnel_state_put(struct lwtunnel_state *lws)
+static inline void lwtstate_put(struct lwtunnel_state *lws)
 {
 	if (!lws)
 		return;
@@ -74,11 +78,13 @@ int lwtunnel_output6(struct sock *sk, struct sk_buff *skb);
 
 #else
 
-static inline void lwtunnel_state_get(struct lwtunnel_state *lws)
+static inline struct lwtunnel_state *
+lwtstate_get(struct lwtunnel_state *lws)
 {
+	return lws;
 }
 
-static inline void lwtunnel_state_put(struct lwtunnel_state *lws)
+static inline void lwtstate_put(struct lwtunnel_state *lws)
 {
 }
 
-- 
cgit v1.2.3


From 205845a34763432040496908c8f52f1f97e5ee62 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Fri, 24 Jul 2015 15:50:31 +0200
Subject: bonding: convert num_grat_arp to the new bonding option API

num_grat_arp wasn't converted to the new bonding option API, so do this
now and remove the specific sysfs store option in order to use the
standard one. num_grat_arp is the same as num_unsol_na so add it as an
alias with the same option settings. An important difference is the option
name which is matched in bond_sysfs_store_option().

Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Acked-by: Veaceslav Falico <vfalico@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/bond_options.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/bond_options.h b/include/net/bond_options.h
index c28aca25320e..1797235cd590 100644
--- a/include/net/bond_options.h
+++ b/include/net/bond_options.h
@@ -66,6 +66,7 @@ enum {
 	BOND_OPT_AD_ACTOR_SYS_PRIO,
 	BOND_OPT_AD_ACTOR_SYSTEM,
 	BOND_OPT_AD_USER_PORT_KEY,
+	BOND_OPT_NUM_PEER_NOTIF_ALIAS,
 	BOND_OPT_LAST
 };
 
-- 
cgit v1.2.3


From 877d1f6291f8e391237e324be58479a3e3a7407c Mon Sep 17 00:00:00 2001
From: Tom Herbert <tom@herbertland.com>
Date: Tue, 28 Jul 2015 16:02:05 -0700
Subject: net: Set sk_txhash from a random number

This patch creates sk_set_txhash and eliminates protocol specific
inet_set_txhash and ip6_set_txhash. sk_set_txhash simply sets a
random number instead of performing flow dissection. sk_set_txash
is also allowed to be called multiple times for the same socket,
we'll need this when redoing the hash for negative routing advice.

Signed-off-by: Tom Herbert <tom@herbertland.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h   | 16 ----------------
 include/net/ipv6.h | 19 -------------------
 include/net/sock.h |  8 ++++++++
 3 files changed, 8 insertions(+), 35 deletions(-)

(limited to 'include/net')

diff --git a/include/net/ip.h b/include/net/ip.h
index d5fe9f2ab699..bee5f3582e38 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -370,22 +370,6 @@ static inline void iph_to_flow_copy_v4addrs(struct flow_keys *flow,
 	flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
 }
 
-static inline void inet_set_txhash(struct sock *sk)
-{
-	struct inet_sock *inet = inet_sk(sk);
-	struct flow_keys keys;
-
-	memset(&keys, 0, sizeof(keys));
-
-	keys.addrs.v4addrs.src = inet->inet_saddr;
-	keys.addrs.v4addrs.dst = inet->inet_daddr;
-	keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
-	keys.ports.src = inet->inet_sport;
-	keys.ports.dst = inet->inet_dport;
-
-	sk->sk_txhash = flow_hash_from_keys(&keys);
-}
-
 static inline __wsum inet_gro_compute_pseudo(struct sk_buff *skb, int proto)
 {
 	const struct iphdr *iph = skb_gro_network_header(skb);
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 82dbdb092a5d..7c79798bcaab 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -707,25 +707,6 @@ static inline void iph_to_flow_copy_v6addrs(struct flow_keys *flow,
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-static inline void ip6_set_txhash(struct sock *sk)
-{
-	struct inet_sock *inet = inet_sk(sk);
-	struct ipv6_pinfo *np = inet6_sk(sk);
-	struct flow_keys keys;
-
-	memset(&keys, 0, sizeof(keys));
-
-	memcpy(&keys.addrs.v6addrs.src, &np->saddr,
-	       sizeof(keys.addrs.v6addrs.src));
-	memcpy(&keys.addrs.v6addrs.dst, &sk->sk_v6_daddr,
-	       sizeof(keys.addrs.v6addrs.dst));
-	keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
-	keys.ports.src = inet->inet_sport;
-	keys.ports.dst = inet->inet_dport;
-
-	sk->sk_txhash = flow_hash_from_keys(&keys);
-}
-
 static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb,
 					__be32 flowlabel, bool autolabel)
 {
diff --git a/include/net/sock.h b/include/net/sock.h
index 4353ef70bf48..fe735c4841f6 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1687,6 +1687,14 @@ static inline void sock_graft(struct sock *sk, struct socket *parent)
 kuid_t sock_i_uid(struct sock *sk);
 unsigned long sock_i_ino(struct sock *sk);
 
+static inline void sk_set_txhash(struct sock *sk)
+{
+	sk->sk_txhash = prandom_u32();
+
+	if (unlikely(!sk->sk_txhash))
+		sk->sk_txhash = 1;
+}
+
 static inline struct dst_entry *
 __sk_dst_get(struct sock *sk)
 {
-- 
cgit v1.2.3


From 265f94ff54d62503663d9c788ba1f082e448f8b8 Mon Sep 17 00:00:00 2001
From: Tom Herbert <tom@herbertland.com>
Date: Tue, 28 Jul 2015 16:02:06 -0700
Subject: net: Recompute sk_txhash on negative routing advice

When a connection is failing a transport protocol calls
dst_negative_advice to try to get a better route. This patch includes
changing the sk_txhash in that function. This provides a rudimentary
method to try to find a different path in the network since sk_txhash
affects ECMP on the local host and through the network (via flow labels
or UDP source port in encapsulation).

Signed-off-by: Tom Herbert <tom@herbertland.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/net')

diff --git a/include/net/sock.h b/include/net/sock.h
index fe735c4841f6..24aa75c5317a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1695,6 +1695,12 @@ static inline void sk_set_txhash(struct sock *sk)
 		sk->sk_txhash = 1;
 }
 
+static inline void sk_rethink_txhash(struct sock *sk)
+{
+	if (sk->sk_txhash)
+		sk_set_txhash(sk);
+}
+
 static inline struct dst_entry *
 __sk_dst_get(struct sock *sk)
 {
@@ -1719,6 +1725,8 @@ static inline void dst_negative_advice(struct sock *sk)
 {
 	struct dst_entry *ndst, *dst = __sk_dst_get(sk);
 
+	sk_rethink_txhash(sk);
+
 	if (dst && dst->ops->negative_advice) {
 		ndst = dst->ops->negative_advice(dst);
 
-- 
cgit v1.2.3


From 92a99bf3bae7c1267db87bb3e3babda2c6dcc8a7 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Wed, 29 Jul 2015 09:45:40 +0200
Subject: lwtunnel: Make lwtun_encaps[] static

Any external user should use the registration API instead of
accessing this directly.

Cc: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: Thomas Graf <tgraf@suug.ch>
Acked-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/lwtunnel.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
index b02039081b04..33bd30963a95 100644
--- a/include/net/lwtunnel.h
+++ b/include/net/lwtunnel.h
@@ -31,9 +31,6 @@ struct lwtunnel_encap_ops {
 	int (*cmp_encap)(struct lwtunnel_state *a, struct lwtunnel_state *b);
 };
 
-extern const struct lwtunnel_encap_ops __rcu *
-		lwtun_encaps[LWTUNNEL_ENCAP_MAX+1];
-
 #ifdef CONFIG_LWTUNNEL
 static inline struct lwtunnel_state *
 lwtstate_get(struct lwtunnel_state *lws)
-- 
cgit v1.2.3


From d3aa45ce6b94c65b83971257317867db13e5f492 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Thu, 30 Jul 2015 15:36:57 -0700
Subject: bpf: add helpers to access tunnel metadata

Introduce helpers to let eBPF programs attached to TC manipulate tunnel metadata:
bpf_skb_[gs]et_tunnel_key(skb, key, size, flags)
skb: pointer to skb
key: pointer to 'struct bpf_tunnel_key'
size: size of 'struct bpf_tunnel_key'
flags: room for future extensions

First eBPF program that uses these helpers will allocate per_cpu
metadata_dst structures that will be used on TX.
On RX metadata_dst is allocated by tunnel driver.

Typical usage for TX:
struct bpf_tunnel_key tkey;
... populate tkey ...
bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), 0);
bpf_clone_redirect(skb, vxlan_dev_ifindex, 0);

RX:
struct bpf_tunnel_key tkey = {};
bpf_skb_get_tunnel_key(skb, &tkey, sizeof(tkey), 0);
... lookup or redirect based on tkey ...

'struct bpf_tunnel_key' will be extended in the future by adding
elements to the end and the 'size' argument will indicate which fields
are populated, thereby keeping backwards compatibility.
The 'flags' argument may be used as well when the 'size' is not enough or
to indicate completely different layout of bpf_tunnel_key.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst_metadata.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h
index 7b0306894663..075f523ff23f 100644
--- a/include/net/dst_metadata.h
+++ b/include/net/dst_metadata.h
@@ -51,5 +51,6 @@ static inline bool skb_valid_dst(const struct sk_buff *skb)
 }
 
 struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags);
+struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags);
 
 #endif /* __NET_DST_METADATA_H */
-- 
cgit v1.2.3


From 343d60aada5a358ca186d6e9e353230379c426d8 Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Thu, 30 Jul 2015 13:34:53 -0700
Subject: ipv6: change ipv6_stub_impl.ipv6_dst_lookup to take net argument

This patch adds net argument to ipv6_stub_impl.ipv6_dst_lookup
for use cases where sk is not available (like mpls).
sk appears to be needed to get the namespace 'net' and is optional
otherwise. This patch series changes ipv6_stub_impl.ipv6_dst_lookup
to take net argument. sk remains optional.

All callers of ipv6_stub_impl.ipv6_dst_lookup have been modified
to pass net. I have modified them to use already available
'net' in the scope of the call. I can change them to
sock_net(sk) to avoid any unintended change in behaviour if sock
namespace is different. They dont seem to be from code inspection.

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/addrconf.h | 4 ++--
 include/net/ipv6.h     | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index def59d3a34d5..0c3ac5acb85f 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -158,8 +158,8 @@ struct ipv6_stub {
 				 const struct in6_addr *addr);
 	int (*ipv6_sock_mc_drop)(struct sock *sk, int ifindex,
 				 const struct in6_addr *addr);
-	int (*ipv6_dst_lookup)(struct sock *sk, struct dst_entry **dst,
-				struct flowi6 *fl6);
+	int (*ipv6_dst_lookup)(struct net *net, struct sock *sk,
+			       struct dst_entry **dst, struct flowi6 *fl6);
 	void (*udpv6_encap_enable)(void);
 	void (*ndisc_send_na)(struct net_device *dev, struct neighbour *neigh,
 			      const struct in6_addr *daddr,
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 7c79798bcaab..eecdfc92f807 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -813,7 +813,8 @@ static inline struct sk_buff *ip6_finish_skb(struct sock *sk)
 			      &inet6_sk(sk)->cork);
 }
 
-int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6);
+int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
+		   struct flowi6 *fl6);
 struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
 				      const struct in6_addr *final_dst);
 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
-- 
cgit v1.2.3


From 67800f9b1f4eb5bbefc32e3f5044097354bc85b3 Mon Sep 17 00:00:00 2001
From: Tom Herbert <tom@herbertland.com>
Date: Fri, 31 Jul 2015 16:52:11 -0700
Subject: ipv6: Call skb_get_hash_flowi6 to get skb->hash in ip6_make_flowlabel

We can't call skb_get_hash here since the packet is not complete to do
flow_dissector. Create hash based on flowi6 instead.

Signed-off-by: Tom Herbert <tom@herbertland.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index eecdfc92f807..3e334b33ef3a 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -708,12 +708,13 @@ static inline void iph_to_flow_copy_v6addrs(struct flow_keys *flow,
 
 #if IS_ENABLED(CONFIG_IPV6)
 static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb,
-					__be32 flowlabel, bool autolabel)
+					__be32 flowlabel, bool autolabel,
+					struct flowi6 *fl6)
 {
 	if (!flowlabel && (autolabel || net->ipv6.sysctl.auto_flowlabels)) {
 		u32 hash;
 
-		hash = skb_get_hash(skb);
+		hash = skb_get_hash_flowi6(skb, fl6);
 
 		/* Since this is being sent on the wire obfuscate hash a bit
 		 * to minimize possbility that any useful information to an
-- 
cgit v1.2.3


From 42240901f7c438636715b9cb6ed93f4441ffc091 Mon Sep 17 00:00:00 2001
From: Tom Herbert <tom@herbertland.com>
Date: Fri, 31 Jul 2015 16:52:12 -0700
Subject: ipv6: Implement different admin modes for automatic flow labels

Change the meaning of net.ipv6.auto_flowlabels to provide a mode for
automatic flow labels generation. There are four modes:

0: flow labels are disabled
1: flow labels are enabled, sockets can opt-out
2: flow labels are allowed, sockets can opt-in
3: flow labels are enabled and enforced, no opt-out for sockets

np->autoflowlabel is initialized according to the sysctl value.

Signed-off-by: Tom Herbert <tom@herbertland.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h | 59 ++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 46 insertions(+), 13 deletions(-)

(limited to 'include/net')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 3e334b33ef3a..c02c1c03363a 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -707,36 +707,69 @@ static inline void iph_to_flow_copy_v6addrs(struct flow_keys *flow,
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
+
+/* Sysctl settings for net ipv6.auto_flowlabels */
+#define IP6_AUTO_FLOW_LABEL_OFF		0
+#define IP6_AUTO_FLOW_LABEL_OPTOUT	1
+#define IP6_AUTO_FLOW_LABEL_OPTIN	2
+#define IP6_AUTO_FLOW_LABEL_FORCED	3
+
+#define IP6_AUTO_FLOW_LABEL_MAX		IP6_AUTO_FLOW_LABEL_FORCED
+
+#define IP6_DEFAULT_AUTO_FLOW_LABELS	IP6_AUTO_FLOW_LABEL_OFF
+
 static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb,
 					__be32 flowlabel, bool autolabel,
 					struct flowi6 *fl6)
 {
-	if (!flowlabel && (autolabel || net->ipv6.sysctl.auto_flowlabels)) {
-		u32 hash;
+	u32 hash;
 
-		hash = skb_get_hash_flowi6(skb, fl6);
+	if (flowlabel ||
+	    net->ipv6.sysctl.auto_flowlabels == IP6_AUTO_FLOW_LABEL_OFF ||
+	    (!autolabel &&
+	     net->ipv6.sysctl.auto_flowlabels != IP6_AUTO_FLOW_LABEL_FORCED))
+		return flowlabel;
 
-		/* Since this is being sent on the wire obfuscate hash a bit
-		 * to minimize possbility that any useful information to an
-		 * attacker is leaked. Only lower 20 bits are relevant.
-		 */
-		hash ^= hash >> 12;
+	hash = skb_get_hash_flowi6(skb, fl6);
 
-		flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK;
+	/* Since this is being sent on the wire obfuscate hash a bit
+	 * to minimize possbility that any useful information to an
+	 * attacker is leaked. Only lower 20 bits are relevant.
+	 */
+	rol32(hash, 16);
 
-		if (net->ipv6.sysctl.flowlabel_state_ranges)
-			flowlabel |= IPV6_FLOWLABEL_STATELESS_FLAG;
-	}
+	flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK;
+
+	if (net->ipv6.sysctl.flowlabel_state_ranges)
+		flowlabel |= IPV6_FLOWLABEL_STATELESS_FLAG;
 
 	return flowlabel;
 }
+
+static inline int ip6_default_np_autolabel(struct net *net)
+{
+	switch (net->ipv6.sysctl.auto_flowlabels) {
+	case IP6_AUTO_FLOW_LABEL_OFF:
+	case IP6_AUTO_FLOW_LABEL_OPTIN:
+	default:
+		return 0;
+	case IP6_AUTO_FLOW_LABEL_OPTOUT:
+	case IP6_AUTO_FLOW_LABEL_FORCED:
+		return 1;
+	}
+}
 #else
 static inline void ip6_set_txhash(struct sock *sk) { }
 static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb,
-					__be32 flowlabel, bool autolabel)
+					__be32 flowlabel, bool autolabel,
+					struct flowi6 *fl6)
 {
 	return flowlabel;
 }
+static inline int ip6_default_np_autolabel(struct net *net)
+{
+	return 0;
+}
 #endif
 
 
-- 
cgit v1.2.3


From b56774163f994efce3f5603f35aa4e677c3e725a Mon Sep 17 00:00:00 2001
From: Tom Herbert <tom@herbertland.com>
Date: Fri, 31 Jul 2015 16:52:14 -0700
Subject: ipv6: Enable auto flow labels by default

Initialize auto_flowlabels to one. This enables automatic flow labels,
individual socket may disable them using the IPV6_AUTOFLOWLABEL socket
option.

Signed-off-by: Tom Herbert <tom@herbertland.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index c02c1c03363a..711cca428cc8 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -716,7 +716,7 @@ static inline void iph_to_flow_copy_v6addrs(struct flow_keys *flow,
 
 #define IP6_AUTO_FLOW_LABEL_MAX		IP6_AUTO_FLOW_LABEL_FORCED
 
-#define IP6_DEFAULT_AUTO_FLOW_LABELS	IP6_AUTO_FLOW_LABEL_OFF
+#define IP6_DEFAULT_AUTO_FLOW_LABELS	IP6_AUTO_FLOW_LABEL_OPTOUT
 
 static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb,
 					__be32 flowlabel, bool autolabel,
-- 
cgit v1.2.3


From bbde9fc1824aab58bc78c084163007dd6c03fe5b Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sun, 31 May 2015 17:54:44 +0200
Subject: netfilter: factor out packet duplication for IPv4/IPv6

Extracted from the xtables TEE target. This creates two new modules for IPv4
and IPv6 that are shared between the TEE target and the new nf_tables dup
expressions.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/ipv4/nf_dup_ipv4.h | 7 +++++++
 include/net/netfilter/ipv6/nf_dup_ipv6.h | 7 +++++++
 2 files changed, 14 insertions(+)
 create mode 100644 include/net/netfilter/ipv4/nf_dup_ipv4.h
 create mode 100644 include/net/netfilter/ipv6/nf_dup_ipv6.h

(limited to 'include/net')

diff --git a/include/net/netfilter/ipv4/nf_dup_ipv4.h b/include/net/netfilter/ipv4/nf_dup_ipv4.h
new file mode 100644
index 000000000000..42008f10dfc4
--- /dev/null
+++ b/include/net/netfilter/ipv4/nf_dup_ipv4.h
@@ -0,0 +1,7 @@
+#ifndef _NF_DUP_IPV4_H_
+#define _NF_DUP_IPV4_H_
+
+void nf_dup_ipv4(struct sk_buff *skb, unsigned int hooknum,
+		 const struct in_addr *gw, int oif);
+
+#endif /* _NF_DUP_IPV4_H_ */
diff --git a/include/net/netfilter/ipv6/nf_dup_ipv6.h b/include/net/netfilter/ipv6/nf_dup_ipv6.h
new file mode 100644
index 000000000000..ed6bd66fa5a0
--- /dev/null
+++ b/include/net/netfilter/ipv6/nf_dup_ipv6.h
@@ -0,0 +1,7 @@
+#ifndef _NF_DUP_IPV6_H_
+#define _NF_DUP_IPV6_H_
+
+void nf_dup_ipv6(struct sk_buff *skb, unsigned int hooknum,
+		 const struct in6_addr *gw, int oif);
+
+#endif /* _NF_DUP_IPV6_H_ */
-- 
cgit v1.2.3


From d877f07112f1e5a247c6b585c971a93895c9f738 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sun, 31 May 2015 18:04:11 +0200
Subject: netfilter: nf_tables: add nft_dup expression

This new expression uses the nf_dup engine to clone packets to a given gateway.
Unlike xt_TEE, we use an index to indicate output interface which should be
fine at this stage.

Moreover, change to the preemtion-safe this_cpu_read(nf_skb_duplicated) from
nf_dup_ipv{4,6} to silence a lockdep splat.

Based on the original tee expression from Arturo Borrero Gonzalez, although
this patch has diverted quite a bit from this initial effort due to the
change to support maps.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nft_dup.h | 9 +++++++++
 1 file changed, 9 insertions(+)
 create mode 100644 include/net/netfilter/nft_dup.h

(limited to 'include/net')

diff --git a/include/net/netfilter/nft_dup.h b/include/net/netfilter/nft_dup.h
new file mode 100644
index 000000000000..6b84cf6491a2
--- /dev/null
+++ b/include/net/netfilter/nft_dup.h
@@ -0,0 +1,9 @@
+#ifndef _NFT_DUP_H_
+#define _NFT_DUP_H_
+
+struct nft_dup_inet {
+	enum nft_registers	sreg_addr:8;
+	enum nft_registers	sreg_dev:8;
+};
+
+#endif /* _NFT_DUP_H_ */
-- 
cgit v1.2.3


From 3499abb249bb5ed9d21031944bc3059ec4aa2909 Mon Sep 17 00:00:00 2001
From: Andreas Schultz <aschultz@tpip.net>
Date: Wed, 5 Aug 2015 17:51:45 +0200
Subject: netfilter: nfacct: per network namespace support

- Move the nfnl_acct_list into the network namespace, initialize
  and destroy it per namespace
- Keep track of refcnt on nfacct objects, the old logic does not
  longer work with a per namespace list
- Adjust xt_nfacct to pass the namespace when registring objects

Signed-off-by: Andreas Schultz <aschultz@tpip.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/net_namespace.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/net')

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index e951453e0a23..2dcea635ecce 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -118,6 +118,9 @@ struct net {
 #endif
 	struct sock		*nfnl;
 	struct sock		*nfnl_stash;
+#if IS_ENABLED(CONFIG_NETFILTER_NETLINK_ACCT)
+	struct list_head        nfnl_acct_list;
+#endif
 #endif
 #ifdef CONFIG_WEXT_CORE
 	struct sk_buff_head	wext_nlevents;
-- 
cgit v1.2.3


From da8b43c0e1dcea3bcac5f37ea59934ddaa137aed Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Tue, 4 Aug 2015 22:51:07 -0700
Subject: vxlan: combine VXLAN_FLOWBASED into VXLAN_COLLECT_METADATA

IFLA_VXLAN_FLOWBASED is useless without IFLA_VXLAN_COLLECT_METADATA,
so combine them into single IFLA_VXLAN_COLLECT_METADATA flag.
'flowbased' doesn't convey real meaning of the vxlan tunnel mode.
This mode can be used by routing, tc+bpf and ovs.
Only ovs is strictly flow based, so 'collect metadata' is a better
name for this tunnel mode.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/vxlan.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index eb8d721cdb67..e4534f1b2d8c 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -181,7 +181,6 @@ struct vxlan_dev {
 #define VXLAN_F_GBP			0x800
 #define VXLAN_F_REMCSUM_NOPARTIAL	0x1000
 #define VXLAN_F_COLLECT_METADATA	0x2000
-#define VXLAN_F_FLOW_BASED		0x4000
 
 /* Flags that are used in the receive path. These flags must match in
  * order for a socket to be shareable
@@ -190,8 +189,7 @@ struct vxlan_dev {
 					 VXLAN_F_UDP_ZERO_CSUM6_RX |	\
 					 VXLAN_F_REMCSUM_RX |		\
 					 VXLAN_F_REMCSUM_NOPARTIAL |	\
-					 VXLAN_F_COLLECT_METADATA |	\
-					 VXLAN_F_FLOW_BASED)
+					 VXLAN_F_COLLECT_METADATA)
 
 struct net_device *vxlan_dev_create(struct net *net, const char *name,
 				    u8 name_assign_type, struct vxlan_config *conf);
-- 
cgit v1.2.3


From 1525c386a1f01612c6f3f27241113d7fc8e6d72d Mon Sep 17 00:00:00 2001
From: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Date: Thu, 6 Aug 2015 01:44:02 -0400
Subject: net: switchdev: change fdb addr for a byte array

The address in the switchdev_obj_fdb structure is currently represented
as a pointer. Replacing it for a 6-byte array allows switchdev to carry
addresses directly read from hardware registers, not stored by the
switch chip driver (as in Rocker).

Signed-off-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/switchdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 89da8934519b..e90e1a0fa579 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -70,7 +70,7 @@ struct switchdev_obj {
 			u32 tb_id;
 		} ipv4_fib;
 		struct switchdev_obj_fdb {		/* PORT_FDB */
-			const unsigned char *addr;
+			u8 addr[ETH_ALEN];
 			u16 vid;
 		} fdb;
 	} u;
-- 
cgit v1.2.3


From 890248261a18c7ae22923095dfadea2c0a2a304a Mon Sep 17 00:00:00 2001
From: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Date: Thu, 6 Aug 2015 01:44:03 -0400
Subject: net: switchdev: support static FDB addresses

This patch adds a is_static boolean to the switchdev_obj_fdb structure,
in order to set the ndm_state to either NUD_NOARP or NUD_REACHABLE.

Signed-off-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/switchdev.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index e90e1a0fa579..0e296b82aef3 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -72,6 +72,7 @@ struct switchdev_obj {
 		struct switchdev_obj_fdb {		/* PORT_FDB */
 			u8 addr[ETH_ALEN];
 			u16 vid;
+			bool is_static;
 		} fdb;
 	} u;
 };
-- 
cgit v1.2.3


From 55045ddded0f39d84c2ca019508973be8c595a78 Mon Sep 17 00:00:00 2001
From: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Date: Thu, 6 Aug 2015 01:44:04 -0400
Subject: net: dsa: add support for switchdev FDB objects

Remove the fdb_{add,del,getnext} function pointer in favor of new
port_fdb_{add,del,getnext}.

Implement the switchdev_port_obj_{add,del,dump} functions in DSA to
support the SWITCHDEV_OBJ_PORT_FDB objects.

Signed-off-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

(limited to 'include/net')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index fbca63ba8f73..091d35f77180 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -296,12 +296,16 @@ struct dsa_switch_driver {
 				     u32 br_port_mask);
 	int	(*port_stp_update)(struct dsa_switch *ds, int port,
 				   u8 state);
-	int	(*fdb_add)(struct dsa_switch *ds, int port,
-			   const unsigned char *addr, u16 vid);
-	int	(*fdb_del)(struct dsa_switch *ds, int port,
-			   const unsigned char *addr, u16 vid);
-	int	(*fdb_getnext)(struct dsa_switch *ds, int port,
-			       unsigned char *addr, bool *is_static);
+
+	/*
+	 * Forwarding database
+	 */
+	int	(*port_fdb_add)(struct dsa_switch *ds, int port, u16 vid,
+				const u8 addr[ETH_ALEN]);
+	int	(*port_fdb_del)(struct dsa_switch *ds, int port, u16 vid,
+				const u8 addr[ETH_ALEN]);
+	int	(*port_fdb_getnext)(struct dsa_switch *ds, int port, u16 *vid,
+				    u8 addr[ETH_ALEN], bool *is_static);
 };
 
 void register_switch_driver(struct dsa_switch_driver *type);
-- 
cgit v1.2.3


From 51e0e5d8124ece158927a4c2288c0929d3b53aa3 Mon Sep 17 00:00:00 2001
From: Alexander Aring <alex.aring@gmail.com>
Date: Mon, 10 Aug 2015 21:15:53 +0200
Subject: ieee802154: 6lowpan: remove multiple lowpan per wpan support

We currently supports multiple lowpan interfaces per wpan interface. I
never saw any use case into such functionality. We drop this feature now
because it's much easier do deal with address changes inside the under
laying wpan interface.

This patch removes the multiple lowpan interface and adds a lowpan_dev
netdev pointer into the wpan_dev, if this pointer isn't null the wpan
interface belongs to the assigned lowpan interface.

Reviewed-by: Stefan Schmidt <stefan@osg.samsung.com>
Tested-by: Stefan Schmidt <stefan@osg.samsung.com>
Signed-off-by: Alexander Aring <alex.aring@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/cfg802154.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/net')

diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h
index 382f94b59f2f..e53b6bfda976 100644
--- a/include/net/cfg802154.h
+++ b/include/net/cfg802154.h
@@ -173,6 +173,9 @@ struct wpan_dev {
 	struct list_head list;
 	struct net_device *netdev;
 
+	/* lowpan interface, set when the wpan_dev belongs to one lowpan_dev */
+	struct net_device *lowpan_dev;
+
 	u32 identifier;
 
 	/* MAC PIB */
-- 
cgit v1.2.3


From c91208d819c814e7f418c7a083059cf533ad0396 Mon Sep 17 00:00:00 2001
From: Alexander Aring <alex.aring@gmail.com>
Date: Mon, 10 Aug 2015 21:15:58 +0200
Subject: ieee802154: add ack request default handling

This patch introduce a new mib entry which isn't part of 802.15.4 but
useful as default behaviour to set the ack request bit or not if we
don't know if the ack request bit should set. This is currently used for
stacks like IEEE 802.15.4 6LoWPAN.

Reviewed-by: Stefan Schmidt <stefan@osg.samsung.com>
Signed-off-by: Alexander Aring <alex.aring@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/cfg802154.h | 5 +++++
 include/net/nl802154.h  | 4 ++++
 2 files changed, 9 insertions(+)

(limited to 'include/net')

diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h
index e53b6bfda976..76b1ffaea863 100644
--- a/include/net/cfg802154.h
+++ b/include/net/cfg802154.h
@@ -63,6 +63,8 @@ struct cfg802154_ops {
 					 s8 max_frame_retries);
 	int	(*set_lbt_mode)(struct wpan_phy *wpan_phy,
 				struct wpan_dev *wpan_dev, bool mode);
+	int	(*set_ackreq_default)(struct wpan_phy *wpan_phy,
+				      struct wpan_dev *wpan_dev, bool ackreq);
 };
 
 static inline bool
@@ -196,6 +198,9 @@ struct wpan_dev {
 	bool lbt;
 
 	bool promiscuous_mode;
+
+	/* fallback for acknowledgment bit setting */
+	bool ackreq;
 };
 
 #define to_phy(_dev)	container_of(_dev, struct wpan_phy, dev)
diff --git a/include/net/nl802154.h b/include/net/nl802154.h
index b0ab530d28cd..cf2713d8b975 100644
--- a/include/net/nl802154.h
+++ b/include/net/nl802154.h
@@ -52,6 +52,8 @@ enum nl802154_commands {
 
 	NL802154_CMD_SET_LBT_MODE,
 
+	NL802154_CMD_SET_ACKREQ_DEFAULT,
+
 	/* add new commands above here */
 
 	/* used to define NL802154_CMD_MAX below */
@@ -104,6 +106,8 @@ enum nl802154_attrs {
 
 	NL802154_ATTR_SUPPORTED_COMMANDS,
 
+	NL802154_ATTR_ACKREQ_DEFAULT,
+
 	/* add attributes here, update the policy in nl802154.c */
 
 	__NL802154_ATTR_AFTER_LAST,
-- 
cgit v1.2.3


From 158e92185075184ebc5f25bab61fdd598693e28d Mon Sep 17 00:00:00 2001
From: Jakub Pawlowski <jpawlowski@google.com>
Date: Fri, 7 Aug 2015 20:22:51 +0200
Subject: Bluetooth: preparation for new connect procedure

Currently, when trying to connect to already paired device that just
rotated its RPA MAC address, old address would be used and connection
would fail. In order to fix that, kernel must scan and receive
advertisement with fresh RPA before connecting.

This patch adds some fields to hci_conn_params, in preparation to new
connect procedure.

explicit_connect will be used to override any current auto_connect action,
and connect to device when ad is received.

HCI_AUTO_CONN_EXPLICIT was added to auto_connect enum. When this value
will be used, explicit connect is the only action, and params can be
removed after successful connection.

HCI_CONN_SCANNING is added to hci_conn flags. When it's set, connect is
scan phase. It gets cleared when advertisement is received, and
HCI_OP_LE_CREATE_CONN is sent.

Signed-off-by: Jakub Pawlowski <jpawlowski@google.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 2a6b0919e23f..c8d2b5a89d08 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -512,9 +512,11 @@ struct hci_conn_params {
 		HCI_AUTO_CONN_DIRECT,
 		HCI_AUTO_CONN_ALWAYS,
 		HCI_AUTO_CONN_LINK_LOSS,
+		HCI_AUTO_CONN_EXPLICIT,
 	} auto_connect;
 
 	struct hci_conn *conn;
+	bool explicit_connect;
 };
 
 extern struct list_head hci_dev_list;
@@ -639,6 +641,7 @@ enum {
 	HCI_CONN_DROP,
 	HCI_CONN_PARAM_REMOVAL_PEND,
 	HCI_CONN_NEW_LINK_KEY,
+	HCI_CONN_SCANNING,
 };
 
 static inline bool hci_conn_ssp_enabled(struct hci_conn *conn)
-- 
cgit v1.2.3


From e7d9ab731ac7babaf2e1b7b5e2280f5f555d263f Mon Sep 17 00:00:00 2001
From: Jakub Pawlowski <jpawlowski@google.com>
Date: Fri, 7 Aug 2015 20:22:52 +0200
Subject: Bluetooth: add hci_lookup_le_connect

This patch adds hci_lookup_le_connect method, that will be used to check
wether outgoing le connection attempt is in progress.

Signed-off-by: Jakub Pawlowski <jpawlowski@google.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index c8d2b5a89d08..f0a9fc1d06e0 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -811,6 +811,26 @@ static inline struct hci_conn *hci_conn_hash_lookup_state(struct hci_dev *hdev,
 	return NULL;
 }
 
+static inline struct hci_conn *hci_lookup_le_connect(struct hci_dev *hdev)
+{
+	struct hci_conn_hash *h = &hdev->conn_hash;
+	struct hci_conn  *c;
+
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(c, &h->list, list) {
+		if (c->type == LE_LINK && c->state == BT_CONNECT &&
+		    !test_bit(HCI_CONN_SCANNING, &c->flags)) {
+			rcu_read_unlock();
+			return c;
+		}
+	}
+
+	rcu_read_unlock();
+
+	return NULL;
+}
+
 int hci_disconnect(struct hci_conn *conn, __u8 reason);
 bool hci_setup_sync(struct hci_conn *conn, __u16 handle);
 void hci_sco_setup(struct hci_conn *conn, __u8 status);
-- 
cgit v1.2.3


From f75113a26008980ca13834fb6573145523596776 Mon Sep 17 00:00:00 2001
From: Jakub Pawlowski <jpawlowski@google.com>
Date: Fri, 7 Aug 2015 20:22:53 +0200
Subject: Bluetooth: add hci_connect_le_scan

Currently, when trying to connect to already paired device that just
rotated its RPA MAC address, old address would be used and connection
would fail. In order to fix that, kernel must scan and receive
advertisement with fresh RPA before connecting.

This patch adds hci_connect_le_scan with dependencies, new method that
will be used to connect to remote LE devices. Instead of just sending
connect request, it adds a device to whitelist. Later patches will make
use of this whitelist to send conenct request when advertisement is
received, and properly handle timeouts.

Signed-off-by: Jakub Pawlowski <jpawlowski@google.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index f0a9fc1d06e0..9e1a59e01fa2 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -846,6 +846,9 @@ void hci_chan_del(struct hci_chan *chan);
 void hci_chan_list_flush(struct hci_conn *conn);
 struct hci_chan *hci_chan_lookup_handle(struct hci_dev *hdev, __u16 handle);
 
+struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst,
+				     u8 dst_type, u8 sec_level,
+				     u16 conn_timeout, u8 role);
 struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
 				u8 dst_type, u8 sec_level, u16 conn_timeout,
 				u8 role);
@@ -1011,6 +1014,9 @@ void hci_conn_params_clear_disabled(struct hci_dev *hdev);
 struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list,
 						  bdaddr_t *addr,
 						  u8 addr_type);
+struct hci_conn_params *hci_explicit_connect_lookup(struct hci_dev *hdev,
+						    bdaddr_t *addr,
+						    u8 addr_type);
 
 void hci_uuids_clear(struct hci_dev *hdev);
 
-- 
cgit v1.2.3


From fb811395cd5a71b9e94a068f524a6f4a21b67bdb Mon Sep 17 00:00:00 2001
From: Rick Jones <rick.jones2@hp.com>
Date: Fri, 7 Aug 2015 11:10:37 -0700
Subject: net: add explicit logging and stat for neighbour table overflow

Add an explicit neighbour table overflow message (ratelimited) and
statistic to make diagnosing neighbour table overflows tractable in
the wild.

Diagnosing a neighbour table overflow can be quite difficult in the wild
because there is no explicit dmesg logged.  Callers to neighbour code
seem to use net_dbg_ratelimit when the neighbour call fails which means
the "base message" is not emitted and the callback suppressed messages
from the ratelimiting can end-up juxtaposed with unrelated messages.
Further, a forced garbage collection will increment a stat on each call
whether it was successful in freeing-up a table entry or not, so that
statistic is only a hint.  So, add a net_info_ratelimited message and
explicit statistic to the neighbour code.

Signed-off-by: Rick Jones <rick.jones2@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/neighbour.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index bd33e66f49aa..8b683841e574 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -125,6 +125,7 @@ struct neigh_statistics {
 	unsigned long forced_gc_runs;	/* number of forced GC runs */
 
 	unsigned long unres_discards;	/* number of unresolved drops */
+	unsigned long table_fulls;      /* times even gc couldn't help */
 };
 
 #define NEIGH_CACHE_STAT_INC(tbl, field) this_cpu_inc((tbl)->stats->field)
-- 
cgit v1.2.3


From 2e15ea390e6f4466655066d97e22ec66870a042c Mon Sep 17 00:00:00 2001
From: Pravin B Shelar <pshelar@nicira.com>
Date: Fri, 7 Aug 2015 23:51:42 -0700
Subject: ip_gre: Add support to collect tunnel metadata.

Following patch create new tunnel flag which enable
tunnel metadata collection on given device.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_tunnels.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 47984415f5d1..984dbfa15e13 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -82,6 +82,8 @@ struct ip_tunnel_dst {
 	__be32				 saddr;
 };
 
+struct metadata_dst;
+
 struct ip_tunnel {
 	struct ip_tunnel __rcu	*next;
 	struct hlist_node hash_node;
@@ -115,6 +117,7 @@ struct ip_tunnel {
 	unsigned int		prl_count;	/* # of entries in PRL */
 	int			ip_tnl_net_id;
 	struct gro_cells	gro_cells;
+	bool			collect_md;
 };
 
 #define TUNNEL_CSUM		__cpu_to_be16(0x01)
@@ -149,6 +152,7 @@ struct tnl_ptk_info {
 struct ip_tunnel_net {
 	struct net_device *fb_tunnel_dev;
 	struct hlist_head tunnels[IP_TNL_HASH_SIZE];
+	struct ip_tunnel __rcu *collect_md_tun;
 };
 
 struct ip_tunnel_encap_ops {
@@ -235,7 +239,8 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
 				   __be32 key);
 
 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
-		  const struct tnl_ptk_info *tpi, bool log_ecn_error);
+		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
+		  bool log_ecn_error);
 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
 			 struct ip_tunnel_parm *p);
 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
-- 
cgit v1.2.3


From b2acd1dc3949cd60c571844d495594f05f0351f4 Mon Sep 17 00:00:00 2001
From: Pravin B Shelar <pshelar@nicira.com>
Date: Fri, 7 Aug 2015 23:51:47 -0700
Subject: openvswitch: Use regular GRE net_device instead of vport

Using GRE tunnel meta data collection feature, we can implement
OVS GRE vport. This patch removes all of the OVS
specific GRE code and make OVS use a ip_gre net_device.
Minimal GRE vport is kept to handle compatibility with
current userspace application.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/gre.h | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

(limited to 'include/net')

diff --git a/include/net/gre.h b/include/net/gre.h
index b53182018743..e3e08459bf67 100644
--- a/include/net/gre.h
+++ b/include/net/gre.h
@@ -33,16 +33,8 @@ struct gre_cisco_protocol {
 int gre_cisco_register(struct gre_cisco_protocol *proto);
 int gre_cisco_unregister(struct gre_cisco_protocol *proto);
 
-void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
-		      int hdr_len);
-
-static inline struct sk_buff *gre_handle_offloads(struct sk_buff *skb,
-						  bool csum)
-{
-	return iptunnel_handle_offloads(skb, csum,
-					csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
-}
-
+struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
+				       u8 name_assign_type);
 
 static inline int ip_gre_calc_hlen(__be16 o_flags)
 {
-- 
cgit v1.2.3


From 9f57c67c379d88a10e8ad676426fee5ae7341b14 Mon Sep 17 00:00:00 2001
From: Pravin B Shelar <pshelar@nicira.com>
Date: Fri, 7 Aug 2015 23:51:52 -0700
Subject: gre: Remove support for sharing GRE protocol hook.

Support for sharing GREPROTO_CISCO port was added so that
OVS gre port and kernel GRE devices can co-exist. After
flow-based tunneling patches OVS GRE protocol processing
is completely moved to ip_gre module. so there is no need
for GRE protocol hook. Following patch consolidates
GRE protocol related functions into ip_gre module.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/gre.h | 80 +++++--------------------------------------------------
 1 file changed, 6 insertions(+), 74 deletions(-)

(limited to 'include/net')

diff --git a/include/net/gre.h b/include/net/gre.h
index e3e08459bf67..97eafdc47eea 100644
--- a/include/net/gre.h
+++ b/include/net/gre.h
@@ -4,6 +4,12 @@
 #include <linux/skbuff.h>
 #include <net/ip_tunnels.h>
 
+struct gre_base_hdr {
+	__be16 flags;
+	__be16 protocol;
+};
+#define GRE_HEADER_SECTION 4
+
 #define GREPROTO_CISCO		0
 #define GREPROTO_PPTP		1
 #define GREPROTO_MAX		2
@@ -14,83 +20,9 @@ struct gre_protocol {
 	void (*err_handler)(struct sk_buff *skb, u32 info);
 };
 
-struct gre_base_hdr {
-	__be16 flags;
-	__be16 protocol;
-};
-#define GRE_HEADER_SECTION 4
-
 int gre_add_protocol(const struct gre_protocol *proto, u8 version);
 int gre_del_protocol(const struct gre_protocol *proto, u8 version);
 
-struct gre_cisco_protocol {
-	int (*handler)(struct sk_buff *skb, const struct tnl_ptk_info *tpi);
-	int (*err_handler)(struct sk_buff *skb, u32 info,
-			   const struct tnl_ptk_info *tpi);
-	u8 priority;
-};
-
-int gre_cisco_register(struct gre_cisco_protocol *proto);
-int gre_cisco_unregister(struct gre_cisco_protocol *proto);
-
 struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
 				       u8 name_assign_type);
-
-static inline int ip_gre_calc_hlen(__be16 o_flags)
-{
-	int addend = 4;
-
-	if (o_flags&TUNNEL_CSUM)
-		addend += 4;
-	if (o_flags&TUNNEL_KEY)
-		addend += 4;
-	if (o_flags&TUNNEL_SEQ)
-		addend += 4;
-	return addend;
-}
-
-static inline __be16 gre_flags_to_tnl_flags(__be16 flags)
-{
-	__be16 tflags = 0;
-
-	if (flags & GRE_CSUM)
-		tflags |= TUNNEL_CSUM;
-	if (flags & GRE_ROUTING)
-		tflags |= TUNNEL_ROUTING;
-	if (flags & GRE_KEY)
-		tflags |= TUNNEL_KEY;
-	if (flags & GRE_SEQ)
-		tflags |= TUNNEL_SEQ;
-	if (flags & GRE_STRICT)
-		tflags |= TUNNEL_STRICT;
-	if (flags & GRE_REC)
-		tflags |= TUNNEL_REC;
-	if (flags & GRE_VERSION)
-		tflags |= TUNNEL_VERSION;
-
-	return tflags;
-}
-
-static inline __be16 tnl_flags_to_gre_flags(__be16 tflags)
-{
-	__be16 flags = 0;
-
-	if (tflags & TUNNEL_CSUM)
-		flags |= GRE_CSUM;
-	if (tflags & TUNNEL_ROUTING)
-		flags |= GRE_ROUTING;
-	if (tflags & TUNNEL_KEY)
-		flags |= GRE_KEY;
-	if (tflags & TUNNEL_SEQ)
-		flags |= GRE_SEQ;
-	if (tflags & TUNNEL_STRICT)
-		flags |= GRE_STRICT;
-	if (tflags & TUNNEL_REC)
-		flags |= GRE_REC;
-	if (tflags & TUNNEL_VERSION)
-		flags |= GRE_VERSION;
-
-	return flags;
-}
-
 #endif
-- 
cgit v1.2.3


From 308ac9143ee2208f54d061eca54a89da509b5d92 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 8 Aug 2015 21:40:01 +0200
Subject: netfilter: nf_conntrack: push zone object into functions

This patch replaces the zone id which is pushed down into functions
with the actual zone object. It's a bigger one-time change, but
needed for later on extending zones with a direction parameter, and
thus decoupling this additional information from all call-sites.

No functional changes in this patch.

The default zone becomes a global const object, namely nf_ct_zone_dflt
and will be returned directly in various cases, one being, when there's
f.e. no zoning support.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack.h        | 10 +++++++--
 include/net/netfilter/nf_conntrack_core.h   |  3 ++-
 include/net/netfilter/nf_conntrack_expect.h | 11 +++++++---
 include/net/netfilter/nf_conntrack_zones.h  | 33 ++++++++++++++++++++---------
 4 files changed, 41 insertions(+), 16 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 37cd3911d5c5..f5e23c6dee8b 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -250,8 +250,12 @@ void nf_ct_untracked_status_or(unsigned long bits);
 void nf_ct_iterate_cleanup(struct net *net,
 			   int (*iter)(struct nf_conn *i, void *data),
 			   void *data, u32 portid, int report);
+
+struct nf_conntrack_zone;
+
 void nf_conntrack_free(struct nf_conn *ct);
-struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
+struct nf_conn *nf_conntrack_alloc(struct net *net,
+				   const struct nf_conntrack_zone *zone,
 				   const struct nf_conntrack_tuple *orig,
 				   const struct nf_conntrack_tuple *repl,
 				   gfp_t gfp);
@@ -291,7 +295,9 @@ extern unsigned int nf_conntrack_max;
 extern unsigned int nf_conntrack_hash_rnd;
 void init_nf_conntrack_hash_rnd(void);
 
-struct nf_conn *nf_ct_tmpl_alloc(struct net *net, u16 zone, gfp_t flags);
+struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
+				 const struct nf_conntrack_zone *zone,
+				 gfp_t flags);
 
 #define NF_CT_STAT_INC(net, count)	  __this_cpu_inc((net)->ct.stat->count)
 #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count)
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index f2f0fa3bb150..c03f9c42b3cd 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -52,7 +52,8 @@ bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
 
 /* Find a connection corresponding to a tuple. */
 struct nf_conntrack_tuple_hash *
-nf_conntrack_find_get(struct net *net, u16 zone,
+nf_conntrack_find_get(struct net *net,
+		      const struct nf_conntrack_zone *zone,
 		      const struct nf_conntrack_tuple *tuple);
 
 int __nf_conntrack_confirm(struct sk_buff *skb);
diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h
index 3f3aecbc8632..dce56f09ac9a 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -4,7 +4,9 @@
 
 #ifndef _NF_CONNTRACK_EXPECT_H
 #define _NF_CONNTRACK_EXPECT_H
+
 #include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_zones.h>
 
 extern unsigned int nf_ct_expect_hsize;
 extern unsigned int nf_ct_expect_max;
@@ -76,15 +78,18 @@ int nf_conntrack_expect_init(void);
 void nf_conntrack_expect_fini(void);
 
 struct nf_conntrack_expect *
-__nf_ct_expect_find(struct net *net, u16 zone,
+__nf_ct_expect_find(struct net *net,
+		    const struct nf_conntrack_zone *zone,
 		    const struct nf_conntrack_tuple *tuple);
 
 struct nf_conntrack_expect *
-nf_ct_expect_find_get(struct net *net, u16 zone,
+nf_ct_expect_find_get(struct net *net,
+		      const struct nf_conntrack_zone *zone,
 		      const struct nf_conntrack_tuple *tuple);
 
 struct nf_conntrack_expect *
-nf_ct_find_expectation(struct net *net, u16 zone,
+nf_ct_find_expectation(struct net *net,
+		       const struct nf_conntrack_zone *zone,
 		       const struct nf_conntrack_tuple *tuple);
 
 void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
diff --git a/include/net/netfilter/nf_conntrack_zones.h b/include/net/netfilter/nf_conntrack_zones.h
index 034efe8d45a5..0788bb0f267d 100644
--- a/include/net/netfilter/nf_conntrack_zones.h
+++ b/include/net/netfilter/nf_conntrack_zones.h
@@ -1,25 +1,38 @@
 #ifndef _NF_CONNTRACK_ZONES_H
 #define _NF_CONNTRACK_ZONES_H
 
-#define NF_CT_DEFAULT_ZONE	0
-
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
-#include <net/netfilter/nf_conntrack_extend.h>
+#define NF_CT_DEFAULT_ZONE_ID	0
 
 struct nf_conntrack_zone {
 	u16	id;
 };
 
-static inline u16 nf_ct_zone(const struct nf_conn *ct)
+extern const struct nf_conntrack_zone nf_ct_zone_dflt;
+
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack_extend.h>
+
+static inline const struct nf_conntrack_zone *
+nf_ct_zone(const struct nf_conn *ct)
 {
+	const struct nf_conntrack_zone *nf_ct_zone = NULL;
+
 #ifdef CONFIG_NF_CONNTRACK_ZONES
-	struct nf_conntrack_zone *nf_ct_zone;
 	nf_ct_zone = nf_ct_ext_find(ct, NF_CT_EXT_ZONE);
-	if (nf_ct_zone)
-		return nf_ct_zone->id;
 #endif
-	return NF_CT_DEFAULT_ZONE;
+	return nf_ct_zone ? nf_ct_zone : &nf_ct_zone_dflt;
 }
 
-#endif /* CONFIG_NF_CONNTRACK || CONFIG_NF_CONNTRACK_MODULE */
+static inline const struct nf_conntrack_zone *
+nf_ct_zone_tmpl(const struct nf_conn *tmpl)
+{
+	return tmpl ? nf_ct_zone(tmpl) : &nf_ct_zone_dflt;
+}
+
+static inline bool nf_ct_zone_equal(const struct nf_conn *a,
+				    const struct nf_conntrack_zone *b)
+{
+	return nf_ct_zone(a)->id == b->id;
+}
+#endif /* IS_ENABLED(CONFIG_NF_CONNTRACK) */
 #endif /* _NF_CONNTRACK_ZONES_H */
-- 
cgit v1.2.3


From 42a7b32b73d6bf22e4bdd7bf68746e2d71f4cd8d Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Mon, 10 Aug 2015 16:58:11 -0600
Subject: xfrm: Add oif to dst lookups

Rules can be installed that direct route lookups to specific tables based
on oif. Plumb the oif through the xfrm lookups so it gets set in the flow
struct and passed to the resolver routines.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index f0ee97eec24d..312e3fee9ccf 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -285,10 +285,13 @@ struct xfrm_policy_afinfo {
 	unsigned short		family;
 	struct dst_ops		*dst_ops;
 	void			(*garbage_collect)(struct net *net);
-	struct dst_entry	*(*dst_lookup)(struct net *net, int tos,
+	struct dst_entry	*(*dst_lookup)(struct net *net,
+					       int tos, int oif,
 					       const xfrm_address_t *saddr,
 					       const xfrm_address_t *daddr);
-	int			(*get_saddr)(struct net *net, xfrm_address_t *saddr, xfrm_address_t *daddr);
+	int			(*get_saddr)(struct net *net, int oif,
+					     xfrm_address_t *saddr,
+					     xfrm_address_t *daddr);
 	void			(*decode_session)(struct sk_buff *skb,
 						  struct flowi *fl,
 						  int reverse);
-- 
cgit v1.2.3


From cdf0969763e020923abe28fddc605add572febc2 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 11 Aug 2015 12:00:37 -0700
Subject: Revert "Merge branch 'mv88e6xxx-switchdev-fdb'"

This reverts commit f1d5ca434413b20cd3f8c18ff2b634b7782149a5, reversing
changes made to 4933d85c5173832ebd261756522095837583c458.

I applied v2 instead of v3.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h       | 16 ++++++----------
 include/net/switchdev.h |  3 +--
 2 files changed, 7 insertions(+), 12 deletions(-)

(limited to 'include/net')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 091d35f77180..fbca63ba8f73 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -296,16 +296,12 @@ struct dsa_switch_driver {
 				     u32 br_port_mask);
 	int	(*port_stp_update)(struct dsa_switch *ds, int port,
 				   u8 state);
-
-	/*
-	 * Forwarding database
-	 */
-	int	(*port_fdb_add)(struct dsa_switch *ds, int port, u16 vid,
-				const u8 addr[ETH_ALEN]);
-	int	(*port_fdb_del)(struct dsa_switch *ds, int port, u16 vid,
-				const u8 addr[ETH_ALEN]);
-	int	(*port_fdb_getnext)(struct dsa_switch *ds, int port, u16 *vid,
-				    u8 addr[ETH_ALEN], bool *is_static);
+	int	(*fdb_add)(struct dsa_switch *ds, int port,
+			   const unsigned char *addr, u16 vid);
+	int	(*fdb_del)(struct dsa_switch *ds, int port,
+			   const unsigned char *addr, u16 vid);
+	int	(*fdb_getnext)(struct dsa_switch *ds, int port,
+			       unsigned char *addr, bool *is_static);
 };
 
 void register_switch_driver(struct dsa_switch_driver *type);
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 0e296b82aef3..89da8934519b 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -70,9 +70,8 @@ struct switchdev_obj {
 			u32 tb_id;
 		} ipv4_fib;
 		struct switchdev_obj_fdb {		/* PORT_FDB */
-			u8 addr[ETH_ALEN];
+			const unsigned char *addr;
 			u16 vid;
-			bool is_static;
 		} fdb;
 	} u;
 };
-- 
cgit v1.2.3


From 2a778e1b58990e15de5cba4badec1fa7ecb87e80 Mon Sep 17 00:00:00 2001
From: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Date: Mon, 10 Aug 2015 09:09:49 -0400
Subject: net: dsa: change FDB routines prototypes

Change the prototype of port_getnext to include a vid parameter.

This is necessary to introduce the support for VLAN.

Also rename the fdb_{add,del,getnext} function pointers to
port_fdb_{add,del,getnext} since they are specific to a given port.

Signed-off-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

(limited to 'include/net')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index fbca63ba8f73..6356f437e911 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -296,12 +296,17 @@ struct dsa_switch_driver {
 				     u32 br_port_mask);
 	int	(*port_stp_update)(struct dsa_switch *ds, int port,
 				   u8 state);
-	int	(*fdb_add)(struct dsa_switch *ds, int port,
-			   const unsigned char *addr, u16 vid);
-	int	(*fdb_del)(struct dsa_switch *ds, int port,
-			   const unsigned char *addr, u16 vid);
-	int	(*fdb_getnext)(struct dsa_switch *ds, int port,
-			       unsigned char *addr, bool *is_static);
+
+	/*
+	 * Forwarding database
+	 */
+	int	(*port_fdb_add)(struct dsa_switch *ds, int port,
+				const unsigned char *addr, u16 vid);
+	int	(*port_fdb_del)(struct dsa_switch *ds, int port,
+				const unsigned char *addr, u16 vid);
+	int	(*port_fdb_getnext)(struct dsa_switch *ds, int port,
+				    unsigned char *addr, u16 *vid,
+				    bool *is_static);
 };
 
 void register_switch_driver(struct dsa_switch_driver *type);
-- 
cgit v1.2.3


From ce80e7bc57e25062c361de8fb6444129a63bac6d Mon Sep 17 00:00:00 2001
From: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Date: Mon, 10 Aug 2015 09:09:52 -0400
Subject: net: switchdev: support static FDB addresses

This patch adds an ndm_state member to the switchdev_obj_fdb structure,
in order to support static FDB addresses.

Set Rocker ndm_state to NUD_REACHABLE.

Signed-off-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Acked-by: Scott Feldman <sfeldma@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/switchdev.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 89da8934519b..319baab3b48e 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -72,6 +72,7 @@ struct switchdev_obj {
 		struct switchdev_obj_fdb {		/* PORT_FDB */
 			const unsigned char *addr;
 			u16 vid;
+			u16 ndm_state;
 		} fdb;
 	} u;
 };
-- 
cgit v1.2.3


From b72f6f51dc5abce94c1b5ee0186e9407ea0f919f Mon Sep 17 00:00:00 2001
From: Alexander Aring <alex.aring@gmail.com>
Date: Tue, 11 Aug 2015 21:44:08 +0200
Subject: 6lowpan: add generic 6lowpan netdev private data

This patch introduced the 6lowpan netdev private data struct. We name it
lowpan_priv and it's placed at the beginning of netdev private data. All
lowpan interfaces should allocate this room at first of netdev private
data. 6LoWPAN LL private data can be allocate by additional netdev private
data, e.g. dev->priv_size should be "sizeof(struct lowpan_priv) +
sizeof(LL_LOWPAN_PRIVATE_DATA)".

Signed-off-by: Alexander Aring <alex.aring@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/6lowpan.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'include/net')

diff --git a/include/net/6lowpan.h b/include/net/6lowpan.h
index dc03d77ad23b..a2f59ec98d24 100644
--- a/include/net/6lowpan.h
+++ b/include/net/6lowpan.h
@@ -197,6 +197,27 @@
 #define LOWPAN_NHC_UDP_CS_P_11	0xF3 /* source & dest = 0xF0B + 4bit inline */
 #define LOWPAN_NHC_UDP_CS_C	0x04 /* checksum elided */
 
+#define LOWPAN_PRIV_SIZE(llpriv_size)	\
+	(sizeof(struct lowpan_priv) + llpriv_size)
+
+enum lowpan_lltypes {
+	LOWPAN_LLTYPE_BTLE,
+	LOWPAN_LLTYPE_IEEE802154,
+};
+
+struct lowpan_priv {
+	enum lowpan_lltypes lltype;
+
+	/* must be last */
+	u8 priv[0] __aligned(sizeof(void *));
+};
+
+static inline
+struct lowpan_priv *lowpan_priv(const struct net_device *dev)
+{
+	return netdev_priv(dev);
+}
+
 #ifdef DEBUG
 /* print data in line */
 static inline void raw_dump_inline(const char *caller, char *msg,
@@ -372,6 +393,8 @@ lowpan_uncompress_size(const struct sk_buff *skb, u16 *dgram_offset)
 	return skb->len + uncomp_header - ret;
 }
 
+void lowpan_netdev_setup(struct net_device *dev, enum lowpan_lltypes lltype);
+
 int
 lowpan_header_decompress(struct sk_buff *skb, struct net_device *dev,
 			 const u8 *saddr, const u8 saddr_type,
-- 
cgit v1.2.3


From 4b58c37bb9d4282446f7a0194dbc44325787ac8c Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 8 Jul 2015 15:41:48 +0300
Subject: mac80211: remove ieee80211_aes_cmac_calculate_k1_k2()

The iwlwifi driver was the only driver that used this, but as
it turns out it never needed it, so we can remove it.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 13 -------------
 1 file changed, 13 deletions(-)

(limited to 'include/net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 484cc14fb947..e3314e516681 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -4330,19 +4330,6 @@ void ieee80211_get_tkip_rx_p1k(struct ieee80211_key_conf *keyconf,
 void ieee80211_get_tkip_p2k(struct ieee80211_key_conf *keyconf,
 			    struct sk_buff *skb, u8 *p2k);
 
-/**
- * ieee80211_aes_cmac_calculate_k1_k2 - calculate the AES-CMAC sub keys
- *
- * This function computes the two AES-CMAC sub-keys, based on the
- * previously installed master key.
- *
- * @keyconf: the parameter passed with the set key
- * @k1: a buffer to be filled with the 1st sub-key
- * @k2: a buffer to be filled with the 2nd sub-key
- */
-void ieee80211_aes_cmac_calculate_k1_k2(struct ieee80211_key_conf *keyconf,
-					u8 *k1, u8 *k2);
-
 /**
  * ieee80211_get_key_tx_seq - get key TX sequence counter
  *
-- 
cgit v1.2.3


From 111495361598205967f1be4e07d4726b0f762d60 Mon Sep 17 00:00:00 2001
From: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Date: Thu, 13 Aug 2015 12:52:17 -0400
Subject: net: dsa: add support for switchdev VLAN objects

Add new functions in DSA drivers to access hardware VLAN entries through
SWITCHDEV_OBJ_PORT_VLAN objects:

 - port_pvid_get() and vlan_getnext() to dump a VLAN
 - port_vlan_del() to exclude a port from a VLAN
 - port_pvid_set() and port_vlan_add() to join a port to a VLAN

The DSA infrastructure will ensure that each VLAN of the given range
does not already belong to another bridge. If it does, it will fallback
to software VLAN and won't program the hardware.

Signed-off-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/net')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 6356f437e911..bd9b76502458 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -297,6 +297,17 @@ struct dsa_switch_driver {
 	int	(*port_stp_update)(struct dsa_switch *ds, int port,
 				   u8 state);
 
+	/*
+	 * VLAN support
+	 */
+	int	(*port_pvid_get)(struct dsa_switch *ds, int port, u16 *pvid);
+	int	(*port_pvid_set)(struct dsa_switch *ds, int port, u16 pvid);
+	int	(*port_vlan_add)(struct dsa_switch *ds, int port, u16 vid,
+				 bool untagged);
+	int	(*port_vlan_del)(struct dsa_switch *ds, int port, u16 vid);
+	int	(*vlan_getnext)(struct dsa_switch *ds, u16 *vid,
+				unsigned long *ports, unsigned long *untagged);
+
 	/*
 	 * Forwarding database
 	 */
-- 
cgit v1.2.3


From 4e3c89920cd3a6cfce22c6f537690747c26128dd Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Thu, 13 Aug 2015 14:59:00 -0600
Subject: net: Introduce VRF related flags and helpers

Add a VRF_MASTER flag for interfaces and helper functions for determining
if a device is a VRF_MASTER.

Add link attribute for passing VRF_TABLE id.

Add vrf_ptr to netdevice.

Add various macros for determining if a device is a VRF device, the index
of the master VRF device and table associated with VRF device.

Signed-off-by: Shrijeet Mukherjee <shm@cumulusnetworks.com>
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/vrf.h | 139 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 139 insertions(+)
 create mode 100644 include/net/vrf.h

(limited to 'include/net')

diff --git a/include/net/vrf.h b/include/net/vrf.h
new file mode 100644
index 000000000000..0484d29d4589
--- /dev/null
+++ b/include/net/vrf.h
@@ -0,0 +1,139 @@
+/*
+ * include/net/net_vrf.h - adds vrf dev structure definitions
+ * Copyright (c) 2015 Cumulus Networks
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __LINUX_NET_VRF_H
+#define __LINUX_NET_VRF_H
+
+struct net_vrf_dev {
+	struct rcu_head		rcu;
+	int                     ifindex; /* ifindex of master dev */
+	u32                     tb_id;   /* table id for VRF */
+};
+
+struct slave {
+	struct list_head	list;
+	struct net_device	*dev;
+};
+
+struct slave_queue {
+	struct list_head	all_slaves;
+	int			num_slaves;
+};
+
+struct net_vrf {
+	struct slave_queue	queue;
+	struct rtable           *rth;
+	u32			tb_id;
+};
+
+
+#if IS_ENABLED(CONFIG_NET_VRF)
+/* called with rcu_read_lock() */
+static inline int vrf_master_ifindex_rcu(const struct net_device *dev)
+{
+	struct net_vrf_dev *vrf_ptr;
+	int ifindex = 0;
+
+	if (!dev)
+		return 0;
+
+	if (netif_is_vrf(dev))
+		ifindex = dev->ifindex;
+	else {
+		vrf_ptr = rcu_dereference(dev->vrf_ptr);
+		if (vrf_ptr)
+			ifindex = vrf_ptr->ifindex;
+	}
+
+	return ifindex;
+}
+
+/* called with rcu_read_lock */
+static inline int vrf_dev_table_rcu(const struct net_device *dev)
+{
+	int tb_id = 0;
+
+	if (dev) {
+		struct net_vrf_dev *vrf_ptr;
+
+		vrf_ptr = rcu_dereference(dev->vrf_ptr);
+		if (vrf_ptr)
+			tb_id = vrf_ptr->tb_id;
+	}
+	return tb_id;
+}
+
+static inline int vrf_dev_table(const struct net_device *dev)
+{
+	int tb_id;
+
+	rcu_read_lock();
+	tb_id = vrf_dev_table_rcu(dev);
+	rcu_read_unlock();
+
+	return tb_id;
+}
+
+/* called with rtnl */
+static inline int vrf_dev_table_rtnl(const struct net_device *dev)
+{
+	int tb_id = 0;
+
+	if (dev) {
+		struct net_vrf_dev *vrf_ptr;
+
+		vrf_ptr = rtnl_dereference(dev->vrf_ptr);
+		if (vrf_ptr)
+			tb_id = vrf_ptr->tb_id;
+	}
+	return tb_id;
+}
+
+/* caller has already checked netif_is_vrf(dev) */
+static inline struct rtable *vrf_dev_get_rth(const struct net_device *dev)
+{
+	struct rtable *rth = ERR_PTR(-ENETUNREACH);
+	struct net_vrf *vrf = netdev_priv(dev);
+
+	if (vrf) {
+		rth = vrf->rth;
+		atomic_inc(&rth->dst.__refcnt);
+	}
+	return rth;
+}
+
+#else
+static inline int vrf_master_ifindex_rcu(const struct net_device *dev)
+{
+	return 0;
+}
+
+static inline int vrf_dev_table_rcu(const struct net_device *dev)
+{
+	return 0;
+}
+
+static inline int vrf_dev_table(const struct net_device *dev)
+{
+	return 0;
+}
+
+static inline int vrf_dev_table_rtnl(const struct net_device *dev)
+{
+	return 0;
+}
+
+static inline struct rtable *vrf_dev_get_rth(const struct net_device *dev)
+{
+	return ERR_PTR(-ENETUNREACH);
+}
+#endif
+
+#endif /* __LINUX_NET_VRF_H */
-- 
cgit v1.2.3


From 613d09b30f8b589d5a9b49775054c8865db95d1c Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Thu, 13 Aug 2015 14:59:02 -0600
Subject: net: Use VRF device index for lookups on TX

As with ingress use the index of VRF master device for route lookups on
egress. However, the oif should only be used to direct the lookups to a
specific table. Routes in the table are not based on the VRF device but
rather interfaces that are part of the VRF so do not consider the oif for
lookups within the table. The FLOWI_FLAG_VRFSRC is used to control this
latter part.

Signed-off-by: Shrijeet Mukherjee <shm@cumulusnetworks.com>
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow.h  | 1 +
 include/net/route.h | 3 +++
 2 files changed, 4 insertions(+)

(limited to 'include/net')

diff --git a/include/net/flow.h b/include/net/flow.h
index 3098ae33a178..f305588fc162 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -33,6 +33,7 @@ struct flowi_common {
 	__u8	flowic_flags;
 #define FLOWI_FLAG_ANYSRC		0x01
 #define FLOWI_FLAG_KNOWN_NH		0x02
+#define FLOWI_FLAG_VRFSRC		0x04
 	__u32	flowic_secid;
 	struct flowi_tunnel flowic_tun_key;
 };
diff --git a/include/net/route.h b/include/net/route.h
index 2d45f419477f..94189d4bd899 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -251,6 +251,9 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32
 	if (inet_sk(sk)->transparent)
 		flow_flags |= FLOWI_FLAG_ANYSRC;
 
+	if (netif_index_is_vrf(sock_net(sk), oif))
+		flow_flags |= FLOWI_FLAG_VRFSRC;
+
 	flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE,
 			   protocol, flow_flags, dst, src, dport, sport);
 }
-- 
cgit v1.2.3


From 15be405eb2ea943ac5fa2aab7d0ba282e9ef1301 Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Thu, 13 Aug 2015 14:59:04 -0600
Subject: net: Add inet_addr lookup by table

Currently inet_addr_type and inet_dev_addr_type expect local addresses
to be in the local table. With the VRF device local routes for devices
associated with a VRF will be in the table associated with the VRF.
Provide an alternate inet_addr lookup to use a specific table rather
than defaulting to the local table.

Signed-off-by: Shrijeet Mukherjee <shm@cumulusnetworks.com>
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/route.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/route.h b/include/net/route.h
index 94189d4bd899..6ba681f0b98d 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -189,6 +189,7 @@ void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk);
 void ip_rt_send_redirect(struct sk_buff *skb);
 
 unsigned int inet_addr_type(struct net *net, __be32 addr);
+unsigned int inet_addr_type_table(struct net *net, __be32 addr, int tb_id);
 unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
 				__be32 addr);
 void ip_rt_multicast_event(struct in_device *);
-- 
cgit v1.2.3


From 30bbaa19500559d7625c65632195413f639b3b97 Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Thu, 13 Aug 2015 14:59:05 -0600
Subject: net: Fix up inet_addr_type checks

Currently inet_addr_type and inet_dev_addr_type expect local addresses
to be in the local table. With the VRF device local routes for devices
associated with a VRF will be in the table associated with the VRF.
Provide an alternate inet_addr lookup to use a specific table rather
than defaulting to the local table.

inet_addr_type_dev_table keeps the same semantics as inet_addr_type but
if the passed in device is enslaved to a VRF then the table for that VRF
is used for the lookup.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/route.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/net')

diff --git a/include/net/route.h b/include/net/route.h
index 6ba681f0b98d..6dda2c1bf8c6 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -192,6 +192,9 @@ unsigned int inet_addr_type(struct net *net, __be32 addr);
 unsigned int inet_addr_type_table(struct net *net, __be32 addr, int tb_id);
 unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
 				__be32 addr);
+unsigned int inet_addr_type_dev_table(struct net *net,
+				      const struct net_device *dev,
+				      __be32 addr);
 void ip_rt_multicast_event(struct in_device *);
 int ip_rt_ioctl(struct net *, unsigned int cmd, void __user *arg);
 void ip_rt_get_source(u8 *src, struct sk_buff *skb, struct rtable *rt);
-- 
cgit v1.2.3


From dc028da54ed353edd44dca88b7eb19fd5126c354 Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Sun, 16 Aug 2015 17:13:27 -0600
Subject: inet: Move VRF table lookup to inlined function

Table lookup compiles out when VRF is not enabled.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/vrf.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'include/net')

diff --git a/include/net/vrf.h b/include/net/vrf.h
index 0484d29d4589..40e3793c7a05 100644
--- a/include/net/vrf.h
+++ b/include/net/vrf.h
@@ -81,6 +81,25 @@ static inline int vrf_dev_table(const struct net_device *dev)
 	return tb_id;
 }
 
+static inline int vrf_dev_table_ifindex(struct net *net, int ifindex)
+{
+	struct net_device *dev;
+	int tb_id = 0;
+
+	if (!ifindex)
+		return 0;
+
+	rcu_read_lock();
+
+	dev = dev_get_by_index_rcu(net, ifindex);
+	if (dev)
+		tb_id = vrf_dev_table_rcu(dev);
+
+	rcu_read_unlock();
+
+	return tb_id;
+}
+
 /* called with rtnl */
 static inline int vrf_dev_table_rtnl(const struct net_device *dev)
 {
@@ -125,6 +144,11 @@ static inline int vrf_dev_table(const struct net_device *dev)
 	return 0;
 }
 
+static inline int vrf_dev_table_ifindex(struct net *net, int ifindex)
+{
+	return 0;
+}
+
 static inline int vrf_dev_table_rtnl(const struct net_device *dev)
 {
 	return 0;
-- 
cgit v1.2.3


From deedb59039f111c41aa5a54ee384c8e7c08bc78a Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 14 Aug 2015 16:03:39 +0200
Subject: netfilter: nf_conntrack: add direction support for zones

This work adds a direction parameter to netfilter zones, so identity
separation can be performed only in original/reply or both directions
(default). This basically opens up the possibility of doing NAT with
conflicting IP address/port tuples from multiple, isolated tenants
on a host (e.g. from a netns) without requiring each tenant to NAT
twice resp. to use its own dedicated IP address to SNAT to, meaning
overlapping tuples can be made unique with the zone identifier in
original direction, where the NAT engine will then allocate a unique
tuple in the commonly shared default zone for the reply direction.
In some restricted, local DNAT cases, also port redirection could be
used for making the reply traffic unique w/o requiring SNAT.

The consensus we've reached and discussed at NFWS and since the initial
implementation [1] was to directly integrate the direction meta data
into the existing zones infrastructure, as opposed to the ct->mark
approach we proposed initially.

As we pass the nf_conntrack_zone object directly around, we don't have
to touch all call-sites, but only those, that contain equality checks
of zones. Thus, based on the current direction (original or reply),
we either return the actual id, or the default NF_CT_DEFAULT_ZONE_ID.
CT expectations are direction-agnostic entities when expectations are
being compared among themselves, so we can only use the identifier
in this case.

Note that zone identifiers can not be included into the hash mix
anymore as they don't contain a "stable" value that would be equal
for both directions at all times, f.e. if only zone->id would
unconditionally be xor'ed into the table slot hash, then replies won't
find the corresponding conntracking entry anymore.

If no particular direction is specified when configuring zones, the
behaviour is exactly as we expect currently (both directions).

Support has been added for the CT netlink interface as well as the
x_tables raw CT target, which both already offer existing interfaces
to user space for the configuration of zones.

Below a minimal, simplified collision example (script in [2]) with
netperf sessions:

  +--- tenant-1 ---+   mark := 1
  |    netperf     |--+
  +----------------+  |                CT zone := mark [ORIGINAL]
   [ip,sport] := X   +--------------+  +--- gateway ---+
                     | mark routing |--|     SNAT      |-- ... +
                     +--------------+  +---------------+       |
  +--- tenant-2 ---+  |                                     ~~~|~~~
  |    netperf     |--+                +-----------+           |
  +----------------+   mark := 2       | netserver |------ ... +
   [ip,sport] := X                     +-----------+
                                        [ip,port] := Y
On the gateway netns, example:

  iptables -t raw -A PREROUTING -j CT --zone mark --zone-dir ORIGINAL
  iptables -t nat -A POSTROUTING -o <dev> -j SNAT --to-source <ip> --random-fully

  iptables -t mangle -A PREROUTING -m conntrack --ctdir ORIGINAL -j CONNMARK --save-mark
  iptables -t mangle -A POSTROUTING -m conntrack --ctdir REPLY -j CONNMARK --restore-mark

conntrack dump from gateway netns:

  netperf -H 10.1.1.2 -t TCP_STREAM -l60 -p12865,5555 from each tenant netns

  tcp 6 431995 ESTABLISHED src=40.1.1.1 dst=10.1.1.2 sport=5555 dport=12865 zone-orig=1
                           src=10.1.1.2 dst=10.1.1.1 sport=12865 dport=1024
               [ASSURED] mark=1 secctx=system_u:object_r:unlabeled_t:s0 use=1

  tcp 6 431994 ESTABLISHED src=40.1.1.1 dst=10.1.1.2 sport=5555 dport=12865 zone-orig=2
                           src=10.1.1.2 dst=10.1.1.1 sport=12865 dport=5555
               [ASSURED] mark=2 secctx=system_u:object_r:unlabeled_t:s0 use=1

  tcp 6 299 ESTABLISHED src=40.1.1.1 dst=10.1.1.2 sport=39438 dport=33768 zone-orig=1
                        src=10.1.1.2 dst=10.1.1.1 sport=33768 dport=39438
               [ASSURED] mark=1 secctx=system_u:object_r:unlabeled_t:s0 use=1

  tcp 6 300 ESTABLISHED src=40.1.1.1 dst=10.1.1.2 sport=32889 dport=40206 zone-orig=2
                        src=10.1.1.2 dst=10.1.1.1 sport=40206 dport=32889
               [ASSURED] mark=2 secctx=system_u:object_r:unlabeled_t:s0 use=2

Taking this further, test script in [2] creates 200 tenants and runs
original-tuple colliding netperf sessions each. A conntrack -L dump in
the gateway netns also confirms 200 overlapping entries, all in ESTABLISHED
state as expected.

I also did run various other tests with some permutations of the script,
to mention some: SNAT in random/random-fully/persistent mode, no zones (no
overlaps), static zones (original, reply, both directions), etc.

  [1] http://thread.gmane.org/gmane.comp.security.firewalls.netfilter.devel/57412/
  [2] https://paste.fedoraproject.org/242835/65657871/

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_zones.h | 31 +++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_zones.h b/include/net/netfilter/nf_conntrack_zones.h
index 0788bb0f267d..3942ddf0d4ff 100644
--- a/include/net/netfilter/nf_conntrack_zones.h
+++ b/include/net/netfilter/nf_conntrack_zones.h
@@ -1,10 +1,18 @@
 #ifndef _NF_CONNTRACK_ZONES_H
 #define _NF_CONNTRACK_ZONES_H
 
+#include <linux/netfilter/nf_conntrack_tuple_common.h>
+
 #define NF_CT_DEFAULT_ZONE_ID	0
 
+#define NF_CT_ZONE_DIR_ORIG	(1 << IP_CT_DIR_ORIGINAL)
+#define NF_CT_ZONE_DIR_REPL	(1 << IP_CT_DIR_REPLY)
+
+#define NF_CT_DEFAULT_ZONE_DIR	(NF_CT_ZONE_DIR_ORIG | NF_CT_ZONE_DIR_REPL)
+
 struct nf_conntrack_zone {
 	u16	id;
+	u16	dir;
 };
 
 extern const struct nf_conntrack_zone nf_ct_zone_dflt;
@@ -29,8 +37,29 @@ nf_ct_zone_tmpl(const struct nf_conn *tmpl)
 	return tmpl ? nf_ct_zone(tmpl) : &nf_ct_zone_dflt;
 }
 
+static inline bool nf_ct_zone_matches_dir(const struct nf_conntrack_zone *zone,
+					  enum ip_conntrack_dir dir)
+{
+	return zone->dir & (1 << dir);
+}
+
+static inline u16 nf_ct_zone_id(const struct nf_conntrack_zone *zone,
+				enum ip_conntrack_dir dir)
+{
+	return nf_ct_zone_matches_dir(zone, dir) ?
+	       zone->id : NF_CT_DEFAULT_ZONE_ID;
+}
+
 static inline bool nf_ct_zone_equal(const struct nf_conn *a,
-				    const struct nf_conntrack_zone *b)
+				    const struct nf_conntrack_zone *b,
+				    enum ip_conntrack_dir dir)
+{
+	return nf_ct_zone_id(nf_ct_zone(a), dir) ==
+	       nf_ct_zone_id(b, dir);
+}
+
+static inline bool nf_ct_zone_equal_any(const struct nf_conn *a,
+					const struct nf_conntrack_zone *b)
 {
 	return nf_ct_zone(a)->id == b->id;
 }
-- 
cgit v1.2.3


From 5e8018fc61423e677398d4ad4d72df70b9788e77 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 14 Aug 2015 16:03:40 +0200
Subject: netfilter: nf_conntrack: add efficient mark to zone mapping

This work adds the possibility of deriving the zone id from the skb->mark
field in a scalable manner. This allows for having only a single template
serving hundreds/thousands of different zones, for example, instead of the
need to have one match for each zone as an extra CT jump target.

Note that we'd need to have this information attached to the template as at
the time when we're trying to lookup a possible ct object, we already need
to know zone information for a possible match when going into
__nf_conntrack_find_get(). This work provides a minimal implementation for
a possible mapping.

In order to not add/expose an extra ct->status bit, the zone structure has
been extended to carry a flag for deriving the mark.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_zones.h | 45 ++++++++++++++++++++++++++++--
 1 file changed, 42 insertions(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_zones.h b/include/net/netfilter/nf_conntrack_zones.h
index 3942ddf0d4ff..5316c7b3a374 100644
--- a/include/net/netfilter/nf_conntrack_zones.h
+++ b/include/net/netfilter/nf_conntrack_zones.h
@@ -10,9 +10,12 @@
 
 #define NF_CT_DEFAULT_ZONE_DIR	(NF_CT_ZONE_DIR_ORIG | NF_CT_ZONE_DIR_REPL)
 
+#define NF_CT_FLAG_MARK		1
+
 struct nf_conntrack_zone {
 	u16	id;
-	u16	dir;
+	u8	flags;
+	u8	dir;
 };
 
 extern const struct nf_conntrack_zone nf_ct_zone_dflt;
@@ -32,9 +35,45 @@ nf_ct_zone(const struct nf_conn *ct)
 }
 
 static inline const struct nf_conntrack_zone *
-nf_ct_zone_tmpl(const struct nf_conn *tmpl)
+nf_ct_zone_init(struct nf_conntrack_zone *zone, u16 id, u8 dir, u8 flags)
+{
+	zone->id = id;
+	zone->flags = flags;
+	zone->dir = dir;
+
+	return zone;
+}
+
+static inline const struct nf_conntrack_zone *
+nf_ct_zone_tmpl(const struct nf_conn *tmpl, const struct sk_buff *skb,
+		struct nf_conntrack_zone *tmp)
+{
+	const struct nf_conntrack_zone *zone;
+
+	if (!tmpl)
+		return &nf_ct_zone_dflt;
+
+	zone = nf_ct_zone(tmpl);
+	if (zone->flags & NF_CT_FLAG_MARK)
+		zone = nf_ct_zone_init(tmp, skb->mark, zone->dir, 0);
+
+	return zone;
+}
+
+static inline int nf_ct_zone_add(struct nf_conn *ct, gfp_t flags,
+				 const struct nf_conntrack_zone *info)
 {
-	return tmpl ? nf_ct_zone(tmpl) : &nf_ct_zone_dflt;
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+	struct nf_conntrack_zone *nf_ct_zone;
+
+	nf_ct_zone = nf_ct_ext_add(ct, NF_CT_EXT_ZONE, flags);
+	if (!nf_ct_zone)
+		return -ENOMEM;
+
+	nf_ct_zone_init(nf_ct_zone, info->id, info->dir,
+			info->flags);
+#endif
+	return 0;
 }
 
 static inline bool nf_ct_zone_matches_dir(const struct nf_conntrack_zone *zone,
-- 
cgit v1.2.3


From 2536862311d2276454ddef9dc36d6551a4b400fd Mon Sep 17 00:00:00 2001
From: Tom Herbert <tom@herbertland.com>
Date: Mon, 17 Aug 2015 13:42:24 -0700
Subject: lwt: Add support to redirect dst.input

This patch adds the capability to redirect dst input in the same way
that dst output is redirected by LWT.

Also, save the original dst.input and and dst.out when setting up
lwtunnel redirection. These can be called by the client as a pass-
through.

Signed-off-by: Tom Herbert <tom@herbertland.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/lwtunnel.h | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
index 33bd30963a95..e25b60eb262d 100644
--- a/include/net/lwtunnel.h
+++ b/include/net/lwtunnel.h
@@ -11,12 +11,15 @@
 #define LWTUNNEL_HASH_SIZE   (1 << LWTUNNEL_HASH_BITS)
 
 /* lw tunnel state flags */
-#define LWTUNNEL_STATE_OUTPUT_REDIRECT 0x1
+#define LWTUNNEL_STATE_OUTPUT_REDIRECT	BIT(0)
+#define LWTUNNEL_STATE_INPUT_REDIRECT	BIT(1)
 
 struct lwtunnel_state {
 	__u16		type;
 	__u16		flags;
 	atomic_t	refcnt;
+	int		(*orig_output)(struct sock *sk, struct sk_buff *skb);
+	int		(*orig_input)(struct sk_buff *);
 	int             len;
 	__u8            data[0];
 };
@@ -25,6 +28,7 @@ struct lwtunnel_encap_ops {
 	int (*build_state)(struct net_device *dev, struct nlattr *encap,
 			   struct lwtunnel_state **ts);
 	int (*output)(struct sock *sk, struct sk_buff *skb);
+	int (*input)(struct sk_buff *skb);
 	int (*fill_encap)(struct sk_buff *skb,
 			  struct lwtunnel_state *lwtstate);
 	int (*get_encap_size)(struct lwtunnel_state *lwtstate);
@@ -58,6 +62,13 @@ static inline bool lwtunnel_output_redirect(struct lwtunnel_state *lwtstate)
 	return false;
 }
 
+static inline bool lwtunnel_input_redirect(struct lwtunnel_state *lwtstate)
+{
+	if (lwtstate && (lwtstate->flags & LWTUNNEL_STATE_INPUT_REDIRECT))
+		return true;
+
+	return false;
+}
 int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op,
 			   unsigned int num);
 int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op,
@@ -72,6 +83,8 @@ struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len);
 int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b);
 int lwtunnel_output(struct sock *sk, struct sk_buff *skb);
 int lwtunnel_output6(struct sock *sk, struct sk_buff *skb);
+int lwtunnel_input(struct sk_buff *skb);
+int lwtunnel_input6(struct sk_buff *skb);
 
 #else
 
@@ -90,6 +103,11 @@ static inline bool lwtunnel_output_redirect(struct lwtunnel_state *lwtstate)
 	return false;
 }
 
+static inline bool lwtunnel_input_redirect(struct lwtunnel_state *lwtstate)
+{
+	return false;
+}
+
 static inline int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op,
 					 unsigned int num)
 {
@@ -142,6 +160,16 @@ static inline int lwtunnel_output6(struct sock *sk, struct sk_buff *skb)
 	return -EOPNOTSUPP;
 }
 
+static inline int lwtunnel_input(struct sk_buff *skb)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int lwtunnel_input6(struct sk_buff *skb)
+{
+	return -EOPNOTSUPP;
+}
+
 #endif
 
 #endif /* __NET_LWTUNNEL_H */
-- 
cgit v1.2.3


From 4b048d6d9d0b0b90e1e94f2393796bbf1fa8df4e Mon Sep 17 00:00:00 2001
From: Tom Herbert <tom@herbertland.com>
Date: Mon, 17 Aug 2015 13:42:25 -0700
Subject: net: Change pseudohdr argument of inet_proto_csum_replace* to be a
 bool

inet_proto_csum_replace4,2,16 take a pseudohdr argument which indicates
the checksum field carries a pseudo header. This argument should be a
boolean instead of an int.

Signed-off-by: Tom Herbert <tom@herbertland.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/checksum.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/checksum.h b/include/net/checksum.h
index 2d1d73cb773e..619f3445d57e 100644
--- a/include/net/checksum.h
+++ b/include/net/checksum.h
@@ -140,14 +140,14 @@ static inline void csum_replace2(__sum16 *sum, __be16 old, __be16 new)
 
 struct sk_buff;
 void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
-			      __be32 from, __be32 to, int pseudohdr);
+			      __be32 from, __be32 to, bool pseudohdr);
 void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
 			       const __be32 *from, const __be32 *to,
-			       int pseudohdr);
+			       bool pseudohdr);
 
 static inline void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb,
 					    __be16 from, __be16 to,
-					    int pseudohdr)
+					    bool pseudohdr)
 {
 	inet_proto_csum_replace4(sum, skb, (__force __be32)from,
 				 (__force __be32)to, pseudohdr);
-- 
cgit v1.2.3


From abc5d1ff3e8f9b4a9d274818459b123e31981dc9 Mon Sep 17 00:00:00 2001
From: Tom Herbert <tom@herbertland.com>
Date: Mon, 17 Aug 2015 13:42:26 -0700
Subject: net: Add inet_proto_csum_replace_by_diff utility function

This function updates a checksum field value and skb->csum based on
a value which is the difference between the old and new checksum.

Signed-off-by: Tom Herbert <tom@herbertland.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/checksum.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/checksum.h b/include/net/checksum.h
index 619f3445d57e..9fcaedf994ee 100644
--- a/include/net/checksum.h
+++ b/include/net/checksum.h
@@ -144,6 +144,8 @@ void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
 void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
 			       const __be32 *from, const __be32 *to,
 			       bool pseudohdr);
+void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb,
+				     __wsum diff, bool pseudohdr);
 
 static inline void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb,
 					    __be16 from, __be16 to,
-- 
cgit v1.2.3


From 60045cbfc0b291dae8dd5b929d67b87c5ea954d4 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Mon, 17 Aug 2015 23:52:51 +0200
Subject: net: dsa: Add dsa_is_dsa_port() helper

Add an inline helper for determining is a port is a DSA port.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/net')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index bd9b76502458..b34d812bc5d0 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -171,6 +171,11 @@ static inline bool dsa_is_cpu_port(struct dsa_switch *ds, int p)
 	return !!(ds->index == ds->dst->cpu_switch && p == ds->dst->cpu_port);
 }
 
+static inline bool dsa_is_dsa_port(struct dsa_switch *ds, int p)
+{
+	return !!((ds->dsa_port_mask) & (1 << p));
+}
+
 static inline bool dsa_is_port_initialized(struct dsa_switch *ds, int p)
 {
 	return ds->phys_port_mask & (1 << p) && ds->ports[p];
-- 
cgit v1.2.3


From df383e6240ef222703648072dafd2a1ae21b0d2a Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Tue, 18 Aug 2015 18:41:13 +0200
Subject: lwtunnel: fix memory leak

The built lwtunnel_state struct has to be freed after comparison.

Fixes: 571e722676fe3 ("ipv4: support for fib route lwtunnel encap attributes")
Signed-off-by: Jiri Benc <jbenc@redhat.com>
Acked-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/lwtunnel.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
index e25b60eb262d..34fd8f70c2ca 100644
--- a/include/net/lwtunnel.h
+++ b/include/net/lwtunnel.h
@@ -36,6 +36,11 @@ struct lwtunnel_encap_ops {
 };
 
 #ifdef CONFIG_LWTUNNEL
+static inline void lwtstate_free(struct lwtunnel_state *lws)
+{
+	kfree(lws);
+}
+
 static inline struct lwtunnel_state *
 lwtstate_get(struct lwtunnel_state *lws)
 {
@@ -51,7 +56,7 @@ static inline void lwtstate_put(struct lwtunnel_state *lws)
 		return;
 
 	if (atomic_dec_and_test(&lws->refcnt))
-		kfree(lws);
+		lwtstate_free(lws);
 }
 
 static inline bool lwtunnel_output_redirect(struct lwtunnel_state *lwtstate)
-- 
cgit v1.2.3


From db5dbec5ef2d4565bb8d42709802de66b06f9965 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Tue, 18 Aug 2015 20:28:02 +0300
Subject: vrf: drop unused num_slaves member

slave_queue has a num_slaves member which is unused, drop it.

Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/vrf.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/vrf.h b/include/net/vrf.h
index 40e3793c7a05..3bb4af462ed6 100644
--- a/include/net/vrf.h
+++ b/include/net/vrf.h
@@ -24,7 +24,6 @@ struct slave {
 
 struct slave_queue {
 	struct list_head	all_slaves;
-	int			num_slaves;
 };
 
 struct net_vrf {
-- 
cgit v1.2.3


From bf798657eb5ba57552096843c315f096fdf9b715 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 12 Aug 2015 17:41:00 +0200
Subject: netfilter: nf_tables: Use 32 bit addressing register from
 nft_type_to_reg()

nft_type_to_reg() needs to return the register in the new 32 bit addressing,
otherwise we hit EINVAL when using mappings.

Fixes: 49499c3 ("netfilter: nf_tables: switch registers to 32 bit addressing")
Reported-by: Andreas Schultz <aschultz@tpip.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 2a246680a6c3..aa8bee72c9d3 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -125,7 +125,7 @@ static inline enum nft_data_types nft_dreg_to_type(enum nft_registers reg)
 
 static inline enum nft_registers nft_type_to_reg(enum nft_data_types type)
 {
-	return type == NFT_DATA_VERDICT ? NFT_REG_VERDICT : NFT_REG_1;
+	return type == NFT_DATA_VERDICT ? NFT_REG_VERDICT : NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE;
 }
 
 unsigned int nft_parse_register(const struct nlattr *attr);
-- 
cgit v1.2.3


From 18e1db67e93ed75d9dc0d34c8d783ccf10547c2b Mon Sep 17 00:00:00 2001
From: Bernhard Thaler <bernhard.thaler@wvnet.at>
Date: Thu, 13 Aug 2015 08:58:15 +0200
Subject: netfilter: bridge: fix IPv6 packets not being bridged with
 CONFIG_IPV6=n

230ac490f7fba introduced a dependency to CONFIG_IPV6 which breaks bridging
of IPv6 packets on a bridge with CONFIG_IPV6=n.

Sysctl entry /proc/sys/net/bridge/bridge-nf-call-ip6tables defaults to 1,
for this reason packets are handled by br_nf_pre_routing_ipv6(). When compiled
with CONFIG_IPV6=n this function returns NF_DROP but should return NF_ACCEPT
to let packets through.

Change CONFIG_IPV6=n br_nf_pre_routing_ipv6() return value to NF_ACCEPT.

Tested with a simple bridge with two interfaces and IPv6 packets trying
to pass from host on left side to host on right side of the bridge.

Fixes: 230ac490f7fba ("netfilter: bridge: split ipv6 code into separated file")
Signed-off-by: Bernhard Thaler <bernhard.thaler@wvnet.at>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/br_netfilter.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h
index bab824bde92c..d4c6b5f30acd 100644
--- a/include/net/netfilter/br_netfilter.h
+++ b/include/net/netfilter/br_netfilter.h
@@ -59,7 +59,7 @@ static inline unsigned int
 br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		       const struct nf_hook_state *state)
 {
-	return NF_DROP;
+	return NF_ACCEPT;
 }
 #endif
 
-- 
cgit v1.2.3


From 824e7383e92815cb591793c74cc836aa5165f7f8 Mon Sep 17 00:00:00 2001
From: Ying Xue <ying.xue@windriver.com>
Date: Wed, 19 Aug 2015 15:46:17 +0800
Subject: lwtunnel: Fix the sparse warnings in fib_encap_match
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When CONFIG_LWTUNNEL config is not enabled, the lwtstate_free() is not
declared in lwtunnel.h at all. However, even in this case, the function
is still referenced in fib_semantics.c so that there appears the
following sparse warnings:

net/ipv4/fib_semantics.c:553:17: error: undefined identifier 'lwtstate_free'
  CC      net/ipv4/fib_semantics.o
  net/ipv4/fib_semantics.c: In function ‘fib_encap_match’:
  net/ipv4/fib_semantics.c:553:3: error: implicit declaration of function ‘lwtstate_free’ [-Werror=implicit-function-declaration]
  cc1: some warnings being treated as errors
  make[1]: *** [net/ipv4/fib_semantics.o] Error 1
  make: *** [net/ipv4/fib_semantics.o] Error 2

To eliminate the error, we define an empty function for lwtstate_free()
in lwtunnel.h when CONFIG_LWTUNNEL is disabled.

Fixes: df383e6240ef ("lwtunnel: fix memory leak")
Cc: Jiri Benc <jbenc@redhat.com>
Reported-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/lwtunnel.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/net')

diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
index 34fd8f70c2ca..cfee53916ba5 100644
--- a/include/net/lwtunnel.h
+++ b/include/net/lwtunnel.h
@@ -93,6 +93,10 @@ int lwtunnel_input6(struct sk_buff *skb);
 
 #else
 
+static inline void lwtstate_free(struct lwtunnel_state *lws)
+{
+}
+
 static inline struct lwtunnel_state *
 lwtstate_get(struct lwtunnel_state *lws)
 {
-- 
cgit v1.2.3


From 18041e31743d278b6323518d20a2ef656c3cc689 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Tue, 18 Aug 2015 21:40:16 +0300
Subject: vrf: vrf_master_ifindex_rcu is not always called with rcu read lock

While running net-next I hit this:
[  634.073119] ===============================
[  634.073150] [ INFO: suspicious RCU usage. ]
[  634.073182] 4.2.0-rc6+ #45 Not tainted
[  634.073213] -------------------------------
[  634.073244] include/net/vrf.h:38 suspicious rcu_dereference_check()
usage!
[  634.073274]
               other info that might help us debug this:

[  634.073307]
               rcu_scheduler_active = 1, debug_locks = 1
[  634.073338] 2 locks held by swapper/0/0:
[  634.073369]  #0:  (((&n->timer))){+.-...}, at: [<ffffffff8112bc35>]
call_timer_fn+0x5/0x480
[  634.073412]  #1:  (slock-AF_INET){+.-...}, at: [<ffffffff8174f0f5>]
icmp_send+0x155/0x5f0
[  634.073450]
               stack backtrace:
[  634.073483] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.2.0-rc6+ #45
[  634.073514] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS
VirtualBox 12/01/2006
[  634.073545]  0000000000000000 0593ba8242d9ace4 ffff88002fc03b48
ffffffff81803f1b
[  634.073612]  0000000000000000 ffffffff81e12500 ffff88002fc03b78
ffffffff811003c5
[  634.073642]  0000000000000000 ffff88002ec4e600 ffffffff81f00f80
ffff88002fc03cf0
[  634.073669] Call Trace:
[  634.073694]  <IRQ>  [<ffffffff81803f1b>] dump_stack+0x4c/0x65
[  634.073728]  [<ffffffff811003c5>] lockdep_rcu_suspicious+0xc5/0x100
[  634.073763]  [<ffffffff8174eb56>] icmp_route_lookup+0x176/0x5c0
[  634.073793]  [<ffffffff8174f2fb>] ? icmp_send+0x35b/0x5f0
[  634.073818]  [<ffffffff8174f274>] ? icmp_send+0x2d4/0x5f0
[  634.073844]  [<ffffffff8174f3ce>] icmp_send+0x42e/0x5f0
[  634.073873]  [<ffffffff8170b662>] ipv4_link_failure+0x22/0xa0
[  634.073899]  [<ffffffff8174bdda>] arp_error_report+0x3a/0x80
[  634.073926]  [<ffffffff816d6100>] ? neigh_lookup+0x2c0/0x2c0
[  634.073952]  [<ffffffff816d396e>] neigh_invalidate+0x8e/0x110
[  634.073984]  [<ffffffff816d62ae>] neigh_timer_handler+0x1ae/0x290
[  634.074013]  [<ffffffff816d6100>] ? neigh_lookup+0x2c0/0x2c0
[  634.074013]  [<ffffffff8112bce3>] call_timer_fn+0xb3/0x480
[  634.074013]  [<ffffffff8112bc35>] ? call_timer_fn+0x5/0x480
[  634.074013]  [<ffffffff816d6100>] ? neigh_lookup+0x2c0/0x2c0
[  634.074013]  [<ffffffff8112c2bc>] run_timer_softirq+0x20c/0x430
[  634.074013]  [<ffffffff810af50e>] __do_softirq+0xde/0x630
[  634.074013]  [<ffffffff810afc97>] irq_exit+0x117/0x120
[  634.074013]  [<ffffffff81810976>] smp_apic_timer_interrupt+0x46/0x60
[  634.074013]  [<ffffffff8180e950>] apic_timer_interrupt+0x70/0x80
[  634.074013]  <EOI>  [<ffffffff8106b9d6>] ? native_safe_halt+0x6/0x10
[  634.074013]  [<ffffffff81101d8d>] ? trace_hardirqs_on+0xd/0x10
[  634.074013]  [<ffffffff81027d43>] default_idle+0x23/0x200
[  634.074013]  [<ffffffff8102852f>] arch_cpu_idle+0xf/0x20
[  634.074013]  [<ffffffff810f89ba>] default_idle_call+0x2a/0x40
[  634.074013]  [<ffffffff810f8dcc>] cpu_startup_entry+0x39c/0x4c0
[  634.074013]  [<ffffffff817f9cad>] rest_init+0x13d/0x150
[  634.074013]  [<ffffffff81f69038>] start_kernel+0x4a8/0x4c9
[  634.074013]  [<ffffffff81f68120>] ?
early_idt_handler_array+0x120/0x120
[  634.074013]  [<ffffffff81f68339>] x86_64_start_reservations+0x2a/0x2c
[  634.074013]  [<ffffffff81f68485>] x86_64_start_kernel+0x14a/0x16d

It would seem vrf_master_ifindex_rcu() can be called without RCU held in
other contexts as well so introduce a new helper which acquires rcu and
returns the ifindex.
Also add curly braces around both the "if" and "else" parts as per the
style guide.

Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/vrf.h | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/vrf.h b/include/net/vrf.h
index 3bb4af462ed6..5bfb16237fd7 100644
--- a/include/net/vrf.h
+++ b/include/net/vrf.h
@@ -43,9 +43,9 @@ static inline int vrf_master_ifindex_rcu(const struct net_device *dev)
 	if (!dev)
 		return 0;
 
-	if (netif_is_vrf(dev))
+	if (netif_is_vrf(dev)) {
 		ifindex = dev->ifindex;
-	else {
+	} else {
 		vrf_ptr = rcu_dereference(dev->vrf_ptr);
 		if (vrf_ptr)
 			ifindex = vrf_ptr->ifindex;
@@ -54,6 +54,17 @@ static inline int vrf_master_ifindex_rcu(const struct net_device *dev)
 	return ifindex;
 }
 
+static inline int vrf_master_ifindex(const struct net_device *dev)
+{
+	int ifindex;
+
+	rcu_read_lock();
+	ifindex = vrf_master_ifindex_rcu(dev);
+	rcu_read_unlock();
+
+	return ifindex;
+}
+
 /* called with rcu_read_lock */
 static inline int vrf_dev_table_rcu(const struct net_device *dev)
 {
@@ -133,6 +144,11 @@ static inline int vrf_master_ifindex_rcu(const struct net_device *dev)
 	return 0;
 }
 
+static inline int vrf_master_ifindex(const struct net_device *dev)
+{
+	return 0;
+}
+
 static inline int vrf_dev_table_rcu(const struct net_device *dev)
 {
 	return 0;
-- 
cgit v1.2.3


From fdf79bd48876812acf0de58ed7a8bc1b3a3c67d6 Mon Sep 17 00:00:00 2001
From: Robert Baldyga <r.baldyga@samsung.com>
Date: Thu, 20 Aug 2015 17:26:00 +0200
Subject: NFC: nci: Add post_setup handler

Some drivers require non-standard configuration after NCI_CORE_INIT
request, because they need to know ndev->manufact_specific_info or
ndev->manufact_id. This patch adds post_setup handler allowing to do
such custom configuration.

Signed-off-by: Robert Baldyga <r.baldyga@samsung.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/net/nfc/nci_core.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h
index 01fc8c531115..1bdaa5f51107 100644
--- a/include/net/nfc/nci_core.h
+++ b/include/net/nfc/nci_core.h
@@ -79,6 +79,7 @@ struct nci_ops {
 	int   (*close)(struct nci_dev *ndev);
 	int   (*send)(struct nci_dev *ndev, struct sk_buff *skb);
 	int   (*setup)(struct nci_dev *ndev);
+	int   (*post_setup)(struct nci_dev *ndev);
 	int   (*fw_download)(struct nci_dev *ndev, const char *firmware_name);
 	__u32 (*get_rfprotocol)(struct nci_dev *ndev, __u8 rf_protocol);
 	int   (*discover_se)(struct nci_dev *ndev);
-- 
cgit v1.2.3


From 025a0cb8380b7100d39fb426db9192b6c59595dc Mon Sep 17 00:00:00 2001
From: Robert Baldyga <r.baldyga@samsung.com>
Date: Thu, 20 Aug 2015 17:26:01 +0200
Subject: NFC: nci: export nci_core_reset and nci_core_init

Some drivers needs to have ability to reinit NCI core, for example
after updating firmware in setup() of post_setup() callback. This
patch makes nci_core_reset() and nci_core_init() functions public,
to make it possible.

Signed-off-by: Robert Baldyga <r.baldyga@samsung.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/net/nfc/nci_core.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h
index 1bdaa5f51107..d0d0f1e53bb9 100644
--- a/include/net/nfc/nci_core.h
+++ b/include/net/nfc/nci_core.h
@@ -278,6 +278,8 @@ int nci_request(struct nci_dev *ndev,
 			    unsigned long opt),
 		unsigned long opt, __u32 timeout);
 int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, __u8 *payload);
+int nci_core_reset(struct nci_dev *ndev);
+int nci_core_init(struct nci_dev *ndev);
 
 int nci_recv_frame(struct nci_dev *ndev, struct sk_buff *skb);
 int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, __u8 *val);
-- 
cgit v1.2.3


From 29e76924cf087bc6a9114a9244828fd13ae959bb Mon Sep 17 00:00:00 2001
From: Christophe Ricard <christophe.ricard@gmail.com>
Date: Wed, 19 Aug 2015 21:26:43 +0200
Subject: nfc: netlink: Add capability to reply to vendor_cmd with data

A proprietary vendor command may send back useful data to the user
application.

For example, the field level applied on the NFC router antenna.

Still based on net/wireless/nl80211.c implementation,
add nfc_vendor_cmd_alloc_reply_skb and nfc_vendor_cmd_reply in
order to send back over netlink data generated by a proprietary
command.

Signed-off-by: Christophe Ricard <christophe-h.ricard@st.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/net/nfc/nfc.h | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

(limited to 'include/net')

diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h
index f9e58ae45f9c..30afc9a6718c 100644
--- a/include/net/nfc/nfc.h
+++ b/include/net/nfc/nfc.h
@@ -203,6 +203,7 @@ struct nfc_dev {
 	int n_vendor_cmds;
 
 	struct nfc_ops *ops;
+	struct genl_info *cur_cmd_info;
 };
 #define to_nfc_dev(_dev) container_of(_dev, struct nfc_dev, dev)
 
@@ -318,4 +319,44 @@ static inline int nfc_set_vendor_cmds(struct nfc_dev *dev,
 	return 0;
 }
 
+struct sk_buff *__nfc_alloc_vendor_cmd_reply_skb(struct nfc_dev *dev,
+						 enum nfc_attrs attr,
+						 u32 oui, u32 subcmd,
+						 int approxlen);
+int nfc_vendor_cmd_reply(struct sk_buff *skb);
+
+/**
+ * nfc_vendor_cmd_alloc_reply_skb - allocate vendor command reply
+ * @dev: nfc device
+ * @oui: vendor oui
+ * @approxlen: an upper bound of the length of the data that will
+ *      be put into the skb
+ *
+ * This function allocates and pre-fills an skb for a reply to
+ * a vendor command. Since it is intended for a reply, calling
+ * it outside of a vendor command's doit() operation is invalid.
+ *
+ * The returned skb is pre-filled with some identifying data in
+ * a way that any data that is put into the skb (with skb_put(),
+ * nla_put() or similar) will end up being within the
+ * %NFC_ATTR_VENDOR_DATA attribute, so all that needs to be done
+ * with the skb is adding data for the corresponding userspace tool
+ * which can then read that data out of the vendor data attribute.
+ * You must not modify the skb in any other way.
+ *
+ * When done, call nfc_vendor_cmd_reply() with the skb and return
+ * its error code as the result of the doit() operation.
+ *
+ * Return: An allocated and pre-filled skb. %NULL if any errors happen.
+ */
+static inline struct sk_buff *
+nfc_vendor_cmd_alloc_reply_skb(struct nfc_dev *dev,
+				u32 oui, u32 subcmd, int approxlen)
+{
+	return __nfc_alloc_vendor_cmd_reply_skb(dev,
+						NFC_ATTR_VENDOR_DATA,
+						oui,
+						subcmd, approxlen);
+}
+
 #endif /* __NET_NFC_H */
-- 
cgit v1.2.3


From ac1cf3990c99802eae3aa735b35c94a2131eb9fe Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Thu, 20 Aug 2015 13:56:20 +0200
Subject: ip_tunnels: remove custom alignment and packing

The custom alignment of struct ip_tunnel_key is unnecessary. In struct
sw_flow_key, it starts at offset 256, in struct ip_tunnel_info it's the
first field.

The structure is also packed even without the __packed keyword.

Signed-off-by: Jiri Benc <jbenc@redhat.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_tunnels.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 984dbfa15e13..81cf11c931e4 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -36,7 +36,7 @@ struct ip_tunnel_key {
 	__u8			ipv4_ttl;
 	__be16			tp_src;
 	__be16			tp_dst;
-} __packed __aligned(4); /* Minimize padding. */
+};
 
 /* Indicates whether the tunnel info structure represents receive
  * or transmit tunnel parameters.
-- 
cgit v1.2.3


From 6b8847c5a2bafbbf92f4b779f87165093457ea68 Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Thu, 20 Aug 2015 13:56:21 +0200
Subject: ip_tunnels: use u8/u16/u32

The ip_tunnels.h include file uses mixture of __u16 and u16 (etc.) types.
Unify it to the non-underscore variants.

Signed-off-by: Jiri Benc <jbenc@redhat.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_tunnels.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/net')

diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 81cf11c931e4..ca173f22f07f 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -32,8 +32,8 @@ struct ip_tunnel_key {
 	__be32			ipv4_src;
 	__be32			ipv4_dst;
 	__be16			tun_flags;
-	__u8			ipv4_tos;
-	__u8			ipv4_ttl;
+	u8			ipv4_tos;
+	u8			ipv4_ttl;
 	__be16			tp_src;
 	__be16			tp_dst;
 };
@@ -64,8 +64,8 @@ struct ip_tunnel_6rd_parm {
 #endif
 
 struct ip_tunnel_encap {
-	__u16			type;
-	__u16			flags;
+	u16			type;
+	u16			flags;
 	__be16			sport;
 	__be16			dport;
 };
@@ -95,8 +95,8 @@ struct ip_tunnel {
 					 * arrived */
 
 	/* These four fields used only by GRE */
-	__u32		i_seqno;	/* The last seen seqno	*/
-	__u32		o_seqno;	/* The last output seqno */
+	u32		i_seqno;	/* The last seen seqno	*/
+	u32		o_seqno;	/* The last output seqno */
 	int		tun_hlen;	/* Precalculated header length */
 	int		mlink;
 
@@ -273,8 +273,8 @@ static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph,
 
 int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto);
 int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
-		  __be32 src, __be32 dst, __u8 proto,
-		  __u8 tos, __u8 ttl, __be16 df, bool xnet);
+		  __be32 src, __be32 dst, u8 proto,
+		  u8 tos, u8 ttl, __be16 df, bool xnet);
 
 struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, bool gre_csum,
 					 int gso_type_mask);
-- 
cgit v1.2.3


From 376534a3d17002d608985bd67c3b0880eacadd14 Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Thu, 20 Aug 2015 13:56:22 +0200
Subject: ip_tunnels: use offsetofend

Signed-off-by: Jiri Benc <jbenc@redhat.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_tunnels.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index ca173f22f07f..cc3b39e9010b 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -23,9 +23,7 @@
 #define IPTUNNEL_ERR_TIMEO	(30*HZ)
 
 /* Used to memset ip_tunnel padding. */
-#define IP_TUNNEL_KEY_SIZE					\
-	(offsetof(struct ip_tunnel_key, tp_dst) +		\
-	 FIELD_SIZEOF(struct ip_tunnel_key, tp_dst))
+#define IP_TUNNEL_KEY_SIZE	offsetofend(struct ip_tunnel_key, tp_dst)
 
 struct ip_tunnel_key {
 	__be64			tun_id;
-- 
cgit v1.2.3


From c1ea5d672aaff08da337dee735dbb548e3415585 Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Thu, 20 Aug 2015 13:56:23 +0200
Subject: ip_tunnels: add IPv6 addresses to ip_tunnel_key

Add the IPv6 addresses as an union with IPv4 ones. When using IPv4, the
newly introduced padding after the IPv4 addresses needs to be zeroed out.

Signed-off-by: Jiri Benc <jbenc@redhat.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_tunnels.h | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

(limited to 'include/net')

diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index cc3b39e9010b..6a51371dad00 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -25,10 +25,24 @@
 /* Used to memset ip_tunnel padding. */
 #define IP_TUNNEL_KEY_SIZE	offsetofend(struct ip_tunnel_key, tp_dst)
 
+/* Used to memset ipv4 address padding. */
+#define IP_TUNNEL_KEY_IPV4_PAD	offsetofend(struct ip_tunnel_key, u.ipv4.dst)
+#define IP_TUNNEL_KEY_IPV4_PAD_LEN				\
+	(FIELD_SIZEOF(struct ip_tunnel_key, u) -		\
+	 FIELD_SIZEOF(struct ip_tunnel_key, u.ipv4))
+
 struct ip_tunnel_key {
 	__be64			tun_id;
-	__be32			ipv4_src;
-	__be32			ipv4_dst;
+	union {
+		struct {
+			__be32	src;
+			__be32	dst;
+		} ipv4;
+		struct {
+			struct in6_addr src;
+			struct in6_addr dst;
+		} ipv6;
+	} u;
 	__be16			tun_flags;
 	u8			ipv4_tos;
 	u8			ipv4_ttl;
@@ -177,8 +191,10 @@ static inline void __ip_tunnel_info_init(struct ip_tunnel_info *tun_info,
 					 const void *opts, u8 opts_len)
 {
 	tun_info->key.tun_id = tun_id;
-	tun_info->key.ipv4_src = saddr;
-	tun_info->key.ipv4_dst = daddr;
+	tun_info->key.u.ipv4.src = saddr;
+	tun_info->key.u.ipv4.dst = daddr;
+	memset((unsigned char *)&tun_info->key + IP_TUNNEL_KEY_IPV4_PAD,
+	       0, IP_TUNNEL_KEY_IPV4_PAD_LEN);
 	tun_info->key.ipv4_tos = tos;
 	tun_info->key.ipv4_ttl = ttl;
 	tun_info->key.tun_flags = tun_flags;
-- 
cgit v1.2.3


From 7c383fb2254c44e096427470da6a36380169b548 Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Thu, 20 Aug 2015 13:56:24 +0200
Subject: ip_tunnels: use tos and ttl fields also for IPv6

Rename the ipv4_tos and ipv4_ttl fields to just 'tos' and 'ttl', as they'll
be used with IPv6 tunnels, too.

Signed-off-by: Jiri Benc <jbenc@redhat.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_tunnels.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/net')

diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 6a51371dad00..224e4ecec91b 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -44,8 +44,8 @@ struct ip_tunnel_key {
 		} ipv6;
 	} u;
 	__be16			tun_flags;
-	u8			ipv4_tos;
-	u8			ipv4_ttl;
+	u8			tos;		/* TOS for IPv4, TC for IPv6 */
+	u8			ttl;		/* TTL for IPv4, HL for IPv6 */
 	__be16			tp_src;
 	__be16			tp_dst;
 };
@@ -195,8 +195,8 @@ static inline void __ip_tunnel_info_init(struct ip_tunnel_info *tun_info,
 	tun_info->key.u.ipv4.dst = daddr;
 	memset((unsigned char *)&tun_info->key + IP_TUNNEL_KEY_IPV4_PAD,
 	       0, IP_TUNNEL_KEY_IPV4_PAD_LEN);
-	tun_info->key.ipv4_tos = tos;
-	tun_info->key.ipv4_ttl = ttl;
+	tun_info->key.tos = tos;
+	tun_info->key.ttl = ttl;
 	tun_info->key.tun_flags = tun_flags;
 
 	/* For the tunnel types on the top of IPsec, the tp_src and tp_dst of
-- 
cgit v1.2.3


From 61adedf3e3f1d3f032c5a6a299978d91eff6d555 Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Thu, 20 Aug 2015 13:56:25 +0200
Subject: route: move lwtunnel state to dst_entry

Currently, the lwtunnel state resides in per-protocol data. This is
a problem if we encapsulate ipv6 traffic in an ipv4 tunnel (or vice versa).
The xmit function of the tunnel does not know whether the packet has been
routed to it by ipv4 or ipv6, yet it needs the lwtstate data. Moving the
lwtstate data to dst_entry makes such inter-protocol tunneling possible.

As a bonus, this brings a nice diffstat.

Signed-off-by: Jiri Benc <jbenc@redhat.com>
Acked-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst.h          |  3 ++-
 include/net/dst_metadata.h | 15 +++++----------
 include/net/ip6_fib.h      |  1 -
 include/net/lwtunnel.h     | 12 ------------
 include/net/route.h        |  1 -
 5 files changed, 7 insertions(+), 25 deletions(-)

(limited to 'include/net')

diff --git a/include/net/dst.h b/include/net/dst.h
index 2578811cef51..0a9a723f6c19 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -44,6 +44,7 @@ struct dst_entry {
 #else
 	void			*__pad1;
 #endif
+	struct lwtunnel_state   *lwtstate;
 	int			(*input)(struct sk_buff *);
 	int			(*output)(struct sock *sk, struct sk_buff *skb);
 
@@ -89,7 +90,7 @@ struct dst_entry {
 	 * (L1_CACHE_SIZE would be too much)
 	 */
 #ifdef CONFIG_64BIT
-	long			__pad_to_align_refcnt[2];
+	long			__pad_to_align_refcnt[1];
 #endif
 	/*
 	 * __refcnt wants to be on a different cache line from
diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h
index 075f523ff23f..2cb52d562272 100644
--- a/include/net/dst_metadata.h
+++ b/include/net/dst_metadata.h
@@ -23,22 +23,17 @@ static inline struct metadata_dst *skb_metadata_dst(struct sk_buff *skb)
 	return NULL;
 }
 
-static inline struct ip_tunnel_info *skb_tunnel_info(struct sk_buff *skb,
-						     int family)
+static inline struct ip_tunnel_info *skb_tunnel_info(struct sk_buff *skb)
 {
 	struct metadata_dst *md_dst = skb_metadata_dst(skb);
-	struct rtable *rt;
+	struct dst_entry *dst;
 
 	if (md_dst)
 		return &md_dst->u.tun_info;
 
-	switch (family) {
-	case AF_INET:
-		rt = (struct rtable *)skb_dst(skb);
-		if (rt && rt->rt_lwtstate)
-			return lwt_tun_info(rt->rt_lwtstate);
-		break;
-	}
+	dst = skb_dst(skb);
+	if (dst && dst->lwtstate)
+		return lwt_tun_info(dst->lwtstate);
 
 	return NULL;
 }
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 276328e3daa6..063d30474cf6 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -133,7 +133,6 @@ struct rt6_info {
 	/* more non-fragment space at head required */
 	unsigned short			rt6i_nfheader_len;
 	u8				rt6i_protocol;
-	struct lwtunnel_state		*rt6i_lwtstate;
 };
 
 static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst)
diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
index cfee53916ba5..843489884448 100644
--- a/include/net/lwtunnel.h
+++ b/include/net/lwtunnel.h
@@ -87,9 +87,7 @@ int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate);
 struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len);
 int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b);
 int lwtunnel_output(struct sock *sk, struct sk_buff *skb);
-int lwtunnel_output6(struct sock *sk, struct sk_buff *skb);
 int lwtunnel_input(struct sk_buff *skb);
-int lwtunnel_input6(struct sk_buff *skb);
 
 #else
 
@@ -164,21 +162,11 @@ static inline int lwtunnel_output(struct sock *sk, struct sk_buff *skb)
 	return -EOPNOTSUPP;
 }
 
-static inline int lwtunnel_output6(struct sock *sk, struct sk_buff *skb)
-{
-	return -EOPNOTSUPP;
-}
-
 static inline int lwtunnel_input(struct sk_buff *skb)
 {
 	return -EOPNOTSUPP;
 }
 
-static inline int lwtunnel_input6(struct sk_buff *skb)
-{
-	return -EOPNOTSUPP;
-}
-
 #endif
 
 #endif /* __NET_LWTUNNEL_H */
diff --git a/include/net/route.h b/include/net/route.h
index 6dda2c1bf8c6..395d79bb556c 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -66,7 +66,6 @@ struct rtable {
 
 	struct list_head	rt_uncached;
 	struct uncached_list	*rt_uncached_list;
-	struct lwtunnel_state   *rt_lwtstate;
 };
 
 static inline bool rt_is_input_route(const struct rtable *rt)
-- 
cgit v1.2.3


From ab450605b35caa768ca33e86db9403229bf42be4 Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Thu, 20 Aug 2015 13:56:27 +0200
Subject: ipv6: ndisc: inherit metadata dst when creating ndisc requests

If output device wants to see the dst, inherit the dst of the original skb
in the ndisc request.

This is an IPv6 counterpart of commit 0accfc268f4d ("arp: Inherit metadata
dst when creating ARP requests").

Signed-off-by: Jiri Benc <jbenc@redhat.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ndisc.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index b3a7751251b4..aba5695fadb0 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -182,7 +182,8 @@ int ndisc_rcv(struct sk_buff *skb);
 
 void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
 		   const struct in6_addr *solicit,
-		   const struct in6_addr *daddr, const struct in6_addr *saddr);
+		   const struct in6_addr *daddr, const struct in6_addr *saddr,
+		   struct sk_buff *oskb);
 
 void ndisc_send_rs(struct net_device *dev,
 		   const struct in6_addr *saddr, const struct in6_addr *daddr);
-- 
cgit v1.2.3


From 705cc62f6728c5a23e3c82465aa94e652e0b50e4 Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Thu, 20 Aug 2015 13:56:28 +0200
Subject: vxlan: provide access function for vxlan socket address family

Signed-off-by: Jiri Benc <jbenc@redhat.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/vxlan.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/net')

diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index e4534f1b2d8c..43677e6b9c43 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -241,3 +241,8 @@ static inline void vxlan_get_rx_port(struct net_device *netdev)
 }
 #endif
 #endif
+
+static inline unsigned short vxlan_get_sk_family(struct vxlan_sock *vs)
+{
+	return vs->sock->sk->sk_family;
+}
-- 
cgit v1.2.3


From 904af04d30f303d96902584206457128c3051d8d Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Thu, 20 Aug 2015 13:56:31 +0200
Subject: ipv6: route: extend flow representation with tunnel key

Use flowi_tunnel in flowi6 similarly to what is done with IPv4.
This complements commit 1b7179d3adff ("route: Extend flow representation
with tunnel key").

Signed-off-by: Jiri Benc <jbenc@redhat.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/flow.h b/include/net/flow.h
index f305588fc162..9e0297c4c11d 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -130,6 +130,7 @@ struct flowi6 {
 #define flowi6_proto		__fl_common.flowic_proto
 #define flowi6_flags		__fl_common.flowic_flags
 #define flowi6_secid		__fl_common.flowic_secid
+#define flowi6_tun_key		__fl_common.flowic_tun_key
 	struct in6_addr		daddr;
 	struct in6_addr		saddr;
 	__be32			flowlabel;
-- 
cgit v1.2.3


From e4ff67513096e6e196ca58043fce04d0f87babbe Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Sun, 26 Jul 2015 15:03:27 +0300
Subject: ipvs: add sync_maxlen parameter for the sync daemon

Allow setups with large MTU to send large sync packets by
adding sync_maxlen parameter. The default value is now based
on MTU but no more than 1500 for compatibility reasons.

To avoid problems if MTU changes allow fragmentation by
sending packets with DF=0. Problem reported by Dan Carpenter.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 include/net/ip_vs.h | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

(limited to 'include/net')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 4e3731ee4eac..2fdc13caf712 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -846,6 +846,13 @@ struct ipvs_master_sync_state {
 /* How much time to keep dests in trash */
 #define IP_VS_DEST_TRASH_PERIOD		(120 * HZ)
 
+struct ipvs_sync_daemon_cfg {
+	int			syncid;
+	u16			sync_maxlen;
+	/* multicast interface name */
+	char			mcast_ifn[IP_VS_IFNAME_MAXLEN];
+};
+
 /* IPVS in network namespace */
 struct netns_ipvs {
 	int			gen;		/* Generation */
@@ -961,15 +968,10 @@ struct netns_ipvs {
 	spinlock_t		sync_buff_lock;
 	struct task_struct	**backup_threads;
 	int			threads_mask;
-	int			send_mesg_maxlen;
-	int			recv_mesg_maxlen;
 	volatile int		sync_state;
-	volatile int		master_syncid;
-	volatile int		backup_syncid;
 	struct mutex		sync_mutex;
-	/* multicast interface name */
-	char			master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-	char			backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
+	struct ipvs_sync_daemon_cfg	mcfg;	/* Master Configuration */
+	struct ipvs_sync_daemon_cfg	bcfg;	/* Backup Configuration */
 	/* net name space ptr */
 	struct net		*net;            /* Needed by timer routines */
 	/* Number of heterogeneous destinations, needed becaus heterogeneous
@@ -1408,7 +1410,8 @@ static inline void ip_vs_dest_put_and_free(struct ip_vs_dest *dest)
 /* IPVS sync daemon data and function prototypes
  * (from ip_vs_sync.c)
  */
-int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid);
+int start_sync_thread(struct net *net, struct ipvs_sync_daemon_cfg *cfg,
+		      int state);
 int stop_sync_thread(struct net *net, int state);
 void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts);
 
-- 
cgit v1.2.3


From d33288172e72c4729e8b9f2243fb40601afabc8f Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Sun, 26 Jul 2015 15:03:28 +0300
Subject: ipvs: add more mcast parameters for the sync daemon

- mcast_group: configure the multicast address, now IPv6
is supported too

- mcast_port: configure the multicast port

- mcast_ttl: configure the multicast TTL/HOP_LIMIT

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 include/net/ip_vs.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/net')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 2fdc13caf712..9b9ca87a4210 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -847,8 +847,12 @@ struct ipvs_master_sync_state {
 #define IP_VS_DEST_TRASH_PERIOD		(120 * HZ)
 
 struct ipvs_sync_daemon_cfg {
+	union nf_inet_addr	mcast_group;
 	int			syncid;
 	u16			sync_maxlen;
+	u16			mcast_port;
+	u8			mcast_af;
+	u8			mcast_ttl;
 	/* multicast interface name */
 	char			mcast_ifn[IP_VS_IFNAME_MAXLEN];
 };
-- 
cgit v1.2.3


From 58ce31cca1ffe057f4744c3f671e3e84606d3d4a Mon Sep 17 00:00:00 2001
From: Tom Herbert <tom@herbertland.com>
Date: Wed, 19 Aug 2015 17:07:33 -0700
Subject: vxlan: GRO support at tunnel layer

Add calls to gro_cells infrastructure to do GRO when receiving on a tunnel.

Testing:

Ran 200 netperf TCP_STREAM instance

  - With fix (GRO enabled on VXLAN interface)

    Verify GRO is happening.

    9084 MBps tput
    3.44% CPU utilization

  - Without fix (GRO disabled on VXLAN interface)

    Verified no GRO is happening.

    9084 MBps tput
    5.54% CPU utilization

Signed-off-by: Tom Herbert <tom@herbertland.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/vxlan.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 43677e6b9c43..6b3234599a2c 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -161,6 +161,7 @@ struct vxlan_dev {
 	struct timer_list age_timer;
 	spinlock_t	  hash_lock;
 	unsigned int	  addrcnt;
+	struct gro_cells  gro_cells;
 
 	struct vxlan_config	cfg;
 
-- 
cgit v1.2.3


From 751a587ac9f9a8bf314590fbac32d9e418060c5a Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Fri, 21 Aug 2015 12:41:14 +0200
Subject: route: fix breakage after moving lwtunnel state

__recnt and related fields need to be in its own cacheline for performance
reasons. Commit 61adedf3e3f1 ("route: move lwtunnel state to dst_entry")
broke that on 32bit archs, causing BUILD_BUG_ON in dst_hold to be triggered.

This patch fixes the breakage by moving the lwtunnel state to the end of
dst_entry on 32bit archs. Unfortunately, this makes it share the cacheline
with __refcnt and may affect performance, thus further patches may be
needed.

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Fixes: 61adedf3e3f1 ("route: move lwtunnel state to dst_entry")
Signed-off-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/dst.h b/include/net/dst.h
index 0a9a723f6c19..ef8f1d43a203 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -44,7 +44,6 @@ struct dst_entry {
 #else
 	void			*__pad1;
 #endif
-	struct lwtunnel_state   *lwtstate;
 	int			(*input)(struct sk_buff *);
 	int			(*output)(struct sock *sk, struct sk_buff *skb);
 
@@ -85,11 +84,12 @@ struct dst_entry {
 	__u32			__pad2;
 #endif
 
+#ifdef CONFIG_64BIT
+	struct lwtunnel_state   *lwtstate;
 	/*
 	 * Align __refcnt to a 64 bytes alignment
 	 * (L1_CACHE_SIZE would be too much)
 	 */
-#ifdef CONFIG_64BIT
 	long			__pad_to_align_refcnt[1];
 #endif
 	/*
@@ -99,6 +99,9 @@ struct dst_entry {
 	atomic_t		__refcnt;	/* client references	*/
 	int			__use;
 	unsigned long		lastuse;
+#ifndef CONFIG_64BIT
+	struct lwtunnel_state   *lwtstate;
+#endif
 	union {
 		struct dst_entry	*next;
 		struct rtable __rcu	*rt_next;
-- 
cgit v1.2.3


From 127eb7cd3c210afead788991a30950a9e36759ea Mon Sep 17 00:00:00 2001
From: Tom Herbert <tom@herbertland.com>
Date: Mon, 24 Aug 2015 09:45:41 -0700
Subject: lwt: Add cfg argument to build_state

Add cfg and family arguments to lwt build state functions. cfg is a void
pointer and will either be a pointer to a fib_config or fib6_config
structure. The family parameter indicates which one (either AF_INET
or AF_INET6).

LWT encpasulation implementation may use the fib configuration to build
the LWT state.

Signed-off-by: Tom Herbert <tom@herbertland.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/lwtunnel.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/net')

diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
index 843489884448..fce0e35e74d0 100644
--- a/include/net/lwtunnel.h
+++ b/include/net/lwtunnel.h
@@ -26,6 +26,7 @@ struct lwtunnel_state {
 
 struct lwtunnel_encap_ops {
 	int (*build_state)(struct net_device *dev, struct nlattr *encap,
+			   unsigned int family, const void *cfg,
 			   struct lwtunnel_state **ts);
 	int (*output)(struct sock *sk, struct sk_buff *skb);
 	int (*input)(struct sk_buff *skb);
@@ -80,6 +81,7 @@ int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op,
 			   unsigned int num);
 int lwtunnel_build_state(struct net_device *dev, u16 encap_type,
 			 struct nlattr *encap,
+			 unsigned int family, const void *cfg,
 			 struct lwtunnel_state **lws);
 int lwtunnel_fill_encap(struct sk_buff *skb,
 			struct lwtunnel_state *lwtstate);
@@ -130,6 +132,7 @@ static inline int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op,
 
 static inline int lwtunnel_build_state(struct net_device *dev, u16 encap_type,
 				       struct nlattr *encap,
+				       unsigned int family, const void *cfg,
 				       struct lwtunnel_state **lws)
 {
 	return -EOPNOTSUPP;
-- 
cgit v1.2.3


From 6f021c62d64f38092bc2a0c5fe7b81d5e5b21a00 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 21 Aug 2015 12:30:00 -0700
Subject: tcp: fix slow start after idle vs TSO/GSO

slow start after idle might reduce cwnd, but we perform this
after first packet was cooked and sent.

With TSO/GSO, it means that we might send a full TSO packet
even if cwnd should have been reduced to IW10.

Moving the SSAI check in skb_entail() makes sense, because
we slightly reduce number of times this check is done,
especially for large send() and TCP Small queue callbacks from
softirq context.

As Neal pointed out, we also need to perform the check
if/when receive window opens.

Tested:

Following packetdrill test demonstrates the problem
// Test of slow start after idle

`sysctl -q net.ipv4.tcp_slow_start_after_idle=1`

0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0    setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+0    bind(3, ..., ...) = 0
+0    listen(3, 1) = 0

+0    < S 0:0(0) win 65535 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+0    > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 6>
+.100 < . 1:1(0) ack 1 win 511
+0    accept(3, ..., ...) = 4
+0    setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0

+0    write(4, ..., 26000) = 26000
+0    > . 1:5001(5000) ack 1
+0    > . 5001:10001(5000) ack 1
+0    %{ assert tcpi_snd_cwnd == 10 }%

+.100 < . 1:1(0) ack 10001 win 511
+0    %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }%
+0    > . 10001:20001(10000) ack 1
+0    > P. 20001:26001(6000) ack 1

+.100 < . 1:1(0) ack 26001 win 511
+0    %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }%

+4 write(4, ..., 20000) = 20000
// If slow start after idle works properly, we should send 5 MSS here (cwnd/2)
+0    > . 26001:31001(5000) ack 1
+0    %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+0    > . 31001:36001(5000) ack 1

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 364426a2be5a..309801f7eb82 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1165,6 +1165,19 @@ static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)
 }
 
 u32 tcp_default_init_rwnd(u32 mss);
+void tcp_cwnd_restart(struct sock *sk, s32 delta);
+
+static inline void tcp_slow_start_after_idle_check(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	s32 delta;
+
+	if (!sysctl_tcp_slow_start_after_idle || tp->packets_out)
+		return;
+	delta = tcp_time_stamp - tp->lsndtime;
+	if (delta > inet_csk(sk)->icsk_rto)
+		tcp_cwnd_restart(sk, delta);
+}
 
 /* Determine a window scaling and initial window to offer. */
 void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd,
-- 
cgit v1.2.3


From 43e122b014c955a33220fabbd09c4b5e4f422c3c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 21 Aug 2015 17:38:02 -0700
Subject: tcp: refine pacing rate determination

When TCP pacing was added back in linux-3.12, we chose
to apply a fixed ratio of 200 % against current rate,
to allow probing for optimal throughput even during
slow start phase, where cwnd can be doubled every other gRTT.

At Google, we found it was better applying a different ratio
while in Congestion Avoidance phase.
This ratio was set to 120 %.

We've used the normal tcp_in_slow_start() helper for a while,
then tuned the condition to select the conservative ratio
as soon as cwnd >= ssthresh/2 :

- After cwnd reduction, it is safer to ramp up more slowly,
  as we approach optimal cwnd.
- Initial ramp up (ssthresh == INFINITY) still allows doubling
  cwnd every other RTT.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 309801f7eb82..4a7b03947a38 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -281,6 +281,8 @@ extern unsigned int sysctl_tcp_notsent_lowat;
 extern int sysctl_tcp_min_tso_segs;
 extern int sysctl_tcp_autocorking;
 extern int sysctl_tcp_invalid_ratelimit;
+extern int sysctl_tcp_pacing_ss_ratio;
+extern int sysctl_tcp_pacing_ca_ratio;
 
 extern atomic_long_t tcp_memory_allocated;
 extern struct percpu_counter tcp_sockets_allocated;
-- 
cgit v1.2.3


From 2c0027cd54cc3ed856e87d9aeddb6ef00f5f17f4 Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Sun, 23 Aug 2015 08:21:22 -0600
Subject: inetpeer: remove dead code

Remove various inlined functions not referenced in the kernel.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inetpeer.h | 67 --------------------------------------------------
 1 file changed, 67 deletions(-)

(limited to 'include/net')

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index d5332ddcea3f..002f0bd27001 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -65,71 +65,12 @@ struct inet_peer_base {
 	int			total;
 };
 
-#define INETPEER_BASE_BIT	0x1UL
-
-static inline struct inet_peer *inetpeer_ptr(unsigned long val)
-{
-	BUG_ON(val & INETPEER_BASE_BIT);
-	return (struct inet_peer *) val;
-}
-
-static inline struct inet_peer_base *inetpeer_base_ptr(unsigned long val)
-{
-	if (!(val & INETPEER_BASE_BIT))
-		return NULL;
-	val &= ~INETPEER_BASE_BIT;
-	return (struct inet_peer_base *) val;
-}
-
-static inline bool inetpeer_ptr_is_peer(unsigned long val)
-{
-	return !(val & INETPEER_BASE_BIT);
-}
-
-static inline void __inetpeer_ptr_set_peer(unsigned long *val, struct inet_peer *peer)
-{
-	/* This implicitly clears INETPEER_BASE_BIT */
-	*val = (unsigned long) peer;
-}
-
-static inline bool inetpeer_ptr_set_peer(unsigned long *ptr, struct inet_peer *peer)
-{
-	unsigned long val = (unsigned long) peer;
-	unsigned long orig = *ptr;
-
-	if (!(orig & INETPEER_BASE_BIT) ||
-	    cmpxchg(ptr, orig, val) != orig)
-		return false;
-	return true;
-}
-
-static inline void inetpeer_init_ptr(unsigned long *ptr, struct inet_peer_base *base)
-{
-	*ptr = (unsigned long) base | INETPEER_BASE_BIT;
-}
-
-static inline void inetpeer_transfer_peer(unsigned long *to, unsigned long *from)
-{
-	unsigned long val = *from;
-
-	*to = val;
-	if (inetpeer_ptr_is_peer(val)) {
-		struct inet_peer *peer = inetpeer_ptr(val);
-		atomic_inc(&peer->refcnt);
-	}
-}
-
 void inet_peer_base_init(struct inet_peer_base *);
 
 void inet_initpeers(void) __init;
 
 #define INETPEER_METRICS_NEW	(~(u32) 0)
 
-static inline bool inet_metrics_new(const struct inet_peer *p)
-{
-	return p->metrics[RTAX_LOCK-1] == INETPEER_METRICS_NEW;
-}
-
 /* can be called with or without local BH being disabled */
 struct inet_peer *inet_getpeer(struct inet_peer_base *base,
 			       const struct inetpeer_addr *daddr,
@@ -163,12 +104,4 @@ bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout);
 
 void inetpeer_invalidate_tree(struct inet_peer_base *);
 
-/*
- * temporary check to make sure we dont access rid, tcp_ts,
- * tcp_ts_stamp if no refcount is taken on inet_peer
- */
-static inline void inet_peer_refcheck(const struct inet_peer *p)
-{
-	WARN_ON_ONCE(atomic_read(&p->refcnt) <= 0);
-}
 #endif /* _NET_INETPEER_H */
-- 
cgit v1.2.3


From 48e92c44bd73a8bc213560058e6b18e45929526e Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Tue, 25 Aug 2015 18:36:50 +0200
Subject: vxlan: fix multiple inclusion of vxlan.h

The vxlan_get_sk_family inline function was added after the last #endif,
making multiple inclusion of net/vxlan.h fail. Move it to the proper place.

Reported-by: Mark Rustad <mark.d.rustad@intel.com>
Fixes: 705cc62f6728c ("vxlan: provide access function for vxlan socket address family")
Signed-off-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/vxlan.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 6b3234599a2c..480a319b4c92 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -241,9 +241,10 @@ static inline void vxlan_get_rx_port(struct net_device *netdev)
 {
 }
 #endif
-#endif
 
 static inline unsigned short vxlan_get_sk_family(struct vxlan_sock *vs)
 {
 	return vs->sock->sk->sk_family;
 }
+
+#endif
-- 
cgit v1.2.3


From 3c645621b79828be7a46fb2694eb423b343b4bbe Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Tue, 25 Aug 2015 20:06:31 -0700
Subject: net_sched: make tcf_hash_destroy() static

tcf_hash_destroy() used once. Make it static.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 4519c81304bd..9d446f136607 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -111,7 +111,6 @@ struct tc_action_ops {
 };
 
 int tcf_hash_search(struct tc_action *a, u32 index);
-void tcf_hash_destroy(struct tc_action *a);
 u32 tcf_hash_new_index(struct tcf_hashinfo *hinfo);
 int tcf_hash_check(u32 index, struct tc_action *a, int bind);
 int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a,
-- 
cgit v1.2.3


From cff82457c5584f6a96d2b85d1a88b81ba304a330 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Tue, 25 Aug 2015 20:06:35 -0700
Subject: net_sched: act_bpf: remove spinlock in fast path

Similar to act_gact/act_mirred, act_bpf can be lockless in packet processing
with extra care taken to free bpf programs after rcu grace period.
Replacement of existing act_bpf (very rare) is done with synchronize_rcu()
and final destruction is done from tc_action_ops->cleanup() callback that is
called from tcf_exts_destroy()->tcf_action_destroy()->__tcf_hash_release() when
bind and refcnt reach zero which is only possible when classifier is destroyed.
Previous two patches fixed the last two classifiers (tcindex and rsvp) to
call tcf_exts_destroy() from rcu callback.

Similar to gact/mirred there is a race between prog->filter and
prog->tcf_action. Meaning that the program being replaced may use
previous default action if it happened to return TC_ACT_UNSPEC.
act_mirred race betwen tcf_action and tcfm_dev is similar.
In all cases the race is harmless.
Long term we may want to improve the situation by replacing the whole
tc_action->priv as single pointer instead of updating inner fields one by one.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tc_act/tc_bpf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/tc_act/tc_bpf.h b/include/net/tc_act/tc_bpf.h
index a152e9858b2c..958d69cfb19c 100644
--- a/include/net/tc_act/tc_bpf.h
+++ b/include/net/tc_act/tc_bpf.h
@@ -15,7 +15,7 @@
 
 struct tcf_bpf {
 	struct tcf_common	common;
-	struct bpf_prog		*filter;
+	struct bpf_prog __rcu	*filter;
 	union {
 		u32		bpf_fd;
 		u16		bpf_num_ops;
-- 
cgit v1.2.3


From e79e259588a414589a016edc428ee8dd308f81ad Mon Sep 17 00:00:00 2001
From: Joe Stringer <joestringer@nicira.com>
Date: Wed, 26 Aug 2015 11:31:47 -0700
Subject: dst: Add __skb_dst_copy() variation

This variation on skb_dst_copy() doesn't require two skbs.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst.h | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/dst.h b/include/net/dst.h
index ef8f1d43a203..4c4801645371 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -289,13 +289,18 @@ static inline void skb_dst_drop(struct sk_buff *skb)
 	}
 }
 
-static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb)
+static inline void __skb_dst_copy(struct sk_buff *nskb, unsigned long refdst)
 {
-	nskb->_skb_refdst = oskb->_skb_refdst;
+	nskb->_skb_refdst = refdst;
 	if (!(nskb->_skb_refdst & SKB_DST_NOREF))
 		dst_clone(skb_dst(nskb));
 }
 
+static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb)
+{
+	__skb_dst_copy(nskb, oskb->_skb_refdst);
+}
+
 /**
  * skb_dst_force - makes sure skb dst is refcounted
  * @skb: buffer
-- 
cgit v1.2.3


From 86ca02e77408bb58ba596c1a411ec7f631733690 Mon Sep 17 00:00:00 2001
From: Joe Stringer <joestringer@nicira.com>
Date: Wed, 26 Aug 2015 11:31:51 -0700
Subject: netfilter: connlabels: Export setting connlabel length

Add functions to change connlabel length into nf_conntrack_labels.c so
they may be reused by other modules like OVS and nftables without
needing to jump through xt_match_check() hoops.

Suggested-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Florian Westphal <fw@strlen.de>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netfilter/nf_conntrack_labels.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h
index dec6336bf850..7e2b1d025f50 100644
--- a/include/net/netfilter/nf_conntrack_labels.h
+++ b/include/net/netfilter/nf_conntrack_labels.h
@@ -54,7 +54,11 @@ int nf_connlabels_replace(struct nf_conn *ct,
 #ifdef CONFIG_NF_CONNTRACK_LABELS
 int nf_conntrack_labels_init(void);
 void nf_conntrack_labels_fini(void);
+int nf_connlabels_get(struct net *net, unsigned int n_bits);
+void nf_connlabels_put(struct net *net);
 #else
 static inline int nf_conntrack_labels_init(void) { return 0; }
 static inline void nf_conntrack_labels_fini(void) {}
+static inline int nf_connlabels_get(struct net *net, unsigned int n_bits) { return 0; }
+static inline void nf_connlabels_put(struct net *net) {}
 #endif
-- 
cgit v1.2.3


From 3b3ae880266d148bf73a573a766bc9b78c08d805 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 26 Aug 2015 23:00:06 +0200
Subject: net: sched: consolidate tc_classify{,_compat}

For classifiers getting invoked via tc_classify(), we always need an
extra function call into tc_classify_compat(), as both are being
exported as symbols and tc_classify() itself doesn't do much except
handling of reclassifications when tp->classify() returned with
TC_ACT_RECLASSIFY.

CBQ and ATM are the only qdiscs that directly call into tc_classify_compat(),
all others use tc_classify(). When tc actions are being configured
out in the kernel, tc_classify() effectively does nothing besides
delegating.

We could spare this layer and consolidate both functions. pktgen on
single CPU constantly pushing skbs directly into the netif_receive_skb()
path with a dummy classifier on ingress qdisc attached, improves
slightly from 22.3Mpps to 23.1Mpps.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_sched.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 2342bf12cb78..401038d2f9b8 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -110,10 +110,8 @@ static inline void qdisc_run(struct Qdisc *q)
 		__qdisc_run(q);
 }
 
-int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp,
-		       struct tcf_result *res);
 int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
-		struct tcf_result *res);
+		struct tcf_result *res, bool compat_mode);
 
 static inline __be16 tc_skb_protocol(const struct sk_buff *skb)
 {
-- 
cgit v1.2.3


From c29a70d2cadfea443c027d23481f820530b70057 Mon Sep 17 00:00:00 2001
From: Pravin B Shelar <pshelar@nicira.com>
Date: Wed, 26 Aug 2015 23:46:50 -0700
Subject: tunnel: introduce udp_tun_rx_dst()

Introduce function udp_tun_rx_dst() to initialize tunnel dst on
receive path.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Reviewed-by: Jesse Gross <jesse@nicira.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst_metadata.h | 61 ++++++++++++++++++++++++++++++++++++++++++++++
 include/net/udp_tunnel.h   |  4 +++
 2 files changed, 65 insertions(+)

(limited to 'include/net')

diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h
index 2cb52d562272..60c03326c087 100644
--- a/include/net/dst_metadata.h
+++ b/include/net/dst_metadata.h
@@ -48,4 +48,65 @@ static inline bool skb_valid_dst(const struct sk_buff *skb)
 struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags);
 struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags);
 
+static inline struct metadata_dst *tun_rx_dst(__be16 flags,
+					      __be64 tunnel_id, int md_size)
+{
+	struct metadata_dst *tun_dst;
+	struct ip_tunnel_info *info;
+
+	tun_dst = metadata_dst_alloc(md_size, GFP_ATOMIC);
+	if (!tun_dst)
+		return NULL;
+
+	info = &tun_dst->u.tun_info;
+	info->mode = IP_TUNNEL_INFO_RX;
+	info->key.tun_flags = flags;
+	info->key.tun_id = tunnel_id;
+	info->key.tp_src = 0;
+	info->key.tp_dst = 0;
+	return tun_dst;
+}
+
+static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb,
+						 __be16 flags,
+						 __be64 tunnel_id,
+						 int md_size)
+{
+	const struct iphdr *iph = ip_hdr(skb);
+	struct metadata_dst *tun_dst;
+	struct ip_tunnel_info *info;
+
+	tun_dst = tun_rx_dst(flags, tunnel_id, md_size);
+	if (!tun_dst)
+		return NULL;
+
+	info = &tun_dst->u.tun_info;
+	info->key.u.ipv4.src = iph->saddr;
+	info->key.u.ipv4.dst = iph->daddr;
+	info->key.tos = iph->tos;
+	info->key.ttl = iph->ttl;
+	return tun_dst;
+}
+
+static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb,
+						 __be16 flags,
+						 __be64 tunnel_id,
+						 int md_size)
+{
+	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+	struct metadata_dst *tun_dst;
+	struct ip_tunnel_info *info;
+
+	tun_dst = tun_rx_dst(flags, tunnel_id, md_size);
+	if (!tun_dst)
+		return NULL;
+
+	info = &tun_dst->u.tun_info;
+	info->key.u.ipv6.src = ip6h->saddr;
+	info->key.u.ipv6.dst = ip6h->daddr;
+	info->key.tos = ipv6_get_dsfield(ip6h);
+	info->key.ttl = ip6h->hop_limit;
+	return tun_dst;
+}
+
 #endif /* __NET_DST_METADATA_H */
diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index c491c1221606..35041d0fc21e 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -93,6 +93,10 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
 
 void udp_tunnel_sock_release(struct socket *sock);
 
+struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family,
+				    __be16 flags, __be64 tunnel_id,
+				    int md_size);
+
 static inline struct sk_buff *udp_tunnel_handle_offloads(struct sk_buff *skb,
 							 bool udp_csum)
 {
-- 
cgit v1.2.3


From e305ac6cf5a1e1386aedce7ef9cb773635d5845c Mon Sep 17 00:00:00 2001
From: Pravin B Shelar <pshelar@nicira.com>
Date: Wed, 26 Aug 2015 23:46:52 -0700
Subject: geneve: Add support to collect tunnel metadata.

Following patch create new tunnel flag which enable
tunnel metadata collection on given device. These devices
can be used by tunnel metadata based routing or by OVS.
Geneve Consolidation patch get rid of collect_md_tun to
simplify tunnel lookup further.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Reviewed-by: Jesse Gross <jesse@nicira.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/geneve.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/net')

diff --git a/include/net/geneve.h b/include/net/geneve.h
index 2a0543a1899d..4245e1d23b9b 100644
--- a/include/net/geneve.h
+++ b/include/net/geneve.h
@@ -96,6 +96,9 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt,
 		    __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port,
 		    __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt,
 		    bool csum, bool xnet);
+
+struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
+					u8 name_assign_type, u16 dst_port);
 #endif /*ifdef CONFIG_INET */
 
 #endif /*ifdef__NET_GENEVE_H */
-- 
cgit v1.2.3


From 371bd1061d29562e6423435073623add8c475ee2 Mon Sep 17 00:00:00 2001
From: Pravin B Shelar <pshelar@nicira.com>
Date: Wed, 26 Aug 2015 23:46:54 -0700
Subject: geneve: Consolidate Geneve functionality in single module.

geneve_core module handles send and receive functionality.
This way OVS could use the Geneve API. Now with use of
tunnel meatadata mode OVS can directly use Geneve netdevice.
So there is no need for separate module for Geneve. Following
patch consolidates Geneve protocol processing in single module.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Reviewed-by: Jesse Gross <jesse@nicira.com>
Acked-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/geneve.h | 34 ----------------------------------
 1 file changed, 34 deletions(-)

(limited to 'include/net')

diff --git a/include/net/geneve.h b/include/net/geneve.h
index 4245e1d23b9b..3106ed6eae0d 100644
--- a/include/net/geneve.h
+++ b/include/net/geneve.h
@@ -62,41 +62,7 @@ struct genevehdr {
 	struct geneve_opt options[];
 };
 
-static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
-{
-	return (struct genevehdr *)(udp_hdr(skb) + 1);
-}
-
 #ifdef CONFIG_INET
-struct geneve_sock;
-
-typedef void (geneve_rcv_t)(struct geneve_sock *gs, struct sk_buff *skb);
-
-struct geneve_sock {
-	struct list_head	list;
-	geneve_rcv_t		*rcv;
-	void			*rcv_data;
-	struct socket		*sock;
-	struct rcu_head		rcu;
-	int			refcnt;
-	struct udp_offload	udp_offloads;
-};
-
-#define GENEVE_VER 0
-#define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr))
-
-struct geneve_sock *geneve_sock_add(struct net *net, __be16 port,
-				    geneve_rcv_t *rcv, void *data,
-				    bool no_share, bool ipv6);
-
-void geneve_sock_release(struct geneve_sock *vs);
-
-int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt,
-		    struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos,
-		    __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port,
-		    __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt,
-		    bool csum, bool xnet);
-
 struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
 					u8 name_assign_type, u16 dst_port);
 #endif /*ifdef CONFIG_INET */
-- 
cgit v1.2.3


From d66d6c3152e8d5a6db42a56bf7ae1c6cae87ba48 Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil@nwl.cc>
Date: Thu, 27 Aug 2015 21:21:38 +0200
Subject: net: sched: register noqueue qdisc

This way users can attach noqueue just like any other qdisc using tc
without having to mess with tx_queue_len first.

Signed-off-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 2eab08c38e32..444faa89a55f 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -340,6 +340,7 @@ extern struct Qdisc noop_qdisc;
 extern struct Qdisc_ops noop_qdisc_ops;
 extern struct Qdisc_ops pfifo_fast_ops;
 extern struct Qdisc_ops mq_qdisc_ops;
+extern struct Qdisc_ops noqueue_qdisc_ops;
 extern const struct Qdisc_ops *default_qdisc_ops;
 
 struct Qdisc_class_common {
-- 
cgit v1.2.3


From 72afa352d6a3d4da7783b5ddee02b94be49e051a Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Thu, 27 Aug 2015 16:06:59 -0700
Subject: net: Introduce ipv4_addr_hash and use it for tcp metrics

Refactors a common line into helper function.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/net')

diff --git a/include/net/ip.h b/include/net/ip.h
index bee5f3582e38..7b9e1c782aa3 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -458,6 +458,11 @@ static __inline__ void inet_reset_saddr(struct sock *sk)
 
 #endif
 
+static inline unsigned int ipv4_addr_hash(__be32 ip)
+{
+	return (__force unsigned int) ip;
+}
+
 bool ip_call_ra_chain(struct sk_buff *skb);
 
 /*
-- 
cgit v1.2.3


From 3abef286cf2f138de353fb0b54453621de961043 Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Thu, 27 Aug 2015 16:07:00 -0700
Subject: net: Add set,get helpers for inetpeer addresses

Use inetpeer set,get helpers in tcp_metrics rather than peeking into
the inetpeer_addr struct.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inetpeer.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'include/net')

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 002f0bd27001..f75b9e7036a2 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -71,6 +71,29 @@ void inet_initpeers(void) __init;
 
 #define INETPEER_METRICS_NEW	(~(u32) 0)
 
+static inline void inetpeer_set_addr_v4(struct inetpeer_addr *iaddr, __be32 ip)
+{
+	iaddr->addr.a4 = ip;
+	iaddr->family = AF_INET;
+}
+
+static inline __be32 inetpeer_get_addr_v4(struct inetpeer_addr *iaddr)
+{
+	return iaddr->addr.a4;
+}
+
+static inline void inetpeer_set_addr_v6(struct inetpeer_addr *iaddr,
+					struct in6_addr *in6)
+{
+	iaddr->addr.in6 = *in6;
+	iaddr->family = AF_INET6;
+}
+
+static inline struct in6_addr *inetpeer_get_addr_v6(struct inetpeer_addr *iaddr)
+{
+	return &iaddr->addr.in6;
+}
+
 /* can be called with or without local BH being disabled */
 struct inet_peer *inet_getpeer(struct inet_peer_base *base,
 			       const struct inetpeer_addr *daddr,
-- 
cgit v1.2.3


From d39d14ffa24cca9f0e44aa4a63315f4c44c56a93 Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Thu, 27 Aug 2015 16:07:01 -0700
Subject: net: Add helper function to compare inetpeer addresses

tcp_metrics and inetpeer both have functions to compare inetpeer
addresses. Consolidate into 1 version.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inetpeer.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include/net')

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index f75b9e7036a2..9d9b3446731d 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -121,6 +121,22 @@ static inline struct inet_peer *inet_getpeer_v6(struct inet_peer_base *base,
 	return inet_getpeer(base, &daddr, create);
 }
 
+static inline int inetpeer_addr_cmp(const struct inetpeer_addr *a,
+				    const struct inetpeer_addr *b)
+{
+	int i, n = (a->family == AF_INET ? 1 : 4);
+
+	for (i = 0; i < n; i++) {
+		if (a->addr.a6[i] == b->addr.a6[i])
+			continue;
+		if ((__force u32)a->addr.a6[i] < (__force u32)b->addr.a6[i])
+			return -1;
+		return 1;
+	}
+
+	return 0;
+}
+
 /* can be called from BH context or outside */
 void inet_putpeer(struct inet_peer *p);
 bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout);
-- 
cgit v1.2.3


From 5345c2e12d41f815c1009c9dee72f3d5fcfd4282 Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Thu, 27 Aug 2015 16:07:02 -0700
Subject: net: Refactor inetpeer address struct

Move the inetpeer_addr_base union to inetpeer_addr and drop
inetpeer_addr_base.

Both the a6 and in6_addr overlays are not needed; drop the __be32 version
and rename in6 to a6 for consistency with ipv4. Add a new u32 array to
the union which removes the need for the typecast in the compare function
and the use of a consistent arg for both ipv4 and ipv6 addresses which
makes the compare function more readable.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inetpeer.h | 35 +++++++++++++++++++----------------
 1 file changed, 19 insertions(+), 16 deletions(-)

(limited to 'include/net')

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 9d9b3446731d..e34f98aa93b1 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -15,16 +15,14 @@
 #include <net/ipv6.h>
 #include <linux/atomic.h>
 
-struct inetpeer_addr_base {
+#define INETPEER_MAXKEYSZ   (sizeof(struct in6_addr) / sizeof(u32))
+
+struct inetpeer_addr {
 	union {
 		__be32			a4;
-		__be32			a6[4];
-		struct in6_addr		in6;
+		struct in6_addr		a6;
+		u32			key[INETPEER_MAXKEYSZ];
 	};
-};
-
-struct inetpeer_addr {
-	struct inetpeer_addr_base	addr;
 	__u16				family;
 };
 
@@ -73,25 +71,25 @@ void inet_initpeers(void) __init;
 
 static inline void inetpeer_set_addr_v4(struct inetpeer_addr *iaddr, __be32 ip)
 {
-	iaddr->addr.a4 = ip;
+	iaddr->a4 = ip;
 	iaddr->family = AF_INET;
 }
 
 static inline __be32 inetpeer_get_addr_v4(struct inetpeer_addr *iaddr)
 {
-	return iaddr->addr.a4;
+	return iaddr->a4;
 }
 
 static inline void inetpeer_set_addr_v6(struct inetpeer_addr *iaddr,
 					struct in6_addr *in6)
 {
-	iaddr->addr.in6 = *in6;
+	iaddr->a6 = *in6;
 	iaddr->family = AF_INET6;
 }
 
 static inline struct in6_addr *inetpeer_get_addr_v6(struct inetpeer_addr *iaddr)
 {
-	return &iaddr->addr.in6;
+	return &iaddr->a6;
 }
 
 /* can be called with or without local BH being disabled */
@@ -105,7 +103,7 @@ static inline struct inet_peer *inet_getpeer_v4(struct inet_peer_base *base,
 {
 	struct inetpeer_addr daddr;
 
-	daddr.addr.a4 = v4daddr;
+	daddr.a4 = v4daddr;
 	daddr.family = AF_INET;
 	return inet_getpeer(base, &daddr, create);
 }
@@ -116,7 +114,7 @@ static inline struct inet_peer *inet_getpeer_v6(struct inet_peer_base *base,
 {
 	struct inetpeer_addr daddr;
 
-	daddr.addr.in6 = *v6daddr;
+	daddr.a6 = *v6daddr;
 	daddr.family = AF_INET6;
 	return inet_getpeer(base, &daddr, create);
 }
@@ -124,12 +122,17 @@ static inline struct inet_peer *inet_getpeer_v6(struct inet_peer_base *base,
 static inline int inetpeer_addr_cmp(const struct inetpeer_addr *a,
 				    const struct inetpeer_addr *b)
 {
-	int i, n = (a->family == AF_INET ? 1 : 4);
+	int i, n;
+
+	if (a->family == AF_INET)
+		n = sizeof(a->a4) / sizeof(u32);
+	else
+		n = sizeof(a->a6) / sizeof(u32);
 
 	for (i = 0; i < n; i++) {
-		if (a->addr.a6[i] == b->addr.a6[i])
+		if (a->key[i] == b->key[i])
 			continue;
-		if ((__force u32)a->addr.a6[i] < (__force u32)b->addr.a6[i])
+		if (a->key[i] < b->key[i])
 			return -1;
 		return 1;
 	}
-- 
cgit v1.2.3


From 192132b9a034d87566294be0fba5f8f75c2cf16b Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Thu, 27 Aug 2015 16:07:03 -0700
Subject: net: Add support for VRFs to inetpeer cache

inetpeer caches based on address only, so duplicate IP addresses within
a namespace return the same cached entry. Enhance the ipv4 address key
to contain both the IPv4 address and VRF device index.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inetpeer.h | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

(limited to 'include/net')

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index e34f98aa93b1..4a6009d4486b 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -15,11 +15,17 @@
 #include <net/ipv6.h>
 #include <linux/atomic.h>
 
+/* IPv4 address key for cache lookups */
+struct ipv4_addr_key {
+	__be32	addr;
+	int	vif;
+};
+
 #define INETPEER_MAXKEYSZ   (sizeof(struct in6_addr) / sizeof(u32))
 
 struct inetpeer_addr {
 	union {
-		__be32			a4;
+		struct ipv4_addr_key	a4;
 		struct in6_addr		a6;
 		u32			key[INETPEER_MAXKEYSZ];
 	};
@@ -71,13 +77,13 @@ void inet_initpeers(void) __init;
 
 static inline void inetpeer_set_addr_v4(struct inetpeer_addr *iaddr, __be32 ip)
 {
-	iaddr->a4 = ip;
+	iaddr->a4.addr = ip;
 	iaddr->family = AF_INET;
 }
 
 static inline __be32 inetpeer_get_addr_v4(struct inetpeer_addr *iaddr)
 {
-	return iaddr->a4;
+	return iaddr->a4.addr;
 }
 
 static inline void inetpeer_set_addr_v6(struct inetpeer_addr *iaddr,
@@ -99,11 +105,12 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,
 
 static inline struct inet_peer *inet_getpeer_v4(struct inet_peer_base *base,
 						__be32 v4daddr,
-						int create)
+						int vif, int create)
 {
 	struct inetpeer_addr daddr;
 
-	daddr.a4 = v4daddr;
+	daddr.a4.addr = v4daddr;
+	daddr.a4.vif = vif;
 	daddr.family = AF_INET;
 	return inet_getpeer(base, &daddr, create);
 }
-- 
cgit v1.2.3


From 46fa062ad63146dd138ec0f017e71224471e8ea5 Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Fri, 28 Aug 2015 20:48:19 +0200
Subject: ip_tunnels: convert the mode field of ip_tunnel_info to flags

The mode field holds a single bit of information only (whether the
ip_tunnel_info struct is for rx or tx). Change the mode field to bit flags.
This allows more mode flags to be added.

Signed-off-by: Jiri Benc <jbenc@redhat.com>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst_metadata.h | 1 -
 include/net/ip_tunnels.h   | 9 ++-------
 2 files changed, 2 insertions(+), 8 deletions(-)

(limited to 'include/net')

diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h
index 60c03326c087..2b83f0d232e0 100644
--- a/include/net/dst_metadata.h
+++ b/include/net/dst_metadata.h
@@ -59,7 +59,6 @@ static inline struct metadata_dst *tun_rx_dst(__be16 flags,
 		return NULL;
 
 	info = &tun_dst->u.tun_info;
-	info->mode = IP_TUNNEL_INFO_RX;
 	info->key.tun_flags = flags;
 	info->key.tun_id = tunnel_id;
 	info->key.tp_src = 0;
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 224e4ecec91b..9bdb3948798f 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -50,13 +50,8 @@ struct ip_tunnel_key {
 	__be16			tp_dst;
 };
 
-/* Indicates whether the tunnel info structure represents receive
- * or transmit tunnel parameters.
- */
-enum {
-	IP_TUNNEL_INFO_RX,
-	IP_TUNNEL_INFO_TX,
-};
+/* Flags for ip_tunnel_info mode. */
+#define IP_TUNNEL_INFO_TX	0x01	/* represents tx tunnel parameters */
 
 struct ip_tunnel_info {
 	struct ip_tunnel_key	key;
-- 
cgit v1.2.3


From 7f9562a1f405306eacb97f95d78cb996e33f27f5 Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Fri, 28 Aug 2015 20:48:20 +0200
Subject: ip_tunnels: record IP version in tunnel info

There's currently nothing preventing directing packets with IPv6
encapsulation data to IPv4 tunnels (and vice versa). If this happens,
IPv6 addresses are incorrectly interpreted as IPv4 ones.

Track whether the given ip_tunnel_key contains IPv4 or IPv6 data. Store this
in ip_tunnel_info. Reject packets at appropriate places if they are supposed
to be encapsulated into an incompatible protocol.

Signed-off-by: Jiri Benc <jbenc@redhat.com>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst_metadata.h |  1 +
 include/net/ip_tunnels.h   | 10 ++++++++++
 2 files changed, 11 insertions(+)

(limited to 'include/net')

diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h
index 2b83f0d232e0..d32f49cc621d 100644
--- a/include/net/dst_metadata.h
+++ b/include/net/dst_metadata.h
@@ -105,6 +105,7 @@ static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb,
 	info->key.u.ipv6.dst = ip6h->daddr;
 	info->key.tos = ipv6_get_dsfield(ip6h);
 	info->key.ttl = ip6h->hop_limit;
+	info->mode = IP_TUNNEL_INFO_IPV6;
 	return tun_dst;
 }
 
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 9bdb3948798f..2b4fa06e91bd 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -4,6 +4,7 @@
 #include <linux/if_tunnel.h>
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
+#include <linux/socket.h>
 #include <linux/types.h>
 #include <linux/u64_stats_sync.h>
 #include <net/dsfield.h>
@@ -52,6 +53,7 @@ struct ip_tunnel_key {
 
 /* Flags for ip_tunnel_info mode. */
 #define IP_TUNNEL_INFO_TX	0x01	/* represents tx tunnel parameters */
+#define IP_TUNNEL_INFO_IPV6	0x02	/* key contains IPv6 addresses */
 
 struct ip_tunnel_info {
 	struct ip_tunnel_key	key;
@@ -208,6 +210,8 @@ static inline void __ip_tunnel_info_init(struct ip_tunnel_info *tun_info,
 
 	tun_info->options = opts;
 	tun_info->options_len = opts_len;
+
+	tun_info->mode = 0;
 }
 
 static inline void ip_tunnel_info_init(struct ip_tunnel_info *tun_info,
@@ -221,6 +225,12 @@ static inline void ip_tunnel_info_init(struct ip_tunnel_info *tun_info,
 			      tun_id, tun_flags, opts, opts_len);
 }
 
+static inline unsigned short ip_tunnel_info_af(const struct ip_tunnel_info
+					       *tun_info)
+{
+	return tun_info->mode & IP_TUNNEL_INFO_IPV6 ? AF_INET6 : AF_INET;
+}
+
 #ifdef CONFIG_INET
 
 int ip_tunnel_init(struct net_device *dev);
-- 
cgit v1.2.3


From a43a9ef6a2e510fec61176ff2c34fab3e7d581da Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Fri, 28 Aug 2015 20:48:22 +0200
Subject: vxlan: do not receive IPv4 packets on IPv6 socket

By default (subject to the sysctl settings), IPv6 sockets listen also for
IPv4 traffic. Vxlan is not prepared for that and expects IPv6 header in
packets received through an IPv6 socket.

In addition, it's currently not possible to have both IPv4 and IPv6 vxlan
tunnel on the same port (unless bindv6only sysctl is enabled), as it's not
possible to create and bind both IPv4 and IPv6 vxlan interfaces and there's
no way to specify both IPv4 and IPv6 remote/group IP addresses.

Set IPV6_V6ONLY on vxlan sockets to fix both of these issues. This is not
done globally in udp_tunnel, as l2tp and tipc seems to work okay when
receiving IPv4 packets on IPv6 socket and people may rely on this behavior.
The other tunnels (geneve and fou) do not support IPv6.

Signed-off-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/udp_tunnel.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index 35041d0fc21e..cb2f89f20f5c 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -31,7 +31,8 @@ struct udp_port_cfg {
 	__be16			peer_udp_port;
 	unsigned int		use_udp_checksums:1,
 				use_udp6_tx_checksums:1,
-				use_udp6_rx_checksums:1;
+				use_udp6_rx_checksums:1,
+				ipv6_v6only:1;
 };
 
 int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
-- 
cgit v1.2.3


From c4c6bc314618f60ba69b0cbf93e506e4c38a11d2 Mon Sep 17 00:00:00 2001
From: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
Date: Sun, 30 Aug 2015 11:29:41 +0530
Subject: net: Introduce helper functions to get the per cpu data

Signed-off-by: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/net')

diff --git a/include/net/ip.h b/include/net/ip.h
index 7b9e1c782aa3..9b9ca2839399 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -202,10 +202,20 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
 #define NET_ADD_STATS_BH(net, field, adnd) SNMP_ADD_STATS_BH((net)->mib.net_statistics, field, adnd)
 #define NET_ADD_STATS_USER(net, field, adnd) SNMP_ADD_STATS_USER((net)->mib.net_statistics, field, adnd)
 
+u64 snmp_get_cpu_field(void __percpu *mib, int cpu, int offct);
 unsigned long snmp_fold_field(void __percpu *mib, int offt);
 #if BITS_PER_LONG==32
+u64 snmp_get_cpu_field64(void __percpu *mib, int cpu, int offct,
+			 size_t syncp_offset);
 u64 snmp_fold_field64(void __percpu *mib, int offt, size_t sync_off);
 #else
+static inline u64  snmp_get_cpu_field64(void __percpu *mib, int cpu, int offct,
+					size_t syncp_offset)
+{
+	return snmp_get_cpu_field(mib, cpu, offct);
+
+}
+
 static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_off)
 {
 	return snmp_fold_field(mib, offt);
-- 
cgit v1.2.3


From 4c22279848c531fc7f555d463daf3d0df963bd41 Mon Sep 17 00:00:00 2001
From: Pravin B Shelar <pshelar@nicira.com>
Date: Sun, 30 Aug 2015 18:09:38 -0700
Subject: ip-tunnel: Use API to access tunnel metadata options.

Currently tun-info options pointer is used in few cases to
pass options around. But tunnel options can be accessed using
ip_tunnel_info_opts() API without using the pointer. Following
patch removes the redundant pointer and consistently make use
of API.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Reviewed-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst_metadata.h | 31 ++++++++++-----------
 include/net/ip_tunnels.h   | 67 +++++++++++++++++++++-------------------------
 2 files changed, 45 insertions(+), 53 deletions(-)

(limited to 'include/net')

diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h
index d32f49cc621d..547ab8241593 100644
--- a/include/net/dst_metadata.h
+++ b/include/net/dst_metadata.h
@@ -48,21 +48,16 @@ static inline bool skb_valid_dst(const struct sk_buff *skb)
 struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags);
 struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags);
 
-static inline struct metadata_dst *tun_rx_dst(__be16 flags,
-					      __be64 tunnel_id, int md_size)
+static inline struct metadata_dst *tun_rx_dst(int md_size)
 {
 	struct metadata_dst *tun_dst;
-	struct ip_tunnel_info *info;
 
 	tun_dst = metadata_dst_alloc(md_size, GFP_ATOMIC);
 	if (!tun_dst)
 		return NULL;
 
-	info = &tun_dst->u.tun_info;
-	info->key.tun_flags = flags;
-	info->key.tun_id = tunnel_id;
-	info->key.tp_src = 0;
-	info->key.tp_dst = 0;
+	tun_dst->u.tun_info.options_len = 0;
+	tun_dst->u.tun_info.mode = 0;
 	return tun_dst;
 }
 
@@ -73,17 +68,14 @@ static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb,
 {
 	const struct iphdr *iph = ip_hdr(skb);
 	struct metadata_dst *tun_dst;
-	struct ip_tunnel_info *info;
 
-	tun_dst = tun_rx_dst(flags, tunnel_id, md_size);
+	tun_dst = tun_rx_dst(md_size);
 	if (!tun_dst)
 		return NULL;
 
-	info = &tun_dst->u.tun_info;
-	info->key.u.ipv4.src = iph->saddr;
-	info->key.u.ipv4.dst = iph->daddr;
-	info->key.tos = iph->tos;
-	info->key.ttl = iph->ttl;
+	ip_tunnel_key_init(&tun_dst->u.tun_info.key,
+			   iph->saddr, iph->daddr, iph->tos, iph->ttl,
+			   0, 0, tunnel_id, flags);
 	return tun_dst;
 }
 
@@ -96,16 +88,21 @@ static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb,
 	struct metadata_dst *tun_dst;
 	struct ip_tunnel_info *info;
 
-	tun_dst = tun_rx_dst(flags, tunnel_id, md_size);
+	tun_dst = tun_rx_dst(md_size);
 	if (!tun_dst)
 		return NULL;
 
 	info = &tun_dst->u.tun_info;
+	info->mode = IP_TUNNEL_INFO_IPV6;
+	info->key.tun_flags = flags;
+	info->key.tun_id = tunnel_id;
+	info->key.tp_src = 0;
+	info->key.tp_dst = 0;
+
 	info->key.u.ipv6.src = ip6h->saddr;
 	info->key.u.ipv6.dst = ip6h->daddr;
 	info->key.tos = ipv6_get_dsfield(ip6h);
 	info->key.ttl = ip6h->hop_limit;
-	info->mode = IP_TUNNEL_INFO_IPV6;
 	return tun_dst;
 }
 
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 2b4fa06e91bd..9a6a3ba888e8 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -57,7 +57,6 @@ struct ip_tunnel_key {
 
 struct ip_tunnel_info {
 	struct ip_tunnel_key	key;
-	const void		*options;
 	u8			options_len;
 	u8			mode;
 };
@@ -180,49 +179,32 @@ int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *op,
 int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *op,
 			    unsigned int num);
 
-static inline void __ip_tunnel_info_init(struct ip_tunnel_info *tun_info,
-					 __be32 saddr, __be32 daddr,
-					 u8 tos, u8 ttl,
-					 __be16 tp_src, __be16 tp_dst,
-					 __be64 tun_id, __be16 tun_flags,
-					 const void *opts, u8 opts_len)
+static inline void ip_tunnel_key_init(struct ip_tunnel_key *key,
+				      __be32 saddr, __be32 daddr,
+				      u8 tos, u8 ttl,
+				      __be16 tp_src, __be16 tp_dst,
+				      __be64 tun_id, __be16 tun_flags)
 {
-	tun_info->key.tun_id = tun_id;
-	tun_info->key.u.ipv4.src = saddr;
-	tun_info->key.u.ipv4.dst = daddr;
-	memset((unsigned char *)&tun_info->key + IP_TUNNEL_KEY_IPV4_PAD,
+	key->tun_id = tun_id;
+	key->u.ipv4.src = saddr;
+	key->u.ipv4.dst = daddr;
+	memset((unsigned char *)key + IP_TUNNEL_KEY_IPV4_PAD,
 	       0, IP_TUNNEL_KEY_IPV4_PAD_LEN);
-	tun_info->key.tos = tos;
-	tun_info->key.ttl = ttl;
-	tun_info->key.tun_flags = tun_flags;
+	key->tos = tos;
+	key->ttl = ttl;
+	key->tun_flags = tun_flags;
 
 	/* For the tunnel types on the top of IPsec, the tp_src and tp_dst of
 	 * the upper tunnel are used.
 	 * E.g: GRE over IPSEC, the tp_src and tp_port are zero.
 	 */
-	tun_info->key.tp_src = tp_src;
-	tun_info->key.tp_dst = tp_dst;
+	key->tp_src = tp_src;
+	key->tp_dst = tp_dst;
 
 	/* Clear struct padding. */
-	if (sizeof(tun_info->key) != IP_TUNNEL_KEY_SIZE)
-		memset((unsigned char *)&tun_info->key + IP_TUNNEL_KEY_SIZE,
-		       0, sizeof(tun_info->key) - IP_TUNNEL_KEY_SIZE);
-
-	tun_info->options = opts;
-	tun_info->options_len = opts_len;
-
-	tun_info->mode = 0;
-}
-
-static inline void ip_tunnel_info_init(struct ip_tunnel_info *tun_info,
-				       const struct iphdr *iph,
-				       __be16 tp_src, __be16 tp_dst,
-				       __be64 tun_id, __be16 tun_flags,
-				       const void *opts, u8 opts_len)
-{
-	__ip_tunnel_info_init(tun_info, iph->saddr, iph->daddr,
-			      iph->tos, iph->ttl, tp_src, tp_dst,
-			      tun_id, tun_flags, opts, opts_len);
+	if (sizeof(*key) != IP_TUNNEL_KEY_SIZE)
+		memset((unsigned char *)key + IP_TUNNEL_KEY_SIZE,
+		       0, sizeof(*key) - IP_TUNNEL_KEY_SIZE);
 }
 
 static inline unsigned short ip_tunnel_info_af(const struct ip_tunnel_info
@@ -317,11 +299,24 @@ static inline void iptunnel_xmit_stats(int err,
 	}
 }
 
-static inline void *ip_tunnel_info_opts(struct ip_tunnel_info *info, size_t n)
+static inline void *ip_tunnel_info_opts(struct ip_tunnel_info *info)
 {
 	return info + 1;
 }
 
+static inline void ip_tunnel_info_opts_get(void *to,
+					   const struct ip_tunnel_info *info)
+{
+	memcpy(to, info + 1, info->options_len);
+}
+
+static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info,
+					   const void *from, int len)
+{
+	memcpy(ip_tunnel_info_opts(info), from, len);
+	info->options_len = len;
+}
+
 static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate)
 {
 	return (struct ip_tunnel_info *)lwtstate->data;
-- 
cgit v1.2.3


From c3a8d9474684d391b0afc3970d9b249add15ec07 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Mon, 31 Aug 2015 15:58:47 +0200
Subject: tcp: use dctcp if enabled on the route to the initiator

Currently, the following case doesn't use DCTCP, even if it should:
A responder has f.e. Cubic as system wide default, but for a specific
route to the initiating host, DCTCP is being set in RTAX_CC_ALGO. The
initiating host then uses DCTCP as congestion control, but since the
initiator sets ECT(0), tcp_ecn_create_request() doesn't set ecn_ok,
and we have to fall back to Reno after 3WHS completes.

We were thinking on how to solve this in a minimal, non-intrusive
way without bloating tcp_ecn_create_request() needlessly: lets cache
the CA ecn option flag in RTAX_FEATURES. In other words, when ECT(0)
is set on the SYN packet, set ecn_ok=1 iff route RTAX_FEATURES
contains the unexposed (internal-only) DST_FEATURE_ECN_CA. This allows
to only do a single metric feature lookup inside tcp_ecn_create_request().

Joint work with Florian Westphal.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst.h | 6 ++++++
 include/net/tcp.h | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/dst.h b/include/net/dst.h
index 4c4801645371..9261d928303d 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -207,6 +207,12 @@ static inline void dst_metric_set(struct dst_entry *dst, int metric, u32 val)
 		p[metric-1] = val;
 }
 
+/* Kernel-internal feature bits that are unallocated in user space. */
+#define DST_FEATURE_ECN_CA	(1 << 31)
+
+#define DST_FEATURE_MASK	(DST_FEATURE_ECN_CA)
+#define DST_FEATURE_ECN_MASK	(DST_FEATURE_ECN_CA | RTAX_FEATURE_ECN)
+
 static inline u32
 dst_feature(const struct dst_entry *dst, u32 feature)
 {
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 4a7b03947a38..0cab28cd43a9 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -888,7 +888,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked);
 extern struct tcp_congestion_ops tcp_reno;
 
 struct tcp_congestion_ops *tcp_ca_find_key(u32 key);
-u32 tcp_ca_get_key_by_name(const char *name);
+u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca);
 #ifdef CONFIG_INET
 char *tcp_ca_get_name_by_key(u32 key, char *buffer);
 #else
-- 
cgit v1.2.3


From c42858eaf4926eb2f44f3e26731b276ab966ac28 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 31 Aug 2015 13:57:34 -0700
Subject: gro_cells: remove spinlock protecting receive queues

As David pointed out, spinlock are no longer needed
to protect the per cpu queues used in gro cells infrastructure.

Also use new napi_complete_done() API so that gro_flush_timeout
tweaks have an effect.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/gro_cells.h | 18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

(limited to 'include/net')

diff --git a/include/net/gro_cells.h b/include/net/gro_cells.h
index 0f712c0bc0bf..cf6c74550baa 100644
--- a/include/net/gro_cells.h
+++ b/include/net/gro_cells.h
@@ -32,37 +32,28 @@ static inline void gro_cells_receive(struct gro_cells *gcells, struct sk_buff *s
 		return;
 	}
 
-	/* We run in BH context */
-	spin_lock(&cell->napi_skbs.lock);
-
 	__skb_queue_tail(&cell->napi_skbs, skb);
 	if (skb_queue_len(&cell->napi_skbs) == 1)
 		napi_schedule(&cell->napi);
-
-	spin_unlock(&cell->napi_skbs.lock);
 }
 
-/* called unser BH context */
+/* called under BH context */
 static inline int gro_cell_poll(struct napi_struct *napi, int budget)
 {
 	struct gro_cell *cell = container_of(napi, struct gro_cell, napi);
 	struct sk_buff *skb;
 	int work_done = 0;
 
-	spin_lock(&cell->napi_skbs.lock);
 	while (work_done < budget) {
 		skb = __skb_dequeue(&cell->napi_skbs);
 		if (!skb)
 			break;
-		spin_unlock(&cell->napi_skbs.lock);
 		napi_gro_receive(napi, skb);
 		work_done++;
-		spin_lock(&cell->napi_skbs.lock);
 	}
 
 	if (work_done < budget)
-		napi_complete(napi);
-	spin_unlock(&cell->napi_skbs.lock);
+		napi_complete_done(napi, work_done);
 	return work_done;
 }
 
@@ -77,7 +68,7 @@ static inline int gro_cells_init(struct gro_cells *gcells, struct net_device *de
 	for_each_possible_cpu(i) {
 		struct gro_cell *cell = per_cpu_ptr(gcells->cells, i);
 
-		skb_queue_head_init(&cell->napi_skbs);
+		__skb_queue_head_init(&cell->napi_skbs);
 		netif_napi_add(dev, &cell->napi, gro_cell_poll, 64);
 		napi_enable(&cell->napi);
 	}
@@ -92,8 +83,9 @@ static inline void gro_cells_destroy(struct gro_cells *gcells)
 		return;
 	for_each_possible_cpu(i) {
 		struct gro_cell *cell = per_cpu_ptr(gcells->cells, i);
+
 		netif_napi_del(&cell->napi);
-		skb_queue_purge(&cell->napi_skbs);
+		__skb_queue_purge(&cell->napi_skbs);
 	}
 	free_percpu(gcells->cells);
 	gcells->cells = NULL;
-- 
cgit v1.2.3


From 63b6c13dbb7d3e36f031629f7e4e86dacfcab8cf Mon Sep 17 00:00:00 2001
From: Pravin B Shelar <pshelar@nicira.com>
Date: Mon, 31 Aug 2015 20:05:57 -0700
Subject: tun_dst: Remove opts_size

opts_size is only written and never read. Following patch
removes this unused variable.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst_metadata.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h
index 547ab8241593..af9d5382f6cb 100644
--- a/include/net/dst_metadata.h
+++ b/include/net/dst_metadata.h
@@ -7,7 +7,6 @@
 
 struct metadata_dst {
 	struct dst_entry		dst;
-	size_t				opts_len;
 	union {
 		struct ip_tunnel_info	tun_info;
 	} u;
-- 
cgit v1.2.3


From 9cf94eab8b309e8bcc78b41dd1561c75b537dd0b Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Mon, 31 Aug 2015 19:11:02 +0200
Subject: netfilter: conntrack: use nf_ct_tmpl_free in CT/synproxy error paths

Commit 0838aa7fcfcd ("netfilter: fix netns dependencies with conntrack
templates") migrated templates to the new allocator api, but forgot to
update error paths for them in CT and synproxy to use nf_ct_tmpl_free()
instead of nf_conntrack_free().

Due to that, memory is being freed into the wrong kmemcache, but also
we drop the per net reference count of ct objects causing an imbalance.

In Brad's case, this leads to a wrap-around of net->ct.count and thus
lets __nf_conntrack_alloc() refuse to create a new ct object:

  [   10.340913] xt_addrtype: ipv6 does not support BROADCAST matching
  [   10.810168] nf_conntrack: table full, dropping packet
  [   11.917416] r8169 0000:07:00.0 eth0: link up
  [   11.917438] IPv6: ADDRCONF(NETDEV_CHANGE): eth0: link becomes ready
  [   12.815902] nf_conntrack: table full, dropping packet
  [   15.688561] nf_conntrack: table full, dropping packet
  [   15.689365] nf_conntrack: table full, dropping packet
  [   15.690169] nf_conntrack: table full, dropping packet
  [   15.690967] nf_conntrack: table full, dropping packet
  [...]

With slab debugging, it also reports the wrong kmemcache (kmalloc-512 vs.
nf_conntrack_ffffffff81ce75c0) and reports poison overwrites, etc. Thus,
to fix the problem, export and use nf_ct_tmpl_free() instead.

Fixes: 0838aa7fcfcd ("netfilter: fix netns dependencies with conntrack templates")
Reported-by: Brad Jackson <bjackson0971@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 37cd3911d5c5..4023c4ce260f 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -292,6 +292,7 @@ extern unsigned int nf_conntrack_hash_rnd;
 void init_nf_conntrack_hash_rnd(void);
 
 struct nf_conn *nf_ct_tmpl_alloc(struct net *net, u16 zone, gfp_t flags);
+void nf_ct_tmpl_free(struct nf_conn *tmpl);
 
 #define NF_CT_STAT_INC(net, count)	  __this_cpu_inc((net)->ct.stat->count)
 #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count)
-- 
cgit v1.2.3


From 9b8ff51822893e743eee09350c1928daa3ef503f Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Tue, 1 Sep 2015 14:26:35 -0600
Subject: net: Make table id type u32

A number of VRF patches used 'int' for table id. It should be u32 to be
consistent with the rest of the stack.

Fixes:
4e3c89920cd3a ("net: Introduce VRF related flags and helpers")
15be405eb2ea9 ("net: Add inet_addr lookup by table")
30bbaa1950055 ("net: Fix up inet_addr_type checks")
021dd3b8a142d ("net: Add routes to the table associated with the device")
dc028da54ed35 ("inet: Move VRF table lookup to inlined function")
f6d3c19274c74 ("net: FIB tracepoints")

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/route.h |  2 +-
 include/net/vrf.h   | 24 ++++++++++++------------
 2 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'include/net')

diff --git a/include/net/route.h b/include/net/route.h
index 395d79bb556c..cc61cb95f059 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -188,7 +188,7 @@ void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk);
 void ip_rt_send_redirect(struct sk_buff *skb);
 
 unsigned int inet_addr_type(struct net *net, __be32 addr);
-unsigned int inet_addr_type_table(struct net *net, __be32 addr, int tb_id);
+unsigned int inet_addr_type_table(struct net *net, __be32 addr, u32 tb_id);
 unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
 				__be32 addr);
 unsigned int inet_addr_type_dev_table(struct net *net,
diff --git a/include/net/vrf.h b/include/net/vrf.h
index 5bfb16237fd7..593e6094ddd4 100644
--- a/include/net/vrf.h
+++ b/include/net/vrf.h
@@ -66,9 +66,9 @@ static inline int vrf_master_ifindex(const struct net_device *dev)
 }
 
 /* called with rcu_read_lock */
-static inline int vrf_dev_table_rcu(const struct net_device *dev)
+static inline u32 vrf_dev_table_rcu(const struct net_device *dev)
 {
-	int tb_id = 0;
+	u32 tb_id = 0;
 
 	if (dev) {
 		struct net_vrf_dev *vrf_ptr;
@@ -80,9 +80,9 @@ static inline int vrf_dev_table_rcu(const struct net_device *dev)
 	return tb_id;
 }
 
-static inline int vrf_dev_table(const struct net_device *dev)
+static inline u32 vrf_dev_table(const struct net_device *dev)
 {
-	int tb_id;
+	u32 tb_id;
 
 	rcu_read_lock();
 	tb_id = vrf_dev_table_rcu(dev);
@@ -91,10 +91,10 @@ static inline int vrf_dev_table(const struct net_device *dev)
 	return tb_id;
 }
 
-static inline int vrf_dev_table_ifindex(struct net *net, int ifindex)
+static inline u32 vrf_dev_table_ifindex(struct net *net, int ifindex)
 {
 	struct net_device *dev;
-	int tb_id = 0;
+	u32 tb_id = 0;
 
 	if (!ifindex)
 		return 0;
@@ -111,9 +111,9 @@ static inline int vrf_dev_table_ifindex(struct net *net, int ifindex)
 }
 
 /* called with rtnl */
-static inline int vrf_dev_table_rtnl(const struct net_device *dev)
+static inline u32 vrf_dev_table_rtnl(const struct net_device *dev)
 {
-	int tb_id = 0;
+	u32 tb_id = 0;
 
 	if (dev) {
 		struct net_vrf_dev *vrf_ptr;
@@ -149,22 +149,22 @@ static inline int vrf_master_ifindex(const struct net_device *dev)
 	return 0;
 }
 
-static inline int vrf_dev_table_rcu(const struct net_device *dev)
+static inline u32 vrf_dev_table_rcu(const struct net_device *dev)
 {
 	return 0;
 }
 
-static inline int vrf_dev_table(const struct net_device *dev)
+static inline u32 vrf_dev_table(const struct net_device *dev)
 {
 	return 0;
 }
 
-static inline int vrf_dev_table_ifindex(struct net *net, int ifindex)
+static inline u32 vrf_dev_table_ifindex(struct net *net, int ifindex)
 {
 	return 0;
 }
 
-static inline int vrf_dev_table_rtnl(const struct net_device *dev)
+static inline u32 vrf_dev_table_rtnl(const struct net_device *dev)
 {
 	return 0;
 }
-- 
cgit v1.2.3


From e5276937ae6e654a811345f0716266f12e77bede Mon Sep 17 00:00:00 2001
From: Tom Herbert <tom@herbertland.com>
Date: Tue, 1 Sep 2015 09:24:23 -0700
Subject: flow_dissector: Move skb related functions to skbuff.h

Move the flow dissector functions that are specific to skbuffs into
skbuff.h out of flow_dissector.h. This makes flow_dissector.h have
no dependencies on skbuff.h.

Signed-off-by: Tom Herbert <tom@herbertland.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow_dissector.h | 50 --------------------------------------------
 1 file changed, 50 deletions(-)

(limited to 'include/net')

diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 1a8c22419936..6777a84c6f94 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -2,7 +2,6 @@
 #define _NET_FLOW_DISSECTOR_H
 
 #include <linux/types.h>
-#include <linux/skbuff.h>
 #include <linux/in6.h>
 #include <uapi/linux/if_ether.h>
 
@@ -134,23 +133,6 @@ struct flow_dissector {
 	unsigned short int offset[FLOW_DISSECTOR_KEY_MAX];
 };
 
-void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
-			     const struct flow_dissector_key *key,
-			     unsigned int key_count);
-
-bool __skb_flow_dissect(const struct sk_buff *skb,
-			struct flow_dissector *flow_dissector,
-			void *target_container,
-			void *data, __be16 proto, int nhoff, int hlen);
-
-static inline bool skb_flow_dissect(const struct sk_buff *skb,
-				    struct flow_dissector *flow_dissector,
-				    void *target_container)
-{
-	return __skb_flow_dissect(skb, flow_dissector, target_container,
-				  NULL, 0, 0, 0);
-}
-
 struct flow_keys {
 	struct flow_dissector_key_control control;
 #define FLOW_KEYS_HASH_START_FIELD basic
@@ -170,38 +152,6 @@ __be32 flow_get_u32_dst(const struct flow_keys *flow);
 extern struct flow_dissector flow_keys_dissector;
 extern struct flow_dissector flow_keys_buf_dissector;
 
-static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb,
-					      struct flow_keys *flow)
-{
-	memset(flow, 0, sizeof(*flow));
-	return __skb_flow_dissect(skb, &flow_keys_dissector, flow,
-				  NULL, 0, 0, 0);
-}
-
-static inline bool skb_flow_dissect_flow_keys_buf(struct flow_keys *flow,
-						  void *data, __be16 proto,
-						  int nhoff, int hlen)
-{
-	memset(flow, 0, sizeof(*flow));
-	return __skb_flow_dissect(NULL, &flow_keys_buf_dissector, flow,
-				  data, proto, nhoff, hlen);
-}
-
-__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
-			    void *data, int hlen_proto);
-
-static inline __be32 skb_flow_get_ports(const struct sk_buff *skb,
-					int thoff, u8 ip_proto)
-{
-	return __skb_flow_get_ports(skb, thoff, ip_proto, NULL, 0);
-}
-
-u32 flow_hash_from_keys(struct flow_keys *keys);
-void __skb_get_hash(struct sk_buff *skb);
-u32 skb_get_poff(const struct sk_buff *skb);
-u32 __skb_get_poff(const struct sk_buff *skb, void *data,
-		   const struct flow_keys *keys, int hlen);
-
 /* struct flow_keys_digest:
  *
  * This structure is used to hold a digest of the full flow keys. This is a
-- 
cgit v1.2.3


From bcc83839ffdb063dd2b0370cd85c4f825761fc59 Mon Sep 17 00:00:00 2001
From: Tom Herbert <tom@herbertland.com>
Date: Tue, 1 Sep 2015 09:24:24 -0700
Subject: skbuff: Make __skb_set_sw_hash a general function

Move __skb_set_sw_hash to skbuff.h and add __skb_set_hash which is
a common method (between __skb_set_sw_hash and skb_set_hash) to set
the hash in an skbuff.

Also, move skb_clear_hash to be closer to __skb_set_hash.

Signed-off-by: Tom Herbert <tom@herbertland.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow_dissector.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/net')

diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 6777a84c6f94..af76c496f7db 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -167,4 +167,9 @@ struct flow_keys_digest {
 void make_flow_keys_digest(struct flow_keys_digest *digest,
 			   const struct flow_keys *flow);
 
+static inline bool flow_keys_have_l4(struct flow_keys *keys)
+{
+	return (keys->ports.ports || keys->tags.flow_label);
+}
+
 #endif
-- 
cgit v1.2.3


From c6cc1ca7f4d70cbb3ea3a5ca163c5dabaf155cdb Mon Sep 17 00:00:00 2001
From: Tom Herbert <tom@herbertland.com>
Date: Tue, 1 Sep 2015 09:24:25 -0700
Subject: flowi: Abstract out functions to get flow hash based on flowi

Create __get_hash_from_flowi6 and __get_hash_from_flowi4 to get the
flow keys and hash based on flowi structures. These are called by
__skb_get_hash_flowi6 and __skb_get_hash_flowi4. Also, created
get_hash_from_flowi6 and get_hash_from_flowi4 which can be called
when just the hash value for a flowi is needed.

Signed-off-by: Tom Herbert <tom@herbertland.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow.h           | 19 +++++++++++++++++++
 include/net/flow_dissector.h |  2 ++
 2 files changed, 21 insertions(+)

(limited to 'include/net')

diff --git a/include/net/flow.h b/include/net/flow.h
index 9e0297c4c11d..dafe97c3c048 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -10,6 +10,7 @@
 #include <linux/socket.h>
 #include <linux/in6.h>
 #include <linux/atomic.h>
+#include <net/flow_dissector.h>
 
 /*
  * ifindex generation is per-net namespace, and loopback is
@@ -243,4 +244,22 @@ void flow_cache_flush(struct net *net);
 void flow_cache_flush_deferred(struct net *net);
 extern atomic_t flow_cache_genid;
 
+__u32 __get_hash_from_flowi6(struct flowi6 *fl6, struct flow_keys *keys);
+
+static inline __u32 get_hash_from_flowi6(struct flowi6 *fl6)
+{
+	struct flow_keys keys;
+
+	return __get_hash_from_flowi6(fl6, &keys);
+}
+
+__u32 __get_hash_from_flowi4(struct flowi4 *fl4, struct flow_keys *keys);
+
+static inline __u32 get_hash_from_flowi4(struct flowi4 *fl4)
+{
+	struct flow_keys keys;
+
+	return __get_hash_from_flowi4(fl4, &keys);
+}
+
 #endif
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index af76c496f7db..74fe160f0b05 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -172,4 +172,6 @@ static inline bool flow_keys_have_l4(struct flow_keys *keys)
 	return (keys->ports.ports || keys->tags.flow_label);
 }
 
+u32 flow_hash_from_keys(struct flow_keys *keys);
+
 #endif
-- 
cgit v1.2.3


From 807e165dc44fd93f9d378f861f0540a158d7343a Mon Sep 17 00:00:00 2001
From: Tom Herbert <tom@herbertland.com>
Date: Tue, 1 Sep 2015 09:24:28 -0700
Subject: flow_dissector: Add control/reporting of fragmentation

Add an input flag to flow dissector on rather dissection should be
attempted on a first fragment. Also add key_control flags to indicate
that a packet is a fragment or first fragment.

Signed-off-by: Tom Herbert <tom@herbertland.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow_dissector.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/net')

diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 74fe160f0b05..34102270b086 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -12,6 +12,8 @@
 struct flow_dissector_key_control {
 	u16	thoff;
 	u16	addr_type;
+	u32	is_fragment:1;
+	u32	first_frag:1;
 };
 
 /**
@@ -122,6 +124,8 @@ enum flow_dissector_key_id {
 	FLOW_DISSECTOR_KEY_MAX,
 };
 
+#define FLOW_DISSECTOR_F_PARSE_1ST_FRAG		BIT(0)
+
 struct flow_dissector_key {
 	enum flow_dissector_key_id key_id;
 	size_t offset; /* offset of struct flow_dissector_key_*
-- 
cgit v1.2.3


From 8306b688f1a6621b9efe3b0d827e26750528b12a Mon Sep 17 00:00:00 2001
From: Tom Herbert <tom@herbertland.com>
Date: Tue, 1 Sep 2015 09:24:30 -0700
Subject: flow_dissector: Add flag to stop parsing at L3

Add an input flag to flow dissector on rather dissection should be
stopped when an L3 packet is encountered. This would be useful if a
caller just wanted to get IP addresses of the outermost header (e.g.
to do an L3 hash).

Signed-off-by: Tom Herbert <tom@herbertland.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow_dissector.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 34102270b086..bb81e3b576e7 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -125,6 +125,7 @@ enum flow_dissector_key_id {
 };
 
 #define FLOW_DISSECTOR_F_PARSE_1ST_FRAG		BIT(0)
+#define FLOW_DISSECTOR_F_STOP_AT_L3		BIT(1)
 
 struct flow_dissector_key {
 	enum flow_dissector_key_id key_id;
-- 
cgit v1.2.3


From 872b1abb1ed47a691f465fb3d285f6cf6bcd8663 Mon Sep 17 00:00:00 2001
From: Tom Herbert <tom@herbertland.com>
Date: Tue, 1 Sep 2015 09:24:31 -0700
Subject: flow_dissector: Add flag to stop parsing when an IPv6 flow label is
 seen

Add an input flag to flow dissector on rather dissection should be
stopped when a flow label is encountered. Presumably, the flow label
is derived from a sufficient hash of an inner transport packet so
further dissection is not needed (that is ports are not included in
the flow hash). Using the flow label instead of ports has the additional
benefit that packet fragments should hash to same value as non-fragments
for a flow (assuming that the same flow label is used).

We set this flag by default in for skb_get_hash.

Signed-off-by: Tom Herbert <tom@herbertland.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow_dissector.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index bb81e3b576e7..66dbc3498efb 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -126,6 +126,7 @@ enum flow_dissector_key_id {
 
 #define FLOW_DISSECTOR_F_PARSE_1ST_FRAG		BIT(0)
 #define FLOW_DISSECTOR_F_STOP_AT_L3		BIT(1)
+#define FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL	BIT(2)
 
 struct flow_dissector_key {
 	enum flow_dissector_key_id key_id;
-- 
cgit v1.2.3


From 823b96939578eae67b9d6c0e33a39d6a7b6401e7 Mon Sep 17 00:00:00 2001
From: Tom Herbert <tom@herbertland.com>
Date: Tue, 1 Sep 2015 09:24:32 -0700
Subject: flow_dissector: Add control/reporting of encapsulation

Add an input flag to flow dissector on rather dissection should stop
when encapsulation is detected (IP/IP or GRE). Also, add a key_control
flag that indicates encapsulation was encountered during the
dissection.

Signed-off-by: Tom Herbert <tom@herbertland.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow_dissector.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 66dbc3498efb..bddd1089dbce 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -14,6 +14,7 @@ struct flow_dissector_key_control {
 	u16	addr_type;
 	u32	is_fragment:1;
 	u32	first_frag:1;
+	u32	encapsulation:1;
 };
 
 /**
@@ -127,6 +128,7 @@ enum flow_dissector_key_id {
 #define FLOW_DISSECTOR_F_PARSE_1ST_FRAG		BIT(0)
 #define FLOW_DISSECTOR_F_STOP_AT_L3		BIT(1)
 #define FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL	BIT(2)
+#define FLOW_DISSECTOR_F_STOP_AT_ENCAP		BIT(3)
 
 struct flow_dissector_key {
 	enum flow_dissector_key_id key_id;
-- 
cgit v1.2.3


From 4b36993d3df0834eff3b4172962de0343a4d9123 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 1 Sep 2015 16:46:08 -0700
Subject: flow_dissector: Don't use bit fields.

Just have a flags member instead.

   In file included from include/linux/linkage.h:4:0,
                    from include/linux/kernel.h:6,
                    from net/core/flow_dissector.c:1:
   In function 'flow_keys_hash_start',
       inlined from 'flow_hash_from_keys' at net/core/flow_dissector.c:553:34:
>> include/linux/compiler.h:447:38: error: call to '__compiletime_assert_459' declared with attribute error: BUILD_BUG_ON failed: FLOW_KEYS_HASH_OFFSET % sizeof(u32)

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow_dissector.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index bddd1089dbce..8c8548cf5888 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -12,11 +12,13 @@
 struct flow_dissector_key_control {
 	u16	thoff;
 	u16	addr_type;
-	u32	is_fragment:1;
-	u32	first_frag:1;
-	u32	encapsulation:1;
+	u32	flags;
 };
 
+#define FLOW_DIS_IS_FRAGMENT	BIT(0)
+#define FLOW_DIS_FIRST_FRAG	BIT(1)
+#define FLOW_DIS_ENCAPSULATION	BIT(2)
+
 /**
  * struct flow_dissector_key_basic:
  * @thoff: Transport header offset
-- 
cgit v1.2.3


From 20a17bf6c04e3eca8824c930ecc55ab832558e3b Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 1 Sep 2015 21:19:17 -0700
Subject: flow_dissector: Use 'const' where possible.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/net')

diff --git a/include/net/flow.h b/include/net/flow.h
index dafe97c3c048..acd6a096250e 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -244,18 +244,18 @@ void flow_cache_flush(struct net *net);
 void flow_cache_flush_deferred(struct net *net);
 extern atomic_t flow_cache_genid;
 
-__u32 __get_hash_from_flowi6(struct flowi6 *fl6, struct flow_keys *keys);
+__u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys);
 
-static inline __u32 get_hash_from_flowi6(struct flowi6 *fl6)
+static inline __u32 get_hash_from_flowi6(const struct flowi6 *fl6)
 {
 	struct flow_keys keys;
 
 	return __get_hash_from_flowi6(fl6, &keys);
 }
 
-__u32 __get_hash_from_flowi4(struct flowi4 *fl4, struct flow_keys *keys);
+__u32 __get_hash_from_flowi4(const struct flowi4 *fl4, struct flow_keys *keys);
 
-static inline __u32 get_hash_from_flowi4(struct flowi4 *fl4)
+static inline __u32 get_hash_from_flowi4(const struct flowi4 *fl4)
 {
 	struct flow_keys keys;
 
-- 
cgit v1.2.3


From 62da98656b62a5ca57f22263705175af8ded5aa1 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 3 Sep 2015 01:26:07 +0200
Subject: netfilter: nf_conntrack: make nf_ct_zone_dflt built-in

Fengguang reported, that some randconfig generated the following linker
issue with nf_ct_zone_dflt object involved:

  [...]
  CC      init/version.o
  LD      init/built-in.o
  net/built-in.o: In function `ipv4_conntrack_defrag':
  nf_defrag_ipv4.c:(.text+0x93e95): undefined reference to `nf_ct_zone_dflt'
  net/built-in.o: In function `ipv6_defrag':
  nf_defrag_ipv6_hooks.c:(.text+0xe3ffe): undefined reference to `nf_ct_zone_dflt'
  make: *** [vmlinux] Error 1

Given that configurations exist where we have a built-in part, which is
accessing nf_ct_zone_dflt such as the two handlers nf_ct_defrag_user()
and nf_ct6_defrag_user(), and a part that configures nf_conntrack as a
module, we must move nf_ct_zone_dflt into a fixed, guaranteed built-in
area when netfilter is configured in general.

Therefore, split the more generic parts into a common header under
include/linux/netfilter/ and move nf_ct_zone_dflt into the built-in
section that already holds parts related to CONFIG_NF_CONNTRACK in the
netfilter core. This fixes the issue on my side.

Fixes: 308ac9143ee2 ("netfilter: nf_conntrack: push zone object into functions")
Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netfilter/nf_conntrack_zones.h | 19 +------------------
 1 file changed, 1 insertion(+), 18 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_zones.h b/include/net/netfilter/nf_conntrack_zones.h
index 5316c7b3a374..4e32512cef32 100644
--- a/include/net/netfilter/nf_conntrack_zones.h
+++ b/include/net/netfilter/nf_conntrack_zones.h
@@ -1,24 +1,7 @@
 #ifndef _NF_CONNTRACK_ZONES_H
 #define _NF_CONNTRACK_ZONES_H
 
-#include <linux/netfilter/nf_conntrack_tuple_common.h>
-
-#define NF_CT_DEFAULT_ZONE_ID	0
-
-#define NF_CT_ZONE_DIR_ORIG	(1 << IP_CT_DIR_ORIGINAL)
-#define NF_CT_ZONE_DIR_REPL	(1 << IP_CT_DIR_REPLY)
-
-#define NF_CT_DEFAULT_ZONE_DIR	(NF_CT_ZONE_DIR_ORIG | NF_CT_ZONE_DIR_REPL)
-
-#define NF_CT_FLAG_MARK		1
-
-struct nf_conntrack_zone {
-	u16	id;
-	u8	flags;
-	u8	dir;
-};
-
-extern const struct nf_conntrack_zone nf_ct_zone_dflt;
+#include <linux/netfilter/nf_conntrack_zones_common.h>
 
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
 #include <net/netfilter/nf_conntrack_extend.h>
-- 
cgit v1.2.3


From 22f66895e60cfc55b92f6fa93f05bb3fbdbd0bed Mon Sep 17 00:00:00 2001
From: Avri Altman <avri.altman@intel.com>
Date: Tue, 18 Aug 2015 16:52:07 +0300
Subject: mac80211: protect non-HT BSS when HT TDLS traffic exists

HT TDLS traffic should be protected in a non-HT BSS to avoid
collisions. Therefore, when TDLS peers join/leave, check if
protection is (now) needed and set the ht_operation_mode of
the virtual interface according to the HT capabilities of the
TDLS peer(s).

This works because a non-HT BSS connection never sets (or
otherwise uses) the ht_operation_mode; it just means that
drivers must be aware that this field applies to all HT
traffic for this virtual interface, not just the traffic
within the BSS. Document that.

Signed-off-by: Avri Altman <avri.altman@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index e3314e516681..bfc569498bfa 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -477,7 +477,9 @@ struct ieee80211_event {
  * @chandef: Channel definition for this BSS -- the hardware might be
  *	configured a higher bandwidth than this BSS uses, for example.
  * @ht_operation_mode: HT operation mode like in &struct ieee80211_ht_operation.
- *	This field is only valid when the channel type is one of the HT types.
+ *	This field is only valid when the channel is a wide HT/VHT channel.
+ *	Note that with TDLS this can be the case (channel is HT, protection must
+ *	be used from this field) even when the BSS association isn't using HT.
  * @cqm_rssi_thold: Connection quality monitor RSSI threshold, a zero value
  *	implies disabled
  * @cqm_rssi_hyst: Connection quality monitor RSSI hysteresis
-- 
cgit v1.2.3


From 33398cf2f360c5ce24c8a22436d52a06ad4e5eb5 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.cz>
Date: Tue, 8 Sep 2015 15:01:02 -0700
Subject: memcg: export struct mem_cgroup

mem_cgroup structure is defined in mm/memcontrol.c currently which means
that the code outside of this file has to use external API even for
trivial access stuff.

This patch exports mm_struct with its dependencies and makes some of the
exported functions inlines.  This even helps to reduce the code size a bit
(make defconfig + CONFIG_MEMCG=y)

  text		data    bss     dec     	 hex 	filename
  12355346        1823792 1089536 15268674         e8fb42 vmlinux.before
  12354970        1823792 1089536 15268298         e8f9ca vmlinux.after

This is not much (370B) but better than nothing.

We also save a function call in some hot paths like callers of
mem_cgroup_count_vm_event which is used for accounting.

The patch doesn't introduce any functional changes.

[vdavykov@parallels.com: inline memcg_kmem_is_active]
[vdavykov@parallels.com: do not expose type outside of CONFIG_MEMCG]
[akpm@linux-foundation.org: memcontrol.h needs eventfd.h for eventfd_ctx]
[akpm@linux-foundation.org: export mem_cgroup_from_task() to modules]
Signed-off-by: Michal Hocko <mhocko@suse.cz>
Reviewed-by: Vladimir Davydov <vdavydov@parallels.com>
Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/net/sock.h | 28 ----------------------------
 1 file changed, 28 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sock.h b/include/net/sock.h
index 43c6abcf06ab..a98c71ea40c5 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1042,34 +1042,6 @@ struct proto {
 #endif
 };
 
-/*
- * Bits in struct cg_proto.flags
- */
-enum cg_proto_flags {
-	/* Currently active and new sockets should be assigned to cgroups */
-	MEMCG_SOCK_ACTIVE,
-	/* It was ever activated; we must disarm static keys on destruction */
-	MEMCG_SOCK_ACTIVATED,
-};
-
-struct cg_proto {
-	struct page_counter	memory_allocated;	/* Current allocated memory. */
-	struct percpu_counter	sockets_allocated;	/* Current number of sockets. */
-	int			memory_pressure;
-	long			sysctl_mem[3];
-	unsigned long		flags;
-	/*
-	 * memcg field is used to find which memcg we belong directly
-	 * Each memcg struct can hold more than one cg_proto, so container_of
-	 * won't really cut.
-	 *
-	 * The elegant solution would be having an inverse function to
-	 * proto_cgroup in struct proto, but that means polluting the structure
-	 * for everybody, instead of just for memcg users.
-	 */
-	struct mem_cgroup	*memcg;
-};
-
 int proto_register(struct proto *prot, int alloc_slab);
 void proto_unregister(struct proto *prot);
 
-- 
cgit v1.2.3


From e752eb68811aeece2220e183e23369a34122fb5e Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Tue, 8 Sep 2015 15:01:16 -0700
Subject: memcg: move memcg_proto_active from sock.h

The only user is sock_update_memcg which is living in memcontrol.c so it
doesn't make much sense to pollute sock.h by this inline helper.  Move it
to memcontrol.c and open code it into its only caller.

Signed-off-by: Michal Hocko <mhocko@suse.com>
Cc: Vladimir Davydov <vdavydov@parallels.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/net/sock.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sock.h b/include/net/sock.h
index a98c71ea40c5..7aa78440559a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1045,11 +1045,6 @@ struct proto {
 int proto_register(struct proto *prot, int alloc_slab);
 void proto_unregister(struct proto *prot);
 
-static inline bool memcg_proto_active(struct cg_proto *cg_proto)
-{
-	return test_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
-}
-
 #ifdef SOCK_REFCNT_DEBUG
 static inline void sk_refcnt_debug_inc(struct sock *sk)
 {
-- 
cgit v1.2.3


From f53de1e9a4aaf8cbe08845da6f7ff26a078ac507 Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil@nwl.cc>
Date: Wed, 9 Sep 2015 14:20:56 +0200
Subject: net: ipv6: use common fib_default_rule_pref

This switches IPv6 policy routing to use the shared
fib_default_rule_pref() function of IPv4 and DECnet. It is also used in
multicast routing for IPv4 as well as IPv6.

The motivation for this patch is a complaint about iproute2 behaving
inconsistent between IPv4 and IPv6 when adding policy rules: Formerly,
IPv6 rules were assigned a fixed priority of 0x3FFF whereas for IPv4 the
assigned priority value was decreased with each rule added.

Since then all users of the default_pref field have been converted to
assign the generic function fib_default_rule_pref(), fib_nl_newrule()
may just use it directly instead. Therefore get rid of the function
pointer altogether and make fib_default_rule_pref() static, as it's not
used outside fib_rules.c anymore.

Signed-off-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/fib_rules.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 4e8f804f4589..59160de702b6 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -66,7 +66,6 @@ struct fib_rules_ops {
 					   struct nlattr **);
 	int			(*fill)(struct fib_rule *, struct sk_buff *,
 					struct fib_rule_hdr *);
-	u32			(*default_pref)(struct fib_rules_ops *ops);
 	size_t			(*nlmsg_payload)(struct fib_rule *);
 
 	/* Called after modifications to the rules set, must flush
@@ -118,5 +117,4 @@ int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags,
 		     struct fib_lookup_arg *);
 int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table,
 			 u32 flags);
-u32 fib_default_rule_pref(struct fib_rules_ops *ops);
 #endif
-- 
cgit v1.2.3