From c8a8585431efba0faaf41167f8f7c27c48307ca6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Vianney=20le=20Cl=C3=A9ment=20de=20Saint-Marcq?=
 <vianney.leclement@essensium.com>
Date: Mon, 30 Mar 2015 10:34:58 +0200
Subject: iio: core: Introduce IIO_CHAN_INFO_CALIBEMISSIVITY
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Contact-less IR temperature sensors measure the temperature of an object
by using its thermal radiation.  Surfaces with different emissivity
ratios emit different amounts of energy at the same temperature.

IIO_CHAN_INFO_CALIBEMISSIVITY allows the user to inform the sensor of the
emissivity of the object in front of it, in order to effectively measure
its temperature.

A device providing such setting is Melexis's MLX90614:
http://melexis.com/Assets/IR-sensor-thermometer-MLX90614-Datasheet-5152.aspx.

Signed-off-by: Vianney le Clément de Saint-Marcq <vianney.leclement@essensium.com>
Cc: Arnout Vandecappelle (Essensium/Mind) <arnout@mind.be>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 include/linux/iio/iio.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
index d86b753e9b30..b1e46ae89aa7 100644
--- a/include/linux/iio/iio.h
+++ b/include/linux/iio/iio.h
@@ -43,6 +43,7 @@ enum iio_chan_info_enum {
 	IIO_CHAN_INFO_CALIBWEIGHT,
 	IIO_CHAN_INFO_DEBOUNCE_COUNT,
 	IIO_CHAN_INFO_DEBOUNCE_TIME,
+	IIO_CHAN_INFO_CALIBEMISSIVITY,
 };
 
 enum iio_shared_by {
-- 
cgit v1.2.3


From d0e83059a6c9b04f00264a74b8f6439948de4613 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 20 Apr 2015 13:39:03 +0800
Subject: crypto: rng - Convert crypto_rng to new style crypto_type

This patch converts the top-level crypto_rng to the "new" style.
It was the last algorithm type added before we switched over
to the new way of doing things exemplified by shash.

All users will automatically switch over to the new interface.

Note that this patch does not touch the low-level interface to
rng implementations.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/crypto.h | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 10df5d2d093a..781f7d546020 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -655,19 +655,12 @@ struct compress_tfm {
 	                      u8 *dst, unsigned int *dlen);
 };
 
-struct rng_tfm {
-	int (*rng_gen_random)(struct crypto_rng *tfm, u8 *rdata,
-			      unsigned int dlen);
-	int (*rng_reset)(struct crypto_rng *tfm, u8 *seed, unsigned int slen);
-};
-
 #define crt_ablkcipher	crt_u.ablkcipher
 #define crt_aead	crt_u.aead
 #define crt_blkcipher	crt_u.blkcipher
 #define crt_cipher	crt_u.cipher
 #define crt_hash	crt_u.hash
 #define crt_compress	crt_u.compress
-#define crt_rng		crt_u.rng
 
 struct crypto_tfm {
 
@@ -680,7 +673,6 @@ struct crypto_tfm {
 		struct cipher_tfm cipher;
 		struct hash_tfm hash;
 		struct compress_tfm compress;
-		struct rng_tfm rng;
 	} crt_u;
 
 	void (*exit)(struct crypto_tfm *tfm);
@@ -714,10 +706,6 @@ struct crypto_hash {
 	struct crypto_tfm base;
 };
 
-struct crypto_rng {
-	struct crypto_tfm base;
-};
-
 enum {
 	CRYPTOA_UNSPEC,
 	CRYPTOA_ALG,
-- 
cgit v1.2.3


From acec27ff35af9caf34d76d16ee17ff3b292e7d83 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 21 Apr 2015 10:46:38 +0800
Subject: crypto: rng - Convert low-level crypto_rng to new style

This patch converts the low-level crypto_rng interface to the
"new" style.

This allows existing implementations to be converted over one-
by-one.  Once that is complete we can then remove the old rng
interface.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/crypto.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 781f7d546020..2fa9b05360f7 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -427,7 +427,7 @@ struct compress_alg {
 };
 
 /**
- * struct rng_alg - random number generator definition
+ * struct old_rng_alg - random number generator definition
  * @rng_make_random: The function defined by this variable obtains a random
  *		     number. The random number generator transform must generate
  *		     the random number out of the context provided with this
@@ -445,7 +445,7 @@ struct compress_alg {
  *	      seeding is implemented internally without the need of support by
  *	      the consumer. In this case, the seed size is set to zero.
  */
-struct rng_alg {
+struct old_rng_alg {
 	int (*rng_make_random)(struct crypto_rng *tfm, u8 *rdata,
 			       unsigned int dlen);
 	int (*rng_reset)(struct crypto_rng *tfm, u8 *seed, unsigned int slen);
@@ -559,7 +559,7 @@ struct crypto_alg {
 		struct blkcipher_alg blkcipher;
 		struct cipher_alg cipher;
 		struct compress_alg compress;
-		struct rng_alg rng;
+		struct old_rng_alg rng;
 	} cra_u;
 
 	int (*cra_init)(struct crypto_tfm *tfm);
-- 
cgit v1.2.3


From 94f1bb15bed84ad6c893916b7e7b9db6f1d7eec6 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 21 Apr 2015 10:46:46 +0800
Subject: crypto: rng - Remove old low-level rng interface

Now that all rng implementations have switched over to the new
interface, we can remove the old low-level interface.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/crypto.h | 30 ------------------------------
 1 file changed, 30 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 2fa9b05360f7..ee14140f8893 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -138,7 +138,6 @@ struct crypto_async_request;
 struct crypto_aead;
 struct crypto_blkcipher;
 struct crypto_hash;
-struct crypto_rng;
 struct crypto_tfm;
 struct crypto_type;
 struct aead_givcrypt_request;
@@ -426,40 +425,12 @@ struct compress_alg {
 			      unsigned int slen, u8 *dst, unsigned int *dlen);
 };
 
-/**
- * struct old_rng_alg - random number generator definition
- * @rng_make_random: The function defined by this variable obtains a random
- *		     number. The random number generator transform must generate
- *		     the random number out of the context provided with this
- *		     call.
- * @rng_reset: Reset of the random number generator by clearing the entire state.
- *	       With the invocation of this function call, the random number
- *             generator shall completely reinitialize its state. If the random
- *	       number generator requires a seed for setting up a new state,
- *	       the seed must be provided by the consumer while invoking this
- *	       function. The required size of the seed is defined with
- *	       @seedsize .
- * @seedsize: The seed size required for a random number generator
- *	      initialization defined with this variable. Some random number
- *	      generators like the SP800-90A DRBG does not require a seed as the
- *	      seeding is implemented internally without the need of support by
- *	      the consumer. In this case, the seed size is set to zero.
- */
-struct old_rng_alg {
-	int (*rng_make_random)(struct crypto_rng *tfm, u8 *rdata,
-			       unsigned int dlen);
-	int (*rng_reset)(struct crypto_rng *tfm, u8 *seed, unsigned int slen);
-
-	unsigned int seedsize;
-};
-
 
 #define cra_ablkcipher	cra_u.ablkcipher
 #define cra_aead	cra_u.aead
 #define cra_blkcipher	cra_u.blkcipher
 #define cra_cipher	cra_u.cipher
 #define cra_compress	cra_u.compress
-#define cra_rng		cra_u.rng
 
 /**
  * struct crypto_alg - definition of a cryptograpic cipher algorithm
@@ -559,7 +530,6 @@ struct crypto_alg {
 		struct blkcipher_alg blkcipher;
 		struct cipher_alg cipher;
 		struct compress_alg compress;
-		struct old_rng_alg rng;
 	} cra_u;
 
 	int (*cra_init)(struct crypto_tfm *tfm);
-- 
cgit v1.2.3


From 4796cf9b02b5bea141632e21d64556a7eb883a65 Mon Sep 17 00:00:00 2001
From: Yingjoe Chen <yingjoe.chen@mediatek.com>
Date: Fri, 10 Apr 2015 21:55:50 +0800
Subject: time: Remove nonexistent function prototype

The function clocksource_get_next() was removed in commit 75c5158f70
(timekeeping: Update clocksource with stop_machine), but the
prototype was not removed with it. Remove the prototype.

Signed-off-by: Yingjoe Chen <yingjoe.chen@mediatek.com>
Cc: <linux-arm-kernel@lists.infradead.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: <srv_heupstream@mediatek.com>
Cc: John Stultz <john.stultz@linaro.org>
Link: http://lkml.kernel.org/r/1428674150-1780-1-git-send-email-yingjoe.chen@mediatek.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/clocksource.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 135509821c39..a25fc6e873b8 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -181,7 +181,6 @@ static inline s64 clocksource_cyc2ns(cycle_t cycles, u32 mult, u32 shift)
 
 extern int clocksource_unregister(struct clocksource*);
 extern void clocksource_touch_watchdog(void);
-extern struct clocksource* clocksource_get_next(void);
 extern void clocksource_change_rating(struct clocksource *cs, int rating);
 extern void clocksource_suspend(void);
 extern void clocksource_resume(void);
-- 
cgit v1.2.3


From 91e5a2170e795989da9f90c18ba18984f23acc5b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 13 Apr 2015 21:02:22 +0000
Subject: hrtimer: Document hrtimer_forward[_now]() proper

Document the calling context conditions.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20150413210035.178751779@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 05f6df1fdf5b..7770676c387a 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -418,7 +418,22 @@ static inline int hrtimer_callback_running(struct hrtimer *timer)
 extern u64
 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval);
 
-/* Forward a hrtimer so it expires after the hrtimer's current now */
+/**
+ * hrtimer_forward_now - forward the timer expiry so it expires after now
+ * @timer:	hrtimer to forward
+ * @interval:	the interval to forward
+ *
+ * Forward the timer expiry so it will expire after the current time
+ * of the hrtimer clock base. Returns the number of overruns.
+ *
+ * Can be safely called from the callback function of @timer. If
+ * called from other contexts @timer must neither be enqueued nor
+ * running the callback and the caller needs to take care of
+ * serialization.
+ *
+ * Note: This only updates the timer expiry value and does not requeue
+ * the timer.
+ */
 static inline u64 hrtimer_forward_now(struct hrtimer *timer,
 				      ktime_t interval)
 {
-- 
cgit v1.2.3


From 398ca17fb54b212cdc9da7ff4a17a35c48dd2103 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 14 Apr 2015 21:08:27 +0000
Subject: hrtimer: Get rid of the resolution field in hrtimer_clock_base

The field has no value because all clock bases have the same
resolution. The resolution only changes when we switch to high
resolution timer mode. We can evaluate that from a single static
variable as well. In the !HIGHRES case its simply a constant.

Export the variable, so we can simplify the usage sites.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/r/20150414203500.645454122@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 7770676c387a..bc6f91b5443b 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -137,7 +137,6 @@ struct hrtimer_sleeper {
  *			timer to a base on another cpu.
  * @clockid:		clock id for per_cpu support
  * @active:		red black tree root node for the active timers
- * @resolution:		the resolution of the clock, in nanoseconds
  * @get_time:		function to retrieve the current time of the clock
  * @softirq_time:	the time when running the hrtimer queue in the softirq
  * @offset:		offset of this clock to the monotonic base
@@ -147,7 +146,6 @@ struct hrtimer_clock_base {
 	int			index;
 	clockid_t		clockid;
 	struct timerqueue_head	active;
-	ktime_t			resolution;
 	ktime_t			(*get_time)(void);
 	ktime_t			softirq_time;
 	ktime_t			offset;
@@ -295,11 +293,15 @@ extern void hrtimer_peek_ahead_timers(void);
 
 extern void clock_was_set_delayed(void);
 
+extern unsigned int hrtimer_resolution;
+
 #else
 
 # define MONOTONIC_RES_NSEC	LOW_RES_NSEC
 # define KTIME_MONOTONIC_RES	KTIME_LOW_RES
 
+#define hrtimer_resolution	LOW_RES_NSEC
+
 static inline void hrtimer_peek_ahead_timers(void) { }
 
 /*
-- 
cgit v1.2.3


From 056a3cacbc46e5aca27b350ce4ecb3b33ebb0700 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 14 Apr 2015 21:08:32 +0000
Subject: hrtimer: Get rid of hrtimer_get_res()

The resolution is directly accessible now. So its simpler just to fill
in the values of the timespec and be done with it.

Text size reduction (combined with "hrtimer: Get rid of the resolution
field in hrtimer_clock_base"):
       x8664 -61, i386 -221, ARM -60, power64 -48

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/r/20150414203500.879888080@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index bc6f91b5443b..8025156c8fa1 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -385,7 +385,6 @@ static inline int hrtimer_restart(struct hrtimer *timer)
 
 /* Query timers: */
 extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer);
-extern int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp);
 
 extern ktime_t hrtimer_get_next_event(void);
 
-- 
cgit v1.2.3


From a6ffebce7f89f6f97cc22838a5d4383b15d6774f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 14 Apr 2015 21:08:34 +0000
Subject: hrtimer: Make the statistics fields smaller

No point in having usigned long for /proc/timer_list statistics. Make
them unsigned int.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/r/20150414203500.959773467@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 8025156c8fa1..d39f2847754c 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -187,10 +187,10 @@ struct hrtimer_cpu_base {
 	int				in_hrtirq;
 	int				hres_active;
 	int				hang_detected;
-	unsigned long			nr_events;
-	unsigned long			nr_retries;
-	unsigned long			nr_hangs;
-	ktime_t				max_hang_time;
+	unsigned int			nr_events;
+	unsigned int			nr_retries;
+	unsigned int			nr_hangs;
+	unsigned int			max_hang_time;
 #endif
 	struct hrtimer_clock_base	clock_base[HRTIMER_MAX_CLOCK_BASES];
 };
-- 
cgit v1.2.3


From 21d6d52a1b7028e6a6840bd82e354aefa9a5e203 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 14 Apr 2015 21:08:35 +0000
Subject: hrtimer: Get rid of softirq time

The softirq time field in the clock bases is an optimization from the
early days of hrtimers. It provides a coarse "jiffies" like time
mostly for self rearming timers.

But that comes with a price:
    - Larger code size
    - Extra storage space
    - Duplicated functions with really small differences

The benefit of this is optimization is marginal for contemporary
systems.

Consolidate everything on the high resolution timer
implementation. This makes further optimizations possible.

Text size reduction:
       x8664 -95, i386 -356, ARM -148, ARM64 -40, power64 -16

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/r/20150414203501.039977424@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h | 24 +++++-------------------
 1 file changed, 5 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index d39f2847754c..e292830b58f0 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -138,7 +138,6 @@ struct hrtimer_sleeper {
  * @clockid:		clock id for per_cpu support
  * @active:		red black tree root node for the active timers
  * @get_time:		function to retrieve the current time of the clock
- * @softirq_time:	the time when running the hrtimer queue in the softirq
  * @offset:		offset of this clock to the monotonic base
  */
 struct hrtimer_clock_base {
@@ -147,7 +146,6 @@ struct hrtimer_clock_base {
 	clockid_t		clockid;
 	struct timerqueue_head	active;
 	ktime_t			(*get_time)(void);
-	ktime_t			softirq_time;
 	ktime_t			offset;
 };
 
@@ -260,19 +258,16 @@ static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer)
 	return ktime_sub(timer->node.expires, timer->base->get_time());
 }
 
-#ifdef CONFIG_HIGH_RES_TIMERS
-struct clock_event_device;
-
-extern void hrtimer_interrupt(struct clock_event_device *dev);
-
-/*
- * In high resolution mode the time reference must be read accurate
- */
 static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer)
 {
 	return timer->base->get_time();
 }
 
+#ifdef CONFIG_HIGH_RES_TIMERS
+struct clock_event_device;
+
+extern void hrtimer_interrupt(struct clock_event_device *dev);
+
 static inline int hrtimer_is_hres_active(struct hrtimer *timer)
 {
 	return timer->base->cpu_base->hres_active;
@@ -304,15 +299,6 @@ extern unsigned int hrtimer_resolution;
 
 static inline void hrtimer_peek_ahead_timers(void) { }
 
-/*
- * In non high resolution mode the time reference is taken from
- * the base softirq time variable.
- */
-static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer)
-{
-	return timer->base->softirq_time;
-}
-
 static inline int hrtimer_is_hres_active(struct hrtimer *timer)
 {
 	return 0;
-- 
cgit v1.2.3


From 868a3e915f7f5eba8f8cb4f7da2276760807c51c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 14 Apr 2015 21:08:37 +0000
Subject: hrtimer: Make offset update smarter

On every tick/hrtimer interrupt we update the offset variables of the
clock bases. That's silly because these offsets change very seldom.

Add a sequence counter to the time keeping code which keeps track of
the offset updates (clock_was_set()). Have a sequence cache in the
hrtimer cpu bases to evaluate whether the offsets must be updated or
not. This allows us later to avoid pointless cacheline pollution.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: John Stultz <john.stultz@linaro.org>
Link: http://lkml.kernel.org/r/20150414203501.132820245@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: John Stultz <john.stultz@linaro.org>
---
 include/linux/hrtimer.h             | 4 ++--
 include/linux/timekeeper_internal.h | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index e292830b58f0..5e04f8fc26f6 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -163,7 +163,7 @@ enum  hrtimer_base_type {
  *			and timers
  * @cpu:		cpu number
  * @active_bases:	Bitfield to mark bases with active timers
- * @clock_was_set:	Indicates that clock was set from irq context.
+ * @clock_was_set_seq:	Sequence counter of clock was set events
  * @expires_next:	absolute time of the next event which was scheduled
  *			via clock_set_next_event()
  * @in_hrtirq:		hrtimer_interrupt() is currently executing
@@ -179,7 +179,7 @@ struct hrtimer_cpu_base {
 	raw_spinlock_t			lock;
 	unsigned int			cpu;
 	unsigned int			active_bases;
-	unsigned int			clock_was_set;
+	unsigned int			clock_was_set_seq;
 #ifdef CONFIG_HIGH_RES_TIMERS
 	ktime_t				expires_next;
 	int				in_hrtirq;
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index fb86963859c7..6f8276ae579c 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -49,6 +49,7 @@ struct tk_read_base {
  * @offs_boot:		Offset clock monotonic -> clock boottime
  * @offs_tai:		Offset clock monotonic -> clock tai
  * @tai_offset:		The current UTC to TAI offset in seconds
+ * @clock_was_set_seq:	The sequence number of clock was set events
  * @raw_time:		Monotonic raw base time in timespec64 format
  * @cycle_interval:	Number of clock cycles in one NTP interval
  * @xtime_interval:	Number of clock shifted nano seconds in one NTP
@@ -85,6 +86,7 @@ struct timekeeper {
 	ktime_t			offs_boot;
 	ktime_t			offs_tai;
 	s32			tai_offset;
+	unsigned int		clock_was_set_seq;
 	struct timespec64	raw_time;
 
 	/* The following members are for timekeeping internal use */
-- 
cgit v1.2.3


From e19ffe8be2cd0a1f726b235443eba21e64f6be5e Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 14 Apr 2015 21:08:39 +0000
Subject: hrtimer: Use bits for various boolean indicators

No point in wasting 12 byte storage space. Generates better code as well.

Text size reduction:
       x8664 -64, i386 -16, ARM -132, ARM64 -0, power64 -48

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/r/20150414203501.227955358@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 5e04f8fc26f6..17a59ddcc79a 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -181,10 +181,10 @@ struct hrtimer_cpu_base {
 	unsigned int			active_bases;
 	unsigned int			clock_was_set_seq;
 #ifdef CONFIG_HIGH_RES_TIMERS
+	unsigned int			in_hrtirq	: 1,
+					hres_active	: 1,
+					hang_detected	: 1;
 	ktime_t				expires_next;
-	int				in_hrtirq;
-	int				hres_active;
-	int				hang_detected;
 	unsigned int			nr_events;
 	unsigned int			nr_retries;
 	unsigned int			nr_hangs;
-- 
cgit v1.2.3


From 6d9a1411393d51f17bee3fe163430b21b2cb2de9 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 14 Apr 2015 21:08:42 +0000
Subject: hrtimer: Cache line align the hrtimer cpu base

We really want that data structure to start at a cache line boundary.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/r/20150414203501.417597627@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 17a59ddcc79a..0853f52f8ffb 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -191,7 +191,7 @@ struct hrtimer_cpu_base {
 	unsigned int			max_hang_time;
 #endif
 	struct hrtimer_clock_base	clock_base[HRTIMER_MAX_CLOCK_BASES];
-};
+} ____cacheline_aligned;
 
 static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time)
 {
-- 
cgit v1.2.3


From b8e38413ac2c33c497e72895fcd5da709fd1b908 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 14 Apr 2015 21:08:44 +0000
Subject: hrtimer: Align the hrtimer clock bases as well

We don't use cacheline_align here because that might waste lot of
space on 32bit machine with 64 bytes cachelines and on 64bit machines
with 128 bytes cachelines.

The size of struct hrtimer_clock_base is 64byte on 64bit and 32byte on
32bit machines. So we utilize the cache lines proper.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/r/20150414203501.498165771@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 0853f52f8ffb..e5c22d611850 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -130,6 +130,12 @@ struct hrtimer_sleeper {
 	struct task_struct *task;
 };
 
+#ifdef CONFIG_64BIT
+# define HRTIMER_CLOCK_BASE_ALIGN	64
+#else
+# define HRTIMER_CLOCK_BASE_ALIGN	32
+#endif
+
 /**
  * struct hrtimer_clock_base - the timer base for a specific clock
  * @cpu_base:		per cpu clock base
@@ -147,7 +153,7 @@ struct hrtimer_clock_base {
 	struct timerqueue_head	active;
 	ktime_t			(*get_time)(void);
 	ktime_t			offset;
-};
+} __attribute__((__aligned__(HRTIMER_CLOCK_BASE_ALIGN)));
 
 enum  hrtimer_base_type {
 	HRTIMER_BASE_MONOTONIC,
@@ -195,6 +201,8 @@ struct hrtimer_cpu_base {
 
 static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time)
 {
+	BUILD_BUG_ON(sizeof(struct hrtimer_clock_base) > HRTIMER_CLOCK_BASE_ALIGN);
+
 	timer->node.expires = time;
 	timer->_softexpires = time;
 }
-- 
cgit v1.2.3


From c320642e1ced3b81592610e374894fea995f475b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 14 Apr 2015 21:08:46 +0000
Subject: timerqueue: Let timerqueue_add/del return information

The hrtimer code is interested whether the added timer is the first
one to expire and whether the removed timer was the last one in the
tree. The add/del routines have that information already. So we can
return it right away instead of reevaluating it at the call site.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: John Stultz <john.stultz@linaro.org>
Link: http://lkml.kernel.org/r/20150414203501.579063647@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/timerqueue.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/timerqueue.h b/include/linux/timerqueue.h
index a520fd70a59f..7eec17ad7fa1 100644
--- a/include/linux/timerqueue.h
+++ b/include/linux/timerqueue.h
@@ -16,10 +16,10 @@ struct timerqueue_head {
 };
 
 
-extern void timerqueue_add(struct timerqueue_head *head,
-				struct timerqueue_node *node);
-extern void timerqueue_del(struct timerqueue_head *head,
-				struct timerqueue_node *node);
+extern bool timerqueue_add(struct timerqueue_head *head,
+			   struct timerqueue_node *node);
+extern bool timerqueue_del(struct timerqueue_head *head,
+			   struct timerqueue_node *node);
 extern struct timerqueue_node *timerqueue_iterate_next(
 						struct timerqueue_node *node);
 
-- 
cgit v1.2.3


From 895bdfa793f6e912d1a58fc445b3dd4d686f7bd3 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 14 Apr 2015 21:08:49 +0000
Subject: hrtimer: Keep pointer to first timer and simplify __remove_hrtimer()

__remove_hrtimer() needs to evaluate the expiry time to figure out
whether the timer which is removed is eventually the first expiring
timer on the cpu. Keep a pointer to it, which is lazily updated, so we
can avoid the evaluation dance and retrieve the information from there.

Generates slightly better code.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/r/20150414203501.752838019@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index e5c22d611850..d194c1dacdaa 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -172,6 +172,7 @@ enum  hrtimer_base_type {
  * @clock_was_set_seq:	Sequence counter of clock was set events
  * @expires_next:	absolute time of the next event which was scheduled
  *			via clock_set_next_event()
+ * @next_timer:		Pointer to the first expiring timer
  * @in_hrtirq:		hrtimer_interrupt() is currently executing
  * @hres_active:	State of high resolution mode
  * @hang_detected:	The last hrtimer interrupt detected a hang
@@ -180,6 +181,10 @@ enum  hrtimer_base_type {
  * @nr_hangs:		Total number of hrtimer interrupt hangs
  * @max_hang_time:	Maximum time spent in hrtimer_interrupt
  * @clock_base:		array of clock bases for this cpu
+ *
+ * Note: next_timer is just an optimization for __remove_hrtimer().
+ *	 Do not dereference the pointer because it is not reliable on
+ *	 cross cpu removals.
  */
 struct hrtimer_cpu_base {
 	raw_spinlock_t			lock;
@@ -191,6 +196,7 @@ struct hrtimer_cpu_base {
 					hres_active	: 1,
 					hang_detected	: 1;
 	ktime_t				expires_next;
+	struct hrtimer			*next_timer;
 	unsigned int			nr_events;
 	unsigned int			nr_retries;
 	unsigned int			nr_hangs;
-- 
cgit v1.2.3


From c6eb3f70d4482806dc2d3e1e3c7736f497b1d418 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 14 Apr 2015 21:08:51 +0000
Subject: hrtimer: Get rid of hrtimer softirq

hrtimer softirq is a leftover from the initial implementation and
serves only the purpose to handle the enqueueing of already expired
timers in the high resolution timer mode. We discussed whether we
change the return value and force all start sites to handle that the
timer is already expired, but that would be a Herculean task and I'm
not sure whether its a good idea to enforce that handling on
everyone.

A simpler solution is to enforce a timer interrupt instead of raising
and scheduling a softirq. Just use the existing infrastructure to do
so and remove all the softirq leftovers.

The HRTIMER softirq enum is now unused, but kept around because trace
parsers rely on the existing numbering.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/r/20150414203501.840834708@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h   | 1 -
 include/linux/interrupt.h | 3 ++-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index d194c1dacdaa..048270a27bc5 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -459,7 +459,6 @@ extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode);
 
 /* Soft interrupt function to run the hrtimer queues: */
 extern void hrtimer_run_queues(void);
-extern void hrtimer_run_pending(void);
 
 /* Bootup initialization: */
 extern void __init hrtimers_init(void);
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 950ae4501826..6bf15a66bce7 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -413,7 +413,8 @@ enum
 	BLOCK_IOPOLL_SOFTIRQ,
 	TASKLET_SOFTIRQ,
 	SCHED_SOFTIRQ,
-	HRTIMER_SOFTIRQ,
+	HRTIMER_SOFTIRQ, /* Unused, but kept as tools rely on the
+			    numbering. Sigh! */
 	RCU_SOFTIRQ,    /* Preferable RCU should always be the last softirq */
 
 	NR_SOFTIRQS
-- 
cgit v1.2.3


From c1ad348b452aacd784fb97403d03d71723c72ee1 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 14 Apr 2015 21:08:58 +0000
Subject: tick: Nohz: Rework next timer evaluation

The evaluation of the next timer in the nohz code is based on jiffies
while all the tick internals are nano seconds based. We have also to
convert hrtimer nanoseconds to jiffies in the !highres case. That's
just wrong and introduces interesting corner cases.

Turn it around and convert the next timer wheel timer expiry and the
rcu event to clock monotonic and base all calculations on
nanoseconds. That identifies the case where no timer is pending
clearly with an absolute expiry value of KTIME_MAX.

Makes the code more readable and gets rid of the jiffies magic in the
nohz code.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Link: http://lkml.kernel.org/r/20150414203502.184198593@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h  | 2 +-
 include/linux/rcupdate.h | 6 ++++--
 include/linux/rcutree.h  | 2 +-
 include/linux/timer.h    | 7 -------
 4 files changed, 6 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 048270a27bc5..2c68f71ffd24 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -386,7 +386,7 @@ static inline int hrtimer_restart(struct hrtimer *timer)
 /* Query timers: */
 extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer);
 
-extern ktime_t hrtimer_get_next_event(void);
+extern u64 hrtimer_get_next_event(void);
 
 /*
  * A timer is active, when it is enqueued into the rbtree or the
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 573a5afd5ed8..0627a447c589 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -44,6 +44,8 @@
 #include <linux/debugobjects.h>
 #include <linux/bug.h>
 #include <linux/compiler.h>
+#include <linux/ktime.h>
+
 #include <asm/barrier.h>
 
 extern int rcu_expedited; /* for sysctl */
@@ -1154,9 +1156,9 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
 	__kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head))
 
 #if defined(CONFIG_TINY_RCU) || defined(CONFIG_RCU_NOCB_CPU_ALL)
-static inline int rcu_needs_cpu(unsigned long *delta_jiffies)
+static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt)
 {
-	*delta_jiffies = ULONG_MAX;
+	*nextevt = KTIME_MAX;
 	return 0;
 }
 #endif /* #if defined(CONFIG_TINY_RCU) || defined(CONFIG_RCU_NOCB_CPU_ALL) */
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index d2e583a6aaca..db2e31beaae7 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -32,7 +32,7 @@
 
 void rcu_note_context_switch(void);
 #ifndef CONFIG_RCU_NOCB_CPU_ALL
-int rcu_needs_cpu(unsigned long *delta_jiffies);
+int rcu_needs_cpu(u64 basem, u64 *nextevt);
 #endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */
 void rcu_cpu_stall_reset(void);
 
diff --git a/include/linux/timer.h b/include/linux/timer.h
index 8c5a197e1587..fbb80e0030bf 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -187,13 +187,6 @@ extern void set_timer_slack(struct timer_list *time, int slack_hz);
  */
 #define NEXT_TIMER_MAX_DELTA	((1UL << 30) - 1)
 
-/*
- * Return when the next timer-wheel timeout occurs (in absolute jiffies),
- * locks the timer base and does the comparison against the given
- * jiffie.
- */
-extern unsigned long get_next_timer_interrupt(unsigned long now);
-
 /*
  * Timer-statistics info:
  */
-- 
cgit v1.2.3


From 58f1f803f1d6ef9ab280de13246d65970a09cb95 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 14 Apr 2015 21:09:08 +0000
Subject: hrtimer: Get rid of __hrtimer_start_range_ns()

No more callers. Remove the leftovers.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/r/20150414203502.707871492@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 2c68f71ffd24..a80baa86bb24 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -359,10 +359,6 @@ extern int hrtimer_start(struct hrtimer *timer, ktime_t tim,
 			 const enum hrtimer_mode mode);
 extern int hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
 			unsigned long range_ns, const enum hrtimer_mode mode);
-extern int
-__hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
-			 unsigned long delta_ns,
-			 const enum hrtimer_mode mode, int wakeup);
 
 extern int hrtimer_cancel(struct hrtimer *timer);
 extern int hrtimer_try_to_cancel(struct hrtimer *timer);
-- 
cgit v1.2.3


From 02a171af1a46966dcdb5b38cdc33e4f43e92c778 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 14 Apr 2015 21:09:10 +0000
Subject: hrtimer: Make hrtimer_start() a inline wrapper

No point for an extra export just to set the extra argument of
hrtimer_start_range_ns() to 0.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/r/20150414203502.808544539@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index a80baa86bb24..42074ab3d5c3 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -355,11 +355,26 @@ static inline void destroy_hrtimer_on_stack(struct hrtimer *timer) { }
 #endif
 
 /* Basic timer operations: */
-extern int hrtimer_start(struct hrtimer *timer, ktime_t tim,
-			 const enum hrtimer_mode mode);
 extern int hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
 			unsigned long range_ns, const enum hrtimer_mode mode);
 
+/**
+ * hrtimer_start - (re)start an hrtimer on the current CPU
+ * @timer:	the timer to be added
+ * @tim:	expiry time
+ * @mode:	expiry mode: absolute (HRTIMER_MODE_ABS) or
+ *		relative (HRTIMER_MODE_REL)
+ *
+ * Returns:
+ *  0 on success
+ *  1 when the timer was active
+ */
+static inline int hrtimer_start(struct hrtimer *timer, ktime_t tim,
+				const enum hrtimer_mode mode)
+{
+	return hrtimer_start_range_ns(timer, tim, 0, mode);
+}
+
 extern int hrtimer_cancel(struct hrtimer *timer);
 extern int hrtimer_try_to_cancel(struct hrtimer *timer);
 
-- 
cgit v1.2.3


From b193217e6dc3f88b599b573b53e0e0f6671d969a Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 14 Apr 2015 21:09:18 +0000
Subject: alarmtimer: Get rid of unused return value

We want to get rid of the hrtimer_start() return value and the alarm
timer return value is nowhere used. Remove it.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: John Stultz <john.stultz@linaro.org>
Link: http://lkml.kernel.org/r/20150414203503.243910615@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/alarmtimer.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/alarmtimer.h b/include/linux/alarmtimer.h
index a899402a5a0e..52f3b7da4f2d 100644
--- a/include/linux/alarmtimer.h
+++ b/include/linux/alarmtimer.h
@@ -43,8 +43,8 @@ struct alarm {
 
 void alarm_init(struct alarm *alarm, enum alarmtimer_type type,
 		enum alarmtimer_restart (*function)(struct alarm *, ktime_t));
-int alarm_start(struct alarm *alarm, ktime_t start);
-int alarm_start_relative(struct alarm *alarm, ktime_t start);
+void alarm_start(struct alarm *alarm, ktime_t start);
+void alarm_start_relative(struct alarm *alarm, ktime_t start);
 void alarm_restart(struct alarm *alarm);
 int alarm_try_to_cancel(struct alarm *alarm);
 int alarm_cancel(struct alarm *alarm);
-- 
cgit v1.2.3


From 61699e13072a89880aa584dcc64c6da465fb2ccc Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 14 Apr 2015 21:09:23 +0000
Subject: hrtimer: Remove hrtimer_start() return value

No user was ever interested whether the timer was active or not when
it was started. All abusers of the return value are gone, so get rid
of it.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/r/20150414203503.483556394@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h   | 22 +++++++++-------------
 include/linux/interrupt.h |  6 +++---
 2 files changed, 12 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 42074ab3d5c3..470d876c2eda 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -355,7 +355,7 @@ static inline void destroy_hrtimer_on_stack(struct hrtimer *timer) { }
 #endif
 
 /* Basic timer operations: */
-extern int hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
+extern void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
 			unsigned long range_ns, const enum hrtimer_mode mode);
 
 /**
@@ -364,34 +364,30 @@ extern int hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
  * @tim:	expiry time
  * @mode:	expiry mode: absolute (HRTIMER_MODE_ABS) or
  *		relative (HRTIMER_MODE_REL)
- *
- * Returns:
- *  0 on success
- *  1 when the timer was active
  */
-static inline int hrtimer_start(struct hrtimer *timer, ktime_t tim,
-				const enum hrtimer_mode mode)
+static inline void hrtimer_start(struct hrtimer *timer, ktime_t tim,
+				 const enum hrtimer_mode mode)
 {
-	return hrtimer_start_range_ns(timer, tim, 0, mode);
+	hrtimer_start_range_ns(timer, tim, 0, mode);
 }
 
 extern int hrtimer_cancel(struct hrtimer *timer);
 extern int hrtimer_try_to_cancel(struct hrtimer *timer);
 
-static inline int hrtimer_start_expires(struct hrtimer *timer,
-						enum hrtimer_mode mode)
+static inline void hrtimer_start_expires(struct hrtimer *timer,
+					 enum hrtimer_mode mode)
 {
 	unsigned long delta;
 	ktime_t soft, hard;
 	soft = hrtimer_get_softexpires(timer);
 	hard = hrtimer_get_expires(timer);
 	delta = ktime_to_ns(ktime_sub(hard, soft));
-	return hrtimer_start_range_ns(timer, soft, delta, mode);
+	hrtimer_start_range_ns(timer, soft, delta, mode);
 }
 
-static inline int hrtimer_restart(struct hrtimer *timer)
+static inline void hrtimer_restart(struct hrtimer *timer)
 {
-	return hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
+	hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
 }
 
 /* Query timers: */
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 6bf15a66bce7..be7e75c945e9 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -593,10 +593,10 @@ tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer,
 		     clockid_t which_clock, enum hrtimer_mode mode);
 
 static inline
-int tasklet_hrtimer_start(struct tasklet_hrtimer *ttimer, ktime_t time,
-			  const enum hrtimer_mode mode)
+void tasklet_hrtimer_start(struct tasklet_hrtimer *ttimer, ktime_t time,
+			   const enum hrtimer_mode mode)
 {
-	return hrtimer_start(&ttimer->timer, time, mode);
+	hrtimer_start(&ttimer->timer, time, mode);
 }
 
 static inline
-- 
cgit v1.2.3


From 59afdc7b32143528524455039e7557a46b60e4c8 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 22 Apr 2015 11:28:46 +0800
Subject: crypto: api - Move module sig ifdef into accessor function

Currently we're hiding mod->sig_ok under an ifdef in open code.
This patch adds a module_sig_ok accessor function and removes that
ifdef.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/module.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/module.h b/include/linux/module.h
index c883b86ea964..1e5436042eb0 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -655,4 +655,16 @@ static inline void module_bug_finalize(const Elf_Ehdr *hdr,
 static inline void module_bug_cleanup(struct module *mod) {}
 #endif	/* CONFIG_GENERIC_BUG */
 
+#ifdef CONFIG_MODULE_SIG
+static inline bool module_sig_ok(struct module *module)
+{
+	return module->sig_ok;
+}
+#else	/* !CONFIG_MODULE_SIG */
+static inline bool module_sig_ok(struct module *module)
+{
+	return true;
+}
+#endif	/* CONFIG_MODULE_SIG */
+
 #endif /* _LINUX_MODULE_H */
-- 
cgit v1.2.3


From af87baedf2c23b1181f51323339210a26a64f7fc Mon Sep 17 00:00:00 2001
From: Jiang Liu <jiang.liu@linux.intel.com>
Date: Mon, 13 Apr 2015 14:11:28 +0800
Subject: x86/htirq: Use new irqdomain interfaces to allocate/free IRQ

Use new irqdomain interfaces to allocate/free IRQ for HTIRQ, so we can
remove GENERIC_IRQ_LEGACY_ALLOC_HWIRQ later.

This patch changes the interfaces between arch independent PCI driver
and arch specific code. Currently HT_IRQ is only enabled on x86, so it
does not affect other architectures.

Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: David Cohen <david.a.cohen@linux.intel.com>
Cc: Sander Eikelenboom <linux@eikelenboom.it>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Rafael J. Wysocki <rjw@rjwysocki.net>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dimitri Sivanich <sivanich@sgi.com>
Link: http://lkml.kernel.org/r/1428905519-23704-7-git-send-email-jiang.liu@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/htirq.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/htirq.h b/include/linux/htirq.h
index 70a1dbbf2093..5caa51b7b95c 100644
--- a/include/linux/htirq.h
+++ b/include/linux/htirq.h
@@ -15,6 +15,8 @@ void unmask_ht_irq(struct irq_data *data);
 
 /* The arch hook for getting things started */
 int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev);
+int arch_alloc_ht_irq(struct pci_dev *dev);
+void arch_free_ht_irq(int irq);
 
 /* For drivers of buggy hardware */
 typedef void (ht_irq_update_t)(struct pci_dev *dev, int irq,
-- 
cgit v1.2.3


From b106ee63abccbba5f5a52d6e43168a6a30c6d98a Mon Sep 17 00:00:00 2001
From: Jiang Liu <jiang.liu@linux.intel.com>
Date: Mon, 13 Apr 2015 14:11:32 +0800
Subject: irq_remapping/vt-d: Enhance Intel IR driver to support hierarchical
 irqdomains

Enhance Intel interrupt remapping driver to support hierarchical
irqdomains. Implement intel_ir_chip to support stacked irq_chip.

Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
Acked-by: Joerg Roedel <jroedel@suse.de>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: David Cohen <david.a.cohen@linux.intel.com>
Cc: Sander Eikelenboom <linux@eikelenboom.it>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: iommu@lists.linux-foundation.org
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Rafael J. Wysocki <rjw@rjwysocki.net>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dimitri Sivanich <sivanich@sgi.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: David Woodhouse <dwmw2@infradead.org>
Link: http://lkml.kernel.org/r/1428905519-23704-11-git-send-email-jiang.liu@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/intel-iommu.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index a65208a8fe18..ecaf3a937845 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -286,6 +286,8 @@ struct q_inval {
 
 #define INTR_REMAP_TABLE_ENTRIES	65536
 
+struct irq_domain;
+
 struct ir_table {
 	struct irte *base;
 	unsigned long *bitmap;
@@ -335,6 +337,8 @@ struct intel_iommu {
 
 #ifdef CONFIG_IRQ_REMAP
 	struct ir_table *ir_table;	/* Interrupt remapping info */
+	struct irq_domain *ir_domain;
+	struct irq_domain *ir_msi_domain;
 #endif
 	struct device	*iommu_dev; /* IOMMU-sysfs device */
 	int		node;
-- 
cgit v1.2.3


From 34742db8eaf9ff364034f214ee5827701e131d4b Mon Sep 17 00:00:00 2001
From: Jiang Liu <jiang.liu@linux.intel.com>
Date: Mon, 13 Apr 2015 14:11:41 +0800
Subject: iommu/vt-d: Refine the interfaces to create IRQ for DMAR unit

Refine the interfaces to create IRQ for DMAR unit. It's a preparation
for converting DMAR IRQ to hierarchical irqdomain on x86.

It also moves dmar_alloc_hwirq()/dmar_free_hwirq() from irq_remapping.h
to dmar.h. They are not irq_remapping specific.

Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: David Cohen <david.a.cohen@linux.intel.com>
Cc: Sander Eikelenboom <linux@eikelenboom.it>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: iommu@lists.linux-foundation.org
Cc: Vinod Koul <vinod.koul@intel.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Rafael J. Wysocki <rjw@rjwysocki.net>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dimitri Sivanich <sivanich@sgi.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Joerg Roedel <joro@8bytes.org>
Link: http://lkml.kernel.org/r/1428905519-23704-20-git-send-email-jiang.liu@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/dmar.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index 30624954dec5..84737565c1fd 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -227,6 +227,7 @@ extern void dmar_msi_read(int irq, struct msi_msg *msg);
 extern void dmar_msi_write(int irq, struct msi_msg *msg);
 extern int dmar_set_interrupt(struct intel_iommu *iommu);
 extern irqreturn_t dmar_fault(int irq, void *dev_id);
-extern int arch_setup_dmar_msi(unsigned int irq);
+extern int dmar_alloc_hwirq(int id, int node, void *arg);
+extern void dmar_free_hwirq(int irq);
 
 #endif /* __DMAR_H__ */
-- 
cgit v1.2.3


From 49e07d8f28c05347f237146a9ec66f6d958db83e Mon Sep 17 00:00:00 2001
From: Jiang Liu <jiang.liu@linux.intel.com>
Date: Mon, 13 Apr 2015 14:11:43 +0800
Subject: x86/htirq: Use hierarchical irqdomain to manage Hypertransport
 interrupts

We have slightly changed the architecture interfaces to support htirq
PCI driver. It's safe because currently Hypertransport interrupt is
only enabled on x86 platforms.

Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: David Cohen <david.a.cohen@linux.intel.com>
Cc: Sander Eikelenboom <linux@eikelenboom.it>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Rafael J. Wysocki <rjw@rjwysocki.net>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dimitri Sivanich <sivanich@sgi.com>
Link: http://lkml.kernel.org/r/1428905519-23704-22-git-send-email-jiang.liu@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/htirq.h | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/htirq.h b/include/linux/htirq.h
index 5caa51b7b95c..d4a527e58434 100644
--- a/include/linux/htirq.h
+++ b/include/linux/htirq.h
@@ -1,26 +1,38 @@
 #ifndef LINUX_HTIRQ_H
 #define LINUX_HTIRQ_H
 
+struct pci_dev;
+struct irq_data;
+
 struct ht_irq_msg {
 	u32	address_lo;	/* low 32 bits of the ht irq message */
 	u32	address_hi;	/* high 32 bits of the it irq message */
 };
 
+typedef void (ht_irq_update_t)(struct pci_dev *dev, int irq,
+			       struct ht_irq_msg *msg);
+
+struct ht_irq_cfg {
+	struct pci_dev *dev;
+	 /* Update callback used to cope with buggy hardware */
+	ht_irq_update_t *update;
+	unsigned pos;
+	unsigned idx;
+	struct ht_irq_msg msg;
+};
+
 /* Helper functions.. */
 void fetch_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg);
 void write_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg);
-struct irq_data;
 void mask_ht_irq(struct irq_data *data);
 void unmask_ht_irq(struct irq_data *data);
 
 /* The arch hook for getting things started */
-int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev);
-int arch_alloc_ht_irq(struct pci_dev *dev);
-void arch_free_ht_irq(int irq);
+int arch_setup_ht_irq(int idx, int pos, struct pci_dev *dev,
+		      ht_irq_update_t *update);
+void arch_teardown_ht_irq(unsigned int irq);
 
 /* For drivers of buggy hardware */
-typedef void (ht_irq_update_t)(struct pci_dev *dev, int irq,
-			       struct ht_irq_msg *msg);
 int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update);
 
 #endif /* LINUX_HTIRQ_H */
-- 
cgit v1.2.3


From 591fc116f3302da915bb57d4474a61a5e8884cec Mon Sep 17 00:00:00 2001
From: "Ivan T. Ivanov" <ivan.ivanov@linaro.org>
Date: Thu, 9 Apr 2015 11:34:22 +0300
Subject: usb: phy: msm: Use extcon framework for VBUS and ID detection

On recent Qualcomm platforms VBUS and ID lines are not routed to
USB PHY LINK controller. Use extcon framework to receive connect
and disconnect ID and VBUS notification.

Signed-off-by: Ivan T. Ivanov <ivan.ivanov@linaro.org>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 include/linux/usb/msm_hsusb.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb/msm_hsusb.h b/include/linux/usb/msm_hsusb.h
index 7dbecf9a4656..c4d956e50d09 100644
--- a/include/linux/usb/msm_hsusb.h
+++ b/include/linux/usb/msm_hsusb.h
@@ -18,6 +18,7 @@
 #ifndef __ASM_ARCH_MSM_HSUSB_H
 #define __ASM_ARCH_MSM_HSUSB_H
 
+#include <linux/extcon.h>
 #include <linux/types.h>
 #include <linux/usb/otg.h>
 #include <linux/clk.h>
@@ -119,6 +120,17 @@ struct msm_otg_platform_data {
 	void (*setup_gpio)(enum usb_otg_state state);
 };
 
+/**
+ * struct msm_usb_cable - structure for exteternal connector cable
+ *			  state tracking
+ * @nb: hold event notification callback
+ * @conn: used for notification registration
+ */
+struct msm_usb_cable {
+	struct notifier_block		nb;
+	struct extcon_specific_cable_nb conn;
+};
+
 /**
  * struct msm_otg: OTG driver data. Shared by HCD and DCD.
  * @otg: USB OTG Transceiver structure.
@@ -138,6 +150,8 @@ struct msm_otg_platform_data {
  * @chg_type: The type of charger attached.
  * @dcd_retires: The retry count used to track Data contact
  *               detection process.
+ * @vbus: VBUS signal state trakining, using extcon framework
+ * @id: ID signal state trakining, using extcon framework
  */
 struct msm_otg {
 	struct usb_phy phy;
@@ -166,6 +180,9 @@ struct msm_otg {
 	struct reset_control *phy_rst;
 	struct reset_control *link_rst;
 	int vdd_levels[3];
+
+	struct msm_usb_cable vbus;
+	struct msm_usb_cable id;
 };
 
 #endif
-- 
cgit v1.2.3


From 44e42ae3a398b559c768b9b3c324d72b0b0b4479 Mon Sep 17 00:00:00 2001
From: "Ivan T. Ivanov" <ivan.ivanov@linaro.org>
Date: Thu, 9 Apr 2015 11:34:33 +0300
Subject: usb: phy: msm: Manual PHY and LINK controller VBUS change
 notification

VBUS is not routed to USB PHY on recent Qualcomm platforms. USB controller
must see VBUS in order to pull-up DP when setting RS bit. Henc configure
USB PHY and LINK registers sense VBUS and enable manual pullup on D+ line.

Cc: Vamsi Krishna <vskrishn@codeaurora.org>
Cc: Mayank Rana <mrana@codeaurora.org>
Signed-off-by: Ivan T. Ivanov <ivan.ivanov@linaro.org>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 include/linux/usb/msm_hsusb.h    | 5 +++++
 include/linux/usb/msm_hsusb_hw.h | 9 +++++++++
 2 files changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb/msm_hsusb.h b/include/linux/usb/msm_hsusb.h
index c4d956e50d09..e55a1504266e 100644
--- a/include/linux/usb/msm_hsusb.h
+++ b/include/linux/usb/msm_hsusb.h
@@ -150,6 +150,9 @@ struct msm_usb_cable {
  * @chg_type: The type of charger attached.
  * @dcd_retires: The retry count used to track Data contact
  *               detection process.
+ * @manual_pullup: true if VBUS is not routed to USB controller/phy
+ *	and controller driver therefore enables pull-up explicitly before
+ *	starting controller using usbcmd run/stop bit.
  * @vbus: VBUS signal state trakining, using extcon framework
  * @id: ID signal state trakining, using extcon framework
  */
@@ -181,6 +184,8 @@ struct msm_otg {
 	struct reset_control *link_rst;
 	int vdd_levels[3];
 
+	bool manual_pullup;
+
 	struct msm_usb_cable vbus;
 	struct msm_usb_cable id;
 };
diff --git a/include/linux/usb/msm_hsusb_hw.h b/include/linux/usb/msm_hsusb_hw.h
index a29f6030afb1..e159b39f67a2 100644
--- a/include/linux/usb/msm_hsusb_hw.h
+++ b/include/linux/usb/msm_hsusb_hw.h
@@ -21,6 +21,8 @@
 
 #define USB_AHBBURST         (MSM_USB_BASE + 0x0090)
 #define USB_AHBMODE          (MSM_USB_BASE + 0x0098)
+#define USB_GENCONFIG_2      (MSM_USB_BASE + 0x00a0)
+
 #define USB_CAPLENGTH        (MSM_USB_BASE + 0x0100) /* 8 bit */
 
 #define USB_USBCMD           (MSM_USB_BASE + 0x0140)
@@ -30,6 +32,9 @@
 #define USB_PHY_CTRL         (MSM_USB_BASE + 0x0240)
 #define USB_PHY_CTRL2        (MSM_USB_BASE + 0x0278)
 
+#define GENCONFIG_2_SESS_VLD_CTRL_EN	BIT(7)
+#define USBCMD_SESS_VLD_CTRL		BIT(25)
+
 #define USBCMD_RESET   2
 #define USB_USBINTR          (MSM_USB_BASE + 0x0148)
 
@@ -50,6 +55,10 @@
 #define ULPI_PWR_CLK_MNG_REG	0x88
 #define OTG_COMP_DISABLE	BIT(0)
 
+#define ULPI_MISC_A			0x96
+#define ULPI_MISC_A_VBUSVLDEXTSEL	BIT(1)
+#define ULPI_MISC_A_VBUSVLDEXT		BIT(0)
+
 #define ASYNC_INTR_CTRL         (1 << 29) /* Enable async interrupt */
 #define ULPI_STP_CTRL           (1 << 30) /* Block communication with PHY */
 #define PHY_RETEN               (1 << 1) /* PHY retention enable/disable */
-- 
cgit v1.2.3


From b189a2117223edbe40e0a187ae5c606cbdd6447c Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 28 Apr 2015 14:04:07 +0200
Subject: usb: phy: Remove the phy-rcar-gen2-usb driver

The phy-rcar-gen2-usb driver, which supports legacy platform data only,
is no longer used since commit a483dcbfa21f919c ("ARM: shmobile: lager:
Remove legacy board support").

This driver was superseded by the DT-only phy-rcar-gen2 driver, which
was introduced in commit 1233f59f745b237d ("phy: Renesas R-Car Gen2 PHY
driver").

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 include/linux/platform_data/usb-rcar-gen2-phy.h | 22 ----------------------
 1 file changed, 22 deletions(-)
 delete mode 100644 include/linux/platform_data/usb-rcar-gen2-phy.h

(limited to 'include/linux')

diff --git a/include/linux/platform_data/usb-rcar-gen2-phy.h b/include/linux/platform_data/usb-rcar-gen2-phy.h
deleted file mode 100644
index dd3ba46c0d90..000000000000
--- a/include/linux/platform_data/usb-rcar-gen2-phy.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright (C) 2013 Renesas Solutions Corp.
- * Copyright (C) 2013 Cogent Embedded, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef __USB_RCAR_GEN2_PHY_H
-#define __USB_RCAR_GEN2_PHY_H
-
-#include <linux/types.h>
-
-struct rcar_gen2_phy_platform_data {
-	/* USB channel 0 configuration */
-	bool chan0_pci:1;	/* true: PCI USB host 0, false: USBHS */
-	/* USB channel 2 configuration */
-	bool chan2_pci:1;	/* true: PCI USB host 2, false: USBSS */
-};
-
-#endif
-- 
cgit v1.2.3


From 042f7df15a4fff8eec42873f755aea848dcdedd1 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Thu, 30 Apr 2015 17:16:12 +0800
Subject: workqueue: Allow modifying low level unbound workqueue cpumask

Allow to modify the low-level unbound workqueues cpumask through
sysfs. This is performed by traversing the entire workqueue list
and calling apply_wqattrs_prepare() on the unbound workqueues
with the new low level mask. Only after all the preparation are done,
we commit them all together.

Ordered workqueues are ignored from the low level unbound workqueue
cpumask, it will be handled in near future.

All the (default & per-node) pwqs are mandatorily controlled by
the low level cpumask. If the user configured cpumask doesn't overlap
with the low level cpumask, the low level cpumask will be used for the
wq instead.

The comment of wq_calc_node_cpumask() is updated and explicitly
requires that its first argument should be the attrs of the default
pwq.

The default wq_unbound_cpumask is cpu_possible_mask.  The workqueue
subsystem doesn't know its best default value, let the system manager
or the other subsystem set it when needed.

Changed from V8:
  merge the calculating code for the attrs of the default pwq together.
  minor change the code&comments for saving the user configured attrs.
  remove unnecessary list_del().
  minor update the comment of wq_calc_node_cpumask().
  update the comment of workqueue_set_unbound_cpumask();

Cc: Christoph Lameter <cl@linux.com>
Cc: Kevin Hilman <khilman@linaro.org>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Mike Galbraith <bitbucket@online.de>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Original-patch-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index deee212af8e0..4618dd672d1b 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -424,6 +424,7 @@ struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask);
 void free_workqueue_attrs(struct workqueue_attrs *attrs);
 int apply_workqueue_attrs(struct workqueue_struct *wq,
 			  const struct workqueue_attrs *attrs);
+int workqueue_set_unbound_cpumask(cpumask_var_t cpumask);
 
 extern bool queue_work_on(int cpu, struct workqueue_struct *wq,
 			struct work_struct *work);
-- 
cgit v1.2.3


From 0bb549052d33f8992544764a6cf1299d06ba7e2f Mon Sep 17 00:00:00 2001
From: Peter Jones <pjones@redhat.com>
Date: Tue, 28 Apr 2015 18:44:31 -0400
Subject: efi: Add esrt support

Add sysfs files for the EFI System Resource Table (ESRT) under
/sys/firmware/efi/esrt and for each EFI System Resource Entry under
entries/ as a subdir.

The EFI System Resource Table (ESRT) provides a read-only catalog of
system components for which the system accepts firmware upgrades via
UEFI's "Capsule Update" feature.  This module allows userland utilities
to evaluate what firmware updates can be applied to this system, and
potentially arrange for those updates to occur.

The ESRT is described as part of the UEFI specification, in version 2.5
which should be available from http://uefi.org/specifications in early
2015.  If you're a member of the UEFI Forum, information about its
addition to the standard is available as UEFI Mantis 1090.

For some hardware platforms, additional restrictions may be found at
http://msdn.microsoft.com/en-us/library/windows/hardware/jj128256.aspx ,
and additional documentation may be found at
http://download.microsoft.com/download/5/F/5/5F5D16CD-2530-4289-8019-94C6A20BED3C/windows-uefi-firmware-update-platform.docx
.

Signed-off-by: Peter Jones <pjones@redhat.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 include/linux/efi.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index af5be0368dec..024c27e7c0fa 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -583,6 +583,9 @@ void efi_native_runtime_setup(void);
 #define EFI_FILE_INFO_ID \
     EFI_GUID(  0x9576e92, 0x6d3f, 0x11d2, 0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b )
 
+#define EFI_SYSTEM_RESOURCE_TABLE_GUID \
+    EFI_GUID(  0xb122a263, 0x3661, 0x4f68, 0x99, 0x29, 0x78, 0xf8, 0xb0, 0xd6, 0x21, 0x80 )
+
 #define EFI_FILE_SYSTEM_GUID \
     EFI_GUID(  0x964e5b22, 0x6459, 0x11d2, 0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b )
 
@@ -823,6 +826,7 @@ extern struct efi {
 	unsigned long fw_vendor;	/* fw_vendor */
 	unsigned long runtime;		/* runtime table */
 	unsigned long config_table;	/* config tables */
+	unsigned long esrt;		/* ESRT table */
 	efi_get_time_t *get_time;
 	efi_set_time_t *set_time;
 	efi_get_wakeup_time_t *get_wakeup_time;
@@ -875,6 +879,7 @@ static inline efi_status_t efi_query_variable_store(u32 attributes, unsigned lon
 #endif
 extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr);
 extern int efi_config_init(efi_config_table_type_t *arch_tables);
+extern void __init efi_esrt_init(void);
 extern int efi_config_parse_tables(void *config_tables, int count, int sz,
 				   efi_config_table_type_t *arch_tables);
 extern u64 efi_get_iobase (void);
@@ -882,12 +887,15 @@ extern u32 efi_mem_type (unsigned long phys_addr);
 extern u64 efi_mem_attributes (unsigned long phys_addr);
 extern u64 efi_mem_attribute (unsigned long phys_addr, unsigned long size);
 extern int __init efi_uart_console_only (void);
+extern u64 efi_mem_desc_end(efi_memory_desc_t *md);
+extern int efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md);
 extern void efi_initialize_iomem_resources(struct resource *code_resource,
 		struct resource *data_resource, struct resource *bss_resource);
 extern void efi_get_time(struct timespec *now);
 extern void efi_reserve_boot_services(void);
 extern int efi_get_fdt_params(struct efi_fdt_params *params, int verbose);
 extern struct efi_memory_map memmap;
+extern struct kobject *efi_kobj;
 
 extern int efi_reboot_quirk_mode;
 extern bool efi_poweroff_required(void);
-- 
cgit v1.2.3


From d54385ce68cd18ab002b46f61246ad197cec92de Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@redhat.com>
Date: Thu, 30 Apr 2015 14:53:54 -0700
Subject: etherdev: Process is_multicast_ether_addr at same size as other
 operations

This change makes it so that we process the address in
is_multicast_ether_addr at the same size as the other calls.  This allows
us to avoid duplicate reads when used with other calls such as
is_zero_ether_addr or eth_addr_copy.  In addition I have added a 64 bit
version of the function so in eth_type_trans we can process the destination
address as a 64 bit value throughout.

Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/etherdevice.h | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 606563ef8a72..c4a10f991fe0 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -110,7 +110,29 @@ static inline bool is_zero_ether_addr(const u8 *addr)
  */
 static inline bool is_multicast_ether_addr(const u8 *addr)
 {
-	return 0x01 & addr[0];
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+	u32 a = *(const u32 *)addr;
+#else
+	u16 a = *(const u16 *)addr;
+#endif
+#ifdef __BIG_ENDIAN
+	return 0x01 & (a >> ((sizeof(a) * 8) - 8));
+#else
+	return 0x01 & a;
+#endif
+}
+
+static inline bool is_multicast_ether_addr_64bits(const u8 addr[6+2])
+{
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
+#ifdef __BIG_ENDIAN
+	return 0x01 & ((*(const u64 *)addr) >> 56);
+#else
+	return 0x01 & (*(const u64 *)addr);
+#endif
+#else
+	return is_multicast_ether_addr(addr);
+#endif
 }
 
 /**
-- 
cgit v1.2.3


From 50fb799289501c2eab9f43fc9af513027e1e994f Mon Sep 17 00:00:00 2001
From: Tom Herbert <tom@herbertland.com>
Date: Fri, 1 May 2015 11:30:12 -0700
Subject: net: Add skb_get_hash_perturb

This calls flow_disect and __skb_get_hash to procure a hash for a
packet. Input includes a key to initialize jhash. This function
does not set skb->hash.

Signed-off-by: Tom Herbert <tom@herbertland.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 66e374d62f64..acb83e249e3f 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -927,6 +927,8 @@ static inline __u32 skb_get_hash(struct sk_buff *skb)
 	return skb->hash;
 }
 
+__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb);
+
 static inline __u32 skb_get_hash_raw(const struct sk_buff *skb)
 {
 	return skb->hash;
-- 
cgit v1.2.3


From 9afd85c9e4552b276e2f4cfefd622bdeeffbbf26 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@c0d3.blue>
Date: Sat, 2 May 2015 14:01:07 +0200
Subject: net: Export IGMP/MLD message validation code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With this patch, the IGMP and MLD message validation functions are moved
from the bridge code to IPv4/IPv6 multicast files. Some small
refactoring was done to enhance readibility and to iron out some
differences in behaviour between the IGMP and MLD parsing code (e.g. the
skb-cloning of MLD messages is now only done if necessary, just like the
IGMP part always did).

Finally, these IGMP and MLD message validation functions are exported so
that not only the bridge can use it but batman-adv later, too.

Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/igmp.h   | 1 +
 include/linux/skbuff.h | 3 +++
 2 files changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index 2c677afeea47..193ad488d3e2 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -130,5 +130,6 @@ extern void ip_mc_unmap(struct in_device *);
 extern void ip_mc_remap(struct in_device *);
 extern void ip_mc_dec_group(struct in_device *in_dev, __be32 addr);
 extern void ip_mc_inc_group(struct in_device *in_dev, __be32 addr);
+int ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed);
 
 #endif
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index acb83e249e3f..9c2f793573fa 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3419,6 +3419,9 @@ static inline void skb_checksum_none_assert(const struct sk_buff *skb)
 bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off);
 
 int skb_checksum_setup(struct sk_buff *skb, bool recalculate);
+struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb,
+				     unsigned int transport_len,
+				     __sum16(*skb_chkf)(struct sk_buff *skb));
 
 u32 skb_get_poff(const struct sk_buff *skb);
 u32 __skb_get_poff(const struct sk_buff *skb, void *data,
-- 
cgit v1.2.3


From 6cd9e9f629f11b9412d4e9aa294c029dbb36b3cf Mon Sep 17 00:00:00 2001
From: Kapileshwar Singh <kapileshwar.singh@arm.com>
Date: Wed, 18 Feb 2015 16:04:21 +0000
Subject: thermal: of: fix cooling device weights in device tree

Currently you can specify the weight of the cooling device in the device
tree but that information is not populated to the
thermal_bind_params where the fair share governor expects it to
be.  The of thermal zone device doesn't have a thermal_bind_params
structure and arguably it's better to pass the weight inside the
thermal_instance as it is specific to the bind of a cooling device to a
thermal zone parameter.

Core thermal code is fixed to populate the weight in the instance from
the thermal_bind_params, so platform code that was passing the weight
inside the thermal_bind_params continue to work seamlessly.

While we are at it, create a default value for the weight parameter for
those thermal zones that currently don't define it and remove the
hardcoded default in of-thermal.

Cc: Zhang Rui <rui.zhang@intel.com>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: Len Brown <lenb@kernel.org>
Cc: Peter Feuerer <peter@piie.net>
Cc: Darren Hart <dvhart@infradead.org>
Cc: Eduardo Valentin <edubezval@gmail.com>
Cc: Kukjin Kim <kgene@kernel.org>
Cc: Durgadoss R <durgadoss.r@intel.com>
Signed-off-by: Kapileshwar Singh <kapileshwar.singh@arm.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 include/linux/thermal.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 5eac316490ea..00dacd4dfdce 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -40,6 +40,9 @@
 /* No upper/lower limit requirement */
 #define THERMAL_NO_LIMIT	((u32)~0)
 
+/* Default weight of a bound cooling device */
+#define THERMAL_WEIGHT_DEFAULT 0
+
 /* Unit conversion macros */
 #define KELVIN_TO_CELSIUS(t)	(long)(((long)t-2732 >= 0) ?	\
 				((long)t-2732+5)/10 : ((long)t-2732-5)/10)
@@ -323,7 +326,8 @@ void thermal_zone_device_unregister(struct thermal_zone_device *);
 
 int thermal_zone_bind_cooling_device(struct thermal_zone_device *, int,
 				     struct thermal_cooling_device *,
-				     unsigned long, unsigned long);
+				     unsigned long, unsigned long,
+				     unsigned int);
 int thermal_zone_unbind_cooling_device(struct thermal_zone_device *, int,
 				       struct thermal_cooling_device *);
 void thermal_zone_device_update(struct thermal_zone_device *);
-- 
cgit v1.2.3


From bcdcbbc71125c37195f97314f453ca9a3a4eb758 Mon Sep 17 00:00:00 2001
From: Javi Merino <javi.merino@arm.com>
Date: Wed, 18 Feb 2015 16:04:25 +0000
Subject: thermal: fair_share: generalize the weight concept

The fair share governor has the concept of weights, which is the
influence of each cooling device in a thermal zone.  The current
implementation forces the weights of all cooling devices in a thermal
zone to add up to a 100.  This complicates setups, as you need to know
in advance how many cooling devices you are going to have.  If you bind a
new cooling device, you have to modify all the other cooling devices
weights, which is error prone.  Furthermore, you can't specify a
"default" weight for platforms since that default value depends on the
number of cooling devices in the platform.

This patch generalizes the concept of weight by allowing any number to
be a "weight".  Weights are now relative to each other.  Platforms that
don't specify weights get the same default value for all their cooling
devices, so all their cdevs are considered to be equally influential.

It's important to note that previous users of the weights don't need to
alter the code: percentages continue to work as they used to.  This
patch just removes the constraint of all the weights in a thermal zone
having to add up to a 100.  If they do, you get the same behavior as
before.  If they don't, fair share now works for that platform.

Cc: Zhang Rui <rui.zhang@intel.com>
Cc: Eduardo Valentin <edubezval@gmail.com>
Cc: Durgadoss R <durgadoss.r@intel.com>
Acked-by: Durgadoss R <durgadoss.r@intel.com>
Signed-off-by: Javi Merino <javi.merino@arm.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 include/linux/thermal.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 00dacd4dfdce..bac0f52c7a1e 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -217,9 +217,12 @@ struct thermal_bind_params {
 
 	/*
 	 * This is a measure of 'how effectively these devices can
-	 * cool 'this' thermal zone. The shall be determined by platform
-	 * characterization. This is on a 'percentage' scale.
-	 * See Documentation/thermal/sysfs-api.txt for more information.
+	 * cool 'this' thermal zone. It shall be determined by
+	 * platform characterization. This value is relative to the
+	 * rest of the weights so a cooling device whose weight is
+	 * double that of another cooling device is twice as
+	 * effective. See Documentation/thermal/sysfs-api.txt for more
+	 * information.
 	 */
 	int weight;
 
-- 
cgit v1.2.3


From e33df1d2f3a0141cd79e770f31999ba0dd7ebfa8 Mon Sep 17 00:00:00 2001
From: Javi Merino <javi.merino@arm.com>
Date: Thu, 26 Feb 2015 19:00:27 +0000
Subject: thermal: let governors have private data for each thermal zone

A governor may need to store its current state between calls to
throttle().  That state depends on the thermal zone, so store it as
private data in struct thermal_zone_device.

The governors may have two new ops: bind_to_tz() and unbind_from_tz().
When provided, these functions let governors do some initialization
and teardown when they are bound/unbound to a tz and possibly store that
information in the governor_data field of the struct
thermal_zone_device.

Cc: Zhang Rui <rui.zhang@intel.com>
Cc: Eduardo Valentin <edubezval@gmail.com>
Signed-off-by: Javi Merino <javi.merino@arm.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 include/linux/thermal.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index bac0f52c7a1e..edf9d53c67e6 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -165,6 +165,7 @@ struct thermal_attr {
  * @ops:	operations this &thermal_zone_device supports
  * @tzp:	thermal zone parameters
  * @governor:	pointer to the governor for this thermal zone
+ * @governor_data:	private pointer for governor data
  * @thermal_instances:	list of &struct thermal_instance of this thermal zone
  * @idr:	&struct idr to generate unique id for this zone's cooling
  *		devices
@@ -191,6 +192,7 @@ struct thermal_zone_device {
 	struct thermal_zone_device_ops *ops;
 	const struct thermal_zone_params *tzp;
 	struct thermal_governor *governor;
+	void *governor_data;
 	struct list_head thermal_instances;
 	struct idr idr;
 	struct mutex lock;
@@ -201,12 +203,19 @@ struct thermal_zone_device {
 /**
  * struct thermal_governor - structure that holds thermal governor information
  * @name:	name of the governor
+ * @bind_to_tz: callback called when binding to a thermal zone.  If it
+ *		returns 0, the governor is bound to the thermal zone,
+ *		otherwise it fails.
+ * @unbind_from_tz:	callback called when a governor is unbound from a
+ *			thermal zone.
  * @throttle:	callback called for every trip point even if temperature is
  *		below the trip point temperature
  * @governor_list:	node in thermal_governor_list (in thermal_core.c)
  */
 struct thermal_governor {
 	char name[THERMAL_NAME_LENGTH];
+	int (*bind_to_tz)(struct thermal_zone_device *tz);
+	void (*unbind_from_tz)(struct thermal_zone_device *tz);
 	int (*throttle)(struct thermal_zone_device *tz, int trip);
 	struct list_head	governor_list;
 };
-- 
cgit v1.2.3


From 35b11d2e3a66279a477e36cefb2603806295b8ce Mon Sep 17 00:00:00 2001
From: Javi Merino <javi.merino@arm.com>
Date: Thu, 26 Feb 2015 19:00:28 +0000
Subject: thermal: extend the cooling device API to include power information

Add three optional callbacks to the cooling device interface to allow
them to express power.  In addition to the callbacks, add helpers to
identify cooling devices that implement the power cooling device API.

Cc: Zhang Rui <rui.zhang@intel.com>
Cc: Eduardo Valentin <edubezval@gmail.com>
Signed-off-by: Javi Merino <javi.merino@arm.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 include/linux/thermal.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index edf9d53c67e6..bf3c55f405c2 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -63,6 +63,7 @@
 
 struct thermal_zone_device;
 struct thermal_cooling_device;
+struct thermal_instance;
 
 enum thermal_device_mode {
 	THERMAL_DEVICE_DISABLED = 0,
@@ -116,6 +117,12 @@ struct thermal_cooling_device_ops {
 	int (*get_max_state) (struct thermal_cooling_device *, unsigned long *);
 	int (*get_cur_state) (struct thermal_cooling_device *, unsigned long *);
 	int (*set_cur_state) (struct thermal_cooling_device *, unsigned long);
+	int (*get_requested_power)(struct thermal_cooling_device *,
+				   struct thermal_zone_device *, u32 *);
+	int (*state2power)(struct thermal_cooling_device *,
+			   struct thermal_zone_device *, unsigned long, u32 *);
+	int (*power2state)(struct thermal_cooling_device *,
+			   struct thermal_zone_device *, u32, unsigned long *);
 };
 
 struct thermal_cooling_device {
@@ -331,6 +338,16 @@ void thermal_zone_of_sensor_unregister(struct device *dev,
 #endif
 
 #if IS_ENABLED(CONFIG_THERMAL)
+static inline bool cdev_is_power_actor(struct thermal_cooling_device *cdev)
+{
+	return cdev->ops->get_requested_power && cdev->ops->state2power &&
+		cdev->ops->power2state;
+}
+
+int power_actor_get_max_power(struct thermal_cooling_device *,
+			      struct thermal_zone_device *tz, u32 *max_power);
+int power_actor_set_power(struct thermal_cooling_device *,
+			  struct thermal_instance *, u32);
 struct thermal_zone_device *thermal_zone_device_register(const char *, int, int,
 		void *, struct thermal_zone_device_ops *,
 		const struct thermal_zone_params *, int, int);
@@ -359,6 +376,14 @@ struct thermal_instance *get_thermal_instance(struct thermal_zone_device *,
 void thermal_cdev_update(struct thermal_cooling_device *);
 void thermal_notify_framework(struct thermal_zone_device *, int);
 #else
+static inline bool cdev_is_power_actor(struct thermal_cooling_device *cdev)
+{ return false; }
+static inline int power_actor_get_max_power(struct thermal_cooling_device *cdev,
+			      struct thermal_zone_device *tz, u32 *max_power)
+{ return 0; }
+static inline int power_actor_set_power(struct thermal_cooling_device *cdev,
+			  struct thermal_instance *tz, u32 power)
+{ return 0; }
 static inline struct thermal_zone_device *thermal_zone_device_register(
 	const char *type, int trips, int mask, void *devdata,
 	struct thermal_zone_device_ops *ops,
-- 
cgit v1.2.3


From c36cf07176316fbe6a4bdbc23afcb0cbf7822bf2 Mon Sep 17 00:00:00 2001
From: Javi Merino <javi.merino@arm.com>
Date: Thu, 26 Feb 2015 19:00:29 +0000
Subject: thermal: cpu_cooling: implement the power cooling device API

Add a basic power model to the cpu cooling device to implement the
power cooling device API.  The power model uses the current frequency,
current load and OPPs for the power calculations.  The cpus must have
registered their OPPs using the OPP library.

Cc: Zhang Rui <rui.zhang@intel.com>
Cc: Eduardo Valentin <edubezval@gmail.com>
Signed-off-by: Kapileshwar Singh <kapileshwar.singh@arm.com>
Signed-off-by: Punit Agrawal <punit.agrawal@arm.com>
Signed-off-by: Javi Merino <javi.merino@arm.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 include/linux/cpu_cooling.h | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h
index bd955270d5aa..c156f5082758 100644
--- a/include/linux/cpu_cooling.h
+++ b/include/linux/cpu_cooling.h
@@ -28,6 +28,9 @@
 #include <linux/thermal.h>
 #include <linux/cpumask.h>
 
+typedef int (*get_static_t)(cpumask_t *cpumask, int interval,
+			    unsigned long voltage, u32 *power);
+
 #ifdef CONFIG_CPU_THERMAL
 /**
  * cpufreq_cooling_register - function to create cpufreq cooling device.
@@ -36,6 +39,10 @@
 struct thermal_cooling_device *
 cpufreq_cooling_register(const struct cpumask *clip_cpus);
 
+struct thermal_cooling_device *
+cpufreq_power_cooling_register(const struct cpumask *clip_cpus,
+			       u32 capacitance, get_static_t plat_static_func);
+
 /**
  * of_cpufreq_cooling_register - create cpufreq cooling device based on DT.
  * @np: a valid struct device_node to the cooling device device tree node.
@@ -45,6 +52,12 @@ cpufreq_cooling_register(const struct cpumask *clip_cpus);
 struct thermal_cooling_device *
 of_cpufreq_cooling_register(struct device_node *np,
 			    const struct cpumask *clip_cpus);
+
+struct thermal_cooling_device *
+of_cpufreq_power_cooling_register(struct device_node *np,
+				  const struct cpumask *clip_cpus,
+				  u32 capacitance,
+				  get_static_t plat_static_func);
 #else
 static inline struct thermal_cooling_device *
 of_cpufreq_cooling_register(struct device_node *np,
@@ -52,6 +65,15 @@ of_cpufreq_cooling_register(struct device_node *np,
 {
 	return ERR_PTR(-ENOSYS);
 }
+
+static inline struct thermal_cooling_device *
+of_cpufreq_power_cooling_register(struct device_node *np,
+				  const struct cpumask *clip_cpus,
+				  u32 capacitance,
+				  get_static_t plat_static_func)
+{
+	return NULL;
+}
 #endif
 
 /**
@@ -67,12 +89,29 @@ cpufreq_cooling_register(const struct cpumask *clip_cpus)
 {
 	return ERR_PTR(-ENOSYS);
 }
+static inline struct thermal_cooling_device *
+cpufreq_power_cooling_register(const struct cpumask *clip_cpus,
+			       u32 capacitance, get_static_t plat_static_func)
+{
+	return NULL;
+}
+
 static inline struct thermal_cooling_device *
 of_cpufreq_cooling_register(struct device_node *np,
 			    const struct cpumask *clip_cpus)
 {
 	return ERR_PTR(-ENOSYS);
 }
+
+static inline struct thermal_cooling_device *
+of_cpufreq_power_cooling_register(struct device_node *np,
+				  const struct cpumask *clip_cpus,
+				  u32 capacitance,
+				  get_static_t plat_static_func)
+{
+	return NULL;
+}
+
 static inline
 void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
 {
-- 
cgit v1.2.3


From 6b775e870c56c59c3e16531ea2307b797395f9f7 Mon Sep 17 00:00:00 2001
From: Javi Merino <javi.merino@arm.com>
Date: Mon, 2 Mar 2015 17:17:19 +0000
Subject: thermal: introduce the Power Allocator governor

The power allocator governor is a thermal governor that controls system
and device power allocation to control temperature.  Conceptually, the
implementation divides the sustainable power of a thermal zone among
all the heat sources in that zone.

This governor relies on "power actors", entities that represent heat
sources.  They can report current and maximum power consumption and
can set a given maximum power consumption, usually via a cooling
device.

The governor uses a Proportional Integral Derivative (PID) controller
driven by the temperature of the thermal zone.  The output of the
controller is a power budget that is then allocated to each power
actor that can have bearing on the temperature we are trying to
control.  It decides how much power to give each cooling device based
on the performance they are requesting.  The PID controller ensures
that the total power budget does not exceed the control temperature.

Cc: Zhang Rui <rui.zhang@intel.com>
Cc: Eduardo Valentin <edubezval@gmail.com>
Signed-off-by: Punit Agrawal <punit.agrawal@arm.com>
Signed-off-by: Javi Merino <javi.merino@arm.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 include/linux/thermal.h | 37 ++++++++++++++++++++++++++++++++-----
 1 file changed, 32 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index bf3c55f405c2..6bbe11c97cea 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -59,6 +59,8 @@
 #define DEFAULT_THERMAL_GOVERNOR       "fair_share"
 #elif defined(CONFIG_THERMAL_DEFAULT_GOV_USER_SPACE)
 #define DEFAULT_THERMAL_GOVERNOR       "user_space"
+#elif defined(CONFIG_THERMAL_DEFAULT_GOV_POWER_ALLOCATOR)
+#define DEFAULT_THERMAL_GOVERNOR       "power_allocator"
 #endif
 
 struct thermal_zone_device;
@@ -154,8 +156,7 @@ struct thermal_attr {
  * @devdata:	private pointer for device private data
  * @trips:	number of trip points the thermal zone supports
  * @passive_delay:	number of milliseconds to wait between polls when
- *			performing passive cooling.  Currenty only used by the
- *			step-wise governor
+ *			performing passive cooling.
  * @polling_delay:	number of milliseconds to wait between polls when
  *			checking whether trip points have been crossed (0 for
  *			interrupt driven systems)
@@ -165,7 +166,6 @@ struct thermal_attr {
  * @last_temperature:	previous temperature read
  * @emul_temperature:	emulated temperature when using CONFIG_THERMAL_EMULATION
  * @passive:		1 if you've crossed a passive trip point, 0 otherwise.
- *			Currenty only used by the step-wise governor.
  * @forced_passive:	If > 0, temperature at which to switch on all ACPI
  *			processor cooling devices.  Currently only used by the
  *			step-wise governor.
@@ -197,7 +197,7 @@ struct thermal_zone_device {
 	int passive;
 	unsigned int forced_passive;
 	struct thermal_zone_device_ops *ops;
-	const struct thermal_zone_params *tzp;
+	struct thermal_zone_params *tzp;
 	struct thermal_governor *governor;
 	void *governor_data;
 	struct list_head thermal_instances;
@@ -275,6 +275,33 @@ struct thermal_zone_params {
 
 	int num_tbps;	/* Number of tbp entries */
 	struct thermal_bind_params *tbp;
+
+	/*
+	 * Sustainable power (heat) that this thermal zone can dissipate in
+	 * mW
+	 */
+	u32 sustainable_power;
+
+	/*
+	 * Proportional parameter of the PID controller when
+	 * overshooting (i.e., when temperature is below the target)
+	 */
+	s32 k_po;
+
+	/*
+	 * Proportional parameter of the PID controller when
+	 * undershooting
+	 */
+	s32 k_pu;
+
+	/* Integral parameter of the PID controller */
+	s32 k_i;
+
+	/* Derivative parameter of the PID controller */
+	s32 k_d;
+
+	/* threshold below which the error is no longer accumulated */
+	s32 integral_cutoff;
 };
 
 struct thermal_genl_event {
@@ -350,7 +377,7 @@ int power_actor_set_power(struct thermal_cooling_device *,
 			  struct thermal_instance *, u32);
 struct thermal_zone_device *thermal_zone_device_register(const char *, int, int,
 		void *, struct thermal_zone_device_ops *,
-		const struct thermal_zone_params *, int, int);
+		struct thermal_zone_params *, int, int);
 void thermal_zone_device_unregister(struct thermal_zone_device *);
 
 int thermal_zone_bind_cooling_device(struct thermal_zone_device *, int,
-- 
cgit v1.2.3


From f31105347cc56c13d552b844ada04418769d875d Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 28 Apr 2015 12:17:50 +0200
Subject: irqchip: irqc: Remove platform data support

As of commit 914d7d148411997c ("ARM: shmobile: r8a73a4: Remove legacy
code"), the Renesas R-Mobile/R-Car interrupt controller is used with DT
only, and interrupt numbers are thus always assigned automatically.

Drop the platform data declaration and all related support code.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: Magnus Damm <magnus.damm@gmail.com>
Cc: Jason Cooper <jason@lakedaemon.net>
Link: http://lkml.kernel.org/r/1430216270-31929-1-git-send-email-geert%2Brenesas@glider.be
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/platform_data/irq-renesas-irqc.h | 27 --------------------------
 1 file changed, 27 deletions(-)
 delete mode 100644 include/linux/platform_data/irq-renesas-irqc.h

(limited to 'include/linux')

diff --git a/include/linux/platform_data/irq-renesas-irqc.h b/include/linux/platform_data/irq-renesas-irqc.h
deleted file mode 100644
index 3ae17b3e00ed..000000000000
--- a/include/linux/platform_data/irq-renesas-irqc.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Renesas IRQC Driver
- *
- *  Copyright (C) 2013 Magnus Damm
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#ifndef __IRQ_RENESAS_IRQC_H__
-#define __IRQ_RENESAS_IRQC_H__
-
-struct renesas_irqc_config {
-	unsigned int irq_base;
-};
-
-#endif /* __IRQ_RENESAS_IRQC_H__ */
-- 
cgit v1.2.3


From 406c057c4e00744453d5b0731eb23629ec14dcdf Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Mon, 4 May 2015 21:54:20 -0400
Subject: libata: READ LOG DMA EXT support can be in either page 119 or 120

Support for the READ/WRITE LOG DMA EXT commands can be signaled either
in page 119 or page 120. We should check both pages.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/ata.h | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ata.h b/include/linux/ata.h
index b666b773e111..fed36418dd1c 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -704,9 +704,19 @@ static inline bool ata_id_wcache_enabled(const u16 *id)
 
 static inline bool ata_id_has_read_log_dma_ext(const u16 *id)
 {
+	/* Word 86 must have bit 15 set */
 	if (!(id[ATA_ID_CFS_ENABLE_2] & (1 << 15)))
 		return false;
-	return id[ATA_ID_COMMAND_SET_3] & (1 << 3);
+
+	/* READ LOG DMA EXT support can be signaled either from word 119
+	 * or from word 120. The format is the same for both words: Bit
+	 * 15 must be cleared, bit 14 set and bit 3 set.
+	 */
+	if ((id[ATA_ID_COMMAND_SET_3] & 0xC008) == 0x4008 ||
+	    (id[ATA_ID_COMMAND_SET_4] & 0xC008) == 0x4008)
+		return true;
+
+	return false;
 }
 
 static inline bool ata_id_has_sense_reporting(const u16 *id)
-- 
cgit v1.2.3


From 5d3abf8ff67f49271a42c0f7fa4f20f9e046bf0e Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Mon, 4 May 2015 21:54:21 -0400
Subject: libata: Fall back to unqueued READ LOG EXT if the DMA variant fails

Some devices advertise support for the READ/WRITE LOG DMA EXT commands
but fail when we try to issue them. This can lead to queued TRIM being
unintentionally disabled since the relevant feature flag is located in a
general purpose log page.

Fall back to unqueued READ LOG EXT if the DMA variant fails while
reading a log page.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/libata.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 8dad4a307bb8..c3ef58014b33 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -424,6 +424,7 @@ enum {
 	ATA_HORKAGE_NOLPM	= (1 << 20),	/* don't use LPM */
 	ATA_HORKAGE_WD_BROKEN_LPM = (1 << 21),	/* some WDs have broken LPM */
 	ATA_HORKAGE_ZERO_AFTER_TRIM = (1 << 22),/* guarantees zero after trim */
+	ATA_HORKAGE_NO_NCQ_LOG	= (1 << 23),	/* don't use NCQ for log read */
 
 	 /* DMA mask for user DMA control: User visible values; DO NOT
 	    renumber */
-- 
cgit v1.2.3


From c4cf5261f8bffd9de132b50660a69148e7575bd6 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Fri, 17 Apr 2015 16:15:18 -0600
Subject: bio: skip atomic inc/dec of ->bi_remaining for non-chains

Struct bio has an atomic ref count for chained bio's, and we use this
to know when to end IO on the bio. However, most bio's are not chained,
so we don't need to always introduce this atomic operation as part of
ending IO.

Add a helper to elevate the bi_remaining count, and flag the bio as
now actually needing the decrement at end_io time. Rename the field
to __bi_remaining to catch any current users of this doing the
incrementing manually.

For high IOPS workloads, this reduces the overhead of bio_endio()
substantially.

Tested-by: Robert Elliott <elliott@hp.com>
Acked-by: Kent Overstreet <kent.overstreet@gmail.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/bio.h       | 11 +++++++++++
 include/linux/blk_types.h |  3 ++-
 2 files changed, 13 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index da3a127c9958..8bfe9eee6d1a 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -644,6 +644,17 @@ static inline struct bio *bio_list_get(struct bio_list *bl)
 	return bio;
 }
 
+/*
+ * Increment chain count for the bio. Make sure the CHAIN flag update
+ * is visible before the raised count.
+ */
+static inline void bio_inc_remaining(struct bio *bio)
+{
+	bio->bi_flags |= (1 << BIO_CHAIN);
+	smp_mb__before_atomic();
+	atomic_inc(&bio->__bi_remaining);
+}
+
 /*
  * bio_set is used to allow other portions of the IO system to
  * allocate their own private memory pools for bio and iovec structures.
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index a1b25e35ea5f..8b07e0603887 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -65,7 +65,7 @@ struct bio {
 	unsigned int		bi_seg_front_size;
 	unsigned int		bi_seg_back_size;
 
-	atomic_t		bi_remaining;
+	atomic_t		__bi_remaining;
 
 	bio_end_io_t		*bi_end_io;
 
@@ -122,6 +122,7 @@ struct bio {
 #define BIO_NULL_MAPPED 8	/* contains invalid user pages */
 #define BIO_QUIET	9	/* Make BIO Quiet */
 #define BIO_SNAP_STABLE	10	/* bio data must be snapshotted during write */
+#define BIO_CHAIN	11	/* chained bio, ->bi_remaining in effect */
 
 /*
  * Flags starting here get preserved by bio_reset() - this includes
-- 
cgit v1.2.3


From dac56212e8127dbc0bff7be35c508bc280213309 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Fri, 17 Apr 2015 16:23:59 -0600
Subject: bio: skip atomic inc/dec of ->bi_cnt for most use cases

Struct bio has a reference count that controls when it can be freed.
Most uses cases is allocating the bio, which then returns with a
single reference to it, doing IO, and then dropping that single
reference. We can remove this atomic_dec_and_test() in the completion
path, if nobody else is holding a reference to the bio.

If someone does call bio_get() on the bio, then we flag the bio as
now having valid count and that we must properly honor the reference
count when it's being put.

Tested-by: Robert Elliott <elliott@hp.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/bio.h       | 16 +++++++++++++++-
 include/linux/blk_types.h |  3 ++-
 2 files changed, 17 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index 8bfe9eee6d1a..7486ea103f6e 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -290,7 +290,21 @@ static inline unsigned bio_segments(struct bio *bio)
  * returns. and then bio would be freed memory when if (bio->bi_flags ...)
  * runs
  */
-#define bio_get(bio)	atomic_inc(&(bio)->bi_cnt)
+static inline void bio_get(struct bio *bio)
+{
+	bio->bi_flags |= (1 << BIO_REFFED);
+	smp_mb__before_atomic();
+	atomic_inc(&bio->__bi_cnt);
+}
+
+static inline void bio_cnt_set(struct bio *bio, unsigned int count)
+{
+	if (count != 1) {
+		bio->bi_flags |= (1 << BIO_REFFED);
+		smp_mb__before_atomic();
+	}
+	atomic_set(&bio->__bi_cnt, count);
+}
 
 enum bip_flags {
 	BIP_BLOCK_INTEGRITY	= 1 << 0, /* block layer owns integrity data */
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 8b07e0603887..93d2e7153816 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -92,7 +92,7 @@ struct bio {
 
 	unsigned short		bi_max_vecs;	/* max bvl_vecs we can hold */
 
-	atomic_t		bi_cnt;		/* pin count */
+	atomic_t		__bi_cnt;	/* pin count */
 
 	struct bio_vec		*bi_io_vec;	/* the actual vec list */
 
@@ -123,6 +123,7 @@ struct bio {
 #define BIO_QUIET	9	/* Make BIO Quiet */
 #define BIO_SNAP_STABLE	10	/* bio data must be snapshotted during write */
 #define BIO_CHAIN	11	/* chained bio, ->bi_remaining in effect */
+#define BIO_REFFED	12	/* bio has elevated ->bi_cnt */
 
 /*
  * Flags starting here get preserved by bio_reset() - this includes
-- 
cgit v1.2.3


From 84be456f883c4685680fba8e5154b5f72e92957e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 1 May 2015 12:46:15 +0200
Subject: remove <asm/scatterlist.h>

We don't have any arch specific scatterlist now that parisc switched over
to the generic one.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blkdev.h      |  3 +--
 include/linux/dmapool.h     |  2 +-
 include/linux/scatterlist.h | 39 ++++++++++++++++++++++++++++++++-------
 3 files changed, 34 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 7f9a516f24de..504af1e65ce1 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -22,8 +22,7 @@
 #include <linux/smp.h>
 #include <linux/rcupdate.h>
 #include <linux/percpu-refcount.h>
-
-#include <asm/scatterlist.h>
+#include <linux/scatterlist.h>
 
 struct module;
 struct scsi_ioctl_command;
diff --git a/include/linux/dmapool.h b/include/linux/dmapool.h
index 52456aa566a0..e1043f79122f 100644
--- a/include/linux/dmapool.h
+++ b/include/linux/dmapool.h
@@ -11,8 +11,8 @@
 #ifndef LINUX_DMAPOOL_H
 #define	LINUX_DMAPOOL_H
 
+#include <linux/scatterlist.h>
 #include <asm/io.h>
-#include <asm/scatterlist.h>
 
 struct device;
 
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index ed8f9e70df9b..eca1ec93775c 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -2,13 +2,39 @@
 #define _LINUX_SCATTERLIST_H
 
 #include <linux/string.h>
+#include <linux/types.h>
 #include <linux/bug.h>
 #include <linux/mm.h>
-
-#include <asm/types.h>
-#include <asm/scatterlist.h>
 #include <asm/io.h>
 
+struct scatterlist {
+#ifdef CONFIG_DEBUG_SG
+	unsigned long	sg_magic;
+#endif
+	unsigned long	page_link;
+	unsigned int	offset;
+	unsigned int	length;
+	dma_addr_t	dma_address;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+	unsigned int	dma_length;
+#endif
+};
+
+/*
+ * These macros should be used after a dma_map_sg call has been done
+ * to get bus addresses of each of the SG entries and their lengths.
+ * You should only work with the number of sg entries dma_map_sg
+ * returns, or alternatively stop on the first sg_dma_len(sg) which
+ * is 0.
+ */
+#define sg_dma_address(sg)	((sg)->dma_address)
+
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+#define sg_dma_len(sg)		((sg)->dma_length)
+#else
+#define sg_dma_len(sg)		((sg)->length)
+#endif
+
 struct sg_table {
 	struct scatterlist *sgl;	/* the list */
 	unsigned int nents;		/* number of mapped entries */
@@ -18,10 +44,9 @@ struct sg_table {
 /*
  * Notes on SG table design.
  *
- * Architectures must provide an unsigned long page_link field in the
- * scatterlist struct. We use that to place the page pointer AND encode
- * information about the sg table as well. The two lower bits are reserved
- * for this information.
+ * We use the unsigned long page_link field in the scatterlist struct to place
+ * the page pointer AND encode information about the sg table as well. The two
+ * lower bits are reserved for this information.
  *
  * If bit 0 is set, then the page_link contains a pointer to the next sg
  * table list. Otherwise the next entry is at sg + 1.
-- 
cgit v1.2.3


From 4f8c9510ba71bb54477841bebb90154ef140860f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 17 Apr 2015 22:37:16 +0200
Subject: block: rename REQ_TYPE_SPECIAL to REQ_TYPE_DRV_PRIV

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blkdev.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 7f9a516f24de..98c90272443b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -79,10 +79,10 @@ enum rq_cmd_type_bits {
 	REQ_TYPE_PM_SUSPEND,		/* suspend request */
 	REQ_TYPE_PM_RESUME,		/* resume request */
 	REQ_TYPE_PM_SHUTDOWN,		/* shutdown request */
-	REQ_TYPE_SPECIAL,		/* driver defined type */
+	REQ_TYPE_DRV_PRIV,		/* driver defined type */
 	/*
 	 * for ATA/ATAPI devices. this really doesn't belong here, ide should
-	 * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver
+	 * use REQ_TYPE_DRV_PRIV and use rq->cmd[0] with the range of driver
 	 * private REQ_LB opcodes to differentiate what type of request this is
 	 */
 	REQ_TYPE_ATA_TASKFILE,
-- 
cgit v1.2.3


From b42171ef7d938a66fa52e66a3d911ed63770b5ca Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 17 Apr 2015 22:37:17 +0200
Subject: block: move REQ_TYPE_ATA_TASKFILE and REQ_TYPE_ATA_PC to ide.h

These values are only used by the IDE driver, so move them into it
by allowing drivers to take cmd_type values after the first private
one.  Note that we have to turn cmd_type into a plain unsigned integer
so that gcc doesn't complain about mismatching enum types.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blkdev.h | 11 ++---------
 include/linux/ide.h    |  7 +++++++
 2 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 98c90272443b..9cb4d80a4987 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -79,14 +79,7 @@ enum rq_cmd_type_bits {
 	REQ_TYPE_PM_SUSPEND,		/* suspend request */
 	REQ_TYPE_PM_RESUME,		/* resume request */
 	REQ_TYPE_PM_SHUTDOWN,		/* shutdown request */
-	REQ_TYPE_DRV_PRIV,		/* driver defined type */
-	/*
-	 * for ATA/ATAPI devices. this really doesn't belong here, ide should
-	 * use REQ_TYPE_DRV_PRIV and use rq->cmd[0] with the range of driver
-	 * private REQ_LB opcodes to differentiate what type of request this is
-	 */
-	REQ_TYPE_ATA_TASKFILE,
-	REQ_TYPE_ATA_PC,
+	REQ_TYPE_DRV_PRIV,		/* driver defined types from here */
 };
 
 #define BLK_MAX_CDB	16
@@ -108,7 +101,7 @@ struct request {
 	struct blk_mq_ctx *mq_ctx;
 
 	u64 cmd_flags;
-	enum rq_cmd_type_bits cmd_type;
+	unsigned cmd_type;
 	unsigned long atomic_flags;
 
 	int cpu;
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 93b5ca754b5b..62ac399144a6 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -39,6 +39,12 @@
 
 struct device;
 
+/* IDE-specific values for req->cmd_type */
+enum ata_cmd_type_bits {
+	REQ_TYPE_ATA_TASKFILE = REQ_TYPE_DRV_PRIV + 1,
+	REQ_TYPE_ATA_PC,
+};
+
 /* Error codes returned in rq->errors to the higher part of the driver. */
 enum {
 	IDE_DRV_ERROR_GENERAL	= 101,
@@ -1551,4 +1557,5 @@ static inline void ide_set_drivedata(ide_drive_t *drive, void *data)
 #define ide_host_for_each_port(i, port, host) \
 	for ((i) = 0; ((port) = (host)->ports[i]) || (i) < MAX_HOST_PORTS; (i)++)
 
+
 #endif /* _IDE_H */
-- 
cgit v1.2.3


From b0b93b48a30e809240ddd7449a6ad60a5ddf7b4d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 17 Apr 2015 22:37:18 +0200
Subject: block: move REQ_TYPE_SENSE to the ide driver

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blkdev.h | 1 -
 include/linux/ide.h    | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 9cb4d80a4987..6076b9e18dcb 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -75,7 +75,6 @@ struct request_list {
 enum rq_cmd_type_bits {
 	REQ_TYPE_FS		= 1,	/* fs request */
 	REQ_TYPE_BLOCK_PC,		/* scsi command */
-	REQ_TYPE_SENSE,			/* sense request */
 	REQ_TYPE_PM_SUSPEND,		/* suspend request */
 	REQ_TYPE_PM_RESUME,		/* resume request */
 	REQ_TYPE_PM_SHUTDOWN,		/* shutdown request */
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 62ac399144a6..9856b7d455d9 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -43,6 +43,7 @@ struct device;
 enum ata_cmd_type_bits {
 	REQ_TYPE_ATA_TASKFILE = REQ_TYPE_DRV_PRIV + 1,
 	REQ_TYPE_ATA_PC,
+	REQ_TYPE_ATA_SENSE,	/* sense request */
 };
 
 /* Error codes returned in rq->errors to the higher part of the driver. */
-- 
cgit v1.2.3


From ac7cdff00a33d48d27217560fa3b16d802e5f535 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 17 Apr 2015 22:37:19 +0200
Subject: block: remove REQ_TYPE_PM_SHUTDOWN

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blkdev.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 6076b9e18dcb..c2829ba5e738 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -77,7 +77,6 @@ enum rq_cmd_type_bits {
 	REQ_TYPE_BLOCK_PC,		/* scsi command */
 	REQ_TYPE_PM_SUSPEND,		/* suspend request */
 	REQ_TYPE_PM_RESUME,		/* resume request */
-	REQ_TYPE_PM_SHUTDOWN,		/* shutdown request */
 	REQ_TYPE_DRV_PRIV,		/* driver defined types from here */
 };
 
-- 
cgit v1.2.3


From a7928c1578c550bd6f4dec62d65132e6db226c57 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 17 Apr 2015 22:37:20 +0200
Subject: block: move PM request support to IDE

This removes the request types and hacks from the block code and into the
old IDE driver.  There is a small amunt of code duplication due to this,
but it's not too bad.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blkdev.h | 21 +--------------------
 include/linux/ide.h    | 19 +++++++++++++++++++
 2 files changed, 20 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c2829ba5e738..2da818a48097 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -30,7 +30,6 @@ struct scsi_ioctl_command;
 
 struct request_queue;
 struct elevator_queue;
-struct request_pm_state;
 struct blk_trace;
 struct request;
 struct sg_io_hdr;
@@ -75,8 +74,6 @@ struct request_list {
 enum rq_cmd_type_bits {
 	REQ_TYPE_FS		= 1,	/* fs request */
 	REQ_TYPE_BLOCK_PC,		/* scsi command */
-	REQ_TYPE_PM_SUSPEND,		/* suspend request */
-	REQ_TYPE_PM_RESUME,		/* resume request */
 	REQ_TYPE_DRV_PRIV,		/* driver defined types from here */
 };
 
@@ -207,19 +204,6 @@ static inline unsigned short req_get_ioprio(struct request *req)
 	return req->ioprio;
 }
 
-/*
- * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME
- * requests. Some step values could eventually be made generic.
- */
-struct request_pm_state
-{
-	/* PM state machine step value, currently driver specific */
-	int	pm_step;
-	/* requested PM state value (S1, S2, S3, S4, ...) */
-	u32	pm_state;
-	void*	data;		/* for driver use */
-};
-
 #include <linux/elevator.h>
 
 struct blk_queue_ctx;
@@ -601,10 +585,6 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
 	(((rq)->cmd_flags & REQ_STARTED) && \
 	 ((rq)->cmd_type == REQ_TYPE_FS))
 
-#define blk_pm_request(rq)	\
-	((rq)->cmd_type == REQ_TYPE_PM_SUSPEND || \
-	 (rq)->cmd_type == REQ_TYPE_PM_RESUME)
-
 #define blk_rq_cpu_valid(rq)	((rq)->cpu != -1)
 #define blk_bidi_rq(rq)		((rq)->next_rq != NULL)
 /* rq->queuelist of dequeued request must be list_empty() */
@@ -838,6 +818,7 @@ extern void blk_stop_queue(struct request_queue *q);
 extern void blk_sync_queue(struct request_queue *q);
 extern void __blk_stop_queue(struct request_queue *q);
 extern void __blk_run_queue(struct request_queue *q);
+extern void __blk_run_queue_uncond(struct request_queue *q);
 extern void blk_run_queue(struct request_queue *);
 extern void blk_run_queue_async(struct request_queue *q);
 extern int blk_rq_map_user(struct request_queue *, struct request *,
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 9856b7d455d9..a633898f36ac 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -44,8 +44,14 @@ enum ata_cmd_type_bits {
 	REQ_TYPE_ATA_TASKFILE = REQ_TYPE_DRV_PRIV + 1,
 	REQ_TYPE_ATA_PC,
 	REQ_TYPE_ATA_SENSE,	/* sense request */
+	REQ_TYPE_ATA_PM_SUSPEND,/* suspend request */
+	REQ_TYPE_ATA_PM_RESUME,	/* resume request */
 };
 
+#define ata_pm_request(rq)	\
+	((rq)->cmd_type == REQ_TYPE_ATA_PM_SUSPEND || \
+	 (rq)->cmd_type == REQ_TYPE_ATA_PM_RESUME)
+
 /* Error codes returned in rq->errors to the higher part of the driver. */
 enum {
 	IDE_DRV_ERROR_GENERAL	= 101,
@@ -1321,6 +1327,19 @@ struct ide_port_info {
 	u8			udma_mask;
 };
 
+/*
+ * State information carried for REQ_TYPE_ATA_PM_SUSPEND and REQ_TYPE_ATA_PM_RESUME
+ * requests.
+ */
+struct ide_pm_state {
+	/* PM state machine step value, currently driver specific */
+	int	pm_step;
+	/* requested PM state value (S1, S2, S3, S4, ...) */
+	u32	pm_state;
+	void*	data;		/* for driver use */
+};
+
+
 int ide_pci_init_one(struct pci_dev *, const struct ide_port_info *, void *);
 int ide_pci_init_two(struct pci_dev *, struct pci_dev *,
 		     const struct ide_port_info *, void *);
-- 
cgit v1.2.3


From cd8ae85299d54155702a56811b2e035e63064d3d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 3 May 2015 21:34:46 -0700
Subject: tcp: provide SYN headers for passive connections

This patch allows a server application to get the TCP SYN headers for
its passive connections.  This is useful if the server is doing
fingerprinting of clients based on SYN packet contents.

Two socket options are added: TCP_SAVE_SYN and TCP_SAVED_SYN.

The first is used on a socket to enable saving the SYN headers
for child connections. This can be set before or after the listen()
call.

The latter is used to retrieve the SYN headers for passive connections,
if the parent listener has enabled TCP_SAVE_SYN.

TCP_SAVED_SYN is read once, it frees the saved SYN headers.

The data returned in TCP_SAVED_SYN are network (IPv4/IPv6) and TCP
headers.

Original patch was written by Tom Herbert, I changed it to not hold
a full skb (and associated dst and conntracking reference).

We have used such patch for about 3 years at Google.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Tested-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 3b2911502a8c..e6fb5df22db1 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -199,6 +199,7 @@ struct tcp_sock {
 		syn_fastopen:1,	/* SYN includes Fast Open option */
 		syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */
 		syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
+		save_syn:1,	/* Save headers of SYN packet */
 		is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */
 	u32	tlp_high_seq;	/* snd_nxt at the time of TLP retransmit. */
 
@@ -326,6 +327,7 @@ struct tcp_sock {
 	 * socket. Used to retransmit SYNACKs etc.
 	 */
 	struct request_sock *fastopen_rsk;
+	u32	*saved_syn;
 };
 
 enum tsq_flags {
@@ -393,4 +395,10 @@ static inline int fastopen_init_queue(struct sock *sk, int backlog)
 	return 0;
 }
 
+static inline void tcp_saved_syn_free(struct tcp_sock *tp)
+{
+	kfree(tp->saved_syn);
+	tp->saved_syn = NULL;
+}
+
 #endif	/* _LINUX_TCP_H */
-- 
cgit v1.2.3


From 2c7a88c252bf3381958cf716f31b6b2e0f2f3fa7 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@redhat.com>
Date: Mon, 4 May 2015 14:33:48 -0700
Subject: etherdev: Fix sparse error, make test usable by other functions

This change does two things.  First it fixes a sparse error for the fact
that the __be16 degrades to an integer.  Since that is actually what I am
kind of doing I am simply working around that by forcing both sides of the
comparison to u16.

Also I realized on some compilers I was generating another instruction for
big endian systems such as PowerPC since it was masking the value before
doing the comparison.  So to resolve that I have simply pulled the mask out
and wrapped it in an #ifndef __BIG_ENDIAN.

Lastly I pulled this all out into its own function.  I notices there are
similar checks in a number of other places so this function can be reused
there to help reduce overhead in these paths as well.

Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/etherdevice.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index c4a10f991fe0..9012f8775208 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -190,6 +190,24 @@ static inline bool is_valid_ether_addr(const u8 *addr)
 	return !is_multicast_ether_addr(addr) && !is_zero_ether_addr(addr);
 }
 
+/**
+ * eth_proto_is_802_3 - Determine if a given Ethertype/length is a protocol
+ * @proto: Ethertype/length value to be tested
+ *
+ * Check that the value from the Ethertype/length field is a valid Ethertype.
+ *
+ * Return true if the valid is an 802.3 supported Ethertype.
+ */
+static inline bool eth_proto_is_802_3(__be16 proto)
+{
+#ifndef __BIG_ENDIAN
+	/* if CPU is little endian mask off bits representing LSB */
+	proto &= htons(0xFF00);
+#endif
+	/* cast both to u16 and compare since LSB can be ignored */
+	return (__force u16)proto >= (__force u16)htons(ETH_P_802_3_MIN);
+}
+
 /**
  * eth_random_addr - Generate software assigned random Ethernet address
  * @addr: Pointer to a six-byte array containing the Ethernet address
-- 
cgit v1.2.3


From 9545b22da647cf6fbbac9c5a48c50fd72d892b11 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@redhat.com>
Date: Mon, 4 May 2015 14:34:10 -0700
Subject: vlan: Use eth_proto_is_802_3

Replace "ntohs(proto) >= ETH_P_802_3_MIN" w/ eth_proto_is_802_3(proto).

Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 920e4457ce6e..b9ab677c0c0a 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -539,7 +539,7 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb,
 	 */
 
 	proto = vhdr->h_vlan_encapsulated_proto;
-	if (ntohs(proto) >= ETH_P_802_3_MIN) {
+	if (eth_proto_is_802_3(proto)) {
 		skb->protocol = proto;
 		return;
 	}
-- 
cgit v1.2.3


From d5622a9c13752be46e6fcde9d31391ce0bb0598b Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Mon, 2 Mar 2015 15:45:41 +0000
Subject: clkdev: use clk_hw internally

clk_add_alias() calls clk_get() followed by clk_put() but in between
those two calls it saves away the struct clk pointer to a clk_lookup
structure. This leaves the 'clk' member of the clk_lookup pointing at
freed memory on configurations where CONFIG_COMMON_CLK=y. This is a
problem because clk_get_sys() will eventually try to dereference the
freed pointer by calling __clk_get_hw() on it. Fix this by saving away
the struct clk_hw pointer instead of the struct clk pointer so that when
we try to create a per-user struct clk in clk_get_sys() we don't
dereference a junk pointer.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/linux/clkdev.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/clkdev.h b/include/linux/clkdev.h
index 94bad77eeb4a..3003afad46c9 100644
--- a/include/linux/clkdev.h
+++ b/include/linux/clkdev.h
@@ -22,6 +22,7 @@ struct clk_lookup {
 	const char		*dev_id;
 	const char		*con_id;
 	struct clk		*clk;
+	struct clk_hw		*clk_hw;
 };
 
 #define CLKDEV_INIT(d, n, c)	\
-- 
cgit v1.2.3


From d2d14a77886485310ec66e575f00ea5232ac7a14 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Sat, 14 Mar 2015 15:12:35 +0000
Subject: clk: update clk API documentation to clarify clk_round_rate()

The idea is that rate = clk_round_rate(clk, r) is equivalent to:

	clk_set_rate(clk, r);
	rate = clk_get_rate(clk);

except that clk_round_rate() does not change the hardware in any way.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/linux/clk.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/clk.h b/include/linux/clk.h
index 68c16a6bedb3..cafb22df8d00 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h
@@ -306,6 +306,20 @@ void devm_clk_put(struct device *dev, struct clk *clk);
  * @clk: clock source
  * @rate: desired clock rate in Hz
  *
+ * This answers the question "if I were to pass @rate to clk_set_rate(),
+ * what clock rate would I end up with?" without changing the hardware
+ * in any way.  In other words:
+ *
+ *   rate = clk_round_rate(clk, r);
+ *
+ * and:
+ *
+ *   clk_set_rate(clk, r);
+ *   rate = clk_get_rate(clk);
+ *
+ * are equivalent except the former does not modify the clock hardware
+ * in any way.
+ *
  * Returns rounded clock rate in Hz, or negative errno.
  */
 long clk_round_rate(struct clk *clk, unsigned long rate);
-- 
cgit v1.2.3


From 2d34e507293102f29ee94d9a9c5b890696d42452 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Mon, 9 Mar 2015 11:03:00 +0000
Subject: clkdev: get rid of redundant clk_add_alias() prototype in linux/clk.h

clk_add_alias() is provided by clkdev, and is not part of the clk API.
Howver, it is prototyped in two locations: linux/clkdev.h and
linux/clk.h.  This is a mess.  Get rid of the redundant and unnecessary
version in linux/clk.h.

Acked-by: Tony Lindgren <tony@atomide.com>
Tested-by: Robert Jarzmik <robert.jarzmik@free.fr>
Acked-by: Sekhar Nori <nsekhar@ti.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/linux/clk.h | 13 -------------
 1 file changed, 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/clk.h b/include/linux/clk.h
index cafb22df8d00..0df4a51e1a78 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h
@@ -485,19 +485,6 @@ static inline void clk_disable_unprepare(struct clk *clk)
 	clk_unprepare(clk);
 }
 
-/**
- * clk_add_alias - add a new clock alias
- * @alias: name for clock alias
- * @alias_dev_name: device name
- * @id: platform specific clock name
- * @dev: device
- *
- * Allows using generic clock names for drivers by adding a new alias.
- * Assumes clkdev, see clkdev.h for more info.
- */
-int clk_add_alias(const char *alias, const char *alias_dev_name, char *id,
-			struct device *dev);
-
 struct device_node;
 struct of_phandle_args;
 
-- 
cgit v1.2.3


From b3d8d7e89fab374d731dfb46fe048f09766ca9c8 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Mon, 9 Mar 2015 10:43:04 +0000
Subject: clkdev: const-ify connection id to clk_add_alias()

The connection id is only passed to clk_get() which is already const.
Const-ify this argument too.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/linux/clkdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/clkdev.h b/include/linux/clkdev.h
index 3003afad46c9..cd93b215e3af 100644
--- a/include/linux/clkdev.h
+++ b/include/linux/clkdev.h
@@ -39,7 +39,7 @@ void clkdev_add(struct clk_lookup *cl);
 void clkdev_drop(struct clk_lookup *cl);
 
 void clkdev_add_table(struct clk_lookup *, size_t);
-int clk_add_alias(const char *, const char *, char *, struct device *);
+int clk_add_alias(const char *, const char *, const char *, struct device *);
 
 int clk_register_clkdev(struct clk *, const char *, const char *, ...);
 int clk_register_clkdevs(struct clk *, struct clk_lookup *, size_t);
-- 
cgit v1.2.3


From 2568999835d7797afce3dcc3a3f368051ffcaf1f Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Mon, 2 Mar 2015 15:40:29 +0000
Subject: clkdev: add clkdev_create() helper

Add a helper to allocate and add a clk_lookup structure.  This can not
only be used in several places in clkdev.c to simplify the code, but
more importantly, can be used by callers of the clkdev code to simplify
their clkdev creation and registration.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/linux/clkdev.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/clkdev.h b/include/linux/clkdev.h
index cd93b215e3af..a240b18e86fa 100644
--- a/include/linux/clkdev.h
+++ b/include/linux/clkdev.h
@@ -38,6 +38,9 @@ struct clk_lookup *clkdev_alloc(struct clk *clk, const char *con_id,
 void clkdev_add(struct clk_lookup *cl);
 void clkdev_drop(struct clk_lookup *cl);
 
+struct clk_lookup *clkdev_create(struct clk *clk, const char *con_id,
+	const char *dev_fmt, ...);
+
 void clkdev_add_table(struct clk_lookup *, size_t);
 int clk_add_alias(const char *, const char *, const char *, struct device *);
 
-- 
cgit v1.2.3


From efb0de55b6a2ec15fc424e660601f22ae2fa487a Mon Sep 17 00:00:00 2001
From: Shobhit Kumar <shobhit.kumar@intel.com>
Date: Tue, 5 May 2015 15:04:18 +0530
Subject: pwm: Add support to remove registered consumer lookup tables

In case some drivers are unloading, they can remove lookup tables which
they had registered during their load time to avoid redundant entries if
loaded again.

CC: Samuel Ortiz <sameo@linux.intel.com>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Alexandre Courbot <gnurou@gmail.com>
Cc: Thierry Reding <thierry.reding@gmail.com>
Signed-off-by: Shobhit Kumar <shobhit.kumar@intel.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 include/linux/pwm.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index e90628cac8fa..cfe2d8df5be0 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -290,10 +290,15 @@ struct pwm_lookup {
 
 #if IS_ENABLED(CONFIG_PWM)
 void pwm_add_table(struct pwm_lookup *table, size_t num);
+void pwm_remove_table(struct pwm_lookup *table, size_t num);
 #else
 static inline void pwm_add_table(struct pwm_lookup *table, size_t num)
 {
 }
+
+static inline void pwm_remove_table(struct pwm_lookup *table, size_t num)
+{
+}
 #endif
 
 #ifdef CONFIG_PWM_SYSFS
-- 
cgit v1.2.3


From fa76a3db7093a527333c380df82a0f158d9b8299 Mon Sep 17 00:00:00 2001
From: Sonic Zhang <sonic.zhang@analog.com>
Date: Thu, 9 Apr 2015 11:13:07 +0800
Subject: pinctrl: allow exlusive GPIO/mux pin allocation

Disallow simultaneous use of the the GPIO and peripheral mux
functions by setting a flag "strict" in struct pinctrl_desc.

The blackfin pinmux and gpio controller doesn't allow user to
set up a pin for both GPIO and peripheral function. So, add flag
strict in struct pinctrl_desc to check both gpio_owner and
mux_owner before approving the pin request.

v2-changes:
- if strict flag is set, check gpio_owner and mux_onwer in if and
  else clause

v3-changes:
- add kerneldoc for this struct
- augment Documentation/pinctrl.txt

Signed-off-by: Sonic Zhang <sonic.zhang@analog.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/pinctrl/pinctrl.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h
index 66e4697516de..fc6b0348c375 100644
--- a/include/linux/pinctrl/pinctrl.h
+++ b/include/linux/pinctrl/pinctrl.h
@@ -114,6 +114,8 @@ struct pinctrl_ops {
  *	of the pins field above
  * @pctlops: pin control operation vtable, to support global concepts like
  *	grouping of pins, this is optional.
+ * @strict: check both gpio_owner and mux_owner strictly before approving
+	the pin request
  * @pmxops: pinmux operations vtable, if you support pinmuxing in your driver
  * @confops: pin config operations vtable, if you support pin configuration in
  *	your driver
@@ -132,6 +134,7 @@ struct pinctrl_desc {
 	const struct pinctrl_ops *pctlops;
 	const struct pinmux_ops *pmxops;
 	const struct pinconf_ops *confops;
+	bool strict;
 	struct module *owner;
 #ifdef CONFIG_GENERIC_PINCONF
 	unsigned int num_custom_params;
-- 
cgit v1.2.3


From 8c4c2016345feefcd289ce2479eb70286d30825a Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Wed, 6 May 2015 14:19:13 +0200
Subject: pinctrl: move strict option to pinmux_ops

While the pinmux_ops are ideally just a vtable for pin mux
calls, the "strict" setting belongs so intuitively with the
pin multiplexing that we should move it here anyway. Putting
it in the top pinctrl_desc makes no sense.

Cc: Sonic Zhang <sonic.zhang@analog.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/pinctrl/pinctrl.h | 3 ---
 include/linux/pinctrl/pinmux.h  | 4 ++++
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h
index fc6b0348c375..66e4697516de 100644
--- a/include/linux/pinctrl/pinctrl.h
+++ b/include/linux/pinctrl/pinctrl.h
@@ -114,8 +114,6 @@ struct pinctrl_ops {
  *	of the pins field above
  * @pctlops: pin control operation vtable, to support global concepts like
  *	grouping of pins, this is optional.
- * @strict: check both gpio_owner and mux_owner strictly before approving
-	the pin request
  * @pmxops: pinmux operations vtable, if you support pinmuxing in your driver
  * @confops: pin config operations vtable, if you support pin configuration in
  *	your driver
@@ -134,7 +132,6 @@ struct pinctrl_desc {
 	const struct pinctrl_ops *pctlops;
 	const struct pinmux_ops *pmxops;
 	const struct pinconf_ops *confops;
-	bool strict;
 	struct module *owner;
 #ifdef CONFIG_GENERIC_PINCONF
 	unsigned int num_custom_params;
diff --git a/include/linux/pinctrl/pinmux.h b/include/linux/pinctrl/pinmux.h
index 511bda9ed4bf..d3740fa7073f 100644
--- a/include/linux/pinctrl/pinmux.h
+++ b/include/linux/pinctrl/pinmux.h
@@ -56,6 +56,9 @@ struct pinctrl_dev;
  *	depending on whether the GPIO is configured as input or output,
  *	a direction selector function may be implemented as a backing
  *	to the GPIO controllers that need pin muxing.
+ * @strict: do not allow simultaneous use of the same pin for GPIO and another
+ *	function. Check both gpio_owner and mux_owner strictly before approving
+ *	the pin request.
  */
 struct pinmux_ops {
 	int (*request) (struct pinctrl_dev *pctldev, unsigned offset);
@@ -79,6 +82,7 @@ struct pinmux_ops {
 				   struct pinctrl_gpio_range *range,
 				   unsigned offset,
 				   bool input);
+	bool strict;
 };
 
 #endif /* CONFIG_PINMUX */
-- 
cgit v1.2.3


From cac089f9026e9ddb3481daf08f0fc4e5949fa1af Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Thu, 23 Apr 2015 16:56:22 -0700
Subject: gpio: omap: Allow building as a loadable module

We currently get all kinds of errors building the omap gpio driver
as a module starting with:

undefined reference to `omap2_gpio_resume_after_idle'
undefined reference to `omap2_gpio_prepare_for_idle'
...

Let's fix the issue by adding inline functions to the header.
Note that we can now also remove the two unused functions for
omap_set_gpio_debounce and omap_set_gpio_debounce_time.

Then doing rmmod on the module produces further warnings
because of missing exit related functions. Let's add those.

And finally, we can make the Kconfig entry just a tristate
option that's selected for omaps.

Cc: Javier Martinez Canillas <javier@dowhile0.org>
Cc: Kevin Hilman <khilman@deeprootsystems.com>
Cc: Nishanth Menon <nm@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Reviewed-by: Grygorii Strashko <grygorii.strashko@linaro.org>
Acked-by: Santosh Shilimkar <ssantosh@kernel.org>
Reviewed-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/platform_data/gpio-omap.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/gpio-omap.h b/include/linux/platform_data/gpio-omap.h
index 5d50b25a73d7..cb2618147c34 100644
--- a/include/linux/platform_data/gpio-omap.h
+++ b/include/linux/platform_data/gpio-omap.h
@@ -208,9 +208,17 @@ struct omap_gpio_platform_data {
 	int (*get_context_loss_count)(struct device *dev);
 };
 
+#if IS_BUILTIN(CONFIG_GPIO_OMAP)
 extern void omap2_gpio_prepare_for_idle(int off_mode);
 extern void omap2_gpio_resume_after_idle(void);
-extern void omap_set_gpio_debounce(int gpio, int enable);
-extern void omap_set_gpio_debounce_time(int gpio, int enable);
+#else
+static inline void omap2_gpio_prepare_for_idle(int off_mode)
+{
+}
+
+static inline void omap2_gpio_resume_after_idle(void)
+{
+}
+#endif
 
 #endif
-- 
cgit v1.2.3


From 66eb3bd857f5311f72c7c371f78ddc9c472befba Mon Sep 17 00:00:00 2001
From: Fabian Frederick <fabf@skynet.be>
Date: Mon, 27 Apr 2015 18:04:05 +0200
Subject: pinctrl: use ERR_CAST instead of ERR_PTR/PTR_ERR

Inspired by scripts/coccinelle/api/err_cast.cocci

Signed-off-by: Fabian Frederick <fabf@skynet.be>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/pinctrl/consumer.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pinctrl/consumer.h b/include/linux/pinctrl/consumer.h
index 18eccefea06e..d7e5d608faa7 100644
--- a/include/linux/pinctrl/consumer.h
+++ b/include/linux/pinctrl/consumer.h
@@ -142,7 +142,7 @@ static inline struct pinctrl * __must_check pinctrl_get_select(
 	s = pinctrl_lookup_state(p, name);
 	if (IS_ERR(s)) {
 		pinctrl_put(p);
-		return ERR_PTR(PTR_ERR(s));
+		return ERR_CAST(s);
 	}
 
 	ret = pinctrl_select_state(p, s);
-- 
cgit v1.2.3


From 1d6b98774cff82860a3f044610e956bcbff556c1 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Tue, 31 Mar 2015 15:55:57 +0200
Subject: tty: constify return type of tty_name

All users of tty_name pass the result directly to a printf-like
function. This means we can actually let tty_name return the literal
"NULL tty" or tty->name directly, avoiding the strcpy and a lot of
medium-sized stack buffers. In preparation for that, make the return
type const char*.

While at it, we can also constify the tty parameter.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Reviewed-by: Peter Hurley <peter@hurleysoftware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/tty.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/tty.h b/include/linux/tty.h
index fe5623c9af71..4cbecfc7b3c9 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -421,7 +421,7 @@ static inline struct tty_struct *tty_kref_get(struct tty_struct *tty)
 
 extern int tty_paranoia_check(struct tty_struct *tty, struct inode *inode,
 			      const char *routine);
-extern char *tty_name(struct tty_struct *tty, char *buf);
+extern const char *tty_name(const struct tty_struct *tty, char *buf);
 extern void tty_wait_until_sent(struct tty_struct *tty, long timeout);
 extern int tty_check_change(struct tty_struct *tty);
 extern void __stop_tty(struct tty_struct *tty);
-- 
cgit v1.2.3


From 429b474990cb4e5e8cfe2352daf649d0599cccb6 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Tue, 31 Mar 2015 15:55:59 +0200
Subject: tty: remove buf parameter from tty_name()

tty_name no longer uses the buf parameter, so remove it along with all
the 64 byte stack buffers that used to be passed in.

Mostly generated by the coccinelle script

@depends on patch@
identifier buf;
constant C;
expression tty;
@@
- char buf[C];
  <+...
- tty_name(tty, buf)
+ tty_name(tty)
  ...+>

allmodconfig compiles, so I'm fairly confident the stack buffers
weren't used for other purposes as well.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Reviewed-by: Peter Hurley <peter@hurleysoftware.com>
Acked-by: Jesper Nilsson <jesper.nilsson@axis.com>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/tty.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/tty.h b/include/linux/tty.h
index 4cbecfc7b3c9..9a72c9144d8a 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -421,7 +421,7 @@ static inline struct tty_struct *tty_kref_get(struct tty_struct *tty)
 
 extern int tty_paranoia_check(struct tty_struct *tty, struct inode *inode,
 			      const char *routine);
-extern const char *tty_name(const struct tty_struct *tty, char *buf);
+extern const char *tty_name(const struct tty_struct *tty);
 extern void tty_wait_until_sent(struct tty_struct *tty, long timeout);
 extern int tty_check_change(struct tty_struct *tty);
 extern void __stop_tty(struct tty_struct *tty);
-- 
cgit v1.2.3


From 6b3cddccf4eec0883feb065aea28dd9770bb17d0 Mon Sep 17 00:00:00 2001
From: Peter Hurley <peter@hurleysoftware.com>
Date: Sat, 11 Apr 2015 11:02:36 -0400
Subject: serial: core: Fix unused variable warnings from uart_console()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If CONFIG_SERIAL_CORE_CONSOLE=n, build warnings are generated by
uart_console() macro expansion:

drivers/tty/serial/of_serial.c: In function ‘of_serial_suspend_8250’:
drivers/tty/serial/of_serial.c:262:20: warning: unused variable ‘port’ [-Wunused-variable]
  struct uart_port *port = &port8250->port;
                    ^
drivers/tty/serial/of_serial.c: In function ‘of_serial_resume_8250’:
drivers/tty/serial/of_serial.c:272:20: warning: unused variable ‘port’ [-Wunused-variable]
  struct uart_port *port = &port8250->port;

Signed-off-by: Peter Hurley <peter@hurleysoftware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/serial_core.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 025dad9dcde4..297d4fa1cfe5 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -35,7 +35,7 @@
 #define uart_console(port) \
 	((port)->cons && (port)->cons->index == (port)->line)
 #else
-#define uart_console(port)      (0)
+#define uart_console(port)      ({ (void)port; 0; })
 #endif
 
 struct uart_port;
-- 
cgit v1.2.3


From 17799359e7b3fa6ef4f2bf926cd6821cf7903ecf Mon Sep 17 00:00:00 2001
From: Brian Norris <computersforpeace@gmail.com>
Date: Sat, 28 Feb 2015 02:13:11 -0800
Subject: mtd: nand_bbt: make nand_scan_bbt() static

This implementation detail is no longer needed outside of nand_bbt.c.

Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 include/linux/mtd/nand.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 3d4ea7eb2b68..6c51876941f3 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -833,7 +833,6 @@ struct nand_manufacturers {
 extern struct nand_flash_dev nand_flash_ids[];
 extern struct nand_manufacturers nand_manuf_ids[];
 
-extern int nand_scan_bbt(struct mtd_info *mtd, struct nand_bbt_descr *bd);
 extern int nand_default_bbt(struct mtd_info *mtd);
 extern int nand_markbad_bbt(struct mtd_info *mtd, loff_t offs);
 extern int nand_isreserved_bbt(struct mtd_info *mtd, loff_t offs);
-- 
cgit v1.2.3


From 0097d12e504b3ce57b68810737ad6a5a64a98c68 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Thu, 30 Apr 2015 13:43:30 +0200
Subject: KVM: provide irq_unsafe kvm_guest_{enter|exit}

Several kvm architectures disable interrupts before kvm_guest_enter.
kvm_guest_enter then uses local_irq_save/restore to disable interrupts
again or for the first time. Lets provide underscore versions of
kvm_guest_{enter|exit} that assume being called locked.
kvm_guest_enter now disables interrupts for the full function and
thus we can remove the check for preemptible.

This patch then adopts s390/kvm to use local_irq_disable/enable calls
which are slighty cheaper that local_irq_save/restore and call these
new functions.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ad45054309a0..efc16df1fc5d 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -762,16 +762,10 @@ static inline void kvm_iommu_unmap_pages(struct kvm *kvm,
 }
 #endif
 
-static inline void kvm_guest_enter(void)
+/* must be called with irqs disabled */
+static inline void __kvm_guest_enter(void)
 {
-	unsigned long flags;
-
-	BUG_ON(preemptible());
-
-	local_irq_save(flags);
 	guest_enter();
-	local_irq_restore(flags);
-
 	/* KVM does not hold any references to rcu protected data when it
 	 * switches CPU into a guest mode. In fact switching to a guest mode
 	 * is very similar to exiting to userspace from rcu point of view. In
@@ -783,12 +777,27 @@ static inline void kvm_guest_enter(void)
 		rcu_virt_note_context_switch(smp_processor_id());
 }
 
+/* must be called with irqs disabled */
+static inline void __kvm_guest_exit(void)
+{
+	guest_exit();
+}
+
+static inline void kvm_guest_enter(void)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	__kvm_guest_enter();
+	local_irq_restore(flags);
+}
+
 static inline void kvm_guest_exit(void)
 {
 	unsigned long flags;
 
 	local_irq_save(flags);
-	guest_exit();
+	__kvm_guest_exit();
 	local_irq_restore(flags);
 }
 
-- 
cgit v1.2.3


From 653f52c316a49c5ee2701bc13b15879f20790662 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Thu, 23 Apr 2015 11:52:37 -0400
Subject: kvm,x86: load guest FPU context more eagerly

Currently KVM will clear the FPU bits in CR0.TS in the VMCS, and trap to
re-load them every time the guest accesses the FPU after a switch back into
the guest from the host.

This patch copies the x86 task switch semantics for FPU loading, with the
FPU loaded eagerly after first use if the system uses eager fpu mode,
or if the guest uses the FPU frequently.

In the latter case, after loading the FPU for 255 times, the fpu_counter
will roll over, and we will revert to loading the FPU on demand, until
it has been established that the guest is still actively using the FPU.

This mirrors the x86 task switch policy, which seems to work.

Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index efc16df1fc5d..b7a08cd6f4a8 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -230,6 +230,7 @@ struct kvm_vcpu {
 
 	int fpu_active;
 	int guest_fpu_loaded, guest_xcr0_loaded;
+	unsigned char fpu_counter;
 	wait_queue_head_t wq;
 	struct pid *pid;
 	int sigset_active;
-- 
cgit v1.2.3


From aed5ed47724f6a7453fa62e3c90f3cee93edbfe3 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 6 May 2015 18:04:23 +0200
Subject: context_tracking: Protect against recursion

Context tracking recursion can happen when an exception triggers
in the middle of a call to a context tracking probe.

This special case can be caused by vmalloc faults. If an access
to a memory area allocated by vmalloc happens in the middle of
context_tracking_enter(), we may run into an endless fault loop
because the exception in turn calls context_tracking_enter()
which faults on the same vmalloc'ed memory, triggering an
exception again, etc...

Some rare crashes have been reported so lets protect against
this with a recursion counter.

Reported-by: Dave Jones <davej@redhat.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Chris Metcalf <cmetcalf@ezchip.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Mike Galbraith <umgwanakikbuti@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Rafael J . Wysocki <rafael.j.wysocki@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1430928266-24888-2-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/context_tracking_state.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h
index 6b7b96a32b75..678ecdf90cf6 100644
--- a/include/linux/context_tracking_state.h
+++ b/include/linux/context_tracking_state.h
@@ -12,6 +12,7 @@ struct context_tracking {
 	 * may be further optimized using static keys.
 	 */
 	bool active;
+	int recursion;
 	enum ctx_state {
 		CONTEXT_KERNEL = 0,
 		CONTEXT_USER,
-- 
cgit v1.2.3


From fafe870f31212a72f3c2d74e7b90e4ef39e83ee1 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 6 May 2015 18:04:24 +0200
Subject: context_tracking: Inherit TIF_NOHZ through forks instead of context
 switches

TIF_NOHZ is used by context_tracking to force syscall slow-path
on every task in order to track userspace roundtrips. As such,
it must be set on all running tasks.

It's currently explicitly inherited through context switches.
There is no need to do it in this fast-path though. The flag
could simply be set once for all on all tasks, whether they are
running or not.

Lets do this by setting the flag for the init task on early boot,
and let it propagate through fork inheritance.

While at it, mark context_tracking_cpu_set() as init code, we
only need it at early boot time.

Suggested-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Chris Metcalf <cmetcalf@ezchip.com>
Cc: Dave Jones <davej@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Mike Galbraith <umgwanakikbuti@gmail.com>
Cc: Paul E . McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rafael J . Wysocki <rafael.j.wysocki@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1430928266-24888-3-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/context_tracking.h | 10 ----------
 include/linux/sched.h            |  3 +++
 2 files changed, 3 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index 2821838256b4..b96bd299966f 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -14,8 +14,6 @@ extern void context_tracking_enter(enum ctx_state state);
 extern void context_tracking_exit(enum ctx_state state);
 extern void context_tracking_user_enter(void);
 extern void context_tracking_user_exit(void);
-extern void __context_tracking_task_switch(struct task_struct *prev,
-					   struct task_struct *next);
 
 static inline void user_enter(void)
 {
@@ -51,19 +49,11 @@ static inline void exception_exit(enum ctx_state prev_ctx)
 	}
 }
 
-static inline void context_tracking_task_switch(struct task_struct *prev,
-						struct task_struct *next)
-{
-	if (context_tracking_is_enabled())
-		__context_tracking_task_switch(prev, next);
-}
 #else
 static inline void user_enter(void) { }
 static inline void user_exit(void) { }
 static inline enum ctx_state exception_enter(void) { return 0; }
 static inline void exception_exit(enum ctx_state prev_ctx) { }
-static inline void context_tracking_task_switch(struct task_struct *prev,
-						struct task_struct *next) { }
 #endif /* !CONFIG_CONTEXT_TRACKING */
 
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 26a2e6122734..185a750e4ed4 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2532,6 +2532,9 @@ static inline unsigned long wait_task_inactive(struct task_struct *p,
 }
 #endif
 
+#define tasklist_empty() \
+	list_empty(&init_task.tasks)
+
 #define next_task(p) \
 	list_entry_rcu((p)->tasks.next, struct task_struct, tasks)
 
-- 
cgit v1.2.3


From 83dedea8a07fb4bf91863764b15c1c4ec00330f9 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@ezchip.com>
Date: Wed, 6 May 2015 18:04:25 +0200
Subject: nohz: Add tick_nohz_full_add_cpus_to() API

This API is useful to modify a cpumask indicating some special
nohz-type functionality so that the nohz cores are automatically
added to that set.

Signed-off-by: Chris Metcalf <cmetcalf@ezchip.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Jones <davej@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Mike Galbraith <umgwanakikbuti@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1429024675-18938-1-git-send-email-cmetcalf@ezchip.com
Link: http://lkml.kernel.org/r/1430928266-24888-4-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/tick.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/tick.h b/include/linux/tick.h
index f8492da57ad3..4191b5623a28 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -134,6 +134,12 @@ static inline bool tick_nohz_full_cpu(int cpu)
 	return cpumask_test_cpu(cpu, tick_nohz_full_mask);
 }
 
+static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask)
+{
+	if (tick_nohz_full_enabled())
+		cpumask_or(mask, mask, tick_nohz_full_mask);
+}
+
 extern void __tick_nohz_full_check(void);
 extern void tick_nohz_full_kick(void);
 extern void tick_nohz_full_kick_cpu(int cpu);
@@ -142,6 +148,7 @@ extern void __tick_nohz_task_switch(struct task_struct *tsk);
 #else
 static inline bool tick_nohz_full_enabled(void) { return false; }
 static inline bool tick_nohz_full_cpu(int cpu) { return false; }
+static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { }
 static inline void __tick_nohz_full_check(void) { }
 static inline void tick_nohz_full_kick_cpu(int cpu) { }
 static inline void tick_nohz_full_kick(void) { }
-- 
cgit v1.2.3


From c6201cd8513db2db54b248a862672849ed9ccb82 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Thu, 7 May 2015 09:52:22 -0500
Subject: PCI/MSI: Remove unused pci_msi_off()

pci_msi_off() is unused, so remove it.

Removes the exported symbol pci_msi_off().

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 include/linux/pci.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 353db8dc4c6e..50b7c7d0206f 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -974,7 +974,6 @@ void pci_intx(struct pci_dev *dev, int enable);
 bool pci_intx_mask_supported(struct pci_dev *dev);
 bool pci_check_and_mask_intx(struct pci_dev *dev);
 bool pci_check_and_unmask_intx(struct pci_dev *dev);
-void pci_msi_off(struct pci_dev *dev);
 int pci_set_dma_max_seg_size(struct pci_dev *dev, unsigned int size);
 int pci_set_dma_seg_boundary(struct pci_dev *dev, unsigned long mask);
 int pci_wait_for_pending(struct pci_dev *dev, int pos, u16 mask);
-- 
cgit v1.2.3


From 385f83f85cd9428db82cae5e6f6f786be113b24c Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 28 Apr 2015 12:21:40 +0200
Subject: dmaengine: Remove Renesas Audio DMAC peri peri platform data

Commit 3cd44dcd35a6 ("dmaengine: remove Renesas Audio DMAC peri peri")
forgot to remove the header file with the platform data definitions.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 include/linux/platform_data/dma-rcar-audmapp.h | 34 --------------------------
 1 file changed, 34 deletions(-)
 delete mode 100644 include/linux/platform_data/dma-rcar-audmapp.h

(limited to 'include/linux')

diff --git a/include/linux/platform_data/dma-rcar-audmapp.h b/include/linux/platform_data/dma-rcar-audmapp.h
deleted file mode 100644
index 471fffebbeb4..000000000000
--- a/include/linux/platform_data/dma-rcar-audmapp.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * This is for Renesas R-Car Audio-DMAC-peri-peri.
- *
- * Copyright (C) 2014 Renesas Electronics Corporation
- * Copyright (C) 2014 Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
- *
- * This file is based on the include/linux/sh_dma.h
- *
- * Header for the new SH dmaengine driver
- *
- * Copyright (C) 2010 Guennadi Liakhovetski <g.liakhovetski@gmx.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#ifndef SH_AUDMAPP_H
-#define SH_AUDMAPP_H
-
-#include <linux/dmaengine.h>
-
-struct audmapp_slave_config {
-	int		slave_id;
-	dma_addr_t	src;
-	dma_addr_t	dst;
-	u32		chcr;
-};
-
-struct audmapp_pdata {
-	struct audmapp_slave_config *slave;
-	int slave_num;
-};
-
-#endif /* SH_AUDMAPP_H */
-- 
cgit v1.2.3


From 3289bdb429884c0279bf9ab72dff7b934f19dfc6 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 14 Apr 2015 13:19:42 +0200
Subject: sched: Move the loadavg code to a more obvious location

I could not find the loadavg code.. turns out it was hidden in a file
called proc.c. It further got mingled up with the cruft per rq load
indexes (which we really want to get rid of).

Move the per rq load indexes into the fair.c load-balance code (that's
the only thing that uses them) and rename proc.c to loadavg.c so we
can find it again.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
[ Did minor cleanups to the code. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 26a2e6122734..85cf253bc366 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -173,7 +173,12 @@ extern unsigned long nr_iowait_cpu(int cpu);
 extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);
 
 extern void calc_global_load(unsigned long ticks);
+
+#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
 extern void update_cpu_load_nohz(void);
+#else
+static inline void update_cpu_load_nohz(void) { }
+#endif
 
 extern unsigned long get_parent_ip(unsigned long addr);
 
-- 
cgit v1.2.3


From b76808e6808e34e7e78131d2b8cb0535622b8e9f Mon Sep 17 00:00:00 2001
From: Palmer Dabbelt <palmer@dabbelt.com>
Date: Thu, 30 Apr 2015 21:19:57 -0700
Subject: signals, sched: Change all uses of JOBCTL_* from 'int' to 'long'

c56fb6564dcd ("Fix a misaligned load inside ptrace_attach()") makes
jobctl an "unsigned long".  It makes sense to have the masks applied
to it match that type.  This is currently just a cosmetic change, but
it will prevent the mask from being unexpectedly truncated if we ever
end up with masks with more bits.

One instance of "signr" is an int, but I left this alone because the
mask ensures that it will never overflow.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Chris Metcalf <cmetcalf@ezchip.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: bobby.prani@gmail.com
Cc: oleg@redhat.com
Cc: paulmck@linux.vnet.ibm.com
Cc: richard@nod.at
Cc: vdavydov@parallels.com
Link: http://lkml.kernel.org/r/1430453997-32459-4-git-send-email-palmer@dabbelt.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 85cf253bc366..4f066cb625ad 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2082,22 +2082,22 @@ TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)
 #define JOBCTL_TRAPPING_BIT	21	/* switching to TRACED */
 #define JOBCTL_LISTENING_BIT	22	/* ptracer is listening for events */
 
-#define JOBCTL_STOP_DEQUEUED	(1 << JOBCTL_STOP_DEQUEUED_BIT)
-#define JOBCTL_STOP_PENDING	(1 << JOBCTL_STOP_PENDING_BIT)
-#define JOBCTL_STOP_CONSUME	(1 << JOBCTL_STOP_CONSUME_BIT)
-#define JOBCTL_TRAP_STOP	(1 << JOBCTL_TRAP_STOP_BIT)
-#define JOBCTL_TRAP_NOTIFY	(1 << JOBCTL_TRAP_NOTIFY_BIT)
-#define JOBCTL_TRAPPING		(1 << JOBCTL_TRAPPING_BIT)
-#define JOBCTL_LISTENING	(1 << JOBCTL_LISTENING_BIT)
+#define JOBCTL_STOP_DEQUEUED	(1UL << JOBCTL_STOP_DEQUEUED_BIT)
+#define JOBCTL_STOP_PENDING	(1UL << JOBCTL_STOP_PENDING_BIT)
+#define JOBCTL_STOP_CONSUME	(1UL << JOBCTL_STOP_CONSUME_BIT)
+#define JOBCTL_TRAP_STOP	(1UL << JOBCTL_TRAP_STOP_BIT)
+#define JOBCTL_TRAP_NOTIFY	(1UL << JOBCTL_TRAP_NOTIFY_BIT)
+#define JOBCTL_TRAPPING		(1UL << JOBCTL_TRAPPING_BIT)
+#define JOBCTL_LISTENING	(1UL << JOBCTL_LISTENING_BIT)
 
 #define JOBCTL_TRAP_MASK	(JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY)
 #define JOBCTL_PENDING_MASK	(JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK)
 
 extern bool task_set_jobctl_pending(struct task_struct *task,
-				    unsigned int mask);
+				    unsigned long mask);
 extern void task_clear_jobctl_trapping(struct task_struct *task);
 extern void task_clear_jobctl_pending(struct task_struct *task,
-				      unsigned int mask);
+				      unsigned long mask);
 
 static inline void rcu_copy_process(struct task_struct *p)
 {
-- 
cgit v1.2.3


From 7e60598785f30cf3dc9e476cc0fc3feeb37a0c63 Mon Sep 17 00:00:00 2001
From: Palmer Dabbelt <palmer@dabbelt.com>
Date: Thu, 30 Apr 2015 21:19:56 -0700
Subject: sched/wait: Change wait_on_bit*() to take an unsigned long *, not a
 void *

The implementations of wait_on_bit*() will only work with long-aligned
memory on systems that don't support misaligned loads and stores.

This patch changes the function prototypes to ensure that the compiler
will enforce alignment.

Running

  make defconfig
  make KFLAGS="-Werror"

seems to indicate that, as of c56fb6564dcd ("Fix a misaligned load
inside ptrace_attach()"), there are now no users of non-long-aligned
calls to wait_on_bit*().  I additionally tried a few "make randconfig"
attempts, none of which failed to compile for this reason.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Chris Metcalf <cmetcalf@ezchip.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: bobby.prani@gmail.com
Cc: oleg@redhat.com
Cc: paulmck@linux.vnet.ibm.com
Cc: richard@nod.at
Cc: vdavydov@parallels.com
Link: http://lkml.kernel.org/r/1430453997-32459-3-git-send-email-palmer@dabbelt.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 2db83349865b..d69ac4ecc88b 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -969,7 +969,7 @@ extern int bit_wait_io_timeout(struct wait_bit_key *);
  * on that signal.
  */
 static inline int
-wait_on_bit(void *word, int bit, unsigned mode)
+wait_on_bit(unsigned long *word, int bit, unsigned mode)
 {
 	might_sleep();
 	if (!test_bit(bit, word))
@@ -994,7 +994,7 @@ wait_on_bit(void *word, int bit, unsigned mode)
  * on that signal.
  */
 static inline int
-wait_on_bit_io(void *word, int bit, unsigned mode)
+wait_on_bit_io(unsigned long *word, int bit, unsigned mode)
 {
 	might_sleep();
 	if (!test_bit(bit, word))
@@ -1020,7 +1020,8 @@ wait_on_bit_io(void *word, int bit, unsigned mode)
  * received a signal and the mode permitted wakeup on that signal.
  */
 static inline int
-wait_on_bit_timeout(void *word, int bit, unsigned mode, unsigned long timeout)
+wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode,
+		    unsigned long timeout)
 {
 	might_sleep();
 	if (!test_bit(bit, word))
@@ -1047,7 +1048,8 @@ wait_on_bit_timeout(void *word, int bit, unsigned mode, unsigned long timeout)
  * on that signal.
  */
 static inline int
-wait_on_bit_action(void *word, int bit, wait_bit_action_f *action, unsigned mode)
+wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action,
+		   unsigned mode)
 {
 	might_sleep();
 	if (!test_bit(bit, word))
@@ -1075,7 +1077,7 @@ wait_on_bit_action(void *word, int bit, wait_bit_action_f *action, unsigned mode
  * the @mode allows that signal to wake the process.
  */
 static inline int
-wait_on_bit_lock(void *word, int bit, unsigned mode)
+wait_on_bit_lock(unsigned long *word, int bit, unsigned mode)
 {
 	might_sleep();
 	if (!test_and_set_bit(bit, word))
@@ -1099,7 +1101,7 @@ wait_on_bit_lock(void *word, int bit, unsigned mode)
  * the @mode allows that signal to wake the process.
  */
 static inline int
-wait_on_bit_lock_io(void *word, int bit, unsigned mode)
+wait_on_bit_lock_io(unsigned long *word, int bit, unsigned mode)
 {
 	might_sleep();
 	if (!test_and_set_bit(bit, word))
@@ -1125,7 +1127,8 @@ wait_on_bit_lock_io(void *word, int bit, unsigned mode)
  * the @mode allows that signal to wake the process.
  */
 static inline int
-wait_on_bit_lock_action(void *word, int bit, wait_bit_action_f *action, unsigned mode)
+wait_on_bit_lock_action(unsigned long *word, int bit, wait_bit_action_f *action,
+			unsigned mode)
 {
 	might_sleep();
 	if (!test_and_set_bit(bit, word))
-- 
cgit v1.2.3


From e7cc4173115347bcdaa5de2824dd46ef2c58425f Mon Sep 17 00:00:00 2001
From: Palmer Dabbelt <palmer@dabbelt.com>
Date: Thu, 30 Apr 2015 21:19:55 -0700
Subject: signals, ptrace, sched: Fix a misaligned load inside ptrace_attach()

The misaligned load exception arises when running ptrace_attach() on
the RISC-V (which hasn't been upstreamed yet).  The problem is that
wait_on_bit() takes a void* but then proceeds to call test_bit(),
which takes a long*.  This allows an int-aligned pointer to be passed
to test_bit(), which promptly fails.  This will manifest on any other
asm-generic port where unaligned loads trap, where sizeof(long) >
sizeof(int), and where task_struct.jobctl ends up not being
long-aligned.

This patch changes task_struct.jobctl to be a long, which ensures it
has the correct alignment.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Chris Metcalf <cmetcalf@ezchip.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: bobby.prani@gmail.com
Cc: oleg@redhat.com
Cc: paulmck@linux.vnet.ibm.com
Cc: richard@nod.at
Cc: vdavydov@parallels.com
Link: http://lkml.kernel.org/r/1430453997-32459-2-git-send-email-palmer@dabbelt.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4f066cb625ad..fb650a2f4a73 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1374,7 +1374,7 @@ struct task_struct {
 	int exit_state;
 	int exit_code, exit_signal;
 	int pdeath_signal;  /*  The signal sent when the parent dies  */
-	unsigned int jobctl;	/* JOBCTL_*, siglock protected */
+	unsigned long jobctl;	/* JOBCTL_*, siglock protected */
 
 	/* Used for emulating ABI behavior of previous Linux versions */
 	unsigned int personality;
-- 
cgit v1.2.3


From 316c1608d15c736439d4065ed12f306db554b3da Mon Sep 17 00:00:00 2001
From: Jason Low <jason.low2@hp.com>
Date: Tue, 28 Apr 2015 13:00:20 -0700
Subject: sched, timer: Convert usages of ACCESS_ONCE() in the scheduler to
 READ_ONCE()/WRITE_ONCE()

ACCESS_ONCE doesn't work reliably on non-scalar types. This patch removes
the rest of the existing usages of ACCESS_ONCE() in the scheduler, and use
the new READ_ONCE() and WRITE_ONCE() APIs as appropriate.

Signed-off-by: Jason Low <jason.low2@hp.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Waiman Long <Waiman.Long@hp.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Aswin Chandramouleeswaran <aswin@hp.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Mike Galbraith <umgwanakikbuti@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Cc: Scott J Norton <scott.norton@hp.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1430251224-5764-2-git-send-email-jason.low2@hp.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index fb650a2f4a73..d70910355b20 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -3085,13 +3085,13 @@ static inline void mm_update_next_owner(struct mm_struct *mm)
 static inline unsigned long task_rlimit(const struct task_struct *tsk,
 		unsigned int limit)
 {
-	return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_cur);
+	return READ_ONCE(tsk->signal->rlim[limit].rlim_cur);
 }
 
 static inline unsigned long task_rlimit_max(const struct task_struct *tsk,
 		unsigned int limit)
 {
-	return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_max);
+	return READ_ONCE(tsk->signal->rlim[limit].rlim_max);
 }
 
 static inline unsigned long rlimit(unsigned int limit)
-- 
cgit v1.2.3


From 1018016c706f7ff9f56fde3a649789c47085a293 Mon Sep 17 00:00:00 2001
From: Jason Low <jason.low2@hp.com>
Date: Tue, 28 Apr 2015 13:00:22 -0700
Subject: sched, timer: Replace spinlocks with atomics in
 thread_group_cputimer(), to improve scalability

While running a database workload, we found a scalability issue with itimers.

Much of the problem was caused by the thread_group_cputimer spinlock.
Each time we account for group system/user time, we need to obtain a
thread_group_cputimer's spinlock to update the timers. On larger systems
(such as a 16 socket machine), this caused more than 30% of total time
spent trying to obtain this kernel lock to update these group timer stats.

This patch converts the timers to 64-bit atomic variables and use
atomic add to update them without a lock. With this patch, the percent
of total time spent updating thread group cputimer timers was reduced
from 30% down to less than 1%.

Note: On 32-bit systems using the generic 64-bit atomics, this causes
sample_group_cputimer() to take locks 3 times instead of just 1 time.
However, we tested this patch on a 32-bit system ARM system using the
generic atomics and did not find the overhead to be much of an issue.
An explanation for why this isn't an issue is that 32-bit systems usually
have small numbers of CPUs, and cacheline contention from extra spinlocks
called periodically is not really apparent on smaller systems.

Signed-off-by: Jason Low <jason.low2@hp.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Aswin Chandramouleeswaran <aswin@hp.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Mike Galbraith <umgwanakikbuti@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Cc: Scott J Norton <scott.norton@hp.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Waiman Long <Waiman.Long@hp.com>
Link: http://lkml.kernel.org/r/1430251224-5764-4-git-send-email-jason.low2@hp.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/init_task.h |  7 ++++---
 include/linux/sched.h     | 10 +++-------
 2 files changed, 7 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 696d22312b31..7b9d8b59e7bf 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -50,9 +50,10 @@ extern struct fs_struct init_fs;
 	.cpu_timers	= INIT_CPU_TIMERS(sig.cpu_timers),		\
 	.rlim		= INIT_RLIMITS,					\
 	.cputimer	= { 						\
-		.cputime = INIT_CPUTIME,				\
-		.running = 0,						\
-		.lock = __RAW_SPIN_LOCK_UNLOCKED(sig.cputimer.lock),	\
+		.utime 		  = ATOMIC64_INIT(0),			\
+		.stime		  = ATOMIC64_INIT(0),			\
+		.sum_exec_runtime = ATOMIC64_INIT(0),			\
+		.running 	  = 0					\
 	},								\
 	.cred_guard_mutex =						\
 		 __MUTEX_INITIALIZER(sig.cred_guard_mutex),		\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d70910355b20..a45874c3fab6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -598,9 +598,10 @@ struct task_cputime {
  * used for thread group CPU timer calculations.
  */
 struct thread_group_cputimer {
-	struct task_cputime cputime;
+	atomic64_t utime;
+	atomic64_t stime;
+	atomic64_t sum_exec_runtime;
 	int running;
-	raw_spinlock_t lock;
 };
 
 #include <linux/rwsem.h>
@@ -2967,11 +2968,6 @@ static __always_inline bool need_resched(void)
 void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times);
 void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times);
 
-static inline void thread_group_cputime_init(struct signal_struct *sig)
-{
-	raw_spin_lock_init(&sig->cputimer.lock);
-}
-
 /*
  * Reevaluate whether the task has signals pending delivery.
  * Wake the task if so.
-- 
cgit v1.2.3


From 971e8a985482c76487edb5a49811e99b96e846e1 Mon Sep 17 00:00:00 2001
From: Jason Low <jason.low2@hp.com>
Date: Tue, 28 Apr 2015 13:00:23 -0700
Subject: sched, timer: Provide an atomic 'struct task_cputime' data structure

This patch adds an atomic variant of the 'struct task_cputime' data structure,
which can be used to store and update task_cputime statistics without
needing to do locking.

Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Jason Low <jason.low2@hp.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Aswin Chandramouleeswaran <aswin@hp.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Mike Galbraith <umgwanakikbuti@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Cc: Scott J Norton <scott.norton@hp.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Waiman Long <Waiman.Long@hp.com>
Link: http://lkml.kernel.org/r/1430251224-5764-5-git-send-email-jason.low2@hp.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index a45874c3fab6..6eb78cd45da7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -572,6 +572,23 @@ struct task_cputime {
 		.sum_exec_runtime = 0,				\
 	}
 
+/*
+ * This is the atomic variant of task_cputime, which can be used for
+ * storing and updating task_cputime statistics without locking.
+ */
+struct task_cputime_atomic {
+	atomic64_t utime;
+	atomic64_t stime;
+	atomic64_t sum_exec_runtime;
+};
+
+#define INIT_CPUTIME_ATOMIC \
+	(struct task_cputime_atomic) {				\
+		.utime = ATOMIC64_INIT(0),			\
+		.stime = ATOMIC64_INIT(0),			\
+		.sum_exec_runtime = ATOMIC64_INIT(0),		\
+	}
+
 #ifdef CONFIG_PREEMPT_COUNT
 #define PREEMPT_DISABLED	(1 + PREEMPT_ENABLED)
 #else
-- 
cgit v1.2.3


From 7110744516276e906f9197e2857d026eb2343393 Mon Sep 17 00:00:00 2001
From: Jason Low <jason.low2@hp.com>
Date: Tue, 28 Apr 2015 13:00:24 -0700
Subject: sched, timer: Use the atomic task_cputime in thread_group_cputimer

Recent optimizations were made to thread_group_cputimer to improve its
scalability by keeping track of cputime stats without a lock. However,
the values were open coded to the structure, causing them to be at
a different abstraction level from the regular task_cputime structure.
Furthermore, any subsequent similar optimizations would not be able to
share the new code, since they are specific to thread_group_cputimer.

This patch adds the new task_cputime_atomic data structure (introduced in
the previous patch in the series) to thread_group_cputimer for keeping
track of the cputime atomically, which also helps generalize the code.

Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Jason Low <jason.low2@hp.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Aswin Chandramouleeswaran <aswin@hp.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Mike Galbraith <umgwanakikbuti@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Cc: Scott J Norton <scott.norton@hp.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Waiman Long <Waiman.Long@hp.com>
Link: http://lkml.kernel.org/r/1430251224-5764-6-git-send-email-jason.low2@hp.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/init_task.h | 6 ++----
 include/linux/sched.h     | 4 +---
 2 files changed, 3 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 7b9d8b59e7bf..bb9b075f0eb0 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -50,10 +50,8 @@ extern struct fs_struct init_fs;
 	.cpu_timers	= INIT_CPU_TIMERS(sig.cpu_timers),		\
 	.rlim		= INIT_RLIMITS,					\
 	.cputimer	= { 						\
-		.utime 		  = ATOMIC64_INIT(0),			\
-		.stime		  = ATOMIC64_INIT(0),			\
-		.sum_exec_runtime = ATOMIC64_INIT(0),			\
-		.running 	  = 0					\
+		.cputime_atomic	= INIT_CPUTIME_ATOMIC,			\
+		.running	= 0,					\
 	},								\
 	.cred_guard_mutex =						\
 		 __MUTEX_INITIALIZER(sig.cred_guard_mutex),		\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6eb78cd45da7..4adc536a3b03 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -615,9 +615,7 @@ struct task_cputime_atomic {
  * used for thread group CPU timer calculations.
  */
 struct thread_group_cputimer {
-	atomic64_t utime;
-	atomic64_t stime;
-	atomic64_t sum_exec_runtime;
+	struct task_cputime_atomic cputime_atomic;
 	int running;
 };
 
-- 
cgit v1.2.3


From 7675104990ed255b9315a82ae827ff312a2a88a2 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 1 May 2015 08:27:50 -0700
Subject: sched: Implement lockless wake-queues

This is useful for locking primitives that can effect multiple
wakeups per operation and want to avoid lock internal lock contention
by delaying the wakeups until we've released the lock internal locks.

Alternatively it can be used to avoid issuing multiple wakeups, and
thus save a few cycles, in packet processing. Queue all target tasks
and wakeup once you've processed all packets. That way you avoid
waking the target task multiple times if there were multiple packets
for the same task.

Properties of a wake_q are:
- Lockless, as queue head must reside on the stack.
- Being a queue, maintains wakeup order passed by the callers. This can
  be important for otherwise, in scenarios where highly contended locks
  could affect any reliance on lock fairness.
- A queued task cannot be added again until it is woken up.

This patch adds the needed infrastructure into the scheduler code
and uses the new wake_list to delay the futex wakeups until
after we've released the hash bucket locks.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
[tweaks, adjustments, comments, etc.]
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Chris Mason <clm@fb.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: George Spelvin <linux@horizon.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1430494072-30283-2-git-send-email-dave@stgolabs.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4adc536a3b03..254d88e80f65 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -920,6 +920,50 @@ enum cpu_idle_type {
 #define SCHED_CAPACITY_SHIFT	10
 #define SCHED_CAPACITY_SCALE	(1L << SCHED_CAPACITY_SHIFT)
 
+/*
+ * Wake-queues are lists of tasks with a pending wakeup, whose
+ * callers have already marked the task as woken internally,
+ * and can thus carry on. A common use case is being able to
+ * do the wakeups once the corresponding user lock as been
+ * released.
+ *
+ * We hold reference to each task in the list across the wakeup,
+ * thus guaranteeing that the memory is still valid by the time
+ * the actual wakeups are performed in wake_up_q().
+ *
+ * One per task suffices, because there's never a need for a task to be
+ * in two wake queues simultaneously; it is forbidden to abandon a task
+ * in a wake queue (a call to wake_up_q() _must_ follow), so if a task is
+ * already in a wake queue, the wakeup will happen soon and the second
+ * waker can just skip it.
+ *
+ * The WAKE_Q macro declares and initializes the list head.
+ * wake_up_q() does NOT reinitialize the list; it's expected to be
+ * called near the end of a function, where the fact that the queue is
+ * not used again will be easy to see by inspection.
+ *
+ * Note that this can cause spurious wakeups. schedule() callers
+ * must ensure the call is done inside a loop, confirming that the
+ * wakeup condition has in fact occurred.
+ */
+struct wake_q_node {
+	struct wake_q_node *next;
+};
+
+struct wake_q_head {
+	struct wake_q_node *first;
+	struct wake_q_node **lastp;
+};
+
+#define WAKE_Q_TAIL ((struct wake_q_node *) 0x01)
+
+#define WAKE_Q(name)					\
+	struct wake_q_head name = { WAKE_Q_TAIL, &name.first }
+
+extern void wake_q_add(struct wake_q_head *head,
+		       struct task_struct *task);
+extern void wake_up_q(struct wake_q_head *head);
+
 /*
  * sched-domains (multiprocessor balancing) declarations:
  */
@@ -1532,6 +1576,8 @@ struct task_struct {
 	/* Protection of the PI data structures: */
 	raw_spinlock_t pi_lock;
 
+	struct wake_q_node wake_q;
+
 #ifdef CONFIG_RT_MUTEXES
 	/* PI waiters blocked on a rt_mutex held by this task */
 	struct rb_root pi_waiters;
-- 
cgit v1.2.3


From ff303e66c240ba6269e31817a386995440a18c99 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 17 Apr 2015 20:05:30 +0200
Subject: perf: Fix software migrate events

Stephane asked about PERF_COUNT_SW_CPU_MIGRATIONS and I realized it
was borken:

 > The problem is that the task isn't actually scheduled while its being
 > migrated (obviously), and if its not scheduled, the counters aren't
 > scheduled either, so there's no observing of the fact.
 >
 > A further problem with migrations is that many migrations happen from
 > softirq context, which is nested inside the 'random' task context of
 > whoemever happens to run at that time, similarly for the wakeup
 > migrations triggered from (soft)irq context. All those end up being
 > accounted in the task that's currently running, eg. your 'ls'.

The below cures this by marking a task as migrated and accounting it
on the subsequent sched_in().

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h | 24 ++++++++++++++++++++++++
 include/linux/sched.h      |  7 ++++---
 2 files changed, 28 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 61992cf2e977..e86f85abeda7 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -798,11 +798,33 @@ perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
 
 extern struct static_key_deferred perf_sched_events;
 
+static __always_inline bool
+perf_sw_migrate_enabled(void)
+{
+	if (static_key_false(&perf_swevent_enabled[PERF_COUNT_SW_CPU_MIGRATIONS]))
+		return true;
+	return false;
+}
+
+static inline void perf_event_task_migrate(struct task_struct *task)
+{
+	if (perf_sw_migrate_enabled())
+		task->sched_migrated = 1;
+}
+
 static inline void perf_event_task_sched_in(struct task_struct *prev,
 					    struct task_struct *task)
 {
 	if (static_key_false(&perf_sched_events.key))
 		__perf_event_task_sched_in(prev, task);
+
+	if (perf_sw_migrate_enabled() && task->sched_migrated) {
+		struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
+
+		perf_fetch_caller_regs(regs);
+		___perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, regs, 0);
+		task->sched_migrated = 0;
+	}
 }
 
 static inline void perf_event_task_sched_out(struct task_struct *prev,
@@ -925,6 +947,8 @@ perf_aux_output_skip(struct perf_output_handle *handle,
 static inline void *
 perf_get_aux(struct perf_output_handle *handle)				{ return NULL; }
 static inline void
+perf_event_task_migrate(struct task_struct *task)			{ }
+static inline void
 perf_event_task_sched_in(struct task_struct *prev,
 			 struct task_struct *task)			{ }
 static inline void
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 26a2e6122734..2c5e6c3db654 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1356,9 +1356,6 @@ struct task_struct {
 #endif
 
 	struct mm_struct *mm, *active_mm;
-#ifdef CONFIG_COMPAT_BRK
-	unsigned brk_randomized:1;
-#endif
 	/* per-thread vma caching */
 	u32 vmacache_seqnum;
 	struct vm_area_struct *vmacache[VMACACHE_SIZE];
@@ -1381,10 +1378,14 @@ struct task_struct {
 	/* Revert to default priority/policy when forking */
 	unsigned sched_reset_on_fork:1;
 	unsigned sched_contributes_to_load:1;
+	unsigned sched_migrated:1;
 
 #ifdef CONFIG_MEMCG_KMEM
 	unsigned memcg_kmem_skip_account:1;
 #endif
+#ifdef CONFIG_COMPAT_BRK
+	unsigned brk_randomized:1;
+#endif
 
 	unsigned long atomic_flags; /* Flags needing atomic access. */
 
-- 
cgit v1.2.3


From 59aabfc7e959f5f213e4e5cc7567ab4934da2adf Mon Sep 17 00:00:00 2001
From: Waiman Long <Waiman.Long@hp.com>
Date: Thu, 30 Apr 2015 17:12:16 -0400
Subject: locking/rwsem: Reduce spinlock contention in wakeup after
 up_read()/up_write()

In up_write()/up_read(), rwsem_wake() will be called whenever it
detects that some writers/readers are waiting. The rwsem_wake()
function will take the wait_lock and call __rwsem_do_wake() to do the
real wakeup.  For a heavily contended rwsem, doing a spin_lock() on
wait_lock will cause further contention on the heavily contended rwsem
cacheline resulting in delay in the completion of the up_read/up_write
operations.

This patch makes the wait_lock taking and the call to __rwsem_do_wake()
optional if at least one spinning writer is present. The spinning
writer will be able to take the rwsem and call rwsem_wake() later
when it calls up_write(). With the presence of a spinning writer,
rwsem_wake() will now try to acquire the lock using trylock. If that
fails, it will just quit.

Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Waiman Long <Waiman.Long@hp.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Davidlohr Bueso <dave@stgolabs.net>
Acked-by: Jason Low <jason.low2@hp.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Douglas Hatch <doug.hatch@hp.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Scott J Norton <scott.norton@hp.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1430428337-16802-2-git-send-email-Waiman.Long@hp.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/osq_lock.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/osq_lock.h b/include/linux/osq_lock.h
index 3a6490e81b28..703ea5c30a33 100644
--- a/include/linux/osq_lock.h
+++ b/include/linux/osq_lock.h
@@ -32,4 +32,9 @@ static inline void osq_lock_init(struct optimistic_spin_queue *lock)
 extern bool osq_lock(struct optimistic_spin_queue *lock);
 extern void osq_unlock(struct optimistic_spin_queue *lock);
 
+static inline bool osq_is_locked(struct optimistic_spin_queue *lock)
+{
+	return atomic_read(&lock->tail) != OSQ_UNLOCKED_VAL;
+}
+
 #endif
-- 
cgit v1.2.3


From 663fdcbee0a656cdaef934e7f50e6c2670373bc9 Mon Sep 17 00:00:00 2001
From: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Date: Thu, 30 Apr 2015 17:27:21 +0530
Subject: kernel: Replace reference to ASSIGN_ONCE() with WRITE_ONCE() in
 comment

Looks like commit :

 43239cbe79fc ("kernel: Change ASSIGN_ONCE(val, x) to WRITE_ONCE(x, val)")

left behind a reference to ASSIGN_ONCE(). Update this to WRITE_ONCE().

Signed-off-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: borntraeger@de.ibm.com
Cc: dave@stgolabs.net
Cc: paulmck@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/20150430115721.22278.94082.stgit@preeti.in.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/compiler.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 867722591be2..a7c0941d10da 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -450,7 +450,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
  * with an explicit memory barrier or atomic instruction that provides the
  * required ordering.
  *
- * If possible use READ_ONCE/ASSIGN_ONCE instead.
+ * If possible use READ_ONCE()/WRITE_ONCE() instead.
  */
 #define __ACCESS_ONCE(x) ({ \
 	 __maybe_unused typeof(x) __var = (__force typeof(x)) 0; \
-- 
cgit v1.2.3


From 56f13c0d9524c5816f5dc9c91b9d766d6b1064ca Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Thu, 9 Apr 2015 12:35:47 +0300
Subject: dmaengine: of_dma: Support for DMA routers

DMA routers are transparent devices used to mux DMA requests from
peripherals to DMA controllers. They are used when the SoC integrates more
devices with DMA requests then their controller can handle.
DRA7x is one example of such SoC, where the sDMA can hanlde 128 DMA request
lines, but in SoC level it has 205 DMA requests.

The of_dma_router will be registered as of_dma_controller with special
xlate function and additional parameters. The driver for the router is
responsible to craft the dma_spec (in the of_dma_route_allocate callback)
which can be used to requests a DMA channel from the real DMA controller.
This way the router can be transparent for the system while remaining generic
enough to be used in different environments.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 include/linux/dmaengine.h | 17 +++++++++++++++++
 include/linux/of_dma.h    | 21 +++++++++++++++++++++
 2 files changed, 38 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index ad419757241f..abf63ceabef9 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -221,6 +221,16 @@ struct dma_chan_percpu {
 	unsigned long bytes_transferred;
 };
 
+/**
+ * struct dma_router - DMA router structure
+ * @dev: pointer to the DMA router device
+ * @route_free: function to be called when the route can be disconnected
+ */
+struct dma_router {
+	struct device *dev;
+	void (*route_free)(struct device *dev, void *route_data);
+};
+
 /**
  * struct dma_chan - devices supply DMA channels, clients use them
  * @device: ptr to the dma device who supplies this channel, always !%NULL
@@ -232,6 +242,8 @@ struct dma_chan_percpu {
  * @local: per-cpu pointer to a struct dma_chan_percpu
  * @client_count: how many clients are using this channel
  * @table_count: number of appearances in the mem-to-mem allocation table
+ * @router: pointer to the DMA router structure
+ * @route_data: channel specific data for the router
  * @private: private data for certain client-channel associations
  */
 struct dma_chan {
@@ -247,6 +259,11 @@ struct dma_chan {
 	struct dma_chan_percpu __percpu *local;
 	int client_count;
 	int table_count;
+
+	/* DMA router */
+	struct dma_router *router;
+	void *route_data;
+
 	void *private;
 };
 
diff --git a/include/linux/of_dma.h b/include/linux/of_dma.h
index 56bc026c143f..98ba7525929e 100644
--- a/include/linux/of_dma.h
+++ b/include/linux/of_dma.h
@@ -23,6 +23,9 @@ struct of_dma {
 	struct device_node	*of_node;
 	struct dma_chan		*(*of_dma_xlate)
 				(struct of_phandle_args *, struct of_dma *);
+	void			*(*of_dma_route_allocate)
+				(struct of_phandle_args *, struct of_dma *);
+	struct dma_router	*dma_router;
 	void			*of_dma_data;
 };
 
@@ -37,12 +40,20 @@ extern int of_dma_controller_register(struct device_node *np,
 		(struct of_phandle_args *, struct of_dma *),
 		void *data);
 extern void of_dma_controller_free(struct device_node *np);
+
+extern int of_dma_router_register(struct device_node *np,
+		void *(*of_dma_route_allocate)
+		(struct of_phandle_args *, struct of_dma *),
+		struct dma_router *dma_router);
+#define of_dma_router_free of_dma_controller_free
+
 extern struct dma_chan *of_dma_request_slave_channel(struct device_node *np,
 						     const char *name);
 extern struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec,
 		struct of_dma *ofdma);
 extern struct dma_chan *of_dma_xlate_by_chan_id(struct of_phandle_args *dma_spec,
 		struct of_dma *ofdma);
+
 #else
 static inline int of_dma_controller_register(struct device_node *np,
 		struct dma_chan *(*of_dma_xlate)
@@ -56,6 +67,16 @@ static inline void of_dma_controller_free(struct device_node *np)
 {
 }
 
+static inline int of_dma_router_register(struct device_node *np,
+		void *(*of_dma_route_allocate)
+		(struct of_phandle_args *, struct of_dma *),
+		struct dma_router *dma_router)
+{
+	return -ENODEV;
+}
+
+#define of_dma_router_free of_dma_controller_free
+
 static inline struct dma_chan *of_dma_request_slave_channel(struct device_node *np,
 						     const char *name)
 {
-- 
cgit v1.2.3


From 4ae92bc77ac8e620f7c8d59b5882a4cb0d1c4ef1 Mon Sep 17 00:00:00 2001
From: Nicolas Schichan <nschichan@freebox.fr>
Date: Wed, 6 May 2015 16:12:27 +0200
Subject: net: filter: add a callback to allow classic post-verifier
 transformations

This is in preparation for use by the seccomp code, the rationale is
not to duplicate additional code within the seccomp layer, but instead,
have it abstracted and hidden within the classic BPF API.

As an interim step, this now also makes bpf_prepare_filter() visible
(not as exported symbol though), so that seccomp can reuse that code
path instead of reimplementing it.

Joint work with Daniel Borkmann.

Signed-off-by: Nicolas Schichan <nschichan@freebox.fr>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Kees Cook <keescook@chromium.org>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index fa11b3a367be..91996247cb55 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -384,7 +384,13 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
 int sk_attach_bpf(u32 ufd, struct sock *sk);
 int sk_detach_filter(struct sock *sk);
 
+typedef int (*bpf_aux_classic_check_t)(struct sock_filter *filter,
+				       unsigned int flen);
+
 int bpf_check_classic(const struct sock_filter *filter, unsigned int flen);
+struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp,
+				    bpf_aux_classic_check_t trans);
+
 int sk_get_filter(struct sock *sk, struct sock_filter __user *filter,
 		  unsigned int len);
 
-- 
cgit v1.2.3


From d9e12f42e58da475379b9080708b94f2095904af Mon Sep 17 00:00:00 2001
From: Nicolas Schichan <nschichan@freebox.fr>
Date: Wed, 6 May 2015 16:12:28 +0200
Subject: seccomp: simplify seccomp_prepare_filter and reuse bpf_prepare_filter

Remove the calls to bpf_check_classic(), bpf_convert_filter() and
bpf_migrate_runtime() and let bpf_prepare_filter() take care of that
instead.

seccomp_check_filter() is passed to bpf_prepare_filter() so that it
gets called from there, after bpf_check_classic().

We can now remove exposure of two internal classic BPF functions
previously used by seccomp. The export of bpf_check_classic() symbol,
previously known as sk_chk_filter(), was there since pre git times,
and no in-tree module was using it, therefore remove it.

Joint work with Daniel Borkmann.

Signed-off-by: Nicolas Schichan <nschichan@freebox.fr>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Kees Cook <keescook@chromium.org>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 91996247cb55..0dcb44bcfc5f 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -363,9 +363,6 @@ int sk_filter(struct sock *sk, struct sk_buff *skb);
 void bpf_prog_select_runtime(struct bpf_prog *fp);
 void bpf_prog_free(struct bpf_prog *fp);
 
-int bpf_convert_filter(struct sock_filter *prog, int len,
-		       struct bpf_insn *new_prog, int *new_len);
-
 struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags);
 struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
 				  gfp_t gfp_extra_flags);
@@ -387,7 +384,6 @@ int sk_detach_filter(struct sock *sk);
 typedef int (*bpf_aux_classic_check_t)(struct sock_filter *filter,
 				       unsigned int flen);
 
-int bpf_check_classic(const struct sock_filter *filter, unsigned int flen);
 struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp,
 				    bpf_aux_classic_check_t trans);
 
-- 
cgit v1.2.3


From ac67eb2c5347bd9976308c0e0cf1d9e7ca690342 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 6 May 2015 16:12:30 +0200
Subject: seccomp, filter: add and use bpf_prog_create_from_user from seccomp

Seccomp has always been a special candidate when it comes to preparation
of its filters in seccomp_prepare_filter(). Due to the extra checks and
filter rewrite it partially duplicates code and has BPF internals exposed.

This patch adds a generic API inside the BPF code code that seccomp can use
and thus keep it's filter preparation code minimal and better maintainable.
The other side-effect is that now classic JITs can add seccomp support as
well by only providing a BPF_LDX | BPF_W | BPF_ABS translation.

Tested with seccomp and BPF test suites.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Nicolas Schichan <nschichan@freebox.fr>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Kees Cook <keescook@chromium.org>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 0dcb44bcfc5f..3c03a6085b82 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -374,19 +374,17 @@ static inline void bpf_prog_unlock_free(struct bpf_prog *fp)
 	__bpf_prog_free(fp);
 }
 
+typedef int (*bpf_aux_classic_check_t)(struct sock_filter *filter,
+				       unsigned int flen);
+
 int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog);
+int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog,
+			      bpf_aux_classic_check_t trans);
 void bpf_prog_destroy(struct bpf_prog *fp);
 
 int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
 int sk_attach_bpf(u32 ufd, struct sock *sk);
 int sk_detach_filter(struct sock *sk);
-
-typedef int (*bpf_aux_classic_check_t)(struct sock_filter *filter,
-				       unsigned int flen);
-
-struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp,
-				    bpf_aux_classic_check_t trans);
-
 int sk_get_filter(struct sock *sk, struct sock_filter __user *filter,
 		  unsigned int len);
 
-- 
cgit v1.2.3


From 59324cf35aba5336b611074028777838a963d03b Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Thu, 7 May 2015 11:02:53 +0200
Subject: netlink: allow to listen "all" netns

More accurately, listen all netns that have a nsid assigned into the netns
where the netlink socket is opened.
For this purpose, a netlink socket option is added:
NETLINK_LISTEN_ALL_NSID. When this option is set on a netlink socket, this
socket will receive netlink notifications from all netns that have a nsid
assigned into the netns where the socket has been opened. The nsid is sent
to userland via an anscillary data.

With this patch, a daemon needs only one socket to listen many netns. This
is useful when the number of netns is high.

Because 0 is a valid value for a nsid, the field nsid_is_set indicates if
the field nsid is valid or not. skb->cb is initialized to 0 on skb
allocation, thus we are sure that we will never send a nsid 0 by error to
the userland.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 6835c1279df7..9120edb650a0 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -28,6 +28,8 @@ struct netlink_skb_parms {
 	__u32			dst_group;
 	__u32			flags;
 	struct sock		*sk;
+	bool			nsid_is_set;
+	int			nsid;
 };
 
 #define NETLINK_CB(skb)		(*(struct netlink_skb_parms*)&((skb)->cb))
-- 
cgit v1.2.3


From 920ce39f6c204d4ce4d8acebe7522f0dfa95f662 Mon Sep 17 00:00:00 2001
From: Jason Low <jason.low2@hp.com>
Date: Fri, 8 May 2015 14:31:50 -0700
Subject: sched, timer: Fix documentation for 'struct thread_group_cputimer'

Fix the docbook build bug reported by Fengguang Wu.

Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Jason Low <jason.low2@hp.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman.Long@hp.com
Cc: aswin@hp.com
Cc: bp@alien8.de
Cc: dave@stgolabs.net
Cc: fweisbec@gmail.com
Cc: mgorman@suse.de
Cc: oleg@redhat.com
Cc: paulmck@linux.vnet.ibm.com
Cc: preeti@linux.vnet.ibm.com
Cc: riel@redhat.com
Cc: rostedt@goodmis.org
Cc: scott.norton@hp.com
Cc: torvalds@linux-foundation.org
Cc: umgwanakikbuti@gmail.com
Link: http://lkml.kernel.org/r/1431120710.5136.12.camel@j-VirtualBox
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 254d88e80f65..0eceeec5a01a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -606,10 +606,9 @@ struct task_cputime_atomic {
 
 /**
  * struct thread_group_cputimer - thread group interval timer counts
- * @cputime:		thread group interval timers.
+ * @cputime_atomic:	atomic thread group interval timers.
  * @running:		non-zero when there are timers running and
  * 			@cputime receives updates.
- * @lock:		lock for fields in this struct.
  *
  * This structure contains the version of task_cputime, above, that is
  * used for thread group CPU timer calculations.
-- 
cgit v1.2.3


From 34ef33f7da6b00900d3a896d33522a035a930245 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 28 Apr 2015 14:04:07 +0200
Subject: usb: phy: Remove the phy-rcar-gen2-usb driver

The phy-rcar-gen2-usb driver, which supports legacy platform data only,
is no longer used since commit a483dcbfa21f919c ("ARM: shmobile: lager:
Remove legacy board support").

This driver was superseded by the DT-only phy-rcar-gen2 driver, which
was introduced in commit 1233f59f745b237d ("phy: Renesas R-Car Gen2 PHY
driver").

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/platform_data/usb-rcar-gen2-phy.h | 22 ----------------------
 1 file changed, 22 deletions(-)
 delete mode 100644 include/linux/platform_data/usb-rcar-gen2-phy.h

(limited to 'include/linux')

diff --git a/include/linux/platform_data/usb-rcar-gen2-phy.h b/include/linux/platform_data/usb-rcar-gen2-phy.h
deleted file mode 100644
index dd3ba46c0d90..000000000000
--- a/include/linux/platform_data/usb-rcar-gen2-phy.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright (C) 2013 Renesas Solutions Corp.
- * Copyright (C) 2013 Cogent Embedded, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef __USB_RCAR_GEN2_PHY_H
-#define __USB_RCAR_GEN2_PHY_H
-
-#include <linux/types.h>
-
-struct rcar_gen2_phy_platform_data {
-	/* USB channel 0 configuration */
-	bool chan0_pci:1;	/* true: PCI USB host 0, false: USBHS */
-	/* USB channel 2 configuration */
-	bool chan2_pci:1;	/* true: PCI USB host 2, false: USBSS */
-};
-
-#endif
-- 
cgit v1.2.3


From 1c5841e832e2d7563c31de4946118e78baf573a3 Mon Sep 17 00:00:00 2001
From: Eddie Huang <eddie.huang@mediatek.com>
Date: Tue, 28 Apr 2015 21:40:32 +0800
Subject: tty: serial: 8250: export early_serial8250_setup function

8250-like uart driver may call early_serial8250_setup to
reuse 8250_early.c character output function.

Signed-off-by: Eddie Huang <eddie.huang@mediatek.com>
Tested-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/serial_8250.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
index 78097e7a330a..f0c68d88b6f4 100644
--- a/include/linux/serial_8250.h
+++ b/include/linux/serial_8250.h
@@ -137,6 +137,8 @@ extern int early_serial_setup(struct uart_port *port);
 
 extern unsigned int serial8250_early_in(struct uart_port *port, int offset);
 extern void serial8250_early_out(struct uart_port *port, int offset, int value);
+extern int early_serial8250_setup(struct earlycon_device *device,
+					 const char *options);
 extern void serial8250_do_set_termios(struct uart_port *port,
 		struct ktermios *termios, struct ktermios *old);
 extern int serial8250_do_startup(struct uart_port *port);
-- 
cgit v1.2.3


From c27ffc1080179c3f3b85e1e194fa61f1c9923b62 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Thu, 30 Apr 2015 18:21:25 +0200
Subject: serial: sh-sci: Move private definitions to private header file

Move private register definitions and enums from the public
<linux/serial_sci.h> header file to the driver private "sh-sci.h" header
file.

The common Serial Control Register definitions are left in the public
header file, as they're needed to fill in plat_sci_port.scscr on legacy
systems not using DT.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/serial_sci.h | 67 +---------------------------------------------
 1 file changed, 1 insertion(+), 66 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h
index 6c5e3bb282b0..395fceb8c060 100644
--- a/include/linux/serial_sci.h
+++ b/include/linux/serial_sci.h
@@ -10,13 +10,6 @@
 
 #define SCIx_NOT_SUPPORTED	(-1)
 
-/* SCSMR (Serial Mode Register) */
-#define SCSMR_CHR	(1 << 6)	/* 7-bit Character Length */
-#define SCSMR_PE	(1 << 5)	/* Parity Enable */
-#define SCSMR_ODD	(1 << 4)	/* Odd Parity */
-#define SCSMR_STOP	(1 << 3)	/* Stop Bit Length */
-#define SCSMR_CKS	0x0003		/* Clock Select */
-
 /* Serial Control Register (@ = not supported by all parts) */
 #define SCSCR_TIE	(1 << 7)	/* Transmit Interrupt Enable */
 #define SCSCR_RIE	(1 << 6)	/* Receive Interrupt Enable */
@@ -26,43 +19,7 @@
 #define SCSCR_TOIE	(1 << 2)	/* Timeout Interrupt Enable @ */
 #define SCSCR_CKE1	(1 << 1)	/* Clock Enable 1 */
 #define SCSCR_CKE0	(1 << 0)	/* Clock Enable 0 */
-/* SCIFA/SCIFB only */
-#define SCSCR_TDRQE	(1 << 15)	/* Tx Data Transfer Request Enable */
-#define SCSCR_RDRQE	(1 << 14)	/* Rx Data Transfer Request Enable */
-
-/* SCxSR (Serial Status Register) on SCI */
-#define SCI_TDRE  0x80			/* Transmit Data Register Empty */
-#define SCI_RDRF  0x40			/* Receive Data Register Full */
-#define SCI_ORER  0x20			/* Overrun Error */
-#define SCI_FER   0x10			/* Framing Error */
-#define SCI_PER   0x08			/* Parity Error */
-#define SCI_TEND  0x04			/* Transmit End */
-
-#define SCI_DEFAULT_ERROR_MASK (SCI_PER | SCI_FER)
-
-/* SCxSR (Serial Status Register) on SCIF, HSCIF */
-#define SCIF_ER    0x0080		/* Receive Error */
-#define SCIF_TEND  0x0040		/* Transmission End */
-#define SCIF_TDFE  0x0020		/* Transmit FIFO Data Empty */
-#define SCIF_BRK   0x0010		/* Break Detect */
-#define SCIF_FER   0x0008		/* Framing Error */
-#define SCIF_PER   0x0004		/* Parity Error */
-#define SCIF_RDF   0x0002		/* Receive FIFO Data Full */
-#define SCIF_DR    0x0001		/* Receive Data Ready */
-
-#define SCIF_DEFAULT_ERROR_MASK (SCIF_PER | SCIF_FER | SCIF_ER | SCIF_BRK)
-
-/* SCFCR (FIFO Control Register) */
-#define SCFCR_LOOP	(1 << 0)	/* Loopback Test */
-
-/* SCSPTR (Serial Port Register), optional */
-#define SCSPTR_RTSIO	(1 << 7)	/* Serial Port RTS Pin Input/Output */
-#define SCSPTR_CTSIO	(1 << 5)	/* Serial Port CTS Pin Input/Output */
-#define SCSPTR_SPB2IO	(1 << 1)	/* Serial Port Break Input/Output */
-#define SCSPTR_SPB2DT	(1 << 0)	/* Serial Port Break Data */
-
-/* HSSRR HSCIF */
-#define HSCIF_SRE	0x8000		/* Sampling Rate Register Enable */
+
 
 enum {
 	SCIx_PROBE_REGTYPE,
@@ -82,28 +39,6 @@ enum {
 	SCIx_NR_REGTYPES,
 };
 
-/*
- * SCI register subset common for all port types.
- * Not all registers will exist on all parts.
- */
-enum {
-	SCSMR,				/* Serial Mode Register */
-	SCBRR,				/* Bit Rate Register */
-	SCSCR,				/* Serial Control Register */
-	SCxSR,				/* Serial Status Register */
-	SCFCR,				/* FIFO Control Register */
-	SCFDR,				/* FIFO Data Count Register */
-	SCxTDR,				/* Transmit (FIFO) Data Register */
-	SCxRDR,				/* Receive (FIFO) Data Register */
-	SCLSR,				/* Line Status Register */
-	SCTFDR,				/* Transmit FIFO Data Count Register */
-	SCRFDR,				/* Receive FIFO Data Count Register */
-	SCSPTR,				/* Serial Port Register */
-	HSSRR,				/* Sampling Rate Register */
-
-	SCIx_NR_REGS,
-};
-
 struct device;
 
 struct plat_sci_port_ops {
-- 
cgit v1.2.3


From d94a0a3857987c76c37a8095977fe554799ab69d Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Thu, 30 Apr 2015 18:21:29 +0200
Subject: serial: sh-sci: Standardize on using the BIT() macro to define
 register bits

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/serial_sci.h | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h
index 395fceb8c060..7c536ac5be05 100644
--- a/include/linux/serial_sci.h
+++ b/include/linux/serial_sci.h
@@ -1,6 +1,7 @@
 #ifndef __LINUX_SERIAL_SCI_H
 #define __LINUX_SERIAL_SCI_H
 
+#include <linux/bitops.h>
 #include <linux/serial_core.h>
 #include <linux/sh_dma.h>
 
@@ -11,14 +12,14 @@
 #define SCIx_NOT_SUPPORTED	(-1)
 
 /* Serial Control Register (@ = not supported by all parts) */
-#define SCSCR_TIE	(1 << 7)	/* Transmit Interrupt Enable */
-#define SCSCR_RIE	(1 << 6)	/* Receive Interrupt Enable */
-#define SCSCR_TE	(1 << 5)	/* Transmit Enable */
-#define SCSCR_RE	(1 << 4)	/* Receive Enable */
-#define SCSCR_REIE	(1 << 3)	/* Receive Error Interrupt Enable @ */
-#define SCSCR_TOIE	(1 << 2)	/* Timeout Interrupt Enable @ */
-#define SCSCR_CKE1	(1 << 1)	/* Clock Enable 1 */
-#define SCSCR_CKE0	(1 << 0)	/* Clock Enable 0 */
+#define SCSCR_TIE	BIT(7)	/* Transmit Interrupt Enable */
+#define SCSCR_RIE	BIT(6)	/* Receive Interrupt Enable */
+#define SCSCR_TE	BIT(5)	/* Transmit Enable */
+#define SCSCR_RE	BIT(4)	/* Receive Enable */
+#define SCSCR_REIE	BIT(3)	/* Receive Error Interrupt Enable @ */
+#define SCSCR_TOIE	BIT(2)	/* Timeout Interrupt Enable @ */
+#define SCSCR_CKE1	BIT(1)	/* Clock Enable 1 */
+#define SCSCR_CKE0	BIT(0)	/* Clock Enable 0 */
 
 
 enum {
@@ -48,7 +49,7 @@ struct plat_sci_port_ops {
 /*
  * Port-specific capabilities
  */
-#define SCIx_HAVE_RTSCTS	(1 << 0)
+#define SCIx_HAVE_RTSCTS	BIT(0)
 
 /*
  * Platform device specific platform_data struct
-- 
cgit v1.2.3


From bd63364caa8df38bad2b25b11b2a1b849475cce5 Mon Sep 17 00:00:00 2001
From: Scot Doyle <lkml14@scotdoyle.com>
Date: Thu, 26 Mar 2015 13:54:39 +0000
Subject: vt: add cursor blink interval escape sequence

Add an escape sequence to specify the current console's cursor blink
interval. The interval is specified as a number of milliseconds until
the next cursor display state toggle, from 50 to 65535. /proc/loadavg
did not show a difference with a one msec interval, but the lower
bound is set to 50 msecs since slower hardware wasn't tested.

Store the interval in the vc_data structure for later access by fbcon,
initializing the value to fbcon's current hardcoded value of 200 msecs.

Signed-off-by: Scot Doyle <lkml14@scotdoyle.com>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/console_struct.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h
index e859c98d1767..e329ee2667e1 100644
--- a/include/linux/console_struct.h
+++ b/include/linux/console_struct.h
@@ -104,6 +104,7 @@ struct vc_data {
 	unsigned int    vc_resize_user;         /* resize request from user */
 	unsigned int	vc_bell_pitch;		/* Console bell pitch */
 	unsigned int	vc_bell_duration;	/* Console bell duration */
+	unsigned short	vc_cur_blink_ms;	/* Cursor blink duration */
 	struct vc_data **vc_display_fg;		/* [!] Ptr to var holding fg console for this display */
 	struct uni_pagedir *vc_uni_pagedir;
 	struct uni_pagedir **vc_uni_pagedir_loc; /* [!] Location of uni_pagedir variable for this console */
-- 
cgit v1.2.3


From faaa44955dedc661f083636d816af90975a359ee Mon Sep 17 00:00:00 2001
From: Irina Tirdea <irina.tirdea@intel.com>
Date: Wed, 29 Apr 2015 21:16:39 +0300
Subject: iio: core: Introduce IIO_CHAN_INFO_OVERSAMPLING_RATIO

Some magnetometers can perform a number of repetitions in HW
for each measurement to increase accuracy. One example is
Bosch BMC150:
http://ae-bst.resource.bosch.com/media/products/dokumente/bmc150/BST-BMC150-DS000-04.pdf.

Introduce an interface to set the oversampling ratio
for these devices.

Signed-off-by: Irina Tirdea <irina.tirdea@intel.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 include/linux/iio/iio.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
index b1e46ae89aa7..058441da4984 100644
--- a/include/linux/iio/iio.h
+++ b/include/linux/iio/iio.h
@@ -44,6 +44,7 @@ enum iio_chan_info_enum {
 	IIO_CHAN_INFO_DEBOUNCE_COUNT,
 	IIO_CHAN_INFO_DEBOUNCE_TIME,
 	IIO_CHAN_INFO_CALIBEMISSIVITY,
+	IIO_CHAN_INFO_OVERSAMPLING_RATIO,
 };
 
 enum iio_shared_by {
-- 
cgit v1.2.3


From 61ba64fc0768879a300599b011c176203bdf27d9 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 2 May 2015 09:54:06 -0400
Subject: libfs: simple_follow_link()

let "fast" symlinks store the pointer to the body into ->i_link and
use simple_follow_link for ->follow_link()

Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 35ec87e490b1..0ac758fcff00 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -656,6 +656,7 @@ struct inode {
 		struct pipe_inode_info	*i_pipe;
 		struct block_device	*i_bdev;
 		struct cdev		*i_cdev;
+		char			*i_link;
 	};
 
 	__u32			i_generation;
@@ -2721,6 +2722,8 @@ void __inode_sub_bytes(struct inode *inode, loff_t bytes);
 void inode_sub_bytes(struct inode *inode, loff_t bytes);
 loff_t inode_get_bytes(struct inode *inode);
 void inode_set_bytes(struct inode *inode, loff_t bytes);
+void *simple_follow_link(struct dentry *, struct nameidata *);
+extern const struct inode_operations simple_symlink_inode_operations;
 
 extern int iterate_dir(struct file *, struct dir_context *);
 
-- 
cgit v1.2.3


From 5723cb01f0295ace2b029b0737dd6525a2de337f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 2 May 2015 10:27:18 -0400
Subject: debugfs: switch to simple_follow_link()

Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/debugfs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index cb25af461054..420311bcee38 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -45,7 +45,6 @@ extern struct dentry *arch_debugfs_dir;
 
 /* declared over in file.c */
 extern const struct file_operations debugfs_file_operations;
-extern const struct inode_operations debugfs_link_operations;
 
 struct dentry *debugfs_create_file(const char *name, umode_t mode,
 				   struct dentry *parent, void *data,
-- 
cgit v1.2.3


From 37882db0546c759ff75b561c188539ac96fd0bfe Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 23 Mar 2015 13:37:39 +1100
Subject: SECURITY: remove nameidata arg from inode_follow_link.

No ->inode_follow_link() methods use the nameidata arg, and
it is about to become private to namei.c.
So remove from all inode_follow_link() functions.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/security.h | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index 18264ea9e314..62a66202ecf1 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -43,7 +43,6 @@ struct file;
 struct vfsmount;
 struct path;
 struct qstr;
-struct nameidata;
 struct iattr;
 struct fown_struct;
 struct file_operations;
@@ -477,7 +476,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  * @inode_follow_link:
  *	Check permission to follow a symbolic link when looking up a pathname.
  *	@dentry contains the dentry structure for the link.
- *	@nd contains the nameidata structure for the parent directory.
  *	Return 0 if permission is granted.
  * @inode_permission:
  *	Check permission before accessing an inode.  This hook is called by the
@@ -1553,7 +1551,7 @@ struct security_operations {
 	int (*inode_rename) (struct inode *old_dir, struct dentry *old_dentry,
 			     struct inode *new_dir, struct dentry *new_dentry);
 	int (*inode_readlink) (struct dentry *dentry);
-	int (*inode_follow_link) (struct dentry *dentry, struct nameidata *nd);
+	int (*inode_follow_link) (struct dentry *dentry);
 	int (*inode_permission) (struct inode *inode, int mask);
 	int (*inode_setattr)	(struct dentry *dentry, struct iattr *attr);
 	int (*inode_getattr) (const struct path *path);
@@ -1839,7 +1837,7 @@ int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry,
 			  struct inode *new_dir, struct dentry *new_dentry,
 			  unsigned int flags);
 int security_inode_readlink(struct dentry *dentry);
-int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd);
+int security_inode_follow_link(struct dentry *dentry);
 int security_inode_permission(struct inode *inode, int mask);
 int security_inode_setattr(struct dentry *dentry, struct iattr *attr);
 int security_inode_getattr(const struct path *path);
@@ -2241,8 +2239,7 @@ static inline int security_inode_readlink(struct dentry *dentry)
 	return 0;
 }
 
-static inline int security_inode_follow_link(struct dentry *dentry,
-					      struct nameidata *nd)
+static inline int security_inode_follow_link(struct dentry *dentry)
 {
 	return 0;
 }
-- 
cgit v1.2.3


From 680baacbca69d18a6d7315374ad83d05ac9c0977 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 2 May 2015 13:32:22 -0400
Subject: new ->follow_link() and ->put_link() calling conventions

a) instead of storing the symlink body (via nd_set_link()) and returning
an opaque pointer later passed to ->put_link(), ->follow_link() _stores_
that opaque pointer (into void * passed by address by caller) and returns
the symlink body.  Returning ERR_PTR() on error, NULL on jump (procfs magic
symlinks) and pointer to symlink body for normal symlinks.  Stored pointer
is ignored in all cases except the last one.

Storing NULL for opaque pointer (or not storing it at all) means no call
of ->put_link().

b) the body used to be passed to ->put_link() implicitly (via nameidata).
Now only the opaque pointer is.  In the cases when we used the symlink body
to free stuff, ->follow_link() now should store it as opaque pointer in addition
to returning it.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h    | 12 ++++++------
 include/linux/namei.h |  2 --
 2 files changed, 6 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0ac758fcff00..9ab934113a28 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1608,12 +1608,12 @@ struct file_operations {
 
 struct inode_operations {
 	struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
-	void * (*follow_link) (struct dentry *, struct nameidata *);
+	const char * (*follow_link) (struct dentry *, void **, struct nameidata *);
 	int (*permission) (struct inode *, int);
 	struct posix_acl * (*get_acl)(struct inode *, int);
 
 	int (*readlink) (struct dentry *, char __user *,int);
-	void (*put_link) (struct dentry *, struct nameidata *, void *);
+	void (*put_link) (struct dentry *, void *);
 
 	int (*create) (struct inode *,struct dentry *, umode_t, bool);
 	int (*link) (struct dentry *,struct inode *,struct dentry *);
@@ -2705,13 +2705,13 @@ extern const struct file_operations generic_ro_fops;
 
 extern int readlink_copy(char __user *, int, const char *);
 extern int page_readlink(struct dentry *, char __user *, int);
-extern void *page_follow_link_light(struct dentry *, struct nameidata *);
-extern void page_put_link(struct dentry *, struct nameidata *, void *);
+extern const char *page_follow_link_light(struct dentry *, void **, struct nameidata *);
+extern void page_put_link(struct dentry *, void *);
 extern int __page_symlink(struct inode *inode, const char *symname, int len,
 		int nofs);
 extern int page_symlink(struct inode *inode, const char *symname, int len);
 extern const struct inode_operations page_symlink_inode_operations;
-extern void kfree_put_link(struct dentry *, struct nameidata *, void *);
+extern void kfree_put_link(struct dentry *, void *);
 extern int generic_readlink(struct dentry *, char __user *, int);
 extern void generic_fillattr(struct inode *, struct kstat *);
 int vfs_getattr_nosec(struct path *path, struct kstat *stat);
@@ -2722,7 +2722,7 @@ void __inode_sub_bytes(struct inode *inode, loff_t bytes);
 void inode_sub_bytes(struct inode *inode, loff_t bytes);
 loff_t inode_get_bytes(struct inode *inode);
 void inode_set_bytes(struct inode *inode, loff_t bytes);
-void *simple_follow_link(struct dentry *, struct nameidata *);
+const char *simple_follow_link(struct dentry *, void **, struct nameidata *);
 extern const struct inode_operations simple_symlink_inode_operations;
 
 extern int iterate_dir(struct file *, struct dir_context *);
diff --git a/include/linux/namei.h b/include/linux/namei.h
index c8990779f0c3..a5d5bed2c0e1 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -71,8 +71,6 @@ extern struct dentry *lock_rename(struct dentry *, struct dentry *);
 extern void unlock_rename(struct dentry *, struct dentry *);
 
 extern void nd_jump_link(struct nameidata *nd, struct path *path);
-extern void nd_set_link(struct nameidata *nd, char *path);
-extern char *nd_get_link(struct nameidata *nd);
 
 static inline void nd_terminate_link(void *name, size_t len, size_t maxlen)
 {
-- 
cgit v1.2.3


From 894bc8c4662ba9daceafe943a5ba0dd407da5cd3 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 2 May 2015 07:16:16 -0400
Subject: namei: remove restrictions on nesting depth

The only restriction is that on the total amount of symlinks
crossed; how they are nested does not matter

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/namei.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/namei.h b/include/linux/namei.h
index a5d5bed2c0e1..3a6cc9651712 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -11,6 +11,8 @@ struct nameidata;
 
 enum { MAX_NESTED_LINKS = 8 };
 
+#define MAXSYMLINKS 40
+
 /*
  * Type of the last component on LOOKUP_PARENT
  */
-- 
cgit v1.2.3


From 756daf263ea53a8bfc89db26cb92e963953253a1 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 23 Mar 2015 13:37:38 +1100
Subject: VFS: replace {, total_}link_count in task_struct with pointer to
 nameidata

task_struct currently contains two ad-hoc members for use by the VFS:
link_count and total_link_count.  These are only interesting to fs/namei.c,
so exposing them explicitly is poor layering.  Incidentally, link_count
isn't used anymore, so it can just die.

This patches replaces those with a single pointer to 'struct nameidata'.
This structure represents the current filename lookup of which
there can only be one per process, and is a natural place to
store total_link_count.

This will allow the current "nameidata" argument to all
follow_link operations to be removed as current->nameidata
can be used instead in the _very_ few instances that care about
it at all.

As there are occasional circumstances where pathname lookup can
recurse, such as through kern_path_locked, we always save and old
current->nameidata (if there is one) when setting a new value, and
make sure any active link_counts are preserved.

follow_mount and follow_automount now get a 'struct nameidata *'
rather than 'int flags' so that they can directly access
total_link_count, rather than going through 'current'.

Suggested-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 26a2e6122734..f6c9b69d66f2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1461,7 +1461,7 @@ struct task_struct {
 				       it with task_lock())
 				     - initialized normally by setup_new_exec */
 /* file system info */
-	int link_count, total_link_count;
+	struct nameidata *nameidata;
 #ifdef CONFIG_SYSVIPC
 /* ipc stuff */
 	struct sysv_sem sysvsem;
-- 
cgit v1.2.3


From 6e77137b363b8d866ac29c5a0c95e953614fb2d8 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 2 May 2015 13:37:52 -0400
Subject: don't pass nameidata to ->follow_link()

its only use is getting passed to nd_jump_link(), which can obtain
it from current->nameidata

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h    | 6 +++---
 include/linux/namei.h | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9ab934113a28..ed7c9f298759 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1608,7 +1608,7 @@ struct file_operations {
 
 struct inode_operations {
 	struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
-	const char * (*follow_link) (struct dentry *, void **, struct nameidata *);
+	const char * (*follow_link) (struct dentry *, void **);
 	int (*permission) (struct inode *, int);
 	struct posix_acl * (*get_acl)(struct inode *, int);
 
@@ -2705,7 +2705,7 @@ extern const struct file_operations generic_ro_fops;
 
 extern int readlink_copy(char __user *, int, const char *);
 extern int page_readlink(struct dentry *, char __user *, int);
-extern const char *page_follow_link_light(struct dentry *, void **, struct nameidata *);
+extern const char *page_follow_link_light(struct dentry *, void **);
 extern void page_put_link(struct dentry *, void *);
 extern int __page_symlink(struct inode *inode, const char *symname, int len,
 		int nofs);
@@ -2722,7 +2722,7 @@ void __inode_sub_bytes(struct inode *inode, loff_t bytes);
 void inode_sub_bytes(struct inode *inode, loff_t bytes);
 loff_t inode_get_bytes(struct inode *inode);
 void inode_set_bytes(struct inode *inode, loff_t bytes);
-const char *simple_follow_link(struct dentry *, void **, struct nameidata *);
+const char *simple_follow_link(struct dentry *, void **);
 extern const struct inode_operations simple_symlink_inode_operations;
 
 extern int iterate_dir(struct file *, struct dir_context *);
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 3a6cc9651712..d756304aa09b 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -72,7 +72,7 @@ extern int follow_up(struct path *);
 extern struct dentry *lock_rename(struct dentry *, struct dentry *);
 extern void unlock_rename(struct dentry *, struct dentry *);
 
-extern void nd_jump_link(struct nameidata *nd, struct path *path);
+extern void nd_jump_link(struct path *path);
 
 static inline void nd_terminate_link(void *name, size_t len, size_t maxlen)
 {
-- 
cgit v1.2.3


From 2da572c959dd5815aef153cf62010b16a498a0d3 Mon Sep 17 00:00:00 2001
From: Dan Streetman <ddstreet@ieee.org>
Date: Thu, 7 May 2015 13:49:14 -0400
Subject: lib: add software 842 compression/decompression

Add 842-format software compression and decompression functions.
Update the MAINTAINERS 842 section to include the new files.

The 842 compression function can compress any input data into the 842
compression format.  The 842 decompression function can decompress any
standard-format 842 compressed data - specifically, either a compressed
data buffer created by the 842 software compression function, or a
compressed data buffer created by the 842 hardware compressor (located
in PowerPC coprocessors).

The 842 compressed data format is explained in the header comments.

This is used in a later patch to provide a full software 842 compression
and decompression crypto interface.

Signed-off-by: Dan Streetman <ddstreet@ieee.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/sw842.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)
 create mode 100644 include/linux/sw842.h

(limited to 'include/linux')

diff --git a/include/linux/sw842.h b/include/linux/sw842.h
new file mode 100644
index 000000000000..109ba041c2ae
--- /dev/null
+++ b/include/linux/sw842.h
@@ -0,0 +1,12 @@
+#ifndef __SW842_H__
+#define __SW842_H__
+
+#define SW842_MEM_COMPRESS	(0xf000)
+
+int sw842_compress(const u8 *src, unsigned int srclen,
+		   u8 *dst, unsigned int *destlen, void *wmem);
+
+int sw842_decompress(const u8 *src, unsigned int srclen,
+		     u8 *dst, unsigned int *destlen);
+
+#endif
-- 
cgit v1.2.3


From 7011a122383e36dab594406720fa1d089e0be8f9 Mon Sep 17 00:00:00 2001
From: Dan Streetman <ddstreet@ieee.org>
Date: Thu, 7 May 2015 13:49:17 -0400
Subject: crypto: nx - add NX-842 platform frontend driver

Add NX-842 frontend that allows using either the pSeries platform or
PowerNV platform driver (to be added by later patch) for the NX-842
hardware.  Update the MAINTAINERS file to include the new filenames.
Update Kconfig files to clarify titles and descriptions, and correct
dependencies.

Signed-off-by: Dan Streetman <ddstreet@ieee.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/nx842.h | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nx842.h b/include/linux/nx842.h
index a4d324c6406a..d919c22b7fd6 100644
--- a/include/linux/nx842.h
+++ b/include/linux/nx842.h
@@ -1,11 +1,13 @@
 #ifndef __NX842_H__
 #define __NX842_H__
 
-int nx842_get_workmem_size(void);
-int nx842_get_workmem_size_aligned(void);
+#define __NX842_PSERIES_MEM_COMPRESS	((PAGE_SIZE * 2) + 10240)
+
+#define NX842_MEM_COMPRESS	__NX842_PSERIES_MEM_COMPRESS
+
 int nx842_compress(const unsigned char *in, unsigned int in_len,
-		unsigned char *out, unsigned int *out_len, void *wrkmem);
+		   unsigned char *out, unsigned int *out_len, void *wrkmem);
 int nx842_decompress(const unsigned char *in, unsigned int in_len,
-		unsigned char *out, unsigned int *out_len, void *wrkmem);
+		     unsigned char *out, unsigned int *out_len, void *wrkmem);
 
 #endif
-- 
cgit v1.2.3


From 959e6659b6f74ec1fa4d391a3b88d63dc0189f36 Mon Sep 17 00:00:00 2001
From: Dan Streetman <ddstreet@ieee.org>
Date: Thu, 7 May 2015 13:49:18 -0400
Subject: crypto: nx - add nx842 constraints

Add "constraints" for the NX-842 driver.  The constraints are used to
indicate what the current NX-842 platform driver is capable of.  The
constraints tell the NX-842 user what alignment, min and max length, and
length multiple each provided buffers should conform to.  These are
required because the 842 hardware requires buffers to meet specific
constraints that vary based on platform - for example, the pSeries
max length is much lower than the PowerNV max length.

Signed-off-by: Dan Streetman <ddstreet@ieee.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/nx842.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nx842.h b/include/linux/nx842.h
index d919c22b7fd6..aa1a97e90dea 100644
--- a/include/linux/nx842.h
+++ b/include/linux/nx842.h
@@ -5,6 +5,15 @@
 
 #define NX842_MEM_COMPRESS	__NX842_PSERIES_MEM_COMPRESS
 
+struct nx842_constraints {
+	int alignment;
+	int multiple;
+	int minimum;
+	int maximum;
+};
+
+int nx842_constraints(struct nx842_constraints *constraints);
+
 int nx842_compress(const unsigned char *in, unsigned int in_len,
 		   unsigned char *out, unsigned int *out_len, void *wrkmem);
 int nx842_decompress(const unsigned char *in, unsigned int in_len,
-- 
cgit v1.2.3


From 99182a42b7ef3d5e4180992ce01befd9e87526d2 Mon Sep 17 00:00:00 2001
From: Dan Streetman <ddstreet@ieee.org>
Date: Thu, 7 May 2015 13:49:19 -0400
Subject: crypto: nx - add PowerNV platform NX-842 driver

Add driver for NX-842 hardware on the PowerNV platform.

This allows the use of the 842 compression hardware coprocessor on
the PowerNV platform.

Signed-off-by: Dan Streetman <ddstreet@ieee.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/nx842.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nx842.h b/include/linux/nx842.h
index aa1a97e90dea..4ddf68d9c0d4 100644
--- a/include/linux/nx842.h
+++ b/include/linux/nx842.h
@@ -1,9 +1,11 @@
 #ifndef __NX842_H__
 #define __NX842_H__
 
-#define __NX842_PSERIES_MEM_COMPRESS	((PAGE_SIZE * 2) + 10240)
+#define __NX842_PSERIES_MEM_COMPRESS	(10240)
+#define __NX842_POWERNV_MEM_COMPRESS	(1024)
 
-#define NX842_MEM_COMPRESS	__NX842_PSERIES_MEM_COMPRESS
+#define NX842_MEM_COMPRESS	(max_t(unsigned int,			\
+	__NX842_PSERIES_MEM_COMPRESS, __NX842_POWERNV_MEM_COMPRESS))
 
 struct nx842_constraints {
 	int alignment;
-- 
cgit v1.2.3


From b19e7f51a55fe740c18038d1d6957aedfc078d07 Mon Sep 17 00:00:00 2001
From: Vladimir Zapolskiy <vladimir_zapolskiy@mentor.com>
Date: Wed, 29 Apr 2015 18:34:59 +0300
Subject: gpio: gpio-generic: add flag to read out output value from reg_set

The change introduces BGPIOF_READ_OUTPUT_REG_SET flag for gpio-generic
GPIO chip implementation, which allows to get correct configured value
from reg_set register, input value is still get from reg_dat.

Signed-off-by: Vladimir Zapolskiy <vladimir_zapolskiy@mentor.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/basic_mmio_gpio.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/basic_mmio_gpio.h b/include/linux/basic_mmio_gpio.h
index 0e97856b2cff..14eea946e640 100644
--- a/include/linux/basic_mmio_gpio.h
+++ b/include/linux/basic_mmio_gpio.h
@@ -74,5 +74,6 @@ int bgpio_init(struct bgpio_chip *bgc, struct device *dev,
 #define BGPIOF_UNREADABLE_REG_SET	BIT(1) /* reg_set is unreadable */
 #define BGPIOF_UNREADABLE_REG_DIR	BIT(2) /* reg_dir is unreadable */
 #define BGPIOF_BIG_ENDIAN_BYTE_ORDER	BIT(3)
+#define BGPIOF_READ_OUTPUT_REG_SET     BIT(4) /* reg_set stores output value */
 
 #endif /* __BASIC_MMIO_GPIO_H */
-- 
cgit v1.2.3


From c884fbd452147e952ae160e750553d00ea4dc4c9 Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Wed, 6 May 2015 13:29:06 +0300
Subject: gpio / ACPI: Add support for retrieving GpioInt resources from a
 device

ACPI specification knows two types of GPIOs: GpioIo and GpioInt. The latter
is used to describe that a given device interrupt line is connected to a
specific GPIO pin. Typical ACPI _CRS entry for such device looks like
below:

    Name (_CRS, ResourceTemplate ()
    {
        I2cSerialBus (0x004A, ControllerInitiated, 0x00061A80,
                      AddressingMode7Bit, "\\_SB.PCI0.I2C6",
                      0x00, ResourceConsumer)
        GpioIo (Exclusive, PullDefault, 0x0000, 0x0000,
                IoRestrictionOutputOnly, "\\_SB.GPO0",
                0x00, ResourceConsumer)
        {
            0x004B
        }
        GpioInt (Level, ActiveLow, Shared, PullDefault, 0x0000,
                 "\\_SB.GPO0", 0x00, ResourceConsumer)
        {
            0x004C
        }
    })

Currently drivers need to request a GPIO corresponding to the right GpioInt
and then translate that to Linux IRQ number. This adds unnecessary lines of
boiler-plate code.

We can ease this a bit by introducing acpi_dev_gpio_irq_get() analogous to
of_irq_get(). This function translates given GpioInt resource under the
device in question to the suitable Linux IRQ number.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/acpi.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index e4da5e35e29c..f57c440642cd 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -721,6 +721,8 @@ static inline void acpi_dev_remove_driver_gpios(struct acpi_device *adev)
 	if (adev)
 		adev->driver_gpios = NULL;
 }
+
+int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index);
 #else
 static inline int acpi_dev_add_driver_gpios(struct acpi_device *adev,
 			      const struct acpi_gpio_mapping *gpios)
@@ -728,6 +730,11 @@ static inline int acpi_dev_add_driver_gpios(struct acpi_device *adev,
 	return -ENXIO;
 }
 static inline void acpi_dev_remove_driver_gpios(struct acpi_device *adev) {}
+
+static inline int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index)
+{
+	return -ENXIO;
+}
 #endif
 
 /* Device properties */
-- 
cgit v1.2.3


From bda0be7ad994812960e9f8f2d2757f72cb4a96cb Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 23 Mar 2015 13:37:39 +1100
Subject: security: make inode_follow_link RCU-walk aware

inode_follow_link now takes an inode and rcu flag as well as the
dentry.

inode is used in preference to d_backing_inode(dentry), particularly
in RCU-walk mode.

selinux_inode_follow_link() gets dentry_has_perm() and
inode_has_perm() open-coded into it so that it can call
avc_has_perm_flags() in way that is safe if LOOKUP_RCU is set.

Calling avc_has_perm_flags() with rcu_read_lock() held means
that when avc_has_perm_noaudit calls avc_compute_av(), the attempt
to rcu_read_unlock() before calling security_compute_av() will not
actually drop the RCU read-lock.

However as security_compute_av() is completely in a read_lock()ed
region, it should be safe with the RCU read-lock held.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/security.h | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index 62a66202ecf1..52febde52479 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -476,6 +476,8 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  * @inode_follow_link:
  *	Check permission to follow a symbolic link when looking up a pathname.
  *	@dentry contains the dentry structure for the link.
+ *	@inode contains the inode, which itself is not stable in RCU-walk
+ *	@rcu indicates whether we are in RCU-walk mode.
  *	Return 0 if permission is granted.
  * @inode_permission:
  *	Check permission before accessing an inode.  This hook is called by the
@@ -1551,7 +1553,8 @@ struct security_operations {
 	int (*inode_rename) (struct inode *old_dir, struct dentry *old_dentry,
 			     struct inode *new_dir, struct dentry *new_dentry);
 	int (*inode_readlink) (struct dentry *dentry);
-	int (*inode_follow_link) (struct dentry *dentry);
+	int (*inode_follow_link) (struct dentry *dentry, struct inode *inode,
+				  bool rcu);
 	int (*inode_permission) (struct inode *inode, int mask);
 	int (*inode_setattr)	(struct dentry *dentry, struct iattr *attr);
 	int (*inode_getattr) (const struct path *path);
@@ -1837,7 +1840,8 @@ int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry,
 			  struct inode *new_dir, struct dentry *new_dentry,
 			  unsigned int flags);
 int security_inode_readlink(struct dentry *dentry);
-int security_inode_follow_link(struct dentry *dentry);
+int security_inode_follow_link(struct dentry *dentry, struct inode *inode,
+			       bool rcu);
 int security_inode_permission(struct inode *inode, int mask);
 int security_inode_setattr(struct dentry *dentry, struct iattr *attr);
 int security_inode_getattr(const struct path *path);
@@ -2239,7 +2243,9 @@ static inline int security_inode_readlink(struct dentry *dentry)
 	return 0;
 }
 
-static inline int security_inode_follow_link(struct dentry *dentry)
+static inline int security_inode_follow_link(struct dentry *dentry,
+					     struct inode *inode,
+					     bool rcu)
 {
 	return 0;
 }
-- 
cgit v1.2.3


From 5f2c4179e129bdc47870a81a65d0aff85aa18293 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 7 May 2015 11:14:26 -0400
Subject: switch ->put_link() from dentry to inode

only one instance looks at that argument at all; that sole
exception wants inode rather than dentry.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index ed7c9f298759..f21e3328f991 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1613,7 +1613,7 @@ struct inode_operations {
 	struct posix_acl * (*get_acl)(struct inode *, int);
 
 	int (*readlink) (struct dentry *, char __user *,int);
-	void (*put_link) (struct dentry *, void *);
+	void (*put_link) (struct inode *, void *);
 
 	int (*create) (struct inode *,struct dentry *, umode_t, bool);
 	int (*link) (struct dentry *,struct inode *,struct dentry *);
@@ -2706,12 +2706,12 @@ extern const struct file_operations generic_ro_fops;
 extern int readlink_copy(char __user *, int, const char *);
 extern int page_readlink(struct dentry *, char __user *, int);
 extern const char *page_follow_link_light(struct dentry *, void **);
-extern void page_put_link(struct dentry *, void *);
+extern void page_put_link(struct inode *, void *);
 extern int __page_symlink(struct inode *inode, const char *symname, int len,
 		int nofs);
 extern int page_symlink(struct inode *inode, const char *symname, int len);
 extern const struct inode_operations page_symlink_inode_operations;
-extern void kfree_put_link(struct dentry *, void *);
+extern void kfree_put_link(struct inode *, void *);
 extern int generic_readlink(struct dentry *, char __user *, int);
 extern void generic_fillattr(struct inode *, struct kstat *);
 int vfs_getattr_nosec(struct path *path, struct kstat *stat);
-- 
cgit v1.2.3


From ecc087ff14352aed52b8e775b4511e7f9cfc64ec Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 7 May 2015 11:19:14 -0400
Subject: new helper: free_page_put_link()

similar to kfree_put_link()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index f21e3328f991..8f738512c874 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2712,6 +2712,7 @@ extern int __page_symlink(struct inode *inode, const char *symname, int len,
 extern int page_symlink(struct inode *inode, const char *symname, int len);
 extern const struct inode_operations page_symlink_inode_operations;
 extern void kfree_put_link(struct inode *, void *);
+extern void free_page_put_link(struct inode *, void *);
 extern int generic_readlink(struct dentry *, char __user *, int);
 extern void generic_fillattr(struct inode *, struct kstat *);
 int vfs_getattr_nosec(struct path *path, struct kstat *stat);
-- 
cgit v1.2.3


From 140e807da12988e2a925fe029336e7bb67a8d4de Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 8 May 2015 21:07:08 -0500
Subject: tun: Utilize the normal socket network namespace refcounting.

There is no need for tun to do the weird network namespace refcounting.
The existing network namespace refcounting in tfile has almost exactly
the same lifetime.  So rewrite the code to use the struct sock network
namespace refcounting and remove the unnecessary hand rolled network
namespace refcounting and the unncesary tfile->net.

This change allows the tun code to directly call sock_put bypassing
sock_release and making SOCK_EXTERNALLY_ALLOCATED unnecessary.

Remove the now unncessary tun_release so that if anything tries to use
the sock_release code path the kernel will oops, and let us know about
the bug.

The macvtap code already uses it's internal socket this way.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/net.h b/include/linux/net.h
index 738ea48be889..8a5e81d2bdf7 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -38,7 +38,6 @@ struct net;
 #define SOCK_NOSPACE		2
 #define SOCK_PASSCRED		3
 #define SOCK_PASSSEC		4
-#define SOCK_EXTERNALLY_ALLOCATED 5
 
 #ifndef ARCH_HAS_SOCKET_TYPES
 /**
-- 
cgit v1.2.3


From eeb1bd5c40edb0e2fd925c8535e2fdebdbc5cef2 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 8 May 2015 21:08:05 -0500
Subject: net: Add a struct net parameter to sock_create_kern

This is long overdue, and is part of cleaning up how we allocate kernel
sockets that don't reference count struct net.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/net.h b/include/linux/net.h
index 8a5e81d2bdf7..04aa06852771 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -207,7 +207,7 @@ void sock_unregister(int family);
 int __sock_create(struct net *net, int family, int type, int proto,
 		  struct socket **res, int kern);
 int sock_create(int family, int type, int proto, struct socket **res);
-int sock_create_kern(int family, int type, int proto, struct socket **res);
+int sock_create_kern(struct net *net, int family, int type, int proto, struct socket **res);
 int sock_create_lite(int family, int type, int proto, struct socket **res);
 void sock_release(struct socket *sock);
 int sock_sendmsg(struct socket *sock, struct msghdr *msg);
-- 
cgit v1.2.3


From 11aa9c28b4209242a9de0a661a7b3405adb568a0 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 8 May 2015 21:09:13 -0500
Subject: net: Pass kern from net_proto_family.create to sk_alloc

In preparation for changing how struct net is refcounted
on kernel sockets pass the knowledge that we are creating
a kernel socket from sock_create_kern through to sk_alloc.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_pppox.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h
index 66a7d7600f43..b49cf923becc 100644
--- a/include/linux/if_pppox.h
+++ b/include/linux/if_pppox.h
@@ -74,7 +74,7 @@ static inline struct sock *sk_pppox(struct pppox_sock *po)
 struct module;
 
 struct pppox_proto {
-	int		(*create)(struct net *net, struct socket *sock);
+	int		(*create)(struct net *net, struct socket *sock, int kern);
 	int		(*ioctl)(struct socket *sock, unsigned int cmd,
 				 unsigned long arg);
 	struct module	*owner;
-- 
cgit v1.2.3


From d2788d34885d4ce5ba17a8996fd95d28942e574e Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 9 May 2015 22:51:32 +0200
Subject: net: sched: further simplify handle_ing

Ingress qdisc has no other purpose than calling into tc_classify()
that executes attached classifier(s) and action(s).

It has a 1:1 relationship to dev->ingress_queue. After having commit
087c1a601ad7 ("net: sched: run ingress qdisc without locks") removed
the central ingress lock, one major contention point is gone.

The extra indirection layers however, are not necessary for calling
into ingress qdisc. pktgen calling locally into netif_receive_skb()
with a dummy u32, single CPU result on a Supermicro X10SLM-F, Xeon
E3-1240: before ~21,1 Mpps, after patch ~22,9 Mpps.

We can redirect the private classifier list to the netdev directly,
without changing any classifier API bits (!) and execute on that from
handle_ing() side. The __QDISC_STATE_DEACTIVATE test can be removed,
ingress qdisc doesn't have a queue and thus dev_deactivate_queue()
is also not applicable, ingress_cl_list provides similar behaviour.
In other words, ingress qdisc acts like TCQ_F_BUILTIN qdisc.

One next possible step is the removal of the dev's ingress (dummy)
netdev_queue, and to only have the list member in the netdevice
itself.

Note, the filter chain is RCU protected and individual filter elements
are being kfree'd by sched subsystem after RCU grace period. RCU read
lock is being held by __netif_receive_skb_core().

Joint work with Alexei Starovoitov.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1899c74a7127..c4e1caf6056f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1655,7 +1655,11 @@ struct net_device {
 	rx_handler_func_t __rcu	*rx_handler;
 	void __rcu		*rx_handler_data;
 
+#if CONFIG_NET_CLS_ACT
+	struct tcf_proto __rcu  *ingress_cl_list;
+#endif
 	struct netdev_queue __rcu *ingress_queue;
+
 	unsigned char		broadcast[MAX_ADDR_LEN];
 #ifdef CONFIG_RFS_ACCEL
 	struct cpu_rmap		*rx_cpu_rmap;
-- 
cgit v1.2.3


From 6be109b31ccdb9c98e7be12687171f6602527a5d Mon Sep 17 00:00:00 2001
From: Arun Ramamurthy <arun.ramamurthy@broadcom.com>
Date: Wed, 22 Apr 2015 16:04:11 -0700
Subject: phy: core: Add devm_of_phy_get_by_index to phy-core

Some generic drivers, such as ehci, may use multiple phys and for such
drivers referencing phy(s) by name(s) does not make sense. Instead of
inventing new naming schemes and using custom code to iterate through them,
such drivers are better of using nameless phy bindings and using this newly
introduced API to iterate through them.

Signed-off-by: Arun Ramamurthy <arun.ramamurthy@broadcom.com>
Reviewed-by: Ray Jui <rjui@broadcom.com>
Reviewed-by: Scott Branden <sbranden@broadcom.com>
[kishon@ti.com: fix compilation errors]
Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>
---
 include/linux/phy/phy.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h
index a0197fa1b116..8cf05e341cff 100644
--- a/include/linux/phy/phy.h
+++ b/include/linux/phy/phy.h
@@ -133,6 +133,8 @@ struct phy *devm_phy_get(struct device *dev, const char *string);
 struct phy *devm_phy_optional_get(struct device *dev, const char *string);
 struct phy *devm_of_phy_get(struct device *dev, struct device_node *np,
 			    const char *con_id);
+struct phy *devm_of_phy_get_by_index(struct device *dev, struct device_node *np,
+				     int index);
 void phy_put(struct phy *phy);
 void devm_phy_put(struct device *dev, struct phy *phy);
 struct phy *of_phy_get(struct device_node *np, const char *con_id);
@@ -261,6 +263,13 @@ static inline struct phy *devm_of_phy_get(struct device *dev,
 	return ERR_PTR(-ENOSYS);
 }
 
+static inline struct phy *devm_of_phy_get_by_index(struct device *dev,
+						   struct device_node *np,
+						   int index)
+{
+	return ERR_PTR(-ENOSYS);
+}
+
 static inline void phy_put(struct phy *phy)
 {
 }
-- 
cgit v1.2.3


From 4cda01e86f68dacc758b2daf6e8809f2ce915b85 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Mon, 11 May 2015 19:28:49 +0200
Subject: net: sched: fix typo in net_device ifdef

This should have been #ifdef not #if.

Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Fixes: d2788d34885d ("net: sched: further simplify handle_ing")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c4e1caf6056f..a6d706b2a947 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1655,7 +1655,7 @@ struct net_device {
 	rx_handler_func_t __rcu	*rx_handler;
 	void __rcu		*rx_handler_data;
 
-#if CONFIG_NET_CLS_ACT
+#ifdef CONFIG_NET_CLS_ACT
 	struct tcf_proto __rcu  *ingress_cl_list;
 #endif
 	struct netdev_queue __rcu *ingress_queue;
-- 
cgit v1.2.3


From 5844feeaa4154d1c46d3462c7a4653d22356d8b4 Mon Sep 17 00:00:00 2001
From: Brian Norris <computersforpeace@gmail.com>
Date: Fri, 23 Jan 2015 00:22:27 -0800
Subject: mtd: nand: add common DT init code

These are already-documented common bindings for NAND chips. Let's
handle them in nand_base.

If NAND controller drivers need to act on this data before bringing up
the NAND chip (e.g., fill out ECC callback functions, change HW modes,
etc.), then they can do so between calling nand_scan_ident() and
nand_scan_tail().

Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 include/linux/mtd/nand.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 6c51876941f3..f25e2bdd188c 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -26,6 +26,8 @@
 
 struct mtd_info;
 struct nand_flash_dev;
+struct device_node;
+
 /* Scan and identify a NAND device */
 extern int nand_scan(struct mtd_info *mtd, int max_chips);
 /*
@@ -542,6 +544,7 @@ struct nand_buffers {
  *			flash device
  * @IO_ADDR_W:		[BOARDSPECIFIC] address to write the 8 I/O lines of the
  *			flash device.
+ * @dn:			[BOARDSPECIFIC] device node describing this instance
  * @read_byte:		[REPLACEABLE] read one byte from the chip
  * @read_word:		[REPLACEABLE] read one word from the chip
  * @write_byte:		[REPLACEABLE] write a single byte to the chip on the
@@ -644,6 +647,8 @@ struct nand_chip {
 	void __iomem *IO_ADDR_R;
 	void __iomem *IO_ADDR_W;
 
+	struct device_node *dn;
+
 	uint8_t (*read_byte)(struct mtd_info *mtd);
 	u16 (*read_word)(struct mtd_info *mtd);
 	void (*write_byte)(struct mtd_info *mtd, uint8_t byte);
-- 
cgit v1.2.3


From 9d0be7f4810257a9b0fc78fff641f14409f14ab3 Mon Sep 17 00:00:00 2001
From: Eduardo Valentin <edubezval@gmail.com>
Date: Mon, 11 May 2015 19:34:23 -0700
Subject: thermal: support slope and offset coefficients

It is common to have a linear extrapolation from
the current sensor readings and the actual temperature
value. This is specially the case when the sensor
is in use to extrapolate hotspots.

This patch adds slope and offset constants for
single sensor linear extrapolation equation. Because
the same sensor can be use in different locations,
from board to board, these constants are added
as part of thermal_zone_params.

The constants are available through sysfs.

It is up to the device driver to determine
the usage of these values.

Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 include/linux/thermal.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 6bbe11c97cea..037e9df2f610 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -302,6 +302,17 @@ struct thermal_zone_params {
 
 	/* threshold below which the error is no longer accumulated */
 	s32 integral_cutoff;
+
+	/*
+	 * @slope:	slope of a linear temperature adjustment curve.
+	 * 		Used by thermal zone drivers.
+	 */
+	int slope;
+	/*
+	 * @offset:	offset of a linear temperature adjustment curve.
+	 * 		Used by thermal zone drivers (default 0).
+	 */
+	int offset;
 };
 
 struct thermal_genl_event {
-- 
cgit v1.2.3


From 05ae797566a66d159cf1e2ee11bf3f6fae40c8eb Mon Sep 17 00:00:00 2001
From: Andrew Bresticker <abrestic@chromium.org>
Date: Mon, 4 May 2015 10:36:35 -0700
Subject: mailbox: Make mbox_chan_ops const

The mailbox controller's channel ops ought to be read-only.  Update
all the mailbox drivers to make their mbox_chan_ops const as well.

Signed-off-by: Andrew Bresticker <abrestic@chromium.org>
Cc: Ashwin Chaugule <ashwin.chaugule@linaro.org>
Cc: Ley Foon Tan <lftan@altera.com>
Acked-by: Suman Anna <s-anna@ti.com>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 include/linux/mailbox_controller.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mailbox_controller.h b/include/linux/mailbox_controller.h
index d4cf96f07cfc..68c42454439b 100644
--- a/include/linux/mailbox_controller.h
+++ b/include/linux/mailbox_controller.h
@@ -72,7 +72,7 @@ struct mbox_chan_ops {
  */
 struct mbox_controller {
 	struct device *dev;
-	struct mbox_chan_ops *ops;
+	const struct mbox_chan_ops *ops;
 	struct mbox_chan *chans;
 	int num_chans;
 	bool txdone_irq;
-- 
cgit v1.2.3


From 3c4ed7bdf5997d8020cbb8d4abbef2fcfb9f1284 Mon Sep 17 00:00:00 2001
From: Casey Schaufler <casey@schaufler-ca.com>
Date: Sat, 2 May 2015 15:10:46 -0700
Subject: LSM: Split security.h

The security.h header file serves two purposes,
interfaces for users of the security modules and
interfaces for security modules. Users of the
security modules don't need to know about what's
in the security_operations structure, so pull it
out into it's own header, lsm_hooks.h

Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
Acked-by: John Johansen <john.johansen@canonical.com>
Acked-by: Kees Cook <keescook@chromium.org>
Acked-by: Paul Moore <paul@paul-moore.com>
Acked-by:  Stephen Smalley <sds@tycho.nsa.gov>
Acked-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: James Morris <james.l.morris@oracle.com>
---
 include/linux/lsm_hooks.h | 358 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/security.h  | 305 ---------------------------------------
 2 files changed, 358 insertions(+), 305 deletions(-)
 create mode 100644 include/linux/lsm_hooks.h

(limited to 'include/linux')

diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
new file mode 100644
index 000000000000..c60f81b2d18c
--- /dev/null
+++ b/include/linux/lsm_hooks.h
@@ -0,0 +1,358 @@
+/*
+ * Linux Security Module interfaces
+ *
+ * Copyright (C) 2001 WireX Communications, Inc <chris@wirex.com>
+ * Copyright (C) 2001 Greg Kroah-Hartman <greg@kroah.com>
+ * Copyright (C) 2001 Networks Associates Technology, Inc <ssmalley@nai.com>
+ * Copyright (C) 2001 James Morris <jmorris@intercode.com.au>
+ * Copyright (C) 2001 Silicon Graphics, Inc. (Trust Technology Group)
+ * Copyright (C) 2015 Intel Corporation.
+ * Copyright (C) 2015 Casey Schaufler <casey@schaufler-ca.com>
+ *
+ *	This program is free software; you can redistribute it and/or modify
+ *	it under the terms of the GNU General Public License as published by
+ *	the Free Software Foundation; either version 2 of the License, or
+ *	(at your option) any later version.
+ *
+ *	Due to this file being licensed under the GPL there is controversy over
+ *	whether this permits you to write a module that #includes this file
+ *	without placing your module under the GPL.  Please consult a lawyer for
+ *	advice before doing this.
+ *
+ */
+
+#ifndef __LINUX_LSM_HOOKS_H
+#define __LINUX_LSM_HOOKS_H
+
+#include <linux/security.h>
+
+/* Maximum number of letters for an LSM name string */
+#define SECURITY_NAME_MAX	10
+
+#ifdef CONFIG_SECURITY
+
+struct security_operations {
+	char name[SECURITY_NAME_MAX + 1];
+
+	int (*binder_set_context_mgr)(struct task_struct *mgr);
+	int (*binder_transaction)(struct task_struct *from,
+					struct task_struct *to);
+	int (*binder_transfer_binder)(struct task_struct *from,
+					struct task_struct *to);
+	int (*binder_transfer_file)(struct task_struct *from,
+					struct task_struct *to,
+					struct file *file);
+
+	int (*ptrace_access_check)(struct task_struct *child,
+					unsigned int mode);
+	int (*ptrace_traceme)(struct task_struct *parent);
+	int (*capget)(struct task_struct *target, kernel_cap_t *effective,
+			kernel_cap_t *inheritable, kernel_cap_t *permitted);
+	int (*capset)(struct cred *new, const struct cred *old,
+			const kernel_cap_t *effective,
+			const kernel_cap_t *inheritable,
+			const kernel_cap_t *permitted);
+	int (*capable)(const struct cred *cred, struct user_namespace *ns,
+			int cap, int audit);
+	int (*quotactl)(int cmds, int type, int id, struct super_block *sb);
+	int (*quota_on)(struct dentry *dentry);
+	int (*syslog)(int type);
+	int (*settime)(const struct timespec *ts, const struct timezone *tz);
+	int (*vm_enough_memory)(struct mm_struct *mm, long pages);
+
+	int (*bprm_set_creds)(struct linux_binprm *bprm);
+	int (*bprm_check_security)(struct linux_binprm *bprm);
+	int (*bprm_secureexec)(struct linux_binprm *bprm);
+	void (*bprm_committing_creds)(struct linux_binprm *bprm);
+	void (*bprm_committed_creds)(struct linux_binprm *bprm);
+
+	int (*sb_alloc_security)(struct super_block *sb);
+	void (*sb_free_security)(struct super_block *sb);
+	int (*sb_copy_data)(char *orig, char *copy);
+	int (*sb_remount)(struct super_block *sb, void *data);
+	int (*sb_kern_mount)(struct super_block *sb, int flags, void *data);
+	int (*sb_show_options)(struct seq_file *m, struct super_block *sb);
+	int (*sb_statfs)(struct dentry *dentry);
+	int (*sb_mount)(const char *dev_name, struct path *path,
+			const char *type, unsigned long flags, void *data);
+	int (*sb_umount)(struct vfsmount *mnt, int flags);
+	int (*sb_pivotroot)(struct path *old_path, struct path *new_path);
+	int (*sb_set_mnt_opts)(struct super_block *sb,
+				struct security_mnt_opts *opts,
+				unsigned long kern_flags,
+				unsigned long *set_kern_flags);
+	int (*sb_clone_mnt_opts)(const struct super_block *oldsb,
+					struct super_block *newsb);
+	int (*sb_parse_opts_str)(char *options, struct security_mnt_opts *opts);
+	int (*dentry_init_security)(struct dentry *dentry, int mode,
+					struct qstr *name, void **ctx,
+					u32 *ctxlen);
+
+
+#ifdef CONFIG_SECURITY_PATH
+	int (*path_unlink)(struct path *dir, struct dentry *dentry);
+	int (*path_mkdir)(struct path *dir, struct dentry *dentry,
+				umode_t mode);
+	int (*path_rmdir)(struct path *dir, struct dentry *dentry);
+	int (*path_mknod)(struct path *dir, struct dentry *dentry,
+				umode_t mode, unsigned int dev);
+	int (*path_truncate)(struct path *path);
+	int (*path_symlink)(struct path *dir, struct dentry *dentry,
+				const char *old_name);
+	int (*path_link)(struct dentry *old_dentry, struct path *new_dir,
+				struct dentry *new_dentry);
+	int (*path_rename)(struct path *old_dir, struct dentry *old_dentry,
+				struct path *new_dir,
+				struct dentry *new_dentry);
+	int (*path_chmod)(struct path *path, umode_t mode);
+	int (*path_chown)(struct path *path, kuid_t uid, kgid_t gid);
+	int (*path_chroot)(struct path *path);
+#endif
+
+	int (*inode_alloc_security)(struct inode *inode);
+	void (*inode_free_security)(struct inode *inode);
+	int (*inode_init_security)(struct inode *inode, struct inode *dir,
+					const struct qstr *qstr,
+					const char **name, void **value,
+					size_t *len);
+	int (*inode_create)(struct inode *dir, struct dentry *dentry,
+				umode_t mode);
+	int (*inode_link)(struct dentry *old_dentry, struct inode *dir,
+				struct dentry *new_dentry);
+	int (*inode_unlink)(struct inode *dir, struct dentry *dentry);
+	int (*inode_symlink)(struct inode *dir, struct dentry *dentry,
+				const char *old_name);
+	int (*inode_mkdir)(struct inode *dir, struct dentry *dentry,
+				umode_t mode);
+	int (*inode_rmdir)(struct inode *dir, struct dentry *dentry);
+	int (*inode_mknod)(struct inode *dir, struct dentry *dentry,
+				umode_t mode, dev_t dev);
+	int (*inode_rename)(struct inode *old_dir, struct dentry *old_dentry,
+				struct inode *new_dir,
+				struct dentry *new_dentry);
+	int (*inode_readlink)(struct dentry *dentry);
+	int (*inode_follow_link)(struct dentry *dentry, struct nameidata *nd);
+	int (*inode_permission)(struct inode *inode, int mask);
+	int (*inode_setattr)(struct dentry *dentry, struct iattr *attr);
+	int (*inode_getattr)(const struct path *path);
+	int (*inode_setxattr)(struct dentry *dentry, const char *name,
+				const void *value, size_t size, int flags);
+	void (*inode_post_setxattr)(struct dentry *dentry, const char *name,
+					const void *value, size_t size,
+					int flags);
+	int (*inode_getxattr)(struct dentry *dentry, const char *name);
+	int (*inode_listxattr)(struct dentry *dentry);
+	int (*inode_removexattr)(struct dentry *dentry, const char *name);
+	int (*inode_need_killpriv)(struct dentry *dentry);
+	int (*inode_killpriv)(struct dentry *dentry);
+	int (*inode_getsecurity)(const struct inode *inode, const char *name,
+					void **buffer, bool alloc);
+	int (*inode_setsecurity)(struct inode *inode, const char *name,
+					const void *value, size_t size,
+					int flags);
+	int (*inode_listsecurity)(struct inode *inode, char *buffer,
+					size_t buffer_size);
+	void (*inode_getsecid)(const struct inode *inode, u32 *secid);
+
+	int (*file_permission)(struct file *file, int mask);
+	int (*file_alloc_security)(struct file *file);
+	void (*file_free_security)(struct file *file);
+	int (*file_ioctl)(struct file *file, unsigned int cmd,
+				unsigned long arg);
+	int (*mmap_addr)(unsigned long addr);
+	int (*mmap_file)(struct file *file, unsigned long reqprot,
+				unsigned long prot, unsigned long flags);
+	int (*file_mprotect)(struct vm_area_struct *vma, unsigned long reqprot,
+				unsigned long prot);
+	int (*file_lock)(struct file *file, unsigned int cmd);
+	int (*file_fcntl)(struct file *file, unsigned int cmd,
+				unsigned long arg);
+	void (*file_set_fowner)(struct file *file);
+	int (*file_send_sigiotask)(struct task_struct *tsk,
+					struct fown_struct *fown, int sig);
+	int (*file_receive)(struct file *file);
+	int (*file_open)(struct file *file, const struct cred *cred);
+
+	int (*task_create)(unsigned long clone_flags);
+	void (*task_free)(struct task_struct *task);
+	int (*cred_alloc_blank)(struct cred *cred, gfp_t gfp);
+	void (*cred_free)(struct cred *cred);
+	int (*cred_prepare)(struct cred *new, const struct cred *old,
+				gfp_t gfp);
+	void (*cred_transfer)(struct cred *new, const struct cred *old);
+	int (*kernel_act_as)(struct cred *new, u32 secid);
+	int (*kernel_create_files_as)(struct cred *new, struct inode *inode);
+	int (*kernel_fw_from_file)(struct file *file, char *buf, size_t size);
+	int (*kernel_module_request)(char *kmod_name);
+	int (*kernel_module_from_file)(struct file *file);
+	int (*task_fix_setuid)(struct cred *new, const struct cred *old,
+				int flags);
+	int (*task_setpgid)(struct task_struct *p, pid_t pgid);
+	int (*task_getpgid)(struct task_struct *p);
+	int (*task_getsid)(struct task_struct *p);
+	void (*task_getsecid)(struct task_struct *p, u32 *secid);
+	int (*task_setnice)(struct task_struct *p, int nice);
+	int (*task_setioprio)(struct task_struct *p, int ioprio);
+	int (*task_getioprio)(struct task_struct *p);
+	int (*task_setrlimit)(struct task_struct *p, unsigned int resource,
+				struct rlimit *new_rlim);
+	int (*task_setscheduler)(struct task_struct *p);
+	int (*task_getscheduler)(struct task_struct *p);
+	int (*task_movememory)(struct task_struct *p);
+	int (*task_kill)(struct task_struct *p, struct siginfo *info,
+				int sig, u32 secid);
+	int (*task_wait)(struct task_struct *p);
+	int (*task_prctl)(int option, unsigned long arg2, unsigned long arg3,
+				unsigned long arg4, unsigned long arg5);
+	void (*task_to_inode)(struct task_struct *p, struct inode *inode);
+
+	int (*ipc_permission)(struct kern_ipc_perm *ipcp, short flag);
+	void (*ipc_getsecid)(struct kern_ipc_perm *ipcp, u32 *secid);
+
+	int (*msg_msg_alloc_security)(struct msg_msg *msg);
+	void (*msg_msg_free_security)(struct msg_msg *msg);
+
+	int (*msg_queue_alloc_security)(struct msg_queue *msq);
+	void (*msg_queue_free_security)(struct msg_queue *msq);
+	int (*msg_queue_associate)(struct msg_queue *msq, int msqflg);
+	int (*msg_queue_msgctl)(struct msg_queue *msq, int cmd);
+	int (*msg_queue_msgsnd)(struct msg_queue *msq, struct msg_msg *msg,
+				int msqflg);
+	int (*msg_queue_msgrcv)(struct msg_queue *msq, struct msg_msg *msg,
+				struct task_struct *target, long type,
+				int mode);
+
+	int (*shm_alloc_security)(struct shmid_kernel *shp);
+	void (*shm_free_security)(struct shmid_kernel *shp);
+	int (*shm_associate)(struct shmid_kernel *shp, int shmflg);
+	int (*shm_shmctl)(struct shmid_kernel *shp, int cmd);
+	int (*shm_shmat)(struct shmid_kernel *shp, char __user *shmaddr,
+				int shmflg);
+
+	int (*sem_alloc_security)(struct sem_array *sma);
+	void (*sem_free_security)(struct sem_array *sma);
+	int (*sem_associate)(struct sem_array *sma, int semflg);
+	int (*sem_semctl)(struct sem_array *sma, int cmd);
+	int (*sem_semop)(struct sem_array *sma, struct sembuf *sops,
+				unsigned nsops, int alter);
+
+	int (*netlink_send)(struct sock *sk, struct sk_buff *skb);
+
+	void (*d_instantiate)(struct dentry *dentry, struct inode *inode);
+
+	int (*getprocattr)(struct task_struct *p, char *name, char **value);
+	int (*setprocattr)(struct task_struct *p, char *name, void *value,
+				size_t size);
+	int (*ismaclabel)(const char *name);
+	int (*secid_to_secctx)(u32 secid, char **secdata, u32 *seclen);
+	int (*secctx_to_secid)(const char *secdata, u32 seclen, u32 *secid);
+	void (*release_secctx)(char *secdata, u32 seclen);
+
+	int (*inode_notifysecctx)(struct inode *inode, void *ctx, u32 ctxlen);
+	int (*inode_setsecctx)(struct dentry *dentry, void *ctx, u32 ctxlen);
+	int (*inode_getsecctx)(struct inode *inode, void **ctx, u32 *ctxlen);
+
+#ifdef CONFIG_SECURITY_NETWORK
+	int (*unix_stream_connect)(struct sock *sock, struct sock *other,
+					struct sock *newsk);
+	int (*unix_may_send)(struct socket *sock, struct socket *other);
+
+	int (*socket_create)(int family, int type, int protocol, int kern);
+	int (*socket_post_create)(struct socket *sock, int family, int type,
+					int protocol, int kern);
+	int (*socket_bind)(struct socket *sock, struct sockaddr *address,
+				int addrlen);
+	int (*socket_connect)(struct socket *sock, struct sockaddr *address,
+				int addrlen);
+	int (*socket_listen)(struct socket *sock, int backlog);
+	int (*socket_accept)(struct socket *sock, struct socket *newsock);
+	int (*socket_sendmsg)(struct socket *sock, struct msghdr *msg,
+				int size);
+	int (*socket_recvmsg)(struct socket *sock, struct msghdr *msg,
+				int size, int flags);
+	int (*socket_getsockname)(struct socket *sock);
+	int (*socket_getpeername)(struct socket *sock);
+	int (*socket_getsockopt)(struct socket *sock, int level, int optname);
+	int (*socket_setsockopt)(struct socket *sock, int level, int optname);
+	int (*socket_shutdown)(struct socket *sock, int how);
+	int (*socket_sock_rcv_skb)(struct sock *sk, struct sk_buff *skb);
+	int (*socket_getpeersec_stream)(struct socket *sock,
+					char __user *optval,
+					int __user *optlen, unsigned len);
+	int (*socket_getpeersec_dgram)(struct socket *sock,
+					struct sk_buff *skb, u32 *secid);
+	int (*sk_alloc_security)(struct sock *sk, int family, gfp_t priority);
+	void (*sk_free_security)(struct sock *sk);
+	void (*sk_clone_security)(const struct sock *sk, struct sock *newsk);
+	void (*sk_getsecid)(struct sock *sk, u32 *secid);
+	void (*sock_graft)(struct sock *sk, struct socket *parent);
+	int (*inet_conn_request)(struct sock *sk, struct sk_buff *skb,
+					struct request_sock *req);
+	void (*inet_csk_clone)(struct sock *newsk,
+				const struct request_sock *req);
+	void (*inet_conn_established)(struct sock *sk, struct sk_buff *skb);
+	int (*secmark_relabel_packet)(u32 secid);
+	void (*secmark_refcount_inc)(void);
+	void (*secmark_refcount_dec)(void);
+	void (*req_classify_flow)(const struct request_sock *req,
+					struct flowi *fl);
+	int (*tun_dev_alloc_security)(void **security);
+	void (*tun_dev_free_security)(void *security);
+	int (*tun_dev_create)(void);
+	int (*tun_dev_attach_queue)(void *security);
+	int (*tun_dev_attach)(struct sock *sk, void *security);
+	int (*tun_dev_open)(void *security);
+#endif	/* CONFIG_SECURITY_NETWORK */
+
+#ifdef CONFIG_SECURITY_NETWORK_XFRM
+	int (*xfrm_policy_alloc_security)(struct xfrm_sec_ctx **ctxp,
+					  struct xfrm_user_sec_ctx *sec_ctx,
+						gfp_t gfp);
+	int (*xfrm_policy_clone_security)(struct xfrm_sec_ctx *old_ctx,
+						struct xfrm_sec_ctx **new_ctx);
+	void (*xfrm_policy_free_security)(struct xfrm_sec_ctx *ctx);
+	int (*xfrm_policy_delete_security)(struct xfrm_sec_ctx *ctx);
+	int (*xfrm_state_alloc)(struct xfrm_state *x,
+				struct xfrm_user_sec_ctx *sec_ctx);
+	int (*xfrm_state_alloc_acquire)(struct xfrm_state *x,
+					struct xfrm_sec_ctx *polsec,
+					u32 secid);
+	void (*xfrm_state_free_security)(struct xfrm_state *x);
+	int (*xfrm_state_delete_security)(struct xfrm_state *x);
+	int (*xfrm_policy_lookup)(struct xfrm_sec_ctx *ctx, u32 fl_secid,
+					u8 dir);
+	int (*xfrm_state_pol_flow_match)(struct xfrm_state *x,
+						struct xfrm_policy *xp,
+						const struct flowi *fl);
+	int (*xfrm_decode_session)(struct sk_buff *skb, u32 *secid, int ckall);
+#endif	/* CONFIG_SECURITY_NETWORK_XFRM */
+
+	/* key management security hooks */
+#ifdef CONFIG_KEYS
+	int (*key_alloc)(struct key *key, const struct cred *cred,
+				unsigned long flags);
+	void (*key_free)(struct key *key);
+	int (*key_permission)(key_ref_t key_ref, const struct cred *cred,
+				unsigned perm);
+	int (*key_getsecurity)(struct key *key, char **_buffer);
+#endif	/* CONFIG_KEYS */
+
+#ifdef CONFIG_AUDIT
+	int (*audit_rule_init)(u32 field, u32 op, char *rulestr,
+				void **lsmrule);
+	int (*audit_rule_known)(struct audit_krule *krule);
+	int (*audit_rule_match)(u32 secid, u32 field, u32 op, void *lsmrule,
+				struct audit_context *actx);
+	void (*audit_rule_free)(void *lsmrule);
+#endif /* CONFIG_AUDIT */
+};
+
+/* prototypes */
+extern int security_module_enable(struct security_operations *ops);
+extern int register_security(struct security_operations *ops);
+extern void __init security_fixup_ops(struct security_operations *ops);
+extern void reset_security_ops(void);
+
+#endif /* CONFIG_SECURITY */
+
+#endif /* ! __LINUX_LSM_HOOKS_H */
diff --git a/include/linux/security.h b/include/linux/security.h
index 18264ea9e314..f3d42c636f27 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -116,8 +116,6 @@ struct seq_file;
 
 extern int cap_netlink_send(struct sock *sk, struct sk_buff *skb);
 
-void reset_security_ops(void);
-
 #ifdef CONFIG_MMU
 extern unsigned long mmap_min_addr;
 extern unsigned long dac_mmap_min_addr;
@@ -1457,312 +1455,9 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@ctxlen points to the place to put the length of @ctx.
  * This is the main security structure.
  */
-struct security_operations {
-	char name[SECURITY_NAME_MAX + 1];
-
-	int (*binder_set_context_mgr) (struct task_struct *mgr);
-	int (*binder_transaction) (struct task_struct *from,
-				   struct task_struct *to);
-	int (*binder_transfer_binder) (struct task_struct *from,
-				       struct task_struct *to);
-	int (*binder_transfer_file) (struct task_struct *from,
-				     struct task_struct *to, struct file *file);
-
-	int (*ptrace_access_check) (struct task_struct *child, unsigned int mode);
-	int (*ptrace_traceme) (struct task_struct *parent);
-	int (*capget) (struct task_struct *target,
-		       kernel_cap_t *effective,
-		       kernel_cap_t *inheritable, kernel_cap_t *permitted);
-	int (*capset) (struct cred *new,
-		       const struct cred *old,
-		       const kernel_cap_t *effective,
-		       const kernel_cap_t *inheritable,
-		       const kernel_cap_t *permitted);
-	int (*capable) (const struct cred *cred, struct user_namespace *ns,
-			int cap, int audit);
-	int (*quotactl) (int cmds, int type, int id, struct super_block *sb);
-	int (*quota_on) (struct dentry *dentry);
-	int (*syslog) (int type);
-	int (*settime) (const struct timespec *ts, const struct timezone *tz);
-	int (*vm_enough_memory) (struct mm_struct *mm, long pages);
-
-	int (*bprm_set_creds) (struct linux_binprm *bprm);
-	int (*bprm_check_security) (struct linux_binprm *bprm);
-	int (*bprm_secureexec) (struct linux_binprm *bprm);
-	void (*bprm_committing_creds) (struct linux_binprm *bprm);
-	void (*bprm_committed_creds) (struct linux_binprm *bprm);
-
-	int (*sb_alloc_security) (struct super_block *sb);
-	void (*sb_free_security) (struct super_block *sb);
-	int (*sb_copy_data) (char *orig, char *copy);
-	int (*sb_remount) (struct super_block *sb, void *data);
-	int (*sb_kern_mount) (struct super_block *sb, int flags, void *data);
-	int (*sb_show_options) (struct seq_file *m, struct super_block *sb);
-	int (*sb_statfs) (struct dentry *dentry);
-	int (*sb_mount) (const char *dev_name, struct path *path,
-			 const char *type, unsigned long flags, void *data);
-	int (*sb_umount) (struct vfsmount *mnt, int flags);
-	int (*sb_pivotroot) (struct path *old_path,
-			     struct path *new_path);
-	int (*sb_set_mnt_opts) (struct super_block *sb,
-				struct security_mnt_opts *opts,
-				unsigned long kern_flags,
-				unsigned long *set_kern_flags);
-	int (*sb_clone_mnt_opts) (const struct super_block *oldsb,
-				   struct super_block *newsb);
-	int (*sb_parse_opts_str) (char *options, struct security_mnt_opts *opts);
-	int (*dentry_init_security) (struct dentry *dentry, int mode,
-					struct qstr *name, void **ctx,
-					u32 *ctxlen);
-
-
-#ifdef CONFIG_SECURITY_PATH
-	int (*path_unlink) (struct path *dir, struct dentry *dentry);
-	int (*path_mkdir) (struct path *dir, struct dentry *dentry, umode_t mode);
-	int (*path_rmdir) (struct path *dir, struct dentry *dentry);
-	int (*path_mknod) (struct path *dir, struct dentry *dentry, umode_t mode,
-			   unsigned int dev);
-	int (*path_truncate) (struct path *path);
-	int (*path_symlink) (struct path *dir, struct dentry *dentry,
-			     const char *old_name);
-	int (*path_link) (struct dentry *old_dentry, struct path *new_dir,
-			  struct dentry *new_dentry);
-	int (*path_rename) (struct path *old_dir, struct dentry *old_dentry,
-			    struct path *new_dir, struct dentry *new_dentry);
-	int (*path_chmod) (struct path *path, umode_t mode);
-	int (*path_chown) (struct path *path, kuid_t uid, kgid_t gid);
-	int (*path_chroot) (struct path *path);
-#endif
-
-	int (*inode_alloc_security) (struct inode *inode);
-	void (*inode_free_security) (struct inode *inode);
-	int (*inode_init_security) (struct inode *inode, struct inode *dir,
-				    const struct qstr *qstr, const char **name,
-				    void **value, size_t *len);
-	int (*inode_create) (struct inode *dir,
-			     struct dentry *dentry, umode_t mode);
-	int (*inode_link) (struct dentry *old_dentry,
-			   struct inode *dir, struct dentry *new_dentry);
-	int (*inode_unlink) (struct inode *dir, struct dentry *dentry);
-	int (*inode_symlink) (struct inode *dir,
-			      struct dentry *dentry, const char *old_name);
-	int (*inode_mkdir) (struct inode *dir, struct dentry *dentry, umode_t mode);
-	int (*inode_rmdir) (struct inode *dir, struct dentry *dentry);
-	int (*inode_mknod) (struct inode *dir, struct dentry *dentry,
-			    umode_t mode, dev_t dev);
-	int (*inode_rename) (struct inode *old_dir, struct dentry *old_dentry,
-			     struct inode *new_dir, struct dentry *new_dentry);
-	int (*inode_readlink) (struct dentry *dentry);
-	int (*inode_follow_link) (struct dentry *dentry, struct nameidata *nd);
-	int (*inode_permission) (struct inode *inode, int mask);
-	int (*inode_setattr)	(struct dentry *dentry, struct iattr *attr);
-	int (*inode_getattr) (const struct path *path);
-	int (*inode_setxattr) (struct dentry *dentry, const char *name,
-			       const void *value, size_t size, int flags);
-	void (*inode_post_setxattr) (struct dentry *dentry, const char *name,
-				     const void *value, size_t size, int flags);
-	int (*inode_getxattr) (struct dentry *dentry, const char *name);
-	int (*inode_listxattr) (struct dentry *dentry);
-	int (*inode_removexattr) (struct dentry *dentry, const char *name);
-	int (*inode_need_killpriv) (struct dentry *dentry);
-	int (*inode_killpriv) (struct dentry *dentry);
-	int (*inode_getsecurity) (const struct inode *inode, const char *name, void **buffer, bool alloc);
-	int (*inode_setsecurity) (struct inode *inode, const char *name, const void *value, size_t size, int flags);
-	int (*inode_listsecurity) (struct inode *inode, char *buffer, size_t buffer_size);
-	void (*inode_getsecid) (const struct inode *inode, u32 *secid);
-
-	int (*file_permission) (struct file *file, int mask);
-	int (*file_alloc_security) (struct file *file);
-	void (*file_free_security) (struct file *file);
-	int (*file_ioctl) (struct file *file, unsigned int cmd,
-			   unsigned long arg);
-	int (*mmap_addr) (unsigned long addr);
-	int (*mmap_file) (struct file *file,
-			  unsigned long reqprot, unsigned long prot,
-			  unsigned long flags);
-	int (*file_mprotect) (struct vm_area_struct *vma,
-			      unsigned long reqprot,
-			      unsigned long prot);
-	int (*file_lock) (struct file *file, unsigned int cmd);
-	int (*file_fcntl) (struct file *file, unsigned int cmd,
-			   unsigned long arg);
-	void (*file_set_fowner) (struct file *file);
-	int (*file_send_sigiotask) (struct task_struct *tsk,
-				    struct fown_struct *fown, int sig);
-	int (*file_receive) (struct file *file);
-	int (*file_open) (struct file *file, const struct cred *cred);
-
-	int (*task_create) (unsigned long clone_flags);
-	void (*task_free) (struct task_struct *task);
-	int (*cred_alloc_blank) (struct cred *cred, gfp_t gfp);
-	void (*cred_free) (struct cred *cred);
-	int (*cred_prepare)(struct cred *new, const struct cred *old,
-			    gfp_t gfp);
-	void (*cred_transfer)(struct cred *new, const struct cred *old);
-	int (*kernel_act_as)(struct cred *new, u32 secid);
-	int (*kernel_create_files_as)(struct cred *new, struct inode *inode);
-	int (*kernel_fw_from_file)(struct file *file, char *buf, size_t size);
-	int (*kernel_module_request)(char *kmod_name);
-	int (*kernel_module_from_file)(struct file *file);
-	int (*task_fix_setuid) (struct cred *new, const struct cred *old,
-				int flags);
-	int (*task_setpgid) (struct task_struct *p, pid_t pgid);
-	int (*task_getpgid) (struct task_struct *p);
-	int (*task_getsid) (struct task_struct *p);
-	void (*task_getsecid) (struct task_struct *p, u32 *secid);
-	int (*task_setnice) (struct task_struct *p, int nice);
-	int (*task_setioprio) (struct task_struct *p, int ioprio);
-	int (*task_getioprio) (struct task_struct *p);
-	int (*task_setrlimit) (struct task_struct *p, unsigned int resource,
-			struct rlimit *new_rlim);
-	int (*task_setscheduler) (struct task_struct *p);
-	int (*task_getscheduler) (struct task_struct *p);
-	int (*task_movememory) (struct task_struct *p);
-	int (*task_kill) (struct task_struct *p,
-			  struct siginfo *info, int sig, u32 secid);
-	int (*task_wait) (struct task_struct *p);
-	int (*task_prctl) (int option, unsigned long arg2,
-			   unsigned long arg3, unsigned long arg4,
-			   unsigned long arg5);
-	void (*task_to_inode) (struct task_struct *p, struct inode *inode);
-
-	int (*ipc_permission) (struct kern_ipc_perm *ipcp, short flag);
-	void (*ipc_getsecid) (struct kern_ipc_perm *ipcp, u32 *secid);
-
-	int (*msg_msg_alloc_security) (struct msg_msg *msg);
-	void (*msg_msg_free_security) (struct msg_msg *msg);
-
-	int (*msg_queue_alloc_security) (struct msg_queue *msq);
-	void (*msg_queue_free_security) (struct msg_queue *msq);
-	int (*msg_queue_associate) (struct msg_queue *msq, int msqflg);
-	int (*msg_queue_msgctl) (struct msg_queue *msq, int cmd);
-	int (*msg_queue_msgsnd) (struct msg_queue *msq,
-				 struct msg_msg *msg, int msqflg);
-	int (*msg_queue_msgrcv) (struct msg_queue *msq,
-				 struct msg_msg *msg,
-				 struct task_struct *target,
-				 long type, int mode);
-
-	int (*shm_alloc_security) (struct shmid_kernel *shp);
-	void (*shm_free_security) (struct shmid_kernel *shp);
-	int (*shm_associate) (struct shmid_kernel *shp, int shmflg);
-	int (*shm_shmctl) (struct shmid_kernel *shp, int cmd);
-	int (*shm_shmat) (struct shmid_kernel *shp,
-			  char __user *shmaddr, int shmflg);
-
-	int (*sem_alloc_security) (struct sem_array *sma);
-	void (*sem_free_security) (struct sem_array *sma);
-	int (*sem_associate) (struct sem_array *sma, int semflg);
-	int (*sem_semctl) (struct sem_array *sma, int cmd);
-	int (*sem_semop) (struct sem_array *sma,
-			  struct sembuf *sops, unsigned nsops, int alter);
-
-	int (*netlink_send) (struct sock *sk, struct sk_buff *skb);
-
-	void (*d_instantiate) (struct dentry *dentry, struct inode *inode);
-
-	int (*getprocattr) (struct task_struct *p, char *name, char **value);
-	int (*setprocattr) (struct task_struct *p, char *name, void *value, size_t size);
-	int (*ismaclabel) (const char *name);
-	int (*secid_to_secctx) (u32 secid, char **secdata, u32 *seclen);
-	int (*secctx_to_secid) (const char *secdata, u32 seclen, u32 *secid);
-	void (*release_secctx) (char *secdata, u32 seclen);
-
-	int (*inode_notifysecctx)(struct inode *inode, void *ctx, u32 ctxlen);
-	int (*inode_setsecctx)(struct dentry *dentry, void *ctx, u32 ctxlen);
-	int (*inode_getsecctx)(struct inode *inode, void **ctx, u32 *ctxlen);
-
-#ifdef CONFIG_SECURITY_NETWORK
-	int (*unix_stream_connect) (struct sock *sock, struct sock *other, struct sock *newsk);
-	int (*unix_may_send) (struct socket *sock, struct socket *other);
-
-	int (*socket_create) (int family, int type, int protocol, int kern);
-	int (*socket_post_create) (struct socket *sock, int family,
-				   int type, int protocol, int kern);
-	int (*socket_bind) (struct socket *sock,
-			    struct sockaddr *address, int addrlen);
-	int (*socket_connect) (struct socket *sock,
-			       struct sockaddr *address, int addrlen);
-	int (*socket_listen) (struct socket *sock, int backlog);
-	int (*socket_accept) (struct socket *sock, struct socket *newsock);
-	int (*socket_sendmsg) (struct socket *sock,
-			       struct msghdr *msg, int size);
-	int (*socket_recvmsg) (struct socket *sock,
-			       struct msghdr *msg, int size, int flags);
-	int (*socket_getsockname) (struct socket *sock);
-	int (*socket_getpeername) (struct socket *sock);
-	int (*socket_getsockopt) (struct socket *sock, int level, int optname);
-	int (*socket_setsockopt) (struct socket *sock, int level, int optname);
-	int (*socket_shutdown) (struct socket *sock, int how);
-	int (*socket_sock_rcv_skb) (struct sock *sk, struct sk_buff *skb);
-	int (*socket_getpeersec_stream) (struct socket *sock, char __user *optval, int __user *optlen, unsigned len);
-	int (*socket_getpeersec_dgram) (struct socket *sock, struct sk_buff *skb, u32 *secid);
-	int (*sk_alloc_security) (struct sock *sk, int family, gfp_t priority);
-	void (*sk_free_security) (struct sock *sk);
-	void (*sk_clone_security) (const struct sock *sk, struct sock *newsk);
-	void (*sk_getsecid) (struct sock *sk, u32 *secid);
-	void (*sock_graft) (struct sock *sk, struct socket *parent);
-	int (*inet_conn_request) (struct sock *sk, struct sk_buff *skb,
-				  struct request_sock *req);
-	void (*inet_csk_clone) (struct sock *newsk, const struct request_sock *req);
-	void (*inet_conn_established) (struct sock *sk, struct sk_buff *skb);
-	int (*secmark_relabel_packet) (u32 secid);
-	void (*secmark_refcount_inc) (void);
-	void (*secmark_refcount_dec) (void);
-	void (*req_classify_flow) (const struct request_sock *req, struct flowi *fl);
-	int (*tun_dev_alloc_security) (void **security);
-	void (*tun_dev_free_security) (void *security);
-	int (*tun_dev_create) (void);
-	int (*tun_dev_attach_queue) (void *security);
-	int (*tun_dev_attach) (struct sock *sk, void *security);
-	int (*tun_dev_open) (void *security);
-#endif	/* CONFIG_SECURITY_NETWORK */
-
-#ifdef CONFIG_SECURITY_NETWORK_XFRM
-	int (*xfrm_policy_alloc_security) (struct xfrm_sec_ctx **ctxp,
-			struct xfrm_user_sec_ctx *sec_ctx, gfp_t gfp);
-	int (*xfrm_policy_clone_security) (struct xfrm_sec_ctx *old_ctx, struct xfrm_sec_ctx **new_ctx);
-	void (*xfrm_policy_free_security) (struct xfrm_sec_ctx *ctx);
-	int (*xfrm_policy_delete_security) (struct xfrm_sec_ctx *ctx);
-	int (*xfrm_state_alloc) (struct xfrm_state *x,
-				 struct xfrm_user_sec_ctx *sec_ctx);
-	int (*xfrm_state_alloc_acquire) (struct xfrm_state *x,
-					 struct xfrm_sec_ctx *polsec,
-					 u32 secid);
-	void (*xfrm_state_free_security) (struct xfrm_state *x);
-	int (*xfrm_state_delete_security) (struct xfrm_state *x);
-	int (*xfrm_policy_lookup) (struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir);
-	int (*xfrm_state_pol_flow_match) (struct xfrm_state *x,
-					  struct xfrm_policy *xp,
-					  const struct flowi *fl);
-	int (*xfrm_decode_session) (struct sk_buff *skb, u32 *secid, int ckall);
-#endif	/* CONFIG_SECURITY_NETWORK_XFRM */
-
-	/* key management security hooks */
-#ifdef CONFIG_KEYS
-	int (*key_alloc) (struct key *key, const struct cred *cred, unsigned long flags);
-	void (*key_free) (struct key *key);
-	int (*key_permission) (key_ref_t key_ref,
-			       const struct cred *cred,
-			       unsigned perm);
-	int (*key_getsecurity)(struct key *key, char **_buffer);
-#endif	/* CONFIG_KEYS */
-
-#ifdef CONFIG_AUDIT
-	int (*audit_rule_init) (u32 field, u32 op, char *rulestr, void **lsmrule);
-	int (*audit_rule_known) (struct audit_krule *krule);
-	int (*audit_rule_match) (u32 secid, u32 field, u32 op, void *lsmrule,
-				 struct audit_context *actx);
-	void (*audit_rule_free) (void *lsmrule);
-#endif /* CONFIG_AUDIT */
-};
 
 /* prototypes */
 extern int security_init(void);
-extern int security_module_enable(struct security_operations *ops);
-extern int register_security(struct security_operations *ops);
-extern void __init security_fixup_ops(struct security_operations *ops);
-
 
 /* Security operations */
 int security_binder_set_context_mgr(struct task_struct *mgr);
-- 
cgit v1.2.3


From fe7bb272ee72b5cc377e02b556d0d718d12bbede Mon Sep 17 00:00:00 2001
From: Casey Schaufler <casey@schaufler-ca.com>
Date: Sat, 2 May 2015 15:10:53 -0700
Subject: LSM: Add the comment to lsm_hooks.h

Add the large comment describing the content of the
security_operations structure to lsm_hooks.h. This
wasn't done in the previous (1/7) patch because it
would have exceeded the mail list size limits.

Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
Acked-by: John Johansen <john.johansen@canonical.com>
Acked-by: Kees Cook <keescook@chromium.org>
Acked-by: Paul Moore <paul@paul-moore.com>
Acked-by:  Stephen Smalley <sds@tycho.nsa.gov>
Acked-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: James Morris <james.l.morris@oracle.com>
---
 include/linux/lsm_hooks.h | 1279 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 1279 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index c60f81b2d18c..b4c91de510c2 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -31,6 +31,1285 @@
 
 #ifdef CONFIG_SECURITY
 
+/**
+ * struct security_operations - main security structure
+ *
+ * Security module identifier.
+ *
+ * @name:
+ *	A string that acts as a unique identifier for the LSM with max number
+ *	of characters = SECURITY_NAME_MAX.
+ *
+ * Security hooks for program execution operations.
+ *
+ * @bprm_set_creds:
+ *	Save security information in the bprm->security field, typically based
+ *	on information about the bprm->file, for later use by the apply_creds
+ *	hook.  This hook may also optionally check permissions (e.g. for
+ *	transitions between security domains).
+ *	This hook may be called multiple times during a single execve, e.g. for
+ *	interpreters.  The hook can tell whether it has already been called by
+ *	checking to see if @bprm->security is non-NULL.  If so, then the hook
+ *	may decide either to retain the security information saved earlier or
+ *	to replace it.
+ *	@bprm contains the linux_binprm structure.
+ *	Return 0 if the hook is successful and permission is granted.
+ * @bprm_check_security:
+ *	This hook mediates the point when a search for a binary handler will
+ *	begin.  It allows a check the @bprm->security value which is set in the
+ *	preceding set_creds call.  The primary difference from set_creds is
+ *	that the argv list and envp list are reliably available in @bprm.  This
+ *	hook may be called multiple times during a single execve; and in each
+ *	pass set_creds is called first.
+ *	@bprm contains the linux_binprm structure.
+ *	Return 0 if the hook is successful and permission is granted.
+ * @bprm_committing_creds:
+ *	Prepare to install the new security attributes of a process being
+ *	transformed by an execve operation, based on the old credentials
+ *	pointed to by @current->cred and the information set in @bprm->cred by
+ *	the bprm_set_creds hook.  @bprm points to the linux_binprm structure.
+ *	This hook is a good place to perform state changes on the process such
+ *	as closing open file descriptors to which access will no longer be
+ *	granted when the attributes are changed.  This is called immediately
+ *	before commit_creds().
+ * @bprm_committed_creds:
+ *	Tidy up after the installation of the new security attributes of a
+ *	process being transformed by an execve operation.  The new credentials
+ *	have, by this point, been set to @current->cred.  @bprm points to the
+ *	linux_binprm structure.  This hook is a good place to perform state
+ *	changes on the process such as clearing out non-inheritable signal
+ *	state.  This is called immediately after commit_creds().
+ * @bprm_secureexec:
+ *	Return a boolean value (0 or 1) indicating whether a "secure exec"
+ *	is required.  The flag is passed in the auxiliary table
+ *	on the initial stack to the ELF interpreter to indicate whether libc
+ *	should enable secure mode.
+ *	@bprm contains the linux_binprm structure.
+ *
+ * Security hooks for filesystem operations.
+ *
+ * @sb_alloc_security:
+ *	Allocate and attach a security structure to the sb->s_security field.
+ *	The s_security field is initialized to NULL when the structure is
+ *	allocated.
+ *	@sb contains the super_block structure to be modified.
+ *	Return 0 if operation was successful.
+ * @sb_free_security:
+ *	Deallocate and clear the sb->s_security field.
+ *	@sb contains the super_block structure to be modified.
+ * @sb_statfs:
+ *	Check permission before obtaining filesystem statistics for the @mnt
+ *	mountpoint.
+ *	@dentry is a handle on the superblock for the filesystem.
+ *	Return 0 if permission is granted.
+ * @sb_mount:
+ *	Check permission before an object specified by @dev_name is mounted on
+ *	the mount point named by @nd.  For an ordinary mount, @dev_name
+ *	identifies a device if the file system type requires a device.  For a
+ *	remount (@flags & MS_REMOUNT), @dev_name is irrelevant.  For a
+ *	loopback/bind mount (@flags & MS_BIND), @dev_name identifies the
+ *	pathname of the object being mounted.
+ *	@dev_name contains the name for object being mounted.
+ *	@path contains the path for mount point object.
+ *	@type contains the filesystem type.
+ *	@flags contains the mount flags.
+ *	@data contains the filesystem-specific data.
+ *	Return 0 if permission is granted.
+ * @sb_copy_data:
+ *	Allow mount option data to be copied prior to parsing by the filesystem,
+ *	so that the security module can extract security-specific mount
+ *	options cleanly (a filesystem may modify the data e.g. with strsep()).
+ *	This also allows the original mount data to be stripped of security-
+ *	specific options to avoid having to make filesystems aware of them.
+ *	@type the type of filesystem being mounted.
+ *	@orig the original mount data copied from userspace.
+ *	@copy copied data which will be passed to the security module.
+ *	Returns 0 if the copy was successful.
+ * @sb_remount:
+ *	Extracts security system specific mount options and verifies no changes
+ *	are being made to those options.
+ *	@sb superblock being remounted
+ *	@data contains the filesystem-specific data.
+ *	Return 0 if permission is granted.
+ * @sb_umount:
+ *	Check permission before the @mnt file system is unmounted.
+ *	@mnt contains the mounted file system.
+ *	@flags contains the unmount flags, e.g. MNT_FORCE.
+ *	Return 0 if permission is granted.
+ * @sb_pivotroot:
+ *	Check permission before pivoting the root filesystem.
+ *	@old_path contains the path for the new location of the
+ *	current root (put_old).
+ *	@new_path contains the path for the new root (new_root).
+ *	Return 0 if permission is granted.
+ * @sb_set_mnt_opts:
+ *	Set the security relevant mount options used for a superblock
+ *	@sb the superblock to set security mount options for
+ *	@opts binary data structure containing all lsm mount data
+ * @sb_clone_mnt_opts:
+ *	Copy all security options from a given superblock to another
+ *	@oldsb old superblock which contain information to clone
+ *	@newsb new superblock which needs filled in
+ * @sb_parse_opts_str:
+ *	Parse a string of security data filling in the opts structure
+ *	@options string containing all mount options known by the LSM
+ *	@opts binary data structure usable by the LSM
+ * @dentry_init_security:
+ *	Compute a context for a dentry as the inode is not yet available
+ *	since NFSv4 has no label backed by an EA anyway.
+ *	@dentry dentry to use in calculating the context.
+ *	@mode mode used to determine resource type.
+ *	@name name of the last path component used to create file
+ *	@ctx pointer to place the pointer to the resulting context in.
+ *	@ctxlen point to place the length of the resulting context.
+ *
+ *
+ * Security hooks for inode operations.
+ *
+ * @inode_alloc_security:
+ *	Allocate and attach a security structure to @inode->i_security.  The
+ *	i_security field is initialized to NULL when the inode structure is
+ *	allocated.
+ *	@inode contains the inode structure.
+ *	Return 0 if operation was successful.
+ * @inode_free_security:
+ *	@inode contains the inode structure.
+ *	Deallocate the inode security structure and set @inode->i_security to
+ *	NULL.
+ * @inode_init_security:
+ *	Obtain the security attribute name suffix and value to set on a newly
+ *	created inode and set up the incore security field for the new inode.
+ *	This hook is called by the fs code as part of the inode creation
+ *	transaction and provides for atomic labeling of the inode, unlike
+ *	the post_create/mkdir/... hooks called by the VFS.  The hook function
+ *	is expected to allocate the name and value via kmalloc, with the caller
+ *	being responsible for calling kfree after using them.
+ *	If the security module does not use security attributes or does
+ *	not wish to put a security attribute on this particular inode,
+ *	then it should return -EOPNOTSUPP to skip this processing.
+ *	@inode contains the inode structure of the newly created inode.
+ *	@dir contains the inode structure of the parent directory.
+ *	@qstr contains the last path component of the new object
+ *	@name will be set to the allocated name suffix (e.g. selinux).
+ *	@value will be set to the allocated attribute value.
+ *	@len will be set to the length of the value.
+ *	Returns 0 if @name and @value have been successfully set,
+ *		-EOPNOTSUPP if no security attribute is needed, or
+ *		-ENOMEM on memory allocation failure.
+ * @inode_create:
+ *	Check permission to create a regular file.
+ *	@dir contains inode structure of the parent of the new file.
+ *	@dentry contains the dentry structure for the file to be created.
+ *	@mode contains the file mode of the file to be created.
+ *	Return 0 if permission is granted.
+ * @inode_link:
+ *	Check permission before creating a new hard link to a file.
+ *	@old_dentry contains the dentry structure for an existing
+ *	link to the file.
+ *	@dir contains the inode structure of the parent directory
+ *	of the new link.
+ *	@new_dentry contains the dentry structure for the new link.
+ *	Return 0 if permission is granted.
+ * @path_link:
+ *	Check permission before creating a new hard link to a file.
+ *	@old_dentry contains the dentry structure for an existing link
+ *	to the file.
+ *	@new_dir contains the path structure of the parent directory of
+ *	the new link.
+ *	@new_dentry contains the dentry structure for the new link.
+ *	Return 0 if permission is granted.
+ * @inode_unlink:
+ *	Check the permission to remove a hard link to a file.
+ *	@dir contains the inode structure of parent directory of the file.
+ *	@dentry contains the dentry structure for file to be unlinked.
+ *	Return 0 if permission is granted.
+ * @path_unlink:
+ *	Check the permission to remove a hard link to a file.
+ *	@dir contains the path structure of parent directory of the file.
+ *	@dentry contains the dentry structure for file to be unlinked.
+ *	Return 0 if permission is granted.
+ * @inode_symlink:
+ *	Check the permission to create a symbolic link to a file.
+ *	@dir contains the inode structure of parent directory of
+ *	the symbolic link.
+ *	@dentry contains the dentry structure of the symbolic link.
+ *	@old_name contains the pathname of file.
+ *	Return 0 if permission is granted.
+ * @path_symlink:
+ *	Check the permission to create a symbolic link to a file.
+ *	@dir contains the path structure of parent directory of
+ *	the symbolic link.
+ *	@dentry contains the dentry structure of the symbolic link.
+ *	@old_name contains the pathname of file.
+ *	Return 0 if permission is granted.
+ * @inode_mkdir:
+ *	Check permissions to create a new directory in the existing directory
+ *	associated with inode structure @dir.
+ *	@dir contains the inode structure of parent of the directory
+ *	to be created.
+ *	@dentry contains the dentry structure of new directory.
+ *	@mode contains the mode of new directory.
+ *	Return 0 if permission is granted.
+ * @path_mkdir:
+ *	Check permissions to create a new directory in the existing directory
+ *	associated with path structure @path.
+ *	@dir contains the path structure of parent of the directory
+ *	to be created.
+ *	@dentry contains the dentry structure of new directory.
+ *	@mode contains the mode of new directory.
+ *	Return 0 if permission is granted.
+ * @inode_rmdir:
+ *	Check the permission to remove a directory.
+ *	@dir contains the inode structure of parent of the directory
+ *	to be removed.
+ *	@dentry contains the dentry structure of directory to be removed.
+ *	Return 0 if permission is granted.
+ * @path_rmdir:
+ *	Check the permission to remove a directory.
+ *	@dir contains the path structure of parent of the directory to be
+ *	removed.
+ *	@dentry contains the dentry structure of directory to be removed.
+ *	Return 0 if permission is granted.
+ * @inode_mknod:
+ *	Check permissions when creating a special file (or a socket or a fifo
+ *	file created via the mknod system call).  Note that if mknod operation
+ *	is being done for a regular file, then the create hook will be called
+ *	and not this hook.
+ *	@dir contains the inode structure of parent of the new file.
+ *	@dentry contains the dentry structure of the new file.
+ *	@mode contains the mode of the new file.
+ *	@dev contains the device number.
+ *	Return 0 if permission is granted.
+ * @path_mknod:
+ *	Check permissions when creating a file. Note that this hook is called
+ *	even if mknod operation is being done for a regular file.
+ *	@dir contains the path structure of parent of the new file.
+ *	@dentry contains the dentry structure of the new file.
+ *	@mode contains the mode of the new file.
+ *	@dev contains the undecoded device number. Use new_decode_dev() to get
+ *	the decoded device number.
+ *	Return 0 if permission is granted.
+ * @inode_rename:
+ *	Check for permission to rename a file or directory.
+ *	@old_dir contains the inode structure for parent of the old link.
+ *	@old_dentry contains the dentry structure of the old link.
+ *	@new_dir contains the inode structure for parent of the new link.
+ *	@new_dentry contains the dentry structure of the new link.
+ *	Return 0 if permission is granted.
+ * @path_rename:
+ *	Check for permission to rename a file or directory.
+ *	@old_dir contains the path structure for parent of the old link.
+ *	@old_dentry contains the dentry structure of the old link.
+ *	@new_dir contains the path structure for parent of the new link.
+ *	@new_dentry contains the dentry structure of the new link.
+ *	Return 0 if permission is granted.
+ * @path_chmod:
+ *	Check for permission to change DAC's permission of a file or directory.
+ *	@dentry contains the dentry structure.
+ *	@mnt contains the vfsmnt structure.
+ *	@mode contains DAC's mode.
+ *	Return 0 if permission is granted.
+ * @path_chown:
+ *	Check for permission to change owner/group of a file or directory.
+ *	@path contains the path structure.
+ *	@uid contains new owner's ID.
+ *	@gid contains new group's ID.
+ *	Return 0 if permission is granted.
+ * @path_chroot:
+ *	Check for permission to change root directory.
+ *	@path contains the path structure.
+ *	Return 0 if permission is granted.
+ * @inode_readlink:
+ *	Check the permission to read the symbolic link.
+ *	@dentry contains the dentry structure for the file link.
+ *	Return 0 if permission is granted.
+ * @inode_follow_link:
+ *	Check permission to follow a symbolic link when looking up a pathname.
+ *	@dentry contains the dentry structure for the link.
+ *	@nd contains the nameidata structure for the parent directory.
+ *	Return 0 if permission is granted.
+ * @inode_permission:
+ *	Check permission before accessing an inode.  This hook is called by the
+ *	existing Linux permission function, so a security module can use it to
+ *	provide additional checking for existing Linux permission checks.
+ *	Notice that this hook is called when a file is opened (as well as many
+ *	other operations), whereas the file_security_ops permission hook is
+ *	called when the actual read/write operations are performed.
+ *	@inode contains the inode structure to check.
+ *	@mask contains the permission mask.
+ *	Return 0 if permission is granted.
+ * @inode_setattr:
+ *	Check permission before setting file attributes.  Note that the kernel
+ *	call to notify_change is performed from several locations, whenever
+ *	file attributes change (such as when a file is truncated, chown/chmod
+ *	operations, transferring disk quotas, etc).
+ *	@dentry contains the dentry structure for the file.
+ *	@attr is the iattr structure containing the new file attributes.
+ *	Return 0 if permission is granted.
+ * @path_truncate:
+ *	Check permission before truncating a file.
+ *	@path contains the path structure for the file.
+ *	Return 0 if permission is granted.
+ * @inode_getattr:
+ *	Check permission before obtaining file attributes.
+ *	@mnt is the vfsmount where the dentry was looked up
+ *	@dentry contains the dentry structure for the file.
+ *	Return 0 if permission is granted.
+ * @inode_setxattr:
+ *	Check permission before setting the extended attributes
+ *	@value identified by @name for @dentry.
+ *	Return 0 if permission is granted.
+ * @inode_post_setxattr:
+ *	Update inode security field after successful setxattr operation.
+ *	@value identified by @name for @dentry.
+ * @inode_getxattr:
+ *	Check permission before obtaining the extended attributes
+ *	identified by @name for @dentry.
+ *	Return 0 if permission is granted.
+ * @inode_listxattr:
+ *	Check permission before obtaining the list of extended attribute
+ *	names for @dentry.
+ *	Return 0 if permission is granted.
+ * @inode_removexattr:
+ *	Check permission before removing the extended attribute
+ *	identified by @name for @dentry.
+ *	Return 0 if permission is granted.
+ * @inode_getsecurity:
+ *	Retrieve a copy of the extended attribute representation of the
+ *	security label associated with @name for @inode via @buffer.  Note that
+ *	@name is the remainder of the attribute name after the security prefix
+ *	has been removed. @alloc is used to specify of the call should return a
+ *	value via the buffer or just the value length Return size of buffer on
+ *	success.
+ * @inode_setsecurity:
+ *	Set the security label associated with @name for @inode from the
+ *	extended attribute value @value.  @size indicates the size of the
+ *	@value in bytes.  @flags may be XATTR_CREATE, XATTR_REPLACE, or 0.
+ *	Note that @name is the remainder of the attribute name after the
+ *	security. prefix has been removed.
+ *	Return 0 on success.
+ * @inode_listsecurity:
+ *	Copy the extended attribute names for the security labels
+ *	associated with @inode into @buffer.  The maximum size of @buffer
+ *	is specified by @buffer_size.  @buffer may be NULL to request
+ *	the size of the buffer required.
+ *	Returns number of bytes used/required on success.
+ * @inode_need_killpriv:
+ *	Called when an inode has been changed.
+ *	@dentry is the dentry being changed.
+ *	Return <0 on error to abort the inode change operation.
+ *	Return 0 if inode_killpriv does not need to be called.
+ *	Return >0 if inode_killpriv does need to be called.
+ * @inode_killpriv:
+ *	The setuid bit is being removed.  Remove similar security labels.
+ *	Called with the dentry->d_inode->i_mutex held.
+ *	@dentry is the dentry being changed.
+ *	Return 0 on success.  If error is returned, then the operation
+ *	causing setuid bit removal is failed.
+ * @inode_getsecid:
+ *	Get the secid associated with the node.
+ *	@inode contains a pointer to the inode.
+ *	@secid contains a pointer to the location where result will be saved.
+ *	In case of failure, @secid will be set to zero.
+ *
+ * Security hooks for file operations
+ *
+ * @file_permission:
+ *	Check file permissions before accessing an open file.  This hook is
+ *	called by various operations that read or write files.  A security
+ *	module can use this hook to perform additional checking on these
+ *	operations, e.g.  to revalidate permissions on use to support privilege
+ *	bracketing or policy changes.  Notice that this hook is used when the
+ *	actual read/write operations are performed, whereas the
+ *	inode_security_ops hook is called when a file is opened (as well as
+ *	many other operations).
+ *	Caveat:  Although this hook can be used to revalidate permissions for
+ *	various system call operations that read or write files, it does not
+ *	address the revalidation of permissions for memory-mapped files.
+ *	Security modules must handle this separately if they need such
+ *	revalidation.
+ *	@file contains the file structure being accessed.
+ *	@mask contains the requested permissions.
+ *	Return 0 if permission is granted.
+ * @file_alloc_security:
+ *	Allocate and attach a security structure to the file->f_security field.
+ *	The security field is initialized to NULL when the structure is first
+ *	created.
+ *	@file contains the file structure to secure.
+ *	Return 0 if the hook is successful and permission is granted.
+ * @file_free_security:
+ *	Deallocate and free any security structures stored in file->f_security.
+ *	@file contains the file structure being modified.
+ * @file_ioctl:
+ *	@file contains the file structure.
+ *	@cmd contains the operation to perform.
+ *	@arg contains the operational arguments.
+ *	Check permission for an ioctl operation on @file.  Note that @arg
+ *	sometimes represents a user space pointer; in other cases, it may be a
+ *	simple integer value.  When @arg represents a user space pointer, it
+ *	should never be used by the security module.
+ *	Return 0 if permission is granted.
+ * @mmap_addr :
+ *	Check permissions for a mmap operation at @addr.
+ *	@addr contains virtual address that will be used for the operation.
+ *	Return 0 if permission is granted.
+ * @mmap_file :
+ *	Check permissions for a mmap operation.  The @file may be NULL, e.g.
+ *	if mapping anonymous memory.
+ *	@file contains the file structure for file to map (may be NULL).
+ *	@reqprot contains the protection requested by the application.
+ *	@prot contains the protection that will be applied by the kernel.
+ *	@flags contains the operational flags.
+ *	Return 0 if permission is granted.
+ * @file_mprotect:
+ *	Check permissions before changing memory access permissions.
+ *	@vma contains the memory region to modify.
+ *	@reqprot contains the protection requested by the application.
+ *	@prot contains the protection that will be applied by the kernel.
+ *	Return 0 if permission is granted.
+ * @file_lock:
+ *	Check permission before performing file locking operations.
+ *	Note: this hook mediates both flock and fcntl style locks.
+ *	@file contains the file structure.
+ *	@cmd contains the posix-translated lock operation to perform
+ *	(e.g. F_RDLCK, F_WRLCK).
+ *	Return 0 if permission is granted.
+ * @file_fcntl:
+ *	Check permission before allowing the file operation specified by @cmd
+ *	from being performed on the file @file.  Note that @arg sometimes
+ *	represents a user space pointer; in other cases, it may be a simple
+ *	integer value.  When @arg represents a user space pointer, it should
+ *	never be used by the security module.
+ *	@file contains the file structure.
+ *	@cmd contains the operation to be performed.
+ *	@arg contains the operational arguments.
+ *	Return 0 if permission is granted.
+ * @file_set_fowner:
+ *	Save owner security information (typically from current->security) in
+ *	file->f_security for later use by the send_sigiotask hook.
+ *	@file contains the file structure to update.
+ *	Return 0 on success.
+ * @file_send_sigiotask:
+ *	Check permission for the file owner @fown to send SIGIO or SIGURG to the
+ *	process @tsk.  Note that this hook is sometimes called from interrupt.
+ *	Note that the fown_struct, @fown, is never outside the context of a
+ *	struct file, so the file structure (and associated security information)
+ *	can always be obtained:
+ *		container_of(fown, struct file, f_owner)
+ *	@tsk contains the structure of task receiving signal.
+ *	@fown contains the file owner information.
+ *	@sig is the signal that will be sent.  When 0, kernel sends SIGIO.
+ *	Return 0 if permission is granted.
+ * @file_receive:
+ *	This hook allows security modules to control the ability of a process
+ *	to receive an open file descriptor via socket IPC.
+ *	@file contains the file structure being received.
+ *	Return 0 if permission is granted.
+ * @file_open
+ *	Save open-time permission checking state for later use upon
+ *	file_permission, and recheck access if anything has changed
+ *	since inode_permission.
+ *
+ * Security hooks for task operations.
+ *
+ * @task_create:
+ *	Check permission before creating a child process.  See the clone(2)
+ *	manual page for definitions of the @clone_flags.
+ *	@clone_flags contains the flags indicating what should be shared.
+ *	Return 0 if permission is granted.
+ * @task_free:
+ *	@task task being freed
+ *	Handle release of task-related resources. (Note that this can be called
+ *	from interrupt context.)
+ * @cred_alloc_blank:
+ *	@cred points to the credentials.
+ *	@gfp indicates the atomicity of any memory allocations.
+ *	Only allocate sufficient memory and attach to @cred such that
+ *	cred_transfer() will not get ENOMEM.
+ * @cred_free:
+ *	@cred points to the credentials.
+ *	Deallocate and clear the cred->security field in a set of credentials.
+ * @cred_prepare:
+ *	@new points to the new credentials.
+ *	@old points to the original credentials.
+ *	@gfp indicates the atomicity of any memory allocations.
+ *	Prepare a new set of credentials by copying the data from the old set.
+ * @cred_transfer:
+ *	@new points to the new credentials.
+ *	@old points to the original credentials.
+ *	Transfer data from original creds to new creds
+ * @kernel_act_as:
+ *	Set the credentials for a kernel service to act as (subjective context).
+ *	@new points to the credentials to be modified.
+ *	@secid specifies the security ID to be set
+ *	The current task must be the one that nominated @secid.
+ *	Return 0 if successful.
+ * @kernel_create_files_as:
+ *	Set the file creation context in a set of credentials to be the same as
+ *	the objective context of the specified inode.
+ *	@new points to the credentials to be modified.
+ *	@inode points to the inode to use as a reference.
+ *	The current task must be the one that nominated @inode.
+ *	Return 0 if successful.
+ * @kernel_fw_from_file:
+ *	Load firmware from userspace (not called for built-in firmware).
+ *	@file contains the file structure pointing to the file containing
+ *	the firmware to load. This argument will be NULL if the firmware
+ *	was loaded via the uevent-triggered blob-based interface exposed
+ *	by CONFIG_FW_LOADER_USER_HELPER.
+ *	@buf pointer to buffer containing firmware contents.
+ *	@size length of the firmware contents.
+ *	Return 0 if permission is granted.
+ * @kernel_module_request:
+ *	Ability to trigger the kernel to automatically upcall to userspace for
+ *	userspace to load a kernel module with the given name.
+ *	@kmod_name name of the module requested by the kernel
+ *	Return 0 if successful.
+ * @kernel_module_from_file:
+ *	Load a kernel module from userspace.
+ *	@file contains the file structure pointing to the file containing
+ *	the kernel module to load. If the module is being loaded from a blob,
+ *	this argument will be NULL.
+ *	Return 0 if permission is granted.
+ * @task_fix_setuid:
+ *	Update the module's state after setting one or more of the user
+ *	identity attributes of the current process.  The @flags parameter
+ *	indicates which of the set*uid system calls invoked this hook.  If
+ *	@new is the set of credentials that will be installed.  Modifications
+ *	should be made to this rather than to @current->cred.
+ *	@old is the set of credentials that are being replaces
+ *	@flags contains one of the LSM_SETID_* values.
+ *	Return 0 on success.
+ * @task_setpgid:
+ *	Check permission before setting the process group identifier of the
+ *	process @p to @pgid.
+ *	@p contains the task_struct for process being modified.
+ *	@pgid contains the new pgid.
+ *	Return 0 if permission is granted.
+ * @task_getpgid:
+ *	Check permission before getting the process group identifier of the
+ *	process @p.
+ *	@p contains the task_struct for the process.
+ *	Return 0 if permission is granted.
+ * @task_getsid:
+ *	Check permission before getting the session identifier of the process
+ *	@p.
+ *	@p contains the task_struct for the process.
+ *	Return 0 if permission is granted.
+ * @task_getsecid:
+ *	Retrieve the security identifier of the process @p.
+ *	@p contains the task_struct for the process and place is into @secid.
+ *	In case of failure, @secid will be set to zero.
+ *
+ * @task_setnice:
+ *	Check permission before setting the nice value of @p to @nice.
+ *	@p contains the task_struct of process.
+ *	@nice contains the new nice value.
+ *	Return 0 if permission is granted.
+ * @task_setioprio
+ *	Check permission before setting the ioprio value of @p to @ioprio.
+ *	@p contains the task_struct of process.
+ *	@ioprio contains the new ioprio value
+ *	Return 0 if permission is granted.
+ * @task_getioprio
+ *	Check permission before getting the ioprio value of @p.
+ *	@p contains the task_struct of process.
+ *	Return 0 if permission is granted.
+ * @task_setrlimit:
+ *	Check permission before setting the resource limits of the current
+ *	process for @resource to @new_rlim.  The old resource limit values can
+ *	be examined by dereferencing (current->signal->rlim + resource).
+ *	@resource contains the resource whose limit is being set.
+ *	@new_rlim contains the new limits for @resource.
+ *	Return 0 if permission is granted.
+ * @task_setscheduler:
+ *	Check permission before setting scheduling policy and/or parameters of
+ *	process @p based on @policy and @lp.
+ *	@p contains the task_struct for process.
+ *	@policy contains the scheduling policy.
+ *	@lp contains the scheduling parameters.
+ *	Return 0 if permission is granted.
+ * @task_getscheduler:
+ *	Check permission before obtaining scheduling information for process
+ *	@p.
+ *	@p contains the task_struct for process.
+ *	Return 0 if permission is granted.
+ * @task_movememory
+ *	Check permission before moving memory owned by process @p.
+ *	@p contains the task_struct for process.
+ *	Return 0 if permission is granted.
+ * @task_kill:
+ *	Check permission before sending signal @sig to @p.  @info can be NULL,
+ *	the constant 1, or a pointer to a siginfo structure.  If @info is 1 or
+ *	SI_FROMKERNEL(info) is true, then the signal should be viewed as coming
+ *	from the kernel and should typically be permitted.
+ *	SIGIO signals are handled separately by the send_sigiotask hook in
+ *	file_security_ops.
+ *	@p contains the task_struct for process.
+ *	@info contains the signal information.
+ *	@sig contains the signal value.
+ *	@secid contains the sid of the process where the signal originated
+ *	Return 0 if permission is granted.
+ * @task_wait:
+ *	Check permission before allowing a process to reap a child process @p
+ *	and collect its status information.
+ *	@p contains the task_struct for process.
+ *	Return 0 if permission is granted.
+ * @task_prctl:
+ *	Check permission before performing a process control operation on the
+ *	current process.
+ *	@option contains the operation.
+ *	@arg2 contains a argument.
+ *	@arg3 contains a argument.
+ *	@arg4 contains a argument.
+ *	@arg5 contains a argument.
+ *	Return -ENOSYS if no-one wanted to handle this op, any other value to
+ *	cause prctl() to return immediately with that value.
+ * @task_to_inode:
+ *	Set the security attributes for an inode based on an associated task's
+ *	security attributes, e.g. for /proc/pid inodes.
+ *	@p contains the task_struct for the task.
+ *	@inode contains the inode structure for the inode.
+ *
+ * Security hooks for Netlink messaging.
+ *
+ * @netlink_send:
+ *	Save security information for a netlink message so that permission
+ *	checking can be performed when the message is processed.  The security
+ *	information can be saved using the eff_cap field of the
+ *	netlink_skb_parms structure.  Also may be used to provide fine
+ *	grained control over message transmission.
+ *	@sk associated sock of task sending the message.
+ *	@skb contains the sk_buff structure for the netlink message.
+ *	Return 0 if the information was successfully saved and message
+ *	is allowed to be transmitted.
+ *
+ * Security hooks for Unix domain networking.
+ *
+ * @unix_stream_connect:
+ *	Check permissions before establishing a Unix domain stream connection
+ *	between @sock and @other.
+ *	@sock contains the sock structure.
+ *	@other contains the peer sock structure.
+ *	@newsk contains the new sock structure.
+ *	Return 0 if permission is granted.
+ * @unix_may_send:
+ *	Check permissions before connecting or sending datagrams from @sock to
+ *	@other.
+ *	@sock contains the socket structure.
+ *	@other contains the peer socket structure.
+ *	Return 0 if permission is granted.
+ *
+ * The @unix_stream_connect and @unix_may_send hooks were necessary because
+ * Linux provides an alternative to the conventional file name space for Unix
+ * domain sockets.  Whereas binding and connecting to sockets in the file name
+ * space is mediated by the typical file permissions (and caught by the mknod
+ * and permission hooks in inode_security_ops), binding and connecting to
+ * sockets in the abstract name space is completely unmediated.  Sufficient
+ * control of Unix domain sockets in the abstract name space isn't possible
+ * using only the socket layer hooks, since we need to know the actual target
+ * socket, which is not looked up until we are inside the af_unix code.
+ *
+ * Security hooks for socket operations.
+ *
+ * @socket_create:
+ *	Check permissions prior to creating a new socket.
+ *	@family contains the requested protocol family.
+ *	@type contains the requested communications type.
+ *	@protocol contains the requested protocol.
+ *	@kern set to 1 if a kernel socket.
+ *	Return 0 if permission is granted.
+ * @socket_post_create:
+ *	This hook allows a module to update or allocate a per-socket security
+ *	structure. Note that the security field was not added directly to the
+ *	socket structure, but rather, the socket security information is stored
+ *	in the associated inode.  Typically, the inode alloc_security hook will
+ *	allocate and and attach security information to
+ *	sock->inode->i_security.  This hook may be used to update the
+ *	sock->inode->i_security field with additional information that wasn't
+ *	available when the inode was allocated.
+ *	@sock contains the newly created socket structure.
+ *	@family contains the requested protocol family.
+ *	@type contains the requested communications type.
+ *	@protocol contains the requested protocol.
+ *	@kern set to 1 if a kernel socket.
+ * @socket_bind:
+ *	Check permission before socket protocol layer bind operation is
+ *	performed and the socket @sock is bound to the address specified in the
+ *	@address parameter.
+ *	@sock contains the socket structure.
+ *	@address contains the address to bind to.
+ *	@addrlen contains the length of address.
+ *	Return 0 if permission is granted.
+ * @socket_connect:
+ *	Check permission before socket protocol layer connect operation
+ *	attempts to connect socket @sock to a remote address, @address.
+ *	@sock contains the socket structure.
+ *	@address contains the address of remote endpoint.
+ *	@addrlen contains the length of address.
+ *	Return 0 if permission is granted.
+ * @socket_listen:
+ *	Check permission before socket protocol layer listen operation.
+ *	@sock contains the socket structure.
+ *	@backlog contains the maximum length for the pending connection queue.
+ *	Return 0 if permission is granted.
+ * @socket_accept:
+ *	Check permission before accepting a new connection.  Note that the new
+ *	socket, @newsock, has been created and some information copied to it,
+ *	but the accept operation has not actually been performed.
+ *	@sock contains the listening socket structure.
+ *	@newsock contains the newly created server socket for connection.
+ *	Return 0 if permission is granted.
+ * @socket_sendmsg:
+ *	Check permission before transmitting a message to another socket.
+ *	@sock contains the socket structure.
+ *	@msg contains the message to be transmitted.
+ *	@size contains the size of message.
+ *	Return 0 if permission is granted.
+ * @socket_recvmsg:
+ *	Check permission before receiving a message from a socket.
+ *	@sock contains the socket structure.
+ *	@msg contains the message structure.
+ *	@size contains the size of message structure.
+ *	@flags contains the operational flags.
+ *	Return 0 if permission is granted.
+ * @socket_getsockname:
+ *	Check permission before the local address (name) of the socket object
+ *	@sock is retrieved.
+ *	@sock contains the socket structure.
+ *	Return 0 if permission is granted.
+ * @socket_getpeername:
+ *	Check permission before the remote address (name) of a socket object
+ *	@sock is retrieved.
+ *	@sock contains the socket structure.
+ *	Return 0 if permission is granted.
+ * @socket_getsockopt:
+ *	Check permissions before retrieving the options associated with socket
+ *	@sock.
+ *	@sock contains the socket structure.
+ *	@level contains the protocol level to retrieve option from.
+ *	@optname contains the name of option to retrieve.
+ *	Return 0 if permission is granted.
+ * @socket_setsockopt:
+ *	Check permissions before setting the options associated with socket
+ *	@sock.
+ *	@sock contains the socket structure.
+ *	@level contains the protocol level to set options for.
+ *	@optname contains the name of the option to set.
+ *	Return 0 if permission is granted.
+ * @socket_shutdown:
+ *	Checks permission before all or part of a connection on the socket
+ *	@sock is shut down.
+ *	@sock contains the socket structure.
+ *	@how contains the flag indicating how future sends and receives
+ *	are handled.
+ *	Return 0 if permission is granted.
+ * @socket_sock_rcv_skb:
+ *	Check permissions on incoming network packets.  This hook is distinct
+ *	from Netfilter's IP input hooks since it is the first time that the
+ *	incoming sk_buff @skb has been associated with a particular socket, @sk.
+ *	Must not sleep inside this hook because some callers hold spinlocks.
+ *	@sk contains the sock (not socket) associated with the incoming sk_buff.
+ *	@skb contains the incoming network data.
+ * @socket_getpeersec_stream:
+ *	This hook allows the security module to provide peer socket security
+ *	state for unix or connected tcp sockets to userspace via getsockopt
+ *	SO_GETPEERSEC.  For tcp sockets this can be meaningful if the
+ *	socket is associated with an ipsec SA.
+ *	@sock is the local socket.
+ *	@optval userspace memory where the security state is to be copied.
+ *	@optlen userspace int where the module should copy the actual length
+ *	of the security state.
+ *	@len as input is the maximum length to copy to userspace provided
+ *	by the caller.
+ *	Return 0 if all is well, otherwise, typical getsockopt return
+ *	values.
+ * @socket_getpeersec_dgram:
+ *	This hook allows the security module to provide peer socket security
+ *	state for udp sockets on a per-packet basis to userspace via
+ *	getsockopt SO_GETPEERSEC.  The application must first have indicated
+ *	the IP_PASSSEC option via getsockopt.  It can then retrieve the
+ *	security state returned by this hook for a packet via the SCM_SECURITY
+ *	ancillary message type.
+ *	@skb is the skbuff for the packet being queried
+ *	@secdata is a pointer to a buffer in which to copy the security data
+ *	@seclen is the maximum length for @secdata
+ *	Return 0 on success, error on failure.
+ * @sk_alloc_security:
+ *	Allocate and attach a security structure to the sk->sk_security field,
+ *	which is used to copy security attributes between local stream sockets.
+ * @sk_free_security:
+ *	Deallocate security structure.
+ * @sk_clone_security:
+ *	Clone/copy security structure.
+ * @sk_getsecid:
+ *	Retrieve the LSM-specific secid for the sock to enable caching
+ *	of network authorizations.
+ * @sock_graft:
+ *	Sets the socket's isec sid to the sock's sid.
+ * @inet_conn_request:
+ *	Sets the openreq's sid to socket's sid with MLS portion taken
+ *	from peer sid.
+ * @inet_csk_clone:
+ *	Sets the new child socket's sid to the openreq sid.
+ * @inet_conn_established:
+ *	Sets the connection's peersid to the secmark on skb.
+ * @secmark_relabel_packet:
+ *	check if the process should be allowed to relabel packets to
+ *	the given secid
+ * @security_secmark_refcount_inc
+ *	tells the LSM to increment the number of secmark labeling rules loaded
+ * @security_secmark_refcount_dec
+ *	tells the LSM to decrement the number of secmark labeling rules loaded
+ * @req_classify_flow:
+ *	Sets the flow's sid to the openreq sid.
+ * @tun_dev_alloc_security:
+ *	This hook allows a module to allocate a security structure for a TUN
+ *	device.
+ *	@security pointer to a security structure pointer.
+ *	Returns a zero on success, negative values on failure.
+ * @tun_dev_free_security:
+ *	This hook allows a module to free the security structure for a TUN
+ *	device.
+ *	@security pointer to the TUN device's security structure
+ * @tun_dev_create:
+ *	Check permissions prior to creating a new TUN device.
+ * @tun_dev_attach_queue:
+ *	Check permissions prior to attaching to a TUN device queue.
+ *	@security pointer to the TUN device's security structure.
+ * @tun_dev_attach:
+ *	This hook can be used by the module to update any security state
+ *	associated with the TUN device's sock structure.
+ *	@sk contains the existing sock structure.
+ *	@security pointer to the TUN device's security structure.
+ * @tun_dev_open:
+ *	This hook can be used by the module to update any security state
+ *	associated with the TUN device's security structure.
+ *	@security pointer to the TUN devices's security structure.
+ *
+ * Security hooks for XFRM operations.
+ *
+ * @xfrm_policy_alloc_security:
+ *	@ctxp is a pointer to the xfrm_sec_ctx being added to Security Policy
+ *	Database used by the XFRM system.
+ *	@sec_ctx contains the security context information being provided by
+ *	the user-level policy update program (e.g., setkey).
+ *	Allocate a security structure to the xp->security field; the security
+ *	field is initialized to NULL when the xfrm_policy is allocated.
+ *	Return 0 if operation was successful (memory to allocate, legal context)
+ *	@gfp is to specify the context for the allocation
+ * @xfrm_policy_clone_security:
+ *	@old_ctx contains an existing xfrm_sec_ctx.
+ *	@new_ctxp contains a new xfrm_sec_ctx being cloned from old.
+ *	Allocate a security structure in new_ctxp that contains the
+ *	information from the old_ctx structure.
+ *	Return 0 if operation was successful (memory to allocate).
+ * @xfrm_policy_free_security:
+ *	@ctx contains the xfrm_sec_ctx
+ *	Deallocate xp->security.
+ * @xfrm_policy_delete_security:
+ *	@ctx contains the xfrm_sec_ctx.
+ *	Authorize deletion of xp->security.
+ * @xfrm_state_alloc:
+ *	@x contains the xfrm_state being added to the Security Association
+ *	Database by the XFRM system.
+ *	@sec_ctx contains the security context information being provided by
+ *	the user-level SA generation program (e.g., setkey or racoon).
+ *	Allocate a security structure to the x->security field; the security
+ *	field is initialized to NULL when the xfrm_state is allocated. Set the
+ *	context to correspond to sec_ctx. Return 0 if operation was successful
+ *	(memory to allocate, legal context).
+ * @xfrm_state_alloc_acquire:
+ *	@x contains the xfrm_state being added to the Security Association
+ *	Database by the XFRM system.
+ *	@polsec contains the policy's security context.
+ *	@secid contains the secid from which to take the mls portion of the
+ *	context.
+ *	Allocate a security structure to the x->security field; the security
+ *	field is initialized to NULL when the xfrm_state is allocated. Set the
+ *	context to correspond to secid. Return 0 if operation was successful
+ *	(memory to allocate, legal context).
+ * @xfrm_state_free_security:
+ *	@x contains the xfrm_state.
+ *	Deallocate x->security.
+ * @xfrm_state_delete_security:
+ *	@x contains the xfrm_state.
+ *	Authorize deletion of x->security.
+ * @xfrm_policy_lookup:
+ *	@ctx contains the xfrm_sec_ctx for which the access control is being
+ *	checked.
+ *	@fl_secid contains the flow security label that is used to authorize
+ *	access to the policy xp.
+ *	@dir contains the direction of the flow (input or output).
+ *	Check permission when a flow selects a xfrm_policy for processing
+ *	XFRMs on a packet.  The hook is called when selecting either a
+ *	per-socket policy or a generic xfrm policy.
+ *	Return 0 if permission is granted, -ESRCH otherwise, or -errno
+ *	on other errors.
+ * @xfrm_state_pol_flow_match:
+ *	@x contains the state to match.
+ *	@xp contains the policy to check for a match.
+ *	@fl contains the flow to check for a match.
+ *	Return 1 if there is a match.
+ * @xfrm_decode_session:
+ *	@skb points to skb to decode.
+ *	@secid points to the flow key secid to set.
+ *	@ckall says if all xfrms used should be checked for same secid.
+ *	Return 0 if ckall is zero or all xfrms used have the same secid.
+ *
+ * Security hooks affecting all Key Management operations
+ *
+ * @key_alloc:
+ *	Permit allocation of a key and assign security data. Note that key does
+ *	not have a serial number assigned at this point.
+ *	@key points to the key.
+ *	@flags is the allocation flags
+ *	Return 0 if permission is granted, -ve error otherwise.
+ * @key_free:
+ *	Notification of destruction; free security data.
+ *	@key points to the key.
+ *	No return value.
+ * @key_permission:
+ *	See whether a specific operational right is granted to a process on a
+ *	key.
+ *	@key_ref refers to the key (key pointer + possession attribute bit).
+ *	@cred points to the credentials to provide the context against which to
+ *	evaluate the security data on the key.
+ *	@perm describes the combination of permissions required of this key.
+ *	Return 0 if permission is granted, -ve error otherwise.
+ * @key_getsecurity:
+ *	Get a textual representation of the security context attached to a key
+ *	for the purposes of honouring KEYCTL_GETSECURITY.  This function
+ *	allocates the storage for the NUL-terminated string and the caller
+ *	should free it.
+ *	@key points to the key to be queried.
+ *	@_buffer points to a pointer that should be set to point to the
+ *	resulting string (if no label or an error occurs).
+ *	Return the length of the string (including terminating NUL) or -ve if
+ *	an error.
+ *	May also return 0 (and a NULL buffer pointer) if there is no label.
+ *
+ * Security hooks affecting all System V IPC operations.
+ *
+ * @ipc_permission:
+ *	Check permissions for access to IPC
+ *	@ipcp contains the kernel IPC permission structure
+ *	@flag contains the desired (requested) permission set
+ *	Return 0 if permission is granted.
+ * @ipc_getsecid:
+ *	Get the secid associated with the ipc object.
+ *	@ipcp contains the kernel IPC permission structure.
+ *	@secid contains a pointer to the location where result will be saved.
+ *	In case of failure, @secid will be set to zero.
+ *
+ * Security hooks for individual messages held in System V IPC message queues
+ * @msg_msg_alloc_security:
+ *	Allocate and attach a security structure to the msg->security field.
+ *	The security field is initialized to NULL when the structure is first
+ *	created.
+ *	@msg contains the message structure to be modified.
+ *	Return 0 if operation was successful and permission is granted.
+ * @msg_msg_free_security:
+ *	Deallocate the security structure for this message.
+ *	@msg contains the message structure to be modified.
+ *
+ * Security hooks for System V IPC Message Queues
+ *
+ * @msg_queue_alloc_security:
+ *	Allocate and attach a security structure to the
+ *	msq->q_perm.security field. The security field is initialized to
+ *	NULL when the structure is first created.
+ *	@msq contains the message queue structure to be modified.
+ *	Return 0 if operation was successful and permission is granted.
+ * @msg_queue_free_security:
+ *	Deallocate security structure for this message queue.
+ *	@msq contains the message queue structure to be modified.
+ * @msg_queue_associate:
+ *	Check permission when a message queue is requested through the
+ *	msgget system call.  This hook is only called when returning the
+ *	message queue identifier for an existing message queue, not when a
+ *	new message queue is created.
+ *	@msq contains the message queue to act upon.
+ *	@msqflg contains the operation control flags.
+ *	Return 0 if permission is granted.
+ * @msg_queue_msgctl:
+ *	Check permission when a message control operation specified by @cmd
+ *	is to be performed on the message queue @msq.
+ *	The @msq may be NULL, e.g. for IPC_INFO or MSG_INFO.
+ *	@msq contains the message queue to act upon.  May be NULL.
+ *	@cmd contains the operation to be performed.
+ *	Return 0 if permission is granted.
+ * @msg_queue_msgsnd:
+ *	Check permission before a message, @msg, is enqueued on the message
+ *	queue, @msq.
+ *	@msq contains the message queue to send message to.
+ *	@msg contains the message to be enqueued.
+ *	@msqflg contains operational flags.
+ *	Return 0 if permission is granted.
+ * @msg_queue_msgrcv:
+ *	Check permission before a message, @msg, is removed from the message
+ *	queue, @msq.  The @target task structure contains a pointer to the
+ *	process that will be receiving the message (not equal to the current
+ *	process when inline receives are being performed).
+ *	@msq contains the message queue to retrieve message from.
+ *	@msg contains the message destination.
+ *	@target contains the task structure for recipient process.
+ *	@type contains the type of message requested.
+ *	@mode contains the operational flags.
+ *	Return 0 if permission is granted.
+ *
+ * Security hooks for System V Shared Memory Segments
+ *
+ * @shm_alloc_security:
+ *	Allocate and attach a security structure to the shp->shm_perm.security
+ *	field.  The security field is initialized to NULL when the structure is
+ *	first created.
+ *	@shp contains the shared memory structure to be modified.
+ *	Return 0 if operation was successful and permission is granted.
+ * @shm_free_security:
+ *	Deallocate the security struct for this memory segment.
+ *	@shp contains the shared memory structure to be modified.
+ * @shm_associate:
+ *	Check permission when a shared memory region is requested through the
+ *	shmget system call.  This hook is only called when returning the shared
+ *	memory region identifier for an existing region, not when a new shared
+ *	memory region is created.
+ *	@shp contains the shared memory structure to be modified.
+ *	@shmflg contains the operation control flags.
+ *	Return 0 if permission is granted.
+ * @shm_shmctl:
+ *	Check permission when a shared memory control operation specified by
+ *	@cmd is to be performed on the shared memory region @shp.
+ *	The @shp may be NULL, e.g. for IPC_INFO or SHM_INFO.
+ *	@shp contains shared memory structure to be modified.
+ *	@cmd contains the operation to be performed.
+ *	Return 0 if permission is granted.
+ * @shm_shmat:
+ *	Check permissions prior to allowing the shmat system call to attach the
+ *	shared memory segment @shp to the data segment of the calling process.
+ *	The attaching address is specified by @shmaddr.
+ *	@shp contains the shared memory structure to be modified.
+ *	@shmaddr contains the address to attach memory region to.
+ *	@shmflg contains the operational flags.
+ *	Return 0 if permission is granted.
+ *
+ * Security hooks for System V Semaphores
+ *
+ * @sem_alloc_security:
+ *	Allocate and attach a security structure to the sma->sem_perm.security
+ *	field.  The security field is initialized to NULL when the structure is
+ *	first created.
+ *	@sma contains the semaphore structure
+ *	Return 0 if operation was successful and permission is granted.
+ * @sem_free_security:
+ *	deallocate security struct for this semaphore
+ *	@sma contains the semaphore structure.
+ * @sem_associate:
+ *	Check permission when a semaphore is requested through the semget
+ *	system call.  This hook is only called when returning the semaphore
+ *	identifier for an existing semaphore, not when a new one must be
+ *	created.
+ *	@sma contains the semaphore structure.
+ *	@semflg contains the operation control flags.
+ *	Return 0 if permission is granted.
+ * @sem_semctl:
+ *	Check permission when a semaphore operation specified by @cmd is to be
+ *	performed on the semaphore @sma.  The @sma may be NULL, e.g. for
+ *	IPC_INFO or SEM_INFO.
+ *	@sma contains the semaphore structure.  May be NULL.
+ *	@cmd contains the operation to be performed.
+ *	Return 0 if permission is granted.
+ * @sem_semop
+ *	Check permissions before performing operations on members of the
+ *	semaphore set @sma.  If the @alter flag is nonzero, the semaphore set
+ *	may be modified.
+ *	@sma contains the semaphore structure.
+ *	@sops contains the operations to perform.
+ *	@nsops contains the number of operations to perform.
+ *	@alter contains the flag indicating whether changes are to be made.
+ *	Return 0 if permission is granted.
+ *
+ * @binder_set_context_mgr
+ *	Check whether @mgr is allowed to be the binder context manager.
+ *	@mgr contains the task_struct for the task being registered.
+ *	Return 0 if permission is granted.
+ * @binder_transaction
+ *	Check whether @from is allowed to invoke a binder transaction call
+ *	to @to.
+ *	@from contains the task_struct for the sending task.
+ *	@to contains the task_struct for the receiving task.
+ * @binder_transfer_binder
+ *	Check whether @from is allowed to transfer a binder reference to @to.
+ *	@from contains the task_struct for the sending task.
+ *	@to contains the task_struct for the receiving task.
+ * @binder_transfer_file
+ *	Check whether @from is allowed to transfer @file to @to.
+ *	@from contains the task_struct for the sending task.
+ *	@file contains the struct file being transferred.
+ *	@to contains the task_struct for the receiving task.
+ *
+ * @ptrace_access_check:
+ *	Check permission before allowing the current process to trace the
+ *	@child process.
+ *	Security modules may also want to perform a process tracing check
+ *	during an execve in the set_security or apply_creds hooks of
+ *	tracing check during an execve in the bprm_set_creds hook of
+ *	binprm_security_ops if the process is being traced and its security
+ *	attributes would be changed by the execve.
+ *	@child contains the task_struct structure for the target process.
+ *	@mode contains the PTRACE_MODE flags indicating the form of access.
+ *	Return 0 if permission is granted.
+ * @ptrace_traceme:
+ *	Check that the @parent process has sufficient permission to trace the
+ *	current process before allowing the current process to present itself
+ *	to the @parent process for tracing.
+ *	@parent contains the task_struct structure for debugger process.
+ *	Return 0 if permission is granted.
+ * @capget:
+ *	Get the @effective, @inheritable, and @permitted capability sets for
+ *	the @target process.  The hook may also perform permission checking to
+ *	determine if the current process is allowed to see the capability sets
+ *	of the @target process.
+ *	@target contains the task_struct structure for target process.
+ *	@effective contains the effective capability set.
+ *	@inheritable contains the inheritable capability set.
+ *	@permitted contains the permitted capability set.
+ *	Return 0 if the capability sets were successfully obtained.
+ * @capset:
+ *	Set the @effective, @inheritable, and @permitted capability sets for
+ *	the current process.
+ *	@new contains the new credentials structure for target process.
+ *	@old contains the current credentials structure for target process.
+ *	@effective contains the effective capability set.
+ *	@inheritable contains the inheritable capability set.
+ *	@permitted contains the permitted capability set.
+ *	Return 0 and update @new if permission is granted.
+ * @capable:
+ *	Check whether the @tsk process has the @cap capability in the indicated
+ *	credentials.
+ *	@cred contains the credentials to use.
+ *	@ns contains the user namespace we want the capability in
+ *	@cap contains the capability <include/linux/capability.h>.
+ *	@audit: Whether to write an audit message or not
+ *	Return 0 if the capability is granted for @tsk.
+ * @syslog:
+ *	Check permission before accessing the kernel message ring or changing
+ *	logging to the console.
+ *	See the syslog(2) manual page for an explanation of the @type values.
+ *	@type contains the type of action.
+ *	@from_file indicates the context of action (if it came from /proc).
+ *	Return 0 if permission is granted.
+ * @settime:
+ *	Check permission to change the system time.
+ *	struct timespec and timezone are defined in include/linux/time.h
+ *	@ts contains new time
+ *	@tz contains new timezone
+ *	Return 0 if permission is granted.
+ * @vm_enough_memory:
+ *	Check permissions for allocating a new virtual mapping.
+ *	@mm contains the mm struct it is being added to.
+ *	@pages contains the number of pages.
+ *	Return 0 if permission is granted.
+ *
+ * @ismaclabel:
+ *	Check if the extended attribute specified by @name
+ *	represents a MAC label. Returns 1 if name is a MAC
+ *	attribute otherwise returns 0.
+ *	@name full extended attribute name to check against
+ *	LSM as a MAC label.
+ *
+ * @secid_to_secctx:
+ *	Convert secid to security context.  If secdata is NULL the length of
+ *	the result will be returned in seclen, but no secdata will be returned.
+ *	This does mean that the length could change between calls to check the
+ *	length and the next call which actually allocates and returns the
+ *	secdata.
+ *	@secid contains the security ID.
+ *	@secdata contains the pointer that stores the converted security
+ *	context.
+ *	@seclen pointer which contains the length of the data
+ * @secctx_to_secid:
+ *	Convert security context to secid.
+ *	@secid contains the pointer to the generated security ID.
+ *	@secdata contains the security context.
+ *
+ * @release_secctx:
+ *	Release the security context.
+ *	@secdata contains the security context.
+ *	@seclen contains the length of the security context.
+ *
+ * Security hooks for Audit
+ *
+ * @audit_rule_init:
+ *	Allocate and initialize an LSM audit rule structure.
+ *	@field contains the required Audit action.
+ *	Fields flags are defined in include/linux/audit.h
+ *	@op contains the operator the rule uses.
+ *	@rulestr contains the context where the rule will be applied to.
+ *	@lsmrule contains a pointer to receive the result.
+ *	Return 0 if @lsmrule has been successfully set,
+ *	-EINVAL in case of an invalid rule.
+ *
+ * @audit_rule_known:
+ *	Specifies whether given @rule contains any fields related to
+ *	current LSM.
+ *	@rule contains the audit rule of interest.
+ *	Return 1 in case of relation found, 0 otherwise.
+ *
+ * @audit_rule_match:
+ *	Determine if given @secid matches a rule previously approved
+ *	by @audit_rule_known.
+ *	@secid contains the security id in question.
+ *	@field contains the field which relates to current LSM.
+ *	@op contains the operator that will be used for matching.
+ *	@rule points to the audit rule that will be checked against.
+ *	@actx points to the audit context associated with the check.
+ *	Return 1 if secid matches the rule, 0 if it does not, -ERRNO on failure.
+ *
+ * @audit_rule_free:
+ *	Deallocate the LSM audit rule structure previously allocated by
+ *	audit_rule_init.
+ *	@rule contains the allocated rule
+ *
+ * @inode_notifysecctx:
+ *	Notify the security module of what the security context of an inode
+ *	should be.  Initializes the incore security context managed by the
+ *	security module for this inode.  Example usage:  NFS client invokes
+ *	this hook to initialize the security context in its incore inode to the
+ *	value provided by the server for the file when the server returned the
+ *	file's attributes to the client.
+ *
+ *	Must be called with inode->i_mutex locked.
+ *
+ *	@inode we wish to set the security context of.
+ *	@ctx contains the string which we wish to set in the inode.
+ *	@ctxlen contains the length of @ctx.
+ *
+ * @inode_setsecctx:
+ *	Change the security context of an inode.  Updates the
+ *	incore security context managed by the security module and invokes the
+ *	fs code as needed (via __vfs_setxattr_noperm) to update any backing
+ *	xattrs that represent the context.  Example usage:  NFS server invokes
+ *	this hook to change the security context in its incore inode and on the
+ *	backing filesystem to a value provided by the client on a SETATTR
+ *	operation.
+ *
+ *	Must be called with inode->i_mutex locked.
+ *
+ *	@dentry contains the inode we wish to set the security context of.
+ *	@ctx contains the string which we wish to set in the inode.
+ *	@ctxlen contains the length of @ctx.
+ *
+ * @inode_getsecctx:
+ *	On success, returns 0 and fills out @ctx and @ctxlen with the security
+ *	context for the given @inode.
+ *
+ *	@inode we wish to get the security context of.
+ *	@ctx is a pointer in which to place the allocated security context.
+ *	@ctxlen points to the place to put the length of @ctx.
+ * This is the main security structure.
+ */
+
 struct security_operations {
 	char name[SECURITY_NAME_MAX + 1];
 
-- 
cgit v1.2.3


From 346033a28fb16b83dac2a74d8025ff8ee64a2c9b Mon Sep 17 00:00:00 2001
From: Casey Schaufler <casey@schaufler-ca.com>
Date: Sat, 2 May 2015 15:11:14 -0700
Subject: LSM: Remove a comment from security.h

Remove the large comment describing the content of the
security_operations structure from security.h. This
wasn't done in the previous (2/7) patch because it
would have exceeded the mail list size limits.

Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
Acked-by: John Johansen <john.johansen@canonical.com>
Acked-by: Kees Cook <keescook@chromium.org>
Acked-by: Paul Moore <paul@paul-moore.com>
Acked-by:  Stephen Smalley <sds@tycho.nsa.gov>
Acked-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: James Morris <james.l.morris@oracle.com>
---
 include/linux/security.h | 1270 ----------------------------------------------
 1 file changed, 1270 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index f3d42c636f27..a2a100e7ac6e 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -186,1276 +186,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
 	opts->num_mnt_opts = 0;
 }
 
-/**
- * struct security_operations - main security structure
- *
- * Security module identifier.
- *
- * @name:
- *	A string that acts as a unique identifier for the LSM with max number
- *	of characters = SECURITY_NAME_MAX.
- *
- * Security hooks for program execution operations.
- *
- * @bprm_set_creds:
- *	Save security information in the bprm->security field, typically based
- *	on information about the bprm->file, for later use by the apply_creds
- *	hook.  This hook may also optionally check permissions (e.g. for
- *	transitions between security domains).
- *	This hook may be called multiple times during a single execve, e.g. for
- *	interpreters.  The hook can tell whether it has already been called by
- *	checking to see if @bprm->security is non-NULL.  If so, then the hook
- *	may decide either to retain the security information saved earlier or
- *	to replace it.
- *	@bprm contains the linux_binprm structure.
- *	Return 0 if the hook is successful and permission is granted.
- * @bprm_check_security:
- *	This hook mediates the point when a search for a binary handler will
- *	begin.  It allows a check the @bprm->security value which is set in the
- *	preceding set_creds call.  The primary difference from set_creds is
- *	that the argv list and envp list are reliably available in @bprm.  This
- *	hook may be called multiple times during a single execve; and in each
- *	pass set_creds is called first.
- *	@bprm contains the linux_binprm structure.
- *	Return 0 if the hook is successful and permission is granted.
- * @bprm_committing_creds:
- *	Prepare to install the new security attributes of a process being
- *	transformed by an execve operation, based on the old credentials
- *	pointed to by @current->cred and the information set in @bprm->cred by
- *	the bprm_set_creds hook.  @bprm points to the linux_binprm structure.
- *	This hook is a good place to perform state changes on the process such
- *	as closing open file descriptors to which access will no longer be
- *	granted when the attributes are changed.  This is called immediately
- *	before commit_creds().
- * @bprm_committed_creds:
- *	Tidy up after the installation of the new security attributes of a
- *	process being transformed by an execve operation.  The new credentials
- *	have, by this point, been set to @current->cred.  @bprm points to the
- *	linux_binprm structure.  This hook is a good place to perform state
- *	changes on the process such as clearing out non-inheritable signal
- *	state.  This is called immediately after commit_creds().
- * @bprm_secureexec:
- *	Return a boolean value (0 or 1) indicating whether a "secure exec"
- *	is required.  The flag is passed in the auxiliary table
- *	on the initial stack to the ELF interpreter to indicate whether libc
- *	should enable secure mode.
- *	@bprm contains the linux_binprm structure.
- *
- * Security hooks for filesystem operations.
- *
- * @sb_alloc_security:
- *	Allocate and attach a security structure to the sb->s_security field.
- *	The s_security field is initialized to NULL when the structure is
- *	allocated.
- *	@sb contains the super_block structure to be modified.
- *	Return 0 if operation was successful.
- * @sb_free_security:
- *	Deallocate and clear the sb->s_security field.
- *	@sb contains the super_block structure to be modified.
- * @sb_statfs:
- *	Check permission before obtaining filesystem statistics for the @mnt
- *	mountpoint.
- *	@dentry is a handle on the superblock for the filesystem.
- *	Return 0 if permission is granted.
- * @sb_mount:
- *	Check permission before an object specified by @dev_name is mounted on
- *	the mount point named by @nd.  For an ordinary mount, @dev_name
- *	identifies a device if the file system type requires a device.  For a
- *	remount (@flags & MS_REMOUNT), @dev_name is irrelevant.  For a
- *	loopback/bind mount (@flags & MS_BIND), @dev_name identifies the
- *	pathname of the object being mounted.
- *	@dev_name contains the name for object being mounted.
- *	@path contains the path for mount point object.
- *	@type contains the filesystem type.
- *	@flags contains the mount flags.
- *	@data contains the filesystem-specific data.
- *	Return 0 if permission is granted.
- * @sb_copy_data:
- *	Allow mount option data to be copied prior to parsing by the filesystem,
- *	so that the security module can extract security-specific mount
- *	options cleanly (a filesystem may modify the data e.g. with strsep()).
- *	This also allows the original mount data to be stripped of security-
- *	specific options to avoid having to make filesystems aware of them.
- *	@type the type of filesystem being mounted.
- *	@orig the original mount data copied from userspace.
- *	@copy copied data which will be passed to the security module.
- *	Returns 0 if the copy was successful.
- * @sb_remount:
- *	Extracts security system specific mount options and verifies no changes
- *	are being made to those options.
- *	@sb superblock being remounted
- *	@data contains the filesystem-specific data.
- *	Return 0 if permission is granted.
- * @sb_umount:
- *	Check permission before the @mnt file system is unmounted.
- *	@mnt contains the mounted file system.
- *	@flags contains the unmount flags, e.g. MNT_FORCE.
- *	Return 0 if permission is granted.
- * @sb_pivotroot:
- *	Check permission before pivoting the root filesystem.
- *	@old_path contains the path for the new location of the current root (put_old).
- *	@new_path contains the path for the new root (new_root).
- *	Return 0 if permission is granted.
- * @sb_set_mnt_opts:
- *	Set the security relevant mount options used for a superblock
- *	@sb the superblock to set security mount options for
- *	@opts binary data structure containing all lsm mount data
- * @sb_clone_mnt_opts:
- *	Copy all security options from a given superblock to another
- *	@oldsb old superblock which contain information to clone
- *	@newsb new superblock which needs filled in
- * @sb_parse_opts_str:
- *	Parse a string of security data filling in the opts structure
- *	@options string containing all mount options known by the LSM
- *	@opts binary data structure usable by the LSM
- * @dentry_init_security:
- *	Compute a context for a dentry as the inode is not yet available
- *	since NFSv4 has no label backed by an EA anyway.
- *	@dentry dentry to use in calculating the context.
- *	@mode mode used to determine resource type.
- *	@name name of the last path component used to create file
- *	@ctx pointer to place the pointer to the resulting context in.
- *	@ctxlen point to place the length of the resulting context.
- *
- *
- * Security hooks for inode operations.
- *
- * @inode_alloc_security:
- *	Allocate and attach a security structure to @inode->i_security.  The
- *	i_security field is initialized to NULL when the inode structure is
- *	allocated.
- *	@inode contains the inode structure.
- *	Return 0 if operation was successful.
- * @inode_free_security:
- *	@inode contains the inode structure.
- *	Deallocate the inode security structure and set @inode->i_security to
- *	NULL.
- * @inode_init_security:
- *	Obtain the security attribute name suffix and value to set on a newly
- *	created inode and set up the incore security field for the new inode.
- *	This hook is called by the fs code as part of the inode creation
- *	transaction and provides for atomic labeling of the inode, unlike
- *	the post_create/mkdir/... hooks called by the VFS.  The hook function
- *	is expected to allocate the name and value via kmalloc, with the caller
- *	being responsible for calling kfree after using them.
- *	If the security module does not use security attributes or does
- *	not wish to put a security attribute on this particular inode,
- *	then it should return -EOPNOTSUPP to skip this processing.
- *	@inode contains the inode structure of the newly created inode.
- *	@dir contains the inode structure of the parent directory.
- *	@qstr contains the last path component of the new object
- *	@name will be set to the allocated name suffix (e.g. selinux).
- *	@value will be set to the allocated attribute value.
- *	@len will be set to the length of the value.
- *	Returns 0 if @name and @value have been successfully set,
- *		-EOPNOTSUPP if no security attribute is needed, or
- *		-ENOMEM on memory allocation failure.
- * @inode_create:
- *	Check permission to create a regular file.
- *	@dir contains inode structure of the parent of the new file.
- *	@dentry contains the dentry structure for the file to be created.
- *	@mode contains the file mode of the file to be created.
- *	Return 0 if permission is granted.
- * @inode_link:
- *	Check permission before creating a new hard link to a file.
- *	@old_dentry contains the dentry structure for an existing link to the file.
- *	@dir contains the inode structure of the parent directory of the new link.
- *	@new_dentry contains the dentry structure for the new link.
- *	Return 0 if permission is granted.
- * @path_link:
- *	Check permission before creating a new hard link to a file.
- *	@old_dentry contains the dentry structure for an existing link
- *	to the file.
- *	@new_dir contains the path structure of the parent directory of
- *	the new link.
- *	@new_dentry contains the dentry structure for the new link.
- *	Return 0 if permission is granted.
- * @inode_unlink:
- *	Check the permission to remove a hard link to a file.
- *	@dir contains the inode structure of parent directory of the file.
- *	@dentry contains the dentry structure for file to be unlinked.
- *	Return 0 if permission is granted.
- * @path_unlink:
- *	Check the permission to remove a hard link to a file.
- *	@dir contains the path structure of parent directory of the file.
- *	@dentry contains the dentry structure for file to be unlinked.
- *	Return 0 if permission is granted.
- * @inode_symlink:
- *	Check the permission to create a symbolic link to a file.
- *	@dir contains the inode structure of parent directory of the symbolic link.
- *	@dentry contains the dentry structure of the symbolic link.
- *	@old_name contains the pathname of file.
- *	Return 0 if permission is granted.
- * @path_symlink:
- *	Check the permission to create a symbolic link to a file.
- *	@dir contains the path structure of parent directory of
- *	the symbolic link.
- *	@dentry contains the dentry structure of the symbolic link.
- *	@old_name contains the pathname of file.
- *	Return 0 if permission is granted.
- * @inode_mkdir:
- *	Check permissions to create a new directory in the existing directory
- *	associated with inode structure @dir.
- *	@dir contains the inode structure of parent of the directory to be created.
- *	@dentry contains the dentry structure of new directory.
- *	@mode contains the mode of new directory.
- *	Return 0 if permission is granted.
- * @path_mkdir:
- *	Check permissions to create a new directory in the existing directory
- *	associated with path structure @path.
- *	@dir contains the path structure of parent of the directory
- *	to be created.
- *	@dentry contains the dentry structure of new directory.
- *	@mode contains the mode of new directory.
- *	Return 0 if permission is granted.
- * @inode_rmdir:
- *	Check the permission to remove a directory.
- *	@dir contains the inode structure of parent of the directory to be removed.
- *	@dentry contains the dentry structure of directory to be removed.
- *	Return 0 if permission is granted.
- * @path_rmdir:
- *	Check the permission to remove a directory.
- *	@dir contains the path structure of parent of the directory to be
- *	removed.
- *	@dentry contains the dentry structure of directory to be removed.
- *	Return 0 if permission is granted.
- * @inode_mknod:
- *	Check permissions when creating a special file (or a socket or a fifo
- *	file created via the mknod system call).  Note that if mknod operation
- *	is being done for a regular file, then the create hook will be called
- *	and not this hook.
- *	@dir contains the inode structure of parent of the new file.
- *	@dentry contains the dentry structure of the new file.
- *	@mode contains the mode of the new file.
- *	@dev contains the device number.
- *	Return 0 if permission is granted.
- * @path_mknod:
- *	Check permissions when creating a file. Note that this hook is called
- *	even if mknod operation is being done for a regular file.
- *	@dir contains the path structure of parent of the new file.
- *	@dentry contains the dentry structure of the new file.
- *	@mode contains the mode of the new file.
- *	@dev contains the undecoded device number. Use new_decode_dev() to get
- *	the decoded device number.
- *	Return 0 if permission is granted.
- * @inode_rename:
- *	Check for permission to rename a file or directory.
- *	@old_dir contains the inode structure for parent of the old link.
- *	@old_dentry contains the dentry structure of the old link.
- *	@new_dir contains the inode structure for parent of the new link.
- *	@new_dentry contains the dentry structure of the new link.
- *	Return 0 if permission is granted.
- * @path_rename:
- *	Check for permission to rename a file or directory.
- *	@old_dir contains the path structure for parent of the old link.
- *	@old_dentry contains the dentry structure of the old link.
- *	@new_dir contains the path structure for parent of the new link.
- *	@new_dentry contains the dentry structure of the new link.
- *	Return 0 if permission is granted.
- * @path_chmod:
- *	Check for permission to change DAC's permission of a file or directory.
- *	@dentry contains the dentry structure.
- *	@mnt contains the vfsmnt structure.
- *	@mode contains DAC's mode.
- *	Return 0 if permission is granted.
- * @path_chown:
- *	Check for permission to change owner/group of a file or directory.
- *	@path contains the path structure.
- *	@uid contains new owner's ID.
- *	@gid contains new group's ID.
- *	Return 0 if permission is granted.
- * @path_chroot:
- *	Check for permission to change root directory.
- *	@path contains the path structure.
- *	Return 0 if permission is granted.
- * @inode_readlink:
- *	Check the permission to read the symbolic link.
- *	@dentry contains the dentry structure for the file link.
- *	Return 0 if permission is granted.
- * @inode_follow_link:
- *	Check permission to follow a symbolic link when looking up a pathname.
- *	@dentry contains the dentry structure for the link.
- *	@nd contains the nameidata structure for the parent directory.
- *	Return 0 if permission is granted.
- * @inode_permission:
- *	Check permission before accessing an inode.  This hook is called by the
- *	existing Linux permission function, so a security module can use it to
- *	provide additional checking for existing Linux permission checks.
- *	Notice that this hook is called when a file is opened (as well as many
- *	other operations), whereas the file_security_ops permission hook is
- *	called when the actual read/write operations are performed.
- *	@inode contains the inode structure to check.
- *	@mask contains the permission mask.
- *	Return 0 if permission is granted.
- * @inode_setattr:
- *	Check permission before setting file attributes.  Note that the kernel
- *	call to notify_change is performed from several locations, whenever
- *	file attributes change (such as when a file is truncated, chown/chmod
- *	operations, transferring disk quotas, etc).
- *	@dentry contains the dentry structure for the file.
- *	@attr is the iattr structure containing the new file attributes.
- *	Return 0 if permission is granted.
- * @path_truncate:
- *	Check permission before truncating a file.
- *	@path contains the path structure for the file.
- *	Return 0 if permission is granted.
- * @inode_getattr:
- *	Check permission before obtaining file attributes.
- *	@mnt is the vfsmount where the dentry was looked up
- *	@dentry contains the dentry structure for the file.
- *	Return 0 if permission is granted.
- * @inode_setxattr:
- *	Check permission before setting the extended attributes
- *	@value identified by @name for @dentry.
- *	Return 0 if permission is granted.
- * @inode_post_setxattr:
- *	Update inode security field after successful setxattr operation.
- *	@value identified by @name for @dentry.
- * @inode_getxattr:
- *	Check permission before obtaining the extended attributes
- *	identified by @name for @dentry.
- *	Return 0 if permission is granted.
- * @inode_listxattr:
- *	Check permission before obtaining the list of extended attribute
- *	names for @dentry.
- *	Return 0 if permission is granted.
- * @inode_removexattr:
- *	Check permission before removing the extended attribute
- *	identified by @name for @dentry.
- *	Return 0 if permission is granted.
- * @inode_getsecurity:
- *	Retrieve a copy of the extended attribute representation of the
- *	security label associated with @name for @inode via @buffer.  Note that
- *	@name is the remainder of the attribute name after the security prefix
- *	has been removed. @alloc is used to specify of the call should return a
- *	value via the buffer or just the value length Return size of buffer on
- *	success.
- * @inode_setsecurity:
- *	Set the security label associated with @name for @inode from the
- *	extended attribute value @value.  @size indicates the size of the
- *	@value in bytes.  @flags may be XATTR_CREATE, XATTR_REPLACE, or 0.
- *	Note that @name is the remainder of the attribute name after the
- *	security. prefix has been removed.
- *	Return 0 on success.
- * @inode_listsecurity:
- *	Copy the extended attribute names for the security labels
- *	associated with @inode into @buffer.  The maximum size of @buffer
- *	is specified by @buffer_size.  @buffer may be NULL to request
- *	the size of the buffer required.
- *	Returns number of bytes used/required on success.
- * @inode_need_killpriv:
- *	Called when an inode has been changed.
- *	@dentry is the dentry being changed.
- *	Return <0 on error to abort the inode change operation.
- *	Return 0 if inode_killpriv does not need to be called.
- *	Return >0 if inode_killpriv does need to be called.
- * @inode_killpriv:
- *	The setuid bit is being removed.  Remove similar security labels.
- *	Called with the dentry->d_inode->i_mutex held.
- *	@dentry is the dentry being changed.
- *	Return 0 on success.  If error is returned, then the operation
- *	causing setuid bit removal is failed.
- * @inode_getsecid:
- *	Get the secid associated with the node.
- *	@inode contains a pointer to the inode.
- *	@secid contains a pointer to the location where result will be saved.
- *	In case of failure, @secid will be set to zero.
- *
- * Security hooks for file operations
- *
- * @file_permission:
- *	Check file permissions before accessing an open file.  This hook is
- *	called by various operations that read or write files.  A security
- *	module can use this hook to perform additional checking on these
- *	operations, e.g.  to revalidate permissions on use to support privilege
- *	bracketing or policy changes.  Notice that this hook is used when the
- *	actual read/write operations are performed, whereas the
- *	inode_security_ops hook is called when a file is opened (as well as
- *	many other operations).
- *	Caveat:  Although this hook can be used to revalidate permissions for
- *	various system call operations that read or write files, it does not
- *	address the revalidation of permissions for memory-mapped files.
- *	Security modules must handle this separately if they need such
- *	revalidation.
- *	@file contains the file structure being accessed.
- *	@mask contains the requested permissions.
- *	Return 0 if permission is granted.
- * @file_alloc_security:
- *	Allocate and attach a security structure to the file->f_security field.
- *	The security field is initialized to NULL when the structure is first
- *	created.
- *	@file contains the file structure to secure.
- *	Return 0 if the hook is successful and permission is granted.
- * @file_free_security:
- *	Deallocate and free any security structures stored in file->f_security.
- *	@file contains the file structure being modified.
- * @file_ioctl:
- *	@file contains the file structure.
- *	@cmd contains the operation to perform.
- *	@arg contains the operational arguments.
- *	Check permission for an ioctl operation on @file.  Note that @arg
- *	sometimes represents a user space pointer; in other cases, it may be a
- *	simple integer value.  When @arg represents a user space pointer, it
- *	should never be used by the security module.
- *	Return 0 if permission is granted.
- * @mmap_addr :
- *	Check permissions for a mmap operation at @addr.
- *	@addr contains virtual address that will be used for the operation.
- *	Return 0 if permission is granted.
- * @mmap_file :
- *	Check permissions for a mmap operation.  The @file may be NULL, e.g.
- *	if mapping anonymous memory.
- *	@file contains the file structure for file to map (may be NULL).
- *	@reqprot contains the protection requested by the application.
- *	@prot contains the protection that will be applied by the kernel.
- *	@flags contains the operational flags.
- *	Return 0 if permission is granted.
- * @file_mprotect:
- *	Check permissions before changing memory access permissions.
- *	@vma contains the memory region to modify.
- *	@reqprot contains the protection requested by the application.
- *	@prot contains the protection that will be applied by the kernel.
- *	Return 0 if permission is granted.
- * @file_lock:
- *	Check permission before performing file locking operations.
- *	Note: this hook mediates both flock and fcntl style locks.
- *	@file contains the file structure.
- *	@cmd contains the posix-translated lock operation to perform
- *	(e.g. F_RDLCK, F_WRLCK).
- *	Return 0 if permission is granted.
- * @file_fcntl:
- *	Check permission before allowing the file operation specified by @cmd
- *	from being performed on the file @file.  Note that @arg sometimes
- *	represents a user space pointer; in other cases, it may be a simple
- *	integer value.  When @arg represents a user space pointer, it should
- *	never be used by the security module.
- *	@file contains the file structure.
- *	@cmd contains the operation to be performed.
- *	@arg contains the operational arguments.
- *	Return 0 if permission is granted.
- * @file_set_fowner:
- *	Save owner security information (typically from current->security) in
- *	file->f_security for later use by the send_sigiotask hook.
- *	@file contains the file structure to update.
- *	Return 0 on success.
- * @file_send_sigiotask:
- *	Check permission for the file owner @fown to send SIGIO or SIGURG to the
- *	process @tsk.  Note that this hook is sometimes called from interrupt.
- *	Note that the fown_struct, @fown, is never outside the context of a
- *	struct file, so the file structure (and associated security information)
- *	can always be obtained:
- *		container_of(fown, struct file, f_owner)
- *	@tsk contains the structure of task receiving signal.
- *	@fown contains the file owner information.
- *	@sig is the signal that will be sent.  When 0, kernel sends SIGIO.
- *	Return 0 if permission is granted.
- * @file_receive:
- *	This hook allows security modules to control the ability of a process
- *	to receive an open file descriptor via socket IPC.
- *	@file contains the file structure being received.
- *	Return 0 if permission is granted.
- * @file_open
- *	Save open-time permission checking state for later use upon
- *	file_permission, and recheck access if anything has changed
- *	since inode_permission.
- *
- * Security hooks for task operations.
- *
- * @task_create:
- *	Check permission before creating a child process.  See the clone(2)
- *	manual page for definitions of the @clone_flags.
- *	@clone_flags contains the flags indicating what should be shared.
- *	Return 0 if permission is granted.
- * @task_free:
- *	@task task being freed
- *	Handle release of task-related resources. (Note that this can be called
- *	from interrupt context.)
- * @cred_alloc_blank:
- *	@cred points to the credentials.
- *	@gfp indicates the atomicity of any memory allocations.
- *	Only allocate sufficient memory and attach to @cred such that
- *	cred_transfer() will not get ENOMEM.
- * @cred_free:
- *	@cred points to the credentials.
- *	Deallocate and clear the cred->security field in a set of credentials.
- * @cred_prepare:
- *	@new points to the new credentials.
- *	@old points to the original credentials.
- *	@gfp indicates the atomicity of any memory allocations.
- *	Prepare a new set of credentials by copying the data from the old set.
- * @cred_transfer:
- *	@new points to the new credentials.
- *	@old points to the original credentials.
- *	Transfer data from original creds to new creds
- * @kernel_act_as:
- *	Set the credentials for a kernel service to act as (subjective context).
- *	@new points to the credentials to be modified.
- *	@secid specifies the security ID to be set
- *	The current task must be the one that nominated @secid.
- *	Return 0 if successful.
- * @kernel_create_files_as:
- *	Set the file creation context in a set of credentials to be the same as
- *	the objective context of the specified inode.
- *	@new points to the credentials to be modified.
- *	@inode points to the inode to use as a reference.
- *	The current task must be the one that nominated @inode.
- *	Return 0 if successful.
- * @kernel_fw_from_file:
- *	Load firmware from userspace (not called for built-in firmware).
- *	@file contains the file structure pointing to the file containing
- *	the firmware to load. This argument will be NULL if the firmware
- *	was loaded via the uevent-triggered blob-based interface exposed
- *	by CONFIG_FW_LOADER_USER_HELPER.
- *	@buf pointer to buffer containing firmware contents.
- *	@size length of the firmware contents.
- *	Return 0 if permission is granted.
- * @kernel_module_request:
- *	Ability to trigger the kernel to automatically upcall to userspace for
- *	userspace to load a kernel module with the given name.
- *	@kmod_name name of the module requested by the kernel
- *	Return 0 if successful.
- * @kernel_module_from_file:
- *	Load a kernel module from userspace.
- *	@file contains the file structure pointing to the file containing
- *	the kernel module to load. If the module is being loaded from a blob,
- *	this argument will be NULL.
- *	Return 0 if permission is granted.
- * @task_fix_setuid:
- *	Update the module's state after setting one or more of the user
- *	identity attributes of the current process.  The @flags parameter
- *	indicates which of the set*uid system calls invoked this hook.  If
- *	@new is the set of credentials that will be installed.  Modifications
- *	should be made to this rather than to @current->cred.
- *	@old is the set of credentials that are being replaces
- *	@flags contains one of the LSM_SETID_* values.
- *	Return 0 on success.
- * @task_setpgid:
- *	Check permission before setting the process group identifier of the
- *	process @p to @pgid.
- *	@p contains the task_struct for process being modified.
- *	@pgid contains the new pgid.
- *	Return 0 if permission is granted.
- * @task_getpgid:
- *	Check permission before getting the process group identifier of the
- *	process @p.
- *	@p contains the task_struct for the process.
- *	Return 0 if permission is granted.
- * @task_getsid:
- *	Check permission before getting the session identifier of the process
- *	@p.
- *	@p contains the task_struct for the process.
- *	Return 0 if permission is granted.
- * @task_getsecid:
- *	Retrieve the security identifier of the process @p.
- *	@p contains the task_struct for the process and place is into @secid.
- *	In case of failure, @secid will be set to zero.
- *
- * @task_setnice:
- *	Check permission before setting the nice value of @p to @nice.
- *	@p contains the task_struct of process.
- *	@nice contains the new nice value.
- *	Return 0 if permission is granted.
- * @task_setioprio
- *	Check permission before setting the ioprio value of @p to @ioprio.
- *	@p contains the task_struct of process.
- *	@ioprio contains the new ioprio value
- *	Return 0 if permission is granted.
- * @task_getioprio
- *	Check permission before getting the ioprio value of @p.
- *	@p contains the task_struct of process.
- *	Return 0 if permission is granted.
- * @task_setrlimit:
- *	Check permission before setting the resource limits of the current
- *	process for @resource to @new_rlim.  The old resource limit values can
- *	be examined by dereferencing (current->signal->rlim + resource).
- *	@resource contains the resource whose limit is being set.
- *	@new_rlim contains the new limits for @resource.
- *	Return 0 if permission is granted.
- * @task_setscheduler:
- *	Check permission before setting scheduling policy and/or parameters of
- *	process @p based on @policy and @lp.
- *	@p contains the task_struct for process.
- *	@policy contains the scheduling policy.
- *	@lp contains the scheduling parameters.
- *	Return 0 if permission is granted.
- * @task_getscheduler:
- *	Check permission before obtaining scheduling information for process
- *	@p.
- *	@p contains the task_struct for process.
- *	Return 0 if permission is granted.
- * @task_movememory
- *	Check permission before moving memory owned by process @p.
- *	@p contains the task_struct for process.
- *	Return 0 if permission is granted.
- * @task_kill:
- *	Check permission before sending signal @sig to @p.  @info can be NULL,
- *	the constant 1, or a pointer to a siginfo structure.  If @info is 1 or
- *	SI_FROMKERNEL(info) is true, then the signal should be viewed as coming
- *	from the kernel and should typically be permitted.
- *	SIGIO signals are handled separately by the send_sigiotask hook in
- *	file_security_ops.
- *	@p contains the task_struct for process.
- *	@info contains the signal information.
- *	@sig contains the signal value.
- *	@secid contains the sid of the process where the signal originated
- *	Return 0 if permission is granted.
- * @task_wait:
- *	Check permission before allowing a process to reap a child process @p
- *	and collect its status information.
- *	@p contains the task_struct for process.
- *	Return 0 if permission is granted.
- * @task_prctl:
- *	Check permission before performing a process control operation on the
- *	current process.
- *	@option contains the operation.
- *	@arg2 contains a argument.
- *	@arg3 contains a argument.
- *	@arg4 contains a argument.
- *	@arg5 contains a argument.
- *	Return -ENOSYS if no-one wanted to handle this op, any other value to
- *	cause prctl() to return immediately with that value.
- * @task_to_inode:
- *	Set the security attributes for an inode based on an associated task's
- *	security attributes, e.g. for /proc/pid inodes.
- *	@p contains the task_struct for the task.
- *	@inode contains the inode structure for the inode.
- *
- * Security hooks for Netlink messaging.
- *
- * @netlink_send:
- *	Save security information for a netlink message so that permission
- *	checking can be performed when the message is processed.  The security
- *	information can be saved using the eff_cap field of the
- *	netlink_skb_parms structure.  Also may be used to provide fine
- *	grained control over message transmission.
- *	@sk associated sock of task sending the message.
- *	@skb contains the sk_buff structure for the netlink message.
- *	Return 0 if the information was successfully saved and message
- *	is allowed to be transmitted.
- *
- * Security hooks for Unix domain networking.
- *
- * @unix_stream_connect:
- *	Check permissions before establishing a Unix domain stream connection
- *	between @sock and @other.
- *	@sock contains the sock structure.
- *	@other contains the peer sock structure.
- *	@newsk contains the new sock structure.
- *	Return 0 if permission is granted.
- * @unix_may_send:
- *	Check permissions before connecting or sending datagrams from @sock to
- *	@other.
- *	@sock contains the socket structure.
- *	@other contains the peer socket structure.
- *	Return 0 if permission is granted.
- *
- * The @unix_stream_connect and @unix_may_send hooks were necessary because
- * Linux provides an alternative to the conventional file name space for Unix
- * domain sockets.  Whereas binding and connecting to sockets in the file name
- * space is mediated by the typical file permissions (and caught by the mknod
- * and permission hooks in inode_security_ops), binding and connecting to
- * sockets in the abstract name space is completely unmediated.  Sufficient
- * control of Unix domain sockets in the abstract name space isn't possible
- * using only the socket layer hooks, since we need to know the actual target
- * socket, which is not looked up until we are inside the af_unix code.
- *
- * Security hooks for socket operations.
- *
- * @socket_create:
- *	Check permissions prior to creating a new socket.
- *	@family contains the requested protocol family.
- *	@type contains the requested communications type.
- *	@protocol contains the requested protocol.
- *	@kern set to 1 if a kernel socket.
- *	Return 0 if permission is granted.
- * @socket_post_create:
- *	This hook allows a module to update or allocate a per-socket security
- *	structure. Note that the security field was not added directly to the
- *	socket structure, but rather, the socket security information is stored
- *	in the associated inode.  Typically, the inode alloc_security hook will
- *	allocate and and attach security information to
- *	sock->inode->i_security.  This hook may be used to update the
- *	sock->inode->i_security field with additional information that wasn't
- *	available when the inode was allocated.
- *	@sock contains the newly created socket structure.
- *	@family contains the requested protocol family.
- *	@type contains the requested communications type.
- *	@protocol contains the requested protocol.
- *	@kern set to 1 if a kernel socket.
- * @socket_bind:
- *	Check permission before socket protocol layer bind operation is
- *	performed and the socket @sock is bound to the address specified in the
- *	@address parameter.
- *	@sock contains the socket structure.
- *	@address contains the address to bind to.
- *	@addrlen contains the length of address.
- *	Return 0 if permission is granted.
- * @socket_connect:
- *	Check permission before socket protocol layer connect operation
- *	attempts to connect socket @sock to a remote address, @address.
- *	@sock contains the socket structure.
- *	@address contains the address of remote endpoint.
- *	@addrlen contains the length of address.
- *	Return 0 if permission is granted.
- * @socket_listen:
- *	Check permission before socket protocol layer listen operation.
- *	@sock contains the socket structure.
- *	@backlog contains the maximum length for the pending connection queue.
- *	Return 0 if permission is granted.
- * @socket_accept:
- *	Check permission before accepting a new connection.  Note that the new
- *	socket, @newsock, has been created and some information copied to it,
- *	but the accept operation has not actually been performed.
- *	@sock contains the listening socket structure.
- *	@newsock contains the newly created server socket for connection.
- *	Return 0 if permission is granted.
- * @socket_sendmsg:
- *	Check permission before transmitting a message to another socket.
- *	@sock contains the socket structure.
- *	@msg contains the message to be transmitted.
- *	@size contains the size of message.
- *	Return 0 if permission is granted.
- * @socket_recvmsg:
- *	Check permission before receiving a message from a socket.
- *	@sock contains the socket structure.
- *	@msg contains the message structure.
- *	@size contains the size of message structure.
- *	@flags contains the operational flags.
- *	Return 0 if permission is granted.
- * @socket_getsockname:
- *	Check permission before the local address (name) of the socket object
- *	@sock is retrieved.
- *	@sock contains the socket structure.
- *	Return 0 if permission is granted.
- * @socket_getpeername:
- *	Check permission before the remote address (name) of a socket object
- *	@sock is retrieved.
- *	@sock contains the socket structure.
- *	Return 0 if permission is granted.
- * @socket_getsockopt:
- *	Check permissions before retrieving the options associated with socket
- *	@sock.
- *	@sock contains the socket structure.
- *	@level contains the protocol level to retrieve option from.
- *	@optname contains the name of option to retrieve.
- *	Return 0 if permission is granted.
- * @socket_setsockopt:
- *	Check permissions before setting the options associated with socket
- *	@sock.
- *	@sock contains the socket structure.
- *	@level contains the protocol level to set options for.
- *	@optname contains the name of the option to set.
- *	Return 0 if permission is granted.
- * @socket_shutdown:
- *	Checks permission before all or part of a connection on the socket
- *	@sock is shut down.
- *	@sock contains the socket structure.
- *	@how contains the flag indicating how future sends and receives are handled.
- *	Return 0 if permission is granted.
- * @socket_sock_rcv_skb:
- *	Check permissions on incoming network packets.  This hook is distinct
- *	from Netfilter's IP input hooks since it is the first time that the
- *	incoming sk_buff @skb has been associated with a particular socket, @sk.
- *	Must not sleep inside this hook because some callers hold spinlocks.
- *	@sk contains the sock (not socket) associated with the incoming sk_buff.
- *	@skb contains the incoming network data.
- * @socket_getpeersec_stream:
- *	This hook allows the security module to provide peer socket security
- *	state for unix or connected tcp sockets to userspace via getsockopt
- *	SO_GETPEERSEC.  For tcp sockets this can be meaningful if the
- *	socket is associated with an ipsec SA.
- *	@sock is the local socket.
- *	@optval userspace memory where the security state is to be copied.
- *	@optlen userspace int where the module should copy the actual length
- *	of the security state.
- *	@len as input is the maximum length to copy to userspace provided
- *	by the caller.
- *	Return 0 if all is well, otherwise, typical getsockopt return
- *	values.
- * @socket_getpeersec_dgram:
- *	This hook allows the security module to provide peer socket security
- *	state for udp sockets on a per-packet basis to userspace via
- *	getsockopt SO_GETPEERSEC.  The application must first have indicated
- *	the IP_PASSSEC option via getsockopt.  It can then retrieve the
- *	security state returned by this hook for a packet via the SCM_SECURITY
- *	ancillary message type.
- *	@skb is the skbuff for the packet being queried
- *	@secdata is a pointer to a buffer in which to copy the security data
- *	@seclen is the maximum length for @secdata
- *	Return 0 on success, error on failure.
- * @sk_alloc_security:
- *	Allocate and attach a security structure to the sk->sk_security field,
- *	which is used to copy security attributes between local stream sockets.
- * @sk_free_security:
- *	Deallocate security structure.
- * @sk_clone_security:
- *	Clone/copy security structure.
- * @sk_getsecid:
- *	Retrieve the LSM-specific secid for the sock to enable caching of network
- *	authorizations.
- * @sock_graft:
- *	Sets the socket's isec sid to the sock's sid.
- * @inet_conn_request:
- *	Sets the openreq's sid to socket's sid with MLS portion taken from peer sid.
- * @inet_csk_clone:
- *	Sets the new child socket's sid to the openreq sid.
- * @inet_conn_established:
- *	Sets the connection's peersid to the secmark on skb.
- * @secmark_relabel_packet:
- *	check if the process should be allowed to relabel packets to the given secid
- * @security_secmark_refcount_inc
- *	tells the LSM to increment the number of secmark labeling rules loaded
- * @security_secmark_refcount_dec
- *	tells the LSM to decrement the number of secmark labeling rules loaded
- * @req_classify_flow:
- *	Sets the flow's sid to the openreq sid.
- * @tun_dev_alloc_security:
- *	This hook allows a module to allocate a security structure for a TUN
- *	device.
- *	@security pointer to a security structure pointer.
- *	Returns a zero on success, negative values on failure.
- * @tun_dev_free_security:
- *	This hook allows a module to free the security structure for a TUN
- *	device.
- *	@security pointer to the TUN device's security structure
- * @tun_dev_create:
- *	Check permissions prior to creating a new TUN device.
- * @tun_dev_attach_queue:
- *	Check permissions prior to attaching to a TUN device queue.
- *	@security pointer to the TUN device's security structure.
- * @tun_dev_attach:
- *	This hook can be used by the module to update any security state
- *	associated with the TUN device's sock structure.
- *	@sk contains the existing sock structure.
- *	@security pointer to the TUN device's security structure.
- * @tun_dev_open:
- *	This hook can be used by the module to update any security state
- *	associated with the TUN device's security structure.
- *	@security pointer to the TUN devices's security structure.
- * @skb_owned_by:
- *	This hook sets the packet's owning sock.
- *	@skb is the packet.
- *	@sk the sock which owns the packet.
- *
- * Security hooks for XFRM operations.
- *
- * @xfrm_policy_alloc_security:
- *	@ctxp is a pointer to the xfrm_sec_ctx being added to Security Policy
- *	Database used by the XFRM system.
- *	@sec_ctx contains the security context information being provided by
- *	the user-level policy update program (e.g., setkey).
- *	Allocate a security structure to the xp->security field; the security
- *	field is initialized to NULL when the xfrm_policy is allocated.
- *	Return 0 if operation was successful (memory to allocate, legal context)
- *	@gfp is to specify the context for the allocation
- * @xfrm_policy_clone_security:
- *	@old_ctx contains an existing xfrm_sec_ctx.
- *	@new_ctxp contains a new xfrm_sec_ctx being cloned from old.
- *	Allocate a security structure in new_ctxp that contains the
- *	information from the old_ctx structure.
- *	Return 0 if operation was successful (memory to allocate).
- * @xfrm_policy_free_security:
- *	@ctx contains the xfrm_sec_ctx
- *	Deallocate xp->security.
- * @xfrm_policy_delete_security:
- *	@ctx contains the xfrm_sec_ctx.
- *	Authorize deletion of xp->security.
- * @xfrm_state_alloc:
- *	@x contains the xfrm_state being added to the Security Association
- *	Database by the XFRM system.
- *	@sec_ctx contains the security context information being provided by
- *	the user-level SA generation program (e.g., setkey or racoon).
- *	Allocate a security structure to the x->security field; the security
- *	field is initialized to NULL when the xfrm_state is allocated. Set the
- *	context to correspond to sec_ctx. Return 0 if operation was successful
- *	(memory to allocate, legal context).
- * @xfrm_state_alloc_acquire:
- *	@x contains the xfrm_state being added to the Security Association
- *	Database by the XFRM system.
- *	@polsec contains the policy's security context.
- *	@secid contains the secid from which to take the mls portion of the
- *	context.
- *	Allocate a security structure to the x->security field; the security
- *	field is initialized to NULL when the xfrm_state is allocated. Set the
- *	context to correspond to secid. Return 0 if operation was successful
- *	(memory to allocate, legal context).
- * @xfrm_state_free_security:
- *	@x contains the xfrm_state.
- *	Deallocate x->security.
- * @xfrm_state_delete_security:
- *	@x contains the xfrm_state.
- *	Authorize deletion of x->security.
- * @xfrm_policy_lookup:
- *	@ctx contains the xfrm_sec_ctx for which the access control is being
- *	checked.
- *	@fl_secid contains the flow security label that is used to authorize
- *	access to the policy xp.
- *	@dir contains the direction of the flow (input or output).
- *	Check permission when a flow selects a xfrm_policy for processing
- *	XFRMs on a packet.  The hook is called when selecting either a
- *	per-socket policy or a generic xfrm policy.
- *	Return 0 if permission is granted, -ESRCH otherwise, or -errno
- *	on other errors.
- * @xfrm_state_pol_flow_match:
- *	@x contains the state to match.
- *	@xp contains the policy to check for a match.
- *	@fl contains the flow to check for a match.
- *	Return 1 if there is a match.
- * @xfrm_decode_session:
- *	@skb points to skb to decode.
- *	@secid points to the flow key secid to set.
- *	@ckall says if all xfrms used should be checked for same secid.
- *	Return 0 if ckall is zero or all xfrms used have the same secid.
- *
- * Security hooks affecting all Key Management operations
- *
- * @key_alloc:
- *	Permit allocation of a key and assign security data. Note that key does
- *	not have a serial number assigned at this point.
- *	@key points to the key.
- *	@flags is the allocation flags
- *	Return 0 if permission is granted, -ve error otherwise.
- * @key_free:
- *	Notification of destruction; free security data.
- *	@key points to the key.
- *	No return value.
- * @key_permission:
- *	See whether a specific operational right is granted to a process on a
- *	key.
- *	@key_ref refers to the key (key pointer + possession attribute bit).
- *	@cred points to the credentials to provide the context against which to
- *	evaluate the security data on the key.
- *	@perm describes the combination of permissions required of this key.
- *	Return 0 if permission is granted, -ve error otherwise.
- * @key_getsecurity:
- *	Get a textual representation of the security context attached to a key
- *	for the purposes of honouring KEYCTL_GETSECURITY.  This function
- *	allocates the storage for the NUL-terminated string and the caller
- *	should free it.
- *	@key points to the key to be queried.
- *	@_buffer points to a pointer that should be set to point to the
- *	resulting string (if no label or an error occurs).
- *	Return the length of the string (including terminating NUL) or -ve if
- *	an error.
- *	May also return 0 (and a NULL buffer pointer) if there is no label.
- *
- * Security hooks affecting all System V IPC operations.
- *
- * @ipc_permission:
- *	Check permissions for access to IPC
- *	@ipcp contains the kernel IPC permission structure
- *	@flag contains the desired (requested) permission set
- *	Return 0 if permission is granted.
- * @ipc_getsecid:
- *	Get the secid associated with the ipc object.
- *	@ipcp contains the kernel IPC permission structure.
- *	@secid contains a pointer to the location where result will be saved.
- *	In case of failure, @secid will be set to zero.
- *
- * Security hooks for individual messages held in System V IPC message queues
- * @msg_msg_alloc_security:
- *	Allocate and attach a security structure to the msg->security field.
- *	The security field is initialized to NULL when the structure is first
- *	created.
- *	@msg contains the message structure to be modified.
- *	Return 0 if operation was successful and permission is granted.
- * @msg_msg_free_security:
- *	Deallocate the security structure for this message.
- *	@msg contains the message structure to be modified.
- *
- * Security hooks for System V IPC Message Queues
- *
- * @msg_queue_alloc_security:
- *	Allocate and attach a security structure to the
- *	msq->q_perm.security field. The security field is initialized to
- *	NULL when the structure is first created.
- *	@msq contains the message queue structure to be modified.
- *	Return 0 if operation was successful and permission is granted.
- * @msg_queue_free_security:
- *	Deallocate security structure for this message queue.
- *	@msq contains the message queue structure to be modified.
- * @msg_queue_associate:
- *	Check permission when a message queue is requested through the
- *	msgget system call.  This hook is only called when returning the
- *	message queue identifier for an existing message queue, not when a
- *	new message queue is created.
- *	@msq contains the message queue to act upon.
- *	@msqflg contains the operation control flags.
- *	Return 0 if permission is granted.
- * @msg_queue_msgctl:
- *	Check permission when a message control operation specified by @cmd
- *	is to be performed on the message queue @msq.
- *	The @msq may be NULL, e.g. for IPC_INFO or MSG_INFO.
- *	@msq contains the message queue to act upon.  May be NULL.
- *	@cmd contains the operation to be performed.
- *	Return 0 if permission is granted.
- * @msg_queue_msgsnd:
- *	Check permission before a message, @msg, is enqueued on the message
- *	queue, @msq.
- *	@msq contains the message queue to send message to.
- *	@msg contains the message to be enqueued.
- *	@msqflg contains operational flags.
- *	Return 0 if permission is granted.
- * @msg_queue_msgrcv:
- *	Check permission before a message, @msg, is removed from the message
- *	queue, @msq.  The @target task structure contains a pointer to the
- *	process that will be receiving the message (not equal to the current
- *	process when inline receives are being performed).
- *	@msq contains the message queue to retrieve message from.
- *	@msg contains the message destination.
- *	@target contains the task structure for recipient process.
- *	@type contains the type of message requested.
- *	@mode contains the operational flags.
- *	Return 0 if permission is granted.
- *
- * Security hooks for System V Shared Memory Segments
- *
- * @shm_alloc_security:
- *	Allocate and attach a security structure to the shp->shm_perm.security
- *	field.  The security field is initialized to NULL when the structure is
- *	first created.
- *	@shp contains the shared memory structure to be modified.
- *	Return 0 if operation was successful and permission is granted.
- * @shm_free_security:
- *	Deallocate the security struct for this memory segment.
- *	@shp contains the shared memory structure to be modified.
- * @shm_associate:
- *	Check permission when a shared memory region is requested through the
- *	shmget system call.  This hook is only called when returning the shared
- *	memory region identifier for an existing region, not when a new shared
- *	memory region is created.
- *	@shp contains the shared memory structure to be modified.
- *	@shmflg contains the operation control flags.
- *	Return 0 if permission is granted.
- * @shm_shmctl:
- *	Check permission when a shared memory control operation specified by
- *	@cmd is to be performed on the shared memory region @shp.
- *	The @shp may be NULL, e.g. for IPC_INFO or SHM_INFO.
- *	@shp contains shared memory structure to be modified.
- *	@cmd contains the operation to be performed.
- *	Return 0 if permission is granted.
- * @shm_shmat:
- *	Check permissions prior to allowing the shmat system call to attach the
- *	shared memory segment @shp to the data segment of the calling process.
- *	The attaching address is specified by @shmaddr.
- *	@shp contains the shared memory structure to be modified.
- *	@shmaddr contains the address to attach memory region to.
- *	@shmflg contains the operational flags.
- *	Return 0 if permission is granted.
- *
- * Security hooks for System V Semaphores
- *
- * @sem_alloc_security:
- *	Allocate and attach a security structure to the sma->sem_perm.security
- *	field.  The security field is initialized to NULL when the structure is
- *	first created.
- *	@sma contains the semaphore structure
- *	Return 0 if operation was successful and permission is granted.
- * @sem_free_security:
- *	deallocate security struct for this semaphore
- *	@sma contains the semaphore structure.
- * @sem_associate:
- *	Check permission when a semaphore is requested through the semget
- *	system call.  This hook is only called when returning the semaphore
- *	identifier for an existing semaphore, not when a new one must be
- *	created.
- *	@sma contains the semaphore structure.
- *	@semflg contains the operation control flags.
- *	Return 0 if permission is granted.
- * @sem_semctl:
- *	Check permission when a semaphore operation specified by @cmd is to be
- *	performed on the semaphore @sma.  The @sma may be NULL, e.g. for
- *	IPC_INFO or SEM_INFO.
- *	@sma contains the semaphore structure.  May be NULL.
- *	@cmd contains the operation to be performed.
- *	Return 0 if permission is granted.
- * @sem_semop
- *	Check permissions before performing operations on members of the
- *	semaphore set @sma.  If the @alter flag is nonzero, the semaphore set
- *	may be modified.
- *	@sma contains the semaphore structure.
- *	@sops contains the operations to perform.
- *	@nsops contains the number of operations to perform.
- *	@alter contains the flag indicating whether changes are to be made.
- *	Return 0 if permission is granted.
- *
- * @binder_set_context_mgr
- *	Check whether @mgr is allowed to be the binder context manager.
- *	@mgr contains the task_struct for the task being registered.
- *	Return 0 if permission is granted.
- * @binder_transaction
- *	Check whether @from is allowed to invoke a binder transaction call
- *	to @to.
- *	@from contains the task_struct for the sending task.
- *	@to contains the task_struct for the receiving task.
- * @binder_transfer_binder
- *	Check whether @from is allowed to transfer a binder reference to @to.
- *	@from contains the task_struct for the sending task.
- *	@to contains the task_struct for the receiving task.
- * @binder_transfer_file
- *	Check whether @from is allowed to transfer @file to @to.
- *	@from contains the task_struct for the sending task.
- *	@file contains the struct file being transferred.
- *	@to contains the task_struct for the receiving task.
- *
- * @ptrace_access_check:
- *	Check permission before allowing the current process to trace the
- *	@child process.
- *	Security modules may also want to perform a process tracing check
- *	during an execve in the set_security or apply_creds hooks of
- *	tracing check during an execve in the bprm_set_creds hook of
- *	binprm_security_ops if the process is being traced and its security
- *	attributes would be changed by the execve.
- *	@child contains the task_struct structure for the target process.
- *	@mode contains the PTRACE_MODE flags indicating the form of access.
- *	Return 0 if permission is granted.
- * @ptrace_traceme:
- *	Check that the @parent process has sufficient permission to trace the
- *	current process before allowing the current process to present itself
- *	to the @parent process for tracing.
- *	@parent contains the task_struct structure for debugger process.
- *	Return 0 if permission is granted.
- * @capget:
- *	Get the @effective, @inheritable, and @permitted capability sets for
- *	the @target process.  The hook may also perform permission checking to
- *	determine if the current process is allowed to see the capability sets
- *	of the @target process.
- *	@target contains the task_struct structure for target process.
- *	@effective contains the effective capability set.
- *	@inheritable contains the inheritable capability set.
- *	@permitted contains the permitted capability set.
- *	Return 0 if the capability sets were successfully obtained.
- * @capset:
- *	Set the @effective, @inheritable, and @permitted capability sets for
- *	the current process.
- *	@new contains the new credentials structure for target process.
- *	@old contains the current credentials structure for target process.
- *	@effective contains the effective capability set.
- *	@inheritable contains the inheritable capability set.
- *	@permitted contains the permitted capability set.
- *	Return 0 and update @new if permission is granted.
- * @capable:
- *	Check whether the @tsk process has the @cap capability in the indicated
- *	credentials.
- *	@cred contains the credentials to use.
- *	@ns contains the user namespace we want the capability in
- *	@cap contains the capability <include/linux/capability.h>.
- *	@audit: Whether to write an audit message or not
- *	Return 0 if the capability is granted for @tsk.
- * @syslog:
- *	Check permission before accessing the kernel message ring or changing
- *	logging to the console.
- *	See the syslog(2) manual page for an explanation of the @type values.
- *	@type contains the type of action.
- *	@from_file indicates the context of action (if it came from /proc).
- *	Return 0 if permission is granted.
- * @settime:
- *	Check permission to change the system time.
- *	struct timespec and timezone are defined in include/linux/time.h
- *	@ts contains new time
- *	@tz contains new timezone
- *	Return 0 if permission is granted.
- * @vm_enough_memory:
- *	Check permissions for allocating a new virtual mapping.
- *	@mm contains the mm struct it is being added to.
- *	@pages contains the number of pages.
- *	Return 0 if permission is granted.
- *
- * @ismaclabel:
- *	Check if the extended attribute specified by @name
- *	represents a MAC label. Returns 1 if name is a MAC
- *	attribute otherwise returns 0.
- *	@name full extended attribute name to check against
- *	LSM as a MAC label.
- *
- * @secid_to_secctx:
- *	Convert secid to security context.  If secdata is NULL the length of
- *	the result will be returned in seclen, but no secdata will be returned.
- *	This does mean that the length could change between calls to check the
- *	length and the next call which actually allocates and returns the secdata.
- *	@secid contains the security ID.
- *	@secdata contains the pointer that stores the converted security context.
- *	@seclen pointer which contains the length of the data
- * @secctx_to_secid:
- *	Convert security context to secid.
- *	@secid contains the pointer to the generated security ID.
- *	@secdata contains the security context.
- *
- * @release_secctx:
- *	Release the security context.
- *	@secdata contains the security context.
- *	@seclen contains the length of the security context.
- *
- * Security hooks for Audit
- *
- * @audit_rule_init:
- *	Allocate and initialize an LSM audit rule structure.
- *	@field contains the required Audit action. Fields flags are defined in include/linux/audit.h
- *	@op contains the operator the rule uses.
- *	@rulestr contains the context where the rule will be applied to.
- *	@lsmrule contains a pointer to receive the result.
- *	Return 0 if @lsmrule has been successfully set,
- *	-EINVAL in case of an invalid rule.
- *
- * @audit_rule_known:
- *	Specifies whether given @rule contains any fields related to current LSM.
- *	@rule contains the audit rule of interest.
- *	Return 1 in case of relation found, 0 otherwise.
- *
- * @audit_rule_match:
- *	Determine if given @secid matches a rule previously approved
- *	by @audit_rule_known.
- *	@secid contains the security id in question.
- *	@field contains the field which relates to current LSM.
- *	@op contains the operator that will be used for matching.
- *	@rule points to the audit rule that will be checked against.
- *	@actx points to the audit context associated with the check.
- *	Return 1 if secid matches the rule, 0 if it does not, -ERRNO on failure.
- *
- * @audit_rule_free:
- *	Deallocate the LSM audit rule structure previously allocated by
- *	audit_rule_init.
- *	@rule contains the allocated rule
- *
- * @inode_notifysecctx:
- *	Notify the security module of what the security context of an inode
- *	should be.  Initializes the incore security context managed by the
- *	security module for this inode.  Example usage:  NFS client invokes
- *	this hook to initialize the security context in its incore inode to the
- *	value provided by the server for the file when the server returned the
- *	file's attributes to the client.
- *
- * 	Must be called with inode->i_mutex locked.
- *
- * 	@inode we wish to set the security context of.
- * 	@ctx contains the string which we wish to set in the inode.
- * 	@ctxlen contains the length of @ctx.
- *
- * @inode_setsecctx:
- * 	Change the security context of an inode.  Updates the
- * 	incore security context managed by the security module and invokes the
- * 	fs code as needed (via __vfs_setxattr_noperm) to update any backing
- * 	xattrs that represent the context.  Example usage:  NFS server invokes
- * 	this hook to change the security context in its incore inode and on the
- * 	backing filesystem to a value provided by the client on a SETATTR
- * 	operation.
- *
- * 	Must be called with inode->i_mutex locked.
- *
- * 	@dentry contains the inode we wish to set the security context of.
- * 	@ctx contains the string which we wish to set in the inode.
- * 	@ctxlen contains the length of @ctx.
- *
- * @inode_getsecctx:
- *	On success, returns 0 and fills out @ctx and @ctxlen with the security
- *	context for the given @inode.
- *
- * 	@inode we wish to get the security context of.
- *	@ctx is a pointer in which to place the allocated security context.
- *	@ctxlen points to the place to put the length of @ctx.
- * This is the main security structure.
- */
-
 /* prototypes */
 extern int security_init(void);
 
-- 
cgit v1.2.3


From e20b043a6902ecb61c2c84355c3bae5149f391db Mon Sep 17 00:00:00 2001
From: Casey Schaufler <casey@schaufler-ca.com>
Date: Sat, 2 May 2015 15:11:36 -0700
Subject: LSM: Add security module hook list heads

Add a list header for each security hook. They aren't used until
later in the patch series. They are grouped together in a structure
so that there doesn't need to be an external address for each.

Macro-ize the initialization of the security_operations
for each security module in anticipation of changing out
the security_operations structure.

Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
Acked-by: John Johansen <john.johansen@canonical.com>
Acked-by: Kees Cook <keescook@chromium.org>
Acked-by: Paul Moore <paul@paul-moore.com>
Acked-by:  Stephen Smalley <sds@tycho.nsa.gov>
Acked-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: James Morris <james.l.morris@oracle.com>
---
 include/linux/lsm_hooks.h | 220 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 220 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index b4c91de510c2..27dd6fcacccc 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -1626,6 +1626,226 @@ struct security_operations {
 #endif /* CONFIG_AUDIT */
 };
 
+struct security_hook_heads {
+	struct list_head binder_set_context_mgr;
+	struct list_head binder_transaction;
+	struct list_head binder_transfer_binder;
+	struct list_head binder_transfer_file;
+	struct list_head ptrace_access_check;
+	struct list_head ptrace_traceme;
+	struct list_head capget;
+	struct list_head capset;
+	struct list_head capable;
+	struct list_head quotactl;
+	struct list_head quota_on;
+	struct list_head syslog;
+	struct list_head settime;
+	struct list_head vm_enough_memory;
+	struct list_head bprm_set_creds;
+	struct list_head bprm_check_security;
+	struct list_head bprm_secureexec;
+	struct list_head bprm_committing_creds;
+	struct list_head bprm_committed_creds;
+	struct list_head sb_alloc_security;
+	struct list_head sb_free_security;
+	struct list_head sb_copy_data;
+	struct list_head sb_remount;
+	struct list_head sb_kern_mount;
+	struct list_head sb_show_options;
+	struct list_head sb_statfs;
+	struct list_head sb_mount;
+	struct list_head sb_umount;
+	struct list_head sb_pivotroot;
+	struct list_head sb_set_mnt_opts;
+	struct list_head sb_clone_mnt_opts;
+	struct list_head sb_parse_opts_str;
+	struct list_head dentry_init_security;
+#ifdef CONFIG_SECURITY_PATH
+	struct list_head path_unlink;
+	struct list_head path_mkdir;
+	struct list_head path_rmdir;
+	struct list_head path_mknod;
+	struct list_head path_truncate;
+	struct list_head path_symlink;
+	struct list_head path_link;
+	struct list_head path_rename;
+	struct list_head path_chmod;
+	struct list_head path_chown;
+	struct list_head path_chroot;
+#endif
+	struct list_head inode_alloc_security;
+	struct list_head inode_free_security;
+	struct list_head inode_init_security;
+	struct list_head inode_create;
+	struct list_head inode_link;
+	struct list_head inode_unlink;
+	struct list_head inode_symlink;
+	struct list_head inode_mkdir;
+	struct list_head inode_rmdir;
+	struct list_head inode_mknod;
+	struct list_head inode_rename;
+	struct list_head inode_readlink;
+	struct list_head inode_follow_link;
+	struct list_head inode_permission;
+	struct list_head inode_setattr;
+	struct list_head inode_getattr;
+	struct list_head inode_setxattr;
+	struct list_head inode_post_setxattr;
+	struct list_head inode_getxattr;
+	struct list_head inode_listxattr;
+	struct list_head inode_removexattr;
+	struct list_head inode_need_killpriv;
+	struct list_head inode_killpriv;
+	struct list_head inode_getsecurity;
+	struct list_head inode_setsecurity;
+	struct list_head inode_listsecurity;
+	struct list_head inode_getsecid;
+	struct list_head file_permission;
+	struct list_head file_alloc_security;
+	struct list_head file_free_security;
+	struct list_head file_ioctl;
+	struct list_head mmap_addr;
+	struct list_head mmap_file;
+	struct list_head file_mprotect;
+	struct list_head file_lock;
+	struct list_head file_fcntl;
+	struct list_head file_set_fowner;
+	struct list_head file_send_sigiotask;
+	struct list_head file_receive;
+	struct list_head file_open;
+	struct list_head task_create;
+	struct list_head task_free;
+	struct list_head cred_alloc_blank;
+	struct list_head cred_free;
+	struct list_head cred_prepare;
+	struct list_head cred_transfer;
+	struct list_head kernel_act_as;
+	struct list_head kernel_create_files_as;
+	struct list_head kernel_fw_from_file;
+	struct list_head kernel_module_request;
+	struct list_head kernel_module_from_file;
+	struct list_head task_fix_setuid;
+	struct list_head task_setpgid;
+	struct list_head task_getpgid;
+	struct list_head task_getsid;
+	struct list_head task_getsecid;
+	struct list_head task_setnice;
+	struct list_head task_setioprio;
+	struct list_head task_getioprio;
+	struct list_head task_setrlimit;
+	struct list_head task_setscheduler;
+	struct list_head task_getscheduler;
+	struct list_head task_movememory;
+	struct list_head task_kill;
+	struct list_head task_wait;
+	struct list_head task_prctl;
+	struct list_head task_to_inode;
+	struct list_head ipc_permission;
+	struct list_head ipc_getsecid;
+	struct list_head msg_msg_alloc_security;
+	struct list_head msg_msg_free_security;
+	struct list_head msg_queue_alloc_security;
+	struct list_head msg_queue_free_security;
+	struct list_head msg_queue_associate;
+	struct list_head msg_queue_msgctl;
+	struct list_head msg_queue_msgsnd;
+	struct list_head msg_queue_msgrcv;
+	struct list_head shm_alloc_security;
+	struct list_head shm_free_security;
+	struct list_head shm_associate;
+	struct list_head shm_shmctl;
+	struct list_head shm_shmat;
+	struct list_head sem_alloc_security;
+	struct list_head sem_free_security;
+	struct list_head sem_associate;
+	struct list_head sem_semctl;
+	struct list_head sem_semop;
+	struct list_head netlink_send;
+	struct list_head d_instantiate;
+	struct list_head getprocattr;
+	struct list_head setprocattr;
+	struct list_head ismaclabel;
+	struct list_head secid_to_secctx;
+	struct list_head secctx_to_secid;
+	struct list_head release_secctx;
+	struct list_head inode_notifysecctx;
+	struct list_head inode_setsecctx;
+	struct list_head inode_getsecctx;
+#ifdef CONFIG_SECURITY_NETWORK
+	struct list_head unix_stream_connect;
+	struct list_head unix_may_send;
+	struct list_head socket_create;
+	struct list_head socket_post_create;
+	struct list_head socket_bind;
+	struct list_head socket_connect;
+	struct list_head socket_listen;
+	struct list_head socket_accept;
+	struct list_head socket_sendmsg;
+	struct list_head socket_recvmsg;
+	struct list_head socket_getsockname;
+	struct list_head socket_getpeername;
+	struct list_head socket_getsockopt;
+	struct list_head socket_setsockopt;
+	struct list_head socket_shutdown;
+	struct list_head socket_sock_rcv_skb;
+	struct list_head socket_getpeersec_stream;
+	struct list_head socket_getpeersec_dgram;
+	struct list_head sk_alloc_security;
+	struct list_head sk_free_security;
+	struct list_head sk_clone_security;
+	struct list_head sk_getsecid;
+	struct list_head sock_graft;
+	struct list_head inet_conn_request;
+	struct list_head inet_csk_clone;
+	struct list_head inet_conn_established;
+	struct list_head secmark_relabel_packet;
+	struct list_head secmark_refcount_inc;
+	struct list_head secmark_refcount_dec;
+	struct list_head req_classify_flow;
+	struct list_head tun_dev_alloc_security;
+	struct list_head tun_dev_free_security;
+	struct list_head tun_dev_create;
+	struct list_head tun_dev_attach_queue;
+	struct list_head tun_dev_attach;
+	struct list_head tun_dev_open;
+	struct list_head skb_owned_by;
+#endif	/* CONFIG_SECURITY_NETWORK */
+#ifdef CONFIG_SECURITY_NETWORK_XFRM
+	struct list_head xfrm_policy_alloc_security;
+	struct list_head xfrm_policy_clone_security;
+	struct list_head xfrm_policy_free_security;
+	struct list_head xfrm_policy_delete_security;
+	struct list_head xfrm_state_alloc;
+	struct list_head xfrm_state_alloc_acquire;
+	struct list_head xfrm_state_free_security;
+	struct list_head xfrm_state_delete_security;
+	struct list_head xfrm_policy_lookup;
+	struct list_head xfrm_state_pol_flow_match;
+	struct list_head xfrm_decode_session;
+#endif	/* CONFIG_SECURITY_NETWORK_XFRM */
+#ifdef CONFIG_KEYS
+	struct list_head key_alloc;
+	struct list_head key_free;
+	struct list_head key_permission;
+	struct list_head key_getsecurity;
+#endif	/* CONFIG_KEYS */
+#ifdef CONFIG_AUDIT
+	struct list_head audit_rule_init;
+	struct list_head audit_rule_known;
+	struct list_head audit_rule_match;
+	struct list_head audit_rule_free;
+#endif /* CONFIG_AUDIT */
+};
+
+/*
+ * Initializing a security_hook_list structure takes
+ * up a lot of space in a source file. This macro takes
+ * care of the common case and reduces the amount of
+ * text involved.
+ * Casey says: Comment is true in the next patch.
+ */
+#define LSM_HOOK_INIT(HEAD, HOOK)	.HEAD = HOOK
+
 /* prototypes */
 extern int security_module_enable(struct security_operations *ops);
 extern int register_security(struct security_operations *ops);
-- 
cgit v1.2.3


From b1d9e6b0646d0e5ee5d9050bd236b6c65d66faef Mon Sep 17 00:00:00 2001
From: Casey Schaufler <casey@schaufler-ca.com>
Date: Sat, 2 May 2015 15:11:42 -0700
Subject: LSM: Switch to lists of hooks

Instead of using a vector of security operations
with explicit, special case stacking of the capability
and yama hooks use lists of hooks with capability and
yama hooks included as appropriate.

The security_operations structure is no longer required.
Instead, there is a union of the function pointers that
allows all the hooks lists to use a common mechanism for
list management while retaining typing. Each module
supplies an array describing the hooks it provides instead
of a sparsely populated security_operations structure.
The description includes the element that gets put on
the hook list, avoiding the issues surrounding individual
element allocation.

The method for registering security modules is changed to
reflect the information available. The method for removing
a module, currently only used by SELinux, has also changed.
It should be generic now, however if there are potential
race conditions based on ordering of hook removal that needs
to be addressed by the calling module.

The security hooks are called from the lists and the first
failure is returned.

Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
Acked-by: John Johansen <john.johansen@canonical.com>
Acked-by: Kees Cook <keescook@chromium.org>
Acked-by: Paul Moore <paul@paul-moore.com>
Acked-by:  Stephen Smalley <sds@tycho.nsa.gov>
Acked-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: James Morris <james.l.morris@oracle.com>
---
 include/linux/lsm_hooks.h | 77 ++++++++++++++++++++++++++++++++---------------
 include/linux/security.h  | 46 +++-------------------------
 2 files changed, 57 insertions(+), 66 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 27dd6fcacccc..f014f2596e22 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -25,21 +25,10 @@
 #define __LINUX_LSM_HOOKS_H
 
 #include <linux/security.h>
-
-/* Maximum number of letters for an LSM name string */
-#define SECURITY_NAME_MAX	10
-
-#ifdef CONFIG_SECURITY
+#include <linux/init.h>
+#include <linux/rculist.h>
 
 /**
- * struct security_operations - main security structure
- *
- * Security module identifier.
- *
- * @name:
- *	A string that acts as a unique identifier for the LSM with max number
- *	of characters = SECURITY_NAME_MAX.
- *
  * Security hooks for program execution operations.
  *
  * @bprm_set_creds:
@@ -1310,9 +1299,7 @@
  * This is the main security structure.
  */
 
-struct security_operations {
-	char name[SECURITY_NAME_MAX + 1];
-
+union security_list_options {
 	int (*binder_set_context_mgr)(struct task_struct *mgr);
 	int (*binder_transaction)(struct task_struct *from,
 					struct task_struct *to);
@@ -1837,21 +1824,63 @@ struct security_hook_heads {
 #endif /* CONFIG_AUDIT */
 };
 
+/*
+ * Security module hook list structure.
+ * For use with generic list macros for common operations.
+ */
+struct security_hook_list {
+	struct list_head		list;
+	struct list_head		*head;
+	union security_list_options	hook;
+};
+
 /*
  * Initializing a security_hook_list structure takes
  * up a lot of space in a source file. This macro takes
  * care of the common case and reduces the amount of
  * text involved.
- * Casey says: Comment is true in the next patch.
  */
-#define LSM_HOOK_INIT(HEAD, HOOK)	.HEAD = HOOK
+#define LSM_HOOK_INIT(HEAD, HOOK) \
+	{ .head = &security_hook_heads.HEAD, .hook = { .HEAD = HOOK } }
+
+extern struct security_hook_heads security_hook_heads;
+
+static inline void security_add_hooks(struct security_hook_list *hooks,
+				      int count)
+{
+	int i;
 
-/* prototypes */
-extern int security_module_enable(struct security_operations *ops);
-extern int register_security(struct security_operations *ops);
-extern void __init security_fixup_ops(struct security_operations *ops);
-extern void reset_security_ops(void);
+	for (i = 0; i < count; i++)
+		list_add_tail_rcu(&hooks[i].list, hooks[i].head);
+}
 
-#endif /* CONFIG_SECURITY */
+#ifdef CONFIG_SECURITY_SELINUX_DISABLE
+/*
+ * Assuring the safety of deleting a security module is up to
+ * the security module involved. This may entail ordering the
+ * module's hook list in a particular way, refusing to disable
+ * the module once a policy is loaded or any number of other
+ * actions better imagined than described.
+ *
+ * The name of the configuration option reflects the only module
+ * that currently uses the mechanism. Any developer who thinks
+ * disabling their module is a good idea needs to be at least as
+ * careful as the SELinux team.
+ */
+static inline void security_delete_hooks(struct security_hook_list *hooks,
+						int count)
+{
+	int i;
+
+	for (i = 0; i < count; i++)
+		list_del_rcu(&hooks[i].list);
+}
+#endif /* CONFIG_SECURITY_SELINUX_DISABLE */
+
+extern int __init security_module_enable(const char *module);
+extern void __init capability_add_hooks(void);
+#ifdef CONFIG_SECURITY_YAMA_STACKED
+void __init yama_add_hooks(void);
+#endif
 
 #endif /* ! __LINUX_LSM_HOOKS_H */
diff --git a/include/linux/security.h b/include/linux/security.h
index a2a100e7ac6e..8c8175d41b4c 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -27,6 +27,7 @@
 #include <linux/slab.h>
 #include <linux/err.h>
 #include <linux/string.h>
+#include <linux/mm.h>
 
 struct linux_binprm;
 struct cred;
@@ -54,9 +55,6 @@ struct xattr;
 struct xfrm_sec_ctx;
 struct mm_struct;
 
-/* Maximum number of letters for an LSM name string */
-#define SECURITY_NAME_MAX	10
-
 /* If capable should audit the security request */
 #define SECURITY_CAP_NOAUDIT 0
 #define SECURITY_CAP_AUDIT 1
@@ -69,10 +67,7 @@ struct audit_krule;
 struct user_namespace;
 struct timezone;
 
-/*
- * These functions are in security/capability.c and are used
- * as the default capabilities functions
- */
+/* These functions are in security/commoncap.c */
 extern int cap_capable(const struct cred *cred, struct user_namespace *ns,
 		       int cap, int audit);
 extern int cap_settime(const struct timespec *ts, const struct timezone *tz);
@@ -114,8 +109,6 @@ struct xfrm_state;
 struct xfrm_user_sec_ctx;
 struct seq_file;
 
-extern int cap_netlink_send(struct sock *sk, struct sk_buff *skb);
-
 #ifdef CONFIG_MMU
 extern unsigned long mmap_min_addr;
 extern unsigned long dac_mmap_min_addr;
@@ -472,7 +465,7 @@ static inline int security_settime(const struct timespec *ts,
 
 static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages)
 {
-	return cap_vm_enough_memory(mm, pages);
+	return __vm_enough_memory(mm, pages, cap_vm_enough_memory(mm, pages));
 }
 
 static inline int security_bprm_set_creds(struct linux_binprm *bprm)
@@ -1075,7 +1068,7 @@ static inline int security_setprocattr(struct task_struct *p, char *name, void *
 
 static inline int security_netlink_send(struct sock *sk, struct sk_buff *skb)
 {
-	return cap_netlink_send(sk, skb);
+	return 0;
 }
 
 static inline int security_ismaclabel(const char *name)
@@ -1643,36 +1636,5 @@ static inline void free_secdata(void *secdata)
 { }
 #endif /* CONFIG_SECURITY */
 
-#ifdef CONFIG_SECURITY_YAMA
-extern int yama_ptrace_access_check(struct task_struct *child,
-				    unsigned int mode);
-extern int yama_ptrace_traceme(struct task_struct *parent);
-extern void yama_task_free(struct task_struct *task);
-extern int yama_task_prctl(int option, unsigned long arg2, unsigned long arg3,
-			   unsigned long arg4, unsigned long arg5);
-#else
-static inline int yama_ptrace_access_check(struct task_struct *child,
-					   unsigned int mode)
-{
-	return 0;
-}
-
-static inline int yama_ptrace_traceme(struct task_struct *parent)
-{
-	return 0;
-}
-
-static inline void yama_task_free(struct task_struct *task)
-{
-}
-
-static inline int yama_task_prctl(int option, unsigned long arg2,
-				  unsigned long arg3, unsigned long arg4,
-				  unsigned long arg5)
-{
-	return -ENOSYS;
-}
-#endif /* CONFIG_SECURITY_YAMA */
-
 #endif /* ! __LINUX_SECURITY_H */
 
-- 
cgit v1.2.3


From 6a4b6b0a3b55f23f4cc9ad85a1539c581bcb0874 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 4 May 2015 17:10:31 +0200
Subject: gpio: sysfs: clean up chip class-device handling

Clean gpio-chip class device registration and deregistration.

The class device is registered when a gpio-chip is added (or from
gpiolib_sysfs_init post-core init call), and deregistered when the chip
is removed.

Store the class device in struct gpio_chip directly rather than do a
class-device lookup on deregistration. This also removes the need for
the exported flag.

Signed-off-by: Johan Hovold <johan@kernel.org>
Reviewed-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio/driver.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index f1b36593ec9f..2c1e639f66bd 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -20,6 +20,7 @@ struct seq_file;
  * struct gpio_chip - abstract a GPIO controller
  * @label: for diagnostics
  * @dev: optional device providing the GPIOs
+ * @cdev: class device used by sysfs interface (may be NULL)
  * @owner: helps prevent removal of modules exporting active GPIOs
  * @list: links gpio_chips together for traversal
  * @request: optional hook for chip-specific activation, such as
@@ -57,7 +58,6 @@ struct seq_file;
  *	implies that if the chip supports IRQs, these IRQs need to be threaded
  *	as the chip access may sleep when e.g. reading out the IRQ status
  *	registers.
- * @exported: flags if the gpiochip is exported for use from sysfs. Private.
  * @irq_not_threaded: flag must be set if @can_sleep is set but the
  *	IRQs don't need to be threaded
  *
@@ -74,6 +74,7 @@ struct seq_file;
 struct gpio_chip {
 	const char		*label;
 	struct device		*dev;
+	struct device		*cdev;
 	struct module		*owner;
 	struct list_head        list;
 
@@ -109,7 +110,6 @@ struct gpio_chip {
 	const char		*const *names;
 	bool			can_sleep;
 	bool			irq_not_threaded;
-	bool			exported;
 
 #ifdef CONFIG_GPIOLIB_IRQCHIP
 	/*
-- 
cgit v1.2.3


From 166a85e44245d771bd7042f3ad72aa0e12bb53bd Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 4 May 2015 17:10:33 +0200
Subject: gpio: remove gpiod_sysfs_set_active_low

Remove gpiod_sysfs_set_active_low (and gpio_sysfs_set_active_low) which
allowed code to change the polarity of a gpio line even after it had
been exported through sysfs.

Drivers should not care, and generally does not know, about gpio-line
polarity which is a hardware feature that needs to be described by
firmware.

It is currently possible to define gpio-line polarity in device-tree and
acpi firmware or using platform data. Userspace can also change the
polarity through sysfs.

Note that drivers using the legacy gpio interface could still use
GPIOF_ACTIVE_LOW to change the polarity before exporting the gpio.

There are no in-kernel users of this interface.

Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Harry Wei <harryxiyou@gmail.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: linux-doc@vger.kernel.org
Cc: linux-kernel@zh-kernel.org
Cc: linux-arch@vger.kernel.org
Signed-off-by: Johan Hovold <johan@kernel.org>
Reviewed-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio.h          | 7 -------
 include/linux/gpio/consumer.h | 6 ------
 2 files changed, 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gpio.h b/include/linux/gpio.h
index ab81339a8590..d12b5d566e4b 100644
--- a/include/linux/gpio.h
+++ b/include/linux/gpio.h
@@ -196,13 +196,6 @@ static inline int gpio_export_link(struct device *dev, const char *name,
 	return -EINVAL;
 }
 
-static inline int gpio_sysfs_set_active_low(unsigned gpio, int value)
-{
-	/* GPIO can never have been requested */
-	WARN_ON(1);
-	return -EINVAL;
-}
-
 static inline void gpio_unexport(unsigned gpio)
 {
 	/* GPIO can never have been exported */
diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index 3a7c9ffd5ab9..09a7fb0062a6 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -449,7 +449,6 @@ static inline int desc_to_gpio(const struct gpio_desc *desc)
 int gpiod_export(struct gpio_desc *desc, bool direction_may_change);
 int gpiod_export_link(struct device *dev, const char *name,
 		      struct gpio_desc *desc);
-int gpiod_sysfs_set_active_low(struct gpio_desc *desc, int value);
 void gpiod_unexport(struct gpio_desc *desc);
 
 #else  /* CONFIG_GPIOLIB && CONFIG_GPIO_SYSFS */
@@ -466,11 +465,6 @@ static inline int gpiod_export_link(struct device *dev, const char *name,
 	return -ENOSYS;
 }
 
-static inline int gpiod_sysfs_set_active_low(struct gpio_desc *desc, int value)
-{
-	return -ENOSYS;
-}
-
 static inline void gpiod_unexport(struct gpio_desc *desc)
 {
 }
-- 
cgit v1.2.3


From 0e39250845c0f91acc64264709b25f7f9b85c2c3 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@redhat.com>
Date: Wed, 6 May 2015 21:11:51 -0700
Subject: net: Store virtual address instead of page in netdev_alloc_cache

This change makes it so that we store the virtual address of the page
in the netdev_alloc_cache instead of the page pointer.  The idea behind
this is to avoid multiple calls to page_address since the virtual address
is required for every access, but the page pointer is only needed at
allocation or reset of the page.

While I was at it I also reordered the netdev_alloc_cache structure a bit
so that the size is always 16 bytes by dropping size in the case where
PAGE_SIZE is greater than or equal to 32KB.

Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 9c2f793573fa..8b9a2c35a9d7 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2128,9 +2128,8 @@ static inline void __skb_queue_purge(struct sk_buff_head *list)
 		kfree_skb(skb);
 }
 
-#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768)
-#define NETDEV_FRAG_PAGE_MAX_SIZE  (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER)
-#define NETDEV_PAGECNT_MAX_BIAS	   NETDEV_FRAG_PAGE_MAX_SIZE
+#define NETDEV_FRAG_PAGE_MAX_SIZE	__ALIGN_MASK(32768, ~PAGE_MASK)
+#define NETDEV_FRAG_PAGE_MAX_ORDER	get_order(NETDEV_FRAG_PAGE_MAX_SIZE)
 
 void *netdev_alloc_frag(unsigned int fragsz);
 
-- 
cgit v1.2.3


From b63ae8ca096dfdbfeef6a209c30a93a966518853 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@redhat.com>
Date: Wed, 6 May 2015 21:11:57 -0700
Subject: mm/net: Rename and move page fragment handling from net/ to mm/

This change moves the __alloc_page_frag functionality out of the networking
stack and into the page allocation portion of mm.  The idea it so help make
this maintainable by placing it with other page allocation functions.

Since we are moving it from skbuff.c to page_alloc.c I have also renamed
the basic defines and structure from netdev_alloc_cache to page_frag_cache
to reflect that this is now part of a different kernel subsystem.

I have also added a simple __free_page_frag function which can handle
freeing the frags based on the skb->head pointer.  The model for this is
based off of __free_pages since we don't actually need to deal with all of
the cases that put_page handles.  I incorporated the virt_to_head_page call
and compound_order into the function as it actually allows for a signficant
size reduction by reducing code duplication.

Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/gfp.h      |  5 +++++
 include/linux/mm_types.h | 18 ++++++++++++++++++
 include/linux/skbuff.h   |  3 ---
 3 files changed, 23 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 97a9373e61e8..70a7fee1efb3 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -366,6 +366,11 @@ extern void free_pages(unsigned long addr, unsigned int order);
 extern void free_hot_cold_page(struct page *page, bool cold);
 extern void free_hot_cold_page_list(struct list_head *list, bool cold);
 
+struct page_frag_cache;
+extern void *__alloc_page_frag(struct page_frag_cache *nc,
+			       unsigned int fragsz, gfp_t gfp_mask);
+extern void __free_page_frag(void *addr);
+
 extern void __free_kmem_pages(struct page *page, unsigned int order);
 extern void free_kmem_pages(unsigned long addr, unsigned int order);
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 8d37e26a1007..0038ac7466fd 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -226,6 +226,24 @@ struct page_frag {
 #endif
 };
 
+#define PAGE_FRAG_CACHE_MAX_SIZE	__ALIGN_MASK(32768, ~PAGE_MASK)
+#define PAGE_FRAG_CACHE_MAX_ORDER	get_order(PAGE_FRAG_CACHE_MAX_SIZE)
+
+struct page_frag_cache {
+	void * va;
+#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
+	__u16 offset;
+	__u16 size;
+#else
+	__u32 offset;
+#endif
+	/* we maintain a pagecount bias, so that we dont dirty cache line
+	 * containing page->_count every time we allocate a fragment.
+	 */
+	unsigned int		pagecnt_bias;
+	bool pfmemalloc;
+};
+
 typedef unsigned long __nocast vm_flags_t;
 
 /*
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 8b9a2c35a9d7..0039fcc45b3b 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2128,9 +2128,6 @@ static inline void __skb_queue_purge(struct sk_buff_head *list)
 		kfree_skb(skb);
 }
 
-#define NETDEV_FRAG_PAGE_MAX_SIZE	__ALIGN_MASK(32768, ~PAGE_MASK)
-#define NETDEV_FRAG_PAGE_MAX_ORDER	get_order(NETDEV_FRAG_PAGE_MAX_SIZE)
-
 void *netdev_alloc_frag(unsigned int fragsz);
 
 struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length,
-- 
cgit v1.2.3


From 181edb2bfa22b50817684135ab6430ed2808abf0 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@redhat.com>
Date: Wed, 6 May 2015 21:12:03 -0700
Subject: net: Add skb_free_frag to replace use of put_page in freeing
 skb->head

This change adds a function called skb_free_frag which is meant to
compliment the function netdev_alloc_frag.  The general idea is to enable a
more lightweight version of page freeing since we don't actually need all
the overhead of a put_page, and we don't quite fit the model of __free_pages.

Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 0039fcc45b3b..c0b574a414e7 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2182,6 +2182,11 @@ static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev,
 	return __netdev_alloc_skb_ip_align(dev, length, GFP_ATOMIC);
 }
 
+static inline void skb_free_frag(void *addr)
+{
+	__free_page_frag(addr);
+}
+
 void *napi_alloc_frag(unsigned int fragsz);
 struct sk_buff *__napi_alloc_skb(struct napi_struct *napi,
 				 unsigned int length, gfp_t gfp_mask);
-- 
cgit v1.2.3


From 755a27e7e4c817dd51ade41668b380f26026899c Mon Sep 17 00:00:00 2001
From: Nicolas Iooss <nicolas.iooss_linux@m4x.org>
Date: Sun, 3 May 2015 18:18:02 +0800
Subject: tracing: remove unused ftrace_output_event() prototype

The prototype of ftrace_output_event was added by commit 1d6bae966e90
("tracing: Move raw output code from macro to standalone function")
but this function was not defined anywhere, and is still nowhere to be
found.

Link: http://lkml.kernel.org/r/1430648282-25792-1-git-send-email-nicolas.iooss_linux@m4x.org

Signed-off-by: Nicolas Iooss <nicolas.iooss_linux@m4x.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index f9ecf63d47f1..65ce6de91307 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -219,9 +219,6 @@ struct ftrace_event_class {
 extern int ftrace_event_reg(struct ftrace_event_call *event,
 			    enum trace_reg type, void *data);
 
-int ftrace_output_event(struct trace_iterator *iter, struct ftrace_event_call *event,
-			char *fmt, ...);
-
 int ftrace_event_define_field(struct ftrace_event_call *call,
 			      char *type, int len, char *item, int offset,
 			      int field_size, int sign, int filter);
-- 
cgit v1.2.3


From 020af89a41c41fd2c92d0da524968dfaba6269f0 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <Grygorii.Strashko@linaro.org>
Date: Mon, 27 Apr 2015 21:24:30 +0300
Subject: PM / sleep: Add macro to define common noirq system PM callbacks

The same approach is used as for the existing SET_SYSTEM_SLEEP_PM_OPS,
but for noirq callbacks.

New SET_NOIRQ_SYSTEM_SLEEP_PM_OPS, defined for CONFIG_PM_SLEEP, will
point ->suspend_noirq, ->freeze_noirq and ->poweroff_noirq to the same
function. Vice versa happens for ->resume_noirq, ->thaw_noirq and
->restore_noirq.

Signed-off-by: Grygorii Strashko <Grygorii.Strashko@linaro.org>
Acked-by: Santosh Shilimkar <ssantosh@kernel.org>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Pavel Machek <pavel@ucw.cz>
Reviewed-by: Kevin Hilman <khilman@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/pm.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pm.h b/include/linux/pm.h
index 2d29c64f8fb1..4890743892ef 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -342,6 +342,18 @@ struct dev_pm_ops {
 #define SET_LATE_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn)
 #endif
 
+#ifdef CONFIG_PM_SLEEP
+#define SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
+	.suspend_noirq = suspend_fn, \
+	.resume_noirq = resume_fn, \
+	.freeze_noirq = suspend_fn, \
+	.thaw_noirq = resume_fn, \
+	.poweroff_noirq = suspend_fn, \
+	.restore_noirq = resume_fn,
+#else
+#define SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn)
+#endif
+
 #ifdef CONFIG_PM
 #define SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \
 	.runtime_suspend = suspend_fn, \
-- 
cgit v1.2.3


From 75f504004ab866c8f84749303b0f70953724e259 Mon Sep 17 00:00:00 2001
From: Rajendra Nayak <rnayak@codeaurora.org>
Date: Thu, 23 Apr 2015 14:03:09 +0530
Subject: PM / clock_ops: Provide default runtime ops to users

Most users of PM clocks do the extact same things in the runtime
suspend/resume callbacks. Provide them USE_PM_CLK_RUNTIME_OPS so
as to avoid/remove boilerplate code.

Signed-off-by: Rajendra Nayak <rnayak@codeaurora.org>
Reviewed-by: Kevin Hilman <khilman@linaro.org>
Acked-by: Santosh Shilimkar <ssantosh@kernel.org>
Acked-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/pm_clock.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pm_clock.h b/include/linux/pm_clock.h
index 0b0039634410..25266c600021 100644
--- a/include/linux/pm_clock.h
+++ b/include/linux/pm_clock.h
@@ -20,6 +20,16 @@ struct pm_clk_notifier_block {
 
 struct clk;
 
+#ifdef CONFIG_PM
+extern int pm_clk_runtime_suspend(struct device *dev);
+extern int pm_clk_runtime_resume(struct device *dev);
+#define USE_PM_CLK_RUNTIME_OPS \
+	.runtime_suspend = pm_clk_runtime_suspend, \
+	.runtime_resume = pm_clk_runtime_resume,
+#else
+#define USE_PM_CLK_RUNTIME_OPS
+#endif
+
 #ifdef CONFIG_PM_CLK
 static inline bool pm_clk_no_clocks(struct device *dev)
 {
-- 
cgit v1.2.3


From 9d47c0a2d958e06322c88245749278633d333cca Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Sun, 10 May 2015 09:47:47 -0700
Subject: switchdev: s/swdev_/switchdev_/

Turned out that "switchdev" sticks. So just unify all related terms to use
this prefix.

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Scott Feldman <sfeldma@gmail.com>
Acked-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Acked-by: Andy Gospodarek <gospo@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a6d706b2a947..2b39235b9f13 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1567,7 +1567,7 @@ struct net_device {
 	const struct net_device_ops *netdev_ops;
 	const struct ethtool_ops *ethtool_ops;
 #ifdef CONFIG_NET_SWITCHDEV
-	const struct swdev_ops *swdev_ops;
+	const struct switchdev_ops *switchdev_ops;
 #endif
 
 	const struct header_ops *header_ops;
-- 
cgit v1.2.3


From 7889cbee8357aaed85898d028829dfb4f75bae2c Mon Sep 17 00:00:00 2001
From: Scott Feldman <sfeldma@gmail.com>
Date: Sun, 10 May 2015 09:48:07 -0700
Subject: switchdev: remove NETIF_F_HW_SWITCH_OFFLOAD feature flag

Roopa said remove the feature flag for this series and she'll work on
bringing it back if needed at a later date.

Signed-off-by: Scott Feldman <sfeldma@gmail.com>
Acked-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdev_features.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index 7d59dc6ab789..9672781c593d 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -66,7 +66,6 @@ enum {
 	NETIF_F_HW_VLAN_STAG_FILTER_BIT,/* Receive filtering on VLAN STAGs */
 	NETIF_F_HW_L2FW_DOFFLOAD_BIT,	/* Allow L2 Forwarding in Hardware */
 	NETIF_F_BUSY_POLL_BIT,		/* Busy poll */
-	NETIF_F_HW_SWITCH_OFFLOAD_BIT,  /* HW switch offload */
 
 	/*
 	 * Add your fresh new feature above and remember to update
@@ -125,7 +124,6 @@ enum {
 #define NETIF_F_HW_VLAN_STAG_TX	__NETIF_F(HW_VLAN_STAG_TX)
 #define NETIF_F_HW_L2FW_DOFFLOAD	__NETIF_F(HW_L2FW_DOFFLOAD)
 #define NETIF_F_BUSY_POLL	__NETIF_F(BUSY_POLL)
-#define NETIF_F_HW_SWITCH_OFFLOAD	__NETIF_F(HW_SWITCH_OFFLOAD)
 
 /* Features valid for ethtool to change */
 /* = all defined minus driver/device-class-related */
@@ -161,8 +159,7 @@ enum {
  */
 #define NETIF_F_ONE_FOR_ALL	(NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ROBUST | \
 				 NETIF_F_SG | NETIF_F_HIGHDMA |		\
-				 NETIF_F_FRAGLIST | NETIF_F_VLAN_CHALLENGED | \
-				 NETIF_F_HW_SWITCH_OFFLOAD)
+				 NETIF_F_FRAGLIST | NETIF_F_VLAN_CHALLENGED)
 
 /*
  * If one device doesn't support one of these features, then disable it
-- 
cgit v1.2.3


From 5d1d65f8bea6de3d9c2c60fdfdd2da02da5ea672 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 11 May 2015 17:48:12 +0800
Subject: crypto: aead - Convert top level interface to new style

This patch converts the top-level aead interface to the new style.
All user-level AEAD interface code have been moved into crypto/aead.h.

The allocation/free functions have switched over to the new way of
allocating tfms.

This patch also removes the double indrection on setkey so the
indirection now exists only at the alg level.

Apart from these there are no user-visible changes.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/crypto.h | 442 +------------------------------------------------
 1 file changed, 1 insertion(+), 441 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index ee14140f8893..59ca4086ce6a 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -140,6 +140,7 @@ struct crypto_blkcipher;
 struct crypto_hash;
 struct crypto_tfm;
 struct crypto_type;
+struct aead_request;
 struct aead_givcrypt_request;
 struct skcipher_givcrypt_request;
 
@@ -174,32 +175,6 @@ struct ablkcipher_request {
 	void *__ctx[] CRYPTO_MINALIGN_ATTR;
 };
 
-/**
- *	struct aead_request - AEAD request
- *	@base: Common attributes for async crypto requests
- *	@assoclen: Length in bytes of associated data for authentication
- *	@cryptlen: Length of data to be encrypted or decrypted
- *	@iv: Initialisation vector
- *	@assoc: Associated data
- *	@src: Source data
- *	@dst: Destination data
- *	@__ctx: Start of private context data
- */
-struct aead_request {
-	struct crypto_async_request base;
-
-	unsigned int assoclen;
-	unsigned int cryptlen;
-
-	u8 *iv;
-
-	struct scatterlist *assoc;
-	struct scatterlist *src;
-	struct scatterlist *dst;
-
-	void *__ctx[] CRYPTO_MINALIGN_ATTR;
-};
-
 struct blkcipher_desc {
 	struct crypto_blkcipher *tfm;
 	void *info;
@@ -572,21 +547,6 @@ struct ablkcipher_tfm {
 	unsigned int reqsize;
 };
 
-struct aead_tfm {
-	int (*setkey)(struct crypto_aead *tfm, const u8 *key,
-	              unsigned int keylen);
-	int (*encrypt)(struct aead_request *req);
-	int (*decrypt)(struct aead_request *req);
-	int (*givencrypt)(struct aead_givcrypt_request *req);
-	int (*givdecrypt)(struct aead_givcrypt_request *req);
-
-	struct crypto_aead *base;
-
-	unsigned int ivsize;
-	unsigned int authsize;
-	unsigned int reqsize;
-};
-
 struct blkcipher_tfm {
 	void *iv;
 	int (*setkey)(struct crypto_tfm *tfm, const u8 *key,
@@ -626,7 +586,6 @@ struct compress_tfm {
 };
 
 #define crt_ablkcipher	crt_u.ablkcipher
-#define crt_aead	crt_u.aead
 #define crt_blkcipher	crt_u.blkcipher
 #define crt_cipher	crt_u.cipher
 #define crt_hash	crt_u.hash
@@ -638,7 +597,6 @@ struct crypto_tfm {
 	
 	union {
 		struct ablkcipher_tfm ablkcipher;
-		struct aead_tfm aead;
 		struct blkcipher_tfm blkcipher;
 		struct cipher_tfm cipher;
 		struct hash_tfm hash;
@@ -656,10 +614,6 @@ struct crypto_ablkcipher {
 	struct crypto_tfm base;
 };
 
-struct crypto_aead {
-	struct crypto_tfm base;
-};
-
 struct crypto_blkcipher {
 	struct crypto_tfm base;
 };
@@ -1151,400 +1105,6 @@ static inline void ablkcipher_request_set_crypt(
 	req->info = iv;
 }
 
-/**
- * DOC: Authenticated Encryption With Associated Data (AEAD) Cipher API
- *
- * The AEAD cipher API is used with the ciphers of type CRYPTO_ALG_TYPE_AEAD
- * (listed as type "aead" in /proc/crypto)
- *
- * The most prominent examples for this type of encryption is GCM and CCM.
- * However, the kernel supports other types of AEAD ciphers which are defined
- * with the following cipher string:
- *
- *	authenc(keyed message digest, block cipher)
- *
- * For example: authenc(hmac(sha256), cbc(aes))
- *
- * The example code provided for the asynchronous block cipher operation
- * applies here as well. Naturally all *ablkcipher* symbols must be exchanged
- * the *aead* pendants discussed in the following. In addtion, for the AEAD
- * operation, the aead_request_set_assoc function must be used to set the
- * pointer to the associated data memory location before performing the
- * encryption or decryption operation. In case of an encryption, the associated
- * data memory is filled during the encryption operation. For decryption, the
- * associated data memory must contain data that is used to verify the integrity
- * of the decrypted data. Another deviation from the asynchronous block cipher
- * operation is that the caller should explicitly check for -EBADMSG of the
- * crypto_aead_decrypt. That error indicates an authentication error, i.e.
- * a breach in the integrity of the message. In essence, that -EBADMSG error
- * code is the key bonus an AEAD cipher has over "standard" block chaining
- * modes.
- */
-
-static inline struct crypto_aead *__crypto_aead_cast(struct crypto_tfm *tfm)
-{
-	return (struct crypto_aead *)tfm;
-}
-
-/**
- * crypto_alloc_aead() - allocate AEAD cipher handle
- * @alg_name: is the cra_name / name or cra_driver_name / driver name of the
- *	     AEAD cipher
- * @type: specifies the type of the cipher
- * @mask: specifies the mask for the cipher
- *
- * Allocate a cipher handle for an AEAD. The returned struct
- * crypto_aead is the cipher handle that is required for any subsequent
- * API invocation for that AEAD.
- *
- * Return: allocated cipher handle in case of success; IS_ERR() is true in case
- *	   of an error, PTR_ERR() returns the error code.
- */
-struct crypto_aead *crypto_alloc_aead(const char *alg_name, u32 type, u32 mask);
-
-static inline struct crypto_tfm *crypto_aead_tfm(struct crypto_aead *tfm)
-{
-	return &tfm->base;
-}
-
-/**
- * crypto_free_aead() - zeroize and free aead handle
- * @tfm: cipher handle to be freed
- */
-static inline void crypto_free_aead(struct crypto_aead *tfm)
-{
-	crypto_free_tfm(crypto_aead_tfm(tfm));
-}
-
-static inline struct aead_tfm *crypto_aead_crt(struct crypto_aead *tfm)
-{
-	return &crypto_aead_tfm(tfm)->crt_aead;
-}
-
-/**
- * crypto_aead_ivsize() - obtain IV size
- * @tfm: cipher handle
- *
- * The size of the IV for the aead referenced by the cipher handle is
- * returned. This IV size may be zero if the cipher does not need an IV.
- *
- * Return: IV size in bytes
- */
-static inline unsigned int crypto_aead_ivsize(struct crypto_aead *tfm)
-{
-	return crypto_aead_crt(tfm)->ivsize;
-}
-
-/**
- * crypto_aead_authsize() - obtain maximum authentication data size
- * @tfm: cipher handle
- *
- * The maximum size of the authentication data for the AEAD cipher referenced
- * by the AEAD cipher handle is returned. The authentication data size may be
- * zero if the cipher implements a hard-coded maximum.
- *
- * The authentication data may also be known as "tag value".
- *
- * Return: authentication data size / tag size in bytes
- */
-static inline unsigned int crypto_aead_authsize(struct crypto_aead *tfm)
-{
-	return crypto_aead_crt(tfm)->authsize;
-}
-
-/**
- * crypto_aead_blocksize() - obtain block size of cipher
- * @tfm: cipher handle
- *
- * The block size for the AEAD referenced with the cipher handle is returned.
- * The caller may use that information to allocate appropriate memory for the
- * data returned by the encryption or decryption operation
- *
- * Return: block size of cipher
- */
-static inline unsigned int crypto_aead_blocksize(struct crypto_aead *tfm)
-{
-	return crypto_tfm_alg_blocksize(crypto_aead_tfm(tfm));
-}
-
-static inline unsigned int crypto_aead_alignmask(struct crypto_aead *tfm)
-{
-	return crypto_tfm_alg_alignmask(crypto_aead_tfm(tfm));
-}
-
-static inline u32 crypto_aead_get_flags(struct crypto_aead *tfm)
-{
-	return crypto_tfm_get_flags(crypto_aead_tfm(tfm));
-}
-
-static inline void crypto_aead_set_flags(struct crypto_aead *tfm, u32 flags)
-{
-	crypto_tfm_set_flags(crypto_aead_tfm(tfm), flags);
-}
-
-static inline void crypto_aead_clear_flags(struct crypto_aead *tfm, u32 flags)
-{
-	crypto_tfm_clear_flags(crypto_aead_tfm(tfm), flags);
-}
-
-/**
- * crypto_aead_setkey() - set key for cipher
- * @tfm: cipher handle
- * @key: buffer holding the key
- * @keylen: length of the key in bytes
- *
- * The caller provided key is set for the AEAD referenced by the cipher
- * handle.
- *
- * Note, the key length determines the cipher type. Many block ciphers implement
- * different cipher modes depending on the key size, such as AES-128 vs AES-192
- * vs. AES-256. When providing a 16 byte key for an AES cipher handle, AES-128
- * is performed.
- *
- * Return: 0 if the setting of the key was successful; < 0 if an error occurred
- */
-static inline int crypto_aead_setkey(struct crypto_aead *tfm, const u8 *key,
-				     unsigned int keylen)
-{
-	struct aead_tfm *crt = crypto_aead_crt(tfm);
-
-	return crt->setkey(crt->base, key, keylen);
-}
-
-/**
- * crypto_aead_setauthsize() - set authentication data size
- * @tfm: cipher handle
- * @authsize: size of the authentication data / tag in bytes
- *
- * Set the authentication data size / tag size. AEAD requires an authentication
- * tag (or MAC) in addition to the associated data.
- *
- * Return: 0 if the setting of the key was successful; < 0 if an error occurred
- */
-int crypto_aead_setauthsize(struct crypto_aead *tfm, unsigned int authsize);
-
-static inline struct crypto_aead *crypto_aead_reqtfm(struct aead_request *req)
-{
-	return __crypto_aead_cast(req->base.tfm);
-}
-
-/**
- * crypto_aead_encrypt() - encrypt plaintext
- * @req: reference to the aead_request handle that holds all information
- *	 needed to perform the cipher operation
- *
- * Encrypt plaintext data using the aead_request handle. That data structure
- * and how it is filled with data is discussed with the aead_request_*
- * functions.
- *
- * IMPORTANT NOTE The encryption operation creates the authentication data /
- *		  tag. That data is concatenated with the created ciphertext.
- *		  The ciphertext memory size is therefore the given number of
- *		  block cipher blocks + the size defined by the
- *		  crypto_aead_setauthsize invocation. The caller must ensure
- *		  that sufficient memory is available for the ciphertext and
- *		  the authentication tag.
- *
- * Return: 0 if the cipher operation was successful; < 0 if an error occurred
- */
-static inline int crypto_aead_encrypt(struct aead_request *req)
-{
-	return crypto_aead_crt(crypto_aead_reqtfm(req))->encrypt(req);
-}
-
-/**
- * crypto_aead_decrypt() - decrypt ciphertext
- * @req: reference to the ablkcipher_request handle that holds all information
- *	 needed to perform the cipher operation
- *
- * Decrypt ciphertext data using the aead_request handle. That data structure
- * and how it is filled with data is discussed with the aead_request_*
- * functions.
- *
- * IMPORTANT NOTE The caller must concatenate the ciphertext followed by the
- *		  authentication data / tag. That authentication data / tag
- *		  must have the size defined by the crypto_aead_setauthsize
- *		  invocation.
- *
- *
- * Return: 0 if the cipher operation was successful; -EBADMSG: The AEAD
- *	   cipher operation performs the authentication of the data during the
- *	   decryption operation. Therefore, the function returns this error if
- *	   the authentication of the ciphertext was unsuccessful (i.e. the
- *	   integrity of the ciphertext or the associated data was violated);
- *	   < 0 if an error occurred.
- */
-static inline int crypto_aead_decrypt(struct aead_request *req)
-{
-	if (req->cryptlen < crypto_aead_authsize(crypto_aead_reqtfm(req)))
-		return -EINVAL;
-
-	return crypto_aead_crt(crypto_aead_reqtfm(req))->decrypt(req);
-}
-
-/**
- * DOC: Asynchronous AEAD Request Handle
- *
- * The aead_request data structure contains all pointers to data required for
- * the AEAD cipher operation. This includes the cipher handle (which can be
- * used by multiple aead_request instances), pointer to plaintext and
- * ciphertext, asynchronous callback function, etc. It acts as a handle to the
- * aead_request_* API calls in a similar way as AEAD handle to the
- * crypto_aead_* API calls.
- */
-
-/**
- * crypto_aead_reqsize() - obtain size of the request data structure
- * @tfm: cipher handle
- *
- * Return: number of bytes
- */
-static inline unsigned int crypto_aead_reqsize(struct crypto_aead *tfm)
-{
-	return crypto_aead_crt(tfm)->reqsize;
-}
-
-/**
- * aead_request_set_tfm() - update cipher handle reference in request
- * @req: request handle to be modified
- * @tfm: cipher handle that shall be added to the request handle
- *
- * Allow the caller to replace the existing aead handle in the request
- * data structure with a different one.
- */
-static inline void aead_request_set_tfm(struct aead_request *req,
-					struct crypto_aead *tfm)
-{
-	req->base.tfm = crypto_aead_tfm(crypto_aead_crt(tfm)->base);
-}
-
-/**
- * aead_request_alloc() - allocate request data structure
- * @tfm: cipher handle to be registered with the request
- * @gfp: memory allocation flag that is handed to kmalloc by the API call.
- *
- * Allocate the request data structure that must be used with the AEAD
- * encrypt and decrypt API calls. During the allocation, the provided aead
- * handle is registered in the request data structure.
- *
- * Return: allocated request handle in case of success; IS_ERR() is true in case
- *	   of an error, PTR_ERR() returns the error code.
- */
-static inline struct aead_request *aead_request_alloc(struct crypto_aead *tfm,
-						      gfp_t gfp)
-{
-	struct aead_request *req;
-
-	req = kmalloc(sizeof(*req) + crypto_aead_reqsize(tfm), gfp);
-
-	if (likely(req))
-		aead_request_set_tfm(req, tfm);
-
-	return req;
-}
-
-/**
- * aead_request_free() - zeroize and free request data structure
- * @req: request data structure cipher handle to be freed
- */
-static inline void aead_request_free(struct aead_request *req)
-{
-	kzfree(req);
-}
-
-/**
- * aead_request_set_callback() - set asynchronous callback function
- * @req: request handle
- * @flags: specify zero or an ORing of the flags
- *	   CRYPTO_TFM_REQ_MAY_BACKLOG the request queue may back log and
- *	   increase the wait queue beyond the initial maximum size;
- *	   CRYPTO_TFM_REQ_MAY_SLEEP the request processing may sleep
- * @compl: callback function pointer to be registered with the request handle
- * @data: The data pointer refers to memory that is not used by the kernel
- *	  crypto API, but provided to the callback function for it to use. Here,
- *	  the caller can provide a reference to memory the callback function can
- *	  operate on. As the callback function is invoked asynchronously to the
- *	  related functionality, it may need to access data structures of the
- *	  related functionality which can be referenced using this pointer. The
- *	  callback function can access the memory via the "data" field in the
- *	  crypto_async_request data structure provided to the callback function.
- *
- * Setting the callback function that is triggered once the cipher operation
- * completes
- *
- * The callback function is registered with the aead_request handle and
- * must comply with the following template
- *
- *	void callback_function(struct crypto_async_request *req, int error)
- */
-static inline void aead_request_set_callback(struct aead_request *req,
-					     u32 flags,
-					     crypto_completion_t compl,
-					     void *data)
-{
-	req->base.complete = compl;
-	req->base.data = data;
-	req->base.flags = flags;
-}
-
-/**
- * aead_request_set_crypt - set data buffers
- * @req: request handle
- * @src: source scatter / gather list
- * @dst: destination scatter / gather list
- * @cryptlen: number of bytes to process from @src
- * @iv: IV for the cipher operation which must comply with the IV size defined
- *      by crypto_aead_ivsize()
- *
- * Setting the source data and destination data scatter / gather lists.
- *
- * For encryption, the source is treated as the plaintext and the
- * destination is the ciphertext. For a decryption operation, the use is
- * reversed - the source is the ciphertext and the destination is the plaintext.
- *
- * IMPORTANT NOTE AEAD requires an authentication tag (MAC). For decryption,
- *		  the caller must concatenate the ciphertext followed by the
- *		  authentication tag and provide the entire data stream to the
- *		  decryption operation (i.e. the data length used for the
- *		  initialization of the scatterlist and the data length for the
- *		  decryption operation is identical). For encryption, however,
- *		  the authentication tag is created while encrypting the data.
- *		  The destination buffer must hold sufficient space for the
- *		  ciphertext and the authentication tag while the encryption
- *		  invocation must only point to the plaintext data size. The
- *		  following code snippet illustrates the memory usage
- *		  buffer = kmalloc(ptbuflen + (enc ? authsize : 0));
- *		  sg_init_one(&sg, buffer, ptbuflen + (enc ? authsize : 0));
- *		  aead_request_set_crypt(req, &sg, &sg, ptbuflen, iv);
- */
-static inline void aead_request_set_crypt(struct aead_request *req,
-					  struct scatterlist *src,
-					  struct scatterlist *dst,
-					  unsigned int cryptlen, u8 *iv)
-{
-	req->src = src;
-	req->dst = dst;
-	req->cryptlen = cryptlen;
-	req->iv = iv;
-}
-
-/**
- * aead_request_set_assoc() - set the associated data scatter / gather list
- * @req: request handle
- * @assoc: associated data scatter / gather list
- * @assoclen: number of bytes to process from @assoc
- *
- * For encryption, the memory is filled with the associated data. For
- * decryption, the memory must point to the associated data.
- */
-static inline void aead_request_set_assoc(struct aead_request *req,
-					  struct scatterlist *assoc,
-					  unsigned int assoclen)
-{
-	req->assoc = assoc;
-	req->assoclen = assoclen;
-}
-
 /**
  * DOC: Synchronous Block Cipher API
  *
-- 
cgit v1.2.3


From a2029240e5836e73ebcc1a8ddb8c22d636f89c9a Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <dvlasenk@redhat.com>
Date: Mon, 11 May 2015 21:17:53 +0200
Subject: net: deinline netif_tx_stop_all_queues(), remove WARN_ON in
 netif_tx_stop_queue()

These functions compile to 60 bytes of machine code each.
With this .config: http://busybox.net/~vda/kernel_config
there are 617 calls of netif_tx_stop_queue()
and 49 calls of netif_tx_stop_all_queues() in vmlinux.

To fix this, remove WARN_ON in netif_tx_stop_queue()
as suggested by davem, and deinline netif_tx_stop_all_queues().

Change in code size is about 20k:

   text      data      bss       dec     hex filename
82426986 22255416 20627456 125309858 77813a2 vmlinux.before
82406248 22255416 20627456 125289120 777c2a0 vmlinux

gcc-4.7.2 still creates deinlined version of netif_tx_stop_queue
sometimes:

$ nm --size-sort vmlinux | grep netif_tx_stop_queue | wc -l
190

ffffffff81b558a8 <netif_tx_stop_queue>:
ffffffff81b558a8:       55                      push   %rbp
ffffffff81b558a9:       48 89 e5                mov    %rsp,%rbp
ffffffff81b558ac:       f0 80 8f e0 01 00 00    lock orb $0x1,0x1e0(%rdi)
ffffffff81b558b3:       01
ffffffff81b558b4:       5d                      pop    %rbp
ffffffff81b558b5:       c3                      retq

This needs additional fixing.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Alexei Starovoitov <alexei.starovoitov@gmail.com>
CC: Alexander Duyck <alexander.duyck@gmail.com>
CC: Joe Perches <joe@perches.com>
CC: David S. Miller <davem@davemloft.net>
CC: Jiri Pirko <jpirko@redhat.com>
CC: linux-kernel@vger.kernel.org
CC: netdev@vger.kernel.org
CC: netfilter-devel@vger.kernel.org
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 14 +-------------
 1 file changed, 1 insertion(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2b39235b9f13..fa57915f440c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2559,10 +2559,6 @@ static inline void netif_tx_wake_all_queues(struct net_device *dev)
 
 static inline void netif_tx_stop_queue(struct netdev_queue *dev_queue)
 {
-	if (WARN_ON(!dev_queue)) {
-		pr_info("netif_stop_queue() cannot be called before register_netdev()\n");
-		return;
-	}
 	set_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state);
 }
 
@@ -2578,15 +2574,7 @@ static inline void netif_stop_queue(struct net_device *dev)
 	netif_tx_stop_queue(netdev_get_tx_queue(dev, 0));
 }
 
-static inline void netif_tx_stop_all_queues(struct net_device *dev)
-{
-	unsigned int i;
-
-	for (i = 0; i < dev->num_tx_queues; i++) {
-		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
-		netif_tx_stop_queue(txq);
-	}
-}
+void netif_tx_stop_all_queues(struct net_device *dev);
 
 static inline bool netif_tx_queue_stopped(const struct netdev_queue *dev_queue)
 {
-- 
cgit v1.2.3


From cffc642d93f9324a06dfbd7da9af29652952a248 Mon Sep 17 00:00:00 2001
From: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Date: Mon, 11 May 2015 22:22:44 -0700
Subject: test_bpf: add 173 new testcases for eBPF

add an exhaustive set of eBPF tests bringing total to:
test_bpf: Summary: 233 PASSED, 0 FAILED, [0/226 JIT'ed]

Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 3c03a6085b82..ce1d72d34382 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -207,6 +207,16 @@ struct bpf_prog_aux;
 		.off   = OFF,					\
 		.imm   = 0 })
 
+/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */
+
+#define BPF_STX_XADD(SIZE, DST, SRC, OFF)			\
+	((struct bpf_insn) {					\
+		.code  = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD,	\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = OFF,					\
+		.imm   = 0 })
+
 /* Memory store, *(uint *) (dst_reg + off16) = imm32 */
 
 #define BPF_ST_MEM(SIZE, DST, OFF, IMM)				\
-- 
cgit v1.2.3


From 9abdffe286c1532a54d5aee31571d3029be4026c Mon Sep 17 00:00:00 2001
From: Sumit Semwal <sumit.semwal@linaro.org>
Date: Tue, 5 May 2015 14:56:15 +0530
Subject: dma-buf: add ref counting for module as exporter

Add reference counting on a kernel module that exports dma-buf and
implements its operations. This prevents the module from being unloaded
while DMABUF file is in use.

The original patch [1] was submitted by Tomasz Stanislawski, but this
is a simpler way to do it.

v3: call module_put() as late as possible, per gregkh's comment.
v2: move owner to struct dma_buf, and use DEFINE_DMA_BUF_EXPORT_INFO
    macro to simplify the change.

Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Sumit Semwal <sumit.semwal@linaro.org>

[1]: https://lkml.org/lkml/2012/8/8/163
---
 include/linux/dma-buf.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 2f0b431b73e0..f98bd7068d55 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -115,6 +115,8 @@ struct dma_buf_ops {
  * @attachments: list of dma_buf_attachment that denotes all devices attached.
  * @ops: dma_buf_ops associated with this buffer object.
  * @exp_name: name of the exporter; useful for debugging.
+ * @owner: pointer to exporter module; used for refcounting when exporter is a
+ *         kernel module.
  * @list_node: node for dma_buf accounting and debugging.
  * @priv: exporter specific private data for this buffer object.
  * @resv: reservation object linked to this dma-buf
@@ -129,6 +131,7 @@ struct dma_buf {
 	unsigned vmapping_counter;
 	void *vmap_ptr;
 	const char *exp_name;
+	struct module *owner;
 	struct list_head list_node;
 	void *priv;
 	struct reservation_object *resv;
@@ -164,7 +167,8 @@ struct dma_buf_attachment {
 
 /**
  * struct dma_buf_export_info - holds information needed to export a dma_buf
- * @exp_name:	name of the exporting module - useful for debugging.
+ * @exp_name:	name of the exporter - useful for debugging.
+ * @owner:	pointer to exporter module - used for refcounting kernel module
  * @ops:	Attach allocator-defined dma buf ops to the new buffer
  * @size:	Size of the buffer
  * @flags:	mode flags for the file
@@ -176,6 +180,7 @@ struct dma_buf_attachment {
  */
 struct dma_buf_export_info {
 	const char *exp_name;
+	struct module *owner;
 	const struct dma_buf_ops *ops;
 	size_t size;
 	int flags;
@@ -187,7 +192,8 @@ struct dma_buf_export_info {
  * helper macro for exporters; zeros and fills in most common values
  */
 #define DEFINE_DMA_BUF_EXPORT_INFO(a)	\
-	struct dma_buf_export_info a = { .exp_name = KBUILD_MODNAME }
+	struct dma_buf_export_info a = { .exp_name = KBUILD_MODNAME, \
+					 .owner = THIS_MODULE }
 
 /**
  * get_dma_buf - convenience wrapper for get_file.
-- 
cgit v1.2.3


From 25e4fe92a20bbffde87500615250f1d54bfb832f Mon Sep 17 00:00:00 2001
From: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Date: Tue, 12 May 2015 20:12:23 +0300
Subject: gpiolib: cleanup chained handler and data

Clean up chained handler and handler data if they were set by
gpiochip_set_chained_irqchip().

Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio/driver.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 2c1e639f66bd..96a678842cde 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -121,6 +121,7 @@ struct gpio_chip {
 	unsigned int		irq_base;
 	irq_flow_handler_t	irq_handler;
 	unsigned int		irq_default_type;
+	int			irq_parent;
 #endif
 
 #if defined(CONFIG_OF_GPIO)
-- 
cgit v1.2.3


From f05be589ff32e87821b86845625ed3d402d37dc7 Mon Sep 17 00:00:00 2001
From: Boris BREZILLON <boris.brezillon@free-electrons.com>
Date: Fri, 10 Apr 2015 12:09:01 +0800
Subject: mfd: axp20x: Add AXP22x PMIC support

Add support for the AXP22x PMIC devices to the existing AXP20x driver.
This includes the AXP221 and AXP223, which are identical except for
the external data bus. Only AXP221 is added for now. AXP223 will be
added after it's Reduced Serial Bus (RSB) interface is supported.

AXP22x defines a new set of registers, power supplies and regulators,
but most of the API is similar to the AXP20x ones.

A new irq chip definition is used, even though the available interrupts
on AXP22x is a subset of those on AXP20x. This is done so the interrupt
numbers match those on the datasheet.

This patch only enables the interrupts, system power-off function, and PEK
sub-device. The regulator driver must first support different variants
before we enable it from the mfd driver.

Signed-off-by: Boris BREZILLON <boris.brezillon@free-electrons.com>
[wens@csie.org: fix interrupts and move regulators to separate patch]
Signed-off-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/axp20x.h | 86 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h
index dfabd6db7ddf..95568eb798c3 100644
--- a/include/linux/mfd/axp20x.h
+++ b/include/linux/mfd/axp20x.h
@@ -14,6 +14,7 @@
 enum {
 	AXP202_ID = 0,
 	AXP209_ID,
+	AXP221_ID,
 	AXP288_ID,
 	NR_AXP20X_VARIANTS,
 };
@@ -45,6 +46,28 @@ enum {
 #define AXP20X_V_LTF_DISCHRG		0x3c
 #define AXP20X_V_HTF_DISCHRG		0x3d
 
+#define AXP22X_PWR_OUT_CTRL1		0x10
+#define AXP22X_PWR_OUT_CTRL2		0x12
+#define AXP22X_PWR_OUT_CTRL3		0x13
+#define AXP22X_DLDO1_V_OUT		0x15
+#define AXP22X_DLDO2_V_OUT		0x16
+#define AXP22X_DLDO3_V_OUT		0x17
+#define AXP22X_DLDO4_V_OUT		0x18
+#define AXP22X_ELDO1_V_OUT		0x19
+#define AXP22X_ELDO2_V_OUT		0x1a
+#define AXP22X_ELDO3_V_OUT		0x1b
+#define AXP22X_DC5LDO_V_OUT		0x1c
+#define AXP22X_DCDC1_V_OUT		0x21
+#define AXP22X_DCDC2_V_OUT		0x22
+#define AXP22X_DCDC3_V_OUT		0x23
+#define AXP22X_DCDC4_V_OUT		0x24
+#define AXP22X_DCDC5_V_OUT		0x25
+#define AXP22X_DCDC23_V_RAMP_CTRL	0x27
+#define AXP22X_ALDO1_V_OUT		0x28
+#define AXP22X_ALDO2_V_OUT		0x29
+#define AXP22X_ALDO3_V_OUT		0x2a
+#define AXP22X_CHRG_CTRL3		0x35
+
 /* Interrupt */
 #define AXP20X_IRQ1_EN			0x40
 #define AXP20X_IRQ2_EN			0x41
@@ -100,6 +123,9 @@ enum {
 #define AXP20X_VBUS_MON			0x8b
 #define AXP20X_OVER_TMP			0x8f
 
+#define AXP22X_PWREN_CTRL1		0x8c
+#define AXP22X_PWREN_CTRL2		0x8d
+
 /* GPIO */
 #define AXP20X_GPIO0_CTRL		0x90
 #define AXP20X_LDO5_V_OUT		0x91
@@ -108,6 +134,11 @@ enum {
 #define AXP20X_GPIO20_SS		0x94
 #define AXP20X_GPIO3_CTRL		0x95
 
+#define AXP22X_LDO_IO0_V_OUT		0x91
+#define AXP22X_LDO_IO1_V_OUT		0x93
+#define AXP22X_GPIO_STATE		0x94
+#define AXP22X_GPIO_PULL_DOWN		0x95
+
 /* Battery */
 #define AXP20X_CHRG_CC_31_24		0xb0
 #define AXP20X_CHRG_CC_23_16		0xb1
@@ -120,6 +151,9 @@ enum {
 #define AXP20X_CC_CTRL			0xb8
 #define AXP20X_FG_RES			0xb9
 
+/* AXP22X specific registers */
+#define AXP22X_BATLOW_THRES1		0xe6
+
 /* AXP288 specific registers */
 #define AXP288_PMIC_ADC_H               0x56
 #define AXP288_PMIC_ADC_L               0x57
@@ -158,6 +192,30 @@ enum {
 	AXP20X_REG_ID_MAX,
 };
 
+enum {
+	AXP22X_DCDC1 = 0,
+	AXP22X_DCDC2,
+	AXP22X_DCDC3,
+	AXP22X_DCDC4,
+	AXP22X_DCDC5,
+	AXP22X_DC1SW,
+	AXP22X_DC5LDO,
+	AXP22X_ALDO1,
+	AXP22X_ALDO2,
+	AXP22X_ALDO3,
+	AXP22X_ELDO1,
+	AXP22X_ELDO2,
+	AXP22X_ELDO3,
+	AXP22X_DLDO1,
+	AXP22X_DLDO2,
+	AXP22X_DLDO3,
+	AXP22X_DLDO4,
+	AXP22X_RTC_LDO,
+	AXP22X_LDO_IO0,
+	AXP22X_LDO_IO1,
+	AXP22X_REG_ID_MAX,
+};
+
 /* IRQs */
 enum {
 	AXP20X_IRQ_ACIN_OVER_V = 1,
@@ -199,6 +257,34 @@ enum {
 	AXP20X_IRQ_GPIO0_INPUT,
 };
 
+enum axp22x_irqs {
+	AXP22X_IRQ_ACIN_OVER_V = 1,
+	AXP22X_IRQ_ACIN_PLUGIN,
+	AXP22X_IRQ_ACIN_REMOVAL,
+	AXP22X_IRQ_VBUS_OVER_V,
+	AXP22X_IRQ_VBUS_PLUGIN,
+	AXP22X_IRQ_VBUS_REMOVAL,
+	AXP22X_IRQ_VBUS_V_LOW,
+	AXP22X_IRQ_BATT_PLUGIN,
+	AXP22X_IRQ_BATT_REMOVAL,
+	AXP22X_IRQ_BATT_ENT_ACT_MODE,
+	AXP22X_IRQ_BATT_EXIT_ACT_MODE,
+	AXP22X_IRQ_CHARG,
+	AXP22X_IRQ_CHARG_DONE,
+	AXP22X_IRQ_BATT_TEMP_HIGH,
+	AXP22X_IRQ_BATT_TEMP_LOW,
+	AXP22X_IRQ_DIE_TEMP_HIGH,
+	AXP22X_IRQ_PEK_SHORT,
+	AXP22X_IRQ_PEK_LONG,
+	AXP22X_IRQ_LOW_PWR_LVL1,
+	AXP22X_IRQ_LOW_PWR_LVL2,
+	AXP22X_IRQ_TIMER,
+	AXP22X_IRQ_PEK_RIS_EDGE,
+	AXP22X_IRQ_PEK_FAL_EDGE,
+	AXP22X_IRQ_GPIO1_INPUT,
+	AXP22X_IRQ_GPIO0_INPUT,
+};
+
 enum axp288_irqs {
 	AXP288_IRQ_VBUS_FALL     = 2,
 	AXP288_IRQ_VBUS_RISE,
-- 
cgit v1.2.3


From 275e2bc0f25d5eb99c99ebb7293fc3722533124b Mon Sep 17 00:00:00 2001
From: Sergey Popovich <popovich_sergei@mail.ua>
Date: Sat, 2 May 2015 19:28:17 +0200
Subject: netfilter: ipset: Fix ext_*() macros

So pointers returned by these macros could be
referenced with -> directly.

Signed-off-by: Sergey Popovich <popovich_sergei@mail.ua>
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/ipset/ip_set.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 34b172301558..f88be7258e5f 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -122,13 +122,13 @@ struct ip_set_skbinfo {
 struct ip_set;
 
 #define ext_timeout(e, s)	\
-(unsigned long *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_TIMEOUT])
+((unsigned long *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_TIMEOUT]))
 #define ext_counter(e, s)	\
-(struct ip_set_counter *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COUNTER])
+((struct ip_set_counter *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COUNTER]))
 #define ext_comment(e, s)	\
-(struct ip_set_comment *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COMMENT])
+((struct ip_set_comment *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COMMENT]))
 #define ext_skbinfo(e, s)	\
-(struct ip_set_skbinfo *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_SKBINFO])
+((struct ip_set_skbinfo *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_SKBINFO]))
 
 typedef int (*ipset_adtfn)(struct ip_set *set, void *value,
 			   const struct ip_set_ext *ext,
-- 
cgit v1.2.3


From 289fcff4bcdb1dcc0ce8788b7ea0f58a9e4a495f Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Wed, 13 May 2015 15:26:42 +0300
Subject: usb: add bus type for USB ULPI

UTMI+ Low Pin Interface (ULPI) is a commonly used PHY
interface for USB 2.0. The ULPI specification describes a
standard set of registers which the vendors can extend for
their specific needs. ULPI PHYs provide often functions
such as charger detection and ADP sensing and probing.

There are two major issues that the bus type is meant to
tackle:

Firstly, ULPI registers are accessed from the controller.
The bus provides convenient method for the controller
drivers to share that access with the actual PHY drivers.

Secondly, there are already platforms that assume ULPI PHYs
are runtime detected, such as many Intel Baytrail based
platforms. They do not provide any kind of hardware
description for the ULPI PHYs like separate ACPI device
object that could be used to enumerate a device from.

Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Acked-by: David Cohen <david.a.cohen@linux.intel.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 include/linux/mod_devicetable.h |   6 ++
 include/linux/ulpi/driver.h     |  60 ++++++++++++++++++
 include/linux/ulpi/interface.h  |  23 +++++++
 include/linux/ulpi/regs.h       | 130 ++++++++++++++++++++++++++++++++++++++
 include/linux/usb/ulpi.h        | 134 +---------------------------------------
 5 files changed, 221 insertions(+), 132 deletions(-)
 create mode 100644 include/linux/ulpi/driver.h
 create mode 100644 include/linux/ulpi/interface.h
 create mode 100644 include/linux/ulpi/regs.h

(limited to 'include/linux')

diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index 3bfd56778c29..7ab00d61d30a 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -629,4 +629,10 @@ struct mcb_device_id {
 	kernel_ulong_t driver_data;
 };
 
+struct ulpi_device_id {
+	__u16 vendor;
+	__u16 product;
+	kernel_ulong_t driver_data;
+};
+
 #endif /* LINUX_MOD_DEVICETABLE_H */
diff --git a/include/linux/ulpi/driver.h b/include/linux/ulpi/driver.h
new file mode 100644
index 000000000000..388f6e08b9d4
--- /dev/null
+++ b/include/linux/ulpi/driver.h
@@ -0,0 +1,60 @@
+#ifndef __LINUX_ULPI_DRIVER_H
+#define __LINUX_ULPI_DRIVER_H
+
+#include <linux/mod_devicetable.h>
+
+#include <linux/device.h>
+
+struct ulpi_ops;
+
+/**
+ * struct ulpi - describes ULPI PHY device
+ * @id: vendor and product ids for ULPI device
+ * @ops: I/O access
+ * @dev: device interface
+ */
+struct ulpi {
+	struct ulpi_device_id id;
+	struct ulpi_ops *ops;
+	struct device dev;
+};
+
+#define to_ulpi_dev(d) container_of(d, struct ulpi, dev)
+
+static inline void ulpi_set_drvdata(struct ulpi *ulpi, void *data)
+{
+	dev_set_drvdata(&ulpi->dev, data);
+}
+
+static inline void *ulpi_get_drvdata(struct ulpi *ulpi)
+{
+	return dev_get_drvdata(&ulpi->dev);
+}
+
+/**
+ * struct ulpi_driver - describes a ULPI PHY driver
+ * @id_table: array of device identifiers supported by this driver
+ * @probe: binds this driver to ULPI device
+ * @remove: unbinds this driver from ULPI device
+ * @driver: the name and owner members must be initialized by the drivers
+ */
+struct ulpi_driver {
+	const struct ulpi_device_id *id_table;
+	int (*probe)(struct ulpi *ulpi);
+	void (*remove)(struct ulpi *ulpi);
+	struct device_driver driver;
+};
+
+#define to_ulpi_driver(d) container_of(d, struct ulpi_driver, driver)
+
+int ulpi_register_driver(struct ulpi_driver *drv);
+void ulpi_unregister_driver(struct ulpi_driver *drv);
+
+#define module_ulpi_driver(__ulpi_driver) \
+	module_driver(__ulpi_driver, ulpi_register_driver, \
+		      ulpi_unregister_driver)
+
+int ulpi_read(struct ulpi *ulpi, u8 addr);
+int ulpi_write(struct ulpi *ulpi, u8 addr, u8 val);
+
+#endif /* __LINUX_ULPI_DRIVER_H */
diff --git a/include/linux/ulpi/interface.h b/include/linux/ulpi/interface.h
new file mode 100644
index 000000000000..4de8ab491038
--- /dev/null
+++ b/include/linux/ulpi/interface.h
@@ -0,0 +1,23 @@
+#ifndef __LINUX_ULPI_INTERFACE_H
+#define __LINUX_ULPI_INTERFACE_H
+
+#include <linux/types.h>
+
+struct ulpi;
+
+/**
+ * struct ulpi_ops - ULPI register access
+ * @dev: the interface provider
+ * @read: read operation for ULPI register access
+ * @write: write operation for ULPI register access
+ */
+struct ulpi_ops {
+	struct device *dev;
+	int (*read)(struct ulpi_ops *ops, u8 addr);
+	int (*write)(struct ulpi_ops *ops, u8 addr, u8 val);
+};
+
+struct ulpi *ulpi_register_interface(struct device *, struct ulpi_ops *);
+void ulpi_unregister_interface(struct ulpi *);
+
+#endif /* __LINUX_ULPI_INTERFACE_H */
diff --git a/include/linux/ulpi/regs.h b/include/linux/ulpi/regs.h
new file mode 100644
index 000000000000..b5b8b8804560
--- /dev/null
+++ b/include/linux/ulpi/regs.h
@@ -0,0 +1,130 @@
+#ifndef __LINUX_ULPI_REGS_H
+#define __LINUX_ULPI_REGS_H
+
+/*
+ * Macros for Set and Clear
+ * See ULPI 1.1 specification to find the registers with Set and Clear offsets
+ */
+#define ULPI_SET(a)				(a + 1)
+#define ULPI_CLR(a)				(a + 2)
+
+/*
+ * Register Map
+ */
+#define ULPI_VENDOR_ID_LOW			0x00
+#define ULPI_VENDOR_ID_HIGH			0x01
+#define ULPI_PRODUCT_ID_LOW			0x02
+#define ULPI_PRODUCT_ID_HIGH			0x03
+#define ULPI_FUNC_CTRL				0x04
+#define ULPI_IFC_CTRL				0x07
+#define ULPI_OTG_CTRL				0x0a
+#define ULPI_USB_INT_EN_RISE			0x0d
+#define ULPI_USB_INT_EN_FALL			0x10
+#define ULPI_USB_INT_STS			0x13
+#define ULPI_USB_INT_LATCH			0x14
+#define ULPI_DEBUG				0x15
+#define ULPI_SCRATCH				0x16
+/* Optional Carkit Registers */
+#define ULPI_CARKIT_CTRL			0x19
+#define ULPI_CARKIT_INT_DELAY			0x1c
+#define ULPI_CARKIT_INT_EN			0x1d
+#define ULPI_CARKIT_INT_STS			0x20
+#define ULPI_CARKIT_INT_LATCH			0x21
+#define ULPI_CARKIT_PLS_CTRL			0x22
+/* Other Optional Registers */
+#define ULPI_TX_POS_WIDTH			0x25
+#define ULPI_TX_NEG_WIDTH			0x26
+#define ULPI_POLARITY_RECOVERY			0x27
+/* Access Extended Register Set */
+#define ULPI_ACCESS_EXTENDED			0x2f
+/* Vendor Specific */
+#define ULPI_VENDOR_SPECIFIC			0x30
+/* Extended Registers */
+#define ULPI_EXT_VENDOR_SPECIFIC		0x80
+
+/*
+ * Register Bits
+ */
+
+/* Function Control */
+#define ULPI_FUNC_CTRL_XCVRSEL			BIT(0)
+#define  ULPI_FUNC_CTRL_XCVRSEL_MASK		0x3
+#define  ULPI_FUNC_CTRL_HIGH_SPEED		0x0
+#define  ULPI_FUNC_CTRL_FULL_SPEED		0x1
+#define  ULPI_FUNC_CTRL_LOW_SPEED		0x2
+#define  ULPI_FUNC_CTRL_FS4LS			0x3
+#define ULPI_FUNC_CTRL_TERMSELECT		BIT(2)
+#define ULPI_FUNC_CTRL_OPMODE			BIT(3)
+#define  ULPI_FUNC_CTRL_OPMODE_MASK		(0x3 << 3)
+#define  ULPI_FUNC_CTRL_OPMODE_NORMAL		(0x0 << 3)
+#define  ULPI_FUNC_CTRL_OPMODE_NONDRIVING	(0x1 << 3)
+#define  ULPI_FUNC_CTRL_OPMODE_DISABLE_NRZI	(0x2 << 3)
+#define  ULPI_FUNC_CTRL_OPMODE_NOSYNC_NOEOP	(0x3 << 3)
+#define ULPI_FUNC_CTRL_RESET			BIT(5)
+#define ULPI_FUNC_CTRL_SUSPENDM			BIT(6)
+
+/* Interface Control */
+#define ULPI_IFC_CTRL_6_PIN_SERIAL_MODE		BIT(0)
+#define ULPI_IFC_CTRL_3_PIN_SERIAL_MODE		BIT(1)
+#define ULPI_IFC_CTRL_CARKITMODE		BIT(2)
+#define ULPI_IFC_CTRL_CLOCKSUSPENDM		BIT(3)
+#define ULPI_IFC_CTRL_AUTORESUME		BIT(4)
+#define ULPI_IFC_CTRL_EXTERNAL_VBUS		BIT(5)
+#define ULPI_IFC_CTRL_PASSTHRU			BIT(6)
+#define ULPI_IFC_CTRL_PROTECT_IFC_DISABLE	BIT(7)
+
+/* OTG Control */
+#define ULPI_OTG_CTRL_ID_PULLUP			BIT(0)
+#define ULPI_OTG_CTRL_DP_PULLDOWN		BIT(1)
+#define ULPI_OTG_CTRL_DM_PULLDOWN		BIT(2)
+#define ULPI_OTG_CTRL_DISCHRGVBUS		BIT(3)
+#define ULPI_OTG_CTRL_CHRGVBUS			BIT(4)
+#define ULPI_OTG_CTRL_DRVVBUS			BIT(5)
+#define ULPI_OTG_CTRL_DRVVBUS_EXT		BIT(6)
+#define ULPI_OTG_CTRL_EXTVBUSIND		BIT(7)
+
+/* USB Interrupt Enable Rising,
+ * USB Interrupt Enable Falling,
+ * USB Interrupt Status and
+ * USB Interrupt Latch
+ */
+#define ULPI_INT_HOST_DISCONNECT		BIT(0)
+#define ULPI_INT_VBUS_VALID			BIT(1)
+#define ULPI_INT_SESS_VALID			BIT(2)
+#define ULPI_INT_SESS_END			BIT(3)
+#define ULPI_INT_IDGRD				BIT(4)
+
+/* Debug */
+#define ULPI_DEBUG_LINESTATE0			BIT(0)
+#define ULPI_DEBUG_LINESTATE1			BIT(1)
+
+/* Carkit Control */
+#define ULPI_CARKIT_CTRL_CARKITPWR		BIT(0)
+#define ULPI_CARKIT_CTRL_IDGNDDRV		BIT(1)
+#define ULPI_CARKIT_CTRL_TXDEN			BIT(2)
+#define ULPI_CARKIT_CTRL_RXDEN			BIT(3)
+#define ULPI_CARKIT_CTRL_SPKLEFTEN		BIT(4)
+#define ULPI_CARKIT_CTRL_SPKRIGHTEN		BIT(5)
+#define ULPI_CARKIT_CTRL_MICEN			BIT(6)
+
+/* Carkit Interrupt Enable */
+#define ULPI_CARKIT_INT_EN_IDFLOAT_RISE		BIT(0)
+#define ULPI_CARKIT_INT_EN_IDFLOAT_FALL		BIT(1)
+#define ULPI_CARKIT_INT_EN_CARINTDET		BIT(2)
+#define ULPI_CARKIT_INT_EN_DP_RISE		BIT(3)
+#define ULPI_CARKIT_INT_EN_DP_FALL		BIT(4)
+
+/* Carkit Interrupt Status and
+ * Carkit Interrupt Latch
+ */
+#define ULPI_CARKIT_INT_IDFLOAT			BIT(0)
+#define ULPI_CARKIT_INT_CARINTDET		BIT(1)
+#define ULPI_CARKIT_INT_DP			BIT(2)
+
+/* Carkit Pulse Control*/
+#define ULPI_CARKIT_PLS_CTRL_TXPLSEN		BIT(0)
+#define ULPI_CARKIT_PLS_CTRL_RXPLSEN		BIT(1)
+#define ULPI_CARKIT_PLS_CTRL_SPKRLEFT_BIASEN	BIT(2)
+#define ULPI_CARKIT_PLS_CTRL_SPKRRIGHT_BIASEN	BIT(3)
+
+#endif /* __LINUX_ULPI_REGS_H */
diff --git a/include/linux/usb/ulpi.h b/include/linux/usb/ulpi.h
index 5c295c26ad37..5f07407a367a 100644
--- a/include/linux/usb/ulpi.h
+++ b/include/linux/usb/ulpi.h
@@ -12,6 +12,8 @@
 #define __LINUX_USB_ULPI_H
 
 #include <linux/usb/otg.h>
+#include <linux/ulpi/regs.h>
+
 /*-------------------------------------------------------------------------*/
 
 /*
@@ -49,138 +51,6 @@
 
 /*-------------------------------------------------------------------------*/
 
-/*
- * Macros for Set and Clear
- * See ULPI 1.1 specification to find the registers with Set and Clear offsets
- */
-#define ULPI_SET(a)				(a + 1)
-#define ULPI_CLR(a)				(a + 2)
-
-/*-------------------------------------------------------------------------*/
-
-/*
- * Register Map
- */
-#define ULPI_VENDOR_ID_LOW			0x00
-#define ULPI_VENDOR_ID_HIGH			0x01
-#define ULPI_PRODUCT_ID_LOW			0x02
-#define ULPI_PRODUCT_ID_HIGH			0x03
-#define ULPI_FUNC_CTRL				0x04
-#define ULPI_IFC_CTRL				0x07
-#define ULPI_OTG_CTRL				0x0a
-#define ULPI_USB_INT_EN_RISE			0x0d
-#define ULPI_USB_INT_EN_FALL			0x10
-#define ULPI_USB_INT_STS			0x13
-#define ULPI_USB_INT_LATCH			0x14
-#define ULPI_DEBUG				0x15
-#define ULPI_SCRATCH				0x16
-/* Optional Carkit Registers */
-#define ULPI_CARCIT_CTRL			0x19
-#define ULPI_CARCIT_INT_DELAY			0x1c
-#define ULPI_CARCIT_INT_EN			0x1d
-#define ULPI_CARCIT_INT_STS			0x20
-#define ULPI_CARCIT_INT_LATCH			0x21
-#define ULPI_CARCIT_PLS_CTRL			0x22
-/* Other Optional Registers */
-#define ULPI_TX_POS_WIDTH			0x25
-#define ULPI_TX_NEG_WIDTH			0x26
-#define ULPI_POLARITY_RECOVERY			0x27
-/* Access Extended Register Set */
-#define ULPI_ACCESS_EXTENDED			0x2f
-/* Vendor Specific */
-#define ULPI_VENDOR_SPECIFIC			0x30
-/* Extended Registers */
-#define ULPI_EXT_VENDOR_SPECIFIC		0x80
-
-/*-------------------------------------------------------------------------*/
-
-/*
- * Register Bits
- */
-
-/* Function Control */
-#define ULPI_FUNC_CTRL_XCVRSEL			(1 << 0)
-#define  ULPI_FUNC_CTRL_XCVRSEL_MASK		(3 << 0)
-#define  ULPI_FUNC_CTRL_HIGH_SPEED		(0 << 0)
-#define  ULPI_FUNC_CTRL_FULL_SPEED		(1 << 0)
-#define  ULPI_FUNC_CTRL_LOW_SPEED		(2 << 0)
-#define  ULPI_FUNC_CTRL_FS4LS			(3 << 0)
-#define ULPI_FUNC_CTRL_TERMSELECT		(1 << 2)
-#define ULPI_FUNC_CTRL_OPMODE			(1 << 3)
-#define  ULPI_FUNC_CTRL_OPMODE_MASK		(3 << 3)
-#define  ULPI_FUNC_CTRL_OPMODE_NORMAL		(0 << 3)
-#define  ULPI_FUNC_CTRL_OPMODE_NONDRIVING	(1 << 3)
-#define  ULPI_FUNC_CTRL_OPMODE_DISABLE_NRZI	(2 << 3)
-#define  ULPI_FUNC_CTRL_OPMODE_NOSYNC_NOEOP	(3 << 3)
-#define ULPI_FUNC_CTRL_RESET			(1 << 5)
-#define ULPI_FUNC_CTRL_SUSPENDM			(1 << 6)
-
-/* Interface Control */
-#define ULPI_IFC_CTRL_6_PIN_SERIAL_MODE		(1 << 0)
-#define ULPI_IFC_CTRL_3_PIN_SERIAL_MODE		(1 << 1)
-#define ULPI_IFC_CTRL_CARKITMODE		(1 << 2)
-#define ULPI_IFC_CTRL_CLOCKSUSPENDM		(1 << 3)
-#define ULPI_IFC_CTRL_AUTORESUME		(1 << 4)
-#define ULPI_IFC_CTRL_EXTERNAL_VBUS		(1 << 5)
-#define ULPI_IFC_CTRL_PASSTHRU			(1 << 6)
-#define ULPI_IFC_CTRL_PROTECT_IFC_DISABLE	(1 << 7)
-
-/* OTG Control */
-#define ULPI_OTG_CTRL_ID_PULLUP			(1 << 0)
-#define ULPI_OTG_CTRL_DP_PULLDOWN		(1 << 1)
-#define ULPI_OTG_CTRL_DM_PULLDOWN		(1 << 2)
-#define ULPI_OTG_CTRL_DISCHRGVBUS		(1 << 3)
-#define ULPI_OTG_CTRL_CHRGVBUS			(1 << 4)
-#define ULPI_OTG_CTRL_DRVVBUS			(1 << 5)
-#define ULPI_OTG_CTRL_DRVVBUS_EXT		(1 << 6)
-#define ULPI_OTG_CTRL_EXTVBUSIND		(1 << 7)
-
-/* USB Interrupt Enable Rising,
- * USB Interrupt Enable Falling,
- * USB Interrupt Status and
- * USB Interrupt Latch
- */
-#define ULPI_INT_HOST_DISCONNECT		(1 << 0)
-#define ULPI_INT_VBUS_VALID			(1 << 1)
-#define ULPI_INT_SESS_VALID			(1 << 2)
-#define ULPI_INT_SESS_END			(1 << 3)
-#define ULPI_INT_IDGRD				(1 << 4)
-
-/* Debug */
-#define ULPI_DEBUG_LINESTATE0			(1 << 0)
-#define ULPI_DEBUG_LINESTATE1			(1 << 1)
-
-/* Carkit Control */
-#define ULPI_CARKIT_CTRL_CARKITPWR		(1 << 0)
-#define ULPI_CARKIT_CTRL_IDGNDDRV		(1 << 1)
-#define ULPI_CARKIT_CTRL_TXDEN			(1 << 2)
-#define ULPI_CARKIT_CTRL_RXDEN			(1 << 3)
-#define ULPI_CARKIT_CTRL_SPKLEFTEN		(1 << 4)
-#define ULPI_CARKIT_CTRL_SPKRIGHTEN		(1 << 5)
-#define ULPI_CARKIT_CTRL_MICEN			(1 << 6)
-
-/* Carkit Interrupt Enable */
-#define ULPI_CARKIT_INT_EN_IDFLOAT_RISE		(1 << 0)
-#define ULPI_CARKIT_INT_EN_IDFLOAT_FALL		(1 << 1)
-#define ULPI_CARKIT_INT_EN_CARINTDET		(1 << 2)
-#define ULPI_CARKIT_INT_EN_DP_RISE		(1 << 3)
-#define ULPI_CARKIT_INT_EN_DP_FALL		(1 << 4)
-
-/* Carkit Interrupt Status and
- * Carkit Interrupt Latch
- */
-#define ULPI_CARKIT_INT_IDFLOAT			(1 << 0)
-#define ULPI_CARKIT_INT_CARINTDET		(1 << 1)
-#define ULPI_CARKIT_INT_DP			(1 << 2)
-
-/* Carkit Pulse Control*/
-#define ULPI_CARKIT_PLS_CTRL_TXPLSEN		(1 << 0)
-#define ULPI_CARKIT_PLS_CTRL_RXPLSEN		(1 << 1)
-#define ULPI_CARKIT_PLS_CTRL_SPKRLEFT_BIASEN	(1 << 2)
-#define ULPI_CARKIT_PLS_CTRL_SPKRRIGHT_BIASEN	(1 << 3)
-
-/*-------------------------------------------------------------------------*/
-
 #if IS_ENABLED(CONFIG_USB_ULPI)
 struct usb_phy *otg_ulpi_create(struct usb_phy_io_ops *ops,
 					unsigned int flags);
-- 
cgit v1.2.3


From f267caab44451baa70d25fa3191a68bb79ad1b08 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Wed, 13 May 2015 14:03:51 -0400
Subject: tracing: Remove unused prototype ftrace_event_define_field()

ftrace_event_define_field() has a prototype defined but never used.
Remove it.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 65ce6de91307..f8465d65f3c7 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -219,10 +219,6 @@ struct ftrace_event_class {
 extern int ftrace_event_reg(struct ftrace_event_call *event,
 			    enum trace_reg type, void *data);
 
-int ftrace_event_define_field(struct ftrace_event_call *call,
-			      char *type, int len, char *item, int offset,
-			      int field_size, int sign, int filter);
-
 struct ftrace_event_buffer {
 	struct ring_buffer		*buffer;
 	struct ring_buffer_event	*event;
@@ -238,10 +234,6 @@ void *ftrace_event_buffer_reserve(struct ftrace_event_buffer *fbuffer,
 
 void ftrace_event_buffer_commit(struct ftrace_event_buffer *fbuffer);
 
-int ftrace_event_define_field(struct ftrace_event_call *call,
-			      char *type, int len, char *item, int offset,
-			      int field_size, int sign, int filter);
-
 enum {
 	TRACE_EVENT_FL_FILTERED_BIT,
 	TRACE_EVENT_FL_CAP_ANY_BIT,
-- 
cgit v1.2.3


From af658dca221207174fc0a7bcdcd4cff7c589fdd8 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Wed, 29 Apr 2015 14:36:05 -0400
Subject: tracing: Rename ftrace_event.h to trace_events.h

The term "ftrace" is really the infrastructure of the function hooks,
and not the trace events. Rename ftrace_event.h to trace_events.h to
represent the trace_event infrastructure and decouple the term ftrace
from it.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h | 616 -------------------------------------------
 include/linux/trace_events.h | 616 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 616 insertions(+), 616 deletions(-)
 delete mode 100644 include/linux/ftrace_event.h
 create mode 100644 include/linux/trace_events.h

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
deleted file mode 100644
index f8465d65f3c7..000000000000
--- a/include/linux/ftrace_event.h
+++ /dev/null
@@ -1,616 +0,0 @@
-
-#ifndef _LINUX_FTRACE_EVENT_H
-#define _LINUX_FTRACE_EVENT_H
-
-#include <linux/ring_buffer.h>
-#include <linux/trace_seq.h>
-#include <linux/percpu.h>
-#include <linux/hardirq.h>
-#include <linux/perf_event.h>
-#include <linux/tracepoint.h>
-
-struct trace_array;
-struct trace_buffer;
-struct tracer;
-struct dentry;
-struct bpf_prog;
-
-struct trace_print_flags {
-	unsigned long		mask;
-	const char		*name;
-};
-
-struct trace_print_flags_u64 {
-	unsigned long long	mask;
-	const char		*name;
-};
-
-const char *ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
-				   unsigned long flags,
-				   const struct trace_print_flags *flag_array);
-
-const char *ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
-				     const struct trace_print_flags *symbol_array);
-
-#if BITS_PER_LONG == 32
-const char *ftrace_print_symbols_seq_u64(struct trace_seq *p,
-					 unsigned long long val,
-					 const struct trace_print_flags_u64
-								 *symbol_array);
-#endif
-
-const char *ftrace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr,
-				     unsigned int bitmask_size);
-
-const char *ftrace_print_hex_seq(struct trace_seq *p,
-				 const unsigned char *buf, int len);
-
-const char *ftrace_print_array_seq(struct trace_seq *p,
-				   const void *buf, int count,
-				   size_t el_size);
-
-struct trace_iterator;
-struct trace_event;
-
-int ftrace_raw_output_prep(struct trace_iterator *iter,
-			   struct trace_event *event);
-
-/*
- * The trace entry - the most basic unit of tracing. This is what
- * is printed in the end as a single line in the trace output, such as:
- *
- *     bash-15816 [01]   235.197585: idle_cpu <- irq_enter
- */
-struct trace_entry {
-	unsigned short		type;
-	unsigned char		flags;
-	unsigned char		preempt_count;
-	int			pid;
-};
-
-#define FTRACE_MAX_EVENT						\
-	((1 << (sizeof(((struct trace_entry *)0)->type) * 8)) - 1)
-
-/*
- * Trace iterator - used by printout routines who present trace
- * results to users and which routines might sleep, etc:
- */
-struct trace_iterator {
-	struct trace_array	*tr;
-	struct tracer		*trace;
-	struct trace_buffer	*trace_buffer;
-	void			*private;
-	int			cpu_file;
-	struct mutex		mutex;
-	struct ring_buffer_iter	**buffer_iter;
-	unsigned long		iter_flags;
-
-	/* trace_seq for __print_flags() and __print_symbolic() etc. */
-	struct trace_seq	tmp_seq;
-
-	cpumask_var_t		started;
-
-	/* it's true when current open file is snapshot */
-	bool			snapshot;
-
-	/* The below is zeroed out in pipe_read */
-	struct trace_seq	seq;
-	struct trace_entry	*ent;
-	unsigned long		lost_events;
-	int			leftover;
-	int			ent_size;
-	int			cpu;
-	u64			ts;
-
-	loff_t			pos;
-	long			idx;
-
-	/* All new field here will be zeroed out in pipe_read */
-};
-
-enum trace_iter_flags {
-	TRACE_FILE_LAT_FMT	= 1,
-	TRACE_FILE_ANNOTATE	= 2,
-	TRACE_FILE_TIME_IN_NS	= 4,
-};
-
-
-typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter,
-				      int flags, struct trace_event *event);
-
-struct trace_event_functions {
-	trace_print_func	trace;
-	trace_print_func	raw;
-	trace_print_func	hex;
-	trace_print_func	binary;
-};
-
-struct trace_event {
-	struct hlist_node		node;
-	struct list_head		list;
-	int				type;
-	struct trace_event_functions	*funcs;
-};
-
-extern int register_ftrace_event(struct trace_event *event);
-extern int unregister_ftrace_event(struct trace_event *event);
-
-/* Return values for print_line callback */
-enum print_line_t {
-	TRACE_TYPE_PARTIAL_LINE	= 0,	/* Retry after flushing the seq */
-	TRACE_TYPE_HANDLED	= 1,
-	TRACE_TYPE_UNHANDLED	= 2,	/* Relay to other output functions */
-	TRACE_TYPE_NO_CONSUME	= 3	/* Handled but ask to not consume */
-};
-
-/*
- * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
- * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
- * simplifies those functions and keeps them in sync.
- */
-static inline enum print_line_t trace_handle_return(struct trace_seq *s)
-{
-	return trace_seq_has_overflowed(s) ?
-		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
-}
-
-void tracing_generic_entry_update(struct trace_entry *entry,
-				  unsigned long flags,
-				  int pc);
-struct ftrace_event_file;
-
-struct ring_buffer_event *
-trace_event_buffer_lock_reserve(struct ring_buffer **current_buffer,
-				struct ftrace_event_file *ftrace_file,
-				int type, unsigned long len,
-				unsigned long flags, int pc);
-struct ring_buffer_event *
-trace_current_buffer_lock_reserve(struct ring_buffer **current_buffer,
-				  int type, unsigned long len,
-				  unsigned long flags, int pc);
-void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
-					struct ring_buffer_event *event,
-					unsigned long flags, int pc);
-void trace_buffer_unlock_commit(struct ring_buffer *buffer,
-				struct ring_buffer_event *event,
-				unsigned long flags, int pc);
-void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
-				     struct ring_buffer_event *event,
-				     unsigned long flags, int pc,
-				     struct pt_regs *regs);
-void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
-					 struct ring_buffer_event *event);
-
-void tracing_record_cmdline(struct task_struct *tsk);
-
-int ftrace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...);
-
-struct event_filter;
-
-enum trace_reg {
-	TRACE_REG_REGISTER,
-	TRACE_REG_UNREGISTER,
-#ifdef CONFIG_PERF_EVENTS
-	TRACE_REG_PERF_REGISTER,
-	TRACE_REG_PERF_UNREGISTER,
-	TRACE_REG_PERF_OPEN,
-	TRACE_REG_PERF_CLOSE,
-	TRACE_REG_PERF_ADD,
-	TRACE_REG_PERF_DEL,
-#endif
-};
-
-struct ftrace_event_call;
-
-struct ftrace_event_class {
-	const char		*system;
-	void			*probe;
-#ifdef CONFIG_PERF_EVENTS
-	void			*perf_probe;
-#endif
-	int			(*reg)(struct ftrace_event_call *event,
-				       enum trace_reg type, void *data);
-	int			(*define_fields)(struct ftrace_event_call *);
-	struct list_head	*(*get_fields)(struct ftrace_event_call *);
-	struct list_head	fields;
-	int			(*raw_init)(struct ftrace_event_call *);
-};
-
-extern int ftrace_event_reg(struct ftrace_event_call *event,
-			    enum trace_reg type, void *data);
-
-struct ftrace_event_buffer {
-	struct ring_buffer		*buffer;
-	struct ring_buffer_event	*event;
-	struct ftrace_event_file	*ftrace_file;
-	void				*entry;
-	unsigned long			flags;
-	int				pc;
-};
-
-void *ftrace_event_buffer_reserve(struct ftrace_event_buffer *fbuffer,
-				  struct ftrace_event_file *ftrace_file,
-				  unsigned long len);
-
-void ftrace_event_buffer_commit(struct ftrace_event_buffer *fbuffer);
-
-enum {
-	TRACE_EVENT_FL_FILTERED_BIT,
-	TRACE_EVENT_FL_CAP_ANY_BIT,
-	TRACE_EVENT_FL_NO_SET_FILTER_BIT,
-	TRACE_EVENT_FL_IGNORE_ENABLE_BIT,
-	TRACE_EVENT_FL_WAS_ENABLED_BIT,
-	TRACE_EVENT_FL_USE_CALL_FILTER_BIT,
-	TRACE_EVENT_FL_TRACEPOINT_BIT,
-	TRACE_EVENT_FL_KPROBE_BIT,
-};
-
-/*
- * Event flags:
- *  FILTERED	  - The event has a filter attached
- *  CAP_ANY	  - Any user can enable for perf
- *  NO_SET_FILTER - Set when filter has error and is to be ignored
- *  IGNORE_ENABLE - For ftrace internal events, do not enable with debugfs file
- *  WAS_ENABLED   - Set and stays set when an event was ever enabled
- *                    (used for module unloading, if a module event is enabled,
- *                     it is best to clear the buffers that used it).
- *  USE_CALL_FILTER - For ftrace internal events, don't use file filter
- *  TRACEPOINT    - Event is a tracepoint
- *  KPROBE        - Event is a kprobe
- */
-enum {
-	TRACE_EVENT_FL_FILTERED		= (1 << TRACE_EVENT_FL_FILTERED_BIT),
-	TRACE_EVENT_FL_CAP_ANY		= (1 << TRACE_EVENT_FL_CAP_ANY_BIT),
-	TRACE_EVENT_FL_NO_SET_FILTER	= (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT),
-	TRACE_EVENT_FL_IGNORE_ENABLE	= (1 << TRACE_EVENT_FL_IGNORE_ENABLE_BIT),
-	TRACE_EVENT_FL_WAS_ENABLED	= (1 << TRACE_EVENT_FL_WAS_ENABLED_BIT),
-	TRACE_EVENT_FL_USE_CALL_FILTER	= (1 << TRACE_EVENT_FL_USE_CALL_FILTER_BIT),
-	TRACE_EVENT_FL_TRACEPOINT	= (1 << TRACE_EVENT_FL_TRACEPOINT_BIT),
-	TRACE_EVENT_FL_KPROBE		= (1 << TRACE_EVENT_FL_KPROBE_BIT),
-};
-
-struct ftrace_event_call {
-	struct list_head	list;
-	struct ftrace_event_class *class;
-	union {
-		char			*name;
-		/* Set TRACE_EVENT_FL_TRACEPOINT flag when using "tp" */
-		struct tracepoint	*tp;
-	};
-	struct trace_event	event;
-	char			*print_fmt;
-	struct event_filter	*filter;
-	void			*mod;
-	void			*data;
-	/*
-	 *   bit 0:		filter_active
-	 *   bit 1:		allow trace by non root (cap any)
-	 *   bit 2:		failed to apply filter
-	 *   bit 3:		ftrace internal event (do not enable)
-	 *   bit 4:		Event was enabled by module
-	 *   bit 5:		use call filter rather than file filter
-	 *   bit 6:		Event is a tracepoint
-	 */
-	int			flags; /* static flags of different events */
-
-#ifdef CONFIG_PERF_EVENTS
-	int				perf_refcount;
-	struct hlist_head __percpu	*perf_events;
-	struct bpf_prog			*prog;
-
-	int	(*perf_perm)(struct ftrace_event_call *,
-			     struct perf_event *);
-#endif
-};
-
-static inline const char *
-ftrace_event_name(struct ftrace_event_call *call)
-{
-	if (call->flags & TRACE_EVENT_FL_TRACEPOINT)
-		return call->tp ? call->tp->name : NULL;
-	else
-		return call->name;
-}
-
-struct trace_array;
-struct ftrace_subsystem_dir;
-
-enum {
-	FTRACE_EVENT_FL_ENABLED_BIT,
-	FTRACE_EVENT_FL_RECORDED_CMD_BIT,
-	FTRACE_EVENT_FL_FILTERED_BIT,
-	FTRACE_EVENT_FL_NO_SET_FILTER_BIT,
-	FTRACE_EVENT_FL_SOFT_MODE_BIT,
-	FTRACE_EVENT_FL_SOFT_DISABLED_BIT,
-	FTRACE_EVENT_FL_TRIGGER_MODE_BIT,
-	FTRACE_EVENT_FL_TRIGGER_COND_BIT,
-};
-
-/*
- * Ftrace event file flags:
- *  ENABLED	  - The event is enabled
- *  RECORDED_CMD  - The comms should be recorded at sched_switch
- *  FILTERED	  - The event has a filter attached
- *  NO_SET_FILTER - Set when filter has error and is to be ignored
- *  SOFT_MODE     - The event is enabled/disabled by SOFT_DISABLED
- *  SOFT_DISABLED - When set, do not trace the event (even though its
- *                   tracepoint may be enabled)
- *  TRIGGER_MODE  - When set, invoke the triggers associated with the event
- *  TRIGGER_COND  - When set, one or more triggers has an associated filter
- */
-enum {
-	FTRACE_EVENT_FL_ENABLED		= (1 << FTRACE_EVENT_FL_ENABLED_BIT),
-	FTRACE_EVENT_FL_RECORDED_CMD	= (1 << FTRACE_EVENT_FL_RECORDED_CMD_BIT),
-	FTRACE_EVENT_FL_FILTERED	= (1 << FTRACE_EVENT_FL_FILTERED_BIT),
-	FTRACE_EVENT_FL_NO_SET_FILTER	= (1 << FTRACE_EVENT_FL_NO_SET_FILTER_BIT),
-	FTRACE_EVENT_FL_SOFT_MODE	= (1 << FTRACE_EVENT_FL_SOFT_MODE_BIT),
-	FTRACE_EVENT_FL_SOFT_DISABLED	= (1 << FTRACE_EVENT_FL_SOFT_DISABLED_BIT),
-	FTRACE_EVENT_FL_TRIGGER_MODE	= (1 << FTRACE_EVENT_FL_TRIGGER_MODE_BIT),
-	FTRACE_EVENT_FL_TRIGGER_COND	= (1 << FTRACE_EVENT_FL_TRIGGER_COND_BIT),
-};
-
-struct ftrace_event_file {
-	struct list_head		list;
-	struct ftrace_event_call	*event_call;
-	struct event_filter		*filter;
-	struct dentry			*dir;
-	struct trace_array		*tr;
-	struct ftrace_subsystem_dir	*system;
-	struct list_head		triggers;
-
-	/*
-	 * 32 bit flags:
-	 *   bit 0:		enabled
-	 *   bit 1:		enabled cmd record
-	 *   bit 2:		enable/disable with the soft disable bit
-	 *   bit 3:		soft disabled
-	 *   bit 4:		trigger enabled
-	 *
-	 * Note: The bits must be set atomically to prevent races
-	 * from other writers. Reads of flags do not need to be in
-	 * sync as they occur in critical sections. But the way flags
-	 * is currently used, these changes do not affect the code
-	 * except that when a change is made, it may have a slight
-	 * delay in propagating the changes to other CPUs due to
-	 * caching and such. Which is mostly OK ;-)
-	 */
-	unsigned long		flags;
-	atomic_t		sm_ref;	/* soft-mode reference counter */
-	atomic_t		tm_ref;	/* trigger-mode reference counter */
-};
-
-#define __TRACE_EVENT_FLAGS(name, value)				\
-	static int __init trace_init_flags_##name(void)			\
-	{								\
-		event_##name.flags |= value;				\
-		return 0;						\
-	}								\
-	early_initcall(trace_init_flags_##name);
-
-#define __TRACE_EVENT_PERF_PERM(name, expr...)				\
-	static int perf_perm_##name(struct ftrace_event_call *tp_event, \
-				    struct perf_event *p_event)		\
-	{								\
-		return ({ expr; });					\
-	}								\
-	static int __init trace_init_perf_perm_##name(void)		\
-	{								\
-		event_##name.perf_perm = &perf_perm_##name;		\
-		return 0;						\
-	}								\
-	early_initcall(trace_init_perf_perm_##name);
-
-#define PERF_MAX_TRACE_SIZE	2048
-
-#define MAX_FILTER_STR_VAL	256	/* Should handle KSYM_SYMBOL_LEN */
-
-enum event_trigger_type {
-	ETT_NONE		= (0),
-	ETT_TRACE_ONOFF		= (1 << 0),
-	ETT_SNAPSHOT		= (1 << 1),
-	ETT_STACKTRACE		= (1 << 2),
-	ETT_EVENT_ENABLE	= (1 << 3),
-};
-
-extern int filter_match_preds(struct event_filter *filter, void *rec);
-
-extern int filter_check_discard(struct ftrace_event_file *file, void *rec,
-				struct ring_buffer *buffer,
-				struct ring_buffer_event *event);
-extern int call_filter_check_discard(struct ftrace_event_call *call, void *rec,
-				     struct ring_buffer *buffer,
-				     struct ring_buffer_event *event);
-extern enum event_trigger_type event_triggers_call(struct ftrace_event_file *file,
-						   void *rec);
-extern void event_triggers_post_call(struct ftrace_event_file *file,
-				     enum event_trigger_type tt);
-
-/**
- * ftrace_trigger_soft_disabled - do triggers and test if soft disabled
- * @file: The file pointer of the event to test
- *
- * If any triggers without filters are attached to this event, they
- * will be called here. If the event is soft disabled and has no
- * triggers that require testing the fields, it will return true,
- * otherwise false.
- */
-static inline bool
-ftrace_trigger_soft_disabled(struct ftrace_event_file *file)
-{
-	unsigned long eflags = file->flags;
-
-	if (!(eflags & FTRACE_EVENT_FL_TRIGGER_COND)) {
-		if (eflags & FTRACE_EVENT_FL_TRIGGER_MODE)
-			event_triggers_call(file, NULL);
-		if (eflags & FTRACE_EVENT_FL_SOFT_DISABLED)
-			return true;
-	}
-	return false;
-}
-
-/*
- * Helper function for event_trigger_unlock_commit{_regs}().
- * If there are event triggers attached to this event that requires
- * filtering against its fields, then they wil be called as the
- * entry already holds the field information of the current event.
- *
- * It also checks if the event should be discarded or not.
- * It is to be discarded if the event is soft disabled and the
- * event was only recorded to process triggers, or if the event
- * filter is active and this event did not match the filters.
- *
- * Returns true if the event is discarded, false otherwise.
- */
-static inline bool
-__event_trigger_test_discard(struct ftrace_event_file *file,
-			     struct ring_buffer *buffer,
-			     struct ring_buffer_event *event,
-			     void *entry,
-			     enum event_trigger_type *tt)
-{
-	unsigned long eflags = file->flags;
-
-	if (eflags & FTRACE_EVENT_FL_TRIGGER_COND)
-		*tt = event_triggers_call(file, entry);
-
-	if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags))
-		ring_buffer_discard_commit(buffer, event);
-	else if (!filter_check_discard(file, entry, buffer, event))
-		return false;
-
-	return true;
-}
-
-/**
- * event_trigger_unlock_commit - handle triggers and finish event commit
- * @file: The file pointer assoctiated to the event
- * @buffer: The ring buffer that the event is being written to
- * @event: The event meta data in the ring buffer
- * @entry: The event itself
- * @irq_flags: The state of the interrupts at the start of the event
- * @pc: The state of the preempt count at the start of the event.
- *
- * This is a helper function to handle triggers that require data
- * from the event itself. It also tests the event against filters and
- * if the event is soft disabled and should be discarded.
- */
-static inline void
-event_trigger_unlock_commit(struct ftrace_event_file *file,
-			    struct ring_buffer *buffer,
-			    struct ring_buffer_event *event,
-			    void *entry, unsigned long irq_flags, int pc)
-{
-	enum event_trigger_type tt = ETT_NONE;
-
-	if (!__event_trigger_test_discard(file, buffer, event, entry, &tt))
-		trace_buffer_unlock_commit(buffer, event, irq_flags, pc);
-
-	if (tt)
-		event_triggers_post_call(file, tt);
-}
-
-/**
- * event_trigger_unlock_commit_regs - handle triggers and finish event commit
- * @file: The file pointer assoctiated to the event
- * @buffer: The ring buffer that the event is being written to
- * @event: The event meta data in the ring buffer
- * @entry: The event itself
- * @irq_flags: The state of the interrupts at the start of the event
- * @pc: The state of the preempt count at the start of the event.
- *
- * This is a helper function to handle triggers that require data
- * from the event itself. It also tests the event against filters and
- * if the event is soft disabled and should be discarded.
- *
- * Same as event_trigger_unlock_commit() but calls
- * trace_buffer_unlock_commit_regs() instead of trace_buffer_unlock_commit().
- */
-static inline void
-event_trigger_unlock_commit_regs(struct ftrace_event_file *file,
-				 struct ring_buffer *buffer,
-				 struct ring_buffer_event *event,
-				 void *entry, unsigned long irq_flags, int pc,
-				 struct pt_regs *regs)
-{
-	enum event_trigger_type tt = ETT_NONE;
-
-	if (!__event_trigger_test_discard(file, buffer, event, entry, &tt))
-		trace_buffer_unlock_commit_regs(buffer, event,
-						irq_flags, pc, regs);
-
-	if (tt)
-		event_triggers_post_call(file, tt);
-}
-
-#ifdef CONFIG_BPF_SYSCALL
-unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx);
-#else
-static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
-{
-	return 1;
-}
-#endif
-
-enum {
-	FILTER_OTHER = 0,
-	FILTER_STATIC_STRING,
-	FILTER_DYN_STRING,
-	FILTER_PTR_STRING,
-	FILTER_TRACE_FN,
-};
-
-extern int trace_event_raw_init(struct ftrace_event_call *call);
-extern int trace_define_field(struct ftrace_event_call *call, const char *type,
-			      const char *name, int offset, int size,
-			      int is_signed, int filter_type);
-extern int trace_add_event_call(struct ftrace_event_call *call);
-extern int trace_remove_event_call(struct ftrace_event_call *call);
-
-#define is_signed_type(type)	(((type)(-1)) < (type)1)
-
-int trace_set_clr_event(const char *system, const char *event, int set);
-
-/*
- * The double __builtin_constant_p is because gcc will give us an error
- * if we try to allocate the static variable to fmt if it is not a
- * constant. Even with the outer if statement optimizing out.
- */
-#define event_trace_printk(ip, fmt, args...)				\
-do {									\
-	__trace_printk_check_format(fmt, ##args);			\
-	tracing_record_cmdline(current);				\
-	if (__builtin_constant_p(fmt)) {				\
-		static const char *trace_printk_fmt			\
-		  __attribute__((section("__trace_printk_fmt"))) =	\
-			__builtin_constant_p(fmt) ? fmt : NULL;		\
-									\
-		__trace_bprintk(ip, trace_printk_fmt, ##args);		\
-	} else								\
-		__trace_printk(ip, fmt, ##args);			\
-} while (0)
-
-#ifdef CONFIG_PERF_EVENTS
-struct perf_event;
-
-DECLARE_PER_CPU(struct pt_regs, perf_trace_regs);
-
-extern int  perf_trace_init(struct perf_event *event);
-extern void perf_trace_destroy(struct perf_event *event);
-extern int  perf_trace_add(struct perf_event *event, int flags);
-extern void perf_trace_del(struct perf_event *event, int flags);
-extern int  ftrace_profile_set_filter(struct perf_event *event, int event_id,
-				     char *filter_str);
-extern void ftrace_profile_free_filter(struct perf_event *event);
-extern void *perf_trace_buf_prepare(int size, unsigned short type,
-				    struct pt_regs **regs, int *rctxp);
-
-static inline void
-perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr,
-		       u64 count, struct pt_regs *regs, void *head,
-		       struct task_struct *task)
-{
-	perf_tp_event(addr, count, raw_data, size, regs, head, rctx, task);
-}
-#endif
-
-#endif /* _LINUX_FTRACE_EVENT_H */
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
new file mode 100644
index 000000000000..f8465d65f3c7
--- /dev/null
+++ b/include/linux/trace_events.h
@@ -0,0 +1,616 @@
+
+#ifndef _LINUX_FTRACE_EVENT_H
+#define _LINUX_FTRACE_EVENT_H
+
+#include <linux/ring_buffer.h>
+#include <linux/trace_seq.h>
+#include <linux/percpu.h>
+#include <linux/hardirq.h>
+#include <linux/perf_event.h>
+#include <linux/tracepoint.h>
+
+struct trace_array;
+struct trace_buffer;
+struct tracer;
+struct dentry;
+struct bpf_prog;
+
+struct trace_print_flags {
+	unsigned long		mask;
+	const char		*name;
+};
+
+struct trace_print_flags_u64 {
+	unsigned long long	mask;
+	const char		*name;
+};
+
+const char *ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
+				   unsigned long flags,
+				   const struct trace_print_flags *flag_array);
+
+const char *ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
+				     const struct trace_print_flags *symbol_array);
+
+#if BITS_PER_LONG == 32
+const char *ftrace_print_symbols_seq_u64(struct trace_seq *p,
+					 unsigned long long val,
+					 const struct trace_print_flags_u64
+								 *symbol_array);
+#endif
+
+const char *ftrace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr,
+				     unsigned int bitmask_size);
+
+const char *ftrace_print_hex_seq(struct trace_seq *p,
+				 const unsigned char *buf, int len);
+
+const char *ftrace_print_array_seq(struct trace_seq *p,
+				   const void *buf, int count,
+				   size_t el_size);
+
+struct trace_iterator;
+struct trace_event;
+
+int ftrace_raw_output_prep(struct trace_iterator *iter,
+			   struct trace_event *event);
+
+/*
+ * The trace entry - the most basic unit of tracing. This is what
+ * is printed in the end as a single line in the trace output, such as:
+ *
+ *     bash-15816 [01]   235.197585: idle_cpu <- irq_enter
+ */
+struct trace_entry {
+	unsigned short		type;
+	unsigned char		flags;
+	unsigned char		preempt_count;
+	int			pid;
+};
+
+#define FTRACE_MAX_EVENT						\
+	((1 << (sizeof(((struct trace_entry *)0)->type) * 8)) - 1)
+
+/*
+ * Trace iterator - used by printout routines who present trace
+ * results to users and which routines might sleep, etc:
+ */
+struct trace_iterator {
+	struct trace_array	*tr;
+	struct tracer		*trace;
+	struct trace_buffer	*trace_buffer;
+	void			*private;
+	int			cpu_file;
+	struct mutex		mutex;
+	struct ring_buffer_iter	**buffer_iter;
+	unsigned long		iter_flags;
+
+	/* trace_seq for __print_flags() and __print_symbolic() etc. */
+	struct trace_seq	tmp_seq;
+
+	cpumask_var_t		started;
+
+	/* it's true when current open file is snapshot */
+	bool			snapshot;
+
+	/* The below is zeroed out in pipe_read */
+	struct trace_seq	seq;
+	struct trace_entry	*ent;
+	unsigned long		lost_events;
+	int			leftover;
+	int			ent_size;
+	int			cpu;
+	u64			ts;
+
+	loff_t			pos;
+	long			idx;
+
+	/* All new field here will be zeroed out in pipe_read */
+};
+
+enum trace_iter_flags {
+	TRACE_FILE_LAT_FMT	= 1,
+	TRACE_FILE_ANNOTATE	= 2,
+	TRACE_FILE_TIME_IN_NS	= 4,
+};
+
+
+typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter,
+				      int flags, struct trace_event *event);
+
+struct trace_event_functions {
+	trace_print_func	trace;
+	trace_print_func	raw;
+	trace_print_func	hex;
+	trace_print_func	binary;
+};
+
+struct trace_event {
+	struct hlist_node		node;
+	struct list_head		list;
+	int				type;
+	struct trace_event_functions	*funcs;
+};
+
+extern int register_ftrace_event(struct trace_event *event);
+extern int unregister_ftrace_event(struct trace_event *event);
+
+/* Return values for print_line callback */
+enum print_line_t {
+	TRACE_TYPE_PARTIAL_LINE	= 0,	/* Retry after flushing the seq */
+	TRACE_TYPE_HANDLED	= 1,
+	TRACE_TYPE_UNHANDLED	= 2,	/* Relay to other output functions */
+	TRACE_TYPE_NO_CONSUME	= 3	/* Handled but ask to not consume */
+};
+
+/*
+ * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
+ * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
+ * simplifies those functions and keeps them in sync.
+ */
+static inline enum print_line_t trace_handle_return(struct trace_seq *s)
+{
+	return trace_seq_has_overflowed(s) ?
+		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
+}
+
+void tracing_generic_entry_update(struct trace_entry *entry,
+				  unsigned long flags,
+				  int pc);
+struct ftrace_event_file;
+
+struct ring_buffer_event *
+trace_event_buffer_lock_reserve(struct ring_buffer **current_buffer,
+				struct ftrace_event_file *ftrace_file,
+				int type, unsigned long len,
+				unsigned long flags, int pc);
+struct ring_buffer_event *
+trace_current_buffer_lock_reserve(struct ring_buffer **current_buffer,
+				  int type, unsigned long len,
+				  unsigned long flags, int pc);
+void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
+					struct ring_buffer_event *event,
+					unsigned long flags, int pc);
+void trace_buffer_unlock_commit(struct ring_buffer *buffer,
+				struct ring_buffer_event *event,
+				unsigned long flags, int pc);
+void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
+				     struct ring_buffer_event *event,
+				     unsigned long flags, int pc,
+				     struct pt_regs *regs);
+void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
+					 struct ring_buffer_event *event);
+
+void tracing_record_cmdline(struct task_struct *tsk);
+
+int ftrace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...);
+
+struct event_filter;
+
+enum trace_reg {
+	TRACE_REG_REGISTER,
+	TRACE_REG_UNREGISTER,
+#ifdef CONFIG_PERF_EVENTS
+	TRACE_REG_PERF_REGISTER,
+	TRACE_REG_PERF_UNREGISTER,
+	TRACE_REG_PERF_OPEN,
+	TRACE_REG_PERF_CLOSE,
+	TRACE_REG_PERF_ADD,
+	TRACE_REG_PERF_DEL,
+#endif
+};
+
+struct ftrace_event_call;
+
+struct ftrace_event_class {
+	const char		*system;
+	void			*probe;
+#ifdef CONFIG_PERF_EVENTS
+	void			*perf_probe;
+#endif
+	int			(*reg)(struct ftrace_event_call *event,
+				       enum trace_reg type, void *data);
+	int			(*define_fields)(struct ftrace_event_call *);
+	struct list_head	*(*get_fields)(struct ftrace_event_call *);
+	struct list_head	fields;
+	int			(*raw_init)(struct ftrace_event_call *);
+};
+
+extern int ftrace_event_reg(struct ftrace_event_call *event,
+			    enum trace_reg type, void *data);
+
+struct ftrace_event_buffer {
+	struct ring_buffer		*buffer;
+	struct ring_buffer_event	*event;
+	struct ftrace_event_file	*ftrace_file;
+	void				*entry;
+	unsigned long			flags;
+	int				pc;
+};
+
+void *ftrace_event_buffer_reserve(struct ftrace_event_buffer *fbuffer,
+				  struct ftrace_event_file *ftrace_file,
+				  unsigned long len);
+
+void ftrace_event_buffer_commit(struct ftrace_event_buffer *fbuffer);
+
+enum {
+	TRACE_EVENT_FL_FILTERED_BIT,
+	TRACE_EVENT_FL_CAP_ANY_BIT,
+	TRACE_EVENT_FL_NO_SET_FILTER_BIT,
+	TRACE_EVENT_FL_IGNORE_ENABLE_BIT,
+	TRACE_EVENT_FL_WAS_ENABLED_BIT,
+	TRACE_EVENT_FL_USE_CALL_FILTER_BIT,
+	TRACE_EVENT_FL_TRACEPOINT_BIT,
+	TRACE_EVENT_FL_KPROBE_BIT,
+};
+
+/*
+ * Event flags:
+ *  FILTERED	  - The event has a filter attached
+ *  CAP_ANY	  - Any user can enable for perf
+ *  NO_SET_FILTER - Set when filter has error and is to be ignored
+ *  IGNORE_ENABLE - For ftrace internal events, do not enable with debugfs file
+ *  WAS_ENABLED   - Set and stays set when an event was ever enabled
+ *                    (used for module unloading, if a module event is enabled,
+ *                     it is best to clear the buffers that used it).
+ *  USE_CALL_FILTER - For ftrace internal events, don't use file filter
+ *  TRACEPOINT    - Event is a tracepoint
+ *  KPROBE        - Event is a kprobe
+ */
+enum {
+	TRACE_EVENT_FL_FILTERED		= (1 << TRACE_EVENT_FL_FILTERED_BIT),
+	TRACE_EVENT_FL_CAP_ANY		= (1 << TRACE_EVENT_FL_CAP_ANY_BIT),
+	TRACE_EVENT_FL_NO_SET_FILTER	= (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT),
+	TRACE_EVENT_FL_IGNORE_ENABLE	= (1 << TRACE_EVENT_FL_IGNORE_ENABLE_BIT),
+	TRACE_EVENT_FL_WAS_ENABLED	= (1 << TRACE_EVENT_FL_WAS_ENABLED_BIT),
+	TRACE_EVENT_FL_USE_CALL_FILTER	= (1 << TRACE_EVENT_FL_USE_CALL_FILTER_BIT),
+	TRACE_EVENT_FL_TRACEPOINT	= (1 << TRACE_EVENT_FL_TRACEPOINT_BIT),
+	TRACE_EVENT_FL_KPROBE		= (1 << TRACE_EVENT_FL_KPROBE_BIT),
+};
+
+struct ftrace_event_call {
+	struct list_head	list;
+	struct ftrace_event_class *class;
+	union {
+		char			*name;
+		/* Set TRACE_EVENT_FL_TRACEPOINT flag when using "tp" */
+		struct tracepoint	*tp;
+	};
+	struct trace_event	event;
+	char			*print_fmt;
+	struct event_filter	*filter;
+	void			*mod;
+	void			*data;
+	/*
+	 *   bit 0:		filter_active
+	 *   bit 1:		allow trace by non root (cap any)
+	 *   bit 2:		failed to apply filter
+	 *   bit 3:		ftrace internal event (do not enable)
+	 *   bit 4:		Event was enabled by module
+	 *   bit 5:		use call filter rather than file filter
+	 *   bit 6:		Event is a tracepoint
+	 */
+	int			flags; /* static flags of different events */
+
+#ifdef CONFIG_PERF_EVENTS
+	int				perf_refcount;
+	struct hlist_head __percpu	*perf_events;
+	struct bpf_prog			*prog;
+
+	int	(*perf_perm)(struct ftrace_event_call *,
+			     struct perf_event *);
+#endif
+};
+
+static inline const char *
+ftrace_event_name(struct ftrace_event_call *call)
+{
+	if (call->flags & TRACE_EVENT_FL_TRACEPOINT)
+		return call->tp ? call->tp->name : NULL;
+	else
+		return call->name;
+}
+
+struct trace_array;
+struct ftrace_subsystem_dir;
+
+enum {
+	FTRACE_EVENT_FL_ENABLED_BIT,
+	FTRACE_EVENT_FL_RECORDED_CMD_BIT,
+	FTRACE_EVENT_FL_FILTERED_BIT,
+	FTRACE_EVENT_FL_NO_SET_FILTER_BIT,
+	FTRACE_EVENT_FL_SOFT_MODE_BIT,
+	FTRACE_EVENT_FL_SOFT_DISABLED_BIT,
+	FTRACE_EVENT_FL_TRIGGER_MODE_BIT,
+	FTRACE_EVENT_FL_TRIGGER_COND_BIT,
+};
+
+/*
+ * Ftrace event file flags:
+ *  ENABLED	  - The event is enabled
+ *  RECORDED_CMD  - The comms should be recorded at sched_switch
+ *  FILTERED	  - The event has a filter attached
+ *  NO_SET_FILTER - Set when filter has error and is to be ignored
+ *  SOFT_MODE     - The event is enabled/disabled by SOFT_DISABLED
+ *  SOFT_DISABLED - When set, do not trace the event (even though its
+ *                   tracepoint may be enabled)
+ *  TRIGGER_MODE  - When set, invoke the triggers associated with the event
+ *  TRIGGER_COND  - When set, one or more triggers has an associated filter
+ */
+enum {
+	FTRACE_EVENT_FL_ENABLED		= (1 << FTRACE_EVENT_FL_ENABLED_BIT),
+	FTRACE_EVENT_FL_RECORDED_CMD	= (1 << FTRACE_EVENT_FL_RECORDED_CMD_BIT),
+	FTRACE_EVENT_FL_FILTERED	= (1 << FTRACE_EVENT_FL_FILTERED_BIT),
+	FTRACE_EVENT_FL_NO_SET_FILTER	= (1 << FTRACE_EVENT_FL_NO_SET_FILTER_BIT),
+	FTRACE_EVENT_FL_SOFT_MODE	= (1 << FTRACE_EVENT_FL_SOFT_MODE_BIT),
+	FTRACE_EVENT_FL_SOFT_DISABLED	= (1 << FTRACE_EVENT_FL_SOFT_DISABLED_BIT),
+	FTRACE_EVENT_FL_TRIGGER_MODE	= (1 << FTRACE_EVENT_FL_TRIGGER_MODE_BIT),
+	FTRACE_EVENT_FL_TRIGGER_COND	= (1 << FTRACE_EVENT_FL_TRIGGER_COND_BIT),
+};
+
+struct ftrace_event_file {
+	struct list_head		list;
+	struct ftrace_event_call	*event_call;
+	struct event_filter		*filter;
+	struct dentry			*dir;
+	struct trace_array		*tr;
+	struct ftrace_subsystem_dir	*system;
+	struct list_head		triggers;
+
+	/*
+	 * 32 bit flags:
+	 *   bit 0:		enabled
+	 *   bit 1:		enabled cmd record
+	 *   bit 2:		enable/disable with the soft disable bit
+	 *   bit 3:		soft disabled
+	 *   bit 4:		trigger enabled
+	 *
+	 * Note: The bits must be set atomically to prevent races
+	 * from other writers. Reads of flags do not need to be in
+	 * sync as they occur in critical sections. But the way flags
+	 * is currently used, these changes do not affect the code
+	 * except that when a change is made, it may have a slight
+	 * delay in propagating the changes to other CPUs due to
+	 * caching and such. Which is mostly OK ;-)
+	 */
+	unsigned long		flags;
+	atomic_t		sm_ref;	/* soft-mode reference counter */
+	atomic_t		tm_ref;	/* trigger-mode reference counter */
+};
+
+#define __TRACE_EVENT_FLAGS(name, value)				\
+	static int __init trace_init_flags_##name(void)			\
+	{								\
+		event_##name.flags |= value;				\
+		return 0;						\
+	}								\
+	early_initcall(trace_init_flags_##name);
+
+#define __TRACE_EVENT_PERF_PERM(name, expr...)				\
+	static int perf_perm_##name(struct ftrace_event_call *tp_event, \
+				    struct perf_event *p_event)		\
+	{								\
+		return ({ expr; });					\
+	}								\
+	static int __init trace_init_perf_perm_##name(void)		\
+	{								\
+		event_##name.perf_perm = &perf_perm_##name;		\
+		return 0;						\
+	}								\
+	early_initcall(trace_init_perf_perm_##name);
+
+#define PERF_MAX_TRACE_SIZE	2048
+
+#define MAX_FILTER_STR_VAL	256	/* Should handle KSYM_SYMBOL_LEN */
+
+enum event_trigger_type {
+	ETT_NONE		= (0),
+	ETT_TRACE_ONOFF		= (1 << 0),
+	ETT_SNAPSHOT		= (1 << 1),
+	ETT_STACKTRACE		= (1 << 2),
+	ETT_EVENT_ENABLE	= (1 << 3),
+};
+
+extern int filter_match_preds(struct event_filter *filter, void *rec);
+
+extern int filter_check_discard(struct ftrace_event_file *file, void *rec,
+				struct ring_buffer *buffer,
+				struct ring_buffer_event *event);
+extern int call_filter_check_discard(struct ftrace_event_call *call, void *rec,
+				     struct ring_buffer *buffer,
+				     struct ring_buffer_event *event);
+extern enum event_trigger_type event_triggers_call(struct ftrace_event_file *file,
+						   void *rec);
+extern void event_triggers_post_call(struct ftrace_event_file *file,
+				     enum event_trigger_type tt);
+
+/**
+ * ftrace_trigger_soft_disabled - do triggers and test if soft disabled
+ * @file: The file pointer of the event to test
+ *
+ * If any triggers without filters are attached to this event, they
+ * will be called here. If the event is soft disabled and has no
+ * triggers that require testing the fields, it will return true,
+ * otherwise false.
+ */
+static inline bool
+ftrace_trigger_soft_disabled(struct ftrace_event_file *file)
+{
+	unsigned long eflags = file->flags;
+
+	if (!(eflags & FTRACE_EVENT_FL_TRIGGER_COND)) {
+		if (eflags & FTRACE_EVENT_FL_TRIGGER_MODE)
+			event_triggers_call(file, NULL);
+		if (eflags & FTRACE_EVENT_FL_SOFT_DISABLED)
+			return true;
+	}
+	return false;
+}
+
+/*
+ * Helper function for event_trigger_unlock_commit{_regs}().
+ * If there are event triggers attached to this event that requires
+ * filtering against its fields, then they wil be called as the
+ * entry already holds the field information of the current event.
+ *
+ * It also checks if the event should be discarded or not.
+ * It is to be discarded if the event is soft disabled and the
+ * event was only recorded to process triggers, or if the event
+ * filter is active and this event did not match the filters.
+ *
+ * Returns true if the event is discarded, false otherwise.
+ */
+static inline bool
+__event_trigger_test_discard(struct ftrace_event_file *file,
+			     struct ring_buffer *buffer,
+			     struct ring_buffer_event *event,
+			     void *entry,
+			     enum event_trigger_type *tt)
+{
+	unsigned long eflags = file->flags;
+
+	if (eflags & FTRACE_EVENT_FL_TRIGGER_COND)
+		*tt = event_triggers_call(file, entry);
+
+	if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags))
+		ring_buffer_discard_commit(buffer, event);
+	else if (!filter_check_discard(file, entry, buffer, event))
+		return false;
+
+	return true;
+}
+
+/**
+ * event_trigger_unlock_commit - handle triggers and finish event commit
+ * @file: The file pointer assoctiated to the event
+ * @buffer: The ring buffer that the event is being written to
+ * @event: The event meta data in the ring buffer
+ * @entry: The event itself
+ * @irq_flags: The state of the interrupts at the start of the event
+ * @pc: The state of the preempt count at the start of the event.
+ *
+ * This is a helper function to handle triggers that require data
+ * from the event itself. It also tests the event against filters and
+ * if the event is soft disabled and should be discarded.
+ */
+static inline void
+event_trigger_unlock_commit(struct ftrace_event_file *file,
+			    struct ring_buffer *buffer,
+			    struct ring_buffer_event *event,
+			    void *entry, unsigned long irq_flags, int pc)
+{
+	enum event_trigger_type tt = ETT_NONE;
+
+	if (!__event_trigger_test_discard(file, buffer, event, entry, &tt))
+		trace_buffer_unlock_commit(buffer, event, irq_flags, pc);
+
+	if (tt)
+		event_triggers_post_call(file, tt);
+}
+
+/**
+ * event_trigger_unlock_commit_regs - handle triggers and finish event commit
+ * @file: The file pointer assoctiated to the event
+ * @buffer: The ring buffer that the event is being written to
+ * @event: The event meta data in the ring buffer
+ * @entry: The event itself
+ * @irq_flags: The state of the interrupts at the start of the event
+ * @pc: The state of the preempt count at the start of the event.
+ *
+ * This is a helper function to handle triggers that require data
+ * from the event itself. It also tests the event against filters and
+ * if the event is soft disabled and should be discarded.
+ *
+ * Same as event_trigger_unlock_commit() but calls
+ * trace_buffer_unlock_commit_regs() instead of trace_buffer_unlock_commit().
+ */
+static inline void
+event_trigger_unlock_commit_regs(struct ftrace_event_file *file,
+				 struct ring_buffer *buffer,
+				 struct ring_buffer_event *event,
+				 void *entry, unsigned long irq_flags, int pc,
+				 struct pt_regs *regs)
+{
+	enum event_trigger_type tt = ETT_NONE;
+
+	if (!__event_trigger_test_discard(file, buffer, event, entry, &tt))
+		trace_buffer_unlock_commit_regs(buffer, event,
+						irq_flags, pc, regs);
+
+	if (tt)
+		event_triggers_post_call(file, tt);
+}
+
+#ifdef CONFIG_BPF_SYSCALL
+unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx);
+#else
+static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
+{
+	return 1;
+}
+#endif
+
+enum {
+	FILTER_OTHER = 0,
+	FILTER_STATIC_STRING,
+	FILTER_DYN_STRING,
+	FILTER_PTR_STRING,
+	FILTER_TRACE_FN,
+};
+
+extern int trace_event_raw_init(struct ftrace_event_call *call);
+extern int trace_define_field(struct ftrace_event_call *call, const char *type,
+			      const char *name, int offset, int size,
+			      int is_signed, int filter_type);
+extern int trace_add_event_call(struct ftrace_event_call *call);
+extern int trace_remove_event_call(struct ftrace_event_call *call);
+
+#define is_signed_type(type)	(((type)(-1)) < (type)1)
+
+int trace_set_clr_event(const char *system, const char *event, int set);
+
+/*
+ * The double __builtin_constant_p is because gcc will give us an error
+ * if we try to allocate the static variable to fmt if it is not a
+ * constant. Even with the outer if statement optimizing out.
+ */
+#define event_trace_printk(ip, fmt, args...)				\
+do {									\
+	__trace_printk_check_format(fmt, ##args);			\
+	tracing_record_cmdline(current);				\
+	if (__builtin_constant_p(fmt)) {				\
+		static const char *trace_printk_fmt			\
+		  __attribute__((section("__trace_printk_fmt"))) =	\
+			__builtin_constant_p(fmt) ? fmt : NULL;		\
+									\
+		__trace_bprintk(ip, trace_printk_fmt, ##args);		\
+	} else								\
+		__trace_printk(ip, fmt, ##args);			\
+} while (0)
+
+#ifdef CONFIG_PERF_EVENTS
+struct perf_event;
+
+DECLARE_PER_CPU(struct pt_regs, perf_trace_regs);
+
+extern int  perf_trace_init(struct perf_event *event);
+extern void perf_trace_destroy(struct perf_event *event);
+extern int  perf_trace_add(struct perf_event *event, int flags);
+extern void perf_trace_del(struct perf_event *event, int flags);
+extern int  ftrace_profile_set_filter(struct perf_event *event, int event_id,
+				     char *filter_str);
+extern void ftrace_profile_free_filter(struct perf_event *event);
+extern void *perf_trace_buf_prepare(int size, unsigned short type,
+				    struct pt_regs **regs, int *rctxp);
+
+static inline void
+perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr,
+		       u64 count, struct pt_regs *regs, void *head,
+		       struct task_struct *task)
+{
+	perf_tp_event(addr, count, raw_data, size, regs, head, rctx, task);
+}
+#endif
+
+#endif /* _LINUX_FTRACE_EVENT_H */
-- 
cgit v1.2.3


From 645df987f7c1740bb1ba783ab907001720a20cf7 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Mon, 4 May 2015 18:12:44 -0400
Subject: tracing: Rename ftrace_print_*() functions ta trace_print_*()

The name "ftrace" really refers to the function hook infrastructure. It
is not about the trace_events. The functions ftrace_print_*() are not part of
the function infrastructure, and the names can be confusing. Rename them
to be trace_print_*().

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/trace_events.h | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index f8465d65f3c7..29627cbafdea 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -1,6 +1,6 @@
 
-#ifndef _LINUX_FTRACE_EVENT_H
-#define _LINUX_FTRACE_EVENT_H
+#ifndef _LINUX_TRACE_EVENT_H
+#define _LINUX_TRACE_EVENT_H
 
 #include <linux/ring_buffer.h>
 #include <linux/trace_seq.h>
@@ -25,27 +25,27 @@ struct trace_print_flags_u64 {
 	const char		*name;
 };
 
-const char *ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
-				   unsigned long flags,
-				   const struct trace_print_flags *flag_array);
+const char *trace_print_flags_seq(struct trace_seq *p, const char *delim,
+				  unsigned long flags,
+				  const struct trace_print_flags *flag_array);
 
-const char *ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
-				     const struct trace_print_flags *symbol_array);
+const char *trace_print_symbols_seq(struct trace_seq *p, unsigned long val,
+				    const struct trace_print_flags *symbol_array);
 
 #if BITS_PER_LONG == 32
-const char *ftrace_print_symbols_seq_u64(struct trace_seq *p,
-					 unsigned long long val,
-					 const struct trace_print_flags_u64
+const char *trace_print_symbols_seq_u64(struct trace_seq *p,
+					unsigned long long val,
+					const struct trace_print_flags_u64
 								 *symbol_array);
 #endif
 
-const char *ftrace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr,
-				     unsigned int bitmask_size);
+const char *trace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr,
+				    unsigned int bitmask_size);
 
-const char *ftrace_print_hex_seq(struct trace_seq *p,
-				 const unsigned char *buf, int len);
+const char *trace_print_hex_seq(struct trace_seq *p,
+				const unsigned char *buf, int len);
 
-const char *ftrace_print_array_seq(struct trace_seq *p,
+const char *trace_print_array_seq(struct trace_seq *p,
 				   const void *buf, int count,
 				   size_t el_size);
 
-- 
cgit v1.2.3


From 9023c930902fbbcf0cebf6110828700f792989a4 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Tue, 5 May 2015 09:39:12 -0400
Subject: tracing: Rename (un)register_ftrace_event() to
 (un)register_trace_event()

The name "ftrace" really refers to the function hook infrastructure. It
is not about the trace_events. The functions (un)register_ftrace_event() is
really about trace_events, and the name should be register_trace_event()
instead.

Also renamed ftrace_event_reg() to trace_event_reg() for the same reason.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/trace_events.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 29627cbafdea..99924c07a042 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -132,8 +132,8 @@ struct trace_event {
 	struct trace_event_functions	*funcs;
 };
 
-extern int register_ftrace_event(struct trace_event *event);
-extern int unregister_ftrace_event(struct trace_event *event);
+extern int register_trace_event(struct trace_event *event);
+extern int unregister_trace_event(struct trace_event *event);
 
 /* Return values for print_line callback */
 enum print_line_t {
@@ -216,7 +216,7 @@ struct ftrace_event_class {
 	int			(*raw_init)(struct ftrace_event_call *);
 };
 
-extern int ftrace_event_reg(struct ftrace_event_call *event,
+extern int trace_event_reg(struct ftrace_event_call *event,
 			    enum trace_reg type, void *data);
 
 struct ftrace_event_buffer {
-- 
cgit v1.2.3


From 7f1d2f8210195c8c309d424a77dbf06a6d2186f4 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Tue, 5 May 2015 10:09:53 -0400
Subject: tracing: Rename ftrace_event_file to trace_event_file

The name "ftrace" really refers to the function hook infrastructure. It
is not about the trace_events. The structure ftrace_event_file is really
about trace events and not "ftrace". Rename it to trace_event_file.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/trace_events.h | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 99924c07a042..ae19233c7dd8 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -157,11 +157,11 @@ static inline enum print_line_t trace_handle_return(struct trace_seq *s)
 void tracing_generic_entry_update(struct trace_entry *entry,
 				  unsigned long flags,
 				  int pc);
-struct ftrace_event_file;
+struct trace_event_file;
 
 struct ring_buffer_event *
 trace_event_buffer_lock_reserve(struct ring_buffer **current_buffer,
-				struct ftrace_event_file *ftrace_file,
+				struct trace_event_file *trace_file,
 				int type, unsigned long len,
 				unsigned long flags, int pc);
 struct ring_buffer_event *
@@ -222,14 +222,14 @@ extern int trace_event_reg(struct ftrace_event_call *event,
 struct ftrace_event_buffer {
 	struct ring_buffer		*buffer;
 	struct ring_buffer_event	*event;
-	struct ftrace_event_file	*ftrace_file;
+	struct trace_event_file		*trace_file;
 	void				*entry;
 	unsigned long			flags;
 	int				pc;
 };
 
 void *ftrace_event_buffer_reserve(struct ftrace_event_buffer *fbuffer,
-				  struct ftrace_event_file *ftrace_file,
+				  struct trace_event_file *trace_file,
 				  unsigned long len);
 
 void ftrace_event_buffer_commit(struct ftrace_event_buffer *fbuffer);
@@ -349,7 +349,7 @@ enum {
 	FTRACE_EVENT_FL_TRIGGER_COND	= (1 << FTRACE_EVENT_FL_TRIGGER_COND_BIT),
 };
 
-struct ftrace_event_file {
+struct trace_event_file {
 	struct list_head		list;
 	struct ftrace_event_call	*event_call;
 	struct event_filter		*filter;
@@ -414,15 +414,15 @@ enum event_trigger_type {
 
 extern int filter_match_preds(struct event_filter *filter, void *rec);
 
-extern int filter_check_discard(struct ftrace_event_file *file, void *rec,
+extern int filter_check_discard(struct trace_event_file *file, void *rec,
 				struct ring_buffer *buffer,
 				struct ring_buffer_event *event);
 extern int call_filter_check_discard(struct ftrace_event_call *call, void *rec,
 				     struct ring_buffer *buffer,
 				     struct ring_buffer_event *event);
-extern enum event_trigger_type event_triggers_call(struct ftrace_event_file *file,
+extern enum event_trigger_type event_triggers_call(struct trace_event_file *file,
 						   void *rec);
-extern void event_triggers_post_call(struct ftrace_event_file *file,
+extern void event_triggers_post_call(struct trace_event_file *file,
 				     enum event_trigger_type tt);
 
 /**
@@ -435,7 +435,7 @@ extern void event_triggers_post_call(struct ftrace_event_file *file,
  * otherwise false.
  */
 static inline bool
-ftrace_trigger_soft_disabled(struct ftrace_event_file *file)
+ftrace_trigger_soft_disabled(struct trace_event_file *file)
 {
 	unsigned long eflags = file->flags;
 
@@ -462,7 +462,7 @@ ftrace_trigger_soft_disabled(struct ftrace_event_file *file)
  * Returns true if the event is discarded, false otherwise.
  */
 static inline bool
-__event_trigger_test_discard(struct ftrace_event_file *file,
+__event_trigger_test_discard(struct trace_event_file *file,
 			     struct ring_buffer *buffer,
 			     struct ring_buffer_event *event,
 			     void *entry,
@@ -495,7 +495,7 @@ __event_trigger_test_discard(struct ftrace_event_file *file,
  * if the event is soft disabled and should be discarded.
  */
 static inline void
-event_trigger_unlock_commit(struct ftrace_event_file *file,
+event_trigger_unlock_commit(struct trace_event_file *file,
 			    struct ring_buffer *buffer,
 			    struct ring_buffer_event *event,
 			    void *entry, unsigned long irq_flags, int pc)
@@ -526,7 +526,7 @@ event_trigger_unlock_commit(struct ftrace_event_file *file,
  * trace_buffer_unlock_commit_regs() instead of trace_buffer_unlock_commit().
  */
 static inline void
-event_trigger_unlock_commit_regs(struct ftrace_event_file *file,
+event_trigger_unlock_commit_regs(struct trace_event_file *file,
 				 struct ring_buffer *buffer,
 				 struct ring_buffer_event *event,
 				 void *entry, unsigned long irq_flags, int pc,
-- 
cgit v1.2.3


From 2425bcb9240f8c97d793cb31c8e8d8d0a843fa29 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Tue, 5 May 2015 11:45:27 -0400
Subject: tracing: Rename ftrace_event_{call,class} to trace_event_{call,class}

The name "ftrace" really refers to the function hook infrastructure. It
is not about the trace_events. The structures ftrace_event_call and
ftrace_event_class have nothing to do with the function hooks, and are
really trace_event structures. Rename ftrace_event_* to trace_event_*.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/module.h       |  2 +-
 include/linux/perf_event.h   |  2 +-
 include/linux/syscalls.h     | 12 ++++++------
 include/linux/trace_events.h | 38 +++++++++++++++++++-------------------
 4 files changed, 27 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/module.h b/include/linux/module.h
index c883b86ea964..3e0d492682bb 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -336,7 +336,7 @@ struct module {
 	const char **trace_bprintk_fmt_start;
 #endif
 #ifdef CONFIG_EVENT_TRACING
-	struct ftrace_event_call **trace_events;
+	struct trace_event_call **trace_events;
 	unsigned int num_trace_events;
 	struct trace_enum_map **trace_enums;
 	unsigned int num_trace_enums;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 61992cf2e977..d089d6d58ae0 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -483,7 +483,7 @@ struct perf_event {
 	void				*overflow_handler_context;
 
 #ifdef CONFIG_EVENT_TRACING
-	struct ftrace_event_call	*tp_event;
+	struct trace_event_call		*tp_event;
 	struct event_filter		*filter;
 #ifdef CONFIG_FUNCTION_TRACER
 	struct ftrace_ops               ftrace_ops;
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 76d1e38aabe1..d8b06abb264f 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -111,14 +111,14 @@ union bpf_attr;
 #define __SC_STR_ADECL(t, a)	#a
 #define __SC_STR_TDECL(t, a)	#t
 
-extern struct ftrace_event_class event_class_syscall_enter;
-extern struct ftrace_event_class event_class_syscall_exit;
+extern struct trace_event_class event_class_syscall_enter;
+extern struct trace_event_class event_class_syscall_exit;
 extern struct trace_event_functions enter_syscall_print_funcs;
 extern struct trace_event_functions exit_syscall_print_funcs;
 
 #define SYSCALL_TRACE_ENTER_EVENT(sname)				\
 	static struct syscall_metadata __syscall_meta_##sname;		\
-	static struct ftrace_event_call __used				\
+	static struct trace_event_call __used				\
 	  event_enter_##sname = {					\
 		.class			= &event_class_syscall_enter,	\
 		{							\
@@ -128,13 +128,13 @@ extern struct trace_event_functions exit_syscall_print_funcs;
 		.data			= (void *)&__syscall_meta_##sname,\
 		.flags                  = TRACE_EVENT_FL_CAP_ANY,	\
 	};								\
-	static struct ftrace_event_call __used				\
+	static struct trace_event_call __used				\
 	  __attribute__((section("_ftrace_events")))			\
 	 *__event_enter_##sname = &event_enter_##sname;
 
 #define SYSCALL_TRACE_EXIT_EVENT(sname)					\
 	static struct syscall_metadata __syscall_meta_##sname;		\
-	static struct ftrace_event_call __used				\
+	static struct trace_event_call __used				\
 	  event_exit_##sname = {					\
 		.class			= &event_class_syscall_exit,	\
 		{							\
@@ -144,7 +144,7 @@ extern struct trace_event_functions exit_syscall_print_funcs;
 		.data			= (void *)&__syscall_meta_##sname,\
 		.flags                  = TRACE_EVENT_FL_CAP_ANY,	\
 	};								\
-	static struct ftrace_event_call __used				\
+	static struct trace_event_call __used				\
 	  __attribute__((section("_ftrace_events")))			\
 	*__event_exit_##sname = &event_exit_##sname;
 
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index ae19233c7dd8..d10ab04a17b2 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -200,23 +200,23 @@ enum trace_reg {
 #endif
 };
 
-struct ftrace_event_call;
+struct trace_event_call;
 
-struct ftrace_event_class {
+struct trace_event_class {
 	const char		*system;
 	void			*probe;
 #ifdef CONFIG_PERF_EVENTS
 	void			*perf_probe;
 #endif
-	int			(*reg)(struct ftrace_event_call *event,
+	int			(*reg)(struct trace_event_call *event,
 				       enum trace_reg type, void *data);
-	int			(*define_fields)(struct ftrace_event_call *);
-	struct list_head	*(*get_fields)(struct ftrace_event_call *);
+	int			(*define_fields)(struct trace_event_call *);
+	struct list_head	*(*get_fields)(struct trace_event_call *);
 	struct list_head	fields;
-	int			(*raw_init)(struct ftrace_event_call *);
+	int			(*raw_init)(struct trace_event_call *);
 };
 
-extern int trace_event_reg(struct ftrace_event_call *event,
+extern int trace_event_reg(struct trace_event_call *event,
 			    enum trace_reg type, void *data);
 
 struct ftrace_event_buffer {
@@ -269,9 +269,9 @@ enum {
 	TRACE_EVENT_FL_KPROBE		= (1 << TRACE_EVENT_FL_KPROBE_BIT),
 };
 
-struct ftrace_event_call {
+struct trace_event_call {
 	struct list_head	list;
-	struct ftrace_event_class *class;
+	struct trace_event_class *class;
 	union {
 		char			*name;
 		/* Set TRACE_EVENT_FL_TRACEPOINT flag when using "tp" */
@@ -298,13 +298,13 @@ struct ftrace_event_call {
 	struct hlist_head __percpu	*perf_events;
 	struct bpf_prog			*prog;
 
-	int	(*perf_perm)(struct ftrace_event_call *,
+	int	(*perf_perm)(struct trace_event_call *,
 			     struct perf_event *);
 #endif
 };
 
 static inline const char *
-ftrace_event_name(struct ftrace_event_call *call)
+ftrace_event_name(struct trace_event_call *call)
 {
 	if (call->flags & TRACE_EVENT_FL_TRACEPOINT)
 		return call->tp ? call->tp->name : NULL;
@@ -351,7 +351,7 @@ enum {
 
 struct trace_event_file {
 	struct list_head		list;
-	struct ftrace_event_call	*event_call;
+	struct trace_event_call		*event_call;
 	struct event_filter		*filter;
 	struct dentry			*dir;
 	struct trace_array		*tr;
@@ -388,7 +388,7 @@ struct trace_event_file {
 	early_initcall(trace_init_flags_##name);
 
 #define __TRACE_EVENT_PERF_PERM(name, expr...)				\
-	static int perf_perm_##name(struct ftrace_event_call *tp_event, \
+	static int perf_perm_##name(struct trace_event_call *tp_event, \
 				    struct perf_event *p_event)		\
 	{								\
 		return ({ expr; });					\
@@ -417,7 +417,7 @@ extern int filter_match_preds(struct event_filter *filter, void *rec);
 extern int filter_check_discard(struct trace_event_file *file, void *rec,
 				struct ring_buffer *buffer,
 				struct ring_buffer_event *event);
-extern int call_filter_check_discard(struct ftrace_event_call *call, void *rec,
+extern int call_filter_check_discard(struct trace_event_call *call, void *rec,
 				     struct ring_buffer *buffer,
 				     struct ring_buffer_event *event);
 extern enum event_trigger_type event_triggers_call(struct trace_event_file *file,
@@ -559,12 +559,12 @@ enum {
 	FILTER_TRACE_FN,
 };
 
-extern int trace_event_raw_init(struct ftrace_event_call *call);
-extern int trace_define_field(struct ftrace_event_call *call, const char *type,
+extern int trace_event_raw_init(struct trace_event_call *call);
+extern int trace_define_field(struct trace_event_call *call, const char *type,
 			      const char *name, int offset, int size,
 			      int is_signed, int filter_type);
-extern int trace_add_event_call(struct ftrace_event_call *call);
-extern int trace_remove_event_call(struct ftrace_event_call *call);
+extern int trace_add_event_call(struct trace_event_call *call);
+extern int trace_remove_event_call(struct trace_event_call *call);
 
 #define is_signed_type(type)	(((type)(-1)) < (type)1)
 
@@ -613,4 +613,4 @@ perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr,
 }
 #endif
 
-#endif /* _LINUX_FTRACE_EVENT_H */
+#endif /* _LINUX_TRACE_EVENT_H */
-- 
cgit v1.2.3


From 3f795dcfc7364cd811c3f6f03d115fcefbbdc1ca Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Tue, 5 May 2015 13:18:46 -0400
Subject: tracing: Rename ftrace_event_buffer to trace_event_buffer.

The name "ftrace" really refers to the function hook infrastructure. It
is not about the trace_events. The ftrace_event_buffer functions and data
structures are for trace_events and not for function hooks. Rename them
to trace_event_buffer*.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/trace_events.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index d10ab04a17b2..a1fa8ebaf684 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -219,7 +219,7 @@ struct trace_event_class {
 extern int trace_event_reg(struct trace_event_call *event,
 			    enum trace_reg type, void *data);
 
-struct ftrace_event_buffer {
+struct trace_event_buffer {
 	struct ring_buffer		*buffer;
 	struct ring_buffer_event	*event;
 	struct trace_event_file		*trace_file;
@@ -228,11 +228,11 @@ struct ftrace_event_buffer {
 	int				pc;
 };
 
-void *ftrace_event_buffer_reserve(struct ftrace_event_buffer *fbuffer,
+void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer,
 				  struct trace_event_file *trace_file,
 				  unsigned long len);
 
-void ftrace_event_buffer_commit(struct ftrace_event_buffer *fbuffer);
+void trace_event_buffer_commit(struct trace_event_buffer *fbuffer);
 
 enum {
 	TRACE_EVENT_FL_FILTERED_BIT,
-- 
cgit v1.2.3


From 892c505aac2bdded3c8ec2ec27abc6d74fd210f5 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Tue, 5 May 2015 14:18:11 -0400
Subject: tracing: Rename ftrace_output functions to trace_output

The name "ftrace" really refers to the function hook infrastructure. It
is not about the trace_events. The ftrace_output_*() and ftrace_raw_output_*()
functions represent the trace_event code. Rename them to just trace_output
or trace_raw_output.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/trace_events.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index a1fa8ebaf684..12ca46322a94 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -52,8 +52,8 @@ const char *trace_print_array_seq(struct trace_seq *p,
 struct trace_iterator;
 struct trace_event;
 
-int ftrace_raw_output_prep(struct trace_iterator *iter,
-			   struct trace_event *event);
+int trace_raw_output_prep(struct trace_iterator *iter,
+			  struct trace_event *event);
 
 /*
  * The trace entry - the most basic unit of tracing. This is what
@@ -183,7 +183,7 @@ void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
 
 void tracing_record_cmdline(struct task_struct *tsk);
 
-int ftrace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...);
+int trace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...);
 
 struct event_filter;
 
-- 
cgit v1.2.3


From 609a74045238c303bbe9396775eacf5bac1f51cc Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Wed, 13 May 2015 13:44:36 -0400
Subject: tracing: Rename FTRACE_MAX_EVENT to TRACE_EVENT_TYPE_MAX

The name "ftrace" really refers to the function hook infrastructure. It
is not about the trace_events. Rename the max trace_event type size to
something more descriptive and appropriate.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/trace_events.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 12ca46322a94..6f28464be418 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -68,7 +68,7 @@ struct trace_entry {
 	int			pid;
 };
 
-#define FTRACE_MAX_EVENT						\
+#define TRACE_EVENT_TYPE_MAX						\
 	((1 << (sizeof(((struct trace_entry *)0)->type) * 8)) - 1)
 
 /*
-- 
cgit v1.2.3


From 687fcc4aee4567df14e31e82d6993418b826f408 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Wed, 13 May 2015 14:20:14 -0400
Subject: tracing: Rename ftrace_event_name() to trace_event_name()

The name "ftrace" really refers to the function hook infrastructure. It
is not about the trace_events. ftrace_event_name() returns the name of
an event tracepoint, has nothing to do with function tracing. Rename it
to trace_event_name().

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/trace_events.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 6f28464be418..15617798849c 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -304,7 +304,7 @@ struct trace_event_call {
 };
 
 static inline const char *
-ftrace_event_name(struct trace_event_call *call)
+trace_event_name(struct trace_event_call *call)
 {
 	if (call->flags & TRACE_EVENT_FL_TRACEPOINT)
 		return call->tp ? call->tp->name : NULL;
-- 
cgit v1.2.3


From 7967b3e0c40ff72fb2cf44d3b50e2cb388ef6c67 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Wed, 13 May 2015 14:59:40 -0400
Subject: tracing: Rename struct ftrace_subsystem_dir to trace_subsystem_dir

The name "ftrace" really refers to the function hook infrastructure. It
is not about the trace_events. The structure ftrace_subsystem_dir holds
the information about trace event subsystems. It should not be named
ftrace, rename it to trace_subsystem_dir.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/trace_events.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 15617798849c..d4ad58ec684a 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -313,7 +313,7 @@ trace_event_name(struct trace_event_call *call)
 }
 
 struct trace_array;
-struct ftrace_subsystem_dir;
+struct trace_subsystem_dir;
 
 enum {
 	FTRACE_EVENT_FL_ENABLED_BIT,
@@ -355,7 +355,7 @@ struct trace_event_file {
 	struct event_filter		*filter;
 	struct dentry			*dir;
 	struct trace_array		*tr;
-	struct ftrace_subsystem_dir	*system;
+	struct trace_subsystem_dir	*system;
 	struct list_head		triggers;
 
 	/*
-- 
cgit v1.2.3


From 1bd758eb1cab2fa5b71a23f9e5d3c8076f4ed650 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Tue, 12 May 2015 14:56:07 +0200
Subject: net: change name of flow_dissector header to match the .c file name

add couple of empty lines on the way.

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c0b574a414e7..e35c2b13d434 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -34,7 +34,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/netdev_features.h>
 #include <linux/sched.h>
-#include <net/flow_keys.h>
+#include <net/flow_dissector.h>
 
 /* A. Checksumming of received packets by device.
  *
-- 
cgit v1.2.3


From 10b89ee43e849544eddfe34e535341fc077464ec Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Tue, 12 May 2015 14:56:09 +0200
Subject: net: move *skb_get_poff declarations into correct header

Since these functions are defined in flow_dissector.c, move header
declarations from skbuff.h into flow_dissector.h

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index e35c2b13d434..17607ab9e7a2 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3424,10 +3424,6 @@ struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb,
 				     unsigned int transport_len,
 				     __sum16(*skb_chkf)(struct sk_buff *skb));
 
-u32 skb_get_poff(const struct sk_buff *skb);
-u32 __skb_get_poff(const struct sk_buff *skb, void *data,
-		   const struct flow_keys *keys, int hlen);
-
 /**
  * skb_head_is_locked - Determine if the skb->head is locked down
  * @skb: skb to check
-- 
cgit v1.2.3


From 9c684b5083bc191c4b7b189c73d75587e167a474 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Tue, 12 May 2015 14:56:11 +0200
Subject: net: move __skb_get_hash function declaration to flow_dissector.h

Since the definition of the function is in flow_dissector.c, it makes
sense to have the declaration in flow_dissector.h

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 17607ab9e7a2..ae2d1b7769d8 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -918,7 +918,6 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, enum pkt_hash_types type)
 	skb->hash = hash;
 }
 
-void __skb_get_hash(struct sk_buff *skb);
 static inline __u32 skb_get_hash(struct sk_buff *skb)
 {
 	if (!skb->l4_hash && !skb->sw_hash)
-- 
cgit v1.2.3


From 5605c76240aadc823e3d46ac9afde2f26fbcf019 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Tue, 12 May 2015 14:56:12 +0200
Subject: net: move __skb_tx_hash to dev.c

__skb_tx_hash function has no relation to flow_dissect so just move it
to dev.c

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 3 +++
 include/linux/skbuff.h    | 3 ---
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cd0951c1893d..d3ed01c18247 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2832,6 +2832,9 @@ static inline int netif_set_xps_queue(struct net_device *dev,
 }
 #endif
 
+u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
+		  unsigned int num_tx_queues);
+
 /*
  * Returns a Tx hash for the given packet when dev->real_num_tx_queues is used
  * as a distribution range limit for the returned value.
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index ae2d1b7769d8..b01c7fba7c17 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3299,9 +3299,6 @@ static inline bool skb_rx_queue_recorded(const struct sk_buff *skb)
 	return skb->queue_mapping != 0;
 }
 
-u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
-		  unsigned int num_tx_queues);
-
 static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
 {
 #ifdef CONFIG_XFRM
-- 
cgit v1.2.3


From 06635a35d13d42b95422bba6633f175245cc644e Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Tue, 12 May 2015 14:56:16 +0200
Subject: flow_dissect: use programable dissector in skb_flow_dissect and
 friends

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index b01c7fba7c17..f83aa6568cbf 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1935,8 +1935,8 @@ static inline void skb_probe_transport_header(struct sk_buff *skb,
 
 	if (skb_transport_header_was_set(skb))
 		return;
-	else if (skb_flow_dissect(skb, &keys))
-		skb_set_transport_header(skb, keys.thoff);
+	else if (skb_flow_dissect_flow_keys(skb, &keys))
+		skb_set_transport_header(skb, keys.basic.thoff);
 	else
 		skb_set_transport_header(skb, offset_hint);
 }
-- 
cgit v1.2.3


From 5d6ad960a71f0b36d95d74ef93285733b9f62f59 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Wed, 13 May 2015 15:12:33 -0400
Subject: tracing: Rename FTRACE_EVENT_FL_* flags to EVENT_FILE_FL_*

The name "ftrace" really refers to the function hook infrastructure. It
is not about the trace_events. The FTRACE_EVENT_FL_* flags are flags to
do with the trace_event files in the tracefs directory. They are not related
to function tracing. Rename them to a more descriptive name.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/trace_events.h | 50 ++++++++++++++++++++++----------------------
 1 file changed, 25 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index d4ad58ec684a..a46c138b2eea 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -250,11 +250,11 @@ enum {
  *  FILTERED	  - The event has a filter attached
  *  CAP_ANY	  - Any user can enable for perf
  *  NO_SET_FILTER - Set when filter has error and is to be ignored
- *  IGNORE_ENABLE - For ftrace internal events, do not enable with debugfs file
+ *  IGNORE_ENABLE - For trace internal events, do not enable with debugfs file
  *  WAS_ENABLED   - Set and stays set when an event was ever enabled
  *                    (used for module unloading, if a module event is enabled,
  *                     it is best to clear the buffers that used it).
- *  USE_CALL_FILTER - For ftrace internal events, don't use file filter
+ *  USE_CALL_FILTER - For trace internal events, don't use file filter
  *  TRACEPOINT    - Event is a tracepoint
  *  KPROBE        - Event is a kprobe
  */
@@ -286,7 +286,7 @@ struct trace_event_call {
 	 *   bit 0:		filter_active
 	 *   bit 1:		allow trace by non root (cap any)
 	 *   bit 2:		failed to apply filter
-	 *   bit 3:		ftrace internal event (do not enable)
+	 *   bit 3:		trace internal event (do not enable)
 	 *   bit 4:		Event was enabled by module
 	 *   bit 5:		use call filter rather than file filter
 	 *   bit 6:		Event is a tracepoint
@@ -316,18 +316,18 @@ struct trace_array;
 struct trace_subsystem_dir;
 
 enum {
-	FTRACE_EVENT_FL_ENABLED_BIT,
-	FTRACE_EVENT_FL_RECORDED_CMD_BIT,
-	FTRACE_EVENT_FL_FILTERED_BIT,
-	FTRACE_EVENT_FL_NO_SET_FILTER_BIT,
-	FTRACE_EVENT_FL_SOFT_MODE_BIT,
-	FTRACE_EVENT_FL_SOFT_DISABLED_BIT,
-	FTRACE_EVENT_FL_TRIGGER_MODE_BIT,
-	FTRACE_EVENT_FL_TRIGGER_COND_BIT,
+	EVENT_FILE_FL_ENABLED_BIT,
+	EVENT_FILE_FL_RECORDED_CMD_BIT,
+	EVENT_FILE_FL_FILTERED_BIT,
+	EVENT_FILE_FL_NO_SET_FILTER_BIT,
+	EVENT_FILE_FL_SOFT_MODE_BIT,
+	EVENT_FILE_FL_SOFT_DISABLED_BIT,
+	EVENT_FILE_FL_TRIGGER_MODE_BIT,
+	EVENT_FILE_FL_TRIGGER_COND_BIT,
 };
 
 /*
- * Ftrace event file flags:
+ * Event file flags:
  *  ENABLED	  - The event is enabled
  *  RECORDED_CMD  - The comms should be recorded at sched_switch
  *  FILTERED	  - The event has a filter attached
@@ -339,14 +339,14 @@ enum {
  *  TRIGGER_COND  - When set, one or more triggers has an associated filter
  */
 enum {
-	FTRACE_EVENT_FL_ENABLED		= (1 << FTRACE_EVENT_FL_ENABLED_BIT),
-	FTRACE_EVENT_FL_RECORDED_CMD	= (1 << FTRACE_EVENT_FL_RECORDED_CMD_BIT),
-	FTRACE_EVENT_FL_FILTERED	= (1 << FTRACE_EVENT_FL_FILTERED_BIT),
-	FTRACE_EVENT_FL_NO_SET_FILTER	= (1 << FTRACE_EVENT_FL_NO_SET_FILTER_BIT),
-	FTRACE_EVENT_FL_SOFT_MODE	= (1 << FTRACE_EVENT_FL_SOFT_MODE_BIT),
-	FTRACE_EVENT_FL_SOFT_DISABLED	= (1 << FTRACE_EVENT_FL_SOFT_DISABLED_BIT),
-	FTRACE_EVENT_FL_TRIGGER_MODE	= (1 << FTRACE_EVENT_FL_TRIGGER_MODE_BIT),
-	FTRACE_EVENT_FL_TRIGGER_COND	= (1 << FTRACE_EVENT_FL_TRIGGER_COND_BIT),
+	EVENT_FILE_FL_ENABLED		= (1 << EVENT_FILE_FL_ENABLED_BIT),
+	EVENT_FILE_FL_RECORDED_CMD	= (1 << EVENT_FILE_FL_RECORDED_CMD_BIT),
+	EVENT_FILE_FL_FILTERED		= (1 << EVENT_FILE_FL_FILTERED_BIT),
+	EVENT_FILE_FL_NO_SET_FILTER	= (1 << EVENT_FILE_FL_NO_SET_FILTER_BIT),
+	EVENT_FILE_FL_SOFT_MODE		= (1 << EVENT_FILE_FL_SOFT_MODE_BIT),
+	EVENT_FILE_FL_SOFT_DISABLED	= (1 << EVENT_FILE_FL_SOFT_DISABLED_BIT),
+	EVENT_FILE_FL_TRIGGER_MODE	= (1 << EVENT_FILE_FL_TRIGGER_MODE_BIT),
+	EVENT_FILE_FL_TRIGGER_COND	= (1 << EVENT_FILE_FL_TRIGGER_COND_BIT),
 };
 
 struct trace_event_file {
@@ -439,10 +439,10 @@ ftrace_trigger_soft_disabled(struct trace_event_file *file)
 {
 	unsigned long eflags = file->flags;
 
-	if (!(eflags & FTRACE_EVENT_FL_TRIGGER_COND)) {
-		if (eflags & FTRACE_EVENT_FL_TRIGGER_MODE)
+	if (!(eflags & EVENT_FILE_FL_TRIGGER_COND)) {
+		if (eflags & EVENT_FILE_FL_TRIGGER_MODE)
 			event_triggers_call(file, NULL);
-		if (eflags & FTRACE_EVENT_FL_SOFT_DISABLED)
+		if (eflags & EVENT_FILE_FL_SOFT_DISABLED)
 			return true;
 	}
 	return false;
@@ -470,10 +470,10 @@ __event_trigger_test_discard(struct trace_event_file *file,
 {
 	unsigned long eflags = file->flags;
 
-	if (eflags & FTRACE_EVENT_FL_TRIGGER_COND)
+	if (eflags & EVENT_FILE_FL_TRIGGER_COND)
 		*tt = event_triggers_call(file, entry);
 
-	if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags))
+	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags))
 		ring_buffer_discard_commit(buffer, event);
 	else if (!filter_check_discard(file, entry, buffer, event))
 		return false;
-- 
cgit v1.2.3


From 09a5059aa1a2cbf8c8993e61b013cc83a0dd5833 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Wed, 13 May 2015 15:21:25 -0400
Subject: tracing: Rename ftrace_trigger_soft_disabled() to
 trace_trigger_soft_disabled()

The name "ftrace" really refers to the function hook infrastructure. It
is not about the trace_events. The ftrace_trigger_soft_disabled() tests if a
trace_event is soft disabled (called but not traced), and returns true if
it is. It has nothing to do with function tracing and should be renamed.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/trace_events.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index a46c138b2eea..1063c850dbab 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -426,7 +426,7 @@ extern void event_triggers_post_call(struct trace_event_file *file,
 				     enum event_trigger_type tt);
 
 /**
- * ftrace_trigger_soft_disabled - do triggers and test if soft disabled
+ * trace_trigger_soft_disabled - do triggers and test if soft disabled
  * @file: The file pointer of the event to test
  *
  * If any triggers without filters are attached to this event, they
@@ -435,7 +435,7 @@ extern void event_triggers_post_call(struct trace_event_file *file,
  * otherwise false.
  */
 static inline bool
-ftrace_trigger_soft_disabled(struct trace_event_file *file)
+trace_trigger_soft_disabled(struct trace_event_file *file)
 {
 	unsigned long eflags = file->flags;
 
-- 
cgit v1.2.3


From f0b5e8a42f37a880b8467e59dc814f4f21581d3d Mon Sep 17 00:00:00 2001
From: Pablo Neira <pablo@netfilter.org>
Date: Tue, 12 May 2015 20:28:07 +0200
Subject: net: kill useless net_*_ingress_queue() definitions when NET_CLS_ACT
 is unset

This fixes 4577139b2dabf589 ("net: use jump label patching for ingress qdisc in
__netif_receive_skb_core").

The only client of this is sch_ingress and it depends on NET_CLS_ACT. So
there is no way these definition can be of any help.

Cc: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 7b8e260c4a27..bd29ab4b0941 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -82,14 +82,6 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev);
 #ifdef CONFIG_NET_CLS_ACT
 void net_inc_ingress_queue(void);
 void net_dec_ingress_queue(void);
-#else
-static inline void net_inc_ingress_queue(void)
-{
-}
-
-static inline void net_dec_ingress_queue(void)
-{
-}
 #endif
 
 extern void rtnetlink_init(void);
-- 
cgit v1.2.3


From 25956b6612601cf36022392ffa83f6bf97939bcd Mon Sep 17 00:00:00 2001
From: Hanjun Guo <hanjun.guo@linaro.org>
Date: Mon, 11 May 2015 12:17:13 +0800
Subject: ACPI / processor: Introduce invalid_logical_cpuid()

In ACPI processor drivers, we use direct comparisons of cpu logical
id with -1 which are error prone in case logical cpuid is accidentally
assinged an error code and prevents us from returning an error-encoding
cpuid directly in some cases.

So introduce invalid_logical_cpuid() to identify cpu with invalid
logical cpu num, then it will be used to replace the direct comparisons
with -1.

Signed-off-by: Hanjun Guo <hanjun.guo@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/acpi.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index e4da5e35e29c..913b49f9a6e6 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -158,6 +158,11 @@ typedef u32 phys_cpuid_t;
 #define PHYS_CPUID_INVALID (phys_cpuid_t)(-1)
 #endif
 
+static inline bool invalid_logical_cpuid(u32 cpuid)
+{
+	return (int)cpuid < 0;
+}
+
 #ifdef CONFIG_ACPI_HOTPLUG_CPU
 /* Arch dependent functions for cpu hotplug support */
 int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu);
-- 
cgit v1.2.3


From ddcc18f5bdd1aafd457032ec693fd9d0af764d61 Mon Sep 17 00:00:00 2001
From: Hanjun Guo <hanjun.guo@linaro.org>
Date: Wed, 13 May 2015 16:19:30 +0800
Subject: ACPI / processor: Introduce invalid_phys_cpuid()

Introduce invalid_phys_cpuid() to identify cpu with invalid
physical ID, then used it as replacement of the direct comparisons
with PHYS_CPUID_INVALID.

Signed-off-by: Hanjun Guo <hanjun.guo@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/acpi.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 913b49f9a6e6..90e4ed1eb191 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -163,6 +163,11 @@ static inline bool invalid_logical_cpuid(u32 cpuid)
 	return (int)cpuid < 0;
 }
 
+static inline bool invalid_phys_cpuid(phys_cpuid_t phys_id)
+{
+	return phys_id == PHYS_CPUID_INVALID;
+}
+
 #ifdef CONFIG_ACPI_HOTPLUG_CPU
 /* Arch dependent functions for cpu hotplug support */
 int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu);
-- 
cgit v1.2.3


From 87d5c18ce1f41a2410d4fb26d5d68c55867450f5 Mon Sep 17 00:00:00 2001
From: Pablo Neira <pablo@netfilter.org>
Date: Wed, 13 May 2015 18:19:34 +0200
Subject: netfilter: cleanup struct nf_hook_ops indentation

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 63560d0a8dfe..83be4a3cec98 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -79,16 +79,16 @@ typedef unsigned int nf_hookfn(const struct nf_hook_ops *ops,
 			       const struct nf_hook_state *state);
 
 struct nf_hook_ops {
-	struct list_head list;
+	struct list_head 	list;
 
 	/* User fills in from here down. */
-	nf_hookfn	*hook;
-	struct module	*owner;
-	void		*priv;
-	u_int8_t	pf;
-	unsigned int	hooknum;
+	nf_hookfn		*hook;
+	struct module		*owner;
+	void			*priv;
+	u_int8_t		pf;
+	unsigned int		hooknum;
 	/* Hooks are ordered in ascending priority. */
-	int		priority;
+	int			priority;
 };
 
 struct nf_sockopt_ops {
-- 
cgit v1.2.3


From f7191483461ce2ae579b6f7227fa7ce49e006656 Mon Sep 17 00:00:00 2001
From: Pablo Neira <pablo@netfilter.org>
Date: Wed, 13 May 2015 18:19:35 +0200
Subject: netfilter: add hook list to nf_hook_state

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 83be4a3cec98..388ed1952242 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -54,10 +54,12 @@ struct nf_hook_state {
 	struct net_device *in;
 	struct net_device *out;
 	struct sock *sk;
+	struct list_head *hook_list;
 	int (*okfn)(struct sock *, struct sk_buff *);
 };
 
 static inline void nf_hook_state_init(struct nf_hook_state *p,
+				      struct list_head *hook_list,
 				      unsigned int hook,
 				      int thresh, u_int8_t pf,
 				      struct net_device *indev,
@@ -71,6 +73,7 @@ static inline void nf_hook_state_init(struct nf_hook_state *p,
 	p->in = indev;
 	p->out = outdev;
 	p->sk = sk;
+	p->hook_list = hook_list;
 	p->okfn = okfn;
 }
 
@@ -166,8 +169,8 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
 	if (nf_hooks_active(pf, hook)) {
 		struct nf_hook_state state;
 
-		nf_hook_state_init(&state, hook, thresh, pf,
-				   indev, outdev, sk, okfn);
+		nf_hook_state_init(&state, &nf_hooks[pf][hook], hook, thresh,
+				   pf, indev, outdev, sk, okfn);
 		return nf_hook_slow(skb, &state);
 	}
 	return 1;
-- 
cgit v1.2.3


From b8d0aad0c77f488d1d51a02d871a5cbc2d8032b9 Mon Sep 17 00:00:00 2001
From: Pablo Neira <pablo@netfilter.org>
Date: Wed, 13 May 2015 18:19:36 +0200
Subject: netfilter: add nf_hook_list_active()

In preparation to have netfilter ingress per-device hook list.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter.h | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 388ed1952242..49d00638d1fa 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -134,26 +134,33 @@ extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
 #ifdef HAVE_JUMP_LABEL
 extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
 
-static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook)
+static inline bool nf_hook_list_active(struct list_head *nf_hook_list,
+				       u_int8_t pf, unsigned int hook)
 {
 	if (__builtin_constant_p(pf) &&
 	    __builtin_constant_p(hook))
 		return static_key_false(&nf_hooks_needed[pf][hook]);
 
-	return !list_empty(&nf_hooks[pf][hook]);
+	return !list_empty(nf_hook_list);
 }
 #else
-static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook)
+static inline bool nf_hook_list_active(struct list_head *nf_hook_list,
+				       u_int8_t pf, unsigned int hook)
 {
-	return !list_empty(&nf_hooks[pf][hook]);
+	return !list_empty(nf_hook_list);
 }
 #endif
 
+static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook)
+{
+	return nf_hook_list_active(&nf_hooks[pf][hook], pf, hook);
+}
+
 int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state);
 
 /**
  *	nf_hook_thresh - call a netfilter hook
- *	
+ *
  *	Returns 1 if the hook has allowed the packet to pass.  The function
  *	okfn must be invoked by the caller in this case.  Any other return
  *	value indicates the packet has been consumed by the hook.
-- 
cgit v1.2.3


From 1cf51900f8545b358b5deaacfda348d990f671db Mon Sep 17 00:00:00 2001
From: Pablo Neira <pablo@netfilter.org>
Date: Wed, 13 May 2015 18:19:37 +0200
Subject: net: add CONFIG_NET_INGRESS to enable ingress filtering

This new config switch enables the ingress filtering infrastructure that is
controlled through the ingress_needed static key. This prepares the
introduction of the Netfilter ingress hook that resides under this unique
static key.

Note that CONFIG_SCH_INGRESS automatically selects this, that should be no
problem since this also depends on CONFIG_NET_CLS_ACT.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index bd29ab4b0941..a2324fb45cf4 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -79,7 +79,7 @@ static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev)
 
 struct netdev_queue *dev_ingress_queue_create(struct net_device *dev);
 
-#ifdef CONFIG_NET_CLS_ACT
+#ifdef CONFIG_NET_INGRESS
 void net_inc_ingress_queue(void);
 void net_dec_ingress_queue(void);
 #endif
-- 
cgit v1.2.3


From e687ad60af09010936bbd0b2a3b5d90a8ee8353c Mon Sep 17 00:00:00 2001
From: Pablo Neira <pablo@netfilter.org>
Date: Wed, 13 May 2015 18:19:38 +0200
Subject: netfilter: add netfilter ingress hook after handle_ing() under unique
 static key

This patch adds the Netfilter ingress hook just after the existing tc ingress
hook, that seems to be the consensus solution for this.

Note that the Netfilter hook resides under the global static key that enables
ingress filtering. Nonetheless, Netfilter still also has its own static key for
minimal impact on the existing handle_ing().

* Without this patch:

Result: OK: 6216490(c6216338+d152) usec, 100000000 (60byte,0frags)
  16086246pps 7721Mb/sec (7721398080bps) errors: 100000000

    42.46%  kpktgend_0   [kernel.kallsyms]   [k] __netif_receive_skb_core
    25.92%  kpktgend_0   [kernel.kallsyms]   [k] kfree_skb
     7.81%  kpktgend_0   [pktgen]            [k] pktgen_thread_worker
     5.62%  kpktgend_0   [kernel.kallsyms]   [k] ip_rcv
     2.70%  kpktgend_0   [kernel.kallsyms]   [k] netif_receive_skb_internal
     2.34%  kpktgend_0   [kernel.kallsyms]   [k] netif_receive_skb_sk
     1.44%  kpktgend_0   [kernel.kallsyms]   [k] __build_skb

* With this patch:

Result: OK: 6214833(c6214731+d101) usec, 100000000 (60byte,0frags)
  16090536pps 7723Mb/sec (7723457280bps) errors: 100000000

    41.23%  kpktgend_0      [kernel.kallsyms]  [k] __netif_receive_skb_core
    26.57%  kpktgend_0      [kernel.kallsyms]  [k] kfree_skb
     7.72%  kpktgend_0      [pktgen]           [k] pktgen_thread_worker
     5.55%  kpktgend_0      [kernel.kallsyms]  [k] ip_rcv
     2.78%  kpktgend_0      [kernel.kallsyms]  [k] netif_receive_skb_internal
     2.06%  kpktgend_0      [kernel.kallsyms]  [k] netif_receive_skb_sk
     1.43%  kpktgend_0      [kernel.kallsyms]  [k] __build_skb

* Without this patch + tc ingress:

        tc filter add dev eth4 parent ffff: protocol ip prio 1 \
                u32 match ip dst 4.3.2.1/32

Result: OK: 9269001(c9268821+d179) usec, 100000000 (60byte,0frags)
  10788648pps 5178Mb/sec (5178551040bps) errors: 100000000

    40.99%  kpktgend_0   [kernel.kallsyms]  [k] __netif_receive_skb_core
    17.50%  kpktgend_0   [kernel.kallsyms]  [k] kfree_skb
    11.77%  kpktgend_0   [cls_u32]          [k] u32_classify
     5.62%  kpktgend_0   [kernel.kallsyms]  [k] tc_classify_compat
     5.18%  kpktgend_0   [pktgen]           [k] pktgen_thread_worker
     3.23%  kpktgend_0   [kernel.kallsyms]  [k] tc_classify
     2.97%  kpktgend_0   [kernel.kallsyms]  [k] ip_rcv
     1.83%  kpktgend_0   [kernel.kallsyms]  [k] netif_receive_skb_internal
     1.50%  kpktgend_0   [kernel.kallsyms]  [k] netif_receive_skb_sk
     0.99%  kpktgend_0   [kernel.kallsyms]  [k] __build_skb

* With this patch + tc ingress:

        tc filter add dev eth4 parent ffff: protocol ip prio 1 \
                u32 match ip dst 4.3.2.1/32

Result: OK: 9308218(c9308091+d126) usec, 100000000 (60byte,0frags)
  10743194pps 5156Mb/sec (5156733120bps) errors: 100000000

    42.01%  kpktgend_0   [kernel.kallsyms]   [k] __netif_receive_skb_core
    17.78%  kpktgend_0   [kernel.kallsyms]   [k] kfree_skb
    11.70%  kpktgend_0   [cls_u32]           [k] u32_classify
     5.46%  kpktgend_0   [kernel.kallsyms]   [k] tc_classify_compat
     5.16%  kpktgend_0   [pktgen]            [k] pktgen_thread_worker
     2.98%  kpktgend_0   [kernel.kallsyms]   [k] ip_rcv
     2.84%  kpktgend_0   [kernel.kallsyms]   [k] tc_classify
     1.96%  kpktgend_0   [kernel.kallsyms]   [k] netif_receive_skb_internal
     1.57%  kpktgend_0   [kernel.kallsyms]   [k] netif_receive_skb_sk

Note that the results are very similar before and after.

I can see gcc gets the code under the ingress static key out of the hot path.
Then, on that cold branch, it generates the code to accomodate the netfilter
ingress static key. My explanation for this is that this reduces the pressure
on the instruction cache for non-users as the new code is out of the hot path,
and it comes with minimal impact for tc ingress users.

Using gcc version 4.8.4 on:

Architecture:          x86_64
CPU op-mode(s):        32-bit, 64-bit
Byte Order:            Little Endian
CPU(s):                8
[...]
L1d cache:             16K
L1i cache:             64K
L2 cache:              2048K
L3 cache:              8192K

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h         |  3 +++
 include/linux/netfilter.h         |  1 +
 include/linux/netfilter_ingress.h | 41 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 45 insertions(+)
 create mode 100644 include/linux/netfilter_ingress.h

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d3ed01c18247..51f8d2f5dc3f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1656,6 +1656,9 @@ struct net_device {
 	struct tcf_proto __rcu  *ingress_cl_list;
 #endif
 	struct netdev_queue __rcu *ingress_queue;
+#ifdef CONFIG_NETFILTER_INGRESS
+	struct list_head	nf_hooks_ingress;
+#endif
 
 	unsigned char		broadcast[MAX_ADDR_LEN];
 #ifdef CONFIG_RFS_ACCEL
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 49d00638d1fa..f5ff5d156da8 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -86,6 +86,7 @@ struct nf_hook_ops {
 
 	/* User fills in from here down. */
 	nf_hookfn		*hook;
+	struct net_device	*dev;
 	struct module		*owner;
 	void			*priv;
 	u_int8_t		pf;
diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h
new file mode 100644
index 000000000000..cb0727fe2b3d
--- /dev/null
+++ b/include/linux/netfilter_ingress.h
@@ -0,0 +1,41 @@
+#ifndef _NETFILTER_INGRESS_H_
+#define _NETFILTER_INGRESS_H_
+
+#include <linux/netfilter.h>
+#include <linux/netdevice.h>
+
+#ifdef CONFIG_NETFILTER_INGRESS
+static inline int nf_hook_ingress_active(struct sk_buff *skb)
+{
+	return nf_hook_list_active(&skb->dev->nf_hooks_ingress,
+				   NFPROTO_NETDEV, NF_NETDEV_INGRESS);
+}
+
+static inline int nf_hook_ingress(struct sk_buff *skb)
+{
+	struct nf_hook_state state;
+
+	nf_hook_state_init(&state, &skb->dev->nf_hooks_ingress,
+			   NF_NETDEV_INGRESS, INT_MIN, NFPROTO_NETDEV, NULL,
+			   skb->dev, NULL, NULL);
+	return nf_hook_slow(skb, &state);
+}
+
+static inline void nf_hook_ingress_init(struct net_device *dev)
+{
+	INIT_LIST_HEAD(&dev->nf_hooks_ingress);
+}
+#else /* CONFIG_NETFILTER_INGRESS */
+static inline int nf_hook_ingress_active(struct sk_buff *skb)
+{
+	return 0;
+}
+
+static inline int nf_hook_ingress(struct sk_buff *skb)
+{
+	return 0;
+}
+
+static inline void nf_hook_ingress_init(struct net_device *dev) {}
+#endif /* CONFIG_NETFILTER_INGRESS */
+#endif /* _NETFILTER_INGRESS_H_ */
-- 
cgit v1.2.3


From af6c235d1a5c112964c3029eb0ed4b52c7aa33bf Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Wed, 13 May 2015 13:03:21 +0200
Subject: gpio: discourage passing base to gpio_chip

Passing a fixed base in struct gpio_chip is done for legacy
systems that cannot handle dynamic allocation. Discourage this
behaviour in the kerneldoc.

Acked-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio/driver.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 96a678842cde..cc7ec129b329 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -42,8 +42,12 @@ struct seq_file;
  * @dbg_show: optional routine to show contents in debugfs; default code
  *	will be used when this is omitted, but custom code can show extra
  *	state (such as pullup/pulldown configuration).
- * @base: identifies the first GPIO number handled by this chip; or, if
- *	negative during registration, requests dynamic ID allocation.
+ * @base: identifies the first GPIO number handled by this chip;
+ *	or, if negative during registration, requests dynamic ID allocation.
+ *	DEPRECATION: providing anything non-negative and nailing the base
+ *	base offset of GPIO chips is deprecated. Please pass -1 as base to
+ *	let gpiolib select the chip base in all possible cases. We want to
+ *	get rid of the static GPIO number space in the long run.
  * @ngpio: the number of GPIOs handled by this controller; the last GPIO
  *	handled is (base + ngpio - 1).
  * @desc: array of ngpio descriptors. Private.
-- 
cgit v1.2.3


From 7fb48c5bc3100f7674a8e26f42c1518196500728 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sun, 3 May 2015 22:05:28 +0200
Subject: netfilter: bridge: neigh_head and physoutdev can't be used at same
 time

The neigh_header is only needed when we detect DNAT after prerouting
and neigh cache didn't have a mac address for us.

The output port has not been chosen yet so we can re-use the storage
area, bringing struct size down to 32 bytes on x86_64.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/skbuff.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c0b574a414e7..3d932e64125a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -170,12 +170,14 @@ struct nf_bridge_info {
 		BRNF_PROTO_UNCHANGED,
 		BRNF_PROTO_8021Q,
 		BRNF_PROTO_PPPOE
-	} orig_proto;
+	} orig_proto:8;
 	bool			pkt_otherhost;
 	unsigned int		mask;
 	struct net_device	*physindev;
-	struct net_device	*physoutdev;
-	char			neigh_header[8];
+	union {
+		struct net_device *physoutdev;
+		char neigh_header[8];
+	};
 };
 #endif
 
-- 
cgit v1.2.3


From a3b1c1eb50f9b3e0c73c37157d0c61b2e90ae580 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <dvlasenk@redhat.com>
Date: Wed, 6 May 2015 16:28:57 +0200
Subject: netfilter: ipset: deinline ip_set_put_extensions()

On x86 allyesconfig build:
The function compiles to 489 bytes of machine code.
It has 25 callsites.

    text    data       bss       dec     hex filename
82441375 22255384 20627456 125324215 7784bb7 vmlinux.before
82434909 22255384 20627456 125317749 7783275 vmlinux

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
CC: Eric W. Biederman <ebiederm@xmission.com>
CC: David S. Miller <davem@davemloft.net>
CC: Jan Engelhardt <jengelh@medozas.de>
CC: Jiri Pirko <jpirko@redhat.com>
CC: linux-kernel@vger.kernel.org
CC: netdev@vger.kernel.org
CC: netfilter-devel@vger.kernel.org
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/ipset/ip_set.h | 24 ++----------------------
 1 file changed, 2 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index f88be7258e5f..ffdfdc24952a 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -533,29 +533,9 @@ bitmap_bytes(u32 a, u32 b)
 #include <linux/netfilter/ipset/ip_set_timeout.h>
 #include <linux/netfilter/ipset/ip_set_comment.h>
 
-static inline int
+int
 ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
-		      const void *e, bool active)
-{
-	if (SET_WITH_TIMEOUT(set)) {
-		unsigned long *timeout = ext_timeout(e, set);
-
-		if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
-			htonl(active ? ip_set_timeout_get(timeout)
-				: *timeout)))
-			return -EMSGSIZE;
-	}
-	if (SET_WITH_COUNTER(set) &&
-	    ip_set_put_counter(skb, ext_counter(e, set)))
-		return -EMSGSIZE;
-	if (SET_WITH_COMMENT(set) &&
-	    ip_set_put_comment(skb, ext_comment(e, set)))
-		return -EMSGSIZE;
-	if (SET_WITH_SKBINFO(set) &&
-	    ip_set_put_skbinfo(skb, ext_skbinfo(e, set)))
-		return -EMSGSIZE;
-	return 0;
-}
+		      const void *e, bool active);
 
 #define IP_SET_INIT_KEXT(skb, opt, set)			\
 	{ .bytes = (skb)->len, .packets = 1,		\
-- 
cgit v1.2.3


From 922f2dd1b65a888e34c472979460dc23211750a2 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 12 May 2015 10:33:24 -0700
Subject: net: phy: Add phy_ignore_ta_mask to account for broken turn-around

Some PHY devices/switches will not release the turn-around line as they
should do at the end of a MDIO transaction. To help with such
situations, allow MDIO bus drivers to be made aware of such
restrictions.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index 685809835b5c..701c7a3946e0 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -181,6 +181,9 @@ struct mii_bus {
 	/* PHY addresses to be ignored when probing */
 	u32 phy_mask;
 
+	/* PHY addresses to ignore the TA/read failure */
+	u32 phy_ignore_ta_mask;
+
 	/*
 	 * Pointer to an array of interrupts, each PHY's
 	 * interrupt at the index matching its address
-- 
cgit v1.2.3


From 8c8a457a60050d5922676f81913d87e4af6fd97b Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <n.borisov@siteground.com>
Date: Thu, 14 May 2015 14:31:01 +0300
Subject: sched: Remove redundant #ifdef

Two adjacent members in task_struct were guarded
by the same #define, so we can merge the two blocks.

Signed-off-by: Nikolay Borisov <n.borisov@siteground.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1431603061-29408-1-git-send-email-kernel@kyup.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0eceeec5a01a..5f8defa155cf 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1398,8 +1398,6 @@ struct task_struct {
 	int rcu_read_lock_nesting;
 	union rcu_special rcu_read_unlock_special;
 	struct list_head rcu_node_entry;
-#endif /* #ifdef CONFIG_PREEMPT_RCU */
-#ifdef CONFIG_PREEMPT_RCU
 	struct rcu_node *rcu_blocked_node;
 #endif /* #ifdef CONFIG_PREEMPT_RCU */
 #ifdef CONFIG_TASKS_RCU
-- 
cgit v1.2.3


From faad38492814112e3e7ce94d90123bbe301fff33 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Sun, 10 May 2015 01:18:03 +0200
Subject: sched / idle: Call idle_set_state() from cpuidle_enter_state()

Introduce a wrapper function around idle_set_state() called
sched_idle_set_state() that will pass this_rq() to it as the
first argument and make cpuidle_enter_state() call the new
function before and after entering the target state.

At the same time, remove direct invocations of idle_set_state()
from call_cpuidle().

This will allow the invocation of default_idle_call() to be
moved from call_cpuidle() to cpuidle_enter_state() safely
and call_cpuidle() to be simplified a bit as a result.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Tested-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Tested-by: Sudeep Holla <sudeep.holla@arm.com>
Acked-by: Kevin Hilman <khilman@linaro.org>
---
 include/linux/cpuidle.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 9c5e89254796..301eaaab40e3 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -200,6 +200,9 @@ static inline struct cpuidle_driver *cpuidle_get_cpu_driver(
 	struct cpuidle_device *dev) {return NULL; }
 #endif
 
+/* kernel/sched/idle.c */
+extern void sched_idle_set_state(struct cpuidle_state *idle_state);
+
 #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED
 void cpuidle_coupled_parallel_barrier(struct cpuidle_device *dev, atomic_t *a);
 #else
-- 
cgit v1.2.3


From 827a5aefc542b8fb17c00de06118e5cd0e3800f2 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Sun, 10 May 2015 01:18:46 +0200
Subject: sched / idle: Call default_idle_call() from cpuidle_enter_state()

The check of the cpuidle_enter() return value against -EBUSY
made in call_cpuidle() will not be necessary any more if
cpuidle_enter_state() calls default_idle_call() directly when it
is about to return -EBUSY, so make that happen and eliminate the
check.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Tested-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Tested-by: Sudeep Holla <sudeep.holla@arm.com>
Acked-by: Kevin Hilman <khilman@linaro.org>
---
 include/linux/cpuidle.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 301eaaab40e3..c7a63643658e 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -202,6 +202,7 @@ static inline struct cpuidle_driver *cpuidle_get_cpu_driver(
 
 /* kernel/sched/idle.c */
 extern void sched_idle_set_state(struct cpuidle_state *idle_state);
+extern void default_idle_call(void);
 
 #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED
 void cpuidle_coupled_parallel_barrier(struct cpuidle_device *dev, atomic_t *a);
-- 
cgit v1.2.3


From 4573237b01221881702fbe6655f3ae5135be1c18 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 12 May 2015 12:22:34 +0530
Subject: cpufreq: Manage governor usage history with 'policy->last_governor'

History of which governor was used last is common to all CPUs within a
policy and maintaining it per-cpu isn't the best approach for sure.

Apart from wasting memory, this also increases the complexity of
managing this data structure as it has to be updated for all CPUs.

To make that somewhat simpler, lets store this information in a new
field 'last_governor' in struct cpufreq_policy and update it on removal
of last cpu of a policy.

As a side-effect it also solves an old problem, consider a system with
two clusters 0 & 1. And there is one policy per cluster.

Cluster 0: CPU0 and 1.
Cluster 1: CPU2 and 3.

 - CPU2 is first brought online, and governor is set to performance
   (default as cpufreq_cpu_governor wasn't set).
 - Governor is changed to ondemand.
 - CPU2 is taken offline and cpufreq_cpu_governor is updated for CPU2.
 - CPU3 is brought online.
 - Because cpufreq_cpu_governor wasn't set for CPU3, the default governor
   performance is picked for CPU3.

This patch fixes the bug as we now have a single variable to update for
policy.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/cpufreq.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 2ee4888c1f47..48e37c07eb84 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -80,6 +80,7 @@ struct cpufreq_policy {
 	struct cpufreq_governor	*governor; /* see below */
 	void			*governor_data;
 	bool			governor_enabled; /* governor start/stop flag */
+	char			last_governor[CPUFREQ_NAME_LEN]; /* last governor used */
 
 	struct work_struct	update; /* if update_policy() needs to be
 					 * called, but you're in IRQ context */
-- 
cgit v1.2.3


From a4afd37b26f4b9f640310a89b7f8d176ae3460b1 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 13 May 2015 13:12:43 +0200
Subject: test_bpf: add tests related to BPF_MAXINSNS

Couple of torture test cases related to the bug fixed in 0b59d8806a31
("ARM: net: delegate filter to kernel interpreter when imm_offset()
return value can't fit into 12bits.").

I've added a helper to allocate and fill the insn space. Output on
x86_64 from my laptop:

test_bpf: #233 BPF_MAXINSNS: Maximum possible literals jited:0 7 PASS
test_bpf: #234 BPF_MAXINSNS: Single literal jited:0 8 PASS
test_bpf: #235 BPF_MAXINSNS: Run/add until end jited:0 11553 PASS
test_bpf: #236 BPF_MAXINSNS: Too many instructions PASS
test_bpf: #237 BPF_MAXINSNS: Very long jump jited:0 9 PASS
test_bpf: #238 BPF_MAXINSNS: Ctx heavy transformations jited:0 20329 20398 PASS
test_bpf: #239 BPF_MAXINSNS: Call heavy transformations jited:0 32178 32475 PASS
test_bpf: #240 BPF_MAXINSNS: Jump heavy test jited:0 10518 PASS

test_bpf: #233 BPF_MAXINSNS: Maximum possible literals jited:1 4 PASS
test_bpf: #234 BPF_MAXINSNS: Single literal jited:1 4 PASS
test_bpf: #235 BPF_MAXINSNS: Run/add until end jited:1 1625 PASS
test_bpf: #236 BPF_MAXINSNS: Too many instructions PASS
test_bpf: #237 BPF_MAXINSNS: Very long jump jited:1 8 PASS
test_bpf: #238 BPF_MAXINSNS: Ctx heavy transformations jited:1 3301 3174 PASS
test_bpf: #239 BPF_MAXINSNS: Call heavy transformations jited:1 24107 23491 PASS
test_bpf: #240 BPF_MAXINSNS: Jump heavy test jited:1 8651 PASS

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Nicolas Schichan <nschichan@freebox.fr>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index ce1d72d34382..200be4a74a33 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -277,6 +277,14 @@ struct bpf_prog_aux;
 		.off   = 0,					\
 		.imm   = 0 })
 
+/* Internal classic blocks for direct assignment */
+
+#define __BPF_STMT(CODE, K)					\
+	((struct sock_filter) BPF_STMT(CODE, K))
+
+#define __BPF_JUMP(CODE, K, JT, JF)				\
+	((struct sock_filter) BPF_JUMP(CODE, K, JT, JF))
+
 #define bytes_to_bpf_size(bytes)				\
 ({								\
 	int bpf_size = -EINVAL;					\
-- 
cgit v1.2.3


From ef7f3a5c7149ad2dbd1d8a71d0aa88a02d1dbcb8 Mon Sep 17 00:00:00 2001
From: Bert Vermeulen <bert@biot.com>
Date: Wed, 13 May 2015 13:35:39 +0200
Subject: mdio-gpio: Propagate mii_bus.phy_ignore_ta_mask

This also changes mii_bus.phy_mask to u32 for consistency.

Signed-off-by: Bert Vermeulen <bert@biot.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mdio-gpio.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mdio-gpio.h b/include/linux/mdio-gpio.h
index 66c30a763b10..11f00cdabe3d 100644
--- a/include/linux/mdio-gpio.h
+++ b/include/linux/mdio-gpio.h
@@ -23,7 +23,8 @@ struct mdio_gpio_platform_data {
 	bool mdio_active_low;
 	bool mdo_active_low;
 
-	unsigned int phy_mask;
+	u32 phy_mask;
+	u32 phy_ignore_ta_mask;
 	int irqs[PHY_MAX_ADDR];
 	/* reset callback */
 	int (*reset)(struct mii_bus *bus);
-- 
cgit v1.2.3


From 8fa9dd24667f2d6997ec21341019657342859d31 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 23 Mar 2015 13:37:40 +1100
Subject: VFS/namei: make the use of touch_atime() in get_link() RCU-safe.

touch_atime is not RCU-safe, and so cannot be called on an RCU walk.
However, in situations where RCU-walk makes a difference, the symlink
will likely to accessed much more often than it is useful to update
the atime.

So split out the test of "Does the atime actually need to be updated"
into  atime_needs_update(), and have get_link() unlazy if it finds that
it will need to do that update.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8f738512c874..1426c435d455 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1880,6 +1880,7 @@ enum file_time_flags {
 	S_VERSION = 8,
 };
 
+extern bool atime_needs_update(const struct path *, struct inode *);
 extern void touch_atime(const struct path *);
 static inline void file_accessed(struct file *file)
 {
-- 
cgit v1.2.3


From 89076bc31950eee576ecc06460c23466e2d50939 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 12 May 2015 08:29:38 -0400
Subject: get rid of assorted nameidata-related debris

pointless forward declarations, stale comments

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h    | 1 -
 include/linux/namei.h | 1 -
 include/linux/sched.h | 1 +
 3 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1426c435d455..b577e801b4af 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -38,7 +38,6 @@ struct backing_dev_info;
 struct export_operations;
 struct hd_geometry;
 struct iovec;
-struct nameidata;
 struct kiocb;
 struct kobject;
 struct pipe_inode_info;
diff --git a/include/linux/namei.h b/include/linux/namei.h
index d756304aa09b..1208e489f83e 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -7,7 +7,6 @@
 #include <linux/path.h>
 
 struct vfsmount;
-struct nameidata;
 
 enum { MAX_NESTED_LINKS = 8 };
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f6c9b69d66f2..a1158c954f0f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -132,6 +132,7 @@ struct fs_struct;
 struct perf_event_context;
 struct blk_plug;
 struct filename;
+struct nameidata;
 
 #define VMACACHE_BITS 2
 #define VMACACHE_SIZE (1U << VMACACHE_BITS)
-- 
cgit v1.2.3


From b853a16176cf3e02c57e215743015614152c2428 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 13 May 2015 09:12:02 -0400
Subject: turn user_{path_at,path,lpath,path_dir}() into static inlines

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/namei.h | 34 ++++++++++++++++++++++++----------
 1 file changed, 24 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/namei.h b/include/linux/namei.h
index 1208e489f83e..d8c6334cd150 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -1,12 +1,10 @@
 #ifndef _LINUX_NAMEI_H
 #define _LINUX_NAMEI_H
 
-#include <linux/dcache.h>
-#include <linux/errno.h>
-#include <linux/linkage.h>
+#include <linux/kernel.h>
 #include <linux/path.h>
-
-struct vfsmount;
+#include <linux/fcntl.h>
+#include <linux/errno.h>
 
 enum { MAX_NESTED_LINKS = 8 };
 
@@ -46,13 +44,29 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
 #define LOOKUP_ROOT		0x2000
 #define LOOKUP_EMPTY		0x4000
 
-extern int user_path_at(int, const char __user *, unsigned, struct path *);
 extern int user_path_at_empty(int, const char __user *, unsigned, struct path *, int *empty);
 
-#define user_path(name, path) user_path_at(AT_FDCWD, name, LOOKUP_FOLLOW, path)
-#define user_lpath(name, path) user_path_at(AT_FDCWD, name, 0, path)
-#define user_path_dir(name, path) \
-	user_path_at(AT_FDCWD, name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, path)
+static inline int user_path_at(int dfd, const char __user *name, unsigned flags,
+		 struct path *path)
+{
+	return user_path_at_empty(dfd, name, flags, path, NULL);
+}
+
+static inline int user_path(const char __user *name, struct path *path)
+{
+	return user_path_at_empty(AT_FDCWD, name, LOOKUP_FOLLOW, path, NULL);
+}
+
+static inline int user_lpath(const char __user *name, struct path *path)
+{
+	return user_path_at_empty(AT_FDCWD, name, 0, path, NULL);
+}
+
+static inline int user_path_dir(const char __user *name, struct path *path)
+{
+	return user_path_at_empty(AT_FDCWD, name,
+				  LOOKUP_FOLLOW | LOOKUP_DIRECTORY, path, NULL);
+}
 
 extern int kern_path(const char *, unsigned, struct path *);
 
-- 
cgit v1.2.3


From 55917a21d0cc012bb6073bb05bb768fd51d8e237 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 14 May 2015 14:57:23 +0200
Subject: netfilter: x_tables: add context to know if extension runs from
 nft_compat

Currently, we have four xtables extensions that cannot be used from the
xt over nft compat layer. The problem is that they need real access to
the full blown xt_entry to validate that the rule comes with the right
dependencies. This check was introduced to overcome the lack of
sufficient userspace dependency validation in iptables.

To resolve this problem, this patch introduces a new field to the
xt_tgchk_param structure that tell us if the extension is run from
nft_compat context.

The three affected extensions are:

1) CLUSTERIP, this target has been superseded by xt_cluster. So just
   bail out by returning -EINVAL.

2) TCPMSS. Relax the checking when used from nft_compat. If used with
   the wrong configuration, it will corrupt !syn packets by adding TCP
   MSS option.

3) ebt_stp. Relax the check to make sure it uses the reserved
   destination MAC address for STP.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Tested-by: Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
---
 include/linux/netfilter/x_tables.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index a3e215bb0241..09f38206c18f 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -62,6 +62,7 @@ struct xt_mtchk_param {
 	void *matchinfo;
 	unsigned int hook_mask;
 	u_int8_t family;
+	bool nft_compat;
 };
 
 /**
@@ -92,6 +93,7 @@ struct xt_tgchk_param {
 	void *targinfo;
 	unsigned int hook_mask;
 	u_int8_t family;
+	bool nft_compat;
 };
 
 /* Target destructor parameters */
-- 
cgit v1.2.3


From 3f7f642b9bc46453e1435e8b67f1c4f7949be7ff Mon Sep 17 00:00:00 2001
From: Martin Fuzzey <mfuzzey@parkeon.com>
Date: Wed, 13 May 2015 12:26:42 +0200
Subject: iio: core: add high pass filter attributes

Add a high pass filter attribute for measurements
(like the existing low pass)

Also add both high and low pass attributes for events.

Signed-off-by: Martin Fuzzey <mfuzzey@parkeon.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 include/linux/iio/iio.h   | 1 +
 include/linux/iio/types.h | 2 ++
 2 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
index 058441da4984..f79148261d16 100644
--- a/include/linux/iio/iio.h
+++ b/include/linux/iio/iio.h
@@ -32,6 +32,7 @@ enum iio_chan_info_enum {
 	IIO_CHAN_INFO_QUADRATURE_CORRECTION_RAW,
 	IIO_CHAN_INFO_AVERAGE_RAW,
 	IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY,
+	IIO_CHAN_INFO_HIGH_PASS_FILTER_3DB_FREQUENCY,
 	IIO_CHAN_INFO_SAMP_FREQ,
 	IIO_CHAN_INFO_FREQUENCY,
 	IIO_CHAN_INFO_PHASE,
diff --git a/include/linux/iio/types.h b/include/linux/iio/types.h
index 942b6de68e2f..32b579525004 100644
--- a/include/linux/iio/types.h
+++ b/include/linux/iio/types.h
@@ -17,6 +17,8 @@ enum iio_event_info {
 	IIO_EV_INFO_VALUE,
 	IIO_EV_INFO_HYSTERESIS,
 	IIO_EV_INFO_PERIOD,
+	IIO_EV_INFO_HIGH_PASS_FILTER_3DB,
+	IIO_EV_INFO_LOW_PASS_FILTER_3DB,
 };
 
 #define IIO_VAL_INT 1
-- 
cgit v1.2.3


From c91d46065209bfe6124396fc409765ced0601b59 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 15 May 2015 05:48:07 -0700
Subject: net: fix two sparse errors

First one in __skb_checksum_validate_complete() fixes the following
(and other callers)

make C=2 CF=-D__CHECK_ENDIAN__ net/ipv4/tcp_ipv4.o
  CHECK   net/ipv4/tcp_ipv4.c
include/linux/skbuff.h:3052:24: warning: incorrect type in return expression (different base types)
include/linux/skbuff.h:3052:24:    expected restricted __sum16
include/linux/skbuff.h:3052:24:    got int

Second is fixing gso_make_checksum() :

  CHECK   net/ipv4/gre_offload.c
include/linux/skbuff.h:3360:14: warning: incorrect type in assignment (different base types)
include/linux/skbuff.h:3360:14:    expected unsigned short [unsigned] [usertype] csum
include/linux/skbuff.h:3360:14:    got restricted __sum16
include/linux/skbuff.h:3365:16: warning: incorrect type in return expression (different base types)
include/linux/skbuff.h:3365:16:    expected restricted __sum16
include/linux/skbuff.h:3365:16:    got unsigned short [unsigned] [usertype] csum

Fixes: 5a21232983aa7 ("net: Support for csum_bad in skbuff")
Fixes: 7e2b10c1e52ca ("net: Support for multiple checksums with gso")
Signed-off-by: Eric Dumazet <edumazet@google.com>
CC: Tom Herbert <tom@herbertland.com>
Acked-by: Tom Herbert <tom@herbertland.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f83aa6568cbf..b57eebfb67e0 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3051,7 +3051,7 @@ static inline __sum16 __skb_checksum_validate_complete(struct sk_buff *skb,
 		}
 	} else if (skb->csum_bad) {
 		/* ip_summed == CHECKSUM_NONE in this case */
-		return 1;
+		return (__force __sum16)1;
 	}
 
 	skb->csum = psum;
@@ -3353,15 +3353,14 @@ static inline int gso_pskb_expand_head(struct sk_buff *skb, int extra)
 static inline __sum16 gso_make_checksum(struct sk_buff *skb, __wsum res)
 {
 	int plen = SKB_GSO_CB(skb)->csum_start - skb_headroom(skb) -
-	    skb_transport_offset(skb);
-	__u16 csum;
+		   skb_transport_offset(skb);
+	__wsum partial;
 
-	csum = csum_fold(csum_partial(skb_transport_header(skb),
-				      plen, skb->csum));
+	partial = csum_partial(skb_transport_header(skb), plen, skb->csum);
 	skb->csum = res;
 	SKB_GSO_CB(skb)->csum_start -= plen;
 
-	return csum;
+	return csum_fold(partial);
 }
 
 static inline bool skb_is_gso(const struct sk_buff *skb)
-- 
cgit v1.2.3


From e1031dc1f7ba5c8724ba211062134076df292791 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@free-electrons.com>
Date: Thu, 7 May 2015 17:38:07 +0200
Subject: dmaengine: Support different source and destination stride

In interleaved mode, we can expect to have different source and destination
strides.

Add support for such case to dmaengine.

Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 include/linux/dmaengine.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index ad419757241f..5d63acb09813 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -122,10 +122,18 @@ enum dma_transfer_direction {
  *	 chunk and before first src/dst address for next chunk.
  *	 Ignored for dst(assumed 0), if dst_inc is true and dst_sgl is false.
  *	 Ignored for src(assumed 0), if src_inc is true and src_sgl is false.
+ * @dst_icg: Number of bytes to jump after last dst address of this
+ *	 chunk and before the first dst address for next chunk.
+ *	 Ignored if dst_inc is true and dst_sgl is false.
+ * @src_icg: Number of bytes to jump after last src address of this
+ *	 chunk and before the first src address for next chunk.
+ *	 Ignored if src_inc is true and src_sgl is false.
  */
 struct data_chunk {
 	size_t size;
 	size_t icg;
+	size_t dst_icg;
+	size_t src_icg;
 };
 
 /**
-- 
cgit v1.2.3


From 4cfafd3082afc707653aeb82e9f8e7b596fbbfd6 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 14 May 2015 12:23:11 +0200
Subject: sched,perf: Fix periodic timers

In the below two commits (see Fixes) we have periodic timers that can
stop themselves when they're no longer required, but need to be
(re)-started when their idle condition changes.

Further complications is that we want the timer handler to always do
the forward such that it will always correctly deal with the overruns,
and we do not want to race such that the handler has already decided
to stop, but the (external) restart sees the timer still active and we
end up with a 'lost' timer.

The problem with the current code is that the re-start can come before
the callback does the forward, at which point the forward from the
callback will WARN about forwarding an enqueued timer.

Now, conceptually its easy to detect if you're before or after the fwd
by comparing the expiration time against the current time. Of course,
that's expensive (and racy) because we don't have the current time.

Alternatively one could cache this state inside the timer, but then
everybody pays the overhead of maintaining this extra state, and that
is undesired.

The only other option that I could see is the external timer_active
variable, which I tried to kill before. I would love a nicer interface
for this seemingly simple 'problem' but alas.

Fixes: 272325c4821f ("perf: Fix mux_interval hrtimer wreckage")
Fixes: 77a4d1a1b9a1 ("sched: Cleanup bandwidth timers")
Cc: pjt@google.com
Cc: tglx@linutronix.de
Cc: klamm@yandex-team.ru
Cc: mingo@kernel.org
Cc: bsegall@google.com
Cc: hpa@zytor.com
Cc: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20150514102311.GX21418@twins.programming.kicks-ass.net
---
 include/linux/perf_event.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 61992cf2e977..cf3342a8ad80 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -566,8 +566,12 @@ struct perf_cpu_context {
 	struct perf_event_context	*task_ctx;
 	int				active_oncpu;
 	int				exclusive;
+
+	raw_spinlock_t			hrtimer_lock;
 	struct hrtimer			hrtimer;
 	ktime_t				hrtimer_interval;
+	unsigned int			hrtimer_active;
+
 	struct pmu			*unique_pmu;
 	struct perf_cgroup		*cgrp;
 };
-- 
cgit v1.2.3


From 5f22f5c668204f3af7557018b2ad6cf2074defac Mon Sep 17 00:00:00 2001
From: Stefan Agner <stefan@agner.ch>
Date: Sat, 16 May 2015 11:44:13 +0200
Subject: irqdomain: Add non-hierarchy helper irq_domain_set_info

This adds the helper irq_domain_set_info() in a non-domain hierarchy
variant. This allows to use the helper for generic chip since not
all chips using generic chip support domain hierarchy.

Signed-off-by: Stefan Agner <stefan@agner.ch>
Cc: marc.zyngier@arm.com
Cc: linux@arm.linux.org.uk
Cc: u.kleine-koenig@pengutronix.de
Cc: olof@lixom.net
Cc: arnd@arndb.de
Cc: daniel.lezcano@linaro.org
Cc: mark.rutland@arm.com
Cc: pawel.moll@arm.com
Cc: robh+dt@kernel.org
Cc: ijc+devicetree@hellion.org.uk
Cc: galak@codeaurora.org
Cc: mcoquelin.stm32@gmail.com
Cc: linux-arm-kernel@lists.infradead.org
Cc: shawn.guo@linaro.org
Cc: kernel@pengutronix.de
Cc: jason@lakedaemon.net
Link: http://lkml.kernel.org/r/1431769465-26867-2-git-send-email-stefan@agner.ch
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irqdomain.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 676d7306a360..744ac0ec98eb 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -258,6 +258,10 @@ int irq_domain_xlate_onetwocell(struct irq_domain *d, struct device_node *ctrlr,
 /* V2 interfaces to support hierarchy IRQ domains. */
 extern struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain,
 						unsigned int virq);
+extern void irq_domain_set_info(struct irq_domain *domain, unsigned int virq,
+				irq_hw_number_t hwirq, struct irq_chip *chip,
+				void *chip_data, irq_flow_handler_t handler,
+				void *handler_data, const char *handler_name);
 #ifdef	CONFIG_IRQ_DOMAIN_HIERARCHY
 extern struct irq_domain *irq_domain_add_hierarchy(struct irq_domain *parent,
 			unsigned int flags, unsigned int size,
@@ -281,10 +285,6 @@ extern int irq_domain_set_hwirq_and_chip(struct irq_domain *domain,
 					 irq_hw_number_t hwirq,
 					 struct irq_chip *chip,
 					 void *chip_data);
-extern void irq_domain_set_info(struct irq_domain *domain, unsigned int virq,
-				irq_hw_number_t hwirq, struct irq_chip *chip,
-				void *chip_data, irq_flow_handler_t handler,
-				void *handler_data, const char *handler_name);
 extern void irq_domain_reset_irq_data(struct irq_data *irq_data);
 extern void irq_domain_free_irqs_common(struct irq_domain *domain,
 					unsigned int virq,
-- 
cgit v1.2.3


From 3cfeffc265791bc953527458e0a44ea77c459340 Mon Sep 17 00:00:00 2001
From: Stefan Agner <stefan@agner.ch>
Date: Sat, 16 May 2015 11:44:14 +0200
Subject: genirq: Add irq_chip_(enable/disable)_parent

Add helper irq_chip_enable_parent and irq_chip_disable_parent. The
helper implement the default behavior in case irq_enable or irq_disable
is not implemented for the parent interrupt chip, which is calling the
irq_mask or irq_unmask respectively.

Signed-off-by: Stefan Agner <stefan@agner.ch>
Cc: marc.zyngier@arm.com
Cc: linux@arm.linux.org.uk
Cc: u.kleine-koenig@pengutronix.de
Cc: olof@lixom.net
Cc: arnd@arndb.de
Cc: daniel.lezcano@linaro.org
Cc: mark.rutland@arm.com
Cc: pawel.moll@arm.com
Cc: robh+dt@kernel.org
Cc: ijc+devicetree@hellion.org.uk
Cc: galak@codeaurora.org
Cc: mcoquelin.stm32@gmail.com
Cc: linux-arm-kernel@lists.infradead.org
Cc: shawn.guo@linaro.org
Cc: kernel@pengutronix.de
Cc: jason@lakedaemon.net
Link: http://lkml.kernel.org/r/1431769465-26867-3-git-send-email-stefan@agner.ch
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 62c6901cab55..2633061364b1 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -458,6 +458,8 @@ extern void handle_nested_irq(unsigned int irq);
 
 extern int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg);
 #ifdef	CONFIG_IRQ_DOMAIN_HIERARCHY
+extern void irq_chip_enable_parent(struct irq_data *data);
+extern void irq_chip_disable_parent(struct irq_data *data);
 extern void irq_chip_ack_parent(struct irq_data *data);
 extern int irq_chip_retrigger_hierarchy(struct irq_data *data);
 extern void irq_chip_mask_parent(struct irq_data *data);
-- 
cgit v1.2.3


From b4a04ab7a37b490cad48e69abfe14288cacb669c Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 13 May 2015 15:38:40 -0400
Subject: cgroup: separate out include/linux/cgroup-defs.h

From 2d728f74bfc071df06773e2fd7577dd5dab6425d Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 13 May 2015 15:37:01 -0400

This patch separates out cgroup-defs.h from cgroup.h which has grown a
lot of dependencies.  cgroup-defs.h currently only contains constant
and type definitions and can be used to break circular include
dependency.  While moving, definitions are reordered so that
cgroup-defs.h has consistent logical structure.

This patch is pure reorganization.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup-defs.h | 464 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/cgroup.h      | 455 +------------------------------------------
 2 files changed, 466 insertions(+), 453 deletions(-)
 create mode 100644 include/linux/cgroup-defs.h

(limited to 'include/linux')

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
new file mode 100644
index 000000000000..55f3120fb952
--- /dev/null
+++ b/include/linux/cgroup-defs.h
@@ -0,0 +1,464 @@
+/*
+ * linux/cgroup-defs.h - basic definitions for cgroup
+ *
+ * This file provides basic type and interface.  Include this file directly
+ * only if necessary to avoid cyclic dependencies.
+ */
+#ifndef _LINUX_CGROUP_DEFS_H
+#define _LINUX_CGROUP_DEFS_H
+
+#include <linux/limits.h>
+#include <linux/list.h>
+#include <linux/idr.h>
+#include <linux/wait.h>
+#include <linux/mutex.h>
+#include <linux/rcupdate.h>
+#include <linux/percpu-refcount.h>
+#include <linux/workqueue.h>
+
+#ifdef CONFIG_CGROUPS
+
+struct cgroup;
+struct cgroup_root;
+struct cgroup_subsys;
+struct cgroup_taskset;
+struct kernfs_node;
+struct kernfs_ops;
+struct kernfs_open_file;
+
+#define MAX_CGROUP_TYPE_NAMELEN 32
+#define MAX_CGROUP_ROOT_NAMELEN 64
+#define MAX_CFTYPE_NAME		64
+
+/* define the enumeration of all cgroup subsystems */
+#define SUBSYS(_x) _x ## _cgrp_id,
+enum cgroup_subsys_id {
+#include <linux/cgroup_subsys.h>
+	CGROUP_SUBSYS_COUNT,
+};
+#undef SUBSYS
+
+/* bits in struct cgroup_subsys_state flags field */
+enum {
+	CSS_NO_REF	= (1 << 0), /* no reference counting for this css */
+	CSS_ONLINE	= (1 << 1), /* between ->css_online() and ->css_offline() */
+	CSS_RELEASED	= (1 << 2), /* refcnt reached zero, released */
+};
+
+/* bits in struct cgroup flags field */
+enum {
+	/* Control Group requires release notifications to userspace */
+	CGRP_NOTIFY_ON_RELEASE,
+	/*
+	 * Clone the parent's configuration when creating a new child
+	 * cpuset cgroup.  For historical reasons, this option can be
+	 * specified at mount time and thus is implemented here.
+	 */
+	CGRP_CPUSET_CLONE_CHILDREN,
+};
+
+/* cgroup_root->flags */
+enum {
+	CGRP_ROOT_SANE_BEHAVIOR	= (1 << 0), /* __DEVEL__sane_behavior specified */
+	CGRP_ROOT_NOPREFIX	= (1 << 1), /* mounted subsystems have no named prefix */
+	CGRP_ROOT_XATTR		= (1 << 2), /* supports extended attributes */
+};
+
+/* cftype->flags */
+enum {
+	CFTYPE_ONLY_ON_ROOT	= (1 << 0),	/* only create on root cgrp */
+	CFTYPE_NOT_ON_ROOT	= (1 << 1),	/* don't create on root cgrp */
+	CFTYPE_NO_PREFIX	= (1 << 3),	/* (DON'T USE FOR NEW FILES) no subsys prefix */
+
+	/* internal flags, do not use outside cgroup core proper */
+	__CFTYPE_ONLY_ON_DFL	= (1 << 16),	/* only on default hierarchy */
+	__CFTYPE_NOT_ON_DFL	= (1 << 17),	/* not on default hierarchy */
+};
+
+/*
+ * Per-subsystem/per-cgroup state maintained by the system.  This is the
+ * fundamental structural building block that controllers deal with.
+ *
+ * Fields marked with "PI:" are public and immutable and may be accessed
+ * directly without synchronization.
+ */
+struct cgroup_subsys_state {
+	/* PI: the cgroup that this css is attached to */
+	struct cgroup *cgroup;
+
+	/* PI: the cgroup subsystem that this css is attached to */
+	struct cgroup_subsys *ss;
+
+	/* reference count - access via css_[try]get() and css_put() */
+	struct percpu_ref refcnt;
+
+	/* PI: the parent css */
+	struct cgroup_subsys_state *parent;
+
+	/* siblings list anchored at the parent's ->children */
+	struct list_head sibling;
+	struct list_head children;
+
+	/*
+	 * PI: Subsys-unique ID.  0 is unused and root is always 1.  The
+	 * matching css can be looked up using css_from_id().
+	 */
+	int id;
+
+	unsigned int flags;
+
+	/*
+	 * Monotonically increasing unique serial number which defines a
+	 * uniform order among all csses.  It's guaranteed that all
+	 * ->children lists are in the ascending order of ->serial_nr and
+	 * used to allow interrupting and resuming iterations.
+	 */
+	u64 serial_nr;
+
+	/* percpu_ref killing and RCU release */
+	struct rcu_head rcu_head;
+	struct work_struct destroy_work;
+};
+
+/*
+ * A css_set is a structure holding pointers to a set of
+ * cgroup_subsys_state objects. This saves space in the task struct
+ * object and speeds up fork()/exit(), since a single inc/dec and a
+ * list_add()/del() can bump the reference count on the entire cgroup
+ * set for a task.
+ */
+struct css_set {
+	/* Reference count */
+	atomic_t refcount;
+
+	/*
+	 * List running through all cgroup groups in the same hash
+	 * slot. Protected by css_set_lock
+	 */
+	struct hlist_node hlist;
+
+	/*
+	 * Lists running through all tasks using this cgroup group.
+	 * mg_tasks lists tasks which belong to this cset but are in the
+	 * process of being migrated out or in.  Protected by
+	 * css_set_rwsem, but, during migration, once tasks are moved to
+	 * mg_tasks, it can be read safely while holding cgroup_mutex.
+	 */
+	struct list_head tasks;
+	struct list_head mg_tasks;
+
+	/*
+	 * List of cgrp_cset_links pointing at cgroups referenced from this
+	 * css_set.  Protected by css_set_lock.
+	 */
+	struct list_head cgrp_links;
+
+	/* the default cgroup associated with this css_set */
+	struct cgroup *dfl_cgrp;
+
+	/*
+	 * Set of subsystem states, one for each subsystem. This array is
+	 * immutable after creation apart from the init_css_set during
+	 * subsystem registration (at boot time).
+	 */
+	struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
+
+	/*
+	 * List of csets participating in the on-going migration either as
+	 * source or destination.  Protected by cgroup_mutex.
+	 */
+	struct list_head mg_preload_node;
+	struct list_head mg_node;
+
+	/*
+	 * If this cset is acting as the source of migration the following
+	 * two fields are set.  mg_src_cgrp is the source cgroup of the
+	 * on-going migration and mg_dst_cset is the destination cset the
+	 * target tasks on this cset should be migrated to.  Protected by
+	 * cgroup_mutex.
+	 */
+	struct cgroup *mg_src_cgrp;
+	struct css_set *mg_dst_cset;
+
+	/*
+	 * On the default hierarhcy, ->subsys[ssid] may point to a css
+	 * attached to an ancestor instead of the cgroup this css_set is
+	 * associated with.  The following node is anchored at
+	 * ->subsys[ssid]->cgroup->e_csets[ssid] and provides a way to
+	 * iterate through all css's attached to a given cgroup.
+	 */
+	struct list_head e_cset_node[CGROUP_SUBSYS_COUNT];
+
+	/* For RCU-protected deletion */
+	struct rcu_head rcu_head;
+};
+
+struct cgroup {
+	/* self css with NULL ->ss, points back to this cgroup */
+	struct cgroup_subsys_state self;
+
+	unsigned long flags;		/* "unsigned long" so bitops work */
+
+	/*
+	 * idr allocated in-hierarchy ID.
+	 *
+	 * ID 0 is not used, the ID of the root cgroup is always 1, and a
+	 * new cgroup will be assigned with a smallest available ID.
+	 *
+	 * Allocating/Removing ID must be protected by cgroup_mutex.
+	 */
+	int id;
+
+	/*
+	 * If this cgroup contains any tasks, it contributes one to
+	 * populated_cnt.  All children with non-zero popuplated_cnt of
+	 * their own contribute one.  The count is zero iff there's no task
+	 * in this cgroup or its subtree.
+	 */
+	int populated_cnt;
+
+	struct kernfs_node *kn;		/* cgroup kernfs entry */
+	struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */
+
+	/*
+	 * The bitmask of subsystems enabled on the child cgroups.
+	 * ->subtree_control is the one configured through
+	 * "cgroup.subtree_control" while ->child_subsys_mask is the
+	 * effective one which may have more subsystems enabled.
+	 * Controller knobs are made available iff it's enabled in
+	 * ->subtree_control.
+	 */
+	unsigned int subtree_control;
+	unsigned int child_subsys_mask;
+
+	/* Private pointers for each registered subsystem */
+	struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT];
+
+	struct cgroup_root *root;
+
+	/*
+	 * List of cgrp_cset_links pointing at css_sets with tasks in this
+	 * cgroup.  Protected by css_set_lock.
+	 */
+	struct list_head cset_links;
+
+	/*
+	 * On the default hierarchy, a css_set for a cgroup with some
+	 * susbsys disabled will point to css's which are associated with
+	 * the closest ancestor which has the subsys enabled.  The
+	 * following lists all css_sets which point to this cgroup's css
+	 * for the given subsystem.
+	 */
+	struct list_head e_csets[CGROUP_SUBSYS_COUNT];
+
+	/*
+	 * list of pidlists, up to two for each namespace (one for procs, one
+	 * for tasks); created on demand.
+	 */
+	struct list_head pidlists;
+	struct mutex pidlist_mutex;
+
+	/* used to wait for offlining of csses */
+	wait_queue_head_t offline_waitq;
+
+	/* used to schedule release agent */
+	struct work_struct release_agent_work;
+};
+
+/*
+ * A cgroup_root represents the root of a cgroup hierarchy, and may be
+ * associated with a kernfs_root to form an active hierarchy.  This is
+ * internal to cgroup core.  Don't access directly from controllers.
+ */
+struct cgroup_root {
+	struct kernfs_root *kf_root;
+
+	/* The bitmask of subsystems attached to this hierarchy */
+	unsigned int subsys_mask;
+
+	/* Unique id for this hierarchy. */
+	int hierarchy_id;
+
+	/* The root cgroup.  Root is destroyed on its release. */
+	struct cgroup cgrp;
+
+	/* Number of cgroups in the hierarchy, used only for /proc/cgroups */
+	atomic_t nr_cgrps;
+
+	/* A list running through the active hierarchies */
+	struct list_head root_list;
+
+	/* Hierarchy-specific flags */
+	unsigned int flags;
+
+	/* IDs for cgroups in this hierarchy */
+	struct idr cgroup_idr;
+
+	/* The path to use for release notifications. */
+	char release_agent_path[PATH_MAX];
+
+	/* The name for this hierarchy - may be empty */
+	char name[MAX_CGROUP_ROOT_NAMELEN];
+};
+
+/*
+ * struct cftype: handler definitions for cgroup control files
+ *
+ * When reading/writing to a file:
+ *	- the cgroup to use is file->f_path.dentry->d_parent->d_fsdata
+ *	- the 'cftype' of the file is file->f_path.dentry->d_fsdata
+ */
+struct cftype {
+	/*
+	 * By convention, the name should begin with the name of the
+	 * subsystem, followed by a period.  Zero length string indicates
+	 * end of cftype array.
+	 */
+	char name[MAX_CFTYPE_NAME];
+	int private;
+	/*
+	 * If not 0, file mode is set to this value, otherwise it will
+	 * be figured out automatically
+	 */
+	umode_t mode;
+
+	/*
+	 * The maximum length of string, excluding trailing nul, that can
+	 * be passed to write.  If < PAGE_SIZE-1, PAGE_SIZE-1 is assumed.
+	 */
+	size_t max_write_len;
+
+	/* CFTYPE_* flags */
+	unsigned int flags;
+
+	/*
+	 * Fields used for internal bookkeeping.  Initialized automatically
+	 * during registration.
+	 */
+	struct cgroup_subsys *ss;	/* NULL for cgroup core files */
+	struct list_head node;		/* anchored at ss->cfts */
+	struct kernfs_ops *kf_ops;
+
+	/*
+	 * read_u64() is a shortcut for the common case of returning a
+	 * single integer. Use it in place of read()
+	 */
+	u64 (*read_u64)(struct cgroup_subsys_state *css, struct cftype *cft);
+	/*
+	 * read_s64() is a signed version of read_u64()
+	 */
+	s64 (*read_s64)(struct cgroup_subsys_state *css, struct cftype *cft);
+
+	/* generic seq_file read interface */
+	int (*seq_show)(struct seq_file *sf, void *v);
+
+	/* optional ops, implement all or none */
+	void *(*seq_start)(struct seq_file *sf, loff_t *ppos);
+	void *(*seq_next)(struct seq_file *sf, void *v, loff_t *ppos);
+	void (*seq_stop)(struct seq_file *sf, void *v);
+
+	/*
+	 * write_u64() is a shortcut for the common case of accepting
+	 * a single integer (as parsed by simple_strtoull) from
+	 * userspace. Use in place of write(); return 0 or error.
+	 */
+	int (*write_u64)(struct cgroup_subsys_state *css, struct cftype *cft,
+			 u64 val);
+	/*
+	 * write_s64() is a signed version of write_u64()
+	 */
+	int (*write_s64)(struct cgroup_subsys_state *css, struct cftype *cft,
+			 s64 val);
+
+	/*
+	 * write() is the generic write callback which maps directly to
+	 * kernfs write operation and overrides all other operations.
+	 * Maximum write size is determined by ->max_write_len.  Use
+	 * of_css/cft() to access the associated css and cft.
+	 */
+	ssize_t (*write)(struct kernfs_open_file *of,
+			 char *buf, size_t nbytes, loff_t off);
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lock_class_key	lockdep_key;
+#endif
+};
+
+/*
+ * Control Group subsystem type.
+ * See Documentation/cgroups/cgroups.txt for details
+ */
+struct cgroup_subsys {
+	struct cgroup_subsys_state *(*css_alloc)(struct cgroup_subsys_state *parent_css);
+	int (*css_online)(struct cgroup_subsys_state *css);
+	void (*css_offline)(struct cgroup_subsys_state *css);
+	void (*css_released)(struct cgroup_subsys_state *css);
+	void (*css_free)(struct cgroup_subsys_state *css);
+	void (*css_reset)(struct cgroup_subsys_state *css);
+	void (*css_e_css_changed)(struct cgroup_subsys_state *css);
+
+	int (*can_attach)(struct cgroup_subsys_state *css,
+			  struct cgroup_taskset *tset);
+	void (*cancel_attach)(struct cgroup_subsys_state *css,
+			      struct cgroup_taskset *tset);
+	void (*attach)(struct cgroup_subsys_state *css,
+		       struct cgroup_taskset *tset);
+	void (*fork)(struct task_struct *task);
+	void (*exit)(struct cgroup_subsys_state *css,
+		     struct cgroup_subsys_state *old_css,
+		     struct task_struct *task);
+	void (*bind)(struct cgroup_subsys_state *root_css);
+
+	int disabled;
+	int early_init;
+
+	/*
+	 * If %false, this subsystem is properly hierarchical -
+	 * configuration, resource accounting and restriction on a parent
+	 * cgroup cover those of its children.  If %true, hierarchy support
+	 * is broken in some ways - some subsystems ignore hierarchy
+	 * completely while others are only implemented half-way.
+	 *
+	 * It's now disallowed to create nested cgroups if the subsystem is
+	 * broken and cgroup core will emit a warning message on such
+	 * cases.  Eventually, all subsystems will be made properly
+	 * hierarchical and this will go away.
+	 */
+	bool broken_hierarchy;
+	bool warned_broken_hierarchy;
+
+	/* the following two fields are initialized automtically during boot */
+	int id;
+	const char *name;
+
+	/* link to parent, protected by cgroup_lock() */
+	struct cgroup_root *root;
+
+	/* idr for css->id */
+	struct idr css_idr;
+
+	/*
+	 * List of cftypes.  Each entry is the first entry of an array
+	 * terminated by zero length name.
+	 */
+	struct list_head cfts;
+
+	/*
+	 * Base cftypes which are automatically registered.  The two can
+	 * point to the same array.
+	 */
+	struct cftype *dfl_cftypes;	/* for the default hierarchy */
+	struct cftype *legacy_cftypes;	/* for the legacy hierarchies */
+
+	/*
+	 * A subsystem may depend on other subsystems.  When such subsystem
+	 * is enabled on a cgroup, the depended-upon subsystems are enabled
+	 * together if available.  Subsystems enabled due to dependency are
+	 * not visible to userland until explicitly enabled.  The following
+	 * specifies the mask of subsystems that this one depends on.
+	 */
+	unsigned int depends_on;
+};
+
+#endif	/* CONFIG_CGROUPS */
+#endif	/* _LINUX_CGROUP_DEFS_H */
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index b9cb94c3102a..96a2ecd5aa69 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -11,23 +11,16 @@
 #include <linux/sched.h>
 #include <linux/cpumask.h>
 #include <linux/nodemask.h>
-#include <linux/rcupdate.h>
 #include <linux/rculist.h>
 #include <linux/cgroupstats.h>
 #include <linux/rwsem.h>
-#include <linux/idr.h>
-#include <linux/workqueue.h>
 #include <linux/fs.h>
-#include <linux/percpu-refcount.h>
 #include <linux/seq_file.h>
 #include <linux/kernfs.h>
-#include <linux/wait.h>
 
-#ifdef CONFIG_CGROUPS
+#include <linux/cgroup-defs.h>
 
-struct cgroup_root;
-struct cgroup_subsys;
-struct cgroup;
+#ifdef CONFIG_CGROUPS
 
 extern int cgroup_init_early(void);
 extern int cgroup_init(void);
@@ -40,66 +33,6 @@ extern int cgroupstats_build(struct cgroupstats *stats,
 extern int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
 			    struct pid *pid, struct task_struct *tsk);
 
-/* define the enumeration of all cgroup subsystems */
-#define SUBSYS(_x) _x ## _cgrp_id,
-enum cgroup_subsys_id {
-#include <linux/cgroup_subsys.h>
-	CGROUP_SUBSYS_COUNT,
-};
-#undef SUBSYS
-
-/*
- * Per-subsystem/per-cgroup state maintained by the system.  This is the
- * fundamental structural building block that controllers deal with.
- *
- * Fields marked with "PI:" are public and immutable and may be accessed
- * directly without synchronization.
- */
-struct cgroup_subsys_state {
-	/* PI: the cgroup that this css is attached to */
-	struct cgroup *cgroup;
-
-	/* PI: the cgroup subsystem that this css is attached to */
-	struct cgroup_subsys *ss;
-
-	/* reference count - access via css_[try]get() and css_put() */
-	struct percpu_ref refcnt;
-
-	/* PI: the parent css */
-	struct cgroup_subsys_state *parent;
-
-	/* siblings list anchored at the parent's ->children */
-	struct list_head sibling;
-	struct list_head children;
-
-	/*
-	 * PI: Subsys-unique ID.  0 is unused and root is always 1.  The
-	 * matching css can be looked up using css_from_id().
-	 */
-	int id;
-
-	unsigned int flags;
-
-	/*
-	 * Monotonically increasing unique serial number which defines a
-	 * uniform order among all csses.  It's guaranteed that all
-	 * ->children lists are in the ascending order of ->serial_nr and
-	 * used to allow interrupting and resuming iterations.
-	 */
-	u64 serial_nr;
-
-	/* percpu_ref killing and RCU release */
-	struct rcu_head rcu_head;
-	struct work_struct destroy_work;
-};
-
-/* bits in struct cgroup_subsys_state flags field */
-enum {
-	CSS_NO_REF	= (1 << 0), /* no reference counting for this css */
-	CSS_ONLINE	= (1 << 1), /* between ->css_online() and ->css_offline() */
-	CSS_RELEASED	= (1 << 2), /* refcnt reached zero, released */
-};
-
 /**
  * css_get - obtain a reference on the specified css
  * @css: target css
@@ -185,307 +118,6 @@ static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n)
 		percpu_ref_put_many(&css->refcnt, n);
 }
 
-/* bits in struct cgroup flags field */
-enum {
-	/* Control Group requires release notifications to userspace */
-	CGRP_NOTIFY_ON_RELEASE,
-	/*
-	 * Clone the parent's configuration when creating a new child
-	 * cpuset cgroup.  For historical reasons, this option can be
-	 * specified at mount time and thus is implemented here.
-	 */
-	CGRP_CPUSET_CLONE_CHILDREN,
-};
-
-struct cgroup {
-	/* self css with NULL ->ss, points back to this cgroup */
-	struct cgroup_subsys_state self;
-
-	unsigned long flags;		/* "unsigned long" so bitops work */
-
-	/*
-	 * idr allocated in-hierarchy ID.
-	 *
-	 * ID 0 is not used, the ID of the root cgroup is always 1, and a
-	 * new cgroup will be assigned with a smallest available ID.
-	 *
-	 * Allocating/Removing ID must be protected by cgroup_mutex.
-	 */
-	int id;
-
-	/*
-	 * If this cgroup contains any tasks, it contributes one to
-	 * populated_cnt.  All children with non-zero popuplated_cnt of
-	 * their own contribute one.  The count is zero iff there's no task
-	 * in this cgroup or its subtree.
-	 */
-	int populated_cnt;
-
-	struct kernfs_node *kn;		/* cgroup kernfs entry */
-	struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */
-
-	/*
-	 * The bitmask of subsystems enabled on the child cgroups.
-	 * ->subtree_control is the one configured through
-	 * "cgroup.subtree_control" while ->child_subsys_mask is the
-	 * effective one which may have more subsystems enabled.
-	 * Controller knobs are made available iff it's enabled in
-	 * ->subtree_control.
-	 */
-	unsigned int subtree_control;
-	unsigned int child_subsys_mask;
-
-	/* Private pointers for each registered subsystem */
-	struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT];
-
-	struct cgroup_root *root;
-
-	/*
-	 * List of cgrp_cset_links pointing at css_sets with tasks in this
-	 * cgroup.  Protected by css_set_lock.
-	 */
-	struct list_head cset_links;
-
-	/*
-	 * On the default hierarchy, a css_set for a cgroup with some
-	 * susbsys disabled will point to css's which are associated with
-	 * the closest ancestor which has the subsys enabled.  The
-	 * following lists all css_sets which point to this cgroup's css
-	 * for the given subsystem.
-	 */
-	struct list_head e_csets[CGROUP_SUBSYS_COUNT];
-
-	/*
-	 * list of pidlists, up to two for each namespace (one for procs, one
-	 * for tasks); created on demand.
-	 */
-	struct list_head pidlists;
-	struct mutex pidlist_mutex;
-
-	/* used to wait for offlining of csses */
-	wait_queue_head_t offline_waitq;
-
-	/* used to schedule release agent */
-	struct work_struct release_agent_work;
-};
-
-#define MAX_CGROUP_ROOT_NAMELEN 64
-
-/* cgroup_root->flags */
-enum {
-	CGRP_ROOT_SANE_BEHAVIOR	= (1 << 0), /* __DEVEL__sane_behavior specified */
-	CGRP_ROOT_NOPREFIX	= (1 << 1), /* mounted subsystems have no named prefix */
-	CGRP_ROOT_XATTR		= (1 << 2), /* supports extended attributes */
-};
-
-/*
- * A cgroup_root represents the root of a cgroup hierarchy, and may be
- * associated with a kernfs_root to form an active hierarchy.  This is
- * internal to cgroup core.  Don't access directly from controllers.
- */
-struct cgroup_root {
-	struct kernfs_root *kf_root;
-
-	/* The bitmask of subsystems attached to this hierarchy */
-	unsigned int subsys_mask;
-
-	/* Unique id for this hierarchy. */
-	int hierarchy_id;
-
-	/* The root cgroup.  Root is destroyed on its release. */
-	struct cgroup cgrp;
-
-	/* Number of cgroups in the hierarchy, used only for /proc/cgroups */
-	atomic_t nr_cgrps;
-
-	/* A list running through the active hierarchies */
-	struct list_head root_list;
-
-	/* Hierarchy-specific flags */
-	unsigned int flags;
-
-	/* IDs for cgroups in this hierarchy */
-	struct idr cgroup_idr;
-
-	/* The path to use for release notifications. */
-	char release_agent_path[PATH_MAX];
-
-	/* The name for this hierarchy - may be empty */
-	char name[MAX_CGROUP_ROOT_NAMELEN];
-};
-
-/*
- * A css_set is a structure holding pointers to a set of
- * cgroup_subsys_state objects. This saves space in the task struct
- * object and speeds up fork()/exit(), since a single inc/dec and a
- * list_add()/del() can bump the reference count on the entire cgroup
- * set for a task.
- */
-
-struct css_set {
-
-	/* Reference count */
-	atomic_t refcount;
-
-	/*
-	 * List running through all cgroup groups in the same hash
-	 * slot. Protected by css_set_lock
-	 */
-	struct hlist_node hlist;
-
-	/*
-	 * Lists running through all tasks using this cgroup group.
-	 * mg_tasks lists tasks which belong to this cset but are in the
-	 * process of being migrated out or in.  Protected by
-	 * css_set_rwsem, but, during migration, once tasks are moved to
-	 * mg_tasks, it can be read safely while holding cgroup_mutex.
-	 */
-	struct list_head tasks;
-	struct list_head mg_tasks;
-
-	/*
-	 * List of cgrp_cset_links pointing at cgroups referenced from this
-	 * css_set.  Protected by css_set_lock.
-	 */
-	struct list_head cgrp_links;
-
-	/* the default cgroup associated with this css_set */
-	struct cgroup *dfl_cgrp;
-
-	/*
-	 * Set of subsystem states, one for each subsystem. This array is
-	 * immutable after creation apart from the init_css_set during
-	 * subsystem registration (at boot time).
-	 */
-	struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
-
-	/*
-	 * List of csets participating in the on-going migration either as
-	 * source or destination.  Protected by cgroup_mutex.
-	 */
-	struct list_head mg_preload_node;
-	struct list_head mg_node;
-
-	/*
-	 * If this cset is acting as the source of migration the following
-	 * two fields are set.  mg_src_cgrp is the source cgroup of the
-	 * on-going migration and mg_dst_cset is the destination cset the
-	 * target tasks on this cset should be migrated to.  Protected by
-	 * cgroup_mutex.
-	 */
-	struct cgroup *mg_src_cgrp;
-	struct css_set *mg_dst_cset;
-
-	/*
-	 * On the default hierarhcy, ->subsys[ssid] may point to a css
-	 * attached to an ancestor instead of the cgroup this css_set is
-	 * associated with.  The following node is anchored at
-	 * ->subsys[ssid]->cgroup->e_csets[ssid] and provides a way to
-	 * iterate through all css's attached to a given cgroup.
-	 */
-	struct list_head e_cset_node[CGROUP_SUBSYS_COUNT];
-
-	/* For RCU-protected deletion */
-	struct rcu_head rcu_head;
-};
-
-/*
- * struct cftype: handler definitions for cgroup control files
- *
- * When reading/writing to a file:
- *	- the cgroup to use is file->f_path.dentry->d_parent->d_fsdata
- *	- the 'cftype' of the file is file->f_path.dentry->d_fsdata
- */
-
-/* cftype->flags */
-enum {
-	CFTYPE_ONLY_ON_ROOT	= (1 << 0),	/* only create on root cgrp */
-	CFTYPE_NOT_ON_ROOT	= (1 << 1),	/* don't create on root cgrp */
-	CFTYPE_NO_PREFIX	= (1 << 3),	/* (DON'T USE FOR NEW FILES) no subsys prefix */
-
-	/* internal flags, do not use outside cgroup core proper */
-	__CFTYPE_ONLY_ON_DFL	= (1 << 16),	/* only on default hierarchy */
-	__CFTYPE_NOT_ON_DFL	= (1 << 17),	/* not on default hierarchy */
-};
-
-#define MAX_CFTYPE_NAME		64
-
-struct cftype {
-	/*
-	 * By convention, the name should begin with the name of the
-	 * subsystem, followed by a period.  Zero length string indicates
-	 * end of cftype array.
-	 */
-	char name[MAX_CFTYPE_NAME];
-	int private;
-	/*
-	 * If not 0, file mode is set to this value, otherwise it will
-	 * be figured out automatically
-	 */
-	umode_t mode;
-
-	/*
-	 * The maximum length of string, excluding trailing nul, that can
-	 * be passed to write.  If < PAGE_SIZE-1, PAGE_SIZE-1 is assumed.
-	 */
-	size_t max_write_len;
-
-	/* CFTYPE_* flags */
-	unsigned int flags;
-
-	/*
-	 * Fields used for internal bookkeeping.  Initialized automatically
-	 * during registration.
-	 */
-	struct cgroup_subsys *ss;	/* NULL for cgroup core files */
-	struct list_head node;		/* anchored at ss->cfts */
-	struct kernfs_ops *kf_ops;
-
-	/*
-	 * read_u64() is a shortcut for the common case of returning a
-	 * single integer. Use it in place of read()
-	 */
-	u64 (*read_u64)(struct cgroup_subsys_state *css, struct cftype *cft);
-	/*
-	 * read_s64() is a signed version of read_u64()
-	 */
-	s64 (*read_s64)(struct cgroup_subsys_state *css, struct cftype *cft);
-
-	/* generic seq_file read interface */
-	int (*seq_show)(struct seq_file *sf, void *v);
-
-	/* optional ops, implement all or none */
-	void *(*seq_start)(struct seq_file *sf, loff_t *ppos);
-	void *(*seq_next)(struct seq_file *sf, void *v, loff_t *ppos);
-	void (*seq_stop)(struct seq_file *sf, void *v);
-
-	/*
-	 * write_u64() is a shortcut for the common case of accepting
-	 * a single integer (as parsed by simple_strtoull) from
-	 * userspace. Use in place of write(); return 0 or error.
-	 */
-	int (*write_u64)(struct cgroup_subsys_state *css, struct cftype *cft,
-			 u64 val);
-	/*
-	 * write_s64() is a signed version of write_u64()
-	 */
-	int (*write_s64)(struct cgroup_subsys_state *css, struct cftype *cft,
-			 s64 val);
-
-	/*
-	 * write() is the generic write callback which maps directly to
-	 * kernfs write operation and overrides all other operations.
-	 * Maximum write size is determined by ->max_write_len.  Use
-	 * of_css/cft() to access the associated css and cft.
-	 */
-	ssize_t (*write)(struct kernfs_open_file *of,
-			 char *buf, size_t nbytes, loff_t off);
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	struct lock_class_key	lockdep_key;
-#endif
-};
-
 extern struct cgroup_root cgrp_dfl_root;
 extern struct css_set init_css_set;
 
@@ -612,11 +244,6 @@ int cgroup_rm_cftypes(struct cftype *cfts);
 
 bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor);
 
-/*
- * Control Group taskset, used to pass around set of tasks to cgroup_subsys
- * methods.
- */
-struct cgroup_taskset;
 struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset);
 struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset);
 
@@ -629,84 +256,6 @@ struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset);
 	for ((task) = cgroup_taskset_first((tset)); (task);		\
 	     (task) = cgroup_taskset_next((tset)))
 
-/*
- * Control Group subsystem type.
- * See Documentation/cgroups/cgroups.txt for details
- */
-
-struct cgroup_subsys {
-	struct cgroup_subsys_state *(*css_alloc)(struct cgroup_subsys_state *parent_css);
-	int (*css_online)(struct cgroup_subsys_state *css);
-	void (*css_offline)(struct cgroup_subsys_state *css);
-	void (*css_released)(struct cgroup_subsys_state *css);
-	void (*css_free)(struct cgroup_subsys_state *css);
-	void (*css_reset)(struct cgroup_subsys_state *css);
-	void (*css_e_css_changed)(struct cgroup_subsys_state *css);
-
-	int (*can_attach)(struct cgroup_subsys_state *css,
-			  struct cgroup_taskset *tset);
-	void (*cancel_attach)(struct cgroup_subsys_state *css,
-			      struct cgroup_taskset *tset);
-	void (*attach)(struct cgroup_subsys_state *css,
-		       struct cgroup_taskset *tset);
-	void (*fork)(struct task_struct *task);
-	void (*exit)(struct cgroup_subsys_state *css,
-		     struct cgroup_subsys_state *old_css,
-		     struct task_struct *task);
-	void (*bind)(struct cgroup_subsys_state *root_css);
-
-	int disabled;
-	int early_init;
-
-	/*
-	 * If %false, this subsystem is properly hierarchical -
-	 * configuration, resource accounting and restriction on a parent
-	 * cgroup cover those of its children.  If %true, hierarchy support
-	 * is broken in some ways - some subsystems ignore hierarchy
-	 * completely while others are only implemented half-way.
-	 *
-	 * It's now disallowed to create nested cgroups if the subsystem is
-	 * broken and cgroup core will emit a warning message on such
-	 * cases.  Eventually, all subsystems will be made properly
-	 * hierarchical and this will go away.
-	 */
-	bool broken_hierarchy;
-	bool warned_broken_hierarchy;
-
-	/* the following two fields are initialized automtically during boot */
-	int id;
-#define MAX_CGROUP_TYPE_NAMELEN 32
-	const char *name;
-
-	/* link to parent, protected by cgroup_lock() */
-	struct cgroup_root *root;
-
-	/* idr for css->id */
-	struct idr css_idr;
-
-	/*
-	 * List of cftypes.  Each entry is the first entry of an array
-	 * terminated by zero length name.
-	 */
-	struct list_head cfts;
-
-	/*
-	 * Base cftypes which are automatically registered.  The two can
-	 * point to the same array.
-	 */
-	struct cftype *dfl_cftypes;	/* for the default hierarchy */
-	struct cftype *legacy_cftypes;	/* for the legacy hierarchies */
-
-	/*
-	 * A subsystem may depend on other subsystems.  When such subsystem
-	 * is enabled on a cgroup, the depended-upon subsystems are enabled
-	 * together if available.  Subsystems enabled due to dependency are
-	 * not visible to userland until explicitly enabled.  The following
-	 * specifies the mask of subsystems that this one depends on.
-	 */
-	unsigned int depends_on;
-};
-
 #define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys;
 #include <linux/cgroup_subsys.h>
 #undef SUBSYS
-- 
cgit v1.2.3


From c326aa2bb2209e10df4a381801bb34ca0f923038 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 13 May 2015 16:24:16 -0400
Subject: cgroup: reorganize include/linux/cgroup.h

From c4d440938b5e2015c70594fe6666a099c844f929 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 13 May 2015 16:21:40 -0400

Over time, cgroup.h grew organically and doesn't have much logical
structure at this point.  Separation of cgroup-defs.h in the previous
patch gives us a good chance for reorganizing cgroup.h as changes to
the header are likely to cause conflicts anyway.

This patch reorganizes cgroup.h so that it has consistent logical
grouping.

This is pure reorganization.

v2: Relocating #ifdef CONFIG_CGROUPS caused build failure when cgroup
    is disabled.  Dropped.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup.h | 552 ++++++++++++++++++++++++-------------------------
 1 file changed, 271 insertions(+), 281 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 96a2ecd5aa69..82319fb31cfe 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -22,16 +22,189 @@
 
 #ifdef CONFIG_CGROUPS
 
-extern int cgroup_init_early(void);
-extern int cgroup_init(void);
-extern void cgroup_fork(struct task_struct *p);
-extern void cgroup_post_fork(struct task_struct *p);
-extern void cgroup_exit(struct task_struct *p);
-extern int cgroupstats_build(struct cgroupstats *stats,
-				struct dentry *dentry);
+/* a css_task_iter should be treated as an opaque object */
+struct css_task_iter {
+	struct cgroup_subsys		*ss;
+
+	struct list_head		*cset_pos;
+	struct list_head		*cset_head;
+
+	struct list_head		*task_pos;
+	struct list_head		*tasks_head;
+	struct list_head		*mg_tasks_head;
+};
+
+extern struct cgroup_root cgrp_dfl_root;
+extern struct css_set init_css_set;
+
+#define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys;
+#include <linux/cgroup_subsys.h>
+#undef SUBSYS
+
+bool css_has_online_children(struct cgroup_subsys_state *css);
+struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss);
+struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup,
+					     struct cgroup_subsys *ss);
+struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
+						       struct cgroup_subsys *ss);
+
+bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor);
+int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
+int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
+
+int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
+int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
+int cgroup_rm_cftypes(struct cftype *cfts);
+
+char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen);
+int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry);
+int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
+		     struct pid *pid, struct task_struct *tsk);
+
+void cgroup_fork(struct task_struct *p);
+void cgroup_post_fork(struct task_struct *p);
+void cgroup_exit(struct task_struct *p);
+
+int cgroup_init_early(void);
+int cgroup_init(void);
+
+/*
+ * Iteration helpers and macros.
+ */
+
+struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos,
+					   struct cgroup_subsys_state *parent);
+struct cgroup_subsys_state *css_next_descendant_pre(struct cgroup_subsys_state *pos,
+						    struct cgroup_subsys_state *css);
+struct cgroup_subsys_state *css_rightmost_descendant(struct cgroup_subsys_state *pos);
+struct cgroup_subsys_state *css_next_descendant_post(struct cgroup_subsys_state *pos,
+						     struct cgroup_subsys_state *css);
+
+struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset);
+struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset);
+
+void css_task_iter_start(struct cgroup_subsys_state *css,
+			 struct css_task_iter *it);
+struct task_struct *css_task_iter_next(struct css_task_iter *it);
+void css_task_iter_end(struct css_task_iter *it);
+
+/**
+ * css_for_each_child - iterate through children of a css
+ * @pos: the css * to use as the loop cursor
+ * @parent: css whose children to walk
+ *
+ * Walk @parent's children.  Must be called under rcu_read_lock().
+ *
+ * If a subsystem synchronizes ->css_online() and the start of iteration, a
+ * css which finished ->css_online() is guaranteed to be visible in the
+ * future iterations and will stay visible until the last reference is put.
+ * A css which hasn't finished ->css_online() or already finished
+ * ->css_offline() may show up during traversal.  It's each subsystem's
+ * responsibility to synchronize against on/offlining.
+ *
+ * It is allowed to temporarily drop RCU read lock during iteration.  The
+ * caller is responsible for ensuring that @pos remains accessible until
+ * the start of the next iteration by, for example, bumping the css refcnt.
+ */
+#define css_for_each_child(pos, parent)					\
+	for ((pos) = css_next_child(NULL, (parent)); (pos);		\
+	     (pos) = css_next_child((pos), (parent)))
+
+/**
+ * css_for_each_descendant_pre - pre-order walk of a css's descendants
+ * @pos: the css * to use as the loop cursor
+ * @root: css whose descendants to walk
+ *
+ * Walk @root's descendants.  @root is included in the iteration and the
+ * first node to be visited.  Must be called under rcu_read_lock().
+ *
+ * If a subsystem synchronizes ->css_online() and the start of iteration, a
+ * css which finished ->css_online() is guaranteed to be visible in the
+ * future iterations and will stay visible until the last reference is put.
+ * A css which hasn't finished ->css_online() or already finished
+ * ->css_offline() may show up during traversal.  It's each subsystem's
+ * responsibility to synchronize against on/offlining.
+ *
+ * For example, the following guarantees that a descendant can't escape
+ * state updates of its ancestors.
+ *
+ * my_online(@css)
+ * {
+ *	Lock @css's parent and @css;
+ *	Inherit state from the parent;
+ *	Unlock both.
+ * }
+ *
+ * my_update_state(@css)
+ * {
+ *	css_for_each_descendant_pre(@pos, @css) {
+ *		Lock @pos;
+ *		if (@pos == @css)
+ *			Update @css's state;
+ *		else
+ *			Verify @pos is alive and inherit state from its parent;
+ *		Unlock @pos;
+ *	}
+ * }
+ *
+ * As long as the inheriting step, including checking the parent state, is
+ * enclosed inside @pos locking, double-locking the parent isn't necessary
+ * while inheriting.  The state update to the parent is guaranteed to be
+ * visible by walking order and, as long as inheriting operations to the
+ * same @pos are atomic to each other, multiple updates racing each other
+ * still result in the correct state.  It's guaranateed that at least one
+ * inheritance happens for any css after the latest update to its parent.
+ *
+ * If checking parent's state requires locking the parent, each inheriting
+ * iteration should lock and unlock both @pos->parent and @pos.
+ *
+ * Alternatively, a subsystem may choose to use a single global lock to
+ * synchronize ->css_online() and ->css_offline() against tree-walking
+ * operations.
+ *
+ * It is allowed to temporarily drop RCU read lock during iteration.  The
+ * caller is responsible for ensuring that @pos remains accessible until
+ * the start of the next iteration by, for example, bumping the css refcnt.
+ */
+#define css_for_each_descendant_pre(pos, css)				\
+	for ((pos) = css_next_descendant_pre(NULL, (css)); (pos);	\
+	     (pos) = css_next_descendant_pre((pos), (css)))
+
+/**
+ * css_for_each_descendant_post - post-order walk of a css's descendants
+ * @pos: the css * to use as the loop cursor
+ * @css: css whose descendants to walk
+ *
+ * Similar to css_for_each_descendant_pre() but performs post-order
+ * traversal instead.  @root is included in the iteration and the last
+ * node to be visited.
+ *
+ * If a subsystem synchronizes ->css_online() and the start of iteration, a
+ * css which finished ->css_online() is guaranteed to be visible in the
+ * future iterations and will stay visible until the last reference is put.
+ * A css which hasn't finished ->css_online() or already finished
+ * ->css_offline() may show up during traversal.  It's each subsystem's
+ * responsibility to synchronize against on/offlining.
+ *
+ * Note that the walk visibility guarantee example described in pre-order
+ * walk doesn't apply the same to post-order walks.
+ */
+#define css_for_each_descendant_post(pos, css)				\
+	for ((pos) = css_next_descendant_post(NULL, (css)); (pos);	\
+	     (pos) = css_next_descendant_post((pos), (css)))
+
+/**
+ * cgroup_taskset_for_each - iterate cgroup_taskset
+ * @task: the loop cursor
+ * @tset: taskset to iterate
+ */
+#define cgroup_taskset_for_each(task, tset)				\
+	for ((task) = cgroup_taskset_first((tset)); (task);		\
+	     (task) = cgroup_taskset_next((tset)))
 
-extern int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
-			    struct pid *pid, struct task_struct *tsk);
+/*
+ * Inline functions.
+ */
 
 /**
  * css_get - obtain a reference on the specified css
@@ -118,8 +291,87 @@ static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n)
 		percpu_ref_put_many(&css->refcnt, n);
 }
 
-extern struct cgroup_root cgrp_dfl_root;
-extern struct css_set init_css_set;
+/**
+ * task_css_set_check - obtain a task's css_set with extra access conditions
+ * @task: the task to obtain css_set for
+ * @__c: extra condition expression to be passed to rcu_dereference_check()
+ *
+ * A task's css_set is RCU protected, initialized and exited while holding
+ * task_lock(), and can only be modified while holding both cgroup_mutex
+ * and task_lock() while the task is alive.  This macro verifies that the
+ * caller is inside proper critical section and returns @task's css_set.
+ *
+ * The caller can also specify additional allowed conditions via @__c, such
+ * as locks used during the cgroup_subsys::attach() methods.
+ */
+#ifdef CONFIG_PROVE_RCU
+extern struct mutex cgroup_mutex;
+extern struct rw_semaphore css_set_rwsem;
+#define task_css_set_check(task, __c)					\
+	rcu_dereference_check((task)->cgroups,				\
+		lockdep_is_held(&cgroup_mutex) ||			\
+		lockdep_is_held(&css_set_rwsem) ||			\
+		((task)->flags & PF_EXITING) || (__c))
+#else
+#define task_css_set_check(task, __c)					\
+	rcu_dereference((task)->cgroups)
+#endif
+
+/**
+ * task_css_check - obtain css for (task, subsys) w/ extra access conds
+ * @task: the target task
+ * @subsys_id: the target subsystem ID
+ * @__c: extra condition expression to be passed to rcu_dereference_check()
+ *
+ * Return the cgroup_subsys_state for the (@task, @subsys_id) pair.  The
+ * synchronization rules are the same as task_css_set_check().
+ */
+#define task_css_check(task, subsys_id, __c)				\
+	task_css_set_check((task), (__c))->subsys[(subsys_id)]
+
+/**
+ * task_css_set - obtain a task's css_set
+ * @task: the task to obtain css_set for
+ *
+ * See task_css_set_check().
+ */
+static inline struct css_set *task_css_set(struct task_struct *task)
+{
+	return task_css_set_check(task, false);
+}
+
+/**
+ * task_css - obtain css for (task, subsys)
+ * @task: the target task
+ * @subsys_id: the target subsystem ID
+ *
+ * See task_css_check().
+ */
+static inline struct cgroup_subsys_state *task_css(struct task_struct *task,
+						   int subsys_id)
+{
+	return task_css_check(task, subsys_id, false);
+}
+
+/**
+ * task_css_is_root - test whether a task belongs to the root css
+ * @task: the target task
+ * @subsys_id: the target subsystem ID
+ *
+ * Test whether @task belongs to the root css on the specified subsystem.
+ * May be invoked in any context.
+ */
+static inline bool task_css_is_root(struct task_struct *task, int subsys_id)
+{
+	return task_css_check(task, subsys_id, true) ==
+		init_css_set.subsys[subsys_id];
+}
+
+static inline struct cgroup *task_cgroup(struct task_struct *task,
+					 int subsys_id)
+{
+	return task_css(task, subsys_id)->cgroup;
+}
 
 /**
  * cgroup_on_dfl - test whether a cgroup is on the default hierarchy
@@ -236,284 +488,22 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp)
 	pr_cont_kernfs_path(cgrp->kn);
 }
 
-char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen);
-
-int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
-int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
-int cgroup_rm_cftypes(struct cftype *cfts);
-
-bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor);
-
-struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset);
-struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset);
-
-/**
- * cgroup_taskset_for_each - iterate cgroup_taskset
- * @task: the loop cursor
- * @tset: taskset to iterate
- */
-#define cgroup_taskset_for_each(task, tset)				\
-	for ((task) = cgroup_taskset_first((tset)); (task);		\
-	     (task) = cgroup_taskset_next((tset)))
-
-#define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys;
-#include <linux/cgroup_subsys.h>
-#undef SUBSYS
-
-/**
- * task_css_set_check - obtain a task's css_set with extra access conditions
- * @task: the task to obtain css_set for
- * @__c: extra condition expression to be passed to rcu_dereference_check()
- *
- * A task's css_set is RCU protected, initialized and exited while holding
- * task_lock(), and can only be modified while holding both cgroup_mutex
- * and task_lock() while the task is alive.  This macro verifies that the
- * caller is inside proper critical section and returns @task's css_set.
- *
- * The caller can also specify additional allowed conditions via @__c, such
- * as locks used during the cgroup_subsys::attach() methods.
- */
-#ifdef CONFIG_PROVE_RCU
-extern struct mutex cgroup_mutex;
-extern struct rw_semaphore css_set_rwsem;
-#define task_css_set_check(task, __c)					\
-	rcu_dereference_check((task)->cgroups,				\
-		lockdep_is_held(&cgroup_mutex) ||			\
-		lockdep_is_held(&css_set_rwsem) ||			\
-		((task)->flags & PF_EXITING) || (__c))
-#else
-#define task_css_set_check(task, __c)					\
-	rcu_dereference((task)->cgroups)
-#endif
-
-/**
- * task_css_check - obtain css for (task, subsys) w/ extra access conds
- * @task: the target task
- * @subsys_id: the target subsystem ID
- * @__c: extra condition expression to be passed to rcu_dereference_check()
- *
- * Return the cgroup_subsys_state for the (@task, @subsys_id) pair.  The
- * synchronization rules are the same as task_css_set_check().
- */
-#define task_css_check(task, subsys_id, __c)				\
-	task_css_set_check((task), (__c))->subsys[(subsys_id)]
-
-/**
- * task_css_set - obtain a task's css_set
- * @task: the task to obtain css_set for
- *
- * See task_css_set_check().
- */
-static inline struct css_set *task_css_set(struct task_struct *task)
-{
-	return task_css_set_check(task, false);
-}
-
-/**
- * task_css - obtain css for (task, subsys)
- * @task: the target task
- * @subsys_id: the target subsystem ID
- *
- * See task_css_check().
- */
-static inline struct cgroup_subsys_state *task_css(struct task_struct *task,
-						   int subsys_id)
-{
-	return task_css_check(task, subsys_id, false);
-}
-
-/**
- * task_css_is_root - test whether a task belongs to the root css
- * @task: the target task
- * @subsys_id: the target subsystem ID
- *
- * Test whether @task belongs to the root css on the specified subsystem.
- * May be invoked in any context.
- */
-static inline bool task_css_is_root(struct task_struct *task, int subsys_id)
-{
-	return task_css_check(task, subsys_id, true) ==
-		init_css_set.subsys[subsys_id];
-}
-
-static inline struct cgroup *task_cgroup(struct task_struct *task,
-					 int subsys_id)
-{
-	return task_css(task, subsys_id)->cgroup;
-}
-
-struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos,
-					   struct cgroup_subsys_state *parent);
-
-struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss);
-
-/**
- * css_for_each_child - iterate through children of a css
- * @pos: the css * to use as the loop cursor
- * @parent: css whose children to walk
- *
- * Walk @parent's children.  Must be called under rcu_read_lock().
- *
- * If a subsystem synchronizes ->css_online() and the start of iteration, a
- * css which finished ->css_online() is guaranteed to be visible in the
- * future iterations and will stay visible until the last reference is put.
- * A css which hasn't finished ->css_online() or already finished
- * ->css_offline() may show up during traversal.  It's each subsystem's
- * responsibility to synchronize against on/offlining.
- *
- * It is allowed to temporarily drop RCU read lock during iteration.  The
- * caller is responsible for ensuring that @pos remains accessible until
- * the start of the next iteration by, for example, bumping the css refcnt.
- */
-#define css_for_each_child(pos, parent)					\
-	for ((pos) = css_next_child(NULL, (parent)); (pos);		\
-	     (pos) = css_next_child((pos), (parent)))
-
-struct cgroup_subsys_state *
-css_next_descendant_pre(struct cgroup_subsys_state *pos,
-			struct cgroup_subsys_state *css);
-
-struct cgroup_subsys_state *
-css_rightmost_descendant(struct cgroup_subsys_state *pos);
-
-/**
- * css_for_each_descendant_pre - pre-order walk of a css's descendants
- * @pos: the css * to use as the loop cursor
- * @root: css whose descendants to walk
- *
- * Walk @root's descendants.  @root is included in the iteration and the
- * first node to be visited.  Must be called under rcu_read_lock().
- *
- * If a subsystem synchronizes ->css_online() and the start of iteration, a
- * css which finished ->css_online() is guaranteed to be visible in the
- * future iterations and will stay visible until the last reference is put.
- * A css which hasn't finished ->css_online() or already finished
- * ->css_offline() may show up during traversal.  It's each subsystem's
- * responsibility to synchronize against on/offlining.
- *
- * For example, the following guarantees that a descendant can't escape
- * state updates of its ancestors.
- *
- * my_online(@css)
- * {
- *	Lock @css's parent and @css;
- *	Inherit state from the parent;
- *	Unlock both.
- * }
- *
- * my_update_state(@css)
- * {
- *	css_for_each_descendant_pre(@pos, @css) {
- *		Lock @pos;
- *		if (@pos == @css)
- *			Update @css's state;
- *		else
- *			Verify @pos is alive and inherit state from its parent;
- *		Unlock @pos;
- *	}
- * }
- *
- * As long as the inheriting step, including checking the parent state, is
- * enclosed inside @pos locking, double-locking the parent isn't necessary
- * while inheriting.  The state update to the parent is guaranteed to be
- * visible by walking order and, as long as inheriting operations to the
- * same @pos are atomic to each other, multiple updates racing each other
- * still result in the correct state.  It's guaranateed that at least one
- * inheritance happens for any css after the latest update to its parent.
- *
- * If checking parent's state requires locking the parent, each inheriting
- * iteration should lock and unlock both @pos->parent and @pos.
- *
- * Alternatively, a subsystem may choose to use a single global lock to
- * synchronize ->css_online() and ->css_offline() against tree-walking
- * operations.
- *
- * It is allowed to temporarily drop RCU read lock during iteration.  The
- * caller is responsible for ensuring that @pos remains accessible until
- * the start of the next iteration by, for example, bumping the css refcnt.
- */
-#define css_for_each_descendant_pre(pos, css)				\
-	for ((pos) = css_next_descendant_pre(NULL, (css)); (pos);	\
-	     (pos) = css_next_descendant_pre((pos), (css)))
-
-struct cgroup_subsys_state *
-css_next_descendant_post(struct cgroup_subsys_state *pos,
-			 struct cgroup_subsys_state *css);
-
-/**
- * css_for_each_descendant_post - post-order walk of a css's descendants
- * @pos: the css * to use as the loop cursor
- * @css: css whose descendants to walk
- *
- * Similar to css_for_each_descendant_pre() but performs post-order
- * traversal instead.  @root is included in the iteration and the last
- * node to be visited.
- *
- * If a subsystem synchronizes ->css_online() and the start of iteration, a
- * css which finished ->css_online() is guaranteed to be visible in the
- * future iterations and will stay visible until the last reference is put.
- * A css which hasn't finished ->css_online() or already finished
- * ->css_offline() may show up during traversal.  It's each subsystem's
- * responsibility to synchronize against on/offlining.
- *
- * Note that the walk visibility guarantee example described in pre-order
- * walk doesn't apply the same to post-order walks.
- */
-#define css_for_each_descendant_post(pos, css)				\
-	for ((pos) = css_next_descendant_post(NULL, (css)); (pos);	\
-	     (pos) = css_next_descendant_post((pos), (css)))
-
-bool css_has_online_children(struct cgroup_subsys_state *css);
-
-/* A css_task_iter should be treated as an opaque object */
-struct css_task_iter {
-	struct cgroup_subsys		*ss;
-
-	struct list_head		*cset_pos;
-	struct list_head		*cset_head;
-
-	struct list_head		*task_pos;
-	struct list_head		*tasks_head;
-	struct list_head		*mg_tasks_head;
-};
-
-void css_task_iter_start(struct cgroup_subsys_state *css,
-			 struct css_task_iter *it);
-struct task_struct *css_task_iter_next(struct css_task_iter *it);
-void css_task_iter_end(struct css_task_iter *it);
-
-int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
-int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
-
-struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup,
-					     struct cgroup_subsys *ss);
-struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
-						       struct cgroup_subsys *ss);
-
 #else /* !CONFIG_CGROUPS */
 
 struct cgroup_subsys_state;
 
-static inline int cgroup_init_early(void) { return 0; }
-static inline int cgroup_init(void) { return 0; }
+static inline void css_put(struct cgroup_subsys_state *css) {}
+static inline int cgroup_attach_task_all(struct task_struct *from,
+					 struct task_struct *t) { return 0; }
+static inline int cgroupstats_build(struct cgroupstats *stats,
+				    struct dentry *dentry) { return -EINVAL; }
+
 static inline void cgroup_fork(struct task_struct *p) {}
 static inline void cgroup_post_fork(struct task_struct *p) {}
 static inline void cgroup_exit(struct task_struct *p) {}
 
-static inline int cgroupstats_build(struct cgroupstats *stats,
-					struct dentry *dentry)
-{
-	return -EINVAL;
-}
-
-static inline void css_put(struct cgroup_subsys_state *css) {}
-
-/* No cgroups - nothing to do */
-static inline int cgroup_attach_task_all(struct task_struct *from,
-					 struct task_struct *t)
-{
-	return 0;
-}
+static inline int cgroup_init_early(void) { return 0; }
+static inline int cgroup_init(void) { return 0; }
 
 #endif /* !CONFIG_CGROUPS */
 
-- 
cgit v1.2.3


From 87e9b9f1d86c2ee9a10c2a4186a72d0af4cc963e Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Sat, 16 May 2015 01:38:15 +0200
Subject: PM / sleep: Make suspend-to-idle-specific code depend on
 CONFIG_SUSPEND

Since idle_should_freeze() is defined to always return 'false'
for CONFIG_SUSPEND unset, all of the code depending on it in
cpuidle_idle_call() is not necessary in that case.

Make that code depend on CONFIG_SUSPEND too to avoid building it
when it is not going to be used.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/cpuidle.h | 16 ++++++++++------
 include/linux/tick.h    | 12 ++++++++----
 2 files changed, 18 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 9c5e89254796..13ee266ca98c 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -151,10 +151,6 @@ extern void cpuidle_resume(void);
 extern int cpuidle_enable_device(struct cpuidle_device *dev);
 extern void cpuidle_disable_device(struct cpuidle_device *dev);
 extern int cpuidle_play_dead(void);
-extern int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
-				      struct cpuidle_device *dev);
-extern int cpuidle_enter_freeze(struct cpuidle_driver *drv,
-				struct cpuidle_device *dev);
 
 extern struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev);
 #else
@@ -190,14 +186,22 @@ static inline int cpuidle_enable_device(struct cpuidle_device *dev)
 {return -ENODEV; }
 static inline void cpuidle_disable_device(struct cpuidle_device *dev) { }
 static inline int cpuidle_play_dead(void) {return -ENODEV; }
+static inline struct cpuidle_driver *cpuidle_get_cpu_driver(
+	struct cpuidle_device *dev) {return NULL; }
+#endif
+
+#if defined(CONFIG_CPU_IDLE) && defined(CONFIG_SUSPEND)
+extern int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
+				      struct cpuidle_device *dev);
+extern int cpuidle_enter_freeze(struct cpuidle_driver *drv,
+				struct cpuidle_device *dev);
+#else
 static inline int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
 					     struct cpuidle_device *dev)
 {return -ENODEV; }
 static inline int cpuidle_enter_freeze(struct cpuidle_driver *drv,
 				       struct cpuidle_device *dev)
 {return -ENODEV; }
-static inline struct cpuidle_driver *cpuidle_get_cpu_driver(
-	struct cpuidle_device *dev) {return NULL; }
 #endif
 
 #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED
diff --git a/include/linux/tick.h b/include/linux/tick.h
index f8492da57ad3..ec6e8bc992bf 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -13,8 +13,6 @@
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
 extern void __init tick_init(void);
-extern void tick_freeze(void);
-extern void tick_unfreeze(void);
 /* Should be core only, but ARM BL switcher requires it */
 extern void tick_suspend_local(void);
 /* Should be core only, but XEN resume magic and ARM BL switcher require it */
@@ -23,14 +21,20 @@ extern void tick_handover_do_timer(void);
 extern void tick_cleanup_dead_cpu(int cpu);
 #else /* CONFIG_GENERIC_CLOCKEVENTS */
 static inline void tick_init(void) { }
-static inline void tick_freeze(void) { }
-static inline void tick_unfreeze(void) { }
 static inline void tick_suspend_local(void) { }
 static inline void tick_resume_local(void) { }
 static inline void tick_handover_do_timer(void) { }
 static inline void tick_cleanup_dead_cpu(int cpu) { }
 #endif /* !CONFIG_GENERIC_CLOCKEVENTS */
 
+#if defined(CONFIG_GENERIC_CLOCKEVENTS) && defined(CONFIG_SUSPEND)
+extern void tick_freeze(void);
+extern void tick_unfreeze(void);
+#else
+static inline void tick_freeze(void) { }
+static inline void tick_unfreeze(void) { }
+#endif
+
 #ifdef CONFIG_TICK_ONESHOT
 extern void tick_irq_enter(void);
 #  ifndef arch_needs_cpu
-- 
cgit v1.2.3


From ab3f02fc237211f0583c1e7ba3bf504747be9b8d Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 12 May 2015 10:52:27 +0200
Subject: locking/arch: Add WRITE_ONCE() to set_mb()

Since we assume set_mb() to result in a single store followed by a
full memory barrier, employ WRITE_ONCE().

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/compiler.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index a7c0941d10da..03e227ba481c 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -250,7 +250,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
 	({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })
 
 #define WRITE_ONCE(x, val) \
-	({ typeof(x) __val = (val); __write_once_size(&(x), &__val, sizeof(__val)); __val; })
+	({ union { typeof(x) __val; char __c[1]; } __u = { .__val = (val) }; __write_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })
 
 #endif /* __KERNEL__ */
 
-- 
cgit v1.2.3


From b92b8b35a2e38bde319fd1d68ec84628c1f1b0fb Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 12 May 2015 10:51:55 +0200
Subject: locking/arch: Rename set_mb() to smp_store_mb()

Since set_mb() is really about an smp_mb() -- not a IO/DMA barrier
like mb() rename it to match the recent smp_load_acquire() and
smp_store_release().

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 26a2e6122734..18f197223ebd 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -252,7 +252,7 @@ extern char ___assert_task_state[1 - 2*!!(
 #define set_task_state(tsk, state_value)			\
 	do {							\
 		(tsk)->task_state_change = _THIS_IP_;		\
-		set_mb((tsk)->state, (state_value));		\
+		smp_store_mb((tsk)->state, (state_value));		\
 	} while (0)
 
 /*
@@ -274,7 +274,7 @@ extern char ___assert_task_state[1 - 2*!!(
 #define set_current_state(state_value)				\
 	do {							\
 		current->task_state_change = _THIS_IP_;		\
-		set_mb(current->state, (state_value));		\
+		smp_store_mb(current->state, (state_value));		\
 	} while (0)
 
 #else
@@ -282,7 +282,7 @@ extern char ___assert_task_state[1 - 2*!!(
 #define __set_task_state(tsk, state_value)		\
 	do { (tsk)->state = (state_value); } while (0)
 #define set_task_state(tsk, state_value)		\
-	set_mb((tsk)->state, (state_value))
+	smp_store_mb((tsk)->state, (state_value))
 
 /*
  * set_current_state() includes a barrier so that the write of current->state
@@ -298,7 +298,7 @@ extern char ___assert_task_state[1 - 2*!!(
 #define __set_current_state(state_value)		\
 	do { current->state = (state_value); } while (0)
 #define set_current_state(state_value)			\
-	set_mb(current->state, (state_value))
+	smp_store_mb(current->state, (state_value))
 
 #endif
 
-- 
cgit v1.2.3


From 92cf211874e954027b8e91cc9a15485a50b58d6b Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 12 May 2015 16:41:46 +0200
Subject: sched/preempt: Merge preempt_mask.h into preempt.h

preempt_mask.h defines all the preempt_count semantics and related
symbols: preempt, softirq, hardirq, nmi, preempt active, need resched,
etc...

preempt.h defines the accessors and mutators of preempt_count.

But there is a messy dependency game around those two header files:

	* preempt_mask.h includes preempt.h in order to access preempt_count()

	* preempt_mask.h defines all preempt_count semantic and symbols
	  except PREEMPT_NEED_RESCHED that is needed by asm/preempt.h
	  Thus we need to define it from preempt.h, right before including
	  asm/preempt.h, instead of defining it to preempt_mask.h with the
	  other preempt_count symbols. Therefore the preempt_count semantics
	  happen to be spread out.

	* We plan to introduce preempt_active_[enter,exit]() to consolidate
	  preempt_schedule*() code. But we'll need to access both preempt_count
	  mutators (preempt_count_add()) and preempt_count symbols
	  (PREEMPT_ACTIVE, PREEMPT_OFFSET). The usual place to define preempt
	  operations is in preempt.h but then we'll need symbols in
	  preempt_mask.h which already includes preempt.h. So we end up with
	  a ressource circle dependency.

Lets merge preempt_mask.h into preempt.h to solve these dependency issues.
This way we gather semantic symbols and operation definition of
preempt_count in a single file.

This is a dumb copy-paste merge. Further merge re-arrangments are
performed in a subsequent patch to ease review.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1431441711-29753-2-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/bottom_half.h  |   1 -
 include/linux/hardirq.h      |   2 +-
 include/linux/preempt.h      | 111 ++++++++++++++++++++++++++++++++++++++++
 include/linux/preempt_mask.h | 117 -------------------------------------------
 include/linux/sched.h        |   2 +-
 5 files changed, 113 insertions(+), 120 deletions(-)
 delete mode 100644 include/linux/preempt_mask.h

(limited to 'include/linux')

diff --git a/include/linux/bottom_half.h b/include/linux/bottom_half.h
index 86c12c93e3cf..8fdcb783197d 100644
--- a/include/linux/bottom_half.h
+++ b/include/linux/bottom_half.h
@@ -2,7 +2,6 @@
 #define _LINUX_BH_H
 
 #include <linux/preempt.h>
-#include <linux/preempt_mask.h>
 
 #ifdef CONFIG_TRACE_IRQFLAGS
 extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt);
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index f4af03404b97..dfd59d6bc6f0 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -1,7 +1,7 @@
 #ifndef LINUX_HARDIRQ_H
 #define LINUX_HARDIRQ_H
 
-#include <linux/preempt_mask.h>
+#include <linux/preempt.h>
 #include <linux/lockdep.h>
 #include <linux/ftrace_irq.h>
 #include <linux/vtime.h>
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index de83b4eb1642..8cc0338a5e9a 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -17,6 +17,117 @@
 
 #include <asm/preempt.h>
 
+/*
+ * We put the hardirq and softirq counter into the preemption
+ * counter. The bitmask has the following meaning:
+ *
+ * - bits 0-7 are the preemption count (max preemption depth: 256)
+ * - bits 8-15 are the softirq count (max # of softirqs: 256)
+ *
+ * The hardirq count could in theory be the same as the number of
+ * interrupts in the system, but we run all interrupt handlers with
+ * interrupts disabled, so we cannot have nesting interrupts. Though
+ * there are a few palaeontologic drivers which reenable interrupts in
+ * the handler, so we need more than one bit here.
+ *
+ * PREEMPT_MASK:	0x000000ff
+ * SOFTIRQ_MASK:	0x0000ff00
+ * HARDIRQ_MASK:	0x000f0000
+ *     NMI_MASK:	0x00100000
+ * PREEMPT_ACTIVE:	0x00200000
+ */
+#define PREEMPT_BITS	8
+#define SOFTIRQ_BITS	8
+#define HARDIRQ_BITS	4
+#define NMI_BITS	1
+
+#define PREEMPT_SHIFT	0
+#define SOFTIRQ_SHIFT	(PREEMPT_SHIFT + PREEMPT_BITS)
+#define HARDIRQ_SHIFT	(SOFTIRQ_SHIFT + SOFTIRQ_BITS)
+#define NMI_SHIFT	(HARDIRQ_SHIFT + HARDIRQ_BITS)
+
+#define __IRQ_MASK(x)	((1UL << (x))-1)
+
+#define PREEMPT_MASK	(__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT)
+#define SOFTIRQ_MASK	(__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)
+#define HARDIRQ_MASK	(__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
+#define NMI_MASK	(__IRQ_MASK(NMI_BITS)     << NMI_SHIFT)
+
+#define PREEMPT_OFFSET	(1UL << PREEMPT_SHIFT)
+#define SOFTIRQ_OFFSET	(1UL << SOFTIRQ_SHIFT)
+#define HARDIRQ_OFFSET	(1UL << HARDIRQ_SHIFT)
+#define NMI_OFFSET	(1UL << NMI_SHIFT)
+
+#define SOFTIRQ_DISABLE_OFFSET	(2 * SOFTIRQ_OFFSET)
+
+#define PREEMPT_ACTIVE_BITS	1
+#define PREEMPT_ACTIVE_SHIFT	(NMI_SHIFT + NMI_BITS)
+#define PREEMPT_ACTIVE	(__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT)
+
+#define hardirq_count()	(preempt_count() & HARDIRQ_MASK)
+#define softirq_count()	(preempt_count() & SOFTIRQ_MASK)
+#define irq_count()	(preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
+				 | NMI_MASK))
+
+/*
+ * Are we doing bottom half or hardware interrupt processing?
+ * Are we in a softirq context? Interrupt context?
+ * in_softirq - Are we currently processing softirq or have bh disabled?
+ * in_serving_softirq - Are we currently processing softirq?
+ */
+#define in_irq()		(hardirq_count())
+#define in_softirq()		(softirq_count())
+#define in_interrupt()		(irq_count())
+#define in_serving_softirq()	(softirq_count() & SOFTIRQ_OFFSET)
+
+/*
+ * Are we in NMI context?
+ */
+#define in_nmi()	(preempt_count() & NMI_MASK)
+
+#if defined(CONFIG_PREEMPT_COUNT)
+# define PREEMPT_CHECK_OFFSET 1
+#else
+# define PREEMPT_CHECK_OFFSET 0
+#endif
+
+/*
+ * The preempt_count offset needed for things like:
+ *
+ *  spin_lock_bh()
+ *
+ * Which need to disable both preemption (CONFIG_PREEMPT_COUNT) and
+ * softirqs, such that unlock sequences of:
+ *
+ *  spin_unlock();
+ *  local_bh_enable();
+ *
+ * Work as expected.
+ */
+#define SOFTIRQ_LOCK_OFFSET (SOFTIRQ_DISABLE_OFFSET + PREEMPT_CHECK_OFFSET)
+
+/*
+ * Are we running in atomic context?  WARNING: this macro cannot
+ * always detect atomic context; in particular, it cannot know about
+ * held spinlocks in non-preemptible kernels.  Thus it should not be
+ * used in the general case to determine whether sleeping is possible.
+ * Do not use in_atomic() in driver code.
+ */
+#define in_atomic()	((preempt_count() & ~PREEMPT_ACTIVE) != 0)
+
+/*
+ * Check whether we were atomic before we did preempt_disable():
+ * (used by the scheduler, *after* releasing the kernel lock)
+ */
+#define in_atomic_preempt_off() \
+		((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET)
+
+#ifdef CONFIG_PREEMPT_COUNT
+# define preemptible()	(preempt_count() == 0 && !irqs_disabled())
+#else
+# define preemptible()	0
+#endif
+
 #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER)
 extern void preempt_count_add(int val);
 extern void preempt_count_sub(int val);
diff --git a/include/linux/preempt_mask.h b/include/linux/preempt_mask.h
deleted file mode 100644
index dbeec4d4a3be..000000000000
--- a/include/linux/preempt_mask.h
+++ /dev/null
@@ -1,117 +0,0 @@
-#ifndef LINUX_PREEMPT_MASK_H
-#define LINUX_PREEMPT_MASK_H
-
-#include <linux/preempt.h>
-
-/*
- * We put the hardirq and softirq counter into the preemption
- * counter. The bitmask has the following meaning:
- *
- * - bits 0-7 are the preemption count (max preemption depth: 256)
- * - bits 8-15 are the softirq count (max # of softirqs: 256)
- *
- * The hardirq count could in theory be the same as the number of
- * interrupts in the system, but we run all interrupt handlers with
- * interrupts disabled, so we cannot have nesting interrupts. Though
- * there are a few palaeontologic drivers which reenable interrupts in
- * the handler, so we need more than one bit here.
- *
- * PREEMPT_MASK:	0x000000ff
- * SOFTIRQ_MASK:	0x0000ff00
- * HARDIRQ_MASK:	0x000f0000
- *     NMI_MASK:	0x00100000
- * PREEMPT_ACTIVE:	0x00200000
- */
-#define PREEMPT_BITS	8
-#define SOFTIRQ_BITS	8
-#define HARDIRQ_BITS	4
-#define NMI_BITS	1
-
-#define PREEMPT_SHIFT	0
-#define SOFTIRQ_SHIFT	(PREEMPT_SHIFT + PREEMPT_BITS)
-#define HARDIRQ_SHIFT	(SOFTIRQ_SHIFT + SOFTIRQ_BITS)
-#define NMI_SHIFT	(HARDIRQ_SHIFT + HARDIRQ_BITS)
-
-#define __IRQ_MASK(x)	((1UL << (x))-1)
-
-#define PREEMPT_MASK	(__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT)
-#define SOFTIRQ_MASK	(__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)
-#define HARDIRQ_MASK	(__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
-#define NMI_MASK	(__IRQ_MASK(NMI_BITS)     << NMI_SHIFT)
-
-#define PREEMPT_OFFSET	(1UL << PREEMPT_SHIFT)
-#define SOFTIRQ_OFFSET	(1UL << SOFTIRQ_SHIFT)
-#define HARDIRQ_OFFSET	(1UL << HARDIRQ_SHIFT)
-#define NMI_OFFSET	(1UL << NMI_SHIFT)
-
-#define SOFTIRQ_DISABLE_OFFSET	(2 * SOFTIRQ_OFFSET)
-
-#define PREEMPT_ACTIVE_BITS	1
-#define PREEMPT_ACTIVE_SHIFT	(NMI_SHIFT + NMI_BITS)
-#define PREEMPT_ACTIVE	(__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT)
-
-#define hardirq_count()	(preempt_count() & HARDIRQ_MASK)
-#define softirq_count()	(preempt_count() & SOFTIRQ_MASK)
-#define irq_count()	(preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
-				 | NMI_MASK))
-
-/*
- * Are we doing bottom half or hardware interrupt processing?
- * Are we in a softirq context? Interrupt context?
- * in_softirq - Are we currently processing softirq or have bh disabled?
- * in_serving_softirq - Are we currently processing softirq?
- */
-#define in_irq()		(hardirq_count())
-#define in_softirq()		(softirq_count())
-#define in_interrupt()		(irq_count())
-#define in_serving_softirq()	(softirq_count() & SOFTIRQ_OFFSET)
-
-/*
- * Are we in NMI context?
- */
-#define in_nmi()	(preempt_count() & NMI_MASK)
-
-#if defined(CONFIG_PREEMPT_COUNT)
-# define PREEMPT_CHECK_OFFSET 1
-#else
-# define PREEMPT_CHECK_OFFSET 0
-#endif
-
-/*
- * The preempt_count offset needed for things like:
- *
- *  spin_lock_bh()
- *
- * Which need to disable both preemption (CONFIG_PREEMPT_COUNT) and
- * softirqs, such that unlock sequences of:
- *
- *  spin_unlock();
- *  local_bh_enable();
- *
- * Work as expected.
- */
-#define SOFTIRQ_LOCK_OFFSET (SOFTIRQ_DISABLE_OFFSET + PREEMPT_CHECK_OFFSET)
-
-/*
- * Are we running in atomic context?  WARNING: this macro cannot
- * always detect atomic context; in particular, it cannot know about
- * held spinlocks in non-preemptible kernels.  Thus it should not be
- * used in the general case to determine whether sleeping is possible.
- * Do not use in_atomic() in driver code.
- */
-#define in_atomic()	((preempt_count() & ~PREEMPT_ACTIVE) != 0)
-
-/*
- * Check whether we were atomic before we did preempt_disable():
- * (used by the scheduler, *after* releasing the kernel lock)
- */
-#define in_atomic_preempt_off() \
-		((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET)
-
-#ifdef CONFIG_PREEMPT_COUNT
-# define preemptible()	(preempt_count() == 0 && !irqs_disabled())
-#else
-# define preemptible()	0
-#endif
-
-#endif /* LINUX_PREEMPT_MASK_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5f8defa155cf..c53a1784d7a9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -25,7 +25,7 @@ struct sched_param {
 #include <linux/errno.h>
 #include <linux/nodemask.h>
 #include <linux/mm_types.h>
-#include <linux/preempt_mask.h>
+#include <linux/preempt.h>
 
 #include <asm/page.h>
 #include <asm/ptrace.h>
-- 
cgit v1.2.3


From 2e10e71ce88e3eaccfd09a045ae6ecebe657ba09 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 12 May 2015 16:41:47 +0200
Subject: sched/preempt: Rearrange a few symbols after headers merge

Adjust a few comments, and further integrate a few definitions after
the dumb headers copy.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1431441711-29753-3-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/preempt.h | 34 +++++++++++++++-------------------
 1 file changed, 15 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 8cc0338a5e9a..37974cd4f092 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -9,14 +9,6 @@
 #include <linux/linkage.h>
 #include <linux/list.h>
 
-/*
- * We use the MSB mostly because its available; see <linux/preempt_mask.h> for
- * the other bits -- can't include that header due to inclusion hell.
- */
-#define PREEMPT_NEED_RESCHED	0x80000000
-
-#include <asm/preempt.h>
-
 /*
  * We put the hardirq and softirq counter into the preemption
  * counter. The bitmask has the following meaning:
@@ -30,11 +22,12 @@
  * there are a few palaeontologic drivers which reenable interrupts in
  * the handler, so we need more than one bit here.
  *
- * PREEMPT_MASK:	0x000000ff
- * SOFTIRQ_MASK:	0x0000ff00
- * HARDIRQ_MASK:	0x000f0000
- *     NMI_MASK:	0x00100000
- * PREEMPT_ACTIVE:	0x00200000
+ *         PREEMPT_MASK:	0x000000ff
+ *         SOFTIRQ_MASK:	0x0000ff00
+ *         HARDIRQ_MASK:	0x000f0000
+ *             NMI_MASK:	0x00100000
+ *       PREEMPT_ACTIVE:	0x00200000
+ * PREEMPT_NEED_RESCHED:	0x80000000
  */
 #define PREEMPT_BITS	8
 #define SOFTIRQ_BITS	8
@@ -64,6 +57,12 @@
 #define PREEMPT_ACTIVE_SHIFT	(NMI_SHIFT + NMI_BITS)
 #define PREEMPT_ACTIVE	(__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT)
 
+/* We use the MSB mostly because its available */
+#define PREEMPT_NEED_RESCHED	0x80000000
+
+/* preempt_count() and related functions, depends on PREEMPT_NEED_RESCHED */
+#include <asm/preempt.h>
+
 #define hardirq_count()	(preempt_count() & HARDIRQ_MASK)
 #define softirq_count()	(preempt_count() & SOFTIRQ_MASK)
 #define irq_count()	(preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
@@ -122,12 +121,6 @@
 #define in_atomic_preempt_off() \
 		((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET)
 
-#ifdef CONFIG_PREEMPT_COUNT
-# define preemptible()	(preempt_count() == 0 && !irqs_disabled())
-#else
-# define preemptible()	0
-#endif
-
 #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER)
 extern void preempt_count_add(int val);
 extern void preempt_count_sub(int val);
@@ -160,6 +153,8 @@ do { \
 
 #define preempt_enable_no_resched() sched_preempt_enable_no_resched()
 
+#define preemptible()	(preempt_count() == 0 && !irqs_disabled())
+
 #ifdef CONFIG_PREEMPT
 #define preempt_enable() \
 do { \
@@ -232,6 +227,7 @@ do { \
 #define preempt_disable_notrace()		barrier()
 #define preempt_enable_no_resched_notrace()	barrier()
 #define preempt_enable_notrace()		barrier()
+#define preemptible()				0
 
 #endif /* CONFIG_PREEMPT_COUNT */
 
-- 
cgit v1.2.3


From 90b62b5129d5cb50f62f40e684de7a1961e57197 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 12 May 2015 16:41:48 +0200
Subject: sched/preempt: Rename PREEMPT_CHECK_OFFSET to PREEMPT_DISABLE_OFFSET

"CHECK" suggests it's only used as a comparison mask. But now it's used
further as a config-conditional preempt disabler offset. Lets
disambiguate this name.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1431441711-29753-4-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/preempt.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 37974cd4f092..4689ef210a13 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -85,9 +85,9 @@
 #define in_nmi()	(preempt_count() & NMI_MASK)
 
 #if defined(CONFIG_PREEMPT_COUNT)
-# define PREEMPT_CHECK_OFFSET 1
+# define PREEMPT_DISABLE_OFFSET 1
 #else
-# define PREEMPT_CHECK_OFFSET 0
+# define PREEMPT_DISABLE_OFFSET 0
 #endif
 
 /*
@@ -103,7 +103,7 @@
  *
  * Work as expected.
  */
-#define SOFTIRQ_LOCK_OFFSET (SOFTIRQ_DISABLE_OFFSET + PREEMPT_CHECK_OFFSET)
+#define SOFTIRQ_LOCK_OFFSET (SOFTIRQ_DISABLE_OFFSET + PREEMPT_DISABLE_OFFSET)
 
 /*
  * Are we running in atomic context?  WARNING: this macro cannot
@@ -119,7 +119,7 @@
  * (used by the scheduler, *after* releasing the kernel lock)
  */
 #define in_atomic_preempt_off() \
-		((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET)
+		((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_DISABLE_OFFSET)
 
 #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER)
 extern void preempt_count_add(int val);
-- 
cgit v1.2.3


From b30f0e3ffedfa52b1d67a302ae5860c49998e5e2 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 12 May 2015 16:41:49 +0200
Subject: sched/preempt: Optimize preemption operations on __schedule() callers

__schedule() disables preemption and some of its callers
(the preempt_schedule*() family) also set PREEMPT_ACTIVE.

So we have two preempt_count() modifications that could be performed
at once.

Lets remove the preemption disablement from __schedule() and pull
this responsibility to its callers in order to optimize preempt_count()
operations in a single place.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1431441711-29753-5-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/preempt.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 4689ef210a13..45da394f2779 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -137,6 +137,18 @@ extern void preempt_count_sub(int val);
 #define preempt_count_inc() preempt_count_add(1)
 #define preempt_count_dec() preempt_count_sub(1)
 
+#define preempt_active_enter() \
+do { \
+	preempt_count_add(PREEMPT_ACTIVE + PREEMPT_DISABLE_OFFSET); \
+	barrier(); \
+} while (0)
+
+#define preempt_active_exit() \
+do { \
+	barrier(); \
+	preempt_count_sub(PREEMPT_ACTIVE + PREEMPT_DISABLE_OFFSET); \
+} while (0)
+
 #ifdef CONFIG_PREEMPT_COUNT
 
 #define preempt_disable() \
-- 
cgit v1.2.3


From e017cf21ae82e0b36f026b22083a8ae67926f465 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 12 May 2015 16:41:50 +0200
Subject: sched/preempt: Fix out of date comment

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1431441711-29753-6-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/preempt.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 45da394f2779..4057696c641c 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -116,7 +116,7 @@
 
 /*
  * Check whether we were atomic before we did preempt_disable():
- * (used by the scheduler, *after* releasing the kernel lock)
+ * (used by the scheduler)
  */
 #define in_atomic_preempt_off() \
 		((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_DISABLE_OFFSET)
-- 
cgit v1.2.3


From 3e51f3c4004c9b01f66da03214a3e206f5ed627b Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 12 May 2015 16:41:51 +0200
Subject: sched/preempt: Remove PREEMPT_ACTIVE unmasking off in_atomic()

Now that PREEMPT_ACTIVE implies PREEMPT_DISABLE_OFFSET, ignoring
PREEMPT_ACTIVE from in_atomic() check isn't useful anymore.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1431441711-29753-7-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/preempt.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 4057696c641c..a1a00e14c14f 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -112,7 +112,7 @@
  * used in the general case to determine whether sleeping is possible.
  * Do not use in_atomic() in driver code.
  */
-#define in_atomic()	((preempt_count() & ~PREEMPT_ACTIVE) != 0)
+#define in_atomic()	(preempt_count() != 0)
 
 /*
  * Check whether we were atomic before we did preempt_disable():
-- 
cgit v1.2.3


From 8bcbde5480f9777f8b74d71493722c663e22c21b Mon Sep 17 00:00:00 2001
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 11 May 2015 17:52:06 +0200
Subject: sched/preempt, mm/fault: Count pagefault_disable() levels in
 pagefault_disabled

Until now, pagefault_disable()/pagefault_enabled() used the preempt
count to track whether in an environment with pagefaults disabled (can
be queried via in_atomic()).

This patch introduces a separate counter in task_struct to count the
level of pagefault_disable() calls. We'll keep manipulating the preempt
count to retain compatibility to existing pagefault handlers.

It is now possible to verify whether in a pagefault_disable() envionment
by calling pagefault_disabled(). In contrast to in_atomic() it will not
be influenced by preempt_enable()/preempt_disable().

This patch is based on a patch from Ingo Molnar.

Reviewed-and-tested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: David.Laight@ACULAB.COM
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: airlied@linux.ie
Cc: akpm@linux-foundation.org
Cc: benh@kernel.crashing.org
Cc: bigeasy@linutronix.de
Cc: borntraeger@de.ibm.com
Cc: daniel.vetter@intel.com
Cc: heiko.carstens@de.ibm.com
Cc: herbert@gondor.apana.org.au
Cc: hocko@suse.cz
Cc: hughd@google.com
Cc: mst@redhat.com
Cc: paulus@samba.org
Cc: ralf@linux-mips.org
Cc: schwidefsky@de.ibm.com
Cc: yang.shi@windriver.com
Link: http://lkml.kernel.org/r/1431359540-32227-2-git-send-email-dahi@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h   |  1 +
 include/linux/uaccess.h | 36 +++++++++++++++++++++++++++++-------
 2 files changed, 30 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index c53a1784d7a9..dd07ac03f82a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1788,6 +1788,7 @@ struct task_struct {
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
 	unsigned long	task_state_change;
 #endif
+	int pagefault_disabled;
 };
 
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index ecd3319dac33..23290cc93a24 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -2,20 +2,36 @@
 #define __LINUX_UACCESS_H__
 
 #include <linux/preempt.h>
+#include <linux/sched.h>
 #include <asm/uaccess.h>
 
+static __always_inline void pagefault_disabled_inc(void)
+{
+	current->pagefault_disabled++;
+}
+
+static __always_inline void pagefault_disabled_dec(void)
+{
+	current->pagefault_disabled--;
+	WARN_ON(current->pagefault_disabled < 0);
+}
+
 /*
- * These routines enable/disable the pagefault handler in that
- * it will not take any locks and go straight to the fixup table.
+ * These routines enable/disable the pagefault handler. If disabled, it will
+ * not take any locks and go straight to the fixup table.
+ *
+ * We increase the preempt and the pagefault count, to be able to distinguish
+ * whether we run in simple atomic context or in a real pagefault_disable()
+ * context.
+ *
+ * For now, after pagefault_disabled() has been called, we run in atomic
+ * context. User access methods will not sleep.
  *
- * They have great resemblance to the preempt_disable/enable calls
- * and in fact they are identical; this is because currently there is
- * no other way to make the pagefault handlers do this. So we do
- * disable preemption but we don't necessarily care about that.
  */
 static inline void pagefault_disable(void)
 {
 	preempt_count_inc();
+	pagefault_disabled_inc();
 	/*
 	 * make sure to have issued the store before a pagefault
 	 * can hit.
@@ -25,18 +41,24 @@ static inline void pagefault_disable(void)
 
 static inline void pagefault_enable(void)
 {
-#ifndef CONFIG_PREEMPT
 	/*
 	 * make sure to issue those last loads/stores before enabling
 	 * the pagefault handler again.
 	 */
 	barrier();
+	pagefault_disabled_dec();
+#ifndef CONFIG_PREEMPT
 	preempt_count_dec();
 #else
 	preempt_enable();
 #endif
 }
 
+/*
+ * Is the pagefault handler disabled? If so, user access methods will not sleep.
+ */
+#define pagefault_disabled() (current->pagefault_disabled != 0)
+
 #ifndef ARCH_HAS_NOCACHE_UACCESS
 
 static inline unsigned long __copy_from_user_inatomic_nocache(void *to,
-- 
cgit v1.2.3


From 9ec23531fd48031d1b6ca5366f5f967d17a8bc28 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 11 May 2015 17:52:07 +0200
Subject: sched/preempt, mm/fault: Trigger might_sleep() in might_fault() with
 disabled pagefaults

Commit 662bbcb2747c ("mm, sched: Allow uaccess in atomic with
pagefault_disable()") removed might_sleep() checks for all user access
code (that uses might_fault()).

The reason was to disable wrong "sleep in atomic" warnings in the
following scenario:

    pagefault_disable()
    rc = copy_to_user(...)
    pagefault_enable()

Which is valid, as pagefault_disable() increments the preempt counter
and therefore disables the pagefault handler. copy_to_user() will not
sleep and return an error code if a page is not available.

However, as all might_sleep() checks are removed,
CONFIG_DEBUG_ATOMIC_SLEEP would no longer detect the following scenario:

    spin_lock(&lock);
    rc = copy_to_user(...)
    spin_unlock(&lock)

If the kernel is compiled with preemption turned on, preempt_disable()
will make in_atomic() detect disabled preemption. The fault handler would
correctly never sleep on user access.
However, with preemption turned off, preempt_disable() is usually a NOP
(with !CONFIG_PREEMPT_COUNT), therefore in_atomic() will not be able to
detect disabled preemption nor disabled pagefaults. The fault handler
could sleep.
We really want to enable CONFIG_DEBUG_ATOMIC_SLEEP checks for user access
functions again, otherwise we can end up with horrible deadlocks.

Root of all evil is that pagefault_disable() acts almost as
preempt_disable(), depending on preemption being turned on/off.

As we now have pagefault_disabled(), we can use it to distinguish
whether user acces functions might sleep.

Convert might_fault() into a makro that calls __might_fault(), to
allow proper file + line messages in case of a might_sleep() warning.

Reviewed-and-tested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: David.Laight@ACULAB.COM
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: airlied@linux.ie
Cc: akpm@linux-foundation.org
Cc: benh@kernel.crashing.org
Cc: bigeasy@linutronix.de
Cc: borntraeger@de.ibm.com
Cc: daniel.vetter@intel.com
Cc: heiko.carstens@de.ibm.com
Cc: herbert@gondor.apana.org.au
Cc: hocko@suse.cz
Cc: hughd@google.com
Cc: mst@redhat.com
Cc: paulus@samba.org
Cc: ralf@linux-mips.org
Cc: schwidefsky@de.ibm.com
Cc: yang.shi@windriver.com
Link: http://lkml.kernel.org/r/1431359540-32227-3-git-send-email-dahi@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/kernel.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 3a5b48e52a9e..060dd7b61c6d 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -244,7 +244,8 @@ static inline u32 reciprocal_scale(u32 val, u32 ep_ro)
 
 #if defined(CONFIG_MMU) && \
 	(defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_DEBUG_ATOMIC_SLEEP))
-void might_fault(void);
+#define might_fault() __might_fault(__FILE__, __LINE__)
+void __might_fault(const char *file, int line);
 #else
 static inline void might_fault(void) { }
 #endif
-- 
cgit v1.2.3


From 2cb7c9cb426660b5ed58b643d9e7dd5d50ba901f Mon Sep 17 00:00:00 2001
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 11 May 2015 17:52:09 +0200
Subject: sched/preempt, mm/kmap: Explicitly disable/enable preemption in
 kmap_atomic_*

The existing code relies on pagefault_disable() implicitly disabling
preemption, so that no schedule will happen between kmap_atomic() and
kunmap_atomic().

Let's make this explicit, to prepare for pagefault_disable() not
touching preemption anymore.

Reviewed-and-tested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: David.Laight@ACULAB.COM
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: airlied@linux.ie
Cc: akpm@linux-foundation.org
Cc: benh@kernel.crashing.org
Cc: bigeasy@linutronix.de
Cc: borntraeger@de.ibm.com
Cc: daniel.vetter@intel.com
Cc: heiko.carstens@de.ibm.com
Cc: herbert@gondor.apana.org.au
Cc: hocko@suse.cz
Cc: hughd@google.com
Cc: mst@redhat.com
Cc: paulus@samba.org
Cc: ralf@linux-mips.org
Cc: schwidefsky@de.ibm.com
Cc: yang.shi@windriver.com
Link: http://lkml.kernel.org/r/1431359540-32227-5-git-send-email-dahi@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/highmem.h    | 2 ++
 include/linux/io-mapping.h | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 9286a46b7d69..6aefcd0031a6 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -65,6 +65,7 @@ static inline void kunmap(struct page *page)
 
 static inline void *kmap_atomic(struct page *page)
 {
+	preempt_disable();
 	pagefault_disable();
 	return page_address(page);
 }
@@ -73,6 +74,7 @@ static inline void *kmap_atomic(struct page *page)
 static inline void __kunmap_atomic(void *addr)
 {
 	pagefault_enable();
+	preempt_enable();
 }
 
 #define kmap_atomic_pfn(pfn)	kmap_atomic(pfn_to_page(pfn))
diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h
index 657fab4efab3..c27dde7215b5 100644
--- a/include/linux/io-mapping.h
+++ b/include/linux/io-mapping.h
@@ -141,6 +141,7 @@ static inline void __iomem *
 io_mapping_map_atomic_wc(struct io_mapping *mapping,
 			 unsigned long offset)
 {
+	preempt_disable();
 	pagefault_disable();
 	return ((char __force __iomem *) mapping) + offset;
 }
@@ -149,6 +150,7 @@ static inline void
 io_mapping_unmap_atomic(void __iomem *vaddr)
 {
 	pagefault_enable();
+	preempt_enable();
 }
 
 /* Non-atomic map/unmap */
-- 
cgit v1.2.3


From 70ffdb9393a7264a069265edded729078dcf0425 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 11 May 2015 17:52:11 +0200
Subject: mm/fault, arch: Use pagefault_disable() to check for disabled
 pagefaults in the handler

Introduce faulthandler_disabled() and use it to check for irq context and
disabled pagefaults (via pagefault_disable()) in the pagefault handlers.

Please note that we keep the in_atomic() checks in place - to detect
whether in irq context (in which case preemption is always properly
disabled).

In contrast, preempt_disable() should never be used to disable pagefaults.
With !CONFIG_PREEMPT_COUNT, preempt_disable() doesn't modify the preempt
counter, and therefore the result of in_atomic() differs.
We validate that condition by using might_fault() checks when calling
might_sleep().

Therefore, add a comment to faulthandler_disabled(), describing why this
is needed.

faulthandler_disabled() and pagefault_disable() are defined in
linux/uaccess.h, so let's properly add that include to all relevant files.

This patch is based on a patch from Thomas Gleixner.

Reviewed-and-tested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: David.Laight@ACULAB.COM
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: airlied@linux.ie
Cc: akpm@linux-foundation.org
Cc: benh@kernel.crashing.org
Cc: bigeasy@linutronix.de
Cc: borntraeger@de.ibm.com
Cc: daniel.vetter@intel.com
Cc: heiko.carstens@de.ibm.com
Cc: herbert@gondor.apana.org.au
Cc: hocko@suse.cz
Cc: hughd@google.com
Cc: mst@redhat.com
Cc: paulus@samba.org
Cc: ralf@linux-mips.org
Cc: schwidefsky@de.ibm.com
Cc: yang.shi@windriver.com
Link: http://lkml.kernel.org/r/1431359540-32227-7-git-send-email-dahi@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/uaccess.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 23290cc93a24..90786d2d74e5 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -59,6 +59,18 @@ static inline void pagefault_enable(void)
  */
 #define pagefault_disabled() (current->pagefault_disabled != 0)
 
+/*
+ * The pagefault handler is in general disabled by pagefault_disable() or
+ * when in irq context (via in_atomic()).
+ *
+ * This function should only be used by the fault handlers. Other users should
+ * stick to pagefault_disabled().
+ * Please NEVER use preempt_disable() to disable the fault handler. With
+ * !CONFIG_PREEMPT_COUNT, this is like a NOP. So the handler won't be disabled.
+ * in_atomic() will report different values based on !CONFIG_PREEMPT_COUNT.
+ */
+#define faulthandler_disabled() (pagefault_disabled() || in_atomic())
+
 #ifndef ARCH_HAS_NOCACHE_UACCESS
 
 static inline unsigned long __copy_from_user_inatomic_nocache(void *to,
-- 
cgit v1.2.3


From 8222dbe21e79338de92d5e1956cd1e3994cc9f93 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 11 May 2015 17:52:20 +0200
Subject: sched/preempt, mm/fault: Decouple preemption from the page fault
 logic

As the fault handlers now all rely on the pagefault_disabled() checks
and implicit preempt_disable() calls by pagefault_disable() have been
made explicit, we can completely rely on the pagefault_disableD counter.

So let's no longer touch the preempt count when disabling/enabling
pagefaults. After a call to pagefault_disable(), pagefault_disabled()
will return true, but in_atomic() won't.

Reviewed-and-tested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: David.Laight@ACULAB.COM
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: airlied@linux.ie
Cc: akpm@linux-foundation.org
Cc: benh@kernel.crashing.org
Cc: bigeasy@linutronix.de
Cc: borntraeger@de.ibm.com
Cc: daniel.vetter@intel.com
Cc: heiko.carstens@de.ibm.com
Cc: herbert@gondor.apana.org.au
Cc: hocko@suse.cz
Cc: hughd@google.com
Cc: mst@redhat.com
Cc: paulus@samba.org
Cc: ralf@linux-mips.org
Cc: schwidefsky@de.ibm.com
Cc: yang.shi@windriver.com
Link: http://lkml.kernel.org/r/1431359540-32227-16-git-send-email-dahi@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/uaccess.h | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 90786d2d74e5..ae572c138607 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -1,7 +1,6 @@
 #ifndef __LINUX_UACCESS_H__
 #define __LINUX_UACCESS_H__
 
-#include <linux/preempt.h>
 #include <linux/sched.h>
 #include <asm/uaccess.h>
 
@@ -20,17 +19,11 @@ static __always_inline void pagefault_disabled_dec(void)
  * These routines enable/disable the pagefault handler. If disabled, it will
  * not take any locks and go straight to the fixup table.
  *
- * We increase the preempt and the pagefault count, to be able to distinguish
- * whether we run in simple atomic context or in a real pagefault_disable()
- * context.
- *
- * For now, after pagefault_disabled() has been called, we run in atomic
- * context. User access methods will not sleep.
- *
+ * User access methods will not sleep when called from a pagefault_disabled()
+ * environment.
  */
 static inline void pagefault_disable(void)
 {
-	preempt_count_inc();
 	pagefault_disabled_inc();
 	/*
 	 * make sure to have issued the store before a pagefault
@@ -47,11 +40,6 @@ static inline void pagefault_enable(void)
 	 */
 	barrier();
 	pagefault_disabled_dec();
-#ifndef CONFIG_PREEMPT
-	preempt_count_dec();
-#else
-	preempt_enable();
-#endif
 }
 
 /*
-- 
cgit v1.2.3


From 80ed87c8a9ca0cad7ca66cf3bbdfb17559a66dcf Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 8 May 2015 14:23:45 +0200
Subject: sched/wait: Introduce TASK_NOLOAD and TASK_IDLE

Currently people use TASK_INTERRUPTIBLE to idle kthreads and wait for
'work' because TASK_UNINTERRUPTIBLE contributes to the loadavg. Having
all idle kthreads contribute to the loadavg is somewhat silly.

Now mostly this works OK, because kthreads have all their signals
masked. However there's a few sites where this is causing problems and
TASK_UNINTERRUPTIBLE should be used, except for that loadavg issue.

This patch adds TASK_NOLOAD which, when combined with
TASK_UNINTERRUPTIBLE avoids the loadavg accounting.

As most of imagined usage sites are loops where a thread wants to
idle, waiting for work, a helper TASK_IDLE is introduced.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Julian Anastasov <ja@ssi.bg>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: NeilBrown <neilb@suse.de>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index dd07ac03f82a..7de815c6fa78 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -218,9 +218,10 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
 #define TASK_WAKEKILL		128
 #define TASK_WAKING		256
 #define TASK_PARKED		512
-#define TASK_STATE_MAX		1024
+#define TASK_NOLOAD		1024
+#define TASK_STATE_MAX		2048
 
-#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWP"
+#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPN"
 
 extern char ___assert_task_state[1 - 2*!!(
 		sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)];
@@ -230,6 +231,8 @@ extern char ___assert_task_state[1 - 2*!!(
 #define TASK_STOPPED		(TASK_WAKEKILL | __TASK_STOPPED)
 #define TASK_TRACED		(TASK_WAKEKILL | __TASK_TRACED)
 
+#define TASK_IDLE		(TASK_UNINTERRUPTIBLE | TASK_NOLOAD)
+
 /* Convenience macros for the sake of wake_up */
 #define TASK_NORMAL		(TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)
 #define TASK_ALL		(TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED)
@@ -245,7 +248,8 @@ extern char ___assert_task_state[1 - 2*!!(
 			((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
 #define task_contributes_to_load(task)	\
 				((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
-				 (task->flags & PF_FROZEN) == 0)
+				 (task->flags & PF_FROZEN) == 0 && \
+				 (task->state & TASK_NOLOAD) == 0)
 
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
 
-- 
cgit v1.2.3


From f03123783d4e43cd59df58e23e963136e04f8280 Mon Sep 17 00:00:00 2001
From: Ramakrishna Pallala <ramakrishna.pallala@intel.com>
Date: Thu, 30 Apr 2015 20:44:45 +0530
Subject: extcon: axp288: Add axp288 extcon driver support

This patch adds the extcon support for AXP288 PMIC which
has the BC1.2 charger detection capability. Additionally
it also adds the USB mux switching support b/w SOC and PMIC
based on GPIO control.

Signed-off-by: Ramakrishna Pallala <ramakrishna.pallala@intel.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
[cw00.choi: Modify the log message to keep the consistent log message pattern]
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 include/linux/mfd/axp20x.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h
index dfabd6db7ddf..4ed8071d062e 100644
--- a/include/linux/mfd/axp20x.h
+++ b/include/linux/mfd/axp20x.h
@@ -275,4 +275,9 @@ struct axp20x_fg_pdata {
 	int thermistor_curve[MAX_THERM_CURVE_SIZE][2];
 };
 
+struct axp288_extcon_pdata {
+	/* GPIO pin control to switch D+/D- lines b/w PMIC and SOC */
+	struct gpio_desc *gpio_mux_cntl;
+};
+
 #endif /* __LINUX_MFD_AXP20X_H */
-- 
cgit v1.2.3


From 707d7550875a9bd245ce5f1077d2d2cb44eab218 Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Wed, 15 Apr 2015 13:57:51 +0900
Subject: extcon: Add extcon_get_edev_name() API to get the extcon device name

This patch adds the extcon_get_edev_name() API to get the name of extcon device
because all information inclued in the structure extcon_dev should be accessed
by extcon core API instead of directly accessing the data.

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 include/linux/extcon.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/extcon.h b/include/linux/extcon.h
index 36f49c405dfb..e2cf6254c86a 100644
--- a/include/linux/extcon.h
+++ b/include/linux/extcon.h
@@ -259,6 +259,10 @@ extern int extcon_unregister_notifier(struct extcon_dev *edev,
  * This function use phandle of devicetree to get extcon device directly.
  */
 extern struct extcon_dev *extcon_get_edev_by_phandle(struct device *dev, int index);
+
+/* Following API to get information of extcon device */
+extern const char *extcon_get_edev_name(struct extcon_dev *edev);
+
 #else /* CONFIG_EXTCON */
 static inline int extcon_dev_register(struct extcon_dev *edev)
 {
-- 
cgit v1.2.3


From b9ec23c08a0274d31ee626f14b359563ea0cae46 Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Fri, 24 Apr 2015 14:48:52 +0900
Subject: extcon: Fix the checkpatch warning and minor coding style issue

This patch clean up the extcon core driver by fixing the checkpatch warning
and minor coding style issue.

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 include/linux/extcon.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/extcon.h b/include/linux/extcon.h
index e2cf6254c86a..799474d9dc48 100644
--- a/include/linux/extcon.h
+++ b/include/linux/extcon.h
@@ -97,8 +97,8 @@ struct extcon_cable;
  * @state:		Attach/detach state of this extcon. Do not provide at
  *			register-time.
  * @nh:			Notifier for the state change events from this extcon
- * @entry:		To support list of extcon devices so that users can search
- *			for extcon devices based on the extcon name.
+ * @entry:		To support list of extcon devices so that users can
+ *			search for extcon devices based on the extcon name.
  * @lock:
  * @max_supported:	Internal value to store the number of cables.
  * @extcon_dev_type:	Device_type struct to provide attribute_groups
@@ -258,7 +258,8 @@ extern int extcon_unregister_notifier(struct extcon_dev *edev,
  * Following API get the extcon device from devicetree.
  * This function use phandle of devicetree to get extcon device directly.
  */
-extern struct extcon_dev *extcon_get_edev_by_phandle(struct device *dev, int index);
+extern struct extcon_dev *extcon_get_edev_by_phandle(struct device *dev,
+						     int index);
 
 /* Following API to get information of extcon device */
 extern const char *extcon_get_edev_name(struct extcon_dev *edev);
-- 
cgit v1.2.3


From 9e86b2ad4c11fd52ee8133abce7a29e0b32d29a7 Mon Sep 17 00:00:00 2001
From: Inha Song <ideal.song@samsung.com>
Date: Mon, 4 May 2015 13:42:13 +0900
Subject: extcon: arizona: Add support for select accessory detect mode when
 headphone detection

This patch add support for select accessory detect mode to HPDETL or HPDETR.
Arizona provides a headphone detection circuit on the HPDETL and HPDETR pins
to measure the impedance of an external load connected to the headphone.

Depending on board design, headphone detect pins can change to HPDETR or HPDETL.

Signed-off-by: Inha Song <ideal.song@samsung.com>
Acked-by: Lee Jones <lee@kernel.org>
Acked-by: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 include/linux/mfd/arizona/pdata.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/arizona/pdata.h b/include/linux/mfd/arizona/pdata.h
index 1789cb0f4f17..aa5c48b06755 100644
--- a/include/linux/mfd/arizona/pdata.h
+++ b/include/linux/mfd/arizona/pdata.h
@@ -121,6 +121,9 @@ struct arizona_pdata {
 	/** GPIO used for mic isolation with HPDET */
 	int hpdet_id_gpio;
 
+	/** Channel to use for headphone detection */
+	unsigned int hpdet_channel;
+
 	/** Extra debounce timeout used during initial mic detection (ms) */
 	int micd_detect_debounce;
 
-- 
cgit v1.2.3


From ca42aaf0c8616cde6161ea4391dff364efeee46a Mon Sep 17 00:00:00 2001
From: Nicholas Mc Guire <hofrat@osadl.org>
Date: Mon, 18 May 2015 14:19:13 +0200
Subject: time: Refactor msecs_to_jiffies

Refactor the msecs_to_jiffies conditional code part in time.c and
jiffies.h putting it into conditional functions rather than #ifdefs
to improve readability.

[ tglx: Verified that there is no binary code change ]

Signed-off-by: Nicholas Mc Guire <hofrat@osadl.org>
Cc: Masahiro Yamada <yamada.m@jp.panasonic.com>
Cc: Sam Ravnborg <sam@ravnborg.org>
Cc: Joe Perches <joe@perches.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Andrew Hunter <ahh@google.com>
Cc: Paul Turner <pjt@google.com>
Cc: Michal Marek <mmarek@suse.cz>
Link: http://lkml.kernel.org/r/1431951554-5563-2-git-send-email-hofrat@osadl.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/jiffies.h | 64 ++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 63 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index c367cbdf73ab..9527ddbb0f1b 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -7,6 +7,7 @@
 #include <linux/time.h>
 #include <linux/timex.h>
 #include <asm/param.h>			/* for HZ */
+#include <generated/timeconst.h>
 
 /*
  * The following defines establish the engineering parameters of the PLL
@@ -288,7 +289,68 @@ static inline u64 jiffies_to_nsecs(const unsigned long j)
 	return (u64)jiffies_to_usecs(j) * NSEC_PER_USEC;
 }
 
-extern unsigned long msecs_to_jiffies(const unsigned int m);
+extern unsigned long __msecs_to_jiffies(const unsigned int m);
+#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
+/*
+ * HZ is equal to or smaller than 1000, and 1000 is a nice round
+ * multiple of HZ, divide with the factor between them, but round
+ * upwards:
+ */
+static inline unsigned long _msecs_to_jiffies(const unsigned int m)
+{
+		return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ);
+}
+#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC)
+/*
+ * HZ is larger than 1000, and HZ is a nice round multiple of 1000 -
+ * simply multiply with the factor between them.
+ *
+ * But first make sure the multiplication result cannot overflow:
+ */
+static inline unsigned long _msecs_to_jiffies(const unsigned int m)
+{
+		if (m > jiffies_to_msecs(MAX_JIFFY_OFFSET))
+			return MAX_JIFFY_OFFSET;
+		return m * (HZ / MSEC_PER_SEC);
+}
+#else
+/*
+ * Generic case - multiply, round and divide. But first check that if
+ * we are doing a net multiplication, that we wouldn't overflow:
+ */
+static inline unsigned long _msecs_to_jiffies(const unsigned int m)
+{
+		if (HZ > MSEC_PER_SEC && m > jiffies_to_msecs(MAX_JIFFY_OFFSET))
+			return MAX_JIFFY_OFFSET;
+
+		return (MSEC_TO_HZ_MUL32 * m + MSEC_TO_HZ_ADJ32)
+			>> MSEC_TO_HZ_SHR32;
+}
+#endif
+/**
+ * msecs_to_jiffies: - convert milliseconds to jiffies
+ * @m:	time in milliseconds
+ *
+ * conversion is done as follows:
+ *
+ * - negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET)
+ *
+ * - 'too large' values [that would result in larger than
+ *   MAX_JIFFY_OFFSET values] mean 'infinite timeout' too.
+ *
+ * - all other values are converted to jiffies by either multiplying
+ *   the input value by a factor or dividing it with a factor and
+ *   handling any 32-bit overflows.
+ *   for the details see __msecs_to_jiffies()
+ *
+ * the HZ range specific helpers _msecs_to_jiffies() are called from
+ * __msecs_to_jiffies().
+ */
+static inline unsigned long msecs_to_jiffies(const unsigned int m)
+{
+	return __msecs_to_jiffies(m);
+}
+
 extern unsigned long usecs_to_jiffies(const unsigned int u);
 extern unsigned long timespec_to_jiffies(const struct timespec *value);
 extern void jiffies_to_timespec(const unsigned long jiffies,
-- 
cgit v1.2.3


From daa67b4b70568a07fef3cffacb2055891bf42ddb Mon Sep 17 00:00:00 2001
From: Nicholas Mc Guire <hofrat@osadl.org>
Date: Mon, 18 May 2015 14:19:14 +0200
Subject: time: Allow gcc to fold constants when possible

To allow constant folding in msecs_to_jiffies() conditionally calls
the HZ dependent _msecs_to_jiffies() helpers or, when gcc can not
figure out constant folding, __msecs_to_jiffies which is the renamed
original msecs_to_jiffies() function.

Signed-off-by: Nicholas Mc Guire <hofrat@osadl.org>
Cc: Masahiro Yamada <yamada.m@jp.panasonic.com>
Cc: Sam Ravnborg <sam@ravnborg.org>
Cc: Joe Perches <joe@perches.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Andrew Hunter <ahh@google.com>
Cc: Paul Turner <pjt@google.com>
Cc: Michal Marek <mmarek@suse.cz>
Link: http://lkml.kernel.org/r/1431951554-5563-3-git-send-email-hofrat@osadl.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/jiffies.h | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 9527ddbb0f1b..5e75af6cf1bc 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -343,12 +343,24 @@ static inline unsigned long _msecs_to_jiffies(const unsigned int m)
  *   handling any 32-bit overflows.
  *   for the details see __msecs_to_jiffies()
  *
- * the HZ range specific helpers _msecs_to_jiffies() are called from
- * __msecs_to_jiffies().
+ * msecs_to_jiffies() checks for the passed in value being a constant
+ * via __builtin_constant_p() allowing gcc to eliminate most of the
+ * code, __msecs_to_jiffies() is called if the value passed does not
+ * allow constant folding and the actual conversion must be done at
+ * runtime.
+ * the HZ range specific helpers _msecs_to_jiffies() are called both
+ * directly here and from __msecs_to_jiffies() in the case where
+ * constant folding is not possible.
  */
 static inline unsigned long msecs_to_jiffies(const unsigned int m)
 {
-	return __msecs_to_jiffies(m);
+	if (__builtin_constant_p(m)) {
+		if ((int)m < 0)
+			return MAX_JIFFY_OFFSET;
+		return _msecs_to_jiffies(m);
+	} else {
+		return __msecs_to_jiffies(m);
+	}
 }
 
 extern unsigned long usecs_to_jiffies(const unsigned int u);
-- 
cgit v1.2.3


From 0a4377de305684c883bf90ad21e3cbdeead70f5c Mon Sep 17 00:00:00 2001
From: Jiang Liu <jiang.liu@linux.intel.com>
Date: Tue, 19 May 2015 17:07:14 +0800
Subject: genirq: Introduce irq_set_vcpu_affinity() to target an interrupt to a
 VCPU

With Posted-Interrupts support in Intel CPU and IOMMU, an external
interrupt from assigned-devices could be directly delivered to a
virtual CPU in a virtual machine. Instead of hacking KVM and Intel
IOMMU drivers, we propose a platform independent interface to target
an interrupt to a specific virtual CPU in a virtual machine, or set
virtual CPU affinity for an interrupt.

By adopting this new interface and the hierarchy irqdomain, we could
easily support posted-interrupts on Intel platforms, and also provide
flexible enough interfaces for other platforms to support similar
features.

Here is the usage scenario for this interface:
Guest update MSI/MSI-X interrupt configuration
        -->QEMU and KVM handle this
        -->KVM call this interface (passing posted interrupts descriptor
           and guest vector)
        -->irq core will transfer the control to IOMMU
        -->IOMMU will do the real work of updating IRTE (IRTE has new
           format for VT-d Posted-Interrupts)

Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
Signed-off-by: Feng Wu <feng.wu@intel.com>
Link: http://lkml.kernel.org/r/1432026437-16560-2-git-send-email-feng.wu@intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 62c6901cab55..48cb7d1aa58f 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -327,6 +327,7 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
  * @irq_write_msi_msg:	optional to write message content for MSI
  * @irq_get_irqchip_state:	return the internal state of an interrupt
  * @irq_set_irqchip_state:	set the internal state of a interrupt
+ * @irq_set_vcpu_affinity:	optional to target a vCPU in a virtual machine
  * @flags:		chip specific flags
  */
 struct irq_chip {
@@ -369,6 +370,8 @@ struct irq_chip {
 	int		(*irq_get_irqchip_state)(struct irq_data *data, enum irqchip_irq_state which, bool *state);
 	int		(*irq_set_irqchip_state)(struct irq_data *data, enum irqchip_irq_state which, bool state);
 
+	int		(*irq_set_vcpu_affinity)(struct irq_data *data, void *vcpu_info);
+
 	unsigned long	flags;
 };
 
@@ -422,6 +425,7 @@ extern void irq_cpu_online(void);
 extern void irq_cpu_offline(void);
 extern int irq_set_affinity_locked(struct irq_data *data,
 				   const struct cpumask *cpumask, bool force);
+extern int irq_set_vcpu_affinity(unsigned int irq, void *vcpu_info);
 
 #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ)
 void irq_move_irq(struct irq_data *data);
@@ -467,6 +471,8 @@ extern int irq_chip_set_affinity_parent(struct irq_data *data,
 					const struct cpumask *dest,
 					bool force);
 extern int irq_chip_set_wake_parent(struct irq_data *data, unsigned int on);
+extern int irq_chip_set_vcpu_affinity_parent(struct irq_data *data,
+					     void *vcpu_info);
 #endif
 
 /* Handling of unhandled and spurious interrupts: */
-- 
cgit v1.2.3


From 8fff52fd50934580c5108afed12043a774edf728 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 3 Apr 2015 09:04:04 +0530
Subject: clockevents: Introduce CLOCK_EVT_STATE_ONESHOT_STOPPED state

When no timers/hrtimers are pending, the expiry time is set to a
special value: 'KTIME_MAX'. This normally happens with
NO_HZ_{IDLE|FULL} in both LOWRES/HIGHRES modes.

When 'expiry == KTIME_MAX', we either cancel the 'tick-sched' hrtimer
(NOHZ_MODE_HIGHRES) or skip reprogramming clockevent device
(NOHZ_MODE_LOWRES).  But, the clockevent device is already
reprogrammed from the tick-handler for next tick.

As the clock event device is programmed in ONESHOT mode it will at
least fire one more time (unnecessarily). Timers on few
implementations (like arm_arch_timer, etc.) only support PERIODIC mode
and their drivers emulate ONESHOT over that. Which means that on these
platforms we will get spurious interrupts periodically (at last
programmed interval rate, normally tick rate).

In order to avoid spurious interrupts, the clockevent device should be
stopped or its interrupts should be masked.

A simple (yet hacky) solution to get this fixed could be: update
hrtimer_force_reprogram() to always reprogram clockevent device and
update clockevent drivers to STOP generating events (or delay it to
max time) when 'expires' is set to KTIME_MAX. But the drawback here is
that every clockevent driver has to be hacked for this particular case
and its very easy for new ones to miss this.

However, Thomas suggested to add an optional state ONESHOT_STOPPED to
solve this problem: lkml.org/lkml/2014/5/9/508.

This patch adds support for ONESHOT_STOPPED state in clockevents
core. It will only be available to drivers that implement the
state-specific callbacks instead of the legacy ->set_mode() callback.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Preeti U. Murthy <preeti@linux.vnet.ibm.com>
Cc: linaro-kernel@lists.linaro.org
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Kevin Hilman <khilman@linaro.org>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/b8b383a03ac07b13312c16850b5106b82e4245b5.1428031396.git.viresh.kumar@linaro.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/clockchips.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 96c280b2c263..271fa4c8eb29 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -37,12 +37,15 @@ enum clock_event_mode {
  *		reached from DETACHED or SHUTDOWN.
  * ONESHOT:	Device is programmed to generate event only once. Can be reached
  *		from DETACHED or SHUTDOWN.
+ * ONESHOT_STOPPED: Device was programmed in ONESHOT mode and is temporarily
+ *		    stopped.
  */
 enum clock_event_state {
 	CLOCK_EVT_STATE_DETACHED,
 	CLOCK_EVT_STATE_SHUTDOWN,
 	CLOCK_EVT_STATE_PERIODIC,
 	CLOCK_EVT_STATE_ONESHOT,
+	CLOCK_EVT_STATE_ONESHOT_STOPPED,
 };
 
 /*
@@ -90,6 +93,7 @@ enum clock_event_state {
  * @set_mode:		legacy set mode function, only for modes <= CLOCK_EVT_MODE_RESUME.
  * @set_state_periodic:	switch state to periodic, if !set_mode
  * @set_state_oneshot:	switch state to oneshot, if !set_mode
+ * @set_state_oneshot_stopped: switch state to oneshot_stopped, if !set_mode
  * @set_state_shutdown:	switch state to shutdown, if !set_mode
  * @tick_resume:	resume clkevt device, if !set_mode
  * @broadcast:		function to broadcast events
@@ -121,11 +125,12 @@ struct clock_event_device {
 	 * State transition callback(s): Only one of the two groups should be
 	 * defined:
 	 * - set_mode(), only for modes <= CLOCK_EVT_MODE_RESUME.
-	 * - set_state_{shutdown|periodic|oneshot}(), tick_resume().
+	 * - set_state_{shutdown|periodic|oneshot|oneshot_stopped}(), tick_resume().
 	 */
 	void			(*set_mode)(enum clock_event_mode mode, struct clock_event_device *);
 	int			(*set_state_periodic)(struct clock_event_device *);
 	int			(*set_state_oneshot)(struct clock_event_device *);
+	int			(*set_state_oneshot_stopped)(struct clock_event_device *);
 	int			(*set_state_shutdown)(struct clock_event_device *);
 	int			(*tick_resume)(struct clock_event_device *);
 
-- 
cgit v1.2.3


From 4ecd4fef3a074c8bb43c391a57742c422469ebbd Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 7 May 2015 09:38:13 +0200
Subject: block: use an atomic_t for mq_freeze_depth

lockdep gets unhappy about the not disabling irqs when using the queue_lock
around it.  Instead of trying to fix that up just switch to an atomic_t
and get rid of the lock.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blkdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 2da818a48097..bc917956a6d0 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -444,7 +444,7 @@ struct request_queue {
 	struct mutex		sysfs_lock;
 
 	int			bypass_depth;
-	int			mq_freeze_depth;
+	atomic_t		mq_freeze_depth;
 
 #if defined(CONFIG_BLK_DEV_BSG)
 	bsg_job_fn		*bsg_job_fn;
-- 
cgit v1.2.3


From b25de9d6da49b1a8760a89672283128aa8c78345 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 24 Apr 2015 21:41:01 +0200
Subject: block: remove BIO_EOPNOTSUPP

Since the big barrier rewrite/removal in 2007 we never fail FLUSH or
FUA requests, which means we can remove the magic BIO_EOPNOTSUPP flag
to help propagating those to the buffer_head layer.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blk_types.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 93d2e7153816..daf95915d104 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -118,7 +118,6 @@ struct bio {
 #define BIO_CLONED	4	/* doesn't own data */
 #define BIO_BOUNCED	5	/* bio is a bounce bio */
 #define BIO_USER_MAPPED 6	/* contains user pages */
-#define BIO_EOPNOTSUPP	7	/* not supported */
 #define BIO_NULL_MAPPED 8	/* contains invalid user pages */
 #define BIO_QUIET	9	/* Make BIO Quiet */
 #define BIO_SNAP_STABLE	10	/* bio data must be snapshotted during write */
-- 
cgit v1.2.3


From 97ca223c3b37ed12a5b67a5dc6247e5a4799d337 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 24 Apr 2015 21:41:02 +0200
Subject: block: remove unused BIO_RW_BLOCK and BIO_EOF flags

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blk_types.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index daf95915d104..09c7a2cd48ef 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -112,8 +112,6 @@ struct bio {
  * bio flags
  */
 #define BIO_UPTODATE	0	/* ok after I/O completion */
-#define BIO_RW_BLOCK	1	/* RW_AHEAD set, and read/write would block */
-#define BIO_EOF		2	/* out-out-bounds error */
 #define BIO_SEG_VALID	3	/* bi_phys_segments valid */
 #define BIO_CLONED	4	/* doesn't own data */
 #define BIO_BOUNCED	5	/* bio is a bounce bio */
-- 
cgit v1.2.3


From 4e3d9cb0134fea035e6eb1707e5e7d8aaffa186d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 19 May 2015 17:14:51 +0200
Subject: jiffies: Remove the extra indentation level

Somehow I missed to clean that up when applying the patches. Fix it up
now.

Reported-by: Joe Perches <joe@perches.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Nicholas Mc Guire <der.herr@hofr.at>
---
 include/linux/jiffies.h | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 5e75af6cf1bc..3bde5eb8568b 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -298,7 +298,7 @@ extern unsigned long __msecs_to_jiffies(const unsigned int m);
  */
 static inline unsigned long _msecs_to_jiffies(const unsigned int m)
 {
-		return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ);
+	return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ);
 }
 #elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC)
 /*
@@ -309,9 +309,9 @@ static inline unsigned long _msecs_to_jiffies(const unsigned int m)
  */
 static inline unsigned long _msecs_to_jiffies(const unsigned int m)
 {
-		if (m > jiffies_to_msecs(MAX_JIFFY_OFFSET))
-			return MAX_JIFFY_OFFSET;
-		return m * (HZ / MSEC_PER_SEC);
+	if (m > jiffies_to_msecs(MAX_JIFFY_OFFSET))
+		return MAX_JIFFY_OFFSET;
+	return m * (HZ / MSEC_PER_SEC);
 }
 #else
 /*
@@ -320,11 +320,10 @@ static inline unsigned long _msecs_to_jiffies(const unsigned int m)
  */
 static inline unsigned long _msecs_to_jiffies(const unsigned int m)
 {
-		if (HZ > MSEC_PER_SEC && m > jiffies_to_msecs(MAX_JIFFY_OFFSET))
-			return MAX_JIFFY_OFFSET;
+	if (HZ > MSEC_PER_SEC && m > jiffies_to_msecs(MAX_JIFFY_OFFSET))
+		return MAX_JIFFY_OFFSET;
 
-		return (MSEC_TO_HZ_MUL32 * m + MSEC_TO_HZ_ADJ32)
-			>> MSEC_TO_HZ_SHR32;
+	return (MSEC_TO_HZ_MUL32 * m + MSEC_TO_HZ_ADJ32) >> MSEC_TO_HZ_SHR32;
 }
 #endif
 /**
-- 
cgit v1.2.3


From b2dbe0a60f1bcf4db5c701f1577b3135c3159eb5 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Tue, 19 May 2015 09:18:28 -0600
Subject: block: collapse bio bit space

Various previous patches removed bits and left holes, collapse them
all. Leave the reset start bit where it is, we don't need to change
that.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blk_types.h | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 09c7a2cd48ef..3f4ded0b1a34 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -112,15 +112,15 @@ struct bio {
  * bio flags
  */
 #define BIO_UPTODATE	0	/* ok after I/O completion */
-#define BIO_SEG_VALID	3	/* bi_phys_segments valid */
-#define BIO_CLONED	4	/* doesn't own data */
-#define BIO_BOUNCED	5	/* bio is a bounce bio */
-#define BIO_USER_MAPPED 6	/* contains user pages */
-#define BIO_NULL_MAPPED 8	/* contains invalid user pages */
-#define BIO_QUIET	9	/* Make BIO Quiet */
-#define BIO_SNAP_STABLE	10	/* bio data must be snapshotted during write */
-#define BIO_CHAIN	11	/* chained bio, ->bi_remaining in effect */
-#define BIO_REFFED	12	/* bio has elevated ->bi_cnt */
+#define BIO_SEG_VALID	1	/* bi_phys_segments valid */
+#define BIO_CLONED	2	/* doesn't own data */
+#define BIO_BOUNCED	3	/* bio is a bounce bio */
+#define BIO_USER_MAPPED 4	/* contains user pages */
+#define BIO_NULL_MAPPED 5	/* contains invalid user pages */
+#define BIO_QUIET	6	/* Make BIO Quiet */
+#define BIO_SNAP_STABLE	7	/* bio data must be snapshotted during write */
+#define BIO_CHAIN	8	/* chained bio, ->bi_remaining in effect */
+#define BIO_REFFED	9	/* bio has elevated ->bi_cnt */
 
 /*
  * Flags starting here get preserved by bio_reset() - this includes
-- 
cgit v1.2.3


From 343df3c79c62b644ce6ff5dff96c9e0be1ecb242 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 19 May 2015 09:23:23 +0200
Subject: suspend: simplify block I/O handling

Stop abusing struct page functionality and the swap end_io handler, and
instead add a modified version of the blk-lib.c bio_batch helpers.

Also move the block I/O code into swap.c as they are directly tied into
each other.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Pavel Machek <pavel@ucw.cz>
Tested-by: Ming Lin <mlin@kernel.org>
Acked-by: Pavel Machek <pavel@ucw.cz>
Acked-by: Rafael J. Wysocki <rjw@rjwysocki.net>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/swap.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index cee108cbe2d5..38874729dc5f 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -377,7 +377,6 @@ extern void end_swap_bio_write(struct bio *bio, int err);
 extern int __swap_writepage(struct page *page, struct writeback_control *wbc,
 	void (*end_write_func)(struct bio *, int));
 extern int swap_set_page_dirty(struct page *page);
-extern void end_swap_bio_read(struct bio *bio, int err);
 
 int add_swap_extent(struct swap_info_struct *sis, unsigned long start_page,
 		unsigned long nr_pages, sector_t start_block);
-- 
cgit v1.2.3


From 3520469d65f26a1cd2f610f5d5de976f78db74fe Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 2 Apr 2015 11:20:48 +0200
Subject: KVM: export __gfn_to_pfn_memslot, drop gfn_to_pfn_async

gfn_to_pfn_async is used in just one place, and because of x86-specific
treatment that place will need to look at the memory slot.  Hence inline
it into try_async_pf and export __gfn_to_pfn_memslot.

The patch also switches the subsequent call to gfn_to_pfn_prot to use
__gfn_to_pfn_memslot.  This is a small optimization.  Finally, remove
the now-unused async argument of __gfn_to_pfn.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b7a08cd6f4a8..87fd74a04005 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -539,13 +539,13 @@ void kvm_release_page_dirty(struct page *page);
 void kvm_set_page_accessed(struct page *page);
 
 pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn);
-pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async,
-		       bool write_fault, bool *writable);
 pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
 pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
 		      bool *writable);
 pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
 pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn);
+pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, bool atomic,
+			   bool *async, bool write_fault, bool *writable);
 
 void kvm_release_pfn_clean(pfn_t pfn);
 void kvm_set_pfn_dirty(pfn_t pfn);
-- 
cgit v1.2.3


From cad706df7e4a00a595f2662f32c0fc174aa4e61f Mon Sep 17 00:00:00 2001
From: Miroslav Benes <mbenes@suse.cz>
Date: Tue, 19 May 2015 12:01:18 +0200
Subject: livepatch: make kobject in klp_object statically allocated

Make kobj variable (of type struct kobject) statically allocated in
klp_object structure. It will allow us to move in the func-object-patch
hierarchy through kobject links.

The only reason to have it dynamic was to not have empty release
callback in the code. However we have empty callbacks for function and
patch in the code now, so it is no longer valid and the advantage of
static allocation is clear.

Signed-off-by: Miroslav Benes <mbenes@suse.cz>
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/livepatch.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h
index ee6dbb39a809..fe45f2f02c8d 100644
--- a/include/linux/livepatch.h
+++ b/include/linux/livepatch.h
@@ -99,7 +99,7 @@ struct klp_object {
 	struct klp_func *funcs;
 
 	/* internal */
-	struct kobject *kobj;
+	struct kobject kobj;
 	struct module *mod;
 	enum klp_state state;
 };
-- 
cgit v1.2.3


From 8cdd043ab32c2ff28d2a77c514a768a9edce244c Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Tue, 19 May 2015 12:01:19 +0200
Subject: livepatch: introduce patch/func-walking helpers

klp_for_each_object and klp_for_each_func are now used all over the
code. One need not think what is the proper condition to check in the
for loop now.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/livepatch.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h
index fe45f2f02c8d..31db7a05dd36 100644
--- a/include/linux/livepatch.h
+++ b/include/linux/livepatch.h
@@ -123,6 +123,12 @@ struct klp_patch {
 	enum klp_state state;
 };
 
+#define klp_for_each_object(patch, obj) \
+	for (obj = patch->objs; obj->funcs; obj++)
+
+#define klp_for_each_func(obj, func) \
+	for (func = obj->funcs; func->old_name; func++)
+
 int klp_register_patch(struct klp_patch *);
 int klp_unregister_patch(struct klp_patch *);
 int klp_enable_patch(struct klp_patch *);
-- 
cgit v1.2.3


From 4990d4fe327b9d9a7a3be7103a82699406fdde69 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Mon, 18 May 2015 15:40:29 -0700
Subject: PM / Wakeirq: Add automated device wake IRQ handling

Turns out we can automate the handling for the device_may_wakeup()
quite a bit by using the kernel wakeup source list as suggested
by Rafael J. Wysocki <rjw@rjwysocki.net>.

And as some hardware has separate dedicated wake-up interrupt
in addition to the IO interrupt, we can automate the handling by
adding a generic threaded interrupt handler that just calls the
device PM runtime to wake up the device.

This allows dropping code from device drivers as we currently
are doing it in multiple ways, and often wrong.

For most drivers, we should be able to drop the following
boilerplate code from runtime_suspend and runtime_resume
functions:

	...
	device_init_wakeup(dev, true);
	...
	if (device_may_wakeup(dev))
		enable_irq_wake(irq);
	...
	if (device_may_wakeup(dev))
		disable_irq_wake(irq);
	...
	device_init_wakeup(dev, false);
	...

We can replace it with just the following init and exit
time code:

	...
	device_init_wakeup(dev, true);
	dev_pm_set_wake_irq(dev, irq);
	...
	dev_pm_clear_wake_irq(dev);
	device_init_wakeup(dev, false);
	...

And for hardware with dedicated wake-up interrupts:

	...
	device_init_wakeup(dev, true);
	dev_pm_set_dedicated_wake_irq(dev, irq);
	...
	dev_pm_clear_wake_irq(dev);
	device_init_wakeup(dev, false);
	...

Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/pm.h         |  2 ++
 include/linux/pm_wakeirq.h | 52 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/pm_wakeup.h  |  9 ++++++++
 3 files changed, 63 insertions(+)
 create mode 100644 include/linux/pm_wakeirq.h

(limited to 'include/linux')

diff --git a/include/linux/pm.h b/include/linux/pm.h
index 2d29c64f8fb1..1c4ed0cb7907 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -529,6 +529,7 @@ enum rpm_request {
 };
 
 struct wakeup_source;
+struct wake_irq;
 struct pm_domain_data;
 
 struct pm_subsys_data {
@@ -568,6 +569,7 @@ struct dev_pm_info {
 	unsigned long		timer_expires;
 	struct work_struct	work;
 	wait_queue_head_t	wait_queue;
+	struct wake_irq		*wakeirq;
 	atomic_t		usage_count;
 	atomic_t		child_count;
 	unsigned int		disable_depth:3;
diff --git a/include/linux/pm_wakeirq.h b/include/linux/pm_wakeirq.h
new file mode 100644
index 000000000000..4046fa1b7d25
--- /dev/null
+++ b/include/linux/pm_wakeirq.h
@@ -0,0 +1,52 @@
+/*
+ * pm_wakeirq.h - Device wakeirq helper functions
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _LINUX_PM_WAKEIRQ_H
+#define _LINUX_PM_WAKEIRQ_H
+
+#ifdef CONFIG_PM
+
+extern int dev_pm_set_wake_irq(struct device *dev, int irq);
+extern int dev_pm_set_dedicated_wake_irq(struct device *dev,
+					 int irq);
+extern void dev_pm_clear_wake_irq(struct device *dev);
+extern void dev_pm_enable_wake_irq(struct device *dev);
+extern void dev_pm_disable_wake_irq(struct device *dev);
+
+#else	/* !CONFIG_PM */
+
+static inline int dev_pm_set_wake_irq(struct device *dev, int irq)
+{
+	return 0;
+}
+
+static inline int dev_pm_set_dedicated__wake_irq(struct device *dev,
+						 int irq)
+{
+	return 0;
+}
+
+static inline void dev_pm_clear_wake_irq(struct device *dev)
+{
+}
+
+static inline void dev_pm_enable_wake_irq(struct device *dev)
+{
+}
+
+static inline void dev_pm_disable_wake_irq(struct device *dev)
+{
+}
+
+#endif	/* CONFIG_PM */
+#endif	/* _LINUX_PM_WAKEIRQ_H */
diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h
index a0f70808d7f4..a3447932df1f 100644
--- a/include/linux/pm_wakeup.h
+++ b/include/linux/pm_wakeup.h
@@ -28,9 +28,17 @@
 
 #include <linux/types.h>
 
+struct wake_irq;
+
 /**
  * struct wakeup_source - Representation of wakeup sources
  *
+ * @name: Name of the wakeup source
+ * @entry: Wakeup source list entry
+ * @lock: Wakeup source lock
+ * @wakeirq: Optional device specific wakeirq
+ * @timer: Wakeup timer list
+ * @timer_expires: Wakeup timer expiration
  * @total_time: Total time this wakeup source has been active.
  * @max_time: Maximum time this wakeup source has been continuously active.
  * @last_time: Monotonic clock when the wakeup source's was touched last time.
@@ -47,6 +55,7 @@ struct wakeup_source {
 	const char 		*name;
 	struct list_head	entry;
 	spinlock_t		lock;
+	struct wake_irq		*wakeirq;
 	struct timer_list	timer;
 	unsigned long		timer_expires;
 	ktime_t total_time;
-- 
cgit v1.2.3


From ecc8617053e0a97272ef2eee138809f30080e84b Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@suse.com>
Date: Mon, 30 Mar 2015 16:20:03 -0700
Subject: module: add extra argument for parse_params() callback

This adds an extra argument onto parse_params() to be used
as a way to make the unused callback a bit more useful and
generic by allowing the caller to pass on a data structure
of its choice. An example use case is to allow us to easily
make module parameters for every module which we will do
next.

@ parse @
identifier name, args, params, num, level_min, level_max;
identifier unknown, param, val, doing;
type s16;
@@
 extern char *parse_args(const char *name,
 			 char *args,
 			 const struct kernel_param *params,
 			 unsigned num,
 			 s16 level_min,
 			 s16 level_max,
+			 void *arg,
 			 int (*unknown)(char *param, char *val,
					const char *doing
+					, void *arg
					));

@ parse_mod @
identifier name, args, params, num, level_min, level_max;
identifier unknown, param, val, doing;
type s16;
@@
 char *parse_args(const char *name,
 			 char *args,
 			 const struct kernel_param *params,
 			 unsigned num,
 			 s16 level_min,
 			 s16 level_max,
+			 void *arg,
 			 int (*unknown)(char *param, char *val,
					const char *doing
+					, void *arg
					))
{
	...
}

@ parse_args_found @
expression R, E1, E2, E3, E4, E5, E6;
identifier func;
@@

(
	R =
	parse_args(E1, E2, E3, E4, E5, E6,
+		   NULL,
		   func);
|
	R =
	parse_args(E1, E2, E3, E4, E5, E6,
+		   NULL,
		   &func);
|
	R =
	parse_args(E1, E2, E3, E4, E5, E6,
+		   NULL,
		   NULL);
|
	parse_args(E1, E2, E3, E4, E5, E6,
+		   NULL,
		   func);
|
	parse_args(E1, E2, E3, E4, E5, E6,
+		   NULL,
		   &func);
|
	parse_args(E1, E2, E3, E4, E5, E6,
+		   NULL,
		   NULL);
)

@ parse_args_unused depends on parse_args_found @
identifier parse_args_found.func;
@@

int func(char *param, char *val, const char *unused
+		 , void *arg
		 )
{
	...
}

@ mod_unused depends on parse_args_found @
identifier parse_args_found.func;
expression A1, A2, A3;
@@

-	func(A1, A2, A3);
+	func(A1, A2, A3, NULL);

Generated-by: Coccinelle SmPL
Cc: cocci@systeme.lip6.fr
Cc: Tejun Heo <tj@kernel.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Felipe Contreras <felipe.contreras@gmail.com>
Cc: Ewan Milne <emilne@redhat.com>
Cc: Jean Delvare <jdelvare@suse.de>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: linux-kernel@vger.kernel.org
Reviewed-by: Tejun Heo <tj@kernel.org>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Luis R. Rodriguez <mcgrof@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/moduleparam.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 1c9effa25e26..13923709d30d 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -357,8 +357,9 @@ extern char *parse_args(const char *name,
 		      unsigned num,
 		      s16 level_min,
 		      s16 level_max,
+		      void *arg,
 		      int (*unknown)(char *param, char *val,
-			      const char *doing));
+				     const char *doing, void *arg));
 
 /* Called by module remove. */
 #ifdef CONFIG_SYSFS
-- 
cgit v1.2.3


From 765230b5f084863183aa8adb3405ab3f32c0b16e Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Mon, 30 Mar 2015 16:20:04 -0700
Subject: driver-core: add asynchronous probing support for drivers

Some devices take a long time when initializing, and not all drivers are
suited to initialize their devices when they are open. For example,
input drivers need to interrogate their devices in order to publish
device's capabilities before userspace will open them. When such drivers
are compiled into kernel they may stall entire kernel initialization.

This change allows drivers request for their probe functions to be
called asynchronously during driver and device registration (manual
binding is still synchronous). Because async_schedule is used to perform
asynchronous calls module loading will still wait for the probing to
complete.

Note that the end goal is to make the probing asynchronous by default,
so annotating drivers with PROBE_PREFER_ASYNCHRONOUS is a temporary
measure that allows us to speed up boot process while we validating and
fixing the rest of the drivers and preparing userspace.

This change is based on earlier patch by "Luis R. Rodriguez"
<mcgrof@suse.com>

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/device.h | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 6558af90c8fe..7857b46c548b 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -195,6 +195,31 @@ extern int bus_unregister_notifier(struct bus_type *bus,
 extern struct kset *bus_get_kset(struct bus_type *bus);
 extern struct klist *bus_get_device_klist(struct bus_type *bus);
 
+/**
+ * enum probe_type - device driver probe type to try
+ *	Device drivers may opt in for special handling of their
+ *	respective probe routines. This tells the core what to
+ *	expect and prefer.
+ *
+ * @PROBE_SYNCHRONOUS: Default. Drivers expect their probe routines
+ *	to run synchronously with driver and device registration
+ *	(with the exception of -EPROBE_DEFER handling - re-probing
+ *	always ends up being done asynchronously).
+ * @PROBE_PREFER_ASYNCHRONOUS: Drivers for "slow" devices which
+ *	probing order is not essential for booting the system may
+ *	opt into executing their probes asynchronously.
+ *
+ * Note that the end goal is to switch the kernel to use asynchronous
+ * probing by default, so annotating drivers with
+ * %PROBE_PREFER_ASYNCHRONOUS is a temporary measure that allows us
+ * to speed up boot process while we are validating the rest of the
+ * drivers.
+ */
+enum probe_type {
+	PROBE_SYNCHRONOUS,
+	PROBE_PREFER_ASYNCHRONOUS,
+};
+
 /**
  * struct device_driver - The basic device driver structure
  * @name:	Name of the device driver.
@@ -202,6 +227,7 @@ extern struct klist *bus_get_device_klist(struct bus_type *bus);
  * @owner:	The module owner.
  * @mod_name:	Used for built-in modules.
  * @suppress_bind_attrs: Disables bind/unbind via sysfs.
+ * @probe_type:	Type of the probe (synchronous or asynchronous) to use.
  * @of_match_table: The open firmware table.
  * @acpi_match_table: The ACPI match table.
  * @probe:	Called to query the existence of a specific device,
@@ -235,6 +261,7 @@ struct device_driver {
 	const char		*mod_name;	/* used for built-in modules */
 
 	bool suppress_bind_attrs;	/* disables bind/unbind via sysfs */
+	enum probe_type probe_type;
 
 	const struct of_device_id	*of_match_table;
 	const struct acpi_device_id	*acpi_match_table;
@@ -975,6 +1002,7 @@ extern int __must_check device_bind_driver(struct device *dev);
 extern void device_release_driver(struct device *dev);
 extern int  __must_check device_attach(struct device *dev);
 extern int __must_check driver_attach(struct device_driver *drv);
+extern void device_initial_probe(struct device *dev);
 extern int __must_check device_reprobe(struct device *dev);
 
 /*
-- 
cgit v1.2.3


From f2411da746985e60d4d087f3a43e271c61785927 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@suse.com>
Date: Mon, 30 Mar 2015 16:20:05 -0700
Subject: driver-core: add driver module asynchronous probe support

Some init systems may wish to express the desire to have device drivers
run their probe() code asynchronously. This implements support for this
and allows userspace to request async probe as a preference through a
generic shared device driver module parameter, async_probe.

Implementation for async probe is supported through a module parameter
given that since synchronous probe has been prevalent for years some
userspace might exist which relies on the fact that the device driver
will probe synchronously and the assumption that devices it provides
will be immediately available after this.

Signed-off-by: Luis R. Rodriguez <mcgrof@suse.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/device.h | 8 +++++---
 include/linux/module.h | 2 ++
 2 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 7857b46c548b..77b7cd9e5467 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -201,10 +201,12 @@ extern struct klist *bus_get_device_klist(struct bus_type *bus);
  *	respective probe routines. This tells the core what to
  *	expect and prefer.
  *
- * @PROBE_SYNCHRONOUS: Default. Drivers expect their probe routines
+ * @PROBE_DEFAULT_STRATEGY: Drivers expect their probe routines
  *	to run synchronously with driver and device registration
  *	(with the exception of -EPROBE_DEFER handling - re-probing
- *	always ends up being done asynchronously).
+ *	always ends up being done asynchronously) unless user
+ *	explicitly requested asynchronous probing via module
+ *	parameter.
  * @PROBE_PREFER_ASYNCHRONOUS: Drivers for "slow" devices which
  *	probing order is not essential for booting the system may
  *	opt into executing their probes asynchronously.
@@ -216,7 +218,7 @@ extern struct klist *bus_get_device_klist(struct bus_type *bus);
  * drivers.
  */
 enum probe_type {
-	PROBE_SYNCHRONOUS,
+	PROBE_DEFAULT_STRATEGY,
 	PROBE_PREFER_ASYNCHRONOUS,
 };
 
diff --git a/include/linux/module.h b/include/linux/module.h
index c883b86ea964..f46a47d3c0dc 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -257,6 +257,8 @@ struct module {
 	bool sig_ok;
 #endif
 
+	bool async_probe_requested;
+
 	/* symbols that will be GPL-only in the near future. */
 	const struct kernel_symbol *gpl_future_syms;
 	const unsigned long *gpl_future_crcs;
-- 
cgit v1.2.3


From d173a137c5bd95ee29d02705e5fa8890ef149718 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@suse.com>
Date: Mon, 30 Mar 2015 16:20:06 -0700
Subject: driver-core: enable drivers to opt-out of async probe

There are drivers that can not be probed asynchronously. One such group
is platform drivers registered with platform_driver_probe(), which
expects driver's probe routine be discarded after the driver has been
registered and initial binding attempt executed. Also
platform_driver_probe() an error when no devices were bound to the
driver, allowing failing to load such driver module altogether.

Other drivers do not work well with asynchronous probing because of
driver bug or not optimal driver organization.

To allow using such drivers even when user requests asynchronous probing
as default boot strategy, let's allow them to opt out.

Signed-off-by: Luis R. Rodriguez <mcgrof@suse.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/device.h | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 77b7cd9e5467..00ac57c26615 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -201,15 +201,15 @@ extern struct klist *bus_get_device_klist(struct bus_type *bus);
  *	respective probe routines. This tells the core what to
  *	expect and prefer.
  *
- * @PROBE_DEFAULT_STRATEGY: Drivers expect their probe routines
- *	to run synchronously with driver and device registration
- *	(with the exception of -EPROBE_DEFER handling - re-probing
- *	always ends up being done asynchronously) unless user
- *	explicitly requested asynchronous probing via module
- *	parameter.
+ * @PROBE_DEFAULT_STRATEGY: Used by drivers that work equally well
+ *	whether probed synchronously or asynchronously.
  * @PROBE_PREFER_ASYNCHRONOUS: Drivers for "slow" devices which
  *	probing order is not essential for booting the system may
  *	opt into executing their probes asynchronously.
+ * @PROBE_FORCE_SYNCHRONOUS: Use this to annotate drivers that need
+ *	their probe routines to run synchronously with driver and
+ *	device registration (with the exception of -EPROBE_DEFER
+ *	handling - re-probing always ends up being done asynchronously).
  *
  * Note that the end goal is to switch the kernel to use asynchronous
  * probing by default, so annotating drivers with
@@ -220,6 +220,7 @@ extern struct klist *bus_get_device_klist(struct bus_type *bus);
 enum probe_type {
 	PROBE_DEFAULT_STRATEGY,
 	PROBE_PREFER_ASYNCHRONOUS,
+	PROBE_FORCE_SYNCHRONOUS,
 };
 
 /**
-- 
cgit v1.2.3


From ec0ccc16a09fc32f7142ef3ddf1c2276fbbb35d0 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Mon, 30 Mar 2015 16:20:09 -0700
Subject: module: add core_param_unsafe

Similarly to module_param_unsafe(), add the helper to be used by core
code wishing to expose unsafe debugging or testing parameters that taint
the kernel when set.

Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/moduleparam.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 13923709d30d..6480dcaca275 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -310,6 +310,15 @@ static inline void __kernel_param_unlock(void)
 #define core_param(name, var, type, perm)				\
 	param_check_##type(name, &(var));				\
 	__module_param_call("", name, &param_ops_##type, &var, perm, -1, 0)
+
+/**
+ * core_param_unsafe - same as core_param but taints kernel
+ */
+#define core_param_unsafe(name, var, type, perm)		\
+	param_check_##type(name, &(var));				\
+	__module_param_call("", name, &param_ops_##type, &var, perm,	\
+			    -1, KERNEL_PARAM_FL_UNSAFE)
+
 #endif /* !MODULE */
 
 /**
-- 
cgit v1.2.3


From 985eba3aba77a997f65109e1418837b1d8b8512a Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Wed, 3 Dec 2014 16:46:14 +0100
Subject: mfd: syscon: Add Atmel MC (Memory Controller) registers definition

The at91rm9200 SoC embeds a Memory Controller block which is used to
configure several aspects of the platform:
- AHB/APB Bus behavior
- SDRAM Controller
- EBI (External Bus Interface) and SMC (Static Memory Controller) config

Those registers might be accessed by different drivers, hence we need to
define it as a syscon device.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Acked-by: Nicolas Ferre <nicolas.ferre@atmel.com>
---
 include/linux/mfd/syscon/atmel-mc.h | 144 ++++++++++++++++++++++++++++++++++++
 1 file changed, 144 insertions(+)
 create mode 100644 include/linux/mfd/syscon/atmel-mc.h

(limited to 'include/linux')

diff --git a/include/linux/mfd/syscon/atmel-mc.h b/include/linux/mfd/syscon/atmel-mc.h
new file mode 100644
index 000000000000..afd9b8f1e363
--- /dev/null
+++ b/include/linux/mfd/syscon/atmel-mc.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright (C) 2005 Ivan Kokshaysky
+ * Copyright (C) SAN People
+ *
+ * Memory Controllers (MC, EBI, SMC, SDRAMC, BFC) - System peripherals
+ * registers.
+ * Based on AT91RM9200 datasheet revision E.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _LINUX_MFD_SYSCON_ATMEL_MC_H_
+#define _LINUX_MFD_SYSCON_ATMEL_MC_H_
+
+/* Memory Controller */
+#define AT91_MC_RCR			0x00
+#define AT91_MC_RCB			BIT(0)
+
+#define AT91_MC_ASR			0x04
+#define AT91_MC_UNADD			BIT(0)
+#define AT91_MC_MISADD			BIT(1)
+#define AT91_MC_ABTSZ			GENMASK(9, 8)
+#define AT91_MC_ABTSZ_BYTE		(0 << 8)
+#define AT91_MC_ABTSZ_HALFWORD		(1 << 8)
+#define AT91_MC_ABTSZ_WORD		(2 << 8)
+#define AT91_MC_ABTTYP			GENMASK(11, 10)
+#define AT91_MC_ABTTYP_DATAREAD		(0 << 10)
+#define AT91_MC_ABTTYP_DATAWRITE	(1 << 10)
+#define AT91_MC_ABTTYP_FETCH		(2 << 10)
+#define AT91_MC_MST(n)			BIT(16 + (n))
+#define AT91_MC_SVMST(n)		BIT(24 + (n))
+
+#define AT91_MC_AASR			0x08
+
+#define AT91_MC_MPR			0x0c
+#define AT91_MPR_MSTP(n)		GENMASK(2 + ((x) * 4), ((x) * 4))
+
+/* External Bus Interface (EBI) registers */
+#define AT91_MC_EBI_CSA			0x60
+#define AT91_MC_EBI_CS(n)		BIT(x)
+#define AT91_MC_EBI_NUM_CS		8
+
+#define AT91_MC_EBI_CFGR		0x64
+#define AT91_MC_EBI_DBPUC		BIT(0)
+
+/* Static Memory Controller (SMC) registers */
+#define AT91_MC_SMC_CSR(n)		(0x70 + ((n) * 4))
+#define AT91_MC_SMC_NWS			GENMASK(6, 0)
+#define AT91_MC_SMC_NWS_(x)		((x) << 0)
+#define AT91_MC_SMC_WSEN		BIT(7)
+#define AT91_MC_SMC_TDF			GENMASK(11, 8)
+#define AT91_MC_SMC_TDF_(x)		((x) << 8)
+#define AT91_MC_SMC_TDF_MAX		0xf
+#define AT91_MC_SMC_BAT			BIT(12)
+#define AT91_MC_SMC_DBW			GENMASK(14, 13)
+#define AT91_MC_SMC_DBW_16		(1 << 13)
+#define AT91_MC_SMC_DBW_8		(2 << 13)
+#define AT91_MC_SMC_DPR			BIT(15)
+#define AT91_MC_SMC_ACSS		GENMASK(17, 16)
+#define AT91_MC_SMC_ACSS_(x)		((x) << 16)
+#define AT91_MC_SMC_ACSS_MAX		3
+#define AT91_MC_SMC_RWSETUP		GENMASK(26, 24)
+#define AT91_MC_SMC_RWSETUP_(x)		((x) << 24)
+#define AT91_MC_SMC_RWHOLD		GENMASK(30, 28)
+#define AT91_MC_SMC_RWHOLD_(x)		((x) << 28)
+#define AT91_MC_SMC_RWHOLDSETUP_MAX	7
+
+/* SDRAM Controller registers */
+#define AT91_MC_SDRAMC_MR		0x90
+#define AT91_MC_SDRAMC_MODE		GENMASK(3, 0)
+#define AT91_MC_SDRAMC_MODE_NORMAL	(0 << 0)
+#define AT91_MC_SDRAMC_MODE_NOP		(1 << 0)
+#define AT91_MC_SDRAMC_MODE_PRECHARGE	(2 << 0)
+#define AT91_MC_SDRAMC_MODE_LMR		(3 << 0)
+#define AT91_MC_SDRAMC_MODE_REFRESH	(4 << 0)
+#define AT91_MC_SDRAMC_DBW_16		BIT(4)
+
+#define AT91_MC_SDRAMC_TR		0x94
+#define AT91_MC_SDRAMC_COUNT		GENMASK(11, 0)
+
+#define AT91_MC_SDRAMC_CR		0x98
+#define AT91_MC_SDRAMC_NC		GENMASK(1, 0)
+#define AT91_MC_SDRAMC_NC_8		(0 << 0)
+#define AT91_MC_SDRAMC_NC_9		(1 << 0)
+#define AT91_MC_SDRAMC_NC_10		(2 << 0)
+#define AT91_MC_SDRAMC_NC_11		(3 << 0)
+#define AT91_MC_SDRAMC_NR		GENMASK(3, 2)
+#define AT91_MC_SDRAMC_NR_11		(0 << 2)
+#define AT91_MC_SDRAMC_NR_12		(1 << 2)
+#define AT91_MC_SDRAMC_NR_13		(2 << 2)
+#define AT91_MC_SDRAMC_NB		BIT(4)
+#define AT91_MC_SDRAMC_NB_2		(0 << 4)
+#define AT91_MC_SDRAMC_NB_4		(1 << 4)
+#define AT91_MC_SDRAMC_CAS		GENMASK(6, 5)
+#define AT91_MC_SDRAMC_CAS_2		(2 << 5)
+#define AT91_MC_SDRAMC_TWR		GENMASK(10,  7)
+#define AT91_MC_SDRAMC_TRC		GENMASK(14, 11)
+#define AT91_MC_SDRAMC_TRP		GENMASK(18, 15)
+#define AT91_MC_SDRAMC_TRCD		GENMASK(22, 19)
+#define AT91_MC_SDRAMC_TRAS		GENMASK(26, 23)
+#define AT91_MC_SDRAMC_TXSR		GENMASK(30, 27)
+
+#define AT91_MC_SDRAMC_SRR		0x9c
+#define AT91_MC_SDRAMC_SRCB		BIT(0)
+
+#define AT91_MC_SDRAMC_LPR		0xa0
+#define AT91_MC_SDRAMC_LPCB		BIT(0)
+
+#define AT91_MC_SDRAMC_IER		0xa4
+#define AT91_MC_SDRAMC_IDR		0xa8
+#define AT91_MC_SDRAMC_IMR		0xac
+#define AT91_MC_SDRAMC_ISR		0xb0
+#define AT91_MC_SDRAMC_RES		BIT(0)
+
+/* Burst Flash Controller register */
+#define AT91_MC_BFC_MR			0xc0
+#define AT91_MC_BFC_BFCOM		GENMASK(1, 0)
+#define AT91_MC_BFC_BFCOM_DISABLED	(0 << 0)
+#define AT91_MC_BFC_BFCOM_ASYNC		(1 << 0)
+#define AT91_MC_BFC_BFCOM_BURST		(2 << 0)
+#define AT91_MC_BFC_BFCC		GENMASK(3, 2)
+#define AT91_MC_BFC_BFCC_MCK		(1 << 2)
+#define AT91_MC_BFC_BFCC_DIV2		(2 << 2)
+#define AT91_MC_BFC_BFCC_DIV4		(3 << 2)
+#define AT91_MC_BFC_AVL			GENMASK(7,  4)
+#define AT91_MC_BFC_PAGES		GENMASK(10, 8)
+#define AT91_MC_BFC_PAGES_NO_PAGE	(0 << 8)
+#define AT91_MC_BFC_PAGES_16		(1 << 8)
+#define AT91_MC_BFC_PAGES_32		(2 << 8)
+#define AT91_MC_BFC_PAGES_64		(3 << 8)
+#define AT91_MC_BFC_PAGES_128		(4 << 8)
+#define AT91_MC_BFC_PAGES_256		(5 << 8)
+#define AT91_MC_BFC_PAGES_512		(6 << 8)
+#define AT91_MC_BFC_PAGES_1024		(7 << 8)
+#define AT91_MC_BFC_OEL			GENMASK(13, 12)
+#define AT91_MC_BFC_BAAEN		BIT(16)
+#define AT91_MC_BFC_BFOEH		BIT(17)
+#define AT91_MC_BFC_MUXEN		BIT(18)
+#define AT91_MC_BFC_RDYEN		BIT(19)
+
+#endif /* _LINUX_MFD_SYSCON_ATMEL_MC_H_ */
-- 
cgit v1.2.3


From be32417796c2b8a83fe4cbece83bea96ab9e378f Mon Sep 17 00:00:00 2001
From: Jarod Wilson <jarod@redhat.com>
Date: Wed, 6 May 2015 12:26:22 +0800
Subject: block: export blkdev_reread_part() and __blkdev_reread_part()

This patch exports blkdev_reread_part() for block drivers, also
introduce __blkdev_reread_part().

For some drivers, such as loop, reread of partitions can be run
from the release path, and bd_mutex may already be held prior to
calling ioctl_by_bdev(bdev, BLKRRPART, 0), so introduce
__blkdev_reread_part for use in such cases.

CC: Christoph Hellwig <hch@lst.de>
CC: Jens Axboe <axboe@kernel.dk>
CC: Tejun Heo <tj@kernel.org>
CC: Alexander Viro <viro@zeniv.linux.org.uk>
CC: Markus Pargmann <mpa@pengutronix.de>
CC: Stefan Weinhuber <wein@de.ibm.com>
CC: Stefan Haberland <stefan.haberland@de.ibm.com>
CC: Sebastian Ott <sebott@linux.vnet.ibm.com>
CC: Fabian Frederick <fabf@skynet.be>
CC: Ming Lei <ming.lei@canonical.com>
CC: David Herrmann <dh.herrmann@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: Peter Zijlstra <peterz@infradead.org>
CC: nbd-general@lists.sourceforge.net
CC: linux-s390@vger.kernel.org
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jarod Wilson <jarod@redhat.com>
Signed-off-by: Ming Lei <ming.lei@canonical.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/fs.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 35ec87e490b1..1ef63900243c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2279,6 +2279,9 @@ extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
 extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode,
 					      void *holder);
 extern void blkdev_put(struct block_device *bdev, fmode_t mode);
+extern int __blkdev_reread_part(struct block_device *bdev);
+extern int blkdev_reread_part(struct block_device *bdev);
+
 #ifdef CONFIG_SYSFS
 extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
 extern void bd_unlink_disk_holder(struct block_device *bdev,
-- 
cgit v1.2.3


From 6e844b035360294636271f4f0fd4a5a685e03c06 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Fri, 17 Apr 2015 10:45:55 -0700
Subject: ARM: BCM63xx: Add Broadcom BCM63xx PMB controller helpers

This patch adds both common register definitions and helper functions
used to issue read/write commands to the Broadcom BCM63xx PMB controller
which is used to power on and release from reset internal on-chip
peripherals such as the integrated Ethernet switch, AHCI, USB, as well
as the secondary CPU core.

This is going to be utilized by the BCM63138 SMP code, as well as by the
BCM63138 reset controller later.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
 include/linux/reset/bcm63xx_pmb.h | 88 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 88 insertions(+)
 create mode 100644 include/linux/reset/bcm63xx_pmb.h

(limited to 'include/linux')

diff --git a/include/linux/reset/bcm63xx_pmb.h b/include/linux/reset/bcm63xx_pmb.h
new file mode 100644
index 000000000000..bb4af7b5eb36
--- /dev/null
+++ b/include/linux/reset/bcm63xx_pmb.h
@@ -0,0 +1,88 @@
+/*
+ * Broadcom BCM63xx Processor Monitor Bus shared routines (SMP and reset)
+ *
+ * Copyright (C) 2015, Broadcom Corporation
+ * Author: Florian Fainelli <f.fainelli@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#ifndef __BCM63XX_PMB_H
+#define __BCM63XX_PMB_H
+
+#include <linux/io.h>
+#include <linux/types.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+
+/* PMB Master controller register */
+#define PMB_CTRL		0x00
+#define  PMC_PMBM_START		(1 << 31)
+#define  PMC_PMBM_TIMEOUT	(1 << 30)
+#define  PMC_PMBM_SLAVE_ERR	(1 << 29)
+#define  PMC_PMBM_BUSY		(1 << 28)
+#define  PMC_PMBM_READ		(0 << 20)
+#define  PMC_PMBM_WRITE		(1 << 20)
+#define PMB_WR_DATA		0x04
+#define PMB_TIMEOUT		0x08
+#define PMB_RD_DATA		0x0C
+
+#define PMB_BUS_ID_SHIFT	8
+
+/* Perform the low-level PMB master operation, shared between reads and
+ * writes.
+ */
+static inline int __bpcm_do_op(void __iomem *master, unsigned int addr,
+			       u32 off, u32 op)
+{
+	unsigned int timeout = 1000;
+	u32 cmd;
+
+	cmd = (PMC_PMBM_START | op | (addr & 0xff) << 12 | off);
+	writel(cmd, master + PMB_CTRL);
+	do {
+		cmd = readl(master + PMB_CTRL);
+		if (!(cmd & PMC_PMBM_START))
+			return 0;
+
+		if (cmd & PMC_PMBM_SLAVE_ERR)
+			return -EIO;
+
+		if (cmd & PMC_PMBM_TIMEOUT)
+			return -ETIMEDOUT;
+
+		udelay(1);
+	} while (timeout-- > 0);
+
+	return -ETIMEDOUT;
+}
+
+static inline int bpcm_rd(void __iomem *master, unsigned int addr,
+			  u32 off, u32 *val)
+{
+	int ret = 0;
+
+	ret = __bpcm_do_op(master, addr, off >> 2, PMC_PMBM_READ);
+	*val = readl(master + PMB_RD_DATA);
+
+	return ret;
+}
+
+static inline int bpcm_wr(void __iomem *master, unsigned int addr,
+			  u32 off, u32 val)
+{
+	int ret = 0;
+
+	writel(val, master + PMB_WR_DATA);
+	ret = __bpcm_do_op(master, addr, off >> 2, PMC_PMBM_WRITE);
+
+	return ret;
+}
+
+#endif /* __BCM63XX_PMB_H */
-- 
cgit v1.2.3


From 7f1a57fdd6cb6e7be2ed31878a34655df38e1861 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Date: Tue, 19 May 2015 16:13:02 +0900
Subject: power_supply: Fix possible NULL pointer dereference on early uevent

Don't call the power_supply_changed() from power_supply_register() when
parent is still probing because it may lead to accessing parent too
early.

In bq27x00_battery this caused NULL pointer exception because uevent of
power_supply_changed called back the the get_property() method provided
by the driver. The get_property() method accessed pointer which should
be returned by power_supply_register().

Starting from bq27x00_battery_probe():
  di->bat = power_supply_register()
    power_supply_changed()
      kobject_uevent()
        power_supply_uevent()
          power_supply_show_property()
            power_supply_get_property()
              bq27x00_battery_get_property()
                dereference of di->bat which is NULL here

The dereference of di->bat (value returned by power_supply_register())
is the currently visible problem. However calling back the methods
provided by driver before ending the probe may lead to accessing other
driver-related data which is not yet initialized.

The call to power_supply_changed() is postponed till probing ends -
mutex of parent device is released.

Reported-by: H. Nikolaus Schaller <hns@goldelico.com>
Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Fixes: 297d716f6260 ("power_supply: Change ownership from driver to core")
Tested-By: Dr. H. Nikolaus Schaller <hns@goldelico.com>
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 include/linux/power_supply.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 75a1dd8dc56e..a80f1fd01ddb 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -237,6 +237,7 @@ struct power_supply {
 	/* private */
 	struct device dev;
 	struct work_struct changed_work;
+	struct delayed_work deferred_register_work;
 	spinlock_t changed_lock;
 	bool changed;
 	atomic_t use_cnt;
-- 
cgit v1.2.3


From 04fd61ab36ec065e194ab5e74ae34a5240d992bb Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Tue, 19 May 2015 16:59:03 -0700
Subject: bpf: allow bpf programs to tail-call other bpf programs

introduce bpf_tail_call(ctx, &jmp_table, index) helper function
which can be used from BPF programs like:
int bpf_prog(struct pt_regs *ctx)
{
  ...
  bpf_tail_call(ctx, &jmp_table, index);
  ...
}
that is roughly equivalent to:
int bpf_prog(struct pt_regs *ctx)
{
  ...
  if (jmp_table[index])
    return (*jmp_table[index])(ctx);
  ...
}
The important detail that it's not a normal call, but a tail call.
The kernel stack is precious, so this helper reuses the current
stack frame and jumps into another BPF program without adding
extra call frame.
It's trivially done in interpreter and a bit trickier in JITs.
In case of x64 JIT the bigger part of generated assembler prologue
is common for all programs, so it is simply skipped while jumping.
Other JITs can do similar prologue-skipping optimization or
do stack unwind before jumping into the next program.

bpf_tail_call() arguments:
ctx - context pointer
jmp_table - one of BPF_MAP_TYPE_PROG_ARRAY maps used as the jump table
index - index in the jump table

Since all BPF programs are idenitified by file descriptor, user space
need to populate the jmp_table with FDs of other BPF programs.
If jmp_table[index] is empty the bpf_tail_call() doesn't jump anywhere
and program execution continues as normal.

New BPF_MAP_TYPE_PROG_ARRAY map type is introduced so that user space can
populate this jmp_table array with FDs of other bpf programs.
Programs can share the same jmp_table array or use multiple jmp_tables.

The chain of tail calls can form unpredictable dynamic loops therefore
tail_call_cnt is used to limit the number of calls and currently is set to 32.

Use cases:
Acked-by: Daniel Borkmann <daniel@iogearbox.net>

==========
- simplify complex programs by splitting them into a sequence of small programs

- dispatch routine
  For tracing and future seccomp the program may be triggered on all system
  calls, but processing of syscall arguments will be different. It's more
  efficient to implement them as:
  int syscall_entry(struct seccomp_data *ctx)
  {
     bpf_tail_call(ctx, &syscall_jmp_table, ctx->nr /* syscall number */);
     ... default: process unknown syscall ...
  }
  int sys_write_event(struct seccomp_data *ctx) {...}
  int sys_read_event(struct seccomp_data *ctx) {...}
  syscall_jmp_table[__NR_write] = sys_write_event;
  syscall_jmp_table[__NR_read] = sys_read_event;

  For networking the program may call into different parsers depending on
  packet format, like:
  int packet_parser(struct __sk_buff *skb)
  {
     ... parse L2, L3 here ...
     __u8 ipproto = load_byte(skb, ... offsetof(struct iphdr, protocol));
     bpf_tail_call(skb, &ipproto_jmp_table, ipproto);
     ... default: process unknown protocol ...
  }
  int parse_tcp(struct __sk_buff *skb) {...}
  int parse_udp(struct __sk_buff *skb) {...}
  ipproto_jmp_table[IPPROTO_TCP] = parse_tcp;
  ipproto_jmp_table[IPPROTO_UDP] = parse_udp;

- for TC use case, bpf_tail_call() allows to implement reclassify-like logic

- bpf_map_update_elem/delete calls into BPF_MAP_TYPE_PROG_ARRAY jump table
  are atomic, so user space can build chains of BPF programs on the fly

Implementation details:
=======================
- high performance of bpf_tail_call() is the goal.
  It could have been implemented without JIT changes as a wrapper on top of
  BPF_PROG_RUN() macro, but with two downsides:
  . all programs would have to pay performance penalty for this feature and
    tail call itself would be slower, since mandatory stack unwind, return,
    stack allocate would be done for every tailcall.
  . tailcall would be limited to programs running preempt_disabled, since
    generic 'void *ctx' doesn't have room for 'tail_call_cnt' and it would
    need to be either global per_cpu variable accessed by helper and by wrapper
    or global variable protected by locks.

  In this implementation x64 JIT bypasses stack unwind and jumps into the
  callee program after prologue.

- bpf_prog_array_compatible() ensures that prog_type of callee and caller
  are the same and JITed/non-JITed flag is the same, since calling JITed
  program from non-JITed is invalid, since stack frames are different.
  Similarly calling kprobe type program from socket type program is invalid.

- jump table is implemented as BPF_MAP_TYPE_PROG_ARRAY to reuse 'map'
  abstraction, its user space API and all of verifier logic.
  It's in the existing arraymap.c file, since several functions are
  shared with regular array map.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h    | 22 ++++++++++++++++++++++
 include/linux/filter.h |  2 +-
 2 files changed, 23 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index d5cda067115a..8821b9a8689e 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -126,6 +126,27 @@ struct bpf_prog_aux {
 	struct work_struct work;
 };
 
+struct bpf_array {
+	struct bpf_map map;
+	u32 elem_size;
+	/* 'ownership' of prog_array is claimed by the first program that
+	 * is going to use this map or by the first program which FD is stored
+	 * in the map to make sure that all callers and callees have the same
+	 * prog_type and JITed flag
+	 */
+	enum bpf_prog_type owner_prog_type;
+	bool owner_jited;
+	union {
+		char value[0] __aligned(8);
+		struct bpf_prog *prog[0] __aligned(8);
+	};
+};
+#define MAX_TAIL_CALL_CNT 32
+
+u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5);
+void bpf_prog_array_map_clear(struct bpf_map *map);
+bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp);
+
 #ifdef CONFIG_BPF_SYSCALL
 void bpf_register_prog_type(struct bpf_prog_type_list *tl);
 void bpf_register_map_type(struct bpf_map_type_list *tl);
@@ -160,5 +181,6 @@ extern const struct bpf_func_proto bpf_map_delete_elem_proto;
 
 extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
 extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
+extern const struct bpf_func_proto bpf_tail_call_proto;
 
 #endif /* _LINUX_BPF_H */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 200be4a74a33..17724f6ea983 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -378,7 +378,7 @@ static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
 
 int sk_filter(struct sock *sk, struct sk_buff *skb);
 
-void bpf_prog_select_runtime(struct bpf_prog *fp);
+int bpf_prog_select_runtime(struct bpf_prog *fp);
 void bpf_prog_free(struct bpf_prog *fp);
 
 struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags);
-- 
cgit v1.2.3


From 37b1ef31a568fc02e53587620226e5f3c66454c8 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Wed, 20 May 2015 14:41:19 +0800
Subject: workqueue: move flush_scheduled_work() to workqueue.h

flush_scheduled_work() is just a simple call to flush_work().

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 4618dd672d1b..738b30b39b68 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -435,7 +435,6 @@ extern bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
 
 extern void flush_workqueue(struct workqueue_struct *wq);
 extern void drain_workqueue(struct workqueue_struct *wq);
-extern void flush_scheduled_work(void);
 
 extern int schedule_on_each_cpu(work_func_t func);
 
@@ -531,6 +530,35 @@ static inline bool schedule_work(struct work_struct *work)
 	return queue_work(system_wq, work);
 }
 
+/**
+ * flush_scheduled_work - ensure that any scheduled work has run to completion.
+ *
+ * Forces execution of the kernel-global workqueue and blocks until its
+ * completion.
+ *
+ * Think twice before calling this function!  It's very easy to get into
+ * trouble if you don't take great care.  Either of the following situations
+ * will lead to deadlock:
+ *
+ *	One of the work items currently on the workqueue needs to acquire
+ *	a lock held by your code or its caller.
+ *
+ *	Your code is running in the context of a work routine.
+ *
+ * They will be detected by lockdep when they occur, but the first might not
+ * occur very often.  It depends on what work items are on the workqueue and
+ * what locks they need, which you have no control over.
+ *
+ * In most situations flushing the entire workqueue is overkill; you merely
+ * need to know that a particular work item isn't queued and isn't running.
+ * In such cases you should use cancel_delayed_work_sync() or
+ * cancel_work_sync() instead.
+ */
+static inline void flush_scheduled_work(void)
+{
+	flush_workqueue(system_wq);
+}
+
 /**
  * schedule_delayed_work_on - queue work in global workqueue on CPU after delay
  * @cpu: cpu to use
-- 
cgit v1.2.3


From 2efd055c53c06b7e89c167c98069bab9afce7e59 Mon Sep 17 00:00:00 2001
From: Marcelo Ricardo Leitner <mleitner@redhat.com>
Date: Wed, 20 May 2015 16:35:41 -0700
Subject: tcp: add tcpi_segs_in and tcpi_segs_out to tcp_info

This patch tracks the total number of inbound and outbound segments on a
TCP socket. One may use this number to have an idea on connection
quality when compared against the retransmissions.

RFC4898 named these : tcpEStatsPerfSegsIn and tcpEStatsPerfSegsOut

These are a 32bit field each and can be fetched both from TCP_INFO
getsockopt() if one has a handle on a TCP socket, or from inet_diag
netlink facility (iproute2/ss patch will follow)

Note that tp->segs_out was placed near tp->snd_nxt for good data
locality and minimal performance impact, while tp->segs_in was placed
near tp->bytes_received for the same reason.

Join work with Eric Dumazet.

Note that received SYN are accounted on the listener, but sent SYNACK
are not accounted.

Signed-off-by: Marcelo Ricardo Leitner <mleitner@redhat.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index e6fb5df22db1..f0212026c77f 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -149,11 +149,16 @@ struct tcp_sock {
 				 * sum(delta(rcv_nxt)), or how many bytes
 				 * were acked.
 				 */
+	u32	segs_in;	/* RFC4898 tcpEStatsPerfSegsIn
+				 * total number of segments in.
+				 */
  	u32	rcv_nxt;	/* What we want to receive next 	*/
 	u32	copied_seq;	/* Head of yet unread data		*/
 	u32	rcv_wup;	/* rcv_nxt on last window update sent	*/
  	u32	snd_nxt;	/* Next sequence we send		*/
-
+	u32	segs_out;	/* RFC4898 tcpEStatsPerfSegsOut
+				 * The total number of segments sent.
+				 */
 	u64	bytes_acked;	/* RFC4898 tcpEStatsAppHCThruOctetsAcked
 				 * sum(delta(snd_una)), or how many bytes
 				 * were acked.
-- 
cgit v1.2.3


From 2d0f230fe0649758394466cb69b553c0e8184df9 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 21 May 2015 15:11:02 +0800
Subject: crypto: aead - Rename aead_alg to old_aead_alg

This patch is the first step in the introduction of a new AEAD
alg type.  Unlike normal conversions this patch only renames the
existing aead_alg structure because there are external references
to it.

Those references will be removed after this patch.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/crypto.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 59ca4086ce6a..7d290a91c6f9 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -268,7 +268,7 @@ struct ablkcipher_alg {
 };
 
 /**
- * struct aead_alg - AEAD cipher definition
+ * struct old_aead_alg - AEAD cipher definition
  * @maxauthsize: Set the maximum authentication tag size supported by the
  *		 transformation. A transformation may support smaller tag sizes.
  *		 As the authentication tag is a message digest to ensure the
@@ -293,7 +293,7 @@ struct ablkcipher_alg {
  * All fields except @givencrypt , @givdecrypt , @geniv and @ivsize are
  * mandatory and must be filled.
  */
-struct aead_alg {
+struct old_aead_alg {
 	int (*setkey)(struct crypto_aead *tfm, const u8 *key,
 	              unsigned int keylen);
 	int (*setauthsize)(struct crypto_aead *tfm, unsigned int authsize);
@@ -501,7 +501,7 @@ struct crypto_alg {
 
 	union {
 		struct ablkcipher_alg ablkcipher;
-		struct aead_alg aead;
+		struct old_aead_alg aead;
 		struct blkcipher_alg blkcipher;
 		struct cipher_alg cipher;
 		struct compress_alg compress;
-- 
cgit v1.2.3


From 2a9de9c0f08d61fbe3764a21d22d0b72df97d6ae Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Fri, 24 Apr 2015 19:16:05 +0900
Subject: extcon: Use the unique id for external connector instead of string

This patch uses the unique id to identify the type of external connector instead
of string name. The string name have the many potential issues. So, this patch
defines the 'extcon' enumeration which includes all supported external connector
on EXTCON subsystem. If new external connector is necessary, the unique id of
new connector have to be added in 'extcon' enumeration. There are current
supported external connector in 'enum extcon' as following:

enum extcon {
	EXTCON_NONE		= 0x0,

	/* USB external connector */
	EXTCON_USB		= 0x1,
	EXTCON_USB_HOST		= 0x2,

	/* Charger external connector */
	EXTCON_TA		= 0x10,
	EXTCON_FAST_CHARGER	= 0x11,
	EXTCON_SLOW_CHARGER	= 0x12,
	EXTCON_CHARGE_DOWNSTREAM = 0x13,

	/* Audio and video external connector */
	EXTCON_LINE_IN		= 0x20,
	EXTCON_LINE_OUT		= 0x21,
	EXTCON_MICROPHONE	= 0x22,
	EXTCON_HEADPHONE	= 0x23,

	EXTCON_HDMI		= 0x30,
	EXTCON_MHL		= 0x31,
	EXTCON_DVI		= 0x32,
	EXTCON_VGA		= 0x33,
	EXTCON_SPDIF_IN		= 0x34,
	EXTCON_SPDIF_OUT	= 0x35,
	EXTCON_VIDEO_IN		= 0x36,
	EXTCON_VIDEO_OUT	= 0x37,

	/* Miscellaneous external connector */
	EXTCON_DOCK		= 0x50,
	EXTCON_JIG		= 0x51,
	EXTCON_MECHANICAL	= 0x52,

	EXTCON_END,
};

For example in extcon-arizona.c:
To use unique id removes the potential issue about handling
the inconsistent name of external connector with string.
- Previously, use the string to register the type of arizona jack connector
static const char *arizona_cable[] = {
	"Mechanical",
	"Microphone",
	"Headphone",
	"Line-out",
};
- Newly, use the unique id to register the type of arizona jack connector
static const enum extcon arizona_cable[] = {
	EXTCON_MECHANICAL,
	EXTCON_MICROPHONE,
	EXTCON_HEADPHONE,
	EXTCON_LINE_OUT,

	EXTCON_NONE,
};

And this patch modify the prototype of extcon_{get|set}_cable_state_() which
uses the 'enum extcon id' instead of 'cable_index'. Because although one more
extcon drivers support USB cable, each extcon driver might has the differnt
'cable_index' for USB cable. All extcon drivers can use the unique id number
for same external connector with modified extcon_{get|set}_cable_state_().

- Previously, use 'cable_index' on these functions:
extcon_get_cable_state_(struct extcon_dev*, int cable_index)
extcon_set_cable_state_(struct extcon_dev*, int cable_index, bool state)

-Newly, use 'enum extcon id' on these functions:
extcon_get_cable_state_(struct extcon_dev*, enum extcon id)
extcon_set_cable_state_(struct extcon_dev*, enum extcon id, bool state)

Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Felipe Balbi <balbi@ti.com>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Acked-by: Roger Quadros <rogerq@ti.com>
Acked-by: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Acked-by: Ramakrishna Pallala <ramakrishna.pallala@intel.com>
Reviewed-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
[arnd: Report the build break about drivers/usb/phy/phy-tahvo.c after using the
unique id for external connector insteadf of string]
Reported-by: Arnd Bergmann <arnd@arndb.de>
[dan.carpenter: Report the build warning of extcon_{set|get}_cable_state_()]
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
---
 include/linux/extcon.h                 | 111 ++++++++++++++-------------------
 include/linux/extcon/extcon-adc-jack.h |   5 +-
 2 files changed, 50 insertions(+), 66 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/extcon.h b/include/linux/extcon.h
index 799474d9dc48..85c882f0029e 100644
--- a/include/linux/extcon.h
+++ b/include/linux/extcon.h
@@ -1,6 +1,9 @@
 /*
  *  External connector (extcon) class driver
  *
+ * Copyright (C) 2015 Samsung Electronics
+ * Author: Chanwoo Choi <cw00.choi@samsung.com>
+ *
  * Copyright (C) 2012 Samsung Electronics
  * Author: Donggeun Kim <dg77.kim@samsung.com>
  * Author: MyungJoo Ham <myungjoo.ham@samsung.com>
@@ -27,50 +30,41 @@
 #include <linux/notifier.h>
 #include <linux/sysfs.h>
 
-#define SUPPORTED_CABLE_MAX	32
-#define CABLE_NAME_MAX		30
-
-/*
- * The standard cable name is to help support general notifier
- * and notifiee device drivers to share the common names.
- * Please use standard cable names unless your notifier device has
- * a very unique and abnormal cable or
- * the cable type is supposed to be used with only one unique
- * pair of notifier/notifiee devices.
- *
- * Please add any other "standard" cables used with extcon dev.
- *
- * You may add a dot and number to specify version or specification
- * of the specific cable if it is required. (e.g., "Fast-charger.18"
- * and "Fast-charger.10" for 1.8A and 1.0A chargers)
- * However, the notifiee and notifier should be able to handle such
- * string and if the notifiee can negotiate the protocol or identify,
- * you don't need such convention. This convention is helpful when
- * notifier can distinguish but notifiee cannot.
- */
-enum extcon_cable_name {
-	EXTCON_USB = 0,
-	EXTCON_USB_HOST,
-	EXTCON_TA,			/* Travel Adaptor */
-	EXTCON_FAST_CHARGER,
-	EXTCON_SLOW_CHARGER,
-	EXTCON_CHARGE_DOWNSTREAM,	/* Charging an external device */
-	EXTCON_HDMI,
-	EXTCON_MHL,
-	EXTCON_DVI,
-	EXTCON_VGA,
-	EXTCON_DOCK,
-	EXTCON_LINE_IN,
-	EXTCON_LINE_OUT,
-	EXTCON_MIC_IN,
-	EXTCON_HEADPHONE_OUT,
-	EXTCON_SPDIF_IN,
-	EXTCON_SPDIF_OUT,
-	EXTCON_VIDEO_IN,
-	EXTCON_VIDEO_OUT,
-	EXTCON_MECHANICAL,
+enum extcon {
+	EXTCON_NONE		= 0x0,
+
+	/* USB external connector */
+	EXTCON_USB		= 0x1,
+	EXTCON_USB_HOST		= 0x2,
+
+	/* Charger external connector */
+	EXTCON_TA		= 0x10,
+	EXTCON_FAST_CHARGER	= 0x11,
+	EXTCON_SLOW_CHARGER	= 0x12,
+	EXTCON_CHARGE_DOWNSTREAM = 0x13,
+
+	/* Audio/Video external connector */
+	EXTCON_LINE_IN		= 0x20,
+	EXTCON_LINE_OUT		= 0x21,
+	EXTCON_MICROPHONE	= 0x22,
+	EXTCON_HEADPHONE	= 0x23,
+
+	EXTCON_HDMI		= 0x30,
+	EXTCON_MHL		= 0x31,
+	EXTCON_DVI		= 0x32,
+	EXTCON_VGA		= 0x33,
+	EXTCON_SPDIF_IN		= 0x34,
+	EXTCON_SPDIF_OUT	= 0x35,
+	EXTCON_VIDEO_IN		= 0x36,
+	EXTCON_VIDEO_OUT	= 0x37,
+
+	/* Etc external connector */
+	EXTCON_DOCK		= 0x50,
+	EXTCON_JIG		= 0x51,
+	EXTCON_MECHANICAL	= 0x52,
+
+	EXTCON_END,
 };
-extern const char extcon_cable_name[][CABLE_NAME_MAX + 1];
 
 struct extcon_cable;
 
@@ -78,7 +72,7 @@ struct extcon_cable;
  * struct extcon_dev - An extcon device represents one external connector.
  * @name:		The name of this extcon device. Parent device name is
  *			used if NULL.
- * @supported_cable:	Array of supported cable names ending with NULL.
+ * @supported_cable:	Array of supported cable names ending with EXTCON_NONE.
  *			If supported_cable is NULL, cable name related APIs
  *			are disabled.
  * @mutually_exclusive:	Array of mutually exclusive set of cables that cannot
@@ -113,7 +107,7 @@ struct extcon_cable;
 struct extcon_dev {
 	/* Optional user initializing data */
 	const char *name;
-	const char **supported_cable;
+	const enum extcon *supported_cable;
 	const u32 *mutually_exclusive;
 
 	/* Optional callbacks to override class functions */
@@ -194,10 +188,10 @@ extern struct extcon_dev *extcon_get_extcon_dev(const char *extcon_name);
 /*
  * Following APIs control the memory of extcon device.
  */
-extern struct extcon_dev *extcon_dev_allocate(const char **cables);
+extern struct extcon_dev *extcon_dev_allocate(const enum extcon *cable);
 extern void extcon_dev_free(struct extcon_dev *edev);
 extern struct extcon_dev *devm_extcon_dev_allocate(struct device *dev,
-						   const char **cables);
+						   const enum extcon *cable);
 extern void devm_extcon_dev_free(struct device *dev, struct extcon_dev *edev);
 
 /*
@@ -216,13 +210,10 @@ extern int extcon_update_state(struct extcon_dev *edev, u32 mask, u32 state);
 
 /*
  * get/set_cable_state access each bit of the 32b encoded state value.
- * They are used to access the status of each cable based on the cable_name
- * or cable_index, which is retrieved by extcon_find_cable_index
+ * They are used to access the status of each cable based on the cable_name.
  */
-extern int extcon_find_cable_index(struct extcon_dev *sdev,
-				   const char *cable_name);
-extern int extcon_get_cable_state_(struct extcon_dev *edev, int cable_index);
-extern int extcon_set_cable_state_(struct extcon_dev *edev, int cable_index,
+extern int extcon_get_cable_state_(struct extcon_dev *edev, enum extcon id);
+extern int extcon_set_cable_state_(struct extcon_dev *edev, enum extcon id,
 				   bool cable_state);
 
 extern int extcon_get_cable_state(struct extcon_dev *edev,
@@ -281,7 +272,7 @@ static inline int devm_extcon_dev_register(struct device *dev,
 static inline void devm_extcon_dev_unregister(struct device *dev,
 					      struct extcon_dev *edev) { }
 
-static inline struct extcon_dev *extcon_dev_allocate(const char **cables)
+static inline struct extcon_dev *extcon_dev_allocate(const enum extcon *cable)
 {
 	return ERR_PTR(-ENOSYS);
 }
@@ -289,7 +280,7 @@ static inline struct extcon_dev *extcon_dev_allocate(const char **cables)
 static inline void extcon_dev_free(struct extcon_dev *edev) { }
 
 static inline struct extcon_dev *devm_extcon_dev_allocate(struct device *dev,
-							  const char **cables)
+						const enum extcon *cable)
 {
 	return ERR_PTR(-ENOSYS);
 }
@@ -312,20 +303,14 @@ static inline int extcon_update_state(struct extcon_dev *edev, u32 mask,
 	return 0;
 }
 
-static inline int extcon_find_cable_index(struct extcon_dev *edev,
-					  const char *cable_name)
-{
-	return 0;
-}
-
 static inline int extcon_get_cable_state_(struct extcon_dev *edev,
-					  int cable_index)
+					  enum extcon id)
 {
 	return 0;
 }
 
 static inline int extcon_set_cable_state_(struct extcon_dev *edev,
-					  int cable_index, bool cable_state)
+					  enum extcon id, bool cable_state)
 {
 	return 0;
 }
diff --git a/include/linux/extcon/extcon-adc-jack.h b/include/linux/extcon/extcon-adc-jack.h
index 9ca958c4e94c..53c60806bcfb 100644
--- a/include/linux/extcon/extcon-adc-jack.h
+++ b/include/linux/extcon/extcon-adc-jack.h
@@ -44,7 +44,7 @@ struct adc_jack_cond {
  * @consumer_channel:	Unique name to identify the channel on the consumer
  *			side. This typically describes the channels used within
  *			the consumer. E.g. 'battery_voltage'
- * @cable_names:	array of cable names ending with null.
+ * @cable_names:	array of extcon id for supported cables.
  * @adc_contitions:	array of struct adc_jack_cond conditions ending
  *			with .state = 0 entry. This describes how to decode
  *			adc values into extcon state.
@@ -58,8 +58,7 @@ struct adc_jack_pdata {
 	const char *name;
 	const char *consumer_channel;
 
-	/* The last entry should be NULL */
-	const char **cable_names;
+	const enum extcon *cable_names;
 
 	/* The last entry's state should be 0 */
 	struct adc_jack_cond *adc_conditions;
-- 
cgit v1.2.3


From 046050f6e623e442e9c71c525462ebd395dae526 Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Tue, 19 May 2015 20:01:12 +0900
Subject: extcon: Update the prototype of extcon_register_notifier() with enum
 extcon

Previously, extcon consumer driver used the extcon_register_interest()
to register the notifier chain and then to receive the notifier event
when external connector's state is changed. When registering the notifier chain
for specific external connector with extcon_register_interest(), it used the
the string name of external connector directly. There are potential problem
because of unclear, non-standard and inconsequent cable name. Namely,
it is not appropriate method to identify each external connector.

So, this patch modify the prototype of extcon_register_notifier() by using
the 'enum extcon' which are the unique id for each external connector
instead of unclear string method.

- Previously, the extcon consumer driver used the extcon_register_interest()
with 'cable_name' to point out the specific external connector. Also. it used
the un-needed structure (struct extcon_specific_cable_nb).
: int extcon_register_interest(struct extcon_specific_cable_nb *obj,
			     const char *extcon_name, const char *cable_name,
			     struct notifier_block *nb)

- Newly, the updated extcon_register_notifier() would definitely support
the same feature to detech the changed state of external connector without
any specific structure (struct extcon_specific_cable_nb).
: int extcon_register_notifier(struct extcon_dev *edev, enum extcon id,
			     struct notifier_block *nb)

This patch support the both extcon_register_interest() and new extcon_register_
notifier(). But the extcon_{register|unregister}_interest() will be deprecated
because extcon core would support the notifier event for extcon consumer driver
with only updated extcon_register_notifier() and 'extcon_specific_cable_nb'
will be removed if there are no extcon consumer driver with legacy
extcon_{register|unregister}_interest().

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Reviewed-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
---
 include/linux/extcon.h | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/extcon.h b/include/linux/extcon.h
index 85c882f0029e..be9652b3a154 100644
--- a/include/linux/extcon.h
+++ b/include/linux/extcon.h
@@ -116,7 +116,7 @@ struct extcon_dev {
 
 	/* Internal data. Please do not set. */
 	struct device dev;
-	struct raw_notifier_head nh;
+	struct raw_notifier_head *nh;
 	struct list_head entry;
 	int max_supported;
 	spinlock_t lock;	/* could be called by irq handler */
@@ -155,8 +155,6 @@ struct extcon_cable {
 /**
  * struct extcon_specific_cable_nb - An internal data for
  *				     extcon_register_interest().
- * @internal_nb:	A notifier block bridging extcon notifier
- *			and cable notifier.
  * @user_nb:		user provided notifier block for events from
  *			a specific cable.
  * @cable_index:	the target cable.
@@ -164,7 +162,6 @@ struct extcon_cable {
  * @previous_value:	the saved previous event value.
  */
 struct extcon_specific_cable_nb {
-	struct notifier_block internal_nb;
 	struct notifier_block *user_nb;
 	int cable_index;
 	struct extcon_dev *edev;
@@ -240,10 +237,10 @@ extern int extcon_unregister_interest(struct extcon_specific_cable_nb *nb);
  * we do not recommend to use this for normal 'notifiee' device drivers who
  * want to be notified by a specific external port of the notifier.
  */
-extern int extcon_register_notifier(struct extcon_dev *edev,
+extern int extcon_register_notifier(struct extcon_dev *edev, enum extcon id,
+				    struct notifier_block *nb);
+extern int extcon_unregister_notifier(struct extcon_dev *edev, enum extcon id,
 				    struct notifier_block *nb);
-extern int extcon_unregister_notifier(struct extcon_dev *edev,
-				      struct notifier_block *nb);
 
 /*
  * Following API get the extcon device from devicetree.
@@ -333,13 +330,15 @@ static inline struct extcon_dev *extcon_get_extcon_dev(const char *extcon_name)
 }
 
 static inline int extcon_register_notifier(struct extcon_dev *edev,
-					   struct notifier_block *nb)
+					enum extcon id,
+					struct notifier_block *nb)
 {
 	return 0;
 }
 
 static inline int extcon_unregister_notifier(struct extcon_dev *edev,
-					     struct notifier_block *nb)
+					enum extcon id,
+					struct notifier_block *nb)
 {
 	return 0;
 }
-- 
cgit v1.2.3


From 668abc729fcb9d034eccadf63166d2c76cd645d1 Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Thu, 21 May 2015 17:42:43 +0100
Subject: regmap: Introduce regmap_get_max_register

This patch introduces regmap_get_max_register() function which would be
used by the infrastructures like nvmem framework built on top of
regmap.

Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regmap.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index 116655d92269..2d87deda79cd 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -433,6 +433,7 @@ int regmap_update_bits_check_async(struct regmap *map, unsigned int reg,
 				   unsigned int mask, unsigned int val,
 				   bool *change);
 int regmap_get_val_bytes(struct regmap *map);
+int regmap_get_max_register(struct regmap *map);
 int regmap_async_complete(struct regmap *map);
 bool regmap_can_raw_write(struct regmap *map);
 
@@ -676,6 +677,12 @@ static inline int regmap_get_val_bytes(struct regmap *map)
 	return -EINVAL;
 }
 
+static inline int regmap_get_max_register(struct regmap *map)
+{
+	WARN_ONCE(1, "regmap API is disabled");
+	return -EINVAL;
+}
+
 static inline int regcache_sync(struct regmap *map)
 {
 	WARN_ONCE(1, "regmap API is disabled");
-- 
cgit v1.2.3


From a2f776cbb8271d7149784207da0b0c51e8b1847c Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Thu, 21 May 2015 17:42:54 +0100
Subject: regmap: Introduce regmap_get_reg_stride

This patch introduces regmap_get_reg_stride() function which would
be used by the infrastructures like nvmem framework built on top of
regmap. Mostly this function would be used for sanity checks on inputs
within such infrastructure.

Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regmap.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index 2d87deda79cd..59c55ea0f0b5 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -434,6 +434,7 @@ int regmap_update_bits_check_async(struct regmap *map, unsigned int reg,
 				   bool *change);
 int regmap_get_val_bytes(struct regmap *map);
 int regmap_get_max_register(struct regmap *map);
+int regmap_get_reg_stride(struct regmap *map);
 int regmap_async_complete(struct regmap *map);
 bool regmap_can_raw_write(struct regmap *map);
 
@@ -683,6 +684,12 @@ static inline int regmap_get_max_register(struct regmap *map)
 	return -EINVAL;
 }
 
+static inline int regmap_get_reg_stride(struct regmap *map)
+{
+	WARN_ONCE(1, "regmap API is disabled");
+	return -EINVAL;
+}
+
 static inline int regcache_sync(struct regmap *map)
 {
 	WARN_ONCE(1, "regmap API is disabled");
-- 
cgit v1.2.3


From 69eb0980ab4ced06f7c2b4774575337ce32912fb Mon Sep 17 00:00:00 2001
From: Laxman Dewangan <ldewangan@nvidia.com>
Date: Thu, 23 Apr 2015 16:10:24 +0530
Subject: regulator: max8973: add mechanism to enable/disable through GPIO

MAX8973 supports the voltage output enable/disable through its EN
pin. This EN pin can be connected through GPIO from host processor.
Add support to provide GPIO number from platform/DT and if it is
valid GPIO then enable external control default.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regulator/max8973-regulator.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/regulator/max8973-regulator.h b/include/linux/regulator/max8973-regulator.h
index f8acc052e353..f6a8a16a0d4d 100644
--- a/include/linux/regulator/max8973-regulator.h
+++ b/include/linux/regulator/max8973-regulator.h
@@ -58,6 +58,9 @@
  *		control signal from EN input pin. If it is false then
  *		voltage output will be enabled/disabled through EN bit of
  *		device register.
+ * @enable_gpio: Enable GPIO. If EN pin is controlled through GPIO from host
+ *		then GPIO number can be provided. If no GPIO controlled then
+ *		it should be -1.
  * @dvs_gpio: GPIO for dvs. It should be -1 if this is tied with fixed logic.
  * @dvs_def_state: Default state of dvs. 1 if it is high else 0.
  */
@@ -65,6 +68,7 @@ struct max8973_regulator_platform_data {
 	struct regulator_init_data *reg_init_data;
 	unsigned long control_flags;
 	bool enable_ext_control;
+	int enable_gpio;
 	int dvs_gpio;
 	unsigned dvs_def_state:1;
 };
-- 
cgit v1.2.3


From f705f837c58ebe1ea69dfffff4dcc234e2fbc8dd Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 22 May 2015 11:12:38 +0200
Subject: nvme: consolidate synchronous command submission helpers

Note that we keep the unused timeout argument, but allow callers to
pass 0 instead of a timeout if they want the default.  This will allow
adding a timeout to the pass through path later on.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/nvme.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 8dbd05e70f09..61488b2ae291 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -158,11 +158,7 @@ struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write,
 				unsigned long addr, unsigned length);
 void nvme_unmap_user_pages(struct nvme_dev *dev, int write,
 			struct nvme_iod *iod);
-int nvme_submit_io_cmd(struct nvme_dev *, struct nvme_ns *,
-						struct nvme_command *, u32 *);
-int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns);
-int nvme_submit_admin_cmd(struct nvme_dev *, struct nvme_command *,
-							u32 *result);
+int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd);
 int nvme_identify(struct nvme_dev *, unsigned nsid, unsigned cns,
 							dma_addr_t dma_addr);
 int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
-- 
cgit v1.2.3


From e75ec752d725b7b612c0b2db1bca50a9e53c0879 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 22 May 2015 11:12:39 +0200
Subject: nvme: store a struct device pointer in struct nvme_dev

Most users want the generic device, so store that in struct nvme_dev
instead of the pci_dev.  This also happens to be a nice step towards
making some code reusable for non-PCI transports.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/nvme.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 61488b2ae291..de0e49a716b8 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -74,7 +74,7 @@ struct nvme_dev {
 	struct blk_mq_tag_set tagset;
 	struct blk_mq_tag_set admin_tagset;
 	u32 __iomem *dbs;
-	struct pci_dev *pci_dev;
+	struct device *dev;
 	struct dma_pool *prp_page_pool;
 	struct dma_pool *prp_small_pool;
 	int instance;
-- 
cgit v1.2.3


From d29ec8241c10eacf59c23b3828a88dbae06e7e3f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 22 May 2015 11:12:46 +0200
Subject: nvme: submit internal commands through the block layer

Use block layer queues with an internal cmd_type to submit internally
generated NVMe commands.  This both simplifies the code a lot and allow
for a better structure.  For example now the LighNVM code can construct
commands without knowing the details of the underlying I/O descriptors.
Or a future NVMe over network target could inject commands, as well as
could the SCSI translation and ioctl code be reused for such a beast.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/nvme.h | 24 +++++++++---------------
 1 file changed, 9 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index de0e49a716b8..986bf8ad8e93 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -146,21 +146,15 @@ static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector)
 	return (sector >> (ns->lba_shift - 9));
 }
 
-/**
- * nvme_free_iod - frees an nvme_iod
- * @dev: The device that the I/O was submitted to
- * @iod: The memory to free
- */
-void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod);
-
-int nvme_setup_prps(struct nvme_dev *, struct nvme_iod *, int, gfp_t);
-struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write,
-				unsigned long addr, unsigned length);
-void nvme_unmap_user_pages(struct nvme_dev *dev, int write,
-			struct nvme_iod *iod);
-int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd);
-int nvme_identify(struct nvme_dev *, unsigned nsid, unsigned cns,
-							dma_addr_t dma_addr);
+int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
+		void *buf, unsigned bufflen);
+int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
+		void *buffer, void __user *ubuffer, unsigned bufflen,
+		u32 *result, unsigned timeout);
+int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id);
+int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid,
+		struct nvme_id_ns **id);
+int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log);
 int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
 			dma_addr_t dma_addr, u32 *result);
 int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
-- 
cgit v1.2.3


From 326e1dbb57368087a36607aaebe9795b8d5453e5 Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Fri, 22 May 2015 09:14:03 -0400
Subject: block: remove management of bi_remaining when restoring original
 bi_end_io

Commit c4cf5261 ("bio: skip atomic inc/dec of ->bi_remaining for
non-chains") regressed all existing callers that followed this pattern:
 1) saving a bio's original bi_end_io
 2) wiring up an intermediate bi_end_io
 3) restoring the original bi_end_io from intermediate bi_end_io
 4) calling bio_endio() to execute the restored original bi_end_io

The regression was due to BIO_CHAIN only ever getting set if
bio_inc_remaining() is called.  For the above pattern it isn't set until
step 3 above (step 2 would've needed to establish BIO_CHAIN).  As such
the first bio_endio(), in step 2 above, never decremented __bi_remaining
before calling the intermediate bi_end_io -- leaving __bi_remaining with
the value 1 instead of 0.  When bio_inc_remaining() occurred during step
3 it brought it to a value of 2.  When the second bio_endio() was
called, in step 4 above, it should've called the original bi_end_io but
it didn't because there was an extra reference that wasn't dropped (due
to atomic operations being optimized away since BIO_CHAIN wasn't set
upfront).

Fix this issue by removing the __bi_remaining management complexity for
all callers that use the above pattern -- bio_chain() is the only
interface that _needs_ to be concerned with __bi_remaining.  For the
above pattern callers just expect the bi_end_io they set to get called!
Remove bio_endio_nodec() and also remove all bio_inc_remaining() calls
that aren't associated with the bio_chain() interface.

Also, the bio_inc_remaining() interface has been moved local to bio.c.

Fixes: c4cf5261 ("bio: skip atomic inc/dec of ->bi_remaining for non-chains")
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/bio.h | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index 7486ea103f6e..f0291cf64cc5 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -427,7 +427,6 @@ static inline struct bio *bio_clone_kmalloc(struct bio *bio, gfp_t gfp_mask)
 }
 
 extern void bio_endio(struct bio *, int);
-extern void bio_endio_nodec(struct bio *, int);
 struct request_queue;
 extern int bio_phys_segments(struct request_queue *, struct bio *);
 
@@ -658,17 +657,6 @@ static inline struct bio *bio_list_get(struct bio_list *bl)
 	return bio;
 }
 
-/*
- * Increment chain count for the bio. Make sure the CHAIN flag update
- * is visible before the raised count.
- */
-static inline void bio_inc_remaining(struct bio *bio)
-{
-	bio->bi_flags |= (1 << BIO_CHAIN);
-	smp_mb__before_atomic();
-	atomic_inc(&bio->__bi_remaining);
-}
-
 /*
  * bio_set is used to allow other portions of the IO system to
  * allocate their own private memory pools for bio and iovec structures.
-- 
cgit v1.2.3


From 5f1b670d0bef508a5554d92525f5f6d00d640b38 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 22 May 2015 09:14:04 -0400
Subject: block, dm: don't copy bios for request clones

Currently dm-multipath has to clone the bios for every request sent
to the lower devices, which wastes cpu cycles and ties down memory.

This patch instead adds a new REQ_CLONE flag that instructs req_bio_endio
to not complete bios attached to a request, which we set on clone
requests similar to bios in a flush sequence.  With this change I/O
errors on a path failure only get propagated to dm-multipath, which
can then either resubmit the I/O or complete the bios on the original
request.

I've done some basic testing of this on a Linux target with ALUA support,
and it survives path failures during I/O nicely.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blk_types.h | 2 ++
 include/linux/blkdev.h    | 6 +-----
 2 files changed, 3 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 3f4ded0b1a34..45a6be89957c 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -192,6 +192,7 @@ enum rq_flag_bits {
 	__REQ_HASHED,		/* on IO scheduler merge hash */
 	__REQ_MQ_INFLIGHT,	/* track inflight for MQ */
 	__REQ_NO_TIMEOUT,	/* requests may never expire */
+	__REQ_CLONE,		/* cloned bios */
 	__REQ_NR_BITS,		/* stops here */
 };
 
@@ -246,5 +247,6 @@ enum rq_flag_bits {
 #define REQ_HASHED		(1ULL << __REQ_HASHED)
 #define REQ_MQ_INFLIGHT		(1ULL << __REQ_MQ_INFLIGHT)
 #define REQ_NO_TIMEOUT		(1ULL << __REQ_NO_TIMEOUT)
+#define REQ_CLONE		(1ULL << __REQ_CLONE)
 
 #endif /* __LINUX_BLK_TYPES_H */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index bc917956a6d0..9ded80da2c16 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -775,11 +775,7 @@ extern void blk_add_request_payload(struct request *rq, struct page *page,
 		unsigned int len);
 extern int blk_rq_check_limits(struct request_queue *q, struct request *rq);
 extern int blk_lld_busy(struct request_queue *q);
-extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
-			     struct bio_set *bs, gfp_t gfp_mask,
-			     int (*bio_ctr)(struct bio *, struct bio *, void *),
-			     void *data);
-extern void blk_rq_unprep_clone(struct request *rq);
+extern void blk_rq_prep_clone(struct request *rq, struct request *rq_src);
 extern int blk_insert_cloned_request(struct request_queue *q,
 				     struct request *rq);
 extern void blk_delay_queue(struct request_queue *, unsigned long);
-- 
cgit v1.2.3


From d0751b98dfa391f862e02dc36a233a54615e3f1d Mon Sep 17 00:00:00 2001
From: Yijing Wang <wangyijing@huawei.com>
Date: Thu, 21 May 2015 15:05:02 +0800
Subject: PCI: Add dev->has_secondary_link to track downstream PCIe links

A PCIe Port is an interface to a Link.  A Root Port is a PCI-PCI bridge in
a Root Complex and has a Link on its secondary (downstream) side.  For
other Ports, the Link may be on either the upstream (closer to the Root
Complex) or downstream side of the Port.

The usual topology has a Root Port connected to an Upstream Port.  We
previously assumed this was the only possible topology, and that a
Downstream Port's Link was always on its downstream side, like this:

                  +---------------------+
  +------+        |          Downstream |
  | Root |        | Upstream       Port +--Link--
  | Port +--Link--+ Port                |
  +------+        |          Downstream |
                  |                Port +--Link--
                  +---------------------+

But systems do exist (see URL below) where the Root Port is connected to a
Downstream Port.  In this case, a Downstream Port's Link may be on either
the upstream or downstream side:

                  +---------------------+
  +------+        |            Upstream |
  | Root |        | Downstream     Port +--Link--
  | Port +--Link--+ Port                |
  +------+        |          Downstream |
                  |                Port +--Link--
                  +---------------------+

We can't use the Port type to determine which side the Link is on, so add a
bit in struct pci_dev to keep track.

A Root Port's Link is always on the Port's secondary side.  A component
(Endpoint or Port) on the other end of the Link obviously has the Link on
its upstream side.  If that component is a Port, it is part of a Switch or
a Bridge.  A Bridge has a PCI or PCI-X bus on its secondary side, not a
Link.  The internal bus of a Switch connects the Port to another Port whose
Link is on the downstream side.

[bhelgaas: changelog, comment, cache "type", use if/else]
Link: http://lkml.kernel.org/r/54EB81B2.4050904@pobox.com
Link: https://bugzilla.kernel.org/show_bug.cgi?id=94361
Suggested-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Yijing Wang <wangyijing@huawei.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 include/linux/pci.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 353db8dc4c6e..1989f6dc9618 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -355,6 +355,7 @@ struct pci_dev {
 	unsigned int	broken_intx_masking:1;
 	unsigned int	io_window_1k:1;	/* Intel P2P bridge 1K I/O windows */
 	unsigned int	irq_managed:1;
+	unsigned int	has_secondary_link:1;
 	pci_dev_flags_t dev_flags;
 	atomic_t	enable_cnt;	/* pci_enable_device has been called */
 
-- 
cgit v1.2.3


From 6374f9124efea5fae9cba263108583c39e22f86b Mon Sep 17 00:00:00 2001
From: Harald Geyer <harald@ccbib.org>
Date: Tue, 7 Apr 2015 11:12:35 +0000
Subject: timekeeping: Provide new API to get the current time resolution

This patch series introduces a new function
u32 ktime_get_resolution_ns(void)
which allows to clean up some driver code.

In particular the IIO subsystem has a function to provide timestamps for
events but no means to get their resolution. So currently the dht11 driver
tries to guess the resolution in a rather messy and convoluted way. We
can do much better with the new code.

This API is not designed to be exposed to user space.

This has been tested on i386, sunxi and mxs.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Harald Geyer <harald@ccbib.org>
[jstultz: Tweaked to make it build after upstream changes]
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 include/linux/timekeeping.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 99176af216af..9af5c1214682 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -163,6 +163,7 @@ extern ktime_t ktime_get(void);
 extern ktime_t ktime_get_with_offset(enum tk_offsets offs);
 extern ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs);
 extern ktime_t ktime_get_raw(void);
+extern u32 ktime_get_resolution_ns(void);
 
 /**
  * ktime_get_real - get the real (wall-) time in ktime_t format
-- 
cgit v1.2.3


From 57d05a93ada77c4f8a6112cbc867a2948dce7991 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Wed, 13 May 2015 16:04:47 -0700
Subject: time: Rework debugging variables so they aren't global

Ingo suggested that the timekeeping debugging variables
recently added should not be global, and should be tied
to the timekeeper's read_base.

Thus this patch implements that suggestion.

This version is different from the earlier versions
as it keeps the variables in the timekeeper structure
rather then in the tkr.

Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 include/linux/timekeeper_internal.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index 6f8276ae579c..e1f5a1136554 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -61,6 +61,9 @@ struct tk_read_base {
  *			shifted nano seconds.
  * @ntp_error_shift:	Shift conversion between clock shifted nano seconds and
  *			ntp shifted nano seconds.
+ * @last_warning:	Warning ratelimiter (DEBUG_TIMEKEEPING)
+ * @underflow_seen:	Underflow warning flag (DEBUG_TIMEKEEPING)
+ * @overflow_seen:	Overflow warning flag (DEBUG_TIMEKEEPING)
  *
  * Note: For timespec(64) based interfaces wall_to_monotonic is what
  * we need to add to xtime (or xtime corrected for sub jiffie times)
@@ -106,6 +109,18 @@ struct timekeeper {
 	s64			ntp_error;
 	u32			ntp_error_shift;
 	u32			ntp_err_mult;
+#ifdef CONFIG_DEBUG_TIMEKEEPING
+	long			last_warning;
+	/*
+	 * These simple flag variables are managed
+	 * without locks, which is racy, but they are
+	 * ok since we don't really care about being
+	 * super precise about how many events were
+	 * seen, just that a problem was observed.
+	 */
+	int			underflow_seen;
+	int			overflow_seen;
+#endif
 };
 
 #ifdef CONFIG_GENERIC_TIME_VSYSCALL
-- 
cgit v1.2.3


From 30f3b3f9836c7c7e1f42ca855bbe8127fff4b99a Mon Sep 17 00:00:00 2001
From: Xunlei Pang <pang.xunlei@linaro.org>
Date: Thu, 9 Apr 2015 09:04:40 +0800
Subject: time: Include math64.h in time64.h

On 32-bit systems, timespec64_add_ns() calls __iter_div_u64_rem()
which needs math64.h, and we want to include time64.h in some
cases.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Xunlei Pang <pang.xunlei@linaro.org>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 include/linux/time64.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/time64.h b/include/linux/time64.h
index a3831478d9cf..12d4e82b0276 100644
--- a/include/linux/time64.h
+++ b/include/linux/time64.h
@@ -2,6 +2,7 @@
 #define _LINUX_TIME64_H
 
 #include <uapi/linux/time.h>
+#include <linux/math64.h>
 
 typedef __s64 time64_t;
 
-- 
cgit v1.2.3


From e83d0a4106d81dd08b70318f078f3bad6acdc110 Mon Sep 17 00:00:00 2001
From: Xunlei Pang <pang.xunlei@linaro.org>
Date: Thu, 9 Apr 2015 09:04:42 +0800
Subject: time: Remove read_boot_clock()

Now that we have a read_boot_clock64() function available on every
architecture, and converted all the users to it, it's time to remove
the (now unused) read_boot_clock() completely from the kernel.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Xunlei Pang <pang.xunlei@linaro.org>
[jstultz: Minor commit message tweak suggested by Ingo]
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 include/linux/timekeeping.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 9af5c1214682..3aa72e648650 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -267,7 +267,6 @@ extern int persistent_clock_is_local;
 
 extern void read_persistent_clock(struct timespec *ts);
 extern void read_persistent_clock64(struct timespec64 *ts);
-extern void read_boot_clock(struct timespec *ts);
 extern void read_boot_clock64(struct timespec64 *ts);
 extern int update_persistent_clock(struct timespec now);
 extern int update_persistent_clock64(struct timespec64 now);
-- 
cgit v1.2.3


From 25d238b2260973bfca0b82181824340c7aeae45a Mon Sep 17 00:00:00 2001
From: Rajeev Kumar <rajeevkumar.linux@gmail.com>
Date: Fri, 22 May 2015 09:58:30 -0700
Subject: Input: update email-id of Rajeev Kumar

rajeev-dlh.kumar@st.com email-id doesn't exist anymore as I have left the
company.

Signed-off-by: Rajeev Kumar <rajeevkumar.linux@gmail.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/linux/platform_data/keyboard-spear.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/keyboard-spear.h b/include/linux/platform_data/keyboard-spear.h
index 9248e3a7e333..5e3ff653900c 100644
--- a/include/linux/platform_data/keyboard-spear.h
+++ b/include/linux/platform_data/keyboard-spear.h
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2010 ST Microelectronics
- * Rajeev Kumar<rajeev-dlh.kumar@st.com>
+ * Rajeev Kumar <rajeevkumar.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
-- 
cgit v1.2.3


From 9d16f207112f77711600fb0770182a06e056e5de Mon Sep 17 00:00:00 2001
From: Saravana Kannan <skannan@codeaurora.org>
Date: Mon, 18 May 2015 10:43:31 +0530
Subject: cpufreq: Track cpu managing sysfs kobjects separately

In order to prepare for the next few commits, that will stop migrating
sysfs files on cpu hotplug, this patch starts managing sysfs-cpu
separately.

The behavior is still the same as we are still migrating sysfs files on
hotplug, later commits would change that.

Signed-off-by: Saravana Kannan <skannan@codeaurora.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/cpufreq.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 48e37c07eb84..29ad97c34fd5 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -65,7 +65,9 @@ struct cpufreq_policy {
 
 	unsigned int		shared_type; /* ACPI: ANY or ALL affected CPUs
 						should set cpufreq */
-	unsigned int		cpu;    /* cpu nr of CPU managing this policy */
+	unsigned int		cpu;    /* cpu managing this policy, must be online */
+	unsigned int		kobj_cpu; /* cpu managing sysfs files, can be offline */
+
 	struct clk		*clk;
 	struct cpufreq_cpuinfo	cpuinfo;/* see above */
 
-- 
cgit v1.2.3


From 0824965140fff1bf640a987dc790d1594a8e0699 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 13 Apr 2015 16:23:36 +0200
Subject: PCI: Propagate the "ignore hotplug" setting to parent

Refine the mechanism introduced by commit f244d8b623da ("ACPIPHP / radeon /
nouveau: Fix VGA switcheroo problem related to hotplug") to propagate the
ignore_hotplug setting of the device to its parent bridge in case hotplug
notifications related to the graphics adapter switching are given for the
bridge rather than for the device itself (they need to be ignored in both
cases).

Link: https://bugzilla.kernel.org/show_bug.cgi?id=61891
Link: https://bugs.freedesktop.org/show_bug.cgi?id=88927
Fixes: b440bde74f04 ("PCI: Add pci_ignore_hotplug() to ignore hotplug events for a device")
Reported-and-tested-by: tiagdtd-lava <tiagdtd-lava@yahoo.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
CC: stable@vger.kernel.org	# v3.17+
---
 include/linux/pci.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 353db8dc4c6e..ef45ffe9ca88 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1006,6 +1006,7 @@ int __must_check pci_assign_resource(struct pci_dev *dev, int i);
 int __must_check pci_reassign_resource(struct pci_dev *dev, int i, resource_size_t add_size, resource_size_t align);
 int pci_select_bars(struct pci_dev *dev, unsigned long flags);
 bool pci_device_is_present(struct pci_dev *pdev);
+void pci_ignore_hotplug(struct pci_dev *dev);
 
 /* ROM control related routines */
 int pci_enable_rom(struct pci_dev *pdev);
@@ -1043,11 +1044,6 @@ bool pci_dev_run_wake(struct pci_dev *dev);
 bool pci_check_pme_status(struct pci_dev *dev);
 void pci_pme_wakeup_bus(struct pci_bus *bus);
 
-static inline void pci_ignore_hotplug(struct pci_dev *dev)
-{
-	dev->ignore_hotplug = 1;
-}
-
 static inline int pci_enable_wake(struct pci_dev *dev, pci_power_t state,
 				  bool enable)
 {
-- 
cgit v1.2.3


From edd4ab0559316a1efe0881a4e2ccaeb4fec73142 Mon Sep 17 00:00:00 2001
From: Ramakrishna Pallala <ramakrishna.pallala@intel.com>
Date: Sun, 24 May 2015 09:11:58 +0530
Subject: power: max17042_battery: add HEALTH and TEMP_* properties support

This patch adds the support for following battery properties
to max17042 fuel gauge driver.

POWER_SUPPLY_PROP_TEMP_ALERT_MIN
POWER_SUPPLY_PROP_TEMP_ALERT_MAX
POWER_SUPPLY_PROP_TEMP_MIN
POWER_SUPPLY_PROP_TEMP_MAX
POWER_SUPPLY_PROP_HEALTH

Signed-off-by: Ramakrishna Pallala <ramakrishna.pallala@intel.com>
Reviewed-by: Krzysztof Kozlowski <k.kozlowski.k@gmail.com>
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 include/linux/power/max17042_battery.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/power/max17042_battery.h b/include/linux/power/max17042_battery.h
index cf112b4075c8..522757ac9cd4 100644
--- a/include/linux/power/max17042_battery.h
+++ b/include/linux/power/max17042_battery.h
@@ -215,6 +215,10 @@ struct max17042_platform_data {
 	 * the datasheet although it can be changed by board designers.
 	 */
 	unsigned int r_sns;
+	int         vmin;	/* in millivolts */
+	int         vmax;	/* in millivolts */
+	int         temp_min;	/* in tenths of degree Celsius */
+	int         temp_max;	/* in tenths of degree Celsius */
 };
 
 #endif /* __MAX17042_BATTERY_H_ */
-- 
cgit v1.2.3


From 843735b788a4e49c453f4aefdae80e6dfbe9ee85 Mon Sep 17 00:00:00 2001
From: Ramakrishna Pallala <ramakrishna.pallala@intel.com>
Date: Mon, 4 May 2015 22:16:07 +0530
Subject: power: axp288_charger: axp288 charger driver

This patch adds new power supply charger driver support
for X-Power AXP288 PMIC integrated charger.

This driver interfaces with the axp20x mfd driver as a cell
and listens to extcon cable events for setting up charging.

Signed-off-by: Ramakrishna Pallala <ramakrishna.pallala@intel.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 include/linux/mfd/axp20x.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h
index dfabd6db7ddf..f9030df5acd1 100644
--- a/include/linux/mfd/axp20x.h
+++ b/include/linux/mfd/axp20x.h
@@ -275,4 +275,11 @@ struct axp20x_fg_pdata {
 	int thermistor_curve[MAX_THERM_CURVE_SIZE][2];
 };
 
+struct axp20x_chrg_pdata {
+	int max_cc;
+	int max_cv;
+	int def_cc;
+	int def_cv;
+};
+
 #endif /* __LINUX_MFD_AXP20X_H */
-- 
cgit v1.2.3


From c93b76b34b4d8dbe8e3443eb27e49ac60034342b Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Thu, 7 May 2015 15:54:02 +0300
Subject: mei: bus: report also uuid in module alias

In order to automate modules matching add device uuid
which is reported in client enumeration, keep also
the name that is needed in for nfc distinguishing radio vendor

Report mei:name:uuid

Cc: linux-api@vger.kernel.org
Cc: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/mod_devicetable.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index 3bfd56778c29..2d2b2b571d61 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -599,9 +599,22 @@ struct ipack_device_id {
 
 #define MEI_CL_MODULE_PREFIX "mei:"
 #define MEI_CL_NAME_SIZE 32
+#define MEI_CL_UUID_FMT "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x"
+#define MEI_CL_UUID_ARGS(_u) \
+	_u[0], _u[1], _u[2], _u[3], _u[4], _u[5], _u[6], _u[7], \
+	_u[8], _u[9], _u[10], _u[11], _u[12], _u[13], _u[14], _u[15]
 
+/**
+ * struct mei_cl_device_id - MEI client device identifier
+ * @name: helper name
+ * @uuid: client uuid
+ * @driver_info: information used by the driver.
+ *
+ * identifies mei client device by uuid and name
+ */
 struct mei_cl_device_id {
 	char name[MEI_CL_NAME_SIZE];
+	__u8 uuid[16];
 	kernel_ulong_t driver_info;
 };
 
-- 
cgit v1.2.3


From dbac993f6a6df24d5edc362667e524ba43543472 Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Thu, 7 May 2015 15:54:07 +0300
Subject: mei: export mei client device struct to external use

Cc: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/mei_cl_bus.h | 38 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 36 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h
index 0819d36a3a74..a16b1f9c1aca 100644
--- a/include/linux/mei_cl_bus.h
+++ b/include/linux/mei_cl_bus.h
@@ -7,6 +7,42 @@
 
 struct mei_cl_device;
 
+typedef void (*mei_cl_event_cb_t)(struct mei_cl_device *device,
+			       u32 events, void *context);
+
+/**
+ * struct mei_cl_device - MEI device handle
+ * An mei_cl_device pointer is returned from mei_add_device()
+ * and links MEI bus clients to their actual ME host client pointer.
+ * Drivers for MEI devices will get an mei_cl_device pointer
+ * when being probed and shall use it for doing ME bus I/O.
+ *
+ * @dev: linux driver model device pointer
+ * @me_cl: me client
+ * @cl: mei client
+ * @name: device name
+ * @event_work: async work to execute event callback
+ * @event_cb: Drivers register this callback to get asynchronous ME
+ *	events (e.g. Rx buffer pending) notifications.
+ * @event_context: event callback run context
+ * @events: Events bitmask sent to the driver.
+ * @priv_data: client private data
+ */
+struct mei_cl_device {
+	struct device dev;
+
+	struct mei_me_client *me_cl;
+	struct mei_cl *cl;
+	char name[MEI_CL_NAME_SIZE];
+
+	struct work_struct event_work;
+	mei_cl_event_cb_t event_cb;
+	void *event_context;
+	unsigned long events;
+
+	void *priv_data;
+};
+
 struct mei_cl_driver {
 	struct device_driver driver;
 	const char *name;
@@ -28,8 +64,6 @@ void mei_cl_driver_unregister(struct mei_cl_driver *driver);
 ssize_t mei_cl_send(struct mei_cl_device *device, u8 *buf, size_t length);
 ssize_t  mei_cl_recv(struct mei_cl_device *device, u8 *buf, size_t length);
 
-typedef void (*mei_cl_event_cb_t)(struct mei_cl_device *device,
-			       u32 events, void *context);
 int mei_cl_register_event_cb(struct mei_cl_device *device,
 			  mei_cl_event_cb_t read_cb, void *context);
 
-- 
cgit v1.2.3


From 7df20f2d893db42eaa1ea1e30a2573c971ec9238 Mon Sep 17 00:00:00 2001
From: Sudeep Dutt <sudeep.dutt@intel.com>
Date: Wed, 29 Apr 2015 05:32:28 -0700
Subject: misc: mic: SCIF header file and IOCTL interface

This patch introduces the SCIF documentation in the header file
and describes the IOCTL interface for user mode. mic_overview.txt
is updated with documentation on SCIF and a new document
describing SCIF in more details is available in scif_overview.txt.

Reviewed-by: Nikhil Rao <nikhil.rao@intel.com>
Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: Sudeep Dutt <sudeep.dutt@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/scif.h | 993 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 993 insertions(+)
 create mode 100644 include/linux/scif.h

(limited to 'include/linux')

diff --git a/include/linux/scif.h b/include/linux/scif.h
new file mode 100644
index 000000000000..44f4f3898bbe
--- /dev/null
+++ b/include/linux/scif.h
@@ -0,0 +1,993 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#ifndef __SCIF_H__
+#define __SCIF_H__
+
+#include <linux/types.h>
+#include <linux/poll.h>
+#include <linux/scif_ioctl.h>
+
+#define SCIF_ACCEPT_SYNC	1
+#define SCIF_SEND_BLOCK		1
+#define SCIF_RECV_BLOCK		1
+
+enum {
+	SCIF_PROT_READ = (1 << 0),
+	SCIF_PROT_WRITE = (1 << 1)
+};
+
+enum {
+	SCIF_MAP_FIXED = 0x10,
+	SCIF_MAP_KERNEL	= 0x20,
+};
+
+enum {
+	SCIF_FENCE_INIT_SELF = (1 << 0),
+	SCIF_FENCE_INIT_PEER = (1 << 1),
+	SCIF_SIGNAL_LOCAL = (1 << 4),
+	SCIF_SIGNAL_REMOTE = (1 << 5)
+};
+
+enum {
+	SCIF_RMA_USECPU = (1 << 0),
+	SCIF_RMA_USECACHE = (1 << 1),
+	SCIF_RMA_SYNC = (1 << 2),
+	SCIF_RMA_ORDERED = (1 << 3)
+};
+
+/* End of SCIF Admin Reserved Ports */
+#define SCIF_ADMIN_PORT_END	1024
+
+/* End of SCIF Reserved Ports */
+#define SCIF_PORT_RSVD		1088
+
+typedef struct scif_endpt *scif_epd_t;
+
+#define SCIF_OPEN_FAILED ((scif_epd_t)-1)
+#define SCIF_REGISTER_FAILED ((off_t)-1)
+#define SCIF_MMAP_FAILED ((void *)-1)
+
+/**
+ * scif_open() - Create an endpoint
+ *
+ * Return:
+ * Upon successful completion, scif_open() returns an endpoint descriptor to
+ * be used in subsequent SCIF functions calls to refer to that endpoint;
+ * otherwise in user mode SCIF_OPEN_FAILED (that is ((scif_epd_t)-1)) is
+ * returned and errno is set to indicate the error; in kernel mode a NULL
+ * scif_epd_t is returned.
+ *
+ * Errors:
+ * ENOMEM - Insufficient kernel memory was available
+ */
+scif_epd_t scif_open(void);
+
+/**
+ * scif_bind() - Bind an endpoint to a port
+ * @epd:	endpoint descriptor
+ * @pn:		port number
+ *
+ * scif_bind() binds endpoint epd to port pn, where pn is a port number on the
+ * local node. If pn is zero, a port number greater than or equal to
+ * SCIF_PORT_RSVD is assigned and returned. Each endpoint may be bound to
+ * exactly one local port. Ports less than 1024 when requested can only be bound
+ * by system (or root) processes or by processes executed by privileged users.
+ *
+ * Return:
+ * Upon successful completion, scif_bind() returns the port number to which epd
+ * is bound; otherwise in user mode -1 is returned and errno is set to
+ * indicate the error; in kernel mode the negative of one of the following
+ * errors is returned.
+ *
+ * Errors:
+ * EBADF, ENOTTY - epd is not a valid endpoint descriptor
+ * EINVAL - the endpoint or the port is already bound
+ * EISCONN - The endpoint is already connected
+ * ENOSPC - No port number available for assignment
+ * EACCES - The port requested is protected and the user is not the superuser
+ */
+int scif_bind(scif_epd_t epd, u16 pn);
+
+/**
+ * scif_listen() - Listen for connections on an endpoint
+ * @epd:	endpoint descriptor
+ * @backlog:	maximum pending connection requests
+ *
+ * scif_listen() marks the endpoint epd as a listening endpoint - that is, as
+ * an endpoint that will be used to accept incoming connection requests. Once
+ * so marked, the endpoint is said to be in the listening state and may not be
+ * used as the endpoint of a connection.
+ *
+ * The endpoint, epd, must have been bound to a port.
+ *
+ * The backlog argument defines the maximum length to which the queue of
+ * pending connections for epd may grow. If a connection request arrives when
+ * the queue is full, the client may receive an error with an indication that
+ * the connection was refused.
+ *
+ * Return:
+ * Upon successful completion, scif_listen() returns 0; otherwise in user mode
+ * -1 is returned and errno is set to indicate the error; in kernel mode the
+ * negative of one of the following errors is returned.
+ *
+ * Errors:
+ * EBADF, ENOTTY - epd is not a valid endpoint descriptor
+ * EINVAL - the endpoint is not bound to a port
+ * EISCONN - The endpoint is already connected or listening
+ */
+int scif_listen(scif_epd_t epd, int backlog);
+
+/**
+ * scif_connect() - Initiate a connection on a port
+ * @epd:	endpoint descriptor
+ * @dst:	global id of port to which to connect
+ *
+ * The scif_connect() function requests the connection of endpoint epd to remote
+ * port dst. If the connection is successful, a peer endpoint, bound to dst, is
+ * created on node dst.node. On successful return, the connection is complete.
+ *
+ * If the endpoint epd has not already been bound to a port, scif_connect()
+ * will bind it to an unused local port.
+ *
+ * A connection is terminated when an endpoint of the connection is closed,
+ * either explicitly by scif_close(), or when a process that owns one of the
+ * endpoints of the connection is terminated.
+ *
+ * In user space, scif_connect() supports an asynchronous connection mode
+ * if the application has set the O_NONBLOCK flag on the endpoint via the
+ * fcntl() system call. Setting this flag will result in the calling process
+ * not to wait during scif_connect().
+ *
+ * Return:
+ * Upon successful completion, scif_connect() returns the port ID to which the
+ * endpoint, epd, is bound; otherwise in user mode -1 is returned and errno is
+ * set to indicate the error; in kernel mode the negative of one of the
+ * following errors is returned.
+ *
+ * Errors:
+ * EBADF, ENOTTY - epd is not a valid endpoint descriptor
+ * ECONNREFUSED - The destination was not listening for connections or refused
+ * the connection request
+ * EINVAL - dst.port is not a valid port ID
+ * EISCONN - The endpoint is already connected
+ * ENOMEM - No buffer space is available
+ * ENODEV - The destination node does not exist, or the node is lost or existed,
+ * but is not currently in the network since it may have crashed
+ * ENOSPC - No port number available for assignment
+ * EOPNOTSUPP - The endpoint is listening and cannot be connected
+ */
+int scif_connect(scif_epd_t epd, struct scif_port_id *dst);
+
+/**
+ * scif_accept() - Accept a connection on an endpoint
+ * @epd:	endpoint descriptor
+ * @peer:	global id of port to which connected
+ * @newepd:	new connected endpoint descriptor
+ * @flags:	flags
+ *
+ * The scif_accept() call extracts the first connection request from the queue
+ * of pending connections for the port on which epd is listening. scif_accept()
+ * creates a new endpoint, bound to the same port as epd, and allocates a new
+ * SCIF endpoint descriptor, returned in newepd, for the endpoint. The new
+ * endpoint is connected to the endpoint through which the connection was
+ * requested. epd is unaffected by this call, and remains in the listening
+ * state.
+ *
+ * On successful return, peer holds the global port identifier (node id and
+ * local port number) of the port which requested the connection.
+ *
+ * A connection is terminated when an endpoint of the connection is closed,
+ * either explicitly by scif_close(), or when a process that owns one of the
+ * endpoints of the connection is terminated.
+ *
+ * The number of connections that can (subsequently) be accepted on epd is only
+ * limited by system resources (memory).
+ *
+ * The flags argument is formed by OR'ing together zero or more of the
+ * following values.
+ * SCIF_ACCEPT_SYNC - block until a connection request is presented. If
+ *			SCIF_ACCEPT_SYNC is not in flags, and no pending
+ *			connections are present on the queue, scif_accept()
+ *			fails with an EAGAIN error
+ *
+ * In user mode, the select() and poll() functions can be used to determine
+ * when there is a connection request. In kernel mode, the scif_poll()
+ * function may be used for this purpose. A readable event will be delivered
+ * when a connection is requested.
+ *
+ * Return:
+ * Upon successful completion, scif_accept() returns 0; otherwise in user mode
+ * -1 is returned and errno is set to indicate the error; in kernel mode the
+ *	negative of one of the following errors is returned.
+ *
+ * Errors:
+ * EAGAIN - SCIF_ACCEPT_SYNC is not set and no connections are present to be
+ * accepted or SCIF_ACCEPT_SYNC is not set and remote node failed to complete
+ * its connection request
+ * EBADF, ENOTTY - epd is not a valid endpoint descriptor
+ * EINTR - Interrupted function
+ * EINVAL - epd is not a listening endpoint, or flags is invalid, or peer is
+ * NULL, or newepd is NULL
+ * ENODEV - The requesting node is lost or existed, but is not currently in the
+ * network since it may have crashed
+ * ENOMEM - Not enough space
+ * ENOENT - Secondary part of epd registration failed
+ */
+int scif_accept(scif_epd_t epd, struct scif_port_id *peer, scif_epd_t
+		*newepd, int flags);
+
+/**
+ * scif_close() - Close an endpoint
+ * @epd:	endpoint descriptor
+ *
+ * scif_close() closes an endpoint and performs necessary teardown of
+ * facilities associated with that endpoint.
+ *
+ * If epd is a listening endpoint then it will no longer accept connection
+ * requests on the port to which it is bound. Any pending connection requests
+ * are rejected.
+ *
+ * If epd is a connected endpoint, then its peer endpoint is also closed. RMAs
+ * which are in-process through epd or its peer endpoint will complete before
+ * scif_close() returns. Registered windows of the local and peer endpoints are
+ * released as if scif_unregister() was called against each window.
+ *
+ * Closing a SCIF endpoint does not affect local registered memory mapped by
+ * a SCIF endpoint on a remote node. The local memory remains mapped by the peer
+ * SCIF endpoint explicitly removed by calling munmap(..) by the peer.
+ *
+ * If the peer endpoint's receive queue is not empty at the time that epd is
+ * closed, then the peer endpoint can be passed as the endpoint parameter to
+ * scif_recv() until the receive queue is empty.
+ *
+ * epd is freed and may no longer be accessed.
+ *
+ * Return:
+ * Upon successful completion, scif_close() returns 0; otherwise in user mode
+ * -1 is returned and errno is set to indicate the error; in kernel mode the
+ * negative of one of the following errors is returned.
+ *
+ * Errors:
+ * EBADF, ENOTTY - epd is not a valid endpoint descriptor
+ */
+int scif_close(scif_epd_t epd);
+
+/**
+ * scif_send() - Send a message
+ * @epd:	endpoint descriptor
+ * @msg:	message buffer address
+ * @len:	message length
+ * @flags:	blocking mode flags
+ *
+ * scif_send() sends data to the peer of endpoint epd. Up to len bytes of data
+ * are copied from memory starting at address msg. On successful execution the
+ * return value of scif_send() is the number of bytes that were sent, and is
+ * zero if no bytes were sent because len was zero. scif_send() may be called
+ * only when the endpoint is in a connected state.
+ *
+ * If a scif_send() call is non-blocking, then it sends only those bytes which
+ * can be sent without waiting, up to a maximum of len bytes.
+ *
+ * If a scif_send() call is blocking, then it normally returns after sending
+ * all len bytes. If a blocking call is interrupted or the connection is
+ * reset, the call is considered successful if some bytes were sent or len is
+ * zero, otherwise the call is considered unsuccessful.
+ *
+ * In user mode, the select() and poll() functions can be used to determine
+ * when the send queue is not full. In kernel mode, the scif_poll() function
+ * may be used for this purpose.
+ *
+ * It is recommended that scif_send()/scif_recv() only be used for short
+ * control-type message communication between SCIF endpoints. The SCIF RMA
+ * APIs are expected to provide better performance for transfer sizes of
+ * 1024 bytes or longer for the current MIC hardware and software
+ * implementation.
+ *
+ * scif_send() will block until the entire message is sent if SCIF_SEND_BLOCK
+ * is passed as the flags argument.
+ *
+ * Return:
+ * Upon successful completion, scif_send() returns the number of bytes sent;
+ * otherwise in user mode -1 is returned and errno is set to indicate the
+ * error; in kernel mode the negative of one of the following errors is
+ * returned.
+ *
+ * Errors:
+ * EBADF, ENOTTY - epd is not a valid endpoint descriptor
+ * ECONNRESET - Connection reset by peer
+ * EFAULT - An invalid address was specified for a parameter
+ * EINVAL - flags is invalid, or len is negative
+ * ENODEV - The remote node is lost or existed, but is not currently in the
+ * network since it may have crashed
+ * ENOMEM - Not enough space
+ * ENOTCONN - The endpoint is not connected
+ */
+int scif_send(scif_epd_t epd, void *msg, int len, int flags);
+
+/**
+ * scif_recv() - Receive a message
+ * @epd:	endpoint descriptor
+ * @msg:	message buffer address
+ * @len:	message buffer length
+ * @flags:	blocking mode flags
+ *
+ * scif_recv() receives data from the peer of endpoint epd. Up to len bytes of
+ * data are copied to memory starting at address msg. On successful execution
+ * the return value of scif_recv() is the number of bytes that were received,
+ * and is zero if no bytes were received because len was zero. scif_recv() may
+ * be called only when the endpoint is in a connected state.
+ *
+ * If a scif_recv() call is non-blocking, then it receives only those bytes
+ * which can be received without waiting, up to a maximum of len bytes.
+ *
+ * If a scif_recv() call is blocking, then it normally returns after receiving
+ * all len bytes. If the blocking call was interrupted due to a disconnection,
+ * subsequent calls to scif_recv() will copy all bytes received upto the point
+ * of disconnection.
+ *
+ * In user mode, the select() and poll() functions can be used to determine
+ * when data is available to be received. In kernel mode, the scif_poll()
+ * function may be used for this purpose.
+ *
+ * It is recommended that scif_send()/scif_recv() only be used for short
+ * control-type message communication between SCIF endpoints. The SCIF RMA
+ * APIs are expected to provide better performance for transfer sizes of
+ * 1024 bytes or longer for the current MIC hardware and software
+ * implementation.
+ *
+ * scif_recv() will block until the entire message is received if
+ * SCIF_RECV_BLOCK is passed as the flags argument.
+ *
+ * Return:
+ * Upon successful completion, scif_recv() returns the number of bytes
+ * received; otherwise in user mode -1 is returned and errno is set to
+ * indicate the error; in kernel mode the negative of one of the following
+ * errors is returned.
+ *
+ * Errors:
+ * EAGAIN - The destination node is returning from a low power state
+ * EBADF, ENOTTY - epd is not a valid endpoint descriptor
+ * ECONNRESET - Connection reset by peer
+ * EFAULT - An invalid address was specified for a parameter
+ * EINVAL - flags is invalid, or len is negative
+ * ENODEV - The remote node is lost or existed, but is not currently in the
+ * network since it may have crashed
+ * ENOMEM - Not enough space
+ * ENOTCONN - The endpoint is not connected
+ */
+int scif_recv(scif_epd_t epd, void *msg, int len, int flags);
+
+/**
+ * scif_register() - Mark a memory region for remote access.
+ * @epd:		endpoint descriptor
+ * @addr:		starting virtual address
+ * @len:		length of range
+ * @offset:		offset of window
+ * @prot_flags:		read/write protection flags
+ * @map_flags:		mapping flags
+ *
+ * The scif_register() function opens a window, a range of whole pages of the
+ * registered address space of the endpoint epd, starting at offset po and
+ * continuing for len bytes. The value of po, further described below, is a
+ * function of the parameters offset and len, and the value of map_flags. Each
+ * page of the window represents the physical memory page which backs the
+ * corresponding page of the range of virtual address pages starting at addr
+ * and continuing for len bytes. addr and len are constrained to be multiples
+ * of the page size. A successful scif_register() call returns po.
+ *
+ * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset
+ * exactly, and offset is constrained to be a multiple of the page size. The
+ * mapping established by scif_register() will not replace any existing
+ * registration; an error is returned if any page within the range [offset,
+ * offset + len - 1] intersects an existing window.
+ *
+ * When SCIF_MAP_FIXED is not set, the implementation uses offset in an
+ * implementation-defined manner to arrive at po. The po value so chosen will
+ * be an area of the registered address space that the implementation deems
+ * suitable for a mapping of len bytes. An offset value of 0 is interpreted as
+ * granting the implementation complete freedom in selecting po, subject to
+ * constraints described below. A non-zero value of offset is taken to be a
+ * suggestion of an offset near which the mapping should be placed. When the
+ * implementation selects a value for po, it does not replace any extant
+ * window. In all cases, po will be a multiple of the page size.
+ *
+ * The physical pages which are so represented by a window are available for
+ * access in calls to mmap(), scif_readfrom(), scif_writeto(),
+ * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the
+ * physical pages represented by the window will not be reused by the memory
+ * subsystem for any other purpose. Note that the same physical page may be
+ * represented by multiple windows.
+ *
+ * Subsequent operations which change the memory pages to which virtual
+ * addresses are mapped (such as mmap(), munmap()) have no effect on
+ * existing window.
+ *
+ * If the process will fork(), it is recommended that the registered
+ * virtual address range be marked with MADV_DONTFORK. Doing so will prevent
+ * problems due to copy-on-write semantics.
+ *
+ * The prot_flags argument is formed by OR'ing together one or more of the
+ * following values.
+ * SCIF_PROT_READ - allow read operations from the window
+ * SCIF_PROT_WRITE - allow write operations to the window
+ *
+ * The map_flags argument can be set to SCIF_MAP_FIXED which interprets a
+ * fixed offset.
+ *
+ * Return:
+ * Upon successful completion, scif_register() returns the offset at which the
+ * mapping was placed (po); otherwise in user mode SCIF_REGISTER_FAILED (that
+ * is (off_t *)-1) is returned and errno is set to indicate the error; in
+ * kernel mode the negative of one of the following errors is returned.
+ *
+ * Errors:
+ * EADDRINUSE - SCIF_MAP_FIXED is set in map_flags, and pages in the range
+ * [offset, offset + len -1] are already registered
+ * EAGAIN - The mapping could not be performed due to lack of resources
+ * EBADF, ENOTTY - epd is not a valid endpoint descriptor
+ * ECONNRESET - Connection reset by peer
+ * EFAULT - Addresses in the range [addr, addr + len - 1] are invalid
+ * EINVAL - map_flags is invalid, or prot_flags is invalid, or SCIF_MAP_FIXED is
+ * set in flags, and offset is not a multiple of the page size, or addr is not a
+ * multiple of the page size, or len is not a multiple of the page size, or is
+ * 0, or offset is negative
+ * ENODEV - The remote node is lost or existed, but is not currently in the
+ * network since it may have crashed
+ * ENOMEM - Not enough space
+ * ENOTCONN -The endpoint is not connected
+ */
+off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset,
+		    int prot_flags, int map_flags);
+
+/**
+ * scif_unregister() - Mark a memory region for remote access.
+ * @epd:	endpoint descriptor
+ * @offset:	start of range to unregister
+ * @len:	length of range to unregister
+ *
+ * The scif_unregister() function closes those previously registered windows
+ * which are entirely within the range [offset, offset + len - 1]. It is an
+ * error to specify a range which intersects only a subrange of a window.
+ *
+ * On a successful return, pages within the window may no longer be specified
+ * in calls to mmap(), scif_readfrom(), scif_writeto(), scif_vreadfrom(),
+ * scif_vwriteto(), scif_get_pages, and scif_fence_signal(). The window,
+ * however, continues to exist until all previous references against it are
+ * removed. A window is referenced if there is a mapping to it created by
+ * mmap(), or if scif_get_pages() was called against the window
+ * (and the pages have not been returned via scif_put_pages()). A window is
+ * also referenced while an RMA, in which some range of the window is a source
+ * or destination, is in progress. Finally a window is referenced while some
+ * offset in that window was specified to scif_fence_signal(), and the RMAs
+ * marked by that call to scif_fence_signal() have not completed. While a
+ * window is in this state, its registered address space pages are not
+ * available for use in a new registered window.
+ *
+ * When all such references to the window have been removed, its references to
+ * all the physical pages which it represents are removed. Similarly, the
+ * registered address space pages of the window become available for
+ * registration in a new window.
+ *
+ * Return:
+ * Upon successful completion, scif_unregister() returns 0; otherwise in user
+ * mode -1 is returned and errno is set to indicate the error; in kernel mode
+ * the negative of one of the following errors is returned. In the event of an
+ * error, no windows are unregistered.
+ *
+ * Errors:
+ * EBADF, ENOTTY - epd is not a valid endpoint descriptor
+ * ECONNRESET - Connection reset by peer
+ * EINVAL - the range [offset, offset + len - 1] intersects a subrange of a
+ * window, or offset is negative
+ * ENODEV - The remote node is lost or existed, but is not currently in the
+ * network since it may have crashed
+ * ENOTCONN - The endpoint is not connected
+ * ENXIO - Offsets in the range [offset, offset + len - 1] are invalid for the
+ * registered address space of epd
+ */
+int scif_unregister(scif_epd_t epd, off_t offset, size_t len);
+
+/**
+ * scif_readfrom() - Copy from a remote address space
+ * @epd:	endpoint descriptor
+ * @loffset:	offset in local registered address space to
+ *		which to copy
+ * @len:	length of range to copy
+ * @roffset:	offset in remote registered address space
+ *		from which to copy
+ * @rma_flags:	transfer mode flags
+ *
+ * scif_readfrom() copies len bytes from the remote registered address space of
+ * the peer of endpoint epd, starting at the offset roffset to the local
+ * registered address space of epd, starting at the offset loffset.
+ *
+ * Each of the specified ranges [loffset, loffset + len - 1] and [roffset,
+ * roffset + len - 1] must be within some registered window or windows of the
+ * local and remote nodes. A range may intersect multiple registered windows,
+ * but only if those windows are contiguous in the registered address space.
+ *
+ * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using
+ * programmed read/writes. Otherwise the data is copied using DMA. If rma_-
+ * flags includes SCIF_RMA_SYNC, then scif_readfrom() will return after the
+ * transfer is complete. Otherwise, the transfer may be performed asynchron-
+ * ously. The order in which any two asynchronous RMA operations complete
+ * is non-deterministic. The synchronization functions, scif_fence_mark()/
+ * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to
+ * the completion of asynchronous RMA operations on the same endpoint.
+ *
+ * The DMA transfer of individual bytes is not guaranteed to complete in
+ * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last
+ * cacheline or partial cacheline of the source range will become visible on
+ * the destination node after all other transferred data in the source
+ * range has become visible on the destination node.
+ *
+ * The optimal DMA performance will likely be realized if both
+ * loffset and roffset are cacheline aligned (are a multiple of 64). Lower
+ * performance will likely be realized if loffset and roffset are not
+ * cacheline aligned but are separated by some multiple of 64. The lowest level
+ * of performance is likely if loffset and roffset are not separated by a
+ * multiple of 64.
+ *
+ * The rma_flags argument is formed by ORing together zero or more of the
+ * following values.
+ * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA
+ *	engine.
+ * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the
+ *		transfer has completed. Passing this flag results in the
+ *		current implementation busy waiting and consuming CPU cycles
+ *		while the DMA transfer is in progress for best performance by
+ *		avoiding the interrupt latency.
+ * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of
+ *		the source range becomes visible on the destination node
+ *		after all other transferred data in the source range has
+ *		become visible on the destination
+ *
+ * Return:
+ * Upon successful completion, scif_readfrom() returns 0; otherwise in user
+ * mode -1 is returned and errno is set to indicate the error; in kernel mode
+ * the negative of one of the following errors is returned.
+ *
+ * Errors:
+ * EACCESS - Attempt to write to a read-only range
+ * EBADF, ENOTTY - epd is not a valid endpoint descriptor
+ * ECONNRESET - Connection reset by peer
+ * EINVAL - rma_flags is invalid
+ * ENODEV - The remote node is lost or existed, but is not currently in the
+ * network since it may have crashed
+ * ENOTCONN - The endpoint is not connected
+ * ENXIO - The range [loffset, loffset + len - 1] is invalid for the registered
+ * address space of epd, or, The range [roffset, roffset + len - 1] is invalid
+ * for the registered address space of the peer of epd, or loffset or roffset
+ * is negative
+ */
+int scif_readfrom(scif_epd_t epd, off_t loffset, size_t len, off_t
+		  roffset, int rma_flags);
+
+/**
+ * scif_writeto() - Copy to a remote address space
+ * @epd:	endpoint descriptor
+ * @loffset:	offset in local registered address space
+ *		from which to copy
+ * @len:	length of range to copy
+ * @roffset:	offset in remote registered address space to
+ *		which to copy
+ * @rma_flags:	transfer mode flags
+ *
+ * scif_writeto() copies len bytes from the local registered address space of
+ * epd, starting at the offset loffset to the remote registered address space
+ * of the peer of endpoint epd, starting at the offset roffset.
+ *
+ * Each of the specified ranges [loffset, loffset + len - 1] and [roffset,
+ * roffset + len - 1] must be within some registered window or windows of the
+ * local and remote nodes. A range may intersect multiple registered windows,
+ * but only if those windows are contiguous in the registered address space.
+ *
+ * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using
+ * programmed read/writes. Otherwise the data is copied using DMA. If rma_-
+ * flags includes SCIF_RMA_SYNC, then scif_writeto() will return after the
+ * transfer is complete. Otherwise, the transfer may be performed asynchron-
+ * ously. The order in which any two asynchronous RMA operations complete
+ * is non-deterministic. The synchronization functions, scif_fence_mark()/
+ * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to
+ * the completion of asynchronous RMA operations on the same endpoint.
+ *
+ * The DMA transfer of individual bytes is not guaranteed to complete in
+ * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last
+ * cacheline or partial cacheline of the source range will become visible on
+ * the destination node after all other transferred data in the source
+ * range has become visible on the destination node.
+ *
+ * The optimal DMA performance will likely be realized if both
+ * loffset and roffset are cacheline aligned (are a multiple of 64). Lower
+ * performance will likely be realized if loffset and roffset are not cacheline
+ * aligned but are separated by some multiple of 64. The lowest level of
+ * performance is likely if loffset and roffset are not separated by a multiple
+ * of 64.
+ *
+ * The rma_flags argument is formed by ORing together zero or more of the
+ * following values.
+ * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA
+ *			engine.
+ * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the
+ *		transfer has completed. Passing this flag results in the
+ *		current implementation busy waiting and consuming CPU cycles
+ *		while the DMA transfer is in progress for best performance by
+ *		avoiding the interrupt latency.
+ * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of
+ *		the source range becomes visible on the destination node
+ *		after all other transferred data in the source range has
+ *		become visible on the destination
+ *
+ * Return:
+ * Upon successful completion, scif_readfrom() returns 0; otherwise in user
+ * mode -1 is returned and errno is set to indicate the error; in kernel mode
+ * the negative of one of the following errors is returned.
+ *
+ * Errors:
+ * EACCESS - Attempt to write to a read-only range
+ * EBADF, ENOTTY - epd is not a valid endpoint descriptor
+ * ECONNRESET - Connection reset by peer
+ * EINVAL - rma_flags is invalid
+ * ENODEV - The remote node is lost or existed, but is not currently in the
+ * network since it may have crashed
+ * ENOTCONN - The endpoint is not connected
+ * ENXIO - The range [loffset, loffset + len - 1] is invalid for the registered
+ * address space of epd, or, The range [roffset , roffset + len -1] is invalid
+ * for the registered address space of the peer of epd, or loffset or roffset
+ * is negative
+ */
+int scif_writeto(scif_epd_t epd, off_t loffset, size_t len, off_t
+		 roffset, int rma_flags);
+
+/**
+ * scif_vreadfrom() - Copy from a remote address space
+ * @epd:	endpoint descriptor
+ * @addr:	address to which to copy
+ * @len:	length of range to copy
+ * @roffset:	offset in remote registered address space
+ *		from which to copy
+ * @rma_flags:	transfer mode flags
+ *
+ * scif_vreadfrom() copies len bytes from the remote registered address
+ * space of the peer of endpoint epd, starting at the offset roffset, to local
+ * memory, starting at addr.
+ *
+ * The specified range [roffset, roffset + len - 1] must be within some
+ * registered window or windows of the remote nodes. The range may
+ * intersect multiple registered windows, but only if those windows are
+ * contiguous in the registered address space.
+ *
+ * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using
+ * programmed read/writes. Otherwise the data is copied using DMA. If rma_-
+ * flags includes SCIF_RMA_SYNC, then scif_vreadfrom() will return after the
+ * transfer is complete. Otherwise, the transfer may be performed asynchron-
+ * ously. The order in which any two asynchronous RMA operations complete
+ * is non-deterministic. The synchronization functions, scif_fence_mark()/
+ * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to
+ * the completion of asynchronous RMA operations on the same endpoint.
+ *
+ * The DMA transfer of individual bytes is not guaranteed to complete in
+ * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last
+ * cacheline or partial cacheline of the source range will become visible on
+ * the destination node after all other transferred data in the source
+ * range has become visible on the destination node.
+ *
+ * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back
+ * the specified local memory range may be remain in a pinned state even after
+ * the specified transfer completes. This may reduce overhead if some or all of
+ * the same virtual address range is referenced in a subsequent call of
+ * scif_vreadfrom() or scif_vwriteto().
+ *
+ * The optimal DMA performance will likely be realized if both
+ * addr and roffset are cacheline aligned (are a multiple of 64). Lower
+ * performance will likely be realized if addr and roffset are not
+ * cacheline aligned but are separated by some multiple of 64. The lowest level
+ * of performance is likely if addr and roffset are not separated by a
+ * multiple of 64.
+ *
+ * The rma_flags argument is formed by ORing together zero or more of the
+ * following values.
+ * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA
+ *	engine.
+ * SCIF_RMA_USECACHE - enable registration caching
+ * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the
+ *		transfer has completed. Passing this flag results in the
+ *		current implementation busy waiting and consuming CPU cycles
+ *		while the DMA transfer is in progress for best performance by
+ *		avoiding the interrupt latency.
+ * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of
+ *	the source range becomes visible on the destination node
+ *	after all other transferred data in the source range has
+ *	become visible on the destination
+ *
+ * Return:
+ * Upon successful completion, scif_vreadfrom() returns 0; otherwise in user
+ * mode -1 is returned and errno is set to indicate the error; in kernel mode
+ * the negative of one of the following errors is returned.
+ *
+ * Errors:
+ * EACCESS - Attempt to write to a read-only range
+ * EBADF, ENOTTY - epd is not a valid endpoint descriptor
+ * ECONNRESET - Connection reset by peer
+ * EFAULT - Addresses in the range [addr, addr + len - 1] are invalid
+ * EINVAL - rma_flags is invalid
+ * ENODEV - The remote node is lost or existed, but is not currently in the
+ * network since it may have crashed
+ * ENOTCONN - The endpoint is not connected
+ * ENXIO - Offsets in the range [roffset, roffset + len - 1] are invalid for the
+ * registered address space of epd
+ */
+int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len, off_t roffset,
+		   int rma_flags);
+
+/**
+ * scif_vwriteto() - Copy to a remote address space
+ * @epd:	endpoint descriptor
+ * @addr:	address from which to copy
+ * @len:	length of range to copy
+ * @roffset:	offset in remote registered address space to
+ *		which to copy
+ * @rma_flags:	transfer mode flags
+ *
+ * scif_vwriteto() copies len bytes from the local memory, starting at addr, to
+ * the remote registered address space of the peer of endpoint epd, starting at
+ * the offset roffset.
+ *
+ * The specified range [roffset, roffset + len - 1] must be within some
+ * registered window or windows of the remote nodes. The range may intersect
+ * multiple registered windows, but only if those windows are contiguous in the
+ * registered address space.
+ *
+ * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using
+ * programmed read/writes. Otherwise the data is copied using DMA. If rma_-
+ * flags includes SCIF_RMA_SYNC, then scif_vwriteto() will return after the
+ * transfer is complete. Otherwise, the transfer may be performed asynchron-
+ * ously. The order in which any two asynchronous RMA operations complete
+ * is non-deterministic. The synchronization functions, scif_fence_mark()/
+ * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to
+ * the completion of asynchronous RMA operations on the same endpoint.
+ *
+ * The DMA transfer of individual bytes is not guaranteed to complete in
+ * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last
+ * cacheline or partial cacheline of the source range will become visible on
+ * the destination node after all other transferred data in the source
+ * range has become visible on the destination node.
+ *
+ * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back
+ * the specified local memory range may be remain in a pinned state even after
+ * the specified transfer completes. This may reduce overhead if some or all of
+ * the same virtual address range is referenced in a subsequent call of
+ * scif_vreadfrom() or scif_vwriteto().
+ *
+ * The optimal DMA performance will likely be realized if both
+ * addr and offset are cacheline aligned (are a multiple of 64). Lower
+ * performance will likely be realized if addr and offset are not cacheline
+ * aligned but are separated by some multiple of 64. The lowest level of
+ * performance is likely if addr and offset are not separated by a multiple of
+ * 64.
+ *
+ * The rma_flags argument is formed by ORing together zero or more of the
+ * following values.
+ * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA
+ *	engine.
+ * SCIF_RMA_USECACHE - allow registration caching
+ * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the
+ *		transfer has completed. Passing this flag results in the
+ *		current implementation busy waiting and consuming CPU cycles
+ *		while the DMA transfer is in progress for best performance by
+ *		avoiding the interrupt latency.
+ * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of
+ *		the source range becomes visible on the destination node
+ *		after all other transferred data in the source range has
+ *		become visible on the destination
+ *
+ * Return:
+ * Upon successful completion, scif_vwriteto() returns 0; otherwise in user
+ * mode -1 is returned and errno is set to indicate the error; in kernel mode
+ * the negative of one of the following errors is returned.
+ *
+ * Errors:
+ * EACCESS - Attempt to write to a read-only range
+ * EBADF, ENOTTY - epd is not a valid endpoint descriptor
+ * ECONNRESET - Connection reset by peer
+ * EFAULT - Addresses in the range [addr, addr + len - 1] are invalid
+ * EINVAL - rma_flags is invalid
+ * ENODEV - The remote node is lost or existed, but is not currently in the
+ * network since it may have crashed
+ * ENOTCONN - The endpoint is not connected
+ * ENXIO - Offsets in the range [roffset, roffset + len - 1] are invalid for the
+ * registered address space of epd
+ */
+int scif_vwriteto(scif_epd_t epd, void *addr, size_t len, off_t roffset,
+		  int rma_flags);
+
+/**
+ * scif_fence_mark() - Mark previously issued RMAs
+ * @epd:	endpoint descriptor
+ * @flags:	control flags
+ * @mark:	marked value returned as output.
+ *
+ * scif_fence_mark() returns after marking the current set of all uncompleted
+ * RMAs initiated through the endpoint epd or the current set of all
+ * uncompleted RMAs initiated through the peer of endpoint epd. The RMAs are
+ * marked with a value returned at mark. The application may subsequently call
+ * scif_fence_wait(), passing the value returned at mark, to await completion
+ * of all RMAs so marked.
+ *
+ * The flags argument has exactly one of the following values.
+ * SCIF_FENCE_INIT_SELF - RMA operations initiated through endpoint
+ *	epd are marked
+ * SCIF_FENCE_INIT_PEER - RMA operations initiated through the peer
+ *	of endpoint epd are marked
+ *
+ * Return:
+ * Upon successful completion, scif_fence_mark() returns 0; otherwise in user
+ * mode -1 is returned and errno is set to indicate the error; in kernel mode
+ * the negative of one of the following errors is returned.
+ *
+ * Errors:
+ * EBADF, ENOTTY - epd is not a valid endpoint descriptor
+ * ECONNRESET - Connection reset by peer
+ * EINVAL - flags is invalid
+ * ENODEV - The remote node is lost or existed, but is not currently in the
+ * network since it may have crashed
+ * ENOTCONN - The endpoint is not connected
+ * ENOMEM - Insufficient kernel memory was available
+ */
+int scif_fence_mark(scif_epd_t epd, int flags, int *mark);
+
+/**
+ * scif_fence_wait() - Wait for completion of marked RMAs
+ * @epd:	endpoint descriptor
+ * @mark:	mark request
+ *
+ * scif_fence_wait() returns after all RMAs marked with mark have completed.
+ * The value passed in mark must have been obtained in a previous call to
+ * scif_fence_mark().
+ *
+ * Return:
+ * Upon successful completion, scif_fence_wait() returns 0; otherwise in user
+ * mode -1 is returned and errno is set to indicate the error; in kernel mode
+ * the negative of one of the following errors is returned.
+ *
+ * Errors:
+ * EBADF, ENOTTY - epd is not a valid endpoint descriptor
+ * ECONNRESET - Connection reset by peer
+ * ENODEV - The remote node is lost or existed, but is not currently in the
+ * network since it may have crashed
+ * ENOTCONN - The endpoint is not connected
+ * ENOMEM - Insufficient kernel memory was available
+ */
+int scif_fence_wait(scif_epd_t epd, int mark);
+
+/**
+ * scif_fence_signal() - Request a memory update on completion of RMAs
+ * @epd:	endpoint descriptor
+ * @loff:	local offset
+ * @lval:	local value to write to loffset
+ * @roff:	remote offset
+ * @rval:	remote value to write to roffset
+ * @flags:	flags
+ *
+ * scif_fence_signal() returns after marking the current set of all uncompleted
+ * RMAs initiated through the endpoint epd or marking the current set of all
+ * uncompleted RMAs initiated through the peer of endpoint epd.
+ *
+ * If flags includes SCIF_SIGNAL_LOCAL, then on completion of the RMAs in the
+ * marked set, lval is written to memory at the address corresponding to offset
+ * loff in the local registered address space of epd. loff must be within a
+ * registered window. If flags includes SCIF_SIGNAL_REMOTE, then on completion
+ * of the RMAs in the marked set, rval is written to memory at the address
+ * corresponding to offset roff in the remote registered address space of epd.
+ * roff must be within a remote registered window of the peer of epd. Note
+ * that any specified offset must be DWORD (4 byte / 32 bit) aligned.
+ *
+ * The flags argument is formed by OR'ing together the following.
+ * Exactly one of the following values.
+ * SCIF_FENCE_INIT_SELF - RMA operations initiated through endpoint
+ *	epd are marked
+ * SCIF_FENCE_INIT_PEER - RMA operations initiated through the peer
+ *	of endpoint epd are marked
+ * One or more of the following values.
+ * SCIF_SIGNAL_LOCAL - On completion of the marked set of RMAs, write lval to
+ *	memory at the address corresponding to offset loff in the local
+ *	registered address space of epd.
+ * SCIF_SIGNAL_REMOTE - On completion of the marked set of RMAs, write rval to
+ *	memory at the address corresponding to offset roff in the remote
+ *	registered address space of epd.
+ *
+ * Return:
+ * Upon successful completion, scif_fence_signal() returns 0; otherwise in
+ * user mode -1 is returned and errno is set to indicate the error; in kernel
+ * mode the negative of one of the following errors is returned.
+ *
+ * Errors:
+ * EBADF, ENOTTY - epd is not a valid endpoint descriptor
+ * ECONNRESET - Connection reset by peer
+ * EINVAL - flags is invalid, or loff or roff are not DWORD aligned
+ * ENODEV - The remote node is lost or existed, but is not currently in the
+ * network since it may have crashed
+ * ENOTCONN - The endpoint is not connected
+ * ENXIO - loff is invalid for the registered address of epd, or roff is invalid
+ * for the registered address space, of the peer of epd
+ */
+int scif_fence_signal(scif_epd_t epd, off_t loff, u64 lval, off_t roff,
+		      u64 rval, int flags);
+
+/**
+ * scif_get_node_ids() - Return information about online nodes
+ * @nodes:	array in which to return online node IDs
+ * @len:	number of entries in the nodes array
+ * @self:	address to place the node ID of the local node
+ *
+ * scif_get_node_ids() fills in the nodes array with up to len node IDs of the
+ * nodes in the SCIF network. If there is not enough space in nodes, as
+ * indicated by the len parameter, only len node IDs are returned in nodes. The
+ * return value of scif_get_node_ids() is the total number of nodes currently in
+ * the SCIF network. By checking the return value against the len parameter,
+ * the user may determine if enough space for nodes was allocated.
+ *
+ * The node ID of the local node is returned at self.
+ *
+ * Return:
+ * Upon successful completion, scif_get_node_ids() returns the actual number of
+ * online nodes in the SCIF network including 'self'; otherwise in user mode
+ * -1 is returned and errno is set to indicate the error; in kernel mode no
+ * errors are returned.
+ *
+ * Errors:
+ * EFAULT - Bad address
+ */
+int scif_get_node_ids(u16 *nodes, int len, u16 *self);
+
+#endif /* __SCIF_H__ */
-- 
cgit v1.2.3


From 3647a83d9dcf00b8e17777ec8aa1e48f1ed4fe06 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Sat, 11 Apr 2015 18:07:39 -0700
Subject: Drivers: hv: util: move kvp/vss function declarations to
 hyperv_vmbus.h

These declarations are internal to hv_util module and hv_fcopy_* declarations
already reside there.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Tested-by: Alex Ng <alexng@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/hyperv.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 902c37aef67e..1744148a39f9 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1236,13 +1236,6 @@ extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *,
 					struct icmsg_negotiate *, u8 *, int,
 					int);
 
-int hv_kvp_init(struct hv_util_service *);
-void hv_kvp_deinit(void);
-void hv_kvp_onchannelcallback(void *);
-
-int hv_vss_init(struct hv_util_service *);
-void hv_vss_deinit(void);
-void hv_vss_onchannelcallback(void *);
 void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid);
 
 extern struct resource hyperv_mmio;
-- 
cgit v1.2.3


From db9ba2088f6507fee370904f02db1eb9b49bd088 Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Wed, 22 Apr 2015 21:31:31 -0700
Subject: drivers: hv: vmbus: Get rid of some unused definitions

Get rid of some unused definitions.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/hyperv.h | 19 -------------------
 1 file changed, 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 1744148a39f9..e29ccddc6300 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -389,10 +389,6 @@ enum vmbus_channel_message_type {
 	CHANNELMSG_INITIATE_CONTACT		= 14,
 	CHANNELMSG_VERSION_RESPONSE		= 15,
 	CHANNELMSG_UNLOAD			= 16,
-#ifdef VMBUS_FEATURE_PARENT_OR_PEER_MEMORY_MAPPED_INTO_A_CHILD
-	CHANNELMSG_VIEWRANGE_ADD		= 17,
-	CHANNELMSG_VIEWRANGE_REMOVE		= 18,
-#endif
 	CHANNELMSG_COUNT
 };
 
@@ -549,21 +545,6 @@ struct vmbus_channel_gpadl_torndown {
 	u32 gpadl;
 } __packed;
 
-#ifdef VMBUS_FEATURE_PARENT_OR_PEER_MEMORY_MAPPED_INTO_A_CHILD
-struct vmbus_channel_view_range_add {
-	struct vmbus_channel_message_header header;
-	PHYSICAL_ADDRESS viewrange_base;
-	u64 viewrange_length;
-	u32 child_relid;
-} __packed;
-
-struct vmbus_channel_view_range_remove {
-	struct vmbus_channel_message_header header;
-	PHYSICAL_ADDRESS viewrange_base;
-	u32 child_relid;
-} __packed;
-#endif
-
 struct vmbus_channel_relid_released {
 	struct vmbus_channel_message_header header;
 	u32 child_relid;
-- 
cgit v1.2.3


From 2db84eff127e3f4b3635edc589cd6a56db8755a3 Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Wed, 22 Apr 2015 21:31:32 -0700
Subject: Drivers: hv: vmbus: Implement the protocol for tearing down vmbus
 state

Implement the protocol for tearing down the monitor state established with
the host.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Tested-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/hyperv.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index e29ccddc6300..ea934864293d 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -389,6 +389,7 @@ enum vmbus_channel_message_type {
 	CHANNELMSG_INITIATE_CONTACT		= 14,
 	CHANNELMSG_VERSION_RESPONSE		= 15,
 	CHANNELMSG_UNLOAD			= 16,
+	CHANNELMSG_UNLOAD_RESPONSE		= 17,
 	CHANNELMSG_COUNT
 };
 
-- 
cgit v1.2.3


From fea844a2b0edd6540d5cde2cd54a8a3c86e9c53f Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <[mailto:vkuznets@redhat.com]>
Date: Wed, 6 May 2015 17:47:43 -0700
Subject: Drivers: hv: vmbus: briefly comment num_sc and next_oc

next_oc and num_sc fields of struct vmbus_channel deserve a description. Move
them closer to sc_list as these fields are related to it.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/hyperv.h | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index ea934864293d..3932a993ff5a 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -726,6 +726,15 @@ struct vmbus_channel {
 	 * All Sub-channels of a primary channel are linked here.
 	 */
 	struct list_head sc_list;
+	/*
+	 * Current number of sub-channels.
+	 */
+	int num_sc;
+	/*
+	 * Number of a sub-channel (position within sc_list) which is supposed
+	 * to be used as the next outgoing channel.
+	 */
+	int next_oc;
 	/*
 	 * The primary channel this sub-channel belongs to.
 	 * This will be NULL for the primary channel.
@@ -740,9 +749,6 @@ struct vmbus_channel {
 	 * link up channels based on their CPU affinity.
 	 */
 	struct list_head percpu_list;
-
-	int num_sc;
-	int next_oc;
 };
 
 static inline void set_channel_read_state(struct vmbus_channel *c, bool state)
-- 
cgit v1.2.3


From 80c6e1465948c2e91214f01764f427d31ebedb26 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Thu, 21 May 2015 15:49:37 -0700
Subject: driver-core: fix build for !CONFIG_MODULES

Commit f2411da74698 ("driver-core: add driver module asynchronous probe
support") broke build in case modules are disabled, because in this case
"struct module" is not defined and we can't dereference it. Let's define
module_requested_async_probing() helper and stub it out if modules are
disabled.

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/module.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/module.h b/include/linux/module.h
index f46a47d3c0dc..57f5c0a804c0 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -510,6 +510,11 @@ int unregister_module_notifier(struct notifier_block *nb);
 
 extern void print_modules(void);
 
+static inline bool module_requested_async_probing(struct module *module)
+{
+	return module && module->async_probe_requested;
+}
+
 #else /* !CONFIG_MODULES... */
 
 /* Given an address, look for it in the exception tables. */
@@ -620,6 +625,12 @@ static inline int unregister_module_notifier(struct notifier_block *nb)
 static inline void print_modules(void)
 {
 }
+
+static inline bool module_requested_async_probing(struct module *module)
+{
+	return false;
+}
+
 #endif /* CONFIG_MODULES */
 
 #ifdef CONFIG_SYSFS
-- 
cgit v1.2.3


From 2539b258ec028351af954c169ea1b0ff72023a9f Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 8 May 2015 14:45:34 +0100
Subject: drivers/base: cacheinfo: fix annoying typo when DT nodes are absent

s/hierarcy/hierarchy/

Maybe the typo will annoy people enough so that they add the missing
nodes to their device-tree files, but I still think this is better off
fixed.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Acked-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/cacheinfo.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h
index 3daf5ed392c9..2189935075b4 100644
--- a/include/linux/cacheinfo.h
+++ b/include/linux/cacheinfo.h
@@ -19,7 +19,7 @@ enum cache_type {
 /**
  * struct cacheinfo - represent a cache leaf node
  * @type: type of the cache - data, inst or unified
- * @level: represents the hierarcy in the multi-level cache
+ * @level: represents the hierarchy in the multi-level cache
  * @coherency_line_size: size of each cache line usually representing
  *	the minimum amount of data that gets transferred from memory
  * @number_of_sets: total number of sets, a set is a collection of cache
-- 
cgit v1.2.3


From f4445f8b204de44a8baa4326b0e56537be867427 Mon Sep 17 00:00:00 2001
From: Sudeep Holla <sudeep.holla@arm.com>
Date: Thu, 14 May 2015 15:28:24 +0100
Subject: drivers: of/base: move of_init to driver_init

Commit 5590f3196b29 ("drivers/core/of: Add symlink to device-tree from
devices with an OF node") adds the symlink `of_node` for each device
pointing to it's device tree node while creating/initialising it.

However the devicetree sysfs is created and setup in of_init which is
executed at core_initcall level. For all the devices created before
of_init, the following error is thrown:
	"Error -2(-ENOENT) creating of_node link"

Like many other components in driver model, initialize the sysfs support
for OF/devicetree from driver_init so that it's ready before any devices
are created.

Fixes: 5590f3196b29 ("drivers/core/of: Add symlink to device-tree from
	devices with an OF node")
Suggested-by: Rob Herring <robh+dt@kernel.org>
Cc: Grant Likely <grant.likely@linaro.org>
Cc: Pawel Moll <pawel.moll@arm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
Tested-by: Robert Schwebel <r.schwebel@pengutronix.de>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/of.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/of.h b/include/linux/of.h
index ddeaae6d2083..b871ff9d81d7 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -121,6 +121,8 @@ extern struct device_node *of_stdout;
 extern raw_spinlock_t devtree_lock;
 
 #ifdef CONFIG_OF
+void of_core_init(void);
+
 static inline bool is_of_node(struct fwnode_handle *fwnode)
 {
 	return fwnode && fwnode->type == FWNODE_OF;
@@ -376,6 +378,10 @@ bool of_console_check(struct device_node *dn, char *name, int index);
 
 #else /* CONFIG_OF */
 
+static inline void of_core_init(void)
+{
+}
+
 static inline bool is_of_node(struct fwnode_handle *fwnode)
 {
 	return false;
-- 
cgit v1.2.3


From be12a1fe298e8be04d5215364f94654dff81b0bc Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Thu, 21 May 2015 16:59:58 +0200
Subject: net: skbuff: add skb_append_pagefrags and use it

Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index b617095adb88..f708936cdd23 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -861,6 +861,9 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
 					int len, int odd, struct sk_buff *skb),
 			    void *from, int length);
 
+int skb_append_pagefrags(struct sk_buff *skb, struct page *page,
+			 int offset, size_t size);
+
 struct skb_seq_state {
 	__u32		lower_offset;
 	__u32		upper_offset;
-- 
cgit v1.2.3


From a60e3cc7c92973a31fad0fd04dc5cf4355d3d1ef Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Thu, 21 May 2015 17:00:00 +0200
Subject: net: make skb_splice_bits more configureable

Prepare skb_splice_bits to be able to deal with AF_UNIX sockets.

AF_UNIX sockets don't use lock_sock/release_sock and thus we have to
use a callback to make the locking and unlocking configureable.

Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f708936cdd23..6b41c15efa27 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -35,6 +35,7 @@
 #include <linux/netdev_features.h>
 #include <linux/sched.h>
 #include <net/flow_dissector.h>
+#include <linux/splice.h>
 
 /* A. Checksumming of received packets by device.
  *
@@ -2699,9 +2700,15 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len);
 int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len);
 __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to,
 			      int len, __wsum csum);
-int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
+ssize_t skb_socket_splice(struct sock *sk,
+			  struct pipe_inode_info *pipe,
+			  struct splice_pipe_desc *spd);
+int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
 		    struct pipe_inode_info *pipe, unsigned int len,
-		    unsigned int flags);
+		    unsigned int flags,
+		    ssize_t (*splice_cb)(struct sock *,
+					 struct pipe_inode_info *,
+					 struct splice_pipe_desc *));
 void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
 unsigned int skb_zerocopy_headlen(const struct sk_buff *from);
 int skb_zerocopy(struct sk_buff *to, struct sk_buff *from,
-- 
cgit v1.2.3


From a57e16cf03339c20b09642f46f60190069ff70c7 Mon Sep 17 00:00:00 2001
From: Robert Jarzmik <robert.jarzmik@free.fr>
Date: Mon, 25 May 2015 23:29:20 +0200
Subject: dmaengine: pxa: add pxa dmaengine driver

This is a new driver for pxa SoCs, which is also compatible with the former
mmp_pdma.

The rationale behind a new driver (as opposed to incremental patching) was :

 - the new driver relies on virt-dma, which obsoletes all the internal
   structures of mmp_pdma (sw_desc, hw_desc, ...), and by consequence all the
   functions

 - mmp_pdma allocates dma coherent descriptors containing not only hardware
   descriptors but linked list information
   The new driver only puts the dma hardware descriptors (ie. 4 u32) into the
   dma pool allocated memory. This changes completely the way descriptors are
   handled

 - the architecture behind the interrupt/tasklet management was rewritten to be
   more conforming to virt-dma

 - the buffers alignment is handled differently
   The former driver assumed that the DMA channel stopped between each
   descriptor. The new one chains descriptors to let the channel running. This
   is a necessary guarantee for real-time high bandwidth usecases such as video
   capture on "old" architectures such as pxa.

 - hot chaining / cold chaining / no chaining
   Whenever possible, submitting a descriptor "hot chains" it to a running
   channel. There is still no guarantee that the descriptor will be issued, as
   the channel might be stopped just before the descriptor is submitted. Yet
   this allows to submit several video buffers, and resubmit a buffer while
   another is under handling.
   As before, dma_async_issue_pending() is the only guarantee to have all the
   buffers issued.
   When an alignment issue is detected (ie. one address in a descriptor is not
   a multiple of 8), if the already running channel is in "aligned mode", the
   channel will stop, and restarted in "misaligned mode" to finished the issued
   list.

 - descriptors reusing
   A submitted, issued and completed descriptor can be reused, ie resubmitted if
   it was prepared with the proper flag (DMA_PREP_ACK).  Only a channel
   resources release will in this case release that buffer.
   This allows a rolling ring of buffers to be reused, where there are several
   thousands of hardware descriptors used (video buffer for example).

Additionally, a set of more casual features is introduced :
 - debugging traces
 - lockless way to know if a descriptor is terminated or not

The driver was tested on zylonite board (pxa3xx) and mioa701 (pxa27x),
with dmatest, pxa_camera and pxamci.

Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 include/linux/dma/pxa-dma.h | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 include/linux/dma/pxa-dma.h

(limited to 'include/linux')

diff --git a/include/linux/dma/pxa-dma.h b/include/linux/dma/pxa-dma.h
new file mode 100644
index 000000000000..3edc99294bf6
--- /dev/null
+++ b/include/linux/dma/pxa-dma.h
@@ -0,0 +1,27 @@
+#ifndef _PXA_DMA_H_
+#define _PXA_DMA_H_
+
+enum pxad_chan_prio {
+	PXAD_PRIO_HIGHEST = 0,
+	PXAD_PRIO_NORMAL,
+	PXAD_PRIO_LOW,
+	PXAD_PRIO_LOWEST,
+};
+
+struct pxad_param {
+	unsigned int drcmr;
+	enum pxad_chan_prio prio;
+};
+
+struct dma_chan;
+
+#ifdef CONFIG_PXA_DMA
+bool pxad_filter_fn(struct dma_chan *chan, void *param);
+#else
+static inline bool pxad_filter_fn(struct dma_chan *chan, void *param)
+{
+	return false;
+}
+#endif
+
+#endif /* _PXA_DMA_H_ */
-- 
cgit v1.2.3


From 09170a49422bd786be3eac5cec1955257c5a34b7 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 18 May 2015 13:59:39 +0200
Subject: KVM: const-ify uses of struct kvm_userspace_memory_region

Architecture-specific helpers are not supposed to muck with
struct kvm_userspace_memory_region contents.  Add const to
enforce this.

In order to eliminate the only write in __kvm_set_memory_region,
the cleaning of deleted slots is pulled up from update_memslots
to __kvm_set_memory_region.

Reviewed-by: Takuya Yoshikawa <yoshikawa_takuya_b1@lab.ntt.co.jp>
Reviewed-by: Radim Krcmar <rkrcmar@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 87fd74a04005..fbced7015ebd 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -501,9 +501,9 @@ enum kvm_mr_change {
 };
 
 int kvm_set_memory_region(struct kvm *kvm,
-			  struct kvm_userspace_memory_region *mem);
+			  const struct kvm_userspace_memory_region *mem);
 int __kvm_set_memory_region(struct kvm *kvm,
-			    struct kvm_userspace_memory_region *mem);
+			    const struct kvm_userspace_memory_region *mem);
 void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont);
 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
@@ -511,10 +511,10 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
 void kvm_arch_memslots_updated(struct kvm *kvm);
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				struct kvm_memory_slot *memslot,
-				struct kvm_userspace_memory_region *mem,
+				const struct kvm_userspace_memory_region *mem,
 				enum kvm_mr_change change);
 void kvm_arch_commit_memory_region(struct kvm *kvm,
-				struct kvm_userspace_memory_region *mem,
+				const struct kvm_userspace_memory_region *mem,
 				const struct kvm_memory_slot *old,
 				enum kvm_mr_change change);
 bool kvm_largepages_enabled(void);
-- 
cgit v1.2.3


From 15f46015ee17681b542432df21747f5c51857156 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Sun, 17 May 2015 21:26:08 +0200
Subject: KVM: add memslots argument to kvm_arch_memslots_updated

Prepare for the case of multiple address spaces.

Reviewed-by: Radim Krcmar <rkrcmar@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h  | 2 +-
 include/linux/kvm_types.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index fbced7015ebd..8815f1dffb77 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -508,7 +508,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont);
 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
 			    unsigned long npages);
-void kvm_arch_memslots_updated(struct kvm *kvm);
+void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots);
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				struct kvm_memory_slot *memslot,
 				const struct kvm_userspace_memory_region *mem,
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index 931da7e917cf..1b47a185c2f0 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -28,6 +28,7 @@ struct kvm_run;
 struct kvm_userspace_memory_region;
 struct kvm_vcpu;
 struct kvm_vcpu_init;
+struct kvm_memslots;
 
 enum kvm_mr_change;
 
-- 
cgit v1.2.3


From cacce073bfd2b4091fbc58e906e7a9a21f538ff6 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Thu, 14 May 2015 23:05:49 +0200
Subject: bcma: add module_bcma_driver()

This makes it possible to save some lines of code in drivers with an
simple bcma driver registration.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 include/linux/bcma/bcma.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h
index e34f906647d3..2ff4a9961e1d 100644
--- a/include/linux/bcma/bcma.h
+++ b/include/linux/bcma/bcma.h
@@ -305,6 +305,15 @@ int __bcma_driver_register(struct bcma_driver *drv, struct module *owner);
 
 extern void bcma_driver_unregister(struct bcma_driver *drv);
 
+/* module_bcma_driver() - Helper macro for drivers that don't do
+ * anything special in module init/exit.  This eliminates a lot of
+ * boilerplate.  Each module may only use this macro once, and
+ * calling it replaces module_init() and module_exit()
+ */
+#define module_bcma_driver(__bcma_driver) \
+	module_driver(__bcma_driver, bcma_driver_register, \
+			bcma_driver_unregister)
+
 /* Set a fallback SPROM.
  * See kdoc at the function definition for complete documentation. */
 extern int bcma_arch_register_fallback_sprom(
-- 
cgit v1.2.3


From 069d4a7b583274e3fd8712c92a035626e0ebf7be Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 3 Mar 2015 11:58:14 +0100
Subject: netfilter: ebtables: fix comment grammar

s/stongly inspired on/strongly inspired by/

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/netfilter_bridge/ebtables.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index 34e7a2b7f867..9ac6f263956b 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -6,7 +6,7 @@
  *
  *  ebtables.c,v 2.0, April, 2002
  *
- *  This code is stongly inspired on the iptables code which is
+ *  This code is strongly inspired by the iptables code which is
  *  Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
  */
 #ifndef __LINUX_BRIDGE_EFF_H
-- 
cgit v1.2.3


From 7667928601d2981b20011e357904bcb96c365427 Mon Sep 17 00:00:00 2001
From: Masanari Iida <standby24x7@gmail.com>
Date: Fri, 8 May 2015 00:02:27 +0900
Subject: rapidio: Fix kerneldoc and comment

This patch fix spelling typos found in DocBook/rapidio.xml
Ths file was generated from comments in the source files,
I had to fix them, instead of the xml file.

Signed-off-by: Masanari Iida <standby24x7@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/rio.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rio.h b/include/linux/rio.h
index 6bda06f21930..cde976e86b48 100644
--- a/include/linux/rio.h
+++ b/include/linux/rio.h
@@ -298,7 +298,7 @@ struct rio_id_table {
  * struct rio_net - RIO network info
  * @node: Node in global list of RIO networks
  * @devices: List of devices in this network
- * @switches: List of switches in this netowrk
+ * @switches: List of switches in this network
  * @mports: List of master ports accessing this network
  * @hport: Default port for accessing this network
  * @id: RIO network ID
-- 
cgit v1.2.3


From 94268fcd9aea736d24cbdade16e7f7c9419c489e Mon Sep 17 00:00:00 2001
From: Antonio Ospite <ao2@ao2.it>
Date: Tue, 28 Apr 2015 13:11:28 +0200
Subject: lib: crc-itu-t.[ch] fix 0x0x prefix in integer constants

Signed-off-by: Antonio Ospite <ao2@ao2.it>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/crc-itu-t.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/crc-itu-t.h b/include/linux/crc-itu-t.h
index 84920f3cc83e..a9953c762eee 100644
--- a/include/linux/crc-itu-t.h
+++ b/include/linux/crc-itu-t.h
@@ -3,7 +3,7 @@
  *
  * Implements the standard CRC ITU-T V.41:
  *   Width 16
- *   Poly  0x0x1021 (x^16 + x^12 + x^15 + 1)
+ *   Poly  0x1021 (x^16 + x^12 + x^15 + 1)
  *   Init  0
  *
  * This source code is licensed under the GNU General Public License,
-- 
cgit v1.2.3


From e0213bc5467ca5fe44ab04527f0e47998f30c046 Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Mon, 18 May 2015 20:04:14 +0900
Subject: usb: renesas_usbhs: Change USBHS_TYPE_R8A779x to USBHS_TYPE_RCAR_GEN2

Since the HSUSB controllers of R-Car Gen2 are the same specification
(they have 16 pipes and usb-dmac), this patch changes USBHS_TYPE_R8A7790
and USBHS_TYPE_R8A7791 to USBHS_TYPE_RCAR_GEN2.

Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 include/linux/usb/renesas_usbhs.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/renesas_usbhs.h b/include/linux/usb/renesas_usbhs.h
index f06529c14141..3dd5a781da99 100644
--- a/include/linux/usb/renesas_usbhs.h
+++ b/include/linux/usb/renesas_usbhs.h
@@ -169,8 +169,7 @@ struct renesas_usbhs_driver_param {
 #define USBHS_USB_DMAC_XFER_SIZE	32	/* hardcode the xfer size */
 };
 
-#define USBHS_TYPE_R8A7790 1
-#define USBHS_TYPE_R8A7791 2
+#define USBHS_TYPE_RCAR_GEN2	1
 
 /*
  * option:
-- 
cgit v1.2.3


From a09e23f53e2c14a65a3b14a00060fea163081e1f Mon Sep 17 00:00:00 2001
From: Mian Yousaf Kaukab <yousaf.kaukab@intel.com>
Date: Sat, 16 May 2015 22:33:35 +0200
Subject: usb: gadget: net2280: check interrupts for all endpoints

USB3380 in enhanced mode has 4 IN and 4 OUT endpoints. Check
interrupts for all of them.

Tested-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
Signed-off-by: Mian Yousaf Kaukab <yousaf.kaukab@intel.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 include/linux/usb/net2280.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb/net2280.h b/include/linux/usb/net2280.h
index 148b8fa5b1a2..725120224472 100644
--- a/include/linux/usb/net2280.h
+++ b/include/linux/usb/net2280.h
@@ -168,6 +168,9 @@ struct net2280_regs {
 #define     ENDPOINT_B_INTERRUPT                                2
 #define     ENDPOINT_A_INTERRUPT                                1
 #define     ENDPOINT_0_INTERRUPT                                0
+#define     USB3380_IRQSTAT0_EP_INTR_MASK_IN (0xF << 17)
+#define     USB3380_IRQSTAT0_EP_INTR_MASK_OUT (0xF << 1)
+
 	u32		irqstat1;
 #define     POWER_STATE_CHANGE_INTERRUPT                        27
 #define     PCI_ARBITER_TIMEOUT_INTERRUPT                       26
-- 
cgit v1.2.3


From c65c4f052bc3b67989bf54914798513685c54988 Mon Sep 17 00:00:00 2001
From: Mian Yousaf Kaukab <yousaf.kaukab@intel.com>
Date: Sat, 16 May 2015 22:33:36 +0200
Subject: usb: gadget: net2280: fix use of GPEP in both directions

USB3380 enhanced mode allows GPEP to be used in both IN and OUT
directions. However, IN and OUT endpoints must use same USB endpoint
address (bEndpointAddress). Fix this by setting the ep_cfg.ep_number
during initialization and keep it in net2280_enable()

Tested-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
Signed-off-by: Mian Yousaf Kaukab <yousaf.kaukab@intel.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 include/linux/usb/usb338x.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb/usb338x.h b/include/linux/usb/usb338x.h
index f92eb635b9d3..11525d8d89a7 100644
--- a/include/linux/usb/usb338x.h
+++ b/include/linux/usb/usb338x.h
@@ -43,6 +43,10 @@
 #define     IN_ENDPOINT_TYPE                    12
 #define     OUT_ENDPOINT_ENABLE                 10
 #define     OUT_ENDPOINT_TYPE                    8
+#define USB3380_EP_CFG_MASK_IN ((0x3 << IN_ENDPOINT_TYPE) | \
+				BIT(IN_ENDPOINT_ENABLE))
+#define USB3380_EP_CFG_MASK_OUT ((0x3 << OUT_ENDPOINT_TYPE) | \
+				BIT(OUT_ENDPOINT_ENABLE))
 
 struct usb338x_usb_ext_regs {
 	u32     usbclass;
-- 
cgit v1.2.3


From e842b84c8e7221c45c8dbd7de09185c6149e1cf9 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 23 Mar 2015 09:52:48 +1100
Subject: usb: phy: Add interface to get phy give of device_node.

Split the "get phy from device_node" functionality out of
"get phy by phandle" so it can be used directly.

This is useful when a battery-charger is intimately associated with a
particular phy but handled by a separate driver.  The charger
can find the device_node based on sibling relationships
without the need for a redundant declaration in the devicetree
description.

As a peripheral that gets a phy will often want to register a
notifier block, and de-register it later, that functionality
is included so the de-registration is automatic.

Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 include/linux/usb/phy.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb/phy.h b/include/linux/usb/phy.h
index bc91b5d380fd..8ed1e29ef329 100644
--- a/include/linux/usb/phy.h
+++ b/include/linux/usb/phy.h
@@ -205,6 +205,8 @@ extern struct usb_phy *usb_get_phy_dev(struct device *dev, u8 index);
 extern struct usb_phy *devm_usb_get_phy_dev(struct device *dev, u8 index);
 extern struct usb_phy *devm_usb_get_phy_by_phandle(struct device *dev,
 	const char *phandle, u8 index);
+extern struct usb_phy *devm_usb_get_phy_by_node(struct device *dev,
+	struct device_node *node, struct notifier_block *nb);
 extern void usb_put_phy(struct usb_phy *);
 extern void devm_usb_put_phy(struct device *dev, struct usb_phy *x);
 extern int usb_bind_phy(const char *dev_name, u8 index,
-- 
cgit v1.2.3


From e4b88e19897f1039fd83f1630517becafc0dd163 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Fri, 22 May 2015 13:44:33 -0700
Subject: Input: stmpe-ts - enforce device tree only mode

The STMPE MFD is only used with device tree configured systems (and STMPE
MFD core depends on OF), so force the configuration to come from device
tree only.

Tested-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Marek Vasut <marex@denx.de>
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/linux/mfd/stmpe.h | 44 --------------------------------------------
 1 file changed, 44 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/stmpe.h b/include/linux/mfd/stmpe.h
index c9d869027300..cb83883918a7 100644
--- a/include/linux/mfd/stmpe.h
+++ b/include/linux/mfd/stmpe.h
@@ -117,47 +117,6 @@ extern int stmpe_disable(struct stmpe *stmpe, unsigned int blocks);
 
 #define STMPE_GPIO_NOREQ_811_TOUCH	(0xf0)
 
-/**
- * struct stmpe_ts_platform_data - stmpe811 touch screen controller platform
- * data
- * @sample_time: ADC converstion time in number of clock.
- * (0 -> 36 clocks, 1 -> 44 clocks, 2 -> 56 clocks, 3 -> 64 clocks,
- * 4 -> 80 clocks, 5 -> 96 clocks, 6 -> 144 clocks),
- * recommended is 4.
- * @mod_12b: ADC Bit mode (0 -> 10bit ADC, 1 -> 12bit ADC)
- * @ref_sel: ADC reference source
- * (0 -> internal reference, 1 -> external reference)
- * @adc_freq: ADC Clock speed
- * (0 -> 1.625 MHz, 1 -> 3.25 MHz, 2 || 3 -> 6.5 MHz)
- * @ave_ctrl: Sample average control
- * (0 -> 1 sample, 1 -> 2 samples, 2 -> 4 samples, 3 -> 8 samples)
- * @touch_det_delay: Touch detect interrupt delay
- * (0 -> 10 us, 1 -> 50 us, 2 -> 100 us, 3 -> 500 us,
- * 4-> 1 ms, 5 -> 5 ms, 6 -> 10 ms, 7 -> 50 ms)
- * recommended is 3
- * @settling: Panel driver settling time
- * (0 -> 10 us, 1 -> 100 us, 2 -> 500 us, 3 -> 1 ms,
- * 4 -> 5 ms, 5 -> 10 ms, 6 for 50 ms, 7 -> 100 ms)
- * recommended is 2
- * @fraction_z: Length of the fractional part in z
- * (fraction_z ([0..7]) = Count of the fractional part)
- * recommended is 7
- * @i_drive: current limit value of the touchscreen drivers
- * (0 -> 20 mA typical 35 mA max, 1 -> 50 mA typical 80 mA max)
- *
- * */
-struct stmpe_ts_platform_data {
-       u8 sample_time;
-       u8 mod_12b;
-       u8 ref_sel;
-       u8 adc_freq;
-       u8 ave_ctrl;
-       u8 touch_det_delay;
-       u8 settling;
-       u8 fraction_z;
-       u8 i_drive;
-};
-
 /**
  * struct stmpe_platform_data - STMPE platform data
  * @id: device id to distinguish between multiple STMPEs on the same board
@@ -168,7 +127,6 @@ struct stmpe_ts_platform_data {
  * @irq_over_gpio: true if gpio is used to get irq
  * @irq_gpio: gpio number over which irq will be requested (significant only if
  *	      irq_over_gpio is true)
- * @ts: touchscreen-specific platform data
  */
 struct stmpe_platform_data {
 	int id;
@@ -178,8 +136,6 @@ struct stmpe_platform_data {
 	bool irq_over_gpio;
 	int irq_gpio;
 	int autosleep_timeout;
-
-	struct stmpe_ts_platform_data *ts;
 };
 
 #endif
-- 
cgit v1.2.3


From 7d7efec368d537226142cbe559f45797f18672f9 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 13 May 2015 16:35:16 -0400
Subject: sched, cgroup: reorganize threadgroup locking

threadgroup_change_begin/end() are used to mark the beginning and end
of threadgroup modifying operations to allow code paths which require
a threadgroup to stay stable across blocking operations to synchronize
against those sections using threadgroup_lock/unlock().

It's currently implemented as a general mechanism in sched.h using
per-signal_struct rwsem; however, this never grew non-cgroup use cases
and becomes noop if !CONFIG_CGROUPS.  It turns out that cgroups is
gonna be better served with a different sycnrhonization scheme and is
a bit silly to keep cgroups specific details as a general mechanism.

What's general here is identifying the places where threadgroups are
modified.  This patch restructures threadgroup locking so that
threadgroup_change_begin/end() become a place where subsystems which
need to sycnhronize against threadgroup changes can hook into.

cgroup_threadgroup_change_begin/end() which operate on the
per-signal_struct rwsem are created and threadgroup_lock/unlock() are
moved to cgroup.c and made static.

This is pure reorganization which doesn't cause any functional
changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
---
 include/linux/cgroup-defs.h | 10 +++++++++
 include/linux/sched.h       | 53 +++++++++++++++------------------------------
 2 files changed, 27 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 55f3120fb952..1b8c93806dbd 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -14,6 +14,7 @@
 #include <linux/mutex.h>
 #include <linux/rcupdate.h>
 #include <linux/percpu-refcount.h>
+#include <linux/percpu-rwsem.h>
 #include <linux/workqueue.h>
 
 #ifdef CONFIG_CGROUPS
@@ -460,5 +461,14 @@ struct cgroup_subsys {
 	unsigned int depends_on;
 };
 
+void cgroup_threadgroup_change_begin(struct task_struct *tsk);
+void cgroup_threadgroup_change_end(struct task_struct *tsk);
+
+#else	/* CONFIG_CGROUPS */
+
+static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) {}
+static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {}
+
 #endif	/* CONFIG_CGROUPS */
+
 #endif	/* _LINUX_CGROUP_DEFS_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8222ae40ecb0..5ee290003470 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -58,6 +58,7 @@ struct sched_param {
 #include <linux/uidgid.h>
 #include <linux/gfp.h>
 #include <linux/magic.h>
+#include <linux/cgroup-defs.h>
 
 #include <asm/processor.h>
 
@@ -2648,53 +2649,33 @@ static inline void unlock_task_sighand(struct task_struct *tsk,
 	spin_unlock_irqrestore(&tsk->sighand->siglock, *flags);
 }
 
-#ifdef CONFIG_CGROUPS
-static inline void threadgroup_change_begin(struct task_struct *tsk)
-{
-	down_read(&tsk->signal->group_rwsem);
-}
-static inline void threadgroup_change_end(struct task_struct *tsk)
-{
-	up_read(&tsk->signal->group_rwsem);
-}
-
 /**
- * threadgroup_lock - lock threadgroup
- * @tsk: member task of the threadgroup to lock
- *
- * Lock the threadgroup @tsk belongs to.  No new task is allowed to enter
- * and member tasks aren't allowed to exit (as indicated by PF_EXITING) or
- * change ->group_leader/pid.  This is useful for cases where the threadgroup
- * needs to stay stable across blockable operations.
+ * threadgroup_change_begin - mark the beginning of changes to a threadgroup
+ * @tsk: task causing the changes
  *
- * fork and exit paths explicitly call threadgroup_change_{begin|end}() for
- * synchronization.  While held, no new task will be added to threadgroup
- * and no existing live task will have its PF_EXITING set.
- *
- * de_thread() does threadgroup_change_{begin|end}() when a non-leader
- * sub-thread becomes a new leader.
+ * All operations which modify a threadgroup - a new thread joining the
+ * group, death of a member thread (the assertion of PF_EXITING) and
+ * exec(2) dethreading the process and replacing the leader - are wrapped
+ * by threadgroup_change_{begin|end}().  This is to provide a place which
+ * subsystems needing threadgroup stability can hook into for
+ * synchronization.
  */
-static inline void threadgroup_lock(struct task_struct *tsk)
+static inline void threadgroup_change_begin(struct task_struct *tsk)
 {
-	down_write(&tsk->signal->group_rwsem);
+	might_sleep();
+	cgroup_threadgroup_change_begin(tsk);
 }
 
 /**
- * threadgroup_unlock - unlock threadgroup
- * @tsk: member task of the threadgroup to unlock
+ * threadgroup_change_end - mark the end of changes to a threadgroup
+ * @tsk: task causing the changes
  *
- * Reverse threadgroup_lock().
+ * See threadgroup_change_begin().
  */
-static inline void threadgroup_unlock(struct task_struct *tsk)
+static inline void threadgroup_change_end(struct task_struct *tsk)
 {
-	up_write(&tsk->signal->group_rwsem);
+	cgroup_threadgroup_change_end(tsk);
 }
-#else
-static inline void threadgroup_change_begin(struct task_struct *tsk) {}
-static inline void threadgroup_change_end(struct task_struct *tsk) {}
-static inline void threadgroup_lock(struct task_struct *tsk) {}
-static inline void threadgroup_unlock(struct task_struct *tsk) {}
-#endif
 
 #ifndef __HAVE_THREAD_FUNCTIONS
 
-- 
cgit v1.2.3


From d59cfc09c32a2ae31f1c3bc2983a0cd79afb3f14 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 13 May 2015 16:35:17 -0400
Subject: sched, cgroup: replace signal_struct->group_rwsem with a global
 percpu_rwsem

The cgroup side of threadgroup locking uses signal_struct->group_rwsem
to synchronize against threadgroup changes.  This per-process rwsem
adds small overhead to thread creation, exit and exec paths, forces
cgroup code paths to do lock-verify-unlock-retry dance in a couple
places and makes it impossible to atomically perform operations across
multiple processes.

This patch replaces signal_struct->group_rwsem with a global
percpu_rwsem cgroup_threadgroup_rwsem which is cheaper on the reader
side and contained in cgroups proper.  This patch converts one-to-one.

This does make writer side heavier and lower the granularity; however,
cgroup process migration is a fairly cold path, we do want to optimize
thread operations over it and cgroup migration operations don't take
enough time for the lower granularity to matter.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
---
 include/linux/cgroup-defs.h | 27 +++++++++++++++++++++++++--
 include/linux/init_task.h   |  8 --------
 include/linux/sched.h       | 12 ------------
 3 files changed, 25 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 1b8c93806dbd..7d83d7f73420 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -461,8 +461,31 @@ struct cgroup_subsys {
 	unsigned int depends_on;
 };
 
-void cgroup_threadgroup_change_begin(struct task_struct *tsk);
-void cgroup_threadgroup_change_end(struct task_struct *tsk);
+extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem;
+
+/**
+ * cgroup_threadgroup_change_begin - threadgroup exclusion for cgroups
+ * @tsk: target task
+ *
+ * Called from threadgroup_change_begin() and allows cgroup operations to
+ * synchronize against threadgroup changes using a percpu_rw_semaphore.
+ */
+static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk)
+{
+	percpu_down_read(&cgroup_threadgroup_rwsem);
+}
+
+/**
+ * cgroup_threadgroup_change_end - threadgroup exclusion for cgroups
+ * @tsk: target task
+ *
+ * Called from threadgroup_change_end().  Counterpart of
+ * cgroup_threadcgroup_change_begin().
+ */
+static inline void cgroup_threadgroup_change_end(struct task_struct *tsk)
+{
+	percpu_up_read(&cgroup_threadgroup_rwsem);
+}
 
 #else	/* CONFIG_CGROUPS */
 
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 696d22312b31..0cc0bbf20022 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -25,13 +25,6 @@
 extern struct files_struct init_files;
 extern struct fs_struct init_fs;
 
-#ifdef CONFIG_CGROUPS
-#define INIT_GROUP_RWSEM(sig)						\
-	.group_rwsem = __RWSEM_INITIALIZER(sig.group_rwsem),
-#else
-#define INIT_GROUP_RWSEM(sig)
-#endif
-
 #ifdef CONFIG_CPUSETS
 #define INIT_CPUSET_SEQ(tsk)							\
 	.mems_allowed_seq = SEQCNT_ZERO(tsk.mems_allowed_seq),
@@ -56,7 +49,6 @@ extern struct fs_struct init_fs;
 	},								\
 	.cred_guard_mutex =						\
 		 __MUTEX_INITIALIZER(sig.cred_guard_mutex),		\
-	INIT_GROUP_RWSEM(sig)						\
 }
 
 extern struct nsproxy init_nsproxy;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5ee290003470..add524a910bd 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -743,18 +743,6 @@ struct signal_struct {
 	unsigned audit_tty_log_passwd;
 	struct tty_audit_buf *tty_audit_buf;
 #endif
-#ifdef CONFIG_CGROUPS
-	/*
-	 * group_rwsem prevents new tasks from entering the threadgroup and
-	 * member tasks from exiting,a more specifically, setting of
-	 * PF_EXITING.  fork and exit paths are protected with this rwsem
-	 * using threadgroup_change_begin/end().  Users which require
-	 * threadgroup to remain stable should use threadgroup_[un]lock()
-	 * which also takes care of exec path.  Currently, cgroup is the
-	 * only user.
-	 */
-	struct rw_semaphore group_rwsem;
-#endif
 
 	oom_flags_t oom_flags;
 	short oom_score_adj;		/* OOM kill score adjustment */
-- 
cgit v1.2.3


From e463d88c36d42211aa72ed76d32fb8bf37820ef1 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 26 May 2015 12:19:58 -0700
Subject: net: phy: Add phy_interface_is_rgmii helper

RGMII interfaces come in 4 different flavors that the PHY library needs
to care about: regular RGMII (no delays), RGMII with either RX or TX
delay, and both. In order to avoid errors of checking only for one type
of RGMII interface and miss the 3 others, introduce a convenience
function which tests for all values.

Suggested-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index 701c7a3946e0..a26c3f84b8dd 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -677,6 +677,17 @@ static inline bool phy_is_internal(struct phy_device *phydev)
 	return phydev->is_internal;
 }
 
+/**
+ * phy_interface_is_rgmii - Convenience function for testing if a PHY interface
+ * is RGMII (all variants)
+ * @phydev: the phy_device struct
+ */
+static inline bool phy_interface_is_rgmii(struct phy_device *phydev)
+{
+	return phydev->interface >= PHY_INTERFACE_MODE_RGMII &&
+		phydev->interface <= PHY_INTERFACE_MODE_RGMII_TXID;
+}
+
 /**
  * phy_write_mmd - Convenience function for writing a register
  * on an MMD on a given PHY.
-- 
cgit v1.2.3


From b3df4ec4424f27e55d754cfe586195fecca1c4e4 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 19 May 2015 00:00:51 +0000
Subject: perf/x86/intel/cqm: Use proper data types

'int' is really not a proper data type for an MSR. Use u32 to make it
clear that we are dealing with a 32-bit unsigned hardware value.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Matt Fleming <matt.fleming@intel.com>
Cc: Kanaka Juvva <kanaka.d.juvva@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Vikas Shivappa <vikas.shivappa@linux.intel.com>
Cc: Will Auld <will.auld@intel.com>
Link: http://lkml.kernel.org/r/20150518235149.919350144@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 248f7829ce41..06580028cee6 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -120,7 +120,7 @@ struct hw_perf_event {
 		};
 		struct { /* intel_cqm */
 			int			cqm_state;
-			int			cqm_rmid;
+			u32			cqm_rmid;
 			struct list_head	cqm_events_entry;
 			struct list_head	cqm_groups_entry;
 			struct list_head	cqm_group_entry;
-- 
cgit v1.2.3


From 16b369a91d0dd80be214b7f7801fbc51875454cc Mon Sep 17 00:00:00 2001
From: Stephan Mueller <smueller@chronox.de>
Date: Mon, 25 May 2015 15:08:47 +0200
Subject: random: Blocking API for accessing nonblocking_pool

The added API calls provide a synchronous function call
get_blocking_random_bytes where the caller is blocked until
the nonblocking_pool is initialized.

CC: Andreas Steffen <andreas.steffen@strongswan.org>
CC: Theodore Ts'o <tytso@mit.edu>
CC: Sandy Harris <sandyinchina@gmail.com>
Signed-off-by: Stephan Mueller <smueller@chronox.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/random.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/random.h b/include/linux/random.h
index b05856e16b75..796267d56901 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -14,6 +14,7 @@ extern void add_input_randomness(unsigned int type, unsigned int code,
 extern void add_interrupt_randomness(int irq, int irq_flags);
 
 extern void get_random_bytes(void *buf, int nbytes);
+extern void get_blocking_random_bytes(void *buf, int nbytes);
 extern void get_random_bytes_arch(void *buf, int nbytes);
 void generate_random_uuid(unsigned char uuid_out[16]);
 extern int random_int_secret_init(void);
-- 
cgit v1.2.3


From 7d010fdf299929f9583ce5e17da629dcd83c36ef Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@suse.com>
Date: Tue, 26 May 2015 10:28:13 +0200
Subject: x86/mm/mtrr: Avoid #ifdeffery with phys_wc_to_mtrr_index()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is only one user but since we're going to bury MTRR next
out of access to drivers, expose this last piece of API to
drivers in a general fashion only needing io.h for access to
helpers.

Signed-off-by: Luis R. Rodriguez <mcgrof@suse.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Abhilash Kesavan <a.kesavan@samsung.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Antonino Daplas <adaplas@gmail.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Cristian Stoica <cristian.stoica@freescale.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Davidlohr Bueso <dbueso@suse.de>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jean-Christophe Plagniol-Villard <plagnioj@jcrosoft.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matthias Brugger <matthias.bgg@gmail.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Suresh Siddha <sbsiddha@gmail.com>
Cc: Thierry Reding <treding@nvidia.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tomi Valkeinen <tomi.valkeinen@ti.com>
Cc: Toshi Kani <toshi.kani@hp.com>
Cc: Ville Syrjälä <syrjala@sci.fi>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Will Deacon <will.deacon@arm.com>
Cc: dri-devel@lists.freedesktop.org
Link: http://lkml.kernel.org/r/1429722736-4473-1-git-send-email-mcgrof@do-not-panic.com
Link: http://lkml.kernel.org/r/1432628901-18044-11-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/io.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/io.h b/include/linux/io.h
index 986f2bffea1e..04cce4da3685 100644
--- a/include/linux/io.h
+++ b/include/linux/io.h
@@ -111,6 +111,13 @@ static inline void arch_phys_wc_del(int handle)
 }
 
 #define arch_phys_wc_add arch_phys_wc_add
+#ifndef arch_phys_wc_index
+static inline int arch_phys_wc_index(int handle)
+{
+	return -1;
+}
+#define arch_phys_wc_index arch_phys_wc_index
+#endif
 #endif
 
 #endif /* _LINUX_IO_H */
-- 
cgit v1.2.3


From 06931e62246844c73fba24d7aeb4a5dc897a2739 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Tue, 26 May 2015 15:11:28 +0200
Subject: sched/topology: Rename topology_thread_cpumask() to
 topology_sibling_cpumask()

Rename topology_thread_cpumask() to topology_sibling_cpumask()
for more consistency with scheduler code.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Russell King <rmk+kernel@arm.linux.org.uk>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Benoit Cousson <bcousson@baylibre.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Jean Delvare <jdelvare@suse.de>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Drokin <oleg.drokin@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rafael J. Wysocki <rjw@rjwysocki.net>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Link: http://lkml.kernel.org/r/1432645896-12588-2-git-send-email-bgolaszewski@baylibre.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/topology.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/topology.h b/include/linux/topology.h
index 909b6e43b694..73ddad1e0fa3 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -191,8 +191,8 @@ static inline int cpu_to_mem(int cpu)
 #ifndef topology_core_id
 #define topology_core_id(cpu)			((void)(cpu), 0)
 #endif
-#ifndef topology_thread_cpumask
-#define topology_thread_cpumask(cpu)		cpumask_of(cpu)
+#ifndef topology_sibling_cpumask
+#define topology_sibling_cpumask(cpu)		cpumask_of(cpu)
 #endif
 #ifndef topology_core_cpumask
 #define topology_core_cpumask(cpu)		cpumask_of(cpu)
@@ -201,7 +201,7 @@ static inline int cpu_to_mem(int cpu)
 #ifdef CONFIG_SCHED_SMT
 static inline const struct cpumask *cpu_smt_mask(int cpu)
 {
-	return topology_thread_cpumask(cpu);
+	return topology_sibling_cpumask(cpu);
 }
 #endif
 
-- 
cgit v1.2.3


From 66eb579e66ecfea55e2007be0594869ea9e453d4 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 13 May 2015 17:12:23 +0100
Subject: perf: allow for PMU-specific event filtering

In certain circumstances it may not be possible to schedule particular
events due to constraints other than a lack of hardware counters (e.g.
on big.LITTLE systems where CPUs support different events). The core
perf event code does not distinguish these cases and pessimistically
assumes that any failure to schedule an event means that it is not worth
attempting to schedule later events, even if some hardware counters are
still unused.

When an event a pmu cannot schedule exists in a flexible group list it
can unnecessarily prevent event groups following it in the list from
being scheduled (until it is rotated to the end of the list). This means
some events are scheduled for only a portion of the time they could be,
and for short running programs no events may be scheduled if the list is
initially sorted in an unfortunate order.

This patch adds a new (optional) filter_match function pointer to struct
pmu which a pmu driver can use to tell perf core when an event matches
pmu-specific scheduling requirements. This plugs into the existing
event_filter_match logic, and makes it possible to avoid the scheduling
problem described above. When no filter is provided by the PMU, the
existing behaviour is retained.

Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/linux/perf_event.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 61992cf2e977..67c719cc91aa 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -304,6 +304,11 @@ struct pmu {
 	 * Free pmu-private AUX data structures
 	 */
 	void (*free_aux)		(void *aux); /* optional */
+
+	/*
+	 * Filter events for PMU-specific reasons.
+	 */
+	int (*filter_match)		(struct perf_event *event); /* optional */
 };
 
 /**
-- 
cgit v1.2.3


From 24fe86a617c550fb9bdc6c8bd7cf647d3955f8ba Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sun, 29 Mar 2015 12:50:46 +0200
Subject: phy: sun4i-usb: Add a sunxi specific function for setting
 squelch-detect

The sunxi otg phy has a bug where it wrongly detects a high speed squelch
when reset on the root port gets de-asserted with a lo-speed device.

The workaround for this is to disable squelch detect before de-asserting
reset, and re-enabling it after the reset de-assert is done. Add a sunxi
specific phy function to allow the sunxi-musb glue to do this.

Acked-by: Kishon Vijay Abraham I <kishon@ti.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 include/linux/phy/phy-sun4i-usb.h | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 include/linux/phy/phy-sun4i-usb.h

(limited to 'include/linux')

diff --git a/include/linux/phy/phy-sun4i-usb.h b/include/linux/phy/phy-sun4i-usb.h
new file mode 100644
index 000000000000..50aed92ea89c
--- /dev/null
+++ b/include/linux/phy/phy-sun4i-usb.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2015 Hans de Goede <hdegoede@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef PHY_SUN4I_USB_H_
+#define PHY_SUN4I_USB_H_
+
+#include "phy.h"
+
+/**
+ * sun4i_usb_phy_set_squelch_detect() - Enable/disable squelch detect
+ * @phy: reference to a sun4i usb phy
+ * @enabled: wether to enable or disable squelch detect
+ */
+void sun4i_usb_phy_set_squelch_detect(struct phy *phy, bool enabled);
+
+#endif
-- 
cgit v1.2.3


From e5c4708b2b6fec0300db60ee3cf4b4ae96430a12 Mon Sep 17 00:00:00 2001
From: Sunil Goutham <sgoutham@cavium.com>
Date: Tue, 26 May 2015 19:20:14 -0700
Subject: pci: Add Cavium PCI vendor id

This vendor id will be used by network (vNIC), USB (xHCI),
SATA (AHCI), GPIO, I2C, MMC and maybe other drivers
for ThunderX SoC.

Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Sunil Goutham <sgoutham@cavium.com>
Signed-off-by: Aleksey Makarov <aleksey.makarov@caviumnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pci_ids.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 2f7b9a40f627..2972c7f3aa1d 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2329,6 +2329,8 @@
 #define PCI_DEVICE_ID_ALTIMA_AC9100	0x03ea
 #define PCI_DEVICE_ID_ALTIMA_AC1003	0x03eb
 
+#define PCI_VENDOR_ID_CAVIUM		0x177d
+
 #define PCI_VENDOR_ID_BELKIN		0x1799
 #define PCI_DEVICE_ID_BELKIN_F5D7010V7	0x701f
 
-- 
cgit v1.2.3


From 4612c715a6ea6b3af2aee0163c0721375b2548d7 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <dvlasenk@redhat.com>
Date: Mon, 18 May 2015 12:58:40 +0200
Subject: mtd: cfi: deinline large functions

With this .config: http://busybox.net/~vda/kernel_config,
after uninlining these functions have sizes and callsite counts
as follows:

cfi_udelay(): 74 bytes, 26 callsites
cfi_send_gen_cmd(): 153 bytes, 95 callsites
cfi_build_cmd(): 274 bytes, 123 callsites
cfi_build_cmd_addr(): 49 bytes, 15 callsites
cfi_merge_status(): 230 bytes, 3 callsites

Reduction in code size is about 50,000:

    text     data      bss       dec     hex filename
85842882 22294584 20627456 128764922 7accbfa vmlinux.before
85789648 22294616 20627456 128711720 7abfc28 vmlinux

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Dan Carpenter <dan.carpenter@oracle.com>
CC: Jingoo Han <jg1.han@samsung.com>
CC: Brian Norris <computersforpeace@gmail.com>
CC: Aaron Sierra <asierra@xes-inc.com>
CC: Artem Bityutskiy <Artem.Bityutskiy@linux.intel.com>
CC: David Woodhouse <David.Woodhouse@intel.com>
CC: linux-mtd@lists.infradead.org
CC: linux-kernel@vger.kernel.org
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 include/linux/mtd/cfi.h | 188 +++---------------------------------------------
 1 file changed, 8 insertions(+), 180 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h
index 299d7d31fe53..9b57a9b1b081 100644
--- a/include/linux/mtd/cfi.h
+++ b/include/linux/mtd/cfi.h
@@ -296,183 +296,19 @@ struct cfi_private {
 	struct flchip chips[0];  /* per-chip data structure for each chip */
 };
 
-/*
- * Returns the command address according to the given geometry.
- */
-static inline uint32_t cfi_build_cmd_addr(uint32_t cmd_ofs,
-				struct map_info *map, struct cfi_private *cfi)
-{
-	unsigned bankwidth = map_bankwidth(map);
-	unsigned interleave = cfi_interleave(cfi);
-	unsigned type = cfi->device_type;
-	uint32_t addr;
-	
-	addr = (cmd_ofs * type) * interleave;
-
-	/* Modify the unlock address if we are in compatibility mode.
-	 * For 16bit devices on 8 bit busses
-	 * and 32bit devices on 16 bit busses
-	 * set the low bit of the alternating bit sequence of the address.
-	 */
-	if (((type * interleave) > bankwidth) && ((cmd_ofs & 0xff) == 0xaa))
-		addr |= (type >> 1)*interleave;
-
-	return  addr;
-}
-
-/*
- * Transforms the CFI command for the given geometry (bus width & interleave).
- * It looks too long to be inline, but in the common case it should almost all
- * get optimised away.
- */
-static inline map_word cfi_build_cmd(u_long cmd, struct map_info *map, struct cfi_private *cfi)
-{
-	map_word val = { {0} };
-	int wordwidth, words_per_bus, chip_mode, chips_per_word;
-	unsigned long onecmd;
-	int i;
-
-	/* We do it this way to give the compiler a fighting chance
-	   of optimising away all the crap for 'bankwidth' larger than
-	   an unsigned long, in the common case where that support is
-	   disabled */
-	if (map_bankwidth_is_large(map)) {
-		wordwidth = sizeof(unsigned long);
-		words_per_bus = (map_bankwidth(map)) / wordwidth; // i.e. normally 1
-	} else {
-		wordwidth = map_bankwidth(map);
-		words_per_bus = 1;
-	}
-
-	chip_mode = map_bankwidth(map) / cfi_interleave(cfi);
-	chips_per_word = wordwidth * cfi_interleave(cfi) / map_bankwidth(map);
-
-	/* First, determine what the bit-pattern should be for a single
-	   device, according to chip mode and endianness... */
-	switch (chip_mode) {
-	default: BUG();
-	case 1:
-		onecmd = cmd;
-		break;
-	case 2:
-		onecmd = cpu_to_cfi16(map, cmd);
-		break;
-	case 4:
-		onecmd = cpu_to_cfi32(map, cmd);
-		break;
-	}
-
-	/* Now replicate it across the size of an unsigned long, or
-	   just to the bus width as appropriate */
-	switch (chips_per_word) {
-	default: BUG();
-#if BITS_PER_LONG >= 64
-	case 8:
-		onecmd |= (onecmd << (chip_mode * 32));
-#endif
-	case 4:
-		onecmd |= (onecmd << (chip_mode * 16));
-	case 2:
-		onecmd |= (onecmd << (chip_mode * 8));
-	case 1:
-		;
-	}
+uint32_t cfi_build_cmd_addr(uint32_t cmd_ofs,
+				struct map_info *map, struct cfi_private *cfi);
 
-	/* And finally, for the multi-word case, replicate it
-	   in all words in the structure */
-	for (i=0; i < words_per_bus; i++) {
-		val.x[i] = onecmd;
-	}
-
-	return val;
-}
+map_word cfi_build_cmd(u_long cmd, struct map_info *map, struct cfi_private *cfi);
 #define CMD(x)  cfi_build_cmd((x), map, cfi)
 
-
-static inline unsigned long cfi_merge_status(map_word val, struct map_info *map,
-					   struct cfi_private *cfi)
-{
-	int wordwidth, words_per_bus, chip_mode, chips_per_word;
-	unsigned long onestat, res = 0;
-	int i;
-
-	/* We do it this way to give the compiler a fighting chance
-	   of optimising away all the crap for 'bankwidth' larger than
-	   an unsigned long, in the common case where that support is
-	   disabled */
-	if (map_bankwidth_is_large(map)) {
-		wordwidth = sizeof(unsigned long);
-		words_per_bus = (map_bankwidth(map)) / wordwidth; // i.e. normally 1
-	} else {
-		wordwidth = map_bankwidth(map);
-		words_per_bus = 1;
-	}
-
-	chip_mode = map_bankwidth(map) / cfi_interleave(cfi);
-	chips_per_word = wordwidth * cfi_interleave(cfi) / map_bankwidth(map);
-
-	onestat = val.x[0];
-	/* Or all status words together */
-	for (i=1; i < words_per_bus; i++) {
-		onestat |= val.x[i];
-	}
-
-	res = onestat;
-	switch(chips_per_word) {
-	default: BUG();
-#if BITS_PER_LONG >= 64
-	case 8:
-		res |= (onestat >> (chip_mode * 32));
-#endif
-	case 4:
-		res |= (onestat >> (chip_mode * 16));
-	case 2:
-		res |= (onestat >> (chip_mode * 8));
-	case 1:
-		;
-	}
-
-	/* Last, determine what the bit-pattern should be for a single
-	   device, according to chip mode and endianness... */
-	switch (chip_mode) {
-	case 1:
-		break;
-	case 2:
-		res = cfi16_to_cpu(map, res);
-		break;
-	case 4:
-		res = cfi32_to_cpu(map, res);
-		break;
-	default: BUG();
-	}
-	return res;
-}
-
+unsigned long cfi_merge_status(map_word val, struct map_info *map,
+					   struct cfi_private *cfi);
 #define MERGESTATUS(x) cfi_merge_status((x), map, cfi)
 
-
-/*
- * Sends a CFI command to a bank of flash for the given geometry.
- *
- * Returns the offset in flash where the command was written.
- * If prev_val is non-null, it will be set to the value at the command address,
- * before the command was written.
- */
-static inline uint32_t cfi_send_gen_cmd(u_char cmd, uint32_t cmd_addr, uint32_t base,
+uint32_t cfi_send_gen_cmd(u_char cmd, uint32_t cmd_addr, uint32_t base,
 				struct map_info *map, struct cfi_private *cfi,
-				int type, map_word *prev_val)
-{
-	map_word val;
-	uint32_t addr = base + cfi_build_cmd_addr(cmd_addr, map, cfi);
-	val = cfi_build_cmd(cmd, map, cfi);
-
-	if (prev_val)
-		*prev_val = map_read(map, addr);
-
-	map_write(map, val, addr);
-
-	return addr - base;
-}
+				int type, map_word *prev_val);
 
 static inline uint8_t cfi_read_query(struct map_info *map, uint32_t addr)
 {
@@ -506,15 +342,7 @@ static inline uint16_t cfi_read_query16(struct map_info *map, uint32_t addr)
 	}
 }
 
-static inline void cfi_udelay(int us)
-{
-	if (us >= 1000) {
-		msleep((us+999)/1000);
-	} else {
-		udelay(us);
-		cond_resched();
-	}
-}
+void cfi_udelay(int us);
 
 int __xipram cfi_qry_present(struct map_info *map, __u32 base,
 			     struct cfi_private *cfi);
-- 
cgit v1.2.3


From 7d0ae8086b828311250c6afdf800b568ac9bd693 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 3 Mar 2015 14:57:58 -0800
Subject: rcu: Convert ACCESS_ONCE() to READ_ONCE() and WRITE_ONCE()

This commit moves from the old ACCESS_ONCE() API to the new READ_ONCE()
and WRITE_ONCE() APIs.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
[ paulmck:  Updated to include kernel/torture.c as suggested by Jason Low. ]
---
 include/linux/rculist.h  |  6 +++---
 include/linux/rcupdate.h | 16 ++++++++--------
 2 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index a18b16f1dc0e..665397247e82 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -29,8 +29,8 @@
  */
 static inline void INIT_LIST_HEAD_RCU(struct list_head *list)
 {
-	ACCESS_ONCE(list->next) = list;
-	ACCESS_ONCE(list->prev) = list;
+	WRITE_ONCE(list->next, list);
+	WRITE_ONCE(list->prev, list);
 }
 
 /*
@@ -288,7 +288,7 @@ static inline void list_splice_init_rcu(struct list_head *list,
 #define list_first_or_null_rcu(ptr, type, member) \
 ({ \
 	struct list_head *__ptr = (ptr); \
-	struct list_head *__next = ACCESS_ONCE(__ptr->next); \
+	struct list_head *__next = READ_ONCE(__ptr->next); \
 	likely(__ptr != __next) ? list_entry_rcu(__next, type, member) : NULL; \
 })
 
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 573a5afd5ed8..87bb0eee665b 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -364,8 +364,8 @@ extern struct srcu_struct tasks_rcu_exit_srcu;
 #define rcu_note_voluntary_context_switch(t) \
 	do { \
 		rcu_all_qs(); \
-		if (ACCESS_ONCE((t)->rcu_tasks_holdout)) \
-			ACCESS_ONCE((t)->rcu_tasks_holdout) = false; \
+		if (READ_ONCE((t)->rcu_tasks_holdout)) \
+			WRITE_ONCE((t)->rcu_tasks_holdout, false); \
 	} while (0)
 #else /* #ifdef CONFIG_TASKS_RCU */
 #define TASKS_RCU(x) do { } while (0)
@@ -609,7 +609,7 @@ static inline void rcu_preempt_sleep_check(void)
 
 #define __rcu_access_pointer(p, space) \
 ({ \
-	typeof(*p) *_________p1 = (typeof(*p) *__force)ACCESS_ONCE(p); \
+	typeof(*p) *_________p1 = (typeof(*p) *__force)READ_ONCE(p); \
 	rcu_dereference_sparse(p, space); \
 	((typeof(*p) __force __kernel *)(_________p1)); \
 })
@@ -630,7 +630,7 @@ static inline void rcu_preempt_sleep_check(void)
 
 #define __rcu_access_index(p, space) \
 ({ \
-	typeof(p) _________p1 = ACCESS_ONCE(p); \
+	typeof(p) _________p1 = READ_ONCE(p); \
 	rcu_dereference_sparse(p, space); \
 	(_________p1); \
 })
@@ -659,7 +659,7 @@ static inline void rcu_preempt_sleep_check(void)
  */
 #define lockless_dereference(p) \
 ({ \
-	typeof(p) _________p1 = ACCESS_ONCE(p); \
+	typeof(p) _________p1 = READ_ONCE(p); \
 	smp_read_barrier_depends(); /* Dependency order vs. p above. */ \
 	(_________p1); \
 })
@@ -702,7 +702,7 @@ static inline void rcu_preempt_sleep_check(void)
  * @p: The pointer to read
  *
  * Return the value of the specified RCU-protected pointer, but omit the
- * smp_read_barrier_depends() and keep the ACCESS_ONCE().  This is useful
+ * smp_read_barrier_depends() and keep the READ_ONCE().  This is useful
  * when the value of this pointer is accessed, but the pointer is not
  * dereferenced, for example, when testing an RCU-protected pointer against
  * NULL.  Although rcu_access_pointer() may also be used in cases where
@@ -791,7 +791,7 @@ static inline void rcu_preempt_sleep_check(void)
  * @p: The index to read
  *
  * Return the value of the specified RCU-protected index, but omit the
- * smp_read_barrier_depends() and keep the ACCESS_ONCE().  This is useful
+ * smp_read_barrier_depends() and keep the READ_ONCE().  This is useful
  * when the value of this index is accessed, but the index is not
  * dereferenced, for example, when testing an RCU-protected index against
  * -1.  Although rcu_access_index() may also be used in cases where
@@ -827,7 +827,7 @@ static inline void rcu_preempt_sleep_check(void)
  * @c: The conditions under which the dereference will take place
  *
  * Return the value of the specified RCU-protected pointer, but omit
- * both the smp_read_barrier_depends() and the ACCESS_ONCE().  This
+ * both the smp_read_barrier_depends() and the READ_ONCE().  This
  * is useful in cases where update-side locks prevent the value of the
  * pointer from changing.  Please note that this primitive does -not-
  * prevent the compiler from repeating this reference or combining it
-- 
cgit v1.2.3


From 1ebee8017d84ec8a0ba893cf7b8be3f70ead088b Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sun, 19 Apr 2015 18:21:47 -0700
Subject: rcu: Eliminate array-index-based RCU primitives

Now that rcu_access_index() and rcu_dereference_index_check() are no
longer used, the commit removes them from the RCU API.  This means that
RCU's data dependencies now involve only pointers, give or take the
occasional cast to and then back from an integer type to do pointer
arithmetic.  This in turn eliminates the need for a number of operations
on values carrying RCU data dependencies.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: linux-edac@vger.kernel.org
Cc: Tony Luck <tony.luck@intel.com>
Acked-by: Borislav Petkov <bp@suse.de>
---
 include/linux/rcupdate.h | 50 ------------------------------------------------
 1 file changed, 50 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 87bb0eee665b..b97842ff71d2 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -628,21 +628,6 @@ static inline void rcu_preempt_sleep_check(void)
 	((typeof(*p) __force __kernel *)(p)); \
 })
 
-#define __rcu_access_index(p, space) \
-({ \
-	typeof(p) _________p1 = READ_ONCE(p); \
-	rcu_dereference_sparse(p, space); \
-	(_________p1); \
-})
-#define __rcu_dereference_index_check(p, c) \
-({ \
-	/* Dependency order vs. p above. */ \
-	typeof(p) _________p1 = lockless_dereference(p); \
-	rcu_lockdep_assert(c, \
-			   "suspicious rcu_dereference_index_check() usage"); \
-	(_________p1); \
-})
-
 /**
  * RCU_INITIALIZER() - statically initialize an RCU-protected global variable
  * @v: The value to statically initialize with.
@@ -786,41 +771,6 @@ static inline void rcu_preempt_sleep_check(void)
  */
 #define rcu_dereference_raw_notrace(p) __rcu_dereference_check((p), 1, __rcu)
 
-/**
- * rcu_access_index() - fetch RCU index with no dereferencing
- * @p: The index to read
- *
- * Return the value of the specified RCU-protected index, but omit the
- * smp_read_barrier_depends() and keep the READ_ONCE().  This is useful
- * when the value of this index is accessed, but the index is not
- * dereferenced, for example, when testing an RCU-protected index against
- * -1.  Although rcu_access_index() may also be used in cases where
- * update-side locks prevent the value of the index from changing, you
- * should instead use rcu_dereference_index_protected() for this use case.
- */
-#define rcu_access_index(p) __rcu_access_index((p), __rcu)
-
-/**
- * rcu_dereference_index_check() - rcu_dereference for indices with debug checking
- * @p: The pointer to read, prior to dereferencing
- * @c: The conditions under which the dereference will take place
- *
- * Similar to rcu_dereference_check(), but omits the sparse checking.
- * This allows rcu_dereference_index_check() to be used on integers,
- * which can then be used as array indices.  Attempting to use
- * rcu_dereference_check() on an integer will give compiler warnings
- * because the sparse address-space mechanism relies on dereferencing
- * the RCU-protected pointer.  Dereferencing integers is not something
- * that even gcc will put up with.
- *
- * Note that this function does not implicitly check for RCU read-side
- * critical sections.  If this function gains lots of uses, it might
- * make sense to provide versions for each flavor of RCU, but it does
- * not make sense as of early 2010.
- */
-#define rcu_dereference_index_check(p, c) \
-	__rcu_dereference_index_check((p), (c))
-
 /**
  * rcu_dereference_protected() - fetch RCU pointer when updates prevented
  * @p: The pointer to read, prior to dereferencing
-- 
cgit v1.2.3


From d956028e99b30726b0bce0ca684b40b1ad67b514 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 31 Mar 2015 09:39:41 +0100
Subject: documentation: memory-barriers: Fix smp_mb__before_spinlock()
 semantics

Our current documentation claims that, when followed by an ACQUIRE,
smp_mb__before_spinlock() orders prior loads against subsequent loads
and stores, which isn't the intent.  This commit therefore fixes the
documentation to state that this sequence orders only prior stores
against subsequent loads and stores.

In addition, the original intent of smp_mb__before_spinlock() was to only
order prior loads against subsequent stores, however, people have started
using it as if it ordered prior loads against subsequent loads and stores.
This commit therefore also updates smp_mb__before_spinlock()'s header
comment to reflect this new reality.

Cc: Oleg Nesterov <oleg@redhat.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/spinlock.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 3e18379dfa6f..0063b24b4f36 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -120,7 +120,7 @@ do {								\
 /*
  * Despite its name it doesn't necessarily has to be a full barrier.
  * It should only guarantee that a STORE before the critical section
- * can not be reordered with a LOAD inside this section.
+ * can not be reordered with LOADs and STOREs inside this section.
  * spin_lock() is the one-way barrier, this LOAD can not escape out
  * of the region. So the default implementation simply ensures that
  * a STORE can not move into the critical section, smp_wmb() should
-- 
cgit v1.2.3


From 3382adbc1bb8c80ea512243acf6059564287620b Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 4 Mar 2015 15:41:24 -0800
Subject: rcu: Eliminate a few CONFIG_RCU_NOCB_CPU_ALL #ifdefs

This commit converts several CONFIG_RCU_NOCB_CPU_ALL #ifdefs to
instead use IS_ENABLED().  This change should help avoid hiding
code from compiler diagnostics.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 4 ++--
 include/linux/rcutree.h  | 2 --
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 87bb0eee665b..5ec20bc4af76 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -1153,13 +1153,13 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
 #define kfree_rcu(ptr, rcu_head)					\
 	__kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head))
 
-#if defined(CONFIG_TINY_RCU) || defined(CONFIG_RCU_NOCB_CPU_ALL)
+#ifdef CONFIG_TINY_RCU
 static inline int rcu_needs_cpu(unsigned long *delta_jiffies)
 {
 	*delta_jiffies = ULONG_MAX;
 	return 0;
 }
-#endif /* #if defined(CONFIG_TINY_RCU) || defined(CONFIG_RCU_NOCB_CPU_ALL) */
+#endif /* #ifdef CONFIG_TINY_RCU */
 
 #if defined(CONFIG_RCU_NOCB_CPU_ALL)
 static inline bool rcu_is_nocb_cpu(int cpu) { return true; }
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index d2e583a6aaca..0bd400b02430 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -31,9 +31,7 @@
 #define __LINUX_RCUTREE_H
 
 void rcu_note_context_switch(void);
-#ifndef CONFIG_RCU_NOCB_CPU_ALL
 int rcu_needs_cpu(unsigned long *delta_jiffies);
-#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */
 void rcu_cpu_stall_reset(void);
 
 /*
-- 
cgit v1.2.3


From 5af4692a75daf08dddc93dbb4cd2a1b3d3b617af Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sat, 25 Apr 2015 12:48:29 -0700
Subject: smp: Make control dependencies work on Alpha, improve documentation

The current formulation of control dependencies fails on DEC Alpha,
which does not respect dependencies of any kind unless an explicit
memory barrier is provided.  This means that the current fomulation of
control dependencies fails on Alpha.  This commit therefore creates a
READ_ONCE_CTRL() that has the same overhead on non-Alpha systems, but
causes Alpha to produce the needed ordering.  This commit also applies
READ_ONCE_CTRL() to the one known use of control dependencies.

Use of READ_ONCE_CTRL() also has the beneficial effect of adding a bit
of self-documentation to control dependencies.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 include/linux/compiler.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 867722591be2..5d66777914db 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -252,6 +252,22 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
 #define WRITE_ONCE(x, val) \
 	({ typeof(x) __val = (val); __write_once_size(&(x), &__val, sizeof(__val)); __val; })
 
+/**
+ * READ_ONCE_CTRL - Read a value heading a control dependency
+ * @x: The value to be read, heading the control dependency
+ *
+ * Control dependencies are tricky.  See Documentation/memory-barriers.txt
+ * for important information on how to use them.  Note that in many cases,
+ * use of smp_load_acquire() will be much simpler.  Control dependencies
+ * should be avoided except on the hottest of hotpaths.
+ */
+#define READ_ONCE_CTRL(x) \
+({ \
+	typeof(x) __val = READ_ONCE(x); \
+	smp_read_barrier_depends(); /* Enforce control dependency. */ \
+	__val; \
+})
+
 #endif /* __KERNEL__ */
 
 #endif /* __ASSEMBLY__ */
-- 
cgit v1.2.3


From f517700cce37ffcb36e7afae0294fd11c72ed134 Mon Sep 17 00:00:00 2001
From: Ying Xue <ying.xue@windriver.com>
Date: Thu, 26 Mar 2015 13:27:08 +0800
Subject: rculist: Fix another sparse warning

This fixes the following sparse warnings:

make C=1 CF=-D__CHECK_ENDIAN__ net/tipc/name_table.o
net/tipc/name_table.c:977:17: error: incompatible types in comparison expression (different address spaces)
net/tipc/name_table.c:977:17: error: incompatible types in comparison expression (different address spaces)

To silence these spare complaints, an RCU annotation should be added to
"next" pointer of hlist_node structure through hlist_next_rcu() macro
when iterating over a hlist with hlist_for_each_entry_from_rcu().

Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rculist.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 665397247e82..17c6b1f84a77 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -549,8 +549,8 @@ static inline void hlist_add_behind_rcu(struct hlist_node *n,
  */
 #define hlist_for_each_entry_from_rcu(pos, member)			\
 	for (; pos;							\
-	     pos = hlist_entry_safe(rcu_dereference((pos)->member.next),\
-			typeof(*(pos)), member))
+	     pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(	\
+			&(pos)->member)), typeof(*(pos)), member))
 
 #endif	/* __KERNEL__ */
 #endif
-- 
cgit v1.2.3


From 51952bc633064311410b041fad38da1614f4539e Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 21 Apr 2015 11:15:30 -0700
Subject: rcu: Further shrink Tiny RCU by making empty functions static inlines

The Tiny RCU counterparts to rcu_idle_enter(), rcu_idle_exit(),
rcu_irq_enter(), and rcu_irq_exit() are empty functions, but each has
EXPORT_SYMBOL_GPL(), which needlessly consumes extra memory, especially
in kernels built with module support.  This commit therefore moves these
functions to static inlines in rcutiny.h, removing the need for exports.

This won't affect the size of the tiniest kernels, which are likely
built without module support, but might help semi-tiny kernels that
might include module support.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 include/linux/rcupdate.h |  4 ----
 include/linux/rcutiny.h  | 16 ++++++++++++++++
 include/linux/rcutree.h  |  5 +++++
 3 files changed, 21 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 87bb0eee665b..1b3d7bcb3a6c 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -292,10 +292,6 @@ void rcu_sched_qs(void);
 void rcu_bh_qs(void);
 void rcu_check_callbacks(int user);
 struct notifier_block;
-void rcu_idle_enter(void);
-void rcu_idle_exit(void);
-void rcu_irq_enter(void);
-void rcu_irq_exit(void);
 int rcu_cpu_notify(struct notifier_block *self,
 		   unsigned long action, void *hcpu);
 
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 937edaeb150d..3df6c1ec4e25 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -159,6 +159,22 @@ static inline void rcu_cpu_stall_reset(void)
 {
 }
 
+static inline void rcu_idle_enter(void)
+{
+}
+
+static inline void rcu_idle_exit(void)
+{
+}
+
+static inline void rcu_irq_enter(void)
+{
+}
+
+static inline void rcu_irq_exit(void)
+{
+}
+
 static inline void exit_rcu(void)
 {
 }
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index d2e583a6aaca..f22d83f49e56 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -93,6 +93,11 @@ void rcu_force_quiescent_state(void);
 void rcu_bh_force_quiescent_state(void);
 void rcu_sched_force_quiescent_state(void);
 
+void rcu_idle_enter(void);
+void rcu_idle_exit(void);
+void rcu_irq_enter(void);
+void rcu_irq_exit(void);
+
 void exit_rcu(void);
 
 void rcu_scheduler_starting(void);
-- 
cgit v1.2.3


From ad5fb870c486d932a1749d7853dd70f436a7e03f Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 3 Apr 2015 12:05:28 -0400
Subject: e820, efi: add ACPI 6.0 persistent memory types

ACPI 6.0 formalizes e820-type-7 and efi-type-14 as persistent memory.
Mark it "reserved" and allow it to be claimed by a persistent memory
device driver.

This definition is in addition to the Linux kernel's existing type-12
definition that was recently added in support of shipping platforms with
NVDIMM support that predate ACPI 6.0 (which now classifies type-12 as
OEM reserved).

Note, /proc/iomem can be consulted for differentiating legacy
"Persistent Memory (legacy)" E820_PRAM vs standard "Persistent Memory"
E820_PMEM.

Cc: Boaz Harrosh <boaz@plexistor.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jens Axboe <axboe@fb.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matthew Wilcox <willy@linux.intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Jeff Moyer <jmoyer@redhat.com>
Acked-by: Andy Lutomirski <luto@amacapital.net>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Acked-by: Christoph Hellwig <hch@lst.de>
Tested-by: Toshi Kani <toshi.kani@hp.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/efi.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index af5be0368dec..825b6e3d69cb 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -85,7 +85,8 @@ typedef	struct {
 #define EFI_MEMORY_MAPPED_IO		11
 #define EFI_MEMORY_MAPPED_IO_PORT_SPACE	12
 #define EFI_PAL_CODE			13
-#define EFI_MAX_MEMORY_TYPE		14
+#define EFI_PERSISTENT_MEMORY		14
+#define EFI_MAX_MEMORY_TYPE		15
 
 /* Attribute values: */
 #define EFI_MEMORY_UC		((u64)0x0000000000000001ULL)	/* uncached */
-- 
cgit v1.2.3


From f36f3f2846b5578d62910ee0b6dbef59fdd1cfa4 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 18 May 2015 13:20:23 +0200
Subject: KVM: add "new" argument to kvm_arch_commit_memory_region

This lets the function access the new memory slot without going through
kvm_memslots and id_to_memslot.  It will simplify the code when more
than one address space will be supported.

Unfortunately, the "const"ness of the new argument must be casted
away in two places.  Fixing KVM to accept const struct kvm_memory_slot
pointers would require modifications in pretty much all architectures,
and is left for later.

Reviewed-by: Radim Krcmar <rkrcmar@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8815f1dffb77..9bd3bc16be87 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -516,6 +516,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 void kvm_arch_commit_memory_region(struct kvm *kvm,
 				const struct kvm_userspace_memory_region *mem,
 				const struct kvm_memory_slot *old,
+				const struct kvm_memory_slot *new,
 				enum kvm_mr_change change);
 bool kvm_largepages_enabled(void);
 void kvm_disable_largepages(void);
-- 
cgit v1.2.3


From d9ef13c2b3983de8dd1373ef670799dbb6498122 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 19 May 2015 16:01:50 +0200
Subject: KVM: pass kvm_memory_slot to gfn_to_page_many_atomic

The memory slot is already available from gfn_to_memslot_dirty_bitmap.
Isn't it a shame to look it up again?  Plus, it makes gfn_to_page_many_atomic
agnostic of multiple VCPU address spaces.

Reviewed-by: Radim Krcmar <rkrcmar@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9bd3bc16be87..a8bcbc9c6078 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -526,8 +526,8 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm);
 void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 				   struct kvm_memory_slot *slot);
 
-int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
-			    int nr_pages);
+int gfn_to_page_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn,
+			    struct page **pages, int nr_pages);
 
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
-- 
cgit v1.2.3


From bfa1ce5f38938cc9e6c7f2d1011f88eba2b9e2b2 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Thu, 28 May 2015 11:40:54 +0200
Subject: bus: mvebu-mbus: add mv_mbus_dram_info_nooverlap()

This commit introduces a variant of the mv_mbus_dram_info() function
called mv_mbus_dram_info_nooverlap(). Both functions are used by
Marvell drivers supporting devices doing DMA, and provide them a
description the DRAM ranges that they need to configure their DRAM
windows.

The ranges provided by the mv_mbus_dram_info() function may overlap
with the I/O windows if there is a lot (>= 4 GB) of RAM
installed. This is not a problem for most of the DMA masters, except
for the upcoming new CESA crypto driver because it does DMA to the
SRAM, which is mapped through an I/O window. For this unit, we need to
have DRAM ranges that do not overlap with the I/O windows.

A first implementation done in commit 1737cac69369 ("bus: mvebu-mbus:
make sure SDRAM CS for DMA don't overlap the MBus bridge window"),
changed the information returned by mv_mbus_dram_info() to match this
requirement. However, it broke the requirement of the other DMA
masters than the DRAM ranges should have power of two sizes.

To solve this situation, this commit introduces a new
mv_mbus_dram_info_nooverlap() function, which returns the same
information as mv_mbus_dram_info(), but guaranteed to not overlap with
the I/O windows.

In the end, it gives us two variants of the mv_mbus_dram_info*()
functions:

 - The normal one, mv_mbus_dram_info(), which has been around for many
   years. This function returns the raw DRAM ranges, which are
   guaranteed to use power of two sizes, but will overlap with I/O
   windows. This function will therefore be used by all DMA masters
   (SATA, XOR, Ethernet, etc.) except the CESA crypto driver.

 - The new 'nooverlap' variant, mv_mbus_dram_info_nooverlap(). This
   function returns DRAM ranges after they have been "tweaked" to make
   sure they don't overlap with I/O windows. By doing this tweaking,
   we remove the power of two size guarantee. This variant will be
   used by the new CESA crypto driver.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
---
 include/linux/mbus.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mbus.h b/include/linux/mbus.h
index 611b69fa8594..1f7bc630d225 100644
--- a/include/linux/mbus.h
+++ b/include/linux/mbus.h
@@ -54,11 +54,16 @@ struct mbus_dram_target_info
  */
 #ifdef CONFIG_PLAT_ORION
 extern const struct mbus_dram_target_info *mv_mbus_dram_info(void);
+extern const struct mbus_dram_target_info *mv_mbus_dram_info_nooverlap(void);
 #else
 static inline const struct mbus_dram_target_info *mv_mbus_dram_info(void)
 {
 	return NULL;
 }
+static inline const struct mbus_dram_target_info *mv_mbus_dram_info_nooverlap(void)
+{
+	return NULL;
+}
 #endif
 
 int mvebu_mbus_save_cpu_target(u32 *store_addr);
-- 
cgit v1.2.3


From 85e6f09785bd06f0510bdb00c40772c7f8da3c43 Mon Sep 17 00:00:00 2001
From: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Date: Tue, 19 May 2015 16:16:14 +0100
Subject: ARM: 8367/1: sa1100: prepare for moving irq driver to drivers/irqchip

Prepare for moving sa1100 irq driver to irqchip infrastructure - split
sa1100_init_irq into helper code and irq parts.

Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/linux/irqchip/irq-sa11x0.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 create mode 100644 include/linux/irqchip/irq-sa11x0.h

(limited to 'include/linux')

diff --git a/include/linux/irqchip/irq-sa11x0.h b/include/linux/irqchip/irq-sa11x0.h
new file mode 100644
index 000000000000..15db6829c1e4
--- /dev/null
+++ b/include/linux/irqchip/irq-sa11x0.h
@@ -0,0 +1,17 @@
+/*
+ * Generic IRQ handling for the SA11x0.
+ *
+ * Copyright (C) 2015 Dmitry Eremin-Solenikov
+ * Copyright (C) 1999-2001 Nicolas Pitre
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __INCLUDE_LINUX_IRQCHIP_IRQ_SA11x0_H
+#define __INCLUDE_LINUX_IRQCHIP_IRQ_SA11x0_H
+
+void __init sa11x0_init_irq_nodt(int irq_start, resource_size_t io_start);
+
+#endif
-- 
cgit v1.2.3


From 307c858bc245da5229b30186546590e1d472fc8f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 28 May 2015 16:08:02 +0200
Subject: usb: phy: add static inline wrapper for devm_usb_get_phy_by_node

The newly introduced devm_usb_get_phy_by_node function only has
an extern declaration, but no alternative for the case that
CONFIG_USB_PHY is disabled, which leads to a build error when
it is used anyway:

drivers/power/twl4030_charger.c: In function 'twl4030_bci_probe':
drivers/power/twl4030_charger.c:648:23: error: implicit declaration of function 'devm_usb_get_phy_by_node' [-Werror=implicit-function-declaration]
    bci->transceiver = devm_usb_get_phy_by_node(

This adds the wrapper in the same way that we have one for
all other usb-phy API functions.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: e842b84c8e7 ("usb: phy: Add interface to get phy give of device_node.")
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 include/linux/usb/phy.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb/phy.h b/include/linux/usb/phy.h
index 8ed1e29ef329..e39f251cf861 100644
--- a/include/linux/usb/phy.h
+++ b/include/linux/usb/phy.h
@@ -240,6 +240,12 @@ static inline struct usb_phy *devm_usb_get_phy_by_phandle(struct device *dev,
 	return ERR_PTR(-ENXIO);
 }
 
+static inline struct usb_phy *devm_usb_get_phy_by_node(struct device *dev,
+	struct device_node *node, struct notifier_block *nb)
+{
+	return ERR_PTR(-ENXIO);
+}
+
 static inline void usb_put_phy(struct usb_phy *x)
 {
 }
-- 
cgit v1.2.3


From 9626b6993b2e6faf047d2d96958e8474edc9c7a5 Mon Sep 17 00:00:00 2001
From: jilai wang <jilaiw@codeaurora.org>
Date: Fri, 10 Apr 2015 16:15:59 -0400
Subject: firmware: qcom: scm: Add HDCP Support

HDCP driver needs to check if secure environment supports HDCP.  If it's
supported, then it requires to program some registers through SCM.
Add qcom_scm_hdcp_available and qcom_scm_hdcp_req to support these
requirements.

Signed-off-by: Jilai Wang <jilaiw@codeaurora.org>
Signed-off-by: Kumar Gala <galak@codeaurora.org>
---
 include/linux/qcom_scm.h | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h
index d7a974d5f57c..6e7d5ec65838 100644
--- a/include/linux/qcom_scm.h
+++ b/include/linux/qcom_scm.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2010-2014, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2010-2015, The Linux Foundation. All rights reserved.
  * Copyright (C) 2015 Linaro Ltd.
  *
  * This program is free software; you can redistribute it and/or modify
@@ -16,6 +16,17 @@
 extern int qcom_scm_set_cold_boot_addr(void *entry, const cpumask_t *cpus);
 extern int qcom_scm_set_warm_boot_addr(void *entry, const cpumask_t *cpus);
 
+#define QCOM_SCM_HDCP_MAX_REQ_CNT	5
+
+struct qcom_scm_hdcp_req {
+	u32 addr;
+	u32 val;
+};
+
+extern bool qcom_scm_hdcp_available(void);
+extern int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt,
+		u32 *resp);
+
 #define QCOM_SCM_CPU_PWR_DOWN_L2_ON	0x0
 #define QCOM_SCM_CPU_PWR_DOWN_L2_OFF	0x1
 
-- 
cgit v1.2.3


From 3c6296f716ebef704b76070d90567ab4faa8462c Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Thu, 28 May 2015 13:21:34 -0400
Subject: ring-buffer: Remove useless unused tracing_off_permanent()

The tracing_off_permanent() call is a way to disable all ring_buffers.
Nothing uses it and nothing should use it, as tracing_off() and
friends are better, as they disable the ring buffers related to
tracing. The tracing_off_permanent() even disabled non tracing
ring buffers. This is a bit drastic, and was added to handle NMIs
doing outputs that could corrupt the ring buffer when only tracing
used them. It is now obsolete and adds a little overhead, it should
be removed.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/kernel.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 3a5b48e52a9e..d948718a83d7 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -532,12 +532,6 @@ bool mac_pton(const char *s, u8 *mac);
  *
  * Most likely, you want to use tracing_on/tracing_off.
  */
-#ifdef CONFIG_RING_BUFFER
-/* trace_off_permanent stops recording with no way to bring it back */
-void tracing_off_permanent(void);
-#else
-static inline void tracing_off_permanent(void) { }
-#endif
 
 enum ftrace_dump_mode {
 	DUMP_NONE,
-- 
cgit v1.2.3


From 0040b933187b11f78e83dd162a31d64a46be4e37 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Mon, 20 Apr 2015 11:52:23 -0700
Subject: f2fs: add missing version info in superblock

The mkfs.f2fs remains kernel version in superblock, but f2fs module has not
added that so far.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 include/linux/f2fs_fs.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 591f8c3ef410..8d345c24bcf7 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -50,6 +50,8 @@
 #define MAX_ACTIVE_NODE_LOGS	8
 #define MAX_ACTIVE_DATA_LOGS	8
 
+#define VERSION_LEN	256
+
 /*
  * For superblock
  */
@@ -86,6 +88,9 @@ struct f2fs_super_block {
 	__le32 extension_count;		/* # of extensions below */
 	__u8 extension_list[F2FS_MAX_EXTENSION][8];	/* extension array */
 	__le32 cp_payload;
+	__u8 version[VERSION_LEN];	/* the kernel version */
+	__u8 init_version[VERSION_LEN];	/* the initial kernel version */
+	__u8 reserved[892];		/* valid reserved region */
 } __packed;
 
 /*
-- 
cgit v1.2.3


From 76f105a2dbcd47509bac6ba8d94cb3759a3e6e9d Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Mon, 13 Apr 2015 15:10:36 -0700
Subject: f2fs: add feature facility in superblock

This patch introduces a feature in superblock, which will indicate any new
features for f2fs.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 include/linux/f2fs_fs.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 8d345c24bcf7..d44e97f2b98e 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -90,7 +90,8 @@ struct f2fs_super_block {
 	__le32 cp_payload;
 	__u8 version[VERSION_LEN];	/* the kernel version */
 	__u8 init_version[VERSION_LEN];	/* the initial kernel version */
-	__u8 reserved[892];		/* valid reserved region */
+	__le32 feature;			/* defined features */
+	__u8 reserved[888];		/* valid reserved region */
 } __packed;
 
 /*
-- 
cgit v1.2.3


From cde4de1205770514005663d70a9a7d81cb555085 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Mon, 20 Apr 2015 13:57:51 -0700
Subject: f2fs crypto: declare some definitions for f2fs encryption feature

This definitions will be used by inode and superblock for encyption.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 include/linux/f2fs_fs.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index d44e97f2b98e..920408a21ffd 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -91,7 +91,9 @@ struct f2fs_super_block {
 	__u8 version[VERSION_LEN];	/* the kernel version */
 	__u8 init_version[VERSION_LEN];	/* the initial kernel version */
 	__le32 feature;			/* defined features */
-	__u8 reserved[888];		/* valid reserved region */
+	__u8 encryption_level;		/* versioning level for encryption */
+	__u8 encrypt_pw_salt[16];	/* Salt used for string2key algorithm */
+	__u8 reserved[871];		/* valid reserved region */
 } __packed;
 
 /*
-- 
cgit v1.2.3


From f8df88081183bd4d3c461c617c3519445eb85642 Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Wed, 27 May 2015 23:06:30 +0900
Subject: extcon: Remove optional print_name() function pointer of extcon_dev

This patch removes the optional print_name() function pointer included in
'struct extcon_dev' because the extcon must maintain the consistent name of
extcon device on sysfs instead of inconsistent name. After merged patch[1],
extcon can maintain the consistent name of extcon device without any hard-coded
device name.
[1] https://lkml.org/lkml/2015/4/27/258

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 include/linux/extcon.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/extcon.h b/include/linux/extcon.h
index be9652b3a154..a7b224b20ecc 100644
--- a/include/linux/extcon.h
+++ b/include/linux/extcon.h
@@ -83,8 +83,6 @@ struct extcon_cable;
  *			be attached simulataneously. {0x7, 0} is equivalent to
  *			{0x3, 0x6, 0x5, 0}. If it is {0xFFFFFFFF, 0}, there
  *			can be no simultaneous connections.
- * @print_name:		An optional callback to override the method to print the
- *			name of the extcon device.
  * @print_state:	An optional callback to override the method to print the
  *			status of the extcon device.
  * @dev:		Device of this extcon.
@@ -111,7 +109,6 @@ struct extcon_dev {
 	const u32 *mutually_exclusive;
 
 	/* Optional callbacks to override class functions */
-	ssize_t	(*print_name)(struct extcon_dev *edev, char *buf);
 	ssize_t	(*print_state)(struct extcon_dev *edev, char *buf);
 
 	/* Internal data. Please do not set. */
-- 
cgit v1.2.3


From c80ef9e0c021ff86771fdd72583c75d8f7b6a720 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 29 May 2015 10:52:59 +0200
Subject: cgroup: add seq_file forward declaration for struct cftype

Recent header file changes for cgroup caused lots of warnings
about a missing struct seq_file form declaration for every
inclusion of include/linux/cgroup-defs.h.

As some files are built with -Werror, this leads to build
failure like:

                 from /git/arm-soc/drivers/gpu/drm/tilcdc/tilcdc_crtc.c:18:
/git/arm-soc/include/linux/cgroup-defs.h:354:25: error: 'struct seq_file' declared inside parameter list [-Werror]
cc1: all warnings being treated as errors
make[6]: *** [drivers/gpu/drm/tilcdc/tilcdc_crtc.o] Error 1

This patch adds the declaration, which resolves both the
warnings and the drm failure.

tj: Moved it where other type declarations are.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: b4a04ab7a37b ("cgroup: separate out include/linux/cgroup-defs.h")
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup-defs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 7d83d7f73420..26d1cea7929f 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -26,6 +26,7 @@ struct cgroup_taskset;
 struct kernfs_node;
 struct kernfs_ops;
 struct kernfs_open_file;
+struct seq_file;
 
 #define MAX_CGROUP_TYPE_NAMELEN 32
 #define MAX_CGROUP_ROOT_NAMELEN 64
-- 
cgit v1.2.3


From e548ca4ee4595f65b262661d166310ad8a149bec Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Fri, 29 May 2015 13:11:32 -0600
Subject: block: don't honor chunk sizes for data-less IO

We don't need to honor chunk sizes for IO that doesn't carry any
data.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blkdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 9ded80da2c16..ccaa9aecd593 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -903,7 +903,7 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq)
 	if (unlikely(rq->cmd_type == REQ_TYPE_BLOCK_PC))
 		return q->limits.max_hw_sectors;
 
-	if (!q->limits.chunk_sectors)
+	if (!q->limits.chunk_sectors || (rq->cmd_flags & REQ_DISCARD))
 		return blk_queue_get_max_sectors(q, rq->cmd_flags);
 
 	return min(blk_max_size_offset(q, blk_rq_pos(rq)),
-- 
cgit v1.2.3


From 19bdb6e4ec071bc49a9871b41e6a59a1657ed365 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Tue, 26 May 2015 15:11:44 -0600
Subject: PCI: Move pci_ari_enabled() to global header

pci_ari_enabled() is useful outside of drivers/pci, particularly for
deriving INTx routing via ACPI _PRT, so move it to the global header.
Also convert to bool return.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Don Dutile <ddutile@redhat.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/pci.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 353db8dc4c6e..2925561a8f1e 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1905,4 +1905,15 @@ static inline bool pci_is_dev_assigned(struct pci_dev *pdev)
 {
 	return (pdev->dev_flags & PCI_DEV_FLAGS_ASSIGNED) == PCI_DEV_FLAGS_ASSIGNED;
 }
+
+/**
+ * pci_ari_enabled - query ARI forwarding status
+ * @bus: the PCI bus
+ *
+ * Returns true if ARI forwarding is enabled.
+ */
+static inline bool pci_ari_enabled(struct pci_bus *bus)
+{
+	return bus->self && bus->self->ari_enabled;
+}
 #endif /* LINUX_PCI_H */
-- 
cgit v1.2.3


From 3a9ad0b4fdcd57f775d3615004c8c64c021a9e7d Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Wed, 27 May 2015 17:23:51 -0700
Subject: PCI: Add pci_bus_addr_t

David Ahern reported that d63e2e1f3df9 ("sparc/PCI: Clip bridge windows
to fit in upstream windows") fails to boot on sparc/T5-8:

  pci 0000:06:00.0: reg 0x184: can't handle BAR above 4GB (bus address 0x110204000)

The problem is that sparc64 assumed that dma_addr_t only needed to hold DMA
addresses, i.e., bus addresses returned via the DMA API (dma_map_single(),
etc.), while the PCI core assumed dma_addr_t could hold *any* bus address,
including raw BAR values.  On sparc64, all DMA addresses fit in 32 bits, so
dma_addr_t is a 32-bit type.  However, BAR values can be 64 bits wide, so
they don't fit in a dma_addr_t.  d63e2e1f3df9 added new checking that
tripped over this mismatch.

Add pci_bus_addr_t, which is wide enough to hold any PCI bus address,
including both raw BAR values and DMA addresses.  This will be 64 bits
on 64-bit platforms and on platforms with a 64-bit dma_addr_t.  Then
dma_addr_t only needs to be wide enough to hold addresses from the DMA API.

[bhelgaas: changelog, bugzilla, Kconfig to ensure pci_bus_addr_t is at
least as wide as dma_addr_t, documentation]
Fixes: d63e2e1f3df9 ("sparc/PCI: Clip bridge windows to fit in upstream windows")
Fixes: 23b13bc76f35 ("PCI: Fail safely if we can't handle BARs larger than 4GB")
Link: http://lkml.kernel.org/r/CAE9FiQU1gJY1LYrxs+ma5LCTEEe4xmtjRG0aXJ9K_Tsu+m9Wuw@mail.gmail.com
Link: http://lkml.kernel.org/r/1427857069-6789-1-git-send-email-yinghai@kernel.org
Link: https://bugzilla.kernel.org/show_bug.cgi?id=96231
Reported-by: David Ahern <david.ahern@oracle.com>
Tested-by: David Ahern <david.ahern@oracle.com>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: David S. Miller <davem@davemloft.net>
CC: stable@vger.kernel.org	# v3.19+
---
 include/linux/pci.h   | 12 +++++++++---
 include/linux/types.h | 12 ++++++++++--
 2 files changed, 19 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 353db8dc4c6e..956f74bad37a 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -577,9 +577,15 @@ int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn,
 int raw_pci_write(unsigned int domain, unsigned int bus, unsigned int devfn,
 		  int reg, int len, u32 val);
 
+#ifdef CONFIG_PCI_BUS_ADDR_T_64BIT
+typedef u64 pci_bus_addr_t;
+#else
+typedef u32 pci_bus_addr_t;
+#endif
+
 struct pci_bus_region {
-	dma_addr_t start;
-	dma_addr_t end;
+	pci_bus_addr_t start;
+	pci_bus_addr_t end;
 };
 
 struct pci_dynids {
@@ -1128,7 +1134,7 @@ int __must_check pci_bus_alloc_resource(struct pci_bus *bus,
 
 int pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr);
 
-static inline dma_addr_t pci_bus_address(struct pci_dev *pdev, int bar)
+static inline pci_bus_addr_t pci_bus_address(struct pci_dev *pdev, int bar)
 {
 	struct pci_bus_region region;
 
diff --git a/include/linux/types.h b/include/linux/types.h
index 59698be03490..8715287c3b1f 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -139,12 +139,20 @@ typedef unsigned long blkcnt_t;
  */
 #define pgoff_t unsigned long
 
-/* A dma_addr_t can hold any valid DMA or bus address for the platform */
+/*
+ * A dma_addr_t can hold any valid DMA address, i.e., any address returned
+ * by the DMA API.
+ *
+ * If the DMA API only uses 32-bit addresses, dma_addr_t need only be 32
+ * bits wide.  Bus addresses, e.g., PCI BARs, may be wider than 32 bits,
+ * but drivers do memory-mapped I/O to ioremapped kernel virtual addresses,
+ * so they don't care about the size of the actual bus addresses.
+ */
 #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
 typedef u64 dma_addr_t;
 #else
 typedef u32 dma_addr_t;
-#endif /* dma_addr_t */
+#endif
 
 typedef unsigned __bitwise__ gfp_t;
 typedef unsigned __bitwise__ fmode_t;
-- 
cgit v1.2.3


From db874c7e10557f8f1af9a6fb1ec6589ae06f349c Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Fri, 29 May 2015 09:54:02 -0700
Subject: PM / wakeirq: Fix typo in prototype for dev_pm_set_dedicated_wake_irq

Looks like I only built test the dev_pm_set_wake_irq and not the
dev_pm_set_dedicated_wake_irq case on x86.

Turns out there's a typo for the dev_pm_set_dedicated_wake_irq
prototype that causes a build error if CONFIG_COMPILE_TEST
and CONFIG_MMC_OMAP_HS are selected.

Reported-by: Jim Davis <jim.epost@gmail.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Reviewed-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/pm_wakeirq.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pm_wakeirq.h b/include/linux/pm_wakeirq.h
index 4046fa1b7d25..cd5b62db9084 100644
--- a/include/linux/pm_wakeirq.h
+++ b/include/linux/pm_wakeirq.h
@@ -30,8 +30,7 @@ static inline int dev_pm_set_wake_irq(struct device *dev, int irq)
 	return 0;
 }
 
-static inline int dev_pm_set_dedicated__wake_irq(struct device *dev,
-						 int irq)
+static inline int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq)
 {
 	return 0;
 }
-- 
cgit v1.2.3


From 5790cf3c00c2f92aacba348e13f8a9a8f5dd96bd Mon Sep 17 00:00:00 2001
From: Mathieu Olivari <mathieu@codeaurora.org>
Date: Wed, 27 May 2015 11:02:47 -0700
Subject: stmmac: add phy-handle support to the platform layer

On stmmac driver, PHY specification in device-tree was done using the
non-standard property "snps,phy-addr". Specifying a PHY on a different
MDIO bus that the one within the stmmac controller doesn't seem to be
possible when device-tree is used.

This change adds support for the phy-handle property, as specified in
Documentation/devicetree/bindings/net/ethernet.txt.

Signed-off-by: Mathieu Olivari <mathieu@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/stmmac.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 7f484a239f53..c735f5c91eea 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -99,6 +99,7 @@ struct plat_stmmacenet_data {
 	int phy_addr;
 	int interface;
 	struct stmmac_mdio_bus_data *mdio_bus_data;
+	struct device_node *phy_node;
 	struct stmmac_dma_cfg *dma_cfg;
 	int clk_csr;
 	int has_gmac;
-- 
cgit v1.2.3


From f4fb874cf076f9eafdd15c0a88cd0f0397b95e43 Mon Sep 17 00:00:00 2001
From: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Date: Wed, 27 May 2015 21:07:26 -0400
Subject: if_vlan: fix vlaue -> value typo

Fixes "vlaue" for "value" in include/linux/if_vlan.h.

Signed-off-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index b9ab677c0c0a..a40d29846ac2 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -416,7 +416,7 @@ static inline void __vlan_hwaccel_put_tag(struct sk_buff *skb,
 /**
  * __vlan_get_tag - get the VLAN ID that is part of the payload
  * @skb: skbuff to query
- * @vlan_tci: buffer to store vlaue
+ * @vlan_tci: buffer to store value
  *
  * Returns error if the skb is not of VLAN type
  */
@@ -435,7 +435,7 @@ static inline int __vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci)
 /**
  * __vlan_hwaccel_get_tag - get the VLAN ID that is in @skb->cb[]
  * @skb: skbuff to query
- * @vlan_tci: buffer to store vlaue
+ * @vlan_tci: buffer to store value
  *
  * Returns error if @skb->vlan_tci is not set correctly
  */
@@ -456,7 +456,7 @@ static inline int __vlan_hwaccel_get_tag(const struct sk_buff *skb,
 /**
  * vlan_get_tag - get the VLAN ID from the skb
  * @skb: skbuff to query
- * @vlan_tci: buffer to store vlaue
+ * @vlan_tci: buffer to store value
  *
  * Returns error if the skb is not VLAN tagged
  */
-- 
cgit v1.2.3


From 64ffaa2159b752e6c263dc57eaaaed7367d37493 Mon Sep 17 00:00:00 2001
From: Amir Vadai <amirv@mellanox.com>
Date: Thu, 28 May 2015 22:28:38 +0300
Subject: net/mlx5_core,mlx5_ib: Do not use vmap() on coherent memory

As David Daney pointed in mlx4_core driver [1], mlx5_core is also
misusing the DMA-API.

This patch is removing the code that vmap() memory allocated by
dma_alloc_coherent().

After this patch, users of this drivers might fail allocating resources
on memory fragmeneted systems.  This will be fixed later on.

[1] - https://patchwork.ozlabs.org/patch/458531/

CC: David Daney <david.daney@cavium.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/driver.h | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 9a90e7523dc2..c4cf25ffcc16 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -334,8 +334,6 @@ struct mlx5_buf_list {
 
 struct mlx5_buf {
 	struct mlx5_buf_list	direct;
-	struct mlx5_buf_list   *page_list;
-	int			nbufs;
 	int			npages;
 	int			size;
 	u8			page_shift;
@@ -586,11 +584,7 @@ struct mlx5_pas {
 
 static inline void *mlx5_buf_offset(struct mlx5_buf *buf, int offset)
 {
-	if (likely(BITS_PER_LONG == 64 || buf->nbufs == 1))
 		return buf->direct.buf + offset;
-	else
-		return buf->page_list[offset >> PAGE_SHIFT].buf +
-			(offset & (PAGE_SIZE - 1));
 }
 
 extern struct workqueue_struct *mlx5_core_wq;
@@ -669,8 +663,7 @@ void mlx5_health_cleanup(void);
 void  __init mlx5_health_init(void);
 void mlx5_start_health_poll(struct mlx5_core_dev *dev);
 void mlx5_stop_health_poll(struct mlx5_core_dev *dev);
-int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, int max_direct,
-		   struct mlx5_buf *buf);
+int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf);
 void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf);
 struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev,
 						      gfp_t flags, int npages);
-- 
cgit v1.2.3


From db058a186f98b057c19c42f7b10d9a96fd3b5d59 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Thu, 28 May 2015 22:28:39 +0300
Subject: net/mlx5_core: Set irq affinity hints

Preparation for upcoming ethernet driver.
- Move msix array from eq_table struct to priv since its not related to
  eq_table
- Intorduce irq_info struct to hold all irq information
- Move name from mlx5_eq to irq_info struct since it is irq property.
- Set IRQ affinity hints

Signed-off-by: Achiad Shochat <achiad@mellanox.com>
Signed-off-by: Rana Shahout <ranas@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/driver.h | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index c4cf25ffcc16..9e8979502826 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -85,7 +85,7 @@ enum {
 };
 
 enum {
-	MLX5_MAX_EQ_NAME	= 32
+	MLX5_MAX_IRQ_NAME	= 32
 };
 
 enum {
@@ -349,7 +349,6 @@ struct mlx5_eq {
 	u8			eqn;
 	int			nent;
 	u64			mask;
-	char			name[MLX5_MAX_EQ_NAME];
 	struct list_head	list;
 	int			index;
 	struct mlx5_rsc_debug	*dbg;
@@ -412,7 +411,6 @@ struct mlx5_eq_table {
 	struct mlx5_eq		pages_eq;
 	struct mlx5_eq		async_eq;
 	struct mlx5_eq		cmd_eq;
-	struct msix_entry	*msix_arr;
 	int			num_comp_vectors;
 	/* protect EQs list
 	 */
@@ -465,9 +463,16 @@ struct mlx5_mr_table {
 	struct radix_tree_root	tree;
 };
 
+struct mlx5_irq_info {
+	cpumask_var_t mask;
+	char name[MLX5_MAX_IRQ_NAME];
+};
+
 struct mlx5_priv {
 	char			name[MLX5_MAX_NAME_LEN];
 	struct mlx5_eq_table	eq_table;
+	struct msix_entry	*msix_arr;
+	struct mlx5_irq_info	*irq_info;
 	struct mlx5_uuar_info	uuari;
 	MLX5_DECLARE_DOORBELL_LOCK(cq_uar_lock);
 
-- 
cgit v1.2.3


From e281682bf29438848daac11627216bceb1507b71 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Thu, 28 May 2015 22:28:40 +0300
Subject: net/mlx5_core: HW data structs/types definitions cleanup

mlx5_ifc.h was heavily modified here since it is now generated by a
script from the device specification (PRM rev 0.25). This specification
is backward compatible to existing hardware.

Some structures/fields were added here in order to enable the Ethernet
functionality of the driver.

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/device.h   |  113 +-
 include/linux/mlx5/driver.h   |    4 +-
 include/linux/mlx5/mlx5_ifc.h | 6608 ++++++++++++++++++++++++++++++++++++++++-
 include/linux/mlx5/qp.h       |   25 +
 4 files changed, 6650 insertions(+), 100 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index abf65c790421..feebed7b392b 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -35,6 +35,7 @@
 
 #include <linux/types.h>
 #include <rdma/ib_verbs.h>
+#include <linux/mlx5/mlx5_ifc.h>
 
 #if defined(__LITTLE_ENDIAN)
 #define MLX5_SET_HOST_ENDIANNESS	0
@@ -70,6 +71,14 @@
 		     << __mlx5_dw_bit_off(typ, fld))); \
 } while (0)
 
+#define MLX5_SET_TO_ONES(typ, p, fld) do { \
+	BUILD_BUG_ON(__mlx5_st_sz_bits(typ) % 32);             \
+	*((__be32 *)(p) + __mlx5_dw_off(typ, fld)) = \
+	cpu_to_be32((be32_to_cpu(*((__be32 *)(p) + __mlx5_dw_off(typ, fld))) & \
+		     (~__mlx5_dw_mask(typ, fld))) | ((__mlx5_mask(typ, fld)) \
+		     << __mlx5_dw_bit_off(typ, fld))); \
+} while (0)
+
 #define MLX5_GET(typ, p, fld) ((be32_to_cpu(*((__be32 *)(p) +\
 __mlx5_dw_off(typ, fld))) >> __mlx5_dw_bit_off(typ, fld)) & \
 __mlx5_mask(typ, fld))
@@ -264,6 +273,7 @@ enum {
 	MLX5_OPCODE_RDMA_WRITE_IMM	= 0x09,
 	MLX5_OPCODE_SEND		= 0x0a,
 	MLX5_OPCODE_SEND_IMM		= 0x0b,
+	MLX5_OPCODE_LSO			= 0x0e,
 	MLX5_OPCODE_RDMA_READ		= 0x10,
 	MLX5_OPCODE_ATOMIC_CS		= 0x11,
 	MLX5_OPCODE_ATOMIC_FA		= 0x12,
@@ -541,6 +551,10 @@ struct mlx5_cmd_prot_block {
 	u8		sig;
 };
 
+enum {
+	MLX5_CQE_SYND_FLUSHED_IN_ERROR = 5,
+};
+
 struct mlx5_err_cqe {
 	u8	rsvd0[32];
 	__be32	srqn;
@@ -554,13 +568,22 @@ struct mlx5_err_cqe {
 };
 
 struct mlx5_cqe64 {
-	u8		rsvd0[17];
+	u8		rsvd0[4];
+	u8		lro_tcppsh_abort_dupack;
+	u8		lro_min_ttl;
+	__be16		lro_tcp_win;
+	__be32		lro_ack_seq_num;
+	__be32		rss_hash_result;
+	u8		rss_hash_type;
 	u8		ml_path;
-	u8		rsvd20[4];
+	u8		rsvd20[2];
+	__be16		check_sum;
 	__be16		slid;
 	__be32		flags_rqpn;
-	u8		rsvd28[4];
-	__be32		srqn;
+	u8		hds_ip_ext;
+	u8		l4_hdr_type_etc;
+	__be16		vlan_info;
+	__be32		srqn; /* [31:24]: lro_num_seg, [23:0]: srqn */
 	__be32		imm_inval_pkey;
 	u8		rsvd40[4];
 	__be32		byte_cnt;
@@ -571,6 +594,40 @@ struct mlx5_cqe64 {
 	u8		op_own;
 };
 
+static inline int get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe)
+{
+	return (cqe->lro_tcppsh_abort_dupack >> 6) & 1;
+}
+
+static inline u8 get_cqe_l4_hdr_type(struct mlx5_cqe64 *cqe)
+{
+	return (cqe->l4_hdr_type_etc >> 4) & 0x7;
+}
+
+static inline int cqe_has_vlan(struct mlx5_cqe64 *cqe)
+{
+	return !!(cqe->l4_hdr_type_etc & 0x1);
+}
+
+enum {
+	CQE_L4_HDR_TYPE_NONE			= 0x0,
+	CQE_L4_HDR_TYPE_TCP_NO_ACK		= 0x1,
+	CQE_L4_HDR_TYPE_UDP			= 0x2,
+	CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA		= 0x3,
+	CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA	= 0x4,
+};
+
+enum {
+	CQE_RSS_HTYPE_IP	= 0x3 << 6,
+	CQE_RSS_HTYPE_L4	= 0x3 << 2,
+};
+
+enum {
+	CQE_L2_OK	= 1 << 0,
+	CQE_L3_OK	= 1 << 1,
+	CQE_L4_OK	= 1 << 2,
+};
+
 struct mlx5_sig_err_cqe {
 	u8		rsvd0[16];
 	__be32		expected_trans_sig;
@@ -996,4 +1053,52 @@ struct mlx5_destroy_psv_out {
 	u8                      rsvd[8];
 };
 
+#define MLX5_CMD_OP_MAX 0x920
+
+enum {
+	VPORT_STATE_DOWN		= 0x0,
+	VPORT_STATE_UP			= 0x1,
+};
+
+enum {
+	MLX5_L3_PROT_TYPE_IPV4		= 0,
+	MLX5_L3_PROT_TYPE_IPV6		= 1,
+};
+
+enum {
+	MLX5_L4_PROT_TYPE_TCP		= 0,
+	MLX5_L4_PROT_TYPE_UDP		= 1,
+};
+
+enum {
+	MLX5_HASH_FIELD_SEL_SRC_IP	= 1 << 0,
+	MLX5_HASH_FIELD_SEL_DST_IP	= 1 << 1,
+	MLX5_HASH_FIELD_SEL_L4_SPORT	= 1 << 2,
+	MLX5_HASH_FIELD_SEL_L4_DPORT	= 1 << 3,
+	MLX5_HASH_FIELD_SEL_IPSEC_SPI	= 1 << 4,
+};
+
+enum {
+	MLX5_MATCH_OUTER_HEADERS	= 1 << 0,
+	MLX5_MATCH_MISC_PARAMETERS	= 1 << 1,
+	MLX5_MATCH_INNER_HEADERS	= 1 << 2,
+
+};
+
+enum {
+	MLX5_FLOW_TABLE_TYPE_NIC_RCV	= 0,
+	MLX5_FLOW_TABLE_TYPE_ESWITCH	= 4,
+};
+
+enum {
+	MLX5_FLOW_CONTEXT_DEST_TYPE_VPORT	= 0,
+	MLX5_FLOW_CONTEXT_DEST_TYPE_FLOW_TABLE	= 1,
+	MLX5_FLOW_CONTEXT_DEST_TYPE_TIR		= 2,
+};
+
+enum {
+	MLX5_RQC_RQ_TYPE_MEMORY_RQ_INLINE = 0x0,
+	MLX5_RQC_RQ_TYPE_MEMORY_RQ_RPM    = 0x1,
+};
+
 #endif /* MLX5_DEVICE_H */
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 9e8979502826..3fd4fdc1ba16 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -44,7 +44,6 @@
 
 #include <linux/mlx5/device.h>
 #include <linux/mlx5/doorbell.h>
-#include <linux/mlx5/mlx5_ifc.h>
 
 enum {
 	MLX5_BOARD_ID_LEN = 64,
@@ -278,7 +277,6 @@ struct mlx5_general_caps {
 	u8	log_max_mkey;
 	u8	log_max_pd;
 	u8	log_max_srq;
-	u8	log_max_strq;
 	u8	log_max_mrw_sz;
 	u8	log_max_bsf_list_size;
 	u8	log_max_klm_list_size;
@@ -664,6 +662,8 @@ int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn);
 int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn);
 int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari);
 int mlx5_free_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari);
+int mlx5_alloc_map_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar);
+void mlx5_unmap_free_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar);
 void mlx5_health_cleanup(void);
 void  __init mlx5_health_init(void);
 void mlx5_start_health_poll(struct mlx5_core_dev *dev);
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index cb3ad17edd1f..b27e9f6e090a 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2013-2015, Mellanox Technologies, Ltd.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -28,11 +28,44 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- */
-
+*/
 #ifndef MLX5_IFC_H
 #define MLX5_IFC_H
 
+enum {
+	MLX5_EVENT_TYPE_CODING_COMPLETION_EVENTS                   = 0x0,
+	MLX5_EVENT_TYPE_CODING_PATH_MIGRATED_SUCCEEDED             = 0x1,
+	MLX5_EVENT_TYPE_CODING_COMMUNICATION_ESTABLISHED           = 0x2,
+	MLX5_EVENT_TYPE_CODING_SEND_QUEUE_DRAINED                  = 0x3,
+	MLX5_EVENT_TYPE_CODING_LAST_WQE_REACHED                    = 0x13,
+	MLX5_EVENT_TYPE_CODING_SRQ_LIMIT                           = 0x14,
+	MLX5_EVENT_TYPE_CODING_DCT_ALL_CONNECTIONS_CLOSED          = 0x1c,
+	MLX5_EVENT_TYPE_CODING_DCT_ACCESS_KEY_VIOLATION            = 0x1d,
+	MLX5_EVENT_TYPE_CODING_CQ_ERROR                            = 0x4,
+	MLX5_EVENT_TYPE_CODING_LOCAL_WQ_CATASTROPHIC_ERROR         = 0x5,
+	MLX5_EVENT_TYPE_CODING_PATH_MIGRATION_FAILED               = 0x7,
+	MLX5_EVENT_TYPE_CODING_PAGE_FAULT_EVENT                    = 0xc,
+	MLX5_EVENT_TYPE_CODING_INVALID_REQUEST_LOCAL_WQ_ERROR      = 0x10,
+	MLX5_EVENT_TYPE_CODING_LOCAL_ACCESS_VIOLATION_WQ_ERROR     = 0x11,
+	MLX5_EVENT_TYPE_CODING_LOCAL_SRQ_CATASTROPHIC_ERROR        = 0x12,
+	MLX5_EVENT_TYPE_CODING_INTERNAL_ERROR                      = 0x8,
+	MLX5_EVENT_TYPE_CODING_PORT_STATE_CHANGE                   = 0x9,
+	MLX5_EVENT_TYPE_CODING_GPIO_EVENT                          = 0x15,
+	MLX5_EVENT_TYPE_CODING_REMOTE_CONFIGURATION_PROTOCOL_EVENT = 0x19,
+	MLX5_EVENT_TYPE_CODING_DOORBELL_BLUEFLAME_CONGESTION_EVENT = 0x1a,
+	MLX5_EVENT_TYPE_CODING_STALL_VL_EVENT                      = 0x1b,
+	MLX5_EVENT_TYPE_CODING_DROPPED_PACKET_LOGGED_EVENT         = 0x1f,
+	MLX5_EVENT_TYPE_CODING_COMMAND_INTERFACE_COMPLETION        = 0xa,
+	MLX5_EVENT_TYPE_CODING_PAGE_REQUEST                        = 0xb
+};
+
+enum {
+	MLX5_MODIFY_TIR_BITMASK_LRO                   = 0x0,
+	MLX5_MODIFY_TIR_BITMASK_INDIRECT_TABLE        = 0x1,
+	MLX5_MODIFY_TIR_BITMASK_HASH                  = 0x2,
+	MLX5_MODIFY_TIR_BITMASK_TUNNELED_OFFLOAD_EN   = 0x3
+};
+
 enum {
 	MLX5_CMD_OP_QUERY_HCA_CAP                 = 0x100,
 	MLX5_CMD_OP_QUERY_ADAPTER                 = 0x101,
@@ -43,6 +76,8 @@ enum {
 	MLX5_CMD_OP_QUERY_PAGES                   = 0x107,
 	MLX5_CMD_OP_MANAGE_PAGES                  = 0x108,
 	MLX5_CMD_OP_SET_HCA_CAP                   = 0x109,
+	MLX5_CMD_OP_QUERY_ISSI                    = 0x10a,
+	MLX5_CMD_OP_SET_ISSI                      = 0x10b,
 	MLX5_CMD_OP_CREATE_MKEY                   = 0x200,
 	MLX5_CMD_OP_QUERY_MKEY                    = 0x201,
 	MLX5_CMD_OP_DESTROY_MKEY                  = 0x202,
@@ -66,6 +101,7 @@ enum {
 	MLX5_CMD_OP_2ERR_QP                       = 0x507,
 	MLX5_CMD_OP_2RST_QP                       = 0x50a,
 	MLX5_CMD_OP_QUERY_QP                      = 0x50b,
+	MLX5_CMD_OP_SQD_RTS_QP                    = 0x50c,
 	MLX5_CMD_OP_INIT2INIT_QP                  = 0x50e,
 	MLX5_CMD_OP_CREATE_PSV                    = 0x600,
 	MLX5_CMD_OP_DESTROY_PSV                   = 0x601,
@@ -73,7 +109,10 @@ enum {
 	MLX5_CMD_OP_DESTROY_SRQ                   = 0x701,
 	MLX5_CMD_OP_QUERY_SRQ                     = 0x702,
 	MLX5_CMD_OP_ARM_RQ                        = 0x703,
-	MLX5_CMD_OP_RESIZE_SRQ                    = 0x704,
+	MLX5_CMD_OP_CREATE_XRC_SRQ                = 0x705,
+	MLX5_CMD_OP_DESTROY_XRC_SRQ               = 0x706,
+	MLX5_CMD_OP_QUERY_XRC_SRQ                 = 0x707,
+	MLX5_CMD_OP_ARM_XRC_SRQ                   = 0x708,
 	MLX5_CMD_OP_CREATE_DCT                    = 0x710,
 	MLX5_CMD_OP_DESTROY_DCT                   = 0x711,
 	MLX5_CMD_OP_DRAIN_DCT                     = 0x712,
@@ -85,8 +124,12 @@ enum {
 	MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT      = 0x753,
 	MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT       = 0x754,
 	MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT      = 0x755,
-	MLX5_CMD_OP_QUERY_RCOE_ADDRESS            = 0x760,
+	MLX5_CMD_OP_QUERY_ROCE_ADDRESS            = 0x760,
 	MLX5_CMD_OP_SET_ROCE_ADDRESS              = 0x761,
+	MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT       = 0x762,
+	MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT      = 0x763,
+	MLX5_CMD_OP_QUERY_HCA_VPORT_GID           = 0x764,
+	MLX5_CMD_OP_QUERY_HCA_VPORT_PKEY          = 0x765,
 	MLX5_CMD_OP_QUERY_VPORT_COUNTER           = 0x770,
 	MLX5_CMD_OP_ALLOC_Q_COUNTER               = 0x771,
 	MLX5_CMD_OP_DEALLOC_Q_COUNTER             = 0x772,
@@ -98,7 +141,7 @@ enum {
 	MLX5_CMD_OP_CONFIG_INT_MODERATION         = 0x804,
 	MLX5_CMD_OP_ACCESS_REG                    = 0x805,
 	MLX5_CMD_OP_ATTACH_TO_MCG                 = 0x806,
-	MLX5_CMD_OP_DETACH_FROM_MCG               = 0x807,
+	MLX5_CMD_OP_DETTACH_FROM_MCG              = 0x807,
 	MLX5_CMD_OP_GET_DROPPED_PACKET_LOG        = 0x80a,
 	MLX5_CMD_OP_MAD_IFC                       = 0x50d,
 	MLX5_CMD_OP_QUERY_MAD_DEMUX               = 0x80b,
@@ -106,23 +149,22 @@ enum {
 	MLX5_CMD_OP_NOP                           = 0x80d,
 	MLX5_CMD_OP_ALLOC_XRCD                    = 0x80e,
 	MLX5_CMD_OP_DEALLOC_XRCD                  = 0x80f,
-	MLX5_CMD_OP_SET_BURST_SIZE                = 0x812,
-	MLX5_CMD_OP_QUERY_BURST_SZIE              = 0x813,
-	MLX5_CMD_OP_ACTIVATE_TRACER               = 0x814,
-	MLX5_CMD_OP_DEACTIVATE_TRACER             = 0x815,
-	MLX5_CMD_OP_CREATE_SNIFFER_RULE           = 0x820,
-	MLX5_CMD_OP_DESTROY_SNIFFER_RULE          = 0x821,
-	MLX5_CMD_OP_QUERY_CONG_PARAMS             = 0x822,
-	MLX5_CMD_OP_MODIFY_CONG_PARAMS            = 0x823,
-	MLX5_CMD_OP_QUERY_CONG_STATISTICS         = 0x824,
+	MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN        = 0x816,
+	MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN      = 0x817,
+	MLX5_CMD_OP_QUERY_CONG_STATUS             = 0x822,
+	MLX5_CMD_OP_MODIFY_CONG_STATUS            = 0x823,
+	MLX5_CMD_OP_QUERY_CONG_PARAMS             = 0x824,
+	MLX5_CMD_OP_MODIFY_CONG_PARAMS            = 0x825,
+	MLX5_CMD_OP_QUERY_CONG_STATISTICS         = 0x826,
+	MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT           = 0x827,
+	MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT        = 0x828,
+	MLX5_CMD_OP_SET_L2_TABLE_ENTRY            = 0x829,
+	MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY          = 0x82a,
+	MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY         = 0x82b,
 	MLX5_CMD_OP_CREATE_TIR                    = 0x900,
 	MLX5_CMD_OP_MODIFY_TIR                    = 0x901,
 	MLX5_CMD_OP_DESTROY_TIR                   = 0x902,
 	MLX5_CMD_OP_QUERY_TIR                     = 0x903,
-	MLX5_CMD_OP_CREATE_TIS                    = 0x912,
-	MLX5_CMD_OP_MODIFY_TIS                    = 0x913,
-	MLX5_CMD_OP_DESTROY_TIS                   = 0x914,
-	MLX5_CMD_OP_QUERY_TIS                     = 0x915,
 	MLX5_CMD_OP_CREATE_SQ                     = 0x904,
 	MLX5_CMD_OP_MODIFY_SQ                     = 0x905,
 	MLX5_CMD_OP_DESTROY_SQ                    = 0x906,
@@ -135,9 +177,430 @@ enum {
 	MLX5_CMD_OP_MODIFY_RMP                    = 0x90d,
 	MLX5_CMD_OP_DESTROY_RMP                   = 0x90e,
 	MLX5_CMD_OP_QUERY_RMP                     = 0x90f,
-	MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY          = 0x910,
-	MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY        = 0x911,
-	MLX5_CMD_OP_MAX				  = 0x911
+	MLX5_CMD_OP_CREATE_TIS                    = 0x912,
+	MLX5_CMD_OP_MODIFY_TIS                    = 0x913,
+	MLX5_CMD_OP_DESTROY_TIS                   = 0x914,
+	MLX5_CMD_OP_QUERY_TIS                     = 0x915,
+	MLX5_CMD_OP_CREATE_RQT                    = 0x916,
+	MLX5_CMD_OP_MODIFY_RQT                    = 0x917,
+	MLX5_CMD_OP_DESTROY_RQT                   = 0x918,
+	MLX5_CMD_OP_QUERY_RQT                     = 0x919,
+	MLX5_CMD_OP_CREATE_FLOW_TABLE             = 0x930,
+	MLX5_CMD_OP_DESTROY_FLOW_TABLE            = 0x931,
+	MLX5_CMD_OP_QUERY_FLOW_TABLE              = 0x932,
+	MLX5_CMD_OP_CREATE_FLOW_GROUP             = 0x933,
+	MLX5_CMD_OP_DESTROY_FLOW_GROUP            = 0x934,
+	MLX5_CMD_OP_QUERY_FLOW_GROUP              = 0x935,
+	MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY          = 0x936,
+	MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY        = 0x937,
+	MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY       = 0x938
+};
+
+struct mlx5_ifc_flow_table_fields_supported_bits {
+	u8         outer_dmac[0x1];
+	u8         outer_smac[0x1];
+	u8         outer_ether_type[0x1];
+	u8         reserved_0[0x1];
+	u8         outer_first_prio[0x1];
+	u8         outer_first_cfi[0x1];
+	u8         outer_first_vid[0x1];
+	u8         reserved_1[0x1];
+	u8         outer_second_prio[0x1];
+	u8         outer_second_cfi[0x1];
+	u8         outer_second_vid[0x1];
+	u8         reserved_2[0x1];
+	u8         outer_sip[0x1];
+	u8         outer_dip[0x1];
+	u8         outer_frag[0x1];
+	u8         outer_ip_protocol[0x1];
+	u8         outer_ip_ecn[0x1];
+	u8         outer_ip_dscp[0x1];
+	u8         outer_udp_sport[0x1];
+	u8         outer_udp_dport[0x1];
+	u8         outer_tcp_sport[0x1];
+	u8         outer_tcp_dport[0x1];
+	u8         outer_tcp_flags[0x1];
+	u8         outer_gre_protocol[0x1];
+	u8         outer_gre_key[0x1];
+	u8         outer_vxlan_vni[0x1];
+	u8         reserved_3[0x5];
+	u8         source_eswitch_port[0x1];
+
+	u8         inner_dmac[0x1];
+	u8         inner_smac[0x1];
+	u8         inner_ether_type[0x1];
+	u8         reserved_4[0x1];
+	u8         inner_first_prio[0x1];
+	u8         inner_first_cfi[0x1];
+	u8         inner_first_vid[0x1];
+	u8         reserved_5[0x1];
+	u8         inner_second_prio[0x1];
+	u8         inner_second_cfi[0x1];
+	u8         inner_second_vid[0x1];
+	u8         reserved_6[0x1];
+	u8         inner_sip[0x1];
+	u8         inner_dip[0x1];
+	u8         inner_frag[0x1];
+	u8         inner_ip_protocol[0x1];
+	u8         inner_ip_ecn[0x1];
+	u8         inner_ip_dscp[0x1];
+	u8         inner_udp_sport[0x1];
+	u8         inner_udp_dport[0x1];
+	u8         inner_tcp_sport[0x1];
+	u8         inner_tcp_dport[0x1];
+	u8         inner_tcp_flags[0x1];
+	u8         reserved_7[0x9];
+
+	u8         reserved_8[0x40];
+};
+
+struct mlx5_ifc_flow_table_prop_layout_bits {
+	u8         ft_support[0x1];
+	u8         reserved_0[0x1f];
+
+	u8         reserved_1[0x2];
+	u8         log_max_ft_size[0x6];
+	u8         reserved_2[0x10];
+	u8         max_ft_level[0x8];
+
+	u8         reserved_3[0x20];
+
+	u8         reserved_4[0x18];
+	u8         log_max_ft_num[0x8];
+
+	u8         reserved_5[0x18];
+	u8         log_max_destination[0x8];
+
+	u8         reserved_6[0x18];
+	u8         log_max_flow[0x8];
+
+	u8         reserved_7[0x40];
+
+	struct mlx5_ifc_flow_table_fields_supported_bits ft_field_support;
+
+	struct mlx5_ifc_flow_table_fields_supported_bits ft_field_bitmask_support;
+};
+
+struct mlx5_ifc_odp_per_transport_service_cap_bits {
+	u8         send[0x1];
+	u8         receive[0x1];
+	u8         write[0x1];
+	u8         read[0x1];
+	u8         reserved_0[0x1];
+	u8         srq_receive[0x1];
+	u8         reserved_1[0x1a];
+};
+
+struct mlx5_ifc_fte_match_set_lyr_2_4_bits {
+	u8         smac_47_16[0x20];
+
+	u8         smac_15_0[0x10];
+	u8         ethertype[0x10];
+
+	u8         dmac_47_16[0x20];
+
+	u8         dmac_15_0[0x10];
+	u8         first_prio[0x3];
+	u8         first_cfi[0x1];
+	u8         first_vid[0xc];
+
+	u8         ip_protocol[0x8];
+	u8         ip_dscp[0x6];
+	u8         ip_ecn[0x2];
+	u8         vlan_tag[0x1];
+	u8         reserved_0[0x1];
+	u8         frag[0x1];
+	u8         reserved_1[0x4];
+	u8         tcp_flags[0x9];
+
+	u8         tcp_sport[0x10];
+	u8         tcp_dport[0x10];
+
+	u8         reserved_2[0x20];
+
+	u8         udp_sport[0x10];
+	u8         udp_dport[0x10];
+
+	u8         src_ip[4][0x20];
+
+	u8         dst_ip[4][0x20];
+};
+
+struct mlx5_ifc_fte_match_set_misc_bits {
+	u8         reserved_0[0x20];
+
+	u8         reserved_1[0x10];
+	u8         source_port[0x10];
+
+	u8         outer_second_prio[0x3];
+	u8         outer_second_cfi[0x1];
+	u8         outer_second_vid[0xc];
+	u8         inner_second_prio[0x3];
+	u8         inner_second_cfi[0x1];
+	u8         inner_second_vid[0xc];
+
+	u8         outer_second_vlan_tag[0x1];
+	u8         inner_second_vlan_tag[0x1];
+	u8         reserved_2[0xe];
+	u8         gre_protocol[0x10];
+
+	u8         gre_key_h[0x18];
+	u8         gre_key_l[0x8];
+
+	u8         vxlan_vni[0x18];
+	u8         reserved_3[0x8];
+
+	u8         reserved_4[0x20];
+
+	u8         reserved_5[0xc];
+	u8         outer_ipv6_flow_label[0x14];
+
+	u8         reserved_6[0xc];
+	u8         inner_ipv6_flow_label[0x14];
+
+	u8         reserved_7[0xe0];
+};
+
+struct mlx5_ifc_cmd_pas_bits {
+	u8         pa_h[0x20];
+
+	u8         pa_l[0x14];
+	u8         reserved_0[0xc];
+};
+
+struct mlx5_ifc_uint64_bits {
+	u8         hi[0x20];
+
+	u8         lo[0x20];
+};
+
+enum {
+	MLX5_ADS_STAT_RATE_NO_LIMIT  = 0x0,
+	MLX5_ADS_STAT_RATE_2_5GBPS   = 0x7,
+	MLX5_ADS_STAT_RATE_10GBPS    = 0x8,
+	MLX5_ADS_STAT_RATE_30GBPS    = 0x9,
+	MLX5_ADS_STAT_RATE_5GBPS     = 0xa,
+	MLX5_ADS_STAT_RATE_20GBPS    = 0xb,
+	MLX5_ADS_STAT_RATE_40GBPS    = 0xc,
+	MLX5_ADS_STAT_RATE_60GBPS    = 0xd,
+	MLX5_ADS_STAT_RATE_80GBPS    = 0xe,
+	MLX5_ADS_STAT_RATE_120GBPS   = 0xf,
+};
+
+struct mlx5_ifc_ads_bits {
+	u8         fl[0x1];
+	u8         free_ar[0x1];
+	u8         reserved_0[0xe];
+	u8         pkey_index[0x10];
+
+	u8         reserved_1[0x8];
+	u8         grh[0x1];
+	u8         mlid[0x7];
+	u8         rlid[0x10];
+
+	u8         ack_timeout[0x5];
+	u8         reserved_2[0x3];
+	u8         src_addr_index[0x8];
+	u8         reserved_3[0x4];
+	u8         stat_rate[0x4];
+	u8         hop_limit[0x8];
+
+	u8         reserved_4[0x4];
+	u8         tclass[0x8];
+	u8         flow_label[0x14];
+
+	u8         rgid_rip[16][0x8];
+
+	u8         reserved_5[0x4];
+	u8         f_dscp[0x1];
+	u8         f_ecn[0x1];
+	u8         reserved_6[0x1];
+	u8         f_eth_prio[0x1];
+	u8         ecn[0x2];
+	u8         dscp[0x6];
+	u8         udp_sport[0x10];
+
+	u8         dei_cfi[0x1];
+	u8         eth_prio[0x3];
+	u8         sl[0x4];
+	u8         port[0x8];
+	u8         rmac_47_32[0x10];
+
+	u8         rmac_31_0[0x20];
+};
+
+struct mlx5_ifc_flow_table_nic_cap_bits {
+	u8         reserved_0[0x200];
+
+	struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive;
+
+	u8         reserved_1[0x200];
+
+	struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive_sniffer;
+
+	struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_transmit;
+
+	u8         reserved_2[0x200];
+
+	struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_transmit_sniffer;
+
+	u8         reserved_3[0x7200];
+};
+
+struct mlx5_ifc_per_protocol_networking_offload_caps_bits {
+	u8         csum_cap[0x1];
+	u8         vlan_cap[0x1];
+	u8         lro_cap[0x1];
+	u8         lro_psh_flag[0x1];
+	u8         lro_time_stamp[0x1];
+	u8         reserved_0[0x6];
+	u8         max_lso_cap[0x5];
+	u8         reserved_1[0x4];
+	u8         rss_ind_tbl_cap[0x4];
+	u8         reserved_2[0x3];
+	u8         tunnel_lso_const_out_ip_id[0x1];
+	u8         reserved_3[0x2];
+	u8         tunnel_statless_gre[0x1];
+	u8         tunnel_stateless_vxlan[0x1];
+
+	u8         reserved_4[0x20];
+
+	u8         reserved_5[0x10];
+	u8         lro_min_mss_size[0x10];
+
+	u8         reserved_6[0x120];
+
+	u8         lro_timer_supported_periods[4][0x20];
+
+	u8         reserved_7[0x600];
+};
+
+struct mlx5_ifc_roce_cap_bits {
+	u8         roce_apm[0x1];
+	u8         reserved_0[0x1f];
+
+	u8         reserved_1[0x60];
+
+	u8         reserved_2[0xc];
+	u8         l3_type[0x4];
+	u8         reserved_3[0x8];
+	u8         roce_version[0x8];
+
+	u8         reserved_4[0x10];
+	u8         r_roce_dest_udp_port[0x10];
+
+	u8         r_roce_max_src_udp_port[0x10];
+	u8         r_roce_min_src_udp_port[0x10];
+
+	u8         reserved_5[0x10];
+	u8         roce_address_table_size[0x10];
+
+	u8         reserved_6[0x700];
+};
+
+enum {
+	MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_1_BYTE     = 0x0,
+	MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_2_BYTES    = 0x2,
+	MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_4_BYTES    = 0x4,
+	MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_8_BYTES    = 0x8,
+	MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_16_BYTES   = 0x10,
+	MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_32_BYTES   = 0x20,
+	MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_64_BYTES   = 0x40,
+	MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_128_BYTES  = 0x80,
+	MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_256_BYTES  = 0x100,
+};
+
+enum {
+	MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_1_BYTE     = 0x1,
+	MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_2_BYTES    = 0x2,
+	MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_4_BYTES    = 0x4,
+	MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_8_BYTES    = 0x8,
+	MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_16_BYTES   = 0x10,
+	MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_32_BYTES   = 0x20,
+	MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_64_BYTES   = 0x40,
+	MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_128_BYTES  = 0x80,
+	MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_256_BYTES  = 0x100,
+};
+
+struct mlx5_ifc_atomic_caps_bits {
+	u8         reserved_0[0x40];
+
+	u8         atomic_req_endianness[0x1];
+	u8         reserved_1[0x1f];
+
+	u8         reserved_2[0x20];
+
+	u8         reserved_3[0x10];
+	u8         atomic_operations[0x10];
+
+	u8         reserved_4[0x10];
+	u8         atomic_size_qp[0x10];
+
+	u8         reserved_5[0x10];
+	u8         atomic_size_dc[0x10];
+
+	u8         reserved_6[0x720];
+};
+
+struct mlx5_ifc_odp_cap_bits {
+	u8         reserved_0[0x40];
+
+	u8         sig[0x1];
+	u8         reserved_1[0x1f];
+
+	u8         reserved_2[0x20];
+
+	struct mlx5_ifc_odp_per_transport_service_cap_bits rc_odp_caps;
+
+	struct mlx5_ifc_odp_per_transport_service_cap_bits uc_odp_caps;
+
+	struct mlx5_ifc_odp_per_transport_service_cap_bits ud_odp_caps;
+
+	u8         reserved_3[0x720];
+};
+
+enum {
+	MLX5_WQ_TYPE_LINKED_LIST  = 0x0,
+	MLX5_WQ_TYPE_CYCLIC       = 0x1,
+	MLX5_WQ_TYPE_STRQ         = 0x2,
+};
+
+enum {
+	MLX5_WQ_END_PAD_MODE_NONE   = 0x0,
+	MLX5_WQ_END_PAD_MODE_ALIGN  = 0x1,
+};
+
+enum {
+	MLX5_CMD_HCA_CAP_GID_TABLE_SIZE_8_GID_ENTRIES    = 0x0,
+	MLX5_CMD_HCA_CAP_GID_TABLE_SIZE_16_GID_ENTRIES   = 0x1,
+	MLX5_CMD_HCA_CAP_GID_TABLE_SIZE_32_GID_ENTRIES   = 0x2,
+	MLX5_CMD_HCA_CAP_GID_TABLE_SIZE_64_GID_ENTRIES   = 0x3,
+	MLX5_CMD_HCA_CAP_GID_TABLE_SIZE_128_GID_ENTRIES  = 0x4,
+};
+
+enum {
+	MLX5_CMD_HCA_CAP_PKEY_TABLE_SIZE_128_ENTRIES  = 0x0,
+	MLX5_CMD_HCA_CAP_PKEY_TABLE_SIZE_256_ENTRIES  = 0x1,
+	MLX5_CMD_HCA_CAP_PKEY_TABLE_SIZE_512_ENTRIES  = 0x2,
+	MLX5_CMD_HCA_CAP_PKEY_TABLE_SIZE_1K_ENTRIES   = 0x3,
+	MLX5_CMD_HCA_CAP_PKEY_TABLE_SIZE_2K_ENTRIES   = 0x4,
+	MLX5_CMD_HCA_CAP_PKEY_TABLE_SIZE_4K_ENTRIES   = 0x5,
+};
+
+enum {
+	MLX5_CMD_HCA_CAP_PORT_TYPE_IB        = 0x0,
+	MLX5_CMD_HCA_CAP_PORT_TYPE_ETHERNET  = 0x1,
+};
+
+enum {
+	MLX5_CMD_HCA_CAP_CMDIF_CHECKSUM_DISABLED       = 0x0,
+	MLX5_CMD_HCA_CAP_CMDIF_CHECKSUM_INITIAL_STATE  = 0x1,
+	MLX5_CMD_HCA_CAP_CMDIF_CHECKSUM_ENABLED        = 0x3,
+};
+
+enum {
+	MLX5_CAP_PORT_TYPE_IB  = 0x0,
+	MLX5_CAP_PORT_TYPE_ETH = 0x1,
 };
 
 struct mlx5_ifc_cmd_hca_cap_bits {
@@ -148,9 +611,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         reserved_1[0xb];
 	u8         log_max_qp[0x5];
 
-	u8         log_max_strq_sz[0x8];
-	u8         reserved_2[0x3];
-	u8         log_max_srqs[0x5];
+	u8         reserved_2[0xb];
+	u8         log_max_srq[0x5];
 	u8         reserved_3[0x10];
 
 	u8         reserved_4[0x8];
@@ -185,165 +647,6123 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         pad_cap[0x1];
 	u8         cc_query_allowed[0x1];
 	u8         cc_modify_allowed[0x1];
-	u8         reserved_15[0x1d];
+	u8         reserved_15[0xd];
+	u8         gid_table_size[0x10];
 
-	u8         reserved_16[0x6];
+	u8         out_of_seq_cnt[0x1];
+	u8         vport_counters[0x1];
+	u8         reserved_16[0x4];
 	u8         max_qp_cnt[0xa];
 	u8         pkey_table_size[0x10];
 
-	u8         eswitch_owner[0x1];
-	u8         reserved_17[0xa];
+	u8         vport_group_manager[0x1];
+	u8         vhca_group_manager[0x1];
+	u8         ib_virt[0x1];
+	u8         eth_virt[0x1];
+	u8         reserved_17[0x1];
+	u8         ets[0x1];
+	u8         nic_flow_table[0x1];
+	u8         reserved_18[0x4];
 	u8         local_ca_ack_delay[0x5];
-	u8         reserved_18[0x8];
+	u8         reserved_19[0x6];
+	u8         port_type[0x2];
 	u8         num_ports[0x8];
 
-	u8         reserved_19[0x3];
+	u8         reserved_20[0x3];
 	u8         log_max_msg[0x5];
-	u8         reserved_20[0x18];
+	u8         reserved_21[0x18];
 
 	u8         stat_rate_support[0x10];
-	u8         reserved_21[0x10];
+	u8         reserved_22[0xc];
+	u8         cqe_version[0x4];
 
-	u8         reserved_22[0x10];
+	u8         compact_address_vector[0x1];
+	u8         reserved_23[0xe];
+	u8         drain_sigerr[0x1];
 	u8         cmdif_checksum[0x2];
 	u8         sigerr_cqe[0x1];
-	u8         reserved_23[0x1];
+	u8         reserved_24[0x1];
 	u8         wq_signature[0x1];
 	u8         sctr_data_cqe[0x1];
-	u8         reserved_24[0x1];
+	u8         reserved_25[0x1];
 	u8         sho[0x1];
 	u8         tph[0x1];
 	u8         rf[0x1];
-	u8         dc[0x1];
-	u8         reserved_25[0x2];
+	u8         dct[0x1];
+	u8         reserved_26[0x1];
+	u8         eth_net_offloads[0x1];
 	u8         roce[0x1];
 	u8         atomic[0x1];
-	u8         rsz_srq[0x1];
+	u8         reserved_27[0x1];
 
 	u8         cq_oi[0x1];
 	u8         cq_resize[0x1];
 	u8         cq_moderation[0x1];
-	u8         sniffer_rule_flow[0x1];
-	u8         sniffer_rule_vport[0x1];
-	u8         sniffer_rule_phy[0x1];
-	u8         reserved_26[0x1];
+	u8         reserved_28[0x3];
+	u8         cq_eq_remap[0x1];
 	u8         pg[0x1];
 	u8         block_lb_mc[0x1];
-	u8         reserved_27[0x3];
+	u8         reserved_29[0x1];
+	u8         scqe_break_moderation[0x1];
+	u8         reserved_30[0x1];
 	u8         cd[0x1];
-	u8         reserved_28[0x1];
+	u8         reserved_31[0x1];
 	u8         apm[0x1];
-	u8         reserved_29[0x7];
+	u8         reserved_32[0x7];
 	u8         qkv[0x1];
 	u8         pkv[0x1];
-	u8         reserved_30[0x4];
+	u8         reserved_33[0x4];
 	u8         xrc[0x1];
 	u8         ud[0x1];
 	u8         uc[0x1];
 	u8         rc[0x1];
 
-	u8         reserved_31[0xa];
+	u8         reserved_34[0xa];
 	u8         uar_sz[0x6];
-	u8         reserved_32[0x8];
+	u8         reserved_35[0x8];
 	u8         log_pg_sz[0x8];
 
 	u8         bf[0x1];
-	u8         reserved_33[0xa];
+	u8         reserved_36[0x1];
+	u8         pad_tx_eth_packet[0x1];
+	u8         reserved_37[0x8];
 	u8         log_bf_reg_size[0x5];
-	u8         reserved_34[0x10];
+	u8         reserved_38[0x10];
 
-	u8         reserved_35[0x10];
+	u8         reserved_39[0x10];
 	u8         max_wqe_sz_sq[0x10];
 
-	u8         reserved_36[0x10];
+	u8         reserved_40[0x10];
 	u8         max_wqe_sz_rq[0x10];
 
-	u8         reserved_37[0x10];
+	u8         reserved_41[0x10];
 	u8         max_wqe_sz_sq_dc[0x10];
 
-	u8         reserved_38[0x7];
+	u8         reserved_42[0x7];
 	u8         max_qp_mcg[0x19];
 
-	u8         reserved_39[0x18];
+	u8         reserved_43[0x18];
 	u8         log_max_mcg[0x8];
 
-	u8         reserved_40[0xb];
+	u8         reserved_44[0x3];
+	u8         log_max_transport_domain[0x5];
+	u8         reserved_45[0x3];
 	u8         log_max_pd[0x5];
-	u8         reserved_41[0xb];
+	u8         reserved_46[0xb];
 	u8         log_max_xrcd[0x5];
 
-	u8         reserved_42[0x20];
+	u8         reserved_47[0x20];
 
-	u8         reserved_43[0x3];
+	u8         reserved_48[0x3];
 	u8         log_max_rq[0x5];
-	u8         reserved_44[0x3];
+	u8         reserved_49[0x3];
 	u8         log_max_sq[0x5];
-	u8         reserved_45[0x3];
+	u8         reserved_50[0x3];
 	u8         log_max_tir[0x5];
-	u8         reserved_46[0x3];
+	u8         reserved_51[0x3];
 	u8         log_max_tis[0x5];
 
-	u8         reserved_47[0x13];
-	u8         log_max_rq_per_tir[0x5];
-	u8         reserved_48[0x3];
+	u8         basic_cyclic_rcv_wqe[0x1];
+	u8         reserved_52[0x2];
+	u8         log_max_rmp[0x5];
+	u8         reserved_53[0x3];
+	u8         log_max_rqt[0x5];
+	u8         reserved_54[0x3];
+	u8         log_max_rqt_size[0x5];
+	u8         reserved_55[0x3];
 	u8         log_max_tis_per_sq[0x5];
 
-	u8         reserved_49[0xe0];
+	u8         reserved_56[0x3];
+	u8         log_max_stride_sz_rq[0x5];
+	u8         reserved_57[0x3];
+	u8         log_min_stride_sz_rq[0x5];
+	u8         reserved_58[0x3];
+	u8         log_max_stride_sz_sq[0x5];
+	u8         reserved_59[0x3];
+	u8         log_min_stride_sz_sq[0x5];
+
+	u8         reserved_60[0x1b];
+	u8         log_max_wq_sz[0x5];
+
+	u8         reserved_61[0xa0];
 
-	u8         reserved_50[0x10];
+	u8         reserved_62[0x3];
+	u8         log_max_l2_table[0x5];
+	u8         reserved_63[0x8];
 	u8         log_uar_page_sz[0x10];
 
-	u8         reserved_51[0x100];
+	u8         reserved_64[0x100];
 
-	u8         reserved_52[0x1f];
+	u8         reserved_65[0x1f];
 	u8         cqe_zip[0x1];
 
 	u8         cqe_zip_timeout[0x10];
 	u8         cqe_zip_max_num[0x10];
 
-	u8         reserved_53[0x220];
+	u8         reserved_66[0x220];
 };
 
-struct mlx5_ifc_set_hca_cap_in_bits {
-	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+enum {
+	MLX5_DEST_FORMAT_STRUCT_DESTINATION_TYPE_FLOW_TABLE_  = 0x1,
+	MLX5_DEST_FORMAT_STRUCT_DESTINATION_TYPE_TIR          = 0x2,
+};
 
-	u8         reserved_1[0x10];
-	u8         op_mod[0x10];
+struct mlx5_ifc_dest_format_struct_bits {
+	u8         destination_type[0x8];
+	u8         destination_id[0x18];
 
-	u8         reserved_2[0x40];
+	u8         reserved_0[0x20];
+};
+
+struct mlx5_ifc_fte_match_param_bits {
+	struct mlx5_ifc_fte_match_set_lyr_2_4_bits outer_headers;
+
+	struct mlx5_ifc_fte_match_set_misc_bits misc_parameters;
+
+	struct mlx5_ifc_fte_match_set_lyr_2_4_bits inner_headers;
 
-	struct mlx5_ifc_cmd_hca_cap_bits hca_capability_struct;
+	u8         reserved_0[0xa00];
 };
 
-struct mlx5_ifc_query_hca_cap_in_bits {
-	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+enum {
+	MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_SRC_IP     = 0x0,
+	MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_DST_IP     = 0x1,
+	MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_L4_SPORT   = 0x2,
+	MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_L4_DPORT   = 0x3,
+	MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_IPSEC_SPI  = 0x4,
+};
 
-	u8         reserved_1[0x10];
-	u8         op_mod[0x10];
+struct mlx5_ifc_rx_hash_field_select_bits {
+	u8         l3_prot_type[0x1];
+	u8         l4_prot_type[0x1];
+	u8         selected_fields[0x1e];
+};
 
-	u8         reserved_2[0x40];
+enum {
+	MLX5_WQ_WQ_TYPE_WQ_LINKED_LIST  = 0x0,
+	MLX5_WQ_WQ_TYPE_WQ_CYCLIC       = 0x1,
 };
 
-struct mlx5_ifc_query_hca_cap_out_bits {
-	u8         status[0x8];
+enum {
+	MLX5_WQ_END_PADDING_MODE_END_PAD_NONE   = 0x0,
+	MLX5_WQ_END_PADDING_MODE_END_PAD_ALIGN  = 0x1,
+};
+
+struct mlx5_ifc_wq_bits {
+	u8         wq_type[0x4];
+	u8         wq_signature[0x1];
+	u8         end_padding_mode[0x2];
+	u8         cd_slave[0x1];
 	u8         reserved_0[0x18];
 
-	u8         syndrome[0x20];
+	u8         hds_skip_first_sge[0x1];
+	u8         log2_hds_buf_size[0x3];
+	u8         reserved_1[0x7];
+	u8         page_offset[0x5];
+	u8         lwm[0x10];
 
-	u8         reserved_1[0x40];
+	u8         reserved_2[0x8];
+	u8         pd[0x18];
+
+	u8         reserved_3[0x8];
+	u8         uar_page[0x18];
+
+	u8         dbr_addr[0x40];
+
+	u8         hw_counter[0x20];
+
+	u8         sw_counter[0x20];
+
+	u8         reserved_4[0xc];
+	u8         log_wq_stride[0x4];
+	u8         reserved_5[0x3];
+	u8         log_wq_pg_sz[0x5];
+	u8         reserved_6[0x3];
+	u8         log_wq_sz[0x5];
+
+	u8         reserved_7[0x4e0];
 
-	u8         capability_struct[256][0x8];
+	struct mlx5_ifc_cmd_pas_bits pas[0];
 };
 
-struct mlx5_ifc_set_hca_cap_out_bits {
-	u8         status[0x8];
-	u8         reserved_0[0x18];
+struct mlx5_ifc_rq_num_bits {
+	u8         reserved_0[0x8];
+	u8         rq_num[0x18];
+};
 
-	u8         syndrome[0x20];
+struct mlx5_ifc_mac_address_layout_bits {
+	u8         reserved_0[0x10];
+	u8         mac_addr_47_32[0x10];
 
-	u8         reserved_1[0x40];
+	u8         mac_addr_31_0[0x20];
+};
+
+struct mlx5_ifc_cong_control_r_roce_ecn_np_bits {
+	u8         reserved_0[0xa0];
+
+	u8         min_time_between_cnps[0x20];
+
+	u8         reserved_1[0x12];
+	u8         cnp_dscp[0x6];
+	u8         reserved_2[0x5];
+	u8         cnp_802p_prio[0x3];
+
+	u8         reserved_3[0x720];
+};
+
+struct mlx5_ifc_cong_control_r_roce_ecn_rp_bits {
+	u8         reserved_0[0x60];
+
+	u8         reserved_1[0x4];
+	u8         clamp_tgt_rate[0x1];
+	u8         reserved_2[0x3];
+	u8         clamp_tgt_rate_after_time_inc[0x1];
+	u8         reserved_3[0x17];
+
+	u8         reserved_4[0x20];
+
+	u8         rpg_time_reset[0x20];
+
+	u8         rpg_byte_reset[0x20];
+
+	u8         rpg_threshold[0x20];
+
+	u8         rpg_max_rate[0x20];
+
+	u8         rpg_ai_rate[0x20];
+
+	u8         rpg_hai_rate[0x20];
+
+	u8         rpg_gd[0x20];
+
+	u8         rpg_min_dec_fac[0x20];
+
+	u8         rpg_min_rate[0x20];
+
+	u8         reserved_5[0xe0];
+
+	u8         rate_to_set_on_first_cnp[0x20];
+
+	u8         dce_tcp_g[0x20];
+
+	u8         dce_tcp_rtt[0x20];
+
+	u8         rate_reduce_monitor_period[0x20];
+
+	u8         reserved_6[0x20];
+
+	u8         initial_alpha_value[0x20];
+
+	u8         reserved_7[0x4a0];
+};
+
+struct mlx5_ifc_cong_control_802_1qau_rp_bits {
+	u8         reserved_0[0x80];
+
+	u8         rppp_max_rps[0x20];
+
+	u8         rpg_time_reset[0x20];
+
+	u8         rpg_byte_reset[0x20];
+
+	u8         rpg_threshold[0x20];
+
+	u8         rpg_max_rate[0x20];
+
+	u8         rpg_ai_rate[0x20];
+
+	u8         rpg_hai_rate[0x20];
+
+	u8         rpg_gd[0x20];
+
+	u8         rpg_min_dec_fac[0x20];
+
+	u8         rpg_min_rate[0x20];
+
+	u8         reserved_1[0x640];
+};
+
+enum {
+	MLX5_RESIZE_FIELD_SELECT_RESIZE_FIELD_SELECT_LOG_CQ_SIZE    = 0x1,
+	MLX5_RESIZE_FIELD_SELECT_RESIZE_FIELD_SELECT_PAGE_OFFSET    = 0x2,
+	MLX5_RESIZE_FIELD_SELECT_RESIZE_FIELD_SELECT_LOG_PAGE_SIZE  = 0x4,
+};
+
+struct mlx5_ifc_resize_field_select_bits {
+	u8         resize_field_select[0x20];
+};
+
+enum {
+	MLX5_MODIFY_FIELD_SELECT_MODIFY_FIELD_SELECT_CQ_PERIOD     = 0x1,
+	MLX5_MODIFY_FIELD_SELECT_MODIFY_FIELD_SELECT_CQ_MAX_COUNT  = 0x2,
+	MLX5_MODIFY_FIELD_SELECT_MODIFY_FIELD_SELECT_OI            = 0x4,
+	MLX5_MODIFY_FIELD_SELECT_MODIFY_FIELD_SELECT_C_EQN         = 0x8,
+};
+
+struct mlx5_ifc_modify_field_select_bits {
+	u8         modify_field_select[0x20];
+};
+
+struct mlx5_ifc_field_select_r_roce_np_bits {
+	u8         field_select_r_roce_np[0x20];
+};
+
+struct mlx5_ifc_field_select_r_roce_rp_bits {
+	u8         field_select_r_roce_rp[0x20];
+};
+
+enum {
+	MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPPP_MAX_RPS     = 0x4,
+	MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_TIME_RESET   = 0x8,
+	MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_BYTE_RESET   = 0x10,
+	MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_THRESHOLD    = 0x20,
+	MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_MAX_RATE     = 0x40,
+	MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_AI_RATE      = 0x80,
+	MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_HAI_RATE     = 0x100,
+	MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_GD           = 0x200,
+	MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_MIN_DEC_FAC  = 0x400,
+	MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_MIN_RATE     = 0x800,
+};
+
+struct mlx5_ifc_field_select_802_1qau_rp_bits {
+	u8         field_select_8021qaurp[0x20];
+};
+
+struct mlx5_ifc_phys_layer_cntrs_bits {
+	u8         time_since_last_clear_high[0x20];
+
+	u8         time_since_last_clear_low[0x20];
+
+	u8         symbol_errors_high[0x20];
+
+	u8         symbol_errors_low[0x20];
+
+	u8         sync_headers_errors_high[0x20];
+
+	u8         sync_headers_errors_low[0x20];
+
+	u8         edpl_bip_errors_lane0_high[0x20];
+
+	u8         edpl_bip_errors_lane0_low[0x20];
+
+	u8         edpl_bip_errors_lane1_high[0x20];
+
+	u8         edpl_bip_errors_lane1_low[0x20];
+
+	u8         edpl_bip_errors_lane2_high[0x20];
+
+	u8         edpl_bip_errors_lane2_low[0x20];
+
+	u8         edpl_bip_errors_lane3_high[0x20];
+
+	u8         edpl_bip_errors_lane3_low[0x20];
+
+	u8         fc_fec_corrected_blocks_lane0_high[0x20];
+
+	u8         fc_fec_corrected_blocks_lane0_low[0x20];
+
+	u8         fc_fec_corrected_blocks_lane1_high[0x20];
+
+	u8         fc_fec_corrected_blocks_lane1_low[0x20];
+
+	u8         fc_fec_corrected_blocks_lane2_high[0x20];
+
+	u8         fc_fec_corrected_blocks_lane2_low[0x20];
+
+	u8         fc_fec_corrected_blocks_lane3_high[0x20];
+
+	u8         fc_fec_corrected_blocks_lane3_low[0x20];
+
+	u8         fc_fec_uncorrectable_blocks_lane0_high[0x20];
+
+	u8         fc_fec_uncorrectable_blocks_lane0_low[0x20];
+
+	u8         fc_fec_uncorrectable_blocks_lane1_high[0x20];
+
+	u8         fc_fec_uncorrectable_blocks_lane1_low[0x20];
+
+	u8         fc_fec_uncorrectable_blocks_lane2_high[0x20];
+
+	u8         fc_fec_uncorrectable_blocks_lane2_low[0x20];
+
+	u8         fc_fec_uncorrectable_blocks_lane3_high[0x20];
+
+	u8         fc_fec_uncorrectable_blocks_lane3_low[0x20];
+
+	u8         rs_fec_corrected_blocks_high[0x20];
+
+	u8         rs_fec_corrected_blocks_low[0x20];
+
+	u8         rs_fec_uncorrectable_blocks_high[0x20];
+
+	u8         rs_fec_uncorrectable_blocks_low[0x20];
+
+	u8         rs_fec_no_errors_blocks_high[0x20];
+
+	u8         rs_fec_no_errors_blocks_low[0x20];
+
+	u8         rs_fec_single_error_blocks_high[0x20];
+
+	u8         rs_fec_single_error_blocks_low[0x20];
+
+	u8         rs_fec_corrected_symbols_total_high[0x20];
+
+	u8         rs_fec_corrected_symbols_total_low[0x20];
+
+	u8         rs_fec_corrected_symbols_lane0_high[0x20];
+
+	u8         rs_fec_corrected_symbols_lane0_low[0x20];
+
+	u8         rs_fec_corrected_symbols_lane1_high[0x20];
+
+	u8         rs_fec_corrected_symbols_lane1_low[0x20];
+
+	u8         rs_fec_corrected_symbols_lane2_high[0x20];
+
+	u8         rs_fec_corrected_symbols_lane2_low[0x20];
+
+	u8         rs_fec_corrected_symbols_lane3_high[0x20];
+
+	u8         rs_fec_corrected_symbols_lane3_low[0x20];
+
+	u8         link_down_events[0x20];
+
+	u8         successful_recovery_events[0x20];
+
+	u8         reserved_0[0x180];
+};
+
+struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits {
+	u8         transmit_queue_high[0x20];
+
+	u8         transmit_queue_low[0x20];
+
+	u8         reserved_0[0x780];
+};
+
+struct mlx5_ifc_eth_per_prio_grp_data_layout_bits {
+	u8         rx_octets_high[0x20];
+
+	u8         rx_octets_low[0x20];
+
+	u8         reserved_0[0xc0];
+
+	u8         rx_frames_high[0x20];
+
+	u8         rx_frames_low[0x20];
+
+	u8         tx_octets_high[0x20];
+
+	u8         tx_octets_low[0x20];
+
+	u8         reserved_1[0xc0];
+
+	u8         tx_frames_high[0x20];
+
+	u8         tx_frames_low[0x20];
+
+	u8         rx_pause_high[0x20];
+
+	u8         rx_pause_low[0x20];
+
+	u8         rx_pause_duration_high[0x20];
+
+	u8         rx_pause_duration_low[0x20];
+
+	u8         tx_pause_high[0x20];
+
+	u8         tx_pause_low[0x20];
+
+	u8         tx_pause_duration_high[0x20];
+
+	u8         tx_pause_duration_low[0x20];
+
+	u8         rx_pause_transition_high[0x20];
+
+	u8         rx_pause_transition_low[0x20];
+
+	u8         reserved_2[0x400];
+};
+
+struct mlx5_ifc_eth_extended_cntrs_grp_data_layout_bits {
+	u8         port_transmit_wait_high[0x20];
+
+	u8         port_transmit_wait_low[0x20];
+
+	u8         reserved_0[0x780];
+};
+
+struct mlx5_ifc_eth_3635_cntrs_grp_data_layout_bits {
+	u8         dot3stats_alignment_errors_high[0x20];
+
+	u8         dot3stats_alignment_errors_low[0x20];
+
+	u8         dot3stats_fcs_errors_high[0x20];
+
+	u8         dot3stats_fcs_errors_low[0x20];
+
+	u8         dot3stats_single_collision_frames_high[0x20];
+
+	u8         dot3stats_single_collision_frames_low[0x20];
+
+	u8         dot3stats_multiple_collision_frames_high[0x20];
+
+	u8         dot3stats_multiple_collision_frames_low[0x20];
+
+	u8         dot3stats_sqe_test_errors_high[0x20];
+
+	u8         dot3stats_sqe_test_errors_low[0x20];
+
+	u8         dot3stats_deferred_transmissions_high[0x20];
+
+	u8         dot3stats_deferred_transmissions_low[0x20];
+
+	u8         dot3stats_late_collisions_high[0x20];
+
+	u8         dot3stats_late_collisions_low[0x20];
+
+	u8         dot3stats_excessive_collisions_high[0x20];
+
+	u8         dot3stats_excessive_collisions_low[0x20];
+
+	u8         dot3stats_internal_mac_transmit_errors_high[0x20];
+
+	u8         dot3stats_internal_mac_transmit_errors_low[0x20];
+
+	u8         dot3stats_carrier_sense_errors_high[0x20];
+
+	u8         dot3stats_carrier_sense_errors_low[0x20];
+
+	u8         dot3stats_frame_too_longs_high[0x20];
+
+	u8         dot3stats_frame_too_longs_low[0x20];
+
+	u8         dot3stats_internal_mac_receive_errors_high[0x20];
+
+	u8         dot3stats_internal_mac_receive_errors_low[0x20];
+
+	u8         dot3stats_symbol_errors_high[0x20];
+
+	u8         dot3stats_symbol_errors_low[0x20];
+
+	u8         dot3control_in_unknown_opcodes_high[0x20];
+
+	u8         dot3control_in_unknown_opcodes_low[0x20];
+
+	u8         dot3in_pause_frames_high[0x20];
+
+	u8         dot3in_pause_frames_low[0x20];
+
+	u8         dot3out_pause_frames_high[0x20];
+
+	u8         dot3out_pause_frames_low[0x20];
+
+	u8         reserved_0[0x3c0];
+};
+
+struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits {
+	u8         ether_stats_drop_events_high[0x20];
+
+	u8         ether_stats_drop_events_low[0x20];
+
+	u8         ether_stats_octets_high[0x20];
+
+	u8         ether_stats_octets_low[0x20];
+
+	u8         ether_stats_pkts_high[0x20];
+
+	u8         ether_stats_pkts_low[0x20];
+
+	u8         ether_stats_broadcast_pkts_high[0x20];
+
+	u8         ether_stats_broadcast_pkts_low[0x20];
+
+	u8         ether_stats_multicast_pkts_high[0x20];
+
+	u8         ether_stats_multicast_pkts_low[0x20];
+
+	u8         ether_stats_crc_align_errors_high[0x20];
+
+	u8         ether_stats_crc_align_errors_low[0x20];
+
+	u8         ether_stats_undersize_pkts_high[0x20];
+
+	u8         ether_stats_undersize_pkts_low[0x20];
+
+	u8         ether_stats_oversize_pkts_high[0x20];
+
+	u8         ether_stats_oversize_pkts_low[0x20];
+
+	u8         ether_stats_fragments_high[0x20];
+
+	u8         ether_stats_fragments_low[0x20];
+
+	u8         ether_stats_jabbers_high[0x20];
+
+	u8         ether_stats_jabbers_low[0x20];
+
+	u8         ether_stats_collisions_high[0x20];
+
+	u8         ether_stats_collisions_low[0x20];
+
+	u8         ether_stats_pkts64octets_high[0x20];
+
+	u8         ether_stats_pkts64octets_low[0x20];
+
+	u8         ether_stats_pkts65to127octets_high[0x20];
+
+	u8         ether_stats_pkts65to127octets_low[0x20];
+
+	u8         ether_stats_pkts128to255octets_high[0x20];
+
+	u8         ether_stats_pkts128to255octets_low[0x20];
+
+	u8         ether_stats_pkts256to511octets_high[0x20];
+
+	u8         ether_stats_pkts256to511octets_low[0x20];
+
+	u8         ether_stats_pkts512to1023octets_high[0x20];
+
+	u8         ether_stats_pkts512to1023octets_low[0x20];
+
+	u8         ether_stats_pkts1024to1518octets_high[0x20];
+
+	u8         ether_stats_pkts1024to1518octets_low[0x20];
+
+	u8         ether_stats_pkts1519to2047octets_high[0x20];
+
+	u8         ether_stats_pkts1519to2047octets_low[0x20];
+
+	u8         ether_stats_pkts2048to4095octets_high[0x20];
+
+	u8         ether_stats_pkts2048to4095octets_low[0x20];
+
+	u8         ether_stats_pkts4096to8191octets_high[0x20];
+
+	u8         ether_stats_pkts4096to8191octets_low[0x20];
+
+	u8         ether_stats_pkts8192to10239octets_high[0x20];
+
+	u8         ether_stats_pkts8192to10239octets_low[0x20];
+
+	u8         reserved_0[0x280];
+};
+
+struct mlx5_ifc_eth_2863_cntrs_grp_data_layout_bits {
+	u8         if_in_octets_high[0x20];
+
+	u8         if_in_octets_low[0x20];
+
+	u8         if_in_ucast_pkts_high[0x20];
+
+	u8         if_in_ucast_pkts_low[0x20];
+
+	u8         if_in_discards_high[0x20];
+
+	u8         if_in_discards_low[0x20];
+
+	u8         if_in_errors_high[0x20];
+
+	u8         if_in_errors_low[0x20];
+
+	u8         if_in_unknown_protos_high[0x20];
+
+	u8         if_in_unknown_protos_low[0x20];
+
+	u8         if_out_octets_high[0x20];
+
+	u8         if_out_octets_low[0x20];
+
+	u8         if_out_ucast_pkts_high[0x20];
+
+	u8         if_out_ucast_pkts_low[0x20];
+
+	u8         if_out_discards_high[0x20];
+
+	u8         if_out_discards_low[0x20];
+
+	u8         if_out_errors_high[0x20];
+
+	u8         if_out_errors_low[0x20];
+
+	u8         if_in_multicast_pkts_high[0x20];
+
+	u8         if_in_multicast_pkts_low[0x20];
+
+	u8         if_in_broadcast_pkts_high[0x20];
+
+	u8         if_in_broadcast_pkts_low[0x20];
+
+	u8         if_out_multicast_pkts_high[0x20];
+
+	u8         if_out_multicast_pkts_low[0x20];
+
+	u8         if_out_broadcast_pkts_high[0x20];
+
+	u8         if_out_broadcast_pkts_low[0x20];
+
+	u8         reserved_0[0x480];
+};
+
+struct mlx5_ifc_eth_802_3_cntrs_grp_data_layout_bits {
+	u8         a_frames_transmitted_ok_high[0x20];
+
+	u8         a_frames_transmitted_ok_low[0x20];
+
+	u8         a_frames_received_ok_high[0x20];
+
+	u8         a_frames_received_ok_low[0x20];
+
+	u8         a_frame_check_sequence_errors_high[0x20];
+
+	u8         a_frame_check_sequence_errors_low[0x20];
+
+	u8         a_alignment_errors_high[0x20];
+
+	u8         a_alignment_errors_low[0x20];
+
+	u8         a_octets_transmitted_ok_high[0x20];
+
+	u8         a_octets_transmitted_ok_low[0x20];
+
+	u8         a_octets_received_ok_high[0x20];
+
+	u8         a_octets_received_ok_low[0x20];
+
+	u8         a_multicast_frames_xmitted_ok_high[0x20];
+
+	u8         a_multicast_frames_xmitted_ok_low[0x20];
+
+	u8         a_broadcast_frames_xmitted_ok_high[0x20];
+
+	u8         a_broadcast_frames_xmitted_ok_low[0x20];
+
+	u8         a_multicast_frames_received_ok_high[0x20];
+
+	u8         a_multicast_frames_received_ok_low[0x20];
+
+	u8         a_broadcast_frames_received_ok_high[0x20];
+
+	u8         a_broadcast_frames_received_ok_low[0x20];
+
+	u8         a_in_range_length_errors_high[0x20];
+
+	u8         a_in_range_length_errors_low[0x20];
+
+	u8         a_out_of_range_length_field_high[0x20];
+
+	u8         a_out_of_range_length_field_low[0x20];
+
+	u8         a_frame_too_long_errors_high[0x20];
+
+	u8         a_frame_too_long_errors_low[0x20];
+
+	u8         a_symbol_error_during_carrier_high[0x20];
+
+	u8         a_symbol_error_during_carrier_low[0x20];
+
+	u8         a_mac_control_frames_transmitted_high[0x20];
+
+	u8         a_mac_control_frames_transmitted_low[0x20];
+
+	u8         a_mac_control_frames_received_high[0x20];
+
+	u8         a_mac_control_frames_received_low[0x20];
+
+	u8         a_unsupported_opcodes_received_high[0x20];
+
+	u8         a_unsupported_opcodes_received_low[0x20];
+
+	u8         a_pause_mac_ctrl_frames_received_high[0x20];
+
+	u8         a_pause_mac_ctrl_frames_received_low[0x20];
+
+	u8         a_pause_mac_ctrl_frames_transmitted_high[0x20];
+
+	u8         a_pause_mac_ctrl_frames_transmitted_low[0x20];
+
+	u8         reserved_0[0x300];
+};
+
+struct mlx5_ifc_cmd_inter_comp_event_bits {
+	u8         command_completion_vector[0x20];
+
+	u8         reserved_0[0xc0];
+};
+
+struct mlx5_ifc_stall_vl_event_bits {
+	u8         reserved_0[0x18];
+	u8         port_num[0x1];
+	u8         reserved_1[0x3];
+	u8         vl[0x4];
+
+	u8         reserved_2[0xa0];
+};
+
+struct mlx5_ifc_db_bf_congestion_event_bits {
+	u8         event_subtype[0x8];
+	u8         reserved_0[0x8];
+	u8         congestion_level[0x8];
+	u8         reserved_1[0x8];
+
+	u8         reserved_2[0xa0];
+};
+
+struct mlx5_ifc_gpio_event_bits {
+	u8         reserved_0[0x60];
+
+	u8         gpio_event_hi[0x20];
+
+	u8         gpio_event_lo[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_port_state_change_event_bits {
+	u8         reserved_0[0x40];
+
+	u8         port_num[0x4];
+	u8         reserved_1[0x1c];
+
+	u8         reserved_2[0x80];
+};
+
+struct mlx5_ifc_dropped_packet_logged_bits {
+	u8         reserved_0[0xe0];
+};
+
+enum {
+	MLX5_CQ_ERROR_SYNDROME_CQ_OVERRUN                 = 0x1,
+	MLX5_CQ_ERROR_SYNDROME_CQ_ACCESS_VIOLATION_ERROR  = 0x2,
+};
+
+struct mlx5_ifc_cq_error_bits {
+	u8         reserved_0[0x8];
+	u8         cqn[0x18];
+
+	u8         reserved_1[0x20];
+
+	u8         reserved_2[0x18];
+	u8         syndrome[0x8];
+
+	u8         reserved_3[0x80];
+};
+
+struct mlx5_ifc_rdma_page_fault_event_bits {
+	u8         bytes_committed[0x20];
+
+	u8         r_key[0x20];
+
+	u8         reserved_0[0x10];
+	u8         packet_len[0x10];
+
+	u8         rdma_op_len[0x20];
+
+	u8         rdma_va[0x40];
+
+	u8         reserved_1[0x5];
+	u8         rdma[0x1];
+	u8         write[0x1];
+	u8         requestor[0x1];
+	u8         qp_number[0x18];
+};
+
+struct mlx5_ifc_wqe_associated_page_fault_event_bits {
+	u8         bytes_committed[0x20];
+
+	u8         reserved_0[0x10];
+	u8         wqe_index[0x10];
+
+	u8         reserved_1[0x10];
+	u8         len[0x10];
+
+	u8         reserved_2[0x60];
+
+	u8         reserved_3[0x5];
+	u8         rdma[0x1];
+	u8         write_read[0x1];
+	u8         requestor[0x1];
+	u8         qpn[0x18];
+};
+
+struct mlx5_ifc_qp_events_bits {
+	u8         reserved_0[0xa0];
+
+	u8         type[0x8];
+	u8         reserved_1[0x18];
+
+	u8         reserved_2[0x8];
+	u8         qpn_rqn_sqn[0x18];
+};
+
+struct mlx5_ifc_dct_events_bits {
+	u8         reserved_0[0xc0];
+
+	u8         reserved_1[0x8];
+	u8         dct_number[0x18];
+};
+
+struct mlx5_ifc_comp_event_bits {
+	u8         reserved_0[0xc0];
+
+	u8         reserved_1[0x8];
+	u8         cq_number[0x18];
+};
+
+enum {
+	MLX5_QPC_STATE_RST        = 0x0,
+	MLX5_QPC_STATE_INIT       = 0x1,
+	MLX5_QPC_STATE_RTR        = 0x2,
+	MLX5_QPC_STATE_RTS        = 0x3,
+	MLX5_QPC_STATE_SQER       = 0x4,
+	MLX5_QPC_STATE_ERR        = 0x6,
+	MLX5_QPC_STATE_SQD        = 0x7,
+	MLX5_QPC_STATE_SUSPENDED  = 0x9,
+};
+
+enum {
+	MLX5_QPC_ST_RC            = 0x0,
+	MLX5_QPC_ST_UC            = 0x1,
+	MLX5_QPC_ST_UD            = 0x2,
+	MLX5_QPC_ST_XRC           = 0x3,
+	MLX5_QPC_ST_DCI           = 0x5,
+	MLX5_QPC_ST_QP0           = 0x7,
+	MLX5_QPC_ST_QP1           = 0x8,
+	MLX5_QPC_ST_RAW_DATAGRAM  = 0x9,
+	MLX5_QPC_ST_REG_UMR       = 0xc,
+};
+
+enum {
+	MLX5_QPC_PM_STATE_ARMED     = 0x0,
+	MLX5_QPC_PM_STATE_REARM     = 0x1,
+	MLX5_QPC_PM_STATE_RESERVED  = 0x2,
+	MLX5_QPC_PM_STATE_MIGRATED  = 0x3,
+};
+
+enum {
+	MLX5_QPC_END_PADDING_MODE_SCATTER_AS_IS                = 0x0,
+	MLX5_QPC_END_PADDING_MODE_PAD_TO_CACHE_LINE_ALIGNMENT  = 0x1,
+};
+
+enum {
+	MLX5_QPC_MTU_256_BYTES        = 0x1,
+	MLX5_QPC_MTU_512_BYTES        = 0x2,
+	MLX5_QPC_MTU_1K_BYTES         = 0x3,
+	MLX5_QPC_MTU_2K_BYTES         = 0x4,
+	MLX5_QPC_MTU_4K_BYTES         = 0x5,
+	MLX5_QPC_MTU_RAW_ETHERNET_QP  = 0x7,
+};
+
+enum {
+	MLX5_QPC_ATOMIC_MODE_IB_SPEC     = 0x1,
+	MLX5_QPC_ATOMIC_MODE_ONLY_8B     = 0x2,
+	MLX5_QPC_ATOMIC_MODE_UP_TO_8B    = 0x3,
+	MLX5_QPC_ATOMIC_MODE_UP_TO_16B   = 0x4,
+	MLX5_QPC_ATOMIC_MODE_UP_TO_32B   = 0x5,
+	MLX5_QPC_ATOMIC_MODE_UP_TO_64B   = 0x6,
+	MLX5_QPC_ATOMIC_MODE_UP_TO_128B  = 0x7,
+	MLX5_QPC_ATOMIC_MODE_UP_TO_256B  = 0x8,
+};
+
+enum {
+	MLX5_QPC_CS_REQ_DISABLE    = 0x0,
+	MLX5_QPC_CS_REQ_UP_TO_32B  = 0x11,
+	MLX5_QPC_CS_REQ_UP_TO_64B  = 0x22,
+};
+
+enum {
+	MLX5_QPC_CS_RES_DISABLE    = 0x0,
+	MLX5_QPC_CS_RES_UP_TO_32B  = 0x1,
+	MLX5_QPC_CS_RES_UP_TO_64B  = 0x2,
+};
+
+struct mlx5_ifc_qpc_bits {
+	u8         state[0x4];
+	u8         reserved_0[0x4];
+	u8         st[0x8];
+	u8         reserved_1[0x3];
+	u8         pm_state[0x2];
+	u8         reserved_2[0x7];
+	u8         end_padding_mode[0x2];
+	u8         reserved_3[0x2];
+
+	u8         wq_signature[0x1];
+	u8         block_lb_mc[0x1];
+	u8         atomic_like_write_en[0x1];
+	u8         latency_sensitive[0x1];
+	u8         reserved_4[0x1];
+	u8         drain_sigerr[0x1];
+	u8         reserved_5[0x2];
+	u8         pd[0x18];
+
+	u8         mtu[0x3];
+	u8         log_msg_max[0x5];
+	u8         reserved_6[0x1];
+	u8         log_rq_size[0x4];
+	u8         log_rq_stride[0x3];
+	u8         no_sq[0x1];
+	u8         log_sq_size[0x4];
+	u8         reserved_7[0x6];
+	u8         rlky[0x1];
+	u8         reserved_8[0x4];
+
+	u8         counter_set_id[0x8];
+	u8         uar_page[0x18];
+
+	u8         reserved_9[0x8];
+	u8         user_index[0x18];
+
+	u8         reserved_10[0x3];
+	u8         log_page_size[0x5];
+	u8         remote_qpn[0x18];
+
+	struct mlx5_ifc_ads_bits primary_address_path;
+
+	struct mlx5_ifc_ads_bits secondary_address_path;
+
+	u8         log_ack_req_freq[0x4];
+	u8         reserved_11[0x4];
+	u8         log_sra_max[0x3];
+	u8         reserved_12[0x2];
+	u8         retry_count[0x3];
+	u8         rnr_retry[0x3];
+	u8         reserved_13[0x1];
+	u8         fre[0x1];
+	u8         cur_rnr_retry[0x3];
+	u8         cur_retry_count[0x3];
+	u8         reserved_14[0x5];
+
+	u8         reserved_15[0x20];
+
+	u8         reserved_16[0x8];
+	u8         next_send_psn[0x18];
+
+	u8         reserved_17[0x8];
+	u8         cqn_snd[0x18];
+
+	u8         reserved_18[0x40];
+
+	u8         reserved_19[0x8];
+	u8         last_acked_psn[0x18];
+
+	u8         reserved_20[0x8];
+	u8         ssn[0x18];
+
+	u8         reserved_21[0x8];
+	u8         log_rra_max[0x3];
+	u8         reserved_22[0x1];
+	u8         atomic_mode[0x4];
+	u8         rre[0x1];
+	u8         rwe[0x1];
+	u8         rae[0x1];
+	u8         reserved_23[0x1];
+	u8         page_offset[0x6];
+	u8         reserved_24[0x3];
+	u8         cd_slave_receive[0x1];
+	u8         cd_slave_send[0x1];
+	u8         cd_master[0x1];
+
+	u8         reserved_25[0x3];
+	u8         min_rnr_nak[0x5];
+	u8         next_rcv_psn[0x18];
+
+	u8         reserved_26[0x8];
+	u8         xrcd[0x18];
+
+	u8         reserved_27[0x8];
+	u8         cqn_rcv[0x18];
+
+	u8         dbr_addr[0x40];
+
+	u8         q_key[0x20];
+
+	u8         reserved_28[0x5];
+	u8         rq_type[0x3];
+	u8         srqn_rmpn[0x18];
+
+	u8         reserved_29[0x8];
+	u8         rmsn[0x18];
+
+	u8         hw_sq_wqebb_counter[0x10];
+	u8         sw_sq_wqebb_counter[0x10];
+
+	u8         hw_rq_counter[0x20];
+
+	u8         sw_rq_counter[0x20];
+
+	u8         reserved_30[0x20];
+
+	u8         reserved_31[0xf];
+	u8         cgs[0x1];
+	u8         cs_req[0x8];
+	u8         cs_res[0x8];
+
+	u8         dc_access_key[0x40];
+
+	u8         reserved_32[0xc0];
+};
+
+struct mlx5_ifc_roce_addr_layout_bits {
+	u8         source_l3_address[16][0x8];
+
+	u8         reserved_0[0x3];
+	u8         vlan_valid[0x1];
+	u8         vlan_id[0xc];
+	u8         source_mac_47_32[0x10];
+
+	u8         source_mac_31_0[0x20];
+
+	u8         reserved_1[0x14];
+	u8         roce_l3_type[0x4];
+	u8         roce_version[0x8];
+
+	u8         reserved_2[0x20];
+};
+
+union mlx5_ifc_hca_cap_union_bits {
+	struct mlx5_ifc_cmd_hca_cap_bits cmd_hca_cap;
+	struct mlx5_ifc_odp_cap_bits odp_cap;
+	struct mlx5_ifc_atomic_caps_bits atomic_caps;
+	struct mlx5_ifc_roce_cap_bits roce_cap;
+	struct mlx5_ifc_per_protocol_networking_offload_caps_bits per_protocol_networking_offload_caps;
+	struct mlx5_ifc_flow_table_nic_cap_bits flow_table_nic_cap;
+	u8         reserved_0[0x8000];
+};
+
+enum {
+	MLX5_FLOW_CONTEXT_ACTION_ALLOW     = 0x1,
+	MLX5_FLOW_CONTEXT_ACTION_DROP      = 0x2,
+	MLX5_FLOW_CONTEXT_ACTION_FWD_DEST  = 0x4,
+};
+
+struct mlx5_ifc_flow_context_bits {
+	u8         reserved_0[0x20];
+
+	u8         group_id[0x20];
+
+	u8         reserved_1[0x8];
+	u8         flow_tag[0x18];
+
+	u8         reserved_2[0x10];
+	u8         action[0x10];
+
+	u8         reserved_3[0x8];
+	u8         destination_list_size[0x18];
+
+	u8         reserved_4[0x160];
+
+	struct mlx5_ifc_fte_match_param_bits match_value;
+
+	u8         reserved_5[0x600];
+
+	struct mlx5_ifc_dest_format_struct_bits destination[0];
+};
+
+enum {
+	MLX5_XRC_SRQC_STATE_GOOD   = 0x0,
+	MLX5_XRC_SRQC_STATE_ERROR  = 0x1,
+};
+
+struct mlx5_ifc_xrc_srqc_bits {
+	u8         state[0x4];
+	u8         log_xrc_srq_size[0x4];
+	u8         reserved_0[0x18];
+
+	u8         wq_signature[0x1];
+	u8         cont_srq[0x1];
+	u8         reserved_1[0x1];
+	u8         rlky[0x1];
+	u8         basic_cyclic_rcv_wqe[0x1];
+	u8         log_rq_stride[0x3];
+	u8         xrcd[0x18];
+
+	u8         page_offset[0x6];
+	u8         reserved_2[0x2];
+	u8         cqn[0x18];
+
+	u8         reserved_3[0x20];
+
+	u8         user_index_equal_xrc_srqn[0x1];
+	u8         reserved_4[0x1];
+	u8         log_page_size[0x6];
+	u8         user_index[0x18];
+
+	u8         reserved_5[0x20];
+
+	u8         reserved_6[0x8];
+	u8         pd[0x18];
+
+	u8         lwm[0x10];
+	u8         wqe_cnt[0x10];
+
+	u8         reserved_7[0x40];
+
+	u8         db_record_addr_h[0x20];
+
+	u8         db_record_addr_l[0x1e];
+	u8         reserved_8[0x2];
+
+	u8         reserved_9[0x80];
+};
+
+struct mlx5_ifc_traffic_counter_bits {
+	u8         packets[0x40];
+
+	u8         octets[0x40];
+};
+
+struct mlx5_ifc_tisc_bits {
+	u8         reserved_0[0xc];
+	u8         prio[0x4];
+	u8         reserved_1[0x10];
+
+	u8         reserved_2[0x100];
+
+	u8         reserved_3[0x8];
+	u8         transport_domain[0x18];
+
+	u8         reserved_4[0x3c0];
+};
+
+enum {
+	MLX5_TIRC_DISP_TYPE_DIRECT    = 0x0,
+	MLX5_TIRC_DISP_TYPE_INDIRECT  = 0x1,
+};
+
+enum {
+	MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO  = 0x1,
+	MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO  = 0x2,
+};
+
+enum {
+	MLX5_TIRC_RX_HASH_FN_HASH_NONE           = 0x0,
+	MLX5_TIRC_RX_HASH_FN_HASH_INVERTED_XOR8  = 0x1,
+	MLX5_TIRC_RX_HASH_FN_HASH_TOEPLITZ       = 0x2,
+};
+
+enum {
+	MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_    = 0x1,
+	MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST_  = 0x2,
+};
+
+struct mlx5_ifc_tirc_bits {
+	u8         reserved_0[0x20];
+
+	u8         disp_type[0x4];
+	u8         reserved_1[0x1c];
+
+	u8         reserved_2[0x40];
+
+	u8         reserved_3[0x4];
+	u8         lro_timeout_period_usecs[0x10];
+	u8         lro_enable_mask[0x4];
+	u8         lro_max_ip_payload_size[0x8];
+
+	u8         reserved_4[0x40];
+
+	u8         reserved_5[0x8];
+	u8         inline_rqn[0x18];
+
+	u8         rx_hash_symmetric[0x1];
+	u8         reserved_6[0x1];
+	u8         tunneled_offload_en[0x1];
+	u8         reserved_7[0x5];
+	u8         indirect_table[0x18];
+
+	u8         rx_hash_fn[0x4];
+	u8         reserved_8[0x2];
+	u8         self_lb_block[0x2];
+	u8         transport_domain[0x18];
+
+	u8         rx_hash_toeplitz_key[10][0x20];
+
+	struct mlx5_ifc_rx_hash_field_select_bits rx_hash_field_selector_outer;
+
+	struct mlx5_ifc_rx_hash_field_select_bits rx_hash_field_selector_inner;
+
+	u8         reserved_9[0x4c0];
+};
+
+enum {
+	MLX5_SRQC_STATE_GOOD   = 0x0,
+	MLX5_SRQC_STATE_ERROR  = 0x1,
+};
+
+struct mlx5_ifc_srqc_bits {
+	u8         state[0x4];
+	u8         log_srq_size[0x4];
+	u8         reserved_0[0x18];
+
+	u8         wq_signature[0x1];
+	u8         cont_srq[0x1];
+	u8         reserved_1[0x1];
+	u8         rlky[0x1];
+	u8         reserved_2[0x1];
+	u8         log_rq_stride[0x3];
+	u8         xrcd[0x18];
+
+	u8         page_offset[0x6];
+	u8         reserved_3[0x2];
+	u8         cqn[0x18];
+
+	u8         reserved_4[0x20];
+
+	u8         reserved_5[0x2];
+	u8         log_page_size[0x6];
+	u8         reserved_6[0x18];
+
+	u8         reserved_7[0x20];
+
+	u8         reserved_8[0x8];
+	u8         pd[0x18];
+
+	u8         lwm[0x10];
+	u8         wqe_cnt[0x10];
+
+	u8         reserved_9[0x40];
+
+	u8         db_record_addr_h[0x20];
+
+	u8         db_record_addr_l[0x1e];
+	u8         reserved_10[0x2];
+
+	u8         reserved_11[0x80];
+};
+
+enum {
+	MLX5_SQC_STATE_RST  = 0x0,
+	MLX5_SQC_STATE_RDY  = 0x1,
+	MLX5_SQC_STATE_ERR  = 0x3,
+};
+
+struct mlx5_ifc_sqc_bits {
+	u8         rlky[0x1];
+	u8         cd_master[0x1];
+	u8         fre[0x1];
+	u8         flush_in_error_en[0x1];
+	u8         reserved_0[0x4];
+	u8         state[0x4];
+	u8         reserved_1[0x14];
+
+	u8         reserved_2[0x8];
+	u8         user_index[0x18];
+
+	u8         reserved_3[0x8];
+	u8         cqn[0x18];
+
+	u8         reserved_4[0xa0];
+
+	u8         tis_lst_sz[0x10];
+	u8         reserved_5[0x10];
+
+	u8         reserved_6[0x40];
+
+	u8         reserved_7[0x8];
+	u8         tis_num_0[0x18];
+
+	struct mlx5_ifc_wq_bits wq;
+};
+
+struct mlx5_ifc_rqtc_bits {
+	u8         reserved_0[0xa0];
+
+	u8         reserved_1[0x10];
+	u8         rqt_max_size[0x10];
+
+	u8         reserved_2[0x10];
+	u8         rqt_actual_size[0x10];
+
+	u8         reserved_3[0x6a0];
+
+	struct mlx5_ifc_rq_num_bits rq_num[0];
+};
+
+enum {
+	MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE  = 0x0,
+	MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_RMP     = 0x1,
+};
+
+enum {
+	MLX5_RQC_STATE_RST  = 0x0,
+	MLX5_RQC_STATE_RDY  = 0x1,
+	MLX5_RQC_STATE_ERR  = 0x3,
+};
+
+struct mlx5_ifc_rqc_bits {
+	u8         rlky[0x1];
+	u8         reserved_0[0x2];
+	u8         vsd[0x1];
+	u8         mem_rq_type[0x4];
+	u8         state[0x4];
+	u8         reserved_1[0x1];
+	u8         flush_in_error_en[0x1];
+	u8         reserved_2[0x12];
+
+	u8         reserved_3[0x8];
+	u8         user_index[0x18];
+
+	u8         reserved_4[0x8];
+	u8         cqn[0x18];
+
+	u8         counter_set_id[0x8];
+	u8         reserved_5[0x18];
+
+	u8         reserved_6[0x8];
+	u8         rmpn[0x18];
+
+	u8         reserved_7[0xe0];
+
+	struct mlx5_ifc_wq_bits wq;
+};
+
+enum {
+	MLX5_RMPC_STATE_RDY  = 0x1,
+	MLX5_RMPC_STATE_ERR  = 0x3,
+};
+
+struct mlx5_ifc_rmpc_bits {
+	u8         reserved_0[0x8];
+	u8         state[0x4];
+	u8         reserved_1[0x14];
+
+	u8         basic_cyclic_rcv_wqe[0x1];
+	u8         reserved_2[0x1f];
+
+	u8         reserved_3[0x140];
+
+	struct mlx5_ifc_wq_bits wq;
+};
+
+enum {
+	MLX5_NIC_VPORT_CONTEXT_ALLOWED_LIST_TYPE_CURRENT_UC_MAC_ADDRESS  = 0x0,
+};
+
+struct mlx5_ifc_nic_vport_context_bits {
+	u8         reserved_0[0x1f];
+	u8         roce_en[0x1];
+
+	u8         reserved_1[0x760];
+
+	u8         reserved_2[0x5];
+	u8         allowed_list_type[0x3];
+	u8         reserved_3[0xc];
+	u8         allowed_list_size[0xc];
+
+	struct mlx5_ifc_mac_address_layout_bits permanent_address;
+
+	u8         reserved_4[0x20];
+
+	u8         current_uc_mac_address[0][0x40];
+};
+
+enum {
+	MLX5_MKC_ACCESS_MODE_PA    = 0x0,
+	MLX5_MKC_ACCESS_MODE_MTT   = 0x1,
+	MLX5_MKC_ACCESS_MODE_KLMS  = 0x2,
+};
+
+struct mlx5_ifc_mkc_bits {
+	u8         reserved_0[0x1];
+	u8         free[0x1];
+	u8         reserved_1[0xd];
+	u8         small_fence_on_rdma_read_response[0x1];
+	u8         umr_en[0x1];
+	u8         a[0x1];
+	u8         rw[0x1];
+	u8         rr[0x1];
+	u8         lw[0x1];
+	u8         lr[0x1];
+	u8         access_mode[0x2];
+	u8         reserved_2[0x8];
+
+	u8         qpn[0x18];
+	u8         mkey_7_0[0x8];
+
+	u8         reserved_3[0x20];
+
+	u8         length64[0x1];
+	u8         bsf_en[0x1];
+	u8         sync_umr[0x1];
+	u8         reserved_4[0x2];
+	u8         expected_sigerr_count[0x1];
+	u8         reserved_5[0x1];
+	u8         en_rinval[0x1];
+	u8         pd[0x18];
+
+	u8         start_addr[0x40];
+
+	u8         len[0x40];
+
+	u8         bsf_octword_size[0x20];
+
+	u8         reserved_6[0x80];
+
+	u8         translations_octword_size[0x20];
+
+	u8         reserved_7[0x1b];
+	u8         log_page_size[0x5];
+
+	u8         reserved_8[0x20];
+};
+
+struct mlx5_ifc_pkey_bits {
+	u8         reserved_0[0x10];
+	u8         pkey[0x10];
+};
+
+struct mlx5_ifc_array128_auto_bits {
+	u8         array128_auto[16][0x8];
+};
+
+struct mlx5_ifc_hca_vport_context_bits {
+	u8         field_select[0x20];
+
+	u8         reserved_0[0xe0];
+
+	u8         sm_virt_aware[0x1];
+	u8         has_smi[0x1];
+	u8         has_raw[0x1];
+	u8         grh_required[0x1];
+	u8         reserved_1[0x10];
+	u8         port_state_policy[0x4];
+	u8         phy_port_state[0x4];
+	u8         vport_state[0x4];
+
+	u8         reserved_2[0x60];
+
+	u8         port_guid[0x40];
+
+	u8         node_guid[0x40];
+
+	u8         cap_mask1[0x20];
+
+	u8         cap_mask1_field_select[0x20];
+
+	u8         cap_mask2[0x20];
+
+	u8         cap_mask2_field_select[0x20];
+
+	u8         reserved_3[0x80];
+
+	u8         lid[0x10];
+	u8         reserved_4[0x4];
+	u8         init_type_reply[0x4];
+	u8         lmc[0x3];
+	u8         subnet_timeout[0x5];
+
+	u8         sm_lid[0x10];
+	u8         sm_sl[0x4];
+	u8         reserved_5[0xc];
+
+	u8         qkey_violation_counter[0x10];
+	u8         pkey_violation_counter[0x10];
+
+	u8         reserved_6[0xca0];
+};
+
+enum {
+	MLX5_EQC_STATUS_OK                = 0x0,
+	MLX5_EQC_STATUS_EQ_WRITE_FAILURE  = 0xa,
+};
+
+enum {
+	MLX5_EQC_ST_ARMED  = 0x9,
+	MLX5_EQC_ST_FIRED  = 0xa,
+};
+
+struct mlx5_ifc_eqc_bits {
+	u8         status[0x4];
+	u8         reserved_0[0x9];
+	u8         ec[0x1];
+	u8         oi[0x1];
+	u8         reserved_1[0x5];
+	u8         st[0x4];
+	u8         reserved_2[0x8];
+
+	u8         reserved_3[0x20];
+
+	u8         reserved_4[0x14];
+	u8         page_offset[0x6];
+	u8         reserved_5[0x6];
+
+	u8         reserved_6[0x3];
+	u8         log_eq_size[0x5];
+	u8         uar_page[0x18];
+
+	u8         reserved_7[0x20];
+
+	u8         reserved_8[0x18];
+	u8         intr[0x8];
+
+	u8         reserved_9[0x3];
+	u8         log_page_size[0x5];
+	u8         reserved_10[0x18];
+
+	u8         reserved_11[0x60];
+
+	u8         reserved_12[0x8];
+	u8         consumer_counter[0x18];
+
+	u8         reserved_13[0x8];
+	u8         producer_counter[0x18];
+
+	u8         reserved_14[0x80];
+};
+
+enum {
+	MLX5_DCTC_STATE_ACTIVE    = 0x0,
+	MLX5_DCTC_STATE_DRAINING  = 0x1,
+	MLX5_DCTC_STATE_DRAINED   = 0x2,
+};
+
+enum {
+	MLX5_DCTC_CS_RES_DISABLE    = 0x0,
+	MLX5_DCTC_CS_RES_NA         = 0x1,
+	MLX5_DCTC_CS_RES_UP_TO_64B  = 0x2,
+};
+
+enum {
+	MLX5_DCTC_MTU_256_BYTES  = 0x1,
+	MLX5_DCTC_MTU_512_BYTES  = 0x2,
+	MLX5_DCTC_MTU_1K_BYTES   = 0x3,
+	MLX5_DCTC_MTU_2K_BYTES   = 0x4,
+	MLX5_DCTC_MTU_4K_BYTES   = 0x5,
+};
+
+struct mlx5_ifc_dctc_bits {
+	u8         reserved_0[0x4];
+	u8         state[0x4];
+	u8         reserved_1[0x18];
+
+	u8         reserved_2[0x8];
+	u8         user_index[0x18];
+
+	u8         reserved_3[0x8];
+	u8         cqn[0x18];
+
+	u8         counter_set_id[0x8];
+	u8         atomic_mode[0x4];
+	u8         rre[0x1];
+	u8         rwe[0x1];
+	u8         rae[0x1];
+	u8         atomic_like_write_en[0x1];
+	u8         latency_sensitive[0x1];
+	u8         rlky[0x1];
+	u8         free_ar[0x1];
+	u8         reserved_4[0xd];
+
+	u8         reserved_5[0x8];
+	u8         cs_res[0x8];
+	u8         reserved_6[0x3];
+	u8         min_rnr_nak[0x5];
+	u8         reserved_7[0x8];
+
+	u8         reserved_8[0x8];
+	u8         srqn[0x18];
+
+	u8         reserved_9[0x8];
+	u8         pd[0x18];
+
+	u8         tclass[0x8];
+	u8         reserved_10[0x4];
+	u8         flow_label[0x14];
+
+	u8         dc_access_key[0x40];
+
+	u8         reserved_11[0x5];
+	u8         mtu[0x3];
+	u8         port[0x8];
+	u8         pkey_index[0x10];
+
+	u8         reserved_12[0x8];
+	u8         my_addr_index[0x8];
+	u8         reserved_13[0x8];
+	u8         hop_limit[0x8];
+
+	u8         dc_access_key_violation_count[0x20];
+
+	u8         reserved_14[0x14];
+	u8         dei_cfi[0x1];
+	u8         eth_prio[0x3];
+	u8         ecn[0x2];
+	u8         dscp[0x6];
+
+	u8         reserved_15[0x40];
+};
+
+enum {
+	MLX5_CQC_STATUS_OK             = 0x0,
+	MLX5_CQC_STATUS_CQ_OVERFLOW    = 0x9,
+	MLX5_CQC_STATUS_CQ_WRITE_FAIL  = 0xa,
+};
+
+enum {
+	MLX5_CQC_CQE_SZ_64_BYTES   = 0x0,
+	MLX5_CQC_CQE_SZ_128_BYTES  = 0x1,
+};
+
+enum {
+	MLX5_CQC_ST_SOLICITED_NOTIFICATION_REQUEST_ARMED  = 0x6,
+	MLX5_CQC_ST_NOTIFICATION_REQUEST_ARMED            = 0x9,
+	MLX5_CQC_ST_FIRED                                 = 0xa,
+};
+
+struct mlx5_ifc_cqc_bits {
+	u8         status[0x4];
+	u8         reserved_0[0x4];
+	u8         cqe_sz[0x3];
+	u8         cc[0x1];
+	u8         reserved_1[0x1];
+	u8         scqe_break_moderation_en[0x1];
+	u8         oi[0x1];
+	u8         reserved_2[0x2];
+	u8         cqe_zip_en[0x1];
+	u8         mini_cqe_res_format[0x2];
+	u8         st[0x4];
+	u8         reserved_3[0x8];
+
+	u8         reserved_4[0x20];
+
+	u8         reserved_5[0x14];
+	u8         page_offset[0x6];
+	u8         reserved_6[0x6];
+
+	u8         reserved_7[0x3];
+	u8         log_cq_size[0x5];
+	u8         uar_page[0x18];
+
+	u8         reserved_8[0x4];
+	u8         cq_period[0xc];
+	u8         cq_max_count[0x10];
+
+	u8         reserved_9[0x18];
+	u8         c_eqn[0x8];
+
+	u8         reserved_10[0x3];
+	u8         log_page_size[0x5];
+	u8         reserved_11[0x18];
+
+	u8         reserved_12[0x20];
+
+	u8         reserved_13[0x8];
+	u8         last_notified_index[0x18];
+
+	u8         reserved_14[0x8];
+	u8         last_solicit_index[0x18];
+
+	u8         reserved_15[0x8];
+	u8         consumer_counter[0x18];
+
+	u8         reserved_16[0x8];
+	u8         producer_counter[0x18];
+
+	u8         reserved_17[0x40];
+
+	u8         dbr_addr[0x40];
+};
+
+union mlx5_ifc_cong_control_roce_ecn_auto_bits {
+	struct mlx5_ifc_cong_control_802_1qau_rp_bits cong_control_802_1qau_rp;
+	struct mlx5_ifc_cong_control_r_roce_ecn_rp_bits cong_control_r_roce_ecn_rp;
+	struct mlx5_ifc_cong_control_r_roce_ecn_np_bits cong_control_r_roce_ecn_np;
+	u8         reserved_0[0x800];
+};
+
+struct mlx5_ifc_query_adapter_param_block_bits {
+	u8         reserved_0[0xe0];
+
+	u8         reserved_1[0x10];
+	u8         vsd_vendor_id[0x10];
+
+	u8         vsd[208][0x8];
+
+	u8         vsd_contd_psid[16][0x8];
+};
+
+union mlx5_ifc_modify_field_select_resize_field_select_auto_bits {
+	struct mlx5_ifc_modify_field_select_bits modify_field_select;
+	struct mlx5_ifc_resize_field_select_bits resize_field_select;
+	u8         reserved_0[0x20];
+};
+
+union mlx5_ifc_field_select_802_1_r_roce_auto_bits {
+	struct mlx5_ifc_field_select_802_1qau_rp_bits field_select_802_1qau_rp;
+	struct mlx5_ifc_field_select_r_roce_rp_bits field_select_r_roce_rp;
+	struct mlx5_ifc_field_select_r_roce_np_bits field_select_r_roce_np;
+	u8         reserved_0[0x20];
+};
+
+union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits {
+	struct mlx5_ifc_eth_802_3_cntrs_grp_data_layout_bits eth_802_3_cntrs_grp_data_layout;
+	struct mlx5_ifc_eth_2863_cntrs_grp_data_layout_bits eth_2863_cntrs_grp_data_layout;
+	struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits eth_2819_cntrs_grp_data_layout;
+	struct mlx5_ifc_eth_3635_cntrs_grp_data_layout_bits eth_3635_cntrs_grp_data_layout;
+	struct mlx5_ifc_eth_extended_cntrs_grp_data_layout_bits eth_extended_cntrs_grp_data_layout;
+	struct mlx5_ifc_eth_per_prio_grp_data_layout_bits eth_per_prio_grp_data_layout;
+	struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits eth_per_traffic_grp_data_layout;
+	struct mlx5_ifc_phys_layer_cntrs_bits phys_layer_cntrs;
+	u8         reserved_0[0x7c0];
+};
+
+union mlx5_ifc_event_auto_bits {
+	struct mlx5_ifc_comp_event_bits comp_event;
+	struct mlx5_ifc_dct_events_bits dct_events;
+	struct mlx5_ifc_qp_events_bits qp_events;
+	struct mlx5_ifc_wqe_associated_page_fault_event_bits wqe_associated_page_fault_event;
+	struct mlx5_ifc_rdma_page_fault_event_bits rdma_page_fault_event;
+	struct mlx5_ifc_cq_error_bits cq_error;
+	struct mlx5_ifc_dropped_packet_logged_bits dropped_packet_logged;
+	struct mlx5_ifc_port_state_change_event_bits port_state_change_event;
+	struct mlx5_ifc_gpio_event_bits gpio_event;
+	struct mlx5_ifc_db_bf_congestion_event_bits db_bf_congestion_event;
+	struct mlx5_ifc_stall_vl_event_bits stall_vl_event;
+	struct mlx5_ifc_cmd_inter_comp_event_bits cmd_inter_comp_event;
+	u8         reserved_0[0xe0];
+};
+
+struct mlx5_ifc_health_buffer_bits {
+	u8         reserved_0[0x100];
+
+	u8         assert_existptr[0x20];
+
+	u8         assert_callra[0x20];
+
+	u8         reserved_1[0x40];
+
+	u8         fw_version[0x20];
+
+	u8         hw_id[0x20];
+
+	u8         reserved_2[0x20];
+
+	u8         irisc_index[0x8];
+	u8         synd[0x8];
+	u8         ext_synd[0x10];
+};
+
+struct mlx5_ifc_register_loopback_control_bits {
+	u8         no_lb[0x1];
+	u8         reserved_0[0x7];
+	u8         port[0x8];
+	u8         reserved_1[0x10];
+
+	u8         reserved_2[0x60];
+};
+
+struct mlx5_ifc_teardown_hca_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+enum {
+	MLX5_TEARDOWN_HCA_IN_PROFILE_GRACEFUL_CLOSE  = 0x0,
+	MLX5_TEARDOWN_HCA_IN_PROFILE_PANIC_CLOSE     = 0x1,
+};
+
+struct mlx5_ifc_teardown_hca_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x10];
+	u8         profile[0x10];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_sqerr2rts_qp_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_sqerr2rts_qp_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         qpn[0x18];
+
+	u8         reserved_3[0x20];
+
+	u8         opt_param_mask[0x20];
+
+	u8         reserved_4[0x20];
+
+	struct mlx5_ifc_qpc_bits qpc;
+
+	u8         reserved_5[0x80];
+};
+
+struct mlx5_ifc_sqd2rts_qp_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_sqd2rts_qp_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         qpn[0x18];
+
+	u8         reserved_3[0x20];
+
+	u8         opt_param_mask[0x20];
+
+	u8         reserved_4[0x20];
+
+	struct mlx5_ifc_qpc_bits qpc;
+
+	u8         reserved_5[0x80];
+};
+
+struct mlx5_ifc_set_roce_address_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_set_roce_address_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         roce_address_index[0x10];
+	u8         reserved_2[0x10];
+
+	u8         reserved_3[0x20];
+
+	struct mlx5_ifc_roce_addr_layout_bits roce_address;
+};
+
+struct mlx5_ifc_set_mad_demux_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+enum {
+	MLX5_SET_MAD_DEMUX_IN_DEMUX_MODE_PASS_ALL   = 0x0,
+	MLX5_SET_MAD_DEMUX_IN_DEMUX_MODE_SELECTIVE  = 0x2,
+};
+
+struct mlx5_ifc_set_mad_demux_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x20];
+
+	u8         reserved_3[0x6];
+	u8         demux_mode[0x2];
+	u8         reserved_4[0x18];
+};
+
+struct mlx5_ifc_set_l2_table_entry_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_set_l2_table_entry_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x60];
+
+	u8         reserved_3[0x8];
+	u8         table_index[0x18];
+
+	u8         reserved_4[0x20];
+
+	u8         reserved_5[0x13];
+	u8         vlan_valid[0x1];
+	u8         vlan[0xc];
+
+	struct mlx5_ifc_mac_address_layout_bits mac_address;
+
+	u8         reserved_6[0xc0];
+};
+
+struct mlx5_ifc_set_issi_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_set_issi_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x10];
+	u8         current_issi[0x10];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_set_hca_cap_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_set_hca_cap_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x40];
+
+	union mlx5_ifc_hca_cap_union_bits capability;
+};
+
+struct mlx5_ifc_set_fte_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_set_fte_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x40];
+
+	u8         table_type[0x8];
+	u8         reserved_3[0x18];
+
+	u8         reserved_4[0x8];
+	u8         table_id[0x18];
+
+	u8         reserved_5[0x40];
+
+	u8         flow_index[0x20];
+
+	u8         reserved_6[0xe0];
+
+	struct mlx5_ifc_flow_context_bits flow_context;
+};
+
+struct mlx5_ifc_rts2rts_qp_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_rts2rts_qp_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         qpn[0x18];
+
+	u8         reserved_3[0x20];
+
+	u8         opt_param_mask[0x20];
+
+	u8         reserved_4[0x20];
+
+	struct mlx5_ifc_qpc_bits qpc;
+
+	u8         reserved_5[0x80];
+};
+
+struct mlx5_ifc_rtr2rts_qp_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_rtr2rts_qp_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         qpn[0x18];
+
+	u8         reserved_3[0x20];
+
+	u8         opt_param_mask[0x20];
+
+	u8         reserved_4[0x20];
+
+	struct mlx5_ifc_qpc_bits qpc;
+
+	u8         reserved_5[0x80];
+};
+
+struct mlx5_ifc_rst2init_qp_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_rst2init_qp_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         qpn[0x18];
+
+	u8         reserved_3[0x20];
+
+	u8         opt_param_mask[0x20];
+
+	u8         reserved_4[0x20];
+
+	struct mlx5_ifc_qpc_bits qpc;
+
+	u8         reserved_5[0x80];
+};
+
+struct mlx5_ifc_query_xrc_srq_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+
+	struct mlx5_ifc_xrc_srqc_bits xrc_srq_context_entry;
+
+	u8         reserved_2[0x600];
+
+	u8         pas[0][0x40];
+};
+
+struct mlx5_ifc_query_xrc_srq_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         xrc_srqn[0x18];
+
+	u8         reserved_3[0x20];
+};
+
+enum {
+	MLX5_QUERY_VPORT_STATE_OUT_STATE_DOWN  = 0x0,
+	MLX5_QUERY_VPORT_STATE_OUT_STATE_UP    = 0x1,
+};
+
+struct mlx5_ifc_query_vport_state_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x20];
+
+	u8         reserved_2[0x18];
+	u8         admin_state[0x4];
+	u8         state[0x4];
+};
+
+enum {
+	MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT  = 0x0,
+};
+
+struct mlx5_ifc_query_vport_state_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         other_vport[0x1];
+	u8         reserved_2[0xf];
+	u8         vport_number[0x10];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_query_vport_counter_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+
+	struct mlx5_ifc_traffic_counter_bits received_errors;
+
+	struct mlx5_ifc_traffic_counter_bits transmit_errors;
+
+	struct mlx5_ifc_traffic_counter_bits received_ib_unicast;
+
+	struct mlx5_ifc_traffic_counter_bits transmitted_ib_unicast;
+
+	struct mlx5_ifc_traffic_counter_bits received_ib_multicast;
+
+	struct mlx5_ifc_traffic_counter_bits transmitted_ib_multicast;
+
+	struct mlx5_ifc_traffic_counter_bits received_eth_broadcast;
+
+	struct mlx5_ifc_traffic_counter_bits transmitted_eth_broadcast;
+
+	struct mlx5_ifc_traffic_counter_bits received_eth_unicast;
+
+	struct mlx5_ifc_traffic_counter_bits transmitted_eth_unicast;
+
+	struct mlx5_ifc_traffic_counter_bits received_eth_multicast;
+
+	struct mlx5_ifc_traffic_counter_bits transmitted_eth_multicast;
+
+	u8         reserved_2[0xa00];
+};
+
+enum {
+	MLX5_QUERY_VPORT_COUNTER_IN_OP_MOD_VPORT_COUNTERS  = 0x0,
+};
+
+struct mlx5_ifc_query_vport_counter_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         other_vport[0x1];
+	u8         reserved_2[0xf];
+	u8         vport_number[0x10];
+
+	u8         reserved_3[0x60];
+
+	u8         clear[0x1];
+	u8         reserved_4[0x1f];
+
+	u8         reserved_5[0x20];
+};
+
+struct mlx5_ifc_query_tis_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+
+	struct mlx5_ifc_tisc_bits tis_context;
+};
+
+struct mlx5_ifc_query_tis_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         tisn[0x18];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_query_tir_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0xc0];
+
+	struct mlx5_ifc_tirc_bits tir_context;
+};
+
+struct mlx5_ifc_query_tir_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         tirn[0x18];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_query_srq_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+
+	struct mlx5_ifc_srqc_bits srq_context_entry;
+
+	u8         reserved_2[0x600];
+
+	u8         pas[0][0x40];
+};
+
+struct mlx5_ifc_query_srq_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         srqn[0x18];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_query_sq_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0xc0];
+
+	struct mlx5_ifc_sqc_bits sq_context;
+};
+
+struct mlx5_ifc_query_sq_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         sqn[0x18];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_query_special_contexts_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x20];
+
+	u8         resd_lkey[0x20];
+};
+
+struct mlx5_ifc_query_special_contexts_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x40];
+};
+
+struct mlx5_ifc_query_rqt_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0xc0];
+
+	struct mlx5_ifc_rqtc_bits rqt_context;
+};
+
+struct mlx5_ifc_query_rqt_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         rqtn[0x18];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_query_rq_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0xc0];
+
+	struct mlx5_ifc_rqc_bits rq_context;
+};
+
+struct mlx5_ifc_query_rq_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         rqn[0x18];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_query_roce_address_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+
+	struct mlx5_ifc_roce_addr_layout_bits roce_address;
+};
+
+struct mlx5_ifc_query_roce_address_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         roce_address_index[0x10];
+	u8         reserved_2[0x10];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_query_rmp_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0xc0];
+
+	struct mlx5_ifc_rmpc_bits rmp_context;
+};
+
+struct mlx5_ifc_query_rmp_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         rmpn[0x18];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_query_qp_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+
+	u8         opt_param_mask[0x20];
+
+	u8         reserved_2[0x20];
+
+	struct mlx5_ifc_qpc_bits qpc;
+
+	u8         reserved_3[0x80];
+
+	u8         pas[0][0x40];
+};
+
+struct mlx5_ifc_query_qp_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         qpn[0x18];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_query_q_counter_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+
+	u8         rx_write_requests[0x20];
+
+	u8         reserved_2[0x20];
+
+	u8         rx_read_requests[0x20];
+
+	u8         reserved_3[0x20];
+
+	u8         rx_atomic_requests[0x20];
+
+	u8         reserved_4[0x20];
+
+	u8         rx_dct_connect[0x20];
+
+	u8         reserved_5[0x20];
+
+	u8         out_of_buffer[0x20];
+
+	u8         reserved_6[0x20];
+
+	u8         out_of_sequence[0x20];
+
+	u8         reserved_7[0x620];
+};
+
+struct mlx5_ifc_query_q_counter_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x80];
+
+	u8         clear[0x1];
+	u8         reserved_3[0x1f];
+
+	u8         reserved_4[0x18];
+	u8         counter_set_id[0x8];
+};
+
+struct mlx5_ifc_query_pages_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x10];
+	u8         function_id[0x10];
+
+	u8         num_pages[0x20];
+};
+
+enum {
+	MLX5_QUERY_PAGES_IN_OP_MOD_BOOT_PAGES     = 0x1,
+	MLX5_QUERY_PAGES_IN_OP_MOD_INIT_PAGES     = 0x2,
+	MLX5_QUERY_PAGES_IN_OP_MOD_REGULAR_PAGES  = 0x3,
+};
+
+struct mlx5_ifc_query_pages_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x10];
+	u8         function_id[0x10];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_query_nic_vport_context_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+
+	struct mlx5_ifc_nic_vport_context_bits nic_vport_context;
+};
+
+struct mlx5_ifc_query_nic_vport_context_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         other_vport[0x1];
+	u8         reserved_2[0xf];
+	u8         vport_number[0x10];
+
+	u8         reserved_3[0x5];
+	u8         allowed_list_type[0x3];
+	u8         reserved_4[0x18];
+};
+
+struct mlx5_ifc_query_mkey_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+
+	struct mlx5_ifc_mkc_bits memory_key_mkey_entry;
+
+	u8         reserved_2[0x600];
+
+	u8         bsf0_klm0_pas_mtt0_1[16][0x8];
+
+	u8         bsf1_klm1_pas_mtt2_3[16][0x8];
+};
+
+struct mlx5_ifc_query_mkey_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         mkey_index[0x18];
+
+	u8         pg_access[0x1];
+	u8         reserved_3[0x1f];
+};
+
+struct mlx5_ifc_query_mad_demux_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+
+	u8         mad_dumux_parameters_block[0x20];
+};
+
+struct mlx5_ifc_query_mad_demux_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x40];
+};
+
+struct mlx5_ifc_query_l2_table_entry_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0xa0];
+
+	u8         reserved_2[0x13];
+	u8         vlan_valid[0x1];
+	u8         vlan[0xc];
+
+	struct mlx5_ifc_mac_address_layout_bits mac_address;
+
+	u8         reserved_3[0xc0];
+};
+
+struct mlx5_ifc_query_l2_table_entry_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x60];
+
+	u8         reserved_3[0x8];
+	u8         table_index[0x18];
+
+	u8         reserved_4[0x140];
+};
+
+struct mlx5_ifc_query_issi_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x10];
+	u8         current_issi[0x10];
+
+	u8         reserved_2[0xa0];
+
+	u8         supported_issi_reserved[76][0x8];
+	u8         supported_issi_dw0[0x20];
+};
+
+struct mlx5_ifc_query_issi_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x40];
+};
+
+struct mlx5_ifc_query_hca_vport_pkey_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+
+	struct mlx5_ifc_pkey_bits pkey[0];
+};
+
+struct mlx5_ifc_query_hca_vport_pkey_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         other_vport[0x1];
+	u8         reserved_2[0xf];
+	u8         vport_number[0x10];
+
+	u8         reserved_3[0x10];
+	u8         pkey_index[0x10];
+};
+
+struct mlx5_ifc_query_hca_vport_gid_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x20];
+
+	u8         gids_num[0x10];
+	u8         reserved_2[0x10];
+
+	struct mlx5_ifc_array128_auto_bits gid[0];
+};
+
+struct mlx5_ifc_query_hca_vport_gid_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         other_vport[0x1];
+	u8         reserved_2[0xf];
+	u8         vport_number[0x10];
+
+	u8         reserved_3[0x10];
+	u8         gid_index[0x10];
+};
+
+struct mlx5_ifc_query_hca_vport_context_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+
+	struct mlx5_ifc_hca_vport_context_bits hca_vport_context;
+};
+
+struct mlx5_ifc_query_hca_vport_context_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         other_vport[0x1];
+	u8         reserved_2[0xf];
+	u8         vport_number[0x10];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_query_hca_cap_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+
+	union mlx5_ifc_hca_cap_union_bits capability;
+};
+
+struct mlx5_ifc_query_hca_cap_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x40];
+};
+
+struct mlx5_ifc_query_flow_table_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x80];
+
+	u8         reserved_2[0x8];
+	u8         level[0x8];
+	u8         reserved_3[0x8];
+	u8         log_size[0x8];
+
+	u8         reserved_4[0x120];
+};
+
+struct mlx5_ifc_query_flow_table_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x40];
+
+	u8         table_type[0x8];
+	u8         reserved_3[0x18];
+
+	u8         reserved_4[0x8];
+	u8         table_id[0x18];
+
+	u8         reserved_5[0x140];
+};
+
+struct mlx5_ifc_query_fte_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x1c0];
+
+	struct mlx5_ifc_flow_context_bits flow_context;
+};
+
+struct mlx5_ifc_query_fte_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x40];
+
+	u8         table_type[0x8];
+	u8         reserved_3[0x18];
+
+	u8         reserved_4[0x8];
+	u8         table_id[0x18];
+
+	u8         reserved_5[0x40];
+
+	u8         flow_index[0x20];
+
+	u8         reserved_6[0xe0];
+};
+
+enum {
+	MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_OUTER_HEADERS    = 0x0,
+	MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS  = 0x1,
+	MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_INNER_HEADERS    = 0x2,
+};
+
+struct mlx5_ifc_query_flow_group_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0xa0];
+
+	u8         start_flow_index[0x20];
+
+	u8         reserved_2[0x20];
+
+	u8         end_flow_index[0x20];
+
+	u8         reserved_3[0xa0];
+
+	u8         reserved_4[0x18];
+	u8         match_criteria_enable[0x8];
+
+	struct mlx5_ifc_fte_match_param_bits match_criteria;
+
+	u8         reserved_5[0xe00];
+};
+
+struct mlx5_ifc_query_flow_group_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x40];
+
+	u8         table_type[0x8];
+	u8         reserved_3[0x18];
+
+	u8         reserved_4[0x8];
+	u8         table_id[0x18];
+
+	u8         group_id[0x20];
+
+	u8         reserved_5[0x120];
+};
+
+struct mlx5_ifc_query_eq_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+
+	struct mlx5_ifc_eqc_bits eq_context_entry;
+
+	u8         reserved_2[0x40];
+
+	u8         event_bitmask[0x40];
+
+	u8         reserved_3[0x580];
+
+	u8         pas[0][0x40];
+};
+
+struct mlx5_ifc_query_eq_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x18];
+	u8         eq_number[0x8];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_query_dct_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+
+	struct mlx5_ifc_dctc_bits dct_context_entry;
+
+	u8         reserved_2[0x180];
+};
+
+struct mlx5_ifc_query_dct_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         dctn[0x18];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_query_cq_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+
+	struct mlx5_ifc_cqc_bits cq_context;
+
+	u8         reserved_2[0x600];
+
+	u8         pas[0][0x40];
+};
+
+struct mlx5_ifc_query_cq_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         cqn[0x18];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_query_cong_status_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x20];
+
+	u8         enable[0x1];
+	u8         tag_enable[0x1];
+	u8         reserved_2[0x1e];
+};
+
+struct mlx5_ifc_query_cong_status_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x18];
+	u8         priority[0x4];
+	u8         cong_protocol[0x4];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_query_cong_statistics_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+
+	u8         cur_flows[0x20];
+
+	u8         sum_flows[0x20];
+
+	u8         cnp_ignored_high[0x20];
+
+	u8         cnp_ignored_low[0x20];
+
+	u8         cnp_handled_high[0x20];
+
+	u8         cnp_handled_low[0x20];
+
+	u8         reserved_2[0x100];
+
+	u8         time_stamp_high[0x20];
+
+	u8         time_stamp_low[0x20];
+
+	u8         accumulators_period[0x20];
+
+	u8         ecn_marked_roce_packets_high[0x20];
+
+	u8         ecn_marked_roce_packets_low[0x20];
+
+	u8         cnps_sent_high[0x20];
+
+	u8         cnps_sent_low[0x20];
+
+	u8         reserved_3[0x560];
+};
+
+struct mlx5_ifc_query_cong_statistics_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         clear[0x1];
+	u8         reserved_2[0x1f];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_query_cong_params_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+
+	union mlx5_ifc_cong_control_roce_ecn_auto_bits congestion_parameters;
+};
+
+struct mlx5_ifc_query_cong_params_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x1c];
+	u8         cong_protocol[0x4];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_query_adapter_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+
+	struct mlx5_ifc_query_adapter_param_block_bits query_adapter_struct;
+};
+
+struct mlx5_ifc_query_adapter_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x40];
+};
+
+struct mlx5_ifc_qp_2rst_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_qp_2rst_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         qpn[0x18];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_qp_2err_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_qp_2err_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         qpn[0x18];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_page_fault_resume_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_page_fault_resume_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         error[0x1];
+	u8         reserved_2[0x4];
+	u8         rdma[0x1];
+	u8         read_write[0x1];
+	u8         req_res[0x1];
+	u8         qpn[0x18];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_nop_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_nop_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x40];
+};
+
+struct mlx5_ifc_modify_vport_state_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_modify_vport_state_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         other_vport[0x1];
+	u8         reserved_2[0xf];
+	u8         vport_number[0x10];
+
+	u8         reserved_3[0x18];
+	u8         admin_state[0x4];
+	u8         reserved_4[0x4];
+};
+
+struct mlx5_ifc_modify_tis_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_modify_tis_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         tisn[0x18];
+
+	u8         reserved_3[0x20];
+
+	u8         modify_bitmask[0x40];
+
+	u8         reserved_4[0x40];
+
+	struct mlx5_ifc_tisc_bits ctx;
+};
+
+struct mlx5_ifc_modify_tir_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_modify_tir_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         tirn[0x18];
+
+	u8         reserved_3[0x20];
+
+	u8         modify_bitmask[0x40];
+
+	u8         reserved_4[0x40];
+
+	struct mlx5_ifc_tirc_bits ctx;
+};
+
+struct mlx5_ifc_modify_sq_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_modify_sq_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         sq_state[0x4];
+	u8         reserved_2[0x4];
+	u8         sqn[0x18];
+
+	u8         reserved_3[0x20];
+
+	u8         modify_bitmask[0x40];
+
+	u8         reserved_4[0x40];
+
+	struct mlx5_ifc_sqc_bits ctx;
+};
+
+struct mlx5_ifc_modify_rqt_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_modify_rqt_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         rqtn[0x18];
+
+	u8         reserved_3[0x20];
+
+	u8         modify_bitmask[0x40];
+
+	u8         reserved_4[0x40];
+
+	struct mlx5_ifc_rqtc_bits ctx;
+};
+
+struct mlx5_ifc_modify_rq_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_modify_rq_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         rq_state[0x4];
+	u8         reserved_2[0x4];
+	u8         rqn[0x18];
+
+	u8         reserved_3[0x20];
+
+	u8         modify_bitmask[0x40];
+
+	u8         reserved_4[0x40];
+
+	struct mlx5_ifc_rqc_bits ctx;
+};
+
+struct mlx5_ifc_modify_rmp_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_modify_rmp_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         rmp_state[0x4];
+	u8         reserved_2[0x4];
+	u8         rmpn[0x18];
+
+	u8         reserved_3[0x20];
+
+	u8         modify_bitmask[0x40];
+
+	u8         reserved_4[0x40];
+
+	struct mlx5_ifc_rmpc_bits ctx;
+};
+
+struct mlx5_ifc_modify_nic_vport_context_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_modify_nic_vport_field_select_bits {
+	u8         reserved_0[0x1c];
+	u8         permanent_address[0x1];
+	u8         addresses_list[0x1];
+	u8         roce_en[0x1];
+	u8         reserved_1[0x1];
+};
+
+struct mlx5_ifc_modify_nic_vport_context_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         other_vport[0x1];
+	u8         reserved_2[0xf];
+	u8         vport_number[0x10];
+
+	struct mlx5_ifc_modify_nic_vport_field_select_bits field_select;
+
+	u8         reserved_3[0x780];
+
+	struct mlx5_ifc_nic_vport_context_bits nic_vport_context;
+};
+
+struct mlx5_ifc_modify_hca_vport_context_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_modify_hca_vport_context_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         other_vport[0x1];
+	u8         reserved_2[0xf];
+	u8         vport_number[0x10];
+
+	u8         reserved_3[0x20];
+
+	struct mlx5_ifc_hca_vport_context_bits hca_vport_context;
+};
+
+struct mlx5_ifc_modify_cq_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+enum {
+	MLX5_MODIFY_CQ_IN_OP_MOD_MODIFY_CQ  = 0x0,
+	MLX5_MODIFY_CQ_IN_OP_MOD_RESIZE_CQ  = 0x1,
+};
+
+struct mlx5_ifc_modify_cq_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         cqn[0x18];
+
+	union mlx5_ifc_modify_field_select_resize_field_select_auto_bits modify_field_select_resize_field_select;
+
+	struct mlx5_ifc_cqc_bits cq_context;
+
+	u8         reserved_3[0x600];
+
+	u8         pas[0][0x40];
+};
+
+struct mlx5_ifc_modify_cong_status_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_modify_cong_status_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x18];
+	u8         priority[0x4];
+	u8         cong_protocol[0x4];
+
+	u8         enable[0x1];
+	u8         tag_enable[0x1];
+	u8         reserved_3[0x1e];
+};
+
+struct mlx5_ifc_modify_cong_params_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_modify_cong_params_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x1c];
+	u8         cong_protocol[0x4];
+
+	union mlx5_ifc_field_select_802_1_r_roce_auto_bits field_select;
+
+	u8         reserved_3[0x80];
+
+	union mlx5_ifc_cong_control_roce_ecn_auto_bits congestion_parameters;
+};
+
+struct mlx5_ifc_manage_pages_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         output_num_entries[0x20];
+
+	u8         reserved_1[0x20];
+
+	u8         pas[0][0x40];
+};
+
+enum {
+	MLX5_MANAGE_PAGES_IN_OP_MOD_ALLOCATION_FAIL     = 0x0,
+	MLX5_MANAGE_PAGES_IN_OP_MOD_ALLOCATION_SUCCESS  = 0x1,
+	MLX5_MANAGE_PAGES_IN_OP_MOD_HCA_RETURN_PAGES    = 0x2,
+};
+
+struct mlx5_ifc_manage_pages_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x10];
+	u8         function_id[0x10];
+
+	u8         input_num_entries[0x20];
+
+	u8         pas[0][0x40];
+};
+
+struct mlx5_ifc_mad_ifc_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+
+	u8         response_mad_packet[256][0x8];
+};
+
+struct mlx5_ifc_mad_ifc_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         remote_lid[0x10];
+	u8         reserved_2[0x8];
+	u8         port[0x8];
+
+	u8         reserved_3[0x20];
+
+	u8         mad[256][0x8];
+};
+
+struct mlx5_ifc_init_hca_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_init_hca_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x40];
+};
+
+struct mlx5_ifc_init2rtr_qp_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_init2rtr_qp_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         qpn[0x18];
+
+	u8         reserved_3[0x20];
+
+	u8         opt_param_mask[0x20];
+
+	u8         reserved_4[0x20];
+
+	struct mlx5_ifc_qpc_bits qpc;
+
+	u8         reserved_5[0x80];
+};
+
+struct mlx5_ifc_init2init_qp_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_init2init_qp_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         qpn[0x18];
+
+	u8         reserved_3[0x20];
+
+	u8         opt_param_mask[0x20];
+
+	u8         reserved_4[0x20];
+
+	struct mlx5_ifc_qpc_bits qpc;
+
+	u8         reserved_5[0x80];
+};
+
+struct mlx5_ifc_get_dropped_packet_log_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+
+	u8         packet_headers_log[128][0x8];
+
+	u8         packet_syndrome[64][0x8];
+};
+
+struct mlx5_ifc_get_dropped_packet_log_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x40];
+};
+
+struct mlx5_ifc_gen_eqe_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x18];
+	u8         eq_number[0x8];
+
+	u8         reserved_3[0x20];
+
+	u8         eqe[64][0x8];
+};
+
+struct mlx5_ifc_gen_eq_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_enable_hca_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x20];
+};
+
+struct mlx5_ifc_enable_hca_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x10];
+	u8         function_id[0x10];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_drain_dct_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_drain_dct_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         dctn[0x18];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_disable_hca_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x20];
+};
+
+struct mlx5_ifc_disable_hca_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x10];
+	u8         function_id[0x10];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_detach_from_mcg_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_detach_from_mcg_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         qpn[0x18];
+
+	u8         reserved_3[0x20];
+
+	u8         multicast_gid[16][0x8];
+};
+
+struct mlx5_ifc_destroy_xrc_srq_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_destroy_xrc_srq_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         xrc_srqn[0x18];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_destroy_tis_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_destroy_tis_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         tisn[0x18];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_destroy_tir_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_destroy_tir_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         tirn[0x18];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_destroy_srq_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_destroy_srq_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         srqn[0x18];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_destroy_sq_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_destroy_sq_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         sqn[0x18];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_destroy_rqt_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_destroy_rqt_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         rqtn[0x18];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_destroy_rq_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_destroy_rq_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         rqn[0x18];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_destroy_rmp_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_destroy_rmp_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         rmpn[0x18];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_destroy_qp_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_destroy_qp_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         qpn[0x18];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_destroy_psv_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_destroy_psv_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         psvn[0x18];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_destroy_mkey_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_destroy_mkey_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         mkey_index[0x18];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_destroy_flow_table_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_destroy_flow_table_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x40];
+
+	u8         table_type[0x8];
+	u8         reserved_3[0x18];
+
+	u8         reserved_4[0x8];
+	u8         table_id[0x18];
+
+	u8         reserved_5[0x140];
+};
+
+struct mlx5_ifc_destroy_flow_group_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_destroy_flow_group_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x40];
+
+	u8         table_type[0x8];
+	u8         reserved_3[0x18];
+
+	u8         reserved_4[0x8];
+	u8         table_id[0x18];
+
+	u8         group_id[0x20];
+
+	u8         reserved_5[0x120];
+};
+
+struct mlx5_ifc_destroy_eq_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_destroy_eq_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x18];
+	u8         eq_number[0x8];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_destroy_dct_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_destroy_dct_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         dctn[0x18];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_destroy_cq_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_destroy_cq_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         cqn[0x18];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_delete_vxlan_udp_dport_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_delete_vxlan_udp_dport_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x20];
+
+	u8         reserved_3[0x10];
+	u8         vxlan_udp_port[0x10];
+};
+
+struct mlx5_ifc_delete_l2_table_entry_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_delete_l2_table_entry_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x60];
+
+	u8         reserved_3[0x8];
+	u8         table_index[0x18];
+
+	u8         reserved_4[0x140];
+};
+
+struct mlx5_ifc_delete_fte_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_delete_fte_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x40];
+
+	u8         table_type[0x8];
+	u8         reserved_3[0x18];
+
+	u8         reserved_4[0x8];
+	u8         table_id[0x18];
+
+	u8         reserved_5[0x40];
+
+	u8         flow_index[0x20];
+
+	u8         reserved_6[0xe0];
+};
+
+struct mlx5_ifc_dealloc_xrcd_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_dealloc_xrcd_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         xrcd[0x18];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_dealloc_uar_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_dealloc_uar_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         uar[0x18];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_dealloc_transport_domain_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_dealloc_transport_domain_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         transport_domain[0x18];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_dealloc_q_counter_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_dealloc_q_counter_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x18];
+	u8         counter_set_id[0x8];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_dealloc_pd_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_dealloc_pd_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         pd[0x18];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_create_xrc_srq_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x8];
+	u8         xrc_srqn[0x18];
+
+	u8         reserved_2[0x20];
+};
+
+struct mlx5_ifc_create_xrc_srq_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x40];
+
+	struct mlx5_ifc_xrc_srqc_bits xrc_srq_context_entry;
+
+	u8         reserved_3[0x600];
+
+	u8         pas[0][0x40];
+};
+
+struct mlx5_ifc_create_tis_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x8];
+	u8         tisn[0x18];
+
+	u8         reserved_2[0x20];
+};
+
+struct mlx5_ifc_create_tis_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0xc0];
+
+	struct mlx5_ifc_tisc_bits ctx;
+};
+
+struct mlx5_ifc_create_tir_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x8];
+	u8         tirn[0x18];
+
+	u8         reserved_2[0x20];
+};
+
+struct mlx5_ifc_create_tir_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0xc0];
+
+	struct mlx5_ifc_tirc_bits ctx;
+};
+
+struct mlx5_ifc_create_srq_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x8];
+	u8         srqn[0x18];
+
+	u8         reserved_2[0x20];
+};
+
+struct mlx5_ifc_create_srq_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x40];
+
+	struct mlx5_ifc_srqc_bits srq_context_entry;
+
+	u8         reserved_3[0x600];
+
+	u8         pas[0][0x40];
+};
+
+struct mlx5_ifc_create_sq_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x8];
+	u8         sqn[0x18];
+
+	u8         reserved_2[0x20];
+};
+
+struct mlx5_ifc_create_sq_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0xc0];
+
+	struct mlx5_ifc_sqc_bits ctx;
+};
+
+struct mlx5_ifc_create_rqt_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x8];
+	u8         rqtn[0x18];
+
+	u8         reserved_2[0x20];
+};
+
+struct mlx5_ifc_create_rqt_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0xc0];
+
+	struct mlx5_ifc_rqtc_bits rqt_context;
+};
+
+struct mlx5_ifc_create_rq_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x8];
+	u8         rqn[0x18];
+
+	u8         reserved_2[0x20];
+};
+
+struct mlx5_ifc_create_rq_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0xc0];
+
+	struct mlx5_ifc_rqc_bits ctx;
+};
+
+struct mlx5_ifc_create_rmp_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x8];
+	u8         rmpn[0x18];
+
+	u8         reserved_2[0x20];
+};
+
+struct mlx5_ifc_create_rmp_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0xc0];
+
+	struct mlx5_ifc_rmpc_bits ctx;
+};
+
+struct mlx5_ifc_create_qp_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x8];
+	u8         qpn[0x18];
+
+	u8         reserved_2[0x20];
+};
+
+struct mlx5_ifc_create_qp_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x40];
+
+	u8         opt_param_mask[0x20];
+
+	u8         reserved_3[0x20];
+
+	struct mlx5_ifc_qpc_bits qpc;
+
+	u8         reserved_4[0x80];
+
+	u8         pas[0][0x40];
+};
+
+struct mlx5_ifc_create_psv_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+
+	u8         reserved_2[0x8];
+	u8         psv0_index[0x18];
+
+	u8         reserved_3[0x8];
+	u8         psv1_index[0x18];
+
+	u8         reserved_4[0x8];
+	u8         psv2_index[0x18];
+
+	u8         reserved_5[0x8];
+	u8         psv3_index[0x18];
+};
+
+struct mlx5_ifc_create_psv_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         num_psv[0x4];
+	u8         reserved_2[0x4];
+	u8         pd[0x18];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_create_mkey_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x8];
+	u8         mkey_index[0x18];
+
+	u8         reserved_2[0x20];
+};
+
+struct mlx5_ifc_create_mkey_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x20];
+
+	u8         pg_access[0x1];
+	u8         reserved_3[0x1f];
+
+	struct mlx5_ifc_mkc_bits memory_key_mkey_entry;
+
+	u8         reserved_4[0x80];
+
+	u8         translations_octword_actual_size[0x20];
+
+	u8         reserved_5[0x560];
+
+	u8         klm_pas_mtt[0][0x20];
+};
+
+struct mlx5_ifc_create_flow_table_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x8];
+	u8         table_id[0x18];
+
+	u8         reserved_2[0x20];
+};
+
+struct mlx5_ifc_create_flow_table_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x40];
+
+	u8         table_type[0x8];
+	u8         reserved_3[0x18];
+
+	u8         reserved_4[0x20];
+
+	u8         reserved_5[0x8];
+	u8         level[0x8];
+	u8         reserved_6[0x8];
+	u8         log_size[0x8];
+
+	u8         reserved_7[0x120];
+};
+
+struct mlx5_ifc_create_flow_group_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x8];
+	u8         group_id[0x18];
+
+	u8         reserved_2[0x20];
+};
+
+enum {
+	MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS    = 0x0,
+	MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS  = 0x1,
+	MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS    = 0x2,
+};
+
+struct mlx5_ifc_create_flow_group_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x40];
+
+	u8         table_type[0x8];
+	u8         reserved_3[0x18];
+
+	u8         reserved_4[0x8];
+	u8         table_id[0x18];
+
+	u8         reserved_5[0x20];
+
+	u8         start_flow_index[0x20];
+
+	u8         reserved_6[0x20];
+
+	u8         end_flow_index[0x20];
+
+	u8         reserved_7[0xa0];
+
+	u8         reserved_8[0x18];
+	u8         match_criteria_enable[0x8];
+
+	struct mlx5_ifc_fte_match_param_bits match_criteria;
+
+	u8         reserved_9[0xe00];
+};
+
+struct mlx5_ifc_create_eq_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x18];
+	u8         eq_number[0x8];
+
+	u8         reserved_2[0x20];
+};
+
+struct mlx5_ifc_create_eq_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x40];
+
+	struct mlx5_ifc_eqc_bits eq_context_entry;
+
+	u8         reserved_3[0x40];
+
+	u8         event_bitmask[0x40];
+
+	u8         reserved_4[0x580];
+
+	u8         pas[0][0x40];
+};
+
+struct mlx5_ifc_create_dct_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x8];
+	u8         dctn[0x18];
+
+	u8         reserved_2[0x20];
+};
+
+struct mlx5_ifc_create_dct_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x40];
+
+	struct mlx5_ifc_dctc_bits dct_context_entry;
+
+	u8         reserved_3[0x180];
+};
+
+struct mlx5_ifc_create_cq_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x8];
+	u8         cqn[0x18];
+
+	u8         reserved_2[0x20];
+};
+
+struct mlx5_ifc_create_cq_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x40];
+
+	struct mlx5_ifc_cqc_bits cq_context;
+
+	u8         reserved_3[0x600];
+
+	u8         pas[0][0x40];
+};
+
+struct mlx5_ifc_config_int_moderation_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x4];
+	u8         min_delay[0xc];
+	u8         int_vector[0x10];
+
+	u8         reserved_2[0x20];
+};
+
+enum {
+	MLX5_CONFIG_INT_MODERATION_IN_OP_MOD_WRITE  = 0x0,
+	MLX5_CONFIG_INT_MODERATION_IN_OP_MOD_READ   = 0x1,
+};
+
+struct mlx5_ifc_config_int_moderation_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x4];
+	u8         min_delay[0xc];
+	u8         int_vector[0x10];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_attach_to_mcg_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_attach_to_mcg_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         qpn[0x18];
+
+	u8         reserved_3[0x20];
+
+	u8         multicast_gid[16][0x8];
+};
+
+struct mlx5_ifc_arm_xrc_srq_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+enum {
+	MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ  = 0x1,
+};
+
+struct mlx5_ifc_arm_xrc_srq_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         xrc_srqn[0x18];
+
+	u8         reserved_3[0x10];
+	u8         lwm[0x10];
+};
+
+struct mlx5_ifc_arm_rq_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+enum {
+	MLX5_ARM_RQ_IN_OP_MOD_SRQ_  = 0x1,
+};
+
+struct mlx5_ifc_arm_rq_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         srq_number[0x18];
+
+	u8         reserved_3[0x10];
+	u8         lwm[0x10];
+};
+
+struct mlx5_ifc_arm_dct_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_arm_dct_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x8];
+	u8         dct_number[0x18];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_alloc_xrcd_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x8];
+	u8         xrcd[0x18];
+
+	u8         reserved_2[0x20];
+};
+
+struct mlx5_ifc_alloc_xrcd_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x40];
+};
+
+struct mlx5_ifc_alloc_uar_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x8];
+	u8         uar[0x18];
+
+	u8         reserved_2[0x20];
+};
+
+struct mlx5_ifc_alloc_uar_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x40];
+};
+
+struct mlx5_ifc_alloc_transport_domain_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x8];
+	u8         transport_domain[0x18];
+
+	u8         reserved_2[0x20];
+};
+
+struct mlx5_ifc_alloc_transport_domain_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x40];
+};
+
+struct mlx5_ifc_alloc_q_counter_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x18];
+	u8         counter_set_id[0x8];
+
+	u8         reserved_2[0x20];
+};
+
+struct mlx5_ifc_alloc_q_counter_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x40];
+};
+
+struct mlx5_ifc_alloc_pd_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x8];
+	u8         pd[0x18];
+
+	u8         reserved_2[0x20];
+};
+
+struct mlx5_ifc_alloc_pd_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x40];
+};
+
+struct mlx5_ifc_add_vxlan_udp_dport_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_add_vxlan_udp_dport_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x20];
+
+	u8         reserved_3[0x10];
+	u8         vxlan_udp_port[0x10];
+};
+
+struct mlx5_ifc_access_register_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+
+	u8         register_data[0][0x20];
+};
+
+enum {
+	MLX5_ACCESS_REGISTER_IN_OP_MOD_WRITE  = 0x0,
+	MLX5_ACCESS_REGISTER_IN_OP_MOD_READ   = 0x1,
+};
+
+struct mlx5_ifc_access_register_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x10];
+	u8         register_id[0x10];
+
+	u8         argument[0x20];
+
+	u8         register_data[0][0x20];
+};
+
+struct mlx5_ifc_sltp_reg_bits {
+	u8         status[0x4];
+	u8         version[0x4];
+	u8         local_port[0x8];
+	u8         pnat[0x2];
+	u8         reserved_0[0x2];
+	u8         lane[0x4];
+	u8         reserved_1[0x8];
+
+	u8         reserved_2[0x20];
+
+	u8         reserved_3[0x7];
+	u8         polarity[0x1];
+	u8         ob_tap0[0x8];
+	u8         ob_tap1[0x8];
+	u8         ob_tap2[0x8];
+
+	u8         reserved_4[0xc];
+	u8         ob_preemp_mode[0x4];
+	u8         ob_reg[0x8];
+	u8         ob_bias[0x8];
+
+	u8         reserved_5[0x20];
+};
+
+struct mlx5_ifc_slrg_reg_bits {
+	u8         status[0x4];
+	u8         version[0x4];
+	u8         local_port[0x8];
+	u8         pnat[0x2];
+	u8         reserved_0[0x2];
+	u8         lane[0x4];
+	u8         reserved_1[0x8];
+
+	u8         time_to_link_up[0x10];
+	u8         reserved_2[0xc];
+	u8         grade_lane_speed[0x4];
+
+	u8         grade_version[0x8];
+	u8         grade[0x18];
+
+	u8         reserved_3[0x4];
+	u8         height_grade_type[0x4];
+	u8         height_grade[0x18];
+
+	u8         height_dz[0x10];
+	u8         height_dv[0x10];
+
+	u8         reserved_4[0x10];
+	u8         height_sigma[0x10];
+
+	u8         reserved_5[0x20];
+
+	u8         reserved_6[0x4];
+	u8         phase_grade_type[0x4];
+	u8         phase_grade[0x18];
+
+	u8         reserved_7[0x8];
+	u8         phase_eo_pos[0x8];
+	u8         reserved_8[0x8];
+	u8         phase_eo_neg[0x8];
+
+	u8         ffe_set_tested[0x10];
+	u8         test_errors_per_lane[0x10];
+};
+
+struct mlx5_ifc_pvlc_reg_bits {
+	u8         reserved_0[0x8];
+	u8         local_port[0x8];
+	u8         reserved_1[0x10];
+
+	u8         reserved_2[0x1c];
+	u8         vl_hw_cap[0x4];
+
+	u8         reserved_3[0x1c];
+	u8         vl_admin[0x4];
+
+	u8         reserved_4[0x1c];
+	u8         vl_operational[0x4];
+};
+
+struct mlx5_ifc_pude_reg_bits {
+	u8         swid[0x8];
+	u8         local_port[0x8];
+	u8         reserved_0[0x4];
+	u8         admin_status[0x4];
+	u8         reserved_1[0x4];
+	u8         oper_status[0x4];
+
+	u8         reserved_2[0x60];
+};
+
+struct mlx5_ifc_ptys_reg_bits {
+	u8         reserved_0[0x8];
+	u8         local_port[0x8];
+	u8         reserved_1[0xd];
+	u8         proto_mask[0x3];
+
+	u8         reserved_2[0x40];
+
+	u8         eth_proto_capability[0x20];
+
+	u8         ib_link_width_capability[0x10];
+	u8         ib_proto_capability[0x10];
+
+	u8         reserved_3[0x20];
+
+	u8         eth_proto_admin[0x20];
+
+	u8         ib_link_width_admin[0x10];
+	u8         ib_proto_admin[0x10];
+
+	u8         reserved_4[0x20];
+
+	u8         eth_proto_oper[0x20];
+
+	u8         ib_link_width_oper[0x10];
+	u8         ib_proto_oper[0x10];
+
+	u8         reserved_5[0x20];
+
+	u8         eth_proto_lp_advertise[0x20];
+
+	u8         reserved_6[0x60];
+};
+
+struct mlx5_ifc_ptas_reg_bits {
+	u8         reserved_0[0x20];
+
+	u8         algorithm_options[0x10];
+	u8         reserved_1[0x4];
+	u8         repetitions_mode[0x4];
+	u8         num_of_repetitions[0x8];
+
+	u8         grade_version[0x8];
+	u8         height_grade_type[0x4];
+	u8         phase_grade_type[0x4];
+	u8         height_grade_weight[0x8];
+	u8         phase_grade_weight[0x8];
+
+	u8         gisim_measure_bits[0x10];
+	u8         adaptive_tap_measure_bits[0x10];
+
+	u8         ber_bath_high_error_threshold[0x10];
+	u8         ber_bath_mid_error_threshold[0x10];
+
+	u8         ber_bath_low_error_threshold[0x10];
+	u8         one_ratio_high_threshold[0x10];
+
+	u8         one_ratio_high_mid_threshold[0x10];
+	u8         one_ratio_low_mid_threshold[0x10];
+
+	u8         one_ratio_low_threshold[0x10];
+	u8         ndeo_error_threshold[0x10];
+
+	u8         mixer_offset_step_size[0x10];
+	u8         reserved_2[0x8];
+	u8         mix90_phase_for_voltage_bath[0x8];
+
+	u8         mixer_offset_start[0x10];
+	u8         mixer_offset_end[0x10];
+
+	u8         reserved_3[0x15];
+	u8         ber_test_time[0xb];
+};
+
+struct mlx5_ifc_pspa_reg_bits {
+	u8         swid[0x8];
+	u8         local_port[0x8];
+	u8         sub_port[0x8];
+	u8         reserved_0[0x8];
+
+	u8         reserved_1[0x20];
+};
+
+struct mlx5_ifc_pqdr_reg_bits {
+	u8         reserved_0[0x8];
+	u8         local_port[0x8];
+	u8         reserved_1[0x5];
+	u8         prio[0x3];
+	u8         reserved_2[0x6];
+	u8         mode[0x2];
+
+	u8         reserved_3[0x20];
+
+	u8         reserved_4[0x10];
+	u8         min_threshold[0x10];
+
+	u8         reserved_5[0x10];
+	u8         max_threshold[0x10];
+
+	u8         reserved_6[0x10];
+	u8         mark_probability_denominator[0x10];
+
+	u8         reserved_7[0x60];
+};
+
+struct mlx5_ifc_ppsc_reg_bits {
+	u8         reserved_0[0x8];
+	u8         local_port[0x8];
+	u8         reserved_1[0x10];
+
+	u8         reserved_2[0x60];
+
+	u8         reserved_3[0x1c];
+	u8         wrps_admin[0x4];
+
+	u8         reserved_4[0x1c];
+	u8         wrps_status[0x4];
+
+	u8         reserved_5[0x8];
+	u8         up_threshold[0x8];
+	u8         reserved_6[0x8];
+	u8         down_threshold[0x8];
+
+	u8         reserved_7[0x20];
+
+	u8         reserved_8[0x1c];
+	u8         srps_admin[0x4];
+
+	u8         reserved_9[0x1c];
+	u8         srps_status[0x4];
+
+	u8         reserved_10[0x40];
+};
+
+struct mlx5_ifc_pplr_reg_bits {
+	u8         reserved_0[0x8];
+	u8         local_port[0x8];
+	u8         reserved_1[0x10];
+
+	u8         reserved_2[0x8];
+	u8         lb_cap[0x8];
+	u8         reserved_3[0x8];
+	u8         lb_en[0x8];
+};
+
+struct mlx5_ifc_pplm_reg_bits {
+	u8         reserved_0[0x8];
+	u8         local_port[0x8];
+	u8         reserved_1[0x10];
+
+	u8         reserved_2[0x20];
+
+	u8         port_profile_mode[0x8];
+	u8         static_port_profile[0x8];
+	u8         active_port_profile[0x8];
+	u8         reserved_3[0x8];
+
+	u8         retransmission_active[0x8];
+	u8         fec_mode_active[0x18];
+
+	u8         reserved_4[0x20];
+};
+
+struct mlx5_ifc_ppcnt_reg_bits {
+	u8         swid[0x8];
+	u8         local_port[0x8];
+	u8         pnat[0x2];
+	u8         reserved_0[0x8];
+	u8         grp[0x6];
+
+	u8         clr[0x1];
+	u8         reserved_1[0x1c];
+	u8         prio_tc[0x3];
+
+	union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits counter_set;
+};
+
+struct mlx5_ifc_ppad_reg_bits {
+	u8         reserved_0[0x3];
+	u8         single_mac[0x1];
+	u8         reserved_1[0x4];
+	u8         local_port[0x8];
+	u8         mac_47_32[0x10];
+
+	u8         mac_31_0[0x20];
+
+	u8         reserved_2[0x40];
+};
+
+struct mlx5_ifc_pmtu_reg_bits {
+	u8         reserved_0[0x8];
+	u8         local_port[0x8];
+	u8         reserved_1[0x10];
+
+	u8         max_mtu[0x10];
+	u8         reserved_2[0x10];
+
+	u8         admin_mtu[0x10];
+	u8         reserved_3[0x10];
+
+	u8         oper_mtu[0x10];
+	u8         reserved_4[0x10];
+};
+
+struct mlx5_ifc_pmpr_reg_bits {
+	u8         reserved_0[0x8];
+	u8         module[0x8];
+	u8         reserved_1[0x10];
+
+	u8         reserved_2[0x18];
+	u8         attenuation_5g[0x8];
+
+	u8         reserved_3[0x18];
+	u8         attenuation_7g[0x8];
+
+	u8         reserved_4[0x18];
+	u8         attenuation_12g[0x8];
+};
+
+struct mlx5_ifc_pmpe_reg_bits {
+	u8         reserved_0[0x8];
+	u8         module[0x8];
+	u8         reserved_1[0xc];
+	u8         module_status[0x4];
+
+	u8         reserved_2[0x60];
+};
+
+struct mlx5_ifc_pmpc_reg_bits {
+	u8         module_state_updated[32][0x8];
+};
+
+struct mlx5_ifc_pmlpn_reg_bits {
+	u8         reserved_0[0x4];
+	u8         mlpn_status[0x4];
+	u8         local_port[0x8];
+	u8         reserved_1[0x10];
+
+	u8         e[0x1];
+	u8         reserved_2[0x1f];
+};
+
+struct mlx5_ifc_pmlp_reg_bits {
+	u8         rxtx[0x1];
+	u8         reserved_0[0x7];
+	u8         local_port[0x8];
+	u8         reserved_1[0x8];
+	u8         width[0x8];
+
+	u8         lane0_module_mapping[0x20];
+
+	u8         lane1_module_mapping[0x20];
+
+	u8         lane2_module_mapping[0x20];
+
+	u8         lane3_module_mapping[0x20];
+
+	u8         reserved_2[0x160];
+};
+
+struct mlx5_ifc_pmaos_reg_bits {
+	u8         reserved_0[0x8];
+	u8         module[0x8];
+	u8         reserved_1[0x4];
+	u8         admin_status[0x4];
+	u8         reserved_2[0x4];
+	u8         oper_status[0x4];
+
+	u8         ase[0x1];
+	u8         ee[0x1];
+	u8         reserved_3[0x1c];
+	u8         e[0x2];
+
+	u8         reserved_4[0x40];
+};
+
+struct mlx5_ifc_plpc_reg_bits {
+	u8         reserved_0[0x4];
+	u8         profile_id[0xc];
+	u8         reserved_1[0x4];
+	u8         proto_mask[0x4];
+	u8         reserved_2[0x8];
+
+	u8         reserved_3[0x10];
+	u8         lane_speed[0x10];
+
+	u8         reserved_4[0x17];
+	u8         lpbf[0x1];
+	u8         fec_mode_policy[0x8];
+
+	u8         retransmission_capability[0x8];
+	u8         fec_mode_capability[0x18];
+
+	u8         retransmission_support_admin[0x8];
+	u8         fec_mode_support_admin[0x18];
+
+	u8         retransmission_request_admin[0x8];
+	u8         fec_mode_request_admin[0x18];
+
+	u8         reserved_5[0x80];
+};
+
+struct mlx5_ifc_plib_reg_bits {
+	u8         reserved_0[0x8];
+	u8         local_port[0x8];
+	u8         reserved_1[0x8];
+	u8         ib_port[0x8];
+
+	u8         reserved_2[0x60];
+};
+
+struct mlx5_ifc_plbf_reg_bits {
+	u8         reserved_0[0x8];
+	u8         local_port[0x8];
+	u8         reserved_1[0xd];
+	u8         lbf_mode[0x3];
+
+	u8         reserved_2[0x20];
+};
+
+struct mlx5_ifc_pipg_reg_bits {
+	u8         reserved_0[0x8];
+	u8         local_port[0x8];
+	u8         reserved_1[0x10];
+
+	u8         dic[0x1];
+	u8         reserved_2[0x19];
+	u8         ipg[0x4];
+	u8         reserved_3[0x2];
+};
+
+struct mlx5_ifc_pifr_reg_bits {
+	u8         reserved_0[0x8];
+	u8         local_port[0x8];
+	u8         reserved_1[0x10];
+
+	u8         reserved_2[0xe0];
+
+	u8         port_filter[8][0x20];
+
+	u8         port_filter_update_en[8][0x20];
+};
+
+struct mlx5_ifc_pfcc_reg_bits {
+	u8         reserved_0[0x8];
+	u8         local_port[0x8];
+	u8         reserved_1[0x10];
+
+	u8         ppan[0x4];
+	u8         reserved_2[0x4];
+	u8         prio_mask_tx[0x8];
+	u8         reserved_3[0x8];
+	u8         prio_mask_rx[0x8];
+
+	u8         pptx[0x1];
+	u8         aptx[0x1];
+	u8         reserved_4[0x6];
+	u8         pfctx[0x8];
+	u8         reserved_5[0x10];
+
+	u8         pprx[0x1];
+	u8         aprx[0x1];
+	u8         reserved_6[0x6];
+	u8         pfcrx[0x8];
+	u8         reserved_7[0x10];
+
+	u8         reserved_8[0x80];
+};
+
+struct mlx5_ifc_pelc_reg_bits {
+	u8         op[0x4];
+	u8         reserved_0[0x4];
+	u8         local_port[0x8];
+	u8         reserved_1[0x10];
+
+	u8         op_admin[0x8];
+	u8         op_capability[0x8];
+	u8         op_request[0x8];
+	u8         op_active[0x8];
+
+	u8         admin[0x40];
+
+	u8         capability[0x40];
+
+	u8         request[0x40];
+
+	u8         active[0x40];
+
+	u8         reserved_2[0x80];
+};
+
+struct mlx5_ifc_peir_reg_bits {
+	u8         reserved_0[0x8];
+	u8         local_port[0x8];
+	u8         reserved_1[0x10];
+
+	u8         reserved_2[0xc];
+	u8         error_count[0x4];
+	u8         reserved_3[0x10];
+
+	u8         reserved_4[0xc];
+	u8         lane[0x4];
+	u8         reserved_5[0x8];
+	u8         error_type[0x8];
+};
+
+struct mlx5_ifc_pcap_reg_bits {
+	u8         reserved_0[0x8];
+	u8         local_port[0x8];
+	u8         reserved_1[0x10];
+
+	u8         port_capability_mask[4][0x20];
+};
+
+struct mlx5_ifc_paos_reg_bits {
+	u8         swid[0x8];
+	u8         local_port[0x8];
+	u8         reserved_0[0x4];
+	u8         admin_status[0x4];
+	u8         reserved_1[0x4];
+	u8         oper_status[0x4];
+
+	u8         ase[0x1];
+	u8         ee[0x1];
+	u8         reserved_2[0x1c];
+	u8         e[0x2];
+
+	u8         reserved_3[0x40];
+};
+
+struct mlx5_ifc_pamp_reg_bits {
+	u8         reserved_0[0x8];
+	u8         opamp_group[0x8];
+	u8         reserved_1[0xc];
+	u8         opamp_group_type[0x4];
+
+	u8         start_index[0x10];
+	u8         reserved_2[0x4];
+	u8         num_of_indices[0xc];
+
+	u8         index_data[18][0x10];
+};
+
+struct mlx5_ifc_lane_2_module_mapping_bits {
+	u8         reserved_0[0x6];
+	u8         rx_lane[0x2];
+	u8         reserved_1[0x6];
+	u8         tx_lane[0x2];
+	u8         reserved_2[0x8];
+	u8         module[0x8];
+};
+
+struct mlx5_ifc_bufferx_reg_bits {
+	u8         reserved_0[0x6];
+	u8         lossy[0x1];
+	u8         epsb[0x1];
+	u8         reserved_1[0xc];
+	u8         size[0xc];
+
+	u8         xoff_threshold[0x10];
+	u8         xon_threshold[0x10];
+};
+
+struct mlx5_ifc_set_node_in_bits {
+	u8         node_description[64][0x8];
+};
+
+struct mlx5_ifc_register_power_settings_bits {
+	u8         reserved_0[0x18];
+	u8         power_settings_level[0x8];
+
+	u8         reserved_1[0x60];
+};
+
+struct mlx5_ifc_register_host_endianness_bits {
+	u8         he[0x1];
+	u8         reserved_0[0x1f];
+
+	u8         reserved_1[0x60];
+};
+
+struct mlx5_ifc_umr_pointer_desc_argument_bits {
+	u8         reserved_0[0x20];
+
+	u8         mkey[0x20];
+
+	u8         addressh_63_32[0x20];
+
+	u8         addressl_31_0[0x20];
+};
+
+struct mlx5_ifc_ud_adrs_vector_bits {
+	u8         dc_key[0x40];
+
+	u8         ext[0x1];
+	u8         reserved_0[0x7];
+	u8         destination_qp_dct[0x18];
+
+	u8         static_rate[0x4];
+	u8         sl_eth_prio[0x4];
+	u8         fl[0x1];
+	u8         mlid[0x7];
+	u8         rlid_udp_sport[0x10];
+
+	u8         reserved_1[0x20];
+
+	u8         rmac_47_16[0x20];
+
+	u8         rmac_15_0[0x10];
+	u8         tclass[0x8];
+	u8         hop_limit[0x8];
+
+	u8         reserved_2[0x1];
+	u8         grh[0x1];
+	u8         reserved_3[0x2];
+	u8         src_addr_index[0x8];
+	u8         flow_label[0x14];
+
+	u8         rgid_rip[16][0x8];
+};
+
+struct mlx5_ifc_pages_req_event_bits {
+	u8         reserved_0[0x10];
+	u8         function_id[0x10];
+
+	u8         num_pages[0x20];
+
+	u8         reserved_1[0xa0];
+};
+
+struct mlx5_ifc_eqe_bits {
+	u8         reserved_0[0x8];
+	u8         event_type[0x8];
+	u8         reserved_1[0x8];
+	u8         event_sub_type[0x8];
+
+	u8         reserved_2[0xe0];
+
+	union mlx5_ifc_event_auto_bits event_data;
+
+	u8         reserved_3[0x10];
+	u8         signature[0x8];
+	u8         reserved_4[0x7];
+	u8         owner[0x1];
+};
+
+enum {
+	MLX5_CMD_QUEUE_ENTRY_TYPE_PCIE_CMD_IF_TRANSPORT  = 0x7,
+};
+
+struct mlx5_ifc_cmd_queue_entry_bits {
+	u8         type[0x8];
+	u8         reserved_0[0x18];
+
+	u8         input_length[0x20];
+
+	u8         input_mailbox_pointer_63_32[0x20];
+
+	u8         input_mailbox_pointer_31_9[0x17];
+	u8         reserved_1[0x9];
+
+	u8         command_input_inline_data[16][0x8];
+
+	u8         command_output_inline_data[16][0x8];
+
+	u8         output_mailbox_pointer_63_32[0x20];
+
+	u8         output_mailbox_pointer_31_9[0x17];
+	u8         reserved_2[0x9];
+
+	u8         output_length[0x20];
+
+	u8         token[0x8];
+	u8         signature[0x8];
+	u8         reserved_3[0x8];
+	u8         status[0x7];
+	u8         ownership[0x1];
+};
+
+struct mlx5_ifc_cmd_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         command_output[0x20];
+};
+
+struct mlx5_ifc_cmd_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         command[0][0x20];
+};
+
+struct mlx5_ifc_cmd_if_box_bits {
+	u8         mailbox_data[512][0x8];
+
+	u8         reserved_0[0x180];
+
+	u8         next_pointer_63_32[0x20];
+
+	u8         next_pointer_31_10[0x16];
+	u8         reserved_1[0xa];
+
+	u8         block_number[0x20];
+
+	u8         reserved_2[0x8];
+	u8         token[0x8];
+	u8         ctrl_signature[0x8];
+	u8         signature[0x8];
+};
+
+struct mlx5_ifc_mtt_bits {
+	u8         ptag_63_32[0x20];
+
+	u8         ptag_31_8[0x18];
+	u8         reserved_0[0x6];
+	u8         wr_en[0x1];
+	u8         rd_en[0x1];
+};
+
+enum {
+	MLX5_INITIAL_SEG_NIC_INTERFACE_FULL_DRIVER  = 0x0,
+	MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED     = 0x1,
+	MLX5_INITIAL_SEG_NIC_INTERFACE_NO_DRAM_NIC  = 0x2,
+};
+
+enum {
+	MLX5_INITIAL_SEG_NIC_INTERFACE_SUPPORTED_FULL_DRIVER  = 0x0,
+	MLX5_INITIAL_SEG_NIC_INTERFACE_SUPPORTED_DISABLED     = 0x1,
+	MLX5_INITIAL_SEG_NIC_INTERFACE_SUPPORTED_NO_DRAM_NIC  = 0x2,
+};
+
+enum {
+	MLX5_INITIAL_SEG_HEALTH_SYNDROME_FW_INTERNAL_ERR              = 0x1,
+	MLX5_INITIAL_SEG_HEALTH_SYNDROME_DEAD_IRISC                   = 0x7,
+	MLX5_INITIAL_SEG_HEALTH_SYNDROME_HW_FATAL_ERR                 = 0x8,
+	MLX5_INITIAL_SEG_HEALTH_SYNDROME_FW_CRC_ERR                   = 0x9,
+	MLX5_INITIAL_SEG_HEALTH_SYNDROME_ICM_FETCH_PCI_ERR            = 0xa,
+	MLX5_INITIAL_SEG_HEALTH_SYNDROME_ICM_PAGE_ERR                 = 0xb,
+	MLX5_INITIAL_SEG_HEALTH_SYNDROME_ASYNCHRONOUS_EQ_BUF_OVERRUN  = 0xc,
+	MLX5_INITIAL_SEG_HEALTH_SYNDROME_EQ_IN_ERR                    = 0xd,
+	MLX5_INITIAL_SEG_HEALTH_SYNDROME_EQ_INV                       = 0xe,
+	MLX5_INITIAL_SEG_HEALTH_SYNDROME_FFSER_ERR                    = 0xf,
+	MLX5_INITIAL_SEG_HEALTH_SYNDROME_HIGH_TEMP_ERR                = 0x10,
+};
+
+struct mlx5_ifc_initial_seg_bits {
+	u8         fw_rev_minor[0x10];
+	u8         fw_rev_major[0x10];
+
+	u8         cmd_interface_rev[0x10];
+	u8         fw_rev_subminor[0x10];
+
+	u8         reserved_0[0x40];
+
+	u8         cmdq_phy_addr_63_32[0x20];
+
+	u8         cmdq_phy_addr_31_12[0x14];
+	u8         reserved_1[0x2];
+	u8         nic_interface[0x2];
+	u8         log_cmdq_size[0x4];
+	u8         log_cmdq_stride[0x4];
+
+	u8         command_doorbell_vector[0x20];
+
+	u8         reserved_2[0xf00];
+
+	u8         initializing[0x1];
+	u8         reserved_3[0x4];
+	u8         nic_interface_supported[0x3];
+	u8         reserved_4[0x18];
+
+	struct mlx5_ifc_health_buffer_bits health_buffer;
+
+	u8         no_dram_nic_offset[0x20];
+
+	u8         reserved_5[0x6e40];
+
+	u8         reserved_6[0x1f];
+	u8         clear_int[0x1];
+
+	u8         health_syndrome[0x8];
+	u8         health_counter[0x18];
+
+	u8         reserved_7[0x17fc0];
+};
+
+union mlx5_ifc_ports_control_registers_document_bits {
+	struct mlx5_ifc_bufferx_reg_bits bufferx_reg;
+	struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits eth_2819_cntrs_grp_data_layout;
+	struct mlx5_ifc_eth_2863_cntrs_grp_data_layout_bits eth_2863_cntrs_grp_data_layout;
+	struct mlx5_ifc_eth_3635_cntrs_grp_data_layout_bits eth_3635_cntrs_grp_data_layout;
+	struct mlx5_ifc_eth_802_3_cntrs_grp_data_layout_bits eth_802_3_cntrs_grp_data_layout;
+	struct mlx5_ifc_eth_extended_cntrs_grp_data_layout_bits eth_extended_cntrs_grp_data_layout;
+	struct mlx5_ifc_eth_per_prio_grp_data_layout_bits eth_per_prio_grp_data_layout;
+	struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits eth_per_traffic_grp_data_layout;
+	struct mlx5_ifc_lane_2_module_mapping_bits lane_2_module_mapping;
+	struct mlx5_ifc_pamp_reg_bits pamp_reg;
+	struct mlx5_ifc_paos_reg_bits paos_reg;
+	struct mlx5_ifc_pcap_reg_bits pcap_reg;
+	struct mlx5_ifc_peir_reg_bits peir_reg;
+	struct mlx5_ifc_pelc_reg_bits pelc_reg;
+	struct mlx5_ifc_pfcc_reg_bits pfcc_reg;
+	struct mlx5_ifc_phys_layer_cntrs_bits phys_layer_cntrs;
+	struct mlx5_ifc_pifr_reg_bits pifr_reg;
+	struct mlx5_ifc_pipg_reg_bits pipg_reg;
+	struct mlx5_ifc_plbf_reg_bits plbf_reg;
+	struct mlx5_ifc_plib_reg_bits plib_reg;
+	struct mlx5_ifc_plpc_reg_bits plpc_reg;
+	struct mlx5_ifc_pmaos_reg_bits pmaos_reg;
+	struct mlx5_ifc_pmlp_reg_bits pmlp_reg;
+	struct mlx5_ifc_pmlpn_reg_bits pmlpn_reg;
+	struct mlx5_ifc_pmpc_reg_bits pmpc_reg;
+	struct mlx5_ifc_pmpe_reg_bits pmpe_reg;
+	struct mlx5_ifc_pmpr_reg_bits pmpr_reg;
+	struct mlx5_ifc_pmtu_reg_bits pmtu_reg;
+	struct mlx5_ifc_ppad_reg_bits ppad_reg;
+	struct mlx5_ifc_ppcnt_reg_bits ppcnt_reg;
+	struct mlx5_ifc_pplm_reg_bits pplm_reg;
+	struct mlx5_ifc_pplr_reg_bits pplr_reg;
+	struct mlx5_ifc_ppsc_reg_bits ppsc_reg;
+	struct mlx5_ifc_pqdr_reg_bits pqdr_reg;
+	struct mlx5_ifc_pspa_reg_bits pspa_reg;
+	struct mlx5_ifc_ptas_reg_bits ptas_reg;
+	struct mlx5_ifc_ptys_reg_bits ptys_reg;
+	struct mlx5_ifc_pude_reg_bits pude_reg;
+	struct mlx5_ifc_pvlc_reg_bits pvlc_reg;
+	struct mlx5_ifc_slrg_reg_bits slrg_reg;
+	struct mlx5_ifc_sltp_reg_bits sltp_reg;
+	u8         reserved_0[0x60e0];
+};
+
+union mlx5_ifc_debug_enhancements_document_bits {
+	struct mlx5_ifc_health_buffer_bits health_buffer;
+	u8         reserved_0[0x200];
+};
+
+union mlx5_ifc_uplink_pci_interface_document_bits {
+	struct mlx5_ifc_initial_seg_bits initial_seg;
+	u8         reserved_0[0x20060];
 };
 
 #endif /* MLX5_IFC_H */
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 310b5f7fd6ae..f079fb1a31f7 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -134,13 +134,21 @@ enum {
 
 enum {
 	MLX5_WQE_CTRL_CQ_UPDATE		= 2 << 2,
+	MLX5_WQE_CTRL_CQ_UPDATE_AND_EQE	= 3 << 2,
 	MLX5_WQE_CTRL_SOLICITED		= 1 << 1,
 };
 
 enum {
+	MLX5_SEND_WQE_DS	= 16,
 	MLX5_SEND_WQE_BB	= 64,
 };
 
+#define MLX5_SEND_WQEBB_NUM_DS	(MLX5_SEND_WQE_BB / MLX5_SEND_WQE_DS)
+
+enum {
+	MLX5_SEND_WQE_MAX_WQEBBS	= 16,
+};
+
 enum {
 	MLX5_WQE_FMR_PERM_LOCAL_READ	= 1 << 27,
 	MLX5_WQE_FMR_PERM_LOCAL_WRITE	= 1 << 28,
@@ -200,6 +208,23 @@ struct mlx5_wqe_ctrl_seg {
 #define MLX5_WQE_CTRL_WQE_INDEX_MASK 0x00ffff00
 #define MLX5_WQE_CTRL_WQE_INDEX_SHIFT 8
 
+enum {
+	MLX5_ETH_WQE_L3_INNER_CSUM      = 1 << 4,
+	MLX5_ETH_WQE_L4_INNER_CSUM      = 1 << 5,
+	MLX5_ETH_WQE_L3_CSUM            = 1 << 6,
+	MLX5_ETH_WQE_L4_CSUM            = 1 << 7,
+};
+
+struct mlx5_wqe_eth_seg {
+	u8              rsvd0[4];
+	u8              cs_flags;
+	u8              rsvd1;
+	__be16          mss;
+	__be32          rsvd2;
+	__be16          inline_hdr_sz;
+	u8              inline_hdr_start[2];
+};
+
 struct mlx5_wqe_xrc_seg {
 	__be32			xrc_srqn;
 	u8			rsvd[12];
-- 
cgit v1.2.3


From 938fe83c8dcbbf294d167e6163200a8540ae43c4 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Thu, 28 May 2015 22:28:41 +0300
Subject: net/mlx5_core: New device capabilities handling

- Query all supported types of dev caps on driver load.
- Store the Cap data outbox per cap type into driver private data.
- Introduce new Macros to access/dump stored caps (using the auto
  generated data types).
- Obsolete SW representation of dev caps (no need for SW copy for each
  cap).
- Modify IB driver to use new macros for checking caps.

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/device.h | 66 ++++++++++++++++++++++++++++++++++++++++-----
 include/linux/mlx5/driver.h | 58 +++++----------------------------------
 2 files changed, 65 insertions(+), 59 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index feebed7b392b..4ee52bf1f959 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -59,6 +59,8 @@
 #define MLX5_FLD_SZ_BYTES(typ, fld) (__mlx5_bit_sz(typ, fld) / 8)
 #define MLX5_ST_SZ_BYTES(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 8)
 #define MLX5_ST_SZ_DW(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 32)
+#define MLX5_UN_SZ_BYTES(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 8)
+#define MLX5_UN_SZ_DW(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 32)
 #define MLX5_BYTE_OFF(typ, fld) (__mlx5_bit_off(typ, fld) / 8)
 #define MLX5_ADDR_OF(typ, p, fld) ((char *)(p) + MLX5_BYTE_OFF(typ, fld))
 
@@ -322,13 +324,6 @@ enum {
 	MLX5_CAP_OFF_CMDIF_CSUM		= 46,
 };
 
-enum {
-	HCA_CAP_OPMOD_GET_MAX	= 0,
-	HCA_CAP_OPMOD_GET_CUR	= 1,
-	HCA_CAP_OPMOD_GET_ODP_MAX = 4,
-	HCA_CAP_OPMOD_GET_ODP_CUR = 5
-};
-
 struct mlx5_inbox_hdr {
 	__be16		opcode;
 	u8		rsvd[4];
@@ -1101,4 +1096,61 @@ enum {
 	MLX5_RQC_RQ_TYPE_MEMORY_RQ_RPM    = 0x1,
 };
 
+/* MLX5 DEV CAPs */
+
+/* TODO: EAT.ME */
+enum mlx5_cap_mode {
+	HCA_CAP_OPMOD_GET_MAX	= 0,
+	HCA_CAP_OPMOD_GET_CUR	= 1,
+};
+
+enum mlx5_cap_type {
+	MLX5_CAP_GENERAL = 0,
+	MLX5_CAP_ETHERNET_OFFLOADS,
+	MLX5_CAP_ODP,
+	MLX5_CAP_ATOMIC,
+	MLX5_CAP_ROCE,
+	MLX5_CAP_IPOIB_OFFLOADS,
+	MLX5_CAP_EOIB_OFFLOADS,
+	MLX5_CAP_FLOW_TABLE,
+	/* NUM OF CAP Types */
+	MLX5_CAP_NUM
+};
+
+/* GET Dev Caps macros */
+#define MLX5_CAP_GEN(mdev, cap) \
+	MLX5_GET(cmd_hca_cap, mdev->hca_caps_cur[MLX5_CAP_GENERAL], cap)
+
+#define MLX5_CAP_GEN_MAX(mdev, cap) \
+	MLX5_GET(cmd_hca_cap, mdev->hca_caps_max[MLX5_CAP_GENERAL], cap)
+
+#define MLX5_CAP_ETH(mdev, cap) \
+	MLX5_GET(per_protocol_networking_offload_caps,\
+		 mdev->hca_caps_cur[MLX5_CAP_ETHERNET_OFFLOADS], cap)
+
+#define MLX5_CAP_ETH_MAX(mdev, cap) \
+	MLX5_GET(per_protocol_networking_offload_caps,\
+		 mdev->hca_caps_max[MLX5_CAP_ETHERNET_OFFLOADS], cap)
+
+#define MLX5_CAP_ROCE(mdev, cap) \
+	MLX5_GET(roce_cap, mdev->hca_caps_cur[MLX5_CAP_ROCE], cap)
+
+#define MLX5_CAP_ROCE_MAX(mdev, cap) \
+	MLX5_GET(roce_cap, mdev->hca_caps_max[MLX5_CAP_ROCE], cap)
+
+#define MLX5_CAP_ATOMIC(mdev, cap) \
+	MLX5_GET(atomic_caps, mdev->hca_caps_cur[MLX5_CAP_ATOMIC], cap)
+
+#define MLX5_CAP_ATOMIC_MAX(mdev, cap) \
+	MLX5_GET(atomic_caps, mdev->hca_caps_max[MLX5_CAP_ATOMIC], cap)
+
+#define MLX5_CAP_FLOWTABLE(mdev, cap) \
+	MLX5_GET(flow_table_nic_cap, mdev->hca_caps_cur[MLX5_CAP_FLOW_TABLE], cap)
+
+#define MLX5_CAP_FLOWTABLE_MAX(mdev, cap) \
+	MLX5_GET(flow_table_nic_cap, mdev->hca_caps_max[MLX5_CAP_FLOW_TABLE], cap)
+
+#define MLX5_CAP_ODP(mdev, cap)\
+	MLX5_GET(odp_cap, mdev->hca_caps_cur[MLX5_CAP_ODP], cap)
+
 #endif /* MLX5_DEVICE_H */
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 3fd4fdc1ba16..6b9199163633 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -268,55 +268,7 @@ struct mlx5_cmd {
 struct mlx5_port_caps {
 	int	gid_table_len;
 	int	pkey_table_len;
-};
-
-struct mlx5_general_caps {
-	u8	log_max_eq;
-	u8	log_max_cq;
-	u8	log_max_qp;
-	u8	log_max_mkey;
-	u8	log_max_pd;
-	u8	log_max_srq;
-	u8	log_max_mrw_sz;
-	u8	log_max_bsf_list_size;
-	u8	log_max_klm_list_size;
-	u32	max_cqes;
-	int	max_wqes;
-	u32	max_eqes;
-	u32	max_indirection;
-	int	max_sq_desc_sz;
-	int	max_rq_desc_sz;
-	int	max_dc_sq_desc_sz;
-	u64	flags;
-	u16	stat_rate_support;
-	int	log_max_msg;
-	int	num_ports;
-	u8	log_max_ra_res_qp;
-	u8	log_max_ra_req_qp;
-	int	max_srq_wqes;
-	int	bf_reg_size;
-	int	bf_regs_per_page;
-	struct mlx5_port_caps	port[MLX5_MAX_PORTS];
-	u8			ext_port_cap[MLX5_MAX_PORTS];
-	int	max_vf;
-	u32	reserved_lkey;
-	u8	local_ca_ack_delay;
-	u8	log_max_mcg;
-	u32	max_qp_mcg;
-	int	min_page_sz;
-	int	pd_cap;
-	u32	max_qp_counters;
-	u32	pkey_table_size;
-	u8	log_max_ra_req_dc;
-	u8	log_max_ra_res_dc;
-	u32	uar_sz;
-	u8	min_log_pg_sz;
-	u8	log_max_xrcd;
-	u16	log_uar_page_sz;
-};
-
-struct mlx5_caps {
-	struct mlx5_general_caps gen;
+	u8	ext_port_cap;
 };
 
 struct mlx5_cmd_mailbox {
@@ -521,7 +473,9 @@ struct mlx5_core_dev {
 	u8			rev_id;
 	char			board_id[MLX5_BOARD_ID_LEN];
 	struct mlx5_cmd		cmd;
-	struct mlx5_caps	caps;
+	struct mlx5_port_caps	port_caps[MLX5_MAX_PORTS];
+	u32 hca_caps_cur[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)];
+	u32 hca_caps_max[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)];
 	phys_addr_t		iseg_base;
 	struct mlx5_init_seg __iomem *iseg;
 	void			(*event) (struct mlx5_core_dev *dev,
@@ -651,8 +605,8 @@ void mlx5_cmd_use_events(struct mlx5_core_dev *dev);
 void mlx5_cmd_use_polling(struct mlx5_core_dev *dev);
 int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr);
 int mlx5_cmd_status_to_err_v2(void *ptr);
-int mlx5_core_get_caps(struct mlx5_core_dev *dev, struct mlx5_caps *caps,
-		       u16 opmod);
+int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type,
+		       enum mlx5_cap_mode cap_mode);
 int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
 		  int out_size);
 int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size,
-- 
cgit v1.2.3


From adb0c9545bce6f1b1d563e988e6ee5531861d449 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Thu, 28 May 2015 22:28:42 +0300
Subject: net/mlx5_core: Implement access functions of ptys register fields

Those registers will be used by the ethtool to set/get settings.

Signed-off-by: Rana Shahout <ranas@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/driver.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 6b9199163633..266d5498a270 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -504,6 +504,11 @@ enum {
 	MLX5_COMP_EQ_SIZE = 1024,
 };
 
+enum {
+	MLX5_PTYS_IB = 1 << 0,
+	MLX5_PTYS_EN = 1 << 2,
+};
+
 struct mlx5_db_pgdir {
 	struct list_head	list;
 	DECLARE_BITMAP(bitmap, MLX5_DB_PER_PAGE);
@@ -686,7 +691,16 @@ void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev);
 int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
 			 int size_in, void *data_out, int size_out,
 			 u16 reg_num, int arg, int write);
+
 int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps);
+int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys,
+			 int ptys_size, int proto_mask);
+int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev,
+			      u32 *proto_cap, int proto_mask);
+int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev,
+				u32 *proto_admin, int proto_mask);
+int mlx5_set_port_proto(struct mlx5_core_dev *dev, u32 proto_admin,
+			int proto_mask);
 
 int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
 void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-- 
cgit v1.2.3


From 4c916a798058c1acf5a980438416020932c24aca Mon Sep 17 00:00:00 2001
From: Rana Shahout <ranas@mellanox.com>
Date: Thu, 28 May 2015 22:28:43 +0300
Subject: net/mlx5_core: Implement get/set port status

Implemet get/set port status low level functions to be exposed by the
netdev.

Signed-off-by: Rana Shahout <ranas@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/driver.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 266d5498a270..6438444ab361 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -149,6 +149,11 @@ enum mlx5_dev_event {
 	MLX5_DEV_EVENT_CLIENT_REREG,
 };
 
+enum mlx5_port_status {
+	MLX5_PORT_UP        = 1 << 1,
+	MLX5_PORT_DOWN      = 1 << 2,
+};
+
 struct mlx5_uuar_info {
 	struct mlx5_uar	       *uars;
 	int			num_uars;
@@ -701,6 +706,9 @@ int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev,
 				u32 *proto_admin, int proto_mask);
 int mlx5_set_port_proto(struct mlx5_core_dev *dev, u32 proto_admin,
 			int proto_mask);
+int mlx5_set_port_status(struct mlx5_core_dev *dev,
+			 enum mlx5_port_status status);
+int mlx5_query_port_status(struct mlx5_core_dev *dev, u8 *status);
 
 int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
 void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-- 
cgit v1.2.3


From 90b3e38d048f09b22fb50bcd460cea65fd00b2d7 Mon Sep 17 00:00:00 2001
From: Rana Shahout <ranas@mellanox.com>
Date: Thu, 28 May 2015 22:28:44 +0300
Subject: net/mlx5_core: Modify CQ moderation parameters

Introduce mlx5_core_modify_cq_moderation() to be used by the netdev, to
set hardware coalescing.

Signed-off-by: Rana Shahout <ranas@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/cq.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h
index 2695ced222df..abc4767695e4 100644
--- a/include/linux/mlx5/cq.h
+++ b/include/linux/mlx5/cq.h
@@ -169,6 +169,9 @@ int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
 		       struct mlx5_query_cq_mbox_out *out);
 int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
 			struct mlx5_modify_cq_mbox_in *in, int in_sz);
+int mlx5_core_modify_cq_moderation(struct mlx5_core_dev *dev,
+				   struct mlx5_core_cq *cq, u16 cq_period,
+				   u16 cq_max_count);
 int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
 void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
 
-- 
cgit v1.2.3


From e725440e75da8c4d617a31c4e38216acc55c24e3 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Thu, 28 May 2015 22:28:45 +0300
Subject: net/mlx5_core: Set/Query port MTU commands

Introduce set/Query low level functions to access MTU in hardware. To be
used by the netdev.

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/driver.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 6438444ab361..51738472657e 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -710,6 +710,10 @@ int mlx5_set_port_status(struct mlx5_core_dev *dev,
 			 enum mlx5_port_status status);
 int mlx5_query_port_status(struct mlx5_core_dev *dev, u8 *status);
 
+int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu);
+int mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu);
+int mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu);
+
 int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
 void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
 int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
-- 
cgit v1.2.3


From afb736e9330ad6b2b6935d2f53ded784eb73f12d Mon Sep 17 00:00:00 2001
From: Amir Vadai <amirv@mellanox.com>
Date: Thu, 28 May 2015 22:28:47 +0300
Subject: net/mlx5: Ethernet resource handling files

This patch contains the resource handling files:
- flow_table.c: This file contains the code to handle the low level API
		to configure hardware flow table. It is separated from
		the flow_table_en.c, because it will be used in the
		future by Raw Ethernet QP in mlx5_ib too.
- en_flow_table.[ch]: Ethernet flow steering handling. The flow table
		object contain a mapping between flow specs and TIRs.
		This mechanism will be used also to configure e-switch
		in the future, when SR-IOV support will be added.
- transobj.[ch] - Low level functions to create/modify/destroy the
                  transport objects: RQ/SQ/TIR/TIS
- vport.[ch] - Handle attributes of a virtual port (vPort) in the
  embedded switch. Currently this switch is a passthrough, until SR-IOV
  support will be added.

Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/flow_table.h | 54 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100644 include/linux/mlx5/flow_table.h

(limited to 'include/linux')

diff --git a/include/linux/mlx5/flow_table.h b/include/linux/mlx5/flow_table.h
new file mode 100644
index 000000000000..5f922c6d4fc2
--- /dev/null
+++ b/include/linux/mlx5/flow_table.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies, Ltd.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX5_FLOW_TABLE_H
+#define MLX5_FLOW_TABLE_H
+
+#include <linux/mlx5/driver.h>
+
+struct mlx5_flow_table_group {
+	u8	log_sz;
+	u8	match_criteria_enable;
+	u32	match_criteria[MLX5_ST_SZ_DW(fte_match_param)];
+};
+
+void *mlx5_create_flow_table(struct mlx5_core_dev *dev, u8 level, u8 table_type,
+			     u16 num_groups,
+			     struct mlx5_flow_table_group *group);
+void mlx5_destroy_flow_table(void *flow_table);
+int mlx5_add_flow_table_entry(void *flow_table, u8 match_criteria_enable,
+			      void *match_criteria, void *flow_context,
+			      u32 *flow_index);
+void mlx5_del_flow_table_entry(void *flow_table, u32 flow_index);
+u32 mlx5_get_flow_table_id(void *flow_table);
+
+#endif /* MLX5_FLOW_TABLE_H */
-- 
cgit v1.2.3


From f62b8bb8f2d30582f30f51e85a8c0e1260125d7e Mon Sep 17 00:00:00 2001
From: Amir Vadai <amirv@mellanox.com>
Date: Thu, 28 May 2015 22:28:48 +0300
Subject: net/mlx5: Extend mlx5_core to support ConnectX-4 Ethernet
 functionality

This is the Ethernet part of the driver for the Mellanox ConnectX(R)-4
Single/Dual-Port Adapter supporting 100Gb/s with VPI.  The driver
extends the existing mlx5 driver with Ethernet functionality.

This patch contains the driver entry points but does not include
transmit and receive (see the previous patch in the series) routines.

It also adds the option MLX5_CORE_EN to Kconfig to enable/disable the
Ethernet functionality. Currently, Kconfig is programmed to make
Ethernet and Infiniband functionality mutally exclusive.
Also changed MLX5_INFINIBAND to be depandant on MLX5_CORE instead of
selecting it, since MLX5_CORE could be selected without MLX5_INFINIBAND
being selected.

Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/device.h | 19 +++++++++++++++++++
 include/linux/mlx5/driver.h |  1 +
 2 files changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 4ee52bf1f959..b288c538347a 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1153,4 +1153,23 @@ enum mlx5_cap_type {
 #define MLX5_CAP_ODP(mdev, cap)\
 	MLX5_GET(odp_cap, mdev->hca_caps_cur[MLX5_CAP_ODP], cap)
 
+enum {
+	MLX5_CMD_STAT_OK			= 0x0,
+	MLX5_CMD_STAT_INT_ERR			= 0x1,
+	MLX5_CMD_STAT_BAD_OP_ERR		= 0x2,
+	MLX5_CMD_STAT_BAD_PARAM_ERR		= 0x3,
+	MLX5_CMD_STAT_BAD_SYS_STATE_ERR		= 0x4,
+	MLX5_CMD_STAT_BAD_RES_ERR		= 0x5,
+	MLX5_CMD_STAT_RES_BUSY			= 0x6,
+	MLX5_CMD_STAT_LIM_ERR			= 0x8,
+	MLX5_CMD_STAT_BAD_RES_STATE_ERR		= 0x9,
+	MLX5_CMD_STAT_IX_ERR			= 0xa,
+	MLX5_CMD_STAT_NO_RES_ERR		= 0xf,
+	MLX5_CMD_STAT_BAD_INP_LEN_ERR		= 0x50,
+	MLX5_CMD_STAT_BAD_OUTP_LEN_ERR		= 0x51,
+	MLX5_CMD_STAT_BAD_QP_STATE_ERR		= 0x10,
+	MLX5_CMD_STAT_BAD_PKT_ERR		= 0x30,
+	MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR	= 0x40,
+};
+
 #endif /* MLX5_DEVICE_H */
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 51738472657e..7fa26f03acc1 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -489,6 +489,7 @@ struct mlx5_core_dev {
 	struct mlx5_priv	priv;
 	struct mlx5_profile	*profile;
 	atomic_t		num_qps;
+	u32			issi;
 };
 
 struct mlx5_db {
-- 
cgit v1.2.3


From 07d783fd830a49008f3b2764ae7b6033ee1bf329 Mon Sep 17 00:00:00 2001
From: Peter Senna Tschudin <peter.senna@gmail.com>
Date: Tue, 19 May 2015 11:44:46 +0200
Subject: staging: goldfish: Fix pointer cast for 32 bits

As the first argument of gf_write64() was of type unsigned long, and as
some calls to gf_write64() were casting the first argument from void *
to u64 the compiler and/or sparse were printing warnings for casts of
wrong sizes when compiling for i386.

This patch changes the type of the first argument of gf_write64() to
const void *, and update calls to the function. This change fixed the
warnings and allowed to remove casts from 3 calls to gf_write64().

In addition gf_write64() was renamed to gf_write_ptr() as the name was
misleading because it only writes 32 bits on 32 bit systems.

gf_write_dma_addr() was added to handle dma_addr_t values which is
used at drivers/staging/goldfish/goldfish_audio.c.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Peter Senna Tschudin <peter.senna@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/goldfish.h | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/goldfish.h b/include/linux/goldfish.h
index 569236e6b2bc..93e080b39cf6 100644
--- a/include/linux/goldfish.h
+++ b/include/linux/goldfish.h
@@ -3,13 +3,24 @@
 
 /* Helpers for Goldfish virtual platform */
 
-static inline void gf_write64(unsigned long data,
-		void __iomem *portl, void __iomem *porth)
+static inline void gf_write_ptr(const void *ptr, void __iomem *portl,
+				void __iomem *porth)
 {
-	writel((u32)data, portl);
+	writel((u32)(unsigned long)ptr, portl);
 #ifdef CONFIG_64BIT
-	writel(data>>32, porth);
+	writel((unsigned long)ptr >> 32, porth);
 #endif
 }
 
+static inline void gf_write_dma_addr(const dma_addr_t addr,
+				     void __iomem *portl,
+				     void __iomem *porth)
+{
+	writel((u32)addr, portl);
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+	writel(addr >> 32, porth);
+#endif
+}
+
+
 #endif /* __LINUX_GOLDFISH_H */
-- 
cgit v1.2.3


From b144ce2d37619e05afdb0a15676500d76a64b1be Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 27 May 2015 17:17:27 -0700
Subject: mei: fix up uuid matching

A previous commit, c93b76b34b4d ("mei: bus: report also uuid in module
alias") caused a build error as I missed applying a needed patch to add
some macros to uapi/linux/uuid.h.  Instead of those additional macros,
change the mei code to use the existing uuid structure directly.

Fixes: c93b76b34b4d
Cc: Tomas Winkler <tomas.winkler@intel.com>
Cc: Samuel Ortiz <sameo@linux.intel.com>
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/mod_devicetable.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index 2d2b2b571d61..048c270822f9 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -614,7 +614,7 @@ struct ipack_device_id {
  */
 struct mei_cl_device_id {
 	char name[MEI_CL_NAME_SIZE];
-	__u8 uuid[16];
+	uuid_le uuid;
 	kernel_ulong_t driver_info;
 };
 
-- 
cgit v1.2.3


From c66fa19c405a36673d4aab13658c8246413d5c0f Mon Sep 17 00:00:00 2001
From: Matan Barak <matanb@mellanox.com>
Date: Sun, 31 May 2015 09:30:16 +0300
Subject: net/mlx4: Add EQ pool

Previously, mlx4_en allocated EQs and used them exclusively.
This affected RoCE performance, as applications which are
events sensitive were limited to use only the legacy EQs.

Change that by introducing an EQ pool. This pool is managed
by mlx4_core. EQs are assigned to ports (when there are limited
number of EQs, multiple ports could be assigned to the same EQs).

An exception to this rule is the ASYNC EQ which handles various events.

Legacy EQs are completely removed as all EQs could be shared.

When a consumer (mlx4_ib/mlx4_en) requests an EQ, it asks for
EQ serving on a specific port. The core driver calculates which
EQ should be assigned to that request.

Because IRQs are shared between IB and Ethernet modules, their
names only include the PCI device BDF address.

Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Ido Shamay <idos@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx4/device.h | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 83e80ab94500..ad31e476873f 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -46,8 +46,9 @@
 
 #define MAX_MSIX_P_PORT		17
 #define MAX_MSIX		64
-#define MSIX_LEGACY_SZ		4
 #define MIN_MSIX_P_PORT		5
+#define MLX4_IS_LEGACY_EQ_MODE(dev_cap) ((dev_cap).num_comp_vectors < \
+					 (dev_cap).num_ports * MIN_MSIX_P_PORT)
 
 #define MLX4_MAX_100M_UNITS_VAL		255	/*
 						 * work around: can't set values
@@ -528,7 +529,6 @@ struct mlx4_caps {
 	int			num_eqs;
 	int			reserved_eqs;
 	int			num_comp_vectors;
-	int			comp_pool;
 	int			num_mpts;
 	int			max_fmr_maps;
 	int			num_mtts;
@@ -1332,10 +1332,13 @@ void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr,
 int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr);
 int mlx4_SYNC_TPT(struct mlx4_dev *dev);
 int mlx4_test_interrupts(struct mlx4_dev *dev);
-int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap,
-		   int *vector);
+u32 mlx4_get_eqs_per_port(struct mlx4_dev *dev, u8 port);
+bool mlx4_is_eq_vector_valid(struct mlx4_dev *dev, u8 port, int vector);
+struct cpu_rmap *mlx4_get_cpu_rmap(struct mlx4_dev *dev, int port);
+int mlx4_assign_eq(struct mlx4_dev *dev, u8 port, int *vector);
 void mlx4_release_eq(struct mlx4_dev *dev, int vec);
 
+int mlx4_is_eq_shared(struct mlx4_dev *dev, int vector);
 int mlx4_eq_get_irq(struct mlx4_dev *dev, int vec);
 
 int mlx4_get_phys_port_id(struct mlx4_dev *dev);
-- 
cgit v1.2.3


From f0e6a326deec8b51ee12f82a34057efd3d0979b8 Mon Sep 17 00:00:00 2001
From: Abhishek Bist <ishubist@gmail.com>
Date: Wed, 27 May 2015 23:54:19 +0530
Subject: USB: hcd.h : Removed an unnecessary function prototype
 usb_find_interface_driver()

This function is used to call in early version of linux kernel in order
to find out the interface used by a usb device. But now it's use is
completely abolished. So,it would be relevant to remove this obselete
function from kernel mainline.

Signed-off-by: Abhishek Bist <ishubist@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb/hcd.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index 68b1e836dff1..c9aa7792de10 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -622,8 +622,6 @@ extern struct list_head usb_bus_list;
 extern struct mutex usb_bus_list_lock;
 extern wait_queue_head_t usb_kill_urb_queue;
 
-extern int usb_find_interface_driver(struct usb_device *dev,
-	struct usb_interface *interface);
 
 #define usb_endpoint_out(ep_dir)	(!((ep_dir) & USB_DIR_IN))
 
-- 
cgit v1.2.3


From 138c3f03b017e261316a4f1ec793e1ff74516def Mon Sep 17 00:00:00 2001
From: Nikhil Badola <nikhil.badola@freescale.com>
Date: Tue, 26 May 2015 17:15:29 +0530
Subject: drivers:usb:fsl: Add support for USB controller version-2.5

Add support for USB controller version-2.5 used in
T4240 rev2.0, T1024, T1040, T2080, LS1021A

Signed-off-by: Nikhil Badola <nikhil.badola@freescale.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/fsl_devices.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h
index a82296af413f..2a2f56b292c1 100644
--- a/include/linux/fsl_devices.h
+++ b/include/linux/fsl_devices.h
@@ -24,6 +24,7 @@
 #define FSL_USB_VER_1_6		1
 #define FSL_USB_VER_2_2		2
 #define FSL_USB_VER_2_4		3
+#define FSL_USB_VER_2_5		4
 
 #include <linux/types.h>
 
-- 
cgit v1.2.3


From 34d2e4584ae594eff29d1595d47d7d044e57f834 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 26 May 2015 13:28:48 +0900
Subject: serial: 8250: include <linux/serial_reg.h> from serial_8250.h

The header file, include/linux/serial_8250.h, contains references to
UART_LSR_BRK_ERROR_BITS and UART_MSR_ANY_DELTA that are defined in
<linux/serial_reg.h>.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Reviewed-by: Peter Hurley <peter@hurleysoftware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/serial_8250.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
index f0c68d88b6f4..ba82c07feb95 100644
--- a/include/linux/serial_8250.h
+++ b/include/linux/serial_8250.h
@@ -12,6 +12,7 @@
 #define _LINUX_SERIAL_8250_H
 
 #include <linux/serial_core.h>
+#include <linux/serial_reg.h>
 #include <linux/platform_device.h>
 
 /*
-- 
cgit v1.2.3


From abf2e7d6e2e315b32ee00067a69aaad2cf4e1b3f Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Thu, 28 May 2015 19:26:02 -0700
Subject: bpf: add missing rcu protection when releasing programs from
 prog_array

Normally the program attachment place (like sockets, qdiscs) takes
care of rcu protection and calls bpf_prog_put() after a grace period.
The programs stored inside prog_array may not be attached anywhere,
so prog_array needs to take care of preserving rcu protection.
Otherwise bpf_tail_call() will race with bpf_prog_put().
To solve that introduce bpf_prog_put_rcu() helper function and use
it in 3 places where unattached program can decrement refcnt:
closing program fd, deleting/replacing program in prog_array.

Fixes: 04fd61ab36ec ("bpf: allow bpf programs to tail-call other bpf programs")
Reported-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 8821b9a8689e..5f520f5f087e 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -123,7 +123,10 @@ struct bpf_prog_aux {
 	const struct bpf_verifier_ops *ops;
 	struct bpf_map **used_maps;
 	struct bpf_prog *prog;
-	struct work_struct work;
+	union {
+		struct work_struct work;
+		struct rcu_head	rcu;
+	};
 };
 
 struct bpf_array {
@@ -153,6 +156,7 @@ void bpf_register_map_type(struct bpf_map_type_list *tl);
 
 struct bpf_prog *bpf_prog_get(u32 ufd);
 void bpf_prog_put(struct bpf_prog *prog);
+void bpf_prog_put_rcu(struct bpf_prog *prog);
 
 struct bpf_map *bpf_map_get(struct fd f);
 void bpf_map_put(struct bpf_map *map);
-- 
cgit v1.2.3


From 3b369bd212d5cabb46cff0e863298971b382bbd6 Mon Sep 17 00:00:00 2001
From: Lennert Buytenhek <buytenh@wantstofly.org>
Date: Thu, 28 May 2015 15:38:32 +0300
Subject: ieee802154: Fix generation of random EUI-64 addresses.

Currently, ieee802154_random_extended_addr() has a 50% chance of
generating a group (multicast) address, while this function is used
for generating station addresses (which can't be group addresses)
for interfaces that don't have a hardware-provided address.

Also, in case get_random_bytes() generates the EUI-64 address
00:00:00:00:00:00:00:00 (extremely unlikely), which is an invalid
address, ieee802154_random_extended_addr() reacts by changing it
to 01:00:00:00:00:00:00:00, which is an invalid station address as
well, as it is a group address.

This patch changes the address generation procedure to grab eight
random bytes, treat that as an EUI-64, and then clear the Group
address bit and set the Locally Administered bit, which is in
line with how eth_random_addr() generates random EUI-48s.

Signed-off-by: Lennert Buytenhek <buytenh@wantstofly.org>
Acked-by: Alexander Aring <alex.aring@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/linux/ieee802154.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h
index 8872ca103d06..552210d0a46f 100644
--- a/include/linux/ieee802154.h
+++ b/include/linux/ieee802154.h
@@ -244,9 +244,9 @@ static inline void ieee802154_random_extended_addr(__le64 *addr)
 {
 	get_random_bytes(addr, IEEE802154_EXTENDED_ADDR_LEN);
 
-	/* toggle some bit if we hit an invalid extended addr */
-	if (!ieee802154_is_valid_extended_addr(*addr))
-		((u8 *)addr)[IEEE802154_EXTENDED_ADDR_LEN - 1] ^= 0x01;
+	/* clear the group bit, and set the locally administered bit */
+	((u8 *)addr)[IEEE802154_EXTENDED_ADDR_LEN - 1] &= ~0x01;
+	((u8 *)addr)[IEEE802154_EXTENDED_ADDR_LEN - 1] |= 0x02;
 }
 
 #endif /* LINUX_IEEE802154_H */
-- 
cgit v1.2.3


From daf4e2c89254ed6eb8cf7ef60f614edebfdb9f3a Mon Sep 17 00:00:00 2001
From: Lennert Buytenhek <buytenh@wantstofly.org>
Date: Thu, 28 May 2015 15:38:43 +0300
Subject: ieee802154: Fix EUI-64 station address validation.

Refuse to allow setting an EUI-64 group address as an interface
address, as those are not valid station addresses.

Signed-off-by: Lennert Buytenhek <buytenh@wantstofly.org>
Acked-by: Alexander Aring <alex.aring@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/linux/ieee802154.h | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h
index 552210d0a46f..1dc1f4ed4001 100644
--- a/include/linux/ieee802154.h
+++ b/include/linux/ieee802154.h
@@ -225,15 +225,13 @@ static inline bool ieee802154_is_valid_psdu_len(const u8 len)
  * ieee802154_is_valid_psdu_len - check if extended addr is valid
  * @addr: extended addr to check
  */
-static inline bool ieee802154_is_valid_extended_addr(const __le64 addr)
+static inline bool ieee802154_is_valid_extended_unicast_addr(const __le64 addr)
 {
-	/* These EUI-64 addresses are reserved by IEEE. 0xffffffffffffffff
-	 * is used internally as extended to short address broadcast mapping.
-	 * This is currently a workaround because neighbor discovery can't
-	 * deal with short addresses types right now.
+	/* Bail out if the address is all zero, or if the group
+	 * address bit is set.
 	 */
 	return ((addr != cpu_to_le64(0x0000000000000000ULL)) &&
-		(addr != cpu_to_le64(0xffffffffffffffffULL)));
+		!(addr & cpu_to_le64(0x0100000000000000ULL)));
 }
 
 /**
-- 
cgit v1.2.3


From 1a1bc59c5f7657387d1a4b45d63248fed55ab88c Mon Sep 17 00:00:00 2001
From: Varka Bhadram <varkabhadram@gmail.com>
Date: Fri, 29 May 2015 10:56:55 +0530
Subject: cc2520: fix CC2591 handling

This patch changes tha way of handling of cc2591-cc2520 combination
by moving amplified variable from platform data to private data.
This will be useful in other sections like tx power support.

Signed-off-by: Varka Bhadram <varkab@cdac.in>
Cc: Brad Campbell <bradjc5@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/linux/spi/cc2520.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/spi/cc2520.h b/include/linux/spi/cc2520.h
index e741e8baad92..85b8ee67e937 100644
--- a/include/linux/spi/cc2520.h
+++ b/include/linux/spi/cc2520.h
@@ -21,7 +21,6 @@ struct cc2520_platform_data {
 	int sfd;
 	int reset;
 	int vreg;
-	bool amplified;
 };
 
 #endif
-- 
cgit v1.2.3


From 6c4e5f9c9ff41ea997fd0f345b3b2b88c113eb68 Mon Sep 17 00:00:00 2001
From: Keith Mange <keith.mange@microsoft.com>
Date: Tue, 26 May 2015 14:23:01 -0700
Subject: Drivers: hv: vmbus:Update preferred vmbus protocol version to windows
 10.

Add support for Windows 10.

Signed-off-by: Keith Mange <keith.mange@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/hyperv.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 3932a993ff5a..4317cd1b69ed 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -160,16 +160,18 @@ hv_get_ringbuffer_availbytes(struct hv_ring_buffer_info *rbi,
  * 1 . 1  (Windows 7)
  * 2 . 4  (Windows 8)
  * 3 . 0  (Windows 8 R2)
+ * 4 . 0  (Windows 10)
  */
 
 #define VERSION_WS2008  ((0 << 16) | (13))
 #define VERSION_WIN7    ((1 << 16) | (1))
 #define VERSION_WIN8    ((2 << 16) | (4))
 #define VERSION_WIN8_1    ((3 << 16) | (0))
+#define VERSION_WIN10	((4 << 16) | (0))
 
 #define VERSION_INVAL -1
 
-#define VERSION_CURRENT VERSION_WIN8_1
+#define VERSION_CURRENT VERSION_WIN10
 
 /* Make maximum size of pipe payload of 16K */
 #define MAX_PIPE_DATA_PAYLOAD		(sizeof(u8) * 16384)
-- 
cgit v1.2.3


From 6fa45a22689722dac9f0e90c0931d4b34b334ede Mon Sep 17 00:00:00 2001
From: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Date: Wed, 20 May 2015 20:56:57 +0530
Subject: parport: add device-model to parport subsystem

parport subsystem starts using the device-model. Drivers using the
device-model has to define devmodel as true and should register the
device with parport using parport_register_dev_model().

Tested-by: Jean Delvare <jdelvare@suse.de>
Tested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: Sudip Mukherjee <sudip@vectorindia.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/parport.h | 43 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 41 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/parport.h b/include/linux/parport.h
index c22f12547324..58e3c64c6b49 100644
--- a/include/linux/parport.h
+++ b/include/linux/parport.h
@@ -13,6 +13,7 @@
 #include <linux/wait.h>
 #include <linux/irqreturn.h>
 #include <linux/semaphore.h>
+#include <linux/device.h>
 #include <asm/ptrace.h>
 #include <uapi/linux/parport.h>
 
@@ -145,6 +146,8 @@ struct pardevice {
 	unsigned int flags;
 	struct pardevice *next;
 	struct pardevice *prev;
+	struct device dev;
+	bool devmodel;
 	struct parport_state *state;     /* saved status over preemption */
 	wait_queue_head_t wait_q;
 	unsigned long int time;
@@ -156,6 +159,8 @@ struct pardevice {
 	void * sysctl_table;
 };
 
+#define to_pardevice(n) container_of(n, struct pardevice, dev)
+
 /* IEEE1284 information */
 
 /* IEEE1284 phases. These are exposed to userland through ppdev IOCTL
@@ -195,7 +200,7 @@ struct parport {
 				 * This may unfortulately be null if the
 				 * port has a legacy driver.
 				 */
-
+	struct device bus_dev;	/* to link with the bus */
 	struct parport *physport;
 				/* If this is a non-default mux
 				   parport, i.e. we're a clone of a real
@@ -245,15 +250,26 @@ struct parport {
 	struct parport *slaves[3];
 };
 
+#define to_parport_dev(n) container_of(n, struct parport, bus_dev)
+
 #define DEFAULT_SPIN_TIME 500 /* us */
 
 struct parport_driver {
 	const char *name;
 	void (*attach) (struct parport *);
 	void (*detach) (struct parport *);
+	void (*match_port)(struct parport *);
+	int (*probe)(struct pardevice *);
+	struct device_driver driver;
+	bool devmodel;
 	struct list_head list;
 };
 
+#define to_parport_driver(n) container_of(n, struct parport_driver, driver)
+
+int parport_bus_init(void);
+void parport_bus_exit(void);
+
 /* parport_register_port registers a new parallel port at the given
    address (if one does not already exist) and returns a pointer to it.
    This entails claiming the I/O region, IRQ and DMA.  NULL is returned
@@ -272,10 +288,20 @@ void parport_announce_port (struct parport *port);
 extern void parport_remove_port(struct parport *port);
 
 /* Register a new high-level driver. */
-extern int parport_register_driver (struct parport_driver *);
+
+int __must_check __parport_register_driver(struct parport_driver *,
+					   struct module *,
+					   const char *mod_name);
+/*
+ * parport_register_driver must be a macro so that KBUILD_MODNAME can
+ * be expanded
+ */
+#define parport_register_driver(driver)             \
+	__parport_register_driver(driver, THIS_MODULE, KBUILD_MODNAME)
 
 /* Unregister a high-level driver. */
 extern void parport_unregister_driver (struct parport_driver *);
+void parport_unregister_driver(struct parport_driver *);
 
 /* If parport_register_driver doesn't fit your needs, perhaps
  * parport_find_xxx does. */
@@ -288,6 +314,15 @@ extern irqreturn_t parport_irq_handler(int irq, void *dev_id);
 /* Reference counting for ports. */
 extern struct parport *parport_get_port (struct parport *);
 extern void parport_put_port (struct parport *);
+void parport_del_port(struct parport *);
+
+struct pardev_cb {
+	int (*preempt)(void *);
+	void (*wakeup)(void *);
+	void *private;
+	void (*irq_func)(void *);
+	unsigned int flags;
+};
 
 /* parport_register_device declares that a device is connected to a
    port, and tells the kernel all it needs to know.
@@ -301,6 +336,10 @@ struct pardevice *parport_register_device(struct parport *port,
 			  void (*irq_func)(void *), 
 			  int flags, void *handle);
 
+struct pardevice *
+parport_register_dev_model(struct parport *port, const char *name,
+			   const struct pardev_cb *par_dev_cb, int cnt);
+
 /* parport_unregister unlinks a device from the chain. */
 extern void parport_unregister_device(struct pardevice *dev);
 
-- 
cgit v1.2.3


From b84b1d522f979fb53ad347605e24b2940fa2ad99 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Wed, 29 Apr 2015 08:57:34 +0200
Subject: scsi: Do not set cmd_per_lun to 1 in the host template

'0' is now used as the default cmd_per_lun value,
so there's no need to explicitly set it to '1' in the
host template.

Signed-off-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: James Bottomley <JBottomley@Odin.com>
---
 include/linux/libata.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 8dad4a307bb8..1402291aab5e 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -134,7 +134,6 @@ enum {
 	ATA_ALL_DEVICES		= (1 << ATA_MAX_DEVICES) - 1,
 
 	ATA_SHT_EMULATED	= 1,
-	ATA_SHT_CMD_PER_LUN	= 1,
 	ATA_SHT_THIS_ID		= -1,
 	ATA_SHT_USE_CLUSTERING	= 1,
 
@@ -1354,7 +1353,6 @@ extern struct device_attribute *ata_common_sdev_attrs[];
 	.can_queue		= ATA_DEF_QUEUE,		\
 	.tag_alloc_policy	= BLK_TAG_ALLOC_RR,		\
 	.this_id		= ATA_SHT_THIS_ID,		\
-	.cmd_per_lun		= ATA_SHT_CMD_PER_LUN,		\
 	.emulated		= ATA_SHT_EMULATED,		\
 	.use_clustering		= ATA_SHT_USE_CLUSTERING,	\
 	.proc_name		= drv_name,			\
-- 
cgit v1.2.3


From 1c4b1d73bacc546ba4e42f7eb4cb88c54139820b Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Wed, 27 May 2015 13:57:46 +0200
Subject: tty: move linux/gsmmux.h to uapi

linux/gsmmux.h defines a user interface and therefore should be
installed with other headers.

Make the file include:
* linux/if.h for IFNAMSIZ
* linux/ioctl.h for _IO* macros

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Cc: Alan Cox <alan@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/gsmmux.h | 36 ------------------------------------
 1 file changed, 36 deletions(-)
 delete mode 100644 include/linux/gsmmux.h

(limited to 'include/linux')

diff --git a/include/linux/gsmmux.h b/include/linux/gsmmux.h
deleted file mode 100644
index c25e9477f7c3..000000000000
--- a/include/linux/gsmmux.h
+++ /dev/null
@@ -1,36 +0,0 @@
-#ifndef _LINUX_GSMMUX_H
-#define _LINUX_GSMMUX_H
-
-struct gsm_config
-{
-	unsigned int adaption;
-	unsigned int encapsulation;
-	unsigned int initiator;
-	unsigned int t1;
-	unsigned int t2;
-	unsigned int t3;
-	unsigned int n2;
-	unsigned int mru;
-	unsigned int mtu;
-	unsigned int k;
-	unsigned int i;
-	unsigned int unused[8];		/* Padding for expansion without
-					   breaking stuff */
-};
-
-#define GSMIOC_GETCONF		_IOR('G', 0, struct gsm_config)
-#define GSMIOC_SETCONF		_IOW('G', 1, struct gsm_config)
-
-struct gsm_netconfig {
-	unsigned int adaption;  /* Adaption to use in network mode */
-	unsigned short protocol;/* Protocol to use - only ETH_P_IP supported */
-	unsigned short unused2;
-	char if_name[IFNAMSIZ];	/* interface name format string */
-	__u8 unused[28];        /* For future use */
-};
-
-#define GSMIOC_ENABLE_NET      _IOW('G', 2, struct gsm_netconfig)
-#define GSMIOC_DISABLE_NET     _IO('G', 3)
-
-
-#endif
-- 
cgit v1.2.3


From 1f656ff3fdddc2f59649cc84b633b799908f1f7b Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Sat, 30 May 2015 23:37:48 -0700
Subject: Drivers: hv: vmbus: Implement NUMA aware CPU affinity for channels

Channels/sub-channels can be affinitized to VCPUs in the guest. Implement
this affinity in a way that is NUMA aware. The current protocol distributed
the primary channels uniformly across all available CPUs. The new protocol
is NUMA aware: primary channels are distributed across the available NUMA
nodes while the sub-channels within a primary channel are distributed amongst
CPUs within the NUMA node assigned to the primary channel.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/hyperv.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 4317cd1b69ed..30d3a1f79450 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -696,6 +696,11 @@ struct vmbus_channel {
 	u32 target_vp;
 	/* The corresponding CPUID in the guest */
 	u32 target_cpu;
+	/*
+	 * State to manage the CPU affiliation of channels.
+	 */
+	struct cpumask alloced_cpus_in_node;
+	int numa_node;
 	/*
 	 * Support for sub-channels. For high performance devices,
 	 * it will be useful to have multiple sub-channels to support
-- 
cgit v1.2.3


From 17ca8cbf49be3aa94bb1c2b7ee6545fd70094eb4 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 29 May 2015 23:23:06 +0200
Subject: ebpf: allow bpf_ktime_get_ns_proto also for networking

As this is already exported from tracing side via commit d9847d310ab4
("tracing: Allow BPF programs to call bpf_ktime_get_ns()"), we might
as well want to move it to the core, so also networking users can make
use of it, e.g. to measure diffs for certain flows from ingress/egress.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Ingo Molnar <mingo@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 5f520f5f087e..ca854e5bb2f7 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -186,5 +186,6 @@ extern const struct bpf_func_proto bpf_map_delete_elem_proto;
 extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
 extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
 extern const struct bpf_func_proto bpf_tail_call_proto;
+extern const struct bpf_func_proto bpf_ktime_get_ns_proto;
 
 #endif /* _LINUX_BPF_H */
-- 
cgit v1.2.3


From 887ee43477e4e327dbcd2aabc2d78a5116ed8a33 Mon Sep 17 00:00:00 2001
From: Beomho Seo <beomho.seo@samsung.com>
Date: Thu, 30 Apr 2015 13:07:43 +0900
Subject: hwmon: (ntc_thermistor) Add support for ncpXXwf104

This patch adds support for the ntc thermistor NCPXXWF104 series.

Cc: Jean Delvare <jdelvare@suse.de>
Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Beomho Seo <beomho.seo@samsung.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 include/linux/platform_data/ntc_thermistor.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/ntc_thermistor.h b/include/linux/platform_data/ntc_thermistor.h
index 0a6de4ca4930..aed170588b74 100644
--- a/include/linux/platform_data/ntc_thermistor.h
+++ b/include/linux/platform_data/ntc_thermistor.h
@@ -27,6 +27,7 @@ enum ntc_thermistor_type {
 	TYPE_NCPXXWB473,
 	TYPE_NCPXXWL333,
 	TYPE_B57330V2103,
+	TYPE_NCPXXWF104,
 };
 
 struct ntc_thermistor_platform_data {
-- 
cgit v1.2.3


From dfa13ebbe3340e538b988f5608efd9ff2ca7fc35 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 7 May 2015 13:10:12 +0300
Subject: mmc: host: Add facility to support re-tuning

Currently, there is core support for tuning during
initialization. There can also be a need to re-tune
periodically (e.g. sdhci) or to re-tune after the
host controller is powered off (e.g. after PM
runtime suspend / resume) or to re-tune in response
to CRC errors.

The main requirements for re-tuning are:
  - ability to enable / disable re-tuning
  - ability to flag that re-tuning is needed
  - ability to re-tune before any request
  - ability to hold off re-tuning if the card is busy
  - ability to hold off re-tuning if re-tuning is in
  progress
  - ability to run a re-tuning timer

To support those requirements 7 members are added to struct
mmc_host:

  unsigned int		can_retune:1;	/* re-tuning can be used */
  unsigned int		doing_retune:1;	/* re-tuning in progress */
  unsigned int		retune_now:1;   /* do re-tuning at next req */
  int			need_retune;	/* re-tuning is needed */
  int			hold_retune;	/* hold off re-tuning */
  unsigned int		retune_period;  /* re-tuning period in secs */
  struct timer_list	retune_timer;	/* for periodic re-tuning */

need_retune is an integer so it can be set without needing
synchronization. hold_retune is a integer to allow nesting.

Various simple functions are provided to set / clear those
variables.

Subsequent patches take those functions into use.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/mmc/host.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index b5bedaec6223..f471193ef6d6 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -12,6 +12,7 @@
 
 #include <linux/leds.h>
 #include <linux/mutex.h>
+#include <linux/timer.h>
 #include <linux/sched.h>
 #include <linux/device.h>
 #include <linux/fault-inject.h>
@@ -321,10 +322,18 @@ struct mmc_host {
 #ifdef CONFIG_MMC_DEBUG
 	unsigned int		removed:1;	/* host is being removed */
 #endif
+	unsigned int		can_retune:1;	/* re-tuning can be used */
+	unsigned int		doing_retune:1;	/* re-tuning in progress */
+	unsigned int		retune_now:1;	/* do re-tuning at next req */
 
 	int			rescan_disable;	/* disable card detection */
 	int			rescan_entered;	/* used with nonremovable devices */
 
+	int			need_retune;	/* re-tuning is needed */
+	int			hold_retune;	/* hold off re-tuning */
+	unsigned int		retune_period;	/* re-tuning period in secs */
+	struct timer_list	retune_timer;	/* for periodic re-tuning */
+
 	bool			trigger_card_event; /* card_event necessary */
 
 	struct mmc_card		*card;		/* device attached to this host */
@@ -513,4 +522,18 @@ static inline bool mmc_card_hs400(struct mmc_card *card)
 	return card->host->ios.timing == MMC_TIMING_MMC_HS400;
 }
 
+void mmc_retune_timer_stop(struct mmc_host *host);
+
+static inline void mmc_retune_needed(struct mmc_host *host)
+{
+	if (host->can_retune)
+		host->need_retune = 1;
+}
+
+static inline void mmc_retune_recheck(struct mmc_host *host)
+{
+	if (host->hold_retune <= 1)
+		host->retune_now = 1;
+}
+
 #endif /* LINUX_MMC_HOST_H */
-- 
cgit v1.2.3


From 9f6e0bff2afb52a4c29f5ca8a4db01810357974e Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Wed, 6 May 2015 20:31:19 +0200
Subject: mmc: Add support for disabling write-protect detection

It is not uncommon to see systems where there is no physical write-protect
signal (e.g. when using eMMC or microSD card slots). For some controllers,
which have a dedicated write-protection detection logic (like SDHCI
controllers), the get_ro() callback can return bogus data in such a case.

Instead of handling this on a per controller basis this patch adds a new
capability flag to the MMC core that can be set to specify that the result
of get_ro() is invalid. When the flag is set the core will not call
get_ro() and assume that the card is always read-write.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/mmc/host.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index f471193ef6d6..433eccb50838 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -286,6 +286,7 @@ struct mmc_host {
 				 MMC_CAP2_HS400_1_2V)
 #define MMC_CAP2_HSX00_1_2V	(MMC_CAP2_HS200_1_2V_SDR | MMC_CAP2_HS400_1_2V)
 #define MMC_CAP2_SDIO_IRQ_NOTHREAD (1 << 17)
+#define MMC_CAP2_NO_WRITE_PROTECT (1 << 18)	/* No physical write protect pin, assume that card is always read-write */
 
 	mmc_pm_flag_t		pm_caps;	/* supported pm features */
 
-- 
cgit v1.2.3


From eff8f2f5df1c509c873cdc70c84eb2ee75b41e65 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Wed, 6 May 2015 20:31:22 +0200
Subject: mmc: dw_mmc: Use core to handle absent write protect line

Use the new MMC_CAP2_NO_WRITE_PROTECT to let the core handle the case where
no write protect line is present instead of having custom driver code to
handle it.

dw_mci_of_get_slot_quirks() is slightly refactored to directly modify the
mmc_host capabilities instead of returning a quirk mask.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Jaehoon Chung <jh80.chung@samsung.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/mmc/dw_mmc.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h
index 12111993a317..5be97676f1fa 100644
--- a/include/linux/mmc/dw_mmc.h
+++ b/include/linux/mmc/dw_mmc.h
@@ -226,12 +226,6 @@ struct dw_mci_dma_ops {
 #define DW_MCI_QUIRK_HIGHSPEED			BIT(2)
 /* Unreliable card detection */
 #define DW_MCI_QUIRK_BROKEN_CARD_DETECTION	BIT(3)
-/* No write protect */
-#define DW_MCI_QUIRK_NO_WRITE_PROTECT		BIT(4)
-
-/* Slot level quirks */
-/* This slot has no write protect */
-#define DW_MCI_SLOT_QUIRK_NO_WRITE_PROTECT	BIT(0)
 
 struct dma_pdata;
 
-- 
cgit v1.2.3


From b4f30a174e1fda8118eda038b5d8d5260db36ad5 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 6 Feb 2015 14:12:52 +0200
Subject: mmc: core: Allow card drive strength to be different to host

Initialization of UHS-I modes for SD and SDIO cards
employs a callback to allow the host driver to
choose a drive strength value. Currently that
assumes the card drive strength and host driver
type must be the same value. Change to let the
callback make that decision and return both the
card drive strength and host driver type.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/mmc/host.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 433eccb50838..da33d18c66c8 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -132,7 +132,8 @@ struct mmc_host_ops {
 
 	/* Prepare HS400 target operating frequency depending host driver */
 	int	(*prepare_hs400_tuning)(struct mmc_host *host, struct mmc_ios *ios);
-	int	(*select_drive_strength)(unsigned int max_dtr, int host_drv, int card_drv);
+	int	(*select_drive_strength)(unsigned int max_dtr, int host_drv,
+					 int card_drv, int *drv_type);
 	void	(*hw_reset)(struct mmc_host *host);
 	void	(*card_event)(struct mmc_host *host);
 
-- 
cgit v1.2.3


From f168359efbb99d6f8591bb666d6510bb78df2d07 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 6 Feb 2015 14:12:54 +0200
Subject: mmc: core: Add 'card' to drive strength selection callback

In preparation for supporting also eMMC drive strength,
add the 'card' as a parameter so that the callback can
distinguish different types of cards if necessary.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/mmc/host.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index da33d18c66c8..1369e54faeb7 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -132,7 +132,8 @@ struct mmc_host_ops {
 
 	/* Prepare HS400 target operating frequency depending host driver */
 	int	(*prepare_hs400_tuning)(struct mmc_host *host, struct mmc_ios *ios);
-	int	(*select_drive_strength)(unsigned int max_dtr, int host_drv,
+	int	(*select_drive_strength)(struct mmc_card *card,
+					 unsigned int max_dtr, int host_drv,
 					 int card_drv, int *drv_type);
 	void	(*hw_reset)(struct mmc_host *host);
 	void	(*card_event)(struct mmc_host *host);
-- 
cgit v1.2.3


From 3853a042325e8f497c199020979c4fc824528c6e Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 6 Feb 2015 14:12:56 +0200
Subject: mmc: core: Record card drive strength

In preparation for adding drive strength support
for eMMC, add drive_strength to struct mmc_card
to record the card drive strength for UHS-I modes
and HS200 / HS400. For eMMC this will be needed
when switching between HS200 and HS400.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/mmc/card.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 19f0175c0afa..2f073d555793 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -305,6 +305,7 @@ struct mmc_card {
 
 	unsigned int		sd_bus_speed;	/* Bus Speed Mode set for the card */
 	unsigned int		mmc_avail_type;	/* supported device type by both host and card */
+	unsigned int		drive_strength;	/* for UHS-I, HS200 or HS400 */
 
 	struct dentry		*debugfs_root;
 	struct mmc_part	part[MMC_NUM_PHY_PARTITION]; /* physical partitions */
-- 
cgit v1.2.3


From b097e07f57930eda774c83aa46e8e401686d01dc Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 6 Feb 2015 14:12:57 +0200
Subject: mmc: mmc: Read card's valid driver strength mask

In preparation for supporing drive strength selection
for eMMC, read the card's valid driver strengths.

Note that though the SD spec uses the term "drive strength",
the JEDEC eMMC spec uses the term "driver strength".

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/mmc/card.h | 1 +
 include/linux/mmc/mmc.h  | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 2f073d555793..4d3776d25925 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -97,6 +97,7 @@ struct mmc_ext_csd {
 	u8			raw_erased_mem_count;	/* 181 */
 	u8			raw_ext_csd_structure;	/* 194 */
 	u8			raw_card_type;		/* 196 */
+	u8			raw_driver_strength;	/* 197 */
 	u8			out_of_int_time;	/* 198 */
 	u8			raw_pwr_cl_52_195;	/* 200 */
 	u8			raw_pwr_cl_26_195;	/* 201 */
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index 124f562118b8..4819cfbc3795 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -302,6 +302,7 @@ struct _mmc_csd {
 #define EXT_CSD_REV			192	/* RO */
 #define EXT_CSD_STRUCTURE		194	/* RO */
 #define EXT_CSD_CARD_TYPE		196	/* RO */
+#define EXT_CSD_DRIVER_STRENGTH		197	/* RO */
 #define EXT_CSD_OUT_OF_INTERRUPT_TIME	198	/* RO */
 #define EXT_CSD_PART_SWITCH_TIME        199     /* RO */
 #define EXT_CSD_PWR_CL_52_195		200	/* RO */
-- 
cgit v1.2.3


From cc4f414c885cd04f7227ad9bcd6b18fd78d718d9 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 6 Feb 2015 14:12:58 +0200
Subject: mmc: mmc: Add driver strength selection

Add the ability to set eMMC driver strength
for HS200 and HS400.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/mmc/mmc.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index 4819cfbc3795..15f2c4a0a62c 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -391,6 +391,7 @@ struct _mmc_csd {
 #define EXT_CSD_TIMING_HS	1	/* High speed */
 #define EXT_CSD_TIMING_HS200	2	/* HS200 */
 #define EXT_CSD_TIMING_HS400	3	/* HS400 */
+#define EXT_CSD_DRV_STR_SHIFT	4	/* Driver Strength shift */
 
 #define EXT_CSD_SEC_ER_EN	BIT(0)
 #define EXT_CSD_SEC_BD_BLK_EN	BIT(2)
@@ -442,4 +443,6 @@ struct _mmc_csd {
 #define MMC_SWITCH_MODE_CLEAR_BITS	0x02	/* Clear bits which are 1 in value */
 #define MMC_SWITCH_MODE_WRITE_BYTE	0x03	/* Set target to value */
 
+#define mmc_driver_type_mask(n)		(1 << (n))
+
 #endif /* LINUX_MMC_MMC_H */
-- 
cgit v1.2.3


From e1bfad6d936d7149a83423e2a7244dd5771f27e7 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 6 Feb 2015 14:13:00 +0200
Subject: mmc: sdhci-pci: Add support for drive strength selection for SPT

Implement the select_drive_strength callback to provide
drive strength selection for Intel SPT.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/mmc/sdhci-pci-data.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmc/sdhci-pci-data.h b/include/linux/mmc/sdhci-pci-data.h
index 8959604a13d3..fda15b6d4135 100644
--- a/include/linux/mmc/sdhci-pci-data.h
+++ b/include/linux/mmc/sdhci-pci-data.h
@@ -15,4 +15,6 @@ struct sdhci_pci_data {
 extern struct sdhci_pci_data *(*sdhci_pci_get_data)(struct pci_dev *pdev,
 				int slotno);
 
+extern int sdhci_pci_spt_drive_strength;
+
 #endif
-- 
cgit v1.2.3


From 225d59adf1c899176cce0fc80e42b1d1c12f109f Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Fri, 29 May 2015 18:14:21 +0200
Subject: iio: Specify supported modes for buffers

For each buffer type specify the supported device modes for this buffer.
This allows us for devices which support multiple different operating modes
to pick the correct operating mode based on the modes supported by the
attached buffers.

It also prevents that buffers with conflicting modes are attached
to a device at the same time or that a buffer with a non-supported mode is
attached to a device (e.g. in-kernel callback buffer to a device only
supporting hardware mode).

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 include/linux/iio/buffer.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h
index eb8622b78ec9..1600c55828e0 100644
--- a/include/linux/iio/buffer.h
+++ b/include/linux/iio/buffer.h
@@ -29,6 +29,7 @@ struct iio_buffer;
  * @set_length:		set number of datums in buffer
  * @release:		called when the last reference to the buffer is dropped,
  *			should free all resources allocated by the buffer.
+ * @modes:		Supported operating modes by this buffer type
  *
  * The purpose of this structure is to make the buffer element
  * modular as event for a given driver, different usecases may require
@@ -51,6 +52,8 @@ struct iio_buffer_access_funcs {
 	int (*set_length)(struct iio_buffer *buffer, int length);
 
 	void (*release)(struct iio_buffer *buffer);
+
+	unsigned int modes;
 };
 
 /**
-- 
cgit v1.2.3


From 04fba7864ffcceae8a5f78d88ae1fd8d682a5123 Mon Sep 17 00:00:00 2001
From: Goffredo Baroncelli <kreijack@inwind.it>
Date: Sat, 30 May 2015 11:00:26 +0200
Subject: HID: Export hid_field_extract()

Rename the function extract() to hid_field_extract(), make it external linkage
to allow the use from other modules.

Suggested-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/hid.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hid.h b/include/linux/hid.h
index 176b43670e5d..f17980de2662 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -815,6 +815,8 @@ void hid_disconnect(struct hid_device *hid);
 const struct hid_device_id *hid_match_id(struct hid_device *hdev,
 					 const struct hid_device_id *id);
 s32 hid_snto32(__u32 value, unsigned n);
+__u32 hid_field_extract(const struct hid_device *hid, __u8 *report,
+		     unsigned offset, unsigned n);
 
 /**
  * hid_device_io_start - enable HID input during probe, remove
-- 
cgit v1.2.3


From 3fff99bc4e926d9602a7d6e8c008a0175a099ce4 Mon Sep 17 00:00:00 2001
From: Rojhalat Ibrahim <imr@rtschenk.de>
Date: Wed, 13 May 2015 11:04:56 +0200
Subject: gpiolib: rename gpiod_set_array to gpiod_set_array_value

There have been concerns that the function names gpiod_set_array() and
gpiod_get_array() might be confusing to users. One might expect
gpiod_get_array() to return array values, while it is actually the array
counterpart of gpiod_get(). To be consistent with the single descriptor API
we could rename gpiod_set_array() to gpiod_set_array_value(). This makes
some function names a bit lengthy: gpiod_set_raw_array_value_cansleep().

Signed-off-by: Rojhalat Ibrahim <imr@rtschenk.de>
Acked-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio/consumer.h | 37 +++++++++++++++++++------------------
 1 file changed, 19 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index 09a7fb0062a6..fd098169fe87 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -100,24 +100,25 @@ int gpiod_direction_output_raw(struct gpio_desc *desc, int value);
 /* Value get/set from non-sleeping context */
 int gpiod_get_value(const struct gpio_desc *desc);
 void gpiod_set_value(struct gpio_desc *desc, int value);
-void gpiod_set_array(unsigned int array_size,
-		     struct gpio_desc **desc_array, int *value_array);
+void gpiod_set_array_value(unsigned int array_size,
+			   struct gpio_desc **desc_array, int *value_array);
 int gpiod_get_raw_value(const struct gpio_desc *desc);
 void gpiod_set_raw_value(struct gpio_desc *desc, int value);
-void gpiod_set_raw_array(unsigned int array_size,
-			 struct gpio_desc **desc_array, int *value_array);
+void gpiod_set_raw_array_value(unsigned int array_size,
+			       struct gpio_desc **desc_array,
+			       int *value_array);
 
 /* Value get/set from sleeping context */
 int gpiod_get_value_cansleep(const struct gpio_desc *desc);
 void gpiod_set_value_cansleep(struct gpio_desc *desc, int value);
-void gpiod_set_array_cansleep(unsigned int array_size,
-			      struct gpio_desc **desc_array,
-			      int *value_array);
+void gpiod_set_array_value_cansleep(unsigned int array_size,
+				    struct gpio_desc **desc_array,
+				    int *value_array);
 int gpiod_get_raw_value_cansleep(const struct gpio_desc *desc);
 void gpiod_set_raw_value_cansleep(struct gpio_desc *desc, int value);
-void gpiod_set_raw_array_cansleep(unsigned int array_size,
-				  struct gpio_desc **desc_array,
-				  int *value_array);
+void gpiod_set_raw_array_value_cansleep(unsigned int array_size,
+					struct gpio_desc **desc_array,
+					int *value_array);
 
 int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce);
 
@@ -304,9 +305,9 @@ static inline void gpiod_set_value(struct gpio_desc *desc, int value)
 	/* GPIO can never have been requested */
 	WARN_ON(1);
 }
-static inline void gpiod_set_array(unsigned int array_size,
-				   struct gpio_desc **desc_array,
-				   int *value_array)
+static inline void gpiod_set_array_value(unsigned int array_size,
+					 struct gpio_desc **desc_array,
+					 int *value_array)
 {
 	/* GPIO can never have been requested */
 	WARN_ON(1);
@@ -322,9 +323,9 @@ static inline void gpiod_set_raw_value(struct gpio_desc *desc, int value)
 	/* GPIO can never have been requested */
 	WARN_ON(1);
 }
-static inline void gpiod_set_raw_array(unsigned int array_size,
-				       struct gpio_desc **desc_array,
-				       int *value_array)
+static inline void gpiod_set_raw_array_value(unsigned int array_size,
+					     struct gpio_desc **desc_array,
+					     int *value_array)
 {
 	/* GPIO can never have been requested */
 	WARN_ON(1);
@@ -341,7 +342,7 @@ static inline void gpiod_set_value_cansleep(struct gpio_desc *desc, int value)
 	/* GPIO can never have been requested */
 	WARN_ON(1);
 }
-static inline void gpiod_set_array_cansleep(unsigned int array_size,
+static inline void gpiod_set_array_value_cansleep(unsigned int array_size,
 					    struct gpio_desc **desc_array,
 					    int *value_array)
 {
@@ -360,7 +361,7 @@ static inline void gpiod_set_raw_value_cansleep(struct gpio_desc *desc,
 	/* GPIO can never have been requested */
 	WARN_ON(1);
 }
-static inline void gpiod_set_raw_array_cansleep(unsigned int array_size,
+static inline void gpiod_set_raw_array_value_cansleep(unsigned int array_size,
 						struct gpio_desc **desc_array,
 						int *value_array)
 {
-- 
cgit v1.2.3


From 5fbf65d5c9c0fd2e5c6c48d69ce34b1c5415f2fd Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Fri, 22 May 2015 15:19:37 +0900
Subject: pinctrl: remove useless const qualifier

This "const" claims the get_function_groups callback never
changes the given num_groups pointer.  It is always true
in C language, so not worth mentioning.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/pinctrl/pinmux.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pinctrl/pinmux.h b/include/linux/pinctrl/pinmux.h
index d3740fa7073f..ace60d775b20 100644
--- a/include/linux/pinctrl/pinmux.h
+++ b/include/linux/pinctrl/pinmux.h
@@ -69,7 +69,7 @@ struct pinmux_ops {
 	int (*get_function_groups) (struct pinctrl_dev *pctldev,
 				  unsigned selector,
 				  const char * const **groups,
-				  unsigned * const num_groups);
+				  unsigned *num_groups);
 	int (*set_mux) (struct pinctrl_dev *pctldev, unsigned func_selector,
 			unsigned group_selector);
 	int (*gpio_request_enable) (struct pinctrl_dev *pctldev,
-- 
cgit v1.2.3


From b3da97ee581387cd42dafd76eb2ac23f2335cd92 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Fri, 22 May 2015 15:25:50 +0900
Subject: pinctrl: use "const struct ..." rather than "struct ... const"

Only this member, pins, is defined as "struct ... const *", but the
others in this struct, pinlops, pmxops, confops, etc. are defined as
"const struct ... *".

Swap the "struct pinctrl_pin_desc" and "const" for consistency.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/pinctrl/pinctrl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h
index 66e4697516de..9ba59fcba549 100644
--- a/include/linux/pinctrl/pinctrl.h
+++ b/include/linux/pinctrl/pinctrl.h
@@ -127,7 +127,7 @@ struct pinctrl_ops {
  */
 struct pinctrl_desc {
 	const char *name;
-	struct pinctrl_pin_desc const *pins;
+	const struct pinctrl_pin_desc *pins;
 	unsigned int npins;
 	const struct pinctrl_ops *pctlops;
 	const struct pinmux_ops *pmxops;
-- 
cgit v1.2.3


From 4af34b572a85c44c55491a10693535a79627c478 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@free-electrons.com>
Date: Mon, 1 Jun 2015 11:04:26 +0200
Subject: drivers: soc: sunxi: Introduce SoC driver to map SRAMs

The Allwinner SoCs have a handful of SRAM that can be either mapped to be
accessible by devices or the CPU.

That mapping is controlled by an SRAM controller, and that mapping might
not be set by the bootloader, for example if the device wasn't used at all,
or if we're using solutions like the U-Boot's Falcon Boot.

We could also imagine changing this at runtime for example to change the
mapping of these SRAMs to use them for suspend/resume or runtime memory
rate change, if that ever happens.

These use cases require some API in the kernel to control that mapping,
exported through a drivers/soc driver.

This driver also implement a debugfs file that shows the SRAM found in the
system, the current mapping and the SRAM that have been claimed by some
drivers in the kernel.

Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Hans de Goede <hdegoede@redhat.com>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/linux/soc/sunxi/sunxi_sram.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
 create mode 100644 include/linux/soc/sunxi/sunxi_sram.h

(limited to 'include/linux')

diff --git a/include/linux/soc/sunxi/sunxi_sram.h b/include/linux/soc/sunxi/sunxi_sram.h
new file mode 100644
index 000000000000..c5f663bba9c2
--- /dev/null
+++ b/include/linux/soc/sunxi/sunxi_sram.h
@@ -0,0 +1,19 @@
+/*
+ * Allwinner SoCs SRAM Controller Driver
+ *
+ * Copyright (C) 2015 Maxime Ripard
+ *
+ * Author: Maxime Ripard <maxime.ripard@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#ifndef _SUNXI_SRAM_H_
+#define _SUNXI_SRAM_H_
+
+int sunxi_sram_claim(struct device *dev);
+int sunxi_sram_release(struct device *dev);
+
+#endif /* _SUNXI_SRAM_H_ */
-- 
cgit v1.2.3


From f26cdc8536ad50fb802a0445f836b4f94ca09ae7 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Mon, 1 Jun 2015 09:29:53 -0600
Subject: blk-mq: Shared tag enhancements

Storage controllers may expose multiple block devices that share hardware
resources managed by blk-mq. This patch enhances the shared tags so a
low-level driver can access the shared resources not tied to the unshared
h/w contexts. This way the LLD can dynamically add and delete disks and
request queues without having to track all the request_queue hctx's to
iterate outstanding tags.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blk-mq.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 2056a99b92f8..37d1602c4f7a 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -96,6 +96,7 @@ typedef void (exit_request_fn)(void *, struct request *, unsigned int,
 
 typedef void (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *,
 		bool);
+typedef void (busy_tag_iter_fn)(struct request *, void *, bool);
 
 struct blk_mq_ops {
 	/*
@@ -182,6 +183,7 @@ bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
 struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
 		gfp_t gfp, bool reserved);
 struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
+struct cpumask *blk_mq_tags_cpumask(struct blk_mq_tags *tags);
 
 enum {
 	BLK_MQ_UNIQUE_TAG_BITS = 16,
@@ -224,6 +226,8 @@ void blk_mq_run_hw_queues(struct request_queue *q, bool async);
 void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
 void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn,
 		void *priv);
+void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn,
+		void *priv);
 void blk_mq_freeze_queue(struct request_queue *q);
 void blk_mq_unfreeze_queue(struct request_queue *q);
 void blk_mq_freeze_queue_start(struct request_queue *q);
-- 
cgit v1.2.3


From bdef7de4b8d9be4cf7bf5aea977f827310ab3ff0 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 1 Jun 2015 14:56:09 -0700
Subject: net: Add priority to packet_offload objects.

When we scan a packet for GRO processing, we want to see the most
common packet types in the front of the offload_base list.

So add a priority field so we can handle this properly.

IPv4/IPv6 get the highest priority with the implicit zero priority
field.

Next comes ethernet with a priority of 10, and then we have the MPLS
types with a priority of 15.

Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
Suggested-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 51f8d2f5dc3f..6f5f71ff5169 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1997,6 +1997,7 @@ struct offload_callbacks {
 
 struct packet_offload {
 	__be16			 type;	/* This is really htons(ether_type). */
+	u16			 priority;
 	struct offload_callbacks callbacks;
 	struct list_head	 list;
 };
-- 
cgit v1.2.3


From 66e5133f19e901a044fa5eaeeb6ecff4545839e5 Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Mon, 1 Jun 2015 21:55:06 +0900
Subject: vlan: Add GRO support for non hardware accelerated vlan

Currently packets with non-hardware-accelerated vlan cannot be handled
by GRO. This causes low performance for 802.1ad and stacked vlan, as their
vlan tags are currently not stripped by hardware.

This patch adds GRO support for non-hardware-accelerated vlan and
improves receive performance of them.

Test Environment:
 vlan device (.1Q) on vlan device (.1ad) on ixgbe (82599)

Result:

- Before

$ netperf -t TCP_STREAM -H 192.168.20.2 -l 60
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

 87380  16384  16384    60.00    5233.17

Rx side CPU usage:
  %usr      %sys      %irq     %soft     %idle
  0.27     58.03      0.00     41.70      0.00

- After

$ netperf -t TCP_STREAM -H 192.168.20.2 -l 60
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

 87380  16384  16384    60.00    7586.85

Rx side CPU usage:
  %usr      %sys      %irq     %soft     %idle
  0.50     25.83      0.00     59.53     14.14

[ Register VLAN offloads with priority 10 -DaveM ]

Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index a40d29846ac2..67ce5bd3b56a 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -628,4 +628,24 @@ static inline netdev_features_t vlan_features_check(const struct sk_buff *skb,
 	return features;
 }
 
+/**
+ * compare_vlan_header - Compare two vlan headers
+ * @h1: Pointer to vlan header
+ * @h2: Pointer to vlan header
+ *
+ * Compare two vlan headers, returns 0 if equal.
+ *
+ * Please note that alignment of h1 & h2 are only guaranteed to be 16 bits.
+ */
+static inline unsigned long compare_vlan_header(const struct vlan_hdr *h1,
+						const struct vlan_hdr *h2)
+{
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+	return *(u32 *)h1 ^ *(u32 *)h2;
+#else
+	return ((__force u32)h1->h_vlan_TCI ^ (__force u32)h2->h_vlan_TCI) |
+	       ((__force u32)h1->h_vlan_encapsulated_proto ^
+		(__force u32)h2->h_vlan_encapsulated_proto);
+#endif
+}
 #endif /* !(_LINUX_IF_VLAN_H_) */
-- 
cgit v1.2.3


From 3434d23b694e5cb6e44e966914563406c31c4053 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 21 May 2015 13:33:45 +0530
Subject: clockevents: Add helpers to check the state of a clockevent device

Some clockevent drivers, once migrated to use per-state callbacks,
need to check the state of the clockevent device in their callbacks or
interrupt handler.

Add accessor functions clockevent_state_*() to get this information.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Cc: linaro-kernel@lists.linaro.org
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/04a717d490335c688dd7af899fbcede97e1bb8ee.1432192527.git.viresh.kumar@linaro.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/clockchips.h | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 271fa4c8eb29..64214ad85af9 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -149,6 +149,32 @@ struct clock_event_device {
 	struct module		*owner;
 } ____cacheline_aligned;
 
+/* Helpers to verify state of a clockevent device */
+static inline bool clockevent_state_detached(struct clock_event_device *dev)
+{
+	return dev->state == CLOCK_EVT_STATE_DETACHED;
+}
+
+static inline bool clockevent_state_shutdown(struct clock_event_device *dev)
+{
+	return dev->state == CLOCK_EVT_STATE_SHUTDOWN;
+}
+
+static inline bool clockevent_state_periodic(struct clock_event_device *dev)
+{
+	return dev->state == CLOCK_EVT_STATE_PERIODIC;
+}
+
+static inline bool clockevent_state_oneshot(struct clock_event_device *dev)
+{
+	return dev->state == CLOCK_EVT_STATE_ONESHOT;
+}
+
+static inline bool clockevent_state_oneshot_stopped(struct clock_event_device *dev)
+{
+	return dev->state == CLOCK_EVT_STATE_ONESHOT_STOPPED;
+}
+
 /*
  * Calculate a multiplication factor for scaled math, which is used to convert
  * nanoseconds based values to clock ticks:
-- 
cgit v1.2.3


From a97e2d86a9b88ea9e9a280b594b80f0eec2c955b Mon Sep 17 00:00:00 2001
From: Ira Weiny <ira.weiny@intel.com>
Date: Sun, 31 May 2015 17:15:30 -0400
Subject: IB/core cleanup: Add const on args - device->process_mad

The process_mad device function declares some parameters as "in".  Make those
parameters const and adjust the call tree under process_mad in the various
drivers accordingly.

Signed-off-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Hal Rosenstock <hal@mellanox.com>
Reviewed-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/linux/mlx5/driver.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 9a90e7523dc2..9ec7c93d6fa3 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -696,7 +696,7 @@ int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
 			     u32 *mkey);
 int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn);
 int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn);
-int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, void *inb, void *outb,
+int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
 		      u16 opmod, u8 port);
 void mlx5_pagealloc_init(struct mlx5_core_dev *dev);
 void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev);
-- 
cgit v1.2.3


From 11f81becca04bb7d2826a9b65bb8d27b0a1bb543 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 22 May 2015 17:13:15 -0400
Subject: page_writeback: revive cancel_dirty_page() in a restricted form

cancel_dirty_page() had some issues and b9ea25152e56 ("page_writeback:
clean up mess around cancel_dirty_page()") replaced it with
account_page_cleaned() which makes the caller responsible for clearing
the dirty bit; unfortunately, the planned changes for cgroup writeback
support requires synchronization between dirty bit manipulation and
stat updates.  While we can open-code such synchronization in each
account_page_cleaned() callsite, that's gonna be unnecessarily awkward
and verbose.

This patch revives cancel_dirty_page() but in a more restricted form.
All it does is TestClearPageDirty() followed by account_page_cleaned()
invocation if the page was dirty.  This helper covers all
account_page_cleaned() usages except for __delete_from_page_cache()
which is a special case anyway and left alone.  As this leaves no
module user for account_page_cleaned(), EXPORT_SYMBOL() is dropped
from it.

This patch just revives cancel_dirty_page() as a trivial wrapper to
replace equivalent usages and doesn't introduce any functional
changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/mm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0755b9fd03a7..a83cf3a6f78e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1215,6 +1215,7 @@ void account_page_dirtied(struct page *page, struct address_space *mapping);
 void account_page_cleaned(struct page *page, struct address_space *mapping);
 int set_page_dirty(struct page *page);
 int set_page_dirty_lock(struct page *page);
+void cancel_dirty_page(struct page *page);
 int clear_page_dirty_for_io(struct page *page);
 
 int get_cmdline(struct task_struct *task, char *buffer, int buflen);
-- 
cgit v1.2.3


From c4843a7593a9df3ff5b1806084cefdfa81dd7c79 Mon Sep 17 00:00:00 2001
From: Greg Thelen <gthelen@google.com>
Date: Fri, 22 May 2015 17:13:16 -0400
Subject: memcg: add per cgroup dirty page accounting

When modifying PG_Dirty on cached file pages, update the new
MEM_CGROUP_STAT_DIRTY counter.  This is done in the same places where
global NR_FILE_DIRTY is managed.  The new memcg stat is visible in the
per memcg memory.stat cgroupfs file.  The most recent past attempt at
this was http://thread.gmane.org/gmane.linux.kernel.cgroups/8632

The new accounting supports future efforts to add per cgroup dirty
page throttling and writeback.  It also helps an administrator break
down a container's memory usage and provides evidence to understand
memcg oom kills (the new dirty count is included in memcg oom kill
messages).

The ability to move page accounting between memcg
(memory.move_charge_at_immigrate) makes this accounting more
complicated than the global counter.  The existing
mem_cgroup_{begin,end}_page_stat() lock is used to serialize move
accounting with stat updates.
Typical update operation:
	memcg = mem_cgroup_begin_page_stat(page)
	if (TestSetPageDirty()) {
		[...]
		mem_cgroup_update_page_stat(memcg)
	}
	mem_cgroup_end_page_stat(memcg)

Summary of mem_cgroup_end_page_stat() overhead:
- Without CONFIG_MEMCG it's a no-op
- With CONFIG_MEMCG and no inter memcg task movement, it's just
  rcu_read_lock()
- With CONFIG_MEMCG and inter memcg  task movement, it's
  rcu_read_lock() + spin_lock_irqsave()

A memcg parameter is added to several routines because their callers
now grab mem_cgroup_begin_page_stat() which returns the memcg later
needed by for mem_cgroup_update_page_stat().

Because mem_cgroup_begin_page_stat() may disable interrupts, some
adjustments are needed:
- move __mark_inode_dirty() from __set_page_dirty() to its caller.
  __mark_inode_dirty() locking does not want interrupts disabled.
- use spin_lock_irqsave(tree_lock) rather than spin_lock_irq() in
  __delete_from_page_cache(), replace_page_cache_page(),
  invalidate_complete_page2(), and __remove_mapping().

   text    data     bss      dec    hex filename
8925147 1774832 1785856 12485835 be84cb vmlinux-!CONFIG_MEMCG-before
8925339 1774832 1785856 12486027 be858b vmlinux-!CONFIG_MEMCG-after
                            +192 text bytes
8965977 1784992 1785856 12536825 bf4bf9 vmlinux-CONFIG_MEMCG-before
8966750 1784992 1785856 12537598 bf4efe vmlinux-CONFIG_MEMCG-after
                            +773 text bytes

Performance tests run on v4.0-rc1-36-g4f671fe2f952.  Lower is better for
all metrics, they're all wall clock or cycle counts.  The read and write
fault benchmarks just measure fault time, they do not include I/O time.

* CONFIG_MEMCG not set:
                            baseline                              patched
  kbuild                 1m25.030000(+-0.088% 3 samples)       1m25.426667(+-0.120% 3 samples)
  dd write 100 MiB          0.859211561 +-15.10%                  0.874162885 +-15.03%
  dd write 200 MiB          1.670653105 +-17.87%                  1.669384764 +-11.99%
  dd write 1000 MiB         8.434691190 +-14.15%                  8.474733215 +-14.77%
  read fault cycles       254.0(+-0.000% 10 samples)            253.0(+-0.000% 10 samples)
  write fault cycles     2021.2(+-3.070% 10 samples)           1984.5(+-1.036% 10 samples)

* CONFIG_MEMCG=y root_memcg:
                            baseline                              patched
  kbuild                 1m25.716667(+-0.105% 3 samples)       1m25.686667(+-0.153% 3 samples)
  dd write 100 MiB          0.855650830 +-14.90%                  0.887557919 +-14.90%
  dd write 200 MiB          1.688322953 +-12.72%                  1.667682724 +-13.33%
  dd write 1000 MiB         8.418601605 +-14.30%                  8.673532299 +-15.00%
  read fault cycles       266.0(+-0.000% 10 samples)            266.0(+-0.000% 10 samples)
  write fault cycles     2051.7(+-1.349% 10 samples)           2049.6(+-1.686% 10 samples)

* CONFIG_MEMCG=y non-root_memcg:
                            baseline                              patched
  kbuild                 1m26.120000(+-0.273% 3 samples)       1m25.763333(+-0.127% 3 samples)
  dd write 100 MiB          0.861723964 +-15.25%                  0.818129350 +-14.82%
  dd write 200 MiB          1.669887569 +-13.30%                  1.698645885 +-13.27%
  dd write 1000 MiB         8.383191730 +-14.65%                  8.351742280 +-14.52%
  read fault cycles       265.7(+-0.172% 10 samples)            267.0(+-0.000% 10 samples)
  write fault cycles     2070.6(+-1.512% 10 samples)           2084.4(+-2.148% 10 samples)

As expected anon page faults are not affected by this patch.

tj: Updated to apply on top of the recent cancel_dirty_page() changes.

Signed-off-by: Sha Zhengju <handai.szj@gmail.com>
Signed-off-by: Greg Thelen <gthelen@google.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/memcontrol.h | 1 +
 include/linux/mm.h         | 6 ++++--
 include/linux/pagemap.h    | 3 ++-
 3 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 72dff5fb0d0c..5fe6411b5e54 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -41,6 +41,7 @@ enum mem_cgroup_stat_index {
 	MEM_CGROUP_STAT_RSS,		/* # of pages charged as anon rss */
 	MEM_CGROUP_STAT_RSS_HUGE,	/* # of pages charged as anon huge */
 	MEM_CGROUP_STAT_FILE_MAPPED,	/* # of pages charged as file rss */
+	MEM_CGROUP_STAT_DIRTY,          /* # of dirty pages in page cache */
 	MEM_CGROUP_STAT_WRITEBACK,	/* # of pages under writeback */
 	MEM_CGROUP_STAT_SWAP,		/* # of pages, swapped out */
 	MEM_CGROUP_STAT_NSTATS,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a83cf3a6f78e..f48d979ced4b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1211,8 +1211,10 @@ int __set_page_dirty_nobuffers(struct page *page);
 int __set_page_dirty_no_writeback(struct page *page);
 int redirty_page_for_writepage(struct writeback_control *wbc,
 				struct page *page);
-void account_page_dirtied(struct page *page, struct address_space *mapping);
-void account_page_cleaned(struct page *page, struct address_space *mapping);
+void account_page_dirtied(struct page *page, struct address_space *mapping,
+			  struct mem_cgroup *memcg);
+void account_page_cleaned(struct page *page, struct address_space *mapping,
+			  struct mem_cgroup *memcg);
 int set_page_dirty(struct page *page);
 int set_page_dirty_lock(struct page *page);
 void cancel_dirty_page(struct page *page);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 4b3736f7065c..fb0814ca65c7 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -651,7 +651,8 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
 int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
 				pgoff_t index, gfp_t gfp_mask);
 extern void delete_from_page_cache(struct page *page);
-extern void __delete_from_page_cache(struct page *page, void *shadow);
+extern void __delete_from_page_cache(struct page *page, void *shadow,
+				     struct mem_cgroup *memcg);
 int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask);
 
 /*
-- 
cgit v1.2.3


From eea8f41cc58849e354ecf8b95bd7f806e1d1f703 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 22 May 2015 17:13:17 -0400
Subject: blkcg: move block/blk-cgroup.h to include/linux/blk-cgroup.h

cgroup aware writeback support will require exposing some of blkcg
details.  In preprataion, move block/blk-cgroup.h to
include/linux/blk-cgroup.h.  This patch is pure file move.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blk-cgroup.h | 603 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 603 insertions(+)
 create mode 100644 include/linux/blk-cgroup.h

(limited to 'include/linux')

diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
new file mode 100644
index 000000000000..c567865b5f1d
--- /dev/null
+++ b/include/linux/blk-cgroup.h
@@ -0,0 +1,603 @@
+#ifndef _BLK_CGROUP_H
+#define _BLK_CGROUP_H
+/*
+ * Common Block IO controller cgroup interface
+ *
+ * Based on ideas and code from CFQ, CFS and BFQ:
+ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
+ *
+ * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
+ *		      Paolo Valente <paolo.valente@unimore.it>
+ *
+ * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
+ * 	              Nauman Rafique <nauman@google.com>
+ */
+
+#include <linux/cgroup.h>
+#include <linux/u64_stats_sync.h>
+#include <linux/seq_file.h>
+#include <linux/radix-tree.h>
+#include <linux/blkdev.h>
+#include <linux/atomic.h>
+
+/* Max limits for throttle policy */
+#define THROTL_IOPS_MAX		UINT_MAX
+
+/* CFQ specific, out here for blkcg->cfq_weight */
+#define CFQ_WEIGHT_MIN		10
+#define CFQ_WEIGHT_MAX		1000
+#define CFQ_WEIGHT_DEFAULT	500
+
+#ifdef CONFIG_BLK_CGROUP
+
+enum blkg_rwstat_type {
+	BLKG_RWSTAT_READ,
+	BLKG_RWSTAT_WRITE,
+	BLKG_RWSTAT_SYNC,
+	BLKG_RWSTAT_ASYNC,
+
+	BLKG_RWSTAT_NR,
+	BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR,
+};
+
+struct blkcg_gq;
+
+struct blkcg {
+	struct cgroup_subsys_state	css;
+	spinlock_t			lock;
+
+	struct radix_tree_root		blkg_tree;
+	struct blkcg_gq			*blkg_hint;
+	struct hlist_head		blkg_list;
+
+	/* TODO: per-policy storage in blkcg */
+	unsigned int			cfq_weight;	/* belongs to cfq */
+	unsigned int			cfq_leaf_weight;
+};
+
+struct blkg_stat {
+	struct u64_stats_sync		syncp;
+	uint64_t			cnt;
+};
+
+struct blkg_rwstat {
+	struct u64_stats_sync		syncp;
+	uint64_t			cnt[BLKG_RWSTAT_NR];
+};
+
+/*
+ * A blkcg_gq (blkg) is association between a block cgroup (blkcg) and a
+ * request_queue (q).  This is used by blkcg policies which need to track
+ * information per blkcg - q pair.
+ *
+ * There can be multiple active blkcg policies and each has its private
+ * data on each blkg, the size of which is determined by
+ * blkcg_policy->pd_size.  blkcg core allocates and frees such areas
+ * together with blkg and invokes pd_init/exit_fn() methods.
+ *
+ * Such private data must embed struct blkg_policy_data (pd) at the
+ * beginning and pd_size can't be smaller than pd.
+ */
+struct blkg_policy_data {
+	/* the blkg and policy id this per-policy data belongs to */
+	struct blkcg_gq			*blkg;
+	int				plid;
+
+	/* used during policy activation */
+	struct list_head		alloc_node;
+};
+
+/* association between a blk cgroup and a request queue */
+struct blkcg_gq {
+	/* Pointer to the associated request_queue */
+	struct request_queue		*q;
+	struct list_head		q_node;
+	struct hlist_node		blkcg_node;
+	struct blkcg			*blkcg;
+
+	/* all non-root blkcg_gq's are guaranteed to have access to parent */
+	struct blkcg_gq			*parent;
+
+	/* request allocation list for this blkcg-q pair */
+	struct request_list		rl;
+
+	/* reference count */
+	atomic_t			refcnt;
+
+	/* is this blkg online? protected by both blkcg and q locks */
+	bool				online;
+
+	struct blkg_policy_data		*pd[BLKCG_MAX_POLS];
+
+	struct rcu_head			rcu_head;
+};
+
+typedef void (blkcg_pol_init_pd_fn)(struct blkcg_gq *blkg);
+typedef void (blkcg_pol_online_pd_fn)(struct blkcg_gq *blkg);
+typedef void (blkcg_pol_offline_pd_fn)(struct blkcg_gq *blkg);
+typedef void (blkcg_pol_exit_pd_fn)(struct blkcg_gq *blkg);
+typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkcg_gq *blkg);
+
+struct blkcg_policy {
+	int				plid;
+	/* policy specific private data size */
+	size_t				pd_size;
+	/* cgroup files for the policy */
+	struct cftype			*cftypes;
+
+	/* operations */
+	blkcg_pol_init_pd_fn		*pd_init_fn;
+	blkcg_pol_online_pd_fn		*pd_online_fn;
+	blkcg_pol_offline_pd_fn		*pd_offline_fn;
+	blkcg_pol_exit_pd_fn		*pd_exit_fn;
+	blkcg_pol_reset_pd_stats_fn	*pd_reset_stats_fn;
+};
+
+extern struct blkcg blkcg_root;
+
+struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q);
+struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
+				    struct request_queue *q);
+int blkcg_init_queue(struct request_queue *q);
+void blkcg_drain_queue(struct request_queue *q);
+void blkcg_exit_queue(struct request_queue *q);
+
+/* Blkio controller policy registration */
+int blkcg_policy_register(struct blkcg_policy *pol);
+void blkcg_policy_unregister(struct blkcg_policy *pol);
+int blkcg_activate_policy(struct request_queue *q,
+			  const struct blkcg_policy *pol);
+void blkcg_deactivate_policy(struct request_queue *q,
+			     const struct blkcg_policy *pol);
+
+void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
+		       u64 (*prfill)(struct seq_file *,
+				     struct blkg_policy_data *, int),
+		       const struct blkcg_policy *pol, int data,
+		       bool show_total);
+u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v);
+u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
+			 const struct blkg_rwstat *rwstat);
+u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off);
+u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
+		       int off);
+
+u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off);
+struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd,
+					     int off);
+
+struct blkg_conf_ctx {
+	struct gendisk			*disk;
+	struct blkcg_gq			*blkg;
+	u64				v;
+};
+
+int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
+		   const char *input, struct blkg_conf_ctx *ctx);
+void blkg_conf_finish(struct blkg_conf_ctx *ctx);
+
+
+static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
+{
+	return css ? container_of(css, struct blkcg, css) : NULL;
+}
+
+static inline struct blkcg *task_blkcg(struct task_struct *tsk)
+{
+	return css_to_blkcg(task_css(tsk, blkio_cgrp_id));
+}
+
+static inline struct blkcg *bio_blkcg(struct bio *bio)
+{
+	if (bio && bio->bi_css)
+		return css_to_blkcg(bio->bi_css);
+	return task_blkcg(current);
+}
+
+/**
+ * blkcg_parent - get the parent of a blkcg
+ * @blkcg: blkcg of interest
+ *
+ * Return the parent blkcg of @blkcg.  Can be called anytime.
+ */
+static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
+{
+	return css_to_blkcg(blkcg->css.parent);
+}
+
+/**
+ * blkg_to_pdata - get policy private data
+ * @blkg: blkg of interest
+ * @pol: policy of interest
+ *
+ * Return pointer to private data associated with the @blkg-@pol pair.
+ */
+static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
+						  struct blkcg_policy *pol)
+{
+	return blkg ? blkg->pd[pol->plid] : NULL;
+}
+
+/**
+ * pdata_to_blkg - get blkg associated with policy private data
+ * @pd: policy private data of interest
+ *
+ * @pd is policy private data.  Determine the blkg it's associated with.
+ */
+static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd)
+{
+	return pd ? pd->blkg : NULL;
+}
+
+/**
+ * blkg_path - format cgroup path of blkg
+ * @blkg: blkg of interest
+ * @buf: target buffer
+ * @buflen: target buffer length
+ *
+ * Format the path of the cgroup of @blkg into @buf.
+ */
+static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
+{
+	char *p;
+
+	p = cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
+	if (!p) {
+		strncpy(buf, "<unavailable>", buflen);
+		return -ENAMETOOLONG;
+	}
+
+	memmove(buf, p, buf + buflen - p);
+	return 0;
+}
+
+/**
+ * blkg_get - get a blkg reference
+ * @blkg: blkg to get
+ *
+ * The caller should be holding an existing reference.
+ */
+static inline void blkg_get(struct blkcg_gq *blkg)
+{
+	WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
+	atomic_inc(&blkg->refcnt);
+}
+
+void __blkg_release_rcu(struct rcu_head *rcu);
+
+/**
+ * blkg_put - put a blkg reference
+ * @blkg: blkg to put
+ */
+static inline void blkg_put(struct blkcg_gq *blkg)
+{
+	WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
+	if (atomic_dec_and_test(&blkg->refcnt))
+		call_rcu(&blkg->rcu_head, __blkg_release_rcu);
+}
+
+struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q,
+			       bool update_hint);
+
+/**
+ * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
+ * @d_blkg: loop cursor pointing to the current descendant
+ * @pos_css: used for iteration
+ * @p_blkg: target blkg to walk descendants of
+ *
+ * Walk @c_blkg through the descendants of @p_blkg.  Must be used with RCU
+ * read locked.  If called under either blkcg or queue lock, the iteration
+ * is guaranteed to include all and only online blkgs.  The caller may
+ * update @pos_css by calling css_rightmost_descendant() to skip subtree.
+ * @p_blkg is included in the iteration and the first node to be visited.
+ */
+#define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg)		\
+	css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css)	\
+		if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css),	\
+					      (p_blkg)->q, false)))
+
+/**
+ * blkg_for_each_descendant_post - post-order walk of a blkg's descendants
+ * @d_blkg: loop cursor pointing to the current descendant
+ * @pos_css: used for iteration
+ * @p_blkg: target blkg to walk descendants of
+ *
+ * Similar to blkg_for_each_descendant_pre() but performs post-order
+ * traversal instead.  Synchronization rules are the same.  @p_blkg is
+ * included in the iteration and the last node to be visited.
+ */
+#define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg)		\
+	css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css)	\
+		if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css),	\
+					      (p_blkg)->q, false)))
+
+/**
+ * blk_get_rl - get request_list to use
+ * @q: request_queue of interest
+ * @bio: bio which will be attached to the allocated request (may be %NULL)
+ *
+ * The caller wants to allocate a request from @q to use for @bio.  Find
+ * the request_list to use and obtain a reference on it.  Should be called
+ * under queue_lock.  This function is guaranteed to return non-%NULL
+ * request_list.
+ */
+static inline struct request_list *blk_get_rl(struct request_queue *q,
+					      struct bio *bio)
+{
+	struct blkcg *blkcg;
+	struct blkcg_gq *blkg;
+
+	rcu_read_lock();
+
+	blkcg = bio_blkcg(bio);
+
+	/* bypass blkg lookup and use @q->root_rl directly for root */
+	if (blkcg == &blkcg_root)
+		goto root_rl;
+
+	/*
+	 * Try to use blkg->rl.  blkg lookup may fail under memory pressure
+	 * or if either the blkcg or queue is going away.  Fall back to
+	 * root_rl in such cases.
+	 */
+	blkg = blkg_lookup_create(blkcg, q);
+	if (unlikely(IS_ERR(blkg)))
+		goto root_rl;
+
+	blkg_get(blkg);
+	rcu_read_unlock();
+	return &blkg->rl;
+root_rl:
+	rcu_read_unlock();
+	return &q->root_rl;
+}
+
+/**
+ * blk_put_rl - put request_list
+ * @rl: request_list to put
+ *
+ * Put the reference acquired by blk_get_rl().  Should be called under
+ * queue_lock.
+ */
+static inline void blk_put_rl(struct request_list *rl)
+{
+	/* root_rl may not have blkg set */
+	if (rl->blkg && rl->blkg->blkcg != &blkcg_root)
+		blkg_put(rl->blkg);
+}
+
+/**
+ * blk_rq_set_rl - associate a request with a request_list
+ * @rq: request of interest
+ * @rl: target request_list
+ *
+ * Associate @rq with @rl so that accounting and freeing can know the
+ * request_list @rq came from.
+ */
+static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl)
+{
+	rq->rl = rl;
+}
+
+/**
+ * blk_rq_rl - return the request_list a request came from
+ * @rq: request of interest
+ *
+ * Return the request_list @rq is allocated from.
+ */
+static inline struct request_list *blk_rq_rl(struct request *rq)
+{
+	return rq->rl;
+}
+
+struct request_list *__blk_queue_next_rl(struct request_list *rl,
+					 struct request_queue *q);
+/**
+ * blk_queue_for_each_rl - iterate through all request_lists of a request_queue
+ *
+ * Should be used under queue_lock.
+ */
+#define blk_queue_for_each_rl(rl, q)	\
+	for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q)))
+
+static inline void blkg_stat_init(struct blkg_stat *stat)
+{
+	u64_stats_init(&stat->syncp);
+}
+
+/**
+ * blkg_stat_add - add a value to a blkg_stat
+ * @stat: target blkg_stat
+ * @val: value to add
+ *
+ * Add @val to @stat.  The caller is responsible for synchronizing calls to
+ * this function.
+ */
+static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val)
+{
+	u64_stats_update_begin(&stat->syncp);
+	stat->cnt += val;
+	u64_stats_update_end(&stat->syncp);
+}
+
+/**
+ * blkg_stat_read - read the current value of a blkg_stat
+ * @stat: blkg_stat to read
+ *
+ * Read the current value of @stat.  This function can be called without
+ * synchroniztion and takes care of u64 atomicity.
+ */
+static inline uint64_t blkg_stat_read(struct blkg_stat *stat)
+{
+	unsigned int start;
+	uint64_t v;
+
+	do {
+		start = u64_stats_fetch_begin_irq(&stat->syncp);
+		v = stat->cnt;
+	} while (u64_stats_fetch_retry_irq(&stat->syncp, start));
+
+	return v;
+}
+
+/**
+ * blkg_stat_reset - reset a blkg_stat
+ * @stat: blkg_stat to reset
+ */
+static inline void blkg_stat_reset(struct blkg_stat *stat)
+{
+	stat->cnt = 0;
+}
+
+/**
+ * blkg_stat_merge - merge a blkg_stat into another
+ * @to: the destination blkg_stat
+ * @from: the source
+ *
+ * Add @from's count to @to.
+ */
+static inline void blkg_stat_merge(struct blkg_stat *to, struct blkg_stat *from)
+{
+	blkg_stat_add(to, blkg_stat_read(from));
+}
+
+static inline void blkg_rwstat_init(struct blkg_rwstat *rwstat)
+{
+	u64_stats_init(&rwstat->syncp);
+}
+
+/**
+ * blkg_rwstat_add - add a value to a blkg_rwstat
+ * @rwstat: target blkg_rwstat
+ * @rw: mask of REQ_{WRITE|SYNC}
+ * @val: value to add
+ *
+ * Add @val to @rwstat.  The counters are chosen according to @rw.  The
+ * caller is responsible for synchronizing calls to this function.
+ */
+static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
+				   int rw, uint64_t val)
+{
+	u64_stats_update_begin(&rwstat->syncp);
+
+	if (rw & REQ_WRITE)
+		rwstat->cnt[BLKG_RWSTAT_WRITE] += val;
+	else
+		rwstat->cnt[BLKG_RWSTAT_READ] += val;
+	if (rw & REQ_SYNC)
+		rwstat->cnt[BLKG_RWSTAT_SYNC] += val;
+	else
+		rwstat->cnt[BLKG_RWSTAT_ASYNC] += val;
+
+	u64_stats_update_end(&rwstat->syncp);
+}
+
+/**
+ * blkg_rwstat_read - read the current values of a blkg_rwstat
+ * @rwstat: blkg_rwstat to read
+ *
+ * Read the current snapshot of @rwstat and return it as the return value.
+ * This function can be called without synchronization and takes care of
+ * u64 atomicity.
+ */
+static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat)
+{
+	unsigned int start;
+	struct blkg_rwstat tmp;
+
+	do {
+		start = u64_stats_fetch_begin_irq(&rwstat->syncp);
+		tmp = *rwstat;
+	} while (u64_stats_fetch_retry_irq(&rwstat->syncp, start));
+
+	return tmp;
+}
+
+/**
+ * blkg_rwstat_total - read the total count of a blkg_rwstat
+ * @rwstat: blkg_rwstat to read
+ *
+ * Return the total count of @rwstat regardless of the IO direction.  This
+ * function can be called without synchronization and takes care of u64
+ * atomicity.
+ */
+static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat)
+{
+	struct blkg_rwstat tmp = blkg_rwstat_read(rwstat);
+
+	return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE];
+}
+
+/**
+ * blkg_rwstat_reset - reset a blkg_rwstat
+ * @rwstat: blkg_rwstat to reset
+ */
+static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
+{
+	memset(rwstat->cnt, 0, sizeof(rwstat->cnt));
+}
+
+/**
+ * blkg_rwstat_merge - merge a blkg_rwstat into another
+ * @to: the destination blkg_rwstat
+ * @from: the source
+ *
+ * Add @from's counts to @to.
+ */
+static inline void blkg_rwstat_merge(struct blkg_rwstat *to,
+				     struct blkg_rwstat *from)
+{
+	struct blkg_rwstat v = blkg_rwstat_read(from);
+	int i;
+
+	u64_stats_update_begin(&to->syncp);
+	for (i = 0; i < BLKG_RWSTAT_NR; i++)
+		to->cnt[i] += v.cnt[i];
+	u64_stats_update_end(&to->syncp);
+}
+
+#else	/* CONFIG_BLK_CGROUP */
+
+struct cgroup;
+struct blkcg;
+
+struct blkg_policy_data {
+};
+
+struct blkcg_gq {
+};
+
+struct blkcg_policy {
+};
+
+static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
+static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
+static inline void blkcg_drain_queue(struct request_queue *q) { }
+static inline void blkcg_exit_queue(struct request_queue *q) { }
+static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
+static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { }
+static inline int blkcg_activate_policy(struct request_queue *q,
+					const struct blkcg_policy *pol) { return 0; }
+static inline void blkcg_deactivate_policy(struct request_queue *q,
+					   const struct blkcg_policy *pol) { }
+
+static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }
+
+static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
+						  struct blkcg_policy *pol) { return NULL; }
+static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
+static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
+static inline void blkg_get(struct blkcg_gq *blkg) { }
+static inline void blkg_put(struct blkcg_gq *blkg) { }
+
+static inline struct request_list *blk_get_rl(struct request_queue *q,
+					      struct bio *bio) { return &q->root_rl; }
+static inline void blk_put_rl(struct request_list *rl) { }
+static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { }
+static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; }
+
+#define blk_queue_for_each_rl(rl, q)	\
+	for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)
+
+#endif	/* CONFIG_BLK_CGROUP */
+#endif	/* _BLK_CGROUP_H */
-- 
cgit v1.2.3


From efa7d1c733d1d2c1a468b85126d70bad9fdf6ba8 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 22 May 2015 17:13:18 -0400
Subject: update !CONFIG_BLK_CGROUP dummies in include/linux/blk-cgroup.h

The header file will be used more widely with the pending cgroup
writeback support and the current set of dummy declarations aren't
enough to handle different config combinations.  Update as follows.

* Drop the struct cgroup declaration.  None of the dummy defs need it.

* Define blkcg as an empty struct instead of just declaring it.

* Wrap dummy function defs in CONFIG_BLOCK.  Some functions use block
  data types and none of them are to be used w/o block enabled.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blk-cgroup.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index c567865b5f1d..51f95b34d3f0 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -558,8 +558,8 @@ static inline void blkg_rwstat_merge(struct blkg_rwstat *to,
 
 #else	/* CONFIG_BLK_CGROUP */
 
-struct cgroup;
-struct blkcg;
+struct blkcg {
+};
 
 struct blkg_policy_data {
 };
@@ -570,6 +570,8 @@ struct blkcg_gq {
 struct blkcg_policy {
 };
 
+#ifdef CONFIG_BLOCK
+
 static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
 static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
 static inline void blkcg_drain_queue(struct request_queue *q) { }
@@ -599,5 +601,6 @@ static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q
 #define blk_queue_for_each_rl(rl, q)	\
 	for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)
 
+#endif	/* CONFIG_BLOCK */
 #endif	/* CONFIG_BLK_CGROUP */
 #endif	/* _BLK_CGROUP_H */
-- 
cgit v1.2.3


From 56161634e4824380a67243a4cf3fa52eb1e5d836 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 22 May 2015 17:13:20 -0400
Subject: memcg: add mem_cgroup_root_css

Add global mem_cgroup_root_css which points to the root memcg css.
This will be used by cgroup writeback support.  If memcg is disabled,
it's defined as ERR_PTR(-EINVAL).

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
aCc: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/memcontrol.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 5fe6411b5e54..294498f4f6fc 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -68,6 +68,8 @@ enum mem_cgroup_events_index {
 };
 
 #ifdef CONFIG_MEMCG
+extern struct cgroup_subsys_state *mem_cgroup_root_css;
+
 void mem_cgroup_events(struct mem_cgroup *memcg,
 		       enum mem_cgroup_events_index idx,
 		       unsigned int nr);
@@ -196,6 +198,8 @@ void mem_cgroup_split_huge_fixup(struct page *head);
 #else /* CONFIG_MEMCG */
 struct mem_cgroup;
 
+#define mem_cgroup_root_css ((struct cgroup_subsys_state *)ERR_PTR(-EINVAL))
+
 static inline void mem_cgroup_events(struct mem_cgroup *memcg,
 				     enum mem_cgroup_events_index idx,
 				     unsigned int nr)
-- 
cgit v1.2.3


From 496d5e7560dbb84399dbd92316fc33857aa83900 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 22 May 2015 17:13:21 -0400
Subject: blkcg: add blkcg_root_css

Add global constant blkcg_root_css which points to &blkcg_root.css.
This will be used by cgroup writeback support.  If blkcg is disabled,
it's defined as ERR_PTR(-EINVAL).

v2: The declarations moved to include/linux/blk-cgroup.h as suggested
    by Vivek.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blk-cgroup.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 51f95b34d3f0..65f0c178fd04 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -134,6 +134,7 @@ struct blkcg_policy {
 };
 
 extern struct blkcg blkcg_root;
+extern struct cgroup_subsys_state * const blkcg_root_css;
 
 struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q);
 struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
@@ -570,6 +571,8 @@ struct blkcg_gq {
 struct blkcg_policy {
 };
 
+#define blkcg_root_css	((struct cgroup_subsys_state *)ERR_PTR(-EINVAL))
+
 #ifdef CONFIG_BLOCK
 
 static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
-- 
cgit v1.2.3


From ec438699a9ae0856c2ce20a50dd39cdc7e92a732 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 22 May 2015 17:13:22 -0400
Subject: cgroup, block: implement task_get_css() and use it in
 bio_associate_current()

bio_associate_current() currently open codes task_css() and
css_tryget_online() to find and pin $current's blkcg css.  Abstract it
into task_get_css() which is implemented from cgroup side.  As a task
is always associated with an online css for every subsystem except
while the css_set update is propagating, task_get_css() retries till
css_tryget_online() succeeds.

This is a cleanup and shouldn't lead to noticeable behavior changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/cgroup.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index b9cb94c3102a..e7da0aa65b2d 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -773,6 +773,31 @@ static inline struct cgroup_subsys_state *task_css(struct task_struct *task,
 	return task_css_check(task, subsys_id, false);
 }
 
+/**
+ * task_get_css - find and get the css for (task, subsys)
+ * @task: the target task
+ * @subsys_id: the target subsystem ID
+ *
+ * Find the css for the (@task, @subsys_id) combination, increment a
+ * reference on and return it.  This function is guaranteed to return a
+ * valid css.
+ */
+static inline struct cgroup_subsys_state *
+task_get_css(struct task_struct *task, int subsys_id)
+{
+	struct cgroup_subsys_state *css;
+
+	rcu_read_lock();
+	while (true) {
+		css = task_css(task, subsys_id);
+		if (likely(css_tryget_online(css)))
+			break;
+		cpu_relax();
+	}
+	rcu_read_unlock();
+	return css;
+}
+
 /**
  * task_css_is_root - test whether a task belongs to the root css
  * @task: the target task
-- 
cgit v1.2.3


From fd383c2d3cae146337cea809de0d622b8b887e6c Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 22 May 2015 17:13:23 -0400
Subject: blkcg: implement task_get_blkcg_css()

Implement a wrapper around task_get_css() to acquire the blkcg css for
a given task.  The wrapper is necessary for cgroup writeback support
as there will be places outside blkcg proper trying to acquire
blkcg_css and blkio_cgrp_id will be undefined when !CONFIG_BLK_CGROUP.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blk-cgroup.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 65f0c178fd04..4dc643f2046e 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -195,6 +195,12 @@ static inline struct blkcg *bio_blkcg(struct bio *bio)
 	return task_blkcg(current);
 }
 
+static inline struct cgroup_subsys_state *
+task_get_blkcg_css(struct task_struct *task)
+{
+	return task_get_css(task, blkio_cgrp_id);
+}
+
 /**
  * blkcg_parent - get the parent of a blkcg
  * @blkcg: blkcg of interest
@@ -573,6 +579,12 @@ struct blkcg_policy {
 
 #define blkcg_root_css	((struct cgroup_subsys_state *)ERR_PTR(-EINVAL))
 
+static inline struct cgroup_subsys_state *
+task_get_blkcg_css(struct task_struct *task)
+{
+	return NULL;
+}
+
 #ifdef CONFIG_BLOCK
 
 static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
-- 
cgit v1.2.3


From 1d933cf096e3aea15f1aec8297657b7a846fab63 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 22 May 2015 17:13:24 -0400
Subject: blkcg: implement bio_associate_blkcg()

Currently, a bio can only be associated with the io_context and blkcg
of %current using bio_associate_current().  This is too restrictive
for cgroup writeback support.  Implement bio_associate_blkcg() which
associates a bio with the specified blkcg.

bio_associate_blkcg() leaves the io_context unassociated.
bio_associate_current() is updated so that it considers a bio as
already associated if it has a blkcg_css, instead of an io_context,
associated with it.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/bio.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index f0291cf64cc5..5e963a6d7c14 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -482,9 +482,12 @@ extern void bvec_free(mempool_t *, struct bio_vec *, unsigned int);
 extern unsigned int bvec_nr_vecs(unsigned short idx);
 
 #ifdef CONFIG_BLK_CGROUP
+int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css);
 int bio_associate_current(struct bio *bio);
 void bio_disassociate_task(struct bio *bio);
 #else	/* CONFIG_BLK_CGROUP */
+static inline int bio_associate_blkcg(struct bio *bio,
+			struct cgroup_subsys_state *blkcg_css) { return 0; }
 static inline int bio_associate_current(struct bio *bio) { return -ENOENT; }
 static inline void bio_disassociate_task(struct bio *bio) { }
 #endif	/* CONFIG_BLK_CGROUP */
-- 
cgit v1.2.3


From ad7fa852d3d2816d68a138ebc5bc8967aeb7fd86 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 27 May 2015 20:00:02 -0400
Subject: memcg: implement mem_cgroup_css_from_page()

Implement mem_cgroup_css_from_page() which returns the
cgroup_subsys_state of the memcg associated with a given page on the
default hierarchy.  This will be used by cgroup writeback support.

This function assumes that page->mem_cgroup association doesn't change
until the page is released, which is true on the default hierarchy as
long as replace_page_cache_page() is not used.  As the only user of
replace_page_cache_page() is FUSE which won't support cgroup writeback
for the time being, this works for now, and replace_page_cache_page()
will soon be updated so that the invariant actually holds.

Note that the RCU protected page->mem_cgroup access is consistent with
other usages across memcg but ultimately incorrect.  These unlocked
accesses are missing required barriers.  page->mem_cgroup should be
made an RCU pointer and updated and accessed using RCU operations.

v4: Instead of triggering WARN, return the root css on the traditional
    hierarchies.  This makes the function a lot easier to deal with
    especially as there's no light way to synchronize against
    hierarchy rebinding.

v3: s/mem_cgroup_migrate()/mem_cgroup_css_from_page()/

v2: Trigger WARN if the function is used on the traditional
    hierarchies and add comment about the assumed invariant.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/memcontrol.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 294498f4f6fc..637ef626008e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -115,6 +115,7 @@ static inline bool mm_match_cgroup(struct mm_struct *mm,
 }
 
 extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg);
+extern struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page);
 
 struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
 				   struct mem_cgroup *,
-- 
cgit v1.2.3


From 4452226ea276e74fc3e252c88d9bb7e8f8e44bf0 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 22 May 2015 17:13:26 -0400
Subject: writeback: move backing_dev_info->state into bdi_writeback

Currently, a bdi (backing_dev_info) embeds single wb (bdi_writeback)
and the role of the separation is unclear.  For cgroup support for
writeback IOs, a bdi will be updated to host multiple wb's where each
wb serves writeback IOs of a different cgroup on the bdi.  To achieve
that, a wb should carry all states necessary for servicing writeback
IOs for a cgroup independently.

This patch moves bdi->state into wb.

* enum bdi_state is renamed to wb_state and the prefix of all enums is
  changed from BDI_ to WB_.

* Explicit zeroing of bdi->state is removed without adding zeoring of
  wb->state as the whole data structure is zeroed on init anyway.

* As there's still only one bdi_writeback per backing_dev_info, all
  uses of bdi->state are mechanically replaced with bdi->wb.state
  introducing no behavior changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: drbd-dev@lists.linbit.com
Cc: Neil Brown <neilb@suse.de>
Cc: Alasdair Kergon <agk@redhat.com>
Cc: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/backing-dev.h | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index aff923ae8c4b..eb14f988a63e 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -25,13 +25,13 @@ struct device;
 struct dentry;
 
 /*
- * Bits in backing_dev_info.state
+ * Bits in bdi_writeback.state
  */
-enum bdi_state {
-	BDI_async_congested,	/* The async (write) queue is getting full */
-	BDI_sync_congested,	/* The sync queue is getting full */
-	BDI_registered,		/* bdi_register() was done */
-	BDI_writeback_running,	/* Writeback is in progress */
+enum wb_state {
+	WB_async_congested,	/* The async (write) queue is getting full */
+	WB_sync_congested,	/* The sync queue is getting full */
+	WB_registered,		/* bdi_register() was done */
+	WB_writeback_running,	/* Writeback is in progress */
 };
 
 typedef int (congested_fn)(void *, int);
@@ -49,6 +49,7 @@ enum bdi_stat_item {
 struct bdi_writeback {
 	struct backing_dev_info *bdi;	/* our parent bdi */
 
+	unsigned long state;		/* Always use atomic bitops on this */
 	unsigned long last_old_flush;	/* last old data flush */
 
 	struct delayed_work dwork;	/* work item used for writeback */
@@ -62,7 +63,6 @@ struct bdi_writeback {
 struct backing_dev_info {
 	struct list_head bdi_list;
 	unsigned long ra_pages;	/* max readahead in PAGE_CACHE_SIZE units */
-	unsigned long state;	/* Always use atomic bitops on this */
 	unsigned int capabilities; /* Device capabilities */
 	congested_fn *congested_fn; /* Function pointer if device is md/dm */
 	void *congested_data;	/* Pointer to aux data for congested func */
@@ -250,23 +250,23 @@ static inline int bdi_congested(struct backing_dev_info *bdi, int bdi_bits)
 {
 	if (bdi->congested_fn)
 		return bdi->congested_fn(bdi->congested_data, bdi_bits);
-	return (bdi->state & bdi_bits);
+	return (bdi->wb.state & bdi_bits);
 }
 
 static inline int bdi_read_congested(struct backing_dev_info *bdi)
 {
-	return bdi_congested(bdi, 1 << BDI_sync_congested);
+	return bdi_congested(bdi, 1 << WB_sync_congested);
 }
 
 static inline int bdi_write_congested(struct backing_dev_info *bdi)
 {
-	return bdi_congested(bdi, 1 << BDI_async_congested);
+	return bdi_congested(bdi, 1 << WB_async_congested);
 }
 
 static inline int bdi_rw_congested(struct backing_dev_info *bdi)
 {
-	return bdi_congested(bdi, (1 << BDI_sync_congested) |
-				  (1 << BDI_async_congested));
+	return bdi_congested(bdi, (1 << WB_sync_congested) |
+				  (1 << WB_async_congested));
 }
 
 enum {
-- 
cgit v1.2.3


From 93f78d882865cb90020d0f80a9523c99cf46924c Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 22 May 2015 17:13:27 -0400
Subject: writeback: move backing_dev_info->bdi_stat[] into bdi_writeback

Currently, a bdi (backing_dev_info) embeds single wb (bdi_writeback)
and the role of the separation is unclear.  For cgroup support for
writeback IOs, a bdi will be updated to host multiple wb's where each
wb serves writeback IOs of a different cgroup on the bdi.  To achieve
that, a wb should carry all states necessary for servicing writeback
IOs for a cgroup independently.

This patch moves bdi->bdi_stat[] into wb.

* enum bdi_stat_item is renamed to wb_stat_item and the prefix of all
  enums is changed from BDI_ to WB_.

* BDI_STAT_BATCH() -> WB_STAT_BATCH()

* [__]{add|inc|dec|sum}_wb_stat(bdi, ...) -> [__]{add|inc}_wb_stat(wb, ...)

* bdi_stat[_error]() -> wb_stat[_error]()

* bdi_writeout_inc() -> wb_writeout_inc()

* stat init is moved to bdi_wb_init() and bdi_wb_exit() is added and
  frees stat.

* As there's still only one bdi_writeback per backing_dev_info, all
  uses of bdi->stat[] are mechanically replaced with bdi->wb.stat[]
  introducing no behavior changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Cc: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/backing-dev.h | 68 +++++++++++++++++++++------------------------
 1 file changed, 32 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index eb14f988a63e..fe7a907a4e16 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -36,15 +36,15 @@ enum wb_state {
 
 typedef int (congested_fn)(void *, int);
 
-enum bdi_stat_item {
-	BDI_RECLAIMABLE,
-	BDI_WRITEBACK,
-	BDI_DIRTIED,
-	BDI_WRITTEN,
-	NR_BDI_STAT_ITEMS
+enum wb_stat_item {
+	WB_RECLAIMABLE,
+	WB_WRITEBACK,
+	WB_DIRTIED,
+	WB_WRITTEN,
+	NR_WB_STAT_ITEMS
 };
 
-#define BDI_STAT_BATCH (8*(1+ilog2(nr_cpu_ids)))
+#define WB_STAT_BATCH (8*(1+ilog2(nr_cpu_ids)))
 
 struct bdi_writeback {
 	struct backing_dev_info *bdi;	/* our parent bdi */
@@ -58,6 +58,8 @@ struct bdi_writeback {
 	struct list_head b_more_io;	/* parked for more writeback */
 	struct list_head b_dirty_time;	/* time stamps are dirty */
 	spinlock_t list_lock;		/* protects the b_* lists */
+
+	struct percpu_counter stat[NR_WB_STAT_ITEMS];
 };
 
 struct backing_dev_info {
@@ -69,8 +71,6 @@ struct backing_dev_info {
 
 	char *name;
 
-	struct percpu_counter bdi_stat[NR_BDI_STAT_ITEMS];
-
 	unsigned long bw_time_stamp;	/* last time write bw is updated */
 	unsigned long dirtied_stamp;
 	unsigned long written_stamp;	/* pages written at bw_time_stamp */
@@ -137,78 +137,74 @@ static inline int wb_has_dirty_io(struct bdi_writeback *wb)
 	       !list_empty(&wb->b_more_io);
 }
 
-static inline void __add_bdi_stat(struct backing_dev_info *bdi,
-		enum bdi_stat_item item, s64 amount)
+static inline void __add_wb_stat(struct bdi_writeback *wb,
+				 enum wb_stat_item item, s64 amount)
 {
-	__percpu_counter_add(&bdi->bdi_stat[item], amount, BDI_STAT_BATCH);
+	__percpu_counter_add(&wb->stat[item], amount, WB_STAT_BATCH);
 }
 
-static inline void __inc_bdi_stat(struct backing_dev_info *bdi,
-		enum bdi_stat_item item)
+static inline void __inc_wb_stat(struct bdi_writeback *wb,
+				 enum wb_stat_item item)
 {
-	__add_bdi_stat(bdi, item, 1);
+	__add_wb_stat(wb, item, 1);
 }
 
-static inline void inc_bdi_stat(struct backing_dev_info *bdi,
-		enum bdi_stat_item item)
+static inline void inc_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
 {
 	unsigned long flags;
 
 	local_irq_save(flags);
-	__inc_bdi_stat(bdi, item);
+	__inc_wb_stat(wb, item);
 	local_irq_restore(flags);
 }
 
-static inline void __dec_bdi_stat(struct backing_dev_info *bdi,
-		enum bdi_stat_item item)
+static inline void __dec_wb_stat(struct bdi_writeback *wb,
+				 enum wb_stat_item item)
 {
-	__add_bdi_stat(bdi, item, -1);
+	__add_wb_stat(wb, item, -1);
 }
 
-static inline void dec_bdi_stat(struct backing_dev_info *bdi,
-		enum bdi_stat_item item)
+static inline void dec_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
 {
 	unsigned long flags;
 
 	local_irq_save(flags);
-	__dec_bdi_stat(bdi, item);
+	__dec_wb_stat(wb, item);
 	local_irq_restore(flags);
 }
 
-static inline s64 bdi_stat(struct backing_dev_info *bdi,
-		enum bdi_stat_item item)
+static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
 {
-	return percpu_counter_read_positive(&bdi->bdi_stat[item]);
+	return percpu_counter_read_positive(&wb->stat[item]);
 }
 
-static inline s64 __bdi_stat_sum(struct backing_dev_info *bdi,
-		enum bdi_stat_item item)
+static inline s64 __wb_stat_sum(struct bdi_writeback *wb,
+				enum wb_stat_item item)
 {
-	return percpu_counter_sum_positive(&bdi->bdi_stat[item]);
+	return percpu_counter_sum_positive(&wb->stat[item]);
 }
 
-static inline s64 bdi_stat_sum(struct backing_dev_info *bdi,
-		enum bdi_stat_item item)
+static inline s64 wb_stat_sum(struct bdi_writeback *wb, enum wb_stat_item item)
 {
 	s64 sum;
 	unsigned long flags;
 
 	local_irq_save(flags);
-	sum = __bdi_stat_sum(bdi, item);
+	sum = __wb_stat_sum(wb, item);
 	local_irq_restore(flags);
 
 	return sum;
 }
 
-extern void bdi_writeout_inc(struct backing_dev_info *bdi);
+extern void wb_writeout_inc(struct bdi_writeback *wb);
 
 /*
  * maximal error of a stat counter.
  */
-static inline unsigned long bdi_stat_error(struct backing_dev_info *bdi)
+static inline unsigned long wb_stat_error(struct bdi_writeback *wb)
 {
 #ifdef CONFIG_SMP
-	return nr_cpu_ids * BDI_STAT_BATCH;
+	return nr_cpu_ids * WB_STAT_BATCH;
 #else
 	return 1;
 #endif
-- 
cgit v1.2.3


From a88a341a73be4ef035ca26170c849f002797da27 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 22 May 2015 17:13:28 -0400
Subject: writeback: move bandwidth related fields from backing_dev_info into
 bdi_writeback

Currently, a bdi (backing_dev_info) embeds single wb (bdi_writeback)
and the role of the separation is unclear.  For cgroup support for
writeback IOs, a bdi will be updated to host multiple wb's where each
wb serves writeback IOs of a different cgroup on the bdi.  To achieve
that, a wb should carry all states necessary for servicing writeback
IOs for a cgroup independently.

This patch moves bandwidth related fields from backing_dev_info into
bdi_writeback.

* The moved fields are: bw_time_stamp, dirtied_stamp, written_stamp,
  write_bandwidth, avg_write_bandwidth, dirty_ratelimit,
  balanced_dirty_ratelimit, completions and dirty_exceeded.

* writeback_chunk_size() and over_bground_thresh() now take @wb
  instead of @bdi.

* bdi_writeout_fraction(bdi, ...)	-> wb_writeout_fraction(wb, ...)
  bdi_dirty_limit(bdi, ...)		-> wb_dirty_limit(wb, ...)
  bdi_position_ration(bdi, ...)		-> wb_position_ratio(wb, ...)
  bdi_update_writebandwidth(bdi, ...)	-> wb_update_write_bandwidth(wb, ...)
  [__]bdi_update_bandwidth(bdi, ...)	-> [__]wb_update_bandwidth(wb, ...)
  bdi_{max|min}_pause(bdi, ...)		-> wb_{max|min}_pause(wb, ...)
  bdi_dirty_limits(bdi, ...)		-> wb_dirty_limits(wb, ...)

* Init/exits of the relocated fields are moved to bdi_wb_init/exit()
  respectively.  Note that explicit zeroing is dropped in the process
  as wb's are cleared in entirety anyway.

* As there's still only one bdi_writeback per backing_dev_info, all
  uses of bdi->stat[] are mechanically replaced with bdi->wb.stat[]
  introducing no behavior changes.

v2: Typo in description fixed as suggested by Jan.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Jaegeuk Kim <jaegeuk@kernel.org>
Cc: Steven Whitehouse <swhiteho@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/backing-dev.h | 20 ++++++++++----------
 include/linux/writeback.h   | 19 +++++++++----------
 2 files changed, 19 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index fe7a907a4e16..2ab06049d812 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -60,16 +60,6 @@ struct bdi_writeback {
 	spinlock_t list_lock;		/* protects the b_* lists */
 
 	struct percpu_counter stat[NR_WB_STAT_ITEMS];
-};
-
-struct backing_dev_info {
-	struct list_head bdi_list;
-	unsigned long ra_pages;	/* max readahead in PAGE_CACHE_SIZE units */
-	unsigned int capabilities; /* Device capabilities */
-	congested_fn *congested_fn; /* Function pointer if device is md/dm */
-	void *congested_data;	/* Pointer to aux data for congested func */
-
-	char *name;
 
 	unsigned long bw_time_stamp;	/* last time write bw is updated */
 	unsigned long dirtied_stamp;
@@ -88,6 +78,16 @@ struct backing_dev_info {
 
 	struct fprop_local_percpu completions;
 	int dirty_exceeded;
+};
+
+struct backing_dev_info {
+	struct list_head bdi_list;
+	unsigned long ra_pages;	/* max readahead in PAGE_CACHE_SIZE units */
+	unsigned int capabilities; /* Device capabilities */
+	congested_fn *congested_fn; /* Function pointer if device is md/dm */
+	void *congested_data;	/* Pointer to aux data for congested func */
+
+	char *name;
 
 	unsigned int min_ratio;
 	unsigned int max_ratio, max_prop_frac;
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index b2dd371ec0ca..a6b9db7fcee8 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -155,16 +155,15 @@ int dirty_writeback_centisecs_handler(struct ctl_table *, int,
 				      void __user *, size_t *, loff_t *);
 
 void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty);
-unsigned long bdi_dirty_limit(struct backing_dev_info *bdi,
-			       unsigned long dirty);
-
-void __bdi_update_bandwidth(struct backing_dev_info *bdi,
-			    unsigned long thresh,
-			    unsigned long bg_thresh,
-			    unsigned long dirty,
-			    unsigned long bdi_thresh,
-			    unsigned long bdi_dirty,
-			    unsigned long start_time);
+unsigned long wb_dirty_limit(struct bdi_writeback *wb, unsigned long dirty);
+
+void __wb_update_bandwidth(struct bdi_writeback *wb,
+			   unsigned long thresh,
+			   unsigned long bg_thresh,
+			   unsigned long dirty,
+			   unsigned long bdi_thresh,
+			   unsigned long bdi_dirty,
+			   unsigned long start_time);
 
 void page_writeback_init(void);
 void balance_dirty_pages_ratelimited(struct address_space *mapping);
-- 
cgit v1.2.3


From f0054bb1e1f3be03cc33369df640db97f10f6172 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 22 May 2015 17:13:30 -0400
Subject: writeback: move backing_dev_info->wb_lock and ->worklist into
 bdi_writeback

Currently, a bdi (backing_dev_info) embeds single wb (bdi_writeback)
and the role of the separation is unclear.  For cgroup support for
writeback IOs, a bdi will be updated to host multiple wb's where each
wb serves writeback IOs of a different cgroup on the bdi.  To achieve
that, a wb should carry all states necessary for servicing writeback
IOs for a cgroup independently.

This patch moves bdi->wb_lock and ->worklist into wb.

* The lock protects bdi->worklist and bdi->wb.dwork scheduling.  While
  moving, rename it to wb->work_lock as wb->wb_lock is confusing.
  Also, move wb->dwork downwards so that it's colocated with the new
  ->work_lock and ->work_list fields.

* bdi_writeback_workfn()		-> wb_workfn()
  bdi_wakeup_thread_delayed(bdi)	-> wb_wakeup_delayed(wb)
  bdi_wakeup_thread(bdi)		-> wb_wakeup(wb)
  bdi_queue_work(bdi, ...)		-> wb_queue_work(wb, ...)
  __bdi_start_writeback(bdi, ...)	-> __wb_start_writeback(wb, ...)
  get_next_work_item(bdi)		-> get_next_work_item(wb)

* bdi_wb_shutdown() is renamed to wb_shutdown() and now takes @wb.
  The function contained parts which belong to the containing bdi
  rather than the wb itself - testing cap_writeback_dirty and
  bdi_remove_from_list() invocation.  Those are moved to
  bdi_unregister().

* bdi_wb_{init|exit}() are renamed to wb_{init|exit}().
  Initializations of the moved bdi->wb_lock and ->work_list are
  relocated from bdi_init() to wb_init().

* As there's still only one bdi_writeback per backing_dev_info, all
  uses of bdi->state are mechanically replaced with bdi->wb.state
  introducing no behavior changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/backing-dev.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 2ab06049d812..d796f49ce87a 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -52,7 +52,6 @@ struct bdi_writeback {
 	unsigned long state;		/* Always use atomic bitops on this */
 	unsigned long last_old_flush;	/* last old data flush */
 
-	struct delayed_work dwork;	/* work item used for writeback */
 	struct list_head b_dirty;	/* dirty inodes */
 	struct list_head b_io;		/* parked for writeback */
 	struct list_head b_more_io;	/* parked for more writeback */
@@ -78,6 +77,10 @@ struct bdi_writeback {
 
 	struct fprop_local_percpu completions;
 	int dirty_exceeded;
+
+	spinlock_t work_lock;		/* protects work_list & dwork scheduling */
+	struct list_head work_list;
+	struct delayed_work dwork;	/* work item used for writeback */
 };
 
 struct backing_dev_info {
@@ -93,9 +96,6 @@ struct backing_dev_info {
 	unsigned int max_ratio, max_prop_frac;
 
 	struct bdi_writeback wb;  /* default writeback info for this bdi */
-	spinlock_t wb_lock;	  /* protects work_list & wb.dwork scheduling */
-
-	struct list_head work_list;
 
 	struct device *dev;
 
@@ -121,9 +121,9 @@ int __must_check bdi_setup_and_register(struct backing_dev_info *, char *);
 void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
 			enum wb_reason reason);
 void bdi_start_background_writeback(struct backing_dev_info *bdi);
-void bdi_writeback_workfn(struct work_struct *work);
+void wb_workfn(struct work_struct *work);
 int bdi_has_dirty_io(struct backing_dev_info *bdi);
-void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi);
+void wb_wakeup_delayed(struct bdi_writeback *wb);
 
 extern spinlock_t bdi_lock;
 extern struct list_head bdi_list;
-- 
cgit v1.2.3


From 66114cad64bf76a155fec1f0fff0de771cf909d5 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 22 May 2015 17:13:32 -0400
Subject: writeback: separate out include/linux/backing-dev-defs.h

With the planned cgroup writeback support, backing-dev related
declarations will be more widely used across block and cgroup;
unfortunately, including backing-dev.h from include/linux/blkdev.h
makes cyclic include dependency quite likely.

This patch separates out backing-dev-defs.h which only has the
essential definitions and updates blkdev.h to include it.  c files
which need access to more backing-dev details now include
backing-dev.h directly.  This takes backing-dev.h off the common
include dependency chain making it a lot easier to use it across block
and cgroup.

v2: fs/fat build failure fixed.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/backing-dev-defs.h | 106 +++++++++++++++++++++++++++++++++++++++
 include/linux/backing-dev.h      | 102 +------------------------------------
 include/linux/blkdev.h           |   2 +-
 3 files changed, 108 insertions(+), 102 deletions(-)
 create mode 100644 include/linux/backing-dev-defs.h

(limited to 'include/linux')

diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
new file mode 100644
index 000000000000..aa18c4bd43c1
--- /dev/null
+++ b/include/linux/backing-dev-defs.h
@@ -0,0 +1,106 @@
+#ifndef __LINUX_BACKING_DEV_DEFS_H
+#define __LINUX_BACKING_DEV_DEFS_H
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/percpu_counter.h>
+#include <linux/flex_proportions.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
+
+struct page;
+struct device;
+struct dentry;
+
+/*
+ * Bits in bdi_writeback.state
+ */
+enum wb_state {
+	WB_async_congested,	/* The async (write) queue is getting full */
+	WB_sync_congested,	/* The sync queue is getting full */
+	WB_registered,		/* bdi_register() was done */
+	WB_writeback_running,	/* Writeback is in progress */
+};
+
+typedef int (congested_fn)(void *, int);
+
+enum wb_stat_item {
+	WB_RECLAIMABLE,
+	WB_WRITEBACK,
+	WB_DIRTIED,
+	WB_WRITTEN,
+	NR_WB_STAT_ITEMS
+};
+
+#define WB_STAT_BATCH (8*(1+ilog2(nr_cpu_ids)))
+
+struct bdi_writeback {
+	struct backing_dev_info *bdi;	/* our parent bdi */
+
+	unsigned long state;		/* Always use atomic bitops on this */
+	unsigned long last_old_flush;	/* last old data flush */
+
+	struct list_head b_dirty;	/* dirty inodes */
+	struct list_head b_io;		/* parked for writeback */
+	struct list_head b_more_io;	/* parked for more writeback */
+	struct list_head b_dirty_time;	/* time stamps are dirty */
+	spinlock_t list_lock;		/* protects the b_* lists */
+
+	struct percpu_counter stat[NR_WB_STAT_ITEMS];
+
+	unsigned long bw_time_stamp;	/* last time write bw is updated */
+	unsigned long dirtied_stamp;
+	unsigned long written_stamp;	/* pages written at bw_time_stamp */
+	unsigned long write_bandwidth;	/* the estimated write bandwidth */
+	unsigned long avg_write_bandwidth; /* further smoothed write bw */
+
+	/*
+	 * The base dirty throttle rate, re-calculated on every 200ms.
+	 * All the bdi tasks' dirty rate will be curbed under it.
+	 * @dirty_ratelimit tracks the estimated @balanced_dirty_ratelimit
+	 * in small steps and is much more smooth/stable than the latter.
+	 */
+	unsigned long dirty_ratelimit;
+	unsigned long balanced_dirty_ratelimit;
+
+	struct fprop_local_percpu completions;
+	int dirty_exceeded;
+
+	spinlock_t work_lock;		/* protects work_list & dwork scheduling */
+	struct list_head work_list;
+	struct delayed_work dwork;	/* work item used for writeback */
+};
+
+struct backing_dev_info {
+	struct list_head bdi_list;
+	unsigned long ra_pages;	/* max readahead in PAGE_CACHE_SIZE units */
+	unsigned int capabilities; /* Device capabilities */
+	congested_fn *congested_fn; /* Function pointer if device is md/dm */
+	void *congested_data;	/* Pointer to aux data for congested func */
+
+	char *name;
+
+	unsigned int min_ratio;
+	unsigned int max_ratio, max_prop_frac;
+
+	struct bdi_writeback wb;  /* default writeback info for this bdi */
+
+	struct device *dev;
+
+	struct timer_list laptop_mode_wb_timer;
+
+#ifdef CONFIG_DEBUG_FS
+	struct dentry *debug_dir;
+	struct dentry *debug_stats;
+#endif
+};
+
+enum {
+	BLK_RW_ASYNC	= 0,
+	BLK_RW_SYNC	= 1,
+};
+
+void clear_bdi_congested(struct backing_dev_info *bdi, int sync);
+void set_bdi_congested(struct backing_dev_info *bdi, int sync);
+
+#endif	/* __LINUX_BACKING_DEV_DEFS_H */
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index d796f49ce87a..5e39f7a8efed 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -8,104 +8,11 @@
 #ifndef _LINUX_BACKING_DEV_H
 #define _LINUX_BACKING_DEV_H
 
-#include <linux/percpu_counter.h>
-#include <linux/log2.h>
-#include <linux/flex_proportions.h>
 #include <linux/kernel.h>
 #include <linux/fs.h>
 #include <linux/sched.h>
-#include <linux/timer.h>
 #include <linux/writeback.h>
-#include <linux/atomic.h>
-#include <linux/sysctl.h>
-#include <linux/workqueue.h>
-
-struct page;
-struct device;
-struct dentry;
-
-/*
- * Bits in bdi_writeback.state
- */
-enum wb_state {
-	WB_async_congested,	/* The async (write) queue is getting full */
-	WB_sync_congested,	/* The sync queue is getting full */
-	WB_registered,		/* bdi_register() was done */
-	WB_writeback_running,	/* Writeback is in progress */
-};
-
-typedef int (congested_fn)(void *, int);
-
-enum wb_stat_item {
-	WB_RECLAIMABLE,
-	WB_WRITEBACK,
-	WB_DIRTIED,
-	WB_WRITTEN,
-	NR_WB_STAT_ITEMS
-};
-
-#define WB_STAT_BATCH (8*(1+ilog2(nr_cpu_ids)))
-
-struct bdi_writeback {
-	struct backing_dev_info *bdi;	/* our parent bdi */
-
-	unsigned long state;		/* Always use atomic bitops on this */
-	unsigned long last_old_flush;	/* last old data flush */
-
-	struct list_head b_dirty;	/* dirty inodes */
-	struct list_head b_io;		/* parked for writeback */
-	struct list_head b_more_io;	/* parked for more writeback */
-	struct list_head b_dirty_time;	/* time stamps are dirty */
-	spinlock_t list_lock;		/* protects the b_* lists */
-
-	struct percpu_counter stat[NR_WB_STAT_ITEMS];
-
-	unsigned long bw_time_stamp;	/* last time write bw is updated */
-	unsigned long dirtied_stamp;
-	unsigned long written_stamp;	/* pages written at bw_time_stamp */
-	unsigned long write_bandwidth;	/* the estimated write bandwidth */
-	unsigned long avg_write_bandwidth; /* further smoothed write bw */
-
-	/*
-	 * The base dirty throttle rate, re-calculated on every 200ms.
-	 * All the bdi tasks' dirty rate will be curbed under it.
-	 * @dirty_ratelimit tracks the estimated @balanced_dirty_ratelimit
-	 * in small steps and is much more smooth/stable than the latter.
-	 */
-	unsigned long dirty_ratelimit;
-	unsigned long balanced_dirty_ratelimit;
-
-	struct fprop_local_percpu completions;
-	int dirty_exceeded;
-
-	spinlock_t work_lock;		/* protects work_list & dwork scheduling */
-	struct list_head work_list;
-	struct delayed_work dwork;	/* work item used for writeback */
-};
-
-struct backing_dev_info {
-	struct list_head bdi_list;
-	unsigned long ra_pages;	/* max readahead in PAGE_CACHE_SIZE units */
-	unsigned int capabilities; /* Device capabilities */
-	congested_fn *congested_fn; /* Function pointer if device is md/dm */
-	void *congested_data;	/* Pointer to aux data for congested func */
-
-	char *name;
-
-	unsigned int min_ratio;
-	unsigned int max_ratio, max_prop_frac;
-
-	struct bdi_writeback wb;  /* default writeback info for this bdi */
-
-	struct device *dev;
-
-	struct timer_list laptop_mode_wb_timer;
-
-#ifdef CONFIG_DEBUG_FS
-	struct dentry *debug_dir;
-	struct dentry *debug_stats;
-#endif
-};
+#include <linux/backing-dev-defs.h>
 
 struct backing_dev_info *inode_to_bdi(struct inode *inode);
 
@@ -265,13 +172,6 @@ static inline int bdi_rw_congested(struct backing_dev_info *bdi)
 				  (1 << WB_async_congested));
 }
 
-enum {
-	BLK_RW_ASYNC	= 0,
-	BLK_RW_SYNC	= 1,
-};
-
-void clear_bdi_congested(struct backing_dev_info *bdi, int sync);
-void set_bdi_congested(struct backing_dev_info *bdi, int sync);
 long congestion_wait(int sync, long timeout);
 long wait_iff_congested(struct zone *zone, int sync, long timeout);
 int pdflush_proc_obsolete(struct ctl_table *table, int write,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ccaa9aecd593..60d2726a6b62 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -12,7 +12,7 @@
 #include <linux/timer.h>
 #include <linux/workqueue.h>
 #include <linux/pagemap.h>
-#include <linux/backing-dev.h>
+#include <linux/backing-dev-defs.h>
 #include <linux/wait.h>
 #include <linux/mempool.h>
 #include <linux/bio.h>
-- 
cgit v1.2.3


From a212b105b07d75b48b1a166378282e8a77fbf53d Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 22 May 2015 17:13:33 -0400
Subject: bdi: make inode_to_bdi() inline

Now that bdi definitions are moved to backing-dev-defs.h,
backing-dev.h can include blkdev.h and inline inode_to_bdi() without
worrying about introducing circular include dependency.  The function
gets called from hot paths and fairly trivial.

This patch makes inode_to_bdi() and sb_is_blkdev_sb() that the
function calls inline.  blockdev_superblock and noop_backing_dev_info
are EXPORT_GPL'd to allow the inline functions to be used from
modules.

While at it, make sb_is_blkdev_sb() return bool instead of int.

v2: Fixed typo in description as suggested by Jan.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/backing-dev.h | 18 ++++++++++++++++--
 include/linux/fs.h          |  8 +++++++-
 2 files changed, 23 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 5e39f7a8efed..785782034e86 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -11,11 +11,10 @@
 #include <linux/kernel.h>
 #include <linux/fs.h>
 #include <linux/sched.h>
+#include <linux/blkdev.h>
 #include <linux/writeback.h>
 #include <linux/backing-dev-defs.h>
 
-struct backing_dev_info *inode_to_bdi(struct inode *inode);
-
 int __must_check bdi_init(struct backing_dev_info *bdi);
 void bdi_destroy(struct backing_dev_info *bdi);
 
@@ -149,6 +148,21 @@ extern struct backing_dev_info noop_backing_dev_info;
 
 int writeback_in_progress(struct backing_dev_info *bdi);
 
+static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
+{
+	struct super_block *sb;
+
+	if (!inode)
+		return &noop_backing_dev_info;
+
+	sb = inode->i_sb;
+#ifdef CONFIG_BLOCK
+	if (sb_is_blkdev_sb(sb))
+		return blk_get_backing_dev_info(I_BDEV(inode));
+#endif
+	return sb->s_bdi;
+}
+
 static inline int bdi_congested(struct backing_dev_info *bdi, int bdi_bits)
 {
 	if (bdi->congested_fn)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1ef63900243c..ce100b87fba3 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2240,7 +2240,13 @@ extern struct super_block *freeze_bdev(struct block_device *);
 extern void emergency_thaw_all(void);
 extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
 extern int fsync_bdev(struct block_device *);
-extern int sb_is_blkdev_sb(struct super_block *sb);
+
+extern struct super_block *blockdev_superblock;
+
+static inline bool sb_is_blkdev_sb(struct super_block *sb)
+{
+	return sb == blockdev_superblock;
+}
 #else
 static inline void bd_forget(struct inode *inode) {}
 static inline int sync_blockdev(struct block_device *bdev) { return 0; }
-- 
cgit v1.2.3


From 4aa9c692e052cf6db99db62a8fe0543e5c455da7 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 22 May 2015 17:13:35 -0400
Subject: bdi: separate out congested state into a separate struct

Currently, a wb's (bdi_writeback) congestion state is carried in its
->state field; however, cgroup writeback support will require multiple
wb's sharing the same congestion state.  This patch separates out
congestion state into its own struct - struct bdi_writeback_congested.
A new field wb field, wb_congested, points to its associated congested
struct.  The default wb, bdi->wb, always points to bdi->wb_congested.

While this patch adds a layer of indirection, it doesn't introduce any
behavior changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/backing-dev-defs.h | 14 ++++++++++++--
 include/linux/backing-dev.h      |  2 +-
 2 files changed, 13 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index aa18c4bd43c1..9e9eafa5f5aa 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -16,12 +16,15 @@ struct dentry;
  * Bits in bdi_writeback.state
  */
 enum wb_state {
-	WB_async_congested,	/* The async (write) queue is getting full */
-	WB_sync_congested,	/* The sync queue is getting full */
 	WB_registered,		/* bdi_register() was done */
 	WB_writeback_running,	/* Writeback is in progress */
 };
 
+enum wb_congested_state {
+	WB_async_congested,	/* The async (write) queue is getting full */
+	WB_sync_congested,	/* The sync queue is getting full */
+};
+
 typedef int (congested_fn)(void *, int);
 
 enum wb_stat_item {
@@ -34,6 +37,10 @@ enum wb_stat_item {
 
 #define WB_STAT_BATCH (8*(1+ilog2(nr_cpu_ids)))
 
+struct bdi_writeback_congested {
+	unsigned long state;		/* WB_[a]sync_congested flags */
+};
+
 struct bdi_writeback {
 	struct backing_dev_info *bdi;	/* our parent bdi */
 
@@ -48,6 +55,8 @@ struct bdi_writeback {
 
 	struct percpu_counter stat[NR_WB_STAT_ITEMS];
 
+	struct bdi_writeback_congested *congested;
+
 	unsigned long bw_time_stamp;	/* last time write bw is updated */
 	unsigned long dirtied_stamp;
 	unsigned long written_stamp;	/* pages written at bw_time_stamp */
@@ -84,6 +93,7 @@ struct backing_dev_info {
 	unsigned int max_ratio, max_prop_frac;
 
 	struct bdi_writeback wb;  /* default writeback info for this bdi */
+	struct bdi_writeback_congested wb_congested;
 
 	struct device *dev;
 
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 785782034e86..bfdaa18ba0a1 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -167,7 +167,7 @@ static inline int bdi_congested(struct backing_dev_info *bdi, int bdi_bits)
 {
 	if (bdi->congested_fn)
 		return bdi->congested_fn(bdi->congested_data, bdi_bits);
-	return (bdi->wb.state & bdi_bits);
+	return (bdi->wb.congested->state & bdi_bits);
 }
 
 static inline int bdi_read_congested(struct backing_dev_info *bdi)
-- 
cgit v1.2.3


From 89e9b9e07a390c50980d10aa37a04631db5a23ab Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 22 May 2015 17:13:36 -0400
Subject: writeback: add {CONFIG|BDI_CAP|FS}_CGROUP_WRITEBACK

cgroup writeback requires support from both bdi and filesystem sides.
Add BDI_CAP_CGROUP_WRITEBACK and FS_CGROUP_WRITEBACK to indicate
support and enable BDI_CAP_CGROUP_WRITEBACK on block based bdi's by
default.  Also, define CONFIG_CGROUP_WRITEBACK which is enabled if
both MEMCG and BLK_CGROUP are enabled.

inode_cgwb_enabled() which determines whether a given inode's both bdi
and fs support cgroup writeback is added.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/backing-dev.h | 32 +++++++++++++++++++++++++++++++-
 include/linux/fs.h          |  1 +
 2 files changed, 32 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index bfdaa18ba0a1..6bb31234e6a9 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -134,12 +134,15 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
  * BDI_CAP_NO_WRITEBACK:   Don't write pages back
  * BDI_CAP_NO_ACCT_WB:     Don't automatically account writeback pages
  * BDI_CAP_STRICTLIMIT:    Keep number of dirty pages below bdi threshold.
+ *
+ * BDI_CAP_CGROUP_WRITEBACK: Supports cgroup-aware writeback.
  */
 #define BDI_CAP_NO_ACCT_DIRTY	0x00000001
 #define BDI_CAP_NO_WRITEBACK	0x00000002
 #define BDI_CAP_NO_ACCT_WB	0x00000004
 #define BDI_CAP_STABLE_WRITES	0x00000008
 #define BDI_CAP_STRICTLIMIT	0x00000010
+#define BDI_CAP_CGROUP_WRITEBACK 0x00000020
 
 #define BDI_CAP_NO_ACCT_AND_WRITEBACK \
 	(BDI_CAP_NO_WRITEBACK | BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_ACCT_WB)
@@ -229,4 +232,31 @@ static inline int bdi_sched_wait(void *word)
 	return 0;
 }
 
-#endif		/* _LINUX_BACKING_DEV_H */
+#ifdef CONFIG_CGROUP_WRITEBACK
+
+/**
+ * inode_cgwb_enabled - test whether cgroup writeback is enabled on an inode
+ * @inode: inode of interest
+ *
+ * cgroup writeback requires support from both the bdi and filesystem.
+ * Test whether @inode has both.
+ */
+static inline bool inode_cgwb_enabled(struct inode *inode)
+{
+	struct backing_dev_info *bdi = inode_to_bdi(inode);
+
+	return bdi_cap_account_dirty(bdi) &&
+		(bdi->capabilities & BDI_CAP_CGROUP_WRITEBACK) &&
+		(inode->i_sb->s_type->fs_flags & FS_CGROUP_WRITEBACK);
+}
+
+#else	/* CONFIG_CGROUP_WRITEBACK */
+
+static inline bool inode_cgwb_enabled(struct inode *inode)
+{
+	return false;
+}
+
+#endif	/* CONFIG_CGROUP_WRITEBACK */
+
+#endif	/* _LINUX_BACKING_DEV_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ce100b87fba3..74e0ae0626a8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1897,6 +1897,7 @@ struct file_system_type {
 #define FS_HAS_SUBTYPE		4
 #define FS_USERNS_MOUNT		8	/* Can be mounted by userns root */
 #define FS_USERNS_DEV_MOUNT	16 /* A userns mount does not imply MNT_NODEV */
+#define FS_CGROUP_WRITEBACK	32	/* Supports cgroup-aware writeback */
 #define FS_RENAME_DOES_D_MOVE	32768	/* FS will handle d_move() during rename() internally. */
 	struct dentry *(*mount) (struct file_system_type *, int,
 		       const char *, void *);
-- 
cgit v1.2.3


From 52ebea749aaed195245701a8f90a23d672c7a933 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 22 May 2015 17:13:37 -0400
Subject: writeback: make backing_dev_info host cgroup-specific bdi_writebacks

For the planned cgroup writeback support, on each bdi
(backing_dev_info), each memcg will be served by a separate wb
(bdi_writeback).  This patch updates bdi so that a bdi can host
multiple wbs (bdi_writebacks).

On the default hierarchy, blkcg implicitly enables memcg.  This allows
using memcg's page ownership for attributing writeback IOs, and every
memcg - blkcg combination can be served by its own wb by assigning a
dedicated wb to each memcg.  This means that there may be multiple
wb's of a bdi mapped to the same blkcg.  As congested state is per
blkcg - bdi combination, those wb's should share the same congested
state.  This is achieved by tracking congested state via
bdi_writeback_congested structs which are keyed by blkcg.

bdi->wb remains unchanged and will keep serving the root cgroup.
cgwb's (cgroup wb's) for non-root cgroups are created on-demand or
looked up while dirtying an inode according to the memcg of the page
being dirtied or current task.  Each cgwb is indexed on bdi->cgwb_tree
by its memcg id.  Once an inode is associated with its wb, it can be
retrieved using inode_to_wb().

Currently, none of the filesystems has FS_CGROUP_WRITEBACK and all
pages will keep being associated with bdi->wb.

v3: inode_attach_wb() in account_page_dirtied() moved inside
    mapping_cap_account_dirty() block where it's known to be !NULL.
    Also, an unnecessary NULL check before kfree() removed.  Both
    detected by the kbuild bot.

v2: Updated so that wb association is per inode and wb is per memcg
    rather than blkcg.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: kbuild test robot <fengguang.wu@intel.com>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/backing-dev-defs.h |  59 +++++++++++-
 include/linux/backing-dev.h      | 195 +++++++++++++++++++++++++++++++++++++++
 include/linux/blk-cgroup.h       |   4 +
 include/linux/fs.h               |   4 +
 include/linux/memcontrol.h       |   4 +
 5 files changed, 263 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 9e9eafa5f5aa..a1e9c407a59a 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -2,8 +2,11 @@
 #define __LINUX_BACKING_DEV_DEFS_H
 
 #include <linux/list.h>
+#include <linux/radix-tree.h>
+#include <linux/rbtree.h>
 #include <linux/spinlock.h>
 #include <linux/percpu_counter.h>
+#include <linux/percpu-refcount.h>
 #include <linux/flex_proportions.h>
 #include <linux/timer.h>
 #include <linux/workqueue.h>
@@ -37,10 +40,43 @@ enum wb_stat_item {
 
 #define WB_STAT_BATCH (8*(1+ilog2(nr_cpu_ids)))
 
+/*
+ * For cgroup writeback, multiple wb's may map to the same blkcg.  Those
+ * wb's can operate mostly independently but should share the congested
+ * state.  To facilitate such sharing, the congested state is tracked using
+ * the following struct which is created on demand, indexed by blkcg ID on
+ * its bdi, and refcounted.
+ */
 struct bdi_writeback_congested {
 	unsigned long state;		/* WB_[a]sync_congested flags */
+
+#ifdef CONFIG_CGROUP_WRITEBACK
+	struct backing_dev_info *bdi;	/* the associated bdi */
+	atomic_t refcnt;		/* nr of attached wb's and blkg */
+	int blkcg_id;			/* ID of the associated blkcg */
+	struct rb_node rb_node;		/* on bdi->cgwb_congestion_tree */
+#endif
 };
 
+/*
+ * Each wb (bdi_writeback) can perform writeback operations, is measured
+ * and throttled, independently.  Without cgroup writeback, each bdi
+ * (bdi_writeback) is served by its embedded bdi->wb.
+ *
+ * On the default hierarchy, blkcg implicitly enables memcg.  This allows
+ * using memcg's page ownership for attributing writeback IOs, and every
+ * memcg - blkcg combination can be served by its own wb by assigning a
+ * dedicated wb to each memcg, which enables isolation across different
+ * cgroups and propagation of IO back pressure down from the IO layer upto
+ * the tasks which are generating the dirty pages to be written back.
+ *
+ * A cgroup wb is indexed on its bdi by the ID of the associated memcg,
+ * refcounted with the number of inodes attached to it, and pins the memcg
+ * and the corresponding blkcg.  As the corresponding blkcg for a memcg may
+ * change as blkcg is disabled and enabled higher up in the hierarchy, a wb
+ * is tested for blkcg after lookup and removed from index on mismatch so
+ * that a new wb for the combination can be created.
+ */
 struct bdi_writeback {
 	struct backing_dev_info *bdi;	/* our parent bdi */
 
@@ -78,6 +114,19 @@ struct bdi_writeback {
 	spinlock_t work_lock;		/* protects work_list & dwork scheduling */
 	struct list_head work_list;
 	struct delayed_work dwork;	/* work item used for writeback */
+
+#ifdef CONFIG_CGROUP_WRITEBACK
+	struct percpu_ref refcnt;	/* used only for !root wb's */
+	struct cgroup_subsys_state *memcg_css; /* the associated memcg */
+	struct cgroup_subsys_state *blkcg_css; /* and blkcg */
+	struct list_head memcg_node;	/* anchored at memcg->cgwb_list */
+	struct list_head blkcg_node;	/* anchored at blkcg->cgwb_list */
+
+	union {
+		struct work_struct release_work;
+		struct rcu_head rcu;
+	};
+#endif
 };
 
 struct backing_dev_info {
@@ -92,9 +141,13 @@ struct backing_dev_info {
 	unsigned int min_ratio;
 	unsigned int max_ratio, max_prop_frac;
 
-	struct bdi_writeback wb;  /* default writeback info for this bdi */
-	struct bdi_writeback_congested wb_congested;
-
+	struct bdi_writeback wb;  /* the root writeback info for this bdi */
+	struct bdi_writeback_congested wb_congested; /* its congested state */
+#ifdef CONFIG_CGROUP_WRITEBACK
+	struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */
+	struct rb_root cgwb_congested_tree; /* their congested states */
+	atomic_t usage_cnt; /* counts both cgwbs and cgwb_contested's */
+#endif
 	struct device *dev;
 
 	struct timer_list laptop_mode_wb_timer;
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 6bb31234e6a9..8ae59df2e3d1 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -13,6 +13,7 @@
 #include <linux/sched.h>
 #include <linux/blkdev.h>
 #include <linux/writeback.h>
+#include <linux/blk-cgroup.h>
 #include <linux/backing-dev-defs.h>
 
 int __must_check bdi_init(struct backing_dev_info *bdi);
@@ -234,6 +235,16 @@ static inline int bdi_sched_wait(void *word)
 
 #ifdef CONFIG_CGROUP_WRITEBACK
 
+struct bdi_writeback_congested *
+wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp);
+void wb_congested_put(struct bdi_writeback_congested *congested);
+struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
+				    struct cgroup_subsys_state *memcg_css,
+				    gfp_t gfp);
+void __inode_attach_wb(struct inode *inode, struct page *page);
+void wb_memcg_offline(struct mem_cgroup *memcg);
+void wb_blkcg_offline(struct blkcg *blkcg);
+
 /**
  * inode_cgwb_enabled - test whether cgroup writeback is enabled on an inode
  * @inode: inode of interest
@@ -250,6 +261,135 @@ static inline bool inode_cgwb_enabled(struct inode *inode)
 		(inode->i_sb->s_type->fs_flags & FS_CGROUP_WRITEBACK);
 }
 
+/**
+ * wb_tryget - try to increment a wb's refcount
+ * @wb: bdi_writeback to get
+ */
+static inline bool wb_tryget(struct bdi_writeback *wb)
+{
+	if (wb != &wb->bdi->wb)
+		return percpu_ref_tryget(&wb->refcnt);
+	return true;
+}
+
+/**
+ * wb_get - increment a wb's refcount
+ * @wb: bdi_writeback to get
+ */
+static inline void wb_get(struct bdi_writeback *wb)
+{
+	if (wb != &wb->bdi->wb)
+		percpu_ref_get(&wb->refcnt);
+}
+
+/**
+ * wb_put - decrement a wb's refcount
+ * @wb: bdi_writeback to put
+ */
+static inline void wb_put(struct bdi_writeback *wb)
+{
+	if (wb != &wb->bdi->wb)
+		percpu_ref_put(&wb->refcnt);
+}
+
+/**
+ * wb_find_current - find wb for %current on a bdi
+ * @bdi: bdi of interest
+ *
+ * Find the wb of @bdi which matches both the memcg and blkcg of %current.
+ * Must be called under rcu_read_lock() which protects the returend wb.
+ * NULL if not found.
+ */
+static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi)
+{
+	struct cgroup_subsys_state *memcg_css;
+	struct bdi_writeback *wb;
+
+	memcg_css = task_css(current, memory_cgrp_id);
+	if (!memcg_css->parent)
+		return &bdi->wb;
+
+	wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id);
+
+	/*
+	 * %current's blkcg equals the effective blkcg of its memcg.  No
+	 * need to use the relatively expensive cgroup_get_e_css().
+	 */
+	if (likely(wb && wb->blkcg_css == task_css(current, blkio_cgrp_id)))
+		return wb;
+	return NULL;
+}
+
+/**
+ * wb_get_create_current - get or create wb for %current on a bdi
+ * @bdi: bdi of interest
+ * @gfp: allocation mask
+ *
+ * Equivalent to wb_get_create() on %current's memcg.  This function is
+ * called from a relatively hot path and optimizes the common cases using
+ * wb_find_current().
+ */
+static inline struct bdi_writeback *
+wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp)
+{
+	struct bdi_writeback *wb;
+
+	rcu_read_lock();
+	wb = wb_find_current(bdi);
+	if (wb && unlikely(!wb_tryget(wb)))
+		wb = NULL;
+	rcu_read_unlock();
+
+	if (unlikely(!wb)) {
+		struct cgroup_subsys_state *memcg_css;
+
+		memcg_css = task_get_css(current, memory_cgrp_id);
+		wb = wb_get_create(bdi, memcg_css, gfp);
+		css_put(memcg_css);
+	}
+	return wb;
+}
+
+/**
+ * inode_attach_wb - associate an inode with its wb
+ * @inode: inode of interest
+ * @page: page being dirtied (may be NULL)
+ *
+ * If @inode doesn't have its wb, associate it with the wb matching the
+ * memcg of @page or, if @page is NULL, %current.  May be called w/ or w/o
+ * @inode->i_lock.
+ */
+static inline void inode_attach_wb(struct inode *inode, struct page *page)
+{
+	if (!inode->i_wb)
+		__inode_attach_wb(inode, page);
+}
+
+/**
+ * inode_detach_wb - disassociate an inode from its wb
+ * @inode: inode of interest
+ *
+ * @inode is being freed.  Detach from its wb.
+ */
+static inline void inode_detach_wb(struct inode *inode)
+{
+	if (inode->i_wb) {
+		wb_put(inode->i_wb);
+		inode->i_wb = NULL;
+	}
+}
+
+/**
+ * inode_to_wb - determine the wb of an inode
+ * @inode: inode of interest
+ *
+ * Returns the wb @inode is currently associated with.
+ */
+static inline struct bdi_writeback *inode_to_wb(struct inode *inode)
+{
+	return inode->i_wb;
+}
+
 #else	/* CONFIG_CGROUP_WRITEBACK */
 
 static inline bool inode_cgwb_enabled(struct inode *inode)
@@ -257,6 +397,61 @@ static inline bool inode_cgwb_enabled(struct inode *inode)
 	return false;
 }
 
+static inline struct bdi_writeback_congested *
+wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp)
+{
+	return bdi->wb.congested;
+}
+
+static inline void wb_congested_put(struct bdi_writeback_congested *congested)
+{
+}
+
+static inline bool wb_tryget(struct bdi_writeback *wb)
+{
+	return true;
+}
+
+static inline void wb_get(struct bdi_writeback *wb)
+{
+}
+
+static inline void wb_put(struct bdi_writeback *wb)
+{
+}
+
+static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi)
+{
+	return &bdi->wb;
+}
+
+static inline struct bdi_writeback *
+wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp)
+{
+	return &bdi->wb;
+}
+
+static inline void inode_attach_wb(struct inode *inode, struct page *page)
+{
+}
+
+static inline void inode_detach_wb(struct inode *inode)
+{
+}
+
+static inline struct bdi_writeback *inode_to_wb(struct inode *inode)
+{
+	return &inode_to_bdi(inode)->wb;
+}
+
+static inline void wb_memcg_offline(struct mem_cgroup *memcg)
+{
+}
+
+static inline void wb_blkcg_offline(struct blkcg *blkcg)
+{
+}
+
 #endif	/* CONFIG_CGROUP_WRITEBACK */
 
 #endif	/* _LINUX_BACKING_DEV_H */
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 4dc643f2046e..3033eb173eb4 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -53,6 +53,10 @@ struct blkcg {
 	/* TODO: per-policy storage in blkcg */
 	unsigned int			cfq_weight;	/* belongs to cfq */
 	unsigned int			cfq_leaf_weight;
+
+#ifdef CONFIG_CGROUP_WRITEBACK
+	struct list_head		cgwb_list;
+#endif
 };
 
 struct blkg_stat {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 74e0ae0626a8..67a42ec95065 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -35,6 +35,7 @@
 #include <uapi/linux/fs.h>
 
 struct backing_dev_info;
+struct bdi_writeback;
 struct export_operations;
 struct hd_geometry;
 struct iovec;
@@ -635,6 +636,9 @@ struct inode {
 
 	struct hlist_node	i_hash;
 	struct list_head	i_wb_list;	/* backing dev IO list */
+#ifdef CONFIG_CGROUP_WRITEBACK
+	struct bdi_writeback	*i_wb;		/* the associated cgroup wb */
+#endif
 	struct list_head	i_lru;		/* inode LRU list */
 	struct list_head	i_sb_list;
 	union {
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 637ef626008e..662a953ea8ad 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -388,6 +388,10 @@ enum {
 	OVER_LIMIT,
 };
 
+#ifdef CONFIG_CGROUP_WRITEBACK
+struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg);
+#endif
+
 struct sock;
 #if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)
 void sock_update_memcg(struct sock *sk);
-- 
cgit v1.2.3


From ce7acfeaf0363c8b75810908448f61af04d38f91 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 22 May 2015 17:13:38 -0400
Subject: writeback, blkcg: associate each blkcg_gq with the corresponding
 bdi_writeback_congested

A blkg (blkcg_gq) can be congested and decongested independently from
other blkgs on the same request_queue.  Accordingly, for cgroup
writeback support, the congestion status at bdi (backing_dev_info)
should be split and updated separately from matching blkg's.

This patch prepares by adding blkg->wb_congested and associating a
blkg with its matching per-blkcg bdi_writeback_congested on creation.

v2: Updated to associate bdi_writeback_congested instead of
    bdi_writeback.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jan Kara <jack@suse.cz>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blk-cgroup.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 3033eb173eb4..07a32b813ed8 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -99,6 +99,12 @@ struct blkcg_gq {
 	struct hlist_node		blkcg_node;
 	struct blkcg			*blkcg;
 
+	/*
+	 * Each blkg gets congested separately and the congestion state is
+	 * propagated to the matching bdi_writeback_congested.
+	 */
+	struct bdi_writeback_congested	*wb_congested;
+
 	/* all non-root blkcg_gq's are guaranteed to have access to parent */
 	struct blkcg_gq			*parent;
 
-- 
cgit v1.2.3


From ec8a6f2643923ee5b74d24fa8d134240379f436b Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 22 May 2015 17:13:41 -0400
Subject: writeback: make congestion functions per bdi_writeback

Currently, all congestion functions take bdi (backing_dev_info) and
always operate on the root wb (bdi->wb) and the congestion state from
the block layer is propagated only for the root blkcg.  This patch
introduces {set|clear}_wb_congested() and wb_congested() which take a
bdi_writeback_congested and bdi_writeback respectively.  The bdi
counteparts are now wrappers invoking the wb based functions on
@bdi->wb.

While converting clear_bdi_congested() to clear_wb_congested(), the
local variable declaration order between @wqh and @bit is swapped for
cosmetic reason.

This patch just adds the new wb based functions.  The following
patches will apply them.

v2: Updated for bdi_writeback_congested.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/backing-dev-defs.h | 14 +++++++++++--
 include/linux/backing-dev.h      | 45 +++++++++++++++++++++++-----------------
 2 files changed, 38 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index a1e9c407a59a..eb386766b5f3 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -163,7 +163,17 @@ enum {
 	BLK_RW_SYNC	= 1,
 };
 
-void clear_bdi_congested(struct backing_dev_info *bdi, int sync);
-void set_bdi_congested(struct backing_dev_info *bdi, int sync);
+void clear_wb_congested(struct bdi_writeback_congested *congested, int sync);
+void set_wb_congested(struct bdi_writeback_congested *congested, int sync);
+
+static inline void clear_bdi_congested(struct backing_dev_info *bdi, int sync)
+{
+	clear_wb_congested(bdi->wb.congested, sync);
+}
+
+static inline void set_bdi_congested(struct backing_dev_info *bdi, int sync)
+{
+	set_wb_congested(bdi->wb.congested, sync);
+}
 
 #endif	/* __LINUX_BACKING_DEV_DEFS_H */
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 8ae59df2e3d1..2c498a2a8268 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -167,27 +167,13 @@ static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
 	return sb->s_bdi;
 }
 
-static inline int bdi_congested(struct backing_dev_info *bdi, int bdi_bits)
+static inline int wb_congested(struct bdi_writeback *wb, int cong_bits)
 {
-	if (bdi->congested_fn)
-		return bdi->congested_fn(bdi->congested_data, bdi_bits);
-	return (bdi->wb.congested->state & bdi_bits);
-}
-
-static inline int bdi_read_congested(struct backing_dev_info *bdi)
-{
-	return bdi_congested(bdi, 1 << WB_sync_congested);
-}
-
-static inline int bdi_write_congested(struct backing_dev_info *bdi)
-{
-	return bdi_congested(bdi, 1 << WB_async_congested);
-}
+	struct backing_dev_info *bdi = wb->bdi;
 
-static inline int bdi_rw_congested(struct backing_dev_info *bdi)
-{
-	return bdi_congested(bdi, (1 << WB_sync_congested) |
-				  (1 << WB_async_congested));
+	if (bdi->congested_fn)
+		return bdi->congested_fn(bdi->congested_data, cong_bits);
+	return wb->congested->state & cong_bits;
 }
 
 long congestion_wait(int sync, long timeout);
@@ -454,4 +440,25 @@ static inline void wb_blkcg_offline(struct blkcg *blkcg)
 
 #endif	/* CONFIG_CGROUP_WRITEBACK */
 
+static inline int bdi_congested(struct backing_dev_info *bdi, int cong_bits)
+{
+	return wb_congested(&bdi->wb, cong_bits);
+}
+
+static inline int bdi_read_congested(struct backing_dev_info *bdi)
+{
+	return bdi_congested(bdi, 1 << WB_sync_congested);
+}
+
+static inline int bdi_write_congested(struct backing_dev_info *bdi)
+{
+	return bdi_congested(bdi, 1 << WB_async_congested);
+}
+
+static inline int bdi_rw_congested(struct backing_dev_info *bdi)
+{
+	return bdi_congested(bdi, (1 << WB_sync_congested) |
+				  (1 << WB_async_congested));
+}
+
 #endif	/* _LINUX_BACKING_DEV_H */
-- 
cgit v1.2.3


From d40f75a06dd675808eed385d490ba9468200b23f Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 22 May 2015 17:13:42 -0400
Subject: writeback, blkcg: restructure blk_{set|clear}_queue_congested()

blk_{set|clear}_queue_congested() take @q and set or clear,
respectively, the congestion state of its bdi's root wb.  Because bdi
used to be able to handle congestion state only on the root wb, the
callers of those functions tested whether the congestion is on the
root blkcg and skipped if not.

This is cumbersome and makes implementation of per cgroup
bdi_writeback congestion state propagation difficult.  This patch
renames blk_{set|clear}_queue_congested() to
blk_{set|clear}_congested(), and makes them take request_list instead
of request_queue and test whether the specified request_list is the
root one before updating bdi_writeback congestion state.  This makes
the tests in the callers unnecessary and simplifies them.

As there are no external users of these functions, the definitions are
moved from include/linux/blkdev.h to block/blk-core.c.

This patch doesn't introduce any noticeable behavior difference.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jan Kara <jack@suse.cz>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blkdev.h | 19 -------------------
 1 file changed, 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 60d2726a6b62..ab4a27852f1b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -790,25 +790,6 @@ extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t,
 
 extern void blk_queue_bio(struct request_queue *q, struct bio *bio);
 
-/*
- * A queue has just exitted congestion.  Note this in the global counter of
- * congested queues, and wake up anyone who was waiting for requests to be
- * put back.
- */
-static inline void blk_clear_queue_congested(struct request_queue *q, int sync)
-{
-	clear_bdi_congested(&q->backing_dev_info, sync);
-}
-
-/*
- * A queue has just entered congestion.  Flag that in the queue's VM-visible
- * state flags and increment the global gounter of congested queues.
- */
-static inline void blk_set_queue_congested(struct request_queue *q, int sync)
-{
-	set_bdi_congested(&q->backing_dev_info, sync);
-}
-
 extern void blk_start_queue(struct request_queue *q);
 extern void blk_stop_queue(struct request_queue *q);
 extern void blk_sync_queue(struct request_queue *q);
-- 
cgit v1.2.3


From 703c270887bb5106c4c46a00cc7477d30d5e04f5 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 22 May 2015 17:13:44 -0400
Subject: writeback: implement and use inode_congested()

In several places, bdi_congested() and its wrappers are used to
determine whether more IOs should be issued.  With cgroup writeback
support, this question can't be answered solely based on the bdi
(backing_dev_info).  It's dependent on whether the filesystem and bdi
support cgroup writeback and the blkcg the inode is associated with.

This patch implements inode_congested() and its wrappers which take
@inode and determines the congestion state considering cgroup
writeback.  The new functions replace bdi_*congested() calls in places
where the query is about specific inode and task.

There are several filesystem users which also fit this criteria but
they should be updated when each filesystem implements cgroup
writeback support.

v2: Now that a given inode is associated with only one wb, congestion
    state can be determined independent from the asking task.  Drop
    @task.  Spotted by Vivek.  Also, converted to take @inode instead
    of @mapping and renamed to inode_congested().

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jan Kara <jack@suse.cz>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/backing-dev.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 2c498a2a8268..6f0882105f95 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -230,6 +230,7 @@ struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
 void __inode_attach_wb(struct inode *inode, struct page *page);
 void wb_memcg_offline(struct mem_cgroup *memcg);
 void wb_blkcg_offline(struct blkcg *blkcg);
+int inode_congested(struct inode *inode, int cong_bits);
 
 /**
  * inode_cgwb_enabled - test whether cgroup writeback is enabled on an inode
@@ -438,8 +439,29 @@ static inline void wb_blkcg_offline(struct blkcg *blkcg)
 {
 }
 
+static inline int inode_congested(struct inode *inode, int cong_bits)
+{
+	return wb_congested(&inode_to_bdi(inode)->wb, cong_bits);
+}
+
 #endif	/* CONFIG_CGROUP_WRITEBACK */
 
+static inline int inode_read_congested(struct inode *inode)
+{
+	return inode_congested(inode, 1 << WB_sync_congested);
+}
+
+static inline int inode_write_congested(struct inode *inode)
+{
+	return inode_congested(inode, 1 << WB_async_congested);
+}
+
+static inline int inode_rw_congested(struct inode *inode)
+{
+	return inode_congested(inode, (1 << WB_sync_congested) |
+				      (1 << WB_async_congested));
+}
+
 static inline int bdi_congested(struct backing_dev_info *bdi, int cong_bits)
 {
 	return wb_congested(&bdi->wb, cong_bits);
-- 
cgit v1.2.3


From d6c10f1fc8626dc55946f4768ae322b4c57b07dd Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 22 May 2015 17:13:45 -0400
Subject: writeback: implement WB_has_dirty_io wb_state flag

Currently, wb_has_dirty_io() determines whether a wb (bdi_writeback)
has any dirty inode by testing all three IO lists on each invocation
without actively keeping track.  For cgroup writeback support, a
single bdi will host multiple wb's each of which will host dirty
inodes separately and we'll need to make bdi_has_dirty_io(), which
currently only represents the root wb, aggregate has_dirty_io from all
member wb's, which requires tracking transitions in has_dirty_io state
on each wb.

This patch introduces inode_wb_list_{move|del}_locked() to consolidate
IO list operations leaving queue_io() the only other function which
directly manipulates IO lists (via move_expired_inodes()).  All three
functions are updated to call wb_io_lists_[de]populated() which keep
track of whether the wb has dirty inodes or not and record it using
the new WB_has_dirty_io flag.  inode_wb_list_moved_locked()'s return
value indicates whether the wb had no dirty inodes before.

mark_inode_dirty() is restructured so that the return value of
inode_wb_list_move_locked() can be used for deciding whether to wake
up the wb.

While at it, change {bdi|wb}_has_dirty_io()'s return values to bool.
These functions were returning 0 and 1 before.  Also, add a comment
explaining the synchronization of wb_state flags.

v2: Updated to accommodate b_dirty_time.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/backing-dev-defs.h | 1 +
 include/linux/backing-dev.h      | 8 +++-----
 2 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index eb386766b5f3..7a94b7850b7c 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -21,6 +21,7 @@ struct dentry;
 enum wb_state {
 	WB_registered,		/* bdi_register() was done */
 	WB_writeback_running,	/* Writeback is in progress */
+	WB_has_dirty_io,	/* Dirty inodes on ->b_{dirty|io|more_io} */
 };
 
 enum wb_congested_state {
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 6f0882105f95..3c8403c012ce 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -29,7 +29,7 @@ void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
 			enum wb_reason reason);
 void bdi_start_background_writeback(struct backing_dev_info *bdi);
 void wb_workfn(struct work_struct *work);
-int bdi_has_dirty_io(struct backing_dev_info *bdi);
+bool bdi_has_dirty_io(struct backing_dev_info *bdi);
 void wb_wakeup_delayed(struct bdi_writeback *wb);
 
 extern spinlock_t bdi_lock;
@@ -37,11 +37,9 @@ extern struct list_head bdi_list;
 
 extern struct workqueue_struct *bdi_wq;
 
-static inline int wb_has_dirty_io(struct bdi_writeback *wb)
+static inline bool wb_has_dirty_io(struct bdi_writeback *wb)
 {
-	return !list_empty(&wb->b_dirty) ||
-	       !list_empty(&wb->b_io) ||
-	       !list_empty(&wb->b_more_io);
+	return test_bit(WB_has_dirty_io, &wb->state);
 }
 
 static inline void __add_wb_stat(struct bdi_writeback *wb,
-- 
cgit v1.2.3


From 766a9d6e60578f1ef6de71f89f022084f8bffc82 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 22 May 2015 17:13:46 -0400
Subject: writeback: implement backing_dev_info->tot_write_bandwidth

cgroup writeback support needs to keep track of the sum of
avg_write_bandwidth of all wb's (bdi_writeback's) with dirty inodes to
distribute write workload.  This patch adds bdi->tot_write_bandwidth
and updates inode_wb_list_move_locked(), inode_wb_list_del_locked()
and wb_update_write_bandwidth() to adjust it as wb's gain and lose
dirty inodes and its avg_write_bandwidth gets updated.

As the update events are not synchronized with each other,
bdi->tot_write_bandwidth is an atomic_long_t.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/backing-dev-defs.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 7a94b7850b7c..d631a61f4023 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -142,6 +142,8 @@ struct backing_dev_info {
 	unsigned int min_ratio;
 	unsigned int max_ratio, max_prop_frac;
 
+	atomic_long_t tot_write_bandwidth; /* sum of active avg_write_bw */
+
 	struct bdi_writeback wb;  /* the root writeback info for this bdi */
 	struct bdi_writeback_congested wb_congested; /* its congested state */
 #ifdef CONFIG_CGROUP_WRITEBACK
-- 
cgit v1.2.3


From 95a46c65e3c09edb9f17dabf2dc16670cd328739 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 22 May 2015 17:13:47 -0400
Subject: writeback: make bdi_has_dirty_io() take multiple bdi_writeback's into
 account

bdi_has_dirty_io() used to only reflect whether the root wb
(bdi_writeback) has dirty inodes.  For cgroup writeback support, it
needs to take all active wb's into account.  If any wb on the bdi has
dirty inodes, bdi_has_dirty_io() should return true.

To achieve that, as inode_wb_list_{move|del}_locked() now keep track
of the dirty state transition of each wb, the number of dirty wbs can
be counted in the bdi; however, bdi is already aggregating
wb->avg_write_bandwidth which can easily be guaranteed to be > 0 when
there are any dirty inodes by ensuring wb->avg_write_bandwidth can't
dip below 1.  bdi_has_dirty_io() can simply test whether
bdi->tot_write_bandwidth is zero or not.

While this bumps the value of wb->avg_write_bandwidth to one when it
used to be zero, this shouldn't cause any meaningful behavior
difference.

bdi_has_dirty_io() is made an inline function which tests whether
->tot_write_bandwidth is non-zero.  Also, WARN_ON_ONCE()'s on its
value are added to inode_wb_list_{move|del}_locked().

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/backing-dev-defs.h |  8 ++++++--
 include/linux/backing-dev.h      | 10 +++++++++-
 2 files changed, 15 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index d631a61f4023..8c857d723023 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -98,7 +98,7 @@ struct bdi_writeback {
 	unsigned long dirtied_stamp;
 	unsigned long written_stamp;	/* pages written at bw_time_stamp */
 	unsigned long write_bandwidth;	/* the estimated write bandwidth */
-	unsigned long avg_write_bandwidth; /* further smoothed write bw */
+	unsigned long avg_write_bandwidth; /* further smoothed write bw, > 0 */
 
 	/*
 	 * The base dirty throttle rate, re-calculated on every 200ms.
@@ -142,7 +142,11 @@ struct backing_dev_info {
 	unsigned int min_ratio;
 	unsigned int max_ratio, max_prop_frac;
 
-	atomic_long_t tot_write_bandwidth; /* sum of active avg_write_bw */
+	/*
+	 * Sum of avg_write_bw of wbs with dirty inodes.  > 0 if there are
+	 * any dirty wbs, which is depended upon by bdi_has_dirty().
+	 */
+	atomic_long_t tot_write_bandwidth;
 
 	struct bdi_writeback wb;  /* the root writeback info for this bdi */
 	struct bdi_writeback_congested wb_congested; /* its congested state */
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 3c8403c012ce..0839e44105bd 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -29,7 +29,6 @@ void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
 			enum wb_reason reason);
 void bdi_start_background_writeback(struct backing_dev_info *bdi);
 void wb_workfn(struct work_struct *work);
-bool bdi_has_dirty_io(struct backing_dev_info *bdi);
 void wb_wakeup_delayed(struct bdi_writeback *wb);
 
 extern spinlock_t bdi_lock;
@@ -42,6 +41,15 @@ static inline bool wb_has_dirty_io(struct bdi_writeback *wb)
 	return test_bit(WB_has_dirty_io, &wb->state);
 }
 
+static inline bool bdi_has_dirty_io(struct backing_dev_info *bdi)
+{
+	/*
+	 * @bdi->tot_write_bandwidth is guaranteed to be > 0 if there are
+	 * any dirty wbs.  See wb_update_write_bandwidth().
+	 */
+	return atomic_long_read(&bdi->tot_write_bandwidth);
+}
+
 static inline void __add_wb_stat(struct bdi_writeback *wb,
 				 enum wb_stat_item item, s64 amount)
 {
-- 
cgit v1.2.3


From ebe41ab0c79d5633123f6faa3265a1a63c5f22d8 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 22 May 2015 17:13:50 -0400
Subject: writeback: implement bdi_for_each_wb()

This will be used to implement bdi-wide operations which should be
distributed across all its cgroup bdi_writebacks.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/backing-dev.h | 63 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 0839e44105bd..c7979806baee 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -383,6 +383,61 @@ static inline struct bdi_writeback *inode_to_wb(struct inode *inode)
 	return inode->i_wb;
 }
 
+struct wb_iter {
+	int			start_blkcg_id;
+	struct radix_tree_iter	tree_iter;
+	void			**slot;
+};
+
+static inline struct bdi_writeback *__wb_iter_next(struct wb_iter *iter,
+						   struct backing_dev_info *bdi)
+{
+	struct radix_tree_iter *titer = &iter->tree_iter;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	if (iter->start_blkcg_id >= 0) {
+		iter->slot = radix_tree_iter_init(titer, iter->start_blkcg_id);
+		iter->start_blkcg_id = -1;
+	} else {
+		iter->slot = radix_tree_next_slot(iter->slot, titer, 0);
+	}
+
+	if (!iter->slot)
+		iter->slot = radix_tree_next_chunk(&bdi->cgwb_tree, titer, 0);
+	if (iter->slot)
+		return *iter->slot;
+	return NULL;
+}
+
+static inline struct bdi_writeback *__wb_iter_init(struct wb_iter *iter,
+						   struct backing_dev_info *bdi,
+						   int start_blkcg_id)
+{
+	iter->start_blkcg_id = start_blkcg_id;
+
+	if (start_blkcg_id)
+		return __wb_iter_next(iter, bdi);
+	else
+		return &bdi->wb;
+}
+
+/**
+ * bdi_for_each_wb - walk all wb's of a bdi in ascending blkcg ID order
+ * @wb_cur: cursor struct bdi_writeback pointer
+ * @bdi: bdi to walk wb's of
+ * @iter: pointer to struct wb_iter to be used as iteration buffer
+ * @start_blkcg_id: blkcg ID to start iteration from
+ *
+ * Iterate @wb_cur through the wb's (bdi_writeback's) of @bdi in ascending
+ * blkcg ID order starting from @start_blkcg_id.  @iter is struct wb_iter
+ * to be used as temp storage during iteration.  rcu_read_lock() must be
+ * held throughout iteration.
+ */
+#define bdi_for_each_wb(wb_cur, bdi, iter, start_blkcg_id)		\
+	for ((wb_cur) = __wb_iter_init(iter, bdi, start_blkcg_id);	\
+	     (wb_cur); (wb_cur) = __wb_iter_next(iter, bdi))
+
 #else	/* CONFIG_CGROUP_WRITEBACK */
 
 static inline bool inode_cgwb_enabled(struct inode *inode)
@@ -445,6 +500,14 @@ static inline void wb_blkcg_offline(struct blkcg *blkcg)
 {
 }
 
+struct wb_iter {
+	int		next_id;
+};
+
+#define bdi_for_each_wb(wb_cur, bdi, iter, start_blkcg_id)		\
+	for ((iter)->next_id = (start_blkcg_id);			\
+	     ({	(wb_cur) = !(iter)->next_id++ ? &(bdi)->wb : NULL; }); )
+
 static inline int inode_congested(struct inode *inode, int cong_bits)
 {
 	return wb_congested(&inode_to_bdi(inode)->wb, cong_bits);
-- 
cgit v1.2.3


From c00ddad39f512b1a81e25b7892217ce10efab0f1 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 22 May 2015 17:13:51 -0400
Subject: writeback: remove bdi_start_writeback()

bdi_start_writeback() is a thin wrapper on top of
__wb_start_writeback() which is used only by laptop_mode_timer_fn().
This patches removes bdi_start_writeback(), renames
__wb_start_writeback() to wb_start_writeback() and makes
laptop_mode_timer_fn() use it instead.

This doesn't cause any functional difference and will ease making
laptop_mode_timer_fn() cgroup writeback aware.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/backing-dev.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index c7979806baee..0ff40c228bee 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -25,8 +25,8 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
 void bdi_unregister(struct backing_dev_info *bdi);
 int __must_check bdi_setup_and_register(struct backing_dev_info *, char *);
-void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
-			enum wb_reason reason);
+void wb_start_writeback(struct bdi_writeback *wb, long nr_pages,
+			bool range_cyclic, enum wb_reason reason);
 void bdi_start_background_writeback(struct backing_dev_info *bdi);
 void wb_workfn(struct work_struct *work);
 void wb_wakeup_delayed(struct bdi_writeback *wb);
-- 
cgit v1.2.3


From bc05873dccd27d75d6acdf812c3edfb181f1ba17 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 22 May 2015 17:13:53 -0400
Subject: writeback: make writeback_in_progress() take bdi_writeback instead of
 backing_dev_info

writeback_in_progress() currently takes @bdi and returns whether
writeback is in progress on its root wb (bdi_writeback).  In
preparation for cgroup writeback support, make it take wb instead.
While at it, make it an inline function.

This patch doesn't make any functional difference.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/backing-dev.h | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 0ff40c228bee..f04956c900ec 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -156,7 +156,17 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
 
 extern struct backing_dev_info noop_backing_dev_info;
 
-int writeback_in_progress(struct backing_dev_info *bdi);
+/**
+ * writeback_in_progress - determine whether there is writeback in progress
+ * @wb: bdi_writeback of interest
+ *
+ * Determine whether there is writeback waiting to be handled against a
+ * bdi_writeback.
+ */
+static inline bool writeback_in_progress(struct bdi_writeback *wb)
+{
+	return test_bit(WB_writeback_running, &wb->state);
+}
 
 static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
 {
-- 
cgit v1.2.3


From 9ecf4866c018aeb304a7b49216c4d183665becb7 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 22 May 2015 17:13:54 -0400
Subject: writeback: make bdi_start_background_writeback() take bdi_writeback
 instead of backing_dev_info

bdi_start_background_writeback() currently takes @bdi and kicks the
root wb (bdi_writeback).  In preparation for cgroup writeback support,
make it take wb instead.

This patch doesn't make any functional difference.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/backing-dev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index f04956c900ec..9cc11e5b97ca 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -27,7 +27,7 @@ void bdi_unregister(struct backing_dev_info *bdi);
 int __must_check bdi_setup_and_register(struct backing_dev_info *, char *);
 void wb_start_writeback(struct bdi_writeback *wb, long nr_pages,
 			bool range_cyclic, enum wb_reason reason);
-void bdi_start_background_writeback(struct backing_dev_info *bdi);
+void wb_start_background_writeback(struct bdi_writeback *wb);
 void wb_workfn(struct work_struct *work);
 void wb_wakeup_delayed(struct bdi_writeback *wb);
 
-- 
cgit v1.2.3


From cc395d7f1f7b9c740ab6d367ef1f6eb248595dff Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 22 May 2015 17:13:58 -0400
Subject: writeback: implement bdi_wait_for_completion()

If the completion of a wb_writeback_work can be waited upon by setting
its ->done to a struct completion and waiting on it; however, for
cgroup writeback support, it's necessary to issue multiple work items
to multiple bdi_writebacks and wait for the completion of all.

This patch implements wb_completion which can wait for multiple work
items and replaces the struct completion with it.  It can be defined
using DEFINE_WB_COMPLETION_ONSTACK(), used for multiple work items and
waited for by wb_wait_for_completion().

Nobody currently issues multiple work items and this patch doesn't
introduce any behavior changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/backing-dev-defs.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 8c857d723023..97a92fa0cdb5 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -155,6 +155,8 @@ struct backing_dev_info {
 	struct rb_root cgwb_congested_tree; /* their congested states */
 	atomic_t usage_cnt; /* counts both cgwbs and cgwb_contested's */
 #endif
+	wait_queue_head_t wb_waitq;
+
 	struct device *dev;
 
 	struct timer_list laptop_mode_wb_timer;
-- 
cgit v1.2.3


From f30a7d0cc8d9096d6728fadd0ab024e648010ec0 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 22 May 2015 17:14:00 -0400
Subject: writeback: restructure try_writeback_inodes_sb[_nr]()

try_writeback_inodes_sb_nr() wraps writeback_inodes_sb_nr() so that it
handles s_umount locking and skips if writeback is already in
progress.  The in progress test is performed on the root wb
(bdi_writeback) which isn't sufficient for cgroup writeback support.
The test must be done per-wb.

To prepare for the change, this patch factors out
__writeback_inodes_sb_nr() from writeback_inodes_sb_nr() and adds
@skip_if_busy and moves the in progress test right before queueing the
wb_writeback_work.  try_writeback_inodes_sb_nr() now just grabs
s_umount and invokes __writeback_inodes_sb_nr() with asserted
@skip_if_busy.  This way, later addition of multiple wb handling can
skip only the wb's which already have writeback in progress.

This swaps the order between in progress test and s_umount test which
can flip the return value when writeback is in progress and s_umount
is being held by someone else but this shouldn't cause any meaningful
difference.  It's a fringe condition and the return value is an
unsynchronized hint anyway.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/writeback.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index a6b9db7fcee8..23af355d5471 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -93,9 +93,9 @@ struct bdi_writeback;
 void writeback_inodes_sb(struct super_block *, enum wb_reason reason);
 void writeback_inodes_sb_nr(struct super_block *, unsigned long nr,
 							enum wb_reason reason);
-int try_to_writeback_inodes_sb(struct super_block *, enum wb_reason reason);
-int try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr,
-				  enum wb_reason reason);
+bool try_to_writeback_inodes_sb(struct super_block *, enum wb_reason reason);
+bool try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr,
+				   enum wb_reason reason);
 void sync_inodes_sb(struct super_block *);
 void wakeup_flusher_threads(long nr_pages, enum wb_reason reason);
 void inode_wait_for_writeback(struct inode *inode);
-- 
cgit v1.2.3


From bafc0dba1e20d84578d7098d32caf63441e5743d Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 2 Jun 2015 08:37:23 -0600
Subject: buffer, writeback: make __block_write_full_page() honor cgroup
 writeback

[__]block_write_full_page() is used to implement ->writepage in
various filesystems.  All writeback logic is now updated to handle
cgroup writeback and the block cgroup to issue IOs for is encoded in
writeback_control and can be retrieved from the inode; however,
[__]block_write_full_page() currently ignores the blkcg indicated by
inode and issues all bio's without explicit blkcg association.

This patch adds submit_bh_blkcg() which associates the bio with the
specified blkio cgroup before issuing and uses it in
__block_write_full_page() so that the issued bio's are associated with
inode_to_wb_blkcg_css(inode).

v2: Updated for per-inode wb association.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jan Kara <jack@suse.cz>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/backing-dev.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 9cc11e5b97ca..e9d7373f5f93 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -393,6 +393,12 @@ static inline struct bdi_writeback *inode_to_wb(struct inode *inode)
 	return inode->i_wb;
 }
 
+static inline struct cgroup_subsys_state *
+inode_to_wb_blkcg_css(struct inode *inode)
+{
+	return inode_to_wb(inode)->blkcg_css;
+}
+
 struct wb_iter {
 	int			start_blkcg_id;
 	struct radix_tree_iter	tree_iter;
@@ -510,6 +516,12 @@ static inline void wb_blkcg_offline(struct blkcg *blkcg)
 {
 }
 
+static inline struct cgroup_subsys_state *
+inode_to_wb_blkcg_css(struct inode *inode)
+{
+	return blkcg_root_css;
+}
+
 struct wb_iter {
 	int		next_id;
 };
-- 
cgit v1.2.3


From 0d960a383ae7aa791b2833e122ba7519d264cf92 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 22 May 2015 18:23:19 -0400
Subject: writeback: clean up wb_dirty_limit()

The function name wb_dirty_limit(), its argument @dirty and the local
variable @wb_dirty are mortally confusing given that the function
calculates per-wb threshold value not dirty pages, especially given
that @dirty and @wb_dirty are used elsewhere for dirty pages.

Let's rename the function to wb_calc_thresh() and wb_dirty to
wb_thresh.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jan Kara <jack@suse.cz>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Greg Thelen <gthelen@google.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/writeback.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 23af355d5471..0435c85d4cfa 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -155,7 +155,7 @@ int dirty_writeback_centisecs_handler(struct ctl_table *, int,
 				      void __user *, size_t *, loff_t *);
 
 void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty);
-unsigned long wb_dirty_limit(struct bdi_writeback *wb, unsigned long dirty);
+unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh);
 
 void __wb_update_bandwidth(struct bdi_writeback *wb,
 			   unsigned long thresh,
-- 
cgit v1.2.3


From 8a73179956e649df0d4b3250db17734f272d8266 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 22 May 2015 18:23:20 -0400
Subject: writeback: reorganize [__]wb_update_bandwidth()

__wb_update_bandwidth() is called from two places -
fs/fs-writeback.c::balance_dirty_pages() and
mm/page-writeback.c::wb_writeback().  The latter updates only the
write bandwidth while the former also deals with the dirty ratelimit.
The two callsites are distinguished by whether @thresh parameter is
zero or not, which is cryptic.  In addition, the two files define
their own different versions of wb_update_bandwidth() on top of
__wb_update_bandwidth(), which is confusing to say the least.  This
patch cleans up [__]wb_update_bandwidth() in the following ways.

* __wb_update_bandwidth() now takes explicit @update_ratelimit
  parameter to gate dirty ratelimit handling.

* mm/page-writeback.c::wb_update_bandwidth() is flattened into its
  caller - balance_dirty_pages().

* fs/fs-writeback.c::wb_update_bandwidth() is moved to
  mm/page-writeback.c and __wb_update_bandwidth() is made static.

* While at it, add a lockdep assertion to __wb_update_bandwidth().

Except for the lockdep addition, this is pure reorganization and
doesn't introduce any behavioral changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jan Kara <jack@suse.cz>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Greg Thelen <gthelen@google.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/writeback.h | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 0435c85d4cfa..80adf3d88d9d 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -157,14 +157,7 @@ int dirty_writeback_centisecs_handler(struct ctl_table *, int,
 void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty);
 unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh);
 
-void __wb_update_bandwidth(struct bdi_writeback *wb,
-			   unsigned long thresh,
-			   unsigned long bg_thresh,
-			   unsigned long dirty,
-			   unsigned long bdi_thresh,
-			   unsigned long bdi_dirty,
-			   unsigned long start_time);
-
+void wb_update_bandwidth(struct bdi_writeback *wb, unsigned long start_time);
 void page_writeback_init(void);
 void balance_dirty_pages_ratelimited(struct address_space *mapping);
 
-- 
cgit v1.2.3


From 380c27ca33ebecc9da35aa90c8b3a9154f90aac2 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 22 May 2015 18:23:21 -0400
Subject: writeback: implement wb_domain

Dirtyable memory is distributed to a wb (bdi_writeback) according to
the relative bandwidth the wb is writing out in the whole system.
This distribution is global - each wb is measured against all other
wb's and gets the proportinately sized portion of the memory in the
whole system.

For cgroup writeback, the amount of dirtyable memory is scoped by
memcg and thus each wb would need to be measured and controlled in its
memcg.  IOW, a wb will belong to two writeback domains - the global
and memcg domains.

Currently, what constitutes the global writeback domain are scattered
across a number of global states.  This patch starts collecting them
into struct wb_domain.

* fprop_global which serves as the basis for proportional bandwidth
  measurement and its period timer are moved into struct wb_domain.

* global_wb_domain hosts the states for the global domain.

* While at it, flatten wb_writeout_fraction() into its callers.  This
  thin wrapper doesn't provide any actual benefits while getting in
  the way.

This is pure reorganization and doesn't introduce any behavioral
changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jan Kara <jack@suse.cz>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Greg Thelen <gthelen@google.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/writeback.h | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 80adf3d88d9d..3148db1296a2 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -7,6 +7,7 @@
 #include <linux/sched.h>
 #include <linux/workqueue.h>
 #include <linux/fs.h>
+#include <linux/flex_proportions.h>
 
 DECLARE_PER_CPU(int, dirty_throttle_leaks);
 
@@ -86,6 +87,36 @@ struct writeback_control {
 	unsigned for_sync:1;		/* sync(2) WB_SYNC_ALL writeback */
 };
 
+/*
+ * A wb_domain represents a domain that wb's (bdi_writeback's) belong to
+ * and are measured against each other in.  There always is one global
+ * domain, global_wb_domain, that every wb in the system is a member of.
+ * This allows measuring the relative bandwidth of each wb to distribute
+ * dirtyable memory accordingly.
+ */
+struct wb_domain {
+	/*
+	 * Scale the writeback cache size proportional to the relative
+	 * writeout speed.
+	 *
+	 * We do this by keeping a floating proportion between BDIs, based
+	 * on page writeback completions [end_page_writeback()]. Those
+	 * devices that write out pages fastest will get the larger share,
+	 * while the slower will get a smaller share.
+	 *
+	 * We use page writeout completions because we are interested in
+	 * getting rid of dirty pages. Having them written out is the
+	 * primary goal.
+	 *
+	 * We introduce a concept of time, a period over which we measure
+	 * these events, because demand can/will vary over time. The length
+	 * of this period itself is measured in page writeback completions.
+	 */
+	struct fprop_global completions;
+	struct timer_list period_timer;	/* timer for aging of completions */
+	unsigned long period_time;
+};
+
 /*
  * fs/fs-writeback.c
  */	
@@ -120,6 +151,7 @@ static inline void laptop_sync_completion(void) { }
 #endif
 void throttle_vm_writeout(gfp_t gfp_mask);
 bool zone_dirty_ok(struct zone *zone);
+int wb_domain_init(struct wb_domain *dom, gfp_t gfp);
 
 extern unsigned long global_dirty_limit;
 
-- 
cgit v1.2.3


From dcc25ae76eb7b8ff883eaaab57e30e8f2f085be3 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 22 May 2015 18:23:22 -0400
Subject: writeback: move global_dirty_limit into wb_domain

This patch is a part of the series to define wb_domain which
represents a domain that wb's (bdi_writeback's) belong to and are
measured against each other in.  This will enable IO backpressure
propagation for cgroup writeback.

global_dirty_limit exists to regulate the global dirty threshold which
is a property of the wb_domain.  This patch moves hard_dirty_limit,
dirty_lock, and update_time into wb_domain.

This is pure reorganization and doesn't introduce any behavioral
changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jan Kara <jack@suse.cz>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Greg Thelen <gthelen@google.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/writeback.h | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 3148db1296a2..5fdd4e1805e6 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -95,6 +95,8 @@ struct writeback_control {
  * dirtyable memory accordingly.
  */
 struct wb_domain {
+	spinlock_t lock;
+
 	/*
 	 * Scale the writeback cache size proportional to the relative
 	 * writeout speed.
@@ -115,6 +117,19 @@ struct wb_domain {
 	struct fprop_global completions;
 	struct timer_list period_timer;	/* timer for aging of completions */
 	unsigned long period_time;
+
+	/*
+	 * The dirtyable memory and dirty threshold could be suddenly
+	 * knocked down by a large amount (eg. on the startup of KVM in a
+	 * swapless system). This may throw the system into deep dirty
+	 * exceeded state and throttle heavy/light dirtiers alike. To
+	 * retain good responsiveness, maintain global_dirty_limit for
+	 * tracking slowly down to the knocked down dirty threshold.
+	 *
+	 * Both fields are protected by ->lock.
+	 */
+	unsigned long dirty_limit_tstamp;
+	unsigned long dirty_limit;
 };
 
 /*
@@ -153,7 +168,7 @@ void throttle_vm_writeout(gfp_t gfp_mask);
 bool zone_dirty_ok(struct zone *zone);
 int wb_domain_init(struct wb_domain *dom, gfp_t gfp);
 
-extern unsigned long global_dirty_limit;
+extern struct wb_domain global_wb_domain;
 
 /* These are exported to sysctl. */
 extern int dirty_background_ratio;
-- 
cgit v1.2.3


From aa661bbe1e61ce80ca4ae98804f673ede94b0827 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 22 May 2015 18:23:31 -0400
Subject: writeback: move over_bground_thresh() to mm/page-writeback.c

and rename it to wb_over_bg_thresh().  The function is closely tied to
the dirty throttling mechanism implemented in page-writeback.c.  This
relocation will allow future updates necessary for cgroup writeback
support.

While at it, add function comment.

This is pure reorganization and doesn't introduce any behavioral
changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jan Kara <jack@suse.cz>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Greg Thelen <gthelen@google.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/writeback.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 5fdd4e1805e6..b57c2786b5aa 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -207,6 +207,7 @@ unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh);
 void wb_update_bandwidth(struct bdi_writeback *wb, unsigned long start_time);
 void page_writeback_init(void);
 void balance_dirty_pages_ratelimited(struct address_space *mapping);
+bool wb_over_bg_thresh(struct bdi_writeback *wb);
 
 typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc,
 				void *data);
-- 
cgit v1.2.3


From 841710aa6e4acd066ab9fe8c8cb6f4e4e6709d83 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 22 May 2015 18:23:33 -0400
Subject: writeback: implement memcg wb_domain

Dirtyable memory is distributed to a wb (bdi_writeback) according to
the relative bandwidth the wb is writing out in the whole system.
This distribution is global - each wb is measured against all other
wb's and gets the proportinately sized portion of the memory in the
whole system.

For cgroup writeback, the amount of dirtyable memory is scoped by
memcg and thus each wb would need to be measured and controlled in its
memcg.  IOW, a wb will belong to two writeback domains - the global
and memcg domains.

The previous patches laid the groundwork to support the two wb_domains
and this patch implements memcg wb_domain.  memcg->cgwb_domain is
initialized on css online and destroyed on css release,
wb->memcg_completions is added, and __wb_writeout_inc() is updated to
increment completions against both global and memcg wb_domains.

The following patches will update balance_dirty_pages() and its
subroutines to actually consider memcg wb_domain for throttling.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jan Kara <jack@suse.cz>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Greg Thelen <gthelen@google.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/backing-dev-defs.h |  1 +
 include/linux/memcontrol.h       | 12 +++++++++++-
 include/linux/writeback.h        |  3 +++
 3 files changed, 15 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 97a92fa0cdb5..8d470b73824f 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -118,6 +118,7 @@ struct bdi_writeback {
 
 #ifdef CONFIG_CGROUP_WRITEBACK
 	struct percpu_ref refcnt;	/* used only for !root wb's */
+	struct fprop_local_percpu memcg_completions;
 	struct cgroup_subsys_state *memcg_css; /* the associated memcg */
 	struct cgroup_subsys_state *blkcg_css; /* and blkcg */
 	struct list_head memcg_node;	/* anchored at memcg->cgwb_list */
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 662a953ea8ad..e3177bed23ea 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -389,8 +389,18 @@ enum {
 };
 
 #ifdef CONFIG_CGROUP_WRITEBACK
+
 struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg);
-#endif
+struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb);
+
+#else	/* CONFIG_CGROUP_WRITEBACK */
+
+static inline struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb)
+{
+	return NULL;
+}
+
+#endif	/* CONFIG_CGROUP_WRITEBACK */
 
 struct sock;
 #if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index b57c2786b5aa..04a3786c456f 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -167,6 +167,9 @@ static inline void laptop_sync_completion(void) { }
 void throttle_vm_writeout(gfp_t gfp_mask);
 bool zone_dirty_ok(struct zone *zone);
 int wb_domain_init(struct wb_domain *dom, gfp_t gfp);
+#ifdef CONFIG_CGROUP_WRITEBACK
+void wb_domain_exit(struct wb_domain *dom);
+#endif
 
 extern struct wb_domain global_wb_domain;
 
-- 
cgit v1.2.3


From 2529bb3aadc40a93e642f5f3650f63379a964467 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 22 May 2015 18:23:34 -0400
Subject: writeback: reset wb_domain->dirty_limit[_tstmp] when memcg domain
 size changes

The amount of available memory to a memcg wb_domain can change as
memcg configuration changes.  A domain's ->dirty_limit exists to
smooth out sudden drops in dirty threshold; however, when a domain's
size actually drops significantly, it hinders the dirty throttling
from adjusting to the new configuration leading to unexpected
behaviors including unnecessary OOM kills.

This patch resolves the issue by adding wb_domain_size_changed() which
resets ->dirty_limit[_tstmp] and making memcg call it on configuration
changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jan Kara <jack@suse.cz>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Greg Thelen <gthelen@google.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/writeback.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 04a3786c456f..3b73e97ecfc7 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -132,6 +132,26 @@ struct wb_domain {
 	unsigned long dirty_limit;
 };
 
+/**
+ * wb_domain_size_changed - memory available to a wb_domain has changed
+ * @dom: wb_domain of interest
+ *
+ * This function should be called when the amount of memory available to
+ * @dom has changed.  It resets @dom's dirty limit parameters to prevent
+ * the past values which don't match the current configuration from skewing
+ * dirty throttling.  Without this, when memory size of a wb_domain is
+ * greatly reduced, the dirty throttling logic may allow too many pages to
+ * be dirtied leading to consecutive unnecessary OOMs and may get stuck in
+ * that situation.
+ */
+static inline void wb_domain_size_changed(struct wb_domain *dom)
+{
+	spin_lock(&dom->lock);
+	dom->dirty_limit_tstamp = jiffies;
+	dom->dirty_limit = 0;
+	spin_unlock(&dom->lock);
+}
+
 /*
  * fs/fs-writeback.c
  */	
-- 
cgit v1.2.3


From c2aa723a6093633ae4ec15b08a4db276643cab3e Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 22 May 2015 18:23:35 -0400
Subject: writeback: implement memcg writeback domain based throttling

While cgroup writeback support now connects memcg and blkcg so that
writeback IOs are properly attributed and controlled, the IO back
pressure propagation mechanism implemented in balance_dirty_pages()
and its subroutines wasn't aware of cgroup writeback.

Processes belonging to a memcg may have access to only subset of total
memory available in the system and not factoring this into dirty
throttling rendered it completely ineffective for processes under
memcg limits and memcg ended up building a separate ad-hoc degenerate
mechanism directly into vmscan code to limit page dirtying.

The previous patches updated balance_dirty_pages() and its subroutines
so that they can deal with multiple wb_domain's (writeback domains)
and defined per-memcg wb_domain.  Processes belonging to a non-root
memcg are bound to two wb_domains, global wb_domain and memcg
wb_domain, and should be throttled according to IO pressures from both
domains.  This patch updates dirty throttling code so that it repeats
similar calculations for the two domains - the differences between the
two are few and minor - and applies the lower of the two sets of
resulting constraints.

wb_over_bg_thresh(), which controls when background writeback
terminates, is also updated to consider both global and memcg
wb_domains.  It returns true if dirty is over bg_thresh for either
domain.

This makes the dirty throttling mechanism operational for memcg
domains including writeback-bandwidth-proportional dirty page
distribution inside them but the ad-hoc memcg throttling mechanism in
vmscan is still in place.  The next patch will rip it out.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jan Kara <jack@suse.cz>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Greg Thelen <gthelen@google.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/memcontrol.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e3177bed23ea..c3eb19e2bc1c 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -392,6 +392,8 @@ enum {
 
 struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg);
 struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb);
+void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pavail,
+			 unsigned long *pdirty, unsigned long *pwriteback);
 
 #else	/* CONFIG_CGROUP_WRITEBACK */
 
@@ -400,6 +402,13 @@ static inline struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb)
 	return NULL;
 }
 
+static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb,
+				       unsigned long *pavail,
+				       unsigned long *pdirty,
+				       unsigned long *pwriteback)
+{
+}
+
 #endif	/* CONFIG_CGROUP_WRITEBACK */
 
 struct sock;
-- 
cgit v1.2.3


From 21c6321fbb3a3787af07f1bc031d713a707fb69c Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 28 May 2015 14:50:49 -0400
Subject: writeback: relocate wb[_try]_get(), wb_put(),
 inode_{attach|detach}_wb()

Currently, majority of cgroup writeback support including all the
above functions are implemented in include/linux/backing-dev.h and
mm/backing-dev.c; however, the portion closely related to writeback
logic implemented in include/linux/writeback.h and mm/page-writeback.c
will expand to support foreign writeback detection and correction.

This patch moves wb[_try]_get() and wb_put() to
include/linux/backing-dev-defs.h so that they can be used from
writeback.h and inode_{attach|detach}_wb() to writeback.h and
page-writeback.c.

This is pure reorganization and doesn't introduce any functional
changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jan Kara <jack@suse.cz>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Greg Thelen <gthelen@google.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/backing-dev-defs.h | 50 ++++++++++++++++++++++++
 include/linux/backing-dev.h      | 82 ----------------------------------------
 include/linux/writeback.h        | 46 ++++++++++++++++++++++
 3 files changed, 96 insertions(+), 82 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 8d470b73824f..e047b496a0b9 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -186,4 +186,54 @@ static inline void set_bdi_congested(struct backing_dev_info *bdi, int sync)
 	set_wb_congested(bdi->wb.congested, sync);
 }
 
+#ifdef CONFIG_CGROUP_WRITEBACK
+
+/**
+ * wb_tryget - try to increment a wb's refcount
+ * @wb: bdi_writeback to get
+ */
+static inline bool wb_tryget(struct bdi_writeback *wb)
+{
+	if (wb != &wb->bdi->wb)
+		return percpu_ref_tryget(&wb->refcnt);
+	return true;
+}
+
+/**
+ * wb_get - increment a wb's refcount
+ * @wb: bdi_writeback to get
+ */
+static inline void wb_get(struct bdi_writeback *wb)
+{
+	if (wb != &wb->bdi->wb)
+		percpu_ref_get(&wb->refcnt);
+}
+
+/**
+ * wb_put - decrement a wb's refcount
+ * @wb: bdi_writeback to put
+ */
+static inline void wb_put(struct bdi_writeback *wb)
+{
+	if (wb != &wb->bdi->wb)
+		percpu_ref_put(&wb->refcnt);
+}
+
+#else	/* CONFIG_CGROUP_WRITEBACK */
+
+static inline bool wb_tryget(struct bdi_writeback *wb)
+{
+	return true;
+}
+
+static inline void wb_get(struct bdi_writeback *wb)
+{
+}
+
+static inline void wb_put(struct bdi_writeback *wb)
+{
+}
+
+#endif	/* CONFIG_CGROUP_WRITEBACK */
+
 #endif	/* __LINUX_BACKING_DEV_DEFS_H */
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index e9d7373f5f93..5c978a924157 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -243,7 +243,6 @@ void wb_congested_put(struct bdi_writeback_congested *congested);
 struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
 				    struct cgroup_subsys_state *memcg_css,
 				    gfp_t gfp);
-void __inode_attach_wb(struct inode *inode, struct page *page);
 void wb_memcg_offline(struct mem_cgroup *memcg);
 void wb_blkcg_offline(struct blkcg *blkcg);
 int inode_congested(struct inode *inode, int cong_bits);
@@ -264,37 +263,6 @@ static inline bool inode_cgwb_enabled(struct inode *inode)
 		(inode->i_sb->s_type->fs_flags & FS_CGROUP_WRITEBACK);
 }
 
-/**
- * wb_tryget - try to increment a wb's refcount
- * @wb: bdi_writeback to get
- */
-static inline bool wb_tryget(struct bdi_writeback *wb)
-{
-	if (wb != &wb->bdi->wb)
-		return percpu_ref_tryget(&wb->refcnt);
-	return true;
-}
-
-/**
- * wb_get - increment a wb's refcount
- * @wb: bdi_writeback to get
- */
-static inline void wb_get(struct bdi_writeback *wb)
-{
-	if (wb != &wb->bdi->wb)
-		percpu_ref_get(&wb->refcnt);
-}
-
-/**
- * wb_put - decrement a wb's refcount
- * @wb: bdi_writeback to put
- */
-static inline void wb_put(struct bdi_writeback *wb)
-{
-	if (wb != &wb->bdi->wb)
-		percpu_ref_put(&wb->refcnt);
-}
-
 /**
  * wb_find_current - find wb for %current on a bdi
  * @bdi: bdi of interest
@@ -353,35 +321,6 @@ wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp)
 	return wb;
 }
 
-/**
- * inode_attach_wb - associate an inode with its wb
- * @inode: inode of interest
- * @page: page being dirtied (may be NULL)
- *
- * If @inode doesn't have its wb, associate it with the wb matching the
- * memcg of @page or, if @page is NULL, %current.  May be called w/ or w/o
- * @inode->i_lock.
- */
-static inline void inode_attach_wb(struct inode *inode, struct page *page)
-{
-	if (!inode->i_wb)
-		__inode_attach_wb(inode, page);
-}
-
-/**
- * inode_detach_wb - disassociate an inode from its wb
- * @inode: inode of interest
- *
- * @inode is being freed.  Detach from its wb.
- */
-static inline void inode_detach_wb(struct inode *inode)
-{
-	if (inode->i_wb) {
-		wb_put(inode->i_wb);
-		inode->i_wb = NULL;
-	}
-}
-
 /**
  * inode_to_wb - determine the wb of an inode
  * @inode: inode of interest
@@ -471,19 +410,6 @@ static inline void wb_congested_put(struct bdi_writeback_congested *congested)
 {
 }
 
-static inline bool wb_tryget(struct bdi_writeback *wb)
-{
-	return true;
-}
-
-static inline void wb_get(struct bdi_writeback *wb)
-{
-}
-
-static inline void wb_put(struct bdi_writeback *wb)
-{
-}
-
 static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi)
 {
 	return &bdi->wb;
@@ -495,14 +421,6 @@ wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp)
 	return &bdi->wb;
 }
 
-static inline void inode_attach_wb(struct inode *inode, struct page *page)
-{
-}
-
-static inline void inode_detach_wb(struct inode *inode)
-{
-}
-
 static inline struct bdi_writeback *inode_to_wb(struct inode *inode)
 {
 	return &inode_to_bdi(inode)->wb;
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 3b73e97ecfc7..6726b7e56beb 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -8,6 +8,7 @@
 #include <linux/workqueue.h>
 #include <linux/fs.h>
 #include <linux/flex_proportions.h>
+#include <linux/backing-dev-defs.h>
 
 DECLARE_PER_CPU(int, dirty_throttle_leaks);
 
@@ -173,6 +174,51 @@ static inline void wait_on_inode(struct inode *inode)
 	wait_on_bit(&inode->i_state, __I_NEW, TASK_UNINTERRUPTIBLE);
 }
 
+#ifdef CONFIG_CGROUP_WRITEBACK
+
+void __inode_attach_wb(struct inode *inode, struct page *page);
+
+/**
+ * inode_attach_wb - associate an inode with its wb
+ * @inode: inode of interest
+ * @page: page being dirtied (may be NULL)
+ *
+ * If @inode doesn't have its wb, associate it with the wb matching the
+ * memcg of @page or, if @page is NULL, %current.  May be called w/ or w/o
+ * @inode->i_lock.
+ */
+static inline void inode_attach_wb(struct inode *inode, struct page *page)
+{
+	if (!inode->i_wb)
+		__inode_attach_wb(inode, page);
+}
+
+/**
+ * inode_detach_wb - disassociate an inode from its wb
+ * @inode: inode of interest
+ *
+ * @inode is being freed.  Detach from its wb.
+ */
+static inline void inode_detach_wb(struct inode *inode)
+{
+	if (inode->i_wb) {
+		wb_put(inode->i_wb);
+		inode->i_wb = NULL;
+	}
+}
+
+#else	/* CONFIG_CGROUP_WRITEBACK */
+
+static inline void inode_attach_wb(struct inode *inode, struct page *page)
+{
+}
+
+static inline void inode_detach_wb(struct inode *inode)
+{
+}
+
+#endif	/* CONFIG_CGROUP_WRITEBACK */
+
 /*
  * mm/page-writeback.c
  */
-- 
cgit v1.2.3


From b16b1deb553adcd7b3b7ce3e6d6fd1b923f314da Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 2 Jun 2015 08:39:48 -0600
Subject: writeback: make writeback_control track the inode being written back

Currently, for cgroup writeback, the IO submission paths directly
associate the bio's with the blkcg from inode_to_wb_blkcg_css();
however, it'd be necessary to keep more writeback context to implement
foreign inode writeback detection.  wbc (writeback_control) is the
natural fit for the extra context - it persists throughout the
writeback of each inode and is passed all the way down to IO
submission paths.

This patch adds wbc_attach_and_unlock_inode(), wbc_detach_inode(), and
wbc_attach_fdatawrite_inode() which are used to associate wbc with the
inode being written back.  IO submission paths now use wbc_init_bio()
instead of directly associating bio's with blkcg themselves.  This
leaves inode_to_wb_blkcg_css() w/o any user.  The function is removed.

wbc currently only tracks the associated wb (bdi_writeback).  Future
patches will add more for foreign inode detection.  The association is
established under i_lock which will be depended upon when migrating
foreign inodes to other wb's.

As currently, once established, inode to wb association never changes,
going through wbc when initializing bio's doesn't cause any behavior
changes.

v2: submit_blk_blkcg() now checks whether the wbc is associated with a
    wb before dereferencing it.  This can happen when pageout() is
    writing pages directly without going through the usual writeback
    path.  As pageout() path is single-threaded, we don't want it to
    be blocked behind a slow cgroup and ultimately want it to delegate
    actual writing to the usual writeback path.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jan Kara <jack@suse.cz>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Greg Thelen <gthelen@google.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/backing-dev.h | 12 --------
 include/linux/writeback.h   | 68 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 68 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 5c978a924157..b1d2489a6536 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -332,12 +332,6 @@ static inline struct bdi_writeback *inode_to_wb(struct inode *inode)
 	return inode->i_wb;
 }
 
-static inline struct cgroup_subsys_state *
-inode_to_wb_blkcg_css(struct inode *inode)
-{
-	return inode_to_wb(inode)->blkcg_css;
-}
-
 struct wb_iter {
 	int			start_blkcg_id;
 	struct radix_tree_iter	tree_iter;
@@ -434,12 +428,6 @@ static inline void wb_blkcg_offline(struct blkcg *blkcg)
 {
 }
 
-static inline struct cgroup_subsys_state *
-inode_to_wb_blkcg_css(struct inode *inode)
-{
-	return blkcg_root_css;
-}
-
 struct wb_iter {
 	int		next_id;
 };
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 6726b7e56beb..8f964e558af5 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -86,6 +86,9 @@ struct writeback_control {
 	unsigned for_reclaim:1;		/* Invoked from the page allocator */
 	unsigned range_cyclic:1;	/* range_start is cyclic */
 	unsigned for_sync:1;		/* sync(2) WB_SYNC_ALL writeback */
+#ifdef CONFIG_CGROUP_WRITEBACK
+	struct bdi_writeback *wb;	/* wb this writeback is issued under */
+#endif
 };
 
 /*
@@ -176,7 +179,14 @@ static inline void wait_on_inode(struct inode *inode)
 
 #ifdef CONFIG_CGROUP_WRITEBACK
 
+#include <linux/cgroup.h>
+#include <linux/bio.h>
+
 void __inode_attach_wb(struct inode *inode, struct page *page);
+void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
+				 struct inode *inode)
+	__releases(&inode->i_lock);
+void wbc_detach_inode(struct writeback_control *wbc);
 
 /**
  * inode_attach_wb - associate an inode with its wb
@@ -207,6 +217,44 @@ static inline void inode_detach_wb(struct inode *inode)
 	}
 }
 
+/**
+ * wbc_attach_fdatawrite_inode - associate wbc and inode for fdatawrite
+ * @wbc: writeback_control of interest
+ * @inode: target inode
+ *
+ * This function is to be used by __filemap_fdatawrite_range(), which is an
+ * alternative entry point into writeback code, and first ensures @inode is
+ * associated with a bdi_writeback and attaches it to @wbc.
+ */
+static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc,
+					       struct inode *inode)
+{
+	spin_lock(&inode->i_lock);
+	inode_attach_wb(inode, NULL);
+	wbc_attach_and_unlock_inode(wbc, inode);
+}
+
+/**
+ * wbc_init_bio - writeback specific initializtion of bio
+ * @wbc: writeback_control for the writeback in progress
+ * @bio: bio to be initialized
+ *
+ * @bio is a part of the writeback in progress controlled by @wbc.  Perform
+ * writeback specific initialization.  This is used to apply the cgroup
+ * writeback context.
+ */
+static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
+{
+	/*
+	 * pageout() path doesn't attach @wbc to the inode being written
+	 * out.  This is intentional as we don't want the function to block
+	 * behind a slow cgroup.  Ultimately, we want pageout() to kick off
+	 * regular writeback instead of writing things out itself.
+	 */
+	if (wbc->wb)
+		bio_associate_blkcg(bio, wbc->wb->blkcg_css);
+}
+
 #else	/* CONFIG_CGROUP_WRITEBACK */
 
 static inline void inode_attach_wb(struct inode *inode, struct page *page)
@@ -217,6 +265,26 @@ static inline void inode_detach_wb(struct inode *inode)
 {
 }
 
+static inline void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
+					       struct inode *inode)
+	__releases(&inode->i_lock)
+{
+	spin_unlock(&inode->i_lock);
+}
+
+static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc,
+					       struct inode *inode)
+{
+}
+
+static inline void wbc_detach_inode(struct writeback_control *wbc)
+{
+}
+
+static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
+{
+}
+
 #endif	/* CONFIG_CGROUP_WRITEBACK */
 
 /*
-- 
cgit v1.2.3


From 2a81490811d0296d390c571bb64eaa93e5ed7def Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 28 May 2015 14:50:51 -0400
Subject: writeback: implement foreign cgroup inode detection

As concurrent write sharing of an inode is expected to be very rare
and memcg only tracks page ownership on first-use basis severely
confining the usefulness of such sharing, cgroup writeback tracks
ownership per-inode.  While the support for concurrent write sharing
of an inode is deemed unnecessary, an inode being written to by
different cgroups at different points in time is a lot more common,
and, more importantly, charging only by first-use can too readily lead
to grossly incorrect behaviors (single foreign page can lead to
gigabytes of writeback to be incorrectly attributed).

To resolve this issue, cgroup writeback detects the majority dirtier
of an inode and will transfer the ownership to it.  To avoid
unnnecessary oscillation, the detection mechanism keeps track of
history and gives out the switch verdict only if the foreign usage
pattern is stable over a certain amount of time and/or writeback
attempts.

The detection mechanism has fairly low space and computation overhead.
It adds 8 bytes to struct inode (one int and two u16's) and minimal
amount of calculation per IO.  The detection mechanism converges to
the correct answer usually in several seconds of IO time when there's
a clear majority dirtier.  Even when there isn't, it can reach an
acceptable answer fairly quickly under most circumstances.

Please see wb_detach_inode() for more details.

This patch only implements detection.  Following patches will
implement actual switching.

v2: wbc_account_io() now checks whether the wbc is associated with a
    wb before dereferencing it.  This can happen when pageout() is
    writing pages directly without going through the usual writeback
    path.  As pageout() path is single-threaded, we don't want it to
    be blocked behind a slow cgroup and ultimately want it to delegate
    actual writing to the usual writeback path.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jan Kara <jack@suse.cz>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Greg Thelen <gthelen@google.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/fs.h        |  5 +++++
 include/linux/writeback.h | 16 ++++++++++++++++
 2 files changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 67a42ec95065..740126d7c44e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -638,6 +638,11 @@ struct inode {
 	struct list_head	i_wb_list;	/* backing dev IO list */
 #ifdef CONFIG_CGROUP_WRITEBACK
 	struct bdi_writeback	*i_wb;		/* the associated cgroup wb */
+
+	/* foreign inode detection, see wbc_detach_inode() */
+	int			i_wb_frn_winner;
+	u16			i_wb_frn_avg_time;
+	u16			i_wb_frn_history;
 #endif
 	struct list_head	i_lru;		/* inode LRU list */
 	struct list_head	i_sb_list;
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 8f964e558af5..b333c945e571 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -88,6 +88,15 @@ struct writeback_control {
 	unsigned for_sync:1;		/* sync(2) WB_SYNC_ALL writeback */
 #ifdef CONFIG_CGROUP_WRITEBACK
 	struct bdi_writeback *wb;	/* wb this writeback is issued under */
+	struct inode *inode;		/* inode being written out */
+
+	/* foreign inode detection, see wbc_detach_inode() */
+	int wb_id;			/* current wb id */
+	int wb_lcand_id;		/* last foreign candidate wb id */
+	int wb_tcand_id;		/* this foreign candidate wb id */
+	size_t wb_bytes;		/* bytes written by current wb */
+	size_t wb_lcand_bytes;		/* bytes written by last candidate */
+	size_t wb_tcand_bytes;		/* bytes written by this candidate */
 #endif
 };
 
@@ -187,6 +196,8 @@ void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
 				 struct inode *inode)
 	__releases(&inode->i_lock);
 void wbc_detach_inode(struct writeback_control *wbc);
+void wbc_account_io(struct writeback_control *wbc, struct page *page,
+		    size_t bytes);
 
 /**
  * inode_attach_wb - associate an inode with its wb
@@ -285,6 +296,11 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
 {
 }
 
+static inline void wbc_account_io(struct writeback_control *wbc,
+				  struct page *page, size_t bytes)
+{
+}
+
 #endif	/* CONFIG_CGROUP_WRITEBACK */
 
 /*
-- 
cgit v1.2.3


From 682aa8e1a6a1504a4caaa62e6c2c9daae3757210 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 28 May 2015 14:50:53 -0400
Subject: writeback: implement unlocked_inode_to_wb transaction and use it for
 stat updates

The mechanism for detecting whether an inode should switch its wb
(bdi_writeback) association is now in place.  This patch build the
framework for the actual switching.

This patch adds a new inode flag I_WB_SWITCHING, which has two
functions.  First, the easy one, it ensures that there's only one
switching in progress for a give inode.  Second, it's used as a
mechanism to synchronize wb stat updates.

The two stats, WB_RECLAIMABLE and WB_WRITEBACK, aren't event counters
but track the current number of dirty pages and pages under writeback
respectively.  As such, when an inode is moved from one wb to another,
the inode's portion of those stats have to be transferred together;
unfortunately, this is a bit tricky as those stat updates are percpu
operations which are performed without holding any lock in some
places.

This patch solves the problem in a similar way as memcg.  Each such
lockless stat updates are wrapped in transaction surrounded by
unlocked_inode_to_wb_begin/end().  During normal operation, they map
to rcu_read_lock/unlock(); however, if I_WB_SWITCHING is asserted,
mapping->tree_lock is grabbed across the transaction.

In turn, the switching path sets I_WB_SWITCHING and waits for a RCU
grace period to pass before actually starting to switch, which
guarantees that all stat update paths are synchronizing against
mapping->tree_lock.

This patch still doesn't implement the actual switching.

v3: Updated on top of the recent cancel_dirty_page() updates.
    unlocked_inode_to_wb_begin() now nests inside
    mem_cgroup_begin_page_stat() to match the locking order.

v2: The i_wb access transaction will be used for !stat accesses too.
    Function names and comments updated accordingly.

    s/inode_wb_stat_unlocked_{begin|end}/unlocked_inode_to_wb_{begin|end}/
    s/switch_wb/switch_wbs/

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jan Kara <jack@suse.cz>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Greg Thelen <gthelen@google.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/backing-dev.h | 54 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fs.h          |  6 +++++
 include/linux/mm.h          |  3 ++-
 3 files changed, 62 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index b1d2489a6536..73ffa32e58ee 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -332,6 +332,50 @@ static inline struct bdi_writeback *inode_to_wb(struct inode *inode)
 	return inode->i_wb;
 }
 
+/**
+ * unlocked_inode_to_wb_begin - begin unlocked inode wb access transaction
+ * @inode: target inode
+ * @lockedp: temp bool output param, to be passed to the end function
+ *
+ * The caller wants to access the wb associated with @inode but isn't
+ * holding inode->i_lock, mapping->tree_lock or wb->list_lock.  This
+ * function determines the wb associated with @inode and ensures that the
+ * association doesn't change until the transaction is finished with
+ * unlocked_inode_to_wb_end().
+ *
+ * The caller must call unlocked_inode_to_wb_end() with *@lockdep
+ * afterwards and can't sleep during transaction.  IRQ may or may not be
+ * disabled on return.
+ */
+static inline struct bdi_writeback *
+unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp)
+{
+	rcu_read_lock();
+
+	/*
+	 * Paired with store_release in inode_switch_wb_work_fn() and
+	 * ensures that we see the new wb if we see cleared I_WB_SWITCH.
+	 */
+	*lockedp = smp_load_acquire(&inode->i_state) & I_WB_SWITCH;
+
+	if (unlikely(*lockedp))
+		spin_lock_irq(&inode->i_mapping->tree_lock);
+	return inode_to_wb(inode);
+}
+
+/**
+ * unlocked_inode_to_wb_end - end inode wb access transaction
+ * @inode: target inode
+ * @locked: *@lockedp from unlocked_inode_to_wb_begin()
+ */
+static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked)
+{
+	if (unlikely(locked))
+		spin_unlock_irq(&inode->i_mapping->tree_lock);
+
+	rcu_read_unlock();
+}
+
 struct wb_iter {
 	int			start_blkcg_id;
 	struct radix_tree_iter	tree_iter;
@@ -420,6 +464,16 @@ static inline struct bdi_writeback *inode_to_wb(struct inode *inode)
 	return &inode_to_bdi(inode)->wb;
 }
 
+static inline struct bdi_writeback *
+unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp)
+{
+	return inode_to_wb(inode);
+}
+
+static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked)
+{
+}
+
 static inline void wb_memcg_offline(struct mem_cgroup *memcg)
 {
 }
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 740126d7c44e..b5e1dcfbc5e3 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1815,6 +1815,11 @@ struct super_operations {
  *
  * I_DIO_WAKEUP		Never set.  Only used as a key for wait_on_bit().
  *
+ * I_WB_SWITCH		Cgroup bdi_writeback switching in progress.  Used to
+ *			synchronize competing switching instances and to tell
+ *			wb stat updates to grab mapping->tree_lock.  See
+ *			inode_switch_wb_work_fn() for details.
+ *
  * Q: What is the difference between I_WILL_FREE and I_FREEING?
  */
 #define I_DIRTY_SYNC		(1 << 0)
@@ -1834,6 +1839,7 @@ struct super_operations {
 #define I_DIRTY_TIME		(1 << 11)
 #define __I_DIRTY_TIME_EXPIRED	12
 #define I_DIRTY_TIME_EXPIRED	(1 << __I_DIRTY_TIME_EXPIRED)
+#define I_WB_SWITCH		(1 << 13)
 
 #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
 #define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f48d979ced4b..4024543b4203 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -27,6 +27,7 @@ struct anon_vma_chain;
 struct file_ra_state;
 struct user_struct;
 struct writeback_control;
+struct bdi_writeback;
 
 #ifndef CONFIG_NEED_MULTIPLE_NODES	/* Don't use mapnrs, do it properly */
 extern unsigned long max_mapnr;
@@ -1214,7 +1215,7 @@ int redirty_page_for_writepage(struct writeback_control *wbc,
 void account_page_dirtied(struct page *page, struct address_space *mapping,
 			  struct mem_cgroup *memcg);
 void account_page_cleaned(struct page *page, struct address_space *mapping,
-			  struct mem_cgroup *memcg);
+			  struct mem_cgroup *memcg, struct bdi_writeback *wb);
 int set_page_dirty(struct page *page);
 int set_page_dirty_lock(struct page *page);
 void cancel_dirty_page(struct page *page);
-- 
cgit v1.2.3


From aaa2cacf8184e2a92accb8e443b1608d65f9a13f Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 28 May 2015 14:50:55 -0400
Subject: writeback: add lockdep annotation to inode_to_wb()

With the previous three patches, all operations which acquire wb from
inode are either under one of inode->i_lock, mapping->tree_lock or
wb->list_lock or protected by unlocked_inode_to_wb transaction.  This
will be depended upon by foreign inode wb switching.

This patch adds lockdep assertion to inode_to_wb() so that usages
outside the above list locks can be caught easily.  There are three
exceptions.

* locked_inode_to_wb_and_lock_list() is holding wb->list_lock but the
  wb may not be the inode's.  Ensuring that is the function's role
  after all.  Updated to deref inode->i_wb directly.

* inode_wb_stat_unlocked_begin() is usually protected by combination
  of !I_WB_SWITCH and rcu_read_lock().  Updated to deref inode->i_wb
  directly.

* inode_congested() wants to test whether inode->i_wb is set before
  starting the transaction.  Added inode_to_wb_is_valid() which tests
  inode->i_wb directly.

v5: might_lock() removed.  It annotates that the lock is grabbed w/
    irq enabled which isn't the case and triggering lockdep warning
    spuriously.

v4: might_lock() added to unlocked_inode_to_wb_begin().

v3: inode_congested() conversion added.

v2: locked_inode_to_wb_and_lock_list() was missing in the first
    version.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jan Kara <jack@suse.cz>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Greg Thelen <gthelen@google.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/backing-dev.h | 34 ++++++++++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 73ffa32e58ee..dfce80869145 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -321,14 +321,34 @@ wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp)
 	return wb;
 }
 
+/**
+ * inode_to_wb_is_valid - test whether an inode has a wb associated
+ * @inode: inode of interest
+ *
+ * Returns %true if @inode has a wb associated.  May be called without any
+ * locking.
+ */
+static inline bool inode_to_wb_is_valid(struct inode *inode)
+{
+	return inode->i_wb;
+}
+
 /**
  * inode_to_wb - determine the wb of an inode
  * @inode: inode of interest
  *
- * Returns the wb @inode is currently associated with.
+ * Returns the wb @inode is currently associated with.  The caller must be
+ * holding either @inode->i_lock, @inode->i_mapping->tree_lock, or the
+ * associated wb's list_lock.
  */
 static inline struct bdi_writeback *inode_to_wb(struct inode *inode)
 {
+#ifdef CONFIG_LOCKDEP
+	WARN_ON_ONCE(debug_locks &&
+		     (!lockdep_is_held(&inode->i_lock) &&
+		      !lockdep_is_held(&inode->i_mapping->tree_lock) &&
+		      !lockdep_is_held(&inode->i_wb->list_lock)));
+#endif
 	return inode->i_wb;
 }
 
@@ -360,7 +380,12 @@ unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp)
 
 	if (unlikely(*lockedp))
 		spin_lock_irq(&inode->i_mapping->tree_lock);
-	return inode_to_wb(inode);
+
+	/*
+	 * Protected by either !I_WB_SWITCH + rcu_read_lock() or tree_lock.
+	 * inode_to_wb() will bark.  Deref directly.
+	 */
+	return inode->i_wb;
 }
 
 /**
@@ -459,6 +484,11 @@ wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp)
 	return &bdi->wb;
 }
 
+static inline bool inode_to_wb_is_valid(struct inode *inode)
+{
+	return true;
+}
+
 static inline struct bdi_writeback *inode_to_wb(struct inode *inode)
 {
 	return &inode_to_bdi(inode)->wb;
-- 
cgit v1.2.3


From e8a7abf5a5bd302a1e06a3c21a629eaa4cba57d6 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 28 May 2015 14:50:57 -0400
Subject: writeback: disassociate inodes from dying bdi_writebacks

For the purpose of foreign inode detection, wb's (bdi_writeback's) are
identified by the associated memcg ID.  As we create a separate wb for
each memcg, this is enough to identify the active wb's; however, when
blkcg is enabled or disabled higher up in the hierarchy, the mapping
between memcg and blkcg changes which in turn creates a new wb to
service the new mapping.  The old wb is unlinked from index and
released after all references are drained.  The foreign inode
detection logic can't detect this condition because both the old and
new wb's point to the same memcg and thus never decides to move inodes
attached to the old wb to the new one.

This patch adds logic to initiate switching immediately in
wbc_attach_and_unlock_inode() if the associated wb is dying.  We can
make the usual foreign detection logic to distinguish the different
wb's mapped to the memcg but the dying wb is never gonna be in active
service again and there's no point in tracking the usage history and
reaching the switch verdict after enough data points are collected.
It's already known that the wb has to be switched.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jan Kara <jack@suse.cz>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Greg Thelen <gthelen@google.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/backing-dev-defs.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index e047b496a0b9..a48d90e3bcbb 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -219,6 +219,17 @@ static inline void wb_put(struct bdi_writeback *wb)
 		percpu_ref_put(&wb->refcnt);
 }
 
+/**
+ * wb_dying - is a wb dying?
+ * @wb: bdi_writeback of interest
+ *
+ * Returns whether @wb is unlinked and being drained.
+ */
+static inline bool wb_dying(struct bdi_writeback *wb)
+{
+	return percpu_ref_is_dying(&wb->refcnt);
+}
+
 #else	/* CONFIG_CGROUP_WRITEBACK */
 
 static inline bool wb_tryget(struct bdi_writeback *wb)
@@ -234,6 +245,11 @@ static inline void wb_put(struct bdi_writeback *wb)
 {
 }
 
+static inline bool wb_dying(struct bdi_writeback *wb)
+{
+	return false;
+}
+
 #endif	/* CONFIG_CGROUP_WRITEBACK */
 
 #endif	/* __LINUX_BACKING_DEV_DEFS_H */
-- 
cgit v1.2.3


From be3ef76e9d9b97962c70bd6351787d29071ae481 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 2 Jun 2015 14:30:11 +0200
Subject: clockevents: Rename state to state_use_accessors

The only sensible way to make abuse of core internal fields obvious
and easy to grep for.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: Peter Zijlstra <peterz@infradead.org>
---
 include/linux/clockchips.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 64214ad85af9..597a1e836f22 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -87,7 +87,7 @@ enum clock_event_state {
  * @mult:		nanosecond to cycles multiplier
  * @shift:		nanoseconds to cycles divisor (power of two)
  * @mode:		operating mode, relevant only to ->set_mode(), OBSOLETE
- * @state:		current state of the device, assigned by the core code
+ * @state_use_accessors:current state of the device, assigned by the core code
  * @features:		features
  * @retries:		number of forced programming retries
  * @set_mode:		legacy set mode function, only for modes <= CLOCK_EVT_MODE_RESUME.
@@ -117,7 +117,7 @@ struct clock_event_device {
 	u32			mult;
 	u32			shift;
 	enum clock_event_mode	mode;
-	enum clock_event_state	state;
+	enum clock_event_state	state_use_accessors;
 	unsigned int		features;
 	unsigned long		retries;
 
@@ -152,27 +152,27 @@ struct clock_event_device {
 /* Helpers to verify state of a clockevent device */
 static inline bool clockevent_state_detached(struct clock_event_device *dev)
 {
-	return dev->state == CLOCK_EVT_STATE_DETACHED;
+	return dev->state_use_accessors == CLOCK_EVT_STATE_DETACHED;
 }
 
 static inline bool clockevent_state_shutdown(struct clock_event_device *dev)
 {
-	return dev->state == CLOCK_EVT_STATE_SHUTDOWN;
+	return dev->state_use_accessors == CLOCK_EVT_STATE_SHUTDOWN;
 }
 
 static inline bool clockevent_state_periodic(struct clock_event_device *dev)
 {
-	return dev->state == CLOCK_EVT_STATE_PERIODIC;
+	return dev->state_use_accessors == CLOCK_EVT_STATE_PERIODIC;
 }
 
 static inline bool clockevent_state_oneshot(struct clock_event_device *dev)
 {
-	return dev->state == CLOCK_EVT_STATE_ONESHOT;
+	return dev->state_use_accessors == CLOCK_EVT_STATE_ONESHOT;
 }
 
 static inline bool clockevent_state_oneshot_stopped(struct clock_event_device *dev)
 {
-	return dev->state == CLOCK_EVT_STATE_ONESHOT_STOPPED;
+	return dev->state_use_accessors == CLOCK_EVT_STATE_ONESHOT_STOPPED;
 }
 
 /*
-- 
cgit v1.2.3


From 24bbd929e6b9e62afd263c42b4318d3b603c956c Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 1 Jun 2015 13:40:31 +0200
Subject: of/fdt: split off FDT self reservation from memreserve processing

This splits off the reservation of the memory occupied by the FDT
binary itself from the processing of the memory reservations it
contains. This is necessary because the physical address of the FDT,
which is needed to perform the reservation, may not be known to the
FDT driver core, i.e., it may be mapped outside the linear direct
mapping, in which case __pa() returns a bogus value.

Cc: Russell King <linux@arm.linux.org.uk>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Acked-by: Rob Herring <robh@kernel.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 include/linux/of_fdt.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index 587ee507965d..fd627a58068f 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -64,6 +64,7 @@ extern int early_init_dt_scan_chosen(unsigned long node, const char *uname,
 extern int early_init_dt_scan_memory(unsigned long node, const char *uname,
 				     int depth, void *data);
 extern void early_init_fdt_scan_reserved_mem(void);
+extern void early_init_fdt_reserve_self(void);
 extern void early_init_dt_add_memory_arch(u64 base, u64 size);
 extern int early_init_dt_reserve_memory_arch(phys_addr_t base, phys_addr_t size,
 					     bool no_map);
@@ -91,6 +92,7 @@ extern u64 fdt_translate_address(const void *blob, int node_offset);
 extern void of_fdt_limit_memory(int limit);
 #else /* CONFIG_OF_FLATTREE */
 static inline void early_init_fdt_scan_reserved_mem(void) {}
+static inline void early_init_fdt_reserve_self(void) {}
 static inline const char *of_flat_dt_get_machine_name(void) { return NULL; }
 static inline void unflatten_device_tree(void) {}
 static inline void unflatten_and_copy_device_tree(void) {}
-- 
cgit v1.2.3


From cfaed10d1f27d036b72bbdc6b1e59ea28c38ec7f Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Mon, 1 Jun 2015 11:15:25 -0500
Subject: scatterlist: introduce sg_nents_for_len

When performing a dma_map_sg() call, the number of sg entries to map is
required. Using sg_nents to retrieve the number of sg entries will
return the total number of entries in the sg list up to the entry marked
as the end. If there happen to be unused entries in the list, these will
still be counted. Some dma_map_sg() implementations will not handle the
unused entries correctly (lib/swiotlb.c) and execute a BUG_ON.

The sg_nents_for_len() function will traverse the sg list and return the
number of entries required to satisfy the supplied length argument. This
can then be supplied to the dma_map_sg() call to successfully map the
sg.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/scatterlist.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index ed8f9e70df9b..a0edb992c9c3 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -221,6 +221,7 @@ static inline void *sg_virt(struct scatterlist *sg)
 }
 
 int sg_nents(struct scatterlist *sg);
+int sg_nents_for_len(struct scatterlist *sg, u64 len);
 struct scatterlist *sg_next(struct scatterlist *);
 struct scatterlist *sg_last(struct scatterlist *s, unsigned int);
 void sg_init_table(struct scatterlist *, unsigned int);
-- 
cgit v1.2.3


From d6472302f242559d45dcf4ebace62508dc4d8aeb Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Tue, 2 Jun 2015 19:01:38 +1000
Subject: x86/mm: Decouple <linux/vmalloc.h> from <asm/io.h>

Nothing in <asm/io.h> uses anything from <linux/vmalloc.h>, so
remove it from there and fix up the resulting build problems
triggered on x86 {64|32}-bit {def|allmod|allno}configs.

The breakages were triggering in places where x86 builds relied
on vmalloc() facilities but did not include <linux/vmalloc.h>
explicitly and relied on the implicit inclusion via <asm/io.h>.

Also add:

  - <linux/init.h> to <linux/io.h>
  - <asm/pgtable_types> to <asm/io.h>

... which were two other implicit header file dependencies.

Suggested-by: David Miller <davem@davemloft.net>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
[ Tidied up the changelog. ]
Acked-by: David Miller <davem@davemloft.net>
Acked-by: Takashi Iwai <tiwai@suse.de>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Vinod Koul <vinod.koul@intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Anton Vorontsov <anton@enomsg.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Colin Cross <ccross@android.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: James E.J. Bottomley <JBottomley@odin.com>
Cc: Jaroslav Kysela <perex@perex.cz>
Cc: K. Y. Srinivasan <kys@microsoft.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Kristen Carlson Accardi <kristen@linux.intel.com>
Cc: Len Brown <lenb@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rafael J. Wysocki <rjw@rjwysocki.net>
Cc: Suma Ramars <sramars@cisco.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/io.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/io.h b/include/linux/io.h
index 04cce4da3685..fb5a99800e77 100644
--- a/include/linux/io.h
+++ b/include/linux/io.h
@@ -19,6 +19,7 @@
 #define _LINUX_IO_H
 
 #include <linux/types.h>
+#include <linux/init.h>
 #include <asm/io.h>
 #include <asm/page.h>
 
-- 
cgit v1.2.3


From da7049f834c3582c1ed1a04889bda5b4121973c0 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 26 May 2015 13:49:07 -0400
Subject: svcrdma: Remove svc_rdma_xdr_decode_deferred_req()

svc_rdma_xdr_decode_deferred_req() indexes an array with an
un-byte-swapped value off the wire. Fortunately this function
isn't used anywhere, so simply remove it.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_rdma.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index df8edf8ec914..8ad9b6d9d4e0 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -182,7 +182,6 @@ struct svcxprt_rdma {
 
 /* svc_rdma_marshal.c */
 extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *);
-extern int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *);
 extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *,
 				     struct rpcrdma_msg *,
 				     enum rpcrdma_errcode, u32 *);
-- 
cgit v1.2.3


From e842f2903908934187af7232fb5b21da527d1757 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Thu, 4 Jun 2015 09:18:18 +1000
Subject: dax: don't abuse get_block mapping for endio callbacks

dax_fault() currently relies on the get_block callback to attach an
io completion callback to the mapping buffer head so that it can
run unwritten extent conversion after zeroing allocated blocks.

Instead of this hack, pass the conversion callback directly into
dax_fault() similar to the get_block callback. When the filesystem
allocates unwritten extents, it will set the buffer_unwritten()
flag, and hence the dax_fault code can call the completion function
in the contexts where it is necessary without overloading the
mapping buffer head.

Note: The changes to ext4 to use this interface are suspect at best.
In fact, the way ext4 did this end_io assignment in the first place
looks suspect because it only set a completion callback when there
wasn't already some other write() call taking place on the same
inode. The ext4 end_io code looks rather intricate and fragile with
all it's reference counting and passing to different contexts for
modification via inode private pointers that aren't protected by
locks...

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 include/linux/fs.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 35ec87e490b1..c9b4cca9e08d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -70,6 +70,7 @@ typedef int (get_block_t)(struct inode *inode, sector_t iblock,
 			struct buffer_head *bh_result, int create);
 typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 			ssize_t bytes, void *private);
+typedef void (dax_iodone_t)(struct buffer_head *bh_map, int uptodate);
 
 #define MAY_EXEC		0x00000001
 #define MAY_WRITE		0x00000002
@@ -2627,9 +2628,10 @@ ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, loff_t,
 int dax_clear_blocks(struct inode *, sector_t block, long size);
 int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
 int dax_truncate_page(struct inode *, loff_t from, get_block_t);
-int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t);
+int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
+		dax_iodone_t);
 int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *);
-#define dax_mkwrite(vma, vmf, gb)	dax_fault(vma, vmf, gb)
+#define dax_mkwrite(vma, vmf, gb, iod)	dax_fault(vma, vmf, gb, iod)
 
 #ifdef CONFIG_BLOCK
 typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode,
-- 
cgit v1.2.3


From ce5c5d554dc47a4fb4360c84b72231fea081e7a0 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Thu, 4 Jun 2015 09:18:18 +1000
Subject: dax: expose __dax_fault for filesystems with locking constraints

Some filesystems cannot call dax_fault() directly because they have
different locking and/or allocation constraints in the page fault IO
path. To handle this, we need to follow the same model as the
generic block_page_mkwrite code, where the internals are exposed via
__block_page_mkwrite() so that filesystems can wrap the correct
locking and operations around the outside.

This is loosely based on a patch originally from Matthew Willcox.
Unlike the original patch, it does not change ext4 code, error
returns or unwritten extent conversion handling.  It also adds a
__dax_mkwrite() wrapper for .page_mkwrite implementations to do the
right thing, too.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 include/linux/fs.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index c9b4cca9e08d..5784377e7c56 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2630,8 +2630,11 @@ int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
 int dax_truncate_page(struct inode *, loff_t from, get_block_t);
 int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
 		dax_iodone_t);
+int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
+		dax_iodone_t);
 int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *);
-#define dax_mkwrite(vma, vmf, gb, iod)	dax_fault(vma, vmf, gb, iod)
+#define dax_mkwrite(vma, vmf, gb, iod)		dax_fault(vma, vmf, gb, iod)
+#define __dax_mkwrite(vma, vmf, gb, iod)	__dax_fault(vma, vmf, gb, iod)
 
 #ifdef CONFIG_BLOCK
 typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode,
-- 
cgit v1.2.3


From 12f7c14aa602f15ad60e5a9da459271f63b92917 Mon Sep 17 00:00:00 2001
From: Masanari Iida <standby24x7@gmail.com>
Date: Thu, 4 Jun 2015 00:01:21 +0900
Subject: crypto: doc - Fix typo in crypto-API.xml

This patch fix some typos found in crypto-API.xml.
It is because the file is generated from comments in sources,
so I had to fix typo in sources.

Signed-off-by: Masanari Iida <standby24x7@gmail.com>
Acked-by: Stephan Mueller <smueller@chronox.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/crypto.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 7d290a91c6f9..25a4b71d6d1f 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -451,7 +451,7 @@ struct compress_alg {
  *		     transformation algorithm.
  * @cra_type: Type of the cryptographic transformation. This is a pointer to
  *	      struct crypto_type, which implements callbacks common for all
- *	      trasnformation types. There are multiple options:
+ *	      transformation types. There are multiple options:
  *	      &crypto_blkcipher_type, &crypto_ablkcipher_type,
  *	      &crypto_ahash_type, &crypto_aead_type, &crypto_rng_type.
  *	      This field might be empty. In that case, there are no common
-- 
cgit v1.2.3


From 64d6067057d9658acb8675afcfba549abdb7fc16 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 7 May 2015 11:36:11 +0200
Subject: KVM: x86: stubs for SMM support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch adds the interface between x86.c and the emulator: the
SMBASE register, a new emulator flag, the RSM instruction.  It also
adds a new request bit that will be used by the KVM_SMI ioctl.

Reviewed-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a8bcbc9c6078..b019fee6d941 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -134,6 +134,7 @@ static inline bool is_error_page(struct page *page)
 #define KVM_REQ_ENABLE_IBS        23
 #define KVM_REQ_DISABLE_IBS       24
 #define KVM_REQ_APIC_PAGE_RELOAD  25
+#define KVM_REQ_SMI               26
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID		0
 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1
-- 
cgit v1.2.3


From 9e7c8f8c62c1e1cda203b5bfaba4575b141e42e7 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Thu, 4 Jun 2015 16:22:16 -0400
Subject: signals: don't abuse __flush_signals() in
 selinux_bprm_committed_creds()

selinux_bprm_committed_creds()->__flush_signals() is not right, we
shouldn't clear TIF_SIGPENDING unconditionally. There can be other
reasons for signal_pending(): freezing(), JOBCTL_PENDING_MASK, and
potentially more.

Also change this code to check fatal_signal_pending() rather than
SIGNAL_GROUP_EXIT, it looks a bit better.

Now we can kill __flush_signals() before it finds another buggy user.

Note: this code looks racy, we can flush a signal which was sent after
the task SID has been updated.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Paul Moore <pmoore@redhat.com>
---
 include/linux/sched.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8222ae40ecb0..4f84aade8b4d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2373,7 +2373,6 @@ extern void sched_dead(struct task_struct *p);
 
 extern void proc_caches_init(void);
 extern void flush_signals(struct task_struct *);
-extern void __flush_signals(struct task_struct *);
 extern void ignore_signals(struct task_struct *);
 extern void flush_signal_handlers(struct task_struct *, int force_default);
 extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info);
-- 
cgit v1.2.3


From 30b7e246a6222f1fbad39b1451273375306fe1e2 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 4 Jun 2015 11:21:10 -0400
Subject: svcrdma: Keep rpcrdma_msg fields in network byte-order

Fields in struct rpcrdma_msg are __be32. Don't byte-swap these
fields when decoding RPC calls and then swap them back for the
reply. For the most part, they can be left alone.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_rdma.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 8ad9b6d9d4e0..c03ca0a1b743 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -184,7 +184,7 @@ struct svcxprt_rdma {
 extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *);
 extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *,
 				     struct rpcrdma_msg *,
-				     enum rpcrdma_errcode, u32 *);
+				     enum rpcrdma_errcode, __be32 *);
 extern void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *, int);
 extern void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *, int);
 extern void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *, int,
-- 
cgit v1.2.3


From b7e0b9a965a116341b4ef86ab98ea2843b218271 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 4 Jun 2015 11:21:20 -0400
Subject: svcrdma: Replace GFP_KERNEL in a loop with GFP_NOFAIL

At the 2015 LSF/MM, it was requested that memory allocation
call sites that request GFP_KERNEL allocations in a loop should be
annotated with __GFP_NOFAIL.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_rdma.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index c03ca0a1b743..d26384b22126 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -211,7 +211,6 @@ extern int svc_rdma_sendto(struct svc_rqst *);
 extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *);
 extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *,
 				enum rpcrdma_errcode);
-struct page *svc_rdma_get_page(void);
 extern int svc_rdma_post_recv(struct svcxprt_rdma *);
 extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
 extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
-- 
cgit v1.2.3


From 0380a3f37540ad0582b3c749a74fc127af914689 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 4 Jun 2015 11:21:32 -0400
Subject: svcrdma: Add a separate "max data segs macro for svcrdma

The server and client maximum are architecturally independent.
Allow changing one without affecting the other.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_rdma.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index d26384b22126..cb94ee4181d4 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -172,6 +172,13 @@ struct svcxprt_rdma {
 #define RDMAXPRT_SQ_PENDING	2
 #define RDMAXPRT_CONN_PENDING	3
 
+#define RPCRDMA_MAX_SVC_SEGS	(64)	/* server max scatter/gather */
+#if RPCSVC_MAXPAYLOAD < (RPCRDMA_MAX_SVC_SEGS << PAGE_SHIFT)
+#define RPCRDMA_MAXPAYLOAD	RPCSVC_MAXPAYLOAD
+#else
+#define RPCRDMA_MAXPAYLOAD	(RPCRDMA_MAX_SVC_SEGS << PAGE_SHIFT)
+#endif
+
 #define RPCRDMA_LISTEN_BACKLOG  10
 /* The default ORD value is based on two outstanding full-size writes with a
  * page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ.  */
-- 
cgit v1.2.3


From 42aecaa9bb2bd57eb8d61b4565cee5d3640863fb Mon Sep 17 00:00:00 2001
From: Tom Herbert <tom@herbertland.com>
Date: Thu, 4 Jun 2015 09:16:39 -0700
Subject: net: Get skb hash over flow_keys structure

This patch changes flow hashing to use jhash2 over the flow_keys
structure instead just doing jhash_3words over src, dst, and ports.
This method will allow us take more input into the hashing function
so that we can include full IPv6 addresses, VLAN, flow labels etc.
without needing to resort to xor'ing which makes for a poor hash.

Acked-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Tom Herbert <tom@herbertland.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6b41c15efa27..cc612fc0a894 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1943,7 +1943,7 @@ static inline void skb_probe_transport_header(struct sk_buff *skb,
 	if (skb_transport_header_was_set(skb))
 		return;
 	else if (skb_flow_dissect_flow_keys(skb, &keys))
-		skb_set_transport_header(skb, keys.basic.thoff);
+		skb_set_transport_header(skb, keys.control.thoff);
 	else
 		skb_set_transport_header(skb, offset_hint);
 }
-- 
cgit v1.2.3


From 01949d0109ee5fae33752f0db99a36f1619e1873 Mon Sep 17 00:00:00 2001
From: Haggai Abramonvsky <hagaya@mellanox.com>
Date: Thu, 4 Jun 2015 19:30:38 +0300
Subject: net/mlx5_core: Enable XRCs and SRQs when using ISSI > 0

When working in ISSI > 0 mode, the model exposed by the device for
XRCs and SRQs is different. XRCs use XRC SRQs and plain SRQs are based
on RPM (Receive Memory Pool).

Add helper functions to create, modify, query, and arm XRC SRQs and RMPs.

Signed-off-by: Haggai Abramovsky <hagaya@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/driver.h   |  6 +++++-
 include/linux/mlx5/mlx5_ifc.h | 16 ++++++++++------
 2 files changed, 15 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 7fa26f03acc1..ba9f212c94bb 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -339,6 +339,8 @@ struct mlx5_core_mr {
 
 enum mlx5_res_type {
 	MLX5_RES_QP,
+	MLX5_RES_SRQ,
+	MLX5_RES_XSRQ,
 };
 
 struct mlx5_core_rsc_common {
@@ -348,6 +350,7 @@ struct mlx5_core_rsc_common {
 };
 
 struct mlx5_core_srq {
+	struct mlx5_core_rsc_common	common; /* must be first */
 	u32		srqn;
 	int		max;
 	int		max_gs;
@@ -640,7 +643,8 @@ struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev,
 void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev,
 				 struct mlx5_cmd_mailbox *head);
 int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
-			 struct mlx5_create_srq_mbox_in *in, int inlen);
+			 struct mlx5_create_srq_mbox_in *in, int inlen,
+			 int is_xrc);
 int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq);
 int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
 			struct mlx5_query_srq_mbox_out *out);
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index b27e9f6e090a..dbe2b32c0539 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -2022,12 +2022,9 @@ struct mlx5_ifc_srqc_bits {
 
 	u8         reserved_9[0x40];
 
-	u8         db_record_addr_h[0x20];
-
-	u8         db_record_addr_l[0x1e];
-	u8         reserved_10[0x2];
+	u8         dbr_addr[0x40];
 
-	u8         reserved_11[0x80];
+	u8         reserved_10[0x80];
 };
 
 enum {
@@ -4167,6 +4164,13 @@ struct mlx5_ifc_modify_rmp_out_bits {
 	u8         reserved_1[0x40];
 };
 
+struct mlx5_ifc_rmp_bitmask_bits {
+	u8	   reserved[0x20];
+
+	u8         reserved1[0x1f];
+	u8         lwm[0x1];
+};
+
 struct mlx5_ifc_modify_rmp_in_bits {
 	u8         opcode[0x10];
 	u8         reserved_0[0x10];
@@ -4180,7 +4184,7 @@ struct mlx5_ifc_modify_rmp_in_bits {
 
 	u8         reserved_3[0x20];
 
-	u8         modify_bitmask[0x40];
+	struct mlx5_ifc_rmp_bitmask_bits bitmask;
 
 	u8         reserved_4[0x40];
 
-- 
cgit v1.2.3


From d18a9470f89727f870db944a36223bf1bb15bdc1 Mon Sep 17 00:00:00 2001
From: Majd Dibbiny <majd@mellanox.com>
Date: Thu, 4 Jun 2015 19:30:40 +0300
Subject: net/mlx5_core: Make the vport helpers available for the IB driver too

Move the vport header file to be under include/linux/mlx5, such that
the mlx5 IB can use it as well.

Also add nic_ prefix to the vport NIC commands to differeniate between
HCA vport commands and NIC vport commands.

Signed-off-by: Majd Dibbiny <majd@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/vport.h | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)
 create mode 100644 include/linux/mlx5/vport.h

(limited to 'include/linux')

diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
new file mode 100644
index 000000000000..99d0e9f85432
--- /dev/null
+++ b/include/linux/mlx5/vport.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies, Ltd.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5_VPORT_H__
+#define __MLX5_VPORT_H__
+
+#include <linux/mlx5/driver.h>
+
+u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod);
+void mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u8 *addr);
+
+#endif /* __MLX5_VPORT_H__ */
-- 
cgit v1.2.3


From 707c4602cda6624940761b66a4119f1909492385 Mon Sep 17 00:00:00 2001
From: Majd Dibbiny <majd@mellanox.com>
Date: Thu, 4 Jun 2015 19:30:41 +0300
Subject: net/mlx5_core: Add new query HCA vport commands

Added the implementation for the following commands:

1. QUERY_HCA_VPORT_GID
2. QUERY_HCA_VPORT_PKEY
3. QUERY_HCA_VPORT_CONTEXT

They will be needed when we move to work with ISSI > 0 in the IB driver too.

Signed-off-by: Majd Dibbiny <majd@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/device.h   | 13 +++++++++++++
 include/linux/mlx5/driver.h   | 45 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/mlx5/mlx5_ifc.h | 23 ++++++++++++++--------
 include/linux/mlx5/vport.h    | 14 ++++++++++++++
 4 files changed, 87 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index b288c538347a..b2c43508a737 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -99,6 +99,12 @@ __mlx5_mask(typ, fld))
 
 #define MLX5_GET64(typ, p, fld) be64_to_cpu(*((__be64 *)(p) + __mlx5_64_off(typ, fld)))
 
+#define MLX5_GET64_PR(typ, p, fld) ({ \
+	u64 ___t = MLX5_GET64(typ, p, fld); \
+	pr_debug(#fld " = 0x%llx\n", ___t); \
+	___t; \
+})
+
 enum {
 	MLX5_MAX_COMMANDS		= 32,
 	MLX5_CMD_DATA_BLOCK_SIZE	= 512,
@@ -1172,4 +1178,11 @@ enum {
 	MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR	= 0x40,
 };
 
+static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz)
+{
+	if (pkey_sz > MLX5_MAX_LOG_PKEY_TABLE)
+		return 0;
+	return MLX5_MIN_PKEY_TABLE_SIZE << pkey_sz;
+}
+
 #endif /* MLX5_DEVICE_H */
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index ba9f212c94bb..8ab8b8af5c32 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -553,6 +553,41 @@ struct mlx5_pas {
 	u8	log_sz;
 };
 
+enum port_state_policy {
+	MLX5_AAA_000
+};
+
+enum phy_port_state {
+	MLX5_AAA_111
+};
+
+struct mlx5_hca_vport_context {
+	u32			field_select;
+	bool			sm_virt_aware;
+	bool			has_smi;
+	bool			has_raw;
+	enum port_state_policy	policy;
+	enum phy_port_state	phys_state;
+	enum ib_port_state	vport_state;
+	u8			port_physical_state;
+	u64			sys_image_guid;
+	u64			port_guid;
+	u64			node_guid;
+	u32			cap_mask1;
+	u32			cap_mask1_perm;
+	u32			cap_mask2;
+	u32			cap_mask2_perm;
+	u16			lid;
+	u8			init_type_reply; /* bitmask: see ib spec 14.2.5.6 InitTypeReply */
+	u8			lmc;
+	u8			subnet_timeout;
+	u16			sm_lid;
+	u8			sm_sl;
+	u16			qkey_violation_counter;
+	u16			pkey_violation_counter;
+	bool			grh_required;
+};
+
 static inline void *mlx5_buf_offset(struct mlx5_buf *buf, int offset)
 {
 		return buf->direct.buf + offset;
@@ -792,4 +827,14 @@ struct mlx5_profile {
 	} mr_cache[MAX_MR_CACHE_ENTRIES];
 };
 
+static inline int mlx5_get_gid_table_len(u16 param)
+{
+	if (param > 4) {
+		pr_warn("gid table length is zero\n");
+		return 0;
+	}
+
+	return 8 * (1 << param);
+}
+
 #endif /* MLX5_DRIVER_H */
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index dbe2b32c0539..f06d054ad021 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -2221,12 +2221,15 @@ struct mlx5_ifc_hca_vport_context_bits {
 	u8         has_smi[0x1];
 	u8         has_raw[0x1];
 	u8         grh_required[0x1];
-	u8         reserved_1[0x10];
-	u8         port_state_policy[0x4];
-	u8         phy_port_state[0x4];
+	u8         reserved_1[0xc];
+	u8         port_physical_state[0x4];
+	u8         vport_state_policy[0x4];
+	u8         port_state[0x4];
 	u8         vport_state[0x4];
 
-	u8         reserved_2[0x60];
+	u8         reserved_2[0x20];
+
+	u8         system_image_guid[0x40];
 
 	u8         port_guid[0x40];
 
@@ -3490,7 +3493,8 @@ struct mlx5_ifc_query_hca_vport_pkey_in_bits {
 	u8         op_mod[0x10];
 
 	u8         other_vport[0x1];
-	u8         reserved_2[0xf];
+	u8         reserved_2[0xb];
+	u8         port_num[0x4];
 	u8         vport_number[0x10];
 
 	u8         reserved_3[0x10];
@@ -3519,7 +3523,8 @@ struct mlx5_ifc_query_hca_vport_gid_in_bits {
 	u8         op_mod[0x10];
 
 	u8         other_vport[0x1];
-	u8         reserved_2[0xf];
+	u8         reserved_2[0xb];
+	u8         port_num[0x4];
 	u8         vport_number[0x10];
 
 	u8         reserved_3[0x10];
@@ -3545,7 +3550,8 @@ struct mlx5_ifc_query_hca_vport_context_in_bits {
 	u8         op_mod[0x10];
 
 	u8         other_vport[0x1];
-	u8         reserved_2[0xf];
+	u8         reserved_2[0xb];
+	u8         port_num[0x4];
 	u8         vport_number[0x10];
 
 	u8         reserved_3[0x20];
@@ -4243,7 +4249,8 @@ struct mlx5_ifc_modify_hca_vport_context_in_bits {
 	u8         op_mod[0x10];
 
 	u8         other_vport[0x1];
-	u8         reserved_2[0xf];
+	u8         reserved_2[0xb];
+	u8         port_num[0x4];
 	u8         vport_number[0x10];
 
 	u8         reserved_3[0x20];
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index 99d0e9f85432..67882a834efb 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -37,5 +37,19 @@
 
 u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod);
 void mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u8 *addr);
+int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport,
+			     u8 port_num, u16  vf_num, u16 gid_index,
+			     union ib_gid *gid);
+int mlx5_query_hca_vport_pkey(struct mlx5_core_dev *dev, u8 other_vport,
+			      u8 port_num, u16 vf_num, u16 pkey_index,
+			      u16 *pkey);
+int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev,
+				 u8 other_vport, u8 port_num,
+				 u16 vf_num,
+				 struct mlx5_hca_vport_context *rep);
+int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev,
+					   __be64 *sys_image_guid);
+int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev,
+				   u64 *node_guid);
 
 #endif /* __MLX5_VPORT_H__ */
-- 
cgit v1.2.3


From 211e6c80e5a68ef39a81484583e8efbf9774627d Mon Sep 17 00:00:00 2001
From: Majd Dibbiny <majd@mellanox.com>
Date: Thu, 4 Jun 2015 19:30:42 +0300
Subject: net/mlx5_core: Get vendor-id using the query adapter command

Add two wrapper functions to the query adapter command:

1. mlx5_query_board_id -- replaces the old mlx5_cmd_query_adapter.

2. mlx5_core_query_vendor_id -- retrieves the vendor_id from the
   query_adapter command.

Signed-off-by: Majd Dibbiny <majd@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/driver.h   | 1 +
 include/linux/mlx5/mlx5_ifc.h | 7 +++++--
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 8ab8b8af5c32..b90fb9336d21 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -817,6 +817,7 @@ struct mlx5_interface {
 void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol);
 int mlx5_register_interface(struct mlx5_interface *intf);
 void mlx5_unregister_interface(struct mlx5_interface *intf);
+int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id);
 
 struct mlx5_profile {
 	u64	mask;
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index f06d054ad021..6d2f6fee041c 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -2470,9 +2470,12 @@ union mlx5_ifc_cong_control_roce_ecn_auto_bits {
 };
 
 struct mlx5_ifc_query_adapter_param_block_bits {
-	u8         reserved_0[0xe0];
+	u8         reserved_0[0xc0];
 
-	u8         reserved_1[0x10];
+	u8         reserved_1[0x8];
+	u8         ieee_vendor_id[0x18];
+
+	u8         reserved_2[0x10];
 	u8         vsd_vendor_id[0x10];
 
 	u8         vsd[208][0x8];
-- 
cgit v1.2.3


From e760152d08da78aa160e68ac90bf8f3f10aff462 Mon Sep 17 00:00:00 2001
From: Majd Dibbiny <majd@mellanox.com>
Date: Thu, 4 Jun 2015 19:30:43 +0300
Subject: net/mlx5_core: Use port number in the query port mtu helpers

Extend the function prototypes for max and operational mtu to take the
local port number. In the Ethernet driver is this hard coded to one,
since ConnectX4 Ethernet devices are always function-per-port.
The IB driver also serves older devices (ConnectIB) which isn't such,
and hence the part can vary.

Signed-off-by: Majd Dibbiny <majd@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/driver.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index b90fb9336d21..cd09784b6999 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -751,8 +751,10 @@ int mlx5_set_port_status(struct mlx5_core_dev *dev,
 int mlx5_query_port_status(struct mlx5_core_dev *dev, u8 *status);
 
 int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu);
-int mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu);
-int mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu);
+int mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu,
+			    u8 local_port);
+int mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu,
+			     u8 local_port);
 
 int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
 void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-- 
cgit v1.2.3


From a05bdefa4081d43f9c86c3bb693d0492a21590da Mon Sep 17 00:00:00 2001
From: Majd Dibbiny <majd@mellanox.com>
Date: Thu, 4 Jun 2015 19:30:44 +0300
Subject: net/mlx5_core: Use port number when querying port ptys

Until now, mlx5_query_port_ptys always queried port number one.

Added new argument in the function's prototype so we can also query
the second port. This will be needed  when thr helper will be invoked
from the IB driver on non FPP (Function-Per-Port) devices.

Signed-off-by: Majd Dibbiny <majd@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/driver.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index cd09784b6999..e4b814f64014 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -739,7 +739,7 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
 
 int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps);
 int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys,
-			 int ptys_size, int proto_mask);
+			 int ptys_size, int proto_mask, u8 local_port);
 int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev,
 			      u32 *proto_cap, int proto_mask);
 int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev,
-- 
cgit v1.2.3


From a124d13ef59e09941fc0924fd7c29ae6d7cd77a3 Mon Sep 17 00:00:00 2001
From: Majd Dibbiny <majd@mellanox.com>
Date: Thu, 4 Jun 2015 19:30:45 +0300
Subject: net/mlx5_core: Add more query port helpers

Add the following helpers:

1. mlx5_query_port_proto_oper -- queries the port speed port mask
2. mlx5_query_port_link_width_oper - queries the port link with bitmask
3. mlx5_query_port_vl_hw_cap - queries the Virtual Lanes supported on this port

These helpers will be used from the IB driver when working in ISSI > 0 mode.

Signed-off-by: Majd Dibbiny <majd@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/driver.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index e4b814f64014..6093bde16b94 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -107,6 +107,7 @@ enum {
 	MLX5_REG_PUDE		 = 0x5009,
 	MLX5_REG_PMPE		 = 0x5010,
 	MLX5_REG_PELC		 = 0x500e,
+	MLX5_REG_PVLC		 = 0x500f,
 	MLX5_REG_PMLP		 = 0, /* TBD */
 	MLX5_REG_NODE_DESC	 = 0x6001,
 	MLX5_REG_HOST_ENDIANNESS = 0x7004,
@@ -744,6 +745,11 @@ int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev,
 			      u32 *proto_cap, int proto_mask);
 int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev,
 				u32 *proto_admin, int proto_mask);
+int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev,
+				    u8 *link_width_oper, u8 local_port);
+int mlx5_query_port_proto_oper(struct mlx5_core_dev *dev,
+			       u8 *proto_oper, int proto_mask,
+			       u8 local_port);
 int mlx5_set_port_proto(struct mlx5_core_dev *dev, u32 proto_admin,
 			int proto_mask);
 int mlx5_set_port_status(struct mlx5_core_dev *dev,
@@ -755,6 +761,8 @@ int mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu,
 			    u8 local_port);
 int mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu,
 			     u8 local_port);
+int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev,
+			      u8 *vl_hw_cap, u8 local_port);
 
 int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
 void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-- 
cgit v1.2.3


From 03fbf488cece461468d3abb795f5e5f055e00040 Mon Sep 17 00:00:00 2001
From: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Date: Thu, 4 Jun 2015 16:55:10 +0300
Subject: spi: pxa2xx: Differentiate Intel LPSS types

Intel LPSS SPI properties differ between between platforms. Now private
registers offset 0x400 or 0x800 is autodetected but there is need to
support also other offset and handle a few other differences.

Prepare for that by splitting the LPSS_SSP type into compatible hardware
types and set it now based on PCI or ACPI ID. That type will be used to set
properties that differ between current and upcoming platforms.

Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/pxa2xx_ssp.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h
index dab545bb66b3..95a4b3bd7a5c 100644
--- a/include/linux/pxa2xx_ssp.h
+++ b/include/linux/pxa2xx_ssp.h
@@ -194,8 +194,9 @@ enum pxa_ssp_type {
 	PXA168_SSP,
 	PXA910_SSP,
 	CE4100_SSP,
-	LPSS_SSP,
 	QUARK_X1000_SSP,
+	LPSS_LPT_SSP,
+	LPSS_BYT_SSP,
 };
 
 struct ssp_device {
-- 
cgit v1.2.3


From dccf7369652f3934456345aab6a92fa905177886 Mon Sep 17 00:00:00 2001
From: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Date: Thu, 4 Jun 2015 16:55:11 +0300
Subject: spi: pxa2xx: Prepare for new Intel LPSS SPI type

Some of the Intel LPSS SPI properties will be different in upcoming
platforms compared to existing Lynxpoint and BayTrail/Braswell. LPSS SPI
private registers will be at different offset and there will be changes in
individual registers and default FIFO thresholds too.

Add configuration for these differences and use them in runtime based on
LPSS SSP type. With this change private registers offset autodetection
becomes needless.

Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/pxa2xx_ssp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h
index 95a4b3bd7a5c..0485bab061fd 100644
--- a/include/linux/pxa2xx_ssp.h
+++ b/include/linux/pxa2xx_ssp.h
@@ -195,7 +195,7 @@ enum pxa_ssp_type {
 	PXA910_SSP,
 	CE4100_SSP,
 	QUARK_X1000_SSP,
-	LPSS_LPT_SSP,
+	LPSS_LPT_SSP, /* Keep LPSS types sorted with lpss_platforms[] */
 	LPSS_BYT_SSP,
 };
 
-- 
cgit v1.2.3


From 8e73485c7959fd25650761eab04db1e72ea14c23 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Sun, 17 May 2015 13:58:53 +0200
Subject: KVM: add vcpu-specific functions to read/write/translate GFNs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We need to hide SMRAM from guests not running in SMM.  Therefore, all
uses of kvm_read_guest* and kvm_write_guest* must be changed to use
different address spaces, depending on whether the VCPU is in system
management mode.  We need to introduce a new family of functions for
this purpose.

For now, the VCPU-based functions have the same behavior as the
existing per-VM ones, they just accept a different type for the
first argument.  Later however they will be changed to use one of many
"struct kvm_memslots" stored in struct kvm, through an architecture hook.
VM-based functions will unconditionally use the first memslots pointer.

Whenever possible, this patch introduces slot-based functions with an
__ prefix, with two wrappers for generic and vcpu-based actions.
The exceptions are kvm_read_guest and kvm_write_guest, which are copied
into the new functions kvm_vcpu_read_guest and kvm_vcpu_write_guest.

Reviewed-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b019fee6d941..ba1ea43998e4 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -471,6 +471,11 @@ static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
 			|| lockdep_is_held(&kvm->slots_lock));
 }
 
+static inline struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu)
+{
+	return kvm_memslots(vcpu->kvm);
+}
+
 static inline struct kvm_memory_slot *
 id_to_memslot(struct kvm_memslots *slots, int id)
 {
@@ -576,6 +581,25 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
 unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn);
 void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
 
+struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu);
+struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn);
+pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn);
+pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
+struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn);
+unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn);
+unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable);
+int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset,
+			     int len);
+int kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa, void *data,
+			       unsigned long len);
+int kvm_vcpu_read_guest(struct kvm_vcpu *vcpu, gpa_t gpa, void *data,
+			unsigned long len);
+int kvm_vcpu_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, const void *data,
+			      int offset, int len);
+int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data,
+			 unsigned long len);
+void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
+
 void kvm_vcpu_block(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
 int kvm_vcpu_yield_to(struct kvm_vcpu *target);
-- 
cgit v1.2.3


From f481b069e674378758c73761827e83ab05c46b52 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Sun, 17 May 2015 17:30:37 +0200
Subject: KVM: implement multiple address spaces
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Only two ioctls have to be modified; the address space id is
placed in the higher 16 bits of their slot id argument.

As of this patch, no architecture defines more than one
address space; x86 will be the first.

Reviewed-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 26 ++++++++++++++++++++++----
 1 file changed, 22 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ba1ea43998e4..9564fd78c547 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -44,6 +44,10 @@
 /* Two fragments for cross MMIO pages. */
 #define KVM_MAX_MMIO_FRAGMENTS	2
 
+#ifndef KVM_ADDRESS_SPACE_NUM
+#define KVM_ADDRESS_SPACE_NUM	1
+#endif
+
 /*
  * For the normal pfn, the highest 12 bits should be zero,
  * so we can mask bit 62 ~ bit 52  to indicate the error pfn,
@@ -331,6 +335,13 @@ struct kvm_kernel_irq_routing_entry {
 #define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS)
 #endif
 
+#ifndef __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
+static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+#endif
+
 /*
  * Note:
  * memslots are not sorted by id anymore, please use id_to_memslot()
@@ -349,7 +360,7 @@ struct kvm {
 	spinlock_t mmu_lock;
 	struct mutex slots_lock;
 	struct mm_struct *mm; /* userspace tied to this vm */
-	struct kvm_memslots *memslots;
+	struct kvm_memslots *memslots[KVM_ADDRESS_SPACE_NUM];
 	struct srcu_struct srcu;
 	struct srcu_struct irq_srcu;
 #ifdef CONFIG_KVM_APIC_ARCHITECTURE
@@ -464,16 +475,23 @@ void kvm_exit(void);
 void kvm_get_kvm(struct kvm *kvm);
 void kvm_put_kvm(struct kvm *kvm);
 
-static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
+static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id)
 {
-	return rcu_dereference_check(kvm->memslots,
+	return rcu_dereference_check(kvm->memslots[as_id],
 			srcu_read_lock_held(&kvm->srcu)
 			|| lockdep_is_held(&kvm->slots_lock));
 }
 
+static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
+{
+	return __kvm_memslots(kvm, 0);
+}
+
 static inline struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu)
 {
-	return kvm_memslots(vcpu->kvm);
+	int as_id = kvm_arch_vcpu_memslots_id(vcpu);
+
+	return __kvm_memslots(vcpu->kvm, as_id);
 }
 
 static inline struct kvm_memory_slot *
-- 
cgit v1.2.3


From 3f21c265cd5f7ae867cc0e86a1f6d5093f1963cc Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Fri, 5 Jun 2015 10:57:37 -0600
Subject: block: add blk_set_queue_dying() to blkdev.h

We export this function and NVMe wants to use it, but for some reason
it was never added to the block header. Do that.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blkdev.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ccaa9aecd593..a31380c35918 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1024,6 +1024,7 @@ bool __must_check blk_get_queue(struct request_queue *);
 struct request_queue *blk_alloc_queue(gfp_t);
 struct request_queue *blk_alloc_queue_node(gfp_t, int);
 extern void blk_put_queue(struct request_queue *);
+extern void blk_set_queue_dying(struct request_queue *);
 
 /*
  * block layer runtime pm functions
-- 
cgit v1.2.3


From a5768aa887fb636f0cc4c83a2f1242506aaf50f6 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Mon, 1 Jun 2015 14:28:14 -0600
Subject: NVMe: Automatic namespace rescan

Namespaces may be dynamically allocated and deleted or attached and
detached. This has the driver rescan the device for namespace changes
after each device reset or namespace change asynchronous event.

There could potentially be many detached namespaces that we don't want
polluting /dev/ with unusable block handles, so this will delete disks
if the namespace is not active as indicated by the response from identify
namespace. This also skips adding the disk if no capacity is provisioned
to the namespace in the first place.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/nvme.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 986bf8ad8e93..c0d94ed8ce9a 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -92,6 +92,7 @@ struct nvme_dev {
 	work_func_t reset_workfn;
 	struct work_struct reset_work;
 	struct work_struct probe_work;
+	struct work_struct scan_work;
 	char name[12];
 	char serial[20];
 	char model[40];
-- 
cgit v1.2.3


From d691f9e8d4405c334aa10d556e73c8bf44cb0e01 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Thu, 4 Jun 2015 10:11:54 -0700
Subject: bpf: allow programs to write to certain skb fields

allow programs read/write skb->mark, tc_index fields and
((struct qdisc_skb_cb *)cb)->data.

mark and tc_index are generically useful in TC.
cb[0]-cb[4] are primarily used to pass arguments from one
program to another called via bpf_tail_call() which can
be seen in sockex3_kern.c example.

All fields of 'struct __sk_buff' are readable to socket and tc_cls_act progs.
mark, tc_index are writeable from tc_cls_act only.
cb[0]-cb[4] are writeable by both sockets and tc_cls_act.

Add verifier tests and improve sample code.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index ca854e5bb2f7..2235aee8096a 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -105,7 +105,8 @@ struct bpf_verifier_ops {
 	 */
 	bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
 
-	u32 (*convert_ctx_access)(int dst_reg, int src_reg, int ctx_off,
+	u32 (*convert_ctx_access)(enum bpf_access_type type, int dst_reg,
+				  int src_reg, int ctx_off,
 				  struct bpf_insn *insn);
 };
 
-- 
cgit v1.2.3


From 4eaca0a887eaee04fc7a3866d0f5b51b34030dfa Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 4 Jun 2015 17:39:08 +0200
Subject: preempt: Use preempt_schedule_context() as the official tracing
 preemption point

preempt_schedule_context() is a tracing safe preemption point but it's
only used when CONFIG_CONTEXT_TRACKING=y. Other configs have tracing
recursion issues since commit:

  b30f0e3ffedf ("sched/preempt: Optimize preemption operations on __schedule() callers")

introduced function based preemp_count_*() ops.

Lets make it available on all configs and give it a more appropriate
name for its new position.

Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1433432349-1021-3-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/preempt.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index a1a00e14c14f..7686dd63bc35 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -204,15 +204,11 @@ do { \
 
 #ifdef CONFIG_PREEMPT
 
-#ifndef CONFIG_CONTEXT_TRACKING
-#define __preempt_schedule_context() __preempt_schedule()
-#endif
-
 #define preempt_enable_notrace() \
 do { \
 	barrier(); \
 	if (unlikely(__preempt_count_dec_and_test())) \
-		__preempt_schedule_context(); \
+		__preempt_schedule_notrace(); \
 } while (0)
 #else
 #define preempt_enable_notrace() \
-- 
cgit v1.2.3


From 9a92e3dc6ad02208a014d0d8404ebbd697e3d5ef Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 4 Jun 2015 17:39:09 +0200
Subject: preempt: Reorganize the notrace definitions a bit

preempt.h has two seperate "#ifdef CONFIG_PREEMPT" sections: one to
define preempt_enable() and another to define preempt_enable_notrace().

Lets gather both.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Fengguang Wu <fengguang.wu@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1433432349-1021-4-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/preempt.h | 32 +++++++++++++++-----------------
 1 file changed, 15 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 7686dd63bc35..0f1534acaf60 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -175,48 +175,46 @@ do { \
 		__preempt_schedule(); \
 } while (0)
 
+#define preempt_enable_notrace() \
+do { \
+	barrier(); \
+	if (unlikely(__preempt_count_dec_and_test())) \
+		__preempt_schedule_notrace(); \
+} while (0)
+
 #define preempt_check_resched() \
 do { \
 	if (should_resched()) \
 		__preempt_schedule(); \
 } while (0)
 
-#else
+#else /* !CONFIG_PREEMPT */
 #define preempt_enable() \
 do { \
 	barrier(); \
 	preempt_count_dec(); \
 } while (0)
-#define preempt_check_resched() do { } while (0)
-#endif
-
-#define preempt_disable_notrace() \
-do { \
-	__preempt_count_inc(); \
-	barrier(); \
-} while (0)
 
-#define preempt_enable_no_resched_notrace() \
+#define preempt_enable_notrace() \
 do { \
 	barrier(); \
 	__preempt_count_dec(); \
 } while (0)
 
-#ifdef CONFIG_PREEMPT
+#define preempt_check_resched() do { } while (0)
+#endif /* CONFIG_PREEMPT */
 
-#define preempt_enable_notrace() \
+#define preempt_disable_notrace() \
 do { \
+	__preempt_count_inc(); \
 	barrier(); \
-	if (unlikely(__preempt_count_dec_and_test())) \
-		__preempt_schedule_notrace(); \
 } while (0)
-#else
-#define preempt_enable_notrace() \
+
+#define preempt_enable_no_resched_notrace() \
 do { \
 	barrier(); \
 	__preempt_count_dec(); \
 } while (0)
-#endif
 
 #else /* !CONFIG_PREEMPT_COUNT */
 
-- 
cgit v1.2.3


From 21509084f999d7accd32e45961ef76853112e978 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Wed, 6 May 2015 15:33:49 -0400
Subject: perf/x86/intel: Handle multiple records in the PEBS buffer

When the PEBS interrupt threshold is larger than one record and the
machine supports multiple PEBS events, the records of these events are
mixed up and we need to demultiplex them.

Demuxing the records is hard because the hardware is deficient. The
hardware has two issues that, when combined, create impossible
scenarios to demux.

The first issue is that the 'status' field of the PEBS record is a copy
of the GLOBAL_STATUS MSR at PEBS assist time. To see why this is a
problem let us first describe the regular PEBS cycle:

A) the CTRn value reaches 0:
  - the corresponding bit in GLOBAL_STATUS gets set
  - we start arming the hardware assist
  < some unspecified amount of time later -- this could cover multiple
    events of interest >

B) the hardware assist is armed, any next event will trigger it

C) a matching event happens:
  - the hardware assist triggers and generates a PEBS record
    this includes a copy of GLOBAL_STATUS at this moment
  - if we auto-reload we (re)set CTRn
  - we clear the relevant bit in GLOBAL_STATUS

Now consider the following chain of events:

  A0, B0, A1, C0

The event generated for counter 0 will include a status with counter 1
set, even though its not at all related to the record. A similar thing
can happen with a !PEBS event if it just happens to overflow at the
right moment.

The second issue is that the hardware will only emit one record for two
or more counters if the event that triggers the assist is 'close'. The
'close' can be several cycles. In some cases even the complete assist,
if the event is something that doesn't need retirement.

For instance, consider this chain of events:

  A0, B0, A1, B1, C01

Where C01 is an event that triggers both hardware assists, we will
generate but a single record, but again with both counters listed in the
status field.

This time the record pertains to both events.

Note that these two cases are different but undistinguishable with the
data as generated. Therefore demuxing records with multiple PEBS bits
(we can safely ignore status bits for !PEBS counters) is impossible.

Furthermore we cannot emit the record to both events because that might
cause a data leak -- the events might not have the same privileges -- so
what this patch does is discard such events.

The assumption/hope is that such discards will be rare.

Here lists some possible ways you may get high discard rate.

  - when you count the same thing multiple times. But it is not a useful
    configuration.
  - you can be unfortunate if you measure with a userspace only PEBS
    event along with either a kernel or unrestricted PEBS event. Imagine
    the event triggering and setting the overflow flag right before
    entering the kernel. Then all kernel side events will end up with
    multiple bits set.

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Signed-off-by: Kan Liang <kan.liang@intel.com>
[ Changelog improvements. ]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: eranian@google.com
Link: http://lkml.kernel.org/r/1430940834-8964-4-git-send-email-kan.liang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 06580028cee6..5f192e1bc98e 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -730,6 +730,19 @@ extern int perf_event_overflow(struct perf_event *event,
 				 struct perf_sample_data *data,
 				 struct pt_regs *regs);
 
+extern void perf_event_output(struct perf_event *event,
+				struct perf_sample_data *data,
+				struct pt_regs *regs);
+
+extern void
+perf_event_header__init_id(struct perf_event_header *header,
+			   struct perf_sample_data *data,
+			   struct perf_event *event);
+extern void
+perf_event__output_id_sample(struct perf_event *event,
+			     struct perf_output_handle *handle,
+			     struct perf_sample_data *sample);
+
 static inline bool is_sampling_event(struct perf_event *event)
 {
 	return event->attr.sample_period != 0;
-- 
cgit v1.2.3


From f38b0dbb491a6987e198aa6b428db8692a6480f8 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Sun, 10 May 2015 15:13:14 -0400
Subject: perf/x86/intel: Introduce PERF_RECORD_LOST_SAMPLES

After enlarging the PEBS interrupt threshold, there may be some mixed up
PEBS samples which are discarded by the kernel.

This patch makes the kernel emit a PERF_RECORD_LOST_SAMPLES record with
the number of possible discarded records when it is impossible to demux
the samples.

It makes sure the user is not left in the dark about such discards.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: eranian@google.com
Link: http://lkml.kernel.org/r/1431285195-14269-8-git-send-email-kan.liang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 5f192e1bc98e..a204d5266f5f 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -743,6 +743,9 @@ perf_event__output_id_sample(struct perf_event *event,
 			     struct perf_output_handle *handle,
 			     struct perf_sample_data *sample);
 
+extern void
+perf_log_lost_samples(struct perf_event *event, u64 lost);
+
 static inline bool is_sampling_event(struct perf_event *event)
 {
 	return event->attr.sample_period != 0;
-- 
cgit v1.2.3


From 7cf7fa529d0b6b514949cc67b39e3ce406c37006 Mon Sep 17 00:00:00 2001
From: Majd Dibbiny <majd@mellanox.com>
Date: Sun, 7 Jun 2015 15:44:23 +0300
Subject: net/mlx5_core: Fix static checker warnings around system guid query
 flow

Fix static checker warnings in the flow of system guid query.

Fixes: 707c4602cda6 ('net/mlx5_core: Add new query HCA vport commands')
Signed-off-by: Majd Dibbiny <majd@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/vport.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index 67882a834efb..967e0fd06e89 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -48,7 +48,7 @@ int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev,
 				 u16 vf_num,
 				 struct mlx5_hca_vport_context *rep);
 int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev,
-					   __be64 *sys_image_guid);
+					   u64 *sys_image_guid);
 int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev,
 				   u64 *node_guid);
 
-- 
cgit v1.2.3


From cb4a316752709be4a644f070440a8be470d92b7d Mon Sep 17 00:00:00 2001
From: Aleksa Sarai <cyphar@cyphar.com>
Date: Sat, 6 Jun 2015 10:02:14 +1000
Subject: cgroup: use bitmask to filter for_each_subsys

Add a new macro for_each_subsys_which that allows all enabled cgroup
subsystems to be filtered by a bitmask, such that mask & (1 << ssid)
determines if the subsystem is to be processed in the loop body (where
ssid is the unique id of the subsystem).

Also replace the need_forkexit_callback with two separate bitmasks for
each callback to make (ss->{fork,exit}) checks unnecessary.

tj: add a short comment for "if (!CGROUP_SUBSYS_COUNT)".

Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
---
 include/linux/cgroup-defs.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 26d1cea7929f..c5588c438448 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -490,6 +490,8 @@ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk)
 
 #else	/* CONFIG_CGROUPS */
 
+#define CGROUP_SUBSYS_COUNT 0
+
 static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) {}
 static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {}
 
-- 
cgit v1.2.3


From 3846c15820a1841225d0245afda4875af23dfbbe Mon Sep 17 00:00:00 2001
From: Peter Jones <pjones@redhat.com>
Date: Fri, 5 Jun 2015 15:14:54 -0400
Subject: efi: Work around ia64 build problem with ESRT driver

So, I'm told this problem exists in the world:

 > Subject: Build error in -next due to 'efi: Add esrt support'
 >
 > Building ia64:defconfig ... failed
 > --------------
 > Error log:
 >
 > drivers/firmware/efi/esrt.c:28:31: fatal error: asm/early_ioremap.h: No such file or directory
 >

I'm not really sure how it's okay that we have things in asm-generic on
some platforms but not others - is having it the same everywhere not the
whole point of asm-generic?

That said, ia64 doesn't have early_ioremap.h .  So instead, since it's
difficult to imagine new IA64 machines with UEFI 2.5, just don't build
this code there.

To me this looks like a workaround - doing something like:

generic-y += early_ioremap.h

in arch/ia64/include/asm/Kbuild would appear to be more correct, but
ia64 has its own early_memremap() decl in arch/ia64/include/asm/io.h ,
and it's a macro.  So adding the above /and/ requiring that asm/io.h be
included /after/ asm/early_ioremap.h in all cases would fix it, but
that's pretty ugly as well.  Since I'm not going to spend the rest of my
life rectifying ia64 headers vs "generic" headers that aren't generic,
it's much simpler to just not build there.

Note that I've only actually tried to build this patch on x86_64, but
esrt.o still gets built there, and that would seem to demonstrate that
the conditional building is working correctly at all the places the code
built before.  I no longer have any ia64 machines handy to test that the
exclusion actually works there.

Signed-off-by: Peter Jones <pjones@redhat.com>
Acked-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
(Compile-)Tested-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 include/linux/efi.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index 024c27e7c0fa..2092965afca3 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -879,7 +879,11 @@ static inline efi_status_t efi_query_variable_store(u32 attributes, unsigned lon
 #endif
 extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr);
 extern int efi_config_init(efi_config_table_type_t *arch_tables);
+#ifdef CONFIG_EFI_ESRT
 extern void __init efi_esrt_init(void);
+#else
+static inline void efi_esrt_init(void) { }
+#endif
 extern int efi_config_parse_tables(void *config_tables, int count, int sz,
 				   efi_config_table_type_t *arch_tables);
 extern u64 efi_get_iobase (void);
-- 
cgit v1.2.3


From a8077d6573530a91d5674a28cdedbed39c391ff0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <zajec5@gmail.com>
Date: Sun, 7 Jun 2015 13:15:30 +0200
Subject: bcma: make calls to PCI hostmode functions config-safe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 include/linux/bcma/bcma_driver_pci.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h
index 5ba6918ca20b..9657f11d48a7 100644
--- a/include/linux/bcma/bcma_driver_pci.h
+++ b/include/linux/bcma/bcma_driver_pci.h
@@ -246,7 +246,18 @@ static inline void bcma_core_pci_power_save(struct bcma_bus *bus, bool up)
 }
 #endif
 
+#ifdef CONFIG_BCMA_DRIVER_PCI_HOSTMODE
 extern int bcma_core_pci_pcibios_map_irq(const struct pci_dev *dev);
 extern int bcma_core_pci_plat_dev_init(struct pci_dev *dev);
+#else
+static inline int bcma_core_pci_pcibios_map_irq(const struct pci_dev *dev)
+{
+	return -ENOTSUPP;
+}
+static inline int bcma_core_pci_plat_dev_init(struct pci_dev *dev)
+{
+	return -ENOTSUPP;
+}
+#endif
 
 #endif /* LINUX_BCMA_DRIVER_PCI_H_ */
-- 
cgit v1.2.3


From 01d72a95188880b22190e937ed8718ed4b45bdce Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Thu, 4 Jun 2015 16:38:17 -0500
Subject: PCI: Remove unused pci_dma_burst_advice()

pci_dma_burst_advice() was added by e24c2d963a60 ("[PATCH] PCI: DMA
bursting advice") but apparently never used.  Remove it.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Michal Simek <monstr@monstr.eu>	# microblaze
CC: David S. Miller <davem@davemloft.net>
---
 include/linux/pci.h | 11 -----------
 1 file changed, 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 353db8dc4c6e..08fb4e307d68 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1197,15 +1197,6 @@ int pci_set_vga_state(struct pci_dev *pdev, bool decode,
 #define	pci_pool_alloc(pool, flags, handle) dma_pool_alloc(pool, flags, handle)
 #define	pci_pool_free(pool, vaddr, addr) dma_pool_free(pool, vaddr, addr)
 
-enum pci_dma_burst_strategy {
-	PCI_DMA_BURST_INFINITY,	/* make bursts as large as possible,
-				   strategy_parameter is N/A */
-	PCI_DMA_BURST_BOUNDARY, /* disconnect at every strategy_parameter
-				   byte boundaries */
-	PCI_DMA_BURST_MULTIPLE, /* disconnect at some multiple of
-				   strategy_parameter byte boundaries */
-};
-
 struct msix_entry {
 	u32	vector;	/* kernel uses to write allocated vector */
 	u16	entry;	/* driver uses to specify entry, OS writes */
@@ -1430,8 +1421,6 @@ static inline int pci_request_regions(struct pci_dev *dev, const char *res_name)
 { return -EIO; }
 static inline void pci_release_regions(struct pci_dev *dev) { }
 
-#define pci_dma_burst_advice(pdev, strat, strategy_parameter) do { } while (0)
-
 static inline void pci_block_cfg_access(struct pci_dev *dev) { }
 static inline int pci_block_cfg_access_in_atomic(struct pci_dev *dev)
 { return 0; }
-- 
cgit v1.2.3


From d711b8b30c803b1b2aedf6a3474758798078f9e1 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Sat, 6 Jun 2015 11:30:00 +0200
Subject: hrtimers: Make sure hrtimer_resolution is unsigned int
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

... in the !CONFIG_HIGH_RES_TIMERS case too. And thus fix warnings like
this one:

net/sched/sch_api.c: In function ‘psched_show’:
net/sched/sch_api.c:1891:6: warning: format ‘%x’ expects argument of type ‘unsigned int’, but argument 6 has type ‘long int’ [-Wformat=]
      (u32)NSEC_PER_SEC / hrtimer_resolution);

Signed-off-by: Borislav Petkov <bp@suse.de>
Link: http://lkml.kernel.org/r/1433583000-32090-1-git-send-email-bp@alien8.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 470d876c2eda..3f82a7edc03d 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -309,7 +309,7 @@ extern unsigned int hrtimer_resolution;
 # define MONOTONIC_RES_NSEC	LOW_RES_NSEC
 # define KTIME_MONOTONIC_RES	KTIME_LOW_RES
 
-#define hrtimer_resolution	LOW_RES_NSEC
+#define hrtimer_resolution	(unsigned int)LOW_RES_NSEC
 
 static inline void hrtimer_peek_ahead_timers(void) { }
 
-- 
cgit v1.2.3


From 2c1296d92ac0367364bcb73a43c12a0bdfbfee75 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Thu, 28 May 2015 18:41:32 +0200
Subject: iommu: Add iommu_get_domain_for_dev function

This function can be used to request the current domain a
device is attached to.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 0546b8710ce3..683a1c4b15e7 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -193,6 +193,7 @@ extern int iommu_attach_device(struct iommu_domain *domain,
 			       struct device *dev);
 extern void iommu_detach_device(struct iommu_domain *domain,
 				struct device *dev);
+extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev);
 extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
 		     phys_addr_t paddr, size_t size, int prot);
 extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova,
@@ -332,6 +333,11 @@ static inline void iommu_detach_device(struct iommu_domain *domain,
 {
 }
 
+static inline struct iommu_domain *iommu_get_domain_for_dev(struct device *dev)
+{
+	return NULL;
+}
+
 static inline int iommu_map(struct iommu_domain *domain, unsigned long iova,
 			    phys_addr_t paddr, int gfp_order, int prot)
 {
-- 
cgit v1.2.3


From a1015c2b99b94cf521603b41debf167114031456 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Thu, 28 May 2015 18:41:33 +0200
Subject: iommu: Introduce direct mapped region handling

Add two new functions to the IOMMU-API to allow the IOMMU
drivers to export the requirements for direct mapped regions
per device.
This is useful for exporting the information in Intel VT-d's
RMRR entries or AMD-Vi's unity mappings.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 683a1c4b15e7..689499904166 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -114,6 +114,20 @@ enum iommu_attr {
 	DOMAIN_ATTR_MAX,
 };
 
+/**
+ * struct iommu_dm_region - descriptor for a direct mapped memory region
+ * @list: Linked list pointers
+ * @start: System physical start address of the region
+ * @length: Length of the region in bytes
+ * @prot: IOMMU Protection flags (READ/WRITE/...)
+ */
+struct iommu_dm_region {
+	struct list_head	list;
+	phys_addr_t		start;
+	size_t			length;
+	int			prot;
+};
+
 #ifdef CONFIG_IOMMU_API
 
 /**
@@ -159,6 +173,10 @@ struct iommu_ops {
 	int (*domain_set_attr)(struct iommu_domain *domain,
 			       enum iommu_attr attr, void *data);
 
+	/* Request/Free a list of direct mapping requirements for a device */
+	void (*get_dm_regions)(struct device *dev, struct list_head *list);
+	void (*put_dm_regions)(struct device *dev, struct list_head *list);
+
 	/* Window handling functions */
 	int (*domain_window_enable)(struct iommu_domain *domain, u32 wnd_nr,
 				    phys_addr_t paddr, u64 size, int prot);
@@ -205,6 +223,9 @@ extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t io
 extern void iommu_set_fault_handler(struct iommu_domain *domain,
 			iommu_fault_handler_t handler, void *token);
 
+extern void iommu_get_dm_regions(struct device *dev, struct list_head *list);
+extern void iommu_put_dm_regions(struct device *dev, struct list_head *list);
+
 extern int iommu_attach_group(struct iommu_domain *domain,
 			      struct iommu_group *group);
 extern void iommu_detach_group(struct iommu_domain *domain,
@@ -379,6 +400,16 @@ static inline void iommu_set_fault_handler(struct iommu_domain *domain,
 {
 }
 
+static inline void iommu_get_dm_regions(struct device *dev,
+					struct list_head *list)
+{
+}
+
+static inline void iommu_put_dm_regions(struct device *dev,
+					struct list_head *list)
+{
+}
+
 static inline int iommu_attach_group(struct iommu_domain *domain,
 				     struct iommu_group *group)
 {
-- 
cgit v1.2.3


From 6827ca83695d5e41ad31b0719788ee65f00ca4b3 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Thu, 28 May 2015 18:41:35 +0200
Subject: iommu: Add function to query the default domain of a group

This will be used to handle unity mappings in the iommu
drivers.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 689499904166..b944b2be4fa2 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -249,6 +249,7 @@ extern int iommu_group_unregister_notifier(struct iommu_group *group,
 					   struct notifier_block *nb);
 extern int iommu_group_id(struct iommu_group *group);
 extern struct iommu_group *iommu_group_get_for_dev(struct device *dev);
+extern struct iommu_domain *iommu_group_default_domain(struct iommu_group *);
 
 extern int iommu_domain_get_attr(struct iommu_domain *domain, enum iommu_attr,
 				 void *data);
-- 
cgit v1.2.3


From 5179f0ce2f96f155e3bda93b3b82f912dbaddad2 Mon Sep 17 00:00:00 2001
From: Steve Twiss <stwiss.opensource@diasemi.com>
Date: Mon, 8 Jun 2015 16:26:20 -0700
Subject: Input: add OnKey driver for DA9063 MFD part

This adds OnKey driver support for DA9063.

Signed-off-by: Steve Twiss <stwiss.opensource@diasemi.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/linux/mfd/da9063/pdata.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/da9063/pdata.h b/include/linux/mfd/da9063/pdata.h
index 95c8742215a7..612383bd80ae 100644
--- a/include/linux/mfd/da9063/pdata.h
+++ b/include/linux/mfd/da9063/pdata.h
@@ -103,6 +103,7 @@ struct da9063;
 struct da9063_pdata {
 	int				(*init)(struct da9063 *da9063);
 	int				irq_base;
+	bool				key_power;
 	unsigned			flags;
 	struct da9063_regulators_pdata	*regulators_pdata;
 	struct led_platform_data	*leds_pdata;
-- 
cgit v1.2.3


From ae60d6a0e3a9197d37f8c8c4584a8ecd18518cd6 Mon Sep 17 00:00:00 2001
From: Nicholas Mc Guire <hofrat@osadl.org>
Date: Thu, 28 May 2015 19:09:55 +0200
Subject: time: Refactor usecs_to_jiffies

Refactor the usecs_to_jiffies conditional code part in time.c and
jiffies.h putting it into conditional functions rather than #ifdefs
to improve readability. This is analogous to the msecs_to_jiffies()
cleanup in commit ca42aaf0c861 ("time: Refactor msecs_to_jiffies")

Signed-off-by: Nicholas Mc Guire <hofrat@osadl.org>
Cc: Masahiro Yamada <yamada.m@jp.panasonic.com>
Cc: Sam Ravnborg <sam@ravnborg.org>
Cc: Joe Perches <joe@perches.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Andrew Hunter <ahh@google.com>
Cc: Paul Turner <pjt@google.com>
Cc: Michal Marek <mmarek@suse.cz>
Link: http://lkml.kernel.org/r/1432832996-12129-1-git-send-email-hofrat@osadl.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/jiffies.h | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 3bde5eb8568b..a316ebea0a89 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -362,7 +362,32 @@ static inline unsigned long msecs_to_jiffies(const unsigned int m)
 	}
 }
 
-extern unsigned long usecs_to_jiffies(const unsigned int u);
+extern unsigned long __usecs_to_jiffies(const unsigned int u);
+#if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ)
+static inline unsigned long _usecs_to_jiffies(const unsigned int u)
+{
+	return (u + (USEC_PER_SEC / HZ) - 1) / (USEC_PER_SEC / HZ);
+}
+#elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC)
+static inline unsigned long _usecs_to_jiffies(const unsigned int u)
+{
+	return u * (HZ / USEC_PER_SEC);
+}
+static inline unsigned long _usecs_to_jiffies(const unsigned int u)
+{
+#else
+static inline unsigned long _usecs_to_jiffies(const unsigned int u)
+{
+	return (USEC_TO_HZ_MUL32 * u + USEC_TO_HZ_ADJ32)
+		>> USEC_TO_HZ_SHR32;
+}
+#endif
+
+static inline unsigned long usecs_to_jiffies(const unsigned int u)
+{
+	return __usecs_to_jiffies(u);
+}
+
 extern unsigned long timespec_to_jiffies(const struct timespec *value);
 extern void jiffies_to_timespec(const unsigned long jiffies,
 				struct timespec *value);
-- 
cgit v1.2.3


From c569a23d65ac2900d9998d3fe04044fe95be6b2f Mon Sep 17 00:00:00 2001
From: Nicholas Mc Guire <hofrat@osadl.org>
Date: Thu, 28 May 2015 19:09:56 +0200
Subject: time: Allow gcc to fold usecs_to_jiffies(constant)

To allow constant folding in usecs_to_jiffies() conditionally calls
the HZ dependent _usecs_to_jiffies() helpers or, when gcc can not
figure out constant folding, __usecs_to_jiffies, which is the renamed
original usecs_to_jiffies() function.

Signed-off-by: Nicholas Mc Guire <hofrat@osadl.org>
Cc: Masahiro Yamada <yamada.m@jp.panasonic.com>
Cc: Sam Ravnborg <sam@ravnborg.org>
Cc: Joe Perches <joe@perches.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Andrew Hunter <ahh@google.com>
Cc: Paul Turner <pjt@google.com>
Cc: Michal Marek <mmarek@suse.cz>
Link: http://lkml.kernel.org/r/1432832996-12129-2-git-send-email-hofrat@osadl.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/jiffies.h | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index a316ebea0a89..535fd3bb1ba8 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -383,9 +383,37 @@ static inline unsigned long _usecs_to_jiffies(const unsigned int u)
 }
 #endif
 
+/**
+ * usecs_to_jiffies: - convert microseconds to jiffies
+ * @u:	time in microseconds
+ *
+ * conversion is done as follows:
+ *
+ * - 'too large' values [that would result in larger than
+ *   MAX_JIFFY_OFFSET values] mean 'infinite timeout' too.
+ *
+ * - all other values are converted to jiffies by either multiplying
+ *   the input value by a factor or dividing it with a factor and
+ *   handling any 32-bit overflows as for msecs_to_jiffies.
+ *
+ * usecs_to_jiffies() checks for the passed in value being a constant
+ * via __builtin_constant_p() allowing gcc to eliminate most of the
+ * code, __usecs_to_jiffies() is called if the value passed does not
+ * allow constant folding and the actual conversion must be done at
+ * runtime.
+ * the HZ range specific helpers _usecs_to_jiffies() are called both
+ * directly here and from __msecs_to_jiffies() in the case where
+ * constant folding is not possible.
+ */
 static inline unsigned long usecs_to_jiffies(const unsigned int u)
 {
-	return __usecs_to_jiffies(u);
+	if (__builtin_constant_p(u)) {
+		if (u > jiffies_to_usecs(MAX_JIFFY_OFFSET))
+			return MAX_JIFFY_OFFSET;
+		return _usecs_to_jiffies(u);
+	} else {
+		return __usecs_to_jiffies(u);
+	}
 }
 
 extern unsigned long timespec_to_jiffies(const struct timespec *value);
-- 
cgit v1.2.3


From ed06aeefdac348cfb91a3db5fe1067e3202afd70 Mon Sep 17 00:00:00 2001
From: Christophe Ricard <christophe.ricard@gmail.com>
Date: Tue, 9 Jun 2015 22:26:05 +0200
Subject: nfc: st-nci: Rename st21nfcb to st-nci

STMicroelectronics NFC NCI chips family is extending
with the new ST21NFCC using the AMS AS39230 RF booster.
The st21nfcb driver is relevant for this solution and
might be with future products.

Signed-off-by: Christophe Ricard <christophe-h.ricard@st.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/platform_data/st-nci.h   | 29 +++++++++++++++++++++++++++++
 include/linux/platform_data/st21nfcb.h | 29 -----------------------------
 include/linux/platform_data/st_nci.h   | 29 +++++++++++++++++++++++++++++
 3 files changed, 58 insertions(+), 29 deletions(-)
 create mode 100644 include/linux/platform_data/st-nci.h
 delete mode 100644 include/linux/platform_data/st21nfcb.h
 create mode 100644 include/linux/platform_data/st_nci.h

(limited to 'include/linux')

diff --git a/include/linux/platform_data/st-nci.h b/include/linux/platform_data/st-nci.h
new file mode 100644
index 000000000000..d9d400a297bd
--- /dev/null
+++ b/include/linux/platform_data/st-nci.h
@@ -0,0 +1,29 @@
+/*
+ * Driver include for ST NCI NFC chip family.
+ *
+ * Copyright (C) 2014-2015  STMicroelectronics SAS. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _ST_NCI_H_
+#define _ST_NCI_H_
+
+#define ST_NCI_DRIVER_NAME "st_nci"
+
+struct st_nci_nfc_platform_data {
+	unsigned int gpio_reset;
+	unsigned int irq_polarity;
+};
+
+#endif /* _ST_NCI_H_ */
diff --git a/include/linux/platform_data/st21nfcb.h b/include/linux/platform_data/st21nfcb.h
deleted file mode 100644
index b023373d9874..000000000000
--- a/include/linux/platform_data/st21nfcb.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Driver include for the ST21NFCB NFC chip.
- *
- * Copyright (C) 2014  STMicroelectronics SAS. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _ST21NFCB_NCI_H_
-#define _ST21NFCB_NCI_H_
-
-#define ST21NFCB_NCI_DRIVER_NAME "st21nfcb_nci"
-
-struct st21nfcb_nfc_platform_data {
-	unsigned int gpio_reset;
-	unsigned int irq_polarity;
-};
-
-#endif /* _ST21NFCB_NCI_H_ */
diff --git a/include/linux/platform_data/st_nci.h b/include/linux/platform_data/st_nci.h
new file mode 100644
index 000000000000..d9d400a297bd
--- /dev/null
+++ b/include/linux/platform_data/st_nci.h
@@ -0,0 +1,29 @@
+/*
+ * Driver include for ST NCI NFC chip family.
+ *
+ * Copyright (C) 2014-2015  STMicroelectronics SAS. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _ST_NCI_H_
+#define _ST_NCI_H_
+
+#define ST_NCI_DRIVER_NAME "st_nci"
+
+struct st_nci_nfc_platform_data {
+	unsigned int gpio_reset;
+	unsigned int irq_polarity;
+};
+
+#endif /* _ST_NCI_H_ */
-- 
cgit v1.2.3


From 205a525c334295e3cd4cc7755fd2c0398e3a787f Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 9 Jun 2015 18:19:39 +0800
Subject: random: Add callback API for random pool readiness

The get_blocking_random_bytes API is broken because the wait can
be arbitrarily long (potentially forever) so there is no safe way
of calling it from within the kernel.

This patch replaces it with a callback API instead.  The callback
is invoked potentially from interrupt context so the user needs
to schedule their own work thread if necessary.

In addition to adding callbacks, they can also be removed as
otherwise this opens up a way for user-space to allocate kernel
memory with no bound (by opening algif_rng descriptors and then
closing them).

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/random.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/random.h b/include/linux/random.h
index 796267d56901..30e2aca0b16a 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -6,8 +6,15 @@
 #ifndef _LINUX_RANDOM_H
 #define _LINUX_RANDOM_H
 
+#include <linux/list.h>
 #include <uapi/linux/random.h>
 
+struct random_ready_callback {
+	struct list_head list;
+	void (*func)(struct random_ready_callback *rdy);
+	struct module *owner;
+};
+
 extern void add_device_randomness(const void *, unsigned int);
 extern void add_input_randomness(unsigned int type, unsigned int code,
 				 unsigned int value);
@@ -15,6 +22,8 @@ extern void add_interrupt_randomness(int irq, int irq_flags);
 
 extern void get_random_bytes(void *buf, int nbytes);
 extern void get_blocking_random_bytes(void *buf, int nbytes);
+extern int add_random_ready_callback(struct random_ready_callback *rdy);
+extern void del_random_ready_callback(struct random_ready_callback *rdy);
 extern void get_random_bytes_arch(void *buf, int nbytes);
 void generate_random_uuid(unsigned char uuid_out[16]);
 extern int random_int_secret_init(void);
-- 
cgit v1.2.3


From c2719503f5e1e6213d716bb078bdad01e28ebcbf Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 9 Jun 2015 18:19:42 +0800
Subject: random: Remove kernel blocking API

This patch removes the kernel blocking API as it has been completely
replaced by the callback API.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/random.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/random.h b/include/linux/random.h
index 30e2aca0b16a..e651874df2c9 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -21,7 +21,6 @@ extern void add_input_randomness(unsigned int type, unsigned int code,
 extern void add_interrupt_randomness(int irq, int irq_flags);
 
 extern void get_random_bytes(void *buf, int nbytes);
-extern void get_blocking_random_bytes(void *buf, int nbytes);
 extern int add_random_ready_callback(struct random_ready_callback *rdy);
 extern void del_random_ready_callback(struct random_ready_callback *rdy);
 extern void get_random_bytes_arch(void *buf, int nbytes);
-- 
cgit v1.2.3


From 0bb979472a7401022109e81dd89d777adea58710 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Wed, 10 Jun 2015 08:01:20 -0600
Subject: cfq-iosched: fix the setting of IOPS mode on SSDs

A previous commit wanted to make CFQ default to IOPS mode on
non-rotational storage, however it did so when the queue was
initialized and the non-rotational flag is only set later on
in the probe.

Add an elevator hook that gets called off the add_disk() path,
at that point we know that feature probing has finished, and
we can reliably check for the various flags that drivers can
set.

Fixes: 41c0126b ("block: Make CFQ default to IOPS mode on SSDs")
Tested-by: Romain Francoise <romain@orebokech.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/elevator.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 45a91474487d..638b324f0291 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -39,6 +39,7 @@ typedef void (elevator_deactivate_req_fn) (struct request_queue *, struct reques
 typedef int (elevator_init_fn) (struct request_queue *,
 				struct elevator_type *e);
 typedef void (elevator_exit_fn) (struct elevator_queue *);
+typedef void (elevator_registered_fn) (struct request_queue *);
 
 struct elevator_ops
 {
@@ -68,6 +69,7 @@ struct elevator_ops
 
 	elevator_init_fn *elevator_init_fn;
 	elevator_exit_fn *elevator_exit_fn;
+	elevator_registered_fn *elevator_registered_fn;
 };
 
 #define ELV_NAME_MAX	(16)
-- 
cgit v1.2.3


From 5c6e3a97e969e978368df83239583771c936efea Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Date: Mon, 8 Jun 2015 10:09:48 +0900
Subject: power_supply: sysfs: Bring back write to writeable properties

The fix for NULL pointer exception related to calling uevent for not
finished probe caused to set all writeable properties as non-writeable.
This was caused by checking if property is writeable before the initial
increase of power supply usage counter and in the same time using
wrapper over property_is_writeable(). The wrapper returns ENODEV if the
usage counter is still 0.

The call trace looked like:
  device probe:
    power_supply_register()
      use_cnt = 0;
      device_add()
        create sysfs entries
          power_supply_attr_is_visible()
            power_supply_property_is_writeable()
              if (use_cnt == 0) return -ENODEV;
      use_cnt++;

Replace the usage of wrapper with direct call to property_is_writeable()
from driver. This should be safe call during device probe because
implementations of this callback just return 0/1 for different
properties and they do not access any of the driver's internal data.

Fixes: 8e59c7f23410 ("power_supply: Fix NULL pointer dereference during bq27x00_battery probe")
Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 include/linux/power_supply.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index a80f1fd01ddb..0395bcb18ddb 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -206,6 +206,11 @@ struct power_supply_desc {
 	int (*set_property)(struct power_supply *psy,
 			    enum power_supply_property psp,
 			    const union power_supply_propval *val);
+	/*
+	 * property_is_writeable() will be called during registration
+	 * of power supply. If this happens during device probe then it must
+	 * not access internal data of device (because probe did not end).
+	 */
 	int (*property_is_writeable)(struct power_supply *psy,
 				     enum power_supply_property psp);
 	void (*external_power_changed)(struct power_supply *psy);
-- 
cgit v1.2.3


From fe27e1dfe9962b07215ee01445926306ddbb7c25 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Tue, 9 Jun 2015 23:37:56 +0200
Subject: power: Add devm_power_supply_get_by_phandle() helper function

This commit adds a resource-managed version of the
power_supply_get_by_phandle() function.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 include/linux/power_supply.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 0395bcb18ddb..ef9f1592185d 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -292,10 +292,15 @@ extern void power_supply_put(struct power_supply *psy);
 #ifdef CONFIG_OF
 extern struct power_supply *power_supply_get_by_phandle(struct device_node *np,
 							const char *property);
+extern struct power_supply *devm_power_supply_get_by_phandle(
+				    struct device *dev, const char *property);
 #else /* !CONFIG_OF */
 static inline struct power_supply *
 power_supply_get_by_phandle(struct device_node *np, const char *property)
 { return NULL; }
+static inline struct power_supply *
+devm_power_supply_get_by_phandle(struct device *dev, const char *property)
+{ return NULL; }
 #endif /* CONFIG_OF */
 extern void power_supply_changed(struct power_supply *psy);
 extern int power_supply_am_i_supplied(struct power_supply *psy);
-- 
cgit v1.2.3


From b064a8fa77dfead647564c46ac8fc5b13bd1ab73 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 10 Jun 2015 01:33:36 +0200
Subject: ACPI / init: Switch over platform to the ACPI mode later

Commit 73f7d1ca3263 "ACPI / init: Run acpi_early_init() before
timekeeping_init()" moved the ACPI subsystem initialization,
including the ACPI mode enabling, to an earlier point in the
initialization sequence, to allow the timekeeping subsystem
use ACPI early.  Unfortunately, that resulted in boot regressions
on some systems and the early ACPI initialization was moved toward
its original position in the kernel initialization code by commit
c4e1acbb35e4 "ACPI / init: Invoke early ACPI initialization later".

However, that turns out to be insufficient, as boot is still broken
on the Tyan S8812 mainboard.

To fix that issue, split the ACPI early initialization code into
two pieces so the majority of it still located in acpi_early_init()
and the part switching over the platform into the ACPI mode goes into
a new function, acpi_subsystem_init(), executed at the original early
ACPI initialization spot.

That fixes the Tyan S8812 boot problem, but still allows ACPI
tables to be loaded earlier which is useful to the EFI code in
efi_enter_virtual_mode().

Link: https://bugzilla.kernel.org/show_bug.cgi?id=97141
Fixes: 73f7d1ca3263 "ACPI / init: Run acpi_early_init() before timekeeping_init()"
Reported-and-tested-by: Marius Tolzmann <tolzmann@molgen.mpg.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Toshi Kani <toshi.kani@hp.com>
Reviewed-by: Hanjun Guo <hanjun.guo@linaro.org>
Reviewed-by: Lee, Chun-Yi <jlee@suse.com>
---
 include/linux/acpi.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index e4da5e35e29c..4550be3bb63b 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -440,6 +440,7 @@ extern acpi_status acpi_pci_osc_control_set(acpi_handle handle,
 #define ACPI_OST_SC_INSERT_NOT_SUPPORTED	0x82
 
 extern void acpi_early_init(void);
+extern void acpi_subsystem_init(void);
 
 extern int acpi_nvs_register(__u64 start, __u64 size);
 
@@ -494,6 +495,7 @@ static inline const char *acpi_dev_name(struct acpi_device *adev)
 }
 
 static inline void acpi_early_init(void) { }
+static inline void acpi_subsystem_init(void) { }
 
 static inline int early_acpi_boot_init(void)
 {
-- 
cgit v1.2.3


From 4f822c625f9c68267ffee7519519e304858a46c3 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 10 Jun 2015 18:07:57 -0700
Subject: net: phy: broadcom: include phy.h for brcmphy.h

We utilize inline functions from the PHY library, make sure that we do
include phy.h in brcmphy.h in order for the code including brcmphy.h not
to have to resolve this inclusion dependency.

Fixes: 705314797b8b ("net: phy: broadcom: move shadow 0x1C register accessors to brcmphy.h")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/brcmphy.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 656da2a12ffe..abb6106f839d 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -1,6 +1,8 @@
 #ifndef _LINUX_BRCMPHY_H
 #define _LINUX_BRCMPHY_H
 
+#include <linux/phy.h>
+
 #define PHY_ID_BCM50610			0x0143bd60
 #define PHY_ID_BCM50610M		0x0143bd70
 #define PHY_ID_BCM5241			0x0143bc30
-- 
cgit v1.2.3


From 8bc84b79265f66d5af736473db582e74b28e099d Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 10 Jun 2015 18:07:58 -0700
Subject: net: phy: broadcom: define Broadcom pseudo-PHY address in brcmphy.h

Define the pseudo-PHY address (30) which is used by all Broadcom
Ethernet switches in a shared header file.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/brcmphy.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index abb6106f839d..697ca7795bd9 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -3,6 +3,11 @@
 
 #include <linux/phy.h>
 
+/* All Broadcom Ethernet switches have a pseudo-PHY at address 30 which is used
+ * to configure the switch internal registers via MDIO accesses.
+ */
+#define BRCM_PSEUDO_PHY_ADDR           30
+
 #define PHY_ID_BCM50610			0x0143bd60
 #define PHY_ID_BCM50610M		0x0143bd70
 #define PHY_ID_BCM5241			0x0143bc30
-- 
cgit v1.2.3


From d290f1e70d85a9a4d124594c6a3d769329960bdc Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Thu, 28 May 2015 18:41:36 +0200
Subject: iommu: Introduce iommu_request_dm_for_dev()

This function can be called by an IOMMU driver to request
that a device's default domain is direct mapped.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index b944b2be4fa2..dc767f7c3704 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -225,6 +225,7 @@ extern void iommu_set_fault_handler(struct iommu_domain *domain,
 
 extern void iommu_get_dm_regions(struct device *dev, struct list_head *list);
 extern void iommu_put_dm_regions(struct device *dev, struct list_head *list);
+extern int iommu_request_dm_for_dev(struct device *dev);
 
 extern int iommu_attach_group(struct iommu_domain *domain,
 			      struct iommu_group *group);
@@ -411,6 +412,11 @@ static inline void iommu_put_dm_regions(struct device *dev,
 {
 }
 
+static inline int iommu_request_dm_for_dev(struct device *dev)
+{
+	return -ENODEV;
+}
+
 static inline int iommu_attach_group(struct iommu_domain *domain,
 				     struct iommu_group *group)
 {
-- 
cgit v1.2.3


From dfabde206aa10ae71a89ba75e68b1f58a6336a05 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Mon, 11 May 2015 17:08:50 +0100
Subject: mailbox: Add ability for clients to request channels by name

This patch supplies a new framework API; mbox_request_channel_byname().

It works by supplying the usual client pointer as the first argument and
a string as the second.  The API will search the client's node for a
'mbox-names' property then request a channel in the normal way using the
requested string's index as the expected second 'index' argument.

Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 include/linux/mailbox_client.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mailbox_client.h b/include/linux/mailbox_client.h
index 1726ccbd8009..44348710953f 100644
--- a/include/linux/mailbox_client.h
+++ b/include/linux/mailbox_client.h
@@ -40,6 +40,8 @@ struct mbox_client {
 	void (*tx_done)(struct mbox_client *cl, void *mssg, int r);
 };
 
+struct mbox_chan *mbox_request_channel_byname(struct mbox_client *cl,
+					      const char *name);
 struct mbox_chan *mbox_request_channel(struct mbox_client *cl, int index);
 int mbox_send_message(struct mbox_chan *chan, void *mssg);
 void mbox_client_txdone(struct mbox_chan *chan, int r); /* atomic */
-- 
cgit v1.2.3


From dc14bdef8762a8098b1da881b611d722e24fe787 Mon Sep 17 00:00:00 2001
From: Vincent Cuissard <cuissard@marvell.com>
Date: Thu, 11 Jun 2015 14:00:19 +0200
Subject: NFC: nfcmrvl: add platform_data and DT configuration

Declare nfcmrvl platform_data structure and few DT parameters
for nfcmrvl driver.

Signed-off-by: Vincent Cuissard <cuissard@marvell.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/platform_data/nfcmrvl.h | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 include/linux/platform_data/nfcmrvl.h

(limited to 'include/linux')

diff --git a/include/linux/platform_data/nfcmrvl.h b/include/linux/platform_data/nfcmrvl.h
new file mode 100644
index 000000000000..106cfe5ed589
--- /dev/null
+++ b/include/linux/platform_data/nfcmrvl.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2015, Marvell International Ltd.
+ *
+ * This software file (the "File") is distributed by Marvell International
+ * Ltd. under the terms of the GNU General Public License Version 2, June 1991
+ * (the "License").  You may use, redistribute and/or modify this File in
+ * accordance with the terms and conditions of the License, a copy of which
+ * is available on the worldwide web at
+ * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
+ *
+ * THE FILE IS DISTRIBUTED AS-IS, WITHOUT WARRANTY OF ANY KIND, AND THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
+ * ARE EXPRESSLY DISCLAIMED.  The License provides additional details about
+ * this warranty disclaimer.
+ */
+
+#ifndef _NFCMRVL_PTF_H_
+#define _NFCMRVL_PTF_H_
+
+struct nfcmrvl_platform_data {
+	/*
+	 * Generic
+	 */
+
+	/* GPIO that is wired to RESET_N signal */
+	unsigned int reset_n_io;
+	/* Tell if transport is muxed in HCI one */
+	unsigned int hci_muxed;
+};
+
+#endif /* _NFCMRVL_PTF_H_ */
-- 
cgit v1.2.3


From e097dc624f784debbde49701a493bf920bc422c7 Mon Sep 17 00:00:00 2001
From: Vincent Cuissard <cuissard@marvell.com>
Date: Thu, 11 Jun 2015 14:00:20 +0200
Subject: NFC: nfcmrvl: add UART driver

Add support of Marvell NFC chip controlled over UART

Signed-off-by: Vincent Cuissard <cuissard@marvell.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/platform_data/nfcmrvl.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/nfcmrvl.h b/include/linux/platform_data/nfcmrvl.h
index 106cfe5ed589..ac91707dabcb 100644
--- a/include/linux/platform_data/nfcmrvl.h
+++ b/include/linux/platform_data/nfcmrvl.h
@@ -26,6 +26,15 @@ struct nfcmrvl_platform_data {
 	unsigned int reset_n_io;
 	/* Tell if transport is muxed in HCI one */
 	unsigned int hci_muxed;
+
+	/*
+	 * UART specific
+	 */
+
+	/* Tell if UART needs flow control at init */
+	unsigned int flow_control;
+	/* Tell if firmware supports break control for power management */
+	unsigned int break_control;
 };
 
 #endif /* _NFCMRVL_PTF_H_ */
-- 
cgit v1.2.3


From facc9699f0fe7d65a92cc09e175662659306066d Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Thu, 11 Jun 2015 14:47:27 +0300
Subject: net/mlx5e: Fix HW MTU settings

Previously we configured HW MTU to be netdev->mtu, actually we
need to configure netdev->mtu + (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN).

Also, query MTU can not fail, hence make the relevant helper a
void functionm, add mlx5e_set_dev_port_mtu, helper function to
handle MTU setting.

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/driver.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 6093bde16b94..c0930f8d7021 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -756,11 +756,11 @@ int mlx5_set_port_status(struct mlx5_core_dev *dev,
 			 enum mlx5_port_status status);
 int mlx5_query_port_status(struct mlx5_core_dev *dev, u8 *status);
 
-int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu);
-int mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu,
-			    u8 local_port);
-int mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu,
-			     u8 local_port);
+int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu, u8 port);
+void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu, u8 port);
+void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu,
+			      u8 port);
+
 int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev,
 			      u8 *vl_hw_cap, u8 local_port);
 
-- 
cgit v1.2.3


From fc11fbf9a785b25c5d07f05a30d4169ec39818da Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Thu, 11 Jun 2015 14:47:28 +0300
Subject: net/mlx5e: Add HW cacheline start padding

Enable HW cacheline start padding and align RX WQE size to cacheline
while considering HW start padding. Also, fix dma_unmap call to use
the correct SKB data buffer size.

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/device.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index b2c43508a737..b943cd9e2097 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -131,6 +131,10 @@ enum {
 	MLX5_INLINE_SEG = 0x80000000,
 };
 
+enum {
+	MLX5_HW_START_PADDING = MLX5_INLINE_SEG,
+};
+
 enum {
 	MLX5_MIN_PKEY_TABLE_SIZE = 128,
 	MLX5_MAX_LOG_PKEY_TABLE  = 5,
-- 
cgit v1.2.3


From 833f32d763028c1bb371c64f457788b933773b3e Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Thu, 11 Jun 2015 15:54:55 -0700
Subject: time: Prevent early expiry of hrtimers[CLOCK_REALTIME] at the leap
 second edge

Currently, leapsecond adjustments are done at tick time. As a result,
the leapsecond was applied at the first timer tick *after* the
leapsecond (~1-10ms late depending on HZ), rather then exactly on the
second edge.

This was in part historical from back when we were always tick based,
but correcting this since has been avoided since it adds extra
conditional checks in the gettime fastpath, which has performance
overhead.

However, it was recently pointed out that ABS_TIME CLOCK_REALTIME
timers set for right after the leapsecond could fire a second early,
since some timers may be expired before we trigger the timekeeping
timer, which then applies the leapsecond.

This isn't quite as bad as it sounds, since behaviorally it is similar
to what is possible w/ ntpd made leapsecond adjustments done w/o using
the kernel discipline. Where due to latencies, timers may fire just
prior to the settimeofday call. (Also, one should note that all
applications using CLOCK_REALTIME timers should always be careful,
since they are prone to quirks from settimeofday() disturbances.)

However, the purpose of having the kernel do the leap adjustment is to
avoid such latencies, so I think this is worth fixing.

So in order to properly keep those timers from firing a second early,
this patch modifies the ntp and timekeeping logic so that we keep
enough state so that the update_base_offsets_now accessor, which
provides the hrtimer core the current time, can check and apply the
leapsecond adjustment on the second edge. This prevents the hrtimer
core from expiring timers too early.

This patch does not modify any other time read path, so no additional
overhead is incurred. However, this also means that the leap-second
continues to be applied at tick time for all other read-paths.

Apologies to Richard Cochran, who pushed for similar changes years
ago, which I resisted due to the concerns about the performance
overhead.

While I suspect this isn't extremely critical, folks who care about
strict leap-second correctness will likely want to watch
this. Potentially a -stable candidate eventually.

Originally-suggested-by: Richard Cochran <richardcochran@gmail.com>
Reported-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Reported-by: Prarit Bhargava <prarit@redhat.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jiri Bohac <jbohac@suse.cz>
Cc: Shuah Khan <shuahkh@osg.samsung.com>
Cc: Ingo Molnar <mingo@kernel.org>
Link: http://lkml.kernel.org/r/1434063297-28657-4-git-send-email-john.stultz@linaro.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/time64.h              | 1 +
 include/linux/timekeeper_internal.h | 2 ++
 2 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/time64.h b/include/linux/time64.h
index 12d4e82b0276..77b5df2acd2a 100644
--- a/include/linux/time64.h
+++ b/include/linux/time64.h
@@ -29,6 +29,7 @@ struct timespec64 {
 #define FSEC_PER_SEC	1000000000000000LL
 
 /* Located here for timespec[64]_valid_strict */
+#define TIME64_MAX			((s64)~((u64)1 << 63))
 #define KTIME_MAX			((s64)~((u64)1 << 63))
 #define KTIME_SEC_MAX			(KTIME_MAX / NSEC_PER_SEC)
 
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index e1f5a1136554..25247220b4b7 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -50,6 +50,7 @@ struct tk_read_base {
  * @offs_tai:		Offset clock monotonic -> clock tai
  * @tai_offset:		The current UTC to TAI offset in seconds
  * @clock_was_set_seq:	The sequence number of clock was set events
+ * @next_leap_ktime:	CLOCK_MONOTONIC time value of a pending leap-second
  * @raw_time:		Monotonic raw base time in timespec64 format
  * @cycle_interval:	Number of clock cycles in one NTP interval
  * @xtime_interval:	Number of clock shifted nano seconds in one NTP
@@ -90,6 +91,7 @@ struct timekeeper {
 	ktime_t			offs_tai;
 	s32			tai_offset;
 	unsigned int		clock_was_set_seq;
+	ktime_t			next_leap_ktime;
 	struct timespec64	raw_time;
 
 	/* The following members are for timekeeping internal use */
-- 
cgit v1.2.3


From 3bf17472226b0041b0c61363bd57a5cadbe620c4 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 9 Jun 2015 13:20:29 +0800
Subject: iommu: dmar: Extend struct irte for VT-d Posted-Interrupts

The IRTE (Interrupt Remapping Table Entry) is either an entry for
remapped or for posted interrupts. The hardware distiguishes between
remapped and posted entries by bit 15 in the low 64 bit of the
IRTE. If cleared the entry is remapped, if set it's posted.

The entries have common fields and dependent on the posted bit fields
with different meanings.

Extend struct irte to handle the differences between remap and posted
mode by having three structs in the unions:

        - Shared
        - Remapped
        - Posted

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Feng Wu <feng.wu@intel.com>
Acked-by: Joerg Roedel <joro@8bytes.org>
Cc: jiang.liu@linux.intel.com
Cc: iommu@lists.linux-foundation.org
Cc: dwmw2@infradead.org
Link: http://lkml.kernel.org/r/1433827237-3382-3-git-send-email-feng.wu@intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/dmar.h | 70 +++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 55 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index 84737565c1fd..0dbcabcb5f0d 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -185,33 +185,73 @@ static inline int dmar_device_remove(void *handle)
 
 struct irte {
 	union {
+		/* Shared between remapped and posted mode*/
 		struct {
-			__u64	present 	: 1,
-				fpd		: 1,
-				dst_mode	: 1,
-				redir_hint	: 1,
-				trigger_mode	: 1,
-				dlvry_mode	: 3,
-				avail		: 4,
-				__reserved_1	: 4,
-				vector		: 8,
-				__reserved_2	: 8,
-				dest_id		: 32;
+			__u64	present		: 1,  /*  0      */
+				fpd		: 1,  /*  1      */
+				__res0		: 6,  /*  2 -  6 */
+				avail		: 4,  /*  8 - 11 */
+				__res1		: 3,  /* 12 - 14 */
+				pst		: 1,  /* 15      */
+				vector		: 8,  /* 16 - 23 */
+				__res2		: 40; /* 24 - 63 */
+		};
+
+		/* Remapped mode */
+		struct {
+			__u64	r_present	: 1,  /*  0      */
+				r_fpd		: 1,  /*  1      */
+				dst_mode	: 1,  /*  2      */
+				redir_hint	: 1,  /*  3      */
+				trigger_mode	: 1,  /*  4      */
+				dlvry_mode	: 3,  /*  5 -  7 */
+				r_avail		: 4,  /*  8 - 11 */
+				r_res0		: 4,  /* 12 - 15 */
+				r_vector	: 8,  /* 16 - 23 */
+				r_res1		: 8,  /* 24 - 31 */
+				dest_id		: 32; /* 32 - 63 */
+		};
+
+		/* Posted mode */
+		struct {
+			__u64	p_present	: 1,  /*  0      */
+				p_fpd		: 1,  /*  1      */
+				p_res0		: 6,  /*  2 -  7 */
+				p_avail		: 4,  /*  8 - 11 */
+				p_res1		: 2,  /* 12 - 13 */
+				p_urgent	: 1,  /* 14      */
+				p_pst		: 1,  /* 15      */
+				p_vector	: 8,  /* 16 - 23 */
+				p_res2		: 14, /* 24 - 37 */
+				pda_l		: 26; /* 38 - 63 */
 		};
 		__u64 low;
 	};
 
 	union {
+		/* Shared between remapped and posted mode*/
 		struct {
-			__u64	sid		: 16,
-				sq		: 2,
-				svt		: 2,
-				__reserved_3	: 44;
+			__u64	sid		: 16,  /* 64 - 79  */
+				sq		: 2,   /* 80 - 81  */
+				svt		: 2,   /* 82 - 83  */
+				__res3		: 44;  /* 84 - 127 */
+		};
+
+		/* Posted mode*/
+		struct {
+			__u64	p_sid		: 16,  /* 64 - 79  */
+				p_sq		: 2,   /* 80 - 81  */
+				p_svt		: 2,   /* 82 - 83  */
+				p_res3		: 12,  /* 84 - 95  */
+				pda_h		: 32;  /* 96 - 127 */
 		};
 		__u64 high;
 	};
 };
 
+#define PDA_LOW_BIT    26
+#define PDA_HIGH_BIT   32
+
 enum {
 	IRQ_REMAP_XAPIC_MODE,
 	IRQ_REMAP_X2APIC_MODE,
-- 
cgit v1.2.3


From bf56027ff4d9e75bf668ae990fe6204d00a23002 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 9 Jun 2015 13:20:30 +0800
Subject: iommu: dmar: Provide helper to copy shared irte fields

Instead of open coding, provide a helper function to copy the shared
irte fields.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: jiang.liu@linux.intel.com
Cc: iommu@lists.linux-foundation.org
Cc: joro@8bytes.org
Cc: dwmw2@infradead.org
Link: http://lkml.kernel.org/r/1433827237-3382-4-git-send-email-feng.wu@intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/dmar.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index 0dbcabcb5f0d..e9bc9292bd3a 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -249,6 +249,18 @@ struct irte {
 	};
 };
 
+static inline void dmar_copy_shared_irte(struct irte *dst, struct irte *src)
+{
+	dst->present	= src->present;
+	dst->fpd	= src->fpd;
+	dst->avail	= src->avail;
+	dst->pst	= src->pst;
+	dst->vector	= src->vector;
+	dst->sid	= src->sid;
+	dst->sq		= src->sq;
+	dst->svt	= src->svt;
+}
+
 #define PDA_LOW_BIT    26
 #define PDA_HIGH_BIT   32
 
-- 
cgit v1.2.3


From 07c09787b26db724c94a912a572a9a4fa66008f3 Mon Sep 17 00:00:00 2001
From: Feng Wu <feng.wu@intel.com>
Date: Tue, 9 Jun 2015 13:20:34 +0800
Subject: iommu, x86: Add cap_pi_support() to detect VT-d PI capability

Add helper function to detect VT-d Posted-Interrupts capability.

Signed-off-by: Feng Wu <feng.wu@intel.com>
Reviewed-by: Jiang Liu <jiang.liu@linux.intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: David Woodhouse <David.Woodhouse@intel.com>
Acked-by: Joerg Roedel <joro@8bytes.org>
Cc: iommu@lists.linux-foundation.org
Cc: dwmw2@infradead.org
Link: http://lkml.kernel.org/r/1433827237-3382-8-git-send-email-feng.wu@intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/intel-iommu.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 0af9b03e2b1c..0c251be39836 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -87,6 +87,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
 /*
  * Decoding Capability Register
  */
+#define cap_pi_support(c)	(((c) >> 59) & 1)
 #define cap_read_drain(c)	(((c) >> 55) & 1)
 #define cap_write_drain(c)	(((c) >> 54) & 1)
 #define cap_max_amask_val(c)	(((c) >> 48) & 0x3f)
-- 
cgit v1.2.3


From b6a00fae9760a49114016e4764d09e522a5ba5b6 Mon Sep 17 00:00:00 2001
From: Tim Kryger <tim.kryger@gmail.com>
Date: Tue, 26 May 2015 13:08:16 -0700
Subject: pwm: Add pwmchip_add_with_polarity() API

Add a new function to register a PWM chip with channels that have their
initial polarity as specified by an additional parameter. This benefits
drivers of controllers that by default operate with inversed polarity
by removing the need to modify the polarity during initialization.

Signed-off-by: Tim Kryger <tim.kryger@gmail.com>
Signed-off-by: Jonathan Richardson <jonathar@broadcom.com>
[thierry.reding@gmail.com: export pwmchip_add_with_polarity()]
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 include/linux/pwm.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index cfe2d8df5be0..36262d08a9da 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -182,6 +182,8 @@ struct pwm_chip {
 int pwm_set_chip_data(struct pwm_device *pwm, void *data);
 void *pwm_get_chip_data(struct pwm_device *pwm);
 
+int pwmchip_add_with_polarity(struct pwm_chip *chip,
+			      enum pwm_polarity polarity);
 int pwmchip_add(struct pwm_chip *chip);
 int pwmchip_remove(struct pwm_chip *chip);
 struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip,
@@ -217,6 +219,11 @@ static inline int pwmchip_add(struct pwm_chip *chip)
 	return -EINVAL;
 }
 
+static inline int pwmchip_add_inversed(struct pwm_chip *chip)
+{
+	return -EINVAL;
+}
+
 static inline int pwmchip_remove(struct pwm_chip *chip)
 {
 	return -EINVAL;
-- 
cgit v1.2.3


From 22a10bca280073f81e9e2d9fed6f90a3bcf00236 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Thu, 11 Jun 2015 17:37:03 -0700
Subject: regulator: Add system_load constraint

Some regulators have a fixed load that isn't captured by
consumers that the kernel knows about. Add a constraint to
support this.

Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regulator/machine.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h
index b07562e082c4..01526559c8c3 100644
--- a/include/linux/regulator/machine.h
+++ b/include/linux/regulator/machine.h
@@ -75,6 +75,7 @@ struct regulator_state {
  *
  * @min_uA: Smallest current consumers may set.
  * @max_uA: Largest current consumers may set.
+ * @system_load: Load that isn't captured by any consumer requests.
  *
  * @valid_modes_mask: Mask of modes which may be configured by consumers.
  * @valid_ops_mask: Operations which may be performed by consumers.
@@ -112,6 +113,8 @@ struct regulation_constraints {
 	int min_uA;
 	int max_uA;
 
+	int system_load;
+
 	/* valid regulator operating modes for this machine */
 	unsigned int valid_modes_mask;
 
-- 
cgit v1.2.3


From 72b31f7271df34c6aab36c01305287924826678f Mon Sep 17 00:00:00 2001
From: Bernhard Thaler <bernhard.thaler@wvnet.at>
Date: Sat, 30 May 2015 15:27:40 +0200
Subject: netfilter: bridge: detect NAT66 correctly and change MAC address

IPv4 iptables allows to REDIRECT/DNAT/SNAT any traffic over a bridge.

e.g. REDIRECT
$ sysctl -w net.bridge.bridge-nf-call-iptables=1
$ iptables -t nat -A PREROUTING -p tcp -m tcp --dport 8080 \
  -j REDIRECT --to-ports 81

This does not work with ip6tables on a bridge in NAT66 scenario
because the REDIRECT/DNAT/SNAT is not correctly detected.

The bridge pre-routing (finish) netfilter hook has to check for a possible
redirect and then fix the destination mac address. This allows to use the
ip6tables rules for local REDIRECT/DNAT/SNAT REDIRECT similar to the IPv4
iptables version.

e.g. REDIRECT
$ sysctl -w net.bridge.bridge-nf-call-ip6tables=1
$ ip6tables -t nat -A PREROUTING -p tcp -m tcp --dport 8080 \
  -j REDIRECT --to-ports 81

This patch makes it possible to use IPv6 NAT66 on a bridge. It was tested
on a bridge with two interfaces using SNAT/DNAT NAT66 rules.

Reported-by: Artie Hamilton <artiemhamilton@yahoo.com>
Signed-off-by: Sven Eckelmann <sven@open-mesh.com>
[bernhard.thaler@wvnet.at: rebased, add indirect call to ip6_route_input()]
[bernhard.thaler@wvnet.at: rebased, split into separate patches]
Signed-off-by: Bernhard Thaler <bernhard.thaler@wvnet.at>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_ipv6.h | 1 +
 include/linux/skbuff.h         | 6 +++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h
index 64dad1cc1a4b..e2d19694ee8f 100644
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -25,6 +25,7 @@ void ipv6_netfilter_fini(void);
 struct nf_ipv6_ops {
 	int (*chk_addr)(struct net *net, const struct in6_addr *addr,
 			const struct net_device *dev, int strict);
+	void (*route_input)(struct sk_buff *skb);
 };
 
 extern const struct nf_ipv6_ops __rcu *nf_ipv6_ops;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index cc612fc0a894..f70fc0e6bf7b 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -36,6 +36,7 @@
 #include <linux/sched.h>
 #include <net/flow_dissector.h>
 #include <linux/splice.h>
+#include <linux/in6.h>
 
 /* A. Checksumming of received packets by device.
  *
@@ -179,7 +180,10 @@ struct nf_bridge_info {
 		struct net_device *physoutdev;
 		char neigh_header[8];
 	};
-	__be32			ipv4_daddr;
+	union {
+		__be32          ipv4_daddr;
+		struct in6_addr ipv6_daddr;
+	};
 };
 #endif
 
-- 
cgit v1.2.3


From 411ffb4fde80705a9a8db4c2d38dbeef6f5bd689 Mon Sep 17 00:00:00 2001
From: Bernhard Thaler <bernhard.thaler@wvnet.at>
Date: Sat, 30 May 2015 15:28:28 +0200
Subject: netfilter: bridge: refactor frag_max_size

Currently frag_max_size is member of br_input_skb_cb and copied back and
forth using IPCB(skb) and BR_INPUT_SKB_CB(skb) each time it is changed or
used.

Attach frag_max_size to nf_bridge_info and set value in pre_routing and
forward functions. Use its value in forward and xmit functions.

Signed-off-by: Bernhard Thaler <bernhard.thaler@wvnet.at>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/skbuff.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f70fc0e6bf7b..32b105e682b3 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -174,6 +174,7 @@ struct nf_bridge_info {
 		BRNF_PROTO_PPPOE
 	} orig_proto:8;
 	bool			pkt_otherhost;
+	__u16			frag_max_size;
 	unsigned int		mask;
 	struct net_device	*physindev;
 	union {
-- 
cgit v1.2.3


From 23c779b9f9161d6568d3b2fca06e70ad182c480c Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Thu, 11 Jun 2015 17:37:04 -0700
Subject: regulator: Add pull down support

Some regulators need to be configured to pull down a resistor
when the regulator is disabled. Add an op (set_pull_down) and a
DT property + constraint to support this.

Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regulator/driver.h  | 5 +++++
 include/linux/regulator/machine.h | 2 ++
 2 files changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index fffa688ac3a7..76144a337ff7 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -121,6 +121,9 @@ struct regulator_linear_range {
  * @set_suspend_mode: Set the operating mode for the regulator when the
  *                    system is suspended.
  *
+ * @set_pull_down: Configure the regulator to pull down when the regulator
+ *		   is disabled.
+ *
  * This struct describes regulator operations which can be implemented by
  * regulator chip drivers.
  */
@@ -187,6 +190,8 @@ struct regulator_ops {
 
 	/* set regulator suspend operating mode (defined in consumer.h) */
 	int (*set_suspend_mode) (struct regulator_dev *, unsigned int mode);
+
+	int (*set_pull_down) (struct regulator_dev *);
 };
 
 /*
diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h
index 01526559c8c3..8ffb0619a03c 100644
--- a/include/linux/regulator/machine.h
+++ b/include/linux/regulator/machine.h
@@ -87,6 +87,7 @@ struct regulator_state {
  *           applied.
  * @apply_uV: Apply the voltage constraint when initialising.
  * @ramp_disable: Disable ramp delay when initialising or when setting voltage.
+ * @pull_down: Enable pull down when regulator is disabled.
  *
  * @input_uV: Input voltage for regulator when supplied by another regulator.
  *
@@ -141,6 +142,7 @@ struct regulation_constraints {
 	unsigned boot_on:1;	/* bootloader/firmware enabled regulator */
 	unsigned apply_uV:1;	/* apply uV constraint if min == max */
 	unsigned ramp_disable:1; /* disable ramp delay */
+	unsigned pull_down:1;	/* pull down resistor when regulator off */
 };
 
 /**
-- 
cgit v1.2.3


From efb6de9b4ba0092b2c55f6a52d16294a8a698edd Mon Sep 17 00:00:00 2001
From: Bernhard Thaler <bernhard.thaler@wvnet.at>
Date: Sat, 30 May 2015 15:30:16 +0200
Subject: netfilter: bridge: forward IPv6 fragmented packets

IPv6 fragmented packets are not forwarded on an ethernet bridge
with netfilter ip6_tables loaded. e.g. steps to reproduce

1) create a simple bridge like this

        modprobe br_netfilter
        brctl addbr br0
        brctl addif br0 eth0
        brctl addif br0 eth2
        ifconfig eth0 up
        ifconfig eth2 up
        ifconfig br0 up

2) place a host with an IPv6 address on each side of the bridge

        set IPv6 address on host A:
        ip -6 addr add fd01:2345:6789:1::1/64 dev eth0

        set IPv6 address on host B:
        ip -6 addr add fd01:2345:6789:1::2/64 dev eth0

3) run a simple ping command on host A with packets > MTU

        ping6 -s 4000 fd01:2345:6789:1::2

4) wait some time and run e.g. "ip6tables -t nat -nvL" on the bridge

IPv6 fragmented packets traverse the bridge cleanly until somebody runs.
"ip6tables -t nat -nvL". As soon as it is run (and netfilter modules are
loaded) IPv6 fragmented packets do not traverse the bridge any more (you
see no more responses in ping's output).

After applying this patch IPv6 fragmented packets traverse the bridge
cleanly in above scenario.

Signed-off-by: Bernhard Thaler <bernhard.thaler@wvnet.at>
[pablo@netfilter.org: small changes to br_nf_dev_queue_xmit]
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_ipv6.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h
index e2d19694ee8f..8b7d28f3aada 100644
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -26,6 +26,8 @@ struct nf_ipv6_ops {
 	int (*chk_addr)(struct net *net, const struct in6_addr *addr,
 			const struct net_device *dev, int strict);
 	void (*route_input)(struct sk_buff *skb);
+	int (*fragment)(struct sock *sk, struct sk_buff *skb,
+			int (*output)(struct sock *, struct sk_buff *));
 };
 
 extern const struct nf_ipv6_ops __rcu *nf_ipv6_ops;
-- 
cgit v1.2.3


From 33b1f31392861947fa2a2a57c3a39ab63b8c9f9d Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 5 Jun 2015 13:28:38 +0200
Subject: net: ip_fragment: remove BRIDGE_NETFILTER mtu special handling

since commit d6b915e29f4adea9
("ip_fragment: don't forward defragmented DF packet") the largest
fragment size is available in the IPCB.

Therefore we no longer need to care about 'encapsulation'
overhead of stripped PPPOE/VLAN headers since ip_do_fragment
doesn't use device mtu in such cases.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_bridge.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index f2fdb5a52070..6d80fc686323 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -20,13 +20,6 @@ enum nf_br_hook_priorities {
 #define BRNF_BRIDGED_DNAT		0x02
 #define BRNF_NF_BRIDGE_PREROUTING	0x08
 
-static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
-{
-	if (skb->nf_bridge->orig_proto == BRNF_PROTO_PPPOE)
-		return PPPOE_SES_HLEN;
-	return 0;
-}
-
 int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb);
 
 static inline void br_drop_fake_rtable(struct sk_buff *skb)
-- 
cgit v1.2.3


From 57f66b78860968fc7eddc9ce25f8e57f7e5000bd Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Thu, 11 Jun 2015 17:37:05 -0700
Subject: regulator: Add soft start support

Some regulators support a "soft start" feature where the voltage
ramps up slowly when the regulator is enabled. Add an op
(set_soft_start) and a DT property + constraint to support this.

Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regulator/driver.h  | 2 ++
 include/linux/regulator/machine.h | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index 76144a337ff7..e0635d0894aa 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -161,6 +161,8 @@ struct regulator_ops {
 				     unsigned int old_selector,
 				     unsigned int new_selector);
 
+	int (*set_soft_start) (struct regulator_dev *);
+
 	/* report regulator status ... most other accessors report
 	 * control inputs, this reports results of combining inputs
 	 * from Linux (and other sources) with the actual load.
diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h
index 8ffb0619a03c..7f7d0a3fe1e1 100644
--- a/include/linux/regulator/machine.h
+++ b/include/linux/regulator/machine.h
@@ -142,6 +142,7 @@ struct regulation_constraints {
 	unsigned boot_on:1;	/* bootloader/firmware enabled regulator */
 	unsigned apply_uV:1;	/* apply uV constraint if min == max */
 	unsigned ramp_disable:1; /* disable ramp delay */
+	unsigned soft_start:1;	/* ramp voltage slowly */
 	unsigned pull_down:1;	/* pull down resistor when regulator off */
 };
 
-- 
cgit v1.2.3


From 36e4f839de59b6216a16cdf5c1d3263f4dbd9421 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Thu, 11 Jun 2015 17:37:06 -0700
Subject: regulator: Add input current limit support

Some regulators can limit their input current (typically annotated
as ilim). Add an op (set_input_current_limit) and a DT property +
constraint to support this.

Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regulator/driver.h  | 3 +++
 include/linux/regulator/machine.h | 2 ++
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index e0635d0894aa..125264f8be93 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -91,6 +91,7 @@ struct regulator_linear_range {
  * @set_current_limit: Configure a limit for a current-limited regulator.
  *                     The driver should select the current closest to max_uA.
  * @get_current_limit: Get the configured limit for a current-limited regulator.
+ * @set_input_current_limit: Configure an input limit.
  *
  * @set_mode: Set the configured operating mode for the regulator.
  * @get_mode: Get the configured operating mode for the regulator.
@@ -145,6 +146,8 @@ struct regulator_ops {
 				 int min_uA, int max_uA);
 	int (*get_current_limit) (struct regulator_dev *);
 
+	int (*set_input_current_limit) (struct regulator_dev *, int lim_uA);
+
 	/* enable/disable regulator */
 	int (*enable) (struct regulator_dev *);
 	int (*disable) (struct regulator_dev *);
diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h
index 7f7d0a3fe1e1..85a3b457de51 100644
--- a/include/linux/regulator/machine.h
+++ b/include/linux/regulator/machine.h
@@ -75,6 +75,7 @@ struct regulator_state {
  *
  * @min_uA: Smallest current consumers may set.
  * @max_uA: Largest current consumers may set.
+ * @ilim_uA: Maximum input current.
  * @system_load: Load that isn't captured by any consumer requests.
  *
  * @valid_modes_mask: Mask of modes which may be configured by consumers.
@@ -113,6 +114,7 @@ struct regulation_constraints {
 	/* current output range (inclusive) - for current control */
 	int min_uA;
 	int max_uA;
+	int ilim_uA;
 
 	int system_load;
 
-- 
cgit v1.2.3


From 71ae0dff02d756e4d2ca710b79f2ff5390029a5f Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 11 Jun 2015 01:34:54 +0200
Subject: netfilter: xtables: use percpu rule counters

The binary arp/ip/ip6tables ruleset is stored per cpu.

The only reason left as to why we need percpu duplication are the rule
counters embedded into ipt_entry et al -- since each cpu has its own copy
of the rules, all counters can be lockless.

The downside is that the more cpus are supported, the more memory is
required.  Rules are not just duplicated per online cpu but for each
possible cpu, i.e. if maxcpu is 144, then rule is duplicated 144 times,
not for the e.g. 64 cores present.

To save some memory and also improve utilization of shared caches it
would be preferable to only store the rule blob once.

So we first need to separate counters and the rule blob.

Instead of using entry->counters, allocate this percpu and store the
percpu address in entry->counters.pcnt on CONFIG_SMP.

This change makes no sense as-is; it is merely an intermediate step to
remove the percpu duplication of the rule set in a followup patch.

Suggested-by: Eric Dumazet <edumazet@google.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Reported-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/x_tables.h | 49 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 09f38206c18f..b77ab9f17641 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -353,6 +353,55 @@ static inline unsigned long ifname_compare_aligned(const char *_a,
 	return ret;
 }
 
+
+/* On SMP, ip(6)t_entry->counters.pcnt holds address of the
+ * real (percpu) counter.  On !SMP, its just the packet count,
+ * so nothing needs to be done there.
+ *
+ * xt_percpu_counter_alloc returns the address of the percpu
+ * counter, or 0 on !SMP.
+ *
+ * Hence caller must use IS_ERR_VALUE to check for error, this
+ * allows us to return 0 for single core systems without forcing
+ * callers to deal with SMP vs. NONSMP issues.
+ */
+static inline u64 xt_percpu_counter_alloc(void)
+{
+	if (nr_cpu_ids > 1) {
+		void __percpu *res = alloc_percpu(struct xt_counters);
+
+		if (res == NULL)
+			return (u64) -ENOMEM;
+
+		return (__force u64) res;
+	}
+
+	return 0;
+}
+static inline void xt_percpu_counter_free(u64 pcnt)
+{
+	if (nr_cpu_ids > 1)
+		free_percpu((void __percpu *) pcnt);
+}
+
+static inline struct xt_counters *
+xt_get_this_cpu_counter(struct xt_counters *cnt)
+{
+	if (nr_cpu_ids > 1)
+		return this_cpu_ptr((void __percpu *) cnt->pcnt);
+
+	return cnt;
+}
+
+static inline struct xt_counters *
+xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu)
+{
+	if (nr_cpu_ids > 1)
+		return per_cpu_ptr((void __percpu *) cnt->pcnt, cpu);
+
+	return cnt;
+}
+
 struct nf_hook_ops *xt_hook_link(const struct xt_table *, nf_hookfn *);
 void xt_hook_unlink(const struct xt_table *, struct nf_hook_ops *);
 
-- 
cgit v1.2.3


From 482cfc318559e2527dfd8513582d2fdb276e47c2 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 11 Jun 2015 01:34:55 +0200
Subject: netfilter: xtables: avoid percpu ruleset duplication

We store the rule blob per (possible) cpu.  Unfortunately this means we can
waste lot of memory on big smp machines. ipt_entry structure ('rule head')
is 112 byte, so e.g. with maxcpu=64 one single rule eats
close to 8k RAM.

Since previous patch made counters percpu it appears there is nothing
left in the rule blob that needs to be percpu.

On my test system (144 possible cpus, 400k dummy rules) this
change saves close to 9 Gigabyte of RAM.

Reported-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/x_tables.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index b77ab9f17641..9969d79dcde1 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -224,9 +224,9 @@ struct xt_table_info {
 	unsigned int stacksize;
 	unsigned int __percpu *stackptr;
 	void ***jumpstack;
-	/* ipt_entry tables: one per CPU */
+
 	/* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */
-	void *entries[1];
+	void *entries;
 };
 
 #define XT_TABLE_INFO_SZ (offsetof(struct xt_table_info, entries) \
-- 
cgit v1.2.3


From 87d001ef5366c4a24f7a1340246c4ce68190581c Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@free-electrons.com>
Date: Wed, 27 May 2015 16:01:52 +0200
Subject: dmaengine: Move icg helpers to global header

Now that we can have ICGs set for both the source and destination (using
the icg field of struct data_chunk) or for only the source or the
destination (using the dst_icg or src_icg respectively), and that these
fields can be ignored depending on other parameters (src_inc, src_sgl,
etc.), the logic to get the actual ICG value can be quite tricky.

The XDMAC driver was already implementing it, but since we will need it in
other drivers, we can move it to the main header file.

Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Acked-by: Ludovic Desroches <ludovic.desroches@atmel.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 include/linux/dmaengine.h | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index ad419757241f..499c530bcbaa 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -874,6 +874,33 @@ static inline int dma_maxpq(struct dma_device *dma, enum dma_ctrl_flags flags)
 	BUG();
 }
 
+static inline size_t dmaengine_get_icg(bool inc, bool sgl, size_t icg,
+				      size_t dir_icg)
+{
+	if (inc) {
+		if (dir_icg)
+			return dir_icg;
+		else if (sgl)
+			return icg;
+	}
+
+	return 0;
+}
+
+static inline size_t dmaengine_get_dst_icg(struct dma_interleaved_template *xt,
+					   struct data_chunk *chunk)
+{
+	return dmaengine_get_icg(xt->dst_inc, xt->dst_sgl,
+				 chunk->icg, chunk->dst_icg);
+}
+
+static inline size_t dmaengine_get_src_icg(struct dma_interleaved_template *xt,
+					   struct data_chunk *chunk)
+{
+	return dmaengine_get_icg(xt->src_inc, xt->src_sgl,
+				 chunk->icg, chunk->src_icg);
+}
+
 /* --- public DMA engine API --- */
 
 #ifdef CONFIG_DMA_ENGINE
-- 
cgit v1.2.3


From 4983a501afede12f95d26e1e213f8f2e9eda1871 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@free-electrons.com>
Date: Mon, 18 May 2015 13:46:15 +0200
Subject: dmaengine: Revert "drivers/dma: remove unused support for MEMSET
 operations"

This reverts commit 48a9db462d99494583dad829969616ac90a8df4e.

Some platforms actually need support for the memset operations. Bring it back.

Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 include/linux/dmaengine.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index ad419757241f..19face3168b4 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -65,6 +65,7 @@ enum dma_transaction_type {
 	DMA_PQ,
 	DMA_XOR_VAL,
 	DMA_PQ_VAL,
+	DMA_MEMSET,
 	DMA_INTERRUPT,
 	DMA_SG,
 	DMA_PRIVATE,
@@ -570,6 +571,7 @@ struct dma_tx_state {
  * @copy_align: alignment shift for memcpy operations
  * @xor_align: alignment shift for xor operations
  * @pq_align: alignment shift for pq operations
+ * @fill_align: alignment shift for memset operations
  * @dev_id: unique device ID
  * @dev: struct device reference for dma mapping api
  * @src_addr_widths: bit mask of src addr widths the device supports
@@ -588,6 +590,7 @@ struct dma_tx_state {
  * @device_prep_dma_xor_val: prepares a xor validation operation
  * @device_prep_dma_pq: prepares a pq operation
  * @device_prep_dma_pq_val: prepares a pqzero_sum operation
+ * @device_prep_dma_memset: prepares a memset operation
  * @device_prep_dma_interrupt: prepares an end of chain interrupt operation
  * @device_prep_slave_sg: prepares a slave dma operation
  * @device_prep_dma_cyclic: prepare a cyclic dma operation suitable for audio.
@@ -620,6 +623,7 @@ struct dma_device {
 	u8 copy_align;
 	u8 xor_align;
 	u8 pq_align;
+	u8 fill_align;
 	#define DMA_HAS_PQ_CONTINUE (1 << 15)
 
 	int dev_id;
@@ -650,6 +654,9 @@ struct dma_device {
 		struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
 		unsigned int src_cnt, const unsigned char *scf, size_t len,
 		enum sum_check_flags *pqres, unsigned long flags);
+	struct dma_async_tx_descriptor *(*device_prep_dma_memset)(
+		struct dma_chan *chan, dma_addr_t dest, int value, size_t len,
+		unsigned long flags);
 	struct dma_async_tx_descriptor *(*device_prep_dma_interrupt)(
 		struct dma_chan *chan, unsigned long flags);
 	struct dma_async_tx_descriptor *(*device_prep_dma_sg)(
@@ -745,6 +752,17 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_interleaved_dma(
 	return chan->device->device_prep_interleaved_dma(chan, xt, flags);
 }
 
+static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_memset(
+		struct dma_chan *chan, dma_addr_t dest, int value, size_t len,
+		unsigned long flags)
+{
+	if (!chan || !chan->device)
+		return NULL;
+
+	return chan->device->device_prep_dma_memset(chan, dest, value,
+						    len, flags);
+}
+
 static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_sg(
 		struct dma_chan *chan,
 		struct scatterlist *dst_sg, unsigned int dst_nents,
@@ -820,6 +838,12 @@ static inline bool is_dma_pq_aligned(struct dma_device *dev, size_t off1,
 	return dmaengine_check_align(dev->pq_align, off1, off2, len);
 }
 
+static inline bool is_dma_fill_aligned(struct dma_device *dev, size_t off1,
+				       size_t off2, size_t len)
+{
+	return dmaengine_check_align(dev->fill_align, off1, off2, len);
+}
+
 static inline void
 dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue)
 {
-- 
cgit v1.2.3


From 7bbf1dd24b17b9ec4f47c43ce4e05bf190745553 Mon Sep 17 00:00:00 2001
From: Jiang Liu <jiang.liu@linux.intel.com>
Date: Mon, 1 Jun 2015 16:05:10 +0800
Subject: genirq: Enhance irq_data_to_desc() to support hierarchy irqdomain

For irq associated with hierarchy irqdomains, there will be multiple
irq_datas for one irq_desc. So enhance irq_data_to_desc() to support
hierarchy irqdomain. Also export irq_data_to_desc() as an inline
function for later reuse.

Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Link: http://lkml.kernel.org/r/1433145945-789-2-git-send-email-jiang.liu@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irqdesc.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index dd1109fb241e..a113a8dc7438 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -93,6 +93,15 @@ struct irq_desc {
 extern struct irq_desc irq_desc[NR_IRQS];
 #endif
 
+static inline struct irq_desc *irq_data_to_desc(struct irq_data *data)
+{
+#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
+	return irq_to_desc(data->irq);
+#else
+	return container_of(data, struct irq_desc, irq_data);
+#endif
+}
+
 static inline struct irq_data *irq_desc_get_irq_data(struct irq_desc *desc)
 {
 	return &desc->irq_data;
-- 
cgit v1.2.3


From 0d0b4c866bcce647f40d73efe5e90aeeb079050a Mon Sep 17 00:00:00 2001
From: Jiang Liu <jiang.liu@linux.intel.com>
Date: Mon, 1 Jun 2015 16:05:12 +0800
Subject: genirq: Introduce struct irq_common_data to host shared irq data

With the introduction of hierarchy irqdomain, struct irq_data becomes
per-chip instead of per-irq and there may be multiple irq_datas
associated with the same irq. Some per-irq data stored in struct
irq_data now may get duplicated into multiple irq_datas, and causes
inconsistent view.

So introduce struct irq_common_data to host per-irq common data and to
achieve consistent view among irq_chips.

Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Kevin Cernekee <cernekee@gmail.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Link: http://lkml.kernel.org/r/1433145945-789-4-git-send-email-jiang.liu@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h     | 54 +++++++++++++++++++++++++++++--------------------
 include/linux/irqdesc.h |  3 ++-
 2 files changed, 34 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 48cb7d1aa58f..3c7fbe44edae 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -126,13 +126,21 @@ struct msi_desc;
 struct irq_domain;
 
 /**
- * struct irq_data - per irq and irq chip data passed down to chip functions
+ * struct irq_common_data - per irq data shared by all irqchips
+ * @state_use_accessors: status information for irq chip functions.
+ *			Use accessor functions to deal with it
+ */
+struct irq_common_data {
+	unsigned int		state_use_accessors;
+};
+
+/**
+ * struct irq_data - per irq chip data passed down to chip functions
  * @mask:		precomputed bitmask for accessing the chip registers
  * @irq:		interrupt number
  * @hwirq:		hardware interrupt number, local to the interrupt domain
  * @node:		node index useful for balancing
- * @state_use_accessors: status information for irq chip functions.
- *			Use accessor functions to deal with it
+ * @common:		point to data shared by all irqchips
  * @chip:		low level interrupt hardware access
  * @domain:		Interrupt translation domain; responsible for mapping
  *			between hwirq number and linux irq number.
@@ -153,7 +161,7 @@ struct irq_data {
 	unsigned int		irq;
 	unsigned long		hwirq;
 	unsigned int		node;
-	unsigned int		state_use_accessors;
+	struct irq_common_data	*common;
 	struct irq_chip		*chip;
 	struct irq_domain	*domain;
 #ifdef	CONFIG_IRQ_DOMAIN_HIERARCHY
@@ -166,7 +174,7 @@ struct irq_data {
 };
 
 /*
- * Bit masks for irq_data.state
+ * Bit masks for irq_common_data.state_use_accessors
  *
  * IRQD_TRIGGER_MASK		- Mask for the trigger type bits
  * IRQD_SETAFFINITY_PENDING	- Affinity setting is pending
@@ -198,34 +206,36 @@ enum {
 	IRQD_WAKEUP_ARMED		= (1 << 19),
 };
 
+#define __irqd_to_state(d)		((d)->common->state_use_accessors)
+
 static inline bool irqd_is_setaffinity_pending(struct irq_data *d)
 {
-	return d->state_use_accessors & IRQD_SETAFFINITY_PENDING;
+	return __irqd_to_state(d) & IRQD_SETAFFINITY_PENDING;
 }
 
 static inline bool irqd_is_per_cpu(struct irq_data *d)
 {
-	return d->state_use_accessors & IRQD_PER_CPU;
+	return __irqd_to_state(d) & IRQD_PER_CPU;
 }
 
 static inline bool irqd_can_balance(struct irq_data *d)
 {
-	return !(d->state_use_accessors & (IRQD_PER_CPU | IRQD_NO_BALANCING));
+	return !(__irqd_to_state(d) & (IRQD_PER_CPU | IRQD_NO_BALANCING));
 }
 
 static inline bool irqd_affinity_was_set(struct irq_data *d)
 {
-	return d->state_use_accessors & IRQD_AFFINITY_SET;
+	return __irqd_to_state(d) & IRQD_AFFINITY_SET;
 }
 
 static inline void irqd_mark_affinity_was_set(struct irq_data *d)
 {
-	d->state_use_accessors |= IRQD_AFFINITY_SET;
+	__irqd_to_state(d) |= IRQD_AFFINITY_SET;
 }
 
 static inline u32 irqd_get_trigger_type(struct irq_data *d)
 {
-	return d->state_use_accessors & IRQD_TRIGGER_MASK;
+	return __irqd_to_state(d) & IRQD_TRIGGER_MASK;
 }
 
 /*
@@ -233,43 +243,43 @@ static inline u32 irqd_get_trigger_type(struct irq_data *d)
  */
 static inline void irqd_set_trigger_type(struct irq_data *d, u32 type)
 {
-	d->state_use_accessors &= ~IRQD_TRIGGER_MASK;
-	d->state_use_accessors |= type & IRQD_TRIGGER_MASK;
+	__irqd_to_state(d) &= ~IRQD_TRIGGER_MASK;
+	__irqd_to_state(d) |= type & IRQD_TRIGGER_MASK;
 }
 
 static inline bool irqd_is_level_type(struct irq_data *d)
 {
-	return d->state_use_accessors & IRQD_LEVEL;
+	return __irqd_to_state(d) & IRQD_LEVEL;
 }
 
 static inline bool irqd_is_wakeup_set(struct irq_data *d)
 {
-	return d->state_use_accessors & IRQD_WAKEUP_STATE;
+	return __irqd_to_state(d) & IRQD_WAKEUP_STATE;
 }
 
 static inline bool irqd_can_move_in_process_context(struct irq_data *d)
 {
-	return d->state_use_accessors & IRQD_MOVE_PCNTXT;
+	return __irqd_to_state(d) & IRQD_MOVE_PCNTXT;
 }
 
 static inline bool irqd_irq_disabled(struct irq_data *d)
 {
-	return d->state_use_accessors & IRQD_IRQ_DISABLED;
+	return __irqd_to_state(d) & IRQD_IRQ_DISABLED;
 }
 
 static inline bool irqd_irq_masked(struct irq_data *d)
 {
-	return d->state_use_accessors & IRQD_IRQ_MASKED;
+	return __irqd_to_state(d) & IRQD_IRQ_MASKED;
 }
 
 static inline bool irqd_irq_inprogress(struct irq_data *d)
 {
-	return d->state_use_accessors & IRQD_IRQ_INPROGRESS;
+	return __irqd_to_state(d) & IRQD_IRQ_INPROGRESS;
 }
 
 static inline bool irqd_is_wakeup_armed(struct irq_data *d)
 {
-	return d->state_use_accessors & IRQD_WAKEUP_ARMED;
+	return __irqd_to_state(d) & IRQD_WAKEUP_ARMED;
 }
 
 
@@ -280,12 +290,12 @@ static inline bool irqd_is_wakeup_armed(struct irq_data *d)
  */
 static inline void irqd_set_chained_irq_inprogress(struct irq_data *d)
 {
-	d->state_use_accessors |= IRQD_IRQ_INPROGRESS;
+	__irqd_to_state(d) |= IRQD_IRQ_INPROGRESS;
 }
 
 static inline void irqd_clr_chained_irq_inprogress(struct irq_data *d)
 {
-	d->state_use_accessors &= ~IRQD_IRQ_INPROGRESS;
+	__irqd_to_state(d) &= ~IRQD_IRQ_INPROGRESS;
 }
 
 static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index a113a8dc7438..c52d1480f272 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -17,7 +17,7 @@ struct pt_regs;
 
 /**
  * struct irq_desc - interrupt descriptor
- * @irq_data:		per irq and chip data passed down to chip functions
+ * @irq_common_data:	per irq and chip data passed down to chip functions
  * @kstat_irqs:		irq stats per cpu
  * @handle_irq:		highlevel irq-events handler
  * @preflow_handler:	handler called before the flow handler (currently used by sparc)
@@ -47,6 +47,7 @@ struct pt_regs;
  * @name:		flow handler name for /proc/interrupts output
  */
 struct irq_desc {
+	struct irq_common_data	irq_common_data;
 	struct irq_data		irq_data;
 	unsigned int __percpu	*kstat_irqs;
 	irq_flow_handler_t	handle_irq;
-- 
cgit v1.2.3


From 6783011b48096b9a0c239d0f7645f93070b6eefd Mon Sep 17 00:00:00 2001
From: Jiang Liu <jiang.liu@linux.intel.com>
Date: Mon, 1 Jun 2015 16:05:13 +0800
Subject: genirq: Introduce helper function irq_data_get_node()

Introduce helper function irq_data_get_node() and variants thereof to
hide struct irq_data implementation details.

Convert the core code to use them.

Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Kevin Cernekee <cernekee@gmail.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Link: http://lkml.kernel.org/r/1433145945-789-5-git-send-email-jiang.liu@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 3c7fbe44edae..b3b82a5344c8 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -640,6 +640,11 @@ static inline u32 irq_get_trigger_type(unsigned int irq)
 	return d ? irqd_get_trigger_type(d) : 0;
 }
 
+static inline int irq_data_get_node(struct irq_data *d)
+{
+	return d->node;
+}
+
 unsigned int arch_dynirq_lower_bound(unsigned int from);
 
 int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
-- 
cgit v1.2.3


From c64301a230a64dfc2fcf4581cd98a2d703f3c057 Mon Sep 17 00:00:00 2001
From: Jiang Liu <jiang.liu@linux.intel.com>
Date: Mon, 1 Jun 2015 16:05:23 +0800
Subject: genirq: Introduce helper function irq_data_get_affinity_mask()

Introduce helper function irq_data_get_affinity_mask() and
irq_get_affinity_mask() to hide implementation details,
so we could move field 'affinity' from struct irq_data into
struct irq_common_data later.

Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
Acked-by: Russell King <rmk+kernel@arm.linux.org.uk>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Kevin Cernekee <cernekee@gmail.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Link: http://lkml.kernel.org/r/1433145945-789-15-git-send-email-jiang.liu@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index b3b82a5344c8..1e0ccef205ed 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -645,6 +645,18 @@ static inline int irq_data_get_node(struct irq_data *d)
 	return d->node;
 }
 
+static inline struct cpumask *irq_get_affinity_mask(int irq)
+{
+	struct irq_data *d = irq_get_irq_data(irq);
+
+	return d ? d->affinity : NULL;
+}
+
+static inline struct cpumask *irq_data_get_affinity_mask(struct irq_data *d)
+{
+	return d->affinity;
+}
+
 unsigned int arch_dynirq_lower_bound(unsigned int from);
 
 int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
-- 
cgit v1.2.3


From 52033cfb5aab2a54e238e93c9e52f61c2c5708aa Mon Sep 17 00:00:00 2001
From: Matan Barak <matanb@mellanox.com>
Date: Thu, 11 Jun 2015 16:35:26 +0300
Subject: IB/mlx4: Add mmap call to map the hardware clock

In order to read the HCA's cycle counter efficiently in
user space, we need to map the HCA's register.
This is done through mmap call.

Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/linux/mlx4/device.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 83e80ab94500..f94984fb8bb2 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -829,6 +829,12 @@ struct mlx4_dev {
 	struct mlx4_vf_dev     *dev_vfs;
 };
 
+struct mlx4_clock_params {
+	u64 offset;
+	u8 bar;
+	u8 size;
+};
+
 struct mlx4_eqe {
 	u8			reserved1;
 	u8			type;
@@ -1485,4 +1491,7 @@ int mlx4_ACCESS_PTYS_REG(struct mlx4_dev *dev,
 			 enum mlx4_access_reg_method method,
 			 struct mlx4_ptys_reg *ptys_reg);
 
+int mlx4_get_internal_clock_params(struct mlx4_dev *dev,
+				   struct mlx4_clock_params *params);
+
 #endif /* MLX4_DEVICE_H */
-- 
cgit v1.2.3


From c0300089fd2dbeebef5ab9b6d66b4e6cedf8500a Mon Sep 17 00:00:00 2001
From: Yijing Wang <wangyijing@huawei.com>
Date: Tue, 28 Apr 2015 17:32:34 +0800
Subject: PCI: Remove unused pci_scan_bus_parented()

No one uses pci_scan_bus_parented() any more, remove it.

Signed-off-by: Yijing Wang <wangyijing@huawei.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 include/linux/pci.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 353db8dc4c6e..1ec0d5d9723c 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -773,8 +773,6 @@ void pcibios_bus_to_resource(struct pci_bus *bus, struct resource *res,
 void pcibios_scan_specific_bus(int busn);
 struct pci_bus *pci_find_bus(int domain, int busnr);
 void pci_bus_add_devices(const struct pci_bus *bus);
-struct pci_bus *pci_scan_bus_parented(struct device *parent, int bus,
-				      struct pci_ops *ops, void *sysdata);
 struct pci_bus *pci_scan_bus(int bus, struct pci_ops *ops, void *sysdata);
 struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
 				    struct pci_ops *ops, void *sysdata,
-- 
cgit v1.2.3


From 73b6ecdb93e8e77752cae9077c424fcdc6f23c39 Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Fri, 12 Jun 2015 11:10:06 +0900
Subject: extcon: Redefine the unique id of supported external connectors
 without 'enum extcon' type

This patch just redefine the unique id of supported external connectors without
'enum extcon' type. Because unique id would be used on devictree file(*.dts) to
indicate the specific external connectors like key number of input framework.
So, I have the plan to move this definitions to following header file which
includes the unique id of supported external connectors.
- include/dt-bindings/extcon/extcon.h

Fixes: 2a9de9c0f08d ("extcon: Use the unique id for external connector instead of string")
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/extcon.h | 90 +++++++++++++++++++++++---------------------------
 1 file changed, 42 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/extcon.h b/include/linux/extcon.h
index a7b224b20ecc..b16d929fa75f 100644
--- a/include/linux/extcon.h
+++ b/include/linux/extcon.h
@@ -30,41 +30,35 @@
 #include <linux/notifier.h>
 #include <linux/sysfs.h>
 
-enum extcon {
-	EXTCON_NONE		= 0x0,
-
-	/* USB external connector */
-	EXTCON_USB		= 0x1,
-	EXTCON_USB_HOST		= 0x2,
-
-	/* Charger external connector */
-	EXTCON_TA		= 0x10,
-	EXTCON_FAST_CHARGER	= 0x11,
-	EXTCON_SLOW_CHARGER	= 0x12,
-	EXTCON_CHARGE_DOWNSTREAM = 0x13,
-
-	/* Audio/Video external connector */
-	EXTCON_LINE_IN		= 0x20,
-	EXTCON_LINE_OUT		= 0x21,
-	EXTCON_MICROPHONE	= 0x22,
-	EXTCON_HEADPHONE	= 0x23,
-
-	EXTCON_HDMI		= 0x30,
-	EXTCON_MHL		= 0x31,
-	EXTCON_DVI		= 0x32,
-	EXTCON_VGA		= 0x33,
-	EXTCON_SPDIF_IN		= 0x34,
-	EXTCON_SPDIF_OUT	= 0x35,
-	EXTCON_VIDEO_IN		= 0x36,
-	EXTCON_VIDEO_OUT	= 0x37,
-
-	/* Etc external connector */
-	EXTCON_DOCK		= 0x50,
-	EXTCON_JIG		= 0x51,
-	EXTCON_MECHANICAL	= 0x52,
-
-	EXTCON_END,
-};
+/*
+ * Define the unique id of supported external connectors
+ */
+#define EXTCON_NONE			0
+
+#define EXTCON_USB			1	/* USB connector */
+#define EXTCON_USB_HOST			2
+
+#define EXTCON_TA			3	/* Charger connector */
+#define EXTCON_FAST_CHARGER		4
+#define EXTCON_SLOW_CHARGER		5
+#define EXTCON_CHARGE_DOWNSTREAM	6
+
+#define EXTCON_LINE_IN			7	/* Audio/Video connector */
+#define EXTCON_LINE_OUT			8
+#define EXTCON_MICROPHONE		9
+#define EXTCON_HEADPHONE		10
+#define EXTCON_HDMI			11
+#define EXTCON_MHL			12
+#define EXTCON_DVI			13
+#define EXTCON_VGA			14
+#define EXTCON_SPDIF_IN			15
+#define EXTCON_SPDIF_OUT		16
+#define EXTCON_VIDEO_IN			17
+#define EXTCON_VIDEO_OUT		18
+
+#define EXTCON_DOCK			19	/* Misc connector */
+#define EXTCON_JIG			20
+#define EXTCON_MECHANICAL		21
 
 struct extcon_cable;
 
@@ -105,7 +99,7 @@ struct extcon_cable;
 struct extcon_dev {
 	/* Optional user initializing data */
 	const char *name;
-	const enum extcon *supported_cable;
+	const unsigned int *supported_cable;
 	const u32 *mutually_exclusive;
 
 	/* Optional callbacks to override class functions */
@@ -182,10 +176,10 @@ extern struct extcon_dev *extcon_get_extcon_dev(const char *extcon_name);
 /*
  * Following APIs control the memory of extcon device.
  */
-extern struct extcon_dev *extcon_dev_allocate(const enum extcon *cable);
+extern struct extcon_dev *extcon_dev_allocate(const unsigned int *cable);
 extern void extcon_dev_free(struct extcon_dev *edev);
 extern struct extcon_dev *devm_extcon_dev_allocate(struct device *dev,
-						   const enum extcon *cable);
+						   const unsigned int *cable);
 extern void devm_extcon_dev_free(struct device *dev, struct extcon_dev *edev);
 
 /*
@@ -206,8 +200,8 @@ extern int extcon_update_state(struct extcon_dev *edev, u32 mask, u32 state);
  * get/set_cable_state access each bit of the 32b encoded state value.
  * They are used to access the status of each cable based on the cable_name.
  */
-extern int extcon_get_cable_state_(struct extcon_dev *edev, enum extcon id);
-extern int extcon_set_cable_state_(struct extcon_dev *edev, enum extcon id,
+extern int extcon_get_cable_state_(struct extcon_dev *edev, unsigned int id);
+extern int extcon_set_cable_state_(struct extcon_dev *edev, unsigned int id,
 				   bool cable_state);
 
 extern int extcon_get_cable_state(struct extcon_dev *edev,
@@ -234,9 +228,9 @@ extern int extcon_unregister_interest(struct extcon_specific_cable_nb *nb);
  * we do not recommend to use this for normal 'notifiee' device drivers who
  * want to be notified by a specific external port of the notifier.
  */
-extern int extcon_register_notifier(struct extcon_dev *edev, enum extcon id,
+extern int extcon_register_notifier(struct extcon_dev *edev, unsigned int id,
 				    struct notifier_block *nb);
-extern int extcon_unregister_notifier(struct extcon_dev *edev, enum extcon id,
+extern int extcon_unregister_notifier(struct extcon_dev *edev, unsigned int id,
 				    struct notifier_block *nb);
 
 /*
@@ -266,7 +260,7 @@ static inline int devm_extcon_dev_register(struct device *dev,
 static inline void devm_extcon_dev_unregister(struct device *dev,
 					      struct extcon_dev *edev) { }
 
-static inline struct extcon_dev *extcon_dev_allocate(const enum extcon *cable)
+static inline struct extcon_dev *extcon_dev_allocate(const unsigned int *cable)
 {
 	return ERR_PTR(-ENOSYS);
 }
@@ -274,7 +268,7 @@ static inline struct extcon_dev *extcon_dev_allocate(const enum extcon *cable)
 static inline void extcon_dev_free(struct extcon_dev *edev) { }
 
 static inline struct extcon_dev *devm_extcon_dev_allocate(struct device *dev,
-						const enum extcon *cable)
+						const unsigned int *cable)
 {
 	return ERR_PTR(-ENOSYS);
 }
@@ -298,13 +292,13 @@ static inline int extcon_update_state(struct extcon_dev *edev, u32 mask,
 }
 
 static inline int extcon_get_cable_state_(struct extcon_dev *edev,
-					  enum extcon id)
+					  unsigned int id)
 {
 	return 0;
 }
 
 static inline int extcon_set_cable_state_(struct extcon_dev *edev,
-					  enum extcon id, bool cable_state)
+					  unsigned int id, bool cable_state)
 {
 	return 0;
 }
@@ -327,14 +321,14 @@ static inline struct extcon_dev *extcon_get_extcon_dev(const char *extcon_name)
 }
 
 static inline int extcon_register_notifier(struct extcon_dev *edev,
-					enum extcon id,
+					unsigned int id,
 					struct notifier_block *nb)
 {
 	return 0;
 }
 
 static inline int extcon_unregister_notifier(struct extcon_dev *edev,
-					enum extcon id,
+					unsigned int id,
 					struct notifier_block *nb)
 {
 	return 0;
-- 
cgit v1.2.3


From ef73f886b53548d83d71a439f51a0c13ea6c1dae Mon Sep 17 00:00:00 2001
From: Dmitry Kalinkin <dmitry.kalinkin@gmail.com>
Date: Thu, 28 May 2015 15:07:04 +0300
Subject: vme: export vme_check_window()

Signed-off-by: Dmitry Kalinkin <dmitry.kalinkin@gmail.com>
Cc: Igor Alekseev <igor.alekseev@itep.ru>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/vme.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/vme.h b/include/linux/vme.h
index 79242e9c06b8..c0131358f351 100644
--- a/include/linux/vme.h
+++ b/include/linux/vme.h
@@ -120,6 +120,8 @@ void vme_free_consistent(struct vme_resource *, size_t,  void *,
 	dma_addr_t);
 
 size_t vme_get_size(struct vme_resource *);
+int vme_check_window(u32 aspace, unsigned long long vme_base,
+		     unsigned long long size);
 
 struct vme_resource *vme_slave_request(struct vme_dev *, u32, u32);
 int vme_slave_set(struct vme_resource *, int, unsigned long long,
-- 
cgit v1.2.3


From 1e98a0f08abddde87f0f93237f10629ecb4880ef Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 12 Jun 2015 19:31:32 -0700
Subject: flow_dissector: fix ipv6 dst, hop-by-hop and routing ext hdrs

__skb_header_pointer() returns a pointer that must be checked.

Fixes infinite loop reported by Alexei, and add __must_check to
catch these errors earlier.

Fixes: 6a74fcf426f5 ("flow_dissector: add support for dst, hop-by-hop and routing ext hdrs")
Reported-by: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Tested-by: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Tom Herbert <tom@herbertland.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index cc612fc0a894..a7acc92aa668 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2743,8 +2743,9 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
 __wsum skb_checksum(const struct sk_buff *skb, int offset, int len,
 		    __wsum csum);
 
-static inline void *__skb_header_pointer(const struct sk_buff *skb, int offset,
-					 int len, void *data, int hlen, void *buffer)
+static inline void * __must_check
+__skb_header_pointer(const struct sk_buff *skb, int offset,
+		     int len, void *data, int hlen, void *buffer)
 {
 	if (hlen - offset >= len)
 		return data + offset;
@@ -2756,8 +2757,8 @@ static inline void *__skb_header_pointer(const struct sk_buff *skb, int offset,
 	return buffer;
 }
 
-static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
-				       int len, void *buffer)
+static inline void * __must_check
+skb_header_pointer(const struct sk_buff *skb, int offset, int len, void *buffer)
 {
 	return __skb_header_pointer(skb, offset, len, skb->data,
 				    skb_headlen(skb), buffer);
-- 
cgit v1.2.3


From aaeb6e24f5b6cb6a664fbdec6e08b65c3173c1b3 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Fri, 12 Jun 2015 21:07:54 +0200
Subject: netfilter: ipset: Use MSEC_PER_SEC consistently

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set_timeout.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set_timeout.h b/include/linux/netfilter/ipset/ip_set_timeout.h
index 83c2f9e0886c..3c8842bcedaa 100644
--- a/include/linux/netfilter/ipset/ip_set_timeout.h
+++ b/include/linux/netfilter/ipset/ip_set_timeout.h
@@ -61,7 +61,7 @@ ip_set_timeout_set(unsigned long *timeout, u32 t)
 		return;
 	}
 
-	*timeout = msecs_to_jiffies(t * 1000) + jiffies;
+	*timeout = msecs_to_jiffies(t * MSEC_PER_SEC) + jiffies;
 	if (*timeout == IPSET_ELEM_PERMANENT)
 		/* Bingo! :-) */
 		(*timeout)--;
@@ -71,7 +71,7 @@ static inline u32
 ip_set_timeout_get(unsigned long *timeout)
 {
 	return *timeout == IPSET_ELEM_PERMANENT ? 0 :
-		jiffies_to_msecs(*timeout - jiffies)/1000;
+		jiffies_to_msecs(*timeout - jiffies)/MSEC_PER_SEC;
 }
 
 #endif	/* __KERNEL__ */
-- 
cgit v1.2.3


From f690cbaed9fe4d77592e24139db7ad790641c4fd Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Fri, 12 Jun 2015 22:11:00 +0200
Subject: netfilter: ipset: Fix cidr handling for hash:*net* types

Commit "Simplify cidr handling for hash:*net* types" broke the cidr
handling for the hash:*net* types when the sets were used by the SET
target: entries with invalid cidr values were added to the sets.
Reported by Jonathan Johnson.

Testsuite entry is added to verify the fix.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index ffdfdc24952a..a6fe1ce96437 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -545,8 +545,6 @@ ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
 	{ .bytes = ULLONG_MAX, .packets = ULLONG_MAX,	\
 	  .timeout = (set)->timeout }
 
-#define IP_SET_INIT_CIDR(a, b) ((a) ? (a) : (b))
-
 #define IPSET_CONCAT(a, b)		a##b
 #define IPSET_TOKEN(a, b)		IPSET_CONCAT(a, b)
 
-- 
cgit v1.2.3


From c4c997839cf92cb1037e43a85cdb4cbf44ed39a5 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Sat, 13 Jun 2015 11:59:45 +0200
Subject: netfilter: ipset: Fix parallel resizing and listing of the same set

When elements added to a hash:* type of set and resizing triggered,
parallel listing could start to list the original set (before resizing)
and "continue" with listing the new set. Fix it by references and
using the original hash table for listing. Therefore the destroying of
the original hash table may happen from the resizing or listing functions.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index a6fe1ce96437..5674b6ac6646 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -176,6 +176,9 @@ struct ip_set_type_variant {
 	/* List elements */
 	int (*list)(const struct ip_set *set, struct sk_buff *skb,
 		    struct netlink_callback *cb);
+	/* Keep listing private when resizing runs parallel */
+	void (*uref)(struct ip_set *set, struct netlink_callback *cb,
+		     bool start);
 
 	/* Return true if "b" set is the same as "a"
 	 * according to the create set parameters */
@@ -380,12 +383,12 @@ ip_set_init_counter(struct ip_set_counter *counter,
 
 /* Netlink CB args */
 enum {
-	IPSET_CB_NET = 0,
-	IPSET_CB_DUMP,
-	IPSET_CB_INDEX,
-	IPSET_CB_ARG0,
+	IPSET_CB_NET = 0,	/* net namespace */
+	IPSET_CB_DUMP,		/* dump single set/all sets */
+	IPSET_CB_INDEX,		/* set index */
+	IPSET_CB_PRIVATE,	/* set private data */
+	IPSET_CB_ARG0,		/* type specific */
 	IPSET_CB_ARG1,
-	IPSET_CB_ARG2,
 };
 
 /* register and unregister set references */
-- 
cgit v1.2.3


From b57b2d1fa53fe8563bdfc66a33b844463b9af285 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Sat, 13 Jun 2015 14:22:25 +0200
Subject: netfilter: ipset: Prepare the ipset core to use RCU at set level

Replace rwlock_t with spinlock_t in "struct ip_set" and change the locking
accordingly. Convert the comment extension into an rcu-avare object. Also,
simplify the timeout routines.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h         |  9 ++++--
 include/linux/netfilter/ipset/ip_set_comment.h | 38 ++++++++++++++++++--------
 include/linux/netfilter/ipset/ip_set_timeout.h | 25 +++++++----------
 3 files changed, 44 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 5674b6ac6646..19b4969a25fe 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -108,8 +108,13 @@ struct ip_set_counter {
 	atomic64_t packets;
 };
 
+struct ip_set_comment_rcu {
+	struct rcu_head rcu;
+	char str[0];
+};
+
 struct ip_set_comment {
-	char *str;
+	struct ip_set_comment_rcu __rcu *c;
 };
 
 struct ip_set_skbinfo {
@@ -226,7 +231,7 @@ struct ip_set {
 	/* The name of the set */
 	char name[IPSET_MAXNAMELEN];
 	/* Lock protecting the set data */
-	rwlock_t lock;
+	spinlock_t lock;
 	/* References to the set */
 	u32 ref;
 	/* The core set type */
diff --git a/include/linux/netfilter/ipset/ip_set_comment.h b/include/linux/netfilter/ipset/ip_set_comment.h
index 21217ea008d7..8d0248525957 100644
--- a/include/linux/netfilter/ipset/ip_set_comment.h
+++ b/include/linux/netfilter/ipset/ip_set_comment.h
@@ -16,41 +16,57 @@ ip_set_comment_uget(struct nlattr *tb)
 	return nla_data(tb);
 }
 
+/* Called from uadd only, protected by the set spinlock.
+ * The kadt functions don't use the comment extensions in any way.
+ */
 static inline void
 ip_set_init_comment(struct ip_set_comment *comment,
 		    const struct ip_set_ext *ext)
 {
+	struct ip_set_comment_rcu *c = rcu_dereference_protected(comment->c, 1);
 	size_t len = ext->comment ? strlen(ext->comment) : 0;
 
-	if (unlikely(comment->str)) {
-		kfree(comment->str);
-		comment->str = NULL;
+	if (unlikely(c)) {
+		kfree_rcu(c, rcu);
+		rcu_assign_pointer(comment->c, NULL);
 	}
 	if (!len)
 		return;
 	if (unlikely(len > IPSET_MAX_COMMENT_SIZE))
 		len = IPSET_MAX_COMMENT_SIZE;
-	comment->str = kzalloc(len + 1, GFP_ATOMIC);
-	if (unlikely(!comment->str))
+	c = kzalloc(sizeof(*c) + len + 1, GFP_ATOMIC);
+	if (unlikely(!c))
 		return;
-	strlcpy(comment->str, ext->comment, len + 1);
+	strlcpy(c->str, ext->comment, len + 1);
+	rcu_assign_pointer(comment->c, c);
 }
 
+/* Used only when dumping a set, protected by rcu_read_lock_bh() */
 static inline int
 ip_set_put_comment(struct sk_buff *skb, struct ip_set_comment *comment)
 {
-	if (!comment->str)
+	struct ip_set_comment_rcu *c = rcu_dereference_bh(comment->c);
+
+	if (!c)
 		return 0;
-	return nla_put_string(skb, IPSET_ATTR_COMMENT, comment->str);
+	return nla_put_string(skb, IPSET_ATTR_COMMENT, c->str);
 }
 
+/* Called from uadd/udel, flush or the garbage collectors protected
+ * by the set spinlock.
+ * Called when the set is destroyed and when there can't be any user
+ * of the set data anymore.
+ */
 static inline void
 ip_set_comment_free(struct ip_set_comment *comment)
 {
-	if (unlikely(!comment->str))
+	struct ip_set_comment_rcu *c;
+
+	c = rcu_dereference_protected(comment->c, 1);
+	if (unlikely(!c))
 		return;
-	kfree(comment->str);
-	comment->str = NULL;
+	kfree_rcu(c, rcu);
+	rcu_assign_pointer(comment->c, NULL);
 }
 
 #endif
diff --git a/include/linux/netfilter/ipset/ip_set_timeout.h b/include/linux/netfilter/ipset/ip_set_timeout.h
index 3c8842bcedaa..1d6a935c1ac5 100644
--- a/include/linux/netfilter/ipset/ip_set_timeout.h
+++ b/include/linux/netfilter/ipset/ip_set_timeout.h
@@ -40,31 +40,26 @@ ip_set_timeout_uget(struct nlattr *tb)
 }
 
 static inline bool
-ip_set_timeout_test(unsigned long timeout)
+ip_set_timeout_expired(unsigned long *t)
 {
-	return timeout == IPSET_ELEM_PERMANENT ||
-	       time_is_after_jiffies(timeout);
-}
-
-static inline bool
-ip_set_timeout_expired(unsigned long *timeout)
-{
-	return *timeout != IPSET_ELEM_PERMANENT &&
-	       time_is_before_jiffies(*timeout);
+	return *t != IPSET_ELEM_PERMANENT && time_is_before_jiffies(*t);
 }
 
 static inline void
-ip_set_timeout_set(unsigned long *timeout, u32 t)
+ip_set_timeout_set(unsigned long *timeout, u32 value)
 {
-	if (!t) {
+	unsigned long t;
+
+	if (!value) {
 		*timeout = IPSET_ELEM_PERMANENT;
 		return;
 	}
 
-	*timeout = msecs_to_jiffies(t * MSEC_PER_SEC) + jiffies;
-	if (*timeout == IPSET_ELEM_PERMANENT)
+	t = msecs_to_jiffies(value * MSEC_PER_SEC) + jiffies;
+	if (t == IPSET_ELEM_PERMANENT)
 		/* Bingo! :-) */
-		(*timeout)--;
+		t--;
+	*timeout = t;
 }
 
 static inline u32
-- 
cgit v1.2.3


From ca0f6a5cd99e0c6ba4bb78dc402817f636370f26 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Sat, 13 Jun 2015 19:45:33 +0200
Subject: netfilter: ipset: Fix coding styles reported by checkpatch.pl

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 19b4969a25fe..48bb01edcf30 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -349,12 +349,11 @@ ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo)
 			      cpu_to_be64((u64)skbinfo->skbmark << 32 |
 					  skbinfo->skbmarkmask))) ||
 	       (skbinfo->skbprio &&
-	        nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
+		nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
 			      cpu_to_be32(skbinfo->skbprio))) ||
 	       (skbinfo->skbqueue &&
-	        nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
+		nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
 			     cpu_to_be16(skbinfo->skbqueue)));
-
 }
 
 static inline void
-- 
cgit v1.2.3


From c751ad0dd640f4ce9269acd7a54de5ba8092e99e Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Fri, 12 Jun 2015 15:48:06 -0700
Subject: regulator: Add docbook for soft start

The docbook for these members is missing. Add them.

Warning(include/linux/regulator/machine.h:147): No description
found for parameter 'soft_start'
Warning(include/linux/regulator/driver.h:197): No description
found for parameter 'set_soft_start'

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regulator/driver.h  | 1 +
 include/linux/regulator/machine.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index e0635d0894aa..9398d31f9531 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -111,6 +111,7 @@ struct regulator_linear_range {
  *               to stabilise after being set to a new value, in microseconds.
  *               The function provides the from and to voltage selector, the
  *               function should return the worst case.
+ * @set_soft_start: Enable soft start for the regulator.
  *
  * @set_suspend_voltage: Set the voltage for the regulator when the system
  *                       is suspended.
diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h
index 7f7d0a3fe1e1..1258275d3751 100644
--- a/include/linux/regulator/machine.h
+++ b/include/linux/regulator/machine.h
@@ -87,6 +87,7 @@ struct regulator_state {
  *           applied.
  * @apply_uV: Apply the voltage constraint when initialising.
  * @ramp_disable: Disable ramp delay when initialising or when setting voltage.
+ * @soft_start: Enable soft start so that voltage ramps slowly.
  * @pull_down: Enable pull down when regulator is disabled.
  *
  * @input_uV: Input voltage for regulator when supplied by another regulator.
-- 
cgit v1.2.3


From d45337328b1fb86a8f045f6c9938e9e08e6d7134 Mon Sep 17 00:00:00 2001
From: Vincent Wan <vincent.wan@amd.com>
Date: Thu, 11 Jun 2015 20:11:45 +0800
Subject: pci_ids: Add AMD KERNCZ device ID support

The KERNCZ is new AMD SB/FCH generation name, like HUDSON2.
0x790b is the device ID for this generation.

Signed-off-by: Wan ZongShun <Vincent.Wan@amd.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/pci_ids.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 2f7b9a40f627..cb63a7b522ef 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -579,6 +579,7 @@
 #define PCI_DEVICE_ID_AMD_HUDSON2_SATA_IDE	0x7800
 #define PCI_DEVICE_ID_AMD_HUDSON2_SMBUS		0x780b
 #define PCI_DEVICE_ID_AMD_HUDSON2_IDE		0x780c
+#define PCI_DEVICE_ID_AMD_KERNCZ_SMBUS  0x790b
 
 #define PCI_VENDOR_ID_TRIDENT		0x1023
 #define PCI_DEVICE_ID_TRIDENT_4DWAVE_DX	0x2000
-- 
cgit v1.2.3


From 32be6d3e362b896c81aae7c635d44e5a91406ce2 Mon Sep 17 00:00:00 2001
From: Dan Streetman <ddstreet@ieee.org>
Date: Fri, 12 Jun 2015 10:58:46 -0400
Subject: crypto: nx - move include/linux/nx842.h into
 drivers/crypto/nx/nx-842.h

Move the contents of the include/linux/nx842.h header file into the
drivers/crypto/nx/nx-842.h header file.  Remove the nx842.h header
file and its entry in the MAINTAINERS file.

The include/linux/nx842.h header originally was there because the
crypto/842.c driver needed it to communicate with the nx-842 hw
driver.  However, that crypto compression driver was moved into
the drivers/crypto/nx/ directory, and now can directly include the
nx-842.h header.  Nothing else needs the public include/linux/nx842.h
header file, as all use of the nx-842 hardware driver will be through
the "842-nx" crypto compression driver, since the direct nx-842 api is
very limited in the buffer alignments and sizes that it will accept,
and the crypto compression interface handles those limitations and
allows any alignment and size buffers.

Signed-off-by: Dan Streetman <ddstreet@ieee.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/nx842.h | 24 ------------------------
 1 file changed, 24 deletions(-)
 delete mode 100644 include/linux/nx842.h

(limited to 'include/linux')

diff --git a/include/linux/nx842.h b/include/linux/nx842.h
deleted file mode 100644
index 4ddf68d9c0d4..000000000000
--- a/include/linux/nx842.h
+++ /dev/null
@@ -1,24 +0,0 @@
-#ifndef __NX842_H__
-#define __NX842_H__
-
-#define __NX842_PSERIES_MEM_COMPRESS	(10240)
-#define __NX842_POWERNV_MEM_COMPRESS	(1024)
-
-#define NX842_MEM_COMPRESS	(max_t(unsigned int,			\
-	__NX842_PSERIES_MEM_COMPRESS, __NX842_POWERNV_MEM_COMPRESS))
-
-struct nx842_constraints {
-	int alignment;
-	int multiple;
-	int minimum;
-	int maximum;
-};
-
-int nx842_constraints(struct nx842_constraints *constraints);
-
-int nx842_compress(const unsigned char *in, unsigned int in_len,
-		   unsigned char *out, unsigned int *out_len, void *wrkmem);
-int nx842_decompress(const unsigned char *in, unsigned int in_len,
-		     unsigned char *out, unsigned int *out_len, void *wrkmem);
-
-#endif
-- 
cgit v1.2.3


From e7b707f96820161820a864d2ee81740a14da6b93 Mon Sep 17 00:00:00 2001
From: Gwendal Grignou <gwendal@chromium.org>
Date: Wed, 20 May 2015 11:31:27 +0200
Subject: mfd: cros_ec: Remove parent field

Parent and device were pointing to the same device structure.

Parent is unused, removed.

Signed-off-by: Gwendal Grignou <gwendal@chromium.org>
Tested-by: Stephen Barber <smbarber@chromium.org>
Tested-by: Heiko Stuebner <heiko@sntech.de>
Tested-by: Gwendal Grignou <gwendal@chromium.org>
Reviewed-by: Puthikorn Voravootivat <puthik@chromium.org>
Signed-off-by: Javier Martinez Canillas <javier.martinez@collabora.co.uk>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/cros_ec.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index 324a34683971..14cf522123dd 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -85,7 +85,6 @@ struct cros_ec_command {
  * to using dword.
  * @din_size: size of din buffer to allocate (zero to use static din)
  * @dout_size: size of dout buffer to allocate (zero to use static dout)
- * @parent: pointer to parent device (e.g. i2c or spi device)
  * @wake_enabled: true if this device can wake the system from sleep
  * @cmd_xfer: send command to EC and get response
  *     Returns the number of bytes received if the communication succeeded, but
@@ -113,7 +112,6 @@ struct cros_ec_device {
 	uint8_t *dout;
 	int din_size;
 	int dout_size;
-	struct device *parent;
 	bool wake_enabled;
 	int (*cmd_xfer)(struct cros_ec_device *ec,
 			struct cros_ec_command *msg);
-- 
cgit v1.2.3


From a841178445bb72a3d566b4e6ab9d19e9b002eb47 Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javier.martinez@collabora.co.uk>
Date: Tue, 9 Jun 2015 13:04:42 +0200
Subject: mfd: cros_ec: Use a zero-length array for command data

Commit 1b84f2a4cd4a ("mfd: cros_ec: Use fixed size arrays to transfer
data with the EC") modified the struct cros_ec_command fields to not
use pointers for the input and output buffers and use fixed length
arrays instead.

This change was made because the cros_ec ioctl API uses that struct
cros_ec_command to allow user-space to send commands to the EC and
to get data from the EC. So using pointers made the API not 64-bit
safe. Unfortunately this approach was not flexible enough for all
the use-cases since there may be a need to send larger commands
on newer versions of the EC command protocol.

So to avoid to choose a constant length that it may be too big for
most commands and thus wasting memory and CPU cycles on copy from
and to user-space or having a size that is too small for some big
commands, use a zero-length array that is both 64-bit safe and
flexible. The same buffer is used for both output and input data
so the maximum of these values should be used to allocate it.

Suggested-by: Gwendal Grignou <gwendal@chromium.org>
Signed-off-by: Javier Martinez Canillas <javier.martinez@collabora.co.uk>
Tested-by: Heiko Stuebner <heiko@sntech.de>
Acked-by: Lee Jones <lee.jones@linaro.org>
Acked-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/cros_ec.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index 14cf522123dd..7eee38abd02a 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -42,8 +42,7 @@ enum {
  * @outsize: Outgoing length in bytes
  * @insize: Max number of bytes to accept from EC
  * @result: EC's response to the command (separate from communication failure)
- * @outdata: Outgoing data to EC
- * @indata: Where to put the incoming data from EC
+ * @data: Where to put the incoming data from EC and outgoing data to EC
  */
 struct cros_ec_command {
 	uint32_t version;
@@ -51,8 +50,7 @@ struct cros_ec_command {
 	uint32_t outsize;
 	uint32_t insize;
 	uint32_t result;
-	uint8_t outdata[EC_PROTO2_MAX_PARAM_SIZE];
-	uint8_t indata[EC_PROTO2_MAX_PARAM_SIZE];
+	uint8_t data[0];
 };
 
 /**
-- 
cgit v1.2.3


From 256ab950bdaa8797b7bac8fc11a567030d486304 Mon Sep 17 00:00:00 2001
From: Stephen Barber <smbarber@chromium.org>
Date: Tue, 9 Jun 2015 13:04:43 +0200
Subject: mfd: cros_ec: rev cros_ec_commands.h

Update cros_ec_commands.h to the latest version in the EC
firmware sources and add power domain and passthru commands.

Also, update lightbar to use new command names.

Signed-off-by: Stephen Barber <smbarber@chromium.org>
Reviewed-by: Randall Spangler <rspangler@chromium.org>
Signed-off-by: Javier Martinez Canillas <javier.martinez@collabora.co.uk>
Tested-by: Heiko Stuebner <heiko@sntech.de>
Reviewed-by: Gwendal Grignou <gwendal@chromium.org>
Tested-by: Gwendal Grignou <gwendal@chromium.org>
Acked-by: Lee Jones <lee.jones@linaro.org>
Acked-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/cros_ec_commands.h | 277 ++++++++++++++++++++++++++++++++---
 1 file changed, 255 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h
index a49cd41feea7..13b630c10d4c 100644
--- a/include/linux/mfd/cros_ec_commands.h
+++ b/include/linux/mfd/cros_ec_commands.h
@@ -515,7 +515,7 @@ struct ec_host_response {
 /*
  * Notes on commands:
  *
- * Each command is an 8-byte command value.  Commands which take params or
+ * Each command is an 16-bit command value.  Commands which take params or
  * return response data specify structs for that data.  If no struct is
  * specified, the command does not input or output data, respectively.
  * Parameter/response length is implicit in the structs.  Some underlying
@@ -966,7 +966,7 @@ struct rgb_s {
 /* List of tweakable parameters. NOTE: It's __packed so it can be sent in a
  * host command, but the alignment is the same regardless. Keep it that way.
  */
-struct lightbar_params {
+struct lightbar_params_v0 {
 	/* Timing */
 	int32_t google_ramp_up;
 	int32_t google_ramp_down;
@@ -1000,32 +1000,81 @@ struct lightbar_params {
 	struct rgb_s color[8];			/* 0-3 are Google colors */
 } __packed;
 
+struct lightbar_params_v1 {
+	/* Timing */
+	int32_t google_ramp_up;
+	int32_t google_ramp_down;
+	int32_t s3s0_ramp_up;
+	int32_t s0_tick_delay[2];		/* AC=0/1 */
+	int32_t s0a_tick_delay[2];		/* AC=0/1 */
+	int32_t s0s3_ramp_down;
+	int32_t s3_sleep_for;
+	int32_t s3_ramp_up;
+	int32_t s3_ramp_down;
+	int32_t tap_tick_delay;
+	int32_t tap_display_time;
+
+	/* Tap-for-battery params */
+	uint8_t tap_pct_red;
+	uint8_t tap_pct_green;
+	uint8_t tap_seg_min_on;
+	uint8_t tap_seg_max_on;
+	uint8_t tap_seg_osc;
+	uint8_t tap_idx[3];
+
+	/* Oscillation */
+	uint8_t osc_min[2];			/* AC=0/1 */
+	uint8_t osc_max[2];			/* AC=0/1 */
+	uint8_t w_ofs[2];			/* AC=0/1 */
+
+	/* Brightness limits based on the backlight and AC. */
+	uint8_t bright_bl_off_fixed[2];		/* AC=0/1 */
+	uint8_t bright_bl_on_min[2];		/* AC=0/1 */
+	uint8_t bright_bl_on_max[2];		/* AC=0/1 */
+
+	/* Battery level thresholds */
+	uint8_t battery_threshold[LB_BATTERY_LEVELS - 1];
+
+	/* Map [AC][battery_level] to color index */
+	uint8_t s0_idx[2][LB_BATTERY_LEVELS];	/* AP is running */
+	uint8_t s3_idx[2][LB_BATTERY_LEVELS];	/* AP is sleeping */
+
+	/* Color palette */
+	struct rgb_s color[8];			/* 0-3 are Google colors */
+} __packed;
+
 struct ec_params_lightbar {
 	uint8_t cmd;		      /* Command (see enum lightbar_command) */
 	union {
 		struct {
 			/* no args */
-		} dump, off, on, init, get_seq, get_params, version;
+		} dump, off, on, init, get_seq, get_params_v0, get_params_v1,
+			version, get_brightness, get_demo;
 
-		struct num {
+		struct {
 			uint8_t num;
-		} brightness, seq, demo;
+		} set_brightness, seq, demo;
 
-		struct reg {
+		struct {
 			uint8_t ctrl, reg, value;
 		} reg;
 
-		struct rgb {
+		struct {
 			uint8_t led, red, green, blue;
-		} rgb;
+		} set_rgb;
+
+		struct {
+			uint8_t led;
+		} get_rgb;
 
-		struct lightbar_params set_params;
+		struct lightbar_params_v0 set_params_v0;
+		struct lightbar_params_v1 set_params_v1;
 	};
 } __packed;
 
 struct ec_response_lightbar {
 	union {
-		struct dump {
+		struct {
 			struct {
 				uint8_t reg;
 				uint8_t ic0;
@@ -1033,20 +1082,26 @@ struct ec_response_lightbar {
 			} vals[23];
 		} dump;
 
-		struct get_seq {
+		struct  {
 			uint8_t num;
-		} get_seq;
+		} get_seq, get_brightness, get_demo;
 
-		struct lightbar_params get_params;
+		struct lightbar_params_v0 get_params_v0;
+		struct lightbar_params_v1 get_params_v1;
 
-		struct version {
+		struct {
 			uint32_t num;
 			uint32_t flags;
 		} version;
 
+		struct {
+			uint8_t red, green, blue;
+		} get_rgb;
+
 		struct {
 			/* no return params */
-		} off, on, init, brightness, seq, reg, rgb, demo, set_params;
+		} off, on, init, set_brightness, seq, reg, set_rgb,
+			demo, set_params_v0, set_params_v1;
 	};
 } __packed;
 
@@ -1056,15 +1111,20 @@ enum lightbar_command {
 	LIGHTBAR_CMD_OFF = 1,
 	LIGHTBAR_CMD_ON = 2,
 	LIGHTBAR_CMD_INIT = 3,
-	LIGHTBAR_CMD_BRIGHTNESS = 4,
+	LIGHTBAR_CMD_SET_BRIGHTNESS = 4,
 	LIGHTBAR_CMD_SEQ = 5,
 	LIGHTBAR_CMD_REG = 6,
-	LIGHTBAR_CMD_RGB = 7,
+	LIGHTBAR_CMD_SET_RGB = 7,
 	LIGHTBAR_CMD_GET_SEQ = 8,
 	LIGHTBAR_CMD_DEMO = 9,
-	LIGHTBAR_CMD_GET_PARAMS = 10,
-	LIGHTBAR_CMD_SET_PARAMS = 11,
+	LIGHTBAR_CMD_GET_PARAMS_V0 = 10,
+	LIGHTBAR_CMD_SET_PARAMS_V0 = 11,
 	LIGHTBAR_CMD_VERSION = 12,
+	LIGHTBAR_CMD_GET_BRIGHTNESS = 13,
+	LIGHTBAR_CMD_GET_RGB = 14,
+	LIGHTBAR_CMD_GET_DEMO = 15,
+	LIGHTBAR_CMD_GET_PARAMS_V1 = 16,
+	LIGHTBAR_CMD_SET_PARAMS_V1 = 17,
 	LIGHTBAR_NUM_CMDS
 };
 
@@ -1421,8 +1481,40 @@ struct ec_response_rtc {
 /*****************************************************************************/
 /* Port80 log access */
 
+/* Maximum entries that can be read/written in a single command */
+#define EC_PORT80_SIZE_MAX 32
+
 /* Get last port80 code from previous boot */
 #define EC_CMD_PORT80_LAST_BOOT 0x48
+#define EC_CMD_PORT80_READ 0x48
+
+enum ec_port80_subcmd {
+	EC_PORT80_GET_INFO = 0,
+	EC_PORT80_READ_BUFFER,
+};
+
+struct ec_params_port80_read {
+	uint16_t subcmd;
+	union {
+		struct {
+			uint32_t offset;
+			uint32_t num_entries;
+		} read_buffer;
+	};
+} __packed;
+
+struct ec_response_port80_read {
+	union {
+		struct {
+			uint32_t writes;
+			uint32_t history_size;
+			uint32_t last_boot;
+		} get_info;
+		struct {
+			uint16_t codes[EC_PORT80_SIZE_MAX];
+		} data;
+	};
+} __packed;
 
 struct ec_response_port80_last_boot {
 	uint16_t code;
@@ -1782,6 +1874,7 @@ struct ec_params_gpio_set {
 /* Get GPIO value */
 #define EC_CMD_GPIO_GET 0x93
 
+/* Version 0 of input params and response */
 struct ec_params_gpio_get {
 	char name[32];
 } __packed;
@@ -1789,6 +1882,38 @@ struct ec_response_gpio_get {
 	uint8_t val;
 } __packed;
 
+/* Version 1 of input params and response */
+struct ec_params_gpio_get_v1 {
+	uint8_t subcmd;
+	union {
+		struct {
+			char name[32];
+		} get_value_by_name;
+		struct {
+			uint8_t index;
+		} get_info;
+	};
+} __packed;
+
+struct ec_response_gpio_get_v1 {
+	union {
+		struct {
+			uint8_t val;
+		} get_value_by_name, get_count;
+		struct {
+			uint8_t val;
+			char name[32];
+			uint32_t flags;
+		} get_info;
+	};
+} __packed;
+
+enum gpio_get_subcmd {
+	EC_GPIO_GET_BY_NAME = 0,
+	EC_GPIO_GET_COUNT = 1,
+	EC_GPIO_GET_INFO = 2,
+};
+
 /*****************************************************************************/
 /* I2C commands. Only available when flash write protect is unlocked. */
 
@@ -1857,13 +1982,21 @@ struct ec_params_charge_control {
 /*****************************************************************************/
 
 /*
- * Cut off battery power output if the battery supports.
+ * Cut off battery power immediately or after the host has shut down.
  *
- * For unsupported battery, just don't implement this command and lets EC
- * return EC_RES_INVALID_COMMAND.
+ * return EC_RES_INVALID_COMMAND if unsupported by a board/battery.
+ *	  EC_RES_SUCCESS if the command was successful.
+ *	  EC_RES_ERROR if the cut off command failed.
  */
+
 #define EC_CMD_BATTERY_CUT_OFF 0x99
 
+#define EC_BATTERY_CUTOFF_FLAG_AT_SHUTDOWN	(1 << 0)
+
+struct ec_params_battery_cutoff {
+	uint8_t flags;
+} __packed;
+
 /*****************************************************************************/
 /* USB port mux control. */
 
@@ -2141,6 +2274,32 @@ struct ec_params_sb_wr_block {
 	uint16_t data[32];
 } __packed;
 
+/*****************************************************************************/
+/* Battery vendor parameters
+ *
+ * Get or set vendor-specific parameters in the battery. Implementations may
+ * differ between boards or batteries. On a set operation, the response
+ * contains the actual value set, which may be rounded or clipped from the
+ * requested value.
+ */
+
+#define EC_CMD_BATTERY_VENDOR_PARAM 0xb4
+
+enum ec_battery_vendor_param_mode {
+	BATTERY_VENDOR_PARAM_MODE_GET = 0,
+	BATTERY_VENDOR_PARAM_MODE_SET,
+};
+
+struct ec_params_battery_vendor_param {
+	uint32_t param;
+	uint32_t value;
+	uint8_t mode;
+} __packed;
+
+struct ec_response_battery_vendor_param {
+	uint32_t value;
+} __packed;
+
 /*****************************************************************************/
 /* System commands */
 
@@ -2336,6 +2495,80 @@ struct ec_params_reboot_ec {
 
 #endif  /* !__ACPI__ */
 
+/*****************************************************************************/
+/*
+ * PD commands
+ *
+ * These commands are for PD MCU communication.
+ */
+
+/* EC to PD MCU exchange status command */
+#define EC_CMD_PD_EXCHANGE_STATUS 0x100
+
+/* Status of EC being sent to PD */
+struct ec_params_pd_status {
+	int8_t batt_soc; /* battery state of charge */
+} __packed;
+
+/* Status of PD being sent back to EC */
+struct ec_response_pd_status {
+	int8_t status;        /* PD MCU status */
+	uint32_t curr_lim_ma; /* input current limit */
+} __packed;
+
+/* Set USB type-C port role and muxes */
+#define EC_CMD_USB_PD_CONTROL 0x101
+
+enum usb_pd_control_role {
+	USB_PD_CTRL_ROLE_NO_CHANGE = 0,
+	USB_PD_CTRL_ROLE_TOGGLE_ON = 1, /* == AUTO */
+	USB_PD_CTRL_ROLE_TOGGLE_OFF = 2,
+	USB_PD_CTRL_ROLE_FORCE_SINK = 3,
+	USB_PD_CTRL_ROLE_FORCE_SOURCE = 4,
+};
+
+enum usb_pd_control_mux {
+	USB_PD_CTRL_MUX_NO_CHANGE = 0,
+	USB_PD_CTRL_MUX_NONE = 1,
+	USB_PD_CTRL_MUX_USB = 2,
+	USB_PD_CTRL_MUX_DP = 3,
+	USB_PD_CTRL_MUX_DOCK = 4,
+	USB_PD_CTRL_MUX_AUTO = 5,
+};
+
+struct ec_params_usb_pd_control {
+	uint8_t port;
+	uint8_t role;
+	uint8_t mux;
+} __packed;
+
+/*****************************************************************************/
+/*
+ * Passthru commands
+ *
+ * Some platforms have sub-processors chained to each other.  For example.
+ *
+ *     AP <--> EC <--> PD MCU
+ *
+ * The top 2 bits of the command number are used to indicate which device the
+ * command is intended for.  Device 0 is always the device receiving the
+ * command; other device mapping is board-specific.
+ *
+ * When a device receives a command to be passed to a sub-processor, it passes
+ * it on with the device number set back to 0.  This allows the sub-processor
+ * to remain blissfully unaware of whether the command originated on the next
+ * device up the chain, or was passed through from the AP.
+ *
+ * In the above example, if the AP wants to send command 0x0002 to the PD MCU,
+ *     AP sends command 0x4002 to the EC
+ *     EC sends command 0x0002 to the PD MCU
+ *     EC forwards PD MCU response back to the AP
+ */
+
+/* Offset and max command number for sub-device n */
+#define EC_CMD_PASSTHRU_OFFSET(n) (0x4000 * (n))
+#define EC_CMD_PASSTHRU_MAX(n) (EC_CMD_PASSTHRU_OFFSET(n) + 0x3fff)
+
 /*****************************************************************************/
 /*
  * Deprecated constants. These constants have been renamed for clarity. The
-- 
cgit v1.2.3


From 2c7589af3c4dee844e6a4174f2aa8996cf837604 Mon Sep 17 00:00:00 2001
From: Stephen Barber <smbarber@chromium.org>
Date: Tue, 9 Jun 2015 13:04:45 +0200
Subject: mfd: cros_ec: add proto v3 skeleton

Add support in cros_ec.c to handle EC host command protocol v3.
For v3+, probe for maximum shared protocol version and max
request, response, and passthrough sizes. For now, this will
always fall back to v2, since there is no bus-specific code
for handling proto v3 packets.

Signed-off-by: Stephen Barber <smbarber@chromium.org>
Signed-off-by: Javier Martinez Canillas <javier.martinez@collabora.co.uk>
Reviewed-by: Gwendal Grignou <gwendal@chromium.org>
Tested-by: Gwendal Grignou <gwendal@chromium.org>
Tested-by: Heiko Stuebner <heiko@sntech.de>
Acked-by: Lee Jones <lee.jones@linaro.org>
Acked-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/cros_ec.h | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index 7eee38abd02a..59d909434efd 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -21,6 +21,15 @@
 #include <linux/mfd/cros_ec_commands.h>
 #include <linux/mutex.h>
 
+/*
+ * Max bus-specific overhead incurred by request/responses.
+ * I2C requires 1 additional byte for requests.
+ * I2C requires 2 additional bytes for responses.
+ * */
+#define EC_PROTO_VERSION_UNKNOWN	0
+#define EC_MAX_REQUEST_OVERHEAD		1
+#define EC_MAX_RESPONSE_OVERHEAD	2
+
 /*
  * Command interface between EC and AP, for LPC, I2C and SPI interfaces.
  */
@@ -88,6 +97,7 @@ struct cros_ec_command {
  *     Returns the number of bytes received if the communication succeeded, but
  *     that doesn't mean the EC was happy with the command. The caller
  *     should check msg.result for the EC's result code.
+ * @pkt_xfer: send packet to EC and get response
  * @lock: one transaction at a time
  */
 struct cros_ec_device {
@@ -104,15 +114,21 @@ struct cros_ec_device {
 			   unsigned int bytes, void *dest);
 
 	/* These are used to implement the platform-specific interface */
+	u16 max_request;
+	u16 max_response;
+	u16 max_passthru;
+	u16 proto_version;
 	void *priv;
 	int irq;
-	uint8_t *din;
-	uint8_t *dout;
+	u8 *din;
+	u8 *dout;
 	int din_size;
 	int dout_size;
 	bool wake_enabled;
 	int (*cmd_xfer)(struct cros_ec_device *ec,
 			struct cros_ec_command *msg);
+	int (*pkt_xfer)(struct cros_ec_device *ec,
+			struct cros_ec_command *msg);
 	struct mutex lock;
 };
 
@@ -194,4 +210,12 @@ int cros_ec_remove(struct cros_ec_device *ec_dev);
  */
 int cros_ec_register(struct cros_ec_device *ec_dev);
 
+/**
+ * cros_ec_register -  Query the protocol version supported by the ChromeOS EC
+ *
+ * @ec_dev: Device to register
+ * @return 0 if ok, -ve on error
+ */
+int cros_ec_query_all(struct cros_ec_device *ec_dev);
+
 #endif /* __LINUX_MFD_CROS_EC_H */
-- 
cgit v1.2.3


From d365407079d33106f76bd486a863de05eb5ae95d Mon Sep 17 00:00:00 2001
From: Stephen Barber <smbarber@chromium.org>
Date: Tue, 9 Jun 2015 13:04:46 +0200
Subject: mfd: cros_ec: add bus-specific proto v3 code

Add proto v3 support to the SPI, I2C, and LPC.

Signed-off-by: Stephen Barber <smbarber@chromium.org>
Signed-off-by: Javier Martinez Canillas <javier.martinez@collabora.co.uk>
Tested-by: Heiko Stuebner <heiko@sntech.de>
Reviewed-by: Gwendal Grignou <gwendal@chromium.org>
Tested-by: Gwendal Grignou <gwendal@chromium.org>
Acked-by: Lee Jones <lee.jones@linaro.org>
Acked-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/cros_ec.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index 59d909434efd..92e13aaa450c 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -21,6 +21,12 @@
 #include <linux/mfd/cros_ec_commands.h>
 #include <linux/mutex.h>
 
+/*
+ * The EC is unresponsive for a time after a reboot command.  Add a
+ * simple delay to make sure that the bus stays locked.
+ */
+#define EC_REBOOT_DELAY_MS             50
+
 /*
  * Max bus-specific overhead incurred by request/responses.
  * I2C requires 1 additional byte for requests.
-- 
cgit v1.2.3


From 57b33ff077beebb68481a2b6b8e5fe58ca998169 Mon Sep 17 00:00:00 2001
From: Gwendal Grignou <gwendal@chromium.org>
Date: Tue, 9 Jun 2015 13:04:47 +0200
Subject: mfd: cros_ec: Support multiple EC in a system

Chromebooks can have more than one Embedded Controller so the
cros_ec device id has to be incremented for each EC registered.

Add a new structure to represent multiple EC as different char
devices (e.g: /dev/cros_ec, /dev/cros_pd). It connects to
cros_ec_device and allows sysfs inferface for cros_pd.

Also reduce number of allocated objects, make chromeos sysfs
class object a static and add refcounting to prevent object
deletion while command is in progress.

Signed-off-by: Gwendal Grignou <gwendal@chromium.org>
Reviewed-by: Dmitry Torokhov <dtor@chromium.org>
Signed-off-by: Javier Martinez Canillas <javier.martinez@collabora.co.uk>
Tested-by: Heiko Stuebner <heiko@sntech.de>
Acked-by: Lee Jones <lee.jones@linaro.org>
Acked-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/cros_ec.h | 44 ++++++++++++++++++++++++++++++++++++++------
 1 file changed, 38 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index 92e13aaa450c..da72671a42fa 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -17,10 +17,14 @@
 #define __LINUX_MFD_CROS_EC_H
 
 #include <linux/cdev.h>
+#include <linux/device.h>
 #include <linux/notifier.h>
 #include <linux/mfd/cros_ec_commands.h>
 #include <linux/mutex.h>
 
+#define CROS_EC_DEV_NAME "cros_ec"
+#define CROS_EC_DEV_PD_NAME "cros_pd"
+
 /*
  * The EC is unresponsive for a time after a reboot command.  Add a
  * simple delay to make sure that the bus stays locked.
@@ -71,11 +75,8 @@ struct cros_ec_command {
 /**
  * struct cros_ec_device - Information about a ChromeOS EC device
  *
- * @ec_name: name of EC device (e.g. 'chromeos-ec')
  * @phys_name: name of physical comms layer (e.g. 'i2c-4')
  * @dev: Device pointer for physical comms device
- * @vdev: Device pointer for virtual comms device
- * @cdev: Character device structure for virtual comms device
  * @was_wake_device: true if this device was set to wake the system from
  * sleep at the last suspend
  * @cmd_readmem: direct read of the EC memory-mapped region, if supported
@@ -87,6 +88,7 @@ struct cros_ec_command {
  *
  * @priv: Private data
  * @irq: Interrupt to use
+ * @id: Device id
  * @din: input buffer (for data from EC)
  * @dout: output buffer (for data to EC)
  * \note
@@ -109,11 +111,8 @@ struct cros_ec_command {
 struct cros_ec_device {
 
 	/* These are used by other drivers that want to talk to the EC */
-	const char *ec_name;
 	const char *phys_name;
 	struct device *dev;
-	struct device *vdev;
-	struct cdev cdev;
 	bool was_wake_device;
 	struct class *cros_class;
 	int (*cmd_readmem)(struct cros_ec_device *ec, unsigned int offset,
@@ -138,6 +137,35 @@ struct cros_ec_device {
 	struct mutex lock;
 };
 
+/* struct cros_ec_platform - ChromeOS EC platform information
+ *
+ * @ec_name: name of EC device (e.g. 'cros-ec', 'cros-pd', ...)
+ * used in /dev/ and sysfs.
+ * @cmd_offset: offset to apply for each command. Set when
+ * registering a devicde behind another one.
+ */
+struct cros_ec_platform {
+	const char *ec_name;
+	u16 cmd_offset;
+};
+
+/*
+ * struct cros_ec_dev - ChromeOS EC device entry point
+ *
+ * @class_dev: Device structure used in sysfs
+ * @cdev: Character device structure in /dev
+ * @ec_dev: cros_ec_device structure to talk to the physical device
+ * @dev: pointer to the platform device
+ * @cmd_offset: offset to apply for each command.
+ */
+struct cros_ec_dev {
+	struct device class_dev;
+	struct cdev cdev;
+	struct cros_ec_device *ec_dev;
+	struct device *dev;
+	u16 cmd_offset;
+};
+
 /**
  * cros_ec_suspend - Handle a suspend operation for the ChromeOS EC device
  *
@@ -224,4 +252,8 @@ int cros_ec_register(struct cros_ec_device *ec_dev);
  */
 int cros_ec_query_all(struct cros_ec_device *ec_dev);
 
+/* sysfs stuff */
+extern struct attribute_group cros_ec_attr_group;
+extern struct attribute_group cros_ec_lightbar_attr_group;
+
 #endif /* __LINUX_MFD_CROS_EC_H */
-- 
cgit v1.2.3


From d0562674838c08ff142c0e9a8e12634e133c4361 Mon Sep 17 00:00:00 2001
From: "Suthikulpanit, Suravee" <Suravee.Suthikulpanit@amd.com>
Date: Wed, 10 Jun 2015 11:08:52 -0500
Subject: ACPI / scan: Parse _CCA and setup device coherency

This patch implements support for ACPI _CCA object, which is introduced in
ACPIv5.1, can be used for specifying device DMA coherency attribute.

The parsing logic traverses device namespace to parse coherency
information, and stores it in acpi_device_flags. Then uses it to call
arch_setup_dma_ops() when creating each device enumerated in DSDT
during ACPI scan.

This patch also introduces acpi_dma_is_coherent(), which provides
an interface for device drivers to check the coherency information
similarly to the of_dma_is_coherent().

Signed-off-by: Mark Salter <msalter@redhat.com>
Signed-off-by: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/acpi.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index e4da5e35e29c..d46a48c21c67 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -569,6 +569,11 @@ static inline int acpi_device_modalias(struct device *dev,
 	return -ENODEV;
 }
 
+static inline bool acpi_check_dma(struct acpi_device *adev, bool *coherent)
+{
+	return false;
+}
+
 #define ACPI_PTR(_ptr)	(NULL)
 
 #endif	/* !CONFIG_ACPI */
-- 
cgit v1.2.3


From 05ca556003b1d6b4df0b8831e4c07fad7f5bdd2c Mon Sep 17 00:00:00 2001
From: "Suthikulpanit, Suravee" <Suravee.Suthikulpanit@amd.com>
Date: Wed, 10 Jun 2015 11:08:54 -0500
Subject: device property: Introduces device_dma_is_coherent()

Currently, device drivers, which support both OF and ACPI,
need to call two separate APIs, of_dma_is_coherent() and
acpi_dma_is_coherent()) to determine device coherency attribute.

This patch simplifies this process by introducing a new device
property API, device_dma_is_coherent(), which calls the appropriate
interface based on the booting architecture.

Signed-off-by: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/property.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/property.h b/include/linux/property.h
index de8bdf417a35..76ebde9c11d4 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -164,4 +164,6 @@ struct property_set {
 
 void device_add_property_set(struct device *dev, struct property_set *pset);
 
+bool device_dma_is_coherent(struct device *dev);
+
 #endif /* _LINUX_PROPERTY_H_ */
-- 
cgit v1.2.3


From 711bdde6a884354ddae8da2fcb495b2a9364cc90 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 15 Jun 2015 09:57:30 -0700
Subject: netfilter: x_tables: remove XT_TABLE_INFO_SZ and a dereference.

After Florian patches, there is no need for XT_TABLE_INFO_SZ anymore :
Only one copy of table is kept, instead of one copy per cpu.

We also can avoid a dereference if we put table data right after
xt_table_info. It reduces register pressure and helps compiler.

Then, we attempt a kmalloc() if total size is under order-3 allocation,
to reduce TLB pressure, as in many cases, rules fit in 32 KB.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/x_tables.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 9969d79dcde1..95693c4cebdd 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -225,12 +225,9 @@ struct xt_table_info {
 	unsigned int __percpu *stackptr;
 	void ***jumpstack;
 
-	/* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */
-	void *entries;
+	unsigned char entries[0] __aligned(8);
 };
 
-#define XT_TABLE_INFO_SZ (offsetof(struct xt_table_info, entries) \
-			  + nr_cpu_ids * sizeof(char *))
 int xt_register_target(struct xt_target *target);
 void xt_unregister_target(struct xt_target *target);
 int xt_register_targets(struct xt_target *target, unsigned int n);
-- 
cgit v1.2.3


From 6f6a6fda294506dfe0e3e0a253bb2d2923f28f0a Mon Sep 17 00:00:00 2001
From: Joseph Qi <joseph.qi@huawei.com>
Date: Mon, 15 Jun 2015 14:36:01 -0400
Subject: jbd2: fix ocfs2 corrupt when updating journal superblock fails

If updating journal superblock fails after journal data has been
flushed, the error is omitted and this will mislead the caller as a
normal case.  In ocfs2, the checkpoint will be treated successfully
and the other node can get the lock to update. Since the sb_start is
still pointing to the old log block, it will rewrite the journal data
during journal recovery by the other node. Thus the new updates will
be overwritten and ocfs2 corrupts.  So in above case we have to return
the error, and ocfs2_commit_cache will take care of the error and
prevent the other node to do update first.  And only after recovering
journal it can do the new updates.

The issue discussion mail can be found at:
https://oss.oracle.com/pipermail/ocfs2-devel/2015-June/010856.html
http://comments.gmane.org/gmane.comp.file-systems.ext4/48841

[ Fixed bug in patch which allowed a non-negative error return from
  jbd2_cleanup_journal_tail() to leak out of jbd2_fjournal_flush(); this
  was causing xfstests ext4/306 to fail. -- Ted ]

Reported-by: Yiwen Jiang <jiangyiwen@huawei.com>
Signed-off-by: Joseph Qi <joseph.qi@huawei.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Tested-by: Yiwen Jiang <jiangyiwen@huawei.com>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: stable@vger.kernel.org
---
 include/linux/jbd2.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 20e7f78041c8..edb640ae9a94 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1035,7 +1035,7 @@ struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal);
 int jbd2_journal_next_log_block(journal_t *, unsigned long long *);
 int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid,
 			      unsigned long *block);
-void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
+int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
 void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
 
 /* Commit management */
@@ -1157,7 +1157,7 @@ extern int	   jbd2_journal_recover    (journal_t *journal);
 extern int	   jbd2_journal_wipe       (journal_t *, int);
 extern int	   jbd2_journal_skip_recovery	(journal_t *);
 extern void	   jbd2_journal_update_sb_errno(journal_t *);
-extern void	   jbd2_journal_update_sb_log_tail	(journal_t *, tid_t,
+extern int	   jbd2_journal_update_sb_log_tail	(journal_t *, tid_t,
 				unsigned long, int);
 extern void	   __jbd2_journal_abort_hard	(journal_t *);
 extern void	   jbd2_journal_abort      (journal_t *, int);
-- 
cgit v1.2.3


From ffeedafbf0236f03aeb2e8db273b3e5ae5f5bc89 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Fri, 12 Jun 2015 19:39:12 -0700
Subject: bpf: introduce current->pid, tgid, uid, gid, comm accessors

eBPF programs attached to kprobes need to filter based on
current->pid, uid and other fields, so introduce helper functions:

u64 bpf_get_current_pid_tgid(void)
Return: current->tgid << 32 | current->pid

u64 bpf_get_current_uid_gid(void)
Return: current_gid << 32 | current_uid

bpf_get_current_comm(char *buf, int size_of_buf)
stores current->comm into buf

They can be used from the programs attached to TC as well to classify packets
based on current task fields.

Update tracex2 example to print histogram of write syscalls for each process
instead of aggregated for all.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 2235aee8096a..1b9a3f5b27f6 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -188,5 +188,8 @@ extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
 extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
 extern const struct bpf_func_proto bpf_tail_call_proto;
 extern const struct bpf_func_proto bpf_ktime_get_ns_proto;
+extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto;
+extern const struct bpf_func_proto bpf_get_current_uid_gid_proto;
+extern const struct bpf_func_proto bpf_get_current_comm_proto;
 
 #endif /* _LINUX_BPF_H */
-- 
cgit v1.2.3


From 0756ea3e85139d23a8148ebaa95411c2f0aa4f11 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Fri, 12 Jun 2015 19:39:13 -0700
Subject: bpf: allow networking programs to use bpf_trace_printk() for
 debugging

bpf_trace_printk() is a helper function used to debug eBPF programs.
Let socket and TC programs use it as well.
Note, it's DEBUG ONLY helper. If it's used in the program,
the kernel will print warning banner to make sure users don't use
it in production.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 1b9a3f5b27f6..4383476a0d48 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -150,6 +150,7 @@ struct bpf_array {
 u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5);
 void bpf_prog_array_map_clear(struct bpf_map *map);
 bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp);
+const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
 
 #ifdef CONFIG_BPF_SYSCALL
 void bpf_register_prog_type(struct bpf_prog_type_list *tl);
-- 
cgit v1.2.3


From 9464ca650008615d28d9aaf7de99b4edf61f0109 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 12 Jun 2015 19:44:48 -0700
Subject: net: make u64_stats_init() a function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Using a function instead of a macro is cleaner and remove
following W=1 warnings (extract)

In file included from net/ipv6/ip6_vti.c:29:0:
net/ipv6/ip6_vti.c: In function ‘vti6_dev_init_gen’:
include/linux/netdevice.h:2029:18: warning: variable ‘stat’ set but not
used [-Wunused-but-set-variable]
    typeof(type) *stat;   \
                  ^
net/ipv6/ip6_vti.c:862:16: note: in expansion of macro
‘netdev_alloc_pcpu_stats’
  dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
                ^
  CC [M]  net/ipv6/sit.o
In file included from net/ipv6/sit.c:30:0:
net/ipv6/sit.c: In function ‘ipip6_tunnel_init’:
include/linux/netdevice.h:2029:18: warning: variable ‘stat’ set but not
used [-Wunused-but-set-variable]
    typeof(type) *stat;   \
                  ^

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/u64_stats_sync.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h
index 4b4439e75f45..df89c9bcba7d 100644
--- a/include/linux/u64_stats_sync.h
+++ b/include/linux/u64_stats_sync.h
@@ -68,11 +68,12 @@ struct u64_stats_sync {
 };
 
 
+static inline void u64_stats_init(struct u64_stats_sync *syncp)
+{
 #if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
-# define u64_stats_init(syncp)	seqcount_init(syncp.seq)
-#else
-# define u64_stats_init(syncp)	do { } while (0)
+	seqcount_init(&syncp->seq);
 #endif
+}
 
 static inline void u64_stats_update_begin(struct u64_stats_sync *syncp)
 {
-- 
cgit v1.2.3


From 47d8417f5914012c794684f651213ffae1b91619 Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Mon, 15 Jun 2015 17:58:58 +0300
Subject: net/mlx4_core: Add sink counter

Reserve the last valid counter index for "sink" counter, when a
new counter cannot be allocated, the driver will use this counter.

In order to avoid allocating this counter on any other flow, fix the
indices bitmap allocation range, and reserve the sink counter index.

Add macro for the sink counter index and replace all appearences of the
index with the macro.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Hadar Hen Zion <hadarh@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx4/device.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index ad31e476873f..312c50420dde 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -957,6 +957,7 @@ struct mlx4_mad_ifc {
 			((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE))
 
 #define MLX4_INVALID_SLAVE_ID	0xFF
+#define MLX4_SINK_COUNTER_INDEX(dev)	(dev->caps.max_counters - 1)
 
 void handle_port_mgmt_change_event(struct work_struct *work);
 
-- 
cgit v1.2.3


From 6de5f7f6a1fa2288552d46b3effbb6d5571413e5 Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Mon, 15 Jun 2015 17:59:02 +0300
Subject: net/mlx4_core: Allocate default counter per port

Default counter per port will be allocated at the mlx4 core driver load.

Every QP opened by the Ethernet driver will be attached to the port's default
counter.  This is an infrastructure step to collect VF statistics from the PF.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Hadar Hen Zion <hadarh@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx4/device.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 312c50420dde..4820080ac394 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -1348,6 +1348,7 @@ int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port);
 
 int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx);
 void mlx4_counter_free(struct mlx4_dev *dev, u32 idx);
+int mlx4_get_default_counter_index(struct mlx4_dev *dev, int port);
 
 void mlx4_set_admin_guid(struct mlx4_dev *dev, __be64 guid, int entry,
 			 int port);
-- 
cgit v1.2.3


From 9616982f3fcc9e6577d7f41009c4ef2df19a71ec Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Mon, 15 Jun 2015 17:59:05 +0300
Subject: net/mlx4_core: Add helper to query counters

This is an infrastructure step for querying VF and PF counters.

This code was in the IB driver, move it to the mlx4 core driver
so it will be accessible for more use cases.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Hadar Hen Zion <hadarh@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx4/cmd.h    | 3 +++
 include/linux/mlx4/device.h | 8 ++++++++
 2 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h
index f62e7cf227c6..5dffc869988b 100644
--- a/include/linux/mlx4/cmd.h
+++ b/include/linux/mlx4/cmd.h
@@ -35,6 +35,7 @@
 
 #include <linux/dma-mapping.h>
 #include <linux/if_link.h>
+#include <linux/mlx4/device.h>
 
 enum {
 	/* initialization and general commands */
@@ -300,6 +301,8 @@ static inline int mlx4_cmd_imm(struct mlx4_dev *dev, u64 in_param, u64 *out_para
 struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev);
 void mlx4_free_cmd_mailbox(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox);
 
+int mlx4_get_counter_stats(struct mlx4_dev *dev, int counter_index,
+			   struct mlx4_counter *counter_stats, int reset);
 u32 mlx4_comm_get_version(void);
 int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac);
 int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos);
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 4820080ac394..efe80c754b2f 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -771,6 +771,14 @@ union mlx4_ext_av {
 	struct mlx4_eth_av	eth;
 };
 
+/* Counters should be saturate once they reach their maximum value */
+#define ASSIGN_32BIT_COUNTER(counter, value) do {	\
+	if ((value) > U32_MAX)				\
+		counter = cpu_to_be32(U32_MAX);		\
+	else						\
+		counter = cpu_to_be32(value);		\
+} while (0)
+
 struct mlx4_counter {
 	u8	reserved1[3];
 	u8	counter_mode;
-- 
cgit v1.2.3


From 3b766cd832328fcb87db3507e7b98cf42f21689d Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Mon, 15 Jun 2015 17:59:07 +0300
Subject: net/core: Add reading VF statistics through the PF netdevice

Add ndo_get_vf_stats where the PF retrieves and fills the VFs traffic
statistics. We encode the VF stats in a nested manner to allow for
future extensions.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Hadar Hen Zion <hadarh@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_link.h   | 9 +++++++++
 include/linux/netdevice.h | 4 ++++
 2 files changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index da4929927f69..ae5d0d22955d 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -5,6 +5,15 @@
 
 
 /* We don't want this structure exposed to user space */
+struct ifla_vf_stats {
+	__u64 rx_packets;
+	__u64 tx_packets;
+	__u64 rx_bytes;
+	__u64 tx_bytes;
+	__u64 broadcast;
+	__u64 multicast;
+};
+
 struct ifla_vf_info {
 	__u32 vf;
 	__u8 mac[32];
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 6f5f71ff5169..e20979dfd6a9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1100,6 +1100,10 @@ struct net_device_ops {
 						     struct ifla_vf_info *ivf);
 	int			(*ndo_set_vf_link_state)(struct net_device *dev,
 							 int vf, int link_state);
+	int			(*ndo_get_vf_stats)(struct net_device *dev,
+						    int vf,
+						    struct ifla_vf_stats
+						    *vf_stats);
 	int			(*ndo_set_vf_port)(struct net_device *dev,
 						   int vf,
 						   struct nlattr *port[]);
-- 
cgit v1.2.3


From 62a890557f57e6cbebe9cc6c32aef045405d4fa2 Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Mon, 15 Jun 2015 17:59:08 +0300
Subject: net/mlx4_en: Support ndo_get_vf_stats

Implement the ndo to gather VF statistics through the PF.

All counters related to this VF are stored in a per slave
list, run over the slave's list and collect all statistics.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Hadar Hen Zion <hadarh@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx4/cmd.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h
index 5dffc869988b..58391f2e0414 100644
--- a/include/linux/mlx4/cmd.h
+++ b/include/linux/mlx4/cmd.h
@@ -36,6 +36,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/if_link.h>
 #include <linux/mlx4/device.h>
+#include <linux/netdevice.h>
 
 enum {
 	/* initialization and general commands */
@@ -303,6 +304,8 @@ void mlx4_free_cmd_mailbox(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbo
 
 int mlx4_get_counter_stats(struct mlx4_dev *dev, int counter_index,
 			   struct mlx4_counter *counter_stats, int reset);
+int mlx4_get_vf_stats(struct mlx4_dev *dev, int port, int vf_idx,
+		      struct ifla_vf_stats *vf_stats);
 u32 mlx4_comm_get_version(void);
 int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac);
 int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos);
-- 
cgit v1.2.3


From eb4cb008529ca08e0d8c0fa54e8f739520197a65 Mon Sep 17 00:00:00 2001
From: Craig Gallek <kraig@google.com>
Date: Mon, 15 Jun 2015 11:26:18 -0400
Subject: sock_diag: define destruction multicast groups

These groups will contain socket-destruction events for
AF_INET/AF_INET6, IPPROTO_TCP/IPPROTO_UDP.

Near the end of socket destruction, a check for listeners is
performed.  In the presence of a listener, rather than completely
cleanup the socket, a unit of work will be added to a private
work queue which will first broadcast information about the socket
and then finish the cleanup operation.

Signed-off-by: Craig Gallek <kraig@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sock_diag.h | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h
index 083ac388098e..fddebc617469 100644
--- a/include/linux/sock_diag.h
+++ b/include/linux/sock_diag.h
@@ -1,7 +1,10 @@
 #ifndef __SOCK_DIAG_H__
 #define __SOCK_DIAG_H__
 
+#include <linux/netlink.h>
 #include <linux/user_namespace.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
 #include <uapi/linux/sock_diag.h>
 
 struct sk_buff;
@@ -11,6 +14,7 @@ struct sock;
 struct sock_diag_handler {
 	__u8 family;
 	int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh);
+	int (*get_info)(struct sk_buff *skb, struct sock *sk);
 };
 
 int sock_diag_register(const struct sock_diag_handler *h);
@@ -26,4 +30,42 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attr);
 int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk,
 			     struct sk_buff *skb, int attrtype);
 
+static inline
+enum sknetlink_groups sock_diag_destroy_group(const struct sock *sk)
+{
+	switch (sk->sk_family) {
+	case AF_INET:
+		switch (sk->sk_protocol) {
+		case IPPROTO_TCP:
+			return SKNLGRP_INET_TCP_DESTROY;
+		case IPPROTO_UDP:
+			return SKNLGRP_INET_UDP_DESTROY;
+		default:
+			return SKNLGRP_NONE;
+		}
+	case AF_INET6:
+		switch (sk->sk_protocol) {
+		case IPPROTO_TCP:
+			return SKNLGRP_INET6_TCP_DESTROY;
+		case IPPROTO_UDP:
+			return SKNLGRP_INET6_UDP_DESTROY;
+		default:
+			return SKNLGRP_NONE;
+		}
+	default:
+		return SKNLGRP_NONE;
+	}
+}
+
+static inline
+bool sock_diag_has_destroy_listeners(const struct sock *sk)
+{
+	const struct net *n = sock_net(sk);
+	const enum sknetlink_groups group = sock_diag_destroy_group(sk);
+
+	return group != SKNLGRP_NONE && n->diag_nlsk &&
+		netlink_has_listeners(n->diag_nlsk, group);
+}
+void sock_diag_broadcast_destroy(struct sock *sk);
+
 #endif
-- 
cgit v1.2.3


From 3fd22af808f4d7455ba91596d334438c7ee0f889 Mon Sep 17 00:00:00 2001
From: Craig Gallek <kraig@google.com>
Date: Mon, 15 Jun 2015 11:26:19 -0400
Subject: sock_diag: specify info_size per inet protocol

Previously, there was no clear distinction between the inet protocols
that used struct tcp_info to report information and those that didn't.
This change adds a specific size attribute to the inet_diag_handler
struct which defines these interfaces.  This will make dispatching
sock_diag get_info requests identical for all inet protocols in a
following patch.

Tested: ss -au
Tested: ss -at
Signed-off-by: Craig Gallek <kraig@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inet_diag.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index ac48b10c9395..0e707f0c1a3e 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -24,6 +24,7 @@ struct inet_diag_handler {
 					  struct inet_diag_msg *r,
 					  void *info);
 	__u16		idiag_type;
+	__u16		idiag_info_size;
 };
 
 struct inet_connection_sock;
-- 
cgit v1.2.3


From d37e296979ed1652aec6850e2d736bd0ebf0cdb1 Mon Sep 17 00:00:00 2001
From: Tadeusz Struk <tadeusz.struk@intel.com>
Date: Mon, 15 Jun 2015 13:18:36 -0700
Subject: MPILIB: add mpi_read_buf() and mpi_get_size() helpers

Added a mpi_read_buf() helper function to export MPI to a buf provided by
the user, and a mpi_get_size() helper, that tells the user how big the buf is.
Changed mpi_free to use kzfree instead of kfree because it is used to free
crypto keys.

Signed-off-by: Tadeusz Struk <tadeusz.struk@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/mpi.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mpi.h b/include/linux/mpi.h
index 5af1b81def49..641b7d6fd096 100644
--- a/include/linux/mpi.h
+++ b/include/linux/mpi.h
@@ -81,6 +81,8 @@ MPI mpi_read_from_buffer(const void *buffer, unsigned *ret_nread);
 int mpi_fromstr(MPI val, const char *str);
 u32 mpi_get_keyid(MPI a, u32 *keyid);
 void *mpi_get_buffer(MPI a, unsigned *nbytes, int *sign);
+int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes,
+		    int *sign);
 void *mpi_get_secure_buffer(MPI a, unsigned *nbytes, int *sign);
 int mpi_set_buffer(MPI a, const void *buffer, unsigned nbytes, int sign);
 
@@ -142,4 +144,17 @@ int mpi_rshift(MPI x, MPI a, unsigned n);
 /*-- mpi-inv.c --*/
 int mpi_invm(MPI x, MPI u, MPI v);
 
+/* inline functions */
+
+/**
+ * mpi_get_size() - returns max size required to store the number
+ *
+ * @a:	A multi precision integer for which we want to allocate a bufer
+ *
+ * Return: size required to store the number
+ */
+static inline unsigned int mpi_get_size(MPI a)
+{
+	return a->nlimbs * BYTES_PER_MPI_LIMB;
+}
 #endif /*G10_MPI_H */
-- 
cgit v1.2.3


From c7cfc94096db28d3072b402c224eb50349926e24 Mon Sep 17 00:00:00 2001
From: Jiang Liu <jiang.liu@linux.intel.com>
Date: Mon, 1 Jun 2015 16:05:10 +0800
Subject: genirq: Enhance irq_data_to_desc() to support hierarchy irqdomain

For irq associated with hierarchy irqdomains, there will be multiple
irq_datas for one irq_desc. So enhance irq_data_to_desc() to support
hierarchy irqdomain. Also export irq_data_to_desc() as an inline
function for later reuse.

Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Link: http://lkml.kernel.org/r/1433145945-789-2-git-send-email-jiang.liu@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irqdesc.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index dd1109fb241e..a113a8dc7438 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -93,6 +93,15 @@ struct irq_desc {
 extern struct irq_desc irq_desc[NR_IRQS];
 #endif
 
+static inline struct irq_desc *irq_data_to_desc(struct irq_data *data)
+{
+#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
+	return irq_to_desc(data->irq);
+#else
+	return container_of(data, struct irq_desc, irq_data);
+#endif
+}
+
 static inline struct irq_data *irq_desc_get_irq_data(struct irq_desc *desc)
 {
 	return &desc->irq_data;
-- 
cgit v1.2.3


From 4158c2eca3c77ed3cccdcaeab153aad4e433369c Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Fri, 12 Jun 2015 10:14:02 +0200
Subject: iommu/vt-d: Detect pre enabled translation

Add code to detect whether translation is already enabled in
the IOMMU. Save this state in a flags field added to
struct intel_iommu.

Tested-by: ZhenHua Li <zhen-hual@hp.com>
Tested-by: Baoquan He <bhe@redhat.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/intel-iommu.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index a240e61a7700..b85b81ad5eba 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -320,6 +320,9 @@ enum {
 	MAX_SR_DMAR_REGS
 };
 
+#define VTD_FLAG_TRANS_PRE_ENABLED	(1 << 0)
+#define VTD_FLAG_IRQ_REMAP_PRE_ENABLED	(1 << 1)
+
 struct intel_iommu {
 	void __iomem	*reg; /* Pointer to hardware regs, virtual addr */
 	u64 		reg_phys; /* physical address of hw register set */
@@ -351,6 +354,7 @@ struct intel_iommu {
 #endif
 	struct device	*iommu_dev; /* IOMMU-sysfs device */
 	int		node;
+	u32		flags;      /* Software defined flags */
 };
 
 static inline void __iommu_flush_cache(
-- 
cgit v1.2.3


From af3b358e48115588d905cc07a47b3f356e0d01d1 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Fri, 12 Jun 2015 15:00:21 +0200
Subject: iommu/vt-d: Copy IR table from old kernel when in kdump mode

When we are booting into a kdump kernel and find IR enabled,
copy over the contents of the previous IR table so that
spurious interrupts will not be target aborted.

Tested-by: ZhenHua Li <zhen-hual@hp.com>
Tested-by: Baoquan He <bhe@redhat.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/intel-iommu.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index b85b81ad5eba..9e14edcf7f6e 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -296,6 +296,7 @@ struct q_inval {
 /* 1MB - maximum possible interrupt remapping table size */
 #define INTR_REMAP_PAGE_ORDER	8
 #define INTR_REMAP_TABLE_REG_SIZE	0xf
+#define INTR_REMAP_TABLE_REG_SIZE_MASK  0xf
 
 #define INTR_REMAP_TABLE_ENTRIES	65536
 
-- 
cgit v1.2.3


From 9f3520c3115b451ac1301779fc3c769d94907a70 Mon Sep 17 00:00:00 2001
From: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Date: Fri, 8 May 2015 18:19:05 +1000
Subject: wait: introduce wait_event_exclusive_cmd

It's just a variant of wait_event_cmd(), with exclusive flag being set.

For cases like RAID5, which puts many processes to sleep until 1/4
resources are free, a wake_up wakes up all processes to run, but
there is one process being able to get the resource as it's protected
by a spin lock. That ends up introducing heavy lock contentions, and
hurts performance badly.

Here introduce wait_event_exclusive_cmd to relieve the lock contention
naturally by letting wake_up just wake up one process.

Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
v2: its assumed that wait*() and __wait*() have the same arguments - peterz

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Signed-off-by: NeilBrown <neilb@suse.de>
---
 include/linux/wait.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 2db83349865b..db78c7204947 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -358,6 +358,19 @@ do {									\
 	__ret;								\
 })
 
+#define __wait_event_exclusive_cmd(wq, condition, cmd1, cmd2)		\
+	(void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 1, 0,	\
+			    cmd1; schedule(); cmd2)
+/*
+ * Just like wait_event_cmd(), except it sets exclusive flag
+ */
+#define wait_event_exclusive_cmd(wq, condition, cmd1, cmd2)		\
+do {									\
+	if (condition)							\
+		break;							\
+	__wait_event_exclusive_cmd(wq, condition, cmd1, cmd2);		\
+} while (0)
+
 #define __wait_event_cmd(wq, condition, cmd1, cmd2)			\
 	(void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
 			    cmd1; schedule(); cmd2)
-- 
cgit v1.2.3


From 3c339ab83fc09d9d91fb7e8b4a60e8ddc91de417 Mon Sep 17 00:00:00 2001
From: Tadeusz Struk <tadeusz.struk@intel.com>
Date: Tue, 16 Jun 2015 10:30:55 -0700
Subject: crypto: akcipher - add PKE API

Add Public Key Encryption API.

Signed-off-by: Tadeusz Struk <tadeusz.struk@intel.com>

Made CRYPTO_AKCIPHER invisible like other type config options.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/crypto.h     | 1 +
 include/linux/cryptouser.h | 5 +++++
 2 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 25a4b71d6d1f..0e3f71a73e3b 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -53,6 +53,7 @@
 #define CRYPTO_ALG_TYPE_SHASH		0x00000009
 #define CRYPTO_ALG_TYPE_AHASH		0x0000000a
 #define CRYPTO_ALG_TYPE_RNG		0x0000000c
+#define CRYPTO_ALG_TYPE_AKCIPHER	0x0000000d
 #define CRYPTO_ALG_TYPE_PCOMPRESS	0x0000000f
 
 #define CRYPTO_ALG_TYPE_HASH_MASK	0x0000000e
diff --git a/include/linux/cryptouser.h b/include/linux/cryptouser.h
index 4abf2ea6a887..36efbbbf2f83 100644
--- a/include/linux/cryptouser.h
+++ b/include/linux/cryptouser.h
@@ -43,6 +43,7 @@ enum crypto_attr_type_t {
 	CRYPTOCFGA_REPORT_COMPRESS,	/* struct crypto_report_comp */
 	CRYPTOCFGA_REPORT_RNG,		/* struct crypto_report_rng */
 	CRYPTOCFGA_REPORT_CIPHER,	/* struct crypto_report_cipher */
+	CRYPTOCFGA_REPORT_AKCIPHER,	/* struct crypto_report_akcipher */
 	__CRYPTOCFGA_MAX
 
 #define CRYPTOCFGA_MAX (__CRYPTOCFGA_MAX - 1)
@@ -101,5 +102,9 @@ struct crypto_report_rng {
 	unsigned int seedsize;
 };
 
+struct crypto_report_akcipher {
+	char type[CRYPTO_MAX_NAME];
+};
+
 #define CRYPTO_REPORT_MAXSIZE (sizeof(struct crypto_user_alg) + \
 			       sizeof(struct crypto_report_blkcipher))
-- 
cgit v1.2.3


From 46b15caa7cb19b0f6e3bc8ebaee5bc1bb2e35110 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 16 Jun 2015 18:48:31 -0400
Subject: vfs, writeback: replace FS_CGROUP_WRITEBACK with SB_I_CGROUPWB

FS_CGROUP_WRITEBACK indicates whether a file_system_type supports
cgroup writeback; however, different super_blocks of the same
file_system_type may or may not support cgroup writeback depending on
filesystem options.  This patch replaces FS_CGROUP_WRITEBACK with a
per-super_block flag.

super_block->s_flags carries some internal flags in the high bits but
it's exposd to userland through uapi header and running out of space
anyway.  This patch adds a new field super_block->s_iflags to carry
kernel-internal flags.  It is currently only used by the new
SB_I_CGROUPWB flag whose concatenated and abbreviated name is for
consistency with other super_block flags.

ext2_fill_super() is updated to set SB_I_CGROUPWB.

v2: Added super_block->s_iflags instead of stealing another high bit
    from sb->s_flags as suggested by Christoph and Jan.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: linux-fsdevel@vger.kernel.org
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Jan Kara <jack@suse.cz>
Cc: linux-ext4@vger.kernel.org
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/backing-dev.h | 2 +-
 include/linux/fs.h          | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index dfce80869145..a13181a42b9a 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -260,7 +260,7 @@ static inline bool inode_cgwb_enabled(struct inode *inode)
 
 	return bdi_cap_account_dirty(bdi) &&
 		(bdi->capabilities & BDI_CAP_CGROUP_WRITEBACK) &&
-		(inode->i_sb->s_type->fs_flags & FS_CGROUP_WRITEBACK);
+		(inode->i_sb->s_iflags & SB_I_CGROUPWB);
 }
 
 /**
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b5e1dcfbc5e3..2c5e33a5b2af 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1241,6 +1241,8 @@ struct mm_struct;
 #define UMOUNT_NOFOLLOW	0x00000008	/* Don't follow symlink on umount */
 #define UMOUNT_UNUSED	0x80000000	/* Flag guaranteed to be unused */
 
+/* sb->s_iflags */
+#define SB_I_CGROUPWB	0x00000001	/* cgroup-aware writeback enabled */
 
 /* Possible states of 'frozen' field */
 enum {
@@ -1279,6 +1281,7 @@ struct super_block {
 	const struct quotactl_ops	*s_qcop;
 	const struct export_operations *s_export_op;
 	unsigned long		s_flags;
+	unsigned long		s_iflags;	/* internal SB_I_* flags */
 	unsigned long		s_magic;
 	struct dentry		*s_root;
 	struct rw_semaphore	s_umount;
@@ -1912,7 +1915,6 @@ struct file_system_type {
 #define FS_HAS_SUBTYPE		4
 #define FS_USERNS_MOUNT		8	/* Can be mounted by userns root */
 #define FS_USERNS_DEV_MOUNT	16 /* A userns mount does not imply MNT_NODEV */
-#define FS_CGROUP_WRITEBACK	32	/* Supports cgroup-aware writeback */
 #define FS_RENAME_DOES_D_MOVE	32768	/* FS will handle d_move() during rename() internally. */
 	struct dentry *(*mount) (struct file_system_type *, int,
 		       const char *, void *);
-- 
cgit v1.2.3


From a9bd32a8b4c4c2670f9ed8cae63f9378b6df3ded Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Fri, 13 Mar 2015 11:09:45 -0700
Subject: msm: msm_fb: Remove dead code

This code is no longer used now that mach-msm has been removed.
Delete it.

Cc: Jean-Christophe Plagniol-Villard <plagnioj@jcrosoft.com>
Cc: Tomi Valkeinen <tomi.valkeinen@ti.com>
Cc: David Brown <davidb@codeaurora.org>
Cc: Bryan Huntsman <bryanh@codeaurora.org>
Cc: Daniel Walker <dwalker@fifo99.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Acked-by: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
---
 include/linux/platform_data/video-msm_fb.h | 146 -----------------------------
 1 file changed, 146 deletions(-)
 delete mode 100644 include/linux/platform_data/video-msm_fb.h

(limited to 'include/linux')

diff --git a/include/linux/platform_data/video-msm_fb.h b/include/linux/platform_data/video-msm_fb.h
deleted file mode 100644
index 31449be3eadb..000000000000
--- a/include/linux/platform_data/video-msm_fb.h
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * Internal shared definitions for various MSM framebuffer parts.
- *
- * Copyright (C) 2007 Google Incorporated
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#ifndef _MSM_FB_H_
-#define _MSM_FB_H_
-
-#include <linux/device.h>
-
-struct mddi_info;
-
-struct msm_fb_data {
-	int xres;	/* x resolution in pixels */
-	int yres;	/* y resolution in pixels */
-	int width;	/* disply width in mm */
-	int height;	/* display height in mm */
-	unsigned output_format;
-};
-
-struct msmfb_callback {
-	void (*func)(struct msmfb_callback *);
-};
-
-enum {
-	MSM_MDDI_PMDH_INTERFACE,
-	MSM_MDDI_EMDH_INTERFACE,
-	MSM_EBI2_INTERFACE,
-};
-
-#define MSMFB_CAP_PARTIAL_UPDATES	(1 << 0)
-
-struct msm_panel_data {
-	/* turns off the fb memory */
-	int (*suspend)(struct msm_panel_data *);
-	/* turns on the fb memory */
-	int (*resume)(struct msm_panel_data *);
-	/* turns off the panel */
-	int (*blank)(struct msm_panel_data *);
-	/* turns on the panel */
-	int (*unblank)(struct msm_panel_data *);
-	void (*wait_vsync)(struct msm_panel_data *);
-	void (*request_vsync)(struct msm_panel_data *, struct msmfb_callback *);
-	void (*clear_vsync)(struct msm_panel_data *);
-	/* from the enum above */
-	unsigned interface_type;
-	/* data to be passed to the fb driver */
-	struct msm_fb_data *fb_data;
-
-	/* capabilities supported by the panel */
-	uint32_t caps;
-};
-
-struct msm_mddi_client_data {
-	void (*suspend)(struct msm_mddi_client_data *);
-	void (*resume)(struct msm_mddi_client_data *);
-	void (*activate_link)(struct msm_mddi_client_data *);
-	void (*remote_write)(struct msm_mddi_client_data *, uint32_t val,
-			     uint32_t reg);
-	uint32_t (*remote_read)(struct msm_mddi_client_data *, uint32_t reg);
-	void (*auto_hibernate)(struct msm_mddi_client_data *, int);
-	/* custom data that needs to be passed from the board file to a 
-	 * particular client */
-	void *private_client_data;
-	struct resource *fb_resource;
-	/* from the list above */
-	unsigned interface_type;
-};
-
-struct msm_mddi_platform_data {
-	unsigned int clk_rate;
-	void (*power_client)(struct msm_mddi_client_data *, int on);
-
-	/* fixup the mfr name, product id */
-	void (*fixup)(uint16_t *mfr_name, uint16_t *product_id);
-
-	struct resource *fb_resource; /*optional*/
-	/* number of clients in the list that follows */
-	int num_clients;
-	/* array of client information of clients */
-	struct {
-		unsigned product_id; /* mfr id in top 16 bits, product id
-				      * in lower 16 bits
-				      */
-		char *name;	/* the device name will be the platform
-				 * device name registered for the client,
-				 * it should match the name of the associated
-				 * driver
-				 */
-		unsigned id;	/* id for mddi client device node, will also
-				 * be used as device id of panel devices, if
-				 * the client device will have multiple panels
-				 * space must be left here for them
-				 */
-		void *client_data;	/* required private client data */
-		unsigned int clk_rate;	/* optional: if the client requires a
-					* different mddi clk rate
-					*/
-	} client_platform_data[];
-};
-
-struct mdp_blit_req;
-struct fb_info;
-struct mdp_device {
-	struct device dev;
-	void (*dma)(struct mdp_device *mpd, uint32_t addr,
-		    uint32_t stride, uint32_t w, uint32_t h, uint32_t x,
-		    uint32_t y, struct msmfb_callback *callback, int interface);
-	void (*dma_wait)(struct mdp_device *mdp);
-	int (*blit)(struct mdp_device *mdp, struct fb_info *fb,
-		    struct mdp_blit_req *req);
-	void (*set_grp_disp)(struct mdp_device *mdp, uint32_t disp_id);
-};
-
-struct class_interface;
-int register_mdp_client(struct class_interface *class_intf);
-
-/**** private client data structs go below this line ***/
-
-struct msm_mddi_bridge_platform_data {
-	/* from board file */
-	int (*init)(struct msm_mddi_bridge_platform_data *,
-		    struct msm_mddi_client_data *);
-	int (*uninit)(struct msm_mddi_bridge_platform_data *,
-		      struct msm_mddi_client_data *);
-	/* passed to panel for use by the fb driver */
-	int (*blank)(struct msm_mddi_bridge_platform_data *,
-		     struct msm_mddi_client_data *);
-	int (*unblank)(struct msm_mddi_bridge_platform_data *,
-		       struct msm_mddi_client_data *);
-	struct msm_fb_data fb_data;
-};
-
-
-
-#endif
-- 
cgit v1.2.3


From 208489032bdd8d4a7de50f3057c175058f271956 Mon Sep 17 00:00:00 2001
From: Chaotian Jing <chaotian.jing@mediatek.com>
Date: Mon, 15 Jun 2015 19:20:48 +0800
Subject: mmc: mediatek: Add Mediatek MMC driver

Add Mediatek MMC driver code
Support eMMC/SD/SDIO

Signed-off-by: Chaotian Jing <chaotian.jing@mediatek.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/mmc/core.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h
index de722d4e9d61..258daf914c6d 100644
--- a/include/linux/mmc/core.h
+++ b/include/linux/mmc/core.h
@@ -121,6 +121,7 @@ struct mmc_data {
 	struct mmc_request	*mrq;		/* associated request */
 
 	unsigned int		sg_len;		/* size of scatter list */
+	int			sg_count;	/* mapped sg entries */
 	struct scatterlist	*sg;		/* I/O scatter list */
 	s32			host_cookie;	/* host private data */
 };
-- 
cgit v1.2.3


From a1a56aaa0735c7821e85c37268a7c12e132415cb Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 15 Jun 2015 18:10:13 -0700
Subject: netfilter: x_tables: align per cpu xt_counter

Let's force a 16 bytes alignment on xt_counter percpu allocations,
so that bytes and packets sit in same cache line.

xt_counter being exported to user space, we cannot add __align(16) on
the structure itself.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Florian Westphal <fw@strlen.de>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/x_tables.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 95693c4cebdd..1c97a2204379 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -356,7 +356,8 @@ static inline unsigned long ifname_compare_aligned(const char *_a,
  * so nothing needs to be done there.
  *
  * xt_percpu_counter_alloc returns the address of the percpu
- * counter, or 0 on !SMP.
+ * counter, or 0 on !SMP. We force an alignment of 16 bytes
+ * so that bytes/packets share a common cache line.
  *
  * Hence caller must use IS_ERR_VALUE to check for error, this
  * allows us to return 0 for single core systems without forcing
@@ -365,7 +366,8 @@ static inline unsigned long ifname_compare_aligned(const char *_a,
 static inline u64 xt_percpu_counter_alloc(void)
 {
 	if (nr_cpu_ids > 1) {
-		void __percpu *res = alloc_percpu(struct xt_counters);
+		void __percpu *res = __alloc_percpu(sizeof(struct xt_counters),
+						    sizeof(struct xt_counters));
 
 		if (res == NULL)
 			return (u64) -ENOMEM;
-- 
cgit v1.2.3


From 3b0f95be143bea1aa47beb20134ef82e4e4068dc Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Tue, 16 Jun 2015 23:06:20 +0100
Subject: irq: Add irq_set_chained_handler_and_data()

Driver authors seem to get the ordering of irq_set_chained_handler()
and irq_set_handler_data() wrong - ordering the former before the
latter.  This opens a race window where, if there is an interrupt
pending, the handler will be called between these two calls,
potentially resulting in an oops.

Provide a single interface to set both of these together, especially
as that's commonly what is required.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Cc: Alexandre Courbot <gnurou@gmail.com>
Cc: Hans Ulli Kroll <ulli.kroll@googlemail.com>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Lee Jones <lee.jones@linaro.org>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Thierry Reding <thierry.reding@gmail.com>
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lkml.kernel.org/r/E1Z4yzs-0002Rw-4B@rmk-PC.arm.linux.org.uk
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index de3213d271ff..42861d28fc2a 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -525,6 +525,15 @@ irq_set_chained_handler(unsigned int irq, irq_flow_handler_t handle)
 	__irq_set_handler(irq, handle, 1, NULL);
 }
 
+/*
+ * Set a highlevel chained flow handler and its data for a given IRQ.
+ * (a chained handler is automatically enabled and set to
+ *  IRQ_NOREQUEST, IRQ_NOPROBE, and IRQ_NOTHREAD)
+ */
+void
+irq_set_chained_handler_and_data(unsigned int irq, irq_flow_handler_t handle,
+				 void *data);
+
 void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set);
 
 static inline void irq_set_status_flags(unsigned int irq, unsigned long set)
-- 
cgit v1.2.3


From 0f1b414d190724617eb1cdd615592fa8cd9d0b50 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 18 Jun 2015 18:32:02 +0200
Subject: ACPI / PNP: Avoid conflicting resource reservations

Commit b9a5e5e18fbf "ACPI / init: Fix the ordering of
acpi_reserve_resources()" overlooked the fact that the memory
and/or I/O regions reserved by acpi_reserve_resources() may
conflict with those reserved by the PNP "system" driver.

If that conflict actually takes place, it causes the reservations
made by the "system" driver to fail while before commit b9a5e5e18fbf
all reservations made by it and by acpi_reserve_resources() would be
successful.  In turn, that allows the resources that haven't been
reserved by the "system" driver to be used by others (e.g. PCI) which
sometimes leads to functional problems (up to and including boot
failures).

To fix that issue, introduce a common resource reservation routine,
acpi_reserve_region(), to be used by both acpi_reserve_resources()
and the "system" driver, that will track all resources reserved by
it and avoid making conflicting requests.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=99831
Link: http://marc.info/?t=143389402600001&r=1&w=2
Fixes: b9a5e5e18fbf "ACPI / init: Fix the ordering of acpi_reserve_resources()"
Reported-by: Roland Dreier <roland@purestorage.com>
Cc: All applicable <stable@vger.kernel.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/acpi.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index e4da5e35e29c..299763df1b27 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -332,6 +332,9 @@ int acpi_check_region(resource_size_t start, resource_size_t n,
 
 int acpi_resources_are_enforced(void);
 
+int acpi_reserve_region(u64 start, unsigned int length, u8 space_id,
+			unsigned long flags, char *desc);
+
 #ifdef CONFIG_HIBERNATION
 void __init acpi_no_s4_hw_signature(void);
 #endif
@@ -525,6 +528,13 @@ static inline int acpi_check_region(resource_size_t start, resource_size_t n,
 	return 0;
 }
 
+static inline int acpi_reserve_region(u64 start, unsigned int length,
+				      u8 space_id, unsigned long flags,
+				      char *desc)
+{
+	return -ENXIO;
+}
+
 struct acpi_table_header;
 static inline int acpi_table_parse(char *id,
 				int (*handler)(struct acpi_table_header *))
-- 
cgit v1.2.3


From a263653ed798216c0069922d7b5237ca49436007 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 17 Jun 2015 10:28:27 -0500
Subject: netfilter: don't pull include/linux/netfilter.h from netns headers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This pulls the full hook netfilter definitions from all those that include
net_namespace.h.

Instead let's just include the bare minimum required in the new
linux/netfilter_defs.h file, and use it from the netfilter netns header files.

I also needed to include in.h and in6.h from linux/netfilter.h otherwise we hit
this compilation error:

In file included from include/linux/netfilter_defs.h:4:0,
                 from include/net/netns/netfilter.h:4,
                 from include/net/net_namespace.h:22,
                 from include/linux/netdevice.h:43,
                 from net/netfilter/nfnetlink_queue_core.c:23:
include/uapi/linux/netfilter.h:76:17: error: field ‘in’ has incomplete type struct in_addr in;

And also explicit include linux/netfilter.h in several spots.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 include/linux/netfilter.h      | 6 ++----
 include/linux/netfilter_defs.h | 9 +++++++++
 2 files changed, 11 insertions(+), 4 deletions(-)
 create mode 100644 include/linux/netfilter_defs.h

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index f5ff5d156da8..00050dfd9f23 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -10,7 +10,8 @@
 #include <linux/wait.h>
 #include <linux/list.h>
 #include <linux/static_key.h>
-#include <uapi/linux/netfilter.h>
+#include <linux/netfilter_defs.h>
+
 #ifdef CONFIG_NETFILTER
 static inline int NF_DROP_GETERR(int verdict)
 {
@@ -38,9 +39,6 @@ static inline void nf_inet_addr_mask(const union nf_inet_addr *a1,
 
 int netfilter_init(void);
 
-/* Largest hook number + 1 */
-#define NF_MAX_HOOKS 8
-
 struct sk_buff;
 
 struct nf_hook_ops;
diff --git a/include/linux/netfilter_defs.h b/include/linux/netfilter_defs.h
new file mode 100644
index 000000000000..d3a7f8597e82
--- /dev/null
+++ b/include/linux/netfilter_defs.h
@@ -0,0 +1,9 @@
+#ifndef __LINUX_NETFILTER_CORE_H_
+#define __LINUX_NETFILTER_CORE_H_
+
+#include <uapi/linux/netfilter.h>
+
+/* Largest hook number + 1, see uapi/linux/netfilter_decnet.h */
+#define NF_MAX_HOOKS 8
+
+#endif
-- 
cgit v1.2.3


From dcb8f5c8139ef945cdfd55900fae265c4dbefc02 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 17 Jun 2015 23:58:28 +0200
Subject: netfilter: xtables: fix warnings on 32bit platforms

On 32bit archs gcc complains due to cast from void* to u64.
Add intermediate casts to long to silence these warnings.

include/linux/netfilter/x_tables.h:376:10: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
include/linux/netfilter/x_tables.h:384:15: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
include/linux/netfilter/x_tables.h:391:23: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
include/linux/netfilter/x_tables.h:400:22: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]

Fixes: 71ae0dff02d756e ("netfilter: xtables: use percpu rule counters")
Reported-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/x_tables.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 1c97a2204379..286098a5667f 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -372,7 +372,7 @@ static inline u64 xt_percpu_counter_alloc(void)
 		if (res == NULL)
 			return (u64) -ENOMEM;
 
-		return (__force u64) res;
+		return (u64) (__force unsigned long) res;
 	}
 
 	return 0;
@@ -380,14 +380,14 @@ static inline u64 xt_percpu_counter_alloc(void)
 static inline void xt_percpu_counter_free(u64 pcnt)
 {
 	if (nr_cpu_ids > 1)
-		free_percpu((void __percpu *) pcnt);
+		free_percpu((void __percpu *) (unsigned long) pcnt);
 }
 
 static inline struct xt_counters *
 xt_get_this_cpu_counter(struct xt_counters *cnt)
 {
 	if (nr_cpu_ids > 1)
-		return this_cpu_ptr((void __percpu *) cnt->pcnt);
+		return this_cpu_ptr((void __percpu *) (unsigned long) cnt->pcnt);
 
 	return cnt;
 }
@@ -396,7 +396,7 @@ static inline struct xt_counters *
 xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu)
 {
 	if (nr_cpu_ids > 1)
-		return per_cpu_ptr((void __percpu *) cnt->pcnt, cpu);
+		return per_cpu_ptr((void __percpu *) (unsigned long) cnt->pcnt, cpu);
 
 	return cnt;
 }
-- 
cgit v1.2.3


From fb02915f47181e824339d91f8e385fd4bd746d6a Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 18 Jun 2015 16:54:28 -0400
Subject: kernfs: make kernfs_get_inode() public

Move kernfs_get_inode() prototype from fs/kernfs/kernfs-internal.h to
include/linux/kernfs.h.  It obtains the matching inode for a
kernfs_node.

It will be used by cgroup for inode based permission checks for now
but is generally useful.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/kernfs.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 71ecdab1671b..e6b2f7db9c0c 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -277,6 +277,7 @@ void kernfs_put(struct kernfs_node *kn);
 
 struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry);
 struct kernfs_root *kernfs_root_from_sb(struct super_block *sb);
+struct inode *kernfs_get_inode(struct super_block *sb, struct kernfs_node *kn);
 
 struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
 				       unsigned int flags, void *priv);
@@ -352,6 +353,10 @@ static inline struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry)
 static inline struct kernfs_root *kernfs_root_from_sb(struct super_block *sb)
 { return NULL; }
 
+static inline struct inode *
+kernfs_get_inode(struct super_block *sb, struct kernfs_node *kn)
+{ return NULL; }
+
 static inline struct kernfs_root *
 kernfs_create_root(struct kernfs_syscall_ops *scops, unsigned int flags,
 		   void *priv)
-- 
cgit v1.2.3


From 187fe84067bd377047cfcb7f2bbc7c9dc12d290c Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 18 Jun 2015 16:54:28 -0400
Subject: cgroup: require write perm on common ancestor when moving processes
 on the default hierarchy

On traditional hierarchies, if a task has write access to "tasks" or
"cgroup.procs" file of a cgroup and its euid agrees with the target,
it can move the target to the cgroup; however, consider the following
scenario.  The owner of each cgroup is in the parentheses.

 R (root) - 0 (root) - 00 (user1) - 000 (user1)
          |                       \ 001 (user1)
          \ 1 (root) - 10 (user1)

The subtrees of 00 and 10 are delegated to user1; however, while both
subtrees may belong to the same user, it is clear that the two
subtrees are to be isolated - they're under completely separate
resource limits imposed by 0 and 1, respectively.  Note that 0 and 1
aren't strictly necessary but added to ease illustrating the issue.

If user1 is allowed to move processes between the two subtrees, the
intention of the hierarchy - keeping a given group of processes under
a subtree with certain resource restrictions while delegating
management of the subtree - can be circumvented by user1.

This happens because migration permission check doesn't consider the
hierarchical nature of cgroups.  To fix the issue, this patch adds an
extra permission requirement when userland tries to migrate a process
in the default hierarchy - the issuing task must have write access to
the common ancestor of "cgroup.procs" file of the ancestor in addition
to the destination's.

Conceptually, the issuer must be able to move the target process from
the source cgroup to the common ancestor of source and destination
cgroups and then to the destination.  As long as delegation is done in
a proper top-down way, this guarantees that a delegatee can't smuggle
processes across disjoint delegation domains.

The next patch will add documentation on the delegation model on the
default hierarchy.

v2: Fixed missing !ret test.  Spotted by Li Zefan.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup-defs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index c5588c438448..93755a629299 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -220,6 +220,7 @@ struct cgroup {
 	int populated_cnt;
 
 	struct kernfs_node *kn;		/* cgroup kernfs entry */
+	struct kernfs_node *procs_kn;	/* kn for "cgroup.procs" */
 	struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */
 
 	/*
-- 
cgit v1.2.3


From c04dca02bc73096435a5c36efd5ccb2171edcbe1 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Thu, 11 Jun 2015 14:46:44 +0200
Subject: hrtimer: Remove HRTIMER_STATE_MIGRATE

I do not understand HRTIMER_STATE_MIGRATE. Unless I am totally
confused it looks buggy and simply unneeded.

migrate_hrtimer_list() sets it to keep hrtimer_active() == T, but this
is not enough: this can fool, say, hrtimer_is_queued() in
dequeue_signal().

Can't migrate_hrtimer_list() simply use HRTIMER_STATE_ENQUEUED?
This fixes the race and we can kill STATE_MIGRATE.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: ktkhai@parallels.com
Cc: rostedt@goodmis.org
Cc: juri.lelli@gmail.com
Cc: pang.xunlei@linaro.org
Cc: wanpeng.li@linux.intel.com
Cc: umgwanakikbuti@gmail.com
Link: http://lkml.kernel.org/r/20150611124743.072387650@infradead.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 3f82a7edc03d..2f9e57d3d126 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -70,17 +70,13 @@ enum hrtimer_restart {
  * the handling of the timer.
  *
  * The HRTIMER_STATE_ENQUEUED bit is always or'ed to the current state
- * to preserve the HRTIMER_STATE_CALLBACK in the above scenario. This
- * also affects HRTIMER_STATE_MIGRATE where the preservation is not
- * necessary. HRTIMER_STATE_MIGRATE is cleared after the timer is
- * enqueued on the new cpu.
+ * to preserve the HRTIMER_STATE_CALLBACK in the above scenario.
  *
  * All state transitions are protected by cpu_base->lock.
  */
 #define HRTIMER_STATE_INACTIVE	0x00
 #define HRTIMER_STATE_ENQUEUED	0x01
 #define HRTIMER_STATE_CALLBACK	0x02
-#define HRTIMER_STATE_MIGRATE	0x04
 
 /**
  * struct hrtimer - the basic hrtimer structure
-- 
cgit v1.2.3


From a7c6f571ff51cc77d90dd54968f7c5c938c43998 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 11 Jun 2015 14:46:46 +0200
Subject: seqcount: Rename write_seqcount_barrier()

I'll shortly be introducing another seqcount primitive that's useful
to provide ordering semantics and would like to use the
write_seqcount_barrier() name for that.

Seeing how there's only one user of the current primitive, lets rename
it to invalidate, as that appears what its doing.

While there, employ lockdep_assert_held() instead of
assert_spin_locked() to not generate debug code for regular kernels.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: ktkhai@parallels.com
Cc: rostedt@goodmis.org
Cc: juri.lelli@gmail.com
Cc: pang.xunlei@linaro.org
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: wanpeng.li@linux.intel.com
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: umgwanakikbuti@gmail.com
Link: http://lkml.kernel.org/r/20150611124743.279926217@infradead.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/seqlock.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 5f68d0a391ce..c07e3a536099 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -266,13 +266,13 @@ static inline void write_seqcount_end(seqcount_t *s)
 }
 
 /**
- * write_seqcount_barrier - invalidate in-progress read-side seq operations
+ * write_seqcount_invalidate - invalidate in-progress read-side seq operations
  * @s: pointer to seqcount_t
  *
- * After write_seqcount_barrier, no read-side seq operations will complete
+ * After write_seqcount_invalidate, no read-side seq operations will complete
  * successfully and see data older than this.
  */
-static inline void write_seqcount_barrier(seqcount_t *s)
+static inline void write_seqcount_invalidate(seqcount_t *s)
 {
 	smp_wmb();
 	s->sequence+=2;
-- 
cgit v1.2.3


From c4bfa3f5f906aee2e084c5b1fb15caf876338ef8 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 17 Jun 2015 14:29:24 +0200
Subject: seqcount: Introduce raw_write_seqcount_barrier()

Introduce raw_write_seqcount_barrier(), a new construct that can be
used to provide write barrier semantics in seqcount read loops instead
of the usual consistency guarantee.

raw_write_seqcount_barier() is equivalent to:

	raw_write_seqcount_begin();
	raw_write_seqcount_end();

But avoids issueing two back-to-back smp_wmb() instructions.

This construct works because the read side will 'stall' when observing
odd values. This means that -- referring to the example in the comment
below -- even though there is no (matching) read barrier between the
loads of X and Y, we cannot observe !x && !y, because:

 - if we observe Y == false we must observe the first sequence
   increment, which makes us loop, until

 - we observe !(seq & 1) -- the second sequence increment -- at which
   time we must also observe T == true.

Suggested-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: umgwanakikbuti@gmail.com
Cc: ktkhai@parallels.com
Cc: rostedt@goodmis.org
Cc: juri.lelli@gmail.com
Cc: pang.xunlei@linaro.org
Cc: oleg@redhat.com
Cc: wanpeng.li@linux.intel.com
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/20150617122924.GP3644@twins.programming.kicks-ass.net
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/seqlock.h | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index c07e3a536099..486e685a226a 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -233,6 +233,47 @@ static inline void raw_write_seqcount_end(seqcount_t *s)
 	s->sequence++;
 }
 
+/**
+ * raw_write_seqcount_barrier - do a seq write barrier
+ * @s: pointer to seqcount_t
+ *
+ * This can be used to provide an ordering guarantee instead of the
+ * usual consistency guarantee. It is one wmb cheaper, because we can
+ * collapse the two back-to-back wmb()s.
+ *
+ *      seqcount_t seq;
+ *      bool X = true, Y = false;
+ *
+ *      void read(void)
+ *      {
+ *              bool x, y;
+ *
+ *              do {
+ *                      int s = read_seqcount_begin(&seq);
+ *
+ *                      x = X; y = Y;
+ *
+ *              } while (read_seqcount_retry(&seq, s));
+ *
+ *              BUG_ON(!x && !y);
+ *      }
+ *
+ *      void write(void)
+ *      {
+ *              Y = true;
+ *
+ *              raw_write_seqcount_barrier(seq);
+ *
+ *              X = false;
+ *      }
+ */
+static inline void raw_write_seqcount_barrier(seqcount_t *s)
+{
+	s->sequence++;
+	smp_wmb();
+	s->sequence++;
+}
+
 /*
  * raw_write_seqcount_latch - redirect readers to even/odd copy
  * @s: pointer to seqcount_t
-- 
cgit v1.2.3


From 887d9dc989eb0154492e41e7c07492edbb088ba1 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 11 Jun 2015 14:46:48 +0200
Subject: hrtimer: Allow hrtimer::function() to free the timer

Currently an hrtimer callback function cannot free its own timer
because __run_hrtimer() still needs to clear HRTIMER_STATE_CALLBACK
after it. Freeing the timer would result in a clear use-after-free.

Solve this by using a scheme similar to regular timers; track the
current running timer in hrtimer_clock_base::running.

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: ktkhai@parallels.com
Cc: rostedt@goodmis.org
Cc: juri.lelli@gmail.com
Cc: pang.xunlei@linaro.org
Cc: wanpeng.li@linux.intel.com
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: umgwanakikbuti@gmail.com
Link: http://lkml.kernel.org/r/20150611124743.471563047@infradead.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h | 41 ++++++++++++++++-------------------------
 1 file changed, 16 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 2f9e57d3d126..5db055821ef3 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -53,30 +53,25 @@ enum hrtimer_restart {
  *
  * 0x00		inactive
  * 0x01		enqueued into rbtree
- * 0x02		callback function running
- * 0x04		timer is migrated to another cpu
  *
- * Special cases:
- * 0x03		callback function running and enqueued
- *		(was requeued on another CPU)
- * 0x05		timer was migrated on CPU hotunplug
+ * The callback state is not part of the timer->state because clearing it would
+ * mean touching the timer after the callback, this makes it impossible to free
+ * the timer from the callback function.
  *
- * The "callback function running and enqueued" status is only possible on
- * SMP. It happens for example when a posix timer expired and the callback
+ * Therefore we track the callback state in:
+ *
+ *	timer->base->cpu_base->running == timer
+ *
+ * On SMP it is possible to have a "callback function running and enqueued"
+ * status. It happens for example when a posix timer expired and the callback
  * queued a signal. Between dropping the lock which protects the posix timer
  * and reacquiring the base lock of the hrtimer, another CPU can deliver the
- * signal and rearm the timer. We have to preserve the callback running state,
- * as otherwise the timer could be removed before the softirq code finishes the
- * the handling of the timer.
- *
- * The HRTIMER_STATE_ENQUEUED bit is always or'ed to the current state
- * to preserve the HRTIMER_STATE_CALLBACK in the above scenario.
+ * signal and rearm the timer.
  *
  * All state transitions are protected by cpu_base->lock.
  */
 #define HRTIMER_STATE_INACTIVE	0x00
 #define HRTIMER_STATE_ENQUEUED	0x01
-#define HRTIMER_STATE_CALLBACK	0x02
 
 /**
  * struct hrtimer - the basic hrtimer structure
@@ -163,6 +158,8 @@ enum  hrtimer_base_type {
  * struct hrtimer_cpu_base - the per cpu clock bases
  * @lock:		lock protecting the base and associated clock bases
  *			and timers
+ * @seq:		seqcount around __run_hrtimer
+ * @running:		pointer to the currently running hrtimer
  * @cpu:		cpu number
  * @active_bases:	Bitfield to mark bases with active timers
  * @clock_was_set_seq:	Sequence counter of clock was set events
@@ -184,6 +181,8 @@ enum  hrtimer_base_type {
  */
 struct hrtimer_cpu_base {
 	raw_spinlock_t			lock;
+	seqcount_t			seq;
+	struct hrtimer			*running;
 	unsigned int			cpu;
 	unsigned int			active_bases;
 	unsigned int			clock_was_set_seq;
@@ -391,15 +390,7 @@ extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer);
 
 extern u64 hrtimer_get_next_event(void);
 
-/*
- * A timer is active, when it is enqueued into the rbtree or the
- * callback function is running or it's in the state of being migrated
- * to another cpu.
- */
-static inline int hrtimer_active(const struct hrtimer *timer)
-{
-	return timer->state != HRTIMER_STATE_INACTIVE;
-}
+extern bool hrtimer_active(const struct hrtimer *timer);
 
 /*
  * Helper function to check, whether the timer is on one of the queues
@@ -415,7 +406,7 @@ static inline int hrtimer_is_queued(struct hrtimer *timer)
  */
 static inline int hrtimer_callback_running(struct hrtimer *timer)
 {
-	return timer->state & HRTIMER_STATE_CALLBACK;
+	return timer->base->cpu_base->running == timer;
 }
 
 /* Forward a hrtimer so it expires after now: */
-- 
cgit v1.2.3


From a24fc60d63da2b0b31bf7c876d12a51ed4b778bd Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 11 Jun 2015 14:46:53 +0200
Subject: lockdep: Implement lock pinning

Add a lockdep annotation that WARNs if you 'accidentially' unlock a
lock.

This is especially helpful for code with callbacks, where the upper
layer assumes a lock remains taken but a lower layer thinks it maybe
can drop and reacquire the lock.

By unwittingly breaking up the lock, races can be introduced.

Lock pinning is a lockdep annotation that helps with this, when you
lockdep_pin_lock() a held lock, any unlock without a
lockdep_unpin_lock() will produce a WARN. Think of this as a relative
of lockdep_assert_held(), except you don't only assert its held now,
but ensure it stays held until you release your assertion.

RFC: a possible alternative API would be something like:

  int cookie = lockdep_pin_lock(&foo);
  ...
  lockdep_unpin_lock(&foo, cookie);

Where we pick a random number for the pin_count; this makes it
impossible to sneak a lock break in without also passing the right
cookie along.

I've not done this because it ends up generating code for !LOCKDEP,
esp. if you need to pass the cookie around for some reason.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: ktkhai@parallels.com
Cc: rostedt@goodmis.org
Cc: juri.lelli@gmail.com
Cc: pang.xunlei@linaro.org
Cc: oleg@redhat.com
Cc: wanpeng.li@linux.intel.com
Cc: umgwanakikbuti@gmail.com
Link: http://lkml.kernel.org/r/20150611124743.906731065@infradead.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/lockdep.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 066ba4157541..c5b6b5830acf 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -255,6 +255,7 @@ struct held_lock {
 	unsigned int check:1;       /* see lock_acquire() comment */
 	unsigned int hardirqs_off:1;
 	unsigned int references:12;					/* 32 bits */
+	unsigned int pin_count;
 };
 
 /*
@@ -354,6 +355,9 @@ extern void lockdep_set_current_reclaim_state(gfp_t gfp_mask);
 extern void lockdep_clear_current_reclaim_state(void);
 extern void lockdep_trace_alloc(gfp_t mask);
 
+extern void lock_pin_lock(struct lockdep_map *lock);
+extern void lock_unpin_lock(struct lockdep_map *lock);
+
 # define INIT_LOCKDEP				.lockdep_recursion = 0, .lockdep_reclaim_gfp = 0,
 
 #define lockdep_depth(tsk)	(debug_locks ? (tsk)->lockdep_depth : 0)
@@ -368,6 +372,9 @@ extern void lockdep_trace_alloc(gfp_t mask);
 
 #define lockdep_recursing(tsk)	((tsk)->lockdep_recursion)
 
+#define lockdep_pin_lock(l)		lock_pin_lock(&(l)->dep_map)
+#define lockdep_unpin_lock(l)	lock_unpin_lock(&(l)->dep_map)
+
 #else /* !CONFIG_LOCKDEP */
 
 static inline void lockdep_off(void)
@@ -420,6 +427,9 @@ struct lock_class_key { };
 
 #define lockdep_recursing(tsk)			(0)
 
+#define lockdep_pin_lock(l)				do { (void)(l); } while (0)
+#define lockdep_unpin_lock(l)			do { (void)(l); } while (0)
+
 #endif /* !LOCKDEP */
 
 #ifdef CONFIG_LOCK_STAT
-- 
cgit v1.2.3


From 7f3b62cf945dc3e57fbd693022a5651206ce85b0 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Tue, 16 Jun 2015 16:27:43 +0200
Subject: acpi-video-detect: Remove the unused acpi_video_dmi_demote_vendor()
 function

Remove the now unused acpi_video_dmi_demote_vendor() function, this was
never a proper counter part of acpi_video_dmi_promote_vendor() since
the calls to acpi_video_dmi_promote_vendor() are not counted.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Darren Hart <dvhart@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/acpi.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index e4da5e35e29c..01bffd30c6c7 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -248,7 +248,6 @@ extern bool wmi_has_guid(const char *guid);
 extern long acpi_video_get_capabilities(acpi_handle graphics_dev_handle);
 extern long acpi_is_video_device(acpi_handle handle);
 extern void acpi_video_dmi_promote_vendor(void);
-extern void acpi_video_dmi_demote_vendor(void);
 extern int acpi_video_backlight_support(void);
 extern int acpi_video_display_switch_support(void);
 
@@ -268,10 +267,6 @@ static inline void acpi_video_dmi_promote_vendor(void)
 {
 }
 
-static inline void acpi_video_dmi_demote_vendor(void)
-{
-}
-
 static inline int acpi_video_backlight_support(void)
 {
 	return 0;
-- 
cgit v1.2.3


From fb105d964226ce4834b45d7e3d9f339aa716ed70 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Tue, 16 Jun 2015 16:27:44 +0200
Subject: acpi-video-detect: Make acpi_video_get_capabilities a private
 function

acpi_video_get_capabilities() is only used inside video_detect.c so make
it static. While at it also remove the prototype for the non existent
acpi_video_display_switch_support function from acpi.h

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Darren Hart <dvhart@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/acpi.h | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 01bffd30c6c7..88c92a03a77e 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -245,19 +245,12 @@ extern bool wmi_has_guid(const char *guid);
 
 #if defined(CONFIG_ACPI_VIDEO) || defined(CONFIG_ACPI_VIDEO_MODULE)
 
-extern long acpi_video_get_capabilities(acpi_handle graphics_dev_handle);
 extern long acpi_is_video_device(acpi_handle handle);
 extern void acpi_video_dmi_promote_vendor(void);
 extern int acpi_video_backlight_support(void);
-extern int acpi_video_display_switch_support(void);
 
 #else
 
-static inline long acpi_video_get_capabilities(acpi_handle graphics_dev_handle)
-{
-	return 0;
-}
-
 static inline long acpi_is_video_device(acpi_handle handle)
 {
 	return 0;
@@ -272,11 +265,6 @@ static inline int acpi_video_backlight_support(void)
 	return 0;
 }
 
-static inline int acpi_video_display_switch_support(void)
-{
-	return 0;
-}
-
 #endif /* defined(CONFIG_ACPI_VIDEO) || defined(CONFIG_ACPI_VIDEO_MODULE) */
 
 extern int acpi_blacklisted(void);
-- 
cgit v1.2.3


From adc8bb8e0fe005ed29366e6c4621652481878214 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Tue, 16 Jun 2015 16:27:45 +0200
Subject: acpi-video-detect: Move acpi_is_video_device() to acpi/scan.c

This allows video_detect.c to be build as a module, this is a preparation
patch for the backlight interface selection logic cleanup.

Note this commit also causes acpi_is_video_device() to always be build
indepedent of CONFIG_ACPI_VIDEO, as there is no reason to make its
building depend on CONFIG_ACPI_VIDEO.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Darren Hart <dvhart@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/acpi.h | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 88c92a03a77e..7cb3b0bc4a7e 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -243,19 +243,15 @@ extern bool wmi_has_guid(const char *guid);
 #define ACPI_VIDEO_OUTPUT_SWITCHING_DMI_VENDOR		0x0400
 #define ACPI_VIDEO_OUTPUT_SWITCHING_DMI_VIDEO		0x0800
 
+extern long acpi_is_video_device(acpi_handle handle);
+
 #if defined(CONFIG_ACPI_VIDEO) || defined(CONFIG_ACPI_VIDEO_MODULE)
 
-extern long acpi_is_video_device(acpi_handle handle);
 extern void acpi_video_dmi_promote_vendor(void);
 extern int acpi_video_backlight_support(void);
 
 #else
 
-static inline long acpi_is_video_device(acpi_handle handle)
-{
-	return 0;
-}
-
 static inline void acpi_video_dmi_promote_vendor(void)
 {
 }
-- 
cgit v1.2.3


From a87878bafa1f82c20eddaf2d23780b194c35ccf5 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Tue, 16 Jun 2015 16:27:46 +0200
Subject: acpi-video-detect: Move acpi_osi_is_win8 to osl.c

acpi_osi_is_win8 needs access to acpi_gbl_osi_data which is not exported,
so move it to osl.c. Alternatively we could export acpi_gbl_osi_data but
that seems undesirable.

This allows video_detect.c to be build as a module, besides that
acpi_osi_is_win8() is something which does not really belong in
video_detect.c in the first place.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Darren Hart <dvhart@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/acpi.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 7cb3b0bc4a7e..913a1c19818a 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -266,6 +266,7 @@ static inline int acpi_video_backlight_support(void)
 extern int acpi_blacklisted(void);
 extern void acpi_dmi_osi_linux(int enable, const struct dmi_system_id *d);
 extern void acpi_osi_setup(char *str);
+extern bool acpi_osi_is_win8(void);
 
 #ifdef CONFIG_ACPI_NUMA
 int acpi_get_node(acpi_handle handle);
-- 
cgit v1.2.3


From 14ca7a47d0ab2a7a35faab130e6d9682f8ff1a46 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Tue, 16 Jun 2015 16:27:47 +0200
Subject: acpi-video-detect: video: Make video_detect code part of the video
 module

This is a preparation patch for the backlight interface selection logic
cleanup, there are 2 reasons to not always build the video_detect code
into the kernel:

1) In order for the video_detect.c to also deal with / select native
backlight interfaces on win8 systems, instead of doing this in video.c
where it does not belong, video_detect.c needs to call into the backlight
class code. Which cannot be done if it is builtin and the blacklight class
is not.

2) Currently all the platform/x86 drivers which have quirks to prefer
the vendor driver over acpi-video call acpi_video_unregister_backlight()
to remove the acpi-video backlight interface, this logic really belongs
in video_detect.c, which will cause video_detect.c to depend on symbols of
video.c and video.c already depends on video_detect.c symbols, so they
really need to be a single module.

Note that this commits make 2 changes so as to maintain 100% kernel
commandline compatibility:

1) The __setup call for the acpi_backlight= handling is moved to
   acpi/util.c as __setup may only be used by code which is alwasy builtin
2) video.c is renamed to acpi_video.c so that it can be combined with
   video_detect.c into video.ko

This commit also makes changes to drivers/platform/x86/Kconfig to ensure
that drivers which use acpi_video_backlight_support() from video_detect.c,
will not be built-in when acpi_video is not built in. This also changes
some "select" uses to "depends on" to avoid dependency loops.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Darren Hart <dvhart@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/acpi.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 913a1c19818a..f097c0a2718e 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -243,6 +243,7 @@ extern bool wmi_has_guid(const char *guid);
 #define ACPI_VIDEO_OUTPUT_SWITCHING_DMI_VENDOR		0x0400
 #define ACPI_VIDEO_OUTPUT_SWITCHING_DMI_VIDEO		0x0800
 
+extern char acpi_video_backlight_string[];
 extern long acpi_is_video_device(acpi_handle handle);
 
 #if defined(CONFIG_ACPI_VIDEO) || defined(CONFIG_ACPI_VIDEO_MODULE)
-- 
cgit v1.2.3


From d0a530ba424ec1be7630f7fce2db9860b9429b8f Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Tue, 16 Jun 2015 16:28:12 +0200
Subject: acpi-video-detect: Remove old API

Remove the old backlight interface selection API now that all drivers
have been ported to the new API.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Darren Hart <dvhart@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/acpi.h | 19 -------------------
 1 file changed, 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index f097c0a2718e..5966d1d9280e 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -245,25 +245,6 @@ extern bool wmi_has_guid(const char *guid);
 
 extern char acpi_video_backlight_string[];
 extern long acpi_is_video_device(acpi_handle handle);
-
-#if defined(CONFIG_ACPI_VIDEO) || defined(CONFIG_ACPI_VIDEO_MODULE)
-
-extern void acpi_video_dmi_promote_vendor(void);
-extern int acpi_video_backlight_support(void);
-
-#else
-
-static inline void acpi_video_dmi_promote_vendor(void)
-{
-}
-
-static inline int acpi_video_backlight_support(void)
-{
-	return 0;
-}
-
-#endif /* defined(CONFIG_ACPI_VIDEO) || defined(CONFIG_ACPI_VIDEO_MODULE) */
-
 extern int acpi_blacklisted(void);
 extern void acpi_dmi_osi_linux(int enable, const struct dmi_system_id *d);
 extern void acpi_osi_setup(char *str);
-- 
cgit v1.2.3


From edf18b9108f5025f9e83b2c167c9122954acbc62 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 18 Jun 2015 14:00:48 +0800
Subject: crypto: api - Add CRYPTO_MINALIGN_ATTR to struct crypto_alg

The struct crypto_alg is embedded into various type-specific structs
such as aead_alg.  This is then used as part of instances such as
struct aead_instance.  It is also embedded into the generic struct
crypto_instance.  In order to ensure that struct aead_instance can
be converted to struct crypto_instance when necessary, we need to
ensure that crypto_alg is aligned properly.

This patch adds an alignment attribute to struct crypto_alg to
ensure this.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/crypto.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 0e3f71a73e3b..964e5735a6a9 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -513,7 +513,7 @@ struct crypto_alg {
 	void (*cra_destroy)(struct crypto_alg *alg);
 	
 	struct module *cra_module;
-};
+} CRYPTO_MINALIGN_ATTR;
 
 /*
  * Algorithm registration interface.
-- 
cgit v1.2.3


From 68722101ec3a0e179408a13708dd020e04f54aab Mon Sep 17 00:00:00 2001
From: George Beshers <gbeshers@sgi.com>
Date: Thu, 18 Jun 2015 10:25:13 -0500
Subject: locking/lockdep: Remove hard coded array size dependency

An apparent oversight left a hardcoded '4' in place when
LOCKSTAT_POINTS was introduced.

The contention_point[] and contending_point[] arrays in the
structs lock_class and lock_class_stats need to be the same
size for the loops in lock_stats() to be correct.

This patch allows LOCKSTAT_POINTS to be changed without
affecting the correctness of the code.

Signed-off-by: George Beshers <gbeshers@sgi.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/lockdep.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 066ba4157541..2722111591a3 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -130,8 +130,8 @@ enum bounce_type {
 };
 
 struct lock_class_stats {
-	unsigned long			contention_point[4];
-	unsigned long			contending_point[4];
+	unsigned long			contention_point[LOCKSTAT_POINTS];
+	unsigned long			contending_point[LOCKSTAT_POINTS];
 	struct lock_time		read_waittime;
 	struct lock_time		write_waittime;
 	struct lock_time		read_holdtime;
-- 
cgit v1.2.3


From b17718d02f54b90978d0e0146368b512b11c3e84 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 5 Jun 2015 17:30:23 +0200
Subject: sched/stop_machine: Fix deadlock between multiple stop_two_cpus()

Jiri reported a machine stuck in multi_cpu_stop() with
migrate_swap_stop() as function and with the following src,dst cpu
pairs: {11,  4} {13, 11} { 4, 13}

                        4       11      13

cpuM: queue(4 ,13)
                        *Ma
cpuN: queue(13,11)
                                *N      Na
                        *M              Mb
cpuO: queue(11, 4)
                        *O      Oa
                                *Nb
                        *Ob

Where *X denotes the cpu running the queueing of cpu-X and X[ab] denotes
the first/second queued work.

You'll observe the top of the workqueue for each cpu: 4,11,13 to be work
from cpus: M, O, N resp. IOW. deadlock.

Do away with the queueing trickery and introduce lg_double_lock() to
lock both CPUs and fully serialize the stop_two_cpus() callers instead
of the partial (and buggy) serialization we have now.

Reported-by: Jiri Olsa <jolsa@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20150605153023.GH19282@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/lglock.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/lglock.h b/include/linux/lglock.h
index 0081f000e34b..c92ebd100d9b 100644
--- a/include/linux/lglock.h
+++ b/include/linux/lglock.h
@@ -52,10 +52,15 @@ struct lglock {
 	static struct lglock name = { .lock = &name ## _lock }
 
 void lg_lock_init(struct lglock *lg, char *name);
+
 void lg_local_lock(struct lglock *lg);
 void lg_local_unlock(struct lglock *lg);
 void lg_local_lock_cpu(struct lglock *lg, int cpu);
 void lg_local_unlock_cpu(struct lglock *lg, int cpu);
+
+void lg_double_lock(struct lglock *lg, int cpu1, int cpu2);
+void lg_double_unlock(struct lglock *lg, int cpu1, int cpu2);
+
 void lg_global_lock(struct lglock *lg);
 void lg_global_unlock(struct lglock *lg);
 
-- 
cgit v1.2.3


From 1dabbcec2c0a36fe43509d06499b9e512e70a028 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 26 May 2015 22:50:28 +0000
Subject: timer: Use hlist for the timer wheel hash buckets

This reduces the size of struct tvec_base by 50% and results in
slightly smaller code as well.

Before:
   struct tvec_base: size: 8256, cachelines: 129

   text	   data	    bss	    dec	    hex	filename
  17698	  13297	   8256	  39251	   9953	../build/kernel/time/timer.o

After:
  struct tvec_base: 4160, cachelines: 65

   text	   data	    bss	    dec	    hex	filename
  17491	   9201	   4160	  30852	   7884	../build/kernel/time/timer.o

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Viresh Kumar <viresh.kumar@linaro.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Joonwoo Park <joonwoop@codeaurora.org>
Cc: Wenbo Wang <wenbo.wang@memblaze.com>
Link: http://lkml.kernel.org/r/20150526224511.854731214@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/timer.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/timer.h b/include/linux/timer.h
index fbb80e0030bf..064ee24d3f38 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -14,7 +14,7 @@ struct timer_list {
 	 * All fields that change during normal runtime grouped to the
 	 * same cacheline
 	 */
-	struct list_head entry;
+	struct hlist_node entry;
 	unsigned long expires;
 	struct tvec_base *base;
 
@@ -71,7 +71,7 @@ extern struct tvec_base boot_tvec_bases;
 #define TIMER_FLAG_MASK			0x3LU
 
 #define __TIMER_INITIALIZER(_function, _expires, _data, _flags) { \
-		.entry = { .prev = TIMER_ENTRY_STATIC },	\
+		.entry = { .next = TIMER_ENTRY_STATIC },	\
 		.function = (_function),			\
 		.expires = (_expires),				\
 		.data = (_data),				\
@@ -168,7 +168,7 @@ static inline void init_timer_on_stack_key(struct timer_list *timer,
  */
 static inline int timer_pending(const struct timer_list * timer)
 {
-	return timer->entry.next != NULL;
+	return timer->entry.pprev != NULL;
 }
 
 extern void add_timer_on(struct timer_list *timer, int cpu);
-- 
cgit v1.2.3


From 0eeda71bc30d74f66f8231f45621d5ace3419186 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 26 May 2015 22:50:29 +0000
Subject: timer: Replace timer base by a cpu index

Instead of storing a pointer to the per cpu tvec_base we can simply
cache a CPU index in the timer_list and use that to get hold of the
correct per cpu tvec_base. This is only used in lock_timer_base() and
the slightly larger code is peanuts versus the spinlock operation and
the d-cache foot print of the timer wheel.

Aside of that this allows to get rid of following nuisances:

 - boot_tvec_base

   That statically allocated 4k bss data is just kept around so the
   timer has a home when it gets statically initialized. It serves no
   other purpose.

   With the CPU index we assign the timer to CPU0 at static
   initialization time and therefor can avoid the whole boot_tvec_base
   dance.  That also simplifies the init code, which just can use the
   per cpu base.

   Before:
     text	   data	    bss	    dec	    hex	filename
    17491	   9201	   4160	  30852	   7884	../build/kernel/time/timer.o
   After:
     text	   data	    bss	    dec	    hex	filename
    17440	   9193	      0	  26633	   6809	../build/kernel/time/timer.o

 - Overloading the base pointer with various flags

   The CPU index has enough space to hold the flags (deferrable,
   irqsafe) so we can get rid of the extra masking and bit fiddling
   with the base pointer.

As a benefit we reduce the size of struct timer_list on 64 bit
machines. 4 - 8 bytes, a size reduction up to 15% per struct timer_list,
which is a real win as we have tons of them embedded in other structs.

This changes also the newly added deferrable printout of the timer
start trace point to capture and print all timer->flags, which allows
us to decode the target cpu of the timer as well.

We might have used bitfields for this, but that would change the
static initializers and the init function for no value to accomodate
big endian bitfields.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Joonwoo Park <joonwoop@codeaurora.org>
Cc: Wenbo Wang <wenbo.wang@memblaze.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Badhri Jagan Sridharan <Badhri@google.com>
Link: http://lkml.kernel.org/r/20150526224511.950084301@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/timer.h | 38 ++++++++++++++++----------------------
 1 file changed, 16 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/timer.h b/include/linux/timer.h
index 064ee24d3f38..4a0d52bc2073 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -14,27 +14,23 @@ struct timer_list {
 	 * All fields that change during normal runtime grouped to the
 	 * same cacheline
 	 */
-	struct hlist_node entry;
-	unsigned long expires;
-	struct tvec_base *base;
-
-	void (*function)(unsigned long);
-	unsigned long data;
-
-	int slack;
+	struct hlist_node	entry;
+	unsigned long		expires;
+	void			(*function)(unsigned long);
+	unsigned long		data;
+	u32			flags;
+	int			slack;
 
 #ifdef CONFIG_TIMER_STATS
-	int start_pid;
-	void *start_site;
-	char start_comm[16];
+	int			start_pid;
+	void			*start_site;
+	char			start_comm[16];
 #endif
 #ifdef CONFIG_LOCKDEP
-	struct lockdep_map lockdep_map;
+	struct lockdep_map	lockdep_map;
 #endif
 };
 
-extern struct tvec_base boot_tvec_bases;
-
 #ifdef CONFIG_LOCKDEP
 /*
  * NB: because we have to copy the lockdep_map, setting the lockdep_map key
@@ -49,9 +45,6 @@ extern struct tvec_base boot_tvec_bases;
 #endif
 
 /*
- * Note that all tvec_bases are at least 4 byte aligned and lower two bits
- * of base in timer_list is guaranteed to be zero. Use them for flags.
- *
  * A deferrable timer will work normally when the system is busy, but
  * will not cause a CPU to come out of idle just to service it; instead,
  * the timer will be serviced when the CPU eventually wakes up with a
@@ -65,17 +58,18 @@ extern struct tvec_base boot_tvec_bases;
  * workqueue locking issues. It's not meant for executing random crap
  * with interrupts disabled. Abuse is monitored!
  */
-#define TIMER_DEFERRABLE		0x1LU
-#define TIMER_IRQSAFE			0x2LU
-
-#define TIMER_FLAG_MASK			0x3LU
+#define TIMER_CPUMASK		0x0007FFFF
+#define TIMER_MIGRATING		0x00080000
+#define TIMER_BASEMASK		(TIMER_CPUMASK | TIMER_MIGRATING)
+#define TIMER_DEFERRABLE	0x00100000
+#define TIMER_IRQSAFE		0x00200000
 
 #define __TIMER_INITIALIZER(_function, _expires, _data, _flags) { \
 		.entry = { .next = TIMER_ENTRY_STATIC },	\
 		.function = (_function),			\
 		.expires = (_expires),				\
 		.data = (_data),				\
-		.base = (void *)((unsigned long)&boot_tvec_bases + (_flags)), \
+		.flags = (_flags),				\
 		.slack = -1,					\
 		__TIMER_LOCKDEP_MAP_INITIALIZER(		\
 			__FILE__ ":" __stringify(__LINE__))	\
-- 
cgit v1.2.3


From c74441a17eb975b604e339ca6c11b9ab9aaca11f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 26 May 2015 22:50:31 +0000
Subject: timer: Stats: Simplify the flags handling

Simplify the handling of the flag storage for the timer statistics. No
intermediate storage anymore. Just hand over the flags field.

I left the printout of 'deferrable' for now because changing this
would be an ABI update and I have no idea how strong people feel about
that. OTOH, I wonder whether we should kill the whole timer stats
stuff because all of that information can be retrieved via ftrace/perf
as well.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Joonwoo Park <joonwoop@codeaurora.org>
Cc: Wenbo Wang <wenbo.wang@memblaze.com>
Link: http://lkml.kernel.org/r/20150526224512.046626248@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/timer.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/timer.h b/include/linux/timer.h
index 4a0d52bc2073..ff0689b6e297 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -188,13 +188,10 @@ extern void set_timer_slack(struct timer_list *time, int slack_hz);
 
 extern int timer_stats_active;
 
-#define TIMER_STATS_FLAG_DEFERRABLE	0x1
-
 extern void init_timer_stats(void);
 
 extern void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
-				     void *timerf, char *comm,
-				     unsigned int timer_flag);
+				     void *timerf, char *comm, u32 flags);
 
 extern void __timer_stats_timer_set_start_info(struct timer_list *timer,
 					       void *addr);
-- 
cgit v1.2.3


From bc7a34b8b9ebfb0f4b8a35a72a0b134fd6c5ef50 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 26 May 2015 22:50:33 +0000
Subject: timer: Reduce timer migration overhead if disabled

Eric reported that the timer_migration sysctl is not really nice
performance wise as it needs to check at every timer insertion whether
the feature is enabled or not. Further the check does not live in the
timer code, so we have an extra function call which checks an extra
cache line to figure out that it is disabled.

We can do better and store that information in the per cpu (hr)timer
bases. I pondered to use a static key, but that's a nightmare to
update from the nohz code and the timer base cache line is hot anyway
when we select a timer base.

The old logic enabled the timer migration unconditionally if
CONFIG_NO_HZ was set even if nohz was disabled on the kernel command
line.

With this modification, we start off with migration disabled. The user
visible sysctl is still set to enabled. If the kernel switches to NOHZ
migration is enabled, if the user did not disable it via the sysctl
prior to the switch. If nohz=off is on the kernel command line,
migration stays disabled no matter what.

Before:
  47.76%  hog       [.] main
  14.84%  [kernel]  [k] _raw_spin_lock_irqsave
   9.55%  [kernel]  [k] _raw_spin_unlock_irqrestore
   6.71%  [kernel]  [k] mod_timer
   6.24%  [kernel]  [k] lock_timer_base.isra.38
   3.76%  [kernel]  [k] detach_if_pending
   3.71%  [kernel]  [k] del_timer
   2.50%  [kernel]  [k] internal_add_timer
   1.51%  [kernel]  [k] get_nohz_timer_target
   1.28%  [kernel]  [k] __internal_add_timer
   0.78%  [kernel]  [k] timerfn
   0.48%  [kernel]  [k] wake_up_nohz_cpu

After:
  48.10%  hog       [.] main
  15.25%  [kernel]  [k] _raw_spin_lock_irqsave
   9.76%  [kernel]  [k] _raw_spin_unlock_irqrestore
   6.50%  [kernel]  [k] mod_timer
   6.44%  [kernel]  [k] lock_timer_base.isra.38
   3.87%  [kernel]  [k] detach_if_pending
   3.80%  [kernel]  [k] del_timer
   2.67%  [kernel]  [k] internal_add_timer
   1.33%  [kernel]  [k] __internal_add_timer
   0.73%  [kernel]  [k] timerfn
   0.54%  [kernel]  [k] wake_up_nohz_cpu


Reported-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Joonwoo Park <joonwoop@codeaurora.org>
Cc: Wenbo Wang <wenbo.wang@memblaze.com>
Link: http://lkml.kernel.org/r/20150526224512.127050787@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h      |  2 ++
 include/linux/sched.h        |  6 +-----
 include/linux/sched/sysctl.h | 12 ------------
 include/linux/timer.h        |  9 +++++++++
 4 files changed, 12 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 5db055821ef3..69551020bb97 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -163,6 +163,7 @@ enum  hrtimer_base_type {
  * @cpu:		cpu number
  * @active_bases:	Bitfield to mark bases with active timers
  * @clock_was_set_seq:	Sequence counter of clock was set events
+ * @migration_enabled:	The migration of hrtimers to other cpus is enabled
  * @expires_next:	absolute time of the next event which was scheduled
  *			via clock_set_next_event()
  * @next_timer:		Pointer to the first expiring timer
@@ -186,6 +187,7 @@ struct hrtimer_cpu_base {
 	unsigned int			cpu;
 	unsigned int			active_bases;
 	unsigned int			clock_was_set_seq;
+	bool				migration_enabled;
 #ifdef CONFIG_HIGH_RES_TIMERS
 	unsigned int			in_hrtirq	: 1,
 					hres_active	: 1,
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 26a2e6122734..d7151460b0cf 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -335,14 +335,10 @@ extern int runqueue_is_locked(int cpu);
 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
 extern void nohz_balance_enter_idle(int cpu);
 extern void set_cpu_sd_state_idle(void);
-extern int get_nohz_timer_target(int pinned);
+extern int get_nohz_timer_target(void);
 #else
 static inline void nohz_balance_enter_idle(int cpu) { }
 static inline void set_cpu_sd_state_idle(void) { }
-static inline int get_nohz_timer_target(int pinned)
-{
-	return smp_processor_id();
-}
 #endif
 
 /*
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 596a0e007c62..c9e4731cf10b 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -57,24 +57,12 @@ extern unsigned int sysctl_numa_balancing_scan_size;
 extern unsigned int sysctl_sched_migration_cost;
 extern unsigned int sysctl_sched_nr_migrate;
 extern unsigned int sysctl_sched_time_avg;
-extern unsigned int sysctl_timer_migration;
 extern unsigned int sysctl_sched_shares_window;
 
 int sched_proc_update_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *length,
 		loff_t *ppos);
 #endif
-#ifdef CONFIG_SCHED_DEBUG
-static inline unsigned int get_sysctl_timer_migration(void)
-{
-	return sysctl_timer_migration;
-}
-#else
-static inline unsigned int get_sysctl_timer_migration(void)
-{
-	return 1;
-}
-#endif
 
 /*
  *  control realtime throttling:
diff --git a/include/linux/timer.h b/include/linux/timer.h
index ff0689b6e297..61aa61dc410c 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -238,6 +238,15 @@ extern void run_local_timers(void);
 struct hrtimer;
 extern enum hrtimer_restart it_real_fn(struct hrtimer *);
 
+#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
+#include <linux/sysctl.h>
+
+extern unsigned int sysctl_timer_migration;
+int timer_migration_handler(struct ctl_table *table, int write,
+			    void __user *buffer, size_t *lenp,
+			    loff_t *ppos);
+#endif
+
 unsigned long __round_jiffies(unsigned long j, int cpu);
 unsigned long __round_jiffies_relative(unsigned long j, int cpu);
 unsigned long round_jiffies(unsigned long j);
-- 
cgit v1.2.3


From 683be13a284720205228e29207ef11a1c3c322b9 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 26 May 2015 22:50:35 +0000
Subject: timer: Minimize nohz off overhead

If nohz is disabled on the kernel command line the [hr]timer code
still calls wake_up_nohz_cpu() and tick_nohz_full_cpu(), a pretty
pointless exercise. Cache nohz_active in [hr]timer per cpu bases and
avoid the overhead.

Before:
  48.10%  hog       [.] main
  15.25%  [kernel]  [k] _raw_spin_lock_irqsave
   9.76%  [kernel]  [k] _raw_spin_unlock_irqrestore
   6.50%  [kernel]  [k] mod_timer
   6.44%  [kernel]  [k] lock_timer_base.isra.38
   3.87%  [kernel]  [k] detach_if_pending
   3.80%  [kernel]  [k] del_timer
   2.67%  [kernel]  [k] internal_add_timer
   1.33%  [kernel]  [k] __internal_add_timer
   0.73%  [kernel]  [k] timerfn
   0.54%  [kernel]  [k] wake_up_nohz_cpu

After:
  48.73%  hog       [.] main
  15.36%  [kernel]  [k] _raw_spin_lock_irqsave
   9.77%  [kernel]  [k] _raw_spin_unlock_irqrestore
   6.61%  [kernel]  [k] lock_timer_base.isra.38
   6.42%  [kernel]  [k] mod_timer
   3.90%  [kernel]  [k] detach_if_pending
   3.76%  [kernel]  [k] del_timer
   2.41%  [kernel]  [k] internal_add_timer
   1.39%  [kernel]  [k] __internal_add_timer
   0.76%  [kernel]  [k] timerfn

We probably should have a cached value for nohz full in the per cpu
bases as well to avoid the cpumask check. The base cache line is hot
already, the cpumask not necessarily.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Joonwoo Park <joonwoop@codeaurora.org>
Cc: Wenbo Wang <wenbo.wang@memblaze.com>
Link: http://lkml.kernel.org/r/20150526224512.207378134@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 69551020bb97..76dd4f0da5ca 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -164,6 +164,7 @@ enum  hrtimer_base_type {
  * @active_bases:	Bitfield to mark bases with active timers
  * @clock_was_set_seq:	Sequence counter of clock was set events
  * @migration_enabled:	The migration of hrtimers to other cpus is enabled
+ * @nohz_active:	The nohz functionality is enabled
  * @expires_next:	absolute time of the next event which was scheduled
  *			via clock_set_next_event()
  * @next_timer:		Pointer to the first expiring timer
@@ -188,6 +189,7 @@ struct hrtimer_cpu_base {
 	unsigned int			active_bases;
 	unsigned int			clock_was_set_seq;
 	bool				migration_enabled;
+	bool				nohz_active;
 #ifdef CONFIG_HIGH_RES_TIMERS
 	unsigned int			in_hrtirq	: 1,
 					hres_active	: 1,
-- 
cgit v1.2.3


From 1796dcce2daacc125f2d60afc3f631ca29e36684 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <k.kozlowski.k@gmail.com>
Date: Sun, 3 May 2015 18:57:10 +0900
Subject: rtc: interface: Fix coding style violations

Fix issues reported by checkpatch:
  ERROR: open brace '{' following struct go on the same line
  ERROR: "foo* bar" should be "foo *bar"
Additionally adjust alignment of wrapped function arguments.

Signed-off-by: Krzysztof Kozlowski <k.kozlowski.k@gmail.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 include/linux/rtc.h | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index 8dcf6825fa88..b0709f80dfb0 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -101,8 +101,7 @@ struct rtc_timer {
 /* flags */
 #define RTC_DEV_BUSY 0
 
-struct rtc_device
-{
+struct rtc_device {
 	struct device dev;
 	struct module *owner;
 
@@ -198,10 +197,10 @@ int rtc_register(rtc_task_t *task);
 int rtc_unregister(rtc_task_t *task);
 int rtc_control(rtc_task_t *t, unsigned int cmd, unsigned long arg);
 
-void rtc_timer_init(struct rtc_timer *timer, void (*f)(void* p), void* data);
-int rtc_timer_start(struct rtc_device *rtc, struct rtc_timer* timer,
-			ktime_t expires, ktime_t period);
-int rtc_timer_cancel(struct rtc_device *rtc, struct rtc_timer* timer);
+void rtc_timer_init(struct rtc_timer *timer, void (*f)(void *p), void *data);
+int rtc_timer_start(struct rtc_device *rtc, struct rtc_timer *timer,
+		    ktime_t expires, ktime_t period);
+int rtc_timer_cancel(struct rtc_device *rtc, struct rtc_timer *timer);
 void rtc_timer_do_work(struct work_struct *work);
 
 static inline bool is_leap_year(unsigned int year)
-- 
cgit v1.2.3


From 73744a64aab872703b851b9678a7f488b507eb81 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <k.kozlowski.k@gmail.com>
Date: Sun, 3 May 2015 18:57:11 +0900
Subject: rtc: interface: Remove unused return value from rtc_timer_cancel()

The rtc_timer_cancel() always returns 0 and cannot fail (calls only
other void-returning functions).

Signed-off-by: Krzysztof Kozlowski <k.kozlowski.k@gmail.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 include/linux/rtc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index b0709f80dfb0..587017e7939c 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -200,7 +200,7 @@ int rtc_control(rtc_task_t *t, unsigned int cmd, unsigned long arg);
 void rtc_timer_init(struct rtc_timer *timer, void (*f)(void *p), void *data);
 int rtc_timer_start(struct rtc_device *rtc, struct rtc_timer *timer,
 		    ktime_t expires, ktime_t period);
-int rtc_timer_cancel(struct rtc_device *rtc, struct rtc_timer *timer);
+void rtc_timer_cancel(struct rtc_device *rtc, struct rtc_timer *timer);
 void rtc_timer_do_work(struct work_struct *work);
 
 static inline bool is_leap_year(unsigned int year)
-- 
cgit v1.2.3


From 1387fe7d292b66677dae31d25a8e3c953bf21748 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <zajec5@gmail.com>
Date: Tue, 12 May 2015 11:31:02 +0200
Subject: MIPS: BCM47xx: Extract all boardflags to new u32 fields
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For years we planned to get rid of old u16 fields, let's start doing it
with MIPS code. This process will take some time, it requires doing the
same in ssb/bcma and then switching all drivers to new fields. This will
be handled in separated patches submitted to appropriate trees.

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Cc: linux-mips@linux-mips.org
Cc: Hauke Mehrtens <hauke@hauke-m.de>
Patchwork: https://patchwork.linux-mips.org/patch/10026/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 include/linux/ssb/ssb.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h
index 4568a5cc9ab8..ee90e32a0607 100644
--- a/include/linux/ssb/ssb.h
+++ b/include/linux/ssb/ssb.h
@@ -88,11 +88,14 @@ struct ssb_sprom {
 	u32 ofdm5glpo;		/* 5.2GHz OFDM power offset */
 	u32 ofdm5gpo;		/* 5.3GHz OFDM power offset */
 	u32 ofdm5ghpo;		/* 5.8GHz OFDM power offset */
+	u32 boardflags;
+	u32 boardflags2;
+	u32 boardflags3;
+	/* TODO: Switch all drivers to new u32 fields and drop below ones */
 	u16 boardflags_lo;	/* Board flags (bits 0-15) */
 	u16 boardflags_hi;	/* Board flags (bits 16-31) */
 	u16 boardflags2_lo;	/* Board flags (bits 32-47) */
 	u16 boardflags2_hi;	/* Board flags (bits 48-63) */
-	/* TODO store board flags in a single u64 */
 
 	struct ssb_sprom_core_pwr_info core_pwr_info[4];
 
-- 
cgit v1.2.3


From 6e122ac0053d071976686dd04cdd60ea8039bb7a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <zajec5@gmail.com>
Date: Tue, 12 May 2015 11:54:48 +0200
Subject: MIPS: BCM47xx: Extract info about et2 interface
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

New devices may have more than 1 Ethernet core (device). We should
extract info about them to make it available to Ethernet drivers.

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Cc: linux-mips@linux-mips.org
Cc: Hauke Mehrtens <hauke@hauke-m.de>
Cc: Hante Meuleman <meuleman@broadcom.com>
Cc: Ian Kent <raven@themaw.net>
Patchwork: https://patchwork.linux-mips.org/patch/10027/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 include/linux/ssb/ssb.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h
index ee90e32a0607..c3d1a525bacc 100644
--- a/include/linux/ssb/ssb.h
+++ b/include/linux/ssb/ssb.h
@@ -29,10 +29,13 @@ struct ssb_sprom {
 	u8 il0mac[6] __aligned(sizeof(u16));	/* MAC address for 802.11b/g */
 	u8 et0mac[6] __aligned(sizeof(u16));	/* MAC address for Ethernet */
 	u8 et1mac[6] __aligned(sizeof(u16));	/* MAC address for 802.11a */
+	u8 et2mac[6] __aligned(sizeof(u16));	/* MAC address for extra Ethernet */
 	u8 et0phyaddr;		/* MII address for enet0 */
 	u8 et1phyaddr;		/* MII address for enet1 */
+	u8 et2phyaddr;		/* MII address for enet2 */
 	u8 et0mdcport;		/* MDIO for enet0 */
 	u8 et1mdcport;		/* MDIO for enet1 */
+	u8 et2mdcport;		/* MDIO for enet2 */
 	u16 dev_id;		/* Device ID overriding e.g. PCI ID */
 	u16 board_rev;		/* Board revision number from SPROM. */
 	u16 board_num;		/* Board number from SPROM. */
-- 
cgit v1.2.3


From 44e08e7099c8de226606cfc989b45d6fa27f507f Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Sun, 24 May 2015 16:11:31 +0100
Subject: MIPS/IRQCHIP: Move Ingenic SoC intc driver to drivers/irqchip

Move the driver for Ingenic SoC interrupt controllers into
drivers/irqchip where it belongs.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: Lars-Peter Clausen <lars@metafoo.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Cc: Brian Norris <computersforpeace@gmail.com>
Patchwork: https://patchwork.linux-mips.org/patch/10147/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 include/linux/irqchip/ingenic.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 include/linux/irqchip/ingenic.h

(limited to 'include/linux')

diff --git a/include/linux/irqchip/ingenic.h b/include/linux/irqchip/ingenic.h
new file mode 100644
index 000000000000..0ee319a4029d
--- /dev/null
+++ b/include/linux/irqchip/ingenic.h
@@ -0,0 +1,23 @@
+/*
+ *  Copyright (C) 2010, Lars-Peter Clausen <lars@metafoo.de>
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under  the terms of the GNU General	 Public License as published by the
+ *  Free Software Foundation;  either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef __LINUX_IRQCHIP_INGENIC_H__
+#define __LINUX_IRQCHIP_INGENIC_H__
+
+#include <linux/irq.h>
+
+extern void ingenic_intc_irq_suspend(struct irq_data *data);
+extern void ingenic_intc_irq_resume(struct irq_data *data);
+
+#endif
-- 
cgit v1.2.3


From 55cab93bcf1422ab4298edc65c349c4304b4884e Mon Sep 17 00:00:00 2001
From: Hante Meuleman <meuleman@broadcom.com>
Date: Thu, 21 May 2015 15:27:23 +0200
Subject: mips: bcm47xx: allow retrieval of complete nvram contents

Host platforms such as routers supported by OpenWrt can
support NVRAM reading directly from internal NVRAM store.
The brcmfmac for one requires the complete nvram contents
to select what needs to be sent to wireless device.

Signed-off-by: Arend van Spriel <arend@broadcom.com>
Signed-off-by: Hante Meuleman <meuleman@broadcom.com>
Reviewed-by: Arend Van Spriel <arend@broadcom.com>
Reviewed-by: Franky (Zhenhui) Lin <frankyl@broadcom.com>
Reviewed-by: Pieter-Paul Giesberts <pieterpg@broadcom.com>
Reviewed-by: Daniel (Deognyoun) Kim <dekim@broadcom.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/10093/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 include/linux/bcm47xx_nvram.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bcm47xx_nvram.h b/include/linux/bcm47xx_nvram.h
index b12b07e75929..c73927c66c2c 100644
--- a/include/linux/bcm47xx_nvram.h
+++ b/include/linux/bcm47xx_nvram.h
@@ -10,11 +10,17 @@
 
 #include <linux/types.h>
 #include <linux/kernel.h>
+#include <linux/vmalloc.h>
 
 #ifdef CONFIG_BCM47XX
 int bcm47xx_nvram_init_from_mem(u32 base, u32 lim);
 int bcm47xx_nvram_getenv(const char *name, char *val, size_t val_len);
 int bcm47xx_nvram_gpio_pin(const char *name);
+char *bcm47xx_nvram_get_contents(size_t *val_len);
+static inline void bcm47xx_nvram_release_contents(char *nvram)
+{
+	vfree(nvram);
+};
 #else
 static inline int bcm47xx_nvram_init_from_mem(u32 base, u32 lim)
 {
@@ -29,6 +35,15 @@ static inline int bcm47xx_nvram_gpio_pin(const char *name)
 {
 	return -ENOTSUPP;
 };
+
+static inline char *bcm47xx_nvram_get_contents(size_t *val_len)
+{
+	return NULL;
+};
+
+static inline void bcm47xx_nvram_release_contents(char *nvram)
+{
+};
 #endif
 
 #endif /* __BCM47XX_NVRAM_H */
-- 
cgit v1.2.3


From 2ddf3a792218cddd30140b1f8b32cb6e2d67921f Mon Sep 17 00:00:00 2001
From: Alban Bedel <albeu@free.fr>
Date: Sun, 31 May 2015 02:18:24 +0200
Subject: MIPS: ath79: Add OF support to the GPIO driver

Replace the simple GPIO chip registration by a platform driver
and make ath79_gpio_init() just register the device.

Signed-off-by: Alban Bedel <albeu@free.fr>
Cc: linux-mips@linux-mips.org
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 include/linux/platform_data/gpio-ath79.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
 create mode 100644 include/linux/platform_data/gpio-ath79.h

(limited to 'include/linux')

diff --git a/include/linux/platform_data/gpio-ath79.h b/include/linux/platform_data/gpio-ath79.h
new file mode 100644
index 000000000000..88b0db7bee74
--- /dev/null
+++ b/include/linux/platform_data/gpio-ath79.h
@@ -0,0 +1,19 @@
+/*
+ *  Atheros AR7XXX/AR9XXX GPIO controller platform data
+ *
+ * Copyright (C) 2015 Alban Bedel <albeu@free.fr>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __LINUX_PLATFORM_DATA_GPIO_ATH79_H
+#define __LINUX_PLATFORM_DATA_GPIO_ATH79_H
+
+struct ath79_gpio_platform_data {
+	unsigned ngpios;
+	bool oe_inverted;
+};
+
+#endif
-- 
cgit v1.2.3


From f6e734a8c162297953d7bfc0f3f6bf4f8c33d72f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <zajec5@gmail.com>
Date: Wed, 10 Jun 2015 23:05:08 +0200
Subject: MIPS: BCM47xx: Move NVRAM driver to the drivers/firmware/
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After Broadcom switched from MIPS to ARM for their home routers we need
to have NVRAM driver in some common place (not arch/mips/). As explained
in Kconfig, this driver is responsible for parsing SoC configuration
data that is passed to the kernel in flash from the bootloader firmware
called "CFE".

We were thinking about putting it in bus directory, however there are
two possible buses for MIPS: drivers/ssb/ and drivers/bcma/. So this
won't fit there and this is why I would like to move this driver to the
drivers/firmware/.

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Reviewed-by: Paul Walmsley <paul@pwsan.com>
Cc: linux-mips@linux-mips.org
Cc: Hauke Mehrtens <hauke@hauke-m.de>
Cc: Seiji Aguchi <seiji.aguchi@hds.com>
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Mike Waychison <mikew@google.com>
Cc: Roy Franz <roy.franz@linaro.org>
Cc: Matt Fleming <matt.fleming@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Patchwork: https://patchwork.linux-mips.org/patch/10544/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 include/linux/bcm47xx_nvram.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bcm47xx_nvram.h b/include/linux/bcm47xx_nvram.h
index c73927c66c2c..2793652fbf66 100644
--- a/include/linux/bcm47xx_nvram.h
+++ b/include/linux/bcm47xx_nvram.h
@@ -12,7 +12,7 @@
 #include <linux/kernel.h>
 #include <linux/vmalloc.h>
 
-#ifdef CONFIG_BCM47XX
+#ifdef CONFIG_BCM47XX_NVRAM
 int bcm47xx_nvram_init_from_mem(u32 base, u32 lim);
 int bcm47xx_nvram_getenv(const char *name, char *val, size_t val_len);
 int bcm47xx_nvram_gpio_pin(const char *name);
-- 
cgit v1.2.3


From d0497524658e37956737d7dbee73cc42120255dc Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 21 Jun 2015 19:11:44 +0800
Subject: crypto: user - Move cryptouser.h to uapi

The header file cryptouser.h only contains information that is
exported to user-space.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/cryptouser.h | 110 ---------------------------------------------
 1 file changed, 110 deletions(-)
 delete mode 100644 include/linux/cryptouser.h

(limited to 'include/linux')

diff --git a/include/linux/cryptouser.h b/include/linux/cryptouser.h
deleted file mode 100644
index 36efbbbf2f83..000000000000
--- a/include/linux/cryptouser.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Crypto user configuration API.
- *
- * Copyright (C) 2011 secunet Security Networks AG
- * Copyright (C) 2011 Steffen Klassert <steffen.klassert@secunet.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-/* Netlink configuration messages.  */
-enum {
-	CRYPTO_MSG_BASE = 0x10,
-	CRYPTO_MSG_NEWALG = 0x10,
-	CRYPTO_MSG_DELALG,
-	CRYPTO_MSG_UPDATEALG,
-	CRYPTO_MSG_GETALG,
-	__CRYPTO_MSG_MAX
-};
-#define CRYPTO_MSG_MAX (__CRYPTO_MSG_MAX - 1)
-#define CRYPTO_NR_MSGTYPES (CRYPTO_MSG_MAX + 1 - CRYPTO_MSG_BASE)
-
-#define CRYPTO_MAX_NAME CRYPTO_MAX_ALG_NAME
-
-/* Netlink message attributes.  */
-enum crypto_attr_type_t {
-	CRYPTOCFGA_UNSPEC,
-	CRYPTOCFGA_PRIORITY_VAL,	/* __u32 */
-	CRYPTOCFGA_REPORT_LARVAL,	/* struct crypto_report_larval */
-	CRYPTOCFGA_REPORT_HASH,		/* struct crypto_report_hash */
-	CRYPTOCFGA_REPORT_BLKCIPHER,	/* struct crypto_report_blkcipher */
-	CRYPTOCFGA_REPORT_AEAD,		/* struct crypto_report_aead */
-	CRYPTOCFGA_REPORT_COMPRESS,	/* struct crypto_report_comp */
-	CRYPTOCFGA_REPORT_RNG,		/* struct crypto_report_rng */
-	CRYPTOCFGA_REPORT_CIPHER,	/* struct crypto_report_cipher */
-	CRYPTOCFGA_REPORT_AKCIPHER,	/* struct crypto_report_akcipher */
-	__CRYPTOCFGA_MAX
-
-#define CRYPTOCFGA_MAX (__CRYPTOCFGA_MAX - 1)
-};
-
-struct crypto_user_alg {
-	char cru_name[CRYPTO_MAX_ALG_NAME];
-	char cru_driver_name[CRYPTO_MAX_ALG_NAME];
-	char cru_module_name[CRYPTO_MAX_ALG_NAME];
-	__u32 cru_type;
-	__u32 cru_mask;
-	__u32 cru_refcnt;
-	__u32 cru_flags;
-};
-
-struct crypto_report_larval {
-	char type[CRYPTO_MAX_NAME];
-};
-
-struct crypto_report_hash {
-	char type[CRYPTO_MAX_NAME];
-	unsigned int blocksize;
-	unsigned int digestsize;
-};
-
-struct crypto_report_cipher {
-	char type[CRYPTO_MAX_ALG_NAME];
-	unsigned int blocksize;
-	unsigned int min_keysize;
-	unsigned int max_keysize;
-};
-
-struct crypto_report_blkcipher {
-	char type[CRYPTO_MAX_NAME];
-	char geniv[CRYPTO_MAX_NAME];
-	unsigned int blocksize;
-	unsigned int min_keysize;
-	unsigned int max_keysize;
-	unsigned int ivsize;
-};
-
-struct crypto_report_aead {
-	char type[CRYPTO_MAX_NAME];
-	char geniv[CRYPTO_MAX_NAME];
-	unsigned int blocksize;
-	unsigned int maxauthsize;
-	unsigned int ivsize;
-};
-
-struct crypto_report_comp {
-	char type[CRYPTO_MAX_NAME];
-};
-
-struct crypto_report_rng {
-	char type[CRYPTO_MAX_NAME];
-	unsigned int seedsize;
-};
-
-struct crypto_report_akcipher {
-	char type[CRYPTO_MAX_NAME];
-};
-
-#define CRYPTO_REPORT_MAXSIZE (sizeof(struct crypto_user_alg) + \
-			       sizeof(struct crypto_report_blkcipher))
-- 
cgit v1.2.3


From 3e90950d36f19e5477becd5acb02e9b8d8c8956f Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 22 Jun 2015 10:31:40 +0800
Subject: crypto: algif_aead - Temporarily disable all AEAD algorithms

As the AEAD conversion is still ongoing, we do not yet wish to
export legacy AEAD implementations to user-space, as their calling
convention will change.

This patch actually disables all AEAD algorithms because some of
them (e.g., cryptd) will need to be modified to propagate this flag.

Subsequent patches will reenable them on an individual basis.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/crypto.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 964e5735a6a9..81ef938b0a8e 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -101,6 +101,12 @@
  */
 #define CRYPTO_ALG_INTERNAL		0x00002000
 
+/*
+ * Temporary flag used to prevent legacy AEAD implementations from
+ * being used by user-space.
+ */
+#define CRYPTO_ALG_AEAD_NEW		0x00004000
+
 /*
  * Transform masks and values (for crt_flags).
  */
-- 
cgit v1.2.3


From 8ccd0d0ca04147e91890c373677f1e741dda2631 Mon Sep 17 00:00:00 2001
From: Hyungwon Hwang <human.hwang@samsung.com>
Date: Fri, 12 Jun 2015 21:59:01 +0900
Subject: of: add helper for getting endpoint node of specific identifiers

When there are multiple ports or multiple endpoints in a port, they have to be
distinguished by the value of reg property. It is common. The drivers can get
the specific endpoint in the specific port via this function. Now the drivers
have to implement this code in themselves or have to force the order of dt nodes
to get the right node.

Signed-off-by: Hyungwon Hwang <human.hwang@samsung.com>
Acked-by: Rob Herring <robh+dt@kernel.org>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 include/linux/of_graph.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/of_graph.h b/include/linux/of_graph.h
index 7bc92e050608..1c1d5b901a72 100644
--- a/include/linux/of_graph.h
+++ b/include/linux/of_graph.h
@@ -45,6 +45,8 @@ int of_graph_parse_endpoint(const struct device_node *node,
 struct device_node *of_graph_get_port_by_id(struct device_node *node, u32 id);
 struct device_node *of_graph_get_next_endpoint(const struct device_node *parent,
 					struct device_node *previous);
+struct device_node *of_graph_get_endpoint_by_regs(
+		const struct device_node *parent, int port_reg, int reg);
 struct device_node *of_graph_get_remote_port_parent(
 					const struct device_node *node);
 struct device_node *of_graph_get_remote_port(const struct device_node *node);
@@ -69,6 +71,12 @@ static inline struct device_node *of_graph_get_next_endpoint(
 	return NULL;
 }
 
+struct device_node *of_graph_get_endpoint_by_regs(
+		const struct device_node *parent, int port_reg, int reg)
+{
+	return NULL;
+}
+
 static inline struct device_node *of_graph_get_remote_port_parent(
 					const struct device_node *node)
 {
-- 
cgit v1.2.3


From 7ce7b26f84cfcbcb04f526f56f685a56ccddf355 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <k.kozlowski.k@gmail.com>
Date: Mon, 27 Apr 2015 21:54:13 +0900
Subject: mfd: Constify regmap and irq configuration data

Constify in various drivers configuration data which is not modified:
 - regmap_irq_chip,
 - individual regmap_irq's in array,
 - regmap_config,
 - irq_domain_ops,

Signed-off-by: Krzysztof Kozlowski <k.kozlowski.k@gmail.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/da9055/core.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/da9055/core.h b/include/linux/mfd/da9055/core.h
index 956afa445998..5dc743fd63a6 100644
--- a/include/linux/mfd/da9055/core.h
+++ b/include/linux/mfd/da9055/core.h
@@ -89,6 +89,6 @@ static inline int da9055_reg_update(struct da9055 *da9055, unsigned char reg,
 int da9055_device_init(struct da9055 *da9055);
 void da9055_device_exit(struct da9055 *da9055);
 
-extern struct regmap_config da9055_regmap_config;
+extern const struct regmap_config da9055_regmap_config;
 
 #endif /* __DA9055_CORE_H */
-- 
cgit v1.2.3


From 5c188d748216f67c928d67a42f14b5569b6404a5 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Mon, 27 Apr 2015 10:18:14 -0700
Subject: mfd: twl4030-power: Fix pmic for boards that need AC charger disabled

I noticed the PMIC configuration on 37xx-evm won't actually shut down
the voltages during off-idle. Turns out 37xx-evm needs the AC charger
state transitions disabled like we are doing for SDP and LDP in the
legacy booting case.

Let's fix this for device tree based booting by setting up the quirk
flag based on the compatible flag. And let's also use the existing
define for STARTON_CHG.

Note that SDP and EVM do not have the PMIC clken wired to gate the
the oscillator while LDP has.

Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/i2c/twl.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h
index 0bc03f100d04..9ad7828d9d34 100644
--- a/include/linux/i2c/twl.h
+++ b/include/linux/i2c/twl.h
@@ -675,6 +675,7 @@ struct twl4030_power_data {
 	struct twl4030_resconfig *board_config;
 #define TWL4030_RESCONFIG_UNDEF	((u8)-1)
 	bool use_poweroff;	/* Board is wired for TWL poweroff */
+	bool ac_charger_quirk;	/* Disable AC charger on board */
 };
 
 extern int twl4030_remove_script(u8 flags);
-- 
cgit v1.2.3


From 8be4efad81d814b607cbdad47176f426be83ba75 Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javier.martinez@collabora.co.uk>
Date: Wed, 6 May 2015 19:07:16 +0200
Subject: mfd: max77686: Remove unused struct max77686_opmode_data

The defined struct max77686_opmode_data isn't used neither by
the max77686 mfd driver nor the drivers for its sub-devices.

Signed-off-by: Javier Martinez Canillas <javier.martinez@collabora.co.uk>
Reviewed-by: Chanwoo Choi <cw00.choi@samsung.com>
Reviewed-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/max77686.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/max77686.h b/include/linux/mfd/max77686.h
index bb995ab9a575..d4b72d519115 100644
--- a/include/linux/mfd/max77686.h
+++ b/include/linux/mfd/max77686.h
@@ -125,9 +125,4 @@ enum max77686_opmode {
 	MAX77686_OPMODE_STANDBY,
 };
 
-struct max77686_opmode_data {
-	int id;
-	int mode;
-};
-
 #endif /* __LINUX_MFD_MAX77686_H */
-- 
cgit v1.2.3


From e6cb73410a6db70eab266f15b7e25053a45b842d Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Date: Mon, 11 May 2015 13:58:09 +0100
Subject: mfd: arizona: Add better support for system suspend

Allow the chip to completely power off if we enter runtime suspend and
there is no jack detection active. This is helpful for systems where
system suspend might remove the supplies to the CODEC, without informing
us. Note the powering off is done in runtime suspend rather than system
suspend, because we need to hold reset until the first time DCVDD is
powered anyway (which would be in runtime resume), and we might as well
save the extra power.

Signed-off-by: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/arizona/core.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/arizona/core.h b/include/linux/mfd/arizona/core.h
index 16a498f48169..847bdaf47f1d 100644
--- a/include/linux/mfd/arizona/core.h
+++ b/include/linux/mfd/arizona/core.h
@@ -117,6 +117,7 @@ struct arizona {
 	int num_core_supplies;
 	struct regulator_bulk_data core_supplies[ARIZONA_MAX_CORE_SUPPLIES];
 	struct regulator *dcvdd;
+	bool has_fully_powered_off;
 
 	struct arizona_pdata pdata;
 
-- 
cgit v1.2.3


From fc027d138b79537e5353f3d3bad2bcaac787cd17 Mon Sep 17 00:00:00 2001
From: Richard Fitzgerald <rf@opensource.wolfsonmicro.com>
Date: Fri, 1 May 2015 16:15:12 +0100
Subject: mfd: arizona: Split INx_MODE into two fields

Later arizona silicon has the single/differential selector
in a different register, and IN1_MODE only selects between
analogue or digital. Prepare for this by splitting the
INx_MODE definition into two fields.

Signed-off-by: Richard Fitzgerald <rf@opensource.wolfsonmicro.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/arizona/pdata.h     |  5 ++++-
 include/linux/mfd/arizona/registers.h | 27 ++++++++++++++++++---------
 2 files changed, 22 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/arizona/pdata.h b/include/linux/mfd/arizona/pdata.h
index 1789cb0f4f17..f6722677e6d0 100644
--- a/include/linux/mfd/arizona/pdata.h
+++ b/include/linux/mfd/arizona/pdata.h
@@ -156,7 +156,10 @@ struct arizona_pdata {
 	/** MICBIAS configurations */
 	struct arizona_micbias micbias[ARIZONA_MAX_MICBIAS];
 
-	/** Mode of input structures */
+	/**
+	 * Mode of input structures
+	 * One of the ARIZONA_INMODE_xxx values
+	 */
 	int inmode[ARIZONA_MAX_INPUT];
 
 	/** Mode for outputs */
diff --git a/include/linux/mfd/arizona/registers.h b/include/linux/mfd/arizona/registers.h
index aacc10d7789c..3499d36e6067 100644
--- a/include/linux/mfd/arizona/registers.h
+++ b/include/linux/mfd/arizona/registers.h
@@ -2515,9 +2515,12 @@
 #define ARIZONA_IN1_DMIC_SUP_MASK                0x1800  /* IN1_DMIC_SUP - [12:11] */
 #define ARIZONA_IN1_DMIC_SUP_SHIFT                   11  /* IN1_DMIC_SUP - [12:11] */
 #define ARIZONA_IN1_DMIC_SUP_WIDTH                    2  /* IN1_DMIC_SUP - [12:11] */
-#define ARIZONA_IN1_MODE_MASK                    0x0600  /* IN1_MODE - [10:9] */
-#define ARIZONA_IN1_MODE_SHIFT                        9  /* IN1_MODE - [10:9] */
-#define ARIZONA_IN1_MODE_WIDTH                        2  /* IN1_MODE - [10:9] */
+#define ARIZONA_IN1_MODE_MASK                    0x0400  /* IN1_MODE - [10] */
+#define ARIZONA_IN1_MODE_SHIFT                       10  /* IN1_MODE - [10] */
+#define ARIZONA_IN1_MODE_WIDTH                        1  /* IN1_MODE - [10] */
+#define ARIZONA_IN1_SINGLE_ENDED_MASK            0x0200  /* IN1_MODE - [9] */
+#define ARIZONA_IN1_SINGLE_ENDED_SHIFT                9  /* IN1_MODE - [9] */
+#define ARIZONA_IN1_SINGLE_ENDED_WIDTH                1  /* IN1_MODE - [9] */
 #define ARIZONA_IN1L_PGA_VOL_MASK                0x00FE  /* IN1L_PGA_VOL - [7:1] */
 #define ARIZONA_IN1L_PGA_VOL_SHIFT                    1  /* IN1L_PGA_VOL - [7:1] */
 #define ARIZONA_IN1L_PGA_VOL_WIDTH                    7  /* IN1L_PGA_VOL - [7:1] */
@@ -2588,9 +2591,12 @@
 #define ARIZONA_IN2_DMIC_SUP_MASK                0x1800  /* IN2_DMIC_SUP - [12:11] */
 #define ARIZONA_IN2_DMIC_SUP_SHIFT                   11  /* IN2_DMIC_SUP - [12:11] */
 #define ARIZONA_IN2_DMIC_SUP_WIDTH                    2  /* IN2_DMIC_SUP - [12:11] */
-#define ARIZONA_IN2_MODE_MASK                    0x0600  /* IN2_MODE - [10:9] */
-#define ARIZONA_IN2_MODE_SHIFT                        9  /* IN2_MODE - [10:9] */
-#define ARIZONA_IN2_MODE_WIDTH                        2  /* IN2_MODE - [10:9] */
+#define ARIZONA_IN2_MODE_MASK                    0x0400  /* IN2_MODE - [10] */
+#define ARIZONA_IN2_MODE_SHIFT                       10  /* IN2_MODE - [10] */
+#define ARIZONA_IN2_MODE_WIDTH                        1  /* IN2_MODE - [10] */
+#define ARIZONA_IN2_SINGLE_ENDED_MASK            0x0200  /* IN2_MODE - [9] */
+#define ARIZONA_IN2_SINGLE_ENDED_SHIFT                9  /* IN2_MODE - [9] */
+#define ARIZONA_IN2_SINGLE_ENDED_WIDTH                1  /* IN2_MODE - [9] */
 #define ARIZONA_IN2L_PGA_VOL_MASK                0x00FE  /* IN2L_PGA_VOL - [7:1] */
 #define ARIZONA_IN2L_PGA_VOL_SHIFT                    1  /* IN2L_PGA_VOL - [7:1] */
 #define ARIZONA_IN2L_PGA_VOL_WIDTH                    7  /* IN2L_PGA_VOL - [7:1] */
@@ -2661,9 +2667,12 @@
 #define ARIZONA_IN3_DMIC_SUP_MASK                0x1800  /* IN3_DMIC_SUP - [12:11] */
 #define ARIZONA_IN3_DMIC_SUP_SHIFT                   11  /* IN3_DMIC_SUP - [12:11] */
 #define ARIZONA_IN3_DMIC_SUP_WIDTH                    2  /* IN3_DMIC_SUP - [12:11] */
-#define ARIZONA_IN3_MODE_MASK                    0x0600  /* IN3_MODE - [10:9] */
-#define ARIZONA_IN3_MODE_SHIFT                        9  /* IN3_MODE - [10:9] */
-#define ARIZONA_IN3_MODE_WIDTH                        2  /* IN3_MODE - [10:9] */
+#define ARIZONA_IN3_MODE_MASK                    0x0400  /* IN3_MODE - [10] */
+#define ARIZONA_IN3_MODE_SHIFT                       10  /* IN3_MODE - [10] */
+#define ARIZONA_IN3_MODE_WIDTH                        1  /* IN3_MODE - [10] */
+#define ARIZONA_IN3_SINGLE_ENDED_MASK            0x0200  /* IN3_MODE - [9] */
+#define ARIZONA_IN3_SINGLE_ENDED_SHIFT                9  /* IN3_MODE - [9] */
+#define ARIZONA_IN3_SINGLE_ENDED_WIDTH                1  /* IN3_MODE - [9] */
 #define ARIZONA_IN3L_PGA_VOL_MASK                0x00FE  /* IN3L_PGA_VOL - [7:1] */
 #define ARIZONA_IN3L_PGA_VOL_SHIFT                    1  /* IN3L_PGA_VOL - [7:1] */
 #define ARIZONA_IN3L_PGA_VOL_WIDTH                    7  /* IN3L_PGA_VOL - [7:1] */
-- 
cgit v1.2.3


From 7e2d67e94ab05f70d2a73d00283f46778386ca52 Mon Sep 17 00:00:00 2001
From: Richard Fitzgerald <rf@opensource.wolfsonmicro.com>
Date: Wed, 13 May 2015 16:52:25 +0100
Subject: mfd: arizona: Add stub for wm5102_patch()

For the WM5102 there is a dependency in the core code on wm5102_patch()
which only exists when CONFIG_MFD_WM5102 is defined. To avoid having
to sprinkle #ifdefs around the code it is given an alternative empty
stub version when CONFIG_MFD_WM5102 is deselected

Signed-off-by: Richard Fitzgerald <rf@opensource.wolfsonmicro.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/arizona/core.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/arizona/core.h b/include/linux/mfd/arizona/core.h
index 847bdaf47f1d..2f434f4f79a1 100644
--- a/include/linux/mfd/arizona/core.h
+++ b/include/linux/mfd/arizona/core.h
@@ -154,7 +154,15 @@ int arizona_request_irq(struct arizona *arizona, int irq, char *name,
 void arizona_free_irq(struct arizona *arizona, int irq, void *data);
 int arizona_set_irq_wake(struct arizona *arizona, int irq, int on);
 
+#ifdef CONFIG_MFD_WM5102
 int wm5102_patch(struct arizona *arizona);
+#else
+static inline int wm5102_patch(struct arizona *arizona)
+{
+	return 0;
+}
+#endif
+
 int wm5110_patch(struct arizona *arizona);
 int wm8997_patch(struct arizona *arizona);
 
-- 
cgit v1.2.3


From dfe816c5e37272f2f3c1311f0e9934e1b4229261 Mon Sep 17 00:00:00 2001
From: Pankaj Gupta <pagupta@redhat.com>
Date: Fri, 19 Jun 2015 19:47:53 +0530
Subject: macvtap: Increase limit of macvtap queues

Macvtap should be compatible with tuntap for
maximum number of queues.

commit 'baf71c5c1f80d82e92924050a60b5baaf97e3094 (tuntap:
Increase the number of queues in tun.)' removes
the limitations and increases number of queues in tuntap.
Now, Its safe to increase number of queues in Macvtap as well.

This patch also modifies 'macvtap_del_queues' function
to avoid extra memory allocation in stack.

Changes from v1->v2 :
Michael S. Tsirkin, Jason Wang  :
                  Better way to use linked list to
avoid use of extra memory in stack.
Sergei Shtylyov : Specify dependent commit's summary.

Signed-off-by: Pankaj Gupta <pagupta@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_macvlan.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index 6f6929ea8a0c..a4ccc3122f93 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -29,7 +29,7 @@ struct macvtap_queue;
  * Maximum times a macvtap device can be opened. This can be used to
  * configure the number of receive queue, e.g. for multiqueue virtio.
  */
-#define MAX_MACVTAP_QUEUES	16
+#define MAX_MACVTAP_QUEUES	256
 
 #define MACVLAN_MC_FILTER_BITS	8
 #define MACVLAN_MC_FILTER_SZ	(1 << MACVLAN_MC_FILTER_BITS)
-- 
cgit v1.2.3


From 7d4f8d871ab15bd50a5771382ca2c9355b38d73c Mon Sep 17 00:00:00 2001
From: Scott Feldman <sfeldma@gmail.com>
Date: Mon, 22 Jun 2015 00:27:17 -0700
Subject: switchdev; add VLAN support for port's bridge_getlink

One more missing piece of the puzzle.  Add vlan dump support to switchdev
port's bridge_getlink.  iproute2 "bridge vlan show" cmd already knows how
to show the vlans installed on the bridge and the device , but (until now)
no one implemented the port vlan part of the netlink PF_BRIDGE:RTM_GETLINK
msg.  Before this patch, "bridge vlan show":

	$ bridge -c vlan show
	port    vlan ids
	sw1p1    30-34			<< bridge side vlans
		 57

	sw1p1				<< device side vlans (missing)

	sw1p2    57

	sw1p2

	sw1p3

	sw1p4

	br0     None

(When the port is bridged, the output repeats the vlan list for the vlans
on the bridge side of the port and the vlans on the device side of the
port.  The listing above show no vlans for the device side even though they
are installed).

After this patch:

	$ bridge -c vlan show
	port    vlan ids
	sw1p1    30-34			<< bridge side vlan
		 57

	sw1p1    30-34			<< device side vlans
		 57
		 3840 PVID

	sw1p2    57

	sw1p2    57
		 3840 PVID

	sw1p3    3842 PVID

	sw1p4    3843 PVID

	br0     None

I re-used ndo_dflt_bridge_getlink to add vlan fill call-back func.
switchdev support adds an obj dump for VLAN objects, using the same
call-back scheme as FDB dump.  Support included for both compressed and
un-compressed vlan dumps.

Signed-off-by: Scott Feldman <sfeldma@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index a2324fb45cf4..39adaa9529eb 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -114,5 +114,9 @@ extern int ndo_dflt_fdb_del(struct ndmsg *ndm,
 
 extern int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
 				   struct net_device *dev, u16 mode,
-				   u32 flags, u32 mask, int nlflags);
+				   u32 flags, u32 mask, int nlflags,
+				   u32 filter_mask,
+				   int (*vlan_fill)(struct sk_buff *skb,
+						    struct net_device *dev,
+						    u32 filter_mask));
 #endif	/* __LINUX_RTNETLINK_H */
-- 
cgit v1.2.3


From cca0ba2df3d4000bacd9b7807d46ffafac62d53a Mon Sep 17 00:00:00 2001
From: Hyungwon Hwang <human.hwang@samsung.com>
Date: Thu, 28 May 2015 16:25:15 +0900
Subject: backlight: Change the return type of backlight_update_status() to int

Backlight device returns the result of update_status(), but
backlight_update_status() ignores it. So the consumers cannot confirm the
result of their function call. This patch makes the result to be returned
back for consumers.

Signed-off-by: Hyungwon Hwang <human.hwang@samsung.com>
Acked-by: Jingoo Han <jingoohan1@gmail.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/backlight.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/backlight.h b/include/linux/backlight.h
index adb14a8616df..1e7a69adbe6f 100644
--- a/include/linux/backlight.h
+++ b/include/linux/backlight.h
@@ -117,12 +117,16 @@ struct backlight_device {
 	int use_count;
 };
 
-static inline void backlight_update_status(struct backlight_device *bd)
+static inline int backlight_update_status(struct backlight_device *bd)
 {
+	int ret = -ENOENT;
+
 	mutex_lock(&bd->update_lock);
 	if (bd->ops && bd->ops->update_status)
-		bd->ops->update_status(bd);
+		ret = bd->ops->update_status(bd);
 	mutex_unlock(&bd->update_lock);
+
+	return ret;
 }
 
 extern struct backlight_device *backlight_device_register(const char *name,
-- 
cgit v1.2.3


From ce0bdb849ad46e4b4e4cae6913b447ae9938bdcf Mon Sep 17 00:00:00 2001
From: Inki Dae <inki.dae@samsung.com>
Date: Tue, 23 Jun 2015 16:06:44 +0900
Subject: of: fix a build error to of_graph_get_endpoint_by_regs function

This patch fixes the below build error reported by Stephen,

     Stephen reported:
     After merging the drm-exynos tree, today's linux-next build (x86_64
     allmodconfig) failed like this:

     drivers/media/i2c/adv7604.o: In function `of_graph_get_endpoint_by_regs':
     adv7604.c:(.text+0x586c): multiple definition of `of_graph_get_endpoint_by_regs'
     drivers/media/i2c/adv7343.o:adv7343.c:(.text+0xa13): first defined here
     drivers/media/platform/soc_camera/atmel-isi.o: In function `of_graph_get_endpoint_by_regs':
     atmel-isi.c:(.text+0x1ec9): multiple definition of `of_graph_get_endpoint_by_regs'
     drivers/media/platform/soc_camera/soc_camera.o:soc_camera.c:(.text+0x2ce3): first defined here
     drivers/media/platform/soc_camera/rcar_vin.o: In function `of_graph_get_endpoint_by_regs':
     rcar_vin.c:(.text+0x307c): multiple definition of `of_graph_get_endpoint_by_regs'
     drivers/media/platform/soc_camera/soc_camera.o:soc_camera.c:(.text+0x2ce3): first defined here

     Caused by commit:
       a0f7001c18ca ("of: add helper for getting endpoint node of specific identifiers")

To fix the error, this patch declares of_graph_get_endpoint_by_regs function
with "static inline".

Signed-off-by: Inki Dae <inki.dae@samsung.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/linux/of_graph.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/of_graph.h b/include/linux/of_graph.h
index 1c1d5b901a72..f8bcd0e21a26 100644
--- a/include/linux/of_graph.h
+++ b/include/linux/of_graph.h
@@ -71,7 +71,7 @@ static inline struct device_node *of_graph_get_next_endpoint(
 	return NULL;
 }
 
-struct device_node *of_graph_get_endpoint_by_regs(
+static inline struct device_node *of_graph_get_endpoint_by_regs(
 		const struct device_node *parent, int port_reg, int reg)
 {
 	return NULL;
-- 
cgit v1.2.3


From 0eeb075fad736fb92620af995c47c204bbb5e829 Mon Sep 17 00:00:00 2001
From: Andy Gospodarek <gospo@cumulusnetworks.com>
Date: Tue, 23 Jun 2015 13:45:37 -0400
Subject: net: ipv4 sysctl option to ignore routes when nexthop link is down

This feature is only enabled with the new per-interface or ipv4 global
sysctls called 'ignore_routes_with_linkdown'.

net.ipv4.conf.all.ignore_routes_with_linkdown = 0
net.ipv4.conf.default.ignore_routes_with_linkdown = 0
net.ipv4.conf.lo.ignore_routes_with_linkdown = 0
...

When the above sysctls are set, will report to userspace that a route is
dead and will no longer resolve to this nexthop when performing a fib
lookup.  This will signal to userspace that the route will not be
selected.  The signalling of a RTNH_F_DEAD is only passed to userspace
if the sysctl is enabled and link is down.  This was done as without it
the netlink listeners would have no idea whether or not a nexthop would
be selected.   The kernel only sets RTNH_F_DEAD internally if the
interface has IFF_UP cleared.

With the new sysctl set, the following behavior can be observed
(interface p8p1 is link-down):

default via 10.0.5.2 dev p9p1
10.0.5.0/24 dev p9p1  proto kernel  scope link  src 10.0.5.15
70.0.0.0/24 dev p7p1  proto kernel  scope link  src 70.0.0.1
80.0.0.0/24 dev p8p1  proto kernel  scope link  src 80.0.0.1 dead linkdown
90.0.0.0/24 via 80.0.0.2 dev p8p1  metric 1 dead linkdown
90.0.0.0/24 via 70.0.0.2 dev p7p1  metric 2
90.0.0.1 via 70.0.0.2 dev p7p1  src 70.0.0.1
    cache
local 80.0.0.1 dev lo  src 80.0.0.1
    cache <local>
80.0.0.2 via 10.0.5.2 dev p9p1  src 10.0.5.15
    cache

While the route does remain in the table (so it can be modified if
needed rather than being wiped away as it would be if IFF_UP was
cleared), the proper next-hop is chosen automatically when the link is
down.  Now interface p8p1 is linked-up:

default via 10.0.5.2 dev p9p1
10.0.5.0/24 dev p9p1  proto kernel  scope link  src 10.0.5.15
70.0.0.0/24 dev p7p1  proto kernel  scope link  src 70.0.0.1
80.0.0.0/24 dev p8p1  proto kernel  scope link  src 80.0.0.1
90.0.0.0/24 via 80.0.0.2 dev p8p1  metric 1
90.0.0.0/24 via 70.0.0.2 dev p7p1  metric 2
192.168.56.0/24 dev p2p1  proto kernel  scope link  src 192.168.56.2
90.0.0.1 via 80.0.0.2 dev p8p1  src 80.0.0.1
    cache
local 80.0.0.1 dev lo  src 80.0.0.1
    cache <local>
80.0.0.2 dev p8p1  src 80.0.0.1
    cache

and the output changes to what one would expect.

If the sysctl is not set, the following output would be expected when
p8p1 is down:

default via 10.0.5.2 dev p9p1
10.0.5.0/24 dev p9p1  proto kernel  scope link  src 10.0.5.15
70.0.0.0/24 dev p7p1  proto kernel  scope link  src 70.0.0.1
80.0.0.0/24 dev p8p1  proto kernel  scope link  src 80.0.0.1 linkdown
90.0.0.0/24 via 80.0.0.2 dev p8p1  metric 1 linkdown
90.0.0.0/24 via 70.0.0.2 dev p7p1  metric 2

Since the dead flag does not appear, there should be no expectation that
the kernel would skip using this route due to link being down.

v2: Split kernel changes into 2 patches, this actually makes a
behavioral change if the sysctl is set.  Also took suggestion from Alex
to simplify code by only checking sysctl during fib lookup and
suggestion from Scott to add a per-interface sysctl.

v3: Code clean-ups to make it more readable and efficient as well as a
reverse path check fix.

v4: Drop binary sysctl

v5: Whitespace fixups from Dave

v6: Style changes from Dave and checkpatch suggestions

v7: One more checkpatch fixup

Signed-off-by: Andy Gospodarek <gospo@cumulusnetworks.com>
Signed-off-by: Dinesh Dutt <ddutt@cumulusnetworks.com>
Acked-by: Scott Feldman <sfeldma@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inetdevice.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index 0a21fbefdfbe..a4328cea376a 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -120,6 +120,9 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev)
 	 || (!IN_DEV_FORWARD(in_dev) && \
 	  IN_DEV_ORCONF((in_dev), ACCEPT_REDIRECTS)))
 
+#define IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) \
+	IN_DEV_CONF_GET((in_dev), IGNORE_ROUTES_WITH_LINKDOWN)
+
 #define IN_DEV_ARPFILTER(in_dev)	IN_DEV_ORCONF((in_dev), ARPFILTER)
 #define IN_DEV_ARP_ACCEPT(in_dev)	IN_DEV_ORCONF((in_dev), ARP_ACCEPT)
 #define IN_DEV_ARP_ANNOUNCE(in_dev)	IN_DEV_MAXCONF((in_dev), ARP_ANNOUNCE)
-- 
cgit v1.2.3


From 9200025724619d83f9fc366281f0bde36afe6e5a Mon Sep 17 00:00:00 2001
From: Xunlei Pang <pang.xunlei@linaro.org>
Date: Fri, 12 Jun 2015 10:04:10 +0800
Subject: rtc: Introduce rtc_tm_sub() helper function

There're many sites need comparing the two rtc_time variants for many
rtc drivers, especially in the instances of rtc_class_ops::set_alarm().

So add this common helper function to make things easy.

Suggested-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Xunlei Pang <pang.xunlei@linaro.org>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 include/linux/rtc.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index 587017e7939c..b36160321458 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -24,6 +24,14 @@ extern void rtc_time64_to_tm(time64_t time, struct rtc_time *tm);
 ktime_t rtc_tm_to_ktime(struct rtc_time tm);
 struct rtc_time rtc_ktime_to_tm(ktime_t kt);
 
+/*
+ * rtc_tm_sub - Return the difference in seconds.
+ */
+static inline time64_t rtc_tm_sub(struct rtc_time *lhs, struct rtc_time *rhs)
+{
+	return rtc_tm_to_time64(lhs) - rtc_tm_to_time64(rhs);
+}
+
 /**
  * Deprecated. Use rtc_time64_to_tm().
  */
-- 
cgit v1.2.3


From c86a6c28957a9e8e9a71582a32e96971ad411ffe Mon Sep 17 00:00:00 2001
From: Xunlei Pang <pang.xunlei@linaro.org>
Date: Fri, 12 Jun 2015 11:10:18 +0800
Subject: rtc: interface: Remove rtc_set_mmss()

Now rtc_set_mmss() has no users, just remove it.

We still have rtc_set_time() doing similar things.

Signed-off-by: Xunlei Pang <pang.xunlei@linaro.org>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 include/linux/rtc.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index b36160321458..3359f0422c6b 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -168,7 +168,6 @@ extern void devm_rtc_device_unregister(struct device *dev,
 
 extern int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm);
 extern int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm);
-extern int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs);
 extern int rtc_set_ntp_time(struct timespec64 now);
 int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm);
 extern int rtc_read_alarm(struct rtc_device *rtc,
-- 
cgit v1.2.3


From c3cddc4c296c3a1498df7181015cbcea178a0546 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <kernel@kyup.com>
Date: Wed, 24 Jun 2015 16:54:45 -0700
Subject: fsnotify: remove obsolete documentation

should_send_event is no longer part of struct fsnotify_ops, so remove it.

Signed-off-by: Nikolay Borisov <kernel@kyup.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fsnotify_backend.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 0f313f93c586..65a517dd32f7 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -84,8 +84,6 @@ struct fsnotify_fname;
  * Each group much define these ops.  The fsnotify infrastructure will call
  * these operations for each relevant group.
  *
- * should_send_event - given a group, inode, and mask this function determines
- *		if the group is interested in this event.
  * handle_event - main call for a group to handle an fs event
  * free_group_priv - called when a group refcnt hits 0 to clean up the private union
  * freeing_mark - called when a mark is being destroyed for some reason.  The group
-- 
cgit v1.2.3


From 5286d20c4eb7b0c29217f8756652609df74f5489 Mon Sep 17 00:00:00 2001
From: Fabian Frederick <fabf@skynet.be>
Date: Wed, 24 Jun 2015 16:54:51 -0700
Subject: configfs: unexport/make static config_item_init()

config_item_init() is only used in item.c

Signed-off-by: Fabian Frederick <fabf@skynet.be>
Cc: Joel Becker <jlbec@evilplan.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/configfs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/configfs.h b/include/linux/configfs.h
index 34025df61829..c9e5c57e4edf 100644
--- a/include/linux/configfs.h
+++ b/include/linux/configfs.h
@@ -71,7 +71,6 @@ static inline char *config_item_name(struct config_item * item)
 	return item->ci_name;
 }
 
-extern void config_item_init(struct config_item *);
 extern void config_item_init_type_name(struct config_item *item,
 				       const char *name,
 				       struct config_item_type *type);
-- 
cgit v1.2.3


From b5242e98c1cb834feb1e84026f09a4796b49eb4d Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@ezchip.com>
Date: Wed, 24 Jun 2015 16:55:42 -0700
Subject: smpboot: allow excluding cpus from the smpboot threads

This patch series allows the watchdog to run by default only on the
housekeeping cores when nohz_full is in effect; this seems to be a good
compromise short of turning it off completely (since the nohz_full cores
can't tolerate a watchdog).

To provide customizability, we add /proc/sys/kernel/watchdog_cpumask so
that the set of cores running the watchdog can be tuned to different
values after bootup.

To implement this customizability, we add a new
smpboot_update_cpumask_percpu_thread() API to the smpboot_thread
subsystem that lets us park or unpark "unwanted" threads.

And now that threads can be parked for long periods of time, we tweak the
/proc/<pid>/stat and /proc/<pid>/status code so parked threads aren't
reported as running, which is otherwise confusing.

This patch (of 3):

This change allows some cores to be excluded from running the
smp_hotplug_thread tasks.  The following commit to update
kernel/watchdog.c to use this functionality is the motivating example, and
more information on the motivation is provided there.

A new smp_hotplug_thread field is introduced, "cpumask", which is cpumask
field managed by the smpboot subsystem that indicates whether or not the
given smp_hotplug_thread should run on that core; the cpumask is checked
when deciding whether to unpark the thread.

To limit the cpumask to less than cpu_possible, you must call
smpboot_update_cpumask_percpu_thread() after registering.

Signed-off-by: Chris Metcalf <cmetcalf@ezchip.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Ulrich Obergfell <uobergfe@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/smpboot.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/smpboot.h b/include/linux/smpboot.h
index d600afb21926..da3c593f9845 100644
--- a/include/linux/smpboot.h
+++ b/include/linux/smpboot.h
@@ -27,6 +27,8 @@ struct smpboot_thread_data;
  * @pre_unpark:		Optional unpark function, called before the thread is
  *			unparked (cpu online). This is not guaranteed to be
  *			called on the target cpu of the thread. Careful!
+ * @cpumask:		Internal state.  To update which threads are unparked,
+ *			call smpboot_update_cpumask_percpu_thread().
  * @selfparking:	Thread is not parked by the park function.
  * @thread_comm:	The base name of the thread
  */
@@ -41,11 +43,14 @@ struct smp_hotplug_thread {
 	void				(*park)(unsigned int cpu);
 	void				(*unpark)(unsigned int cpu);
 	void				(*pre_unpark)(unsigned int cpu);
+	cpumask_var_t			cpumask;
 	bool				selfparking;
 	const char			*thread_comm;
 };
 
 int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread);
 void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread);
+int smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread,
+					 const struct cpumask *);
 
 #endif
-- 
cgit v1.2.3


From fe4ba3c34352b7e8068b7f18eb233444aed17011 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@ezchip.com>
Date: Wed, 24 Jun 2015 16:55:45 -0700
Subject: watchdog: add watchdog_cpumask sysctl to assist nohz

Change the default behavior of watchdog so it only runs on the
housekeeping cores when nohz_full is enabled at build and boot time.
Allow modifying the set of cores the watchdog is currently running on
with a new kernel.watchdog_cpumask sysctl.

In the current system, the watchdog subsystem runs a periodic timer that
schedules the watchdog kthread to run.  However, nohz_full cores are
designed to allow userspace application code running on those cores to
have 100% access to the CPU.  So the watchdog system prevents the
nohz_full application code from being able to run the way it wants to,
thus the motivation to suppress the watchdog on nohz_full cores, which
this patchset provides by default.

However, if we disable the watchdog globally, then the housekeeping
cores can't benefit from the watchdog functionality.  So we allow
disabling it only on some cores.  See Documentation/lockup-watchdogs.txt
for more information.

[jhubbard@nvidia.com: fix a watchdog crash in some configurations]
Signed-off-by: Chris Metcalf <cmetcalf@ezchip.com>
Acked-by: Don Zickus <dzickus@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Ulrich Obergfell <uobergfe@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/nmi.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 3d46fb4708e0..f94da0e65dea 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -67,6 +67,7 @@ extern int nmi_watchdog_enabled;
 extern int soft_watchdog_enabled;
 extern int watchdog_user_enabled;
 extern int watchdog_thresh;
+extern unsigned long *watchdog_cpumask_bits;
 extern int sysctl_softlockup_all_cpu_backtrace;
 struct ctl_table;
 extern int proc_watchdog(struct ctl_table *, int ,
@@ -77,6 +78,8 @@ extern int proc_soft_watchdog(struct ctl_table *, int ,
 			      void __user *, size_t *, loff_t *);
 extern int proc_watchdog_thresh(struct ctl_table *, int ,
 				void __user *, size_t *, loff_t *);
+extern int proc_watchdog_cpumask(struct ctl_table *, int,
+				 void __user *, size_t *, loff_t *);
 #endif
 
 #ifdef CONFIG_HAVE_ACPI_APEI_NMI
-- 
cgit v1.2.3


From 4066c33d0308f87e9a3b0c7fafb9141c0bfbfa77 Mon Sep 17 00:00:00 2001
From: Gavin Guo <gavin.guo@canonical.com>
Date: Wed, 24 Jun 2015 16:55:54 -0700
Subject: mm/slab_common: support the slub_debug boot option on specific object
 size

The slub_debug=PU,kmalloc-xx cannot work because in the
create_kmalloc_caches() the s->name is created after the
create_kmalloc_cache() is called.  The name is NULL in the
create_kmalloc_cache() so the kmem_cache_flags() would not set the
slub_debug flags to the s->flags.  The fix here set up a kmalloc_names
string array for the initialization purpose and delete the dynamic name
creation of kmalloc_caches.

[akpm@linux-foundation.org: s/kmalloc_names/kmalloc_info/, tweak comment text]
Signed-off-by: Gavin Guo <gavin.guo@canonical.com>
Acked-by: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slab.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index ffd24c830151..96f0ea506b5c 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -153,8 +153,30 @@ size_t ksize(const void *);
 #define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN
 #define KMALLOC_MIN_SIZE ARCH_DMA_MINALIGN
 #define KMALLOC_SHIFT_LOW ilog2(ARCH_DMA_MINALIGN)
+/*
+ * The KMALLOC_LOOP_LOW is the definition for the for loop index start number
+ * to create the kmalloc_caches object in create_kmalloc_caches(). The first
+ * and the second are 96 and 192. You can see that in the kmalloc_index(), if
+ * the KMALLOC_MIN_SIZE <= 32, then return 1 (96). If KMALLOC_MIN_SIZE <= 64,
+ * then return 2 (192). If the KMALLOC_MIN_SIZE is bigger than 64, we don't
+ * need to initialize 96 and 192. Go directly to start the KMALLOC_SHIFT_LOW.
+ */
+#if KMALLOC_MIN_SIZE <= 32
+#define KMALLOC_LOOP_LOW 1
+#elif KMALLOC_MIN_SIZE <= 64
+#define KMALLOC_LOOP_LOW 2
+#else
+#define KMALLOC_LOOP_LOW KMALLOC_SHIFT_LOW
+#endif
+
 #else
 #define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
+/*
+ * The KMALLOC_MIN_SIZE of slub/slab/slob is 2^3/2^5/2^3. So, even slab is used.
+ * The KMALLOC_MIN_SIZE <= 32. The kmalloc-96 and kmalloc-192 should also be
+ * initialized.
+ */
+#define KMALLOC_LOOP_LOW 1
 #endif
 
 /*
-- 
cgit v1.2.3


From 1ed58b6051b67e5cfe9e465fb60bf7d5f55e0a64 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Wed, 24 Jun 2015 16:55:59 -0700
Subject: linux/slab.h: fix three off-by-one typos in comment

The first is a keyboard-off-by-one, the other two the ordinary mathy kind.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Acked-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slab.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 96f0ea506b5c..9de2fdc8b5e4 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -262,8 +262,8 @@ extern struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1];
  * belongs to.
  * 0 = zero alloc
  * 1 =  65 .. 96 bytes
- * 2 = 120 .. 192 bytes
- * n = 2^(n-1) .. 2^n -1
+ * 2 = 129 .. 192 bytes
+ * n = 2^(n-1)+1 .. 2^n
  */
 static __always_inline int kmalloc_index(size_t size)
 {
-- 
cgit v1.2.3


From 2ae416b142b625c58c9ccb039aa3ef48ad0e9bae Mon Sep 17 00:00:00 2001
From: Laurent Dufour <ldufour@linux.vnet.ibm.com>
Date: Wed, 24 Jun 2015 16:56:16 -0700
Subject: mm: new mm hook framework

CRIU is recreating the process memory layout by remapping the checkpointee
memory area on top of the current process (criu).  This includes remapping
the vDSO to the place it has at checkpoint time.

However some architectures like powerpc are keeping a reference to the
vDSO base address to build the signal return stack frame by calling the
vDSO sigreturn service.  So once the vDSO has been moved, this reference
is no more valid and the signal frame built later are not usable.

This patch serie is introducing a new mm hook framework, and a new
arch_remap hook which is called when mremap is done and the mm lock still
hold.  The next patch is adding the vDSO remap and unmap tracking to the
powerpc architecture.

This patch (of 3):

This patch introduces a new set of header file to manage mm hooks:
- per architecture empty header file (arch/x/include/asm/mm-arch-hooks.h)
- a generic header (include/linux/mm-arch-hooks.h)

The architecture which need to overwrite a hook as to redefine it in its
header file, while architecture which doesn't need have nothing to do.

The default hooks are defined in the generic header and are used in the
case the architecture is not defining it.

In a next step, mm hooks defined in include/asm-generic/mm_hooks.h should
be moved here.

Signed-off-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
Suggested-by: Andrew Morton <akpm@linux-foundation.org>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm-arch-hooks.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)
 create mode 100644 include/linux/mm-arch-hooks.h

(limited to 'include/linux')

diff --git a/include/linux/mm-arch-hooks.h b/include/linux/mm-arch-hooks.h
new file mode 100644
index 000000000000..63005e367abd
--- /dev/null
+++ b/include/linux/mm-arch-hooks.h
@@ -0,0 +1,16 @@
+/*
+ * Generic mm no-op hooks.
+ *
+ * Copyright (C) 2015, IBM Corporation
+ * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef _LINUX_MM_ARCH_HOOKS_H
+#define _LINUX_MM_ARCH_HOOKS_H
+
+#include <asm/mm-arch-hooks.h>
+
+#endif /* _LINUX_MM_ARCH_HOOKS_H */
-- 
cgit v1.2.3


From 4abad2ca4a4dbdd4a218c12451231ab628f2e60c Mon Sep 17 00:00:00 2001
From: Laurent Dufour <ldufour@linux.vnet.ibm.com>
Date: Wed, 24 Jun 2015 16:56:19 -0700
Subject: mm: new arch_remap() hook

Some architectures would like to be triggered when a memory area is moved
through the mremap system call.

This patch introduces a new arch_remap() mm hook which is placed in the
path of mremap, and is called before the old area is unmapped (and the
arch_unmap() hook is called).

Signed-off-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm-arch-hooks.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm-arch-hooks.h b/include/linux/mm-arch-hooks.h
index 63005e367abd..4efc3f56e6df 100644
--- a/include/linux/mm-arch-hooks.h
+++ b/include/linux/mm-arch-hooks.h
@@ -13,4 +13,13 @@
 
 #include <asm/mm-arch-hooks.h>
 
+#ifndef arch_remap
+static inline void arch_remap(struct mm_struct *mm,
+			      unsigned long old_start, unsigned long old_end,
+			      unsigned long new_start, unsigned long new_end)
+{
+}
+#define arch_remap arch_remap
+#endif
+
 #endif /* _LINUX_MM_ARCH_HOOKS_H */
-- 
cgit v1.2.3


From a9919c79359df9a706ff9e14fc0a93cd15791c9b Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Wed, 24 Jun 2015 16:56:28 -0700
Subject: mm: only define hashdist variable when needed

For !CONFIG_NUMA, hashdist will always be 0, since it's setter is
otherwise compiled out.  So we can save 4 bytes of data and some .text
(although mostly in __init functions) by only defining it for
CONFIG_NUMA.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Acked-by: David Rientjes <rientjes@google.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bootmem.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 0995c2de8162..f589222bfa87 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -357,12 +357,12 @@ extern void *alloc_large_system_hash(const char *tablename,
 /* Only NUMA needs hash distribution. 64bit NUMA architectures have
  * sufficient vmalloc space.
  */
-#if defined(CONFIG_NUMA) && defined(CONFIG_64BIT)
-#define HASHDIST_DEFAULT 1
+#ifdef CONFIG_NUMA
+#define HASHDIST_DEFAULT IS_ENABLED(CONFIG_64BIT)
+extern int hashdist;		/* Distribute hashes across NUMA nodes? */
 #else
-#define HASHDIST_DEFAULT 0
+#define hashdist (0)
 #endif
-extern int hashdist;		/* Distribute hashes across NUMA nodes? */
 
 
 #endif /* _LINUX_BOOTMEM_H */
-- 
cgit v1.2.3


From c761471b58e6138938ebc6eafec20b2f60cb3397 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Wed, 24 Jun 2015 16:56:33 -0700
Subject: mm: avoid tail page refcounting on non-THP compound pages

Reintroduce 8d63d99a5dfb ("mm: avoid tail page refcounting on non-THP
compound pages") after removing bogus VM_BUG_ON_PAGE() in
put_unrefcounted_compound_page().

THP uses tail page refcounting to be able to split huge pages at any time.
 Tail page refcounting is not needed for other users of compound pages and
it's harmful because of overhead.

We try to exclude non-THP pages from tail page refcounting using
__compound_tail_refcounted() check.  It excludes most common non-THP
compound pages: SL*B and hugetlb, but it doesn't catch rest of __GFP_COMP
users -- drivers.

And it's not only about overhead.

Drivers might want to use compound pages to get refcounting semantics
suitable for mapping high-order pages to userspace.  But tail page
refcounting breaks it.

Tail page refcounting uses ->_mapcount in tail pages to store GUP pins on
them.  It means GUP pins would affect page_mapcount() for tail pages.
It's not a problem for THP, because it never maps tail pages.  But unlike
THP, drivers map parts of compound pages with PTEs and it makes
page_mapcount() be called for tail pages.

In particular, GUP pins would shift PSS up and affect /proc/kpagecount for
such pages.  But, I'm not aware about anything which can lead to crash or
other serious misbehaviour.

Since currently all THP pages are anonymous and all drivers pages are not,
we can fix the __compound_tail_refcounted() check by requiring PageAnon()
to enable tail page refcounting.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Hugh Dickins <hughd@google.com>
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Reported-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0755b9fd03a7..8b086070c3a5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -499,7 +499,7 @@ static inline int page_count(struct page *page)
 
 static inline bool __compound_tail_refcounted(struct page *page)
 {
-	return !PageSlab(page) && !PageHeadHuge(page);
+	return PageAnon(page) && !PageSlab(page) && !PageHeadHuge(page);
 }
 
 /*
-- 
cgit v1.2.3


From ead07f6a867b5b1b41cf703735e8b39094987a7d Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Date: Wed, 24 Jun 2015 16:56:48 -0700
Subject: mm/memory-failure: introduce get_hwpoison_page() for consistent
 refcount handling

memory_failure() can run in 2 different mode (specified by
MF_COUNT_INCREASED) in page refcount perspective.  When
MF_COUNT_INCREASED is set, memory_failure() assumes that the caller
takes a refcount of the target page.  And if cleared, memory_failure()
takes it in it's own.

In current code, however, refcounting is done differently in each caller.
For example, madvise_hwpoison() uses get_user_pages_fast() and
hwpoison_inject() uses get_page_unless_zero().  So this inconsistent
refcounting causes refcount failure especially for thp tail pages.
Typical user visible effects are like memory leak or
VM_BUG_ON_PAGE(!page_count(page)) in isolate_lru_page().

To fix this refcounting issue, this patch introduces get_hwpoison_page()
to handle thp tail pages in the same manner for each caller of hwpoison
code.

memory_failure() might fail to split thp and in such case it returns
without completing page isolation.  This is not good because PageHWPoison
on the thp is still set and there's no easy way to unpoison such thps.  So
this patch try to roll back any action to the thp in "non anonymous thp"
case and "thp split failed" case, expecting an MCE(SRAR) generated by
later access afterward will properly free such thps.

[akpm@linux-foundation.org: fix CONFIG_HWPOISON_INJECT=m]
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Tony Luck <tony.luck@intel.com>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8b086070c3a5..cd9df66138a1 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2146,6 +2146,7 @@ enum mf_flags {
 extern int memory_failure(unsigned long pfn, int trapno, int flags);
 extern void memory_failure_queue(unsigned long pfn, int trapno, int flags);
 extern int unpoison_memory(unsigned long pfn);
+extern int get_hwpoison_page(struct page *page);
 extern int sysctl_memory_failure_early_kill;
 extern int sysctl_memory_failure_recovery;
 extern void shake_page(struct page *p, int access);
-- 
cgit v1.2.3


From 16e951966f05da5ccd650104176f6ba289f7fa20 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Wed, 24 Jun 2015 16:57:07 -0700
Subject: mm: oom_kill: clean up victim marking and exiting interfaces

Rename unmark_oom_victim() to exit_oom_victim().  Marking and unmarking
are related in functionality, but the interface is not symmetrical at
all: one is an internal OOM killer function used during the killing, the
other is for an OOM victim to signal its own death on exit later on.
This has locking implications, see follow-up changes.

While at it, rename mark_tsk_oom_victim() to mark_oom_victim(), which
is easier on the eye.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/oom.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/oom.h b/include/linux/oom.h
index 44b2f6f7bbd8..a8e6a498cbcb 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -47,9 +47,7 @@ static inline bool oom_task_origin(const struct task_struct *p)
 	return !!(p->signal->oom_flags & OOM_FLAG_ORIGIN);
 }
 
-extern void mark_tsk_oom_victim(struct task_struct *tsk);
-
-extern void unmark_oom_victim(void);
+extern void mark_oom_victim(struct task_struct *tsk);
 
 extern unsigned long oom_badness(struct task_struct *p,
 		struct mem_cgroup *memcg, const nodemask_t *nodemask,
@@ -75,6 +73,9 @@ extern enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
 
 extern bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
 		int order, nodemask_t *mask, bool force_kill);
+
+extern void exit_oom_victim(void);
+
 extern int register_oom_notifier(struct notifier_block *nb);
 extern int unregister_oom_notifier(struct notifier_block *nb);
 
-- 
cgit v1.2.3


From dc56401fc9f25e8f93899991ec858c98a331d88c Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Wed, 24 Jun 2015 16:57:19 -0700
Subject: mm: oom_kill: simplify OOM killer locking

The zonelist locking and the oom_sem are two overlapping locks that are
used to serialize global OOM killing against different things.

The historical zonelist locking serializes OOM kills from allocations with
overlapping zonelists against each other to prevent killing more tasks
than necessary in the same memory domain.  Only when neither tasklists nor
zonelists from two concurrent OOM kills overlap (tasks in separate memcgs
bound to separate nodes) are OOM kills allowed to execute in parallel.

The younger oom_sem is a read-write lock to serialize OOM killing against
the PM code trying to disable the OOM killer altogether.

However, the OOM killer is a fairly cold error path, there is really no
reason to optimize for highly performant and concurrent OOM kills.  And
the oom_sem is just flat-out redundant.

Replace both locking schemes with a single global mutex serializing OOM
kills regardless of context.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/oom.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/oom.h b/include/linux/oom.h
index a8e6a498cbcb..7deecb7bca5e 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -32,6 +32,8 @@ enum oom_scan_t {
 /* Thread is the potential origin of an oom condition; kill first on oom */
 #define OOM_FLAG_ORIGIN		((__force oom_flags_t)0x1)
 
+extern struct mutex oom_lock;
+
 static inline void set_current_oom_origin(void)
 {
 	current->signal->oom_flags |= OOM_FLAG_ORIGIN;
@@ -60,9 +62,6 @@ extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 			     struct mem_cgroup *memcg, nodemask_t *nodemask,
 			     const char *message);
 
-extern bool oom_zonelist_trylock(struct zonelist *zonelist, gfp_t gfp_flags);
-extern void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_flags);
-
 extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
 			       int order, const nodemask_t *nodemask,
 			       struct mem_cgroup *memcg);
-- 
cgit v1.2.3


From cc637b1704d78b068c2eb700eec384c69ea56cdf Mon Sep 17 00:00:00 2001
From: Xie XiuQi <xiexiuqi@huawei.com>
Date: Wed, 24 Jun 2015 16:57:30 -0700
Subject: memory-failure: export page_type and action result

Export 'outcome' and 'action_page_type' to mm.h, so we could use
this emnus outside.

This patch is preparation for adding trace events for memory-failure
recovery action.

Signed-off-by: Xie XiuQi <xiexiuqi@huawei.com>
Acked-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Chen Gong <gong.chen@linux.intel.com>
Cc: Jim Davis <jim.epost@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tony Luck <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index cd9df66138a1..986a9a221ee0 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2153,6 +2153,40 @@ extern void shake_page(struct page *p, int access);
 extern atomic_long_t num_poisoned_pages;
 extern int soft_offline_page(struct page *page, int flags);
 
+
+/*
+ * Error handlers for various types of pages.
+ */
+enum mf_outcome {
+	MF_IGNORED,	/* Error: cannot be handled */
+	MF_FAILED,	/* Error: handling failed */
+	MF_DELAYED,	/* Will be handled later */
+	MF_RECOVERED,	/* Successfully recovered */
+};
+
+enum mf_action_page_type {
+	MF_MSG_KERNEL,
+	MF_MSG_KERNEL_HIGH_ORDER,
+	MF_MSG_SLAB,
+	MF_MSG_DIFFERENT_COMPOUND,
+	MF_MSG_POISONED_HUGE,
+	MF_MSG_HUGE,
+	MF_MSG_FREE_HUGE,
+	MF_MSG_UNMAP_FAILED,
+	MF_MSG_DIRTY_SWAPCACHE,
+	MF_MSG_CLEAN_SWAPCACHE,
+	MF_MSG_DIRTY_MLOCKED_LRU,
+	MF_MSG_CLEAN_MLOCKED_LRU,
+	MF_MSG_DIRTY_UNEVICTABLE_LRU,
+	MF_MSG_CLEAN_UNEVICTABLE_LRU,
+	MF_MSG_DIRTY_LRU,
+	MF_MSG_CLEAN_LRU,
+	MF_MSG_TRUNCATED_LRU,
+	MF_MSG_BUDDY,
+	MF_MSG_BUDDY_2ND,
+	MF_MSG_UNKNOWN,
+};
+
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS)
 extern void clear_huge_page(struct page *page,
 			    unsigned long addr,
-- 
cgit v1.2.3


From cc3e2af42e7b7e0457b93bf17c19b44c635cd40c Mon Sep 17 00:00:00 2001
From: Xie XiuQi <xiexiuqi@huawei.com>
Date: Wed, 24 Jun 2015 16:57:33 -0700
Subject: memory-failure: change type of action_result's param 3 to enum

Change type of action_result's param 3 to enum for type consistency,
and rename mf_outcome to mf_result for clearly.

Signed-off-by: Xie XiuQi <xiexiuqi@huawei.com>
Acked-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Chen Gong <gong.chen@linux.intel.com>
Cc: Jim Davis <jim.epost@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tony Luck <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 986a9a221ee0..24ad583596d1 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2157,7 +2157,7 @@ extern int soft_offline_page(struct page *page, int flags);
 /*
  * Error handlers for various types of pages.
  */
-enum mf_outcome {
+enum mf_result {
 	MF_IGNORED,	/* Error: cannot be handled */
 	MF_FAILED,	/* Error: handling failed */
 	MF_DELAYED,	/* Will be handled later */
-- 
cgit v1.2.3


From 8809aa2d28d74111ff2f1928edaa4e9845c97a7d Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Wed, 24 Jun 2015 16:57:44 -0700
Subject: mm: clarify that the function operates on hugepage pte

We have confusing functions to clear pmd, pmd_clear_* and pmd_clear.  Add
_huge_ to pmdp_clear functions so that we are clear that they operate on
hugepage pte.

We don't bother about other functions like pmdp_set_wrprotect,
pmdp_clear_flush_young, because they operate on PTE bits and hence
indicate they are operating on hugepage ptes

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmu_notifier.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 95243d28a0ee..61cd67f4d788 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -324,25 +324,25 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
 	___pte;								\
 })
 
-#define pmdp_clear_flush_notify(__vma, __haddr, __pmd)			\
+#define pmdp_huge_clear_flush_notify(__vma, __haddr, __pmd)		\
 ({									\
 	unsigned long ___haddr = __haddr & HPAGE_PMD_MASK;		\
 	struct mm_struct *___mm = (__vma)->vm_mm;			\
 	pmd_t ___pmd;							\
 									\
-	___pmd = pmdp_clear_flush(__vma, __haddr, __pmd);		\
+	___pmd = pmdp_huge_clear_flush(__vma, __haddr, __pmd);		\
 	mmu_notifier_invalidate_range(___mm, ___haddr,			\
 				      ___haddr + HPAGE_PMD_SIZE);	\
 									\
 	___pmd;								\
 })
 
-#define pmdp_get_and_clear_notify(__mm, __haddr, __pmd)			\
+#define pmdp_huge_get_and_clear_notify(__mm, __haddr, __pmd)		\
 ({									\
 	unsigned long ___haddr = __haddr & HPAGE_PMD_MASK;		\
 	pmd_t ___pmd;							\
 									\
-	___pmd = pmdp_get_and_clear(__mm, __haddr, __pmd);		\
+	___pmd = pmdp_huge_get_and_clear(__mm, __haddr, __pmd);		\
 	mmu_notifier_invalidate_range(__mm, ___haddr,			\
 				      ___haddr + HPAGE_PMD_SIZE);	\
 									\
@@ -428,8 +428,8 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
 #define ptep_clear_flush_young_notify ptep_clear_flush_young
 #define pmdp_clear_flush_young_notify pmdp_clear_flush_young
 #define	ptep_clear_flush_notify ptep_clear_flush
-#define pmdp_clear_flush_notify pmdp_clear_flush
-#define pmdp_get_and_clear_notify pmdp_get_and_clear
+#define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush
+#define pmdp_huge_get_and_clear_notify pmdp_huge_get_and_clear
 #define set_pte_at_notify set_pte_at
 
 #endif /* CONFIG_MMU_NOTIFIER */
-- 
cgit v1.2.3


From fc6daaf93151877748f8096af6b3fddb147f22d6 Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Wed, 24 Jun 2015 16:58:09 -0700
Subject: mm/memblock: add extra "flags" to memblock to allow selection of
 memory based on attribute

Some high end Intel Xeon systems report uncorrectable memory errors as a
recoverable machine check.  Linux has included code for some time to
process these and just signal the affected processes (or even recover
completely if the error was in a read only page that can be replaced by
reading from disk).

But we have no recovery path for errors encountered during kernel code
execution.  Except for some very specific cases were are unlikely to ever
be able to recover.

Enter memory mirroring. Actually 3rd generation of memory mirroing.

Gen1: All memory is mirrored
	Pro: No s/w enabling - h/w just gets good data from other side of the
	     mirror
	Con: Halves effective memory capacity available to OS/applications

Gen2: Partial memory mirror - just mirror memory begind some memory controllers
	Pro: Keep more of the capacity
	Con: Nightmare to enable. Have to choose between allocating from
	     mirrored memory for safety vs. NUMA local memory for performance

Gen3: Address range partial memory mirror - some mirror on each memory
      controller
	Pro: Can tune the amount of mirror and keep NUMA performance
	Con: I have to write memory management code to implement

The current plan is just to use mirrored memory for kernel allocations.
This has been broken into two phases:

1) This patch series - find the mirrored memory, use it for boot time
   allocations

2) Wade into mm/page_alloc.c and define a ZONE_MIRROR to pick up the
   unused mirrored memory from mm/memblock.c and only give it out to
   select kernel allocations (this is still being scoped because
   page_alloc.c is scary).

This patch (of 3):

Add extra "flags" to memblock to allow selection of memory based on
attribute.  No functional changes

Signed-off-by: Tony Luck <tony.luck@intel.com>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Cc: Hanjun Guo <guohanjun@huawei.com>
Cc: Xiexiuqi <xiexiuqi@huawei.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Naoya Horiguchi <nao.horiguchi@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memblock.h | 41 ++++++++++++++++++++++++++---------------
 1 file changed, 26 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 9497ec7c77ea..7aeec0cb4c27 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -21,7 +21,10 @@
 #define INIT_PHYSMEM_REGIONS	4
 
 /* Definition of memblock flags. */
-#define MEMBLOCK_HOTPLUG	0x1	/* hotpluggable region */
+enum {
+	MEMBLOCK_NONE		= 0x0,	/* No special request */
+	MEMBLOCK_HOTPLUG	= 0x1,	/* hotpluggable region */
+};
 
 struct memblock_region {
 	phys_addr_t base;
@@ -61,7 +64,7 @@ extern bool movable_node_enabled;
 
 phys_addr_t memblock_find_in_range_node(phys_addr_t size, phys_addr_t align,
 					    phys_addr_t start, phys_addr_t end,
-					    int nid);
+					    int nid, ulong flags);
 phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end,
 				   phys_addr_t size, phys_addr_t align);
 phys_addr_t get_allocated_memblock_reserved_regions_info(phys_addr_t *addr);
@@ -85,11 +88,13 @@ int memblock_remove_range(struct memblock_type *type,
 			  phys_addr_t base,
 			  phys_addr_t size);
 
-void __next_mem_range(u64 *idx, int nid, struct memblock_type *type_a,
+void __next_mem_range(u64 *idx, int nid, ulong flags,
+		      struct memblock_type *type_a,
 		      struct memblock_type *type_b, phys_addr_t *out_start,
 		      phys_addr_t *out_end, int *out_nid);
 
-void __next_mem_range_rev(u64 *idx, int nid, struct memblock_type *type_a,
+void __next_mem_range_rev(u64 *idx, int nid, ulong flags,
+			  struct memblock_type *type_a,
 			  struct memblock_type *type_b, phys_addr_t *out_start,
 			  phys_addr_t *out_end, int *out_nid);
 
@@ -100,16 +105,17 @@ void __next_mem_range_rev(u64 *idx, int nid, struct memblock_type *type_a,
  * @type_a: ptr to memblock_type to iterate
  * @type_b: ptr to memblock_type which excludes from the iteration
  * @nid: node selector, %NUMA_NO_NODE for all nodes
+ * @flags: pick from blocks based on memory attributes
  * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
  * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
  * @p_nid: ptr to int for nid of the range, can be %NULL
  */
-#define for_each_mem_range(i, type_a, type_b, nid,			\
+#define for_each_mem_range(i, type_a, type_b, nid, flags,		\
 			   p_start, p_end, p_nid)			\
-	for (i = 0, __next_mem_range(&i, nid, type_a, type_b,		\
+	for (i = 0, __next_mem_range(&i, nid, flags, type_a, type_b,	\
 				     p_start, p_end, p_nid);		\
 	     i != (u64)ULLONG_MAX;					\
-	     __next_mem_range(&i, nid, type_a, type_b,			\
+	     __next_mem_range(&i, nid, flags, type_a, type_b,		\
 			      p_start, p_end, p_nid))
 
 /**
@@ -119,17 +125,18 @@ void __next_mem_range_rev(u64 *idx, int nid, struct memblock_type *type_a,
  * @type_a: ptr to memblock_type to iterate
  * @type_b: ptr to memblock_type which excludes from the iteration
  * @nid: node selector, %NUMA_NO_NODE for all nodes
+ * @flags: pick from blocks based on memory attributes
  * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
  * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
  * @p_nid: ptr to int for nid of the range, can be %NULL
  */
-#define for_each_mem_range_rev(i, type_a, type_b, nid,			\
+#define for_each_mem_range_rev(i, type_a, type_b, nid, flags,		\
 			       p_start, p_end, p_nid)			\
 	for (i = (u64)ULLONG_MAX,					\
-		     __next_mem_range_rev(&i, nid, type_a, type_b,	\
+		     __next_mem_range_rev(&i, nid, flags, type_a, type_b,\
 					 p_start, p_end, p_nid);	\
 	     i != (u64)ULLONG_MAX;					\
-	     __next_mem_range_rev(&i, nid, type_a, type_b,		\
+	     __next_mem_range_rev(&i, nid, flags, type_a, type_b,	\
 				  p_start, p_end, p_nid))
 
 #ifdef CONFIG_MOVABLE_NODE
@@ -181,13 +188,14 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
  * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
  * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
  * @p_nid: ptr to int for nid of the range, can be %NULL
+ * @flags: pick from blocks based on memory attributes
  *
  * Walks over free (memory && !reserved) areas of memblock.  Available as
  * soon as memblock is initialized.
  */
-#define for_each_free_mem_range(i, nid, p_start, p_end, p_nid)		\
+#define for_each_free_mem_range(i, nid, flags, p_start, p_end, p_nid)	\
 	for_each_mem_range(i, &memblock.memory, &memblock.reserved,	\
-			   nid, p_start, p_end, p_nid)
+			   nid, flags, p_start, p_end, p_nid)
 
 /**
  * for_each_free_mem_range_reverse - rev-iterate through free memblock areas
@@ -196,13 +204,15 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
  * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
  * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
  * @p_nid: ptr to int for nid of the range, can be %NULL
+ * @flags: pick from blocks based on memory attributes
  *
  * Walks over free (memory && !reserved) areas of memblock in reverse
  * order.  Available as soon as memblock is initialized.
  */
-#define for_each_free_mem_range_reverse(i, nid, p_start, p_end, p_nid)	\
+#define for_each_free_mem_range_reverse(i, nid, flags, p_start, p_end,	\
+					p_nid)				\
 	for_each_mem_range_rev(i, &memblock.memory, &memblock.reserved,	\
-			       nid, p_start, p_end, p_nid)
+			       nid, flags, p_start, p_end, p_nid)
 
 static inline void memblock_set_region_flags(struct memblock_region *r,
 					     unsigned long flags)
@@ -273,7 +283,8 @@ static inline bool memblock_bottom_up(void) { return false; }
 #define MEMBLOCK_ALLOC_ACCESSIBLE	0
 
 phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align,
-					phys_addr_t start, phys_addr_t end);
+					phys_addr_t start, phys_addr_t end,
+					ulong flags);
 phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align,
 				phys_addr_t max_addr);
 phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align,
-- 
cgit v1.2.3


From a3f5bafcc04aaf62990e0cf3ced1cc6d8dc6fe95 Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Wed, 24 Jun 2015 16:58:12 -0700
Subject: mm/memblock: allocate boot time data structures from mirrored memory

Try to allocate all boot time kernel data structures from mirrored
memory.

If we run out of mirrored memory print warnings, but fall back to using
non-mirrored memory to make sure that we still boot.

By number of bytes, most of what we allocate at boot time is the page
structures.  64 bytes per 4K page on x86_64 ...  or about 1.5% of total
system memory.  For workloads where the bulk of memory is allocated to
applications this may represent a useful improvement to system
availability since 1.5% of total memory might be a third of the memory
allocated to the kernel.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Cc: Hanjun Guo <guohanjun@huawei.com>
Cc: Xiexiuqi <xiexiuqi@huawei.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Naoya Horiguchi <nao.horiguchi@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memblock.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 7aeec0cb4c27..0215ffd63069 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -24,6 +24,7 @@
 enum {
 	MEMBLOCK_NONE		= 0x0,	/* No special request */
 	MEMBLOCK_HOTPLUG	= 0x1,	/* hotpluggable region */
+	MEMBLOCK_MIRROR		= 0x2,	/* mirrored region */
 };
 
 struct memblock_region {
@@ -78,6 +79,8 @@ int memblock_reserve(phys_addr_t base, phys_addr_t size);
 void memblock_trim_memory(phys_addr_t align);
 int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size);
 int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size);
+int memblock_mark_mirror(phys_addr_t base, phys_addr_t size);
+ulong choose_memblock_flags(void);
 
 /* Low level functions */
 int memblock_add_range(struct memblock_type *type,
@@ -160,6 +163,11 @@ static inline bool movable_node_is_enabled(void)
 }
 #endif
 
+static inline bool memblock_is_mirror(struct memblock_region *m)
+{
+	return m->flags & MEMBLOCK_MIRROR;
+}
+
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn,
 			    unsigned long  *end_pfn);
-- 
cgit v1.2.3


From b05b9f5f9dcf593a0e9327676b78e6c17b4218e8 Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Wed, 24 Jun 2015 16:58:15 -0700
Subject: x86, mirror: x86 enabling - find mirrored memory ranges

UEFI GetMemoryMap() uses a new attribute bit to mark mirrored memory
address ranges.  See UEFI 2.5 spec pages 157-158:

  http://www.uefi.org/sites/default/files/resources/UEFI%202_5.pdf

On EFI enabled systems scan the memory map and tell memblock about any
mirrored ranges.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Cc: Hanjun Guo <guohanjun@huawei.com>
Cc: Xiexiuqi <xiexiuqi@huawei.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Naoya Horiguchi <nao.horiguchi@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/efi.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index 2092965afca3..5f19efe4eb3f 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -96,6 +96,8 @@ typedef	struct {
 #define EFI_MEMORY_WP		((u64)0x0000000000001000ULL)	/* write-protect */
 #define EFI_MEMORY_RP		((u64)0x0000000000002000ULL)	/* read-protect */
 #define EFI_MEMORY_XP		((u64)0x0000000000004000ULL)	/* execute-protect */
+#define EFI_MEMORY_MORE_RELIABLE \
+				((u64)0x0000000000010000ULL)	/* higher reliability */
 #define EFI_MEMORY_RUNTIME	((u64)0x8000000000000000ULL)	/* range requires runtime mapping */
 #define EFI_MEMORY_DESCRIPTOR_VERSION	1
 
@@ -868,6 +870,7 @@ extern void efi_enter_virtual_mode (void);	/* switch EFI to virtual mode, if pos
 extern void efi_late_init(void);
 extern void efi_free_boot_services(void);
 extern efi_status_t efi_query_variable_store(u32 attributes, unsigned long size);
+extern void efi_find_mirror(void);
 #else
 static inline void efi_late_init(void) {}
 static inline void efi_free_boot_services(void) {}
-- 
cgit v1.2.3


From d1dc6f1bcf1e998e7ce65fc120da371ab047a999 Mon Sep 17 00:00:00 2001
From: Dan Streetman <ddstreet@ieee.org>
Date: Wed, 24 Jun 2015 16:58:18 -0700
Subject: frontswap: allow multiple backends

Change frontswap single pointer to a singly linked list of frontswap
implementations.  Update Xen tmem implementation as register no longer
returns anything.

Frontswap only keeps track of a single implementation; any
implementation that registers second (or later) will replace the
previously registered implementation, and gets a pointer to the previous
implementation that the new implementation is expected to pass all
frontswap functions to if it can't handle the function itself.  However
that method doesn't really make much sense, as passing that work on to
every implementation adds unnecessary work to implementations; instead,
frontswap should simply keep a list of all registered implementations
and try each implementation for any function.  Most importantly, neither
of the two currently existing frontswap implementations in the kernel
actually do anything with any previous frontswap implementation that
they replace when registering.

This allows frontswap to successfully manage multiple implementations by
keeping a list of them all.

Signed-off-by: Dan Streetman <ddstreet@ieee.org>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/frontswap.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h
index 8293262401de..e65ef959546c 100644
--- a/include/linux/frontswap.h
+++ b/include/linux/frontswap.h
@@ -6,16 +6,16 @@
 #include <linux/bitops.h>
 
 struct frontswap_ops {
-	void (*init)(unsigned);
-	int (*store)(unsigned, pgoff_t, struct page *);
-	int (*load)(unsigned, pgoff_t, struct page *);
-	void (*invalidate_page)(unsigned, pgoff_t);
-	void (*invalidate_area)(unsigned);
+	void (*init)(unsigned); /* this swap type was just swapon'ed */
+	int (*store)(unsigned, pgoff_t, struct page *); /* store a page */
+	int (*load)(unsigned, pgoff_t, struct page *); /* load a page */
+	void (*invalidate_page)(unsigned, pgoff_t); /* page no longer needed */
+	void (*invalidate_area)(unsigned); /* swap type just swapoff'ed */
+	struct frontswap_ops *next; /* private pointer to next ops */
 };
 
 extern bool frontswap_enabled;
-extern struct frontswap_ops *
-	frontswap_register_ops(struct frontswap_ops *ops);
+extern void frontswap_register_ops(struct frontswap_ops *ops);
 extern void frontswap_shrink(unsigned long);
 extern unsigned long frontswap_curr_pages(void);
 extern void frontswap_writethrough(bool);
-- 
cgit v1.2.3


From 8a8c35fadfaf55629a37ef1a8ead1b8fb32581d2 Mon Sep 17 00:00:00 2001
From: Larry Finger <Larry.Finger@lwfinger.net>
Date: Wed, 24 Jun 2015 16:58:51 -0700
Subject: mm: kmemleak_alloc_percpu() should follow the gfp from per_alloc()

Beginning at commit d52d3997f843 ("ipv6: Create percpu rt6_info"), the
following INFO splat is logged:

  ===============================
  [ INFO: suspicious RCU usage. ]
  4.1.0-rc7-next-20150612 #1 Not tainted
  -------------------------------
  kernel/sched/core.c:7318 Illegal context switch in RCU-bh read-side critical section!
  other info that might help us debug this:
  rcu_scheduler_active = 1, debug_locks = 0
   3 locks held by systemd/1:
   #0:  (rtnl_mutex){+.+.+.}, at: [<ffffffff815f0c8f>] rtnetlink_rcv+0x1f/0x40
   #1:  (rcu_read_lock_bh){......}, at: [<ffffffff816a34e2>] ipv6_add_addr+0x62/0x540
   #2:  (addrconf_hash_lock){+...+.}, at: [<ffffffff816a3604>] ipv6_add_addr+0x184/0x540
  stack backtrace:
  CPU: 0 PID: 1 Comm: systemd Not tainted 4.1.0-rc7-next-20150612 #1
  Hardware name: TOSHIBA TECRA A50-A/TECRA A50-A, BIOS Version 4.20   04/17/2014
  Call Trace:
    dump_stack+0x4c/0x6e
    lockdep_rcu_suspicious+0xe7/0x120
    ___might_sleep+0x1d5/0x1f0
    __might_sleep+0x4d/0x90
    kmem_cache_alloc+0x47/0x250
    create_object+0x39/0x2e0
    kmemleak_alloc_percpu+0x61/0xe0
    pcpu_alloc+0x370/0x630

Additional backtrace lines are truncated.  In addition, the above splat
is followed by several "BUG: sleeping function called from invalid
context at mm/slub.c:1268" outputs.  As suggested by Martin KaFai Lau,
these are the clue to the fix.  Routine kmemleak_alloc_percpu() always
uses GFP_KERNEL for its allocations, whereas it should follow the gfp
from its callers.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Kamalesh Babulal <kamalesh@linux.vnet.ibm.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>
Cc: Martin KaFai Lau <kafai@fb.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: <stable@vger.kernel.org>	[3.18+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kmemleak.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h
index e705467ddb47..d0a1f99e24e3 100644
--- a/include/linux/kmemleak.h
+++ b/include/linux/kmemleak.h
@@ -28,7 +28,8 @@
 extern void kmemleak_init(void) __ref;
 extern void kmemleak_alloc(const void *ptr, size_t size, int min_count,
 			   gfp_t gfp) __ref;
-extern void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size) __ref;
+extern void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size,
+				  gfp_t gfp) __ref;
 extern void kmemleak_free(const void *ptr) __ref;
 extern void kmemleak_free_part(const void *ptr, size_t size) __ref;
 extern void kmemleak_free_percpu(const void __percpu *ptr) __ref;
@@ -71,7 +72,8 @@ static inline void kmemleak_alloc_recursive(const void *ptr, size_t size,
 					    gfp_t gfp)
 {
 }
-static inline void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size)
+static inline void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size,
+					 gfp_t gfp)
 {
 }
 static inline void kmemleak_free(const void *ptr)
-- 
cgit v1.2.3


From b94d5230d06eb930be82e67fb1a9a58271e78297 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 19 May 2015 22:54:31 -0400
Subject: libnvdimm, nfit: initial libnvdimm infrastructure and NFIT support

A struct nvdimm_bus is the anchor device for registering nvdimm
resources and interfaces, for example, a character control device,
nvdimm devices, and I/O region devices.  The ACPI NFIT (NVDIMM Firmware
Interface Table) is one possible platform description for such
non-volatile memory resources in a system.  The nfit.ko driver attaches
to the "ACPI0012" device that indicates the presence of the NFIT and
parses the table to register a struct nvdimm_bus instance.

Cc: <linux-acpi@vger.kernel.org>
Cc: Lv Zheng <lv.zheng@intel.com>
Cc: Robert Moore <robert.moore@intel.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Jeff Moyer <jmoyer@redhat.com>
Acked-by: Christoph Hellwig <hch@lst.de>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Tested-by: Toshi Kani <toshi.kani@hp.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/libnvdimm.h | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100644 include/linux/libnvdimm.h

(limited to 'include/linux')

diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
new file mode 100644
index 000000000000..2b3c63950c91
--- /dev/null
+++ b/include/linux/libnvdimm.h
@@ -0,0 +1,34 @@
+/*
+ * libnvdimm - Non-volatile-memory Devices Subsystem
+ *
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#ifndef __LIBNVDIMM_H__
+#define __LIBNVDIMM_H__
+struct nvdimm;
+struct nvdimm_bus_descriptor;
+typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc,
+		struct nvdimm *nvdimm, unsigned int cmd, void *buf,
+		unsigned int buf_len);
+
+struct nvdimm_bus_descriptor {
+	unsigned long dsm_mask;
+	char *provider_name;
+	ndctl_fn ndctl;
+};
+
+struct device;
+struct nvdimm_bus;
+struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
+		struct nvdimm_bus_descriptor *nfit_desc);
+void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus);
+#endif /* __LIBNVDIMM_H__ */
-- 
cgit v1.2.3


From 45def22c1fab85764646746ce38d45b2f3281fa5 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sun, 26 Apr 2015 19:26:48 -0400
Subject: libnvdimm: control character device and nvdimm_bus sysfs attributes

The control device for a nvdimm_bus is registered as an "nd" class
device.  The expectation is that there will usually only be one "nd" bus
registered under /sys/class/nd.  However, we allow for the possibility
of multiple buses and they will listed in discovery order as
ndctl0...ndctlN.  This character device hosts the ioctl for passing
control messages.  The initial command set has a 1:1 correlation with
the commands listed in the by the "NFIT DSM Example" document [1], but
this scheme is extensible to future command sets.

Note, nd_ioctl() and the backing ->ndctl() implementation are defined in
a subsequent patch.  This is simply the initial registrations and sysfs
attributes.

[1]: http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf

Cc: Neil Brown <neilb@suse.de>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: <linux-acpi@vger.kernel.org>
Cc: Robert Moore <robert.moore@intel.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Christoph Hellwig <hch@lst.de>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Tested-by: Toshi Kani <toshi.kani@hp.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/libnvdimm.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 2b3c63950c91..d375cdc4abd5 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -14,6 +14,8 @@
  */
 #ifndef __LIBNVDIMM_H__
 #define __LIBNVDIMM_H__
+extern struct attribute_group nvdimm_bus_attribute_group;
+
 struct nvdimm;
 struct nvdimm_bus_descriptor;
 typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc,
@@ -21,14 +23,16 @@ typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc,
 		unsigned int buf_len);
 
 struct nvdimm_bus_descriptor {
+	const struct attribute_group **attr_groups;
 	unsigned long dsm_mask;
 	char *provider_name;
 	ndctl_fn ndctl;
 };
 
 struct device;
-struct nvdimm_bus;
 struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
 		struct nvdimm_bus_descriptor *nfit_desc);
 void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus);
+struct nvdimm_bus *to_nvdimm_bus(struct device *dev);
+struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus);
 #endif /* __LIBNVDIMM_H__ */
-- 
cgit v1.2.3


From e6dfb2de47768efe8cc37c9a1863d2aff81440fb Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sat, 25 Apr 2015 03:56:17 -0400
Subject: libnvdimm, nfit: dimm/memory-devices

Enable nvdimm devices to be registered on a nvdimm_bus.  The kernel
assigned device id for nvdimm devicesis dynamic.  If userspace needs a
more static identifier it should consult a provider-specific attribute.
In the case where NFIT is the provider, the 'nmemX/nfit/handle' or
'nmemX/nfit/serial' attributes may be used for this purpose.

Cc: Neil Brown <neilb@suse.de>
Cc: <linux-acpi@vger.kernel.org>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Robert Moore <robert.moore@intel.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Christoph Hellwig <hch@lst.de>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Tested-by: Toshi Kani <toshi.kani@hp.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/libnvdimm.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index d375cdc4abd5..07787f0dd7de 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -14,6 +14,12 @@
  */
 #ifndef __LIBNVDIMM_H__
 #define __LIBNVDIMM_H__
+
+enum {
+	/* when a dimm supports both PMEM and BLK access a label is required */
+	NDD_ALIASING = 1 << 0,
+};
+
 extern struct attribute_group nvdimm_bus_attribute_group;
 
 struct nvdimm;
@@ -34,5 +40,10 @@ struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
 		struct nvdimm_bus_descriptor *nfit_desc);
 void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus);
 struct nvdimm_bus *to_nvdimm_bus(struct device *dev);
+struct nvdimm *to_nvdimm(struct device *dev);
 struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus);
+const char *nvdimm_name(struct nvdimm *nvdimm);
+void *nvdimm_provider_data(struct nvdimm *nvdimm);
+struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
+		const struct attribute_group **groups, unsigned long flags);
 #endif /* __LIBNVDIMM_H__ */
-- 
cgit v1.2.3


From 62232e45f4a265abb43f0acf16e58f5d0b6e1ec9 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 8 Jun 2015 14:27:06 -0400
Subject: libnvdimm: control (ioctl) messages for nvdimm_bus and nvdimm devices

Most discovery/configuration of the nvdimm-subsystem is done via sysfs
attributes.  However, some nvdimm_bus instances, particularly the
ACPI.NFIT bus, define a small set of messages that can be passed to the
platform.  For convenience we derive the initial libnvdimm-ioctl command
formats directly from the NFIT DSM Interface Example formats.

    ND_CMD_SMART: media health and diagnostics
    ND_CMD_GET_CONFIG_SIZE: size of the label space
    ND_CMD_GET_CONFIG_DATA: read label space
    ND_CMD_SET_CONFIG_DATA: write label space
    ND_CMD_VENDOR: vendor-specific command passthrough
    ND_CMD_ARS_CAP: report address-range-scrubbing capabilities
    ND_CMD_ARS_START: initiate scrubbing
    ND_CMD_ARS_STATUS: report on scrubbing state
    ND_CMD_SMART_THRESHOLD: configure alarm thresholds for smart events

If a platform later defines different commands than this set it is
straightforward to extend support to those formats.

Most of the commands target a specific dimm.  However, the
address-range-scrubbing commands target the bus.  The 'commands'
attribute in sysfs of an nvdimm_bus, or nvdimm, enumerate the supported
commands for that object.

Cc: <linux-acpi@vger.kernel.org>
Cc: Robert Moore <robert.moore@intel.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reported-by: Nicholas Moulin <nicholas.w.moulin@linux.intel.com>
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/libnvdimm.h | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 07787f0dd7de..a39235819af3 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -14,13 +14,22 @@
  */
 #ifndef __LIBNVDIMM_H__
 #define __LIBNVDIMM_H__
+#include <linux/sizes.h>
+#include <linux/types.h>
 
 enum {
 	/* when a dimm supports both PMEM and BLK access a label is required */
 	NDD_ALIASING = 1 << 0,
+
+	/* need to set a limit somewhere, but yes, this is likely overkill */
+	ND_IOCTL_MAX_BUFLEN = SZ_4M,
+	ND_CMD_MAX_ELEM = 4,
+	ND_CMD_MAX_ENVELOPE = 16,
+	ND_CMD_ARS_STATUS_MAX = SZ_4K,
 };
 
 extern struct attribute_group nvdimm_bus_attribute_group;
+extern struct attribute_group nvdimm_attribute_group;
 
 struct nvdimm;
 struct nvdimm_bus_descriptor;
@@ -35,6 +44,14 @@ struct nvdimm_bus_descriptor {
 	ndctl_fn ndctl;
 };
 
+struct nd_cmd_desc {
+	int in_num;
+	int out_num;
+	u32 in_sizes[ND_CMD_MAX_ELEM];
+	int out_sizes[ND_CMD_MAX_ELEM];
+};
+
+struct nvdimm_bus;
 struct device;
 struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
 		struct nvdimm_bus_descriptor *nfit_desc);
@@ -45,5 +62,13 @@ struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus);
 const char *nvdimm_name(struct nvdimm *nvdimm);
 void *nvdimm_provider_data(struct nvdimm *nvdimm);
 struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
-		const struct attribute_group **groups, unsigned long flags);
+		const struct attribute_group **groups, unsigned long flags,
+		unsigned long *dsm_mask);
+const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd);
+const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd);
+u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd,
+		const struct nd_cmd_desc *desc, int idx, void *buf);
+u32 nd_cmd_out_size(struct nvdimm *nvdimm, int cmd,
+		const struct nd_cmd_desc *desc, int idx, const u32 *in_field,
+		const u32 *out_field);
 #endif /* __LIBNVDIMM_H__ */
-- 
cgit v1.2.3


From 4d88a97aa9e8cfa6460aab119c5da60ad2267423 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sun, 31 May 2015 14:41:48 -0400
Subject: libnvdimm, nvdimm: dimm driver and base libnvdimm device-driver
 infrastructure

* Implement the device-model infrastructure for loading modules and
  attaching drivers to nvdimm devices.  This is a simple association of a
  nd-device-type number with a driver that has a bitmask of supported
  device types.  To facilitate userspace bind/unbind operations 'modalias'
  and 'devtype', that also appear in the uevent, are added as generic
  sysfs attributes for all nvdimm devices.  The reason for the device-type
  number is to support sub-types within a given parent devtype, be it a
  vendor-specific sub-type or otherwise.

* The first consumer of this infrastructure is the driver
  for dimm devices.  It simply uses control messages to retrieve and
  store the configuration-data image (label set) from each dimm.

Note: nd_device_register() arranges for asynchronous registration of
      nvdimm bus devices by default.

Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Neil Brown <neilb@suse.de>
Acked-by: Christoph Hellwig <hch@lst.de>
Tested-by: Toshi Kani <toshi.kani@hp.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/libnvdimm.h |  2 ++
 include/linux/nd.h        | 39 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+)
 create mode 100644 include/linux/nd.h

(limited to 'include/linux')

diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index a39235819af3..d3ebccf4ea8b 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -30,6 +30,7 @@ enum {
 
 extern struct attribute_group nvdimm_bus_attribute_group;
 extern struct attribute_group nvdimm_attribute_group;
+extern struct attribute_group nd_device_attribute_group;
 
 struct nvdimm;
 struct nvdimm_bus_descriptor;
@@ -71,4 +72,5 @@ u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd,
 u32 nd_cmd_out_size(struct nvdimm *nvdimm, int cmd,
 		const struct nd_cmd_desc *desc, int idx, const u32 *in_field,
 		const u32 *out_field);
+int nvdimm_bus_check_dimm_count(struct nvdimm_bus *nvdimm_bus, int dimm_count);
 #endif /* __LIBNVDIMM_H__ */
diff --git a/include/linux/nd.h b/include/linux/nd.h
new file mode 100644
index 000000000000..e074f67e53a3
--- /dev/null
+++ b/include/linux/nd.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#ifndef __LINUX_ND_H__
+#define __LINUX_ND_H__
+#include <linux/ndctl.h>
+#include <linux/device.h>
+
+struct nd_device_driver {
+	struct device_driver drv;
+	unsigned long type;
+	int (*probe)(struct device *dev);
+	int (*remove)(struct device *dev);
+};
+
+static inline struct nd_device_driver *to_nd_device_driver(
+		struct device_driver *drv)
+{
+	return container_of(drv, struct nd_device_driver, drv);
+}
+
+#define MODULE_ALIAS_ND_DEVICE(type) \
+	MODULE_ALIAS("nd:t" __stringify(type) "*")
+#define ND_DEVICE_MODALIAS_FMT "nd:t%d"
+
+int __must_check __nd_driver_register(struct nd_device_driver *nd_drv,
+		struct module *module, const char *mod_name);
+#define nd_driver_register(driver) \
+	__nd_driver_register(driver, THIS_MODULE, KBUILD_MODNAME)
+#endif /* __LINUX_ND_H__ */
-- 
cgit v1.2.3


From 1f7df6f88b9245a7f2d0f8ecbc97dc88c8d0d8e1 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 9 Jun 2015 20:13:14 -0400
Subject: libnvdimm, nfit: regions (block-data-window, persistent memory,
 volatile memory)

A "region" device represents the maximum capacity of a BLK range (mmio
block-data-window(s)), or a PMEM range (DAX-capable persistent memory or
volatile memory), without regard for aliasing.  Aliasing, in the
dimm-local address space (DPA), is resolved by metadata on a dimm to
designate which exclusive interface will access the aliased DPA ranges.
Support for the per-dimm metadata/label arrvies is in a subsequent
patch.

The name format of "region" devices is "regionN" where, like dimms, N is
a global ida index assigned at discovery time.  This id is not reliable
across reboots nor in the presence of hotplug.  Look to attributes of
the region or static id-data of the sub-namespace to generate a
persistent name.  However, if the platform configuration does not change
it is reasonable to expect the same region id to be assigned at the next
boot.

"region"s have 2 generic attributes "size", and "mapping"s where:
- size: the BLK accessible capacity or the span of the
  system physical address range in the case of PMEM.

- mappingN: a tuple describing a dimm's contribution to the region's
  capacity in the format (<nmemX>,<dpa>,<size>).  For a PMEM-region
  there will be at least one mapping per dimm in the interleave set.  For
  a BLK-region there is only "mapping0" listing the starting DPA of the
  BLK-region and the available DPA capacity of that space (matches "size"
  above).

The max number of mappings per "region" is hard coded per the
constraints of sysfs attribute groups.  That said the number of mappings
per region should never exceed the maximum number of possible dimms in
the system.  If the current number turns out to not be enough then the
"mappings" attribute clarifies how many there are supposed to be. "32
should be enough for anybody...".

Cc: Neil Brown <neilb@suse.de>
Cc: <linux-acpi@vger.kernel.org>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Robert Moore <robert.moore@intel.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Christoph Hellwig <hch@lst.de>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Tested-by: Toshi Kani <toshi.kani@hp.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/libnvdimm.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index d3ebccf4ea8b..39e7e606092a 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -26,11 +26,14 @@ enum {
 	ND_CMD_MAX_ELEM = 4,
 	ND_CMD_MAX_ENVELOPE = 16,
 	ND_CMD_ARS_STATUS_MAX = SZ_4K,
+	ND_MAX_MAPPINGS = 32,
 };
 
 extern struct attribute_group nvdimm_bus_attribute_group;
 extern struct attribute_group nvdimm_attribute_group;
 extern struct attribute_group nd_device_attribute_group;
+extern struct attribute_group nd_region_attribute_group;
+extern struct attribute_group nd_mapping_attribute_group;
 
 struct nvdimm;
 struct nvdimm_bus_descriptor;
@@ -38,6 +41,12 @@ typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc,
 		struct nvdimm *nvdimm, unsigned int cmd, void *buf,
 		unsigned int buf_len);
 
+struct nd_mapping {
+	struct nvdimm *nvdimm;
+	u64 start;
+	u64 size;
+};
+
 struct nvdimm_bus_descriptor {
 	const struct attribute_group **attr_groups;
 	unsigned long dsm_mask;
@@ -52,6 +61,14 @@ struct nd_cmd_desc {
 	int out_sizes[ND_CMD_MAX_ELEM];
 };
 
+struct nd_region_desc {
+	struct resource *res;
+	struct nd_mapping *nd_mapping;
+	u16 num_mappings;
+	const struct attribute_group **attr_groups;
+	void *provider_data;
+};
+
 struct nvdimm_bus;
 struct device;
 struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
@@ -59,9 +76,11 @@ struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
 void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus);
 struct nvdimm_bus *to_nvdimm_bus(struct device *dev);
 struct nvdimm *to_nvdimm(struct device *dev);
+struct nd_region *to_nd_region(struct device *dev);
 struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus);
 const char *nvdimm_name(struct nvdimm *nvdimm);
 void *nvdimm_provider_data(struct nvdimm *nvdimm);
+void *nd_region_provider_data(struct nd_region *nd_region);
 struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
 		const struct attribute_group **groups, unsigned long flags,
 		unsigned long *dsm_mask);
@@ -73,4 +92,10 @@ u32 nd_cmd_out_size(struct nvdimm *nvdimm, int cmd,
 		const struct nd_cmd_desc *desc, int idx, const u32 *in_field,
 		const u32 *out_field);
 int nvdimm_bus_check_dimm_count(struct nvdimm_bus *nvdimm_bus, int dimm_count);
+struct nd_region *nvdimm_pmem_region_create(struct nvdimm_bus *nvdimm_bus,
+		struct nd_region_desc *ndr_desc);
+struct nd_region *nvdimm_blk_region_create(struct nvdimm_bus *nvdimm_bus,
+		struct nd_region_desc *ndr_desc);
+struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus,
+		struct nd_region_desc *ndr_desc);
 #endif /* __LIBNVDIMM_H__ */
-- 
cgit v1.2.3


From 3d88002e4a7bd40f355550284c6cd140e6fe29dc Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sun, 31 May 2015 15:02:11 -0400
Subject: libnvdimm: support for legacy (non-aliasing) nvdimms

The libnvdimm region driver is an intermediary driver that translates
non-volatile "region"s into "namespace" sub-devices that are surfaced by
persistent memory block-device drivers (PMEM and BLK).

ACPI 6 introduces the concept that a given nvdimm may simultaneously
offer multiple access modes to its media through direct PMEM load/store
access, or windowed BLK mode.  Existing nvdimms mostly implement a PMEM
interface, some offer a BLK-like mode, but never both as ACPI 6 defines.
If an nvdimm is single interfaced, then there is no need for dimm
metadata labels.  For these devices we can take the region boundaries
directly to create a child namespace device (nd_namespace_io).

Acked-by: Christoph Hellwig <hch@lst.de>
Tested-by: Toshi Kani <toshi.kani@hp.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/libnvdimm.h |  7 +++++--
 include/linux/nd.h        | 10 ++++++++++
 2 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 39e7e606092a..37f966aff386 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -71,8 +71,11 @@ struct nd_region_desc {
 
 struct nvdimm_bus;
 struct device;
-struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
-		struct nvdimm_bus_descriptor *nfit_desc);
+struct module;
+struct nvdimm_bus *__nvdimm_bus_register(struct device *parent,
+		struct nvdimm_bus_descriptor *nfit_desc, struct module *module);
+#define nvdimm_bus_register(parent, desc) \
+	__nvdimm_bus_register(parent, desc, THIS_MODULE)
 void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus);
 struct nvdimm_bus *to_nvdimm_bus(struct device *dev);
 struct nvdimm *to_nvdimm(struct device *dev);
diff --git a/include/linux/nd.h b/include/linux/nd.h
index e074f67e53a3..da70e9962197 100644
--- a/include/linux/nd.h
+++ b/include/linux/nd.h
@@ -26,6 +26,16 @@ static inline struct nd_device_driver *to_nd_device_driver(
 		struct device_driver *drv)
 {
 	return container_of(drv, struct nd_device_driver, drv);
+};
+
+struct nd_namespace_io {
+	struct device dev;
+	struct resource res;
+};
+
+static inline struct nd_namespace_io *to_nd_namespace_io(struct device *dev)
+{
+	return container_of(dev, struct nd_namespace_io, dev);
 }
 
 #define MODULE_ALIAS_ND_DEVICE(type) \
-- 
cgit v1.2.3


From eaf961536e1622ad21247ac8d44acd48ba65566e Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 1 May 2015 13:11:27 -0400
Subject: libnvdimm, nfit: add interleave-set state-tracking infrastructure

On platforms that have firmware support for reading/writing per-dimm
label space, a portion of the dimm may be accessible via an interleave
set PMEM mapping in addition to the dimm's BLK (block-data-window
aperture(s)) interface.  A label, stored in a "configuration data
region" on the dimm, disambiguates which dimm addresses are accessed
through which exclusive interface.

Add infrastructure that allows the kernel to block modifications to a
label in the set while any member dimm is active.  Note that this is
meant only for enforcing "no modifications of active labels" via the
coarse ioctl command.  Adding/deleting namespaces from an active
interleave set is always possible via sysfs.

Another aspect of tracking interleave sets is tracking their integrity
when DIMMs in a set are physically re-ordered.  For this purpose we
generate an "interleave-set cookie" that can be recorded in a label and
validated against the current configuration.  It is the bus provider
implementation's responsibility to calculate the interleave set cookie
and attach it to a given region.

Cc: Neil Brown <neilb@suse.de>
Cc: <linux-acpi@vger.kernel.org>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Robert Moore <robert.moore@intel.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Christoph Hellwig <hch@lst.de>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/libnvdimm.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 37f966aff386..1b627b109360 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -61,11 +61,16 @@ struct nd_cmd_desc {
 	int out_sizes[ND_CMD_MAX_ELEM];
 };
 
+struct nd_interleave_set {
+	u64 cookie;
+};
+
 struct nd_region_desc {
 	struct resource *res;
 	struct nd_mapping *nd_mapping;
 	u16 num_mappings;
 	const struct attribute_group **attr_groups;
+	struct nd_interleave_set *nd_set;
 	void *provider_data;
 };
 
@@ -101,4 +106,5 @@ struct nd_region *nvdimm_blk_region_create(struct nvdimm_bus *nvdimm_bus,
 		struct nd_region_desc *ndr_desc);
 struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus,
 		struct nd_region_desc *ndr_desc);
+u64 nd_fletcher64(void *addr, size_t len, bool le);
 #endif /* __LIBNVDIMM_H__ */
-- 
cgit v1.2.3


From bf9bccc14c05dae8caba29df6187c731710f5380 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 17 Jun 2015 17:14:46 -0400
Subject: libnvdimm: pmem label sets and namespace instantiation.

A complete label set is a PMEM-label per-dimm per-interleave-set where
all the UUIDs match and the interleave set cookie matches the hosting
interleave set.

Present sysfs attributes for manipulation of a PMEM-namespace's
'alt_name', 'uuid', and 'size' attributes.  A later patch will make
these settings persistent by writing back the label.

Note that PMEM allocations grow forwards from the start of an interleave
set (lowest dimm-physical-address (DPA)).  BLK-namespaces that alias
with a PMEM interleave set will grow allocations backward from the
highest DPA.

Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Neil Brown <neilb@suse.de>
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/libnvdimm.h | 10 ++++++++++
 include/linux/nd.h        | 24 ++++++++++++++++++++++++
 2 files changed, 34 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 1b627b109360..c130972e08c4 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -41,10 +41,20 @@ typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc,
 		struct nvdimm *nvdimm, unsigned int cmd, void *buf,
 		unsigned int buf_len);
 
+struct nd_namespace_label;
+struct nvdimm_drvdata;
 struct nd_mapping {
 	struct nvdimm *nvdimm;
+	struct nd_namespace_label **labels;
 	u64 start;
 	u64 size;
+	/*
+	 * @ndd is for private use at region enable / disable time for
+	 * get_ndd() + put_ndd(), all other nd_mapping to ndd
+	 * conversions use to_ndd() which respects enabled state of the
+	 * nvdimm.
+	 */
+	struct nvdimm_drvdata *ndd;
 };
 
 struct nvdimm_bus_descriptor {
diff --git a/include/linux/nd.h b/include/linux/nd.h
index da70e9962197..255c38a83083 100644
--- a/include/linux/nd.h
+++ b/include/linux/nd.h
@@ -28,16 +28,40 @@ static inline struct nd_device_driver *to_nd_device_driver(
 	return container_of(drv, struct nd_device_driver, drv);
 };
 
+/**
+ * struct nd_namespace_io - infrastructure for loading an nd_pmem instance
+ * @dev: namespace device created by the nd region driver
+ * @res: struct resource conversion of a NFIT SPA table
+ */
 struct nd_namespace_io {
 	struct device dev;
 	struct resource res;
 };
 
+/**
+ * struct nd_namespace_pmem - namespace device for dimm-backed interleaved memory
+ * @nsio: device and system physical address range to drive
+ * @alt_name: namespace name supplied in the dimm label
+ * @uuid: namespace name supplied in the dimm label
+ */
+struct nd_namespace_pmem {
+	struct nd_namespace_io nsio;
+	char *alt_name;
+	u8 *uuid;
+};
+
 static inline struct nd_namespace_io *to_nd_namespace_io(struct device *dev)
 {
 	return container_of(dev, struct nd_namespace_io, dev);
 }
 
+static inline struct nd_namespace_pmem *to_nd_namespace_pmem(struct device *dev)
+{
+	struct nd_namespace_io *nsio = to_nd_namespace_io(dev);
+
+	return container_of(nsio, struct nd_namespace_pmem, nsio);
+}
+
 #define MODULE_ALIAS_ND_DEVICE(type) \
 	MODULE_ALIAS("nd:t" __stringify(type) "*")
 #define ND_DEVICE_MODALIAS_FMT "nd:t%d"
-- 
cgit v1.2.3


From 1b40e09a1232de537b193fa1b6b3ef16d3a1e397 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 1 May 2015 13:34:01 -0400
Subject: libnvdimm: blk labels and namespace instantiation

A blk label set describes a namespace comprised of one or more
discontiguous dpa ranges on a single dimm.  They may alias with one or
more pmem interleave sets that include the given dimm.

This is the runtime/volatile configuration infrastructure for sysfs
manipulation of 'alt_name', 'uuid', 'size', and 'sector_size'.  A later
patch will make these settings persistent by writing back the label(s).

Unlike pmem namespaces, multiple blk namespaces can be created per
region.  Once a blk namespace has been created a new seed device
(unconfigured child of a parent blk region) is instantiated.  As long as
a region has 'available_size' != 0 new child namespaces may be created.

Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Neil Brown <neilb@suse.de>
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/libnvdimm.h |  3 +++
 include/linux/nd.h        | 25 +++++++++++++++++++++++++
 2 files changed, 28 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index c130972e08c4..a59dca17b3aa 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -27,6 +27,9 @@ enum {
 	ND_CMD_MAX_ENVELOPE = 16,
 	ND_CMD_ARS_STATUS_MAX = SZ_4K,
 	ND_MAX_MAPPINGS = 32,
+
+	/* mark newly adjusted resources as requiring a label update */
+	DPA_RESOURCE_ADJUSTED = 1 << 0,
 };
 
 extern struct attribute_group nvdimm_bus_attribute_group;
diff --git a/include/linux/nd.h b/include/linux/nd.h
index 255c38a83083..23276ea91690 100644
--- a/include/linux/nd.h
+++ b/include/linux/nd.h
@@ -50,6 +50,26 @@ struct nd_namespace_pmem {
 	u8 *uuid;
 };
 
+/**
+ * struct nd_namespace_blk - namespace for dimm-bounded persistent memory
+ * @dev: namespace device creation by the nd region driver
+ * @alt_name: namespace name supplied in the dimm label
+ * @uuid: namespace name supplied in the dimm label
+ * @id: ida allocated id
+ * @lbasize: blk namespaces have a native sector size when btt not present
+ * @num_resources: number of dpa extents to claim
+ * @res: discontiguous dpa extents for given dimm
+ */
+struct nd_namespace_blk {
+	struct device dev;
+	char *alt_name;
+	u8 *uuid;
+	int id;
+	unsigned long lbasize;
+	int num_resources;
+	struct resource **res;
+};
+
 static inline struct nd_namespace_io *to_nd_namespace_io(struct device *dev)
 {
 	return container_of(dev, struct nd_namespace_io, dev);
@@ -62,6 +82,11 @@ static inline struct nd_namespace_pmem *to_nd_namespace_pmem(struct device *dev)
 	return container_of(nsio, struct nd_namespace_pmem, nsio);
 }
 
+static inline struct nd_namespace_blk *to_nd_namespace_blk(struct device *dev)
+{
+	return container_of(dev, struct nd_namespace_blk, dev);
+}
+
 #define MODULE_ALIAS_ND_DEVICE(type) \
 	MODULE_ALIAS("nd:t" __stringify(type) "*")
 #define ND_DEVICE_MODALIAS_FMT "nd:t%d"
-- 
cgit v1.2.3


From d7f96f97c4031fa4ffdb7801f9aae23e96170a6f Mon Sep 17 00:00:00 2001
From: Ivan Khoronzhuk <ivan.khoronzhuk@globallogic.com>
Date: Thu, 25 Jun 2015 09:06:56 +0200
Subject: firmware: dmi_scan: add SBMIOS entry and DMI tables

Some utils, like dmidecode and smbios, need to access SMBIOS entry
table area in order to get information like SMBIOS version, size, etc.
Currently it's done via /dev/mem. But for situation when /dev/mem
usage is disabled, the utils have to use dmi sysfs instead, which
doesn't represent SMBIOS entry and adds code/delay redundancy when direct
access for table is needed.

So this patch creates dmi/tables and adds SMBIOS entry point to allow
utils in question to work correctly without /dev/mem. Also patch adds
raw dmi table to simplify dmi table processing in user space, as
proposed by Jean Delvare.

Tested-by: Roy Franz <roy.franz@linaro.org>
Signed-off-by: Ivan Khoronzhuk <ivan.khoronzhuk@globallogic.com>
Signed-off-by: Jean Delvare <jdelvare@suse.de>
---
 include/linux/dmi.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dmi.h b/include/linux/dmi.h
index f820f0a336c9..2f9f98827c0a 100644
--- a/include/linux/dmi.h
+++ b/include/linux/dmi.h
@@ -2,6 +2,7 @@
 #define __DMI_H__
 
 #include <linux/list.h>
+#include <linux/kobject.h>
 #include <linux/mod_devicetable.h>
 
 /* enum dmi_field is in mod_devicetable.h */
@@ -93,6 +94,7 @@ struct dmi_dev_onboard {
 	int devfn;
 };
 
+extern struct kobject *dmi_kobj;
 extern int dmi_check_system(const struct dmi_system_id *list);
 const struct dmi_system_id *dmi_first_match(const struct dmi_system_id *list);
 extern const char * dmi_get_system_info(int field);
-- 
cgit v1.2.3


From 9ea650c804404e55dbf17c928203141282e759ab Mon Sep 17 00:00:00 2001
From: Jean Delvare <jdelvare@suse.de>
Date: Thu, 25 Jun 2015 09:06:57 +0200
Subject: firmware: dmi: struct dmi_header should be packed

Apparently the compiler does fine without it, but it feels safer and
clearer to add the missing attribute.

Signed-off-by: Jean Delvare <jdelvare@suse.de>
---
 include/linux/dmi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/dmi.h b/include/linux/dmi.h
index 2f9f98827c0a..5055ac34142d 100644
--- a/include/linux/dmi.h
+++ b/include/linux/dmi.h
@@ -75,7 +75,7 @@ struct dmi_header {
 	u8 type;
 	u8 length;
 	u16 handle;
-};
+} __packed;
 
 struct dmi_device {
 	struct list_head list;
-- 
cgit v1.2.3


From 8c2f7e8658df1d3b7cbfa62706941d14c715823a Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Thu, 25 Jun 2015 04:20:04 -0400
Subject: libnvdimm: infrastructure for btt devices

NVDIMM namespaces, in addition to accepting "struct bio" based requests,
also have the capability to perform byte-aligned accesses.  By default
only the bio/block interface is used.  However, if another driver can
make effective use of the byte-aligned capability it can claim namespace
interface and use the byte-aligned ->rw_bytes() interface.

The BTT driver is the initial first consumer of this mechanism to allow
adding atomic sector update semantics to a pmem or blk namespace.  This
patch is the sysfs infrastructure to allow configuring a BTT instance
for a namespace.  Enabling that BTT and performing i/o is in a
subsequent patch.

Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Neil Brown <neilb@suse.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/nd.h | 63 +++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 58 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nd.h b/include/linux/nd.h
index 23276ea91690..507e47c86737 100644
--- a/include/linux/nd.h
+++ b/include/linux/nd.h
@@ -12,6 +12,7 @@
  */
 #ifndef __LINUX_ND_H__
 #define __LINUX_ND_H__
+#include <linux/fs.h>
 #include <linux/ndctl.h>
 #include <linux/device.h>
 
@@ -28,13 +29,33 @@ static inline struct nd_device_driver *to_nd_device_driver(
 	return container_of(drv, struct nd_device_driver, drv);
 };
 
+/**
+ * struct nd_namespace_common - core infrastructure of a namespace
+ * @force_raw: ignore other personalities for the namespace (e.g. btt)
+ * @dev: device model node
+ * @claim: when set a another personality has taken ownership of the namespace
+ * @rw_bytes: access the raw namespace capacity with byte-aligned transfers
+ */
+struct nd_namespace_common {
+	int force_raw;
+	struct device dev;
+	struct device *claim;
+	int (*rw_bytes)(struct nd_namespace_common *, resource_size_t offset,
+			void *buf, size_t size, int rw);
+};
+
+static inline struct nd_namespace_common *to_ndns(struct device *dev)
+{
+	return container_of(dev, struct nd_namespace_common, dev);
+}
+
 /**
  * struct nd_namespace_io - infrastructure for loading an nd_pmem instance
  * @dev: namespace device created by the nd region driver
  * @res: struct resource conversion of a NFIT SPA table
  */
 struct nd_namespace_io {
-	struct device dev;
+	struct nd_namespace_common common;
 	struct resource res;
 };
 
@@ -52,7 +73,6 @@ struct nd_namespace_pmem {
 
 /**
  * struct nd_namespace_blk - namespace for dimm-bounded persistent memory
- * @dev: namespace device creation by the nd region driver
  * @alt_name: namespace name supplied in the dimm label
  * @uuid: namespace name supplied in the dimm label
  * @id: ida allocated id
@@ -61,7 +81,7 @@ struct nd_namespace_pmem {
  * @res: discontiguous dpa extents for given dimm
  */
 struct nd_namespace_blk {
-	struct device dev;
+	struct nd_namespace_common common;
 	char *alt_name;
 	u8 *uuid;
 	int id;
@@ -72,7 +92,7 @@ struct nd_namespace_blk {
 
 static inline struct nd_namespace_io *to_nd_namespace_io(struct device *dev)
 {
-	return container_of(dev, struct nd_namespace_io, dev);
+	return container_of(dev, struct nd_namespace_io, common.dev);
 }
 
 static inline struct nd_namespace_pmem *to_nd_namespace_pmem(struct device *dev)
@@ -84,7 +104,40 @@ static inline struct nd_namespace_pmem *to_nd_namespace_pmem(struct device *dev)
 
 static inline struct nd_namespace_blk *to_nd_namespace_blk(struct device *dev)
 {
-	return container_of(dev, struct nd_namespace_blk, dev);
+	return container_of(dev, struct nd_namespace_blk, common.dev);
+}
+
+/**
+ * nvdimm_read_bytes() - synchronously read bytes from an nvdimm namespace
+ * @ndns: device to read
+ * @offset: namespace-relative starting offset
+ * @buf: buffer to fill
+ * @size: transfer length
+ *
+ * @buf is up-to-date upon return from this routine.
+ */
+static inline int nvdimm_read_bytes(struct nd_namespace_common *ndns,
+		resource_size_t offset, void *buf, size_t size)
+{
+	return ndns->rw_bytes(ndns, offset, buf, size, READ);
+}
+
+/**
+ * nvdimm_write_bytes() - synchronously write bytes to an nvdimm namespace
+ * @ndns: device to read
+ * @offset: namespace-relative starting offset
+ * @buf: buffer to drain
+ * @size: transfer length
+ *
+ * NVDIMM Namepaces disks do not implement sectors internally.  Depending on
+ * the @ndns, the contents of @buf may be in cpu cache, platform buffers,
+ * or on backing memory media upon return from this routine.  Flushing
+ * to media is handled internal to the @ndns driver, if at all.
+ */
+static inline int nvdimm_write_bytes(struct nd_namespace_common *ndns,
+		resource_size_t offset, void *buf, size_t size)
+{
+	return ndns->rw_bytes(ndns, offset, buf, size, WRITE);
 }
 
 #define MODULE_ALIAS_ND_DEVICE(type) \
-- 
cgit v1.2.3


From 479305fd7172503772575997eb6f1b0a2bb4a107 Mon Sep 17 00:00:00 2001
From: Dan Streetman <ddstreet@ieee.org>
Date: Thu, 25 Jun 2015 15:00:40 -0700
Subject: zpool: remove zpool_evict()

Remove zpool_evict() helper function.  As zbud is currently the only
zpool implementation that supports eviction, add zpool and zpool_ops
references to struct zbud_pool and directly call zpool_ops->evict(zpool,
handle) on eviction.

Currently zpool provides the zpool_evict helper which locks the zpool
list lock and searches through all pools to find the specific one
matching the caller, and call the corresponding zpool_ops->evict
function.  However, this is unnecessary, as the zbud pool can simply
keep a reference to the zpool that created it, as well as the zpool_ops,
and directly call the zpool_ops->evict function, when it needs to evict
a page.  This avoids a spinlock and list search in zpool for each
eviction.

Signed-off-by: Dan Streetman <ddstreet@ieee.org>
Cc: Seth Jennings <sjennings@variantweb.net>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Nitin Gupta <ngupta@vflare.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/zpool.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/zpool.h b/include/linux/zpool.h
index 56529b34dc63..d30eff3d84d5 100644
--- a/include/linux/zpool.h
+++ b/include/linux/zpool.h
@@ -81,7 +81,8 @@ struct zpool_driver {
 	atomic_t refcount;
 	struct list_head list;
 
-	void *(*create)(char *name, gfp_t gfp, struct zpool_ops *ops);
+	void *(*create)(char *name, gfp_t gfp, struct zpool_ops *ops,
+			struct zpool *zpool);
 	void (*destroy)(void *pool);
 
 	int (*malloc)(void *pool, size_t size, gfp_t gfp,
@@ -102,6 +103,4 @@ void zpool_register_driver(struct zpool_driver *driver);
 
 int zpool_unregister_driver(struct zpool_driver *driver);
 
-int zpool_evict(void *pool, unsigned long handle);
-
 #endif
-- 
cgit v1.2.3


From f6d133f877c8bb0a0934dc8c521c758ee771e901 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 25 Jun 2015 15:01:00 -0700
Subject: compiler-gcc.h: neatening

 - Move the inline and noinline blocks together

 - Comment neatening

 - Alignment of __attribute__ uses

 - Consistent naming of __must_be_array macro argument

 - Multiline macro neatening

Signed-off-by: Joe Perches <joe@perches.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Michal Marek <mmarek@suse.cz>
Cc: Segher Boessenkool <segher@kernel.crashing.org>
Cc: Sasha Levin <levinsasha928@gmail.com>
Cc: Anton Blanchard <anton@samba.org>
Cc: Alan Modra <amodra@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler-gcc.h | 85 +++++++++++++++++++++++---------------------
 1 file changed, 45 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 371e560d13cf..5c2c14e3c647 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -5,9 +5,9 @@
 /*
  * Common definitions for all gcc versions go here.
  */
-#define GCC_VERSION (__GNUC__ * 10000 \
-		   + __GNUC_MINOR__ * 100 \
-		   + __GNUC_PATCHLEVEL__)
+#define GCC_VERSION (__GNUC__ * 10000		\
+		     + __GNUC_MINOR__ * 100	\
+		     + __GNUC_PATCHLEVEL__)
 
 /* Optimization barrier */
 
@@ -46,55 +46,63 @@
  * the inline assembly constraint from =g to =r, in this particular
  * case either is valid.
  */
-#define RELOC_HIDE(ptr, off)					\
-  ({ unsigned long __ptr;					\
-    __asm__ ("" : "=r"(__ptr) : "0"(ptr));		\
-    (typeof(ptr)) (__ptr + (off)); })
+#define RELOC_HIDE(ptr, off)						\
+({									\
+	unsigned long __ptr;						\
+	__asm__ ("" : "=r"(__ptr) : "0"(ptr));				\
+	(typeof(ptr)) (__ptr + (off));					\
+})
 
 /* Make the optimizer believe the variable can be manipulated arbitrarily. */
-#define OPTIMIZER_HIDE_VAR(var) __asm__ ("" : "=r" (var) : "0" (var))
+#define OPTIMIZER_HIDE_VAR(var)						\
+	__asm__ ("" : "=r" (var) : "0" (var))
 
 #ifdef __CHECKER__
-#define __must_be_array(arr) 0
+#define __must_be_array(a)	0
 #else
 /* &a[0] degrades to a pointer: a different type from an array */
-#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
+#define __must_be_array(a)	BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
 #endif
 
 /*
  * Force always-inline if the user requests it so via the .config,
  * or if gcc is too old:
  */
-#if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) || \
+#if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) ||		\
     !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4)
-# define inline		inline		__attribute__((always_inline)) notrace
-# define __inline__	__inline__	__attribute__((always_inline)) notrace
-# define __inline	__inline	__attribute__((always_inline)) notrace
+#define inline		inline		__attribute__((always_inline)) notrace
+#define __inline__	__inline__	__attribute__((always_inline)) notrace
+#define __inline	__inline	__attribute__((always_inline)) notrace
 #else
 /* A lot of inline functions can cause havoc with function tracing */
-# define inline		inline		notrace
-# define __inline__	__inline__	notrace
-# define __inline	__inline	notrace
+#define inline		inline		notrace
+#define __inline__	__inline__	notrace
+#define __inline	__inline	notrace
 #endif
 
-#define __deprecated			__attribute__((deprecated))
-#define __packed			__attribute__((packed))
-#define __weak				__attribute__((weak))
-#define __alias(symbol)		__attribute__((alias(#symbol)))
+#define __always_inline	inline __attribute__((always_inline))
+#define  noinline	__attribute__((noinline))
+
+#define __deprecated	__attribute__((deprecated))
+#define __packed	__attribute__((packed))
+#define __weak		__attribute__((weak))
+#define __alias(symbol)	__attribute__((alias(#symbol)))
 
 /*
- * it doesn't make sense on ARM (currently the only user of __naked) to trace
- * naked functions because then mcount is called without stack and frame pointer
- * being set up and there is no chance to restore the lr register to the value
- * before mcount was called.
+ * it doesn't make sense on ARM (currently the only user of __naked)
+ * to trace naked functions because then mcount is called without
+ * stack and frame pointer being set up and there is no chance to
+ * restore the lr register to the value before mcount was called.
+ *
+ * The asm() bodies of naked functions often depend on standard calling
+ * conventions, therefore they must be noinline and noclone.
  *
- * The asm() bodies of naked functions often depend on standard calling conventions,
- * therefore they must be noinline and noclone.  GCC 4.[56] currently fail to enforce
- * this, so we must do so ourselves.  See GCC PR44290.
+ * GCC 4.[56] currently fail to enforce this, so we must do so ourselves.
+ * See GCC PR44290.
  */
-#define __naked				__attribute__((naked)) noinline __noclone notrace
+#define __naked		__attribute__((naked)) noinline __noclone notrace
 
-#define __noreturn			__attribute__((noreturn))
+#define __noreturn	__attribute__((noreturn))
 
 /*
  * From the GCC manual:
@@ -106,14 +114,13 @@
  * would be.
  * [...]
  */
-#define __pure				__attribute__((pure))
-#define __aligned(x)			__attribute__((aligned(x)))
-#define __printf(a, b)			__attribute__((format(printf, a, b)))
-#define __scanf(a, b)			__attribute__((format(scanf, a, b)))
-#define  noinline			__attribute__((noinline))
-#define __attribute_const__		__attribute__((__const__))
-#define __maybe_unused			__attribute__((unused))
-#define __always_unused			__attribute__((unused))
+#define __pure			__attribute__((pure))
+#define __aligned(x)		__attribute__((aligned(x)))
+#define __printf(a, b)		__attribute__((format(printf, a, b)))
+#define __scanf(a, b)		__attribute__((format(scanf, a, b)))
+#define __attribute_const__	__attribute__((__const__))
+#define __maybe_unused		__attribute__((unused))
+#define __always_unused		__attribute__((unused))
 
 #define __gcc_header(x) #x
 #define _gcc_header(x) __gcc_header(linux/compiler-gcc##x.h)
@@ -129,5 +136,3 @@
  * code
  */
 #define uninitialized_var(x) x = x
-
-#define __always_inline		inline __attribute__((always_inline))
-- 
cgit v1.2.3


From cb984d101b30eb7478d32df56a0023e4603cba7f Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 25 Jun 2015 15:01:02 -0700
Subject: compiler-gcc: integrate the various compiler-gcc[345].h files

As gcc major version numbers are going to advance rather rapidly in the
future, there's no real value in separate files for each compiler
version.

Deduplicate some of the macros #defined in each file too.

Neaten comments using normal kernel commenting style.

Signed-off-by: Joe Perches <joe@perches.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Michal Marek <mmarek@suse.cz>
Cc: Segher Boessenkool <segher@kernel.crashing.org>
Cc: Sasha Levin <levinsasha928@gmail.com>
Cc: Anton Blanchard <anton@samba.org>
Cc: Alan Modra <amodra@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler-gcc.h  | 120 ++++++++++++++++++++++++++++++++++++++++--
 include/linux/compiler-gcc3.h |  23 --------
 include/linux/compiler-gcc4.h |  91 --------------------------------
 include/linux/compiler-gcc5.h |  67 -----------------------
 4 files changed, 116 insertions(+), 185 deletions(-)
 delete mode 100644 include/linux/compiler-gcc3.h
 delete mode 100644 include/linux/compiler-gcc4.h
 delete mode 100644 include/linux/compiler-gcc5.h

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 5c2c14e3c647..dfaa7b3e9ae9 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -122,10 +122,122 @@
 #define __maybe_unused		__attribute__((unused))
 #define __always_unused		__attribute__((unused))
 
-#define __gcc_header(x) #x
-#define _gcc_header(x) __gcc_header(linux/compiler-gcc##x.h)
-#define gcc_header(x) _gcc_header(x)
-#include gcc_header(__GNUC__)
+/* gcc version specific checks */
+
+#if GCC_VERSION < 30200
+# error Sorry, your compiler is too old - please upgrade it.
+#endif
+
+#if GCC_VERSION < 30300
+# define __used			__attribute__((__unused__))
+#else
+# define __used			__attribute__((__used__))
+#endif
+
+#ifdef CONFIG_GCOV_KERNEL
+# if GCC_VERSION < 30400
+#   error "GCOV profiling support for gcc versions below 3.4 not included"
+# endif /* __GNUC_MINOR__ */
+#endif /* CONFIG_GCOV_KERNEL */
+
+#if GCC_VERSION >= 30400
+#define __must_check		__attribute__((warn_unused_result))
+#endif
+
+#if GCC_VERSION >= 40000
+
+/* GCC 4.1.[01] miscompiles __weak */
+#ifdef __KERNEL__
+# if GCC_VERSION >= 40100 &&  GCC_VERSION <= 40101
+#  error Your version of gcc miscompiles the __weak directive
+# endif
+#endif
+
+#define __used			__attribute__((__used__))
+#define __compiler_offsetof(a, b)					\
+	__builtin_offsetof(a, b)
+
+#if GCC_VERSION >= 40100 && GCC_VERSION < 40600
+# define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
+#endif
+
+#if GCC_VERSION >= 40300
+/* Mark functions as cold. gcc will assume any path leading to a call
+ * to them will be unlikely.  This means a lot of manual unlikely()s
+ * are unnecessary now for any paths leading to the usual suspects
+ * like BUG(), printk(), panic() etc. [but let's keep them for now for
+ * older compilers]
+ *
+ * Early snapshots of gcc 4.3 don't support this and we can't detect this
+ * in the preprocessor, but we can live with this because they're unreleased.
+ * Maketime probing would be overkill here.
+ *
+ * gcc also has a __attribute__((__hot__)) to move hot functions into
+ * a special section, but I don't see any sense in this right now in
+ * the kernel context
+ */
+#define __cold			__attribute__((__cold__))
+
+#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
+
+#ifndef __CHECKER__
+# define __compiletime_warning(message) __attribute__((warning(message)))
+# define __compiletime_error(message) __attribute__((error(message)))
+#endif /* __CHECKER__ */
+#endif /* GCC_VERSION >= 40300 */
+
+#if GCC_VERSION >= 40500
+/*
+ * Mark a position in code as unreachable.  This can be used to
+ * suppress control flow warnings after asm blocks that transfer
+ * control elsewhere.
+ *
+ * Early snapshots of gcc 4.5 don't support this and we can't detect
+ * this in the preprocessor, but we can live with this because they're
+ * unreleased.  Really, we need to have autoconf for the kernel.
+ */
+#define unreachable() __builtin_unreachable()
+
+/* Mark a function definition as prohibited from being cloned. */
+#define __noclone	__attribute__((__noclone__))
+
+#endif /* GCC_VERSION >= 40500 */
+
+#if GCC_VERSION >= 40600
+/*
+ * Tell the optimizer that something else uses this function or variable.
+ */
+#define __visible	__attribute__((externally_visible))
+#endif
+
+/*
+ * GCC 'asm goto' miscompiles certain code sequences:
+ *
+ *   http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670
+ *
+ * Work it around via a compiler barrier quirk suggested by Jakub Jelinek.
+ *
+ * (asm goto is automatically volatile - the naming reflects this.)
+ */
+#define asm_volatile_goto(x...)	do { asm goto(x); asm (""); } while (0)
+
+#ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP
+#if GCC_VERSION >= 40400
+#define __HAVE_BUILTIN_BSWAP32__
+#define __HAVE_BUILTIN_BSWAP64__
+#endif
+#if GCC_VERSION >= 40800 || (defined(__powerpc__) && GCC_VERSION >= 40600)
+#define __HAVE_BUILTIN_BSWAP16__
+#endif
+#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */
+
+#if GCC_VERSION >= 50000
+#define KASAN_ABI_VERSION 4
+#elif GCC_VERSION >= 40902
+#define KASAN_ABI_VERSION 3
+#endif
+
+#endif	/* gcc version >= 40000 specific checks */
 
 #if !defined(__noclone)
 #define __noclone	/* not needed */
diff --git a/include/linux/compiler-gcc3.h b/include/linux/compiler-gcc3.h
deleted file mode 100644
index 7d89febe4d79..000000000000
--- a/include/linux/compiler-gcc3.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef __LINUX_COMPILER_H
-#error "Please don't include <linux/compiler-gcc3.h> directly, include <linux/compiler.h> instead."
-#endif
-
-#if GCC_VERSION < 30200
-# error Sorry, your compiler is too old - please upgrade it.
-#endif
-
-#if GCC_VERSION >= 30300
-# define __used			__attribute__((__used__))
-#else
-# define __used			__attribute__((__unused__))
-#endif
-
-#if GCC_VERSION >= 30400
-#define __must_check		__attribute__((warn_unused_result))
-#endif
-
-#ifdef CONFIG_GCOV_KERNEL
-# if GCC_VERSION < 30400
-#   error "GCOV profiling support for gcc versions below 3.4 not included"
-# endif /* __GNUC_MINOR__ */
-#endif /* CONFIG_GCOV_KERNEL */
diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
deleted file mode 100644
index 769e19864632..000000000000
--- a/include/linux/compiler-gcc4.h
+++ /dev/null
@@ -1,91 +0,0 @@
-#ifndef __LINUX_COMPILER_H
-#error "Please don't include <linux/compiler-gcc4.h> directly, include <linux/compiler.h> instead."
-#endif
-
-/* GCC 4.1.[01] miscompiles __weak */
-#ifdef __KERNEL__
-# if GCC_VERSION >= 40100 &&  GCC_VERSION <= 40101
-#  error Your version of gcc miscompiles the __weak directive
-# endif
-#endif
-
-#define __used			__attribute__((__used__))
-#define __must_check 		__attribute__((warn_unused_result))
-#define __compiler_offsetof(a,b) __builtin_offsetof(a,b)
-
-#if GCC_VERSION >= 40100 && GCC_VERSION < 40600
-# define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
-#endif
-
-#if GCC_VERSION >= 40300
-/* Mark functions as cold. gcc will assume any path leading to a call
-   to them will be unlikely.  This means a lot of manual unlikely()s
-   are unnecessary now for any paths leading to the usual suspects
-   like BUG(), printk(), panic() etc. [but let's keep them for now for
-   older compilers]
-
-   Early snapshots of gcc 4.3 don't support this and we can't detect this
-   in the preprocessor, but we can live with this because they're unreleased.
-   Maketime probing would be overkill here.
-
-   gcc also has a __attribute__((__hot__)) to move hot functions into
-   a special section, but I don't see any sense in this right now in
-   the kernel context */
-#define __cold			__attribute__((__cold__))
-
-#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
-
-#ifndef __CHECKER__
-# define __compiletime_warning(message) __attribute__((warning(message)))
-# define __compiletime_error(message) __attribute__((error(message)))
-#endif /* __CHECKER__ */
-#endif /* GCC_VERSION >= 40300 */
-
-#if GCC_VERSION >= 40500
-/*
- * Mark a position in code as unreachable.  This can be used to
- * suppress control flow warnings after asm blocks that transfer
- * control elsewhere.
- *
- * Early snapshots of gcc 4.5 don't support this and we can't detect
- * this in the preprocessor, but we can live with this because they're
- * unreleased.  Really, we need to have autoconf for the kernel.
- */
-#define unreachable() __builtin_unreachable()
-
-/* Mark a function definition as prohibited from being cloned. */
-#define __noclone	__attribute__((__noclone__))
-
-#endif /* GCC_VERSION >= 40500 */
-
-#if GCC_VERSION >= 40600
-/*
- * Tell the optimizer that something else uses this function or variable.
- */
-#define __visible __attribute__((externally_visible))
-#endif
-
-/*
- * GCC 'asm goto' miscompiles certain code sequences:
- *
- *   http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670
- *
- * Work it around via a compiler barrier quirk suggested by Jakub Jelinek.
- *
- * (asm goto is automatically volatile - the naming reflects this.)
- */
-#define asm_volatile_goto(x...)	do { asm goto(x); asm (""); } while (0)
-
-#ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP
-#if GCC_VERSION >= 40400
-#define __HAVE_BUILTIN_BSWAP32__
-#define __HAVE_BUILTIN_BSWAP64__
-#endif
-#if GCC_VERSION >= 40800 || (defined(__powerpc__) && GCC_VERSION >= 40600)
-#define __HAVE_BUILTIN_BSWAP16__
-#endif
-#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */
-
-#if GCC_VERSION >= 40902
-#define KASAN_ABI_VERSION 3
-#endif
diff --git a/include/linux/compiler-gcc5.h b/include/linux/compiler-gcc5.h
deleted file mode 100644
index efee493714eb..000000000000
--- a/include/linux/compiler-gcc5.h
+++ /dev/null
@@ -1,67 +0,0 @@
-#ifndef __LINUX_COMPILER_H
-#error "Please don't include <linux/compiler-gcc5.h> directly, include <linux/compiler.h> instead."
-#endif
-
-#define __used				__attribute__((__used__))
-#define __must_check			__attribute__((warn_unused_result))
-#define __compiler_offsetof(a, b)	__builtin_offsetof(a, b)
-
-/* Mark functions as cold. gcc will assume any path leading to a call
-   to them will be unlikely.  This means a lot of manual unlikely()s
-   are unnecessary now for any paths leading to the usual suspects
-   like BUG(), printk(), panic() etc. [but let's keep them for now for
-   older compilers]
-
-   Early snapshots of gcc 4.3 don't support this and we can't detect this
-   in the preprocessor, but we can live with this because they're unreleased.
-   Maketime probing would be overkill here.
-
-   gcc also has a __attribute__((__hot__)) to move hot functions into
-   a special section, but I don't see any sense in this right now in
-   the kernel context */
-#define __cold			__attribute__((__cold__))
-
-#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
-
-#ifndef __CHECKER__
-# define __compiletime_warning(message) __attribute__((warning(message)))
-# define __compiletime_error(message) __attribute__((error(message)))
-#endif /* __CHECKER__ */
-
-/*
- * Mark a position in code as unreachable.  This can be used to
- * suppress control flow warnings after asm blocks that transfer
- * control elsewhere.
- *
- * Early snapshots of gcc 4.5 don't support this and we can't detect
- * this in the preprocessor, but we can live with this because they're
- * unreleased.  Really, we need to have autoconf for the kernel.
- */
-#define unreachable() __builtin_unreachable()
-
-/* Mark a function definition as prohibited from being cloned. */
-#define __noclone	__attribute__((__noclone__))
-
-/*
- * Tell the optimizer that something else uses this function or variable.
- */
-#define __visible __attribute__((externally_visible))
-
-/*
- * GCC 'asm goto' miscompiles certain code sequences:
- *
- *   http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670
- *
- * Work it around via a compiler barrier quirk suggested by Jakub Jelinek.
- *
- * (asm goto is automatically volatile - the naming reflects this.)
- */
-#define asm_volatile_goto(x...)	do { asm goto(x); asm (""); } while (0)
-
-#ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP
-#define __HAVE_BUILTIN_BSWAP32__
-#define __HAVE_BUILTIN_BSWAP64__
-#define __HAVE_BUILTIN_BSWAP16__
-#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */
-
-#define KASAN_ABI_VERSION 4
-- 
cgit v1.2.3


From b86a50c3b5414eafdbee7f34af4a201a4a7817c2 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 25 Jun 2015 15:01:05 -0700
Subject: compiler-intel: fix wrong compiler barrier() macro

Cleanup commit 73679e508201 ("compiler-intel.h: Remove duplicate
definition") removed the double definition of __memory_barrier()
intrinsics.

However, in doing so, it also removed the preceding #undef barrier by
accident, meaning, the actual barrier() macro from compiler-gcc.h with
inline asm is still in place as __GNUC__ is provided.

Subsequently, barrier() can never be defined as __memory_barrier() from
compiler.h since it already has a definition in place and if we trust
the comment in compiler-intel.h, ecc doesn't support gcc specific asm
statements.

I don't have an ecc at hand (unsure if that's still used in the field?)
and only found this by accident during code review, a revert of that
cleanup would be simplest option.

Fixes: 73679e508201 ("compiler-intel.h: Remove duplicate definition")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Pranith Kumar <bobby.prani@gmail.com>
Cc: Pranith Kumar <bobby.prani@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: mancha security <mancha1@zoho.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler-intel.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h
index 0c9a2f2c2802..d4c71132d07f 100644
--- a/include/linux/compiler-intel.h
+++ b/include/linux/compiler-intel.h
@@ -13,10 +13,12 @@
 /* Intel ECC compiler doesn't support gcc specific asm stmts.
  * It uses intrinsics to do the equivalent things.
  */
+#undef barrier
 #undef barrier_data
 #undef RELOC_HIDE
 #undef OPTIMIZER_HIDE_VAR
 
+#define barrier() __memory_barrier()
 #define barrier_data(ptr) barrier()
 
 #define RELOC_HIDE(ptr, off)					\
-- 
cgit v1.2.3


From 8c7fbe5795a016259445a61e072eb0118aaf6a61 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 25 Jun 2015 15:01:16 -0700
Subject: stddef.h: move offsetofend inside #ifndef/#endif guard, neaten

Commit 3876488444e7 ("include/stddef.h: Move offsetofend() from vfio.h
to a generic kernel header") added offsetofend outside the normal
include #ifndef/#endif guard.  Move it inside.

Miscellanea:

o remove unnecessary blank line
o standardize offsetof macros whitespace style

Signed-off-by: Joe Perches <joe@perches.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/stddef.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/stddef.h b/include/linux/stddef.h
index 076af437284d..9c61c7cda936 100644
--- a/include/linux/stddef.h
+++ b/include/linux/stddef.h
@@ -3,7 +3,6 @@
 
 #include <uapi/linux/stddef.h>
 
-
 #undef NULL
 #define NULL ((void *)0)
 
@@ -14,10 +13,9 @@ enum {
 
 #undef offsetof
 #ifdef __compiler_offsetof
-#define offsetof(TYPE,MEMBER) __compiler_offsetof(TYPE,MEMBER)
+#define offsetof(TYPE, MEMBER)	__compiler_offsetof(TYPE, MEMBER)
 #else
-#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
-#endif
+#define offsetof(TYPE, MEMBER)	((size_t)&((TYPE *)0)->MEMBER)
 #endif
 
 /**
@@ -28,3 +26,5 @@ enum {
  */
 #define offsetofend(TYPE, MEMBER) \
 	(offsetof(TYPE, MEMBER)	+ sizeof(((TYPE *)0)->MEMBER))
+
+#endif
-- 
cgit v1.2.3


From 3033f14ab78c326871a4902591c2518410add24a Mon Sep 17 00:00:00 2001
From: Josh Triplett <josh@joshtriplett.org>
Date: Thu, 25 Jun 2015 15:01:19 -0700
Subject: clone: support passing tls argument via C rather than pt_regs magic

clone has some of the quirkiest syscall handling in the kernel, with a
pile of special cases, historical curiosities, and architecture-specific
calling conventions.  In particular, clone with CLONE_SETTLS accepts a
parameter "tls" that the C entry point completely ignores and some
assembly entry points overwrite; instead, the low-level arch-specific
code pulls the tls parameter out of the arch-specific register captured
as part of pt_regs on entry to the kernel.  That's a massive hack, and
it makes the arch-specific code only work when called via the specific
existing syscall entry points; because of this hack, any new clone-like
system call would have to accept an identical tls argument in exactly
the same arch-specific position, rather than providing a unified system
call entry point across architectures.

The first patch allows architectures to handle the tls argument via
normal C parameter passing, if they opt in by selecting
HAVE_COPY_THREAD_TLS.  The second patch makes 32-bit and 64-bit x86 opt
into this.

These two patches came out of the clone4 series, which isn't ready for
this merge window, but these first two cleanup patches were entirely
uncontroversial and have acks.  I'd like to go ahead and submit these
two so that other architectures can begin building on top of this and
opting into HAVE_COPY_THREAD_TLS.  However, I'm also happy to wait and
send these through the next merge window (along with v3 of clone4) if
anyone would prefer that.

This patch (of 2):

clone with CLONE_SETTLS accepts an argument to set the thread-local
storage area for the new thread.  sys_clone declares an int argument
tls_val in the appropriate point in the argument list (based on the
various CLONE_BACKWARDS variants), but doesn't actually use or pass along
that argument.  Instead, sys_clone calls do_fork, which calls
copy_process, which calls the arch-specific copy_thread, and copy_thread
pulls the corresponding syscall argument out of the pt_regs captured at
kernel entry (knowing what argument of clone that architecture passes tls
in).

Apart from being awful and inscrutable, that also only works because only
one code path into copy_thread can pass the CLONE_SETTLS flag, and that
code path comes from sys_clone with its architecture-specific
argument-passing order.  This prevents introducing a new version of the
clone system call without propagating the same architecture-specific
position of the tls argument.

However, there's no reason to pull the argument out of pt_regs when
sys_clone could just pass it down via C function call arguments.

Introduce a new CONFIG_HAVE_COPY_THREAD_TLS for architectures to opt into,
and a new copy_thread_tls that accepts the tls parameter as an additional
unsigned long (syscall-argument-sized) argument.  Change sys_clone's tls
argument to an unsigned long (which does not change the ABI), and pass
that down to copy_thread_tls.

Architectures that don't opt into copy_thread_tls will continue to ignore
the C argument to sys_clone in favor of the pt_regs captured at kernel
entry, and thus will be unable to introduce new versions of the clone
syscall.

Patch co-authored by Josh Triplett and Thiago Macieira.

Signed-off-by: Josh Triplett <josh@joshtriplett.org>
Acked-by: Andy Lutomirski <luto@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Thiago Macieira <thiago.macieira@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h    | 15 +++++++++++++++
 include/linux/syscalls.h |  6 +++---
 2 files changed, 18 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6633e83e608a..93ed0b682adb 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2556,8 +2556,22 @@ extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode);
 /* Remove the current tasks stale references to the old mm_struct */
 extern void mm_release(struct task_struct *, struct mm_struct *);
 
+#ifdef CONFIG_HAVE_COPY_THREAD_TLS
+extern int copy_thread_tls(unsigned long, unsigned long, unsigned long,
+			struct task_struct *, unsigned long);
+#else
 extern int copy_thread(unsigned long, unsigned long, unsigned long,
 			struct task_struct *);
+
+/* Architectures that haven't opted into copy_thread_tls get the tls argument
+ * via pt_regs, so ignore the tls argument passed via C. */
+static inline int copy_thread_tls(
+		unsigned long clone_flags, unsigned long sp, unsigned long arg,
+		struct task_struct *p, unsigned long tls)
+{
+	return copy_thread(clone_flags, sp, arg, p);
+}
+#endif
 extern void flush_thread(void);
 extern void exit_thread(void);
 
@@ -2576,6 +2590,7 @@ extern int do_execveat(int, struct filename *,
 		       const char __user * const __user *,
 		       const char __user * const __user *,
 		       int);
+extern long _do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *, unsigned long);
 extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *);
 struct task_struct *fork_idle(int);
 extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 76d1e38aabe1..bb51becf23f8 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -827,15 +827,15 @@ asmlinkage long sys_syncfs(int fd);
 asmlinkage long sys_fork(void);
 asmlinkage long sys_vfork(void);
 #ifdef CONFIG_CLONE_BACKWARDS
-asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, int,
+asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, unsigned long,
 	       int __user *);
 #else
 #ifdef CONFIG_CLONE_BACKWARDS3
 asmlinkage long sys_clone(unsigned long, unsigned long, int, int __user *,
-			  int __user *, int);
+			  int __user *, unsigned long);
 #else
 asmlinkage long sys_clone(unsigned long, unsigned long, int __user *,
-	       int __user *, int);
+	       int __user *, unsigned long);
 #endif
 #endif
 
-- 
cgit v1.2.3


From d43ff430f434d862db59582c0f1f02382a678036 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 25 Jun 2015 15:01:24 -0700
Subject: printk: guard the amount written per line by devkmsg_read()

This patchset updates netconsole so that it can emit messages with the
same header as used in /dev/kmsg which gives neconsole receiver full log
information which enables things like structured logging and detection
of lost messages.

This patch (of 7):

devkmsg_read() uses 8k buffer and assumes that the formatted output
message won't overrun which seems safe given LOG_LINE_MAX, the current use
of dict and the escaping method being used; however, we're planning to use
devkmsg formatting wider and accounting for the buffer size properly isn't
that complicated.

This patch defines CONSOLE_EXT_LOG_MAX as 8192 and updates devkmsg_read()
so that it limits output accordingly.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: David Miller <davem@davemloft.net>
Cc: Kay Sievers <kay@vrfy.org>
Reviewed-by: Petr Mladek <pmladek@suse.cz>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/printk.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/printk.h b/include/linux/printk.h
index 9b30871c9149..58b1fec40d37 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -30,6 +30,8 @@ static inline const char *printk_skip_level(const char *buffer)
 	return buffer;
 }
 
+#define CONSOLE_EXT_LOG_MAX	8192
+
 /* printk's without a loglevel use this.. */
 #define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT
 
-- 
cgit v1.2.3


From 6fe29354befe4c46eb308b662155d4d8017358e1 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 25 Jun 2015 15:01:30 -0700
Subject: printk: implement support for extended console drivers

printk log_buf keeps various metadata for each message including its
sequence number and timestamp.  The metadata is currently available only
through /dev/kmsg and stripped out before passed onto console drivers.  We
want this metadata to be available to console drivers too so that console
consumers can get full information including the metadata and dictionary,
which among other things can be used to detect whether messages got lost
in transit.

This patch implements support for extended console drivers.  Consoles can
indicate that they want extended messages by setting the new CON_EXTENDED
flag and they'll be fed messages formatted the same way as /dev/kmsg.

 "<level>,<sequnum>,<timestamp>,<contflag>;<message text>\n"

If extended consoles exist, in-kernel fragment assembly is disabled.  This
ensures that all messages emitted to consoles have full metadata including
sequence number.  The contflag carries enough information to reassemble
the fragments from the reader side trivially.  Note that this only affects
/dev/kmsg.  Regular console and /proc/kmsg outputs are not affected by
this change.

* Extended message formatting for console drivers is enabled iff there
  are registered extended consoles.

* Comment describing /dev/kmsg message format updated to add missing
  contflag field and help distinguishing variable from verbatim terms.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: David Miller <davem@davemloft.net>
Cc: Kay Sievers <kay@vrfy.org>
Reviewed-by: Petr Mladek <pmladek@suse.cz>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/console.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/console.h b/include/linux/console.h
index 9f50fb413c11..bd194343c346 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -115,6 +115,7 @@ static inline int con_debug_leave(void)
 #define CON_BOOT	(8)
 #define CON_ANYTIME	(16) /* Safe to call when cpu is offline */
 #define CON_BRL		(32) /* Used for a braille device */
+#define CON_EXTENDED	(64) /* Use the extended output format a la /dev/kmsg */
 
 struct console {
 	char	name[16];
-- 
cgit v1.2.3


From 3ea4331c60be3eee4c97e5ddabad95399f879b76 Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Thu, 25 Jun 2015 15:01:47 -0700
Subject: check_syslog_permissions() cleanup

Patch fixes drawbacks in heck_syslog_permissions() noticed by AKPM:
"from_file handling makes me cry.

That's not a boolean - it's an enumerated value with two values
currently defined.

But the code in check_syslog_permissions() treats it as a boolean and
also hardwires the knowledge that SYSLOG_FROM_PROC == 1 (or == `true`).

And the name is wrong: it should be called from_proc to match
SYSLOG_FROM_PROC."

Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Josh Boyer <jwboyer@redhat.com>
Cc: Eric Paris <eparis@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/syslog.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/syslog.h b/include/linux/syslog.h
index 4b7b875a7ce1..c3a7f0cc3a27 100644
--- a/include/linux/syslog.h
+++ b/include/linux/syslog.h
@@ -47,12 +47,12 @@
 #define SYSLOG_FROM_READER           0
 #define SYSLOG_FROM_PROC             1
 
-int do_syslog(int type, char __user *buf, int count, bool from_file);
+int do_syslog(int type, char __user *buf, int count, int source);
 
 #ifdef CONFIG_PRINTK
-int check_syslog_permissions(int type, bool from_file);
+int check_syslog_permissions(int type, int source);
 #else
-static inline int check_syslog_permissions(int type, bool from_file)
+static inline int check_syslog_permissions(int type, int source)
 {
 	return 0;
 }
-- 
cgit v1.2.3


From 94df290404cd0da8016698bf3f398410f29d9a64 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Thu, 25 Jun 2015 15:02:22 -0700
Subject: lib/string.c: introduce strreplace()

Strings are sometimes sanitized by replacing a certain character (often
'/') by another (often '!').  In a few places, this is done the same way
Schlemiel the Painter would do it.  Others are slightly smarter but still
do multiple strchr() calls.  Introduce strreplace() to do this using a
single function call and a single pass over the string.

One would expect the return value to be one of three things: void, s, or
the number of replacements made.  I chose the fourth, returning a pointer
to the end of the string.  This is more likely to be useful (for example
allowing the caller to avoid a strlen call).

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Neil Brown <neilb@suse.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/string.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/string.h b/include/linux/string.h
index e40099e585c9..a8d90db9c4b0 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -111,6 +111,7 @@ extern int memcmp(const void *,const void *,__kernel_size_t);
 extern void * memchr(const void *,int,__kernel_size_t);
 #endif
 void *memchr_inv(const void *s, int c, size_t n);
+char *strreplace(char *s, char old, char new);
 
 extern void kfree_const(const void *x);
 
-- 
cgit v1.2.3


From 78d8e58a086b214dddf1fd463e20a7e1d82d7866 Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Fri, 26 Jun 2015 10:01:13 -0400
Subject: Revert "block, dm: don't copy bios for request clones"

This reverts commit 5f1b670d0bef508a5554d92525f5f6d00d640b38.

Justification for revert as reported in this dm-devel post:
https://www.redhat.com/archives/dm-devel/2015-June/msg00160.html

this change should not be pushed to mainline yet.

Firstly, Christoph has a newer version of the patch that fixes silent
data corruption problem:
  https://www.redhat.com/archives/dm-devel/2015-May/msg00229.html

And the new version still depends on LLDDs to always complete requests
to the end when error happens, while block API doesn't enforce such a
requirement. If the assumption is ever broken, the inconsistency between
request and bio (e.g. rq->__sector and rq->bio) will cause silent data
corruption:
  https://www.redhat.com/archives/dm-devel/2015-June/msg00022.html

Reported-by: Junichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 include/linux/blk_types.h | 2 --
 include/linux/blkdev.h    | 6 +++++-
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 6ab9d12d1f17..7303b3405520 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -192,7 +192,6 @@ enum rq_flag_bits {
 	__REQ_HASHED,		/* on IO scheduler merge hash */
 	__REQ_MQ_INFLIGHT,	/* track inflight for MQ */
 	__REQ_NO_TIMEOUT,	/* requests may never expire */
-	__REQ_CLONE,		/* cloned bios */
 	__REQ_NR_BITS,		/* stops here */
 };
 
@@ -247,6 +246,5 @@ enum rq_flag_bits {
 #define REQ_HASHED		(1ULL << __REQ_HASHED)
 #define REQ_MQ_INFLIGHT		(1ULL << __REQ_MQ_INFLIGHT)
 #define REQ_NO_TIMEOUT		(1ULL << __REQ_NO_TIMEOUT)
-#define REQ_CLONE		(1ULL << __REQ_CLONE)
 
 #endif /* __LINUX_BLK_TYPES_H */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 776d2ee43ba6..41c0fb573dff 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -775,7 +775,11 @@ extern void blk_add_request_payload(struct request *rq, struct page *page,
 		unsigned int len);
 extern int blk_rq_check_limits(struct request_queue *q, struct request *rq);
 extern int blk_lld_busy(struct request_queue *q);
-extern void blk_rq_prep_clone(struct request *rq, struct request *rq_src);
+extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
+			     struct bio_set *bs, gfp_t gfp_mask,
+			     int (*bio_ctr)(struct bio *, struct bio *, void *),
+			     void *data);
+extern void blk_rq_unprep_clone(struct request *rq);
 extern int blk_insert_cloned_request(struct request_queue *q,
 				     struct request *rq);
 extern void blk_delay_queue(struct request_queue *, unsigned long);
-- 
cgit v1.2.3


From 5212e11fde4d40fa627668b4f2222d20db488f71 Mon Sep 17 00:00:00 2001
From: Vishal Verma <vishal.l.verma@intel.com>
Date: Thu, 25 Jun 2015 04:20:32 -0400
Subject: nd_btt: atomic sector updates

BTT stands for Block Translation Table, and is a way to provide power
fail sector atomicity semantics for block devices that have the ability
to perform byte granularity IO. It relies on the capability of libnvdimm
namespace devices to do byte aligned IO.

The BTT works as a stacked blocked device, and reserves a chunk of space
from the backing device for its accounting metadata. It is a bio-based
driver because all IO is done synchronously, and there is no queuing or
asynchronous completions at either the device or the driver level.

The BTT uses 'lanes' to index into various 'on-disk' data structures,
and lanes also act as a synchronization mechanism in case there are more
CPUs than available lanes. We did a comparison between two lane lock
strategies - first where we kept an atomic counter around that tracked
which was the last lane that was used, and 'our' lane was determined by
atomically incrementing that. That way, for the nr_cpus > nr_lanes case,
theoretically, no CPU would be blocked waiting for a lane. The other
strategy was to use the cpu number we're scheduled on to and hash it to
a lane number. Theoretically, this could block an IO that could've
otherwise run using a different, free lane. But some fio workloads
showed that the direct cpu -> lane hash performed faster than tracking
'last lane' - my reasoning is the cache thrash caused by moving the
atomic variable made that approach slower than simply waiting out the
in-progress IO. This supports the conclusion that the driver can be a
very simple bio-based one that does synchronous IOs instead of queuing.

Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Boaz Harrosh <boaz@plexistor.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jens Axboe <axboe@fb.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Neil Brown <neilb@suse.de>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
[jmoyer: fix nmi watchdog timeout in btt_map_init]
[jmoyer: move btt initialization to module load path]
[jmoyer: fix memory leak in the btt initialization path]
[jmoyer: Don't overwrite corrupted arenas]
Signed-off-by: Vishal Verma <vishal.l.verma@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/libnvdimm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index a59dca17b3aa..531d99dfac68 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -85,6 +85,7 @@ struct nd_region_desc {
 	const struct attribute_group **attr_groups;
 	struct nd_interleave_set *nd_set;
 	void *provider_data;
+	int num_lanes;
 };
 
 struct nvdimm_bus;
-- 
cgit v1.2.3


From 047fc8a1f9a6330eacc80374dff087e20dc2304b Mon Sep 17 00:00:00 2001
From: Ross Zwisler <ross.zwisler@linux.intel.com>
Date: Thu, 25 Jun 2015 04:21:02 -0400
Subject: libnvdimm, nfit, nd_blk: driver for BLK-mode access persistent memory

The libnvdimm implementation handles allocating dimm address space (DPA)
between PMEM and BLK mode interfaces.  After DPA has been allocated from
a BLK-region to a BLK-namespace the nd_blk driver attaches to handle I/O
as a struct bio based block device. Unlike PMEM, BLK is required to
handle platform specific details like mmio register formats and memory
controller interleave.  For this reason the libnvdimm generic nd_blk
driver calls back into the bus provider to carry out the I/O.

This initial implementation handles the BLK interface defined by the
ACPI 6 NFIT [1] and the NVDIMM DSM Interface Example [2] composed from
DCR (dimm control region), BDW (block data window), IDT (interleave
descriptor) NFIT structures and the hardware register format.
[1]: http://www.uefi.org/sites/default/files/resources/ACPI_6.0.pdf
[2]: http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf

Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Boaz Harrosh <boaz@plexistor.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jens Axboe <axboe@fb.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/libnvdimm.h | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 531d99dfac68..7fc1b25bdb5d 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -14,6 +14,7 @@
  */
 #ifndef __LIBNVDIMM_H__
 #define __LIBNVDIMM_H__
+#include <linux/kernel.h>
 #include <linux/sizes.h>
 #include <linux/types.h>
 
@@ -89,8 +90,24 @@ struct nd_region_desc {
 };
 
 struct nvdimm_bus;
-struct device;
 struct module;
+struct device;
+struct nd_blk_region;
+struct nd_blk_region_desc {
+	int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
+	void (*disable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
+	int (*do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
+			void *iobuf, u64 len, int rw);
+	struct nd_region_desc ndr_desc;
+};
+
+static inline struct nd_blk_region_desc *to_blk_region_desc(
+		struct nd_region_desc *ndr_desc)
+{
+	return container_of(ndr_desc, struct nd_blk_region_desc, ndr_desc);
+
+}
+
 struct nvdimm_bus *__nvdimm_bus_register(struct device *parent,
 		struct nvdimm_bus_descriptor *nfit_desc, struct module *module);
 #define nvdimm_bus_register(parent, desc) \
@@ -99,10 +116,10 @@ void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus);
 struct nvdimm_bus *to_nvdimm_bus(struct device *dev);
 struct nvdimm *to_nvdimm(struct device *dev);
 struct nd_region *to_nd_region(struct device *dev);
+struct nd_blk_region *to_nd_blk_region(struct device *dev);
 struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus);
 const char *nvdimm_name(struct nvdimm *nvdimm);
 void *nvdimm_provider_data(struct nvdimm *nvdimm);
-void *nd_region_provider_data(struct nd_region *nd_region);
 struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
 		const struct attribute_group **groups, unsigned long flags,
 		unsigned long *dsm_mask);
@@ -120,5 +137,11 @@ struct nd_region *nvdimm_blk_region_create(struct nvdimm_bus *nvdimm_bus,
 		struct nd_region_desc *ndr_desc);
 struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus,
 		struct nd_region_desc *ndr_desc);
+void *nd_region_provider_data(struct nd_region *nd_region);
+void *nd_blk_region_provider_data(struct nd_blk_region *ndbr);
+void nd_blk_region_set_provider_data(struct nd_blk_region *ndbr, void *data);
+struct nvdimm *nd_blk_region_to_dimm(struct nd_blk_region *ndbr);
+unsigned int nd_region_acquire_lane(struct nd_region *nd_region);
+void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane);
 u64 nd_fletcher64(void *addr, size_t len, bool le);
 #endif /* __LIBNVDIMM_H__ */
-- 
cgit v1.2.3


From 581388209405902b56d055f644b4dd124a206112 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 23 Jun 2015 20:08:34 -0400
Subject: libnvdimm, nfit: handle unarmed dimms, mark namespaces read-only

Upon detection of an unarmed dimm in a region, arrange for descendant
BTT, PMEM, or BLK instances to be read-only.  A dimm is primarily marked
"unarmed" via flags passed by platform firmware (NFIT).

The flags in the NFIT memory device sub-structure indicate the state of
the data on the nvdimm relative to its energy source or last "flush to
persistence".  For the most part there is nothing the driver can do but
advertise the state of these flags in sysfs and emit a message if
firmware indicates that the contents of the device may be corrupted.
However, for the case of ACPI_NFIT_MEM_ARMED, the driver can arrange for
the block devices incorporating that nvdimm to be marked read-only.
This is a safe default as the data is still available and new writes are
held off until the administrator either forces read-write mode, or the
energy source becomes armed.

A 'read_only' attribute is added to REGION devices to allow for
overriding the default read-only policy of all descendant block devices.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/libnvdimm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 7fc1b25bdb5d..dc799a29ed1a 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -21,6 +21,8 @@
 enum {
 	/* when a dimm supports both PMEM and BLK access a label is required */
 	NDD_ALIASING = 1 << 0,
+	/* unarmed memory devices may not persist writes */
+	NDD_UNARMED = 1 << 1,
 
 	/* need to set a limit somewhere, but yes, this is likely overkill */
 	ND_IOCTL_MAX_BUFLEN = SZ_4M,
-- 
cgit v1.2.3


From 99759869faf15471cfce251bc138848d8af7d162 Mon Sep 17 00:00:00 2001
From: Toshi Kani <toshi.kani@hp.com>
Date: Fri, 19 Jun 2015 17:14:15 -0600
Subject: acpi: Add acpi_map_pxm_to_online_node()

The kernel initializes CPU & memory's NUMA topology from ACPI
SRAT table.  Some other ACPI tables, such as NFIT and DMAR, also
contain proximity IDs for their device's NUMA topology.  This
information can be used to improve performance of these devices.

This patch introduces acpi_map_pxm_to_online_node(), which is
similar to acpi_map_pxm_to_node(), but always returns an online
node.  When the mapped node from a given proximity ID is offline,
it looks up the node distance table and returns the nearest
online node.

ACPI device drivers, which are called after the NUMA initialization
has completed in the kernel, can call this interface to obtain their
device NUMA topology from ACPI tables.  Such drivers do not have to
deal with offline nodes.  A node may be offline when a device
proximity ID is unique, SRAT memory entry does not exist, or NUMA is
disabled, ex. "numa=off" on x86.

This patch also moves the pxm range check from acpi_get_node() to
acpi_map_pxm_to_node().

Signed-off-by: Toshi Kani <toshi.kani@hp.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/acpi.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index e4da5e35e29c..1b3bbb11d11c 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -289,8 +289,13 @@ extern void acpi_dmi_osi_linux(int enable, const struct dmi_system_id *d);
 extern void acpi_osi_setup(char *str);
 
 #ifdef CONFIG_ACPI_NUMA
+int acpi_map_pxm_to_online_node(int pxm);
 int acpi_get_node(acpi_handle handle);
 #else
+static inline int acpi_map_pxm_to_online_node(int pxm)
+{
+	return 0;
+}
 static inline int acpi_get_node(acpi_handle handle)
 {
 	return 0;
-- 
cgit v1.2.3


From 41d7a6d637e1440f5410cb43c25a3c41255540c5 Mon Sep 17 00:00:00 2001
From: Toshi Kani <toshi.kani@hp.com>
Date: Fri, 19 Jun 2015 12:18:33 -0600
Subject: libnvdimm: Set numa_node to NVDIMM devices

ACPI NFIT table has System Physical Address Range Structure entries that
describe a proximity ID of each range when ACPI_NFIT_PROXIMITY_VALID is
set in the flags.

Change acpi_nfit_register_region() to map a proximity ID to its node ID,
and set it to a new numa_node field of nd_region_desc, which is then
conveyed to the nd_region device.

The device core arranges for btt and namespace devices to inherit their
node from their parent region.

Signed-off-by: Toshi Kani <toshi.kani@hp.com>
[djbw: move set_dev_node() from region.c to bus.c]
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/libnvdimm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index dc799a29ed1a..30b3deaafd51 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -89,6 +89,7 @@ struct nd_region_desc {
 	struct nd_interleave_set *nd_set;
 	void *provider_data;
 	int num_lanes;
+	int numa_node;
 };
 
 struct nvdimm_bus;
-- 
cgit v1.2.3


From 74ae66c3b14ffa94c8d2dea201cdf8e6203d13d5 Mon Sep 17 00:00:00 2001
From: Toshi Kani <toshi.kani@hp.com>
Date: Fri, 19 Jun 2015 12:18:34 -0600
Subject: libnvdimm: Add sysfs numa_node to NVDIMM devices

Add support of sysfs 'numa_node' to I/O-related NVDIMM devices
under /sys/bus/nd/devices, regionN, namespaceN.0, and bttN.x.

An example of numa_node values on a 2-socket system with a single
NVDIMM range on each socket is shown below.
  /sys/bus/nd/devices
  |-- btt0.0/numa_node:0
  |-- btt1.0/numa_node:1
  |-- btt1.1/numa_node:1
  |-- namespace0.0/numa_node:0
  |-- namespace1.0/numa_node:1
  |-- region0/numa_node:0
  |-- region1/numa_node:1

These numa_node files are then linked under the block class of
their device names.
  /sys/class/block/pmem0/device/numa_node:0
  /sys/class/block/pmem1s/device/numa_node:1

This enables numactl(8) to accept 'block:' and 'file:' paths of
pmem and btt devices as shown in the examples below.
  numactl --preferred block:pmem0 --show
  numactl --preferred file:/dev/pmem1s --show

Signed-off-by: Toshi Kani <toshi.kani@hp.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/libnvdimm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 30b3deaafd51..75e3af01ee32 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -38,6 +38,7 @@ enum {
 extern struct attribute_group nvdimm_bus_attribute_group;
 extern struct attribute_group nvdimm_attribute_group;
 extern struct attribute_group nd_device_attribute_group;
+extern struct attribute_group nd_numa_attribute_group;
 extern struct attribute_group nd_region_attribute_group;
 extern struct attribute_group nd_mapping_attribute_group;
 
-- 
cgit v1.2.3


From 61031952f4c89dba1065f7a5b9419badb112554c Mon Sep 17 00:00:00 2001
From: Ross Zwisler <ross.zwisler@linux.intel.com>
Date: Thu, 25 Jun 2015 03:08:39 -0400
Subject: arch, x86: pmem api for ensuring durability of persistent memory
 updates

Based on an original patch by Ross Zwisler [1].

Writes to persistent memory have the potential to be posted to cpu
cache, cpu write buffers, and platform write buffers (memory controller)
before being committed to persistent media.  Provide apis,
memcpy_to_pmem(), wmb_pmem(), and memremap_pmem(), to write data to
pmem and assert that it is durable in PMEM (a persistent linear address
range).  A '__pmem' attribute is added so sparse can track proper usage
of pointers to pmem.

This continues the status quo of pmem being x86 only for 4.2, but
reworks to ioremap, and wider implementation of memremap() will enable
other archs in 4.3.

[1]: https://lists.01.org/pipermail/linux-nvdimm/2015-May/000932.html

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
[djbw: various reworks]
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/compiler.h |   2 +
 include/linux/pmem.h     | 153 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 155 insertions(+)
 create mode 100644 include/linux/pmem.h

(limited to 'include/linux')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 867722591be2..9a528d945498 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -21,6 +21,7 @@
 # define __rcu		__attribute__((noderef, address_space(4)))
 #else
 # define __rcu
+# define __pmem		__attribute__((noderef, address_space(5)))
 #endif
 extern void __chk_user_ptr(const volatile void __user *);
 extern void __chk_io_ptr(const volatile void __iomem *);
@@ -42,6 +43,7 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 # define __cond_lock(x,c) (c)
 # define __percpu
 # define __rcu
+# define __pmem
 #endif
 
 /* Indirect macros required for expanded argument pasting, eg. __LINE__. */
diff --git a/include/linux/pmem.h b/include/linux/pmem.h
new file mode 100644
index 000000000000..f6481a0b1d4f
--- /dev/null
+++ b/include/linux/pmem.h
@@ -0,0 +1,153 @@
+/*
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#ifndef __PMEM_H__
+#define __PMEM_H__
+
+#include <linux/io.h>
+
+#ifdef CONFIG_ARCH_HAS_PMEM_API
+#include <asm/cacheflush.h>
+#else
+static inline void arch_wmb_pmem(void)
+{
+	BUG();
+}
+
+static inline bool __arch_has_wmb_pmem(void)
+{
+	return false;
+}
+
+static inline void __pmem *arch_memremap_pmem(resource_size_t offset,
+		unsigned long size)
+{
+	return NULL;
+}
+
+static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
+		size_t n)
+{
+	BUG();
+}
+#endif
+
+/*
+ * Architectures that define ARCH_HAS_PMEM_API must provide
+ * implementations for arch_memremap_pmem(), arch_memcpy_to_pmem(),
+ * arch_wmb_pmem(), and __arch_has_wmb_pmem().
+ */
+
+static inline void memcpy_from_pmem(void *dst, void __pmem const *src, size_t size)
+{
+	memcpy(dst, (void __force const *) src, size);
+}
+
+static inline void memunmap_pmem(void __pmem *addr)
+{
+	iounmap((void __force __iomem *) addr);
+}
+
+/**
+ * arch_has_wmb_pmem - true if wmb_pmem() ensures durability
+ *
+ * For a given cpu implementation within an architecture it is possible
+ * that wmb_pmem() resolves to a nop.  In the case this returns
+ * false, pmem api users are unable to ensure durability and may want to
+ * fall back to a different data consistency model, or otherwise notify
+ * the user.
+ */
+static inline bool arch_has_wmb_pmem(void)
+{
+	if (IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API))
+		return __arch_has_wmb_pmem();
+	return false;
+}
+
+static inline bool arch_has_pmem_api(void)
+{
+	return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API) && arch_has_wmb_pmem();
+}
+
+/*
+ * These defaults seek to offer decent performance and minimize the
+ * window between i/o completion and writes being durable on media.
+ * However, it is undefined / architecture specific whether
+ * default_memremap_pmem + default_memcpy_to_pmem is sufficient for
+ * making data durable relative to i/o completion.
+ */
+static void default_memcpy_to_pmem(void __pmem *dst, const void *src,
+		size_t size)
+{
+	memcpy((void __force *) dst, src, size);
+}
+
+static void __pmem *default_memremap_pmem(resource_size_t offset,
+		unsigned long size)
+{
+	/* TODO: convert to ioremap_wt() */
+	return (void __pmem __force *)ioremap_nocache(offset, size);
+}
+
+/**
+ * memremap_pmem - map physical persistent memory for pmem api
+ * @offset: physical address of persistent memory
+ * @size: size of the mapping
+ *
+ * Establish a mapping of the architecture specific memory type expected
+ * by memcpy_to_pmem() and wmb_pmem().  For example, it may be
+ * the case that an uncacheable or writethrough mapping is sufficient,
+ * or a writeback mapping provided memcpy_to_pmem() and
+ * wmb_pmem() arrange for the data to be written through the
+ * cache to persistent media.
+ */
+static inline void __pmem *memremap_pmem(resource_size_t offset,
+		unsigned long size)
+{
+	if (arch_has_pmem_api())
+		return arch_memremap_pmem(offset, size);
+	return default_memremap_pmem(offset, size);
+}
+
+/**
+ * memcpy_to_pmem - copy data to persistent memory
+ * @dst: destination buffer for the copy
+ * @src: source buffer for the copy
+ * @n: length of the copy in bytes
+ *
+ * Perform a memory copy that results in the destination of the copy
+ * being effectively evicted from, or never written to, the processor
+ * cache hierarchy after the copy completes.  After memcpy_to_pmem()
+ * data may still reside in cpu or platform buffers, so this operation
+ * must be followed by a wmb_pmem().
+ */
+static inline void memcpy_to_pmem(void __pmem *dst, const void *src, size_t n)
+{
+	if (arch_has_pmem_api())
+		arch_memcpy_to_pmem(dst, src, n);
+	else
+		default_memcpy_to_pmem(dst, src, n);
+}
+
+/**
+ * wmb_pmem - synchronize writes to persistent memory
+ *
+ * After a series of memcpy_to_pmem() operations this drains data from
+ * cpu write buffers and any platform (memory controller) buffers to
+ * ensure that written data is durable on persistent memory media.
+ */
+static inline void wmb_pmem(void)
+{
+	if (arch_has_pmem_api())
+		arch_wmb_pmem();
+}
+#endif /* __PMEM_H__ */
-- 
cgit v1.2.3


From a9730fca9946f3697410479e0ef1bd759ba00a77 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux.com>
Date: Mon, 29 Jun 2015 09:28:08 -0500
Subject: Fix kmalloc slab creation sequence

This patch restores the slab creation sequence that was broken by commit
4066c33d0308f8 and also reverts the portions that introduced the
KMALLOC_LOOP_XXX macros. Those can never really work since the slab creation
is much more complex than just going from a minimum to a maximum number.

The latest upstream kernel boots cleanly on my machine with a 64 bit x86
configuration under KVM using either SLAB or SLUB.

Fixes: 4066c33d0308f8 ("support the slub_debug boot option")
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slab.h | 22 ----------------------
 1 file changed, 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 9de2fdc8b5e4..a99f0e5243e1 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -153,30 +153,8 @@ size_t ksize(const void *);
 #define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN
 #define KMALLOC_MIN_SIZE ARCH_DMA_MINALIGN
 #define KMALLOC_SHIFT_LOW ilog2(ARCH_DMA_MINALIGN)
-/*
- * The KMALLOC_LOOP_LOW is the definition for the for loop index start number
- * to create the kmalloc_caches object in create_kmalloc_caches(). The first
- * and the second are 96 and 192. You can see that in the kmalloc_index(), if
- * the KMALLOC_MIN_SIZE <= 32, then return 1 (96). If KMALLOC_MIN_SIZE <= 64,
- * then return 2 (192). If the KMALLOC_MIN_SIZE is bigger than 64, we don't
- * need to initialize 96 and 192. Go directly to start the KMALLOC_SHIFT_LOW.
- */
-#if KMALLOC_MIN_SIZE <= 32
-#define KMALLOC_LOOP_LOW 1
-#elif KMALLOC_MIN_SIZE <= 64
-#define KMALLOC_LOOP_LOW 2
-#else
-#define KMALLOC_LOOP_LOW KMALLOC_SHIFT_LOW
-#endif
-
 #else
 #define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
-/*
- * The KMALLOC_MIN_SIZE of slub/slab/slob is 2^3/2^5/2^3. So, even slab is used.
- * The KMALLOC_MIN_SIZE <= 32. The kmalloc-96 and kmalloc-192 should also be
- * initialized.
- */
-#define KMALLOC_LOOP_LOW 1
 #endif
 
 /*
-- 
cgit v1.2.3