From a9aa53df6e6c768fc0f25a7c80ba586b0290720a Mon Sep 17 00:00:00 2001
From: Simo Sorce <simo@redhat.com>
Date: Thu, 29 Mar 2012 19:18:19 -0400
Subject: svcauth: remove unused define

Signed-off-by: Simo Sorce <simo@redhat.com>
---
 include/linux/sunrpc/svcauth.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h
index 548790e9113b..2e2af101b59c 100644
--- a/include/linux/sunrpc/svcauth.h
+++ b/include/linux/sunrpc/svcauth.h
@@ -16,7 +16,6 @@
 #include <linux/sunrpc/cache.h>
 #include <linux/hash.h>
 
-#define SVC_CRED_NGROUPS	32
 struct svc_cred {
 	uid_t			cr_uid;
 	gid_t			cr_gid;
-- 
cgit v1.2.3


From db3a35326362624dd4d8473e676d63afa52bedcc Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Wed, 28 Mar 2012 19:09:08 +0400
Subject: nfsd: add link to owner cache detail to svc_export structure

Without info about owner cache datail it won't be able to find out, which
per-net cache detail have to be.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/nfsd/export.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index f85308e688fd..64455292bbba 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -103,6 +103,7 @@ struct svc_export {
 	struct nfsd4_fs_locations ex_fslocs;
 	int			ex_nflavors;
 	struct exp_flavor_info	ex_flavors[MAX_SECINFO_LIST];
+	struct cache_detail	*cd;
 };
 
 /* an "export key" (expkey) maps a filehandlefragement to an
-- 
cgit v1.2.3


From 71234978e81ee515c8025d087a197561b311c183 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Wed, 28 Mar 2012 19:09:15 +0400
Subject: nfsd: use cache detail pointer from svc_export structure on cache put

Hard-coded pointer is redundant now and can be replaced.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/nfsd/export.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index 64455292bbba..485c2afa96f7 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -147,7 +147,7 @@ extern struct cache_detail svc_export_cache;
 
 static inline void exp_put(struct svc_export *exp)
 {
-	cache_put(&exp->h, &svc_export_cache);
+	cache_put(&exp->h, exp->cd);
 }
 
 static inline void exp_get(struct svc_export *exp)
-- 
cgit v1.2.3


From e3f70eadb7dddfb5a2bb9afff7abfc6ee17a29d0 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Thu, 29 Mar 2012 18:54:33 +0400
Subject: Lockd: pass network namespace to creation and destruction routines

v2: dereference of most probably already released nlm_host removed in
nlmclnt_done() and reclaimer().

These routines are called from locks reclaimer() kernel thread. This thread
works in "init_net" network context and currently relays on persence on lockd
thread and it's per-net resources. Thus lockd_up() and lockd_down() can't relay
on current network context. So let's pass corrent one into them.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/lockd/bind.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h
index 11a966e5f829..4d24d64578c4 100644
--- a/include/linux/lockd/bind.h
+++ b/include/linux/lockd/bind.h
@@ -54,7 +54,7 @@ extern void	nlmclnt_done(struct nlm_host *host);
 
 extern int	nlmclnt_proc(struct nlm_host *host, int cmd,
 					struct file_lock *fl);
-extern int	lockd_up(void);
-extern void	lockd_down(void);
+extern int	lockd_up(struct net *net);
+extern void	lockd_down(struct net *net);
 
 #endif /* LINUX_LOCKD_BIND_H */
-- 
cgit v1.2.3


From b89109bef4a6a4a8ab5788778ee0addca0787870 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Wed, 11 Apr 2012 15:13:14 +0400
Subject: nfsd: pass network context to export caches init/shutdown routines

These functions will be called from per-net operations.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/nfsd/export.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index 485c2afa96f7..375096c083d3 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -130,8 +130,8 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp);
 /*
  * Function declarations
  */
-int			nfsd_export_init(void);
-void			nfsd_export_shutdown(void);
+int			nfsd_export_init(struct net *);
+void			nfsd_export_shutdown(struct net *);
 void			nfsd_export_flush(void);
 struct svc_export *	rqst_exp_get_by_name(struct svc_rqst *,
 					     struct path *);
-- 
cgit v1.2.3


From b3853e0ea1f2ef58f7e7c03e47819e2ae3766dea Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Wed, 11 Apr 2012 15:13:21 +0400
Subject: nfsd: make export cache allocated per network namespace context

This patch also changes prototypes of nfsd_export_flush() and exp_rootfh():
network namespace parameter added.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/nfsd/export.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index 375096c083d3..565c2122993f 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -132,13 +132,13 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp);
  */
 int			nfsd_export_init(struct net *);
 void			nfsd_export_shutdown(struct net *);
-void			nfsd_export_flush(void);
+void			nfsd_export_flush(struct net *);
 struct svc_export *	rqst_exp_get_by_name(struct svc_rqst *,
 					     struct path *);
 struct svc_export *	rqst_exp_parent(struct svc_rqst *,
 					struct path *);
 struct svc_export *	rqst_find_fsidzero_export(struct svc_rqst *);
-int			exp_rootfh(struct auth_domain *, 
+int			exp_rootfh(struct net *, struct auth_domain *,
 					char *path, struct knfsd_fh *, int maxsize);
 __be32			exp_pseudoroot(struct svc_rqst *, struct svc_fh *);
 __be32			nfserrno(int errno);
-- 
cgit v1.2.3


From e5f06f720eff24e32f1cc08ec03bcc8c4b2d2934 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Wed, 11 Apr 2012 15:13:28 +0400
Subject: nfsd: make expkey cache allocated per network namespace context

This patch also changes svcauth_unix_purge() function: added network namespace
as a parameter and thus loop over all networks was replaced by only one call
for ip map cache purge.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/nfsd/export.h    | 2 --
 include/linux/sunrpc/svcauth.h | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index 565c2122993f..e33f747b173c 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -143,8 +143,6 @@ int			exp_rootfh(struct net *, struct auth_domain *,
 __be32			exp_pseudoroot(struct svc_rqst *, struct svc_fh *);
 __be32			nfserrno(int errno);
 
-extern struct cache_detail svc_export_cache;
-
 static inline void exp_put(struct svc_export *exp)
 {
 	cache_put(&exp->h, exp->cd);
diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h
index 2e2af101b59c..2c54683b91de 100644
--- a/include/linux/sunrpc/svcauth.h
+++ b/include/linux/sunrpc/svcauth.h
@@ -130,7 +130,7 @@ extern struct auth_domain *auth_domain_lookup(char *name, struct auth_domain *ne
 extern struct auth_domain *auth_domain_find(char *name);
 extern struct auth_domain *auth_unix_lookup(struct net *net, struct in6_addr *addr);
 extern int auth_unix_forget_old(struct auth_domain *dom);
-extern void svcauth_unix_purge(void);
+extern void svcauth_unix_purge(struct net *net);
 extern void svcauth_unix_info_release(struct svc_xprt *xpt);
 extern int svcauth_unix_set_client(struct svc_rqst *rqstp);
 
-- 
cgit v1.2.3


From 05ba1f0823004e947748523782e9c2f07f3bff0d Mon Sep 17 00:00:00 2001
From: Anatol Pomozov <anatol.pomozov@gmail.com>
Date: Sun, 22 Apr 2012 18:45:24 -0700
Subject: fuse: add FALLOCATE operation

fallocate filesystem operation preallocates media space for the given file.
If fallocate returns success then any subsequent write to the given range
never fails with 'not enough space' error.

Signed-off-by: Anatol Pomozov <anatol.pomozov@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 include/linux/fuse.h | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index 8f2ab8fef929..9303348965fb 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -54,6 +54,9 @@
  * 7.18
  *  - add FUSE_IOCTL_DIR flag
  *  - add FUSE_NOTIFY_DELETE
+ *
+ * 7.19
+ *  - add FUSE_FALLOCATE
  */
 
 #ifndef _LINUX_FUSE_H
@@ -85,7 +88,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 18
+#define FUSE_KERNEL_MINOR_VERSION 19
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -278,6 +281,7 @@ enum fuse_opcode {
 	FUSE_POLL          = 40,
 	FUSE_NOTIFY_REPLY  = 41,
 	FUSE_BATCH_FORGET  = 42,
+	FUSE_FALLOCATE     = 43,
 
 	/* CUSE specific operations */
 	CUSE_INIT          = 4096,
@@ -571,6 +575,14 @@ struct fuse_notify_poll_wakeup_out {
 	__u64	kh;
 };
 
+struct fuse_fallocate_in {
+	__u64	fh;
+	__u64	offset;
+	__u64	length;
+	__u32	mode;
+	__u32	padding;
+};
+
 struct fuse_in_header {
 	__u32	len;
 	__u32	opcode;
-- 
cgit v1.2.3


From 1f45f9dbb392f9ca0919e9cd2370ab66ae752ec8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Heiko=20St=C3=BCbner?= <heiko@sntech.de>
Date: Sat, 28 Apr 2012 12:19:10 +0200
Subject: fb_defio: add first_io callback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With this optional callback the driver is notified when the first page
is entered into the pagelist and a new deferred_io call is scheduled.

A possible use-case for this is runtime-pm. In the first_io call
	pm_runtime_get()
could be called, which starts an asynchronous runtime_resume of the
device. In the deferred_io callback a call to
	pm_runtime_barrier()
makes the sure, the device is resumed by then and a
	pm_runtime_put()
may put the device back to sleep.

Also, some SoCs may use the runtime-pm system to determine if they
are able to enter deeper idle states. Therefore it is necessary to
keep the use-count from the first written page until the conclusion
of the screen update, to prevent the system from going to sleep before
completing the pending update.

Two users of defio were using kmalloc to allocate the structure.
These allocations are changed to kzalloc, to prevent uninitialised
.first_io members in those drivers.

Signed-off-by: Heiko Stübner <heiko@sntech.de>
Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
---
 include/linux/fb.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/fb.h b/include/linux/fb.h
index d31cb682e173..c10e71efb8f5 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -607,6 +607,7 @@ struct fb_deferred_io {
 	struct mutex lock; /* mutex that protects the page list */
 	struct list_head pagelist; /* list of touched pages */
 	/* callback */
+	void (*first_io)(struct fb_info *info);
 	void (*deferred_io)(struct fb_info *info, struct list_head *pagelist);
 };
 #endif
-- 
cgit v1.2.3


From f84891289e62a74e9b4942eaad80617368b2d778 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@us.ibm.com>
Date: Sun, 29 Apr 2012 18:21:10 -0400
Subject: ext4: create a new BH_Verified flag to avoid unnecessary metadata
 validation

Create a new BH_Verified flag to indicate that we've verified all the
data in a buffer_head for correctness.  This allows us to bypass
expensive verification steps when they are not necessary without
missing them when they are.

Signed-off-by: Darrick J. Wong <djwong@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 include/linux/jbd_common.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/jbd_common.h b/include/linux/jbd_common.h
index 6230f8556a4e..6133679bc4c0 100644
--- a/include/linux/jbd_common.h
+++ b/include/linux/jbd_common.h
@@ -12,6 +12,7 @@ enum jbd_state_bits {
 	BH_State,		/* Pins most journal_head state */
 	BH_JournalHead,		/* Pins bh->b_private and jh->b_bh */
 	BH_Unshadow,		/* Dummy bit, for BJ_Shadow wakeup filtering */
+	BH_Verified,		/* Metadata block has been verified ok */
 	BH_JBDPrivateStart,	/* First bit available for private use by FS */
 };
 
@@ -24,6 +25,7 @@ TAS_BUFFER_FNS(Revoked, revoked)
 BUFFER_FNS(RevokeValid, revokevalid)
 TAS_BUFFER_FNS(RevokeValid, revokevalid)
 BUFFER_FNS(Freed, freed)
+BUFFER_FNS(Verified, verified)
 
 static inline struct buffer_head *jh2bh(struct journal_head *jh)
 {
-- 
cgit v1.2.3


From a2ebfe2fc6e088a70d06cd15a5bc9bcb621cc195 Mon Sep 17 00:00:00 2001
From: Ramakrishna Pallala <ramakrishna.pallala@intel.com>
Date: Tue, 10 Apr 2012 16:21:20 +0530
Subject: power_supply: Add voltage_ocv property and use it for max17042 driver

This adds a new sysfs file called 'voltage_ocv' which gives the
Open Circuit Voltage of the battery.

This property can be used for platform shutdown policies and
can be useful for initial capacity estimations.

Note: This patch is generated against linux-next branch.

Signed-off-by: Ramakrishna Pallala <ramakrishna.pallala@intel.com>
Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
---
 include/linux/power_supply.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index c38c13db8832..fd17ae0f9c20 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -96,6 +96,7 @@ enum power_supply_property {
 	POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN,
 	POWER_SUPPLY_PROP_VOLTAGE_NOW,
 	POWER_SUPPLY_PROP_VOLTAGE_AVG,
+	POWER_SUPPLY_PROP_VOLTAGE_OCV,
 	POWER_SUPPLY_PROP_CURRENT_MAX,
 	POWER_SUPPLY_PROP_CURRENT_NOW,
 	POWER_SUPPLY_PROP_CURRENT_AVG,
@@ -261,6 +262,7 @@ static inline bool power_supply_is_watt_property(enum power_supply_property psp)
 	case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN:
 	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
 	case POWER_SUPPLY_PROP_VOLTAGE_AVG:
+	case POWER_SUPPLY_PROP_VOLTAGE_OCV:
 	case POWER_SUPPLY_PROP_POWER_NOW:
 		return 1;
 	default:
-- 
cgit v1.2.3


From 0d4ed4e27a4cb180af395fa3d7aa98d79f3d3015 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <cbouatmailru@gmail.com>
Date: Fri, 4 May 2012 21:06:19 -0700
Subject: power_supply: Make the core a boolean instead of a tristate

On Mon, Apr 02, 2012 at 01:53:23PM +1000, Benjamin Herrenschmidt wrote:
> > drivers/built-in.o: In function `.nouveau_pm_trigger':
> > (.text+0xa56e8): undefined reference to `.power_supply_is_system_supplied'
> >
> > nouveau probably needs to depends on CONFIG_POWER_SUPPLY to force a module
> > build with the latter is =m
>
> Ok, not that trivial...
>
> The problem is more like POWER_SUPPLY should be a bool, not a tristate.
>
> If you think about it: you don't want things like nouveau to depend on a
> random subsystem like that, people will never get it. In fact,
> POWER_SUPPLY provides empty inline stubs when not enabled, so that's
> really designed to not have depends...
>
> However that -cannot- work if POWER_SUPPLY is modular and the drivers
> who use it are not.
>
> The only fixes here that make sense I can think of
> that don't also involve Kconfig horrors are:
>
>  - Ugly: in power_supply.h, use the extern variant if
>
>       defined(CONFIG_POWER_SUPPLY) ||
>        (defined(CONFIG_POWER_SUPPLY_MODULE) && defined(MODULE))
>
> IE. use the stub if power supply is a module and what is being built is
> built-in. Of course that's not only ugly, it somewhat sucks from a user
> perspective as the subsystem now exists but can't be used by some
> drivers...
>
>  - Better: Just make the bloody thing a bool :-) The power supply
> framework itself is small enough, just make it a boolean option and
> avoid the problem entirely. The actual power supply sub drivers can
> remain modular of course.

Suggested-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 include/linux/power_supply.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index fd17ae0f9c20..3b912bee28d1 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -212,7 +212,7 @@ extern void power_supply_changed(struct power_supply *psy);
 extern int power_supply_am_i_supplied(struct power_supply *psy);
 extern int power_supply_set_battery_charged(struct power_supply *psy);
 
-#if defined(CONFIG_POWER_SUPPLY) || defined(CONFIG_POWER_SUPPLY_MODULE)
+#ifdef CONFIG_POWER_SUPPLY
 extern int power_supply_is_system_supplied(void);
 #else
 static inline int power_supply_is_system_supplied(void) { return -ENOSYS; }
-- 
cgit v1.2.3


From d829dc75bafb10754f35fb8895e5143d20267b04 Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Sat, 5 May 2012 06:24:10 -0700
Subject: charger-manager: Poll battery health in normal state

Charger-Manager needs to check battery health in normal state
as well as suspend-to-RAM state. When the battery is fully charged,
Charger-Manager needs to determine when the chargers restart charging.

This patch allows Charger-Manager to monitor battery health in normal
state and handle operation for chargers after battery is fully charged.

Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
Signed-off-by: Donggeun Kim <dg77.kim@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
---
 include/linux/power/charger-manager.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/power/charger-manager.h b/include/linux/power/charger-manager.h
index 4f75e531c112..baa299a95e13 100644
--- a/include/linux/power/charger-manager.h
+++ b/include/linux/power/charger-manager.h
@@ -18,6 +18,8 @@
 #include <linux/power_supply.h>
 
 enum data_source {
+	CM_BATTERY_PRESENT,
+	CM_NO_BATTERY,
 	CM_FUEL_GAUGE,
 	CM_CHARGER_STAT,
 };
@@ -38,11 +40,18 @@ enum polling_modes {
  *	rtc_only_wakeup() returning false.
  *	If the RTC given to CM is the only wakeup reason,
  *	rtc_only_wakeup should return true.
+ * @assume_timer_stops_in_suspend:
+ *	Assume that the jiffy timer stops in suspend-to-RAM.
+ *	When enabled, CM does not rely on jiffies value in
+ *	suspend_again and assumes that jiffies value does not
+ *	change during suspend.
  */
 struct charger_global_desc {
 	char *rtc_name;
 
 	bool (*rtc_only_wakeup)(void);
+
+	bool assume_timer_stops_in_suspend;
 };
 
 /**
@@ -50,6 +59,11 @@ struct charger_global_desc {
  * @psy_name: the name of power-supply-class for charger manager
  * @polling_mode:
  *	Determine which polling mode will be used
+ * @fullbatt_vchkdrop_ms:
+ * @fullbatt_vchkdrop_uV:
+ *	Check voltage drop after the battery is fully charged.
+ *	If it has dropped more than fullbatt_vchkdrop_uV after
+ *	fullbatt_vchkdrop_ms, CM will restart charging.
  * @fullbatt_uV: voltage in microvolt
  *	If it is not being charged and VBATT >= fullbatt_uV,
  *	it is assumed to be full.
@@ -76,6 +90,8 @@ struct charger_desc {
 	enum polling_modes polling_mode;
 	unsigned int polling_interval_ms;
 
+	unsigned int fullbatt_vchkdrop_ms;
+	unsigned int fullbatt_vchkdrop_uV;
 	unsigned int fullbatt_uV;
 
 	enum data_source battery_present;
@@ -101,6 +117,11 @@ struct charger_desc {
  * @fuel_gauge: power_supply for fuel gauge
  * @charger_stat: array of power_supply for chargers
  * @charger_enabled: the state of charger
+ * @fullbatt_vchk_jiffies_at:
+ *	jiffies at the time full battery check will occur.
+ * @fullbatt_vchk_uV: voltage in microvolt
+ *	criteria for full battery
+ * @fullbatt_vchk_work: work queue for full battery check
  * @emergency_stop:
  *	When setting true, stop charging
  * @last_temp_mC: the measured temperature in milli-Celsius
@@ -121,6 +142,10 @@ struct charger_manager {
 
 	bool charger_enabled;
 
+	unsigned long fullbatt_vchk_jiffies_at;
+	unsigned int fullbatt_vchk_uV;
+	struct delayed_work fullbatt_vchk_work;
+
 	int emergency_stop;
 	int last_temp_mC;
 
-- 
cgit v1.2.3


From dfeccb12b4614befc49a92eb121c2211294ca669 Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Sat, 5 May 2012 06:26:47 -0700
Subject: charger-manager: Provide cm_notify_event function for in-kernel use

By using cm_notify_event function, charger driver can report several
charger events (e.g. battery full and external power in/out, etc) to
Charger-Manager. Charger-Manager can properly and immediately control
chargers by the reported event.

Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
Signed-off-by: Donggeun Kim <dg77.kim@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
---
 include/linux/power/charger-manager.h | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/power/charger-manager.h b/include/linux/power/charger-manager.h
index baa299a95e13..241065c9ce51 100644
--- a/include/linux/power/charger-manager.h
+++ b/include/linux/power/charger-manager.h
@@ -31,6 +31,16 @@ enum polling_modes {
 	CM_POLL_CHARGING_ONLY,
 };
 
+enum cm_event_types {
+	CM_EVENT_UNKNOWN = 0,
+	CM_EVENT_BATT_FULL,
+	CM_EVENT_BATT_IN,
+	CM_EVENT_BATT_OUT,
+	CM_EVENT_EXT_PWR_IN_OUT,
+	CM_EVENT_CHG_START_STOP,
+	CM_EVENT_OTHERS,
+};
+
 /**
  * struct charger_global_desc
  * @rtc_name: the name of RTC used to wake up the system from suspend.
@@ -159,14 +169,13 @@ struct charger_manager {
 #ifdef CONFIG_CHARGER_MANAGER
 extern int setup_charger_manager(struct charger_global_desc *gd);
 extern bool cm_suspend_again(void);
+extern void cm_notify_event(struct power_supply *psy,
+				enum cm_event_types type, char *msg);
 #else
-static void __maybe_unused setup_charger_manager(struct charger_global_desc *gd)
-{ }
-
-static bool __maybe_unused cm_suspend_again(void)
-{
-	return false;
-}
+static inline int setup_charger_manager(struct charger_global_desc *gd)
+{ return 0; }
+static inline bool cm_suspend_again(void) { return false; }
+static inline void cm_notify_event(struct power_supply *psy,
+				enum cm_event_types type, char *msg) { }
 #endif
-
 #endif /* _CHARGER_MANAGER_H */
-- 
cgit v1.2.3


From 9a8422d205ea142a27c2573e5ca3d2cc87d75260 Mon Sep 17 00:00:00 2001
From: Ramakrishna Pallala <ramakrishna.pallala@intel.com>
Date: Sat, 5 May 2012 14:34:26 +0530
Subject: max17042_battery: Add support for max17047/50 chip

max17047 is improved version of max17042 chip. It has few HW bug
fixes with minor changes in register set.

max17050 is same as max17047 chip except its silicon packging. So from
driver's point of view there is no difference btw max1047 and max1050.

This patch adds the support to dynamically detect the chip type and
adds steps to initialize the max17047 chip.

Signed-off-by: Ramakrishna Pallala <ramakrishna.pallala@intel.com>
Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
---
 include/linux/power/max17042_battery.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/power/max17042_battery.h b/include/linux/power/max17042_battery.h
index e01b167e66f0..89dd84f47c6e 100644
--- a/include/linux/power/max17042_battery.h
+++ b/include/linux/power/max17042_battery.h
@@ -116,6 +116,18 @@ enum max17042_register {
 	MAX17042_VFSOC		= 0xFF,
 };
 
+/* Registers specific to max17047/50 */
+enum max17047_register {
+	MAX17047_QRTbl00	= 0x12,
+	MAX17047_FullSOCThr	= 0x13,
+	MAX17047_QRTbl10	= 0x22,
+	MAX17047_QRTbl20	= 0x32,
+	MAX17047_V_empty	= 0x3A,
+	MAX17047_QRTbl30	= 0x42,
+};
+
+enum max170xx_chip_type {MAX17042, MAX17047};
+
 /*
  * used for setting a register to a desired value
  * addr : address for a register
@@ -144,6 +156,7 @@ struct max17042_config_data {
 	u16	shdntimer;	/* 0x03F */
 
 	/* App data */
+	u16	full_soc_thresh;	/* 0x13 */
 	u16	design_cap;	/* 0x18 */
 	u16	ichgt_term;	/* 0x1E */
 
@@ -162,6 +175,10 @@ struct max17042_config_data {
 	u16	lavg_empty;	/* 0x36 */
 	u16	dqacc;		/* 0x45 */
 	u16	dpacc;		/* 0x46 */
+	u16	qrtbl00;	/* 0x12 */
+	u16	qrtbl10;	/* 0x22 */
+	u16	qrtbl20;	/* 0x32 */
+	u16	qrtbl30;	/* 0x42 */
 
 	/* Cell technology from power_supply.h */
 	u16	cell_technology;
-- 
cgit v1.2.3


From 86c2072be6f3c2150cc35f00233f2c31bdba2745 Mon Sep 17 00:00:00 2001
From: Mike Dunn <mikedunn@newsguy.com>
Date: Wed, 25 Apr 2012 12:06:05 -0700
Subject: mtd: ecc_strength is at ecc step granularity
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ecc_strength element of mtd_info will be the strength of one ecc step, not of
the entire writesize, as was previously planned.  This is the appropriate way
because, as was pointed out¹, bit errors in excess of the strength of one
step can cause a hard error if they all occur within the same ecc region.

¹ http://lists.infradead.org/pipermail/linux-mtd/2012-March/040313.html

Signed-off-by: Mike Dunn <mikedunn@newsguy.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/mtd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index cf5ea8cdcf8e..cd0119d19cd9 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -164,7 +164,7 @@ struct mtd_info {
 	/* ECC layout structure pointer - read only! */
 	struct nand_ecclayout *ecclayout;
 
-	/* max number of correctible bit errors per writesize */
+	/* max number of correctible bit errors per ecc step */
 	unsigned int ecc_strength;
 
 	/* Data for variable erase regions. If numeraseregions is zero,
-- 
cgit v1.2.3


From d062d4ede877fcd2ecc4c6262abad09a6f32950a Mon Sep 17 00:00:00 2001
From: Mike Dunn <mikedunn@newsguy.com>
Date: Wed, 25 Apr 2012 12:06:08 -0700
Subject: mtd: bitflip_threshold added to mtd_info and sysfs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

An element 'bitflip_threshold' is added to struct mtd_info, and also exposed as
a read/write variable in sysfs.  This will be used to determine whether or not
mtd_read() returns -EUCLEAN or 0 (absent a hard error).  If the driver leaves it
as zero, mtd will set it to a default value of ecc_strength.

This v2 adds the line that propagates bitflip_threshold from the master to the
partitions - thanks Ivan¹.

¹ http://lists.infradead.org/pipermail/linux-mtd/2012-April/040900.html

Signed-off-by: Mike Dunn <mikedunn@newsguy.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/mtd.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index cd0119d19cd9..63dadc0dfb62 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -157,6 +157,15 @@ struct mtd_info {
 	unsigned int erasesize_mask;
 	unsigned int writesize_mask;
 
+	/*
+	 * read ops return -EUCLEAN if max number of bitflips corrected on any
+	 * one region comprising an ecc step equals or exceeds this value.
+	 * Settable by driver, else defaults to ecc_strength.  User can override
+	 * in sysfs.  N.B. The meaning of the -EUCLEAN return code has changed;
+	 * see Documentation/ABI/testing/sysfs-class-mtd for more detail.
+	 */
+	unsigned int bitflip_threshold;
+
 	// Kernel-only stuff starts here.
 	const char *name;
 	int index;
-- 
cgit v1.2.3


From edbc4540e02c201bdd4f4d498ebb6ed517fd36e2 Mon Sep 17 00:00:00 2001
From: Mike Dunn <mikedunn@newsguy.com>
Date: Wed, 25 Apr 2012 12:06:11 -0700
Subject: mtd: driver _read() returns max_bitflips; mtd_read() returns -EUCLEAN
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The drivers' _read() method, absent an error, returns a non-negative integer
indicating the maximum number of bit errors that were corrected in any one
region comprising an ecc step.  MTD returns -EUCLEAN if this is >=
bitflip_threshold, 0 otherwise.  If bitflip_threshold is zero, the comparison is
not made since these devices lack ECC and always return zero in the non-error
case (thanks Brian)¹.  Note that this is a subtle change to the driver
interface.

This and the preceding patches in this set were tested with ubi on top of the
nandsim and docg4 devices, running the ubi test io_basic from mtd-utils.

¹ http://lists.infradead.org/pipermail/linux-mtd/2012-March/040468.html

Signed-off-by: Mike Dunn <mikedunn@newsguy.com>
Acked-by: Robert Jarzmik <robert.jarzmik@free.fr>
Acked-by: Brian Norris <computersforpeace@gmail.com>
Ivan Djelic <ivan.djelic@parrot.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/nand.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 1482340d3d9f..2829e8be3a62 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -459,6 +459,8 @@ struct nand_buffers {
  * @pagemask:		[INTERN] page number mask = number of (pages / chip) - 1
  * @pagebuf:		[INTERN] holds the pagenumber which is currently in
  *			data_buf.
+ * @pagebuf_bitflips:	[INTERN] holds the bitflip count for the page which is
+ *			currently in data_buf.
  * @subpagesize:	[INTERN] holds the subpagesize
  * @onfi_version:	[INTERN] holds the chip ONFI version (BCD encoded),
  *			non 0 if ONFI supported.
@@ -519,6 +521,7 @@ struct nand_chip {
 	uint64_t chipsize;
 	int pagemask;
 	int pagebuf;
+	unsigned int pagebuf_bitflips;
 	int subpagesize;
 	uint8_t cellinfo;
 	int badblockpos;
-- 
cgit v1.2.3


From 1826dbccebc9a58a0b0c0a9b7c09e47b19d97398 Mon Sep 17 00:00:00 2001
From: Brian Norris <computersforpeace@gmail.com>
Date: Tue, 1 May 2012 17:12:55 -0700
Subject: mtd: nand: kill NAND_NO_AUTOINCR option

No drivers use auto-increment NAND, so kill the NO_AUTOINCR option entirely.

Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/nand.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 2829e8be3a62..627f0c575ac4 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -161,8 +161,6 @@ typedef enum {
  * Option constants for bizarre disfunctionality and real
  * features.
  */
-/* Chip can not auto increment pages */
-#define NAND_NO_AUTOINCR	0x00000001
 /* Buswidth is 16 bit */
 #define NAND_BUSWIDTH_16	0x00000002
 /* Device supports partial programming without padding */
@@ -207,7 +205,6 @@ typedef enum {
 	(NAND_NO_PADDING | NAND_CACHEPRG | NAND_COPYBACK)
 
 /* Macros to identify the above */
-#define NAND_CANAUTOINCR(chip) (!(chip->options & NAND_NO_AUTOINCR))
 #define NAND_MUST_PAD(chip) (!(chip->options & NAND_NO_PADDING))
 #define NAND_HAS_CACHEPROG(chip) ((chip->options & NAND_CACHEPRG))
 #define NAND_HAS_COPYBACK(chip) ((chip->options & NAND_COPYBACK))
@@ -216,7 +213,7 @@ typedef enum {
 					&& (chip->page_shift > 9))
 
 /* Mask to zero out the chip options, which come from the id table */
-#define NAND_CHIPOPTIONS_MSK	(0x0000ffff & ~NAND_NO_AUTOINCR)
+#define NAND_CHIPOPTIONS_MSK	0x0000ffff
 
 /* Non chip related options */
 /* This option skips the bbt scan during initialization. */
-- 
cgit v1.2.3


From b4f7aa84d6ff44327ab91a2973ebf0c2a7797d24 Mon Sep 17 00:00:00 2001
From: John Crispin <blogic@openwrt.org>
Date: Mon, 30 Apr 2012 19:30:47 +0200
Subject: mtd: add read_byte support to plat_nand

Lantiq SoCs have a External Bus Unit (EBU) that is used to attach MTD media.
As we need to co-exist with PCI on the same bus, certain swapping settings must
be applied. Similar to the NOR map driver we need to apply a fix to make NAND
work. The easiest way is to use byte reads.

Signed-off-by: John Crispin <blogic@openwrt.org>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/nand.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 627f0c575ac4..94a6679bfc2e 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -654,6 +654,7 @@ struct platform_nand_ctrl {
 	void (*cmd_ctrl)(struct mtd_info *mtd, int dat, unsigned int ctrl);
 	void (*write_buf)(struct mtd_info *mtd, const uint8_t *buf, int len);
 	void (*read_buf)(struct mtd_info *mtd, uint8_t *buf, int len);
+	unsigned char (*read_byte)(struct mtd_info *mtd);
 	void *priv;
 };
 
-- 
cgit v1.2.3


From 1fbb938dff5b6bb4514a4e7600276b03c7f08e25 Mon Sep 17 00:00:00 2001
From: Brian Norris <computersforpeace@gmail.com>
Date: Wed, 2 May 2012 10:14:55 -0700
Subject: mtd: nand: add 'oob_required' argument to NAND {read,write}_page
 interfaces

New NAND controllers can perform read/write via HW engines which don't expose
OOB data in their DMA mode. To reflect this, we should rework the nand_chip /
nand_ecc_ctrl interfaces that assume that drivers will always read/write OOB
data in the nand_chip.oob_poi buffer. A better interface includes a boolean
argument that explicitly tells the callee when OOB data is requested by the
calling layer (for reading/writing to/from nand_chip.oob_poi).

This patch adds the 'oob_required' parameter to each relevant {read,write}_page
interface; all 'oob_required' parameters are left unused for now. The next
patch will set the parameter properly in the nand_base.c callers, and follow-up
patches will make use of 'oob_required' in some of the callee functions.

Note that currently, there is no harm in ignoring the 'oob_required' parameter
and *always* utilizing nand_chip.oob_poi, but there can be
performance/complexity/design benefits from avoiding filling oob_poi in the
common case. I will try to implement this for some drivers which can be ported
easily.

Note: I couldn't compile-test all of these easily, as some had ARCH
dependencies.

[dwmw2: Merge later 1/0 vs. true/false cleanup]
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Reviewed-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Acked-by: Jiandong Zheng <jdzheng@broadcom.com>
Acked-by: Mike Dunn <mikedunn@newsguy.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/nand.h | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 94a6679bfc2e..c7755f455c81 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -360,15 +360,15 @@ struct nand_ecc_ctrl {
 	int (*correct)(struct mtd_info *mtd, uint8_t *dat, uint8_t *read_ecc,
 			uint8_t *calc_ecc);
 	int (*read_page_raw)(struct mtd_info *mtd, struct nand_chip *chip,
-			uint8_t *buf, int page);
+			uint8_t *buf, int oob_required, int page);
 	void (*write_page_raw)(struct mtd_info *mtd, struct nand_chip *chip,
-			const uint8_t *buf);
+			const uint8_t *buf, int oob_required);
 	int (*read_page)(struct mtd_info *mtd, struct nand_chip *chip,
-			uint8_t *buf, int page);
+			uint8_t *buf, int oob_required, int page);
 	int (*read_subpage)(struct mtd_info *mtd, struct nand_chip *chip,
 			uint32_t offs, uint32_t len, uint8_t *buf);
 	void (*write_page)(struct mtd_info *mtd, struct nand_chip *chip,
-			const uint8_t *buf);
+			const uint8_t *buf, int oob_required);
 	int (*write_oob_raw)(struct mtd_info *mtd, struct nand_chip *chip,
 			int page);
 	int (*read_oob_raw)(struct mtd_info *mtd, struct nand_chip *chip,
@@ -504,7 +504,8 @@ struct nand_chip {
 	int (*errstat)(struct mtd_info *mtd, struct nand_chip *this, int state,
 			int status, int page);
 	int (*write_page)(struct mtd_info *mtd, struct nand_chip *chip,
-			const uint8_t *buf, int page, int cached, int raw);
+			const uint8_t *buf, int oob_required, int page,
+			int cached, int raw);
 
 	int chip_delay;
 	unsigned int options;
-- 
cgit v1.2.3


From e10db1f00a5e3c2ec04d7fe26c7444dc55a59b19 Mon Sep 17 00:00:00 2001
From: Huang Shijie <b32955@freescale.com>
Date: Fri, 4 May 2012 21:42:05 -0400
Subject: mtd: gpmi: add device tree support to gpmi-nand

This patch just adds the DT support to gpmi-nand.

Signed-off-by: Huang Shijie <b32955@freescale.com>
Signed-off-by: Huang Shijie <shijie8@gmail.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/gpmi-nand.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/gpmi-nand.h b/include/linux/mtd/gpmi-nand.h
index 69b6dbf46b5e..ed3c4e09f3d1 100644
--- a/include/linux/mtd/gpmi-nand.h
+++ b/include/linux/mtd/gpmi-nand.h
@@ -23,12 +23,12 @@
 #define GPMI_NAND_RES_SIZE	6
 
 /* Resource names for the GPMI NAND driver. */
-#define GPMI_NAND_GPMI_REGS_ADDR_RES_NAME  "GPMI NAND GPMI Registers"
+#define GPMI_NAND_GPMI_REGS_ADDR_RES_NAME  "gpmi-nand"
 #define GPMI_NAND_GPMI_INTERRUPT_RES_NAME  "GPMI NAND GPMI Interrupt"
-#define GPMI_NAND_BCH_REGS_ADDR_RES_NAME   "GPMI NAND BCH Registers"
-#define GPMI_NAND_BCH_INTERRUPT_RES_NAME   "GPMI NAND BCH Interrupt"
+#define GPMI_NAND_BCH_REGS_ADDR_RES_NAME   "bch"
+#define GPMI_NAND_BCH_INTERRUPT_RES_NAME   "bch"
 #define GPMI_NAND_DMA_CHANNELS_RES_NAME    "GPMI NAND DMA Channels"
-#define GPMI_NAND_DMA_INTERRUPT_RES_NAME   "GPMI NAND DMA Interrupt"
+#define GPMI_NAND_DMA_INTERRUPT_RES_NAME   "gpmi-dma"
 
 /**
  * struct gpmi_nand_platform_data - GPMI NAND driver platform data.
-- 
cgit v1.2.3


From 5c2ffb11d40dd967eecb45b8570a871746ba124b Mon Sep 17 00:00:00 2001
From: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Date: Wed, 9 May 2012 13:06:35 +0300
Subject: mtd: nand: remove 'sndcmd' parameter of 'read_oob/read_oob_raw'

As of [mtd: nand: remove autoincrement 'sndcmd' code], the
NAND_CMD_READ0 command is issued unconditionally.

Thus, read_oob/read_oob_raw's 'sndcmd' argument is no longer needed, as
well as their return code.

Remove the 'sndcmd' parameter, and set the return code to 0.

Signed-off-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/nand.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index c7755f455c81..57977c640529 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -372,9 +372,8 @@ struct nand_ecc_ctrl {
 	int (*write_oob_raw)(struct mtd_info *mtd, struct nand_chip *chip,
 			int page);
 	int (*read_oob_raw)(struct mtd_info *mtd, struct nand_chip *chip,
-			int page, int sndcmd);
-	int (*read_oob)(struct mtd_info *mtd, struct nand_chip *chip, int page,
-			int sndcmd);
+			int page);
+	int (*read_oob)(struct mtd_info *mtd, struct nand_chip *chip, int page);
 	int (*write_oob)(struct mtd_info *mtd, struct nand_chip *chip,
 			int page);
 };
-- 
cgit v1.2.3


From c3ba9698152b17fdc2c7cd0f7cbeb571e3367e9d Mon Sep 17 00:00:00 2001
From: Dan Magenheimer <dan.magenheimer@oracle.com>
Date: Mon, 9 Apr 2012 17:06:54 -0600
Subject: mm: frontswap: add frontswap header file

Frontswap is the alter ego of cleancache, the "yang" to cleancache's
"yin"... and more precisely frontswap is the provider of anonymous
pages to transcendent memory to nicely complement cleancache's providing
of clean pagecache pages to transcendent memory.  For optimal use
of transcendent memory, both are necessary... because a kernel
under memory pressure first reclaims clean pagecache pages and,
when under more memory pressure, starts swapping anonymous pages.

Frontswap and cleancache (which was merged at 3.0) are the "frontends"
and the only necessary changes to the core kernel for transcendent memory;
all other supporting code -- the "backends" -- is implemented as drivers.
See the LWN.net article "Transcendent memory in a nutshell" for a detailed
overview of frontswap and related kernel parts:
https://lwn.net/Articles/454795/

Frontswap code was first posted publicly in January 2009 and on LKML in
May 2009, and has remained functionally stable for nearly three years now.
It is barely invasive, touching only the swap subsystem and adds less
than 100 lines of code to existing swap subsystem code files.
It has improved syntactically substantially between V1 and this posting
of V14, thanks to the review of a few kernel developers, and has adapted
easily to at least one major swap subsystem change.  As of 3.4, there are
three in-tree users of frontswap patiently waiting for this patchset and
for CONFIG_FRONTSWAP to be enabled: zcache (staging driver merged at
2.6.39), Xen tmem (merged at 3.0 and 3.1) and RAMster (staging driver
merged at 3.4).  In addition, a RFC has been posted for a KVM backend.
The frontswap patchset has been in linux-next since next-110603.  Earlier
versions of frontswap already ship in the Oracle Unbreakable Enterprise Kernel
and SuSE SLES.

This patch, 1of4, provides the header file for the core code for frontswap
that interfaces between the hooks in the swap subsystem and a frontswap
backend via frontswap_ops.
---
New file added: include/linux/frontswap.h

[v14: add support for writethrough, per suggestion by aarcange@redhat.com]
[v14: rebase to 3.4-rc2]
[v11: konrad.wilk@oracle.com: squashed s/flush/invalidate/ in]
[v10: no change]
[v9: akpm@linux-foundation.org: change "flush" to "invalidate", part 1]
[v8: rebase to 3.0-rc4]
[v7: rebase to 3.0-rc3]
[v7: JBeulich@novell.com: new static inlines resolve to no-ops if not config'd]
[v7: JBeulich@novell.com: avoid redundant shifts/divides for *_bit lib calls]
[v6: rebase to 3.1-rc1]
[v5: no change from v4]
[v4: rebase to 2.6.39]
Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Jan Beulich <JBeulich@novell.com>
Acked-by: Seth Jennings <sjenning@linux.vnet.ibm.com>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Rik Riel <riel@redhat.com>
[v15: int/bool on some functions]
Signed-off-by: Konrad Wilk <konrad.wilk@oracle.com>
---
 include/linux/frontswap.h | 127 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 127 insertions(+)
 create mode 100644 include/linux/frontswap.h

(limited to 'include/linux')

diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h
new file mode 100644
index 000000000000..68ff7af5c5fb
--- /dev/null
+++ b/include/linux/frontswap.h
@@ -0,0 +1,127 @@
+#ifndef _LINUX_FRONTSWAP_H
+#define _LINUX_FRONTSWAP_H
+
+#include <linux/swap.h>
+#include <linux/mm.h>
+#include <linux/bitops.h>
+
+struct frontswap_ops {
+	void (*init)(unsigned);
+	int (*put_page)(unsigned, pgoff_t, struct page *);
+	int (*get_page)(unsigned, pgoff_t, struct page *);
+	void (*invalidate_page)(unsigned, pgoff_t);
+	void (*invalidate_area)(unsigned);
+};
+
+extern bool frontswap_enabled;
+extern struct frontswap_ops
+	frontswap_register_ops(struct frontswap_ops *ops);
+extern void frontswap_shrink(unsigned long);
+extern unsigned long frontswap_curr_pages(void);
+extern void frontswap_writethrough(bool);
+
+extern void __frontswap_init(unsigned type);
+extern int __frontswap_put_page(struct page *page);
+extern int __frontswap_get_page(struct page *page);
+extern void __frontswap_invalidate_page(unsigned, pgoff_t);
+extern void __frontswap_invalidate_area(unsigned);
+
+#ifdef CONFIG_FRONTSWAP
+
+static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset)
+{
+	bool ret = false;
+
+	if (frontswap_enabled && sis->frontswap_map)
+		ret = test_bit(offset, sis->frontswap_map);
+	return ret;
+}
+
+static inline void frontswap_set(struct swap_info_struct *sis, pgoff_t offset)
+{
+	if (frontswap_enabled && sis->frontswap_map)
+		set_bit(offset, sis->frontswap_map);
+}
+
+static inline void frontswap_clear(struct swap_info_struct *sis, pgoff_t offset)
+{
+	if (frontswap_enabled && sis->frontswap_map)
+		clear_bit(offset, sis->frontswap_map);
+}
+
+static inline void frontswap_map_set(struct swap_info_struct *p,
+				     unsigned long *map)
+{
+	p->frontswap_map = map;
+}
+
+static inline unsigned long *frontswap_map_get(struct swap_info_struct *p)
+{
+	return p->frontswap_map;
+}
+#else
+/* all inline routines become no-ops and all externs are ignored */
+
+#define frontswap_enabled (0)
+
+static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset)
+{
+	return false;
+}
+
+static inline void frontswap_set(struct swap_info_struct *sis, pgoff_t offset)
+{
+}
+
+static inline void frontswap_clear(struct swap_info_struct *sis, pgoff_t offset)
+{
+}
+
+static inline void frontswap_map_set(struct swap_info_struct *p,
+				     unsigned long *map)
+{
+}
+
+static inline unsigned long *frontswap_map_get(struct swap_info_struct *p)
+{
+	return NULL;
+}
+#endif
+
+static inline int frontswap_put_page(struct page *page)
+{
+	int ret = -1;
+
+	if (frontswap_enabled)
+		ret = __frontswap_put_page(page);
+	return ret;
+}
+
+static inline int frontswap_get_page(struct page *page)
+{
+	int ret = -1;
+
+	if (frontswap_enabled)
+		ret = __frontswap_get_page(page);
+	return ret;
+}
+
+static inline void frontswap_invalidate_page(unsigned type, pgoff_t offset)
+{
+	if (frontswap_enabled)
+		__frontswap_invalidate_page(type, offset);
+}
+
+static inline void frontswap_invalidate_area(unsigned type)
+{
+	if (frontswap_enabled)
+		__frontswap_invalidate_area(type);
+}
+
+static inline void frontswap_init(unsigned type)
+{
+	if (frontswap_enabled)
+		__frontswap_init(type);
+}
+
+#endif /* _LINUX_FRONTSWAP_H */
-- 
cgit v1.2.3


From 38b5faf4b178d5279b1fca5d7dadc68881342660 Mon Sep 17 00:00:00 2001
From: Dan Magenheimer <dan.magenheimer@oracle.com>
Date: Mon, 9 Apr 2012 17:08:06 -0600
Subject: mm: frontswap: core swap subsystem hooks and headers

This patch, 2of4, contains the changes to the core swap subsystem.
This includes:

(1) makes available core swap data structures (swap_lock, swap_list and
swap_info) that are needed by frontswap.c but we don't need to expose them
to the dozens of files that include swap.h so we create a new swapfile.h
just to extern-ify these and modify their declarations to non-static

(2) adds frontswap-related elements to swap_info_struct.  Frontswap_map
points to vzalloc'ed one-bit-per-swap-page metadata that indicates
whether the swap page is in frontswap or in the device and frontswap_pages
counts how many pages are in frontswap.

(3) adds hooks in the swap subsystem and extends try_to_unuse so that
frontswap_shrink can do a "partial swapoff".

Note that a failed frontswap_map allocation is safe... failure is noted
by lack of "FS" in the subsequent printk.

---

[v14: rebase to 3.4-rc2]
[v10: no change]
[v9: akpm@linux-foundation.org: mark some statics __read_mostly]
[v9: akpm@linux-foundation.org: add clarifying comments]
[v9: akpm@linux-foundation.org: no need to loop repeating try_to_unuse]
[v9: error27@gmail.com: remove superfluous check for NULL]
[v8: rebase to 3.0-rc4]
[v8: kamezawa.hiroyu@jp.fujitsu.com: change counter to atomic_t to avoid races]
[v8: kamezawa.hiroyu@jp.fujitsu.com: comment to clarify informational counters]
[v7: rebase to 3.0-rc3]
[v7: JBeulich@novell.com: add new swap struct elements only if config'd]
[v6: rebase to 3.0-rc1]
[v6: lliubbo@gmail.com: fix null pointer deref if vzalloc fails]
[v6: konrad.wilk@oracl.com: various checks and code clarifications/comments]
[v5: no change from v4]
[v4: rebase to 2.6.39]
Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com>
Reviewed-by: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Jan Beulich <JBeulich@novell.com>
Acked-by: Seth Jennings <sjenning@linux.vnet.ibm.com>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Rik Riel <riel@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
[v11: Rebased, fixed mm/swapfile.c context change]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 include/linux/swap.h     |  4 ++++
 include/linux/swapfile.h | 13 +++++++++++++
 2 files changed, 17 insertions(+)
 create mode 100644 include/linux/swapfile.h

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index b1fd5c7925fe..50a55e2d58ec 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -197,6 +197,10 @@ struct swap_info_struct {
 	struct block_device *bdev;	/* swap device or bdev of swap file */
 	struct file *swap_file;		/* seldom referenced */
 	unsigned int old_block_size;	/* seldom referenced */
+#ifdef CONFIG_FRONTSWAP
+	unsigned long *frontswap_map;	/* frontswap in-use, one bit per page */
+	atomic_t frontswap_pages;	/* frontswap pages in-use counter */
+#endif
 };
 
 struct swap_list_t {
diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h
new file mode 100644
index 000000000000..e282624e8c10
--- /dev/null
+++ b/include/linux/swapfile.h
@@ -0,0 +1,13 @@
+#ifndef _LINUX_SWAPFILE_H
+#define _LINUX_SWAPFILE_H
+
+/*
+ * these were static in swapfile.c but frontswap.c needs them and we don't
+ * want to expose them to the dozens of source files that include swap.h
+ */
+extern spinlock_t swap_lock;
+extern struct swap_list_t swap_list;
+extern struct swap_info_struct *swap_info[];
+extern int try_to_unuse(unsigned int, bool, unsigned long);
+
+#endif /* _LINUX_SWAPFILE_H */
-- 
cgit v1.2.3


From 165c8aed5bbc6bdddbccae0ba9db451732558ff9 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Tue, 15 May 2012 11:32:15 -0400
Subject: frontswap: s/put_page/store/g s/get_page/load

Sounds so much more natural.

Suggested-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 include/linux/frontswap.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h
index 68ff7af5c5fb..0e4e2eec5c1d 100644
--- a/include/linux/frontswap.h
+++ b/include/linux/frontswap.h
@@ -7,8 +7,8 @@
 
 struct frontswap_ops {
 	void (*init)(unsigned);
-	int (*put_page)(unsigned, pgoff_t, struct page *);
-	int (*get_page)(unsigned, pgoff_t, struct page *);
+	int (*store)(unsigned, pgoff_t, struct page *);
+	int (*load)(unsigned, pgoff_t, struct page *);
 	void (*invalidate_page)(unsigned, pgoff_t);
 	void (*invalidate_area)(unsigned);
 };
@@ -21,8 +21,8 @@ extern unsigned long frontswap_curr_pages(void);
 extern void frontswap_writethrough(bool);
 
 extern void __frontswap_init(unsigned type);
-extern int __frontswap_put_page(struct page *page);
-extern int __frontswap_get_page(struct page *page);
+extern int __frontswap_store(struct page *page);
+extern int __frontswap_load(struct page *page);
 extern void __frontswap_invalidate_page(unsigned, pgoff_t);
 extern void __frontswap_invalidate_area(unsigned);
 
@@ -88,21 +88,21 @@ static inline unsigned long *frontswap_map_get(struct swap_info_struct *p)
 }
 #endif
 
-static inline int frontswap_put_page(struct page *page)
+static inline int frontswap_store(struct page *page)
 {
 	int ret = -1;
 
 	if (frontswap_enabled)
-		ret = __frontswap_put_page(page);
+		ret = __frontswap_store(page);
 	return ret;
 }
 
-static inline int frontswap_get_page(struct page *page)
+static inline int frontswap_load(struct page *page)
 {
 	int ret = -1;
 
 	if (frontswap_enabled)
-		ret = __frontswap_get_page(page);
+		ret = __frontswap_load(page);
 	return ret;
 }
 
-- 
cgit v1.2.3


From 8f888ef846d4481e24c74b4a91ece771d2bcbcb5 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@us.ibm.com>
Date: Tue, 22 May 2012 22:43:41 -0400
Subject: jbd2: change disk layout for metadata checksumming

Define flags and allocate space in on-disk journal structures to support
checksumming of journal metadata.

Signed-off-by: Darrick J. Wong <djwong@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 include/linux/jbd2.h | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 912c30a8ddb1..809c439066c5 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -147,12 +147,24 @@ typedef struct journal_header_s
 #define JBD2_CRC32_CHKSUM   1
 #define JBD2_MD5_CHKSUM     2
 #define JBD2_SHA1_CHKSUM    3
+#define JBD2_CRC32C_CHKSUM  4
 
 #define JBD2_CRC32_CHKSUM_SIZE 4
 
 #define JBD2_CHECKSUM_BYTES (32 / sizeof(u32))
 /*
  * Commit block header for storing transactional checksums:
+ *
+ * NOTE: If FEATURE_COMPAT_CHECKSUM (checksum v1) is set, the h_chksum*
+ * fields are used to store a checksum of the descriptor and data blocks.
+ *
+ * If FEATURE_INCOMPAT_CSUM_V2 (checksum v2) is set, then the h_chksum
+ * field is used to store crc32c(uuid+commit_block).  Each journal metadata
+ * block gets its own checksum, and data block checksums are stored in
+ * journal_block_tag (in the descriptor).  The other h_chksum* fields are
+ * not used.
+ *
+ * Checksum v1 and v2 are mutually exclusive features.
  */
 struct commit_header {
 	__be32		h_magic;
@@ -175,13 +187,19 @@ struct commit_header {
 typedef struct journal_block_tag_s
 {
 	__be32		t_blocknr;	/* The on-disk block number */
-	__be32		t_flags;	/* See below */
+	__be16		t_checksum;	/* truncated crc32c(uuid+seq+block) */
+	__be16		t_flags;	/* See below */
 	__be32		t_blocknr_high; /* most-significant high 32bits. */
 } journal_block_tag_t;
 
 #define JBD2_TAG_SIZE32 (offsetof(journal_block_tag_t, t_blocknr_high))
 #define JBD2_TAG_SIZE64 (sizeof(journal_block_tag_t))
 
+/* Tail of descriptor block, for checksumming */
+struct jbd2_journal_block_tail {
+	__be32		t_checksum;	/* crc32c(uuid+descr_block) */
+};
+
 /*
  * The revoke descriptor: used on disk to describe a series of blocks to
  * be revoked from the log
@@ -192,6 +210,10 @@ typedef struct jbd2_journal_revoke_header_s
 	__be32		 r_count;	/* Count of bytes used in the block */
 } jbd2_journal_revoke_header_t;
 
+/* Tail of revoke block, for checksumming */
+struct jbd2_journal_revoke_tail {
+	__be32		r_checksum;	/* crc32c(uuid+revoke_block) */
+};
 
 /* Definitions for the journal tag flags word: */
 #define JBD2_FLAG_ESCAPE		1	/* on-disk block is escaped */
@@ -241,7 +263,10 @@ typedef struct journal_superblock_s
 	__be32	s_max_trans_data;	/* Limit of data blocks per trans. */
 
 /* 0x0050 */
-	__u32	s_padding[44];
+	__u8	s_checksum_type;	/* checksum type */
+	__u8	s_padding2[3];
+	__u32	s_padding[42];
+	__be32	s_checksum;		/* crc32c(superblock) */
 
 /* 0x0100 */
 	__u8	s_users[16*48];		/* ids of all fs'es sharing the log */
@@ -263,6 +288,7 @@ typedef struct journal_superblock_s
 #define JBD2_FEATURE_INCOMPAT_REVOKE		0x00000001
 #define JBD2_FEATURE_INCOMPAT_64BIT		0x00000002
 #define JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT	0x00000004
+#define JBD2_FEATURE_INCOMPAT_CSUM_V2		0x00000008
 
 /* Features known to this kernel version: */
 #define JBD2_KNOWN_COMPAT_FEATURES	JBD2_FEATURE_COMPAT_CHECKSUM
-- 
cgit v1.2.3


From 1227dd773d8d4e3983b4b751f9ffa0f41402fb7c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 24 Apr 2012 02:44:49 -0400
Subject: TIF_NOTIFY_RESUME is defined on all targets now

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/tracehook.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 51bd91d911c3..8a2a3fc9bd05 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -153,7 +153,6 @@ static inline void tracehook_signal_handler(int sig, siginfo_t *info,
 		ptrace_notify(SIGTRAP);
 }
 
-#ifdef TIF_NOTIFY_RESUME
 /**
  * set_notify_resume - cause tracehook_notify_resume() to be called
  * @task:		task that will call tracehook_notify_resume()
@@ -185,6 +184,5 @@ static inline void set_notify_resume(struct task_struct *task)
 static inline void tracehook_notify_resume(struct pt_regs *regs)
 {
 }
-#endif	/* TIF_NOTIFY_RESUME */
 
 #endif	/* <linux/tracehook.h> */
-- 
cgit v1.2.3


From a42c6ded827dbd396d2efde7530620be029a72d1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 23 May 2012 14:44:37 -0400
Subject: move key_repace_session_keyring() into tracehook_notify_resume()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/tracehook.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 8a2a3fc9bd05..b9ca903bb553 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -183,6 +183,8 @@ static inline void set_notify_resume(struct task_struct *task)
  */
 static inline void tracehook_notify_resume(struct pt_regs *regs)
 {
+	if (current->replacement_session_keyring)
+		key_replace_session_keyring();
 }
 
 #endif	/* <linux/tracehook.h> */
-- 
cgit v1.2.3


From e73f8959af0439d114847eab5a8a5ce48f1217c4 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 11 May 2012 10:59:07 +1000
Subject: task_work_add: generic process-context callbacks

Provide a simple mechanism that allows running code in the (nonatomic)
context of the arbitrary task.

The caller does task_work_add(task, task_work) and this task executes
task_work->func() either from do_notify_resume() or from do_exit().  The
callback can rely on PF_EXITING to detect the latter case.

"struct task_work" can be embedded in another struct, still it has "void
*data" to handle the most common/simple case.

This allows us to kill the ->replacement_session_keyring hack, and
potentially this can have more users.

Performance-wise, this adds 2 "unlikely(!hlist_empty())" checks into
tracehook_notify_resume() and do_exit().  But at the same time we can
remove the "replacement_session_keyring != NULL" checks from
arch/*/signal.c and exit_creds().

Note: task_work_add/task_work_run abuses ->pi_lock.  This is only because
this lock is already used by lookup_pi_state() to synchronize with
do_exit() setting PF_EXITING.  Fortunately the scope of this lock in
task_work.c is really tiny, and the code is unlikely anyway.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: David Howells <dhowells@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Alexander Gordeev <agordeev@redhat.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: David Smith <dsmith@redhat.com>
Cc: "Frank Ch. Eigler" <fche@redhat.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Larry Woodman <lwoodman@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/sched.h     |  2 ++
 include/linux/task_work.h | 33 +++++++++++++++++++++++++++++++++
 include/linux/tracehook.h | 11 +++++++++++
 3 files changed, 46 insertions(+)
 create mode 100644 include/linux/task_work.h

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5ea8baea9387..7930131abc1a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1400,6 +1400,8 @@ struct task_struct {
 	int (*notifier)(void *priv);
 	void *notifier_data;
 	sigset_t *notifier_mask;
+	struct hlist_head task_works;
+
 	struct audit_context *audit_context;
 #ifdef CONFIG_AUDITSYSCALL
 	uid_t loginuid;
diff --git a/include/linux/task_work.h b/include/linux/task_work.h
new file mode 100644
index 000000000000..294d5d5e90b1
--- /dev/null
+++ b/include/linux/task_work.h
@@ -0,0 +1,33 @@
+#ifndef _LINUX_TASK_WORK_H
+#define _LINUX_TASK_WORK_H
+
+#include <linux/list.h>
+#include <linux/sched.h>
+
+struct task_work;
+typedef void (*task_work_func_t)(struct task_work *);
+
+struct task_work {
+	struct hlist_node hlist;
+	task_work_func_t func;
+	void *data;
+};
+
+static inline void
+init_task_work(struct task_work *twork, task_work_func_t func, void *data)
+{
+	twork->func = func;
+	twork->data = data;
+}
+
+int task_work_add(struct task_struct *task, struct task_work *twork, bool);
+struct task_work *task_work_cancel(struct task_struct *, task_work_func_t);
+void task_work_run(void);
+
+static inline void exit_task_work(struct task_struct *task)
+{
+	if (unlikely(!hlist_empty(&task->task_works)))
+		task_work_run();
+}
+
+#endif	/* _LINUX_TASK_WORK_H */
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index b9ca903bb553..b2dd0917ca0d 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -49,6 +49,7 @@
 #include <linux/sched.h>
 #include <linux/ptrace.h>
 #include <linux/security.h>
+#include <linux/task_work.h>
 struct linux_binprm;
 
 /*
@@ -164,8 +165,10 @@ static inline void tracehook_signal_handler(int sig, siginfo_t *info,
  */
 static inline void set_notify_resume(struct task_struct *task)
 {
+#ifdef TIF_NOTIFY_RESUME
 	if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_RESUME))
 		kick_process(task);
+#endif
 }
 
 /**
@@ -185,6 +188,14 @@ static inline void tracehook_notify_resume(struct pt_regs *regs)
 {
 	if (current->replacement_session_keyring)
 		key_replace_session_keyring();
+	/*
+	 * The caller just cleared TIF_NOTIFY_RESUME. This barrier
+	 * pairs with task_work_add()->set_notify_resume() after
+	 * hlist_add_head(task->task_works);
+	 */
+	smp_mb__after_clear_bit();
+	if (unlikely(!hlist_empty(&current->task_works)))
+		task_work_run();
 }
 
 #endif	/* <linux/tracehook.h> */
-- 
cgit v1.2.3


From 4d1d61a6b203d957777d73fcebf19d90b038b5b2 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 11 May 2012 10:59:08 +1000
Subject: genirq: reimplement exit_irq_thread() hook via task_work_add()

exit_irq_thread() and task->irq_thread are needed to handle the unexpected
(and unlikely) exit of irq-thread.

We can use task_work instead and make this all private to
kernel/irq/manage.c, cleanup plus micro-optimization.

1. rename exit_irq_thread() to irq_thread_dtor(), make it
   static, and move it up before irq_thread().

2. change irq_thread() to do task_work_add(irq_thread_dtor)
   at the start and task_work_cancel() before return.

   tracehook_notify_resume() can never play with kthreads,
   only do_exit()->exit_task_work() can call the callback
   and this is what we want.

3. remove task_struct->irq_thread and the special hook
   in do_exit().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: David Howells <dhowells@redhat.com>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Alexander Gordeev <agordeev@redhat.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: David Smith <dsmith@redhat.com>
Cc: "Frank Ch. Eigler" <fche@redhat.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Larry Woodman <lwoodman@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/interrupt.h |  4 ----
 include/linux/sched.h     | 10 ++--------
 2 files changed, 2 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index c91171599cb6..e68a8e53bb59 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -142,8 +142,6 @@ request_any_context_irq(unsigned int irq, irq_handler_t handler,
 extern int __must_check
 request_percpu_irq(unsigned int irq, irq_handler_t handler,
 		   const char *devname, void __percpu *percpu_dev_id);
-
-extern void exit_irq_thread(void);
 #else
 
 extern int __must_check
@@ -177,8 +175,6 @@ request_percpu_irq(unsigned int irq, irq_handler_t handler,
 {
 	return request_irq(irq, handler, 0, devname, percpu_dev_id);
 }
-
-static inline void exit_irq_thread(void) { }
 #endif
 
 extern void free_irq(unsigned int, void *);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7930131abc1a..da013853a622 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1301,11 +1301,6 @@ struct task_struct {
 	unsigned sched_reset_on_fork:1;
 	unsigned sched_contributes_to_load:1;
 
-#ifdef CONFIG_GENERIC_HARDIRQS
-	/* IRQ handler threads */
-	unsigned irq_thread:1;
-#endif
-
 	pid_t pid;
 	pid_t tgid;
 
@@ -1313,10 +1308,9 @@ struct task_struct {
 	/* Canary value for the -fstack-protector gcc feature */
 	unsigned long stack_canary;
 #endif
-
-	/* 
+	/*
 	 * pointers to (original) parent process, youngest child, younger sibling,
-	 * older sibling, respectively.  (p->father can be replaced with 
+	 * older sibling, respectively.  (p->father can be replaced with
 	 * p->real_parent->pid)
 	 */
 	struct task_struct __rcu *real_parent; /* real parent process */
-- 
cgit v1.2.3


From 413cd3d9abeaef590e5ce00564f7a443165db238 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 11 May 2012 10:59:08 +1000
Subject: keys: change keyctl_session_to_parent() to use task_work_add()

Change keyctl_session_to_parent() to use task_work_add() and move
key_replace_session_keyring() logic into task_work->func().

Note that we do task_work_cancel() before task_work_add() to ensure that
only one work can be pending at any time.  This is important, we must not
allow user-space to abuse the parent's ->task_works list.

The callback, replace_session_keyring(), checks PF_EXITING.  I guess this
is not really needed but looks better.

As a side effect, this fixes the (unlikely) race.  The callers of
key_replace_session_keyring() and keyctl_session_to_parent() lack the
necessary barriers, the parent can miss the request.

Now we can remove task_struct->replacement_session_keyring and related
code.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: David Howells <dhowells@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Alexander Gordeev <agordeev@redhat.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: David Smith <dsmith@redhat.com>
Cc: "Frank Ch. Eigler" <fche@redhat.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Larry Woodman <lwoodman@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/key.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/key.h b/include/linux/key.h
index 5231800770e1..2a0ee11584e9 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -33,6 +33,8 @@ typedef uint32_t key_perm_t;
 
 struct key;
 
+#define key_replace_session_keyring()	do { } while (0)
+
 #ifdef CONFIG_KEYS
 
 #undef KEY_DEBUGGING
@@ -308,9 +310,6 @@ static inline bool key_is_instantiated(const struct key *key)
 #ifdef CONFIG_SYSCTL
 extern ctl_table key_sysctls[];
 #endif
-
-extern void key_replace_session_keyring(void);
-
 /*
  * the userspace interface
  */
@@ -334,7 +333,6 @@ extern void key_init(void);
 #define key_fsuid_changed(t)		do { } while(0)
 #define key_fsgid_changed(t)		do { } while(0)
 #define key_init()			do { } while(0)
-#define key_replace_session_keyring()	do { } while(0)
 
 #endif /* CONFIG_KEYS */
 #endif /* __KERNEL__ */
-- 
cgit v1.2.3


From dea649b8ac1861107c5d91e1a71121434fc64193 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 11 May 2012 10:59:09 +1000
Subject: keys: kill the dummy key_replace_session_keyring()

After the previouse change key_replace_session_keyring() becomes a nop.
Remove the dummy definition in key.h and update the callers in
arch/*/kernel/signal.c.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: David Howells <dhowells@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Alexander Gordeev <agordeev@redhat.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: David Smith <dsmith@redhat.com>
Cc: "Frank Ch. Eigler" <fche@redhat.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Larry Woodman <lwoodman@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/key.h       | 2 --
 include/linux/tracehook.h | 2 --
 2 files changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/key.h b/include/linux/key.h
index 2a0ee11584e9..4cd22ed627ef 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -33,8 +33,6 @@ typedef uint32_t key_perm_t;
 
 struct key;
 
-#define key_replace_session_keyring()	do { } while (0)
-
 #ifdef CONFIG_KEYS
 
 #undef KEY_DEBUGGING
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index b2dd0917ca0d..6a4d82bedb03 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -186,8 +186,6 @@ static inline void set_notify_resume(struct task_struct *task)
  */
 static inline void tracehook_notify_resume(struct pt_regs *regs)
 {
-	if (current->replacement_session_keyring)
-		key_replace_session_keyring();
 	/*
 	 * The caller just cleared TIF_NOTIFY_RESUME. This barrier
 	 * pairs with task_work_add()->set_notify_resume() after
-- 
cgit v1.2.3


From f23ca335462e3c84f13270b9e65f83936068ec2c Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 11 May 2012 10:59:09 +1000
Subject: keys: kill task_struct->replacement_session_keyring

Kill the no longer used task_struct->replacement_session_keyring, update
copy_creds() and exit_creds().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: David Howells <dhowells@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Alexander Gordeev <agordeev@redhat.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: David Smith <dsmith@redhat.com>
Cc: "Frank Ch. Eigler" <fche@redhat.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Larry Woodman <lwoodman@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/sched.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index da013853a622..17c6c929ee94 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1357,8 +1357,6 @@ struct task_struct {
 					 * credentials (COW) */
 	const struct cred __rcu *cred;	/* effective (overridable) subjective task
 					 * credentials (COW) */
-	struct cred *replacement_session_keyring; /* for KEYCTL_SESSION_TO_PARENT */
-
 	char comm[TASK_COMM_LEN]; /* executable name excluding path
 				     - access with [gs]et_task_comm (which lock
 				       it with task_lock())
-- 
cgit v1.2.3


From e5400321a6f15ce0fe77c8455954f213ef7dcc54 Mon Sep 17 00:00:00 2001
From: Magnus Damm <magnus.damm@gmail.com>
Date: Wed, 9 May 2012 23:39:34 +0900
Subject: clockevents: Make clockevents_config() a global symbol

Make clockevents_config() into a global symbol to allow it to be used
by compiled-in clockevent drivers. This is needed by drivers that want
to update the timer frequency after registration time.

Signed-off-by: Magnus Damm <damm@opensource.se>
Tested-by: Simon Horman <horms@verge.net.au>
Cc: arnd@arndb.de
Cc: johnstul@us.ibm.com
Cc: rjw@sisk.pl
Cc: lethal@linux-sh.org
Cc: gregkh@linuxfoundation.org
Cc: olof@lixom.net
Cc: Magnus Damm <magnus.damm@gmail.com>
Link: http://lkml.kernel.org/r/20120509143934.27521.46553.sendpatchset@w520
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/clockchips.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 81e803e90aa4..acba894374a1 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -132,6 +132,7 @@ extern u64 clockevent_delta2ns(unsigned long latch,
 			       struct clock_event_device *evt);
 extern void clockevents_register_device(struct clock_event_device *dev);
 
+extern void clockevents_config(struct clock_event_device *dev, u32 freq);
 extern void clockevents_config_and_register(struct clock_event_device *dev,
 					    u32 freq, unsigned long min_delta,
 					    unsigned long max_delta);
-- 
cgit v1.2.3


From 01b5adcebb977bc61b64167adce6d8260c9da33c Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@us.ibm.com>
Date: Sun, 27 May 2012 07:50:56 -0400
Subject: jbd2: Grab a reference to the crc32c driver if necessary

Obtain a reference to the crc32c driver if needed for the v2 checksum.

Signed-off-by: Darrick J. Wong <djwong@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 include/linux/jbd2.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 809c439066c5..71e77dddebf1 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -31,6 +31,7 @@
 #include <linux/mutex.h>
 #include <linux/timer.h>
 #include <linux/slab.h>
+#include <crypto/hash.h>
 #endif
 
 #define journal_oom_retry 1
@@ -965,6 +966,9 @@ struct journal_s
 	 * superblock pointer here
 	 */
 	void *j_private;
+
+	/* Reference to checksum algorithm driver via cryptoapi */
+	struct crypto_shash *j_chksum_driver;
 };
 
 /*
@@ -1294,6 +1298,25 @@ static inline int jbd_space_needed(journal_t *journal)
 
 extern int jbd_blocks_per_page(struct inode *inode);
 
+static inline u32 jbd2_chksum(journal_t *journal, u32 crc,
+			      const void *address, unsigned int length)
+{
+	struct {
+		struct shash_desc shash;
+		char ctx[crypto_shash_descsize(journal->j_chksum_driver)];
+	} desc;
+	int err;
+
+	desc.shash.tfm = journal->j_chksum_driver;
+	desc.shash.flags = 0;
+	*(u32 *)desc.ctx = crc;
+
+	err = crypto_shash_update(&desc.shash, address, length);
+	BUG_ON(err);
+
+	return *(u32 *)desc.ctx;
+}
+
 #ifdef __KERNEL__
 
 #define buffer_trace_init(bh)	do {} while (0)
-- 
cgit v1.2.3


From 4fd5ea43bc11602bfabe2c8f5378586d34bd2b0a Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@us.ibm.com>
Date: Sun, 27 May 2012 08:08:22 -0400
Subject: jbd2: checksum journal superblock

Calculate and verify a checksum covering the journal superblock.

Signed-off-by: Darrick J. Wong <djwong@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 include/linux/jbd2.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 71e77dddebf1..a9632bc55d97 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -969,6 +969,9 @@ struct journal_s
 
 	/* Reference to checksum algorithm driver via cryptoapi */
 	struct crypto_shash *j_chksum_driver;
+
+	/* Precomputed journal UUID checksum for seeding other checksums */
+	__u32 j_csum_seed;
 };
 
 /*
-- 
cgit v1.2.3


From e93376c20b70d1e62bb3246acd1bbe21fe58859f Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@us.ibm.com>
Date: Sun, 27 May 2012 08:12:42 -0400
Subject: ext4/jbd2: add metadata checksumming to the list of supported
 features

Activate the metadata checksumming feature by adding it to ext4 and
jbd2's lists of supported features.

Signed-off-by: Darrick J. Wong <djwong@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 include/linux/jbd2.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index a9632bc55d97..f334c7fab967 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -296,7 +296,8 @@ typedef struct journal_superblock_s
 #define JBD2_KNOWN_ROCOMPAT_FEATURES	0
 #define JBD2_KNOWN_INCOMPAT_FEATURES	(JBD2_FEATURE_INCOMPAT_REVOKE | \
 					JBD2_FEATURE_INCOMPAT_64BIT | \
-					JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)
+					JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | \
+					JBD2_FEATURE_INCOMPAT_CSUM_V2)
 
 #ifdef __KERNEL__
 
-- 
cgit v1.2.3


From 617c8c11236716dcbda877e764b7bf37c6fd8063 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Tue, 29 May 2012 03:35:08 +0000
Subject: skb: avoid unnecessary reallocations in __skb_cow

At the beginning of __skb_cow, headroom gets set to a minimum of
NET_SKB_PAD. This causes unnecessary reallocations if the buffer was not
cloned and the headroom is just below NET_SKB_PAD, but still more than the
amount requested by the caller.
This was showing up frequently in my tests on VLAN tx, where
vlan_insert_tag calls skb_cow_head(skb, VLAN_HLEN).

Locally generated packets should have enough headroom, and for forward
paths, we already have NET_SKB_PAD bytes of headroom, so we don't need to
add any extra space here.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 0e501714d47f..b534a1be540a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1896,8 +1896,6 @@ static inline int __skb_cow(struct sk_buff *skb, unsigned int headroom,
 {
 	int delta = 0;
 
-	if (headroom < NET_SKB_PAD)
-		headroom = NET_SKB_PAD;
 	if (headroom > skb_headroom(skb))
 		delta = headroom - skb_headroom(skb);
 
-- 
cgit v1.2.3


From 2033e9bf06f07e049bbc77e9452856df846714cc Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Tue, 29 May 2012 09:30:40 +0000
Subject: net: add MODULE_ALIAS_NET_PF_PROTO_NAME

The MODULE_ALAIS_NET_PF macro set is missing a variant that allows for the
appending of an arbitrary string to the net-pf-<x>-proto-<y> base.  while
MODULE_ALIAS_NET_PF_PROTO_NAME_TYPE allows an appending of a numerical type, we
need to be able to append a generic string to support generic netlink families
that have neither a fix numberical protocol nor type number

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
CC: Eric Dumazet <eric.dumazet@gmail.com>
CC: David Miller <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/net.h b/include/linux/net.h
index 2d7510f38934..e9ac2df079ba 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -313,5 +313,8 @@ extern int kernel_sock_shutdown(struct socket *sock,
 	MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto) \
 		     "-type-" __stringify(type))
 
+#define MODULE_ALIAS_NET_PF_PROTO_NAME(pf, proto, name) \
+	MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto) \
+		     name)
 #endif /* __KERNEL__ */
 #endif	/* _LINUX_NET_H */
-- 
cgit v1.2.3


From e9412c37082b5c932e83364aaed0c38c2ce33acb Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Tue, 29 May 2012 09:30:41 +0000
Subject: genetlink: Build a generic netlink family module alias

Generic netlink searches for -type- formatted aliases when requesting a module to
fulfill a protocol request (i.e. net-pf-16-proto-16-type-<x>, where x is a type
value).  However generic netlink protocols have no well defined type numbers,
they have string names.  Modify genl_ctrl_getfamily to request an alias in the
format net-pf-16-proto-16-family-<x> instead, where x is a generic string, and
add a macro that builds on the previously added MODULE_ALIAS_NET_PF_PROTO_NAME
macro to allow modules to specifify those generic strings.

Note, l2tp previously hacked together an net-pf-16-proto-16-type-l2tp alias
using the MODULE_ALIAS macro, with these updates we can convert that to use the
PROTO_NAME macro.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
CC: Eric Dumazet <eric.dumazet@gmail.com>
CC: James Chapman <jchapman@katalix.com>
CC: David Miller <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/genetlink.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h
index 73c28dea10ae..7a114016ac7d 100644
--- a/include/linux/genetlink.h
+++ b/include/linux/genetlink.h
@@ -110,6 +110,9 @@ extern int lockdep_genl_is_held(void);
 #define genl_dereference(p)					\
 	rcu_dereference_protected(p, lockdep_genl_is_held())
 
+#define MODULE_ALIAS_GENL_FAMILY(family)\
+ MODULE_ALIAS_NET_PF_PROTO_NAME(PF_NETLINK, NETLINK_GENERIC, "-family-" family)
+
 #endif /* __KERNEL__ */
 
 #endif	/* __LINUX_GENERIC_NETLINK_H */
-- 
cgit v1.2.3


From b0b0382bb4904965a9e9fca77ad87514dfda0d1c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 2 Apr 2012 14:34:06 -0400
Subject: ->encode_fh() API change

pass inode + parent's inode or NULL instead of dentry + bool saying
whether we want the parent or not.

NOTE: that needs ceph fix folded in.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/exportfs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 3a4cef5322dc..12291a7ee275 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -165,8 +165,8 @@ struct fid {
  */
 
 struct export_operations {
-	int (*encode_fh)(struct dentry *de, __u32 *fh, int *max_len,
-			int connectable);
+	int (*encode_fh)(struct inode *inode, __u32 *fh, int *max_len,
+			struct inode *parent);
 	struct dentry * (*fh_to_dentry)(struct super_block *sb, struct fid *fid,
 			int fh_len, int fh_type);
 	struct dentry * (*fh_to_parent)(struct super_block *sb, struct fid *fid,
-- 
cgit v1.2.3


From 9dd6fa03ab31bb57cee4623a689d058d222fbe68 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 8 May 2012 13:29:45 +0930
Subject: lglock: remove online variants of lock

Optimizing the slow paths adds a lot of complexity.  If you need to
grab every lock often, you have other problems.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Nick Piggin <npiggin@kernel.dk>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/lglock.h | 58 ++------------------------------------------------
 1 file changed, 2 insertions(+), 56 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lglock.h b/include/linux/lglock.h
index 87f402ccec55..0fdd821e77b7 100644
--- a/include/linux/lglock.h
+++ b/include/linux/lglock.h
@@ -28,8 +28,8 @@
 #define br_lock_init(name)	name##_lock_init()
 #define br_read_lock(name)	name##_local_lock()
 #define br_read_unlock(name)	name##_local_unlock()
-#define br_write_lock(name)	name##_global_lock_online()
-#define br_write_unlock(name)	name##_global_unlock_online()
+#define br_write_lock(name)	name##_global_lock()
+#define br_write_unlock(name)	name##_global_unlock()
 
 #define DECLARE_BRLOCK(name)	DECLARE_LGLOCK(name)
 #define DEFINE_BRLOCK(name)	DEFINE_LGLOCK(name)
@@ -42,8 +42,6 @@
 #define lg_local_unlock_cpu(name, cpu)	name##_local_unlock_cpu(cpu)
 #define lg_global_lock(name)	name##_global_lock()
 #define lg_global_unlock(name)	name##_global_unlock()
-#define lg_global_lock_online(name) name##_global_lock_online()
-#define lg_global_unlock_online(name) name##_global_unlock_online()
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 #define LOCKDEP_INIT_MAP lockdep_init_map
@@ -68,36 +66,13 @@
  extern void name##_local_unlock_cpu(int cpu);				\
  extern void name##_global_lock(void);					\
  extern void name##_global_unlock(void);				\
- extern void name##_global_lock_online(void);				\
- extern void name##_global_unlock_online(void);				\
 
 #define DEFINE_LGLOCK(name)						\
 									\
  DEFINE_SPINLOCK(name##_cpu_lock);					\
- cpumask_t name##_cpus __read_mostly;					\
  DEFINE_PER_CPU(arch_spinlock_t, name##_lock);				\
  DEFINE_LGLOCK_LOCKDEP(name);						\
 									\
- static int								\
- name##_lg_cpu_callback(struct notifier_block *nb,			\
-				unsigned long action, void *hcpu)	\
- {									\
-	switch (action & ~CPU_TASKS_FROZEN) {				\
-	case CPU_UP_PREPARE:						\
-		spin_lock(&name##_cpu_lock);				\
-		cpu_set((unsigned long)hcpu, name##_cpus);		\
-		spin_unlock(&name##_cpu_lock);				\
-		break;							\
-	case CPU_UP_CANCELED: case CPU_DEAD:				\
-		spin_lock(&name##_cpu_lock);				\
-		cpu_clear((unsigned long)hcpu, name##_cpus);		\
-		spin_unlock(&name##_cpu_lock);				\
-	}								\
-	return NOTIFY_OK;						\
- }									\
- static struct notifier_block name##_lg_cpu_notifier = {		\
-	.notifier_call = name##_lg_cpu_callback,			\
- };									\
  void name##_lock_init(void) {						\
 	int i;								\
 	LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \
@@ -106,11 +81,6 @@
 		lock = &per_cpu(name##_lock, i);			\
 		*lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;	\
 	}								\
-	register_hotcpu_notifier(&name##_lg_cpu_notifier);		\
-	get_online_cpus();						\
-	for_each_online_cpu(i)						\
-		cpu_set(i, name##_cpus);				\
-	put_online_cpus();						\
  }									\
  EXPORT_SYMBOL(name##_lock_init);					\
 									\
@@ -150,30 +120,6 @@
  }									\
  EXPORT_SYMBOL(name##_local_unlock_cpu);				\
 									\
- void name##_global_lock_online(void) {					\
-	int i;								\
-	spin_lock(&name##_cpu_lock);					\
-	rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_);		\
-	for_each_cpu(i, &name##_cpus) {					\
-		arch_spinlock_t *lock;					\
-		lock = &per_cpu(name##_lock, i);			\
-		arch_spin_lock(lock);					\
-	}								\
- }									\
- EXPORT_SYMBOL(name##_global_lock_online);				\
-									\
- void name##_global_unlock_online(void) {				\
-	int i;								\
-	rwlock_release(&name##_lock_dep_map, 1, _RET_IP_);		\
-	for_each_cpu(i, &name##_cpus) {					\
-		arch_spinlock_t *lock;					\
-		lock = &per_cpu(name##_lock, i);			\
-		arch_spin_unlock(lock);					\
-	}								\
-	spin_unlock(&name##_cpu_lock);					\
- }									\
- EXPORT_SYMBOL(name##_global_unlock_online);				\
-									\
  void name##_global_lock(void) {					\
 	int i;								\
 	preempt_disable();						\
-- 
cgit v1.2.3


From eea62f831b8030b0eeea8314eed73b6132d1de26 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Tue, 8 May 2012 13:32:24 +0930
Subject: brlocks/lglocks: turn into functions

lglocks and brlocks are currently generated with some complicated macros
in lglock.h.  But there's no reason to not just use common utility
functions and put all the data into a common data structure.

Since there are at least two users it makes sense to share this code in a
library.  This is also easier maintainable than a macro forest.

This will also make it later possible to dynamically allocate lglocks and
also use them in modules (this would both still need some additional, but
now straightforward, code)

[akpm@linux-foundation.org: checkpatch fixes]
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/lglock.h | 125 ++++++++++---------------------------------------
 1 file changed, 26 insertions(+), 99 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lglock.h b/include/linux/lglock.h
index 0fdd821e77b7..f01e5f6d1f07 100644
--- a/include/linux/lglock.h
+++ b/include/linux/lglock.h
@@ -23,26 +23,17 @@
 #include <linux/lockdep.h>
 #include <linux/percpu.h>
 #include <linux/cpu.h>
+#include <linux/notifier.h>
 
 /* can make br locks by using local lock for read side, global lock for write */
-#define br_lock_init(name)	name##_lock_init()
-#define br_read_lock(name)	name##_local_lock()
-#define br_read_unlock(name)	name##_local_unlock()
-#define br_write_lock(name)	name##_global_lock()
-#define br_write_unlock(name)	name##_global_unlock()
+#define br_lock_init(name)	lg_lock_init(name, #name)
+#define br_read_lock(name)	lg_local_lock(name)
+#define br_read_unlock(name)	lg_local_unlock(name)
+#define br_write_lock(name)	lg_global_lock(name)
+#define br_write_unlock(name)	lg_global_unlock(name)
 
-#define DECLARE_BRLOCK(name)	DECLARE_LGLOCK(name)
 #define DEFINE_BRLOCK(name)	DEFINE_LGLOCK(name)
 
-
-#define lg_lock_init(name)	name##_lock_init()
-#define lg_local_lock(name)	name##_local_lock()
-#define lg_local_unlock(name)	name##_local_unlock()
-#define lg_local_lock_cpu(name, cpu)	name##_local_lock_cpu(cpu)
-#define lg_local_unlock_cpu(name, cpu)	name##_local_unlock_cpu(cpu)
-#define lg_global_lock(name)	name##_global_lock()
-#define lg_global_unlock(name)	name##_global_unlock()
-
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 #define LOCKDEP_INIT_MAP lockdep_init_map
 
@@ -57,90 +48,26 @@
 #define DEFINE_LGLOCK_LOCKDEP(name)
 #endif
 
-
-#define DECLARE_LGLOCK(name)						\
- extern void name##_lock_init(void);					\
- extern void name##_local_lock(void);					\
- extern void name##_local_unlock(void);					\
- extern void name##_local_lock_cpu(int cpu);				\
- extern void name##_local_unlock_cpu(int cpu);				\
- extern void name##_global_lock(void);					\
- extern void name##_global_unlock(void);				\
+struct lglock {
+	arch_spinlock_t __percpu *lock;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lock_class_key lock_key;
+	struct lockdep_map    lock_dep_map;
+#endif
+};
 
 #define DEFINE_LGLOCK(name)						\
-									\
- DEFINE_SPINLOCK(name##_cpu_lock);					\
- DEFINE_PER_CPU(arch_spinlock_t, name##_lock);				\
- DEFINE_LGLOCK_LOCKDEP(name);						\
-									\
- void name##_lock_init(void) {						\
-	int i;								\
-	LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \
-	for_each_possible_cpu(i) {					\
-		arch_spinlock_t *lock;					\
-		lock = &per_cpu(name##_lock, i);			\
-		*lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;	\
-	}								\
- }									\
- EXPORT_SYMBOL(name##_lock_init);					\
-									\
- void name##_local_lock(void) {						\
-	arch_spinlock_t *lock;						\
-	preempt_disable();						\
-	rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_);	\
-	lock = &__get_cpu_var(name##_lock);				\
-	arch_spin_lock(lock);						\
- }									\
- EXPORT_SYMBOL(name##_local_lock);					\
-									\
- void name##_local_unlock(void) {					\
-	arch_spinlock_t *lock;						\
-	rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_);		\
-	lock = &__get_cpu_var(name##_lock);				\
-	arch_spin_unlock(lock);						\
-	preempt_enable();						\
- }									\
- EXPORT_SYMBOL(name##_local_unlock);					\
-									\
- void name##_local_lock_cpu(int cpu) {					\
-	arch_spinlock_t *lock;						\
-	preempt_disable();						\
-	rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_);	\
-	lock = &per_cpu(name##_lock, cpu);				\
-	arch_spin_lock(lock);						\
- }									\
- EXPORT_SYMBOL(name##_local_lock_cpu);					\
-									\
- void name##_local_unlock_cpu(int cpu) {				\
-	arch_spinlock_t *lock;						\
-	rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_);		\
-	lock = &per_cpu(name##_lock, cpu);				\
-	arch_spin_unlock(lock);						\
-	preempt_enable();						\
- }									\
- EXPORT_SYMBOL(name##_local_unlock_cpu);				\
-									\
- void name##_global_lock(void) {					\
-	int i;								\
-	preempt_disable();						\
-	rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_);		\
-	for_each_possible_cpu(i) {					\
-		arch_spinlock_t *lock;					\
-		lock = &per_cpu(name##_lock, i);			\
-		arch_spin_lock(lock);					\
-	}								\
- }									\
- EXPORT_SYMBOL(name##_global_lock);					\
-									\
- void name##_global_unlock(void) {					\
-	int i;								\
-	rwlock_release(&name##_lock_dep_map, 1, _RET_IP_);		\
-	for_each_possible_cpu(i) {					\
-		arch_spinlock_t *lock;					\
-		lock = &per_cpu(name##_lock, i);			\
-		arch_spin_unlock(lock);					\
-	}								\
-	preempt_enable();						\
- }									\
- EXPORT_SYMBOL(name##_global_unlock);
+	DEFINE_LGLOCK_LOCKDEP(name);					\
+	DEFINE_PER_CPU(arch_spinlock_t, name ## _lock)			\
+	= __ARCH_SPIN_LOCK_UNLOCKED;					\
+	struct lglock name = { .lock = &name ## _lock }
+
+void lg_lock_init(struct lglock *lg, char *name);
+void lg_local_lock(struct lglock *lg);
+void lg_local_unlock(struct lglock *lg);
+void lg_local_lock_cpu(struct lglock *lg, int cpu);
+void lg_local_unlock_cpu(struct lglock *lg, int cpu);
+void lg_global_lock(struct lglock *lg);
+void lg_global_unlock(struct lglock *lg);
+
 #endif
-- 
cgit v1.2.3


From 5aaa0b7a2ed5b12692c9ffb5222182bd558d3146 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 17 May 2012 17:15:29 +0200
Subject: sched/nohz: Fix rq->cpu_load calculations some more

Follow up on commit 556061b00 ("sched/nohz: Fix rq->cpu_load[]
calculations") since while that fixed the busy case it regressed the
mostly idle case.

Add a callback from the nohz exit to also age the rq->cpu_load[]
array. This closes the hole where either there was no nohz load
balance pass during the nohz, or there was a 'significant' amount of
idle time between the last nohz balance and the nohz exit.

So we'll update unconditionally from the tick to not insert any
accidental 0 load periods while busy, and we try and catch up from
nohz idle balance and nohz exit. Both these are still prone to missing
a jiffy, but that has always been the case.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: pjt@google.com
Cc: Venkatesh Pallipadi <venki@google.com>
Link: http://lkml.kernel.org/n/tip-kt0trz0apodbf84ucjfdbr1a@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index f45c0b280b5d..d61e5977e517 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -145,6 +145,7 @@ extern unsigned long this_cpu_load(void);
 
 
 extern void calc_global_load(unsigned long ticks);
+extern void update_cpu_load_nohz(void);
 
 extern unsigned long get_parent_ip(unsigned long addr);
 
-- 
cgit v1.2.3


From 29baa7478ba47d746e3625c91d3b2afbf46b4312 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 23 Apr 2012 12:11:21 +0200
Subject: sched: Move nr_cpus_allowed out of 'struct sched_rt_entity'

Since nr_cpus_allowed is used outside of sched/rt.c and wants to be
used outside of there more, move it to a more natural site.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-kr61f02y9brwzkh6x53pdptm@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/init_task.h | 2 +-
 include/linux/sched.h     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index e4baff5f7ff4..9e65eff6af3b 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -149,6 +149,7 @@ extern struct cred init_cred;
 	.normal_prio	= MAX_PRIO-20,					\
 	.policy		= SCHED_NORMAL,					\
 	.cpus_allowed	= CPU_MASK_ALL,					\
+	.nr_cpus_allowed= NR_CPUS,					\
 	.mm		= NULL,						\
 	.active_mm	= &init_mm,					\
 	.se		= {						\
@@ -157,7 +158,6 @@ extern struct cred init_cred;
 	.rt		= {						\
 		.run_list	= LIST_HEAD_INIT(tsk.rt.run_list),	\
 		.time_slice	= RR_TIMESLICE,				\
-		.nr_cpus_allowed = NR_CPUS,				\
 	},								\
 	.tasks		= LIST_HEAD_INIT(tsk.tasks),			\
 	INIT_PUSHABLE_TASKS(tsk)					\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d61e5977e517..0f50e78f7f44 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1188,7 +1188,6 @@ struct sched_rt_entity {
 	struct list_head run_list;
 	unsigned long timeout;
 	unsigned int time_slice;
-	int nr_cpus_allowed;
 
 	struct sched_rt_entity *back;
 #ifdef CONFIG_RT_GROUP_SCHED
@@ -1253,6 +1252,7 @@ struct task_struct {
 #endif
 
 	unsigned int policy;
+	int nr_cpus_allowed;
 	cpumask_t cpus_allowed;
 
 #ifdef CONFIG_PREEMPT_RCU
-- 
cgit v1.2.3


From 0053ea9c34a41865ec89ffbf3d3033f9a503bccc Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 30 May 2012 07:43:34 +0000
Subject: netdevice: Update netif_dbg for CONFIG_DYNAMIC_DEBUG

Make netif_dbg use dynamic debugging whenever
CONFIG_DYNAMIC_DEBUG is enabled.

commit b558c96ffa53
("dynamic_debug: make dynamic-debug supersede DEBUG ccflag")
missed updating the netif_dbg variant.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e7fd468f7126..d94cb1431519 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2795,15 +2795,15 @@ do {								\
 #define netif_info(priv, type, dev, fmt, args...)		\
 	netif_level(info, priv, type, dev, fmt, ##args)
 
-#if defined(DEBUG)
-#define netif_dbg(priv, type, dev, format, args...)		\
-	netif_printk(priv, type, KERN_DEBUG, dev, format, ##args)
-#elif defined(CONFIG_DYNAMIC_DEBUG)
+#if defined(CONFIG_DYNAMIC_DEBUG)
 #define netif_dbg(priv, type, netdev, format, args...)		\
 do {								\
 	if (netif_msg_##type(priv))				\
 		dynamic_netdev_dbg(netdev, format, ##args);	\
 } while (0)
+#elif defined(DEBUG)
+#define netif_dbg(priv, type, dev, format, args...)		\
+	netif_printk(priv, type, KERN_DEBUG, dev, format, ##args)
 #else
 #define netif_dbg(priv, type, dev, format, args...)			\
 ({									\
-- 
cgit v1.2.3


From bb8ac181a5cf50458a0d83b4460790badc9fdc16 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 19 May 2012 10:25:23 -0400
Subject: bury __kernel_nlink_t, make internal nlink_t consistent

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/types.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/types.h b/include/linux/types.h
index 7f480db60231..9c1bd539ea70 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -25,7 +25,7 @@ typedef __kernel_dev_t		dev_t;
 typedef __kernel_ino_t		ino_t;
 typedef __kernel_mode_t		mode_t;
 typedef unsigned short		umode_t;
-typedef __kernel_nlink_t	nlink_t;
+typedef __u32			nlink_t;
 typedef __kernel_off_t		off_t;
 typedef __kernel_pid_t		pid_t;
 typedef __kernel_daddr_t	daddr_t;
-- 
cgit v1.2.3


From a4f9a9a635e4d54ac93df4b861ed8792e17bd4a2 Mon Sep 17 00:00:00 2001
From: Naohiro Aota <naota@elisp.net>
Date: Tue, 29 May 2012 11:02:24 -0700
Subject: fsnotify: handle subfiles' perm events

Recently I'm working on fanotify and found the following strange
behaviors.

I wrote a program to set fanotify_mark on "/tmp/block" and FAN_DENY
all events notified.

fanotify_mask = FAN_ALL_EVENTS | FAN_ALL_PERM_EVENTS | FAN_EVENT_ON_CHILD:
$ cd /tmp/block; cat foo
cat: foo: Operation not permitted

Operation on the file is blocked as expected.

But,

fanotify_mask = FAN_ALL_PERM_EVENTS | FAN_EVENT_ON_CHILD:
$ cd /tmp/block; cat foo
aaa

It's not blocked anymore.  This is confusing behavior.  Also reading
commit "fsnotify: call fsnotify_parent in perm events", it seems like
fsnotify should handle subfiles' perm events as well as the other notify
events.

With this patch, regardless of FAN_ALL_EVENTS set or not:
$ cd /tmp/block; cat foo
cat: foo: Operation not permitted

Operation on the file is now blocked properly.

FS_OPEN_PERM and FS_ACCESS_PERM are not listed on FS_EVENTS_POSS_ON_CHILD.
 Due to fsnotify_inode_watches_children() check, if you only specify only
these events as fsnotify_mask, you don't get subfiles' perm events
notified.

This patch add the events to FS_EVENTS_POSS_ON_CHILD to get them notified
even if only these events are specified to fsnotify_mask.

Signed-off-by: Naohiro Aota <naota@elisp.net>
Cc: Eric Paris <eparis@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fsnotify_backend.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 91d0e0a34ef3..63d966d5c2ea 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -60,7 +60,7 @@
 #define FS_EVENTS_POSS_ON_CHILD   (FS_ACCESS | FS_MODIFY | FS_ATTRIB |\
 				   FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | FS_OPEN |\
 				   FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE |\
-				   FS_DELETE)
+				   FS_DELETE | FS_OPEN_PERM | FS_ACCESS_PERM)
 
 #define FS_MOVE			(FS_MOVED_FROM | FS_MOVED_TO)
 
-- 
cgit v1.2.3


From 114067b69e7b2c691faace0e33db2f04096f668d Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Thu, 31 May 2012 14:43:27 +0900
Subject: perf tools: Check if callchain is corrupted

We faced segmentation fault on perf top -G at very high sampling rate
due to a corrupted callchain. While the root cause was not revealed (I
failed to figure it out), this patch tries to protect us from the
segfault on such cases.

Reported-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Namhyung Kim <namhyung.kim@lge.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Sunjin Yang <fan4326@gmail.com>
Link: http://lkml.kernel.org/r/1338443007-24857-2-git-send-email-namhyung.kim@lge.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 include/linux/perf_event.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index f32578634d9d..1817d4015e5f 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -555,6 +555,8 @@ enum perf_event_type {
 	PERF_RECORD_MAX,			/* non-ABI */
 };
 
+#define PERF_MAX_STACK_DEPTH		255
+
 enum perf_callchain_context {
 	PERF_CONTEXT_HV			= (__u64)-32,
 	PERF_CONTEXT_KERNEL		= (__u64)-128,
@@ -609,8 +611,6 @@ struct perf_guest_info_callbacks {
 #include <linux/sysfs.h>
 #include <asm/local.h>
 
-#define PERF_MAX_STACK_DEPTH		255
-
 struct perf_callchain_entry {
 	__u64				nr;
 	__u64				ip[PERF_MAX_STACK_DEPTH];
-- 
cgit v1.2.3


From d007794a182bc072a7b7479909dbd0d67ba341be Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 30 May 2012 13:11:37 -0400
Subject: split cap_mmap_addr() out of cap_file_mmap()

... switch callers.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/security.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index ab0e091ce5fa..4ad59c9fa731 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -86,6 +86,7 @@ extern int cap_inode_setxattr(struct dentry *dentry, const char *name,
 extern int cap_inode_removexattr(struct dentry *dentry, const char *name);
 extern int cap_inode_need_killpriv(struct dentry *dentry);
 extern int cap_inode_killpriv(struct dentry *dentry);
+extern int cap_mmap_addr(unsigned long addr);
 extern int cap_file_mmap(struct file *file, unsigned long reqprot,
 			 unsigned long prot, unsigned long flags,
 			 unsigned long addr, unsigned long addr_only);
@@ -2187,7 +2188,7 @@ static inline int security_file_mmap(struct file *file, unsigned long reqprot,
 				     unsigned long addr,
 				     unsigned long addr_only)
 {
-	return cap_file_mmap(file, reqprot, prot, flags, addr, addr_only);
+	return cap_mmap_addr(addr);
 }
 
 static inline int security_file_mprotect(struct vm_area_struct *vma,
-- 
cgit v1.2.3


From e5467859f7f79b69fc49004403009dfdba3bec53 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 30 May 2012 13:30:51 -0400
Subject: split ->file_mmap() into ->mmap_addr()/->mmap_file()

... i.e. file-dependent and address-dependent checks.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/security.h | 36 ++++++++++++++++++++----------------
 1 file changed, 20 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index 4ad59c9fa731..f1bae0963ddc 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -87,9 +87,8 @@ extern int cap_inode_removexattr(struct dentry *dentry, const char *name);
 extern int cap_inode_need_killpriv(struct dentry *dentry);
 extern int cap_inode_killpriv(struct dentry *dentry);
 extern int cap_mmap_addr(unsigned long addr);
-extern int cap_file_mmap(struct file *file, unsigned long reqprot,
-			 unsigned long prot, unsigned long flags,
-			 unsigned long addr, unsigned long addr_only);
+extern int cap_mmap_file(struct file *file, unsigned long reqprot,
+			 unsigned long prot, unsigned long flags);
 extern int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags);
 extern int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
 			  unsigned long arg4, unsigned long arg5);
@@ -587,15 +586,17 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	simple integer value.  When @arg represents a user space pointer, it
  *	should never be used by the security module.
  *	Return 0 if permission is granted.
- * @file_mmap :
+ * @mmap_addr :
+ *	Check permissions for a mmap operation at @addr.
+ *	@addr contains virtual address that will be used for the operation.
+ *	Return 0 if permission is granted.
+ * @mmap_file :
  *	Check permissions for a mmap operation.  The @file may be NULL, e.g.
  *	if mapping anonymous memory.
  *	@file contains the file structure for file to map (may be NULL).
  *	@reqprot contains the protection requested by the application.
  *	@prot contains the protection that will be applied by the kernel.
  *	@flags contains the operational flags.
- *	@addr contains virtual address that will be used for the operation.
- *	@addr_only contains a boolean: 0 if file-backed VMA, otherwise 1.
  *	Return 0 if permission is granted.
  * @file_mprotect:
  *	Check permissions before changing memory access permissions.
@@ -1482,10 +1483,10 @@ struct security_operations {
 	void (*file_free_security) (struct file *file);
 	int (*file_ioctl) (struct file *file, unsigned int cmd,
 			   unsigned long arg);
-	int (*file_mmap) (struct file *file,
+	int (*mmap_addr) (unsigned long addr);
+	int (*mmap_file) (struct file *file,
 			  unsigned long reqprot, unsigned long prot,
-			  unsigned long flags, unsigned long addr,
-			  unsigned long addr_only);
+			  unsigned long flags);
 	int (*file_mprotect) (struct vm_area_struct *vma,
 			      unsigned long reqprot,
 			      unsigned long prot);
@@ -1744,9 +1745,9 @@ int security_file_permission(struct file *file, int mask);
 int security_file_alloc(struct file *file);
 void security_file_free(struct file *file);
 int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
-int security_file_mmap(struct file *file, unsigned long reqprot,
-			unsigned long prot, unsigned long flags,
-			unsigned long addr, unsigned long addr_only);
+int security_mmap_file(struct file *file, unsigned long reqprot,
+			unsigned long prot, unsigned long flags);
+int security_mmap_addr(unsigned long addr);
 int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot,
 			   unsigned long prot);
 int security_file_lock(struct file *file, unsigned int cmd);
@@ -2182,11 +2183,14 @@ static inline int security_file_ioctl(struct file *file, unsigned int cmd,
 	return 0;
 }
 
-static inline int security_file_mmap(struct file *file, unsigned long reqprot,
+static inline int security_mmap_file(struct file *file, unsigned long reqprot,
 				     unsigned long prot,
-				     unsigned long flags,
-				     unsigned long addr,
-				     unsigned long addr_only)
+				     unsigned long flags)
+{
+	return 0;
+}
+
+static inline int security_mmap_addr(unsigned long addr)
 {
 	return cap_mmap_addr(addr);
 }
-- 
cgit v1.2.3


From 1d59d61f606547f0712aa6971f91f71154071c99 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 31 May 2012 12:22:33 -0400
Subject: NFS: Ensure that setattr and getattr wait for O_DIRECT write
 completion

Use the same mechanism as the block devices are using, but move the
helper functions from fs/direct-io.c into fs/inode.c to remove the
dependency on CONFIG_BLOCK.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Fred Isaman <iisaman@netapp.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 038076b27ea4..598a5892ff2b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2453,8 +2453,6 @@ enum {
 };
 
 void dio_end_io(struct bio *bio, int error);
-void inode_dio_wait(struct inode *inode);
-void inode_dio_done(struct inode *inode);
 
 ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	struct block_device *bdev, const struct iovec *iov, loff_t offset,
@@ -2469,12 +2467,11 @@ static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb,
 				    offset, nr_segs, get_block, NULL, NULL,
 				    DIO_LOCKING | DIO_SKIP_HOLES);
 }
-#else
-static inline void inode_dio_wait(struct inode *inode)
-{
-}
 #endif
 
+void inode_dio_wait(struct inode *inode);
+void inode_dio_done(struct inode *inode);
+
 extern const struct file_operations generic_ro_fops;
 
 #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
-- 
cgit v1.2.3


From 3fc929e2d693185aac2686e5e64e24eae10642a4 Mon Sep 17 00:00:00 2001
From: Marcel Apfelbaum <marcela@dev.mellanox.co.il>
Date: Wed, 30 May 2012 09:14:51 +0000
Subject: net/mlx4_core: Fix number of EQs used in ICM initialisation

In SRIOV mode, the number of EQs used when computing the total ICM size
was incorrect.

To fix this, we do the following:
1. We add a new structure to mlx4_dev, mlx4_phys_caps, to contain physical HCA
   capabilities.  The PPF uses the phys capabilities when it computes things
   like ICM size.

   The dev_caps structure will then contain the paravirtualized values, making
   bookkeeping much easier in SRIOV mode. We add a structure rather than a
   single parameter because there will be other fields in the phys_caps.

   The first field we add to the mlx4_phys_caps structure is num_phys_eqs.

2. In INIT_HCA, when running in SRIOV mode, the "log_num_eqs" parameter
   passed to the FW is the number of EQs per VF/PF; each function (PF or VF)
   has this number of EQs available.

   However, the total number of EQs which must be allowed for in the ICM is
   (1 << log_num_eqs) * (#VFs + #PFs).  Rather than compute this quantity,
   we allocate ICM space for 1024 EQs (which is the device maximum
   number of EQs, and which is the value we place in the mlx4_phys_caps structure).

   For INIT_HCA, however, we use the per-function number of EQs as described
   above.

Signed-off-by: Marcel Apfelbaum <marcela@dev.mellanox.co.il>
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx4/device.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 6e27fa99e8b9..6a8f002b8ed3 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -64,6 +64,7 @@ enum {
 	MLX4_MAX_NUM_PF		= 16,
 	MLX4_MAX_NUM_VF		= 64,
 	MLX4_MFUNC_MAX		= 80,
+	MLX4_MAX_EQ_NUM		= 1024,
 	MLX4_MFUNC_EQ_NUM	= 4,
 	MLX4_MFUNC_MAX_EQES     = 8,
 	MLX4_MFUNC_EQE_MASK     = (MLX4_MFUNC_MAX_EQES - 1)
@@ -239,6 +240,10 @@ static inline u64 mlx4_fw_ver(u64 major, u64 minor, u64 subminor)
 	return (major << 32) | (minor << 16) | subminor;
 }
 
+struct mlx4_phys_caps {
+	u32			num_phys_eqs;
+};
+
 struct mlx4_caps {
 	u64			fw_ver;
 	u32			function;
@@ -499,6 +504,7 @@ struct mlx4_dev {
 	unsigned long		flags;
 	unsigned long		num_slaves;
 	struct mlx4_caps	caps;
+	struct mlx4_phys_caps	phys_caps;
 	struct radix_tree_root	qp_table_tree;
 	u8			rev_id;
 	char			board_id[MLX4_BOARD_ID_LEN];
-- 
cgit v1.2.3


From 9793f7c88937e7ac07305ab1af1a519225836823 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Wed, 2 May 2012 16:08:38 +0400
Subject: SUNRPC: new svc_bind() routine introduced

This new routine is responsible for service registration in a specified
network context.

The idea is to separate service creation from per-net operations.

Note also: since registering service with svc_bind() can fail, the
service will be destroyed and during destruction it will try to
unregister itself from rpcbind. In this case unregistration has to be
skipped.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 51b29ac45a8e..2b43e0214261 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -416,6 +416,7 @@ struct svc_procedure {
  */
 int svc_rpcb_setup(struct svc_serv *serv, struct net *net);
 void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net);
+int svc_bind(struct svc_serv *serv, struct net *net);
 struct svc_serv *svc_create(struct svc_program *, unsigned int,
 			    void (*shutdown)(struct svc_serv *, struct net *net));
 struct svc_rqst *svc_prepare_thread(struct svc_serv *serv,
-- 
cgit v1.2.3


From 03a4e1f6ddf25f48848e1bddcffc0ad489648331 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 14 May 2012 19:55:22 -0400
Subject: nfsd4: move principal name into svc_cred

Instead of keeping the principal name associated with a request in a
structure that's private to auth_gss and using an accessor function,
move it to svc_cred.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svcauth.h     | 9 +++++++++
 include/linux/sunrpc/svcauth_gss.h | 1 -
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h
index 2c54683b91de..16fe477a96e0 100644
--- a/include/linux/sunrpc/svcauth.h
+++ b/include/linux/sunrpc/svcauth.h
@@ -15,13 +15,22 @@
 #include <linux/sunrpc/msg_prot.h>
 #include <linux/sunrpc/cache.h>
 #include <linux/hash.h>
+#include <linux/cred.h>
 
 struct svc_cred {
 	uid_t			cr_uid;
 	gid_t			cr_gid;
 	struct group_info	*cr_group_info;
+	char			*cr_principal; /* for gss */
 };
 
+static inline void free_svc_cred(struct svc_cred *cred)
+{
+	if (cred->cr_group_info)
+		put_group_info(cred->cr_group_info);
+	kfree(cred->cr_principal);
+}
+
 struct svc_rqst;		/* forward decl */
 struct in6_addr;
 
diff --git a/include/linux/sunrpc/svcauth_gss.h b/include/linux/sunrpc/svcauth_gss.h
index 7c32daa025eb..726aff1a5201 100644
--- a/include/linux/sunrpc/svcauth_gss.h
+++ b/include/linux/sunrpc/svcauth_gss.h
@@ -22,7 +22,6 @@ int gss_svc_init_net(struct net *net);
 void gss_svc_shutdown_net(struct net *net);
 int svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name);
 u32 svcauth_gss_flavor(struct auth_domain *dom);
-char *svc_gss_principal(struct svc_rqst *);
 
 #endif /* __KERNEL__ */
 #endif /* _LINUX_SUNRPC_SVCAUTH_GSS_H */
-- 
cgit v1.2.3


From d5497fc693a446ce9100fcf4117c3f795ddfd0d2 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 14 May 2012 22:06:49 -0400
Subject: nfsd4: move rq_flavor into svc_cred

Move the rq_flavor into struct svc_cred, and use it in setclientid and
exchange_id comparisons as well.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc.h     | 1 -
 include/linux/sunrpc/svcauth.h | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 2b43e0214261..40e0a273faea 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -232,7 +232,6 @@ struct svc_rqst {
 	struct svc_pool *	rq_pool;	/* thread pool */
 	struct svc_procedure *	rq_procinfo;	/* procedure info */
 	struct auth_ops *	rq_authop;	/* authentication flavour */
-	u32			rq_flavor;	/* pseudoflavor */
 	struct svc_cred		rq_cred;	/* auth info */
 	void *			rq_xprt_ctxt;	/* transport specific context ptr */
 	struct svc_deferred_req*rq_deferred;	/* deferred request we are replaying */
diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h
index 16fe477a96e0..dd74084a9799 100644
--- a/include/linux/sunrpc/svcauth.h
+++ b/include/linux/sunrpc/svcauth.h
@@ -21,6 +21,7 @@ struct svc_cred {
 	uid_t			cr_uid;
 	gid_t			cr_gid;
 	struct group_info	*cr_group_info;
+	u32			cr_flavor; /* pseudoflavor */
 	char			*cr_principal; /* for gss */
 };
 
-- 
cgit v1.2.3


From a3860c1c5dd1137db23d7786d284939c5761d517 Mon Sep 17 00:00:00 2001
From: Xi Wang <xi.wang@gmail.com>
Date: Thu, 31 May 2012 16:26:04 -0700
Subject: introduce SIZE_MAX

ULONG_MAX is often used to check for integer overflow when calculating
allocation size.  While ULONG_MAX happens to work on most systems, there
is no guarantee that `size_t' must be the same size as `long'.

This patch introduces SIZE_MAX, the maximum value of `size_t', to improve
portability and readability for allocation size validation.

Signed-off-by: Xi Wang <xi.wang@gmail.com>
Acked-by: Alex Elder <elder@dreamhost.com>
Cc: David Airlie <airlied@linux.ie>
Cc: Pekka Enberg <penberg@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 1 +
 include/linux/slab.h   | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index ec55a3c8ba77..e07f5e0c5df4 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -35,6 +35,7 @@
 #define LLONG_MAX	((long long)(~0ULL>>1))
 #define LLONG_MIN	(-LLONG_MAX - 1)
 #define ULLONG_MAX	(~0ULL)
+#define SIZE_MAX	(~(size_t)0)
 
 #define STACK_MAGIC	0xdeadbeef
 
diff --git a/include/linux/slab.h b/include/linux/slab.h
index a595dce6b0c7..67d5d94b783a 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -242,7 +242,7 @@ size_t ksize(const void *);
  */
 static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags)
 {
-	if (size != 0 && n > ULONG_MAX / size)
+	if (size != 0 && n > SIZE_MAX / size)
 		return NULL;
 	return __kmalloc(n * size, flags);
 }
-- 
cgit v1.2.3


From 020ac5b6bef15785f9dde9de89d2734ff97da733 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Thu, 31 May 2012 16:26:12 -0700
Subject: fat: introduce special inode for managing the FSINFO block

This is patchset makes fatfs stop using the VFS '->write_super()' method
for writing out the FSINFO block.

The final goal is to get rid of the 'sync_supers()' kernel thread.  This
kernel thread wakes up every 5 seconds (by default) and calls
'->write_super()' for all mounted file-systems.  And the bad thing is that
this is done even if all the superblocks are clean.  Moreover, some
file-systems do not even need this end they do not register the
'->write_super()' method at all (e.g., btrfs).

So 'sync_supers()' most often just generates useless wake-ups and wastes
power.  I am trying to make all file-systems independent of
'->write_super()' and plan to remove 'sync_supers()' and '->write_super'
completely once there are no more users.

The '->write_supers()' method is mostly used by baroque file-systems like
hfs, udf, etc.  Modern file-systems like btrfs and xfs do not use it.
This justifies removing this stuff from VFS completely and make every FS
self-manage own superblock.

Tested with xfstests.

This patch:

Preparation for further changes.  It introduces a special inode
('fsinfo_inode') in FAT file-system which we'll later use for managing the
FSINFO block.  Note, this there is already one special inode ('fat_inode')
which is used for managing the FAT tables.

Introduce new 'MSDOS_FSINFO_INO' constant for this special inode.  It is
safe to do because FAT file-system does not store inode numbers on the
media but generates them run-time.

I've also cleaned up the comment to existing 'MSDOS_ROOT_INO' constant,
while on it.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Cc: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/msdos_fs.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h
index 34066e65fdeb..11cc2ac67e75 100644
--- a/include/linux/msdos_fs.h
+++ b/include/linux/msdos_fs.h
@@ -21,8 +21,9 @@
 #define CT_LE_W(v)	cpu_to_le16(v)
 #define CT_LE_L(v)	cpu_to_le32(v)
 
+#define MSDOS_ROOT_INO	 1	/* The root inode number */
+#define MSDOS_FSINFO_INO 2	/* Used for managing the FSINFO block */
 
-#define MSDOS_ROOT_INO	1	/* == MINIX_ROOT_INO */
 #define MSDOS_DIR_BITS	5	/* log2(sizeof(struct msdos_dir_entry)) */
 
 /* directory limit */
-- 
cgit v1.2.3


From ae3cef7300e9fddc35ad251dd5f27c5b88c8594a Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Thu, 31 May 2012 16:26:14 -0700
Subject: kmod: unexport call_usermodehelper_freeinfo()

call_usermodehelper_freeinfo() is not used outside of kmod.c.  So unexport
it, and make it static to kmod.c

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kmod.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kmod.h b/include/linux/kmod.h
index dd99c329e161..f07f9a4e10ff 100644
--- a/include/linux/kmod.h
+++ b/include/linux/kmod.h
@@ -79,10 +79,6 @@ void call_usermodehelper_setfns(struct subprocess_info *info,
 /* Actually execute the sub-process */
 int call_usermodehelper_exec(struct subprocess_info *info, int wait);
 
-/* Free the subprocess_info. This is only needed if you're not going
-   to call call_usermodehelper_exec */
-void call_usermodehelper_freeinfo(struct subprocess_info *info);
-
 static inline int
 call_usermodehelper_fns(char *path, char **argv, char **envp, int wait,
 			int (*init)(struct subprocess_info *info, struct cred *new),
-- 
cgit v1.2.3


From 785042f2e275089e22c36b462f6495ce8d91732d Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Thu, 31 May 2012 16:26:15 -0700
Subject: kmod: move call_usermodehelper_fns() to .c file and unexport all it's
 helpers

If we move call_usermodehelper_fns() to kmod.c file and EXPORT_SYMBOL it
we can avoid exporting all it's helper functions:
	call_usermodehelper_setup
	call_usermodehelper_setfns
	call_usermodehelper_exec
And make all of them static to kmod.c

Since the optimizer will see all these as a single call site it will
inline them inside call_usermodehelper_fns().  So we loose the call to
_fns but gain 3 calls to the helpers.  (Not that it matters)

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kmod.h | 30 ++----------------------------
 1 file changed, 2 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kmod.h b/include/linux/kmod.h
index f07f9a4e10ff..5398d5807075 100644
--- a/include/linux/kmod.h
+++ b/include/linux/kmod.h
@@ -66,36 +66,10 @@ struct subprocess_info {
 	void *data;
 };
 
-/* Allocate a subprocess_info structure */
-struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
-						  char **envp, gfp_t gfp_mask);
-
-/* Set various pieces of state into the subprocess_info structure */
-void call_usermodehelper_setfns(struct subprocess_info *info,
-		    int (*init)(struct subprocess_info *info, struct cred *new),
-		    void (*cleanup)(struct subprocess_info *info),
-		    void *data);
-
-/* Actually execute the sub-process */
-int call_usermodehelper_exec(struct subprocess_info *info, int wait);
-
-static inline int
+extern int
 call_usermodehelper_fns(char *path, char **argv, char **envp, int wait,
 			int (*init)(struct subprocess_info *info, struct cred *new),
-			void (*cleanup)(struct subprocess_info *), void *data)
-{
-	struct subprocess_info *info;
-	gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL;
-
-	info = call_usermodehelper_setup(path, argv, envp, gfp_mask);
-
-	if (info == NULL)
-		return -ENOMEM;
-
-	call_usermodehelper_setfns(info, init, cleanup, data);
-
-	return call_usermodehelper_exec(info, wait);
-}
+			void (*cleanup)(struct subprocess_info *), void *data);
 
 static inline int
 call_usermodehelper(char *path, char **argv, char **envp, int wait)
-- 
cgit v1.2.3


From 43e13cc107cf6cd3c15fbe1cef849435c2223d50 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Thu, 31 May 2012 16:26:16 -0700
Subject: cred: remove task_is_dead() from __task_cred() validation

Commit 8f92054e7ca1 ("CRED: Fix __task_cred()'s lockdep check and banner
comment"):

    add the following validation condition:

        task->exit_state >= 0

    to permit the access if the target task is dead and therefore
    unable to change its own credentials.

OK, but afaics currently this can only help wait_task_zombie() which calls
__task_cred() without rcu lock.

Remove this validation and change wait_task_zombie() to use task_uid()
instead.  This means we do rcu_read_lock() only to shut up the lockdep,
but we already do the same in, say, wait_task_stopped().

task_is_dead() should die, task->exit_state != 0 means that this task has
passed exit_notify(), only do_wait-like code paths should use this.

Unfortunately, we can't kill task_is_dead() right now, it has already
acquired buggy users in drivers/staging.  The fix already exists.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: "Eric W. Biederman" <ebiederm@xmission.com>
Acked-by: David Howells <dhowells@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: James Morris <jmorris@namei.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cred.h | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cred.h b/include/linux/cred.h
index 917dc5aeb1d4..ebbed2ce6637 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -277,17 +277,13 @@ static inline void put_cred(const struct cred *_cred)
  * @task: The task to query
  *
  * Access the objective credentials of a task.  The caller must hold the RCU
- * readlock or the task must be dead and unable to change its own credentials.
+ * readlock.
  *
  * The result of this function should not be passed directly to get_cred();
  * rather get_task_cred() should be used instead.
  */
-#define __task_cred(task)						\
-	({								\
-		const struct task_struct *__t = (task);			\
-		rcu_dereference_check(__t->real_cred,			\
-				      task_is_dead(__t));		\
-	})
+#define __task_cred(task)	\
+	rcu_dereference((task)->real_cred)
 
 /**
  * get_current_cred - Get the current task's subjective credentials
-- 
cgit v1.2.3


From cb79295e20a8088a2fd6a9b3cb5f2d889ec36b4d Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <anton.vorontsov@linaro.org>
Date: Thu, 31 May 2012 16:26:22 -0700
Subject: cpu: introduce clear_tasks_mm_cpumask() helper

Many architectures clear tasks' mm_cpumask like this:

	read_lock(&tasklist_lock);
	for_each_process(p) {
		if (p->mm)
			cpumask_clear_cpu(cpu, mm_cpumask(p->mm));
	}
	read_unlock(&tasklist_lock);

Depending on the context, the code above may have several problems,
such as:

1. Working with task->mm w/o getting mm or grabing the task lock is
   dangerous as ->mm might disappear (exit_mm() assigns NULL under
   task_lock(), so tasklist lock is not enough).

2. Checking for process->mm is not enough because process' main
   thread may exit or detach its mm via use_mm(), but other threads
   may still have a valid mm.

This patch implements a small helper function that does things
correctly, i.e.:

1. We take the task's lock while whe handle its mm (we can't use
   get_task_mm()/mmput() pair as mmput() might sleep);

2. To catch exited main thread case, we use find_lock_task_mm(),
   which walks up all threads and returns an appropriate task
   (with task lock held).

Also, Per Peter Zijlstra's idea, now we don't grab tasklist_lock in
the new helper, instead we take the rcu read lock. We can do this
because the function is called after the cpu is taken down and marked
offline, so no new tasks will get this cpu set in their mm mask.

Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
Cc: Richard Weinberger <richard@nod.at>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Mike Frysinger <vapier@gentoo.org>
Cc: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cpu.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 7230bb59a06f..2e9b9ebbeb78 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -177,6 +177,7 @@ extern void put_online_cpus(void);
 #define hotcpu_notifier(fn, pri)	cpu_notifier(fn, pri)
 #define register_hotcpu_notifier(nb)	register_cpu_notifier(nb)
 #define unregister_hotcpu_notifier(nb)	unregister_cpu_notifier(nb)
+void clear_tasks_mm_cpumask(int cpu);
 int cpu_down(unsigned int cpu);
 
 #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
-- 
cgit v1.2.3


From 29a5c67e7a78815fda0567a867adce467f6e6e5a Mon Sep 17 00:00:00 2001
From: maximilian attems <max@stro.at>
Date: Thu, 31 May 2012 16:26:27 -0700
Subject: kexec: export kexec.h to user space

Add userspace definitions, guard all relevant kernel structures.  While at
it document stuff and remove now useless userspace hint.

It is easy to add the relevant system call to respective libc's, but it
seems pointless to have to duplicate the data structures.

This is based on the kexec-tools headers, with the exception of just using
int on return (succes or failure) and using size_t instead of 'unsigned
long int' for the number of segments argument of kexec_load().

Signed-off-by: maximilian attems <max@stro.at>
Cc: Simon Horman <horms@verge.net.au>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Haren Myneni <hbabu@us.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/Kbuild  |  1 +
 include/linux/kexec.h | 75 ++++++++++++++++++++++++++++++++++++---------------
 2 files changed, 54 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 7185b8f15ced..8760be30b375 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -226,6 +226,7 @@ header-y += kdev_t.h
 header-y += kernel.h
 header-y += kernelcapi.h
 header-y += kernel-page-flags.h
+header-y += kexec.h
 header-y += keyboard.h
 header-y += keyctl.h
 header-y += l2tp.h
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 0d7d6a1b172f..37c5f7261142 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -1,8 +1,58 @@
 #ifndef LINUX_KEXEC_H
 #define LINUX_KEXEC_H
 
-#ifdef CONFIG_KEXEC
+/* kexec system call -  It loads the new kernel to boot into.
+ * kexec does not sync, or unmount filesystems so if you need
+ * that to happen you need to do that yourself.
+ */
+
 #include <linux/types.h>
+
+/* kexec flags for different usage scenarios */
+#define KEXEC_ON_CRASH		0x00000001
+#define KEXEC_PRESERVE_CONTEXT	0x00000002
+#define KEXEC_ARCH_MASK		0xffff0000
+
+/* These values match the ELF architecture values.
+ * Unless there is a good reason that should continue to be the case.
+ */
+#define KEXEC_ARCH_DEFAULT ( 0 << 16)
+#define KEXEC_ARCH_386     ( 3 << 16)
+#define KEXEC_ARCH_X86_64  (62 << 16)
+#define KEXEC_ARCH_PPC     (20 << 16)
+#define KEXEC_ARCH_PPC64   (21 << 16)
+#define KEXEC_ARCH_IA_64   (50 << 16)
+#define KEXEC_ARCH_ARM     (40 << 16)
+#define KEXEC_ARCH_S390    (22 << 16)
+#define KEXEC_ARCH_SH      (42 << 16)
+#define KEXEC_ARCH_MIPS_LE (10 << 16)
+#define KEXEC_ARCH_MIPS    ( 8 << 16)
+
+/* The artificial cap on the number of segments passed to kexec_load. */
+#define KEXEC_SEGMENT_MAX 16
+
+#ifndef __KERNEL__
+/*
+ * This structure is used to hold the arguments that are used when
+ * loading  kernel binaries.
+ */
+struct kexec_segment {
+	const void *buf;
+	size_t bufsz;
+	const void *mem;
+	size_t memsz;
+};
+
+/* Load a new kernel image as described by the kexec_segment array
+ * consisting of passed number of segments at the entry-point address.
+ * The flags allow different useage types.
+ */
+extern int kexec_load(void *, size_t, struct kexec_segment *,
+		unsigned long int);
+#endif /* __KERNEL__ */
+
+#ifdef __KERNEL__
+#ifdef CONFIG_KEXEC
 #include <linux/list.h>
 #include <linux/linkage.h>
 #include <linux/compat.h>
@@ -67,11 +117,10 @@ typedef unsigned long kimage_entry_t;
 #define IND_DONE         0x4
 #define IND_SOURCE       0x8
 
-#define KEXEC_SEGMENT_MAX 16
 struct kexec_segment {
 	void __user *buf;
 	size_t bufsz;
-	unsigned long mem;	/* User space sees this as a (void *) ... */
+	unsigned long mem;
 	size_t memsz;
 };
 
@@ -175,25 +224,6 @@ extern struct kimage *kexec_crash_image;
 #define kexec_flush_icache_page(page)
 #endif
 
-#define KEXEC_ON_CRASH		0x00000001
-#define KEXEC_PRESERVE_CONTEXT	0x00000002
-#define KEXEC_ARCH_MASK		0xffff0000
-
-/* These values match the ELF architecture values.
- * Unless there is a good reason that should continue to be the case.
- */
-#define KEXEC_ARCH_DEFAULT ( 0 << 16)
-#define KEXEC_ARCH_386     ( 3 << 16)
-#define KEXEC_ARCH_X86_64  (62 << 16)
-#define KEXEC_ARCH_PPC     (20 << 16)
-#define KEXEC_ARCH_PPC64   (21 << 16)
-#define KEXEC_ARCH_IA_64   (50 << 16)
-#define KEXEC_ARCH_ARM     (40 << 16)
-#define KEXEC_ARCH_S390    (22 << 16)
-#define KEXEC_ARCH_SH      (42 << 16)
-#define KEXEC_ARCH_MIPS_LE (10 << 16)
-#define KEXEC_ARCH_MIPS    ( 8 << 16)
-
 /* List of defined/legal kexec flags */
 #ifndef CONFIG_KEXEC_JUMP
 #define KEXEC_FLAGS    KEXEC_ON_CRASH
@@ -228,4 +258,5 @@ struct task_struct;
 static inline void crash_kexec(struct pt_regs *regs) { }
 static inline int kexec_should_crash(struct task_struct *p) { return 0; }
 #endif /* CONFIG_KEXEC */
+#endif /* __KERNEL__ */
 #endif /* LINUX_KEXEC_H */
-- 
cgit v1.2.3


From 93e6f119c0ce8a1bba6e81dc8dd97d67be360844 Mon Sep 17 00:00:00 2001
From: Doug Ledford <dledford@redhat.com>
Date: Thu, 31 May 2012 16:26:28 -0700
Subject: ipc/mqueue: cleanup definition names and locations

Since commit b231cca4381e ("message queues: increase range limits") on
Oct 18, 2008, calls to mq_open() that did not pass in an attribute
struct and expected to get default values for the size of the queue and
the max message size now get the system wide maximums instead of
hardwired defaults like they used to get.

This was uncovered when one of the earlier patches in this patch set
increased the default system wide maximums at the same time it increased
the hard ceiling on the system wide maximums (a customer specifically
needed the hard ceiling brought back up, the new ceiling that commit
b231cca4381e introduced was too low for their production systems).  By
increasing the default maximums and not realising they were tied to any
attempt to create a message queue without an attribute struct, I had
inadvertently made it such that all message queue creation attempts
without an attribute struct were failing because the new default
maximums would create a queue that exceeded the default rlimit for
message queue bytes.

As a result, the system wide defaults were brought back down to their
previous levels, and the system wide ceilings on the maximums were
raised to meet the customer's needs.  However, the fact that the no
attribute struct behavior of mq_open() could be broken by changing the
system wide maximums for message queues was seen as fundamentally broken
itself.  So we hardwired the no attribute case back like it used to be.
But, then we realized that on the very off chance that some piece of
software in the wild depended on that behavior, we could work around
that issue by adding two new knobs to /proc that allowed setting the
defaults for message queues created without an attr struct separately
from the system wide maximums.

What is not an option IMO is to leave the current behavior in place.  No
piece of software should ever rely on setting the system wide maximums
in order to get a desired message queue.  Such a reliance would be so
fundamentally multitasking OS unfriendly as to not really be tolerable.
Fortunately, we don't know of any software in the wild that uses this
except for a regression test program that caught the issue in the first
place.  If there is though, we have made accommodations with the two new
/proc knobs (and that's all the accommodations such fundamentally broken
software can be allowed)..

This patch:

The various defines for minimums and maximums of the sysctl controllable
mqueue values are scattered amongst different files and named
inconsistently.  Move them all into ipc_namespace.h and make them have
consistent names.  Additionally, make the number of queues per namespace
also have a minimum and maximum and use the same sysctl function as the
other two settable variables.

Signed-off-by: Doug Ledford <dledford@redhat.com>
Acked-by: Serge E. Hallyn <serue@us.ibm.com>
Cc: Amerigo Wang <amwang@redhat.com>
Cc: Joe Korty <joe.korty@ccur.com>
Cc: Jiri Slaby <jslaby@suse.cz>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Manfred Spraul <manfred@colorfullife.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ipc_namespace.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 8a297a5e794c..1372b566e1e1 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -91,10 +91,15 @@ static inline void shm_destroy_orphaned(struct ipc_namespace *ns) {}
 #ifdef CONFIG_POSIX_MQUEUE
 extern int mq_init_ns(struct ipc_namespace *ns);
 /* default values */
+#define MIN_QUEUESMAX  1
 #define DFLT_QUEUESMAX 256     /* max number of message queues */
+#define HARD_QUEUESMAX 1024
+#define MIN_MSGMAX     1
 #define DFLT_MSGMAX    10      /* max number of messages in each queue */
 #define HARD_MSGMAX    (32768*sizeof(void *)/4)
+#define MIN_MSGSIZEMAX  128
 #define DFLT_MSGSIZEMAX 8192   /* max message size */
+#define HARD_MSGSIZEMAX (8192*128)
 #else
 static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; }
 #endif
-- 
cgit v1.2.3


From 858ee3784e8105467f1f3017f4ece51cb51d4830 Mon Sep 17 00:00:00 2001
From: Doug Ledford <dledford@redhat.com>
Date: Thu, 31 May 2012 16:26:29 -0700
Subject: ipc/mqueue: switch back to using non-max values on create

Commit b231cca4381e ("message queues: increase range limits") changed
how we create a queue that does not include an attr struct passed to
open so that it creates the queue with whatever the maximum values are.
However, if the admin has set the maximums to allow flexibility in
creating a queue (aka, both a large size and large queue are allowed,
but combined they create a queue too large for the RLIMIT_MSGQUEUE of
the user), then attempts to create a queue without an attr struct will
fail.  Switch back to using acceptable defaults regardless of what the
maximums are.

Note: so far, we only know of a few applications that rely on this
behavior (specifically, set the maximums in /proc, then run the
application which calls mq_open() without passing in an attr struct, and
the application expects the newly created message queue to have the
maximum sizes that were set in /proc used on the mq_open() call, and all
of those applications that we know of are actually part of regression
test suites that were coded to do something like this:

for size in 4096 65536 $((1024 * 1024)) $((16 * 1024 * 1024)); do
	echo $size > /proc/sys/fs/mqueue/msgsize_max
	mq_open || echo "Error opening mq with size $size"
done

These test suites that depend on any behavior like this are broken.  The
concept that programs should rely upon the system wide maximum in order
to get their desired results instead of simply using a attr struct to
specify what they want is fundamentally unfriendly programming practice
for any multi-tasking OS.

Fixing this will break those few apps that we know of (and those app
authors recognize the brokenness of their code and the need to fix it).
However, the following patch "mqueue: separate mqueue default value"
allows a workaround in the form of new knobs for the default msg queue
creation parameters for any software out there that we don't already
know about that might rely on this behavior at the moment.

Signed-off-by: Doug Ledford <dledford@redhat.com>
Cc: Serge E. Hallyn <serue@us.ibm.com>
Cc: Amerigo Wang <amwang@redhat.com>
Cc: Joe Korty <joe.korty@ccur.com>
Cc: Jiri Slaby <jslaby@suse.cz>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Manfred Spraul <manfred@colorfullife.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ipc_namespace.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 1372b566e1e1..bde094ee7b0e 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -95,9 +95,11 @@ extern int mq_init_ns(struct ipc_namespace *ns);
 #define DFLT_QUEUESMAX 256     /* max number of message queues */
 #define HARD_QUEUESMAX 1024
 #define MIN_MSGMAX     1
+#define DFLT_MSG       10U
 #define DFLT_MSGMAX    10      /* max number of messages in each queue */
 #define HARD_MSGMAX    (32768*sizeof(void *)/4)
 #define MIN_MSGSIZEMAX  128
+#define DFLT_MSGSIZE    8192U
 #define DFLT_MSGSIZEMAX 8192   /* max message size */
 #define HARD_MSGSIZEMAX (8192*128)
 #else
-- 
cgit v1.2.3


From 5b5c4d1a1440e94994c73dddbad7be0676cd8b9a Mon Sep 17 00:00:00 2001
From: Doug Ledford <dledford@redhat.com>
Date: Thu, 31 May 2012 16:26:30 -0700
Subject: ipc/mqueue: update maximums for the mqueue subsystem

Commit b231cca4381e ("message queues: increase range limits") changed the
maximum size of a message in a message queue from INT_MAX to 8192*128.
Unfortunately, we had customers that relied on a size much larger than
8192*128 on their production systems.  After reviewing POSIX, we found
that it is silent on the maximum message size.  We did find a couple other
areas in which it was not silent.  Fix up the mqueue maximums so that the
customer's system can continue to work, and document both the POSIX and
real world requirements in ipc_namespace.h so that we don't have this
issue crop back up.

Also, commit 9cf18e1dd74cd0 ("ipc: HARD_MSGMAX should be higher not lower
on 64bit") fiddled with HARD_MSGMAX without realizing that the number was
intentionally in place to limit the msg queue depth to one that was small
enough to kmalloc an array of pointers (hence why we divided 128k by
sizeof(long)).  If we wish to meet POSIX requirements, we have no choice
but to change our allocation to a vmalloc instead (at least for the large
queue size case).  With that, it's possible to increase our allowed
maximum to the POSIX requirements (or more if we choose).

[sfr@canb.auug.org.au: using vmalloc requires including vmalloc.h]
Signed-off-by: Doug Ledford <dledford@redhat.com>
Cc: Serge E. Hallyn <serue@us.ibm.com>
Cc: Amerigo Wang <amwang@redhat.com>
Cc: Joe Korty <joe.korty@ccur.com>
Cc: Jiri Slaby <jslaby@suse.cz>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Manfred Spraul <manfred@colorfullife.com>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ipc_namespace.h | 47 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 35 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index bde094ee7b0e..6e1dd08194fd 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -90,18 +90,41 @@ static inline void shm_destroy_orphaned(struct ipc_namespace *ns) {}
 
 #ifdef CONFIG_POSIX_MQUEUE
 extern int mq_init_ns(struct ipc_namespace *ns);
-/* default values */
-#define MIN_QUEUESMAX  1
-#define DFLT_QUEUESMAX 256     /* max number of message queues */
-#define HARD_QUEUESMAX 1024
-#define MIN_MSGMAX     1
-#define DFLT_MSG       10U
-#define DFLT_MSGMAX    10      /* max number of messages in each queue */
-#define HARD_MSGMAX    (32768*sizeof(void *)/4)
-#define MIN_MSGSIZEMAX  128
-#define DFLT_MSGSIZE    8192U
-#define DFLT_MSGSIZEMAX 8192   /* max message size */
-#define HARD_MSGSIZEMAX (8192*128)
+/*
+ * POSIX Message Queue default values:
+ *
+ * MIN_*: Lowest value an admin can set the maximum unprivileged limit to
+ * DFLT_*MAX: Default values for the maximum unprivileged limits
+ * DFLT_{MSG,MSGSIZE}: Default values used when the user doesn't supply
+ *   an attribute to the open call and the queue must be created
+ * HARD_*: Highest value the maximums can be set to.  These are enforced
+ *   on CAP_SYS_RESOURCE apps as well making them inviolate (so make them
+ *   suitably high)
+ *
+ * POSIX Requirements:
+ *   Per app minimum openable message queues - 8.  This does not map well
+ *     to the fact that we limit the number of queues on a per namespace
+ *     basis instead of a per app basis.  So, make the default high enough
+ *     that no given app should have a hard time opening 8 queues.
+ *   Minimum maximum for HARD_MSGMAX - 32767.  I bumped this to 65536.
+ *   Minimum maximum for HARD_MSGSIZEMAX - POSIX is silent on this.  However,
+ *     we have run into a situation where running applications in the wild
+ *     require this to be at least 5MB, and preferably 10MB, so I set the
+ *     value to 16MB in hopes that this user is the worst of the bunch and
+ *     the new maximum will handle anyone else.  I may have to revisit this
+ *     in the future.
+ */
+#define MIN_QUEUESMAX			1
+#define DFLT_QUEUESMAX		      256
+#define HARD_QUEUESMAX		     1024
+#define MIN_MSGMAX			1
+#define DFLT_MSG		       64U
+#define DFLT_MSGMAX		     1024
+#define HARD_MSGMAX		    65536
+#define MIN_MSGSIZEMAX		      128
+#define DFLT_MSGSIZE		     8192U
+#define DFLT_MSGSIZEMAX	       (1024*1024)
+#define HARD_MSGSIZEMAX	    (16*1024*1024)
 #else
 static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; }
 #endif
-- 
cgit v1.2.3


From e6315bb154e778391ce64b194756bd3d108dadf6 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Thu, 31 May 2012 16:26:31 -0700
Subject: mqueue: revert bump up DFLT_*MAX

Mqueue limitation is slightly naieve parameter likes other ipcs because
unprivileged user can consume kernel memory by using ipcs.

Thus, too aggressive raise bring us security issue.  Example, current
setting allow evil unprivileged user use 256GB (= 256 * 1024 * 1024*1024)
and it's enough large to system will belome unresponsive.  Don't do that.

Instead, every admin should adjust the knobs for their own systems.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Doug Ledford <dledford@redhat.com>
Acked-by: Joe Korty <joe.korty@ccur.com>
Cc: Amerigo Wang <amwang@redhat.com>
Acked-by: Serge E. Hallyn <serue@us.ibm.com>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ipc_namespace.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 6e1dd08194fd..2488535a32a3 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -118,12 +118,12 @@ extern int mq_init_ns(struct ipc_namespace *ns);
 #define DFLT_QUEUESMAX		      256
 #define HARD_QUEUESMAX		     1024
 #define MIN_MSGMAX			1
-#define DFLT_MSG		       64U
-#define DFLT_MSGMAX		     1024
+#define DFLT_MSG		       10U
+#define DFLT_MSGMAX		       10
 #define HARD_MSGMAX		    65536
 #define MIN_MSGSIZEMAX		      128
 #define DFLT_MSGSIZE		     8192U
-#define DFLT_MSGSIZEMAX	       (1024*1024)
+#define DFLT_MSGSIZEMAX		     8192
 #define HARD_MSGSIZEMAX	    (16*1024*1024)
 #else
 static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; }
-- 
cgit v1.2.3


From cef0184c115e5e4e10498f6548d9526465e72478 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Thu, 31 May 2012 16:26:33 -0700
Subject: mqueue: separate mqueue default value from maximum value

Commit b231cca4381e ("message queues: increase range limits") changed
mqueue default value when attr parameter is specified NULL from hard
coded value to fs.mqueue.{msg,msgsize}_max sysctl value.

This made large side effect.  When user need to use two mqueue
applications 1) using !NULL attr parameter and it require big message
size and 2) using NULL attr parameter and only need small size message,
app (1) require to raise fs.mqueue.msgsize_max and app (2) consume large
memory size even though it doesn't need.

Doug Ledford propsed to switch back it to static hard coded value.
However it also has a compatibility problem.  Some applications might
started depend on the default value is tunable.

The solution is to separate default value from maximum value.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Acked-by: Doug Ledford <dledford@redhat.com>
Acked-by: Joe Korty <joe.korty@ccur.com>
Cc: Amerigo Wang <amwang@redhat.com>
Acked-by: Serge E. Hallyn <serue@us.ibm.com>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Manfred Spraul <manfred@colorfullife.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ipc_namespace.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 2488535a32a3..5499c92a9153 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -62,6 +62,8 @@ struct ipc_namespace {
 	unsigned int    mq_queues_max;   /* initialized to DFLT_QUEUESMAX */
 	unsigned int    mq_msg_max;      /* initialized to DFLT_MSGMAX */
 	unsigned int    mq_msgsize_max;  /* initialized to DFLT_MSGSIZEMAX */
+	unsigned int    mq_msg_default;
+	unsigned int    mq_msgsize_default;
 
 	/* user_ns which owns the ipc ns */
 	struct user_namespace *user_ns;
-- 
cgit v1.2.3


From e42d98ebe7d754a2c9fbccd6186721d3ca8679f6 Mon Sep 17 00:00:00 2001
From: Alexandre Bounine <alexandre.bounine@idt.com>
Date: Thu, 31 May 2012 16:26:38 -0700
Subject: rapidio: add DMA engine support for RIO data transfers

Adds DMA Engine framework support into RapidIO subsystem.

Uses DMA Engine DMA_SLAVE interface to generate data transfers to/from
remote RapidIO target devices.

Introduces RapidIO-specific wrapper for prep_slave_sg() interface with an
extra parameter to pass target specific information.

Uses scatterlist to describe local data buffer.  Address flat data buffer
on a remote side.

Signed-off-by: Alexandre Bounine <alexandre.bounine@idt.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Acked-by: Vinod Koul <vinod.koul@linux.intel.com>
Cc: Li Yang <leoli@freescale.com>
Cc: Matt Porter <mporter@kernel.crashing.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/dmaengine.h | 12 ++++++++++++
 include/linux/rio.h       | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/rio_drv.h   |  9 +++++++++
 3 files changed, 68 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index d3fec584e8c3..56377df39124 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -635,6 +635,18 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_sg(
 						  dir, flags, NULL);
 }
 
+#ifdef CONFIG_RAPIDIO_DMA_ENGINE
+struct rio_dma_ext;
+static inline struct dma_async_tx_descriptor *dmaengine_prep_rio_sg(
+	struct dma_chan *chan, struct scatterlist *sgl,	unsigned int sg_len,
+	enum dma_transfer_direction dir, unsigned long flags,
+	struct rio_dma_ext *rio_ext)
+{
+	return chan->device->device_prep_slave_sg(chan, sgl, sg_len,
+						  dir, flags, rio_ext);
+}
+#endif
+
 static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_cyclic(
 		struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
 		size_t period_len, enum dma_transfer_direction dir)
diff --git a/include/linux/rio.h b/include/linux/rio.h
index 4d50611112ba..a90ebadd9da0 100644
--- a/include/linux/rio.h
+++ b/include/linux/rio.h
@@ -20,6 +20,9 @@
 #include <linux/errno.h>
 #include <linux/device.h>
 #include <linux/rio_regs.h>
+#ifdef CONFIG_RAPIDIO_DMA_ENGINE
+#include <linux/dmaengine.h>
+#endif
 
 #define RIO_NO_HOPCOUNT		-1
 #define RIO_INVALID_DESTID	0xffff
@@ -254,6 +257,9 @@ struct rio_mport {
 	u32 phys_efptr;
 	unsigned char name[40];
 	void *priv;		/* Master port private data */
+#ifdef CONFIG_RAPIDIO_DMA_ENGINE
+	struct dma_device	dma;
+#endif
 };
 
 /**
@@ -395,6 +401,47 @@ union rio_pw_msg {
 	u32 raw[RIO_PW_MSG_SIZE/sizeof(u32)];
 };
 
+#ifdef CONFIG_RAPIDIO_DMA_ENGINE
+
+/**
+ * enum rio_write_type - RIO write transaction types used in DMA transfers
+ *
+ * Note: RapidIO specification defines write (NWRITE) and
+ * write-with-response (NWRITE_R) data transfer operations.
+ * Existing DMA controllers that service RapidIO may use one of these operations
+ * for entire data transfer or their combination with only the last data packet
+ * requires response.
+ */
+enum rio_write_type {
+	RDW_DEFAULT,		/* default method used by DMA driver */
+	RDW_ALL_NWRITE,		/* all packets use NWRITE */
+	RDW_ALL_NWRITE_R,	/* all packets use NWRITE_R */
+	RDW_LAST_NWRITE_R,	/* last packet uses NWRITE_R, others - NWRITE */
+};
+
+struct rio_dma_ext {
+	u16 destid;
+	u64 rio_addr;	/* low 64-bits of 66-bit RapidIO address */
+	u8  rio_addr_u;  /* upper 2-bits of 66-bit RapidIO address */
+	enum rio_write_type wr_type; /* preferred RIO write operation type */
+};
+
+struct rio_dma_data {
+	/* Local data (as scatterlist) */
+	struct scatterlist	*sg;	/* I/O scatter list */
+	unsigned int		sg_len;	/* size of scatter list */
+	/* Remote device address (flat buffer) */
+	u64 rio_addr;	/* low 64-bits of 66-bit RapidIO address */
+	u8  rio_addr_u;  /* upper 2-bits of 66-bit RapidIO address */
+	enum rio_write_type wr_type; /* preferred RIO write operation type */
+};
+
+static inline struct rio_mport *dma_to_mport(struct dma_device *ddev)
+{
+	return container_of(ddev, struct rio_mport, dma);
+}
+#endif /* CONFIG_RAPIDIO_DMA_ENGINE */
+
 /* Architecture and hardware-specific functions */
 extern int rio_register_mport(struct rio_mport *);
 extern int rio_open_inb_mbox(struct rio_mport *, void *, int, int);
diff --git a/include/linux/rio_drv.h b/include/linux/rio_drv.h
index 7f07470e1ed9..31ad146be316 100644
--- a/include/linux/rio_drv.h
+++ b/include/linux/rio_drv.h
@@ -377,6 +377,15 @@ void rio_unregister_driver(struct rio_driver *);
 struct rio_dev *rio_dev_get(struct rio_dev *);
 void rio_dev_put(struct rio_dev *);
 
+#ifdef CONFIG_RAPIDIO_DMA_ENGINE
+extern struct dma_chan *rio_request_dma(struct rio_dev *rdev);
+extern void rio_release_dma(struct dma_chan *dchan);
+extern struct dma_async_tx_descriptor *rio_dma_prep_slave_sg(
+		struct rio_dev *rdev, struct dma_chan *dchan,
+		struct rio_dma_data *data,
+		enum dma_transfer_direction direction, unsigned long flags);
+#endif
+
 /**
  * rio_name - Get the unique RIO device identifier
  * @rdev: RIO device
-- 
cgit v1.2.3


From ee62c6b2dc93c09585b51fad18449dc5edb9977f Mon Sep 17 00:00:00 2001
From: Sha Zhengju <handai.szj@taobao.com>
Date: Thu, 31 May 2012 16:26:41 -0700
Subject: eventfd: change int to __u64 in eventfd_signal()

eventfd_ctx->count is an __u64 counter which is allowed to reach
ULLONG_MAX.  eventfd_write() adds a __u64 value to "count", but the kernel
side eventfd_signal() only adds an int value to it.  Make them consistent.

[akpm@linux-foundation.org: update interface documentation]
Signed-off-by: Sha Zhengju <handai.szj@taobao.com>
Cc: Davide Libenzi <davidel@xmailserver.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/eventfd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
index 91bb4f27238c..3c3ef19a625a 100644
--- a/include/linux/eventfd.h
+++ b/include/linux/eventfd.h
@@ -34,7 +34,7 @@ void eventfd_ctx_put(struct eventfd_ctx *ctx);
 struct file *eventfd_fget(int fd);
 struct eventfd_ctx *eventfd_ctx_fdget(int fd);
 struct eventfd_ctx *eventfd_ctx_fileget(struct file *file);
-int eventfd_signal(struct eventfd_ctx *ctx, int n);
+__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n);
 ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt);
 int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait,
 				  __u64 *cnt);
-- 
cgit v1.2.3


From ac34ebb3a67e699edcb5ac72f19d31679369dfaa Mon Sep 17 00:00:00 2001
From: Christopher Yeoh <cyeoh@au1.ibm.com>
Date: Thu, 31 May 2012 16:26:42 -0700
Subject: aio/vfs: cleanup of rw_copy_check_uvector() and
 compat_rw_copy_check_uvector()

A cleanup of rw_copy_check_uvector and compat_rw_copy_check_uvector after
changes made to support CMA in an earlier patch.

Rather than having an additional check_access parameter to these
functions, the first paramater type is overloaded to allow the caller to
specify CHECK_IOVEC_ONLY which means check that the contents of the iovec
are valid, but do not check the memory that they point to.  This is used
by process_vm_readv/writev where we need to validate that a iovec passed
to the syscall is valid but do not want to check the memory that it points
to at this point because it refers to an address space in another process.

Signed-off-by: Chris Yeoh <yeohc@au1.ibm.com>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compat.h |  3 +--
 include/linux/fs.h     | 12 ++++++++++--
 2 files changed, 11 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compat.h b/include/linux/compat.h
index 5d46217f84ad..4e890394ef99 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -577,8 +577,7 @@ extern ssize_t compat_rw_copy_check_uvector(int type,
 		const struct compat_iovec __user *uvector,
 		unsigned long nr_segs,
 		unsigned long fast_segs, struct iovec *fast_pointer,
-		struct iovec **ret_pointer,
-		int check_access);
+		struct iovec **ret_pointer);
 
 extern void __user *compat_alloc_user_space(unsigned long len);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 038076b27ea4..cf2c5611b19b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -173,6 +173,15 @@ struct inodes_stat_t {
 #define WRITE_FUA		(WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FUA)
 #define WRITE_FLUSH_FUA		(WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH | REQ_FUA)
 
+
+/*
+ * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
+ * that indicates that they should check the contents of the iovec are
+ * valid, but not check the memory that the iovec elements
+ * points too.
+ */
+#define CHECK_IOVEC_ONLY -1
+
 #define SEL_IN		1
 #define SEL_OUT		2
 #define SEL_EX		4
@@ -1690,8 +1699,7 @@ struct seq_file;
 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
 			      unsigned long nr_segs, unsigned long fast_segs,
 			      struct iovec *fast_pointer,
-			      struct iovec **ret_pointer,
-			      int check_access);
+			      struct iovec **ret_pointer);
 
 extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
 extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
-- 
cgit v1.2.3


From d97b46a64674a267bc41c9e16132ee2a98c3347d Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Thu, 31 May 2012 16:26:44 -0700
Subject: syscalls, x86: add __NR_kcmp syscall

While doing the checkpoint-restore in the user space one need to determine
whether various kernel objects (like mm_struct-s of file_struct-s) are
shared between tasks and restore this state.

The 2nd step can be solved by using appropriate CLONE_ flags and the
unshare syscall, while there's currently no ways for solving the 1st one.

One of the ways for checking whether two tasks share e.g.  mm_struct is to
provide some mm_struct ID of a task to its proc file, but showing such
info considered to be not that good for security reasons.

Thus after some debates we end up in conclusion that using that named
'comparison' syscall might be the best candidate.  So here is it --
__NR_kcmp.

It takes up to 5 arguments - the pids of the two tasks (which
characteristics should be compared), the comparison type and (in case of
comparison of files) two file descriptors.

Lookups for pids are done in the caller's PID namespace only.

At moment only x86 is supported and tested.

[akpm@linux-foundation.org: fix up selftests, warnings]
[akpm@linux-foundation.org: include errno.h]
[akpm@linux-foundation.org: tweak comment text]
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Andrey Vagin <avagin@openvz.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Glauber Costa <glommer@parallels.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Matt Helsley <matthltc@us.ibm.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Vasiliy Kulikov <segoon@openwall.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Valdis.Kletnieks@vt.edu
Cc: Michal Marek <mmarek@suse.cz>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kcmp.h     | 17 +++++++++++++++++
 include/linux/syscalls.h |  2 ++
 2 files changed, 19 insertions(+)
 create mode 100644 include/linux/kcmp.h

(limited to 'include/linux')

diff --git a/include/linux/kcmp.h b/include/linux/kcmp.h
new file mode 100644
index 000000000000..2dcd1b3aafc8
--- /dev/null
+++ b/include/linux/kcmp.h
@@ -0,0 +1,17 @@
+#ifndef _LINUX_KCMP_H
+#define _LINUX_KCMP_H
+
+/* Comparison type */
+enum kcmp_type {
+	KCMP_FILE,
+	KCMP_VM,
+	KCMP_FILES,
+	KCMP_FS,
+	KCMP_SIGHAND,
+	KCMP_IO,
+	KCMP_SYSVSEM,
+
+	KCMP_TYPES,
+};
+
+#endif /* _LINUX_KCMP_H */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 3de3acb84a95..19439c75c5b2 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -858,4 +858,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid,
 				      unsigned long riovcnt,
 				      unsigned long flags);
 
+asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type,
+			 unsigned long idx1, unsigned long idx2);
 #endif
-- 
cgit v1.2.3


From fe8c7f5cbf91124987106faa3bdf0c8b955c4cf7 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Thu, 31 May 2012 16:26:45 -0700
Subject: c/r: prctl: extend PR_SET_MM to set up more mm_struct entries

During checkpoint we dump whole process memory to a file and the dump
includes process stack memory.  But among stack data itself, the stack
carries additional parameters such as command line arguments, environment
data and auxiliary vector.

So when we do restore procedure and once we've restored stack data itself
we need to setup mm_struct::arg_start/end, env_start/end, so restored
process would be able to find command line arguments and environment data
it had at checkpoint time.  The same applies to auxiliary vector.

For this reason additional PR_SET_MM_(ARG_START | ARG_END | ENV_START |
ENV_END | AUXV) codes are introduced.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Andrew Vagin <avagin@openvz.org>
Cc: Serge Hallyn <serge.hallyn@canonical.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Vasiliy Kulikov <segoon@openwall.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/prctl.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/prctl.h b/include/linux/prctl.h
index 78b76e24cc7e..18d84c4b42d8 100644
--- a/include/linux/prctl.h
+++ b/include/linux/prctl.h
@@ -113,6 +113,11 @@
 # define PR_SET_MM_START_STACK		5
 # define PR_SET_MM_START_BRK		6
 # define PR_SET_MM_BRK			7
+# define PR_SET_MM_ARG_START		8
+# define PR_SET_MM_ARG_END		9
+# define PR_SET_MM_ENV_START		10
+# define PR_SET_MM_ENV_END		11
+# define PR_SET_MM_AUXV			12
 
 /*
  * Set specific pid that is allowed to ptrace the current task.
-- 
cgit v1.2.3


From b32dfe377102ce668775f8b6b1461f7ad428f8b6 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Thu, 31 May 2012 16:26:46 -0700
Subject: c/r: prctl: add ability to set new mm_struct::exe_file

When we do restore we would like to have a way to setup a former
mm_struct::exe_file so that /proc/pid/exe would point to the original
executable file a process had at checkpoint time.

For this the PR_SET_MM_EXE_FILE code is introduced.  This option takes a
file descriptor which will be set as a source for new /proc/$pid/exe
symlink.

Note it allows to change /proc/$pid/exe if there are no VM_EXECUTABLE
vmas present for current process, simply because this feature is a special
to C/R and mm::num_exe_file_vmas become meaningless after that.

To minimize the amount of transition the /proc/pid/exe symlink might have,
this feature is implemented in one-shot manner.  Thus once changed the
symlink can't be changed again.  This should help sysadmins to monitor the
symlinks over all process running in a system.

In particular one could make a snapshot of processes and ring alarm if
there unexpected changes of /proc/pid/exe's in a system.

Note -- this feature is available iif CONFIG_CHECKPOINT_RESTORE is set and
the caller must have CAP_SYS_RESOURCE capability granted, otherwise the
request to change symlink will be rejected.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Matt Helsley <matthltc@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/prctl.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/prctl.h b/include/linux/prctl.h
index 18d84c4b42d8..711e0a30aacc 100644
--- a/include/linux/prctl.h
+++ b/include/linux/prctl.h
@@ -118,6 +118,7 @@
 # define PR_SET_MM_ENV_START		10
 # define PR_SET_MM_ENV_END		11
 # define PR_SET_MM_AUXV			12
+# define PR_SET_MM_EXE_FILE		13
 
 /*
  * Set specific pid that is allowed to ptrace the current task.
-- 
cgit v1.2.3


From 8b3ec6814c83d76b85bd13badc48552836c24839 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 30 May 2012 17:11:23 -0400
Subject: take security_mmap_file() outside of ->mmap_sem

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/security.h | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index f1bae0963ddc..4e5a73cdbbef 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1745,8 +1745,8 @@ int security_file_permission(struct file *file, int mask);
 int security_file_alloc(struct file *file);
 void security_file_free(struct file *file);
 int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
-int security_mmap_file(struct file *file, unsigned long reqprot,
-			unsigned long prot, unsigned long flags);
+int security_mmap_file(struct file *file, unsigned long prot,
+			unsigned long flags);
 int security_mmap_addr(unsigned long addr);
 int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot,
 			   unsigned long prot);
@@ -2183,8 +2183,7 @@ static inline int security_file_ioctl(struct file *file, unsigned int cmd,
 	return 0;
 }
 
-static inline int security_mmap_file(struct file *file, unsigned long reqprot,
-				     unsigned long prot,
+static inline int security_mmap_file(struct file *file, unsigned long prot,
 				     unsigned long flags)
 {
 	return 0;
-- 
cgit v1.2.3


From e3fc629d7bb70848fbf479688a66d4e76dff46ac Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 30 May 2012 20:08:42 -0400
Subject: switch aio and shm to do_mmap_pgoff(), make do_mmap() static

after all, 0 bytes and 0 pages is the same thing...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/mm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7d5c37f24c63..4189e0d0ac05 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1394,7 +1394,7 @@ extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned lo
 extern unsigned long mmap_region(struct file *file, unsigned long addr,
 	unsigned long len, unsigned long flags,
 	vm_flags_t vm_flags, unsigned long pgoff);
-extern unsigned long do_mmap(struct file *, unsigned long,
+extern unsigned long do_mmap_pgoff(struct file *, unsigned long,
         unsigned long, unsigned long,
         unsigned long, unsigned long);
 extern int do_munmap(struct mm_struct *, unsigned long, size_t);
-- 
cgit v1.2.3


From c3b2da314834499f34cba94f7053e55f6d6f92d8 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Mon, 26 Mar 2012 09:59:21 -0400
Subject: fs: introduce inode operation ->update_time

Btrfs has to make sure we have space to allocate new blocks in order to modify
the inode, so updating time can fail.  We've gotten around this by having our
own file_update_time but this is kind of a pain, and Christoph has indicated he
would like to make xfs do something different with atime updates.  So introduce
->update_time, where we will deal with i_version an a/m/c time updates and
indicate which changes need to be made.  The normal version just does what it
has always done, updates the time and marks the inode dirty, and then
filesystems can choose to do something different.

I've gone through all of the users of file_update_time and made them check for
errors with the exception of the fault code since it's complicated and I wasn't
quite sure what to do there, also Jan is going to be pushing the file time
updates into page_mkwrite for those who have it so that should satisfy btrfs and
make it not a big deal to check the file_update_time() return code in the
generic fault path. Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
---
 include/linux/fs.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index cdc1a9630948..57fc70574d20 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1684,6 +1684,7 @@ struct inode_operations {
 	void (*truncate_range)(struct inode *, loff_t, loff_t);
 	int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
 		      u64 len);
+	int (*update_time)(struct inode *, struct timespec *, int);
 } ____cacheline_aligned;
 
 struct seq_file;
@@ -1843,6 +1844,13 @@ static inline void inode_inc_iversion(struct inode *inode)
        spin_unlock(&inode->i_lock);
 }
 
+enum file_time_flags {
+	S_ATIME = 1,
+	S_MTIME = 2,
+	S_CTIME = 4,
+	S_VERSION = 8,
+};
+
 extern void touch_atime(struct path *);
 static inline void file_accessed(struct file *file)
 {
@@ -2579,7 +2587,7 @@ extern int inode_change_ok(const struct inode *, struct iattr *);
 extern int inode_newsize_ok(const struct inode *, loff_t offset);
 extern void setattr_copy(struct inode *inode, const struct iattr *attr);
 
-extern void file_update_time(struct file *file);
+extern int file_update_time(struct file *file);
 
 extern int generic_show_options(struct seq_file *m, struct dentry *root);
 extern void save_mount_options(struct super_block *sb, char *options);
-- 
cgit v1.2.3


From 16b1c1cd71176ab0a76b26818fbf12db9183ed57 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Mon, 21 May 2012 17:30:19 +0200
Subject: vfs: retry last component if opening stale dentry

NFS optimizes away d_revalidates for last component of open.  This means that
open itself can find the dentry stale.

This patch allows the filesystem to return EOPENSTALE and the VFS will retry the
lookup on just the last component if possible.

If the lookup was done using RCU mode, including the last component, then this
is not possible since the parent dentry is lost.  In this case fall back to
non-RCU lookup.  Currently this is not used since NFS will always leave RCU
mode.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/errno.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/errno.h b/include/linux/errno.h
index 2d09bfa5c262..e0de516374da 100644
--- a/include/linux/errno.h
+++ b/include/linux/errno.h
@@ -17,6 +17,7 @@
 #define ENOIOCTLCMD	515	/* No ioctl command */
 #define ERESTART_RESTARTBLOCK 516 /* restart by calling sys_restart_syscall */
 #define EPROBE_DEFER	517	/* Driver requests probe retry */
+#define EOPENSTALE	518	/* open found a stale dentry */
 
 /* Defined for the NFSv3 protocol */
 #define EBADHANDLE	521	/* Illegal NFS file handle */
-- 
cgit v1.2.3


From 754421c8cab1a568be844a7069fe04c1cf6391b8 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 26 Apr 2012 18:31:00 -0400
Subject: HAVE_RESTORE_SIGMASK is defined on all architectures now

Everyone either defines it in arch thread_info.h or has TIF_RESTORE_SIGMASK
and picks default set_restore_sigmask() in linux/thread_info.h.  Kill the
ifdefs, slap #error in linux/thread_info.h to catch breakage when new ones
get merged.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/thread_info.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index db78775eff3b..eee729428683 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -129,6 +129,10 @@ static inline void set_restore_sigmask(void)
 }
 #endif	/* TIF_RESTORE_SIGMASK && !HAVE_SET_RESTORE_SIGMASK */
 
+#ifndef HAVE_SET_RESTORE_SIGMASK
+#error "no set_restore_sigmask() provided and default one won't work"
+#endif
+
 #endif	/* __KERNEL__ */
 
 #endif /* _LINUX_THREAD_INFO_H */
-- 
cgit v1.2.3


From 4ebefe3ec729003443daf153ed6fad1739271283 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 26 Apr 2012 22:29:20 -0400
Subject: new helpers: {clear,test,test_and_clear}_restore_sigmask()

helpers parallel to set_restore_sigmask(), used in the next commits

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/thread_info.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index eee729428683..ed279701ac79 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -127,6 +127,18 @@ static inline void set_restore_sigmask(void)
 	set_thread_flag(TIF_RESTORE_SIGMASK);
 	set_thread_flag(TIF_SIGPENDING);
 }
+static inline void clear_restore_sigmask(void)
+{
+	clear_thread_flag(TIF_RESTORE_SIGMASK);
+}
+static inline bool test_restore_sigmask(void)
+{
+	return test_thread_flag(TIF_RESTORE_SIGMASK);
+}
+static inline bool test_and_clear_restore_sigmask(void)
+{
+	return test_and_clear_thread_flag(TIF_RESTORE_SIGMASK);
+}
 #endif	/* TIF_RESTORE_SIGMASK && !HAVE_SET_RESTORE_SIGMASK */
 
 #ifndef HAVE_SET_RESTORE_SIGMASK
-- 
cgit v1.2.3


From 51a7b448d4134e3e8eec633435e3e8faee14a828 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 21 May 2012 23:33:55 -0400
Subject: new helper: restore_saved_sigmask()

first fruits of ..._restore_sigmask() helpers: now we can take
boilerplate "signal didn't have a handler, clear RESTORE_SIGMASK
and restore the blocked mask from ->saved_mask" into a common
helper.  Open-coded instances switched...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/sched.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 660c8ae93471..f1b46b88f6f5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2207,6 +2207,12 @@ extern int send_sigqueue(struct sigqueue *,  struct task_struct *, int group);
 extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *);
 extern int do_sigaltstack(const stack_t __user *, stack_t __user *, unsigned long);
 
+static inline void restore_saved_sigmask(void)
+{
+	if (test_and_clear_restore_sigmask())
+		set_current_blocked(&current->saved_sigmask);
+}
+
 static inline int kill_cad_pid(int sig, int priv)
 {
 	return kill_pid(cad_pid, sig, priv);
-- 
cgit v1.2.3


From b7f9a11a6cf1ea9ee6be3eb2b90d91327a09ad14 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 2 May 2012 09:59:21 -0400
Subject: new helper: sigmask_to_save()

replace boilerplate "should we use ->saved_sigmask or ->blocked?"
with calls of obvious inlined helper...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/sched.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index f1b46b88f6f5..ded3fb63fb06 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2213,6 +2213,14 @@ static inline void restore_saved_sigmask(void)
 		set_current_blocked(&current->saved_sigmask);
 }
 
+static inline sigset_t *sigmask_to_save(void)
+{
+	sigset_t *res = &current->blocked;
+	if (unlikely(test_restore_sigmask()))
+		res = &current->saved_sigmask;
+	return res;
+}
+
 static inline int kill_cad_pid(int sig, int priv)
 {
 	return kill_pid(cad_pid, sig, priv);
-- 
cgit v1.2.3


From edd63a2763bdae0daa4f0a4d4c5d61d1154352a5 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 27 Apr 2012 13:42:45 -0400
Subject: set_restore_sigmask() is never called without SIGPENDING (and never
 should be)

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/thread_info.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index ed279701ac79..ccc1899bd62e 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -8,6 +8,7 @@
 #define _LINUX_THREAD_INFO_H
 
 #include <linux/types.h>
+#include <linux/bug.h>
 
 struct timespec;
 struct compat_timespec;
@@ -125,7 +126,7 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag)
 static inline void set_restore_sigmask(void)
 {
 	set_thread_flag(TIF_RESTORE_SIGMASK);
-	set_thread_flag(TIF_SIGPENDING);
+	WARN_ON(!test_thread_flag(TIF_SIGPENDING));
 }
 static inline void clear_restore_sigmask(void)
 {
-- 
cgit v1.2.3


From 77097ae503b170120ab66dd1d547f8577193f91f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 27 Apr 2012 13:58:59 -0400
Subject: most of set_current_blocked() callers want SIGKILL/SIGSTOP removed
 from set

Only 3 out of 63 do not.  Renamed the current variant to __set_current_blocked(),
added set_current_blocked() that will exclude unblockable signals, switched
open-coded instances to it.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/sched.h  | 2 +-
 include/linux/signal.h | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ded3fb63fb06..f34437e835a7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2210,7 +2210,7 @@ extern int do_sigaltstack(const stack_t __user *, stack_t __user *, unsigned lon
 static inline void restore_saved_sigmask(void)
 {
 	if (test_and_clear_restore_sigmask())
-		set_current_blocked(&current->saved_sigmask);
+		__set_current_blocked(&current->saved_sigmask);
 }
 
 static inline sigset_t *sigmask_to_save(void)
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 17046cc484bc..065e76330398 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -250,7 +250,8 @@ extern long do_sigpending(void __user *, unsigned long);
 extern int do_sigtimedwait(const sigset_t *, siginfo_t *,
 				const struct timespec *);
 extern int sigprocmask(int, sigset_t *, sigset_t *);
-extern void set_current_blocked(const sigset_t *);
+extern void set_current_blocked(sigset_t *);
+extern void __set_current_blocked(const sigset_t *);
 extern int show_unhandled_signals;
 extern int sigsuspend(sigset_t *);
 
-- 
cgit v1.2.3


From efee984c27b67e3ebef40410f35671997441b57c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 28 Apr 2012 02:04:15 -0400
Subject: new helper: signal_delivered()

Does block_sigmask() + tracehook_signal_handler();  called when
sigframe has been successfully built.  All architectures converted
to it; block_sigmask() itself is gone now (merged into this one).

I'm still not too happy with the signature, but that's a separate
story (IMO we need a structure that would contain signal number +
siginfo + k_sigaction, so that get_signal_to_deliver() would fill one,
signal_delivered(), handle_signal() and probably setup...frame() -
take one).

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/signal.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/signal.h b/include/linux/signal.h
index 065e76330398..26b424adc842 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -256,7 +256,7 @@ extern int show_unhandled_signals;
 extern int sigsuspend(sigset_t *);
 
 extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie);
-extern void block_sigmask(struct k_sigaction *ka, int signr);
+extern void signal_delivered(int sig, siginfo_t *info, struct k_sigaction *ka, struct pt_regs *regs, int stepping);
 extern void exit_signals(struct task_struct *tsk);
 
 extern struct kmem_cache *sighand_cachep;
-- 
cgit v1.2.3


From f309532bf3e1cc1b787403d84e3039812a7dbe50 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 2 Jun 2012 15:21:43 -0700
Subject: tty: Revert the tty locking series, it needs more work

This reverts the tty layer change to use per-tty locking, because it's
not correct yet, and fixing it will require some more deep surgery.

The main revert is d29f3ef39be4 ("tty_lock: Localise the lock"), but
there are several smaller commits that built upon it, they also get
reverted here. The list of reverted commits is:

  fde86d310886 - tty: add lockdep annotations
  8f6576ad476b - tty: fix ldisc lock inversion trace
  d3ca8b64b97e - pty: Fix lock inversion
  b1d679afd766 - tty: drop the pty lock during hangup
  abcefe5fc357 - tty/amiserial: Add missing argument for tty_unlock()
  fd11b42e3598 - cris: fix missing tty arg in wait_event_interruptible_tty call
  d29f3ef39be4 - tty_lock: Localise the lock

The revert had a trivial conflict in the 68360serial.c staging driver
that got removed in the meantime.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/tty.h | 23 +++++++++--------------
 1 file changed, 9 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tty.h b/include/linux/tty.h
index 4990ef2b1fb7..9f47ab540f65 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -268,7 +268,6 @@ struct tty_struct {
 	struct mutex ldisc_mutex;
 	struct tty_ldisc *ldisc;
 
-	struct mutex legacy_mutex;
 	struct mutex termios_mutex;
 	spinlock_t ctrl_lock;
 	/* Termios values are protected by the termios mutex */
@@ -606,12 +605,8 @@ extern long vt_compat_ioctl(struct tty_struct *tty,
 
 /* tty_mutex.c */
 /* functions for preparation of BKL removal */
-extern void __lockfunc tty_lock(struct tty_struct *tty);
-extern void __lockfunc tty_unlock(struct tty_struct *tty);
-extern void __lockfunc tty_lock_pair(struct tty_struct *tty,
-				struct tty_struct *tty2);
-extern void __lockfunc tty_unlock_pair(struct tty_struct *tty,
-				struct tty_struct *tty2);
+extern void __lockfunc tty_lock(void) __acquires(tty_lock);
+extern void __lockfunc tty_unlock(void) __releases(tty_lock);
 
 /*
  * this shall be called only from where BTM is held (like close)
@@ -626,9 +621,9 @@ extern void __lockfunc tty_unlock_pair(struct tty_struct *tty,
 static inline void tty_wait_until_sent_from_close(struct tty_struct *tty,
 		long timeout)
 {
-	tty_unlock(tty); /* tty->ops->close holds the BTM, drop it while waiting */
+	tty_unlock(); /* tty->ops->close holds the BTM, drop it while waiting */
 	tty_wait_until_sent(tty, timeout);
-	tty_lock(tty);
+	tty_lock();
 }
 
 /*
@@ -643,16 +638,16 @@ static inline void tty_wait_until_sent_from_close(struct tty_struct *tty,
  *
  * Do not use in new code.
  */
-#define wait_event_interruptible_tty(tty, wq, condition)		\
+#define wait_event_interruptible_tty(wq, condition)			\
 ({									\
 	int __ret = 0;							\
 	if (!(condition)) {						\
-		__wait_event_interruptible_tty(tty, wq, condition, __ret);	\
+		__wait_event_interruptible_tty(wq, condition, __ret);	\
 	}								\
 	__ret;								\
 })
 
-#define __wait_event_interruptible_tty(tty, wq, condition, ret)		\
+#define __wait_event_interruptible_tty(wq, condition, ret)		\
 do {									\
 	DEFINE_WAIT(__wait);						\
 									\
@@ -661,9 +656,9 @@ do {									\
 		if (condition)						\
 			break;						\
 		if (!signal_pending(current)) {				\
-			tty_unlock(tty);					\
+			tty_unlock();					\
 			schedule();					\
-			tty_lock(tty);					\
+			tty_lock();					\
 			continue;					\
 		}							\
 		ret = -ERESTARTSYS;					\
-- 
cgit v1.2.3


From 2f9d3df8aa1cc3c6db5cfa0bad3f0745e04cc27d Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 3 Jun 2012 14:50:19 -0700
Subject: vfs: move inode stat information closer together

The comment above it says "Stat data, not accessed from path walking",
but in fact some of inode fields we use for the common stat data was way
down at the end of the inode, causing unnecessary cache misses for the
common stat operations.

The inode structure is pretty big, and this can change padding depending
on field width, but at least on the common 64-bit configurations this
doesn't change the size.  Some of our inode layout has historically been
to tro to avoid unnecessary padding fields, but cache locality is at
least as important for layout, if not more.

Noticed by looking at kernel profiles, and noticing that the "i_blkbits"
access stood out like a sore thumb.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 51978ed43e97..17fd887c798f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -802,13 +802,14 @@ struct inode {
 		unsigned int __i_nlink;
 	};
 	dev_t			i_rdev;
+	loff_t			i_size;
 	struct timespec		i_atime;
 	struct timespec		i_mtime;
 	struct timespec		i_ctime;
 	spinlock_t		i_lock;	/* i_blocks, i_bytes, maybe i_size */
 	unsigned short          i_bytes;
+	unsigned int		i_blkbits;
 	blkcnt_t		i_blocks;
-	loff_t			i_size;
 
 #ifdef __NEED_I_SIZE_ORDERED
 	seqcount_t		i_size_seqcount;
@@ -828,9 +829,8 @@ struct inode {
 		struct list_head	i_dentry;
 		struct rcu_head		i_rcu;
 	};
-	atomic_t		i_count;
-	unsigned int		i_blkbits;
 	u64			i_version;
+	atomic_t		i_count;
 	atomic_t		i_dio_count;
 	atomic_t		i_writecount;
 	const struct file_operations	*i_fop;	/* former ->i_op->default_file_ops */
-- 
cgit v1.2.3


From 68e3e92620c323703bc7db75c2ba15239ee85c39 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 3 Jun 2012 20:05:57 -0700
Subject: Revert "mm: compaction: handle incorrect MIGRATE_UNMOVABLE type
 pageblocks"

This reverts commit 5ceb9ce6fe9462a298bb2cd5c9f1ca6cb80a0199.

That commit seems to be the cause of the mm compation list corruption
issues that Dave Jones reported.  The locking (or rather, absense
there-of) is dubious, as is the use of the 'page' variable once it has
been found to be outside the pageblock range.

So revert it for now, we can re-visit this for 3.6.  If we even need to:
as Minchan Kim says, "The patch wasn't a bug fix and even test workload
was very theoretical".

Reported-and-tested-by: Dave Jones <davej@redhat.com>
Acked-by: Hugh Dickins <hughd@google.com>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@gmail.com>
Acked-by: Minchan Kim <minchan@kernel.org>
Cc: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Cc: Kyungmin Park <kyungmin.park@samsung.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compaction.h | 19 -------------------
 1 file changed, 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index e988037abd2a..51a90b7f2d60 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -1,8 +1,6 @@
 #ifndef _LINUX_COMPACTION_H
 #define _LINUX_COMPACTION_H
 
-#include <linux/node.h>
-
 /* Return values for compact_zone() and try_to_compact_pages() */
 /* compaction didn't start as it was not possible or direct reclaim was more suitable */
 #define COMPACT_SKIPPED		0
@@ -13,23 +11,6 @@
 /* The full zone was compacted */
 #define COMPACT_COMPLETE	3
 
-/*
- * compaction supports three modes
- *
- * COMPACT_ASYNC_MOVABLE uses asynchronous migration and only scans
- *    MIGRATE_MOVABLE pageblocks as migration sources and targets.
- * COMPACT_ASYNC_UNMOVABLE uses asynchronous migration and only scans
- *    MIGRATE_MOVABLE pageblocks as migration sources.
- *    MIGRATE_UNMOVABLE pageblocks are scanned as potential migration
- *    targets and convers them to MIGRATE_MOVABLE if possible
- * COMPACT_SYNC uses synchronous migration and scans all pageblocks
- */
-enum compact_mode {
-	COMPACT_ASYNC_MOVABLE,
-	COMPACT_ASYNC_UNMOVABLE,
-	COMPACT_SYNC,
-};
-
 #ifdef CONFIG_COMPACTION
 extern int sysctl_compact_memory;
 extern int sysctl_compaction_handler(struct ctl_table *table, int write,
-- 
cgit v1.2.3


From ae58d1e406986f31d1e88b32f5ac601506c196d8 Mon Sep 17 00:00:00 2001
From: Stephen Warren <swarren@nvidia.com>
Date: Fri, 18 May 2012 09:29:34 -0600
Subject: i2c: Add generic I2C multiplexer using pinctrl API

This is useful for SoCs whose I2C module's signals can be routed to
different sets of pins at run-time, using the pinctrl API.

                                 +-----+  +-----+
                                 | dev |  | dev |
    +------------------------+   +-----+  +-----+
    | SoC                    |      |        |
    |                   /----|------+--------+
    |   +---+   +------+     | child bus A, on first set of pins
    |   |I2C|---|Pinmux|     |
    |   +---+   +------+     | child bus B, on second set of pins
    |                   \----|------+--------+--------+
    |                        |      |        |        |
    +------------------------+  +-----+  +-----+  +-----+
                                | dev |  | dev |  | dev |
                                +-----+  +-----+  +-----+

Signed-off-by: Stephen Warren <swarren@nvidia.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Rob Herring <rob.herring@calxeda.com>
Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
---
 include/linux/i2c-mux-pinctrl.h | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)
 create mode 100644 include/linux/i2c-mux-pinctrl.h

(limited to 'include/linux')

diff --git a/include/linux/i2c-mux-pinctrl.h b/include/linux/i2c-mux-pinctrl.h
new file mode 100644
index 000000000000..a65c86429e84
--- /dev/null
+++ b/include/linux/i2c-mux-pinctrl.h
@@ -0,0 +1,41 @@
+/*
+ * i2c-mux-pinctrl platform data
+ *
+ * Copyright (c) 2012, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _LINUX_I2C_MUX_PINCTRL_H
+#define _LINUX_I2C_MUX_PINCTRL_H
+
+/**
+ * struct i2c_mux_pinctrl_platform_data - Platform data for i2c-mux-pinctrl
+ * @parent_bus_num: Parent I2C bus number
+ * @base_bus_num: Base I2C bus number for the child busses. 0 for dynamic.
+ * @bus_count: Number of child busses. Also the number of elements in
+ *	@pinctrl_states
+ * @pinctrl_states: The names of the pinctrl state to select for each child bus
+ * @pinctrl_state_idle: The pinctrl state to select when no child bus is being
+ *	accessed. If NULL, the most recently used pinctrl state will be left
+ *	selected.
+ */
+struct i2c_mux_pinctrl_platform_data {
+	int parent_bus_num;
+	int base_bus_num;
+	int bus_count;
+	const char **pinctrl_states;
+	const char *pinctrl_state_idle;
+};
+
+#endif
-- 
cgit v1.2.3


From 1549210fcc17e9ae20c09ac8cd4c48a8dfd431bd Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 5 Jun 2012 09:16:47 -0400
Subject: NFSv4: Fix an Oops in the open recovery code

The open recovery code does not need to request a new value for the
mdsthreshold, and so does not allocate a struct nfs4_threshold.
The problem is that encode_getfattr_open() will still request an
mdsthreshold, and so we end up Oopsing in decode_attr_mdsthreshold.

This patch fixes encode_getfattr_open so that it doesn't request an
mdsthreshold when the caller isn't asking for one. It also fixes
decode_attr_mdsthreshold so that it errors if the server returns
an mdsthreshold that we didn't ask for (instead of Oopsing).

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Andy Adamson <andros@netapp.com>
---
 include/linux/nfs_xdr.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index d1a7bf51c326..7519baef025b 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -348,6 +348,7 @@ struct nfs_openargs {
 	const struct qstr *	name;
 	const struct nfs_server *server;	 /* Needed for ID mapping */
 	const u32 *		bitmask;
+	const u32 *		open_bitmap;
 	__u32			claim;
 	struct nfs4_sequence_args	seq_args;
 };
-- 
cgit v1.2.3


From fffaee365fded09f9ebf2db19066065fa54323c3 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@openvz.org>
Date: Tue, 5 Jun 2012 21:36:33 +0400
Subject: radix-tree: fix contiguous iterator
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch fixes bug in macro radix_tree_for_each_contig().

If radix_tree_next_slot() sees NULL in next slot it returns NULL, but following
radix_tree_next_chunk() switches iterating into next chunk. As result iterating
becomes non-contiguous and breaks vfs "splice" and all its users.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Reported-and-bisected-by: Hans de Bruin <jmdebruin@xmsnet.nl>
Reported-and-bisected-by: Ondrej Zary <linux@rainbow-software.org>
Reported-bisected-and-tested-by: Toralf Förster <toralf.foerster@gmx.de>
Link: https://lkml.org/lkml/2012/6/5/64
Cc: stable <stable@vger.kernel.org> # 3.4.x
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/radix-tree.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 0d04cd69ab9b..ffc444c38b0a 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -368,8 +368,11 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags)
 			iter->index++;
 			if (likely(*slot))
 				return slot;
-			if (flags & RADIX_TREE_ITER_CONTIG)
+			if (flags & RADIX_TREE_ITER_CONTIG) {
+				/* forbid switching to the next chunk */
+				iter->next_index = 0;
 				break;
+			}
 		}
 	}
 	return NULL;
-- 
cgit v1.2.3


From 9bce008bae8b57bc7b007bcc2071d1247a527120 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 5 Jun 2012 18:32:03 -0400
Subject: NFS: Fix a commit bug

The new commit code fails to copy the verifier into the wb_verf field
of _all_ the nfs_page structures; it only copies it into the first entry.
The consequence is that most requests end up failing to match in
nfs_commit_release.

Fix is to copy the verifier into the req->wb_verf field in
nfs_write_completion.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Fred Isaman <iisaman@netapp.com>
---
 include/linux/nfs_xdr.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 7519baef025b..8aadd90b808a 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1237,6 +1237,7 @@ struct nfs_pgio_header {
 	struct list_head	rpc_list;
 	atomic_t		refcnt;
 	struct nfs_page		*req;
+	struct nfs_writeverf	*verf;
 	struct pnfs_layout_segment *lseg;
 	loff_t			io_start;
 	const struct rpc_call_ops *mds_ops;
@@ -1274,6 +1275,7 @@ struct nfs_write_data {
 struct nfs_write_header {
 	struct nfs_pgio_header	header;
 	struct nfs_write_data	rpc_data;
+	struct nfs_writeverf	verf;
 };
 
 struct nfs_mds_commit_info {
-- 
cgit v1.2.3


From 2a0fe914a38745f5b03534c4e4f4056cbd6978b8 Mon Sep 17 00:00:00 2001
From: Yong Ding <yongd@marvell.com>
Date: Tue, 15 May 2012 13:09:43 +0800
Subject: mmc: sdio: fix setting card data bus width as 4-bit

SDIO_CCCR_IF[1:0] in SDIO card is used for card data bus width
setting as below:

     00b: 1-bit bus
     01b: Reserved
     10b: 4-bit bus
     11b: 8-bit bus (only for embedded SDIO)

And sdio_enable_wide is for setting data bus width as 4-bit.
But currently, it first reads the register, second OR' 1b with
SDIO_CCCR_IF[1], and then writes it back.

As we can see, this is based on such assumption that the
SDIO_CCCR_IF[0] is always 0. Apparently, this is not right.

Signed-off-by: Yong Ding <yongd@marvell.com>
Acked-by: Philip Rakity <prakity@marvell.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 include/linux/mmc/sdio.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmc/sdio.h b/include/linux/mmc/sdio.h
index c9fe66c58f8f..17446d3c3602 100644
--- a/include/linux/mmc/sdio.h
+++ b/include/linux/mmc/sdio.h
@@ -98,7 +98,9 @@
 
 #define SDIO_CCCR_IF		0x07	/* bus interface controls */
 
+#define  SDIO_BUS_WIDTH_MASK	0x03	/* data bus width setting */
 #define  SDIO_BUS_WIDTH_1BIT	0x00
+#define  SDIO_BUS_WIDTH_RESERVED 0x01
 #define  SDIO_BUS_WIDTH_4BIT	0x02
 #define  SDIO_BUS_ECSI		0x20	/* Enable continuous SPI interrupt */
 #define  SDIO_BUS_SCSI		0x40	/* Support continuous SPI interrupt */
-- 
cgit v1.2.3


From c1174876874dcf8986806e4dad3d7d07af20b439 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 31 May 2012 14:47:33 +0200
Subject: sched: Fix domain iteration

Weird topologies can lead to asymmetric domain setups. This needs
further consideration since these setups are typically non-minimal
too.

For now, make it work by adding an extra mask selecting which CPUs
are allowed to iterate up.

The topology that triggered it is the one from David Rientjes:

	10 20 20 30
	20 10 20 20
	20 20 10 20
	30 20 20 10

resulting in boxes that wouldn't even boot.

Reported-by: David Rientjes <rientjes@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-3p86l9cuaqnxz7uxsojmz5rm@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6029d8c54476..ac321d753470 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -876,6 +876,8 @@ struct sched_group_power {
 	 * Number of busy cpus in this group.
 	 */
 	atomic_t nr_busy_cpus;
+
+	unsigned long cpumask[0]; /* iteration mask */
 };
 
 struct sched_group {
@@ -900,6 +902,15 @@ static inline struct cpumask *sched_group_cpus(struct sched_group *sg)
 	return to_cpumask(sg->cpumask);
 }
 
+/*
+ * cpumask masking which cpus in the group are allowed to iterate up the domain
+ * tree.
+ */
+static inline struct cpumask *sched_group_mask(struct sched_group *sg)
+{
+	return to_cpumask(sg->sgp->cpumask);
+}
+
 /**
  * group_first_cpu - Returns the first cpu in the cpumask of a sched_group.
  * @group: The group whose first cpu is to be returned.
-- 
cgit v1.2.3


From 0b0d9cf6ec7bab91977da2d71c09157f110f7c2e Mon Sep 17 00:00:00 2001
From: Arun Sharma <asharma@fb.com>
Date: Fri, 20 Apr 2012 15:41:34 -0700
Subject: perf: Limit callchains to 127

Stack depth of 255 seems excessive, given that copy_from_user_nmi()
could be slow.

Signed-off-by: Arun Sharma <asharma@fb.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1334961696-19580-3-git-send-email-asharma@fb.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 1817d4015e5f..45db49f64bb4 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -555,7 +555,7 @@ enum perf_event_type {
 	PERF_RECORD_MAX,			/* non-ABI */
 };
 
-#define PERF_MAX_STACK_DEPTH		255
+#define PERF_MAX_STACK_DEPTH		127
 
 enum perf_callchain_context {
 	PERF_CONTEXT_HV			= (__u64)-32,
-- 
cgit v1.2.3


From aa9b16306e3243229580ff889cc59fd66bf77973 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Thu, 10 May 2012 16:41:44 -0700
Subject: rcu: Precompute RCU_FAST_NO_HZ timer offsets

When a CPU is entering dyntick-idle mode, tick_nohz_stop_sched_tick()
calls rcu_needs_cpu() see if RCU needs that CPU, and, if not, computes the
next wakeup time based on the timer wheels.  Only later, when actually
entering the idle loop, rcu_prepare_for_idle() will be invoked.  In some
cases, rcu_prepare_for_idle() will post timers to wake the CPU back up.
But all for naught: The next wakeup time for the CPU has already been
computed, and posting a timer afterwards does not force that wakeup
time to be recomputed.  This means that rcu_prepare_for_idle()'s have
no effect.

This is not a problem on a busy system because something else will wake
up the CPU soon enough.  However, on lightly loaded systems, the CPU
might stay asleep for a considerable length of time.  If that CPU has
a callback that the rest of the system is waiting on, the system might
run very slowly or (in theory) even hang.

This commit avoids this problem by having rcu_needs_cpu() give
tick_nohz_stop_sched_tick() an estimate of when RCU will need the CPU
to wake back up, which tick_nohz_stop_sched_tick() takes into account
when programming the CPU's wakeup time.  An alternative approach is
for rcu_prepare_for_idle() to use hrtimers instead of normal timers,
but timers are much more efficient than are hrtimers for frequently
and repeatedly posting and cancelling a given timer, which is exactly
what RCU_FAST_NO_HZ does.

Reported-by: Pascal Chapperon <pascal.chapperon@wanadoo.fr>
Reported-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tested-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Tested-by: Pascal Chapperon <pascal.chapperon@wanadoo.fr>
---
 include/linux/rcutiny.h | 6 ++++--
 include/linux/rcutree.h | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index adb5e5a38cae..854dc4c5c271 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -87,8 +87,9 @@ static inline void kfree_call_rcu(struct rcu_head *head,
 
 #ifdef CONFIG_TINY_RCU
 
-static inline int rcu_needs_cpu(int cpu)
+static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
 {
+	*delta_jiffies = ULONG_MAX;
 	return 0;
 }
 
@@ -96,8 +97,9 @@ static inline int rcu_needs_cpu(int cpu)
 
 int rcu_preempt_needs_cpu(void);
 
-static inline int rcu_needs_cpu(int cpu)
+static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
 {
+	*delta_jiffies = ULONG_MAX;
 	return rcu_preempt_needs_cpu();
 }
 
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 3c6083cde4fc..952b79339304 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -32,7 +32,7 @@
 
 extern void rcu_init(void);
 extern void rcu_note_context_switch(int cpu);
-extern int rcu_needs_cpu(int cpu);
+extern int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies);
 extern void rcu_cpu_stall_reset(void);
 
 /*
-- 
cgit v1.2.3


From d1992b169d31f339dc5ea4e9f312567c8cf322a3 Mon Sep 17 00:00:00 2001
From: Hans Schillstrom <hans@schillstrom.com>
Date: Thu, 17 May 2012 22:35:46 +0000
Subject: netfilter: xt_HMARK: fix endianness and provide consistent hashing

This patch addresses two issues:

a) Fix usage of u32 and __be32 that causes endianess warnings via sparse.
b) Ensure consistent hashing in a cluster that is composed of big and
   little endian systems. Thus, we obtain the same hash mark in an
   heterogeneous cluster.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Hans Schillstrom <hans@schillstrom.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/xt_HMARK.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/xt_HMARK.h b/include/linux/netfilter/xt_HMARK.h
index abb1650940d2..826fc5807577 100644
--- a/include/linux/netfilter/xt_HMARK.h
+++ b/include/linux/netfilter/xt_HMARK.h
@@ -27,7 +27,12 @@ union hmark_ports {
 		__u16	src;
 		__u16	dst;
 	} p16;
+	struct {
+		__be16	src;
+		__be16	dst;
+	} b16;
 	__u32	v32;
+	__be32	b32;
 };
 
 struct xt_hmark_info {
-- 
cgit v1.2.3


From bafb282df29c1524b1617019adebd6d0c3eb7a47 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@openvz.org>
Date: Thu, 7 Jun 2012 14:21:11 -0700
Subject: c/r: prctl: update prctl_set_mm_exe_file() after
 mm->num_exe_file_vmas removal

A fix for commit b32dfe377102 ("c/r: prctl: add ability to set new
mm_struct::exe_file").

After removing mm->num_exe_file_vmas kernel keeps mm->exe_file until
final mmput(), it never becomes NULL while task is alive.

We can check for other mapped files in mm instead of checking
mm->num_exe_file_vmas, and mark mm with flag MMF_EXE_FILE_CHANGED in
order to forbid second changing of mm->exe_file.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Reviewed-by: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Matt Helsley <matthltc@us.ibm.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6029d8c54476..c688d4cc2e40 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -439,6 +439,7 @@ extern int get_dumpable(struct mm_struct *mm);
 					/* leave room for more dump flags */
 #define MMF_VM_MERGEABLE	16	/* KSM may merge identical pages */
 #define MMF_VM_HUGEPAGE		17	/* set when VM_HUGEPAGE is set on vma */
+#define MMF_EXE_FILE_CHANGED	18	/* see prctl_set_mm_exe_file() */
 
 #define MMF_INIT_MASK		(MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
 
-- 
cgit v1.2.3


From 300f786b2683f8bb1ec0afb6e1851183a479c86d Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Thu, 7 Jun 2012 14:21:12 -0700
Subject: c/r: prctl: add ability to get clear_tid_address

Zero is written at clear_tid_address when the process exits.  This
functionality is used by pthread_join().

We already have sys_set_tid_address() to change this address for the
current task but there is no way to obtain it from user space.

Without the ability to find this address and dump it we can't restore
pthread'ed apps which call pthread_join() once they have been restored.

This patch introduces the PR_GET_TID_ADDRESS prctl option which allows
the current process to obtain own clear_tid_address.

This feature is available iif CONFIG_CHECKPOINT_RESTORE is set.

[akpm@linux-foundation.org: fix prctl numbering]
Signed-off-by: Andrew Vagin <avagin@openvz.org>
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Pedro Alves <palves@redhat.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Tejun Heo <tj@kernel.org>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/prctl.h | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/prctl.h b/include/linux/prctl.h
index 711e0a30aacc..3988012255dc 100644
--- a/include/linux/prctl.h
+++ b/include/linux/prctl.h
@@ -127,8 +127,8 @@
 #define PR_SET_PTRACER 0x59616d61
 # define PR_SET_PTRACER_ANY ((unsigned long)-1)
 
-#define PR_SET_CHILD_SUBREAPER 36
-#define PR_GET_CHILD_SUBREAPER 37
+#define PR_SET_CHILD_SUBREAPER	36
+#define PR_GET_CHILD_SUBREAPER	37
 
 /*
  * If no_new_privs is set, then operations that grant new privileges (i.e.
@@ -142,7 +142,9 @@
  * asking selinux for a specific new context (e.g. with runcon) will result
  * in execve returning -EPERM.
  */
-#define PR_SET_NO_NEW_PRIVS 38
-#define PR_GET_NO_NEW_PRIVS 39
+#define PR_SET_NO_NEW_PRIVS	38
+#define PR_GET_NO_NEW_PRIVS	39
+
+#define PR_GET_TID_ADDRESS	40
 
 #endif /* _LINUX_PRCTL_H */
-- 
cgit v1.2.3


From ae82fdb1406ad41d68f07027fe31f2d35ba22a90 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 8 Jun 2012 14:58:13 +0930
Subject: module_param: stop double-calling parameters.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 026cee0086fe1df4cf74691cf273062cc769617d "params:
<level>_initcall-like kernel parameters" set old-style module
parameters to level 0.  And we call those level 0 calls where we used
to, early in start_kernel().

We also loop through the initcall levels and call the levelled
module_params before the corresponding initcall.  Unfortunately level
0 is early_init(), so we call the standard module_param calls twice.

(Turns out most things don't care, but at least ubi.mtd does).

Change the level to -1 for standard module_param calls.

Reported-by: Benoît Thébaudeau <benoit.thebaudeau@advansee.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: stable@kernel.org
---
 include/linux/moduleparam.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 1b14d25162cb..d6a58065c09c 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -128,7 +128,7 @@ struct kparam_array
  * The ops can have NULL set or get functions.
  */
 #define module_param_cb(name, ops, arg, perm)				      \
-	__module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, 0)
+	__module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, -1)
 
 /**
  * <level>_param_cb - general callback for a module/cmdline parameter
@@ -192,7 +192,7 @@ struct kparam_array
 		 { (void *)set, (void *)get };				\
 	__module_param_call(MODULE_PARAM_PREFIX,			\
 			    name, &__param_ops_##name, arg,		\
-			    (perm) + sizeof(__check_old_set_param(set))*0, 0)
+			    (perm) + sizeof(__check_old_set_param(set))*0, -1)
 
 /* We don't get oldget: it's often a new-style param_get_uint, etc. */
 static inline int
@@ -272,7 +272,7 @@ static inline void __kernel_param_unlock(void)
  */
 #define core_param(name, var, type, perm)				\
 	param_check_##type(name, &(var));				\
-	__module_param_call("", name, &param_ops_##type, &var, perm, 0)
+	__module_param_call("", name, &param_ops_##type, &var, perm, -1)
 #endif /* !MODULE */
 
 /**
@@ -290,7 +290,7 @@ static inline void __kernel_param_unlock(void)
 		= { len, string };					\
 	__module_param_call(MODULE_PARAM_PREFIX, name,			\
 			    &param_ops_string,				\
-			    .str = &__param_string_##name, perm, 0);	\
+			    .str = &__param_string_##name, perm, -1);	\
 	__MODULE_PARM_TYPE(name, "string")
 
 /**
@@ -432,7 +432,7 @@ extern int param_set_bint(const char *val, const struct kernel_param *kp);
 	__module_param_call(MODULE_PARAM_PREFIX, name,			\
 			    &param_array_ops,				\
 			    .arr = &__param_arr_##name,			\
-			    perm, 0);					\
+			    perm, -1);					\
 	__MODULE_PARM_TYPE(name, "array of " #type)
 
 extern struct kernel_param_ops param_array_ops;
-- 
cgit v1.2.3


From c8e9cf7bb240049117d2fa64d1540476c289396d Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 7 Jun 2012 12:15:15 +0200
Subject: vga_switcheroo: Add a helper function to get the client state

Add vga_switcheroo_get_client_state() to get the current state of the
client.  This is necessary to determine the proper initial state of
audio clients in HD-audio driver.

Acked-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/linux/vga_switcheroo.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h
index b455c7c212eb..b176342ca031 100644
--- a/include/linux/vga_switcheroo.h
+++ b/include/linux/vga_switcheroo.h
@@ -12,6 +12,9 @@
 enum vga_switcheroo_state {
 	VGA_SWITCHEROO_OFF,
 	VGA_SWITCHEROO_ON,
+	/* below are referred only from vga_switcheroo_get_client_state() */
+	VGA_SWITCHEROO_INIT,
+	VGA_SWITCHEROO_NOT_FOUND,
 };
 
 enum vga_switcheroo_client_id {
@@ -50,6 +53,8 @@ void vga_switcheroo_unregister_handler(void);
 
 int vga_switcheroo_process_delayed_switch(void);
 
+int vga_switcheroo_get_client_state(struct pci_dev *dev);
+
 #else
 
 static inline void vga_switcheroo_unregister_client(struct pci_dev *dev) {}
@@ -62,5 +67,7 @@ static inline int vga_switcheroo_register_audio_client(struct pci_dev *pdev,
 	int id, bool active) { return 0; }
 static inline void vga_switcheroo_unregister_handler(void) {}
 static inline int vga_switcheroo_process_delayed_switch(void) { return 0; }
+static inline int vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_CLIENT_ON; }
+
 
 #endif
-- 
cgit v1.2.3


From 505cff00de9c303b95c204eb4544066e3e707911 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 8 Jun 2012 12:49:17 +0200
Subject: vga_switcheroo: Fix error without CONFIG_VGA_SWITCHEROO

Fix a typo that is built only when CONFIG_VGA_SWITCHEROO=n.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/linux/vga_switcheroo.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h
index b176342ca031..60da41fe9dc2 100644
--- a/include/linux/vga_switcheroo.h
+++ b/include/linux/vga_switcheroo.h
@@ -67,7 +67,7 @@ static inline int vga_switcheroo_register_audio_client(struct pci_dev *pdev,
 	int id, bool active) { return 0; }
 static inline void vga_switcheroo_unregister_handler(void) {}
 static inline int vga_switcheroo_process_delayed_switch(void) { return 0; }
-static inline int vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_CLIENT_ON; }
+static inline int vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_ON; }
 
 
 #endif
-- 
cgit v1.2.3


From 8876d6b5f81f4e242a6660da22bbd92f17a8d058 Mon Sep 17 00:00:00 2001
From: Paul Pluzhnikov <ppluzhnikov@google.com>
Date: Sat, 9 Jun 2012 07:53:03 -0700
Subject: net: Make linux/tcp.h C++ friendly (trivial)

I originally sent this patch to <trivial@kernel.org>, but Jiri Kosina did
not feel that this is fully appropriate for the trivial tree.

Using linux/tcp.h from C++ results in:

cat t.cc
#include <linux/tcp.h>
int main() { }

g++ -c t.cc

In file included from t.cc:1:
/usr/include/linux/tcp.h:72: error: '__u32 __fswab32(__u32)' cannot appear in a constant-expression
/usr/include/linux/tcp.h:72: error: a function call cannot appear in a constant-expression
...

Attached trivial patch fixes this problem.

Tested:
- the t.cc above compiles with g++ and
- the following program generates the same output before/after
  the patch:

#include <linux/tcp.h>
#include <stdio.h>

int main ()
{
#define P(a) printf("%s: %08x\n", #a, (int)a)
 P(TCP_FLAG_CWR);
 P(TCP_FLAG_ECE);
 P(TCP_FLAG_URG);
 P(TCP_FLAG_ACK);
 P(TCP_FLAG_PSH);
 P(TCP_FLAG_RST);
 P(TCP_FLAG_SYN);
 P(TCP_FLAG_FIN);
 P(TCP_RESERVED_BITS);
 P(TCP_DATA_OFFSET);
#undef P
 return 0;
}

Signed-off-by: Paul Pluzhnikov <ppluzhnikov@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 4c5b63283377..5f359dbfcdce 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -69,16 +69,16 @@ union tcp_word_hdr {
 #define tcp_flag_word(tp) ( ((union tcp_word_hdr *)(tp))->words [3]) 
 
 enum { 
-	TCP_FLAG_CWR = __cpu_to_be32(0x00800000),
-	TCP_FLAG_ECE = __cpu_to_be32(0x00400000),
-	TCP_FLAG_URG = __cpu_to_be32(0x00200000),
-	TCP_FLAG_ACK = __cpu_to_be32(0x00100000),
-	TCP_FLAG_PSH = __cpu_to_be32(0x00080000),
-	TCP_FLAG_RST = __cpu_to_be32(0x00040000),
-	TCP_FLAG_SYN = __cpu_to_be32(0x00020000),
-	TCP_FLAG_FIN = __cpu_to_be32(0x00010000),
-	TCP_RESERVED_BITS = __cpu_to_be32(0x0F000000),
-	TCP_DATA_OFFSET = __cpu_to_be32(0xF0000000)
+	TCP_FLAG_CWR = __constant_cpu_to_be32(0x00800000),
+	TCP_FLAG_ECE = __constant_cpu_to_be32(0x00400000),
+	TCP_FLAG_URG = __constant_cpu_to_be32(0x00200000),
+	TCP_FLAG_ACK = __constant_cpu_to_be32(0x00100000),
+	TCP_FLAG_PSH = __constant_cpu_to_be32(0x00080000),
+	TCP_FLAG_RST = __constant_cpu_to_be32(0x00040000),
+	TCP_FLAG_SYN = __constant_cpu_to_be32(0x00020000),
+	TCP_FLAG_FIN = __constant_cpu_to_be32(0x00010000),
+	TCP_RESERVED_BITS = __constant_cpu_to_be32(0x0F000000),
+	TCP_DATA_OFFSET = __constant_cpu_to_be32(0xF0000000)
 }; 
 
 /*
-- 
cgit v1.2.3


From 601722157b3f6be73623644eeae6f14940f0bd8f Mon Sep 17 00:00:00 2001
From: Qiao Zhou <zhouqiao@marvell.com>
Date: Mon, 4 Jun 2012 10:41:03 +0800
Subject: ARM: MMP: add pxa910-ssp into ssp_id_table

add pxa910-ssp into ssp_id_table, and fix pxa-ssp compiling issue
under mach-mmp architect.

Signed-off-by: Qiao Zhou <zhouqiao@marvell.com>
Acked-by: Haojian Zhuang <haojian.zhuang@gmail.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/linux/pxa2xx_ssp.h     | 1 +
 include/linux/spi/pxa2xx_spi.h | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h
index 44835fb39793..f9fe15ec2b51 100644
--- a/include/linux/pxa2xx_ssp.h
+++ b/include/linux/pxa2xx_ssp.h
@@ -161,6 +161,7 @@ enum pxa_ssp_type {
 	PXA25x_NSSP, /* pxa 255, 26x (including ASSP) */
 	PXA27x_SSP,
 	PXA168_SSP,
+	PXA910_SSP,
 	CE4100_SSP,
 };
 
diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h
index d3e1075f7b60..c73d1445c77e 100644
--- a/include/linux/spi/pxa2xx_spi.h
+++ b/include/linux/spi/pxa2xx_spi.h
@@ -43,7 +43,7 @@ struct pxa2xx_spi_chip {
 	void (*cs_control)(u32 command);
 };
 
-#ifdef CONFIG_ARCH_PXA
+#if defined(CONFIG_ARCH_PXA) || defined(CONFIG_ARCH_MMP)
 
 #include <linux/clk.h>
 #include <mach/dma.h>
-- 
cgit v1.2.3


From 972a55b62d592cfcd6d73577df8a52f1251ea9a7 Mon Sep 17 00:00:00 2001
From: Qiao Zhou <zhouqiao@marvell.com>
Date: Mon, 4 Jun 2012 10:41:04 +0800
Subject: ASoC: fix pxa-ssp compiling issue under mach-mmp

pxa-ssp.c uses API like cpu_is_pxa3xx(), cpu_is_pxa2xx(), which is
defined under arch-pxa architecture, and drivers under mach-mmp
can't find it. so just use ssp->type to replace that API.

Signed-off-by: Qiao Zhou <zhouqiao@marvell.com>
Acked-by: Haojian Zhuang <haojian.zhuang@gmail.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/linux/pxa2xx_ssp.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h
index f9fe15ec2b51..f36632061c66 100644
--- a/include/linux/pxa2xx_ssp.h
+++ b/include/linux/pxa2xx_ssp.h
@@ -160,6 +160,7 @@ enum pxa_ssp_type {
 	PXA25x_SSP,  /* pxa 210, 250, 255, 26x */
 	PXA25x_NSSP, /* pxa 255, 26x (including ASSP) */
 	PXA27x_SSP,
+	PXA3xx_SSP,
 	PXA168_SSP,
 	PXA910_SSP,
 	CE4100_SSP,
-- 
cgit v1.2.3


From c2fb8a3fa25513de8fedb38509b1f15a5bbee47b Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 13 Jun 2012 11:20:19 -0400
Subject: USB: add NO_D3_DURING_SLEEP flag and revert 151b61284776be2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch (as1558) fixes a problem affecting several ASUS computers:
The machine crashes or corrupts memory when going into suspend if the
ehci-hcd driver is bound to any controllers.  Users have been forced
to unbind or unload ehci-hcd before putting their systems to sleep.

After extensive testing, it was determined that the machines don't
like going into suspend when any EHCI controllers are in the PCI D3
power state.  Presumably this is a firmware bug, but there's nothing
we can do about it except to avoid putting the controllers in D3
during system sleep.

The patch adds a new flag to indicate whether the problem is present,
and avoids changing the controller's power state if the flag is set.
Runtime suspend is unaffected; this matters only for system suspend.
However as a side effect, the controller will not respond to remote
wakeup requests while the system is asleep.  Hence USB wakeup is not
functional -- but of course, this is already true in the current state
of affairs.

A similar patch has already been applied as commit
151b61284776be2d6f02d48c23c3625678960b97 (USB: EHCI: fix crash during
suspend on ASUS computers).  The patch supersedes that one and reverts
it.  There are two differences:

	The old patch added the flag at the USB level; this patch
	adds it at the PCI level.

	The old patch applied to all chipsets with the same vendor,
	subsystem vendor, and product IDs; this patch makes an
	exception for a known-good system (based on DMI information).

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Tested-by: Dâniel Fraga <fragabr@gmail.com>
Tested-by: Andrey Rahmatullin <wrar@wrar.name>
Tested-by: Steven Rostedt <rostedt@goodmis.org>
Cc: stable <stable@vger.kernel.org>
Reviewed-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/pci.h     | 2 ++
 include/linux/usb/hcd.h | 2 --
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index d8c379dba6ad..fefb4e19bf6a 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -176,6 +176,8 @@ enum pci_dev_flags {
 	PCI_DEV_FLAGS_NO_D3 = (__force pci_dev_flags_t) 2,
 	/* Provide indication device is assigned by a Virtual Machine Manager */
 	PCI_DEV_FLAGS_ASSIGNED = (__force pci_dev_flags_t) 4,
+	/* Device causes system crash if in D3 during S3 sleep */
+	PCI_DEV_FLAGS_NO_D3_DURING_SLEEP = (__force pci_dev_flags_t) 8,
 };
 
 enum pci_irq_reroute_variant {
diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index 7f855d50cdf5..49b3ac29726a 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -126,8 +126,6 @@ struct usb_hcd {
 	unsigned		wireless:1;	/* Wireless USB HCD */
 	unsigned		authorized_default:1;
 	unsigned		has_tt:1;	/* Integrated TT in root hub */
-	unsigned		broken_pci_sleep:1;	/* Don't put the
-			controller in PCI-D3 for system sleep */
 
 	unsigned int		irq;		/* irq allocated */
 	void __iomem		*regs;		/* device memory/io */
-- 
cgit v1.2.3


From 201e4aca5aa179e6c69a4dcd36a3562e56b8d670 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <anton.vorontsov@linaro.org>
Date: Sat, 26 May 2012 06:07:49 -0700
Subject: pstore/ram: Should update old dmesg buffer before reading

Without the update, we'll only see the new dmesg buffer after the
reboot, but previously we could see it right away. Making an oops
visible in pstore filesystem before reboot is a somewhat dubious
feature, but removing it wasn't an intentional change, so let's
restore it.

For this we have to make persistent_ram_save_old() safe for calling
multiple times, and also extern it.

Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/pstore_ram.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h
index 7ed7fd4dba49..4491e8ff36e6 100644
--- a/include/linux/pstore_ram.h
+++ b/include/linux/pstore_ram.h
@@ -75,6 +75,7 @@ struct persistent_ram_zone *persistent_ram_init_ringbuffer(struct device *dev,
 int persistent_ram_write(struct persistent_ram_zone *prz, const void *s,
 	unsigned int count);
 
+void persistent_ram_save_old(struct persistent_ram_zone *prz);
 size_t persistent_ram_old_size(struct persistent_ram_zone *prz);
 void *persistent_ram_old(struct persistent_ram_zone *prz);
 void persistent_ram_free_old(struct persistent_ram_zone *prz);
-- 
cgit v1.2.3


From fce397930475f7efc712a1345dc0dad269a10544 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <anton.vorontsov@linaro.org>
Date: Sat, 26 May 2012 06:07:51 -0700
Subject: pstore/ram_core: Factor persistent_ram_zap() out of post_init()

A handy function that we will use outside of ram_core soon. But
so far just factor it out and start using it in post_init().

Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/pstore_ram.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h
index 4491e8ff36e6..3b823d49a85a 100644
--- a/include/linux/pstore_ram.h
+++ b/include/linux/pstore_ram.h
@@ -69,6 +69,7 @@ struct persistent_ram_zone * __init persistent_ram_new(phys_addr_t start,
 						       size_t size,
 						       bool ecc);
 void persistent_ram_free(struct persistent_ram_zone *prz);
+void persistent_ram_zap(struct persistent_ram_zone *prz);
 struct persistent_ram_zone *persistent_ram_init_ringbuffer(struct device *dev,
 		bool ecc);
 
-- 
cgit v1.2.3


From e2ae715d66bf4becfb85eb84b7150e23cf27df30 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay@vrfy.org>
Date: Fri, 15 Jun 2012 14:07:51 +0200
Subject: kmsg - kmsg_dump() use iterator to receive log buffer content

Provide an iterator to receive the log buffer content, and convert all
kmsg_dump() users to it.

The structured data in the kmsg buffer now contains binary data, which
should no longer be copied verbatim to the kmsg_dump() users.

The iterator should provide reliable access to the buffer data, and also
supports proper log line-aware chunking of data while iterating.

Signed-off-by: Kay Sievers <kay@vrfy.org>
Tested-by: Tony Luck <tony.luck@intel.com>
Reported-by: Anton Vorontsov <anton.vorontsov@linaro.org>
Tested-by: Anton Vorontsov <anton.vorontsov@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/kmsg_dump.h | 45 ++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 38 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h
index 35f7237ec972..af4eb5a39d9a 100644
--- a/include/linux/kmsg_dump.h
+++ b/include/linux/kmsg_dump.h
@@ -21,6 +21,7 @@
  * is passed to the kernel.
  */
 enum kmsg_dump_reason {
+	KMSG_DUMP_UNDEF,
 	KMSG_DUMP_PANIC,
 	KMSG_DUMP_OOPS,
 	KMSG_DUMP_EMERG,
@@ -31,23 +32,37 @@ enum kmsg_dump_reason {
 
 /**
  * struct kmsg_dumper - kernel crash message dumper structure
- * @dump:	The callback which gets called on crashes. The buffer is passed
- * 		as two sections, where s1 (length l1) contains the older
- * 		messages and s2 (length l2) contains the newer.
  * @list:	Entry in the dumper list (private)
+ * @dump:	Call into dumping code which will retrieve the data with
+ * 		through the record iterator
+ * @max_reason:	filter for highest reason number that should be dumped
  * @registered:	Flag that specifies if this is already registered
  */
 struct kmsg_dumper {
-	void (*dump)(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason,
-			const char *s1, unsigned long l1,
-			const char *s2, unsigned long l2);
 	struct list_head list;
-	int registered;
+	void (*dump)(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason);
+	enum kmsg_dump_reason max_reason;
+	bool active;
+	bool registered;
+
+	/* private state of the kmsg iterator */
+	u32 cur_idx;
+	u32 next_idx;
+	u64 cur_seq;
+	u64 next_seq;
 };
 
 #ifdef CONFIG_PRINTK
 void kmsg_dump(enum kmsg_dump_reason reason);
 
+bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,
+			char *line, size_t size, size_t *len);
+
+bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
+			  char *buf, size_t size, size_t *len);
+
+void kmsg_dump_rewind(struct kmsg_dumper *dumper);
+
 int kmsg_dump_register(struct kmsg_dumper *dumper);
 
 int kmsg_dump_unregister(struct kmsg_dumper *dumper);
@@ -56,6 +71,22 @@ static inline void kmsg_dump(enum kmsg_dump_reason reason)
 {
 }
 
+bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,
+			  const char *line, size_t size, size_t *len)
+{
+	return false;
+}
+
+bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
+			    char *buf, size_t size, size_t *len)
+{
+	return false;
+}
+
+void kmsg_dump_rewind(struct kmsg_dumper *dumper)
+{
+}
+
 static inline int kmsg_dump_register(struct kmsg_dumper *dumper)
 {
 	return -EINVAL;
-- 
cgit v1.2.3


From 9b15b817f3d62409290fd56fe3cbb076a931bb0a Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Fri, 15 Jun 2012 17:55:50 -0700
Subject: swap: fix shmem swapping when more than 8 areas

Minchan Kim reports that when a system has many swap areas, and tmpfs
swaps out to the ninth or more, shmem_getpage_gfp()'s attempts to read
back the page cannot locate it, and the read fails with -ENOMEM.

Whoops.  Yes, I blindly followed read_swap_header()'s pte_to_swp_entry(
swp_entry_to_pte()) technique for determining maximum usable swap
offset, without stopping to realize that that actually depends upon the
pte swap encoding shifting swap offset to the higher bits and truncating
it there.  Whereas our radix_tree swap encoding leaves offset in the
lower bits: it's swap "type" (that is, index of swap area) that was
truncated.

Fix it by reducing the SWP_TYPE_SHIFT() in swapops.h, and removing the
broken radix_to_swp_entry(swp_to_radix_entry()) from read_swap_header().

This does not reduce the usable size of a swap area any further, it
leaves it as claimed when making the original commit: no change from 3.0
on x86_64, nor on i386 without PAE; but 3.0's 512GB is reduced to 128GB
per swapfile on i386 with PAE.  It's not a change I would have risked
five years ago, but with x86_64 supported for ten years, I believe it's
appropriate now.

Hmm, and what if some architecture implements its swap pte with offset
encoded below type? That would equally break the maximum usable swap
offset check.  Happily, they all follow the same tradition of encoding
offset above type, but I'll prepare a check on that for next.

Reported-and-Reviewed-and-Tested-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: stable@vger.kernel.org [3.1, 3.2, 3.3, 3.4]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swapops.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 792d16d9cbc7..47ead515c811 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -9,13 +9,15 @@
  * get good packing density in that tree, so the index should be dense in
  * the low-order bits.
  *
- * We arrange the `type' and `offset' fields so that `type' is at the five
+ * We arrange the `type' and `offset' fields so that `type' is at the seven
  * high-order bits of the swp_entry_t and `offset' is right-aligned in the
- * remaining bits.
+ * remaining bits.  Although `type' itself needs only five bits, we allow for
+ * shmem/tmpfs to shift it all up a further two bits: see swp_to_radix_entry().
  *
  * swp_entry_t's are *never* stored anywhere in their arch-dependent format.
  */
-#define SWP_TYPE_SHIFT(e)	(sizeof(e.val) * 8 - MAX_SWAPFILES_SHIFT)
+#define SWP_TYPE_SHIFT(e)	((sizeof(e.val) * 8) - \
+			(MAX_SWAPFILES_SHIFT + RADIX_TREE_EXCEPTIONAL_SHIFT))
 #define SWP_OFFSET_MASK(e)	((1UL << SWP_TYPE_SHIFT(e)) - 1)
 
 /*
-- 
cgit v1.2.3


From f8fee8f5acb5c3f82e02f2ae139a6f1e7b4eb583 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Fri, 15 Jun 2012 12:46:17 -0700
Subject: vga_switcheroo.h: fix pci_dev warning

Fix warnings on some architectures/configs (not on x86):

include/linux/vga_switcheroo.h:28:30: warning: 'struct pci_dev' declared inside parameter list [enabled by default]
include/linux/vga_switcheroo.h:28:30: warning: its scope is only this definition or declaration, which is probably not what you want [enabled by default]

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Cc:	Takashi Iwai <tiwai@suse.de>
Reported-by:	Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/linux/vga_switcheroo.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h
index 60da41fe9dc2..d844b7790ea6 100644
--- a/include/linux/vga_switcheroo.h
+++ b/include/linux/vga_switcheroo.h
@@ -9,6 +9,8 @@
 
 #include <linux/fb.h>
 
+struct pci_dev;
+
 enum vga_switcheroo_state {
 	VGA_SWITCHEROO_OFF,
 	VGA_SWITCHEROO_ON,
-- 
cgit v1.2.3


From 2a4c8994eeef50796015f8a2005e4a75c1929166 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 14 Jun 2012 13:08:38 -0400
Subject: NFSv4.1: Fix umount when filelayout DS is also the MDS

Currently there is a 'chicken and egg' issue when the DS is also the mounted
MDS. The nfs_match_client() reference from nfs4_set_ds_client bumps the
cl_count, the nfs_client is not freed at umount, and nfs4_deviceid_purge_client
is not called to dereference the MDS usage of a deviceid which holds a
reference to the DS nfs_client.  The result is the umount program returns,
but the nfs_client is not freed, and the cl_session hearbeat continues.

The MDS (and all other nfs mounts) lose their last nfs_client reference in
nfs_free_server when the last nfs_server (fsid) is umounted.
The file layout DS lose their last nfs_client reference in destroy_ds
when the last deviceid referencing the data server is put and destroy_ds is
called. This is triggered by a call to nfs4_deviceid_purge_client which
removes references to a pNFS deviceid used by an MDS mount.

The fix is to track how many pnfs enabled filesystems are mounted from
this server, and then to purge the device id cache once that count reaches
zero.

Reported-by: Jorge Mora <Jorge.Mora@netapp.com>
Reported-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs_sb.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index fbb78fb09bd2..f58325a1d8fb 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -25,6 +25,7 @@ struct nfs41_impl_id;
  */
 struct nfs_client {
 	atomic_t		cl_count;
+	atomic_t		cl_mds_count;
 	int			cl_cons_state;	/* current construction state (-ve: init error) */
 #define NFS_CS_READY		0		/* ready to be used */
 #define NFS_CS_INITING		1		/* busy initialising */
-- 
cgit v1.2.3


From 93b3cca1ccd30b1ad290951a3fc7c10c73db7313 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Thu, 14 Jun 2012 10:54:28 -0400
Subject: ftrace: Make all inline tags also include notrace

Commit 5963e317b1e9d2a ("ftrace/x86: Do not change stacks in DEBUG when
calling lockdep") prevented lockdep calls from the int3 breakpoint handler
from reseting the stack if a function that was called was in the process
of being converted for tracing and had a breakpoint on it. The idea is,
before calling the lockdep code, do a load_idt() to the special IDT that
kept the breakpoint stack from reseting. This worked well as a quick fix
for this kernel release, until a certain config caused a lockup in the
function tracer start up tests.

Investigating it, I found that the load_idt that was used to prevent
the int3 from changing stacks was itself being traced!

Even though the config had CONFIG_OPTIMIZE_INLINING disabled, and
all 'inline' tags were set to always inline, there were still cases that
it did not inline! This was caused by CONFIG_PARAVIRT_GUEST, where it
would add a pointer to the native_load_idt() which made that function
to be traced.

Commit 45959ee7aa645815a ("ftrace: Do not function trace inlined functions")
only touched the 'inline' tags when CONFIG_OPMITIZE_INLINING was enabled.
PARAVIRT_GUEST shows that this was not enough and we need to also
mark always_inline with notrace as well.

Reported-by: Fengguang Wu <wfg@linux.intel.com>
Tested-by: Fengguang Wu <wfg@linux.intel.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/compiler-gcc.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index e5834aa24b9e..6a6d7aefe12d 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -47,9 +47,9 @@
  */
 #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) || \
     !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4)
-# define inline		inline		__attribute__((always_inline))
-# define __inline__	__inline__	__attribute__((always_inline))
-# define __inline	__inline	__attribute__((always_inline))
+# define inline		inline		__attribute__((always_inline)) notrace
+# define __inline__	__inline__	__attribute__((always_inline)) notrace
+# define __inline	__inline	__attribute__((always_inline)) notrace
 #else
 /* A lot of inline functions can cause havoc with function tracing */
 # define inline		inline		notrace
-- 
cgit v1.2.3


From 246f6f2ff2c5cf46ded6d06f11f63e38bad880d1 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay@vrfy.org>
Date: Tue, 19 Jun 2012 00:32:57 +0200
Subject: kmsg - kmsg_dump() fix CONFIG_PRINTK=n compilation

Signed-off-by: Kay Sievers <kay@vrfy.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Reported-by: Randy Dunlap <rdunlap@xenotime.net>
Reported-by: Fengguang Wu <wfg@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/kmsg_dump.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h
index af4eb5a39d9a..d6bd50110ec2 100644
--- a/include/linux/kmsg_dump.h
+++ b/include/linux/kmsg_dump.h
@@ -71,19 +71,19 @@ static inline void kmsg_dump(enum kmsg_dump_reason reason)
 {
 }
 
-bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,
-			  const char *line, size_t size, size_t *len)
+static inline bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,
+				const char *line, size_t size, size_t *len)
 {
 	return false;
 }
 
-bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
-			    char *buf, size_t size, size_t *len)
+static inline bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
+					char *buf, size_t size, size_t *len)
 {
 	return false;
 }
 
-void kmsg_dump_rewind(struct kmsg_dumper *dumper)
+static inline void kmsg_dump_rewind(struct kmsg_dumper *dumper)
 {
 }
 
-- 
cgit v1.2.3


From abca7c4965845924f65d40e0aa1092bdd895e314 Mon Sep 17 00:00:00 2001
From: Pravin B Shelar <pshelar@nicira.com>
Date: Wed, 20 Jun 2012 12:52:56 -0700
Subject: mm: fix slab->page _count corruption when using slub

On arches that do not support this_cpu_cmpxchg_double() slab_lock is used
to do atomic cmpxchg() on double word which contains page->_count.  The
page count can be changed from get_page() or put_page() without taking
slab_lock.  That corrupts page counter.

Fix it by moving page->_count out of cmpxchg_double data.  So that slub
does no change it while updating slub meta-data in struct page.

[akpm@linux-foundation.org: use standard comment layout, tweak comment text]
Reported-by: Amey Bhide <abhide@nicira.com>
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index dad95bdd06d7..704a626d94a0 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -57,8 +57,18 @@ struct page {
 		};
 
 		union {
+#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
+	defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
 			/* Used for cmpxchg_double in slub */
 			unsigned long counters;
+#else
+			/*
+			 * Keep _count separate from slub cmpxchg_double data.
+			 * As the rest of the double word is protected by
+			 * slab_lock but _count is not.
+			 */
+			unsigned counters;
+#endif
 
 			struct {
 
-- 
cgit v1.2.3


From 10d8935f46e5028847b179757ecbf9238b13d129 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.linux@gmail.com>
Date: Wed, 20 Jun 2012 12:53:02 -0700
Subject: Viresh has moved

viresh.kumar@st.com email-id doesn't exist anymore as I have left the
company.  Replace ST's id with viresh.linux@gmail.com.

It also updates .mailmap file to fix address for 'git shortlog'

Signed-off-by: Viresh Kumar <viresh.linux@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmc/sdhci-spear.h     | 2 +-
 include/linux/pata_arasan_cf_data.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmc/sdhci-spear.h b/include/linux/mmc/sdhci-spear.h
index 5cdc96da9dd5..e78c0e236e9d 100644
--- a/include/linux/mmc/sdhci-spear.h
+++ b/include/linux/mmc/sdhci-spear.h
@@ -4,7 +4,7 @@
  * SDHCI declarations specific to ST SPEAr platform
  *
  * Copyright (C) 2010 ST Microelectronics
- * Viresh Kumar<viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/include/linux/pata_arasan_cf_data.h b/include/linux/pata_arasan_cf_data.h
index a6ee9aa898bb..a7b4fc386e63 100644
--- a/include/linux/pata_arasan_cf_data.h
+++ b/include/linux/pata_arasan_cf_data.h
@@ -4,7 +4,7 @@
  * Arasan Compact Flash host controller platform data header file
  *
  * Copyright (C) 2011 ST Microelectronics
- * Viresh Kumar <viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
-- 
cgit v1.2.3


From d3decf3a0c1d28c80c76be170373f4c7a7217f09 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ozan=20=C3=87a=C4=9Flayan?= <ozancag@gmail.com>
Date: Thu, 14 Jun 2012 15:02:35 +0300
Subject: vga_switcheroo: Add include guard
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Guard vga_switcheroo.h against multiple inclusion.

Signed-off-by: Ozan Çağlayan <ozancag@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/linux/vga_switcheroo.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h
index d844b7790ea6..ddb419cf4530 100644
--- a/include/linux/vga_switcheroo.h
+++ b/include/linux/vga_switcheroo.h
@@ -7,6 +7,9 @@
  * vga_switcheroo.h - Support for laptop with dual GPU using one set of outputs
  */
 
+#ifndef _LINUX_VGA_SWITCHEROO_H_
+#define _LINUX_VGA_SWITCHEROO_H_
+
 #include <linux/fb.h>
 
 struct pci_dev;
@@ -73,3 +76,4 @@ static inline int vga_switcheroo_get_client_state(struct pci_dev *dev) { return
 
 
 #endif
+#endif /* _LINUX_VGA_SWITCHEROO_H_ */
-- 
cgit v1.2.3