From a9aa53df6e6c768fc0f25a7c80ba586b0290720a Mon Sep 17 00:00:00 2001 From: Simo Sorce Date: Thu, 29 Mar 2012 19:18:19 -0400 Subject: svcauth: remove unused define Signed-off-by: Simo Sorce --- include/linux/sunrpc/svcauth.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index 548790e9113b..2e2af101b59c 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -16,7 +16,6 @@ #include #include -#define SVC_CRED_NGROUPS 32 struct svc_cred { uid_t cr_uid; gid_t cr_gid; -- cgit v1.2.3 From db3a35326362624dd4d8473e676d63afa52bedcc Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Wed, 28 Mar 2012 19:09:08 +0400 Subject: nfsd: add link to owner cache detail to svc_export structure Without info about owner cache datail it won't be able to find out, which per-net cache detail have to be. Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields --- include/linux/nfsd/export.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index f85308e688fd..64455292bbba 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -103,6 +103,7 @@ struct svc_export { struct nfsd4_fs_locations ex_fslocs; int ex_nflavors; struct exp_flavor_info ex_flavors[MAX_SECINFO_LIST]; + struct cache_detail *cd; }; /* an "export key" (expkey) maps a filehandlefragement to an -- cgit v1.2.3 From 71234978e81ee515c8025d087a197561b311c183 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Wed, 28 Mar 2012 19:09:15 +0400 Subject: nfsd: use cache detail pointer from svc_export structure on cache put Hard-coded pointer is redundant now and can be replaced. Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields --- include/linux/nfsd/export.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index 64455292bbba..485c2afa96f7 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -147,7 +147,7 @@ extern struct cache_detail svc_export_cache; static inline void exp_put(struct svc_export *exp) { - cache_put(&exp->h, &svc_export_cache); + cache_put(&exp->h, exp->cd); } static inline void exp_get(struct svc_export *exp) -- cgit v1.2.3 From e3f70eadb7dddfb5a2bb9afff7abfc6ee17a29d0 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Thu, 29 Mar 2012 18:54:33 +0400 Subject: Lockd: pass network namespace to creation and destruction routines v2: dereference of most probably already released nlm_host removed in nlmclnt_done() and reclaimer(). These routines are called from locks reclaimer() kernel thread. This thread works in "init_net" network context and currently relays on persence on lockd thread and it's per-net resources. Thus lockd_up() and lockd_down() can't relay on current network context. So let's pass corrent one into them. Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields --- include/linux/lockd/bind.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h index 11a966e5f829..4d24d64578c4 100644 --- a/include/linux/lockd/bind.h +++ b/include/linux/lockd/bind.h @@ -54,7 +54,7 @@ extern void nlmclnt_done(struct nlm_host *host); extern int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl); -extern int lockd_up(void); -extern void lockd_down(void); +extern int lockd_up(struct net *net); +extern void lockd_down(struct net *net); #endif /* LINUX_LOCKD_BIND_H */ -- cgit v1.2.3 From b89109bef4a6a4a8ab5788778ee0addca0787870 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Wed, 11 Apr 2012 15:13:14 +0400 Subject: nfsd: pass network context to export caches init/shutdown routines These functions will be called from per-net operations. Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields --- include/linux/nfsd/export.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index 485c2afa96f7..375096c083d3 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -130,8 +130,8 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp); /* * Function declarations */ -int nfsd_export_init(void); -void nfsd_export_shutdown(void); +int nfsd_export_init(struct net *); +void nfsd_export_shutdown(struct net *); void nfsd_export_flush(void); struct svc_export * rqst_exp_get_by_name(struct svc_rqst *, struct path *); -- cgit v1.2.3 From b3853e0ea1f2ef58f7e7c03e47819e2ae3766dea Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Wed, 11 Apr 2012 15:13:21 +0400 Subject: nfsd: make export cache allocated per network namespace context This patch also changes prototypes of nfsd_export_flush() and exp_rootfh(): network namespace parameter added. Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields --- include/linux/nfsd/export.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index 375096c083d3..565c2122993f 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -132,13 +132,13 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp); */ int nfsd_export_init(struct net *); void nfsd_export_shutdown(struct net *); -void nfsd_export_flush(void); +void nfsd_export_flush(struct net *); struct svc_export * rqst_exp_get_by_name(struct svc_rqst *, struct path *); struct svc_export * rqst_exp_parent(struct svc_rqst *, struct path *); struct svc_export * rqst_find_fsidzero_export(struct svc_rqst *); -int exp_rootfh(struct auth_domain *, +int exp_rootfh(struct net *, struct auth_domain *, char *path, struct knfsd_fh *, int maxsize); __be32 exp_pseudoroot(struct svc_rqst *, struct svc_fh *); __be32 nfserrno(int errno); -- cgit v1.2.3 From e5f06f720eff24e32f1cc08ec03bcc8c4b2d2934 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Wed, 11 Apr 2012 15:13:28 +0400 Subject: nfsd: make expkey cache allocated per network namespace context This patch also changes svcauth_unix_purge() function: added network namespace as a parameter and thus loop over all networks was replaced by only one call for ip map cache purge. Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields --- include/linux/nfsd/export.h | 2 -- include/linux/sunrpc/svcauth.h | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index 565c2122993f..e33f747b173c 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -143,8 +143,6 @@ int exp_rootfh(struct net *, struct auth_domain *, __be32 exp_pseudoroot(struct svc_rqst *, struct svc_fh *); __be32 nfserrno(int errno); -extern struct cache_detail svc_export_cache; - static inline void exp_put(struct svc_export *exp) { cache_put(&exp->h, exp->cd); diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index 2e2af101b59c..2c54683b91de 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -130,7 +130,7 @@ extern struct auth_domain *auth_domain_lookup(char *name, struct auth_domain *ne extern struct auth_domain *auth_domain_find(char *name); extern struct auth_domain *auth_unix_lookup(struct net *net, struct in6_addr *addr); extern int auth_unix_forget_old(struct auth_domain *dom); -extern void svcauth_unix_purge(void); +extern void svcauth_unix_purge(struct net *net); extern void svcauth_unix_info_release(struct svc_xprt *xpt); extern int svcauth_unix_set_client(struct svc_rqst *rqstp); -- cgit v1.2.3 From 05ba1f0823004e947748523782e9c2f07f3bff0d Mon Sep 17 00:00:00 2001 From: Anatol Pomozov Date: Sun, 22 Apr 2012 18:45:24 -0700 Subject: fuse: add FALLOCATE operation fallocate filesystem operation preallocates media space for the given file. If fallocate returns success then any subsequent write to the given range never fails with 'not enough space' error. Signed-off-by: Anatol Pomozov Signed-off-by: Miklos Szeredi --- include/linux/fuse.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fuse.h b/include/linux/fuse.h index 8f2ab8fef929..9303348965fb 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -54,6 +54,9 @@ * 7.18 * - add FUSE_IOCTL_DIR flag * - add FUSE_NOTIFY_DELETE + * + * 7.19 + * - add FUSE_FALLOCATE */ #ifndef _LINUX_FUSE_H @@ -85,7 +88,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 18 +#define FUSE_KERNEL_MINOR_VERSION 19 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -278,6 +281,7 @@ enum fuse_opcode { FUSE_POLL = 40, FUSE_NOTIFY_REPLY = 41, FUSE_BATCH_FORGET = 42, + FUSE_FALLOCATE = 43, /* CUSE specific operations */ CUSE_INIT = 4096, @@ -571,6 +575,14 @@ struct fuse_notify_poll_wakeup_out { __u64 kh; }; +struct fuse_fallocate_in { + __u64 fh; + __u64 offset; + __u64 length; + __u32 mode; + __u32 padding; +}; + struct fuse_in_header { __u32 len; __u32 opcode; -- cgit v1.2.3 From 1f45f9dbb392f9ca0919e9cd2370ab66ae752ec8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Heiko=20St=C3=BCbner?= Date: Sat, 28 Apr 2012 12:19:10 +0200 Subject: fb_defio: add first_io callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With this optional callback the driver is notified when the first page is entered into the pagelist and a new deferred_io call is scheduled. A possible use-case for this is runtime-pm. In the first_io call pm_runtime_get() could be called, which starts an asynchronous runtime_resume of the device. In the deferred_io callback a call to pm_runtime_barrier() makes the sure, the device is resumed by then and a pm_runtime_put() may put the device back to sleep. Also, some SoCs may use the runtime-pm system to determine if they are able to enter deeper idle states. Therefore it is necessary to keep the use-count from the first written page until the conclusion of the screen update, to prevent the system from going to sleep before completing the pending update. Two users of defio were using kmalloc to allocate the structure. These allocations are changed to kzalloc, to prevent uninitialised .first_io members in those drivers. Signed-off-by: Heiko Stübner Signed-off-by: Florian Tobias Schandinat --- include/linux/fb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fb.h b/include/linux/fb.h index d31cb682e173..c10e71efb8f5 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -607,6 +607,7 @@ struct fb_deferred_io { struct mutex lock; /* mutex that protects the page list */ struct list_head pagelist; /* list of touched pages */ /* callback */ + void (*first_io)(struct fb_info *info); void (*deferred_io)(struct fb_info *info, struct list_head *pagelist); }; #endif -- cgit v1.2.3 From f84891289e62a74e9b4942eaad80617368b2d778 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 29 Apr 2012 18:21:10 -0400 Subject: ext4: create a new BH_Verified flag to avoid unnecessary metadata validation Create a new BH_Verified flag to indicate that we've verified all the data in a buffer_head for correctness. This allows us to bypass expensive verification steps when they are not necessary without missing them when they are. Signed-off-by: Darrick J. Wong Signed-off-by: "Theodore Ts'o" --- include/linux/jbd_common.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jbd_common.h b/include/linux/jbd_common.h index 6230f8556a4e..6133679bc4c0 100644 --- a/include/linux/jbd_common.h +++ b/include/linux/jbd_common.h @@ -12,6 +12,7 @@ enum jbd_state_bits { BH_State, /* Pins most journal_head state */ BH_JournalHead, /* Pins bh->b_private and jh->b_bh */ BH_Unshadow, /* Dummy bit, for BJ_Shadow wakeup filtering */ + BH_Verified, /* Metadata block has been verified ok */ BH_JBDPrivateStart, /* First bit available for private use by FS */ }; @@ -24,6 +25,7 @@ TAS_BUFFER_FNS(Revoked, revoked) BUFFER_FNS(RevokeValid, revokevalid) TAS_BUFFER_FNS(RevokeValid, revokevalid) BUFFER_FNS(Freed, freed) +BUFFER_FNS(Verified, verified) static inline struct buffer_head *jh2bh(struct journal_head *jh) { -- cgit v1.2.3 From a2ebfe2fc6e088a70d06cd15a5bc9bcb621cc195 Mon Sep 17 00:00:00 2001 From: Ramakrishna Pallala Date: Tue, 10 Apr 2012 16:21:20 +0530 Subject: power_supply: Add voltage_ocv property and use it for max17042 driver This adds a new sysfs file called 'voltage_ocv' which gives the Open Circuit Voltage of the battery. This property can be used for platform shutdown policies and can be useful for initial capacity estimations. Note: This patch is generated against linux-next branch. Signed-off-by: Ramakrishna Pallala Signed-off-by: Anton Vorontsov --- include/linux/power_supply.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index c38c13db8832..fd17ae0f9c20 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -96,6 +96,7 @@ enum power_supply_property { POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN, POWER_SUPPLY_PROP_VOLTAGE_NOW, POWER_SUPPLY_PROP_VOLTAGE_AVG, + POWER_SUPPLY_PROP_VOLTAGE_OCV, POWER_SUPPLY_PROP_CURRENT_MAX, POWER_SUPPLY_PROP_CURRENT_NOW, POWER_SUPPLY_PROP_CURRENT_AVG, @@ -261,6 +262,7 @@ static inline bool power_supply_is_watt_property(enum power_supply_property psp) case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN: case POWER_SUPPLY_PROP_VOLTAGE_NOW: case POWER_SUPPLY_PROP_VOLTAGE_AVG: + case POWER_SUPPLY_PROP_VOLTAGE_OCV: case POWER_SUPPLY_PROP_POWER_NOW: return 1; default: -- cgit v1.2.3 From 0d4ed4e27a4cb180af395fa3d7aa98d79f3d3015 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Fri, 4 May 2012 21:06:19 -0700 Subject: power_supply: Make the core a boolean instead of a tristate On Mon, Apr 02, 2012 at 01:53:23PM +1000, Benjamin Herrenschmidt wrote: > > drivers/built-in.o: In function `.nouveau_pm_trigger': > > (.text+0xa56e8): undefined reference to `.power_supply_is_system_supplied' > > > > nouveau probably needs to depends on CONFIG_POWER_SUPPLY to force a module > > build with the latter is =m > > Ok, not that trivial... > > The problem is more like POWER_SUPPLY should be a bool, not a tristate. > > If you think about it: you don't want things like nouveau to depend on a > random subsystem like that, people will never get it. In fact, > POWER_SUPPLY provides empty inline stubs when not enabled, so that's > really designed to not have depends... > > However that -cannot- work if POWER_SUPPLY is modular and the drivers > who use it are not. > > The only fixes here that make sense I can think of > that don't also involve Kconfig horrors are: > > - Ugly: in power_supply.h, use the extern variant if > > defined(CONFIG_POWER_SUPPLY) || > (defined(CONFIG_POWER_SUPPLY_MODULE) && defined(MODULE)) > > IE. use the stub if power supply is a module and what is being built is > built-in. Of course that's not only ugly, it somewhat sucks from a user > perspective as the subsystem now exists but can't be used by some > drivers... > > - Better: Just make the bloody thing a bool :-) The power supply > framework itself is small enough, just make it a boolean option and > avoid the problem entirely. The actual power supply sub drivers can > remain modular of course. Suggested-by: Benjamin Herrenschmidt Signed-off-by: Anton Vorontsov --- include/linux/power_supply.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index fd17ae0f9c20..3b912bee28d1 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -212,7 +212,7 @@ extern void power_supply_changed(struct power_supply *psy); extern int power_supply_am_i_supplied(struct power_supply *psy); extern int power_supply_set_battery_charged(struct power_supply *psy); -#if defined(CONFIG_POWER_SUPPLY) || defined(CONFIG_POWER_SUPPLY_MODULE) +#ifdef CONFIG_POWER_SUPPLY extern int power_supply_is_system_supplied(void); #else static inline int power_supply_is_system_supplied(void) { return -ENOSYS; } -- cgit v1.2.3 From d829dc75bafb10754f35fb8895e5143d20267b04 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Sat, 5 May 2012 06:24:10 -0700 Subject: charger-manager: Poll battery health in normal state Charger-Manager needs to check battery health in normal state as well as suspend-to-RAM state. When the battery is fully charged, Charger-Manager needs to determine when the chargers restart charging. This patch allows Charger-Manager to monitor battery health in normal state and handle operation for chargers after battery is fully charged. Signed-off-by: MyungJoo Ham Signed-off-by: Donggeun Kim Signed-off-by: Kyungmin Park Signed-off-by: Anton Vorontsov --- include/linux/power/charger-manager.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/power/charger-manager.h b/include/linux/power/charger-manager.h index 4f75e531c112..baa299a95e13 100644 --- a/include/linux/power/charger-manager.h +++ b/include/linux/power/charger-manager.h @@ -18,6 +18,8 @@ #include enum data_source { + CM_BATTERY_PRESENT, + CM_NO_BATTERY, CM_FUEL_GAUGE, CM_CHARGER_STAT, }; @@ -38,11 +40,18 @@ enum polling_modes { * rtc_only_wakeup() returning false. * If the RTC given to CM is the only wakeup reason, * rtc_only_wakeup should return true. + * @assume_timer_stops_in_suspend: + * Assume that the jiffy timer stops in suspend-to-RAM. + * When enabled, CM does not rely on jiffies value in + * suspend_again and assumes that jiffies value does not + * change during suspend. */ struct charger_global_desc { char *rtc_name; bool (*rtc_only_wakeup)(void); + + bool assume_timer_stops_in_suspend; }; /** @@ -50,6 +59,11 @@ struct charger_global_desc { * @psy_name: the name of power-supply-class for charger manager * @polling_mode: * Determine which polling mode will be used + * @fullbatt_vchkdrop_ms: + * @fullbatt_vchkdrop_uV: + * Check voltage drop after the battery is fully charged. + * If it has dropped more than fullbatt_vchkdrop_uV after + * fullbatt_vchkdrop_ms, CM will restart charging. * @fullbatt_uV: voltage in microvolt * If it is not being charged and VBATT >= fullbatt_uV, * it is assumed to be full. @@ -76,6 +90,8 @@ struct charger_desc { enum polling_modes polling_mode; unsigned int polling_interval_ms; + unsigned int fullbatt_vchkdrop_ms; + unsigned int fullbatt_vchkdrop_uV; unsigned int fullbatt_uV; enum data_source battery_present; @@ -101,6 +117,11 @@ struct charger_desc { * @fuel_gauge: power_supply for fuel gauge * @charger_stat: array of power_supply for chargers * @charger_enabled: the state of charger + * @fullbatt_vchk_jiffies_at: + * jiffies at the time full battery check will occur. + * @fullbatt_vchk_uV: voltage in microvolt + * criteria for full battery + * @fullbatt_vchk_work: work queue for full battery check * @emergency_stop: * When setting true, stop charging * @last_temp_mC: the measured temperature in milli-Celsius @@ -121,6 +142,10 @@ struct charger_manager { bool charger_enabled; + unsigned long fullbatt_vchk_jiffies_at; + unsigned int fullbatt_vchk_uV; + struct delayed_work fullbatt_vchk_work; + int emergency_stop; int last_temp_mC; -- cgit v1.2.3 From dfeccb12b4614befc49a92eb121c2211294ca669 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Sat, 5 May 2012 06:26:47 -0700 Subject: charger-manager: Provide cm_notify_event function for in-kernel use By using cm_notify_event function, charger driver can report several charger events (e.g. battery full and external power in/out, etc) to Charger-Manager. Charger-Manager can properly and immediately control chargers by the reported event. Signed-off-by: MyungJoo Ham Signed-off-by: Donggeun Kim Signed-off-by: Kyungmin Park Signed-off-by: Anton Vorontsov --- include/linux/power/charger-manager.h | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/power/charger-manager.h b/include/linux/power/charger-manager.h index baa299a95e13..241065c9ce51 100644 --- a/include/linux/power/charger-manager.h +++ b/include/linux/power/charger-manager.h @@ -31,6 +31,16 @@ enum polling_modes { CM_POLL_CHARGING_ONLY, }; +enum cm_event_types { + CM_EVENT_UNKNOWN = 0, + CM_EVENT_BATT_FULL, + CM_EVENT_BATT_IN, + CM_EVENT_BATT_OUT, + CM_EVENT_EXT_PWR_IN_OUT, + CM_EVENT_CHG_START_STOP, + CM_EVENT_OTHERS, +}; + /** * struct charger_global_desc * @rtc_name: the name of RTC used to wake up the system from suspend. @@ -159,14 +169,13 @@ struct charger_manager { #ifdef CONFIG_CHARGER_MANAGER extern int setup_charger_manager(struct charger_global_desc *gd); extern bool cm_suspend_again(void); +extern void cm_notify_event(struct power_supply *psy, + enum cm_event_types type, char *msg); #else -static void __maybe_unused setup_charger_manager(struct charger_global_desc *gd) -{ } - -static bool __maybe_unused cm_suspend_again(void) -{ - return false; -} +static inline int setup_charger_manager(struct charger_global_desc *gd) +{ return 0; } +static inline bool cm_suspend_again(void) { return false; } +static inline void cm_notify_event(struct power_supply *psy, + enum cm_event_types type, char *msg) { } #endif - #endif /* _CHARGER_MANAGER_H */ -- cgit v1.2.3 From 9a8422d205ea142a27c2573e5ca3d2cc87d75260 Mon Sep 17 00:00:00 2001 From: Ramakrishna Pallala Date: Sat, 5 May 2012 14:34:26 +0530 Subject: max17042_battery: Add support for max17047/50 chip max17047 is improved version of max17042 chip. It has few HW bug fixes with minor changes in register set. max17050 is same as max17047 chip except its silicon packging. So from driver's point of view there is no difference btw max1047 and max1050. This patch adds the support to dynamically detect the chip type and adds steps to initialize the max17047 chip. Signed-off-by: Ramakrishna Pallala Signed-off-by: Anton Vorontsov --- include/linux/power/max17042_battery.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/power/max17042_battery.h b/include/linux/power/max17042_battery.h index e01b167e66f0..89dd84f47c6e 100644 --- a/include/linux/power/max17042_battery.h +++ b/include/linux/power/max17042_battery.h @@ -116,6 +116,18 @@ enum max17042_register { MAX17042_VFSOC = 0xFF, }; +/* Registers specific to max17047/50 */ +enum max17047_register { + MAX17047_QRTbl00 = 0x12, + MAX17047_FullSOCThr = 0x13, + MAX17047_QRTbl10 = 0x22, + MAX17047_QRTbl20 = 0x32, + MAX17047_V_empty = 0x3A, + MAX17047_QRTbl30 = 0x42, +}; + +enum max170xx_chip_type {MAX17042, MAX17047}; + /* * used for setting a register to a desired value * addr : address for a register @@ -144,6 +156,7 @@ struct max17042_config_data { u16 shdntimer; /* 0x03F */ /* App data */ + u16 full_soc_thresh; /* 0x13 */ u16 design_cap; /* 0x18 */ u16 ichgt_term; /* 0x1E */ @@ -162,6 +175,10 @@ struct max17042_config_data { u16 lavg_empty; /* 0x36 */ u16 dqacc; /* 0x45 */ u16 dpacc; /* 0x46 */ + u16 qrtbl00; /* 0x12 */ + u16 qrtbl10; /* 0x22 */ + u16 qrtbl20; /* 0x32 */ + u16 qrtbl30; /* 0x42 */ /* Cell technology from power_supply.h */ u16 cell_technology; -- cgit v1.2.3 From 86c2072be6f3c2150cc35f00233f2c31bdba2745 Mon Sep 17 00:00:00 2001 From: Mike Dunn Date: Wed, 25 Apr 2012 12:06:05 -0700 Subject: mtd: ecc_strength is at ecc step granularity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ecc_strength element of mtd_info will be the strength of one ecc step, not of the entire writesize, as was previously planned. This is the appropriate way because, as was pointed out¹, bit errors in excess of the strength of one step can cause a hard error if they all occur within the same ecc region. ¹ http://lists.infradead.org/pipermail/linux-mtd/2012-March/040313.html Signed-off-by: Mike Dunn Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- include/linux/mtd/mtd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index cf5ea8cdcf8e..cd0119d19cd9 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -164,7 +164,7 @@ struct mtd_info { /* ECC layout structure pointer - read only! */ struct nand_ecclayout *ecclayout; - /* max number of correctible bit errors per writesize */ + /* max number of correctible bit errors per ecc step */ unsigned int ecc_strength; /* Data for variable erase regions. If numeraseregions is zero, -- cgit v1.2.3 From d062d4ede877fcd2ecc4c6262abad09a6f32950a Mon Sep 17 00:00:00 2001 From: Mike Dunn Date: Wed, 25 Apr 2012 12:06:08 -0700 Subject: mtd: bitflip_threshold added to mtd_info and sysfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An element 'bitflip_threshold' is added to struct mtd_info, and also exposed as a read/write variable in sysfs. This will be used to determine whether or not mtd_read() returns -EUCLEAN or 0 (absent a hard error). If the driver leaves it as zero, mtd will set it to a default value of ecc_strength. This v2 adds the line that propagates bitflip_threshold from the master to the partitions - thanks Ivan¹. ¹ http://lists.infradead.org/pipermail/linux-mtd/2012-April/040900.html Signed-off-by: Mike Dunn Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- include/linux/mtd/mtd.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index cd0119d19cd9..63dadc0dfb62 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -157,6 +157,15 @@ struct mtd_info { unsigned int erasesize_mask; unsigned int writesize_mask; + /* + * read ops return -EUCLEAN if max number of bitflips corrected on any + * one region comprising an ecc step equals or exceeds this value. + * Settable by driver, else defaults to ecc_strength. User can override + * in sysfs. N.B. The meaning of the -EUCLEAN return code has changed; + * see Documentation/ABI/testing/sysfs-class-mtd for more detail. + */ + unsigned int bitflip_threshold; + // Kernel-only stuff starts here. const char *name; int index; -- cgit v1.2.3 From edbc4540e02c201bdd4f4d498ebb6ed517fd36e2 Mon Sep 17 00:00:00 2001 From: Mike Dunn Date: Wed, 25 Apr 2012 12:06:11 -0700 Subject: mtd: driver _read() returns max_bitflips; mtd_read() returns -EUCLEAN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The drivers' _read() method, absent an error, returns a non-negative integer indicating the maximum number of bit errors that were corrected in any one region comprising an ecc step. MTD returns -EUCLEAN if this is >= bitflip_threshold, 0 otherwise. If bitflip_threshold is zero, the comparison is not made since these devices lack ECC and always return zero in the non-error case (thanks Brian)¹. Note that this is a subtle change to the driver interface. This and the preceding patches in this set were tested with ubi on top of the nandsim and docg4 devices, running the ubi test io_basic from mtd-utils. ¹ http://lists.infradead.org/pipermail/linux-mtd/2012-March/040468.html Signed-off-by: Mike Dunn Acked-by: Robert Jarzmik Acked-by: Brian Norris Ivan Djelic Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- include/linux/mtd/nand.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 1482340d3d9f..2829e8be3a62 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -459,6 +459,8 @@ struct nand_buffers { * @pagemask: [INTERN] page number mask = number of (pages / chip) - 1 * @pagebuf: [INTERN] holds the pagenumber which is currently in * data_buf. + * @pagebuf_bitflips: [INTERN] holds the bitflip count for the page which is + * currently in data_buf. * @subpagesize: [INTERN] holds the subpagesize * @onfi_version: [INTERN] holds the chip ONFI version (BCD encoded), * non 0 if ONFI supported. @@ -519,6 +521,7 @@ struct nand_chip { uint64_t chipsize; int pagemask; int pagebuf; + unsigned int pagebuf_bitflips; int subpagesize; uint8_t cellinfo; int badblockpos; -- cgit v1.2.3 From 1826dbccebc9a58a0b0c0a9b7c09e47b19d97398 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Tue, 1 May 2012 17:12:55 -0700 Subject: mtd: nand: kill NAND_NO_AUTOINCR option No drivers use auto-increment NAND, so kill the NO_AUTOINCR option entirely. Signed-off-by: Brian Norris Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- include/linux/mtd/nand.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 2829e8be3a62..627f0c575ac4 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -161,8 +161,6 @@ typedef enum { * Option constants for bizarre disfunctionality and real * features. */ -/* Chip can not auto increment pages */ -#define NAND_NO_AUTOINCR 0x00000001 /* Buswidth is 16 bit */ #define NAND_BUSWIDTH_16 0x00000002 /* Device supports partial programming without padding */ @@ -207,7 +205,6 @@ typedef enum { (NAND_NO_PADDING | NAND_CACHEPRG | NAND_COPYBACK) /* Macros to identify the above */ -#define NAND_CANAUTOINCR(chip) (!(chip->options & NAND_NO_AUTOINCR)) #define NAND_MUST_PAD(chip) (!(chip->options & NAND_NO_PADDING)) #define NAND_HAS_CACHEPROG(chip) ((chip->options & NAND_CACHEPRG)) #define NAND_HAS_COPYBACK(chip) ((chip->options & NAND_COPYBACK)) @@ -216,7 +213,7 @@ typedef enum { && (chip->page_shift > 9)) /* Mask to zero out the chip options, which come from the id table */ -#define NAND_CHIPOPTIONS_MSK (0x0000ffff & ~NAND_NO_AUTOINCR) +#define NAND_CHIPOPTIONS_MSK 0x0000ffff /* Non chip related options */ /* This option skips the bbt scan during initialization. */ -- cgit v1.2.3 From b4f7aa84d6ff44327ab91a2973ebf0c2a7797d24 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Mon, 30 Apr 2012 19:30:47 +0200 Subject: mtd: add read_byte support to plat_nand Lantiq SoCs have a External Bus Unit (EBU) that is used to attach MTD media. As we need to co-exist with PCI on the same bus, certain swapping settings must be applied. Similar to the NOR map driver we need to apply a fix to make NAND work. The easiest way is to use byte reads. Signed-off-by: John Crispin Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- include/linux/mtd/nand.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 627f0c575ac4..94a6679bfc2e 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -654,6 +654,7 @@ struct platform_nand_ctrl { void (*cmd_ctrl)(struct mtd_info *mtd, int dat, unsigned int ctrl); void (*write_buf)(struct mtd_info *mtd, const uint8_t *buf, int len); void (*read_buf)(struct mtd_info *mtd, uint8_t *buf, int len); + unsigned char (*read_byte)(struct mtd_info *mtd); void *priv; }; -- cgit v1.2.3 From 1fbb938dff5b6bb4514a4e7600276b03c7f08e25 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 2 May 2012 10:14:55 -0700 Subject: mtd: nand: add 'oob_required' argument to NAND {read,write}_page interfaces New NAND controllers can perform read/write via HW engines which don't expose OOB data in their DMA mode. To reflect this, we should rework the nand_chip / nand_ecc_ctrl interfaces that assume that drivers will always read/write OOB data in the nand_chip.oob_poi buffer. A better interface includes a boolean argument that explicitly tells the callee when OOB data is requested by the calling layer (for reading/writing to/from nand_chip.oob_poi). This patch adds the 'oob_required' parameter to each relevant {read,write}_page interface; all 'oob_required' parameters are left unused for now. The next patch will set the parameter properly in the nand_base.c callers, and follow-up patches will make use of 'oob_required' in some of the callee functions. Note that currently, there is no harm in ignoring the 'oob_required' parameter and *always* utilizing nand_chip.oob_poi, but there can be performance/complexity/design benefits from avoiding filling oob_poi in the common case. I will try to implement this for some drivers which can be ported easily. Note: I couldn't compile-test all of these easily, as some had ARCH dependencies. [dwmw2: Merge later 1/0 vs. true/false cleanup] Signed-off-by: Brian Norris Reviewed-by: Shmulik Ladkani Acked-by: Jiandong Zheng Acked-by: Mike Dunn Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- include/linux/mtd/nand.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 94a6679bfc2e..c7755f455c81 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -360,15 +360,15 @@ struct nand_ecc_ctrl { int (*correct)(struct mtd_info *mtd, uint8_t *dat, uint8_t *read_ecc, uint8_t *calc_ecc); int (*read_page_raw)(struct mtd_info *mtd, struct nand_chip *chip, - uint8_t *buf, int page); + uint8_t *buf, int oob_required, int page); void (*write_page_raw)(struct mtd_info *mtd, struct nand_chip *chip, - const uint8_t *buf); + const uint8_t *buf, int oob_required); int (*read_page)(struct mtd_info *mtd, struct nand_chip *chip, - uint8_t *buf, int page); + uint8_t *buf, int oob_required, int page); int (*read_subpage)(struct mtd_info *mtd, struct nand_chip *chip, uint32_t offs, uint32_t len, uint8_t *buf); void (*write_page)(struct mtd_info *mtd, struct nand_chip *chip, - const uint8_t *buf); + const uint8_t *buf, int oob_required); int (*write_oob_raw)(struct mtd_info *mtd, struct nand_chip *chip, int page); int (*read_oob_raw)(struct mtd_info *mtd, struct nand_chip *chip, @@ -504,7 +504,8 @@ struct nand_chip { int (*errstat)(struct mtd_info *mtd, struct nand_chip *this, int state, int status, int page); int (*write_page)(struct mtd_info *mtd, struct nand_chip *chip, - const uint8_t *buf, int page, int cached, int raw); + const uint8_t *buf, int oob_required, int page, + int cached, int raw); int chip_delay; unsigned int options; -- cgit v1.2.3 From e10db1f00a5e3c2ec04d7fe26c7444dc55a59b19 Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Fri, 4 May 2012 21:42:05 -0400 Subject: mtd: gpmi: add device tree support to gpmi-nand This patch just adds the DT support to gpmi-nand. Signed-off-by: Huang Shijie Signed-off-by: Huang Shijie Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- include/linux/mtd/gpmi-nand.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/gpmi-nand.h b/include/linux/mtd/gpmi-nand.h index 69b6dbf46b5e..ed3c4e09f3d1 100644 --- a/include/linux/mtd/gpmi-nand.h +++ b/include/linux/mtd/gpmi-nand.h @@ -23,12 +23,12 @@ #define GPMI_NAND_RES_SIZE 6 /* Resource names for the GPMI NAND driver. */ -#define GPMI_NAND_GPMI_REGS_ADDR_RES_NAME "GPMI NAND GPMI Registers" +#define GPMI_NAND_GPMI_REGS_ADDR_RES_NAME "gpmi-nand" #define GPMI_NAND_GPMI_INTERRUPT_RES_NAME "GPMI NAND GPMI Interrupt" -#define GPMI_NAND_BCH_REGS_ADDR_RES_NAME "GPMI NAND BCH Registers" -#define GPMI_NAND_BCH_INTERRUPT_RES_NAME "GPMI NAND BCH Interrupt" +#define GPMI_NAND_BCH_REGS_ADDR_RES_NAME "bch" +#define GPMI_NAND_BCH_INTERRUPT_RES_NAME "bch" #define GPMI_NAND_DMA_CHANNELS_RES_NAME "GPMI NAND DMA Channels" -#define GPMI_NAND_DMA_INTERRUPT_RES_NAME "GPMI NAND DMA Interrupt" +#define GPMI_NAND_DMA_INTERRUPT_RES_NAME "gpmi-dma" /** * struct gpmi_nand_platform_data - GPMI NAND driver platform data. -- cgit v1.2.3 From 5c2ffb11d40dd967eecb45b8570a871746ba124b Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Wed, 9 May 2012 13:06:35 +0300 Subject: mtd: nand: remove 'sndcmd' parameter of 'read_oob/read_oob_raw' As of [mtd: nand: remove autoincrement 'sndcmd' code], the NAND_CMD_READ0 command is issued unconditionally. Thus, read_oob/read_oob_raw's 'sndcmd' argument is no longer needed, as well as their return code. Remove the 'sndcmd' parameter, and set the return code to 0. Signed-off-by: Shmulik Ladkani Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- include/linux/mtd/nand.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index c7755f455c81..57977c640529 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -372,9 +372,8 @@ struct nand_ecc_ctrl { int (*write_oob_raw)(struct mtd_info *mtd, struct nand_chip *chip, int page); int (*read_oob_raw)(struct mtd_info *mtd, struct nand_chip *chip, - int page, int sndcmd); - int (*read_oob)(struct mtd_info *mtd, struct nand_chip *chip, int page, - int sndcmd); + int page); + int (*read_oob)(struct mtd_info *mtd, struct nand_chip *chip, int page); int (*write_oob)(struct mtd_info *mtd, struct nand_chip *chip, int page); }; -- cgit v1.2.3 From c3ba9698152b17fdc2c7cd0f7cbeb571e3367e9d Mon Sep 17 00:00:00 2001 From: Dan Magenheimer Date: Mon, 9 Apr 2012 17:06:54 -0600 Subject: mm: frontswap: add frontswap header file Frontswap is the alter ego of cleancache, the "yang" to cleancache's "yin"... and more precisely frontswap is the provider of anonymous pages to transcendent memory to nicely complement cleancache's providing of clean pagecache pages to transcendent memory. For optimal use of transcendent memory, both are necessary... because a kernel under memory pressure first reclaims clean pagecache pages and, when under more memory pressure, starts swapping anonymous pages. Frontswap and cleancache (which was merged at 3.0) are the "frontends" and the only necessary changes to the core kernel for transcendent memory; all other supporting code -- the "backends" -- is implemented as drivers. See the LWN.net article "Transcendent memory in a nutshell" for a detailed overview of frontswap and related kernel parts: https://lwn.net/Articles/454795/ Frontswap code was first posted publicly in January 2009 and on LKML in May 2009, and has remained functionally stable for nearly three years now. It is barely invasive, touching only the swap subsystem and adds less than 100 lines of code to existing swap subsystem code files. It has improved syntactically substantially between V1 and this posting of V14, thanks to the review of a few kernel developers, and has adapted easily to at least one major swap subsystem change. As of 3.4, there are three in-tree users of frontswap patiently waiting for this patchset and for CONFIG_FRONTSWAP to be enabled: zcache (staging driver merged at 2.6.39), Xen tmem (merged at 3.0 and 3.1) and RAMster (staging driver merged at 3.4). In addition, a RFC has been posted for a KVM backend. The frontswap patchset has been in linux-next since next-110603. Earlier versions of frontswap already ship in the Oracle Unbreakable Enterprise Kernel and SuSE SLES. This patch, 1of4, provides the header file for the core code for frontswap that interfaces between the hooks in the swap subsystem and a frontswap backend via frontswap_ops. --- New file added: include/linux/frontswap.h [v14: add support for writethrough, per suggestion by aarcange@redhat.com] [v14: rebase to 3.4-rc2] [v11: konrad.wilk@oracle.com: squashed s/flush/invalidate/ in] [v10: no change] [v9: akpm@linux-foundation.org: change "flush" to "invalidate", part 1] [v8: rebase to 3.0-rc4] [v7: rebase to 3.0-rc3] [v7: JBeulich@novell.com: new static inlines resolve to no-ops if not config'd] [v7: JBeulich@novell.com: avoid redundant shifts/divides for *_bit lib calls] [v6: rebase to 3.1-rc1] [v5: no change from v4] [v4: rebase to 2.6.39] Signed-off-by: Dan Magenheimer Reviewed-by: Andrew Morton Acked-by: Jan Beulich Acked-by: Seth Jennings Cc: Jeremy Fitzhardinge Cc: Hugh Dickins Cc: Johannes Weiner Cc: Nitin Gupta Cc: Matthew Wilcox Cc: Chris Mason Cc: Rik Riel [v15: int/bool on some functions] Signed-off-by: Konrad Wilk --- include/linux/frontswap.h | 127 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 include/linux/frontswap.h (limited to 'include/linux') diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h new file mode 100644 index 000000000000..68ff7af5c5fb --- /dev/null +++ b/include/linux/frontswap.h @@ -0,0 +1,127 @@ +#ifndef _LINUX_FRONTSWAP_H +#define _LINUX_FRONTSWAP_H + +#include +#include +#include + +struct frontswap_ops { + void (*init)(unsigned); + int (*put_page)(unsigned, pgoff_t, struct page *); + int (*get_page)(unsigned, pgoff_t, struct page *); + void (*invalidate_page)(unsigned, pgoff_t); + void (*invalidate_area)(unsigned); +}; + +extern bool frontswap_enabled; +extern struct frontswap_ops + frontswap_register_ops(struct frontswap_ops *ops); +extern void frontswap_shrink(unsigned long); +extern unsigned long frontswap_curr_pages(void); +extern void frontswap_writethrough(bool); + +extern void __frontswap_init(unsigned type); +extern int __frontswap_put_page(struct page *page); +extern int __frontswap_get_page(struct page *page); +extern void __frontswap_invalidate_page(unsigned, pgoff_t); +extern void __frontswap_invalidate_area(unsigned); + +#ifdef CONFIG_FRONTSWAP + +static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset) +{ + bool ret = false; + + if (frontswap_enabled && sis->frontswap_map) + ret = test_bit(offset, sis->frontswap_map); + return ret; +} + +static inline void frontswap_set(struct swap_info_struct *sis, pgoff_t offset) +{ + if (frontswap_enabled && sis->frontswap_map) + set_bit(offset, sis->frontswap_map); +} + +static inline void frontswap_clear(struct swap_info_struct *sis, pgoff_t offset) +{ + if (frontswap_enabled && sis->frontswap_map) + clear_bit(offset, sis->frontswap_map); +} + +static inline void frontswap_map_set(struct swap_info_struct *p, + unsigned long *map) +{ + p->frontswap_map = map; +} + +static inline unsigned long *frontswap_map_get(struct swap_info_struct *p) +{ + return p->frontswap_map; +} +#else +/* all inline routines become no-ops and all externs are ignored */ + +#define frontswap_enabled (0) + +static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset) +{ + return false; +} + +static inline void frontswap_set(struct swap_info_struct *sis, pgoff_t offset) +{ +} + +static inline void frontswap_clear(struct swap_info_struct *sis, pgoff_t offset) +{ +} + +static inline void frontswap_map_set(struct swap_info_struct *p, + unsigned long *map) +{ +} + +static inline unsigned long *frontswap_map_get(struct swap_info_struct *p) +{ + return NULL; +} +#endif + +static inline int frontswap_put_page(struct page *page) +{ + int ret = -1; + + if (frontswap_enabled) + ret = __frontswap_put_page(page); + return ret; +} + +static inline int frontswap_get_page(struct page *page) +{ + int ret = -1; + + if (frontswap_enabled) + ret = __frontswap_get_page(page); + return ret; +} + +static inline void frontswap_invalidate_page(unsigned type, pgoff_t offset) +{ + if (frontswap_enabled) + __frontswap_invalidate_page(type, offset); +} + +static inline void frontswap_invalidate_area(unsigned type) +{ + if (frontswap_enabled) + __frontswap_invalidate_area(type); +} + +static inline void frontswap_init(unsigned type) +{ + if (frontswap_enabled) + __frontswap_init(type); +} + +#endif /* _LINUX_FRONTSWAP_H */ -- cgit v1.2.3 From 38b5faf4b178d5279b1fca5d7dadc68881342660 Mon Sep 17 00:00:00 2001 From: Dan Magenheimer Date: Mon, 9 Apr 2012 17:08:06 -0600 Subject: mm: frontswap: core swap subsystem hooks and headers This patch, 2of4, contains the changes to the core swap subsystem. This includes: (1) makes available core swap data structures (swap_lock, swap_list and swap_info) that are needed by frontswap.c but we don't need to expose them to the dozens of files that include swap.h so we create a new swapfile.h just to extern-ify these and modify their declarations to non-static (2) adds frontswap-related elements to swap_info_struct. Frontswap_map points to vzalloc'ed one-bit-per-swap-page metadata that indicates whether the swap page is in frontswap or in the device and frontswap_pages counts how many pages are in frontswap. (3) adds hooks in the swap subsystem and extends try_to_unuse so that frontswap_shrink can do a "partial swapoff". Note that a failed frontswap_map allocation is safe... failure is noted by lack of "FS" in the subsequent printk. --- [v14: rebase to 3.4-rc2] [v10: no change] [v9: akpm@linux-foundation.org: mark some statics __read_mostly] [v9: akpm@linux-foundation.org: add clarifying comments] [v9: akpm@linux-foundation.org: no need to loop repeating try_to_unuse] [v9: error27@gmail.com: remove superfluous check for NULL] [v8: rebase to 3.0-rc4] [v8: kamezawa.hiroyu@jp.fujitsu.com: change counter to atomic_t to avoid races] [v8: kamezawa.hiroyu@jp.fujitsu.com: comment to clarify informational counters] [v7: rebase to 3.0-rc3] [v7: JBeulich@novell.com: add new swap struct elements only if config'd] [v6: rebase to 3.0-rc1] [v6: lliubbo@gmail.com: fix null pointer deref if vzalloc fails] [v6: konrad.wilk@oracl.com: various checks and code clarifications/comments] [v5: no change from v4] [v4: rebase to 2.6.39] Signed-off-by: Dan Magenheimer Reviewed-by: Kamezawa Hiroyuki Acked-by: Jan Beulich Acked-by: Seth Jennings Cc: Jeremy Fitzhardinge Cc: Hugh Dickins Cc: Johannes Weiner Cc: Nitin Gupta Cc: Matthew Wilcox Cc: Chris Mason Cc: Rik Riel Cc: Andrew Morton [v11: Rebased, fixed mm/swapfile.c context change] Signed-off-by: Konrad Rzeszutek Wilk --- include/linux/swap.h | 4 ++++ include/linux/swapfile.h | 13 +++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 include/linux/swapfile.h (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index b1fd5c7925fe..50a55e2d58ec 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -197,6 +197,10 @@ struct swap_info_struct { struct block_device *bdev; /* swap device or bdev of swap file */ struct file *swap_file; /* seldom referenced */ unsigned int old_block_size; /* seldom referenced */ +#ifdef CONFIG_FRONTSWAP + unsigned long *frontswap_map; /* frontswap in-use, one bit per page */ + atomic_t frontswap_pages; /* frontswap pages in-use counter */ +#endif }; struct swap_list_t { diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h new file mode 100644 index 000000000000..e282624e8c10 --- /dev/null +++ b/include/linux/swapfile.h @@ -0,0 +1,13 @@ +#ifndef _LINUX_SWAPFILE_H +#define _LINUX_SWAPFILE_H + +/* + * these were static in swapfile.c but frontswap.c needs them and we don't + * want to expose them to the dozens of source files that include swap.h + */ +extern spinlock_t swap_lock; +extern struct swap_list_t swap_list; +extern struct swap_info_struct *swap_info[]; +extern int try_to_unuse(unsigned int, bool, unsigned long); + +#endif /* _LINUX_SWAPFILE_H */ -- cgit v1.2.3 From 165c8aed5bbc6bdddbccae0ba9db451732558ff9 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 15 May 2012 11:32:15 -0400 Subject: frontswap: s/put_page/store/g s/get_page/load Sounds so much more natural. Suggested-by: Andrea Arcangeli Signed-off-by: Konrad Rzeszutek Wilk --- include/linux/frontswap.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h index 68ff7af5c5fb..0e4e2eec5c1d 100644 --- a/include/linux/frontswap.h +++ b/include/linux/frontswap.h @@ -7,8 +7,8 @@ struct frontswap_ops { void (*init)(unsigned); - int (*put_page)(unsigned, pgoff_t, struct page *); - int (*get_page)(unsigned, pgoff_t, struct page *); + int (*store)(unsigned, pgoff_t, struct page *); + int (*load)(unsigned, pgoff_t, struct page *); void (*invalidate_page)(unsigned, pgoff_t); void (*invalidate_area)(unsigned); }; @@ -21,8 +21,8 @@ extern unsigned long frontswap_curr_pages(void); extern void frontswap_writethrough(bool); extern void __frontswap_init(unsigned type); -extern int __frontswap_put_page(struct page *page); -extern int __frontswap_get_page(struct page *page); +extern int __frontswap_store(struct page *page); +extern int __frontswap_load(struct page *page); extern void __frontswap_invalidate_page(unsigned, pgoff_t); extern void __frontswap_invalidate_area(unsigned); @@ -88,21 +88,21 @@ static inline unsigned long *frontswap_map_get(struct swap_info_struct *p) } #endif -static inline int frontswap_put_page(struct page *page) +static inline int frontswap_store(struct page *page) { int ret = -1; if (frontswap_enabled) - ret = __frontswap_put_page(page); + ret = __frontswap_store(page); return ret; } -static inline int frontswap_get_page(struct page *page) +static inline int frontswap_load(struct page *page) { int ret = -1; if (frontswap_enabled) - ret = __frontswap_get_page(page); + ret = __frontswap_load(page); return ret; } -- cgit v1.2.3 From 8f888ef846d4481e24c74b4a91ece771d2bcbcb5 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 22 May 2012 22:43:41 -0400 Subject: jbd2: change disk layout for metadata checksumming Define flags and allocate space in on-disk journal structures to support checksumming of journal metadata. Signed-off-by: Darrick J. Wong Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 912c30a8ddb1..809c439066c5 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -147,12 +147,24 @@ typedef struct journal_header_s #define JBD2_CRC32_CHKSUM 1 #define JBD2_MD5_CHKSUM 2 #define JBD2_SHA1_CHKSUM 3 +#define JBD2_CRC32C_CHKSUM 4 #define JBD2_CRC32_CHKSUM_SIZE 4 #define JBD2_CHECKSUM_BYTES (32 / sizeof(u32)) /* * Commit block header for storing transactional checksums: + * + * NOTE: If FEATURE_COMPAT_CHECKSUM (checksum v1) is set, the h_chksum* + * fields are used to store a checksum of the descriptor and data blocks. + * + * If FEATURE_INCOMPAT_CSUM_V2 (checksum v2) is set, then the h_chksum + * field is used to store crc32c(uuid+commit_block). Each journal metadata + * block gets its own checksum, and data block checksums are stored in + * journal_block_tag (in the descriptor). The other h_chksum* fields are + * not used. + * + * Checksum v1 and v2 are mutually exclusive features. */ struct commit_header { __be32 h_magic; @@ -175,13 +187,19 @@ struct commit_header { typedef struct journal_block_tag_s { __be32 t_blocknr; /* The on-disk block number */ - __be32 t_flags; /* See below */ + __be16 t_checksum; /* truncated crc32c(uuid+seq+block) */ + __be16 t_flags; /* See below */ __be32 t_blocknr_high; /* most-significant high 32bits. */ } journal_block_tag_t; #define JBD2_TAG_SIZE32 (offsetof(journal_block_tag_t, t_blocknr_high)) #define JBD2_TAG_SIZE64 (sizeof(journal_block_tag_t)) +/* Tail of descriptor block, for checksumming */ +struct jbd2_journal_block_tail { + __be32 t_checksum; /* crc32c(uuid+descr_block) */ +}; + /* * The revoke descriptor: used on disk to describe a series of blocks to * be revoked from the log @@ -192,6 +210,10 @@ typedef struct jbd2_journal_revoke_header_s __be32 r_count; /* Count of bytes used in the block */ } jbd2_journal_revoke_header_t; +/* Tail of revoke block, for checksumming */ +struct jbd2_journal_revoke_tail { + __be32 r_checksum; /* crc32c(uuid+revoke_block) */ +}; /* Definitions for the journal tag flags word: */ #define JBD2_FLAG_ESCAPE 1 /* on-disk block is escaped */ @@ -241,7 +263,10 @@ typedef struct journal_superblock_s __be32 s_max_trans_data; /* Limit of data blocks per trans. */ /* 0x0050 */ - __u32 s_padding[44]; + __u8 s_checksum_type; /* checksum type */ + __u8 s_padding2[3]; + __u32 s_padding[42]; + __be32 s_checksum; /* crc32c(superblock) */ /* 0x0100 */ __u8 s_users[16*48]; /* ids of all fs'es sharing the log */ @@ -263,6 +288,7 @@ typedef struct journal_superblock_s #define JBD2_FEATURE_INCOMPAT_REVOKE 0x00000001 #define JBD2_FEATURE_INCOMPAT_64BIT 0x00000002 #define JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT 0x00000004 +#define JBD2_FEATURE_INCOMPAT_CSUM_V2 0x00000008 /* Features known to this kernel version: */ #define JBD2_KNOWN_COMPAT_FEATURES JBD2_FEATURE_COMPAT_CHECKSUM -- cgit v1.2.3 From 1227dd773d8d4e3983b4b751f9ffa0f41402fb7c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 24 Apr 2012 02:44:49 -0400 Subject: TIF_NOTIFY_RESUME is defined on all targets now Signed-off-by: Al Viro --- include/linux/tracehook.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 51bd91d911c3..8a2a3fc9bd05 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -153,7 +153,6 @@ static inline void tracehook_signal_handler(int sig, siginfo_t *info, ptrace_notify(SIGTRAP); } -#ifdef TIF_NOTIFY_RESUME /** * set_notify_resume - cause tracehook_notify_resume() to be called * @task: task that will call tracehook_notify_resume() @@ -185,6 +184,5 @@ static inline void set_notify_resume(struct task_struct *task) static inline void tracehook_notify_resume(struct pt_regs *regs) { } -#endif /* TIF_NOTIFY_RESUME */ #endif /* */ -- cgit v1.2.3 From a42c6ded827dbd396d2efde7530620be029a72d1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 23 May 2012 14:44:37 -0400 Subject: move key_repace_session_keyring() into tracehook_notify_resume() Signed-off-by: Al Viro --- include/linux/tracehook.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 8a2a3fc9bd05..b9ca903bb553 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -183,6 +183,8 @@ static inline void set_notify_resume(struct task_struct *task) */ static inline void tracehook_notify_resume(struct pt_regs *regs) { + if (current->replacement_session_keyring) + key_replace_session_keyring(); } #endif /* */ -- cgit v1.2.3 From e73f8959af0439d114847eab5a8a5ce48f1217c4 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 11 May 2012 10:59:07 +1000 Subject: task_work_add: generic process-context callbacks Provide a simple mechanism that allows running code in the (nonatomic) context of the arbitrary task. The caller does task_work_add(task, task_work) and this task executes task_work->func() either from do_notify_resume() or from do_exit(). The callback can rely on PF_EXITING to detect the latter case. "struct task_work" can be embedded in another struct, still it has "void *data" to handle the most common/simple case. This allows us to kill the ->replacement_session_keyring hack, and potentially this can have more users. Performance-wise, this adds 2 "unlikely(!hlist_empty())" checks into tracehook_notify_resume() and do_exit(). But at the same time we can remove the "replacement_session_keyring != NULL" checks from arch/*/signal.c and exit_creds(). Note: task_work_add/task_work_run abuses ->pi_lock. This is only because this lock is already used by lookup_pi_state() to synchronize with do_exit() setting PF_EXITING. Fortunately the scope of this lock in task_work.c is really tiny, and the code is unlikely anyway. Signed-off-by: Oleg Nesterov Acked-by: David Howells Cc: Thomas Gleixner Cc: Richard Kuo Cc: Linus Torvalds Cc: Alexander Gordeev Cc: Chris Zankel Cc: David Smith Cc: "Frank Ch. Eigler" Cc: Geert Uytterhoeven Cc: Larry Woodman Cc: Peter Zijlstra Cc: Tejun Heo Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- include/linux/sched.h | 2 ++ include/linux/task_work.h | 33 +++++++++++++++++++++++++++++++++ include/linux/tracehook.h | 11 +++++++++++ 3 files changed, 46 insertions(+) create mode 100644 include/linux/task_work.h (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5ea8baea9387..7930131abc1a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1400,6 +1400,8 @@ struct task_struct { int (*notifier)(void *priv); void *notifier_data; sigset_t *notifier_mask; + struct hlist_head task_works; + struct audit_context *audit_context; #ifdef CONFIG_AUDITSYSCALL uid_t loginuid; diff --git a/include/linux/task_work.h b/include/linux/task_work.h new file mode 100644 index 000000000000..294d5d5e90b1 --- /dev/null +++ b/include/linux/task_work.h @@ -0,0 +1,33 @@ +#ifndef _LINUX_TASK_WORK_H +#define _LINUX_TASK_WORK_H + +#include +#include + +struct task_work; +typedef void (*task_work_func_t)(struct task_work *); + +struct task_work { + struct hlist_node hlist; + task_work_func_t func; + void *data; +}; + +static inline void +init_task_work(struct task_work *twork, task_work_func_t func, void *data) +{ + twork->func = func; + twork->data = data; +} + +int task_work_add(struct task_struct *task, struct task_work *twork, bool); +struct task_work *task_work_cancel(struct task_struct *, task_work_func_t); +void task_work_run(void); + +static inline void exit_task_work(struct task_struct *task) +{ + if (unlikely(!hlist_empty(&task->task_works))) + task_work_run(); +} + +#endif /* _LINUX_TASK_WORK_H */ diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index b9ca903bb553..b2dd0917ca0d 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -49,6 +49,7 @@ #include #include #include +#include struct linux_binprm; /* @@ -164,8 +165,10 @@ static inline void tracehook_signal_handler(int sig, siginfo_t *info, */ static inline void set_notify_resume(struct task_struct *task) { +#ifdef TIF_NOTIFY_RESUME if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_RESUME)) kick_process(task); +#endif } /** @@ -185,6 +188,14 @@ static inline void tracehook_notify_resume(struct pt_regs *regs) { if (current->replacement_session_keyring) key_replace_session_keyring(); + /* + * The caller just cleared TIF_NOTIFY_RESUME. This barrier + * pairs with task_work_add()->set_notify_resume() after + * hlist_add_head(task->task_works); + */ + smp_mb__after_clear_bit(); + if (unlikely(!hlist_empty(¤t->task_works))) + task_work_run(); } #endif /* */ -- cgit v1.2.3 From 4d1d61a6b203d957777d73fcebf19d90b038b5b2 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 11 May 2012 10:59:08 +1000 Subject: genirq: reimplement exit_irq_thread() hook via task_work_add() exit_irq_thread() and task->irq_thread are needed to handle the unexpected (and unlikely) exit of irq-thread. We can use task_work instead and make this all private to kernel/irq/manage.c, cleanup plus micro-optimization. 1. rename exit_irq_thread() to irq_thread_dtor(), make it static, and move it up before irq_thread(). 2. change irq_thread() to do task_work_add(irq_thread_dtor) at the start and task_work_cancel() before return. tracehook_notify_resume() can never play with kthreads, only do_exit()->exit_task_work() can call the callback and this is what we want. 3. remove task_struct->irq_thread and the special hook in do_exit(). Signed-off-by: Oleg Nesterov Reviewed-by: Thomas Gleixner Cc: David Howells Cc: Richard Kuo Cc: Linus Torvalds Cc: Alexander Gordeev Cc: Chris Zankel Cc: David Smith Cc: "Frank Ch. Eigler" Cc: Geert Uytterhoeven Cc: Larry Woodman Cc: Peter Zijlstra Cc: Tejun Heo Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- include/linux/interrupt.h | 4 ---- include/linux/sched.h | 10 ++-------- 2 files changed, 2 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index c91171599cb6..e68a8e53bb59 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -142,8 +142,6 @@ request_any_context_irq(unsigned int irq, irq_handler_t handler, extern int __must_check request_percpu_irq(unsigned int irq, irq_handler_t handler, const char *devname, void __percpu *percpu_dev_id); - -extern void exit_irq_thread(void); #else extern int __must_check @@ -177,8 +175,6 @@ request_percpu_irq(unsigned int irq, irq_handler_t handler, { return request_irq(irq, handler, 0, devname, percpu_dev_id); } - -static inline void exit_irq_thread(void) { } #endif extern void free_irq(unsigned int, void *); diff --git a/include/linux/sched.h b/include/linux/sched.h index 7930131abc1a..da013853a622 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1301,11 +1301,6 @@ struct task_struct { unsigned sched_reset_on_fork:1; unsigned sched_contributes_to_load:1; -#ifdef CONFIG_GENERIC_HARDIRQS - /* IRQ handler threads */ - unsigned irq_thread:1; -#endif - pid_t pid; pid_t tgid; @@ -1313,10 +1308,9 @@ struct task_struct { /* Canary value for the -fstack-protector gcc feature */ unsigned long stack_canary; #endif - - /* + /* * pointers to (original) parent process, youngest child, younger sibling, - * older sibling, respectively. (p->father can be replaced with + * older sibling, respectively. (p->father can be replaced with * p->real_parent->pid) */ struct task_struct __rcu *real_parent; /* real parent process */ -- cgit v1.2.3 From 413cd3d9abeaef590e5ce00564f7a443165db238 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 11 May 2012 10:59:08 +1000 Subject: keys: change keyctl_session_to_parent() to use task_work_add() Change keyctl_session_to_parent() to use task_work_add() and move key_replace_session_keyring() logic into task_work->func(). Note that we do task_work_cancel() before task_work_add() to ensure that only one work can be pending at any time. This is important, we must not allow user-space to abuse the parent's ->task_works list. The callback, replace_session_keyring(), checks PF_EXITING. I guess this is not really needed but looks better. As a side effect, this fixes the (unlikely) race. The callers of key_replace_session_keyring() and keyctl_session_to_parent() lack the necessary barriers, the parent can miss the request. Now we can remove task_struct->replacement_session_keyring and related code. Signed-off-by: Oleg Nesterov Acked-by: David Howells Cc: Thomas Gleixner Cc: Richard Kuo Cc: Linus Torvalds Cc: Alexander Gordeev Cc: Chris Zankel Cc: David Smith Cc: "Frank Ch. Eigler" Cc: Geert Uytterhoeven Cc: Larry Woodman Cc: Peter Zijlstra Cc: Tejun Heo Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- include/linux/key.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/key.h b/include/linux/key.h index 5231800770e1..2a0ee11584e9 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -33,6 +33,8 @@ typedef uint32_t key_perm_t; struct key; +#define key_replace_session_keyring() do { } while (0) + #ifdef CONFIG_KEYS #undef KEY_DEBUGGING @@ -308,9 +310,6 @@ static inline bool key_is_instantiated(const struct key *key) #ifdef CONFIG_SYSCTL extern ctl_table key_sysctls[]; #endif - -extern void key_replace_session_keyring(void); - /* * the userspace interface */ @@ -334,7 +333,6 @@ extern void key_init(void); #define key_fsuid_changed(t) do { } while(0) #define key_fsgid_changed(t) do { } while(0) #define key_init() do { } while(0) -#define key_replace_session_keyring() do { } while(0) #endif /* CONFIG_KEYS */ #endif /* __KERNEL__ */ -- cgit v1.2.3 From dea649b8ac1861107c5d91e1a71121434fc64193 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 11 May 2012 10:59:09 +1000 Subject: keys: kill the dummy key_replace_session_keyring() After the previouse change key_replace_session_keyring() becomes a nop. Remove the dummy definition in key.h and update the callers in arch/*/kernel/signal.c. Signed-off-by: Oleg Nesterov Acked-by: David Howells Cc: Thomas Gleixner Cc: Richard Kuo Cc: Linus Torvalds Cc: Alexander Gordeev Cc: Chris Zankel Cc: David Smith Cc: "Frank Ch. Eigler" Cc: Geert Uytterhoeven Cc: Larry Woodman Cc: Peter Zijlstra Cc: Tejun Heo Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- include/linux/key.h | 2 -- include/linux/tracehook.h | 2 -- 2 files changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/key.h b/include/linux/key.h index 2a0ee11584e9..4cd22ed627ef 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -33,8 +33,6 @@ typedef uint32_t key_perm_t; struct key; -#define key_replace_session_keyring() do { } while (0) - #ifdef CONFIG_KEYS #undef KEY_DEBUGGING diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index b2dd0917ca0d..6a4d82bedb03 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -186,8 +186,6 @@ static inline void set_notify_resume(struct task_struct *task) */ static inline void tracehook_notify_resume(struct pt_regs *regs) { - if (current->replacement_session_keyring) - key_replace_session_keyring(); /* * The caller just cleared TIF_NOTIFY_RESUME. This barrier * pairs with task_work_add()->set_notify_resume() after -- cgit v1.2.3 From f23ca335462e3c84f13270b9e65f83936068ec2c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 11 May 2012 10:59:09 +1000 Subject: keys: kill task_struct->replacement_session_keyring Kill the no longer used task_struct->replacement_session_keyring, update copy_creds() and exit_creds(). Signed-off-by: Oleg Nesterov Acked-by: David Howells Cc: Thomas Gleixner Cc: Richard Kuo Cc: Linus Torvalds Cc: Alexander Gordeev Cc: Chris Zankel Cc: David Smith Cc: "Frank Ch. Eigler" Cc: Geert Uytterhoeven Cc: Larry Woodman Cc: Peter Zijlstra Cc: Tejun Heo Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- include/linux/sched.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index da013853a622..17c6c929ee94 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1357,8 +1357,6 @@ struct task_struct { * credentials (COW) */ const struct cred __rcu *cred; /* effective (overridable) subjective task * credentials (COW) */ - struct cred *replacement_session_keyring; /* for KEYCTL_SESSION_TO_PARENT */ - char comm[TASK_COMM_LEN]; /* executable name excluding path - access with [gs]et_task_comm (which lock it with task_lock()) -- cgit v1.2.3 From e5400321a6f15ce0fe77c8455954f213ef7dcc54 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 9 May 2012 23:39:34 +0900 Subject: clockevents: Make clockevents_config() a global symbol Make clockevents_config() into a global symbol to allow it to be used by compiled-in clockevent drivers. This is needed by drivers that want to update the timer frequency after registration time. Signed-off-by: Magnus Damm Tested-by: Simon Horman Cc: arnd@arndb.de Cc: johnstul@us.ibm.com Cc: rjw@sisk.pl Cc: lethal@linux-sh.org Cc: gregkh@linuxfoundation.org Cc: olof@lixom.net Cc: Magnus Damm Link: http://lkml.kernel.org/r/20120509143934.27521.46553.sendpatchset@w520 Signed-off-by: Thomas Gleixner --- include/linux/clockchips.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 81e803e90aa4..acba894374a1 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -132,6 +132,7 @@ extern u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt); extern void clockevents_register_device(struct clock_event_device *dev); +extern void clockevents_config(struct clock_event_device *dev, u32 freq); extern void clockevents_config_and_register(struct clock_event_device *dev, u32 freq, unsigned long min_delta, unsigned long max_delta); -- cgit v1.2.3 From 01b5adcebb977bc61b64167adce6d8260c9da33c Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 27 May 2012 07:50:56 -0400 Subject: jbd2: Grab a reference to the crc32c driver if necessary Obtain a reference to the crc32c driver if needed for the v2 checksum. Signed-off-by: Darrick J. Wong Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 809c439066c5..71e77dddebf1 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -31,6 +31,7 @@ #include #include #include +#include #endif #define journal_oom_retry 1 @@ -965,6 +966,9 @@ struct journal_s * superblock pointer here */ void *j_private; + + /* Reference to checksum algorithm driver via cryptoapi */ + struct crypto_shash *j_chksum_driver; }; /* @@ -1294,6 +1298,25 @@ static inline int jbd_space_needed(journal_t *journal) extern int jbd_blocks_per_page(struct inode *inode); +static inline u32 jbd2_chksum(journal_t *journal, u32 crc, + const void *address, unsigned int length) +{ + struct { + struct shash_desc shash; + char ctx[crypto_shash_descsize(journal->j_chksum_driver)]; + } desc; + int err; + + desc.shash.tfm = journal->j_chksum_driver; + desc.shash.flags = 0; + *(u32 *)desc.ctx = crc; + + err = crypto_shash_update(&desc.shash, address, length); + BUG_ON(err); + + return *(u32 *)desc.ctx; +} + #ifdef __KERNEL__ #define buffer_trace_init(bh) do {} while (0) -- cgit v1.2.3 From 4fd5ea43bc11602bfabe2c8f5378586d34bd2b0a Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 27 May 2012 08:08:22 -0400 Subject: jbd2: checksum journal superblock Calculate and verify a checksum covering the journal superblock. Signed-off-by: Darrick J. Wong Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 71e77dddebf1..a9632bc55d97 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -969,6 +969,9 @@ struct journal_s /* Reference to checksum algorithm driver via cryptoapi */ struct crypto_shash *j_chksum_driver; + + /* Precomputed journal UUID checksum for seeding other checksums */ + __u32 j_csum_seed; }; /* -- cgit v1.2.3 From e93376c20b70d1e62bb3246acd1bbe21fe58859f Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 27 May 2012 08:12:42 -0400 Subject: ext4/jbd2: add metadata checksumming to the list of supported features Activate the metadata checksumming feature by adding it to ext4 and jbd2's lists of supported features. Signed-off-by: Darrick J. Wong Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index a9632bc55d97..f334c7fab967 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -296,7 +296,8 @@ typedef struct journal_superblock_s #define JBD2_KNOWN_ROCOMPAT_FEATURES 0 #define JBD2_KNOWN_INCOMPAT_FEATURES (JBD2_FEATURE_INCOMPAT_REVOKE | \ JBD2_FEATURE_INCOMPAT_64BIT | \ - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) + JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | \ + JBD2_FEATURE_INCOMPAT_CSUM_V2) #ifdef __KERNEL__ -- cgit v1.2.3 From 617c8c11236716dcbda877e764b7bf37c6fd8063 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 29 May 2012 03:35:08 +0000 Subject: skb: avoid unnecessary reallocations in __skb_cow At the beginning of __skb_cow, headroom gets set to a minimum of NET_SKB_PAD. This causes unnecessary reallocations if the buffer was not cloned and the headroom is just below NET_SKB_PAD, but still more than the amount requested by the caller. This was showing up frequently in my tests on VLAN tx, where vlan_insert_tag calls skb_cow_head(skb, VLAN_HLEN). Locally generated packets should have enough headroom, and for forward paths, we already have NET_SKB_PAD bytes of headroom, so we don't need to add any extra space here. Signed-off-by: Felix Fietkau Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0e501714d47f..b534a1be540a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1896,8 +1896,6 @@ static inline int __skb_cow(struct sk_buff *skb, unsigned int headroom, { int delta = 0; - if (headroom < NET_SKB_PAD) - headroom = NET_SKB_PAD; if (headroom > skb_headroom(skb)) delta = headroom - skb_headroom(skb); -- cgit v1.2.3 From 2033e9bf06f07e049bbc77e9452856df846714cc Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Tue, 29 May 2012 09:30:40 +0000 Subject: net: add MODULE_ALIAS_NET_PF_PROTO_NAME The MODULE_ALAIS_NET_PF macro set is missing a variant that allows for the appending of an arbitrary string to the net-pf--proto- base. while MODULE_ALIAS_NET_PF_PROTO_NAME_TYPE allows an appending of a numerical type, we need to be able to append a generic string to support generic netlink families that have neither a fix numberical protocol nor type number Signed-off-by: Neil Horman CC: Eric Dumazet CC: David Miller Signed-off-by: David S. Miller --- include/linux/net.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index 2d7510f38934..e9ac2df079ba 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -313,5 +313,8 @@ extern int kernel_sock_shutdown(struct socket *sock, MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto) \ "-type-" __stringify(type)) +#define MODULE_ALIAS_NET_PF_PROTO_NAME(pf, proto, name) \ + MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto) \ + name) #endif /* __KERNEL__ */ #endif /* _LINUX_NET_H */ -- cgit v1.2.3 From e9412c37082b5c932e83364aaed0c38c2ce33acb Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Tue, 29 May 2012 09:30:41 +0000 Subject: genetlink: Build a generic netlink family module alias Generic netlink searches for -type- formatted aliases when requesting a module to fulfill a protocol request (i.e. net-pf-16-proto-16-type-, where x is a type value). However generic netlink protocols have no well defined type numbers, they have string names. Modify genl_ctrl_getfamily to request an alias in the format net-pf-16-proto-16-family- instead, where x is a generic string, and add a macro that builds on the previously added MODULE_ALIAS_NET_PF_PROTO_NAME macro to allow modules to specifify those generic strings. Note, l2tp previously hacked together an net-pf-16-proto-16-type-l2tp alias using the MODULE_ALIAS macro, with these updates we can convert that to use the PROTO_NAME macro. Signed-off-by: Neil Horman CC: Eric Dumazet CC: James Chapman CC: David Miller Signed-off-by: David S. Miller --- include/linux/genetlink.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h index 73c28dea10ae..7a114016ac7d 100644 --- a/include/linux/genetlink.h +++ b/include/linux/genetlink.h @@ -110,6 +110,9 @@ extern int lockdep_genl_is_held(void); #define genl_dereference(p) \ rcu_dereference_protected(p, lockdep_genl_is_held()) +#define MODULE_ALIAS_GENL_FAMILY(family)\ + MODULE_ALIAS_NET_PF_PROTO_NAME(PF_NETLINK, NETLINK_GENERIC, "-family-" family) + #endif /* __KERNEL__ */ #endif /* __LINUX_GENERIC_NETLINK_H */ -- cgit v1.2.3 From b0b0382bb4904965a9e9fca77ad87514dfda0d1c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 2 Apr 2012 14:34:06 -0400 Subject: ->encode_fh() API change pass inode + parent's inode or NULL instead of dentry + bool saying whether we want the parent or not. NOTE: that needs ceph fix folded in. Signed-off-by: Al Viro --- include/linux/exportfs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 3a4cef5322dc..12291a7ee275 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -165,8 +165,8 @@ struct fid { */ struct export_operations { - int (*encode_fh)(struct dentry *de, __u32 *fh, int *max_len, - int connectable); + int (*encode_fh)(struct inode *inode, __u32 *fh, int *max_len, + struct inode *parent); struct dentry * (*fh_to_dentry)(struct super_block *sb, struct fid *fid, int fh_len, int fh_type); struct dentry * (*fh_to_parent)(struct super_block *sb, struct fid *fid, -- cgit v1.2.3 From 9dd6fa03ab31bb57cee4623a689d058d222fbe68 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 8 May 2012 13:29:45 +0930 Subject: lglock: remove online variants of lock Optimizing the slow paths adds a lot of complexity. If you need to grab every lock often, you have other problems. Signed-off-by: Rusty Russell Acked-by: Nick Piggin Signed-off-by: Al Viro --- include/linux/lglock.h | 58 ++------------------------------------------------ 1 file changed, 2 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lglock.h b/include/linux/lglock.h index 87f402ccec55..0fdd821e77b7 100644 --- a/include/linux/lglock.h +++ b/include/linux/lglock.h @@ -28,8 +28,8 @@ #define br_lock_init(name) name##_lock_init() #define br_read_lock(name) name##_local_lock() #define br_read_unlock(name) name##_local_unlock() -#define br_write_lock(name) name##_global_lock_online() -#define br_write_unlock(name) name##_global_unlock_online() +#define br_write_lock(name) name##_global_lock() +#define br_write_unlock(name) name##_global_unlock() #define DECLARE_BRLOCK(name) DECLARE_LGLOCK(name) #define DEFINE_BRLOCK(name) DEFINE_LGLOCK(name) @@ -42,8 +42,6 @@ #define lg_local_unlock_cpu(name, cpu) name##_local_unlock_cpu(cpu) #define lg_global_lock(name) name##_global_lock() #define lg_global_unlock(name) name##_global_unlock() -#define lg_global_lock_online(name) name##_global_lock_online() -#define lg_global_unlock_online(name) name##_global_unlock_online() #ifdef CONFIG_DEBUG_LOCK_ALLOC #define LOCKDEP_INIT_MAP lockdep_init_map @@ -68,36 +66,13 @@ extern void name##_local_unlock_cpu(int cpu); \ extern void name##_global_lock(void); \ extern void name##_global_unlock(void); \ - extern void name##_global_lock_online(void); \ - extern void name##_global_unlock_online(void); \ #define DEFINE_LGLOCK(name) \ \ DEFINE_SPINLOCK(name##_cpu_lock); \ - cpumask_t name##_cpus __read_mostly; \ DEFINE_PER_CPU(arch_spinlock_t, name##_lock); \ DEFINE_LGLOCK_LOCKDEP(name); \ \ - static int \ - name##_lg_cpu_callback(struct notifier_block *nb, \ - unsigned long action, void *hcpu) \ - { \ - switch (action & ~CPU_TASKS_FROZEN) { \ - case CPU_UP_PREPARE: \ - spin_lock(&name##_cpu_lock); \ - cpu_set((unsigned long)hcpu, name##_cpus); \ - spin_unlock(&name##_cpu_lock); \ - break; \ - case CPU_UP_CANCELED: case CPU_DEAD: \ - spin_lock(&name##_cpu_lock); \ - cpu_clear((unsigned long)hcpu, name##_cpus); \ - spin_unlock(&name##_cpu_lock); \ - } \ - return NOTIFY_OK; \ - } \ - static struct notifier_block name##_lg_cpu_notifier = { \ - .notifier_call = name##_lg_cpu_callback, \ - }; \ void name##_lock_init(void) { \ int i; \ LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \ @@ -106,11 +81,6 @@ lock = &per_cpu(name##_lock, i); \ *lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; \ } \ - register_hotcpu_notifier(&name##_lg_cpu_notifier); \ - get_online_cpus(); \ - for_each_online_cpu(i) \ - cpu_set(i, name##_cpus); \ - put_online_cpus(); \ } \ EXPORT_SYMBOL(name##_lock_init); \ \ @@ -150,30 +120,6 @@ } \ EXPORT_SYMBOL(name##_local_unlock_cpu); \ \ - void name##_global_lock_online(void) { \ - int i; \ - spin_lock(&name##_cpu_lock); \ - rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ - for_each_cpu(i, &name##_cpus) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - arch_spin_lock(lock); \ - } \ - } \ - EXPORT_SYMBOL(name##_global_lock_online); \ - \ - void name##_global_unlock_online(void) { \ - int i; \ - rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ - for_each_cpu(i, &name##_cpus) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - arch_spin_unlock(lock); \ - } \ - spin_unlock(&name##_cpu_lock); \ - } \ - EXPORT_SYMBOL(name##_global_unlock_online); \ - \ void name##_global_lock(void) { \ int i; \ preempt_disable(); \ -- cgit v1.2.3 From eea62f831b8030b0eeea8314eed73b6132d1de26 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 8 May 2012 13:32:24 +0930 Subject: brlocks/lglocks: turn into functions lglocks and brlocks are currently generated with some complicated macros in lglock.h. But there's no reason to not just use common utility functions and put all the data into a common data structure. Since there are at least two users it makes sense to share this code in a library. This is also easier maintainable than a macro forest. This will also make it later possible to dynamically allocate lglocks and also use them in modules (this would both still need some additional, but now straightforward, code) [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Andi Kleen Cc: Al Viro Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Rusty Russell Signed-off-by: Al Viro --- include/linux/lglock.h | 125 ++++++++++--------------------------------------- 1 file changed, 26 insertions(+), 99 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lglock.h b/include/linux/lglock.h index 0fdd821e77b7..f01e5f6d1f07 100644 --- a/include/linux/lglock.h +++ b/include/linux/lglock.h @@ -23,26 +23,17 @@ #include #include #include +#include /* can make br locks by using local lock for read side, global lock for write */ -#define br_lock_init(name) name##_lock_init() -#define br_read_lock(name) name##_local_lock() -#define br_read_unlock(name) name##_local_unlock() -#define br_write_lock(name) name##_global_lock() -#define br_write_unlock(name) name##_global_unlock() +#define br_lock_init(name) lg_lock_init(name, #name) +#define br_read_lock(name) lg_local_lock(name) +#define br_read_unlock(name) lg_local_unlock(name) +#define br_write_lock(name) lg_global_lock(name) +#define br_write_unlock(name) lg_global_unlock(name) -#define DECLARE_BRLOCK(name) DECLARE_LGLOCK(name) #define DEFINE_BRLOCK(name) DEFINE_LGLOCK(name) - -#define lg_lock_init(name) name##_lock_init() -#define lg_local_lock(name) name##_local_lock() -#define lg_local_unlock(name) name##_local_unlock() -#define lg_local_lock_cpu(name, cpu) name##_local_lock_cpu(cpu) -#define lg_local_unlock_cpu(name, cpu) name##_local_unlock_cpu(cpu) -#define lg_global_lock(name) name##_global_lock() -#define lg_global_unlock(name) name##_global_unlock() - #ifdef CONFIG_DEBUG_LOCK_ALLOC #define LOCKDEP_INIT_MAP lockdep_init_map @@ -57,90 +48,26 @@ #define DEFINE_LGLOCK_LOCKDEP(name) #endif - -#define DECLARE_LGLOCK(name) \ - extern void name##_lock_init(void); \ - extern void name##_local_lock(void); \ - extern void name##_local_unlock(void); \ - extern void name##_local_lock_cpu(int cpu); \ - extern void name##_local_unlock_cpu(int cpu); \ - extern void name##_global_lock(void); \ - extern void name##_global_unlock(void); \ +struct lglock { + arch_spinlock_t __percpu *lock; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lock_class_key lock_key; + struct lockdep_map lock_dep_map; +#endif +}; #define DEFINE_LGLOCK(name) \ - \ - DEFINE_SPINLOCK(name##_cpu_lock); \ - DEFINE_PER_CPU(arch_spinlock_t, name##_lock); \ - DEFINE_LGLOCK_LOCKDEP(name); \ - \ - void name##_lock_init(void) { \ - int i; \ - LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \ - for_each_possible_cpu(i) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - *lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; \ - } \ - } \ - EXPORT_SYMBOL(name##_lock_init); \ - \ - void name##_local_lock(void) { \ - arch_spinlock_t *lock; \ - preempt_disable(); \ - rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \ - lock = &__get_cpu_var(name##_lock); \ - arch_spin_lock(lock); \ - } \ - EXPORT_SYMBOL(name##_local_lock); \ - \ - void name##_local_unlock(void) { \ - arch_spinlock_t *lock; \ - rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \ - lock = &__get_cpu_var(name##_lock); \ - arch_spin_unlock(lock); \ - preempt_enable(); \ - } \ - EXPORT_SYMBOL(name##_local_unlock); \ - \ - void name##_local_lock_cpu(int cpu) { \ - arch_spinlock_t *lock; \ - preempt_disable(); \ - rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \ - lock = &per_cpu(name##_lock, cpu); \ - arch_spin_lock(lock); \ - } \ - EXPORT_SYMBOL(name##_local_lock_cpu); \ - \ - void name##_local_unlock_cpu(int cpu) { \ - arch_spinlock_t *lock; \ - rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \ - lock = &per_cpu(name##_lock, cpu); \ - arch_spin_unlock(lock); \ - preempt_enable(); \ - } \ - EXPORT_SYMBOL(name##_local_unlock_cpu); \ - \ - void name##_global_lock(void) { \ - int i; \ - preempt_disable(); \ - rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ - for_each_possible_cpu(i) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - arch_spin_lock(lock); \ - } \ - } \ - EXPORT_SYMBOL(name##_global_lock); \ - \ - void name##_global_unlock(void) { \ - int i; \ - rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ - for_each_possible_cpu(i) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - arch_spin_unlock(lock); \ - } \ - preempt_enable(); \ - } \ - EXPORT_SYMBOL(name##_global_unlock); + DEFINE_LGLOCK_LOCKDEP(name); \ + DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) \ + = __ARCH_SPIN_LOCK_UNLOCKED; \ + struct lglock name = { .lock = &name ## _lock } + +void lg_lock_init(struct lglock *lg, char *name); +void lg_local_lock(struct lglock *lg); +void lg_local_unlock(struct lglock *lg); +void lg_local_lock_cpu(struct lglock *lg, int cpu); +void lg_local_unlock_cpu(struct lglock *lg, int cpu); +void lg_global_lock(struct lglock *lg); +void lg_global_unlock(struct lglock *lg); + #endif -- cgit v1.2.3 From 5aaa0b7a2ed5b12692c9ffb5222182bd558d3146 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 17 May 2012 17:15:29 +0200 Subject: sched/nohz: Fix rq->cpu_load calculations some more Follow up on commit 556061b00 ("sched/nohz: Fix rq->cpu_load[] calculations") since while that fixed the busy case it regressed the mostly idle case. Add a callback from the nohz exit to also age the rq->cpu_load[] array. This closes the hole where either there was no nohz load balance pass during the nohz, or there was a 'significant' amount of idle time between the last nohz balance and the nohz exit. So we'll update unconditionally from the tick to not insert any accidental 0 load periods while busy, and we try and catch up from nohz idle balance and nohz exit. Both these are still prone to missing a jiffy, but that has always been the case. Signed-off-by: Peter Zijlstra Cc: pjt@google.com Cc: Venkatesh Pallipadi Link: http://lkml.kernel.org/n/tip-kt0trz0apodbf84ucjfdbr1a@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index f45c0b280b5d..d61e5977e517 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -145,6 +145,7 @@ extern unsigned long this_cpu_load(void); extern void calc_global_load(unsigned long ticks); +extern void update_cpu_load_nohz(void); extern unsigned long get_parent_ip(unsigned long addr); -- cgit v1.2.3 From 29baa7478ba47d746e3625c91d3b2afbf46b4312 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 23 Apr 2012 12:11:21 +0200 Subject: sched: Move nr_cpus_allowed out of 'struct sched_rt_entity' Since nr_cpus_allowed is used outside of sched/rt.c and wants to be used outside of there more, move it to a more natural site. Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-kr61f02y9brwzkh6x53pdptm@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 2 +- include/linux/sched.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index e4baff5f7ff4..9e65eff6af3b 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -149,6 +149,7 @@ extern struct cred init_cred; .normal_prio = MAX_PRIO-20, \ .policy = SCHED_NORMAL, \ .cpus_allowed = CPU_MASK_ALL, \ + .nr_cpus_allowed= NR_CPUS, \ .mm = NULL, \ .active_mm = &init_mm, \ .se = { \ @@ -157,7 +158,6 @@ extern struct cred init_cred; .rt = { \ .run_list = LIST_HEAD_INIT(tsk.rt.run_list), \ .time_slice = RR_TIMESLICE, \ - .nr_cpus_allowed = NR_CPUS, \ }, \ .tasks = LIST_HEAD_INIT(tsk.tasks), \ INIT_PUSHABLE_TASKS(tsk) \ diff --git a/include/linux/sched.h b/include/linux/sched.h index d61e5977e517..0f50e78f7f44 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1188,7 +1188,6 @@ struct sched_rt_entity { struct list_head run_list; unsigned long timeout; unsigned int time_slice; - int nr_cpus_allowed; struct sched_rt_entity *back; #ifdef CONFIG_RT_GROUP_SCHED @@ -1253,6 +1252,7 @@ struct task_struct { #endif unsigned int policy; + int nr_cpus_allowed; cpumask_t cpus_allowed; #ifdef CONFIG_PREEMPT_RCU -- cgit v1.2.3 From 0053ea9c34a41865ec89ffbf3d3033f9a503bccc Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 30 May 2012 07:43:34 +0000 Subject: netdevice: Update netif_dbg for CONFIG_DYNAMIC_DEBUG Make netif_dbg use dynamic debugging whenever CONFIG_DYNAMIC_DEBUG is enabled. commit b558c96ffa53 ("dynamic_debug: make dynamic-debug supersede DEBUG ccflag") missed updating the netif_dbg variant. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e7fd468f7126..d94cb1431519 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2795,15 +2795,15 @@ do { \ #define netif_info(priv, type, dev, fmt, args...) \ netif_level(info, priv, type, dev, fmt, ##args) -#if defined(DEBUG) -#define netif_dbg(priv, type, dev, format, args...) \ - netif_printk(priv, type, KERN_DEBUG, dev, format, ##args) -#elif defined(CONFIG_DYNAMIC_DEBUG) +#if defined(CONFIG_DYNAMIC_DEBUG) #define netif_dbg(priv, type, netdev, format, args...) \ do { \ if (netif_msg_##type(priv)) \ dynamic_netdev_dbg(netdev, format, ##args); \ } while (0) +#elif defined(DEBUG) +#define netif_dbg(priv, type, dev, format, args...) \ + netif_printk(priv, type, KERN_DEBUG, dev, format, ##args) #else #define netif_dbg(priv, type, dev, format, args...) \ ({ \ -- cgit v1.2.3 From bb8ac181a5cf50458a0d83b4460790badc9fdc16 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 19 May 2012 10:25:23 -0400 Subject: bury __kernel_nlink_t, make internal nlink_t consistent Signed-off-by: Al Viro --- include/linux/types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/types.h b/include/linux/types.h index 7f480db60231..9c1bd539ea70 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -25,7 +25,7 @@ typedef __kernel_dev_t dev_t; typedef __kernel_ino_t ino_t; typedef __kernel_mode_t mode_t; typedef unsigned short umode_t; -typedef __kernel_nlink_t nlink_t; +typedef __u32 nlink_t; typedef __kernel_off_t off_t; typedef __kernel_pid_t pid_t; typedef __kernel_daddr_t daddr_t; -- cgit v1.2.3 From a4f9a9a635e4d54ac93df4b861ed8792e17bd4a2 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 29 May 2012 11:02:24 -0700 Subject: fsnotify: handle subfiles' perm events Recently I'm working on fanotify and found the following strange behaviors. I wrote a program to set fanotify_mark on "/tmp/block" and FAN_DENY all events notified. fanotify_mask = FAN_ALL_EVENTS | FAN_ALL_PERM_EVENTS | FAN_EVENT_ON_CHILD: $ cd /tmp/block; cat foo cat: foo: Operation not permitted Operation on the file is blocked as expected. But, fanotify_mask = FAN_ALL_PERM_EVENTS | FAN_EVENT_ON_CHILD: $ cd /tmp/block; cat foo aaa It's not blocked anymore. This is confusing behavior. Also reading commit "fsnotify: call fsnotify_parent in perm events", it seems like fsnotify should handle subfiles' perm events as well as the other notify events. With this patch, regardless of FAN_ALL_EVENTS set or not: $ cd /tmp/block; cat foo cat: foo: Operation not permitted Operation on the file is now blocked properly. FS_OPEN_PERM and FS_ACCESS_PERM are not listed on FS_EVENTS_POSS_ON_CHILD. Due to fsnotify_inode_watches_children() check, if you only specify only these events as fsnotify_mask, you don't get subfiles' perm events notified. This patch add the events to FS_EVENTS_POSS_ON_CHILD to get them notified even if only these events are specified to fsnotify_mask. Signed-off-by: Naohiro Aota Cc: Eric Paris Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- include/linux/fsnotify_backend.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 91d0e0a34ef3..63d966d5c2ea 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -60,7 +60,7 @@ #define FS_EVENTS_POSS_ON_CHILD (FS_ACCESS | FS_MODIFY | FS_ATTRIB |\ FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | FS_OPEN |\ FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE |\ - FS_DELETE) + FS_DELETE | FS_OPEN_PERM | FS_ACCESS_PERM) #define FS_MOVE (FS_MOVED_FROM | FS_MOVED_TO) -- cgit v1.2.3 From 114067b69e7b2c691faace0e33db2f04096f668d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 31 May 2012 14:43:27 +0900 Subject: perf tools: Check if callchain is corrupted We faced segmentation fault on perf top -G at very high sampling rate due to a corrupted callchain. While the root cause was not revealed (I failed to figure it out), this patch tries to protect us from the segfault on such cases. Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Namhyung Kim Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Sunjin Yang Link: http://lkml.kernel.org/r/1338443007-24857-2-git-send-email-namhyung.kim@lge.com Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index f32578634d9d..1817d4015e5f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -555,6 +555,8 @@ enum perf_event_type { PERF_RECORD_MAX, /* non-ABI */ }; +#define PERF_MAX_STACK_DEPTH 255 + enum perf_callchain_context { PERF_CONTEXT_HV = (__u64)-32, PERF_CONTEXT_KERNEL = (__u64)-128, @@ -609,8 +611,6 @@ struct perf_guest_info_callbacks { #include #include -#define PERF_MAX_STACK_DEPTH 255 - struct perf_callchain_entry { __u64 nr; __u64 ip[PERF_MAX_STACK_DEPTH]; -- cgit v1.2.3 From d007794a182bc072a7b7479909dbd0d67ba341be Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 13:11:37 -0400 Subject: split cap_mmap_addr() out of cap_file_mmap() ... switch callers. Signed-off-by: Al Viro --- include/linux/security.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index ab0e091ce5fa..4ad59c9fa731 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -86,6 +86,7 @@ extern int cap_inode_setxattr(struct dentry *dentry, const char *name, extern int cap_inode_removexattr(struct dentry *dentry, const char *name); extern int cap_inode_need_killpriv(struct dentry *dentry); extern int cap_inode_killpriv(struct dentry *dentry); +extern int cap_mmap_addr(unsigned long addr); extern int cap_file_mmap(struct file *file, unsigned long reqprot, unsigned long prot, unsigned long flags, unsigned long addr, unsigned long addr_only); @@ -2187,7 +2188,7 @@ static inline int security_file_mmap(struct file *file, unsigned long reqprot, unsigned long addr, unsigned long addr_only) { - return cap_file_mmap(file, reqprot, prot, flags, addr, addr_only); + return cap_mmap_addr(addr); } static inline int security_file_mprotect(struct vm_area_struct *vma, -- cgit v1.2.3 From e5467859f7f79b69fc49004403009dfdba3bec53 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 13:30:51 -0400 Subject: split ->file_mmap() into ->mmap_addr()/->mmap_file() ... i.e. file-dependent and address-dependent checks. Signed-off-by: Al Viro --- include/linux/security.h | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 4ad59c9fa731..f1bae0963ddc 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -87,9 +87,8 @@ extern int cap_inode_removexattr(struct dentry *dentry, const char *name); extern int cap_inode_need_killpriv(struct dentry *dentry); extern int cap_inode_killpriv(struct dentry *dentry); extern int cap_mmap_addr(unsigned long addr); -extern int cap_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags, - unsigned long addr, unsigned long addr_only); +extern int cap_mmap_file(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags); extern int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags); extern int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5); @@ -587,15 +586,17 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * simple integer value. When @arg represents a user space pointer, it * should never be used by the security module. * Return 0 if permission is granted. - * @file_mmap : + * @mmap_addr : + * Check permissions for a mmap operation at @addr. + * @addr contains virtual address that will be used for the operation. + * Return 0 if permission is granted. + * @mmap_file : * Check permissions for a mmap operation. The @file may be NULL, e.g. * if mapping anonymous memory. * @file contains the file structure for file to map (may be NULL). * @reqprot contains the protection requested by the application. * @prot contains the protection that will be applied by the kernel. * @flags contains the operational flags. - * @addr contains virtual address that will be used for the operation. - * @addr_only contains a boolean: 0 if file-backed VMA, otherwise 1. * Return 0 if permission is granted. * @file_mprotect: * Check permissions before changing memory access permissions. @@ -1482,10 +1483,10 @@ struct security_operations { void (*file_free_security) (struct file *file); int (*file_ioctl) (struct file *file, unsigned int cmd, unsigned long arg); - int (*file_mmap) (struct file *file, + int (*mmap_addr) (unsigned long addr); + int (*mmap_file) (struct file *file, unsigned long reqprot, unsigned long prot, - unsigned long flags, unsigned long addr, - unsigned long addr_only); + unsigned long flags); int (*file_mprotect) (struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot); @@ -1744,9 +1745,9 @@ int security_file_permission(struct file *file, int mask); int security_file_alloc(struct file *file); void security_file_free(struct file *file); int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg); -int security_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags, - unsigned long addr, unsigned long addr_only); +int security_mmap_file(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags); +int security_mmap_addr(unsigned long addr); int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot); int security_file_lock(struct file *file, unsigned int cmd); @@ -2182,11 +2183,14 @@ static inline int security_file_ioctl(struct file *file, unsigned int cmd, return 0; } -static inline int security_file_mmap(struct file *file, unsigned long reqprot, +static inline int security_mmap_file(struct file *file, unsigned long reqprot, unsigned long prot, - unsigned long flags, - unsigned long addr, - unsigned long addr_only) + unsigned long flags) +{ + return 0; +} + +static inline int security_mmap_addr(unsigned long addr) { return cap_mmap_addr(addr); } -- cgit v1.2.3 From 1d59d61f606547f0712aa6971f91f71154071c99 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 31 May 2012 12:22:33 -0400 Subject: NFS: Ensure that setattr and getattr wait for O_DIRECT write completion Use the same mechanism as the block devices are using, but move the helper functions from fs/direct-io.c into fs/inode.c to remove the dependency on CONFIG_BLOCK. Signed-off-by: Trond Myklebust Cc: Christoph Hellwig Cc: Al Viro Cc: Fred Isaman Signed-off-by: Linus Torvalds --- include/linux/fs.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 038076b27ea4..598a5892ff2b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2453,8 +2453,6 @@ enum { }; void dio_end_io(struct bio *bio, int error); -void inode_dio_wait(struct inode *inode); -void inode_dio_done(struct inode *inode); ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, struct block_device *bdev, const struct iovec *iov, loff_t offset, @@ -2469,12 +2467,11 @@ static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb, offset, nr_segs, get_block, NULL, NULL, DIO_LOCKING | DIO_SKIP_HOLES); } -#else -static inline void inode_dio_wait(struct inode *inode) -{ -} #endif +void inode_dio_wait(struct inode *inode); +void inode_dio_done(struct inode *inode); + extern const struct file_operations generic_ro_fops; #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) -- cgit v1.2.3 From 3fc929e2d693185aac2686e5e64e24eae10642a4 Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Wed, 30 May 2012 09:14:51 +0000 Subject: net/mlx4_core: Fix number of EQs used in ICM initialisation In SRIOV mode, the number of EQs used when computing the total ICM size was incorrect. To fix this, we do the following: 1. We add a new structure to mlx4_dev, mlx4_phys_caps, to contain physical HCA capabilities. The PPF uses the phys capabilities when it computes things like ICM size. The dev_caps structure will then contain the paravirtualized values, making bookkeeping much easier in SRIOV mode. We add a structure rather than a single parameter because there will be other fields in the phys_caps. The first field we add to the mlx4_phys_caps structure is num_phys_eqs. 2. In INIT_HCA, when running in SRIOV mode, the "log_num_eqs" parameter passed to the FW is the number of EQs per VF/PF; each function (PF or VF) has this number of EQs available. However, the total number of EQs which must be allowed for in the ICM is (1 << log_num_eqs) * (#VFs + #PFs). Rather than compute this quantity, we allocate ICM space for 1024 EQs (which is the device maximum number of EQs, and which is the value we place in the mlx4_phys_caps structure). For INIT_HCA, however, we use the per-function number of EQs as described above. Signed-off-by: Marcel Apfelbaum Signed-off-by: Jack Morgenstein Reviewed-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 6e27fa99e8b9..6a8f002b8ed3 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -64,6 +64,7 @@ enum { MLX4_MAX_NUM_PF = 16, MLX4_MAX_NUM_VF = 64, MLX4_MFUNC_MAX = 80, + MLX4_MAX_EQ_NUM = 1024, MLX4_MFUNC_EQ_NUM = 4, MLX4_MFUNC_MAX_EQES = 8, MLX4_MFUNC_EQE_MASK = (MLX4_MFUNC_MAX_EQES - 1) @@ -239,6 +240,10 @@ static inline u64 mlx4_fw_ver(u64 major, u64 minor, u64 subminor) return (major << 32) | (minor << 16) | subminor; } +struct mlx4_phys_caps { + u32 num_phys_eqs; +}; + struct mlx4_caps { u64 fw_ver; u32 function; @@ -499,6 +504,7 @@ struct mlx4_dev { unsigned long flags; unsigned long num_slaves; struct mlx4_caps caps; + struct mlx4_phys_caps phys_caps; struct radix_tree_root qp_table_tree; u8 rev_id; char board_id[MLX4_BOARD_ID_LEN]; -- cgit v1.2.3 From 9793f7c88937e7ac07305ab1af1a519225836823 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Wed, 2 May 2012 16:08:38 +0400 Subject: SUNRPC: new svc_bind() routine introduced This new routine is responsible for service registration in a specified network context. The idea is to separate service creation from per-net operations. Note also: since registering service with svc_bind() can fail, the service will be destroyed and during destruction it will try to unregister itself from rpcbind. In this case unregistration has to be skipped. Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 51b29ac45a8e..2b43e0214261 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -416,6 +416,7 @@ struct svc_procedure { */ int svc_rpcb_setup(struct svc_serv *serv, struct net *net); void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net); +int svc_bind(struct svc_serv *serv, struct net *net); struct svc_serv *svc_create(struct svc_program *, unsigned int, void (*shutdown)(struct svc_serv *, struct net *net)); struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, -- cgit v1.2.3 From 03a4e1f6ddf25f48848e1bddcffc0ad489648331 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 14 May 2012 19:55:22 -0400 Subject: nfsd4: move principal name into svc_cred Instead of keeping the principal name associated with a request in a structure that's private to auth_gss and using an accessor function, move it to svc_cred. Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svcauth.h | 9 +++++++++ include/linux/sunrpc/svcauth_gss.h | 1 - 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index 2c54683b91de..16fe477a96e0 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -15,13 +15,22 @@ #include #include #include +#include struct svc_cred { uid_t cr_uid; gid_t cr_gid; struct group_info *cr_group_info; + char *cr_principal; /* for gss */ }; +static inline void free_svc_cred(struct svc_cred *cred) +{ + if (cred->cr_group_info) + put_group_info(cred->cr_group_info); + kfree(cred->cr_principal); +} + struct svc_rqst; /* forward decl */ struct in6_addr; diff --git a/include/linux/sunrpc/svcauth_gss.h b/include/linux/sunrpc/svcauth_gss.h index 7c32daa025eb..726aff1a5201 100644 --- a/include/linux/sunrpc/svcauth_gss.h +++ b/include/linux/sunrpc/svcauth_gss.h @@ -22,7 +22,6 @@ int gss_svc_init_net(struct net *net); void gss_svc_shutdown_net(struct net *net); int svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name); u32 svcauth_gss_flavor(struct auth_domain *dom); -char *svc_gss_principal(struct svc_rqst *); #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_SVCAUTH_GSS_H */ -- cgit v1.2.3 From d5497fc693a446ce9100fcf4117c3f795ddfd0d2 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 14 May 2012 22:06:49 -0400 Subject: nfsd4: move rq_flavor into svc_cred Move the rq_flavor into struct svc_cred, and use it in setclientid and exchange_id comparisons as well. Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc.h | 1 - include/linux/sunrpc/svcauth.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 2b43e0214261..40e0a273faea 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -232,7 +232,6 @@ struct svc_rqst { struct svc_pool * rq_pool; /* thread pool */ struct svc_procedure * rq_procinfo; /* procedure info */ struct auth_ops * rq_authop; /* authentication flavour */ - u32 rq_flavor; /* pseudoflavor */ struct svc_cred rq_cred; /* auth info */ void * rq_xprt_ctxt; /* transport specific context ptr */ struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */ diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index 16fe477a96e0..dd74084a9799 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -21,6 +21,7 @@ struct svc_cred { uid_t cr_uid; gid_t cr_gid; struct group_info *cr_group_info; + u32 cr_flavor; /* pseudoflavor */ char *cr_principal; /* for gss */ }; -- cgit v1.2.3 From a3860c1c5dd1137db23d7786d284939c5761d517 Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Thu, 31 May 2012 16:26:04 -0700 Subject: introduce SIZE_MAX ULONG_MAX is often used to check for integer overflow when calculating allocation size. While ULONG_MAX happens to work on most systems, there is no guarantee that `size_t' must be the same size as `long'. This patch introduces SIZE_MAX, the maximum value of `size_t', to improve portability and readability for allocation size validation. Signed-off-by: Xi Wang Acked-by: Alex Elder Cc: David Airlie Cc: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 1 + include/linux/slab.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index ec55a3c8ba77..e07f5e0c5df4 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -35,6 +35,7 @@ #define LLONG_MAX ((long long)(~0ULL>>1)) #define LLONG_MIN (-LLONG_MAX - 1) #define ULLONG_MAX (~0ULL) +#define SIZE_MAX (~(size_t)0) #define STACK_MAGIC 0xdeadbeef diff --git a/include/linux/slab.h b/include/linux/slab.h index a595dce6b0c7..67d5d94b783a 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -242,7 +242,7 @@ size_t ksize(const void *); */ static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags) { - if (size != 0 && n > ULONG_MAX / size) + if (size != 0 && n > SIZE_MAX / size) return NULL; return __kmalloc(n * size, flags); } -- cgit v1.2.3 From 020ac5b6bef15785f9dde9de89d2734ff97da733 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 31 May 2012 16:26:12 -0700 Subject: fat: introduce special inode for managing the FSINFO block This is patchset makes fatfs stop using the VFS '->write_super()' method for writing out the FSINFO block. The final goal is to get rid of the 'sync_supers()' kernel thread. This kernel thread wakes up every 5 seconds (by default) and calls '->write_super()' for all mounted file-systems. And the bad thing is that this is done even if all the superblocks are clean. Moreover, some file-systems do not even need this end they do not register the '->write_super()' method at all (e.g., btrfs). So 'sync_supers()' most often just generates useless wake-ups and wastes power. I am trying to make all file-systems independent of '->write_super()' and plan to remove 'sync_supers()' and '->write_super' completely once there are no more users. The '->write_supers()' method is mostly used by baroque file-systems like hfs, udf, etc. Modern file-systems like btrfs and xfs do not use it. This justifies removing this stuff from VFS completely and make every FS self-manage own superblock. Tested with xfstests. This patch: Preparation for further changes. It introduces a special inode ('fsinfo_inode') in FAT file-system which we'll later use for managing the FSINFO block. Note, this there is already one special inode ('fat_inode') which is used for managing the FAT tables. Introduce new 'MSDOS_FSINFO_INO' constant for this special inode. It is safe to do because FAT file-system does not store inode numbers on the media but generates them run-time. I've also cleaned up the comment to existing 'MSDOS_ROOT_INO' constant, while on it. Signed-off-by: Artem Bityutskiy Cc: OGAWA Hirofumi Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/msdos_fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h index 34066e65fdeb..11cc2ac67e75 100644 --- a/include/linux/msdos_fs.h +++ b/include/linux/msdos_fs.h @@ -21,8 +21,9 @@ #define CT_LE_W(v) cpu_to_le16(v) #define CT_LE_L(v) cpu_to_le32(v) +#define MSDOS_ROOT_INO 1 /* The root inode number */ +#define MSDOS_FSINFO_INO 2 /* Used for managing the FSINFO block */ -#define MSDOS_ROOT_INO 1 /* == MINIX_ROOT_INO */ #define MSDOS_DIR_BITS 5 /* log2(sizeof(struct msdos_dir_entry)) */ /* directory limit */ -- cgit v1.2.3 From ae3cef7300e9fddc35ad251dd5f27c5b88c8594a Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Thu, 31 May 2012 16:26:14 -0700 Subject: kmod: unexport call_usermodehelper_freeinfo() call_usermodehelper_freeinfo() is not used outside of kmod.c. So unexport it, and make it static to kmod.c Signed-off-by: Boaz Harrosh Cc: Oleg Nesterov Cc: Tetsuo Handa Cc: Ingo Molnar Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kmod.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kmod.h b/include/linux/kmod.h index dd99c329e161..f07f9a4e10ff 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -79,10 +79,6 @@ void call_usermodehelper_setfns(struct subprocess_info *info, /* Actually execute the sub-process */ int call_usermodehelper_exec(struct subprocess_info *info, int wait); -/* Free the subprocess_info. This is only needed if you're not going - to call call_usermodehelper_exec */ -void call_usermodehelper_freeinfo(struct subprocess_info *info); - static inline int call_usermodehelper_fns(char *path, char **argv, char **envp, int wait, int (*init)(struct subprocess_info *info, struct cred *new), -- cgit v1.2.3 From 785042f2e275089e22c36b462f6495ce8d91732d Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Thu, 31 May 2012 16:26:15 -0700 Subject: kmod: move call_usermodehelper_fns() to .c file and unexport all it's helpers If we move call_usermodehelper_fns() to kmod.c file and EXPORT_SYMBOL it we can avoid exporting all it's helper functions: call_usermodehelper_setup call_usermodehelper_setfns call_usermodehelper_exec And make all of them static to kmod.c Since the optimizer will see all these as a single call site it will inline them inside call_usermodehelper_fns(). So we loose the call to _fns but gain 3 calls to the helpers. (Not that it matters) Signed-off-by: Boaz Harrosh Cc: Oleg Nesterov Cc: Tetsuo Handa Cc: Ingo Molnar Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kmod.h | 30 ++---------------------------- 1 file changed, 2 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kmod.h b/include/linux/kmod.h index f07f9a4e10ff..5398d5807075 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -66,36 +66,10 @@ struct subprocess_info { void *data; }; -/* Allocate a subprocess_info structure */ -struct subprocess_info *call_usermodehelper_setup(char *path, char **argv, - char **envp, gfp_t gfp_mask); - -/* Set various pieces of state into the subprocess_info structure */ -void call_usermodehelper_setfns(struct subprocess_info *info, - int (*init)(struct subprocess_info *info, struct cred *new), - void (*cleanup)(struct subprocess_info *info), - void *data); - -/* Actually execute the sub-process */ -int call_usermodehelper_exec(struct subprocess_info *info, int wait); - -static inline int +extern int call_usermodehelper_fns(char *path, char **argv, char **envp, int wait, int (*init)(struct subprocess_info *info, struct cred *new), - void (*cleanup)(struct subprocess_info *), void *data) -{ - struct subprocess_info *info; - gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL; - - info = call_usermodehelper_setup(path, argv, envp, gfp_mask); - - if (info == NULL) - return -ENOMEM; - - call_usermodehelper_setfns(info, init, cleanup, data); - - return call_usermodehelper_exec(info, wait); -} + void (*cleanup)(struct subprocess_info *), void *data); static inline int call_usermodehelper(char *path, char **argv, char **envp, int wait) -- cgit v1.2.3 From 43e13cc107cf6cd3c15fbe1cef849435c2223d50 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 31 May 2012 16:26:16 -0700 Subject: cred: remove task_is_dead() from __task_cred() validation Commit 8f92054e7ca1 ("CRED: Fix __task_cred()'s lockdep check and banner comment"): add the following validation condition: task->exit_state >= 0 to permit the access if the target task is dead and therefore unable to change its own credentials. OK, but afaics currently this can only help wait_task_zombie() which calls __task_cred() without rcu lock. Remove this validation and change wait_task_zombie() to use task_uid() instead. This means we do rcu_read_lock() only to shut up the lockdep, but we already do the same in, say, wait_task_stopped(). task_is_dead() should die, task->exit_state != 0 means that this task has passed exit_notify(), only do_wait-like code paths should use this. Unfortunately, we can't kill task_is_dead() right now, it has already acquired buggy users in drivers/staging. The fix already exists. Signed-off-by: Oleg Nesterov Reviewed-by: "Eric W. Biederman" Acked-by: David Howells Cc: Jiri Olsa Cc: Paul E. McKenney Cc: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cred.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cred.h b/include/linux/cred.h index 917dc5aeb1d4..ebbed2ce6637 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -277,17 +277,13 @@ static inline void put_cred(const struct cred *_cred) * @task: The task to query * * Access the objective credentials of a task. The caller must hold the RCU - * readlock or the task must be dead and unable to change its own credentials. + * readlock. * * The result of this function should not be passed directly to get_cred(); * rather get_task_cred() should be used instead. */ -#define __task_cred(task) \ - ({ \ - const struct task_struct *__t = (task); \ - rcu_dereference_check(__t->real_cred, \ - task_is_dead(__t)); \ - }) +#define __task_cred(task) \ + rcu_dereference((task)->real_cred) /** * get_current_cred - Get the current task's subjective credentials -- cgit v1.2.3 From cb79295e20a8088a2fd6a9b3cb5f2d889ec36b4d Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Thu, 31 May 2012 16:26:22 -0700 Subject: cpu: introduce clear_tasks_mm_cpumask() helper Many architectures clear tasks' mm_cpumask like this: read_lock(&tasklist_lock); for_each_process(p) { if (p->mm) cpumask_clear_cpu(cpu, mm_cpumask(p->mm)); } read_unlock(&tasklist_lock); Depending on the context, the code above may have several problems, such as: 1. Working with task->mm w/o getting mm or grabing the task lock is dangerous as ->mm might disappear (exit_mm() assigns NULL under task_lock(), so tasklist lock is not enough). 2. Checking for process->mm is not enough because process' main thread may exit or detach its mm via use_mm(), but other threads may still have a valid mm. This patch implements a small helper function that does things correctly, i.e.: 1. We take the task's lock while whe handle its mm (we can't use get_task_mm()/mmput() pair as mmput() might sleep); 2. To catch exited main thread case, we use find_lock_task_mm(), which walks up all threads and returns an appropriate task (with task lock held). Also, Per Peter Zijlstra's idea, now we don't grab tasklist_lock in the new helper, instead we take the rcu read lock. We can do this because the function is called after the cpu is taken down and marked offline, so no new tasks will get this cpu set in their mm mask. Signed-off-by: Anton Vorontsov Cc: Richard Weinberger Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Russell King Cc: Benjamin Herrenschmidt Cc: Mike Frysinger Cc: Paul Mundt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 7230bb59a06f..2e9b9ebbeb78 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -177,6 +177,7 @@ extern void put_online_cpus(void); #define hotcpu_notifier(fn, pri) cpu_notifier(fn, pri) #define register_hotcpu_notifier(nb) register_cpu_notifier(nb) #define unregister_hotcpu_notifier(nb) unregister_cpu_notifier(nb) +void clear_tasks_mm_cpumask(int cpu); int cpu_down(unsigned int cpu); #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE -- cgit v1.2.3 From 29a5c67e7a78815fda0567a867adce467f6e6e5a Mon Sep 17 00:00:00 2001 From: maximilian attems Date: Thu, 31 May 2012 16:26:27 -0700 Subject: kexec: export kexec.h to user space Add userspace definitions, guard all relevant kernel structures. While at it document stuff and remove now useless userspace hint. It is easy to add the relevant system call to respective libc's, but it seems pointless to have to duplicate the data structures. This is based on the kexec-tools headers, with the exception of just using int on return (succes or failure) and using size_t instead of 'unsigned long int' for the number of segments argument of kexec_load(). Signed-off-by: maximilian attems Cc: Simon Horman Cc: Vivek Goyal Cc: Haren Myneni Cc: "Eric W. Biederman" Cc: Martin Schwidefsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/Kbuild | 1 + include/linux/kexec.h | 75 ++++++++++++++++++++++++++++++++++++--------------- 2 files changed, 54 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 7185b8f15ced..8760be30b375 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -226,6 +226,7 @@ header-y += kdev_t.h header-y += kernel.h header-y += kernelcapi.h header-y += kernel-page-flags.h +header-y += kexec.h header-y += keyboard.h header-y += keyctl.h header-y += l2tp.h diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 0d7d6a1b172f..37c5f7261142 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -1,8 +1,58 @@ #ifndef LINUX_KEXEC_H #define LINUX_KEXEC_H -#ifdef CONFIG_KEXEC +/* kexec system call - It loads the new kernel to boot into. + * kexec does not sync, or unmount filesystems so if you need + * that to happen you need to do that yourself. + */ + #include + +/* kexec flags for different usage scenarios */ +#define KEXEC_ON_CRASH 0x00000001 +#define KEXEC_PRESERVE_CONTEXT 0x00000002 +#define KEXEC_ARCH_MASK 0xffff0000 + +/* These values match the ELF architecture values. + * Unless there is a good reason that should continue to be the case. + */ +#define KEXEC_ARCH_DEFAULT ( 0 << 16) +#define KEXEC_ARCH_386 ( 3 << 16) +#define KEXEC_ARCH_X86_64 (62 << 16) +#define KEXEC_ARCH_PPC (20 << 16) +#define KEXEC_ARCH_PPC64 (21 << 16) +#define KEXEC_ARCH_IA_64 (50 << 16) +#define KEXEC_ARCH_ARM (40 << 16) +#define KEXEC_ARCH_S390 (22 << 16) +#define KEXEC_ARCH_SH (42 << 16) +#define KEXEC_ARCH_MIPS_LE (10 << 16) +#define KEXEC_ARCH_MIPS ( 8 << 16) + +/* The artificial cap on the number of segments passed to kexec_load. */ +#define KEXEC_SEGMENT_MAX 16 + +#ifndef __KERNEL__ +/* + * This structure is used to hold the arguments that are used when + * loading kernel binaries. + */ +struct kexec_segment { + const void *buf; + size_t bufsz; + const void *mem; + size_t memsz; +}; + +/* Load a new kernel image as described by the kexec_segment array + * consisting of passed number of segments at the entry-point address. + * The flags allow different useage types. + */ +extern int kexec_load(void *, size_t, struct kexec_segment *, + unsigned long int); +#endif /* __KERNEL__ */ + +#ifdef __KERNEL__ +#ifdef CONFIG_KEXEC #include #include #include @@ -67,11 +117,10 @@ typedef unsigned long kimage_entry_t; #define IND_DONE 0x4 #define IND_SOURCE 0x8 -#define KEXEC_SEGMENT_MAX 16 struct kexec_segment { void __user *buf; size_t bufsz; - unsigned long mem; /* User space sees this as a (void *) ... */ + unsigned long mem; size_t memsz; }; @@ -175,25 +224,6 @@ extern struct kimage *kexec_crash_image; #define kexec_flush_icache_page(page) #endif -#define KEXEC_ON_CRASH 0x00000001 -#define KEXEC_PRESERVE_CONTEXT 0x00000002 -#define KEXEC_ARCH_MASK 0xffff0000 - -/* These values match the ELF architecture values. - * Unless there is a good reason that should continue to be the case. - */ -#define KEXEC_ARCH_DEFAULT ( 0 << 16) -#define KEXEC_ARCH_386 ( 3 << 16) -#define KEXEC_ARCH_X86_64 (62 << 16) -#define KEXEC_ARCH_PPC (20 << 16) -#define KEXEC_ARCH_PPC64 (21 << 16) -#define KEXEC_ARCH_IA_64 (50 << 16) -#define KEXEC_ARCH_ARM (40 << 16) -#define KEXEC_ARCH_S390 (22 << 16) -#define KEXEC_ARCH_SH (42 << 16) -#define KEXEC_ARCH_MIPS_LE (10 << 16) -#define KEXEC_ARCH_MIPS ( 8 << 16) - /* List of defined/legal kexec flags */ #ifndef CONFIG_KEXEC_JUMP #define KEXEC_FLAGS KEXEC_ON_CRASH @@ -228,4 +258,5 @@ struct task_struct; static inline void crash_kexec(struct pt_regs *regs) { } static inline int kexec_should_crash(struct task_struct *p) { return 0; } #endif /* CONFIG_KEXEC */ +#endif /* __KERNEL__ */ #endif /* LINUX_KEXEC_H */ -- cgit v1.2.3 From 93e6f119c0ce8a1bba6e81dc8dd97d67be360844 Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Thu, 31 May 2012 16:26:28 -0700 Subject: ipc/mqueue: cleanup definition names and locations Since commit b231cca4381e ("message queues: increase range limits") on Oct 18, 2008, calls to mq_open() that did not pass in an attribute struct and expected to get default values for the size of the queue and the max message size now get the system wide maximums instead of hardwired defaults like they used to get. This was uncovered when one of the earlier patches in this patch set increased the default system wide maximums at the same time it increased the hard ceiling on the system wide maximums (a customer specifically needed the hard ceiling brought back up, the new ceiling that commit b231cca4381e introduced was too low for their production systems). By increasing the default maximums and not realising they were tied to any attempt to create a message queue without an attribute struct, I had inadvertently made it such that all message queue creation attempts without an attribute struct were failing because the new default maximums would create a queue that exceeded the default rlimit for message queue bytes. As a result, the system wide defaults were brought back down to their previous levels, and the system wide ceilings on the maximums were raised to meet the customer's needs. However, the fact that the no attribute struct behavior of mq_open() could be broken by changing the system wide maximums for message queues was seen as fundamentally broken itself. So we hardwired the no attribute case back like it used to be. But, then we realized that on the very off chance that some piece of software in the wild depended on that behavior, we could work around that issue by adding two new knobs to /proc that allowed setting the defaults for message queues created without an attr struct separately from the system wide maximums. What is not an option IMO is to leave the current behavior in place. No piece of software should ever rely on setting the system wide maximums in order to get a desired message queue. Such a reliance would be so fundamentally multitasking OS unfriendly as to not really be tolerable. Fortunately, we don't know of any software in the wild that uses this except for a regression test program that caught the issue in the first place. If there is though, we have made accommodations with the two new /proc knobs (and that's all the accommodations such fundamentally broken software can be allowed).. This patch: The various defines for minimums and maximums of the sysctl controllable mqueue values are scattered amongst different files and named inconsistently. Move them all into ipc_namespace.h and make them have consistent names. Additionally, make the number of queues per namespace also have a minimum and maximum and use the same sysctl function as the other two settable variables. Signed-off-by: Doug Ledford Acked-by: Serge E. Hallyn Cc: Amerigo Wang Cc: Joe Korty Cc: Jiri Slaby Acked-by: KOSAKI Motohiro Cc: Manfred Spraul Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipc_namespace.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index 8a297a5e794c..1372b566e1e1 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -91,10 +91,15 @@ static inline void shm_destroy_orphaned(struct ipc_namespace *ns) {} #ifdef CONFIG_POSIX_MQUEUE extern int mq_init_ns(struct ipc_namespace *ns); /* default values */ +#define MIN_QUEUESMAX 1 #define DFLT_QUEUESMAX 256 /* max number of message queues */ +#define HARD_QUEUESMAX 1024 +#define MIN_MSGMAX 1 #define DFLT_MSGMAX 10 /* max number of messages in each queue */ #define HARD_MSGMAX (32768*sizeof(void *)/4) +#define MIN_MSGSIZEMAX 128 #define DFLT_MSGSIZEMAX 8192 /* max message size */ +#define HARD_MSGSIZEMAX (8192*128) #else static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; } #endif -- cgit v1.2.3 From 858ee3784e8105467f1f3017f4ece51cb51d4830 Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Thu, 31 May 2012 16:26:29 -0700 Subject: ipc/mqueue: switch back to using non-max values on create Commit b231cca4381e ("message queues: increase range limits") changed how we create a queue that does not include an attr struct passed to open so that it creates the queue with whatever the maximum values are. However, if the admin has set the maximums to allow flexibility in creating a queue (aka, both a large size and large queue are allowed, but combined they create a queue too large for the RLIMIT_MSGQUEUE of the user), then attempts to create a queue without an attr struct will fail. Switch back to using acceptable defaults regardless of what the maximums are. Note: so far, we only know of a few applications that rely on this behavior (specifically, set the maximums in /proc, then run the application which calls mq_open() without passing in an attr struct, and the application expects the newly created message queue to have the maximum sizes that were set in /proc used on the mq_open() call, and all of those applications that we know of are actually part of regression test suites that were coded to do something like this: for size in 4096 65536 $((1024 * 1024)) $((16 * 1024 * 1024)); do echo $size > /proc/sys/fs/mqueue/msgsize_max mq_open || echo "Error opening mq with size $size" done These test suites that depend on any behavior like this are broken. The concept that programs should rely upon the system wide maximum in order to get their desired results instead of simply using a attr struct to specify what they want is fundamentally unfriendly programming practice for any multi-tasking OS. Fixing this will break those few apps that we know of (and those app authors recognize the brokenness of their code and the need to fix it). However, the following patch "mqueue: separate mqueue default value" allows a workaround in the form of new knobs for the default msg queue creation parameters for any software out there that we don't already know about that might rely on this behavior at the moment. Signed-off-by: Doug Ledford Cc: Serge E. Hallyn Cc: Amerigo Wang Cc: Joe Korty Cc: Jiri Slaby Acked-by: KOSAKI Motohiro Cc: Manfred Spraul Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipc_namespace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index 1372b566e1e1..bde094ee7b0e 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -95,9 +95,11 @@ extern int mq_init_ns(struct ipc_namespace *ns); #define DFLT_QUEUESMAX 256 /* max number of message queues */ #define HARD_QUEUESMAX 1024 #define MIN_MSGMAX 1 +#define DFLT_MSG 10U #define DFLT_MSGMAX 10 /* max number of messages in each queue */ #define HARD_MSGMAX (32768*sizeof(void *)/4) #define MIN_MSGSIZEMAX 128 +#define DFLT_MSGSIZE 8192U #define DFLT_MSGSIZEMAX 8192 /* max message size */ #define HARD_MSGSIZEMAX (8192*128) #else -- cgit v1.2.3 From 5b5c4d1a1440e94994c73dddbad7be0676cd8b9a Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Thu, 31 May 2012 16:26:30 -0700 Subject: ipc/mqueue: update maximums for the mqueue subsystem Commit b231cca4381e ("message queues: increase range limits") changed the maximum size of a message in a message queue from INT_MAX to 8192*128. Unfortunately, we had customers that relied on a size much larger than 8192*128 on their production systems. After reviewing POSIX, we found that it is silent on the maximum message size. We did find a couple other areas in which it was not silent. Fix up the mqueue maximums so that the customer's system can continue to work, and document both the POSIX and real world requirements in ipc_namespace.h so that we don't have this issue crop back up. Also, commit 9cf18e1dd74cd0 ("ipc: HARD_MSGMAX should be higher not lower on 64bit") fiddled with HARD_MSGMAX without realizing that the number was intentionally in place to limit the msg queue depth to one that was small enough to kmalloc an array of pointers (hence why we divided 128k by sizeof(long)). If we wish to meet POSIX requirements, we have no choice but to change our allocation to a vmalloc instead (at least for the large queue size case). With that, it's possible to increase our allowed maximum to the POSIX requirements (or more if we choose). [sfr@canb.auug.org.au: using vmalloc requires including vmalloc.h] Signed-off-by: Doug Ledford Cc: Serge E. Hallyn Cc: Amerigo Wang Cc: Joe Korty Cc: Jiri Slaby Acked-by: KOSAKI Motohiro Cc: Manfred Spraul Signed-off-by: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipc_namespace.h | 47 ++++++++++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index bde094ee7b0e..6e1dd08194fd 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -90,18 +90,41 @@ static inline void shm_destroy_orphaned(struct ipc_namespace *ns) {} #ifdef CONFIG_POSIX_MQUEUE extern int mq_init_ns(struct ipc_namespace *ns); -/* default values */ -#define MIN_QUEUESMAX 1 -#define DFLT_QUEUESMAX 256 /* max number of message queues */ -#define HARD_QUEUESMAX 1024 -#define MIN_MSGMAX 1 -#define DFLT_MSG 10U -#define DFLT_MSGMAX 10 /* max number of messages in each queue */ -#define HARD_MSGMAX (32768*sizeof(void *)/4) -#define MIN_MSGSIZEMAX 128 -#define DFLT_MSGSIZE 8192U -#define DFLT_MSGSIZEMAX 8192 /* max message size */ -#define HARD_MSGSIZEMAX (8192*128) +/* + * POSIX Message Queue default values: + * + * MIN_*: Lowest value an admin can set the maximum unprivileged limit to + * DFLT_*MAX: Default values for the maximum unprivileged limits + * DFLT_{MSG,MSGSIZE}: Default values used when the user doesn't supply + * an attribute to the open call and the queue must be created + * HARD_*: Highest value the maximums can be set to. These are enforced + * on CAP_SYS_RESOURCE apps as well making them inviolate (so make them + * suitably high) + * + * POSIX Requirements: + * Per app minimum openable message queues - 8. This does not map well + * to the fact that we limit the number of queues on a per namespace + * basis instead of a per app basis. So, make the default high enough + * that no given app should have a hard time opening 8 queues. + * Minimum maximum for HARD_MSGMAX - 32767. I bumped this to 65536. + * Minimum maximum for HARD_MSGSIZEMAX - POSIX is silent on this. However, + * we have run into a situation where running applications in the wild + * require this to be at least 5MB, and preferably 10MB, so I set the + * value to 16MB in hopes that this user is the worst of the bunch and + * the new maximum will handle anyone else. I may have to revisit this + * in the future. + */ +#define MIN_QUEUESMAX 1 +#define DFLT_QUEUESMAX 256 +#define HARD_QUEUESMAX 1024 +#define MIN_MSGMAX 1 +#define DFLT_MSG 64U +#define DFLT_MSGMAX 1024 +#define HARD_MSGMAX 65536 +#define MIN_MSGSIZEMAX 128 +#define DFLT_MSGSIZE 8192U +#define DFLT_MSGSIZEMAX (1024*1024) +#define HARD_MSGSIZEMAX (16*1024*1024) #else static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; } #endif -- cgit v1.2.3 From e6315bb154e778391ce64b194756bd3d108dadf6 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Thu, 31 May 2012 16:26:31 -0700 Subject: mqueue: revert bump up DFLT_*MAX Mqueue limitation is slightly naieve parameter likes other ipcs because unprivileged user can consume kernel memory by using ipcs. Thus, too aggressive raise bring us security issue. Example, current setting allow evil unprivileged user use 256GB (= 256 * 1024 * 1024*1024) and it's enough large to system will belome unresponsive. Don't do that. Instead, every admin should adjust the knobs for their own systems. Signed-off-by: KOSAKI Motohiro Acked-by: Doug Ledford Acked-by: Joe Korty Cc: Amerigo Wang Acked-by: Serge E. Hallyn Cc: Jiri Slaby Cc: Manfred Spraul Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipc_namespace.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index 6e1dd08194fd..2488535a32a3 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -118,12 +118,12 @@ extern int mq_init_ns(struct ipc_namespace *ns); #define DFLT_QUEUESMAX 256 #define HARD_QUEUESMAX 1024 #define MIN_MSGMAX 1 -#define DFLT_MSG 64U -#define DFLT_MSGMAX 1024 +#define DFLT_MSG 10U +#define DFLT_MSGMAX 10 #define HARD_MSGMAX 65536 #define MIN_MSGSIZEMAX 128 #define DFLT_MSGSIZE 8192U -#define DFLT_MSGSIZEMAX (1024*1024) +#define DFLT_MSGSIZEMAX 8192 #define HARD_MSGSIZEMAX (16*1024*1024) #else static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; } -- cgit v1.2.3 From cef0184c115e5e4e10498f6548d9526465e72478 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Thu, 31 May 2012 16:26:33 -0700 Subject: mqueue: separate mqueue default value from maximum value Commit b231cca4381e ("message queues: increase range limits") changed mqueue default value when attr parameter is specified NULL from hard coded value to fs.mqueue.{msg,msgsize}_max sysctl value. This made large side effect. When user need to use two mqueue applications 1) using !NULL attr parameter and it require big message size and 2) using NULL attr parameter and only need small size message, app (1) require to raise fs.mqueue.msgsize_max and app (2) consume large memory size even though it doesn't need. Doug Ledford propsed to switch back it to static hard coded value. However it also has a compatibility problem. Some applications might started depend on the default value is tunable. The solution is to separate default value from maximum value. Signed-off-by: KOSAKI Motohiro Signed-off-by: Doug Ledford Acked-by: Doug Ledford Acked-by: Joe Korty Cc: Amerigo Wang Acked-by: Serge E. Hallyn Cc: Jiri Slaby Cc: Manfred Spraul Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipc_namespace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index 2488535a32a3..5499c92a9153 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -62,6 +62,8 @@ struct ipc_namespace { unsigned int mq_queues_max; /* initialized to DFLT_QUEUESMAX */ unsigned int mq_msg_max; /* initialized to DFLT_MSGMAX */ unsigned int mq_msgsize_max; /* initialized to DFLT_MSGSIZEMAX */ + unsigned int mq_msg_default; + unsigned int mq_msgsize_default; /* user_ns which owns the ipc ns */ struct user_namespace *user_ns; -- cgit v1.2.3 From e42d98ebe7d754a2c9fbccd6186721d3ca8679f6 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Thu, 31 May 2012 16:26:38 -0700 Subject: rapidio: add DMA engine support for RIO data transfers Adds DMA Engine framework support into RapidIO subsystem. Uses DMA Engine DMA_SLAVE interface to generate data transfers to/from remote RapidIO target devices. Introduces RapidIO-specific wrapper for prep_slave_sg() interface with an extra parameter to pass target specific information. Uses scatterlist to describe local data buffer. Address flat data buffer on a remote side. Signed-off-by: Alexandre Bounine Cc: Dan Williams Acked-by: Vinod Koul Cc: Li Yang Cc: Matt Porter Cc: Paul Gortmaker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dmaengine.h | 12 ++++++++++++ include/linux/rio.h | 47 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/rio_drv.h | 9 +++++++++ 3 files changed, 68 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index d3fec584e8c3..56377df39124 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -635,6 +635,18 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_sg( dir, flags, NULL); } +#ifdef CONFIG_RAPIDIO_DMA_ENGINE +struct rio_dma_ext; +static inline struct dma_async_tx_descriptor *dmaengine_prep_rio_sg( + struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len, + enum dma_transfer_direction dir, unsigned long flags, + struct rio_dma_ext *rio_ext) +{ + return chan->device->device_prep_slave_sg(chan, sgl, sg_len, + dir, flags, rio_ext); +} +#endif + static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction dir) diff --git a/include/linux/rio.h b/include/linux/rio.h index 4d50611112ba..a90ebadd9da0 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -20,6 +20,9 @@ #include #include #include +#ifdef CONFIG_RAPIDIO_DMA_ENGINE +#include +#endif #define RIO_NO_HOPCOUNT -1 #define RIO_INVALID_DESTID 0xffff @@ -254,6 +257,9 @@ struct rio_mport { u32 phys_efptr; unsigned char name[40]; void *priv; /* Master port private data */ +#ifdef CONFIG_RAPIDIO_DMA_ENGINE + struct dma_device dma; +#endif }; /** @@ -395,6 +401,47 @@ union rio_pw_msg { u32 raw[RIO_PW_MSG_SIZE/sizeof(u32)]; }; +#ifdef CONFIG_RAPIDIO_DMA_ENGINE + +/** + * enum rio_write_type - RIO write transaction types used in DMA transfers + * + * Note: RapidIO specification defines write (NWRITE) and + * write-with-response (NWRITE_R) data transfer operations. + * Existing DMA controllers that service RapidIO may use one of these operations + * for entire data transfer or their combination with only the last data packet + * requires response. + */ +enum rio_write_type { + RDW_DEFAULT, /* default method used by DMA driver */ + RDW_ALL_NWRITE, /* all packets use NWRITE */ + RDW_ALL_NWRITE_R, /* all packets use NWRITE_R */ + RDW_LAST_NWRITE_R, /* last packet uses NWRITE_R, others - NWRITE */ +}; + +struct rio_dma_ext { + u16 destid; + u64 rio_addr; /* low 64-bits of 66-bit RapidIO address */ + u8 rio_addr_u; /* upper 2-bits of 66-bit RapidIO address */ + enum rio_write_type wr_type; /* preferred RIO write operation type */ +}; + +struct rio_dma_data { + /* Local data (as scatterlist) */ + struct scatterlist *sg; /* I/O scatter list */ + unsigned int sg_len; /* size of scatter list */ + /* Remote device address (flat buffer) */ + u64 rio_addr; /* low 64-bits of 66-bit RapidIO address */ + u8 rio_addr_u; /* upper 2-bits of 66-bit RapidIO address */ + enum rio_write_type wr_type; /* preferred RIO write operation type */ +}; + +static inline struct rio_mport *dma_to_mport(struct dma_device *ddev) +{ + return container_of(ddev, struct rio_mport, dma); +} +#endif /* CONFIG_RAPIDIO_DMA_ENGINE */ + /* Architecture and hardware-specific functions */ extern int rio_register_mport(struct rio_mport *); extern int rio_open_inb_mbox(struct rio_mport *, void *, int, int); diff --git a/include/linux/rio_drv.h b/include/linux/rio_drv.h index 7f07470e1ed9..31ad146be316 100644 --- a/include/linux/rio_drv.h +++ b/include/linux/rio_drv.h @@ -377,6 +377,15 @@ void rio_unregister_driver(struct rio_driver *); struct rio_dev *rio_dev_get(struct rio_dev *); void rio_dev_put(struct rio_dev *); +#ifdef CONFIG_RAPIDIO_DMA_ENGINE +extern struct dma_chan *rio_request_dma(struct rio_dev *rdev); +extern void rio_release_dma(struct dma_chan *dchan); +extern struct dma_async_tx_descriptor *rio_dma_prep_slave_sg( + struct rio_dev *rdev, struct dma_chan *dchan, + struct rio_dma_data *data, + enum dma_transfer_direction direction, unsigned long flags); +#endif + /** * rio_name - Get the unique RIO device identifier * @rdev: RIO device -- cgit v1.2.3 From ee62c6b2dc93c09585b51fad18449dc5edb9977f Mon Sep 17 00:00:00 2001 From: Sha Zhengju Date: Thu, 31 May 2012 16:26:41 -0700 Subject: eventfd: change int to __u64 in eventfd_signal() eventfd_ctx->count is an __u64 counter which is allowed to reach ULLONG_MAX. eventfd_write() adds a __u64 value to "count", but the kernel side eventfd_signal() only adds an int value to it. Make them consistent. [akpm@linux-foundation.org: update interface documentation] Signed-off-by: Sha Zhengju Cc: Davide Libenzi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/eventfd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index 91bb4f27238c..3c3ef19a625a 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h @@ -34,7 +34,7 @@ void eventfd_ctx_put(struct eventfd_ctx *ctx); struct file *eventfd_fget(int fd); struct eventfd_ctx *eventfd_ctx_fdget(int fd); struct eventfd_ctx *eventfd_ctx_fileget(struct file *file); -int eventfd_signal(struct eventfd_ctx *ctx, int n); +__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n); ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt); int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait, __u64 *cnt); -- cgit v1.2.3 From ac34ebb3a67e699edcb5ac72f19d31679369dfaa Mon Sep 17 00:00:00 2001 From: Christopher Yeoh Date: Thu, 31 May 2012 16:26:42 -0700 Subject: aio/vfs: cleanup of rw_copy_check_uvector() and compat_rw_copy_check_uvector() A cleanup of rw_copy_check_uvector and compat_rw_copy_check_uvector after changes made to support CMA in an earlier patch. Rather than having an additional check_access parameter to these functions, the first paramater type is overloaded to allow the caller to specify CHECK_IOVEC_ONLY which means check that the contents of the iovec are valid, but do not check the memory that they point to. This is used by process_vm_readv/writev where we need to validate that a iovec passed to the syscall is valid but do not want to check the memory that it points to at this point because it refers to an address space in another process. Signed-off-by: Chris Yeoh Reviewed-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compat.h | 3 +-- include/linux/fs.h | 12 ++++++++++-- 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 5d46217f84ad..4e890394ef99 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -577,8 +577,7 @@ extern ssize_t compat_rw_copy_check_uvector(int type, const struct compat_iovec __user *uvector, unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_pointer, - struct iovec **ret_pointer, - int check_access); + struct iovec **ret_pointer); extern void __user *compat_alloc_user_space(unsigned long len); diff --git a/include/linux/fs.h b/include/linux/fs.h index 038076b27ea4..cf2c5611b19b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -173,6 +173,15 @@ struct inodes_stat_t { #define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FUA) #define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH | REQ_FUA) + +/* + * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector + * that indicates that they should check the contents of the iovec are + * valid, but not check the memory that the iovec elements + * points too. + */ +#define CHECK_IOVEC_ONLY -1 + #define SEL_IN 1 #define SEL_OUT 2 #define SEL_EX 4 @@ -1690,8 +1699,7 @@ struct seq_file; ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_pointer, - struct iovec **ret_pointer, - int check_access); + struct iovec **ret_pointer); extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); -- cgit v1.2.3 From d97b46a64674a267bc41c9e16132ee2a98c3347d Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 31 May 2012 16:26:44 -0700 Subject: syscalls, x86: add __NR_kcmp syscall While doing the checkpoint-restore in the user space one need to determine whether various kernel objects (like mm_struct-s of file_struct-s) are shared between tasks and restore this state. The 2nd step can be solved by using appropriate CLONE_ flags and the unshare syscall, while there's currently no ways for solving the 1st one. One of the ways for checking whether two tasks share e.g. mm_struct is to provide some mm_struct ID of a task to its proc file, but showing such info considered to be not that good for security reasons. Thus after some debates we end up in conclusion that using that named 'comparison' syscall might be the best candidate. So here is it -- __NR_kcmp. It takes up to 5 arguments - the pids of the two tasks (which characteristics should be compared), the comparison type and (in case of comparison of files) two file descriptors. Lookups for pids are done in the caller's PID namespace only. At moment only x86 is supported and tested. [akpm@linux-foundation.org: fix up selftests, warnings] [akpm@linux-foundation.org: include errno.h] [akpm@linux-foundation.org: tweak comment text] Signed-off-by: Cyrill Gorcunov Acked-by: "Eric W. Biederman" Cc: Pavel Emelyanov Cc: Andrey Vagin Cc: KOSAKI Motohiro Cc: Ingo Molnar Cc: H. Peter Anvin Cc: Thomas Gleixner Cc: Glauber Costa Cc: Andi Kleen Cc: Tejun Heo Cc: Matt Helsley Cc: Pekka Enberg Cc: Eric Dumazet Cc: Vasiliy Kulikov Cc: Alexey Dobriyan Cc: Valdis.Kletnieks@vt.edu Cc: Michal Marek Cc: Frederic Weisbecker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kcmp.h | 17 +++++++++++++++++ include/linux/syscalls.h | 2 ++ 2 files changed, 19 insertions(+) create mode 100644 include/linux/kcmp.h (limited to 'include/linux') diff --git a/include/linux/kcmp.h b/include/linux/kcmp.h new file mode 100644 index 000000000000..2dcd1b3aafc8 --- /dev/null +++ b/include/linux/kcmp.h @@ -0,0 +1,17 @@ +#ifndef _LINUX_KCMP_H +#define _LINUX_KCMP_H + +/* Comparison type */ +enum kcmp_type { + KCMP_FILE, + KCMP_VM, + KCMP_FILES, + KCMP_FS, + KCMP_SIGHAND, + KCMP_IO, + KCMP_SYSVSEM, + + KCMP_TYPES, +}; + +#endif /* _LINUX_KCMP_H */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 3de3acb84a95..19439c75c5b2 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -858,4 +858,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid, unsigned long riovcnt, unsigned long flags); +asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, + unsigned long idx1, unsigned long idx2); #endif -- cgit v1.2.3 From fe8c7f5cbf91124987106faa3bdf0c8b955c4cf7 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 31 May 2012 16:26:45 -0700 Subject: c/r: prctl: extend PR_SET_MM to set up more mm_struct entries During checkpoint we dump whole process memory to a file and the dump includes process stack memory. But among stack data itself, the stack carries additional parameters such as command line arguments, environment data and auxiliary vector. So when we do restore procedure and once we've restored stack data itself we need to setup mm_struct::arg_start/end, env_start/end, so restored process would be able to find command line arguments and environment data it had at checkpoint time. The same applies to auxiliary vector. For this reason additional PR_SET_MM_(ARG_START | ARG_END | ENV_START | ENV_END | AUXV) codes are introduced. Signed-off-by: Cyrill Gorcunov Acked-by: Kees Cook Cc: Tejun Heo Cc: Andrew Vagin Cc: Serge Hallyn Cc: Pavel Emelyanov Cc: Vasiliy Kulikov Cc: KAMEZAWA Hiroyuki Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/prctl.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/prctl.h b/include/linux/prctl.h index 78b76e24cc7e..18d84c4b42d8 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -113,6 +113,11 @@ # define PR_SET_MM_START_STACK 5 # define PR_SET_MM_START_BRK 6 # define PR_SET_MM_BRK 7 +# define PR_SET_MM_ARG_START 8 +# define PR_SET_MM_ARG_END 9 +# define PR_SET_MM_ENV_START 10 +# define PR_SET_MM_ENV_END 11 +# define PR_SET_MM_AUXV 12 /* * Set specific pid that is allowed to ptrace the current task. -- cgit v1.2.3 From b32dfe377102ce668775f8b6b1461f7ad428f8b6 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 31 May 2012 16:26:46 -0700 Subject: c/r: prctl: add ability to set new mm_struct::exe_file When we do restore we would like to have a way to setup a former mm_struct::exe_file so that /proc/pid/exe would point to the original executable file a process had at checkpoint time. For this the PR_SET_MM_EXE_FILE code is introduced. This option takes a file descriptor which will be set as a source for new /proc/$pid/exe symlink. Note it allows to change /proc/$pid/exe if there are no VM_EXECUTABLE vmas present for current process, simply because this feature is a special to C/R and mm::num_exe_file_vmas become meaningless after that. To minimize the amount of transition the /proc/pid/exe symlink might have, this feature is implemented in one-shot manner. Thus once changed the symlink can't be changed again. This should help sysadmins to monitor the symlinks over all process running in a system. In particular one could make a snapshot of processes and ring alarm if there unexpected changes of /proc/pid/exe's in a system. Note -- this feature is available iif CONFIG_CHECKPOINT_RESTORE is set and the caller must have CAP_SYS_RESOURCE capability granted, otherwise the request to change symlink will be rejected. Signed-off-by: Cyrill Gorcunov Reviewed-by: Oleg Nesterov Cc: KOSAKI Motohiro Cc: Pavel Emelyanov Cc: Kees Cook Cc: Tejun Heo Cc: Matt Helsley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/prctl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/prctl.h b/include/linux/prctl.h index 18d84c4b42d8..711e0a30aacc 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -118,6 +118,7 @@ # define PR_SET_MM_ENV_START 10 # define PR_SET_MM_ENV_END 11 # define PR_SET_MM_AUXV 12 +# define PR_SET_MM_EXE_FILE 13 /* * Set specific pid that is allowed to ptrace the current task. -- cgit v1.2.3 From 8b3ec6814c83d76b85bd13badc48552836c24839 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 17:11:23 -0400 Subject: take security_mmap_file() outside of ->mmap_sem Signed-off-by: Al Viro --- include/linux/security.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index f1bae0963ddc..4e5a73cdbbef 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1745,8 +1745,8 @@ int security_file_permission(struct file *file, int mask); int security_file_alloc(struct file *file); void security_file_free(struct file *file); int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg); -int security_mmap_file(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags); +int security_mmap_file(struct file *file, unsigned long prot, + unsigned long flags); int security_mmap_addr(unsigned long addr); int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot); @@ -2183,8 +2183,7 @@ static inline int security_file_ioctl(struct file *file, unsigned int cmd, return 0; } -static inline int security_mmap_file(struct file *file, unsigned long reqprot, - unsigned long prot, +static inline int security_mmap_file(struct file *file, unsigned long prot, unsigned long flags) { return 0; -- cgit v1.2.3 From e3fc629d7bb70848fbf479688a66d4e76dff46ac Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 20:08:42 -0400 Subject: switch aio and shm to do_mmap_pgoff(), make do_mmap() static after all, 0 bytes and 0 pages is the same thing... Signed-off-by: Al Viro --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 7d5c37f24c63..4189e0d0ac05 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1394,7 +1394,7 @@ extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned lo extern unsigned long mmap_region(struct file *file, unsigned long addr, unsigned long len, unsigned long flags, vm_flags_t vm_flags, unsigned long pgoff); -extern unsigned long do_mmap(struct file *, unsigned long, +extern unsigned long do_mmap_pgoff(struct file *, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); extern int do_munmap(struct mm_struct *, unsigned long, size_t); -- cgit v1.2.3 From c3b2da314834499f34cba94f7053e55f6d6f92d8 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 26 Mar 2012 09:59:21 -0400 Subject: fs: introduce inode operation ->update_time Btrfs has to make sure we have space to allocate new blocks in order to modify the inode, so updating time can fail. We've gotten around this by having our own file_update_time but this is kind of a pain, and Christoph has indicated he would like to make xfs do something different with atime updates. So introduce ->update_time, where we will deal with i_version an a/m/c time updates and indicate which changes need to be made. The normal version just does what it has always done, updates the time and marks the inode dirty, and then filesystems can choose to do something different. I've gone through all of the users of file_update_time and made them check for errors with the exception of the fault code since it's complicated and I wasn't quite sure what to do there, also Jan is going to be pushing the file time updates into page_mkwrite for those who have it so that should satisfy btrfs and make it not a big deal to check the file_update_time() return code in the generic fault path. Thanks, Signed-off-by: Josef Bacik --- include/linux/fs.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index cdc1a9630948..57fc70574d20 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1684,6 +1684,7 @@ struct inode_operations { void (*truncate_range)(struct inode *, loff_t, loff_t); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); + int (*update_time)(struct inode *, struct timespec *, int); } ____cacheline_aligned; struct seq_file; @@ -1843,6 +1844,13 @@ static inline void inode_inc_iversion(struct inode *inode) spin_unlock(&inode->i_lock); } +enum file_time_flags { + S_ATIME = 1, + S_MTIME = 2, + S_CTIME = 4, + S_VERSION = 8, +}; + extern void touch_atime(struct path *); static inline void file_accessed(struct file *file) { @@ -2579,7 +2587,7 @@ extern int inode_change_ok(const struct inode *, struct iattr *); extern int inode_newsize_ok(const struct inode *, loff_t offset); extern void setattr_copy(struct inode *inode, const struct iattr *attr); -extern void file_update_time(struct file *file); +extern int file_update_time(struct file *file); extern int generic_show_options(struct seq_file *m, struct dentry *root); extern void save_mount_options(struct super_block *sb, char *options); -- cgit v1.2.3 From 16b1c1cd71176ab0a76b26818fbf12db9183ed57 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 May 2012 17:30:19 +0200 Subject: vfs: retry last component if opening stale dentry NFS optimizes away d_revalidates for last component of open. This means that open itself can find the dentry stale. This patch allows the filesystem to return EOPENSTALE and the VFS will retry the lookup on just the last component if possible. If the lookup was done using RCU mode, including the last component, then this is not possible since the parent dentry is lost. In this case fall back to non-RCU lookup. Currently this is not used since NFS will always leave RCU mode. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- include/linux/errno.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/errno.h b/include/linux/errno.h index 2d09bfa5c262..e0de516374da 100644 --- a/include/linux/errno.h +++ b/include/linux/errno.h @@ -17,6 +17,7 @@ #define ENOIOCTLCMD 515 /* No ioctl command */ #define ERESTART_RESTARTBLOCK 516 /* restart by calling sys_restart_syscall */ #define EPROBE_DEFER 517 /* Driver requests probe retry */ +#define EOPENSTALE 518 /* open found a stale dentry */ /* Defined for the NFSv3 protocol */ #define EBADHANDLE 521 /* Illegal NFS file handle */ -- cgit v1.2.3 From 754421c8cab1a568be844a7069fe04c1cf6391b8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 26 Apr 2012 18:31:00 -0400 Subject: HAVE_RESTORE_SIGMASK is defined on all architectures now Everyone either defines it in arch thread_info.h or has TIF_RESTORE_SIGMASK and picks default set_restore_sigmask() in linux/thread_info.h. Kill the ifdefs, slap #error in linux/thread_info.h to catch breakage when new ones get merged. Signed-off-by: Al Viro --- include/linux/thread_info.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index db78775eff3b..eee729428683 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -129,6 +129,10 @@ static inline void set_restore_sigmask(void) } #endif /* TIF_RESTORE_SIGMASK && !HAVE_SET_RESTORE_SIGMASK */ +#ifndef HAVE_SET_RESTORE_SIGMASK +#error "no set_restore_sigmask() provided and default one won't work" +#endif + #endif /* __KERNEL__ */ #endif /* _LINUX_THREAD_INFO_H */ -- cgit v1.2.3 From 4ebefe3ec729003443daf153ed6fad1739271283 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 26 Apr 2012 22:29:20 -0400 Subject: new helpers: {clear,test,test_and_clear}_restore_sigmask() helpers parallel to set_restore_sigmask(), used in the next commits Signed-off-by: Al Viro --- include/linux/thread_info.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index eee729428683..ed279701ac79 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -127,6 +127,18 @@ static inline void set_restore_sigmask(void) set_thread_flag(TIF_RESTORE_SIGMASK); set_thread_flag(TIF_SIGPENDING); } +static inline void clear_restore_sigmask(void) +{ + clear_thread_flag(TIF_RESTORE_SIGMASK); +} +static inline bool test_restore_sigmask(void) +{ + return test_thread_flag(TIF_RESTORE_SIGMASK); +} +static inline bool test_and_clear_restore_sigmask(void) +{ + return test_and_clear_thread_flag(TIF_RESTORE_SIGMASK); +} #endif /* TIF_RESTORE_SIGMASK && !HAVE_SET_RESTORE_SIGMASK */ #ifndef HAVE_SET_RESTORE_SIGMASK -- cgit v1.2.3 From 51a7b448d4134e3e8eec633435e3e8faee14a828 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 21 May 2012 23:33:55 -0400 Subject: new helper: restore_saved_sigmask() first fruits of ..._restore_sigmask() helpers: now we can take boilerplate "signal didn't have a handler, clear RESTORE_SIGMASK and restore the blocked mask from ->saved_mask" into a common helper. Open-coded instances switched... Signed-off-by: Al Viro --- include/linux/sched.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 660c8ae93471..f1b46b88f6f5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2207,6 +2207,12 @@ extern int send_sigqueue(struct sigqueue *, struct task_struct *, int group); extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); extern int do_sigaltstack(const stack_t __user *, stack_t __user *, unsigned long); +static inline void restore_saved_sigmask(void) +{ + if (test_and_clear_restore_sigmask()) + set_current_blocked(¤t->saved_sigmask); +} + static inline int kill_cad_pid(int sig, int priv) { return kill_pid(cad_pid, sig, priv); -- cgit v1.2.3 From b7f9a11a6cf1ea9ee6be3eb2b90d91327a09ad14 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 2 May 2012 09:59:21 -0400 Subject: new helper: sigmask_to_save() replace boilerplate "should we use ->saved_sigmask or ->blocked?" with calls of obvious inlined helper... Signed-off-by: Al Viro --- include/linux/sched.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index f1b46b88f6f5..ded3fb63fb06 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2213,6 +2213,14 @@ static inline void restore_saved_sigmask(void) set_current_blocked(¤t->saved_sigmask); } +static inline sigset_t *sigmask_to_save(void) +{ + sigset_t *res = ¤t->blocked; + if (unlikely(test_restore_sigmask())) + res = ¤t->saved_sigmask; + return res; +} + static inline int kill_cad_pid(int sig, int priv) { return kill_pid(cad_pid, sig, priv); -- cgit v1.2.3 From edd63a2763bdae0daa4f0a4d4c5d61d1154352a5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 27 Apr 2012 13:42:45 -0400 Subject: set_restore_sigmask() is never called without SIGPENDING (and never should be) Signed-off-by: Al Viro --- include/linux/thread_info.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index ed279701ac79..ccc1899bd62e 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -8,6 +8,7 @@ #define _LINUX_THREAD_INFO_H #include +#include struct timespec; struct compat_timespec; @@ -125,7 +126,7 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag) static inline void set_restore_sigmask(void) { set_thread_flag(TIF_RESTORE_SIGMASK); - set_thread_flag(TIF_SIGPENDING); + WARN_ON(!test_thread_flag(TIF_SIGPENDING)); } static inline void clear_restore_sigmask(void) { -- cgit v1.2.3 From 77097ae503b170120ab66dd1d547f8577193f91f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 27 Apr 2012 13:58:59 -0400 Subject: most of set_current_blocked() callers want SIGKILL/SIGSTOP removed from set Only 3 out of 63 do not. Renamed the current variant to __set_current_blocked(), added set_current_blocked() that will exclude unblockable signals, switched open-coded instances to it. Signed-off-by: Al Viro --- include/linux/sched.h | 2 +- include/linux/signal.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index ded3fb63fb06..f34437e835a7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2210,7 +2210,7 @@ extern int do_sigaltstack(const stack_t __user *, stack_t __user *, unsigned lon static inline void restore_saved_sigmask(void) { if (test_and_clear_restore_sigmask()) - set_current_blocked(¤t->saved_sigmask); + __set_current_blocked(¤t->saved_sigmask); } static inline sigset_t *sigmask_to_save(void) diff --git a/include/linux/signal.h b/include/linux/signal.h index 17046cc484bc..065e76330398 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -250,7 +250,8 @@ extern long do_sigpending(void __user *, unsigned long); extern int do_sigtimedwait(const sigset_t *, siginfo_t *, const struct timespec *); extern int sigprocmask(int, sigset_t *, sigset_t *); -extern void set_current_blocked(const sigset_t *); +extern void set_current_blocked(sigset_t *); +extern void __set_current_blocked(const sigset_t *); extern int show_unhandled_signals; extern int sigsuspend(sigset_t *); -- cgit v1.2.3 From efee984c27b67e3ebef40410f35671997441b57c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 28 Apr 2012 02:04:15 -0400 Subject: new helper: signal_delivered() Does block_sigmask() + tracehook_signal_handler(); called when sigframe has been successfully built. All architectures converted to it; block_sigmask() itself is gone now (merged into this one). I'm still not too happy with the signature, but that's a separate story (IMO we need a structure that would contain signal number + siginfo + k_sigaction, so that get_signal_to_deliver() would fill one, signal_delivered(), handle_signal() and probably setup...frame() - take one). Signed-off-by: Al Viro --- include/linux/signal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index 065e76330398..26b424adc842 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -256,7 +256,7 @@ extern int show_unhandled_signals; extern int sigsuspend(sigset_t *); extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie); -extern void block_sigmask(struct k_sigaction *ka, int signr); +extern void signal_delivered(int sig, siginfo_t *info, struct k_sigaction *ka, struct pt_regs *regs, int stepping); extern void exit_signals(struct task_struct *tsk); extern struct kmem_cache *sighand_cachep; -- cgit v1.2.3 From f309532bf3e1cc1b787403d84e3039812a7dbe50 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 2 Jun 2012 15:21:43 -0700 Subject: tty: Revert the tty locking series, it needs more work This reverts the tty layer change to use per-tty locking, because it's not correct yet, and fixing it will require some more deep surgery. The main revert is d29f3ef39be4 ("tty_lock: Localise the lock"), but there are several smaller commits that built upon it, they also get reverted here. The list of reverted commits is: fde86d310886 - tty: add lockdep annotations 8f6576ad476b - tty: fix ldisc lock inversion trace d3ca8b64b97e - pty: Fix lock inversion b1d679afd766 - tty: drop the pty lock during hangup abcefe5fc357 - tty/amiserial: Add missing argument for tty_unlock() fd11b42e3598 - cris: fix missing tty arg in wait_event_interruptible_tty call d29f3ef39be4 - tty_lock: Localise the lock The revert had a trivial conflict in the 68360serial.c staging driver that got removed in the meantime. Signed-off-by: Linus Torvalds --- include/linux/tty.h | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 4990ef2b1fb7..9f47ab540f65 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -268,7 +268,6 @@ struct tty_struct { struct mutex ldisc_mutex; struct tty_ldisc *ldisc; - struct mutex legacy_mutex; struct mutex termios_mutex; spinlock_t ctrl_lock; /* Termios values are protected by the termios mutex */ @@ -606,12 +605,8 @@ extern long vt_compat_ioctl(struct tty_struct *tty, /* tty_mutex.c */ /* functions for preparation of BKL removal */ -extern void __lockfunc tty_lock(struct tty_struct *tty); -extern void __lockfunc tty_unlock(struct tty_struct *tty); -extern void __lockfunc tty_lock_pair(struct tty_struct *tty, - struct tty_struct *tty2); -extern void __lockfunc tty_unlock_pair(struct tty_struct *tty, - struct tty_struct *tty2); +extern void __lockfunc tty_lock(void) __acquires(tty_lock); +extern void __lockfunc tty_unlock(void) __releases(tty_lock); /* * this shall be called only from where BTM is held (like close) @@ -626,9 +621,9 @@ extern void __lockfunc tty_unlock_pair(struct tty_struct *tty, static inline void tty_wait_until_sent_from_close(struct tty_struct *tty, long timeout) { - tty_unlock(tty); /* tty->ops->close holds the BTM, drop it while waiting */ + tty_unlock(); /* tty->ops->close holds the BTM, drop it while waiting */ tty_wait_until_sent(tty, timeout); - tty_lock(tty); + tty_lock(); } /* @@ -643,16 +638,16 @@ static inline void tty_wait_until_sent_from_close(struct tty_struct *tty, * * Do not use in new code. */ -#define wait_event_interruptible_tty(tty, wq, condition) \ +#define wait_event_interruptible_tty(wq, condition) \ ({ \ int __ret = 0; \ if (!(condition)) { \ - __wait_event_interruptible_tty(tty, wq, condition, __ret); \ + __wait_event_interruptible_tty(wq, condition, __ret); \ } \ __ret; \ }) -#define __wait_event_interruptible_tty(tty, wq, condition, ret) \ +#define __wait_event_interruptible_tty(wq, condition, ret) \ do { \ DEFINE_WAIT(__wait); \ \ @@ -661,9 +656,9 @@ do { \ if (condition) \ break; \ if (!signal_pending(current)) { \ - tty_unlock(tty); \ + tty_unlock(); \ schedule(); \ - tty_lock(tty); \ + tty_lock(); \ continue; \ } \ ret = -ERESTARTSYS; \ -- cgit v1.2.3 From 2f9d3df8aa1cc3c6db5cfa0bad3f0745e04cc27d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 3 Jun 2012 14:50:19 -0700 Subject: vfs: move inode stat information closer together The comment above it says "Stat data, not accessed from path walking", but in fact some of inode fields we use for the common stat data was way down at the end of the inode, causing unnecessary cache misses for the common stat operations. The inode structure is pretty big, and this can change padding depending on field width, but at least on the common 64-bit configurations this doesn't change the size. Some of our inode layout has historically been to tro to avoid unnecessary padding fields, but cache locality is at least as important for layout, if not more. Noticed by looking at kernel profiles, and noticing that the "i_blkbits" access stood out like a sore thumb. Signed-off-by: Linus Torvalds --- include/linux/fs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 51978ed43e97..17fd887c798f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -802,13 +802,14 @@ struct inode { unsigned int __i_nlink; }; dev_t i_rdev; + loff_t i_size; struct timespec i_atime; struct timespec i_mtime; struct timespec i_ctime; spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ unsigned short i_bytes; + unsigned int i_blkbits; blkcnt_t i_blocks; - loff_t i_size; #ifdef __NEED_I_SIZE_ORDERED seqcount_t i_size_seqcount; @@ -828,9 +829,8 @@ struct inode { struct list_head i_dentry; struct rcu_head i_rcu; }; - atomic_t i_count; - unsigned int i_blkbits; u64 i_version; + atomic_t i_count; atomic_t i_dio_count; atomic_t i_writecount; const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ -- cgit v1.2.3 From 68e3e92620c323703bc7db75c2ba15239ee85c39 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 3 Jun 2012 20:05:57 -0700 Subject: Revert "mm: compaction: handle incorrect MIGRATE_UNMOVABLE type pageblocks" This reverts commit 5ceb9ce6fe9462a298bb2cd5c9f1ca6cb80a0199. That commit seems to be the cause of the mm compation list corruption issues that Dave Jones reported. The locking (or rather, absense there-of) is dubious, as is the use of the 'page' variable once it has been found to be outside the pageblock range. So revert it for now, we can re-visit this for 3.6. If we even need to: as Minchan Kim says, "The patch wasn't a bug fix and even test workload was very theoretical". Reported-and-tested-by: Dave Jones Acked-by: Hugh Dickins Acked-by: KOSAKI Motohiro Acked-by: Minchan Kim Cc: Bartlomiej Zolnierkiewicz Cc: Kyungmin Park Cc: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index e988037abd2a..51a90b7f2d60 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -1,8 +1,6 @@ #ifndef _LINUX_COMPACTION_H #define _LINUX_COMPACTION_H -#include - /* Return values for compact_zone() and try_to_compact_pages() */ /* compaction didn't start as it was not possible or direct reclaim was more suitable */ #define COMPACT_SKIPPED 0 @@ -13,23 +11,6 @@ /* The full zone was compacted */ #define COMPACT_COMPLETE 3 -/* - * compaction supports three modes - * - * COMPACT_ASYNC_MOVABLE uses asynchronous migration and only scans - * MIGRATE_MOVABLE pageblocks as migration sources and targets. - * COMPACT_ASYNC_UNMOVABLE uses asynchronous migration and only scans - * MIGRATE_MOVABLE pageblocks as migration sources. - * MIGRATE_UNMOVABLE pageblocks are scanned as potential migration - * targets and convers them to MIGRATE_MOVABLE if possible - * COMPACT_SYNC uses synchronous migration and scans all pageblocks - */ -enum compact_mode { - COMPACT_ASYNC_MOVABLE, - COMPACT_ASYNC_UNMOVABLE, - COMPACT_SYNC, -}; - #ifdef CONFIG_COMPACTION extern int sysctl_compact_memory; extern int sysctl_compaction_handler(struct ctl_table *table, int write, -- cgit v1.2.3 From ae58d1e406986f31d1e88b32f5ac601506c196d8 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Fri, 18 May 2012 09:29:34 -0600 Subject: i2c: Add generic I2C multiplexer using pinctrl API This is useful for SoCs whose I2C module's signals can be routed to different sets of pins at run-time, using the pinctrl API. +-----+ +-----+ | dev | | dev | +------------------------+ +-----+ +-----+ | SoC | | | | /----|------+--------+ | +---+ +------+ | child bus A, on first set of pins | |I2C|---|Pinmux| | | +---+ +------+ | child bus B, on second set of pins | \----|------+--------+--------+ | | | | | +------------------------+ +-----+ +-----+ +-----+ | dev | | dev | | dev | +-----+ +-----+ +-----+ Signed-off-by: Stephen Warren Acked-by: Linus Walleij Acked-by: Rob Herring Signed-off-by: Wolfram Sang --- include/linux/i2c-mux-pinctrl.h | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 include/linux/i2c-mux-pinctrl.h (limited to 'include/linux') diff --git a/include/linux/i2c-mux-pinctrl.h b/include/linux/i2c-mux-pinctrl.h new file mode 100644 index 000000000000..a65c86429e84 --- /dev/null +++ b/include/linux/i2c-mux-pinctrl.h @@ -0,0 +1,41 @@ +/* + * i2c-mux-pinctrl platform data + * + * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef _LINUX_I2C_MUX_PINCTRL_H +#define _LINUX_I2C_MUX_PINCTRL_H + +/** + * struct i2c_mux_pinctrl_platform_data - Platform data for i2c-mux-pinctrl + * @parent_bus_num: Parent I2C bus number + * @base_bus_num: Base I2C bus number for the child busses. 0 for dynamic. + * @bus_count: Number of child busses. Also the number of elements in + * @pinctrl_states + * @pinctrl_states: The names of the pinctrl state to select for each child bus + * @pinctrl_state_idle: The pinctrl state to select when no child bus is being + * accessed. If NULL, the most recently used pinctrl state will be left + * selected. + */ +struct i2c_mux_pinctrl_platform_data { + int parent_bus_num; + int base_bus_num; + int bus_count; + const char **pinctrl_states; + const char *pinctrl_state_idle; +}; + +#endif -- cgit v1.2.3 From 1549210fcc17e9ae20c09ac8cd4c48a8dfd431bd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Jun 2012 09:16:47 -0400 Subject: NFSv4: Fix an Oops in the open recovery code The open recovery code does not need to request a new value for the mdsthreshold, and so does not allocate a struct nfs4_threshold. The problem is that encode_getfattr_open() will still request an mdsthreshold, and so we end up Oopsing in decode_attr_mdsthreshold. This patch fixes encode_getfattr_open so that it doesn't request an mdsthreshold when the caller isn't asking for one. It also fixes decode_attr_mdsthreshold so that it errors if the server returns an mdsthreshold that we didn't ask for (instead of Oopsing). Signed-off-by: Trond Myklebust Cc: Andy Adamson --- include/linux/nfs_xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index d1a7bf51c326..7519baef025b 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -348,6 +348,7 @@ struct nfs_openargs { const struct qstr * name; const struct nfs_server *server; /* Needed for ID mapping */ const u32 * bitmask; + const u32 * open_bitmap; __u32 claim; struct nfs4_sequence_args seq_args; }; -- cgit v1.2.3 From fffaee365fded09f9ebf2db19066065fa54323c3 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 5 Jun 2012 21:36:33 +0400 Subject: radix-tree: fix contiguous iterator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes bug in macro radix_tree_for_each_contig(). If radix_tree_next_slot() sees NULL in next slot it returns NULL, but following radix_tree_next_chunk() switches iterating into next chunk. As result iterating becomes non-contiguous and breaks vfs "splice" and all its users. Signed-off-by: Konstantin Khlebnikov Reported-and-bisected-by: Hans de Bruin Reported-and-bisected-by: Ondrej Zary Reported-bisected-and-tested-by: Toralf Förster Link: https://lkml.org/lkml/2012/6/5/64 Cc: stable # 3.4.x Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 0d04cd69ab9b..ffc444c38b0a 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -368,8 +368,11 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags) iter->index++; if (likely(*slot)) return slot; - if (flags & RADIX_TREE_ITER_CONTIG) + if (flags & RADIX_TREE_ITER_CONTIG) { + /* forbid switching to the next chunk */ + iter->next_index = 0; break; + } } } return NULL; -- cgit v1.2.3 From 9bce008bae8b57bc7b007bcc2071d1247a527120 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Jun 2012 18:32:03 -0400 Subject: NFS: Fix a commit bug The new commit code fails to copy the verifier into the wb_verf field of _all_ the nfs_page structures; it only copies it into the first entry. The consequence is that most requests end up failing to match in nfs_commit_release. Fix is to copy the verifier into the req->wb_verf field in nfs_write_completion. Signed-off-by: Trond Myklebust Cc: Fred Isaman --- include/linux/nfs_xdr.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 7519baef025b..8aadd90b808a 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1237,6 +1237,7 @@ struct nfs_pgio_header { struct list_head rpc_list; atomic_t refcnt; struct nfs_page *req; + struct nfs_writeverf *verf; struct pnfs_layout_segment *lseg; loff_t io_start; const struct rpc_call_ops *mds_ops; @@ -1274,6 +1275,7 @@ struct nfs_write_data { struct nfs_write_header { struct nfs_pgio_header header; struct nfs_write_data rpc_data; + struct nfs_writeverf verf; }; struct nfs_mds_commit_info { -- cgit v1.2.3 From 2a0fe914a38745f5b03534c4e4f4056cbd6978b8 Mon Sep 17 00:00:00 2001 From: Yong Ding Date: Tue, 15 May 2012 13:09:43 +0800 Subject: mmc: sdio: fix setting card data bus width as 4-bit SDIO_CCCR_IF[1:0] in SDIO card is used for card data bus width setting as below: 00b: 1-bit bus 01b: Reserved 10b: 4-bit bus 11b: 8-bit bus (only for embedded SDIO) And sdio_enable_wide is for setting data bus width as 4-bit. But currently, it first reads the register, second OR' 1b with SDIO_CCCR_IF[1], and then writes it back. As we can see, this is based on such assumption that the SDIO_CCCR_IF[0] is always 0. Apparently, this is not right. Signed-off-by: Yong Ding Acked-by: Philip Rakity Signed-off-by: Chris Ball --- include/linux/mmc/sdio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/sdio.h b/include/linux/mmc/sdio.h index c9fe66c58f8f..17446d3c3602 100644 --- a/include/linux/mmc/sdio.h +++ b/include/linux/mmc/sdio.h @@ -98,7 +98,9 @@ #define SDIO_CCCR_IF 0x07 /* bus interface controls */ +#define SDIO_BUS_WIDTH_MASK 0x03 /* data bus width setting */ #define SDIO_BUS_WIDTH_1BIT 0x00 +#define SDIO_BUS_WIDTH_RESERVED 0x01 #define SDIO_BUS_WIDTH_4BIT 0x02 #define SDIO_BUS_ECSI 0x20 /* Enable continuous SPI interrupt */ #define SDIO_BUS_SCSI 0x40 /* Support continuous SPI interrupt */ -- cgit v1.2.3 From c1174876874dcf8986806e4dad3d7d07af20b439 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 31 May 2012 14:47:33 +0200 Subject: sched: Fix domain iteration Weird topologies can lead to asymmetric domain setups. This needs further consideration since these setups are typically non-minimal too. For now, make it work by adding an extra mask selecting which CPUs are allowed to iterate up. The topology that triggered it is the one from David Rientjes: 10 20 20 30 20 10 20 20 20 20 10 20 30 20 20 10 resulting in boxes that wouldn't even boot. Reported-by: David Rientjes Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-3p86l9cuaqnxz7uxsojmz5rm@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6029d8c54476..ac321d753470 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -876,6 +876,8 @@ struct sched_group_power { * Number of busy cpus in this group. */ atomic_t nr_busy_cpus; + + unsigned long cpumask[0]; /* iteration mask */ }; struct sched_group { @@ -900,6 +902,15 @@ static inline struct cpumask *sched_group_cpus(struct sched_group *sg) return to_cpumask(sg->cpumask); } +/* + * cpumask masking which cpus in the group are allowed to iterate up the domain + * tree. + */ +static inline struct cpumask *sched_group_mask(struct sched_group *sg) +{ + return to_cpumask(sg->sgp->cpumask); +} + /** * group_first_cpu - Returns the first cpu in the cpumask of a sched_group. * @group: The group whose first cpu is to be returned. -- cgit v1.2.3 From 0b0d9cf6ec7bab91977da2d71c09157f110f7c2e Mon Sep 17 00:00:00 2001 From: Arun Sharma Date: Fri, 20 Apr 2012 15:41:34 -0700 Subject: perf: Limit callchains to 127 Stack depth of 255 seems excessive, given that copy_from_user_nmi() could be slow. Signed-off-by: Arun Sharma Cc: Linus Torvalds Cc: linux-kernel@vger.kernel.org Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1334961696-19580-3-git-send-email-asharma@fb.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1817d4015e5f..45db49f64bb4 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -555,7 +555,7 @@ enum perf_event_type { PERF_RECORD_MAX, /* non-ABI */ }; -#define PERF_MAX_STACK_DEPTH 255 +#define PERF_MAX_STACK_DEPTH 127 enum perf_callchain_context { PERF_CONTEXT_HV = (__u64)-32, -- cgit v1.2.3 From aa9b16306e3243229580ff889cc59fd66bf77973 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 10 May 2012 16:41:44 -0700 Subject: rcu: Precompute RCU_FAST_NO_HZ timer offsets When a CPU is entering dyntick-idle mode, tick_nohz_stop_sched_tick() calls rcu_needs_cpu() see if RCU needs that CPU, and, if not, computes the next wakeup time based on the timer wheels. Only later, when actually entering the idle loop, rcu_prepare_for_idle() will be invoked. In some cases, rcu_prepare_for_idle() will post timers to wake the CPU back up. But all for naught: The next wakeup time for the CPU has already been computed, and posting a timer afterwards does not force that wakeup time to be recomputed. This means that rcu_prepare_for_idle()'s have no effect. This is not a problem on a busy system because something else will wake up the CPU soon enough. However, on lightly loaded systems, the CPU might stay asleep for a considerable length of time. If that CPU has a callback that the rest of the system is waiting on, the system might run very slowly or (in theory) even hang. This commit avoids this problem by having rcu_needs_cpu() give tick_nohz_stop_sched_tick() an estimate of when RCU will need the CPU to wake back up, which tick_nohz_stop_sched_tick() takes into account when programming the CPU's wakeup time. An alternative approach is for rcu_prepare_for_idle() to use hrtimers instead of normal timers, but timers are much more efficient than are hrtimers for frequently and repeatedly posting and cancelling a given timer, which is exactly what RCU_FAST_NO_HZ does. Reported-by: Pascal Chapperon Reported-by: Heiko Carstens Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Tested-by: Heiko Carstens Tested-by: Pascal Chapperon --- include/linux/rcutiny.h | 6 ++++-- include/linux/rcutree.h | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index adb5e5a38cae..854dc4c5c271 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -87,8 +87,9 @@ static inline void kfree_call_rcu(struct rcu_head *head, #ifdef CONFIG_TINY_RCU -static inline int rcu_needs_cpu(int cpu) +static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) { + *delta_jiffies = ULONG_MAX; return 0; } @@ -96,8 +97,9 @@ static inline int rcu_needs_cpu(int cpu) int rcu_preempt_needs_cpu(void); -static inline int rcu_needs_cpu(int cpu) +static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) { + *delta_jiffies = ULONG_MAX; return rcu_preempt_needs_cpu(); } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 3c6083cde4fc..952b79339304 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -32,7 +32,7 @@ extern void rcu_init(void); extern void rcu_note_context_switch(int cpu); -extern int rcu_needs_cpu(int cpu); +extern int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies); extern void rcu_cpu_stall_reset(void); /* -- cgit v1.2.3 From d1992b169d31f339dc5ea4e9f312567c8cf322a3 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Thu, 17 May 2012 22:35:46 +0000 Subject: netfilter: xt_HMARK: fix endianness and provide consistent hashing This patch addresses two issues: a) Fix usage of u32 and __be32 that causes endianess warnings via sparse. b) Ensure consistent hashing in a cluster that is composed of big and little endian systems. Thus, we obtain the same hash mark in an heterogeneous cluster. Reported-by: Dan Carpenter Signed-off-by: Hans Schillstrom Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/xt_HMARK.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/xt_HMARK.h b/include/linux/netfilter/xt_HMARK.h index abb1650940d2..826fc5807577 100644 --- a/include/linux/netfilter/xt_HMARK.h +++ b/include/linux/netfilter/xt_HMARK.h @@ -27,7 +27,12 @@ union hmark_ports { __u16 src; __u16 dst; } p16; + struct { + __be16 src; + __be16 dst; + } b16; __u32 v32; + __be32 b32; }; struct xt_hmark_info { -- cgit v1.2.3 From bafb282df29c1524b1617019adebd6d0c3eb7a47 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 7 Jun 2012 14:21:11 -0700 Subject: c/r: prctl: update prctl_set_mm_exe_file() after mm->num_exe_file_vmas removal A fix for commit b32dfe377102 ("c/r: prctl: add ability to set new mm_struct::exe_file"). After removing mm->num_exe_file_vmas kernel keeps mm->exe_file until final mmput(), it never becomes NULL while task is alive. We can check for other mapped files in mm instead of checking mm->num_exe_file_vmas, and mark mm with flag MMF_EXE_FILE_CHANGED in order to forbid second changing of mm->exe_file. Signed-off-by: Konstantin Khlebnikov Reviewed-by: Cyrill Gorcunov Cc: Oleg Nesterov Cc: Matt Helsley Cc: Kees Cook Cc: KOSAKI Motohiro Cc: Tejun Heo Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6029d8c54476..c688d4cc2e40 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -439,6 +439,7 @@ extern int get_dumpable(struct mm_struct *mm); /* leave room for more dump flags */ #define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ #define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */ +#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) -- cgit v1.2.3 From 300f786b2683f8bb1ec0afb6e1851183a479c86d Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 7 Jun 2012 14:21:12 -0700 Subject: c/r: prctl: add ability to get clear_tid_address Zero is written at clear_tid_address when the process exits. This functionality is used by pthread_join(). We already have sys_set_tid_address() to change this address for the current task but there is no way to obtain it from user space. Without the ability to find this address and dump it we can't restore pthread'ed apps which call pthread_join() once they have been restored. This patch introduces the PR_GET_TID_ADDRESS prctl option which allows the current process to obtain own clear_tid_address. This feature is available iif CONFIG_CHECKPOINT_RESTORE is set. [akpm@linux-foundation.org: fix prctl numbering] Signed-off-by: Andrew Vagin Signed-off-by: Cyrill Gorcunov Cc: Pedro Alves Cc: Oleg Nesterov Cc: Pavel Emelyanov Cc: Tejun Heo Acked-by: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/prctl.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/prctl.h b/include/linux/prctl.h index 711e0a30aacc..3988012255dc 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -127,8 +127,8 @@ #define PR_SET_PTRACER 0x59616d61 # define PR_SET_PTRACER_ANY ((unsigned long)-1) -#define PR_SET_CHILD_SUBREAPER 36 -#define PR_GET_CHILD_SUBREAPER 37 +#define PR_SET_CHILD_SUBREAPER 36 +#define PR_GET_CHILD_SUBREAPER 37 /* * If no_new_privs is set, then operations that grant new privileges (i.e. @@ -142,7 +142,9 @@ * asking selinux for a specific new context (e.g. with runcon) will result * in execve returning -EPERM. */ -#define PR_SET_NO_NEW_PRIVS 38 -#define PR_GET_NO_NEW_PRIVS 39 +#define PR_SET_NO_NEW_PRIVS 38 +#define PR_GET_NO_NEW_PRIVS 39 + +#define PR_GET_TID_ADDRESS 40 #endif /* _LINUX_PRCTL_H */ -- cgit v1.2.3 From ae82fdb1406ad41d68f07027fe31f2d35ba22a90 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 8 Jun 2012 14:58:13 +0930 Subject: module_param: stop double-calling parameters. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 026cee0086fe1df4cf74691cf273062cc769617d "params: _initcall-like kernel parameters" set old-style module parameters to level 0. And we call those level 0 calls where we used to, early in start_kernel(). We also loop through the initcall levels and call the levelled module_params before the corresponding initcall. Unfortunately level 0 is early_init(), so we call the standard module_param calls twice. (Turns out most things don't care, but at least ubi.mtd does). Change the level to -1 for standard module_param calls. Reported-by: Benoît Thébaudeau Signed-off-by: Rusty Russell Cc: stable@kernel.org --- include/linux/moduleparam.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 1b14d25162cb..d6a58065c09c 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -128,7 +128,7 @@ struct kparam_array * The ops can have NULL set or get functions. */ #define module_param_cb(name, ops, arg, perm) \ - __module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, 0) + __module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, -1) /** * _param_cb - general callback for a module/cmdline parameter @@ -192,7 +192,7 @@ struct kparam_array { (void *)set, (void *)get }; \ __module_param_call(MODULE_PARAM_PREFIX, \ name, &__param_ops_##name, arg, \ - (perm) + sizeof(__check_old_set_param(set))*0, 0) + (perm) + sizeof(__check_old_set_param(set))*0, -1) /* We don't get oldget: it's often a new-style param_get_uint, etc. */ static inline int @@ -272,7 +272,7 @@ static inline void __kernel_param_unlock(void) */ #define core_param(name, var, type, perm) \ param_check_##type(name, &(var)); \ - __module_param_call("", name, ¶m_ops_##type, &var, perm, 0) + __module_param_call("", name, ¶m_ops_##type, &var, perm, -1) #endif /* !MODULE */ /** @@ -290,7 +290,7 @@ static inline void __kernel_param_unlock(void) = { len, string }; \ __module_param_call(MODULE_PARAM_PREFIX, name, \ ¶m_ops_string, \ - .str = &__param_string_##name, perm, 0); \ + .str = &__param_string_##name, perm, -1); \ __MODULE_PARM_TYPE(name, "string") /** @@ -432,7 +432,7 @@ extern int param_set_bint(const char *val, const struct kernel_param *kp); __module_param_call(MODULE_PARAM_PREFIX, name, \ ¶m_array_ops, \ .arr = &__param_arr_##name, \ - perm, 0); \ + perm, -1); \ __MODULE_PARM_TYPE(name, "array of " #type) extern struct kernel_param_ops param_array_ops; -- cgit v1.2.3 From c8e9cf7bb240049117d2fa64d1540476c289396d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 7 Jun 2012 12:15:15 +0200 Subject: vga_switcheroo: Add a helper function to get the client state Add vga_switcheroo_get_client_state() to get the current state of the client. This is necessary to determine the proper initial state of audio clients in HD-audio driver. Acked-by: Dave Airlie Signed-off-by: Takashi Iwai --- include/linux/vga_switcheroo.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h index b455c7c212eb..b176342ca031 100644 --- a/include/linux/vga_switcheroo.h +++ b/include/linux/vga_switcheroo.h @@ -12,6 +12,9 @@ enum vga_switcheroo_state { VGA_SWITCHEROO_OFF, VGA_SWITCHEROO_ON, + /* below are referred only from vga_switcheroo_get_client_state() */ + VGA_SWITCHEROO_INIT, + VGA_SWITCHEROO_NOT_FOUND, }; enum vga_switcheroo_client_id { @@ -50,6 +53,8 @@ void vga_switcheroo_unregister_handler(void); int vga_switcheroo_process_delayed_switch(void); +int vga_switcheroo_get_client_state(struct pci_dev *dev); + #else static inline void vga_switcheroo_unregister_client(struct pci_dev *dev) {} @@ -62,5 +67,7 @@ static inline int vga_switcheroo_register_audio_client(struct pci_dev *pdev, int id, bool active) { return 0; } static inline void vga_switcheroo_unregister_handler(void) {} static inline int vga_switcheroo_process_delayed_switch(void) { return 0; } +static inline int vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_CLIENT_ON; } + #endif -- cgit v1.2.3 From 505cff00de9c303b95c204eb4544066e3e707911 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 8 Jun 2012 12:49:17 +0200 Subject: vga_switcheroo: Fix error without CONFIG_VGA_SWITCHEROO Fix a typo that is built only when CONFIG_VGA_SWITCHEROO=n. Signed-off-by: Takashi Iwai --- include/linux/vga_switcheroo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h index b176342ca031..60da41fe9dc2 100644 --- a/include/linux/vga_switcheroo.h +++ b/include/linux/vga_switcheroo.h @@ -67,7 +67,7 @@ static inline int vga_switcheroo_register_audio_client(struct pci_dev *pdev, int id, bool active) { return 0; } static inline void vga_switcheroo_unregister_handler(void) {} static inline int vga_switcheroo_process_delayed_switch(void) { return 0; } -static inline int vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_CLIENT_ON; } +static inline int vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_ON; } #endif -- cgit v1.2.3 From 8876d6b5f81f4e242a6660da22bbd92f17a8d058 Mon Sep 17 00:00:00 2001 From: Paul Pluzhnikov Date: Sat, 9 Jun 2012 07:53:03 -0700 Subject: net: Make linux/tcp.h C++ friendly (trivial) I originally sent this patch to , but Jiri Kosina did not feel that this is fully appropriate for the trivial tree. Using linux/tcp.h from C++ results in: cat t.cc #include int main() { } g++ -c t.cc In file included from t.cc:1: /usr/include/linux/tcp.h:72: error: '__u32 __fswab32(__u32)' cannot appear in a constant-expression /usr/include/linux/tcp.h:72: error: a function call cannot appear in a constant-expression ... Attached trivial patch fixes this problem. Tested: - the t.cc above compiles with g++ and - the following program generates the same output before/after the patch: #include #include int main () { #define P(a) printf("%s: %08x\n", #a, (int)a) P(TCP_FLAG_CWR); P(TCP_FLAG_ECE); P(TCP_FLAG_URG); P(TCP_FLAG_ACK); P(TCP_FLAG_PSH); P(TCP_FLAG_RST); P(TCP_FLAG_SYN); P(TCP_FLAG_FIN); P(TCP_RESERVED_BITS); P(TCP_DATA_OFFSET); #undef P return 0; } Signed-off-by: Paul Pluzhnikov Signed-off-by: David S. Miller --- include/linux/tcp.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 4c5b63283377..5f359dbfcdce 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -69,16 +69,16 @@ union tcp_word_hdr { #define tcp_flag_word(tp) ( ((union tcp_word_hdr *)(tp))->words [3]) enum { - TCP_FLAG_CWR = __cpu_to_be32(0x00800000), - TCP_FLAG_ECE = __cpu_to_be32(0x00400000), - TCP_FLAG_URG = __cpu_to_be32(0x00200000), - TCP_FLAG_ACK = __cpu_to_be32(0x00100000), - TCP_FLAG_PSH = __cpu_to_be32(0x00080000), - TCP_FLAG_RST = __cpu_to_be32(0x00040000), - TCP_FLAG_SYN = __cpu_to_be32(0x00020000), - TCP_FLAG_FIN = __cpu_to_be32(0x00010000), - TCP_RESERVED_BITS = __cpu_to_be32(0x0F000000), - TCP_DATA_OFFSET = __cpu_to_be32(0xF0000000) + TCP_FLAG_CWR = __constant_cpu_to_be32(0x00800000), + TCP_FLAG_ECE = __constant_cpu_to_be32(0x00400000), + TCP_FLAG_URG = __constant_cpu_to_be32(0x00200000), + TCP_FLAG_ACK = __constant_cpu_to_be32(0x00100000), + TCP_FLAG_PSH = __constant_cpu_to_be32(0x00080000), + TCP_FLAG_RST = __constant_cpu_to_be32(0x00040000), + TCP_FLAG_SYN = __constant_cpu_to_be32(0x00020000), + TCP_FLAG_FIN = __constant_cpu_to_be32(0x00010000), + TCP_RESERVED_BITS = __constant_cpu_to_be32(0x0F000000), + TCP_DATA_OFFSET = __constant_cpu_to_be32(0xF0000000) }; /* -- cgit v1.2.3 From 601722157b3f6be73623644eeae6f14940f0bd8f Mon Sep 17 00:00:00 2001 From: Qiao Zhou Date: Mon, 4 Jun 2012 10:41:03 +0800 Subject: ARM: MMP: add pxa910-ssp into ssp_id_table add pxa910-ssp into ssp_id_table, and fix pxa-ssp compiling issue under mach-mmp architect. Signed-off-by: Qiao Zhou Acked-by: Haojian Zhuang Signed-off-by: Mark Brown --- include/linux/pxa2xx_ssp.h | 1 + include/linux/spi/pxa2xx_spi.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h index 44835fb39793..f9fe15ec2b51 100644 --- a/include/linux/pxa2xx_ssp.h +++ b/include/linux/pxa2xx_ssp.h @@ -161,6 +161,7 @@ enum pxa_ssp_type { PXA25x_NSSP, /* pxa 255, 26x (including ASSP) */ PXA27x_SSP, PXA168_SSP, + PXA910_SSP, CE4100_SSP, }; diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h index d3e1075f7b60..c73d1445c77e 100644 --- a/include/linux/spi/pxa2xx_spi.h +++ b/include/linux/spi/pxa2xx_spi.h @@ -43,7 +43,7 @@ struct pxa2xx_spi_chip { void (*cs_control)(u32 command); }; -#ifdef CONFIG_ARCH_PXA +#if defined(CONFIG_ARCH_PXA) || defined(CONFIG_ARCH_MMP) #include #include -- cgit v1.2.3 From 972a55b62d592cfcd6d73577df8a52f1251ea9a7 Mon Sep 17 00:00:00 2001 From: Qiao Zhou Date: Mon, 4 Jun 2012 10:41:04 +0800 Subject: ASoC: fix pxa-ssp compiling issue under mach-mmp pxa-ssp.c uses API like cpu_is_pxa3xx(), cpu_is_pxa2xx(), which is defined under arch-pxa architecture, and drivers under mach-mmp can't find it. so just use ssp->type to replace that API. Signed-off-by: Qiao Zhou Acked-by: Haojian Zhuang Signed-off-by: Mark Brown --- include/linux/pxa2xx_ssp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h index f9fe15ec2b51..f36632061c66 100644 --- a/include/linux/pxa2xx_ssp.h +++ b/include/linux/pxa2xx_ssp.h @@ -160,6 +160,7 @@ enum pxa_ssp_type { PXA25x_SSP, /* pxa 210, 250, 255, 26x */ PXA25x_NSSP, /* pxa 255, 26x (including ASSP) */ PXA27x_SSP, + PXA3xx_SSP, PXA168_SSP, PXA910_SSP, CE4100_SSP, -- cgit v1.2.3 From c2fb8a3fa25513de8fedb38509b1f15a5bbee47b Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 13 Jun 2012 11:20:19 -0400 Subject: USB: add NO_D3_DURING_SLEEP flag and revert 151b61284776be2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch (as1558) fixes a problem affecting several ASUS computers: The machine crashes or corrupts memory when going into suspend if the ehci-hcd driver is bound to any controllers. Users have been forced to unbind or unload ehci-hcd before putting their systems to sleep. After extensive testing, it was determined that the machines don't like going into suspend when any EHCI controllers are in the PCI D3 power state. Presumably this is a firmware bug, but there's nothing we can do about it except to avoid putting the controllers in D3 during system sleep. The patch adds a new flag to indicate whether the problem is present, and avoids changing the controller's power state if the flag is set. Runtime suspend is unaffected; this matters only for system suspend. However as a side effect, the controller will not respond to remote wakeup requests while the system is asleep. Hence USB wakeup is not functional -- but of course, this is already true in the current state of affairs. A similar patch has already been applied as commit 151b61284776be2d6f02d48c23c3625678960b97 (USB: EHCI: fix crash during suspend on ASUS computers). The patch supersedes that one and reverts it. There are two differences: The old patch added the flag at the USB level; this patch adds it at the PCI level. The old patch applied to all chipsets with the same vendor, subsystem vendor, and product IDs; this patch makes an exception for a known-good system (based on DMI information). Signed-off-by: Alan Stern Tested-by: Dâniel Fraga Tested-by: Andrey Rahmatullin Tested-by: Steven Rostedt Cc: stable Reviewed-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- include/linux/pci.h | 2 ++ include/linux/usb/hcd.h | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index d8c379dba6ad..fefb4e19bf6a 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -176,6 +176,8 @@ enum pci_dev_flags { PCI_DEV_FLAGS_NO_D3 = (__force pci_dev_flags_t) 2, /* Provide indication device is assigned by a Virtual Machine Manager */ PCI_DEV_FLAGS_ASSIGNED = (__force pci_dev_flags_t) 4, + /* Device causes system crash if in D3 during S3 sleep */ + PCI_DEV_FLAGS_NO_D3_DURING_SLEEP = (__force pci_dev_flags_t) 8, }; enum pci_irq_reroute_variant { diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 7f855d50cdf5..49b3ac29726a 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -126,8 +126,6 @@ struct usb_hcd { unsigned wireless:1; /* Wireless USB HCD */ unsigned authorized_default:1; unsigned has_tt:1; /* Integrated TT in root hub */ - unsigned broken_pci_sleep:1; /* Don't put the - controller in PCI-D3 for system sleep */ unsigned int irq; /* irq allocated */ void __iomem *regs; /* device memory/io */ -- cgit v1.2.3 From 201e4aca5aa179e6c69a4dcd36a3562e56b8d670 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Sat, 26 May 2012 06:07:49 -0700 Subject: pstore/ram: Should update old dmesg buffer before reading Without the update, we'll only see the new dmesg buffer after the reboot, but previously we could see it right away. Making an oops visible in pstore filesystem before reboot is a somewhat dubious feature, but removing it wasn't an intentional change, so let's restore it. For this we have to make persistent_ram_save_old() safe for calling multiple times, and also extern it. Signed-off-by: Anton Vorontsov Acked-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- include/linux/pstore_ram.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index 7ed7fd4dba49..4491e8ff36e6 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h @@ -75,6 +75,7 @@ struct persistent_ram_zone *persistent_ram_init_ringbuffer(struct device *dev, int persistent_ram_write(struct persistent_ram_zone *prz, const void *s, unsigned int count); +void persistent_ram_save_old(struct persistent_ram_zone *prz); size_t persistent_ram_old_size(struct persistent_ram_zone *prz); void *persistent_ram_old(struct persistent_ram_zone *prz); void persistent_ram_free_old(struct persistent_ram_zone *prz); -- cgit v1.2.3 From fce397930475f7efc712a1345dc0dad269a10544 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Sat, 26 May 2012 06:07:51 -0700 Subject: pstore/ram_core: Factor persistent_ram_zap() out of post_init() A handy function that we will use outside of ram_core soon. But so far just factor it out and start using it in post_init(). Signed-off-by: Anton Vorontsov Acked-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- include/linux/pstore_ram.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index 4491e8ff36e6..3b823d49a85a 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h @@ -69,6 +69,7 @@ struct persistent_ram_zone * __init persistent_ram_new(phys_addr_t start, size_t size, bool ecc); void persistent_ram_free(struct persistent_ram_zone *prz); +void persistent_ram_zap(struct persistent_ram_zone *prz); struct persistent_ram_zone *persistent_ram_init_ringbuffer(struct device *dev, bool ecc); -- cgit v1.2.3 From e2ae715d66bf4becfb85eb84b7150e23cf27df30 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Fri, 15 Jun 2012 14:07:51 +0200 Subject: kmsg - kmsg_dump() use iterator to receive log buffer content Provide an iterator to receive the log buffer content, and convert all kmsg_dump() users to it. The structured data in the kmsg buffer now contains binary data, which should no longer be copied verbatim to the kmsg_dump() users. The iterator should provide reliable access to the buffer data, and also supports proper log line-aware chunking of data while iterating. Signed-off-by: Kay Sievers Tested-by: Tony Luck Reported-by: Anton Vorontsov Tested-by: Anton Vorontsov Signed-off-by: Greg Kroah-Hartman --- include/linux/kmsg_dump.h | 45 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h index 35f7237ec972..af4eb5a39d9a 100644 --- a/include/linux/kmsg_dump.h +++ b/include/linux/kmsg_dump.h @@ -21,6 +21,7 @@ * is passed to the kernel. */ enum kmsg_dump_reason { + KMSG_DUMP_UNDEF, KMSG_DUMP_PANIC, KMSG_DUMP_OOPS, KMSG_DUMP_EMERG, @@ -31,23 +32,37 @@ enum kmsg_dump_reason { /** * struct kmsg_dumper - kernel crash message dumper structure - * @dump: The callback which gets called on crashes. The buffer is passed - * as two sections, where s1 (length l1) contains the older - * messages and s2 (length l2) contains the newer. * @list: Entry in the dumper list (private) + * @dump: Call into dumping code which will retrieve the data with + * through the record iterator + * @max_reason: filter for highest reason number that should be dumped * @registered: Flag that specifies if this is already registered */ struct kmsg_dumper { - void (*dump)(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason, - const char *s1, unsigned long l1, - const char *s2, unsigned long l2); struct list_head list; - int registered; + void (*dump)(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason); + enum kmsg_dump_reason max_reason; + bool active; + bool registered; + + /* private state of the kmsg iterator */ + u32 cur_idx; + u32 next_idx; + u64 cur_seq; + u64 next_seq; }; #ifdef CONFIG_PRINTK void kmsg_dump(enum kmsg_dump_reason reason); +bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, + char *line, size_t size, size_t *len); + +bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, + char *buf, size_t size, size_t *len); + +void kmsg_dump_rewind(struct kmsg_dumper *dumper); + int kmsg_dump_register(struct kmsg_dumper *dumper); int kmsg_dump_unregister(struct kmsg_dumper *dumper); @@ -56,6 +71,22 @@ static inline void kmsg_dump(enum kmsg_dump_reason reason) { } +bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, + const char *line, size_t size, size_t *len) +{ + return false; +} + +bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, + char *buf, size_t size, size_t *len) +{ + return false; +} + +void kmsg_dump_rewind(struct kmsg_dumper *dumper) +{ +} + static inline int kmsg_dump_register(struct kmsg_dumper *dumper) { return -EINVAL; -- cgit v1.2.3 From 9b15b817f3d62409290fd56fe3cbb076a931bb0a Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 15 Jun 2012 17:55:50 -0700 Subject: swap: fix shmem swapping when more than 8 areas Minchan Kim reports that when a system has many swap areas, and tmpfs swaps out to the ninth or more, shmem_getpage_gfp()'s attempts to read back the page cannot locate it, and the read fails with -ENOMEM. Whoops. Yes, I blindly followed read_swap_header()'s pte_to_swp_entry( swp_entry_to_pte()) technique for determining maximum usable swap offset, without stopping to realize that that actually depends upon the pte swap encoding shifting swap offset to the higher bits and truncating it there. Whereas our radix_tree swap encoding leaves offset in the lower bits: it's swap "type" (that is, index of swap area) that was truncated. Fix it by reducing the SWP_TYPE_SHIFT() in swapops.h, and removing the broken radix_to_swp_entry(swp_to_radix_entry()) from read_swap_header(). This does not reduce the usable size of a swap area any further, it leaves it as claimed when making the original commit: no change from 3.0 on x86_64, nor on i386 without PAE; but 3.0's 512GB is reduced to 128GB per swapfile on i386 with PAE. It's not a change I would have risked five years ago, but with x86_64 supported for ten years, I believe it's appropriate now. Hmm, and what if some architecture implements its swap pte with offset encoded below type? That would equally break the maximum usable swap offset check. Happily, they all follow the same tradition of encoding offset above type, but I'll prepare a check on that for next. Reported-and-Reviewed-and-Tested-by: Minchan Kim Signed-off-by: Hugh Dickins Cc: stable@vger.kernel.org [3.1, 3.2, 3.3, 3.4] Signed-off-by: Linus Torvalds --- include/linux/swapops.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 792d16d9cbc7..47ead515c811 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -9,13 +9,15 @@ * get good packing density in that tree, so the index should be dense in * the low-order bits. * - * We arrange the `type' and `offset' fields so that `type' is at the five + * We arrange the `type' and `offset' fields so that `type' is at the seven * high-order bits of the swp_entry_t and `offset' is right-aligned in the - * remaining bits. + * remaining bits. Although `type' itself needs only five bits, we allow for + * shmem/tmpfs to shift it all up a further two bits: see swp_to_radix_entry(). * * swp_entry_t's are *never* stored anywhere in their arch-dependent format. */ -#define SWP_TYPE_SHIFT(e) (sizeof(e.val) * 8 - MAX_SWAPFILES_SHIFT) +#define SWP_TYPE_SHIFT(e) ((sizeof(e.val) * 8) - \ + (MAX_SWAPFILES_SHIFT + RADIX_TREE_EXCEPTIONAL_SHIFT)) #define SWP_OFFSET_MASK(e) ((1UL << SWP_TYPE_SHIFT(e)) - 1) /* -- cgit v1.2.3 From f8fee8f5acb5c3f82e02f2ae139a6f1e7b4eb583 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 15 Jun 2012 12:46:17 -0700 Subject: vga_switcheroo.h: fix pci_dev warning Fix warnings on some architectures/configs (not on x86): include/linux/vga_switcheroo.h:28:30: warning: 'struct pci_dev' declared inside parameter list [enabled by default] include/linux/vga_switcheroo.h:28:30: warning: its scope is only this definition or declaration, which is probably not what you want [enabled by default] Signed-off-by: Randy Dunlap Cc: Takashi Iwai Reported-by: Geert Uytterhoeven Signed-off-by: Dave Airlie --- include/linux/vga_switcheroo.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h index 60da41fe9dc2..d844b7790ea6 100644 --- a/include/linux/vga_switcheroo.h +++ b/include/linux/vga_switcheroo.h @@ -9,6 +9,8 @@ #include +struct pci_dev; + enum vga_switcheroo_state { VGA_SWITCHEROO_OFF, VGA_SWITCHEROO_ON, -- cgit v1.2.3 From 2a4c8994eeef50796015f8a2005e4a75c1929166 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 14 Jun 2012 13:08:38 -0400 Subject: NFSv4.1: Fix umount when filelayout DS is also the MDS Currently there is a 'chicken and egg' issue when the DS is also the mounted MDS. The nfs_match_client() reference from nfs4_set_ds_client bumps the cl_count, the nfs_client is not freed at umount, and nfs4_deviceid_purge_client is not called to dereference the MDS usage of a deviceid which holds a reference to the DS nfs_client. The result is the umount program returns, but the nfs_client is not freed, and the cl_session hearbeat continues. The MDS (and all other nfs mounts) lose their last nfs_client reference in nfs_free_server when the last nfs_server (fsid) is umounted. The file layout DS lose their last nfs_client reference in destroy_ds when the last deviceid referencing the data server is put and destroy_ds is called. This is triggered by a call to nfs4_deviceid_purge_client which removes references to a pNFS deviceid used by an MDS mount. The fix is to track how many pnfs enabled filesystems are mounted from this server, and then to purge the device id cache once that count reaches zero. Reported-by: Jorge Mora Reported-by: Andy Adamson Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index fbb78fb09bd2..f58325a1d8fb 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -25,6 +25,7 @@ struct nfs41_impl_id; */ struct nfs_client { atomic_t cl_count; + atomic_t cl_mds_count; int cl_cons_state; /* current construction state (-ve: init error) */ #define NFS_CS_READY 0 /* ready to be used */ #define NFS_CS_INITING 1 /* busy initialising */ -- cgit v1.2.3 From 93b3cca1ccd30b1ad290951a3fc7c10c73db7313 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 14 Jun 2012 10:54:28 -0400 Subject: ftrace: Make all inline tags also include notrace Commit 5963e317b1e9d2a ("ftrace/x86: Do not change stacks in DEBUG when calling lockdep") prevented lockdep calls from the int3 breakpoint handler from reseting the stack if a function that was called was in the process of being converted for tracing and had a breakpoint on it. The idea is, before calling the lockdep code, do a load_idt() to the special IDT that kept the breakpoint stack from reseting. This worked well as a quick fix for this kernel release, until a certain config caused a lockup in the function tracer start up tests. Investigating it, I found that the load_idt that was used to prevent the int3 from changing stacks was itself being traced! Even though the config had CONFIG_OPTIMIZE_INLINING disabled, and all 'inline' tags were set to always inline, there were still cases that it did not inline! This was caused by CONFIG_PARAVIRT_GUEST, where it would add a pointer to the native_load_idt() which made that function to be traced. Commit 45959ee7aa645815a ("ftrace: Do not function trace inlined functions") only touched the 'inline' tags when CONFIG_OPMITIZE_INLINING was enabled. PARAVIRT_GUEST shows that this was not enough and we need to also mark always_inline with notrace as well. Reported-by: Fengguang Wu Tested-by: Fengguang Wu Signed-off-by: Steven Rostedt --- include/linux/compiler-gcc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index e5834aa24b9e..6a6d7aefe12d 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -47,9 +47,9 @@ */ #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) || \ !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4) -# define inline inline __attribute__((always_inline)) -# define __inline__ __inline__ __attribute__((always_inline)) -# define __inline __inline __attribute__((always_inline)) +# define inline inline __attribute__((always_inline)) notrace +# define __inline__ __inline__ __attribute__((always_inline)) notrace +# define __inline __inline __attribute__((always_inline)) notrace #else /* A lot of inline functions can cause havoc with function tracing */ # define inline inline notrace -- cgit v1.2.3 From 246f6f2ff2c5cf46ded6d06f11f63e38bad880d1 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Tue, 19 Jun 2012 00:32:57 +0200 Subject: kmsg - kmsg_dump() fix CONFIG_PRINTK=n compilation Signed-off-by: Kay Sievers Cc: Stephen Rothwell Reported-by: Randy Dunlap Reported-by: Fengguang Wu Signed-off-by: Greg Kroah-Hartman --- include/linux/kmsg_dump.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h index af4eb5a39d9a..d6bd50110ec2 100644 --- a/include/linux/kmsg_dump.h +++ b/include/linux/kmsg_dump.h @@ -71,19 +71,19 @@ static inline void kmsg_dump(enum kmsg_dump_reason reason) { } -bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, - const char *line, size_t size, size_t *len) +static inline bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, + const char *line, size_t size, size_t *len) { return false; } -bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, - char *buf, size_t size, size_t *len) +static inline bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, + char *buf, size_t size, size_t *len) { return false; } -void kmsg_dump_rewind(struct kmsg_dumper *dumper) +static inline void kmsg_dump_rewind(struct kmsg_dumper *dumper) { } -- cgit v1.2.3 From abca7c4965845924f65d40e0aa1092bdd895e314 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Wed, 20 Jun 2012 12:52:56 -0700 Subject: mm: fix slab->page _count corruption when using slub On arches that do not support this_cpu_cmpxchg_double() slab_lock is used to do atomic cmpxchg() on double word which contains page->_count. The page count can be changed from get_page() or put_page() without taking slab_lock. That corrupts page counter. Fix it by moving page->_count out of cmpxchg_double data. So that slub does no change it while updating slub meta-data in struct page. [akpm@linux-foundation.org: use standard comment layout, tweak comment text] Reported-by: Amey Bhide Signed-off-by: Pravin B Shelar Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: Andrea Arcangeli Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index dad95bdd06d7..704a626d94a0 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -57,8 +57,18 @@ struct page { }; union { +#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \ + defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) /* Used for cmpxchg_double in slub */ unsigned long counters; +#else + /* + * Keep _count separate from slub cmpxchg_double data. + * As the rest of the double word is protected by + * slab_lock but _count is not. + */ + unsigned counters; +#endif struct { -- cgit v1.2.3 From 10d8935f46e5028847b179757ecbf9238b13d129 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 20 Jun 2012 12:53:02 -0700 Subject: Viresh has moved viresh.kumar@st.com email-id doesn't exist anymore as I have left the company. Replace ST's id with viresh.linux@gmail.com. It also updates .mailmap file to fix address for 'git shortlog' Signed-off-by: Viresh Kumar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmc/sdhci-spear.h | 2 +- include/linux/pata_arasan_cf_data.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/sdhci-spear.h b/include/linux/mmc/sdhci-spear.h index 5cdc96da9dd5..e78c0e236e9d 100644 --- a/include/linux/mmc/sdhci-spear.h +++ b/include/linux/mmc/sdhci-spear.h @@ -4,7 +4,7 @@ * SDHCI declarations specific to ST SPEAr platform * * Copyright (C) 2010 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/include/linux/pata_arasan_cf_data.h b/include/linux/pata_arasan_cf_data.h index a6ee9aa898bb..a7b4fc386e63 100644 --- a/include/linux/pata_arasan_cf_data.h +++ b/include/linux/pata_arasan_cf_data.h @@ -4,7 +4,7 @@ * Arasan Compact Flash host controller platform data header file * * Copyright (C) 2011 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any -- cgit v1.2.3 From d3decf3a0c1d28c80c76be170373f4c7a7217f09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ozan=20=C3=87a=C4=9Flayan?= Date: Thu, 14 Jun 2012 15:02:35 +0300 Subject: vga_switcheroo: Add include guard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Guard vga_switcheroo.h against multiple inclusion. Signed-off-by: Ozan Çağlayan Signed-off-by: Dave Airlie --- include/linux/vga_switcheroo.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h index d844b7790ea6..ddb419cf4530 100644 --- a/include/linux/vga_switcheroo.h +++ b/include/linux/vga_switcheroo.h @@ -7,6 +7,9 @@ * vga_switcheroo.h - Support for laptop with dual GPU using one set of outputs */ +#ifndef _LINUX_VGA_SWITCHEROO_H_ +#define _LINUX_VGA_SWITCHEROO_H_ + #include struct pci_dev; @@ -73,3 +76,4 @@ static inline int vga_switcheroo_get_client_state(struct pci_dev *dev) { return #endif +#endif /* _LINUX_VGA_SWITCHEROO_H_ */ -- cgit v1.2.3