From b084f598df36b62dfae83c10ed17f0b66b50f442 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 31 May 2011 12:24:58 -0400 Subject: nfsd: fix dependency of nfsd on auth_rpcgss Commit b0b0c0a26e84 "nfsd: add proc file listing kernel's gss_krb5 enctypes" added an unnecessary dependency of nfsd on the auth_rpcgss module. It's a little ad hoc, but since the only piece of information nfsd needs from rpcsec_gss_krb5 is a single static string, one solution is just to share it with an include file. Cc: stable@kernel.org Reported-by: Michael Guntsche Cc: Kevin Coffman Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/gss_krb5_enctypes.h | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 include/linux/sunrpc/gss_krb5_enctypes.h (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5_enctypes.h b/include/linux/sunrpc/gss_krb5_enctypes.h new file mode 100644 index 000000000000..ec6234eee89c --- /dev/null +++ b/include/linux/sunrpc/gss_krb5_enctypes.h @@ -0,0 +1,4 @@ +/* + * Dumb way to share this static piece of information with nfsd + */ +#define KRB5_SUPPORTED_ENCTYPES "18,17,16,23,3,1,2" -- cgit v1.2.3 From a685e08987d1edf1995b76511d4c98ea0e905377 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 8 Jun 2011 21:13:01 -0400 Subject: Delay struct net freeing while there's a sysfs instance referring to it * new refcount in struct net, controlling actual freeing of the memory * new method in kobj_ns_type_operations (->drop_ns()) * ->current_ns() semantics change - it's supposed to be followed by corresponding ->drop_ns(). For struct net in case of CONFIG_NET_NS it bumps the new refcount; net_drop_ns() decrements it and calls net_free() if the last reference has been dropped. Method renamed to ->grab_current_ns(). * old net_free() callers call net_drop_ns() instead. * sysfs_exit_ns() is gone, along with a large part of callchain leading to it; now that the references stored in ->ns[...] 
stay valid we do not need to hunt them down and replace them with NULL. That fixes problems in sysfs_lookup() and sysfs_readdir(), along with getting rid of sb->s_instances abuse. Note that struct net *shutdown* logic has not changed - net_cleanup() is called exactly when it used to be called. The only thing postponed by having a sysfs instance referring to that struct net is actual freeing of memory occupied by struct net. Signed-off-by: Al Viro --- include/linux/kobject_ns.h | 10 ++++++---- include/linux/sysfs.h | 7 ------- 2 files changed, 6 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kobject_ns.h b/include/linux/kobject_ns.h index 82cb5bf461fb..f66b065a8b5f 100644 --- a/include/linux/kobject_ns.h +++ b/include/linux/kobject_ns.h @@ -32,15 +32,17 @@ enum kobj_ns_type { /* * Callbacks so sysfs can determine namespaces - * @current_ns: return calling task's namespace + * @grab_current_ns: return a new reference to calling task's namespace * @netlink_ns: return namespace to which a sock belongs (right?) * @initial_ns: return the initial namespace (i.e. 
init_net_ns) + * @drop_ns: drops a reference to namespace */ struct kobj_ns_type_operations { enum kobj_ns_type type; - const void *(*current_ns)(void); + void *(*grab_current_ns)(void); const void *(*netlink_ns)(struct sock *sk); const void *(*initial_ns)(void); + void (*drop_ns)(void *); }; int kobj_ns_type_register(const struct kobj_ns_type_operations *ops); @@ -48,9 +50,9 @@ int kobj_ns_type_registered(enum kobj_ns_type type); const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent); const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj); -const void *kobj_ns_current(enum kobj_ns_type type); +void *kobj_ns_grab_current(enum kobj_ns_type type); const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk); const void *kobj_ns_initial(enum kobj_ns_type type); -void kobj_ns_exit(enum kobj_ns_type type, const void *ns); +void kobj_ns_drop(enum kobj_ns_type type, void *ns); #endif /* _LINUX_KOBJECT_NS_H */ diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index c3acda60eee0..e2696d76a599 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -177,9 +177,6 @@ struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd); void sysfs_put(struct sysfs_dirent *sd); -/* Called to clear a ns tag when it is no longer valid */ -void sysfs_exit_ns(enum kobj_ns_type type, const void *tag); - int __must_check sysfs_init(void); #else /* CONFIG_SYSFS */ @@ -338,10 +335,6 @@ static inline void sysfs_put(struct sysfs_dirent *sd) { } -static inline void sysfs_exit_ns(int type, const void *tag) -{ -} - static inline int __must_check sysfs_init(void) { return 0; -- cgit v1.2.3 From 08e8138adebdd511e0955e8d6c051904bb4082af Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 13 Jun 2011 10:42:49 +0200 Subject: block: Add __attribute__((format(printf...) and fix fallout Use the compiler to verify format strings and arguments. Fix fallout. 
Signed-off-by: Joe Perches Signed-off-by: Jens Axboe --- include/linux/blktrace_api.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index b22fb0d3db0f..8c7c2de7631a 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -169,7 +169,8 @@ extern void blk_trace_shutdown(struct request_queue *); extern int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, struct block_device *bdev, struct blk_user_trace_setup *buts); -extern void __trace_note_message(struct blk_trace *, const char *fmt, ...); +extern __attribute__((format(printf, 2, 3))) +void __trace_note_message(struct blk_trace *, const char *fmt, ...); /** * blk_add_trace_msg - Add a (simple) message to the blktrace stream -- cgit v1.2.3 From de1b794130b130e77ffa975bb58cb843744f9ae5 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 13 Jun 2011 15:38:22 -0400 Subject: jbd2: Fix oops in jbd2_journal_remove_journal_head() jbd2_journal_remove_journal_head() can oops when trying to access journal_head returned by bh2jh(). This is caused for example by the following race: TASK1 TASK2 jbd2_journal_commit_transaction() ... processing t_forget list __jbd2_journal_refile_buffer(jh); if (!jh->b_transaction) { jbd_unlock_bh_state(bh); jbd2_journal_try_to_free_buffers() jbd2_journal_grab_journal_head(bh) jbd_lock_bh_state(bh) __journal_try_to_free_buffer() jbd2_journal_put_journal_head(jh) jbd2_journal_remove_journal_head(bh); jbd2_journal_put_journal_head() in TASK2 sees that b_jcount == 0 and buffer is not part of any transaction and thus frees journal_head before TASK1 gets to doing so. Note that even buffer_head can be released by try_to_free_buffers() after jbd2_journal_put_journal_head() which adds even larger opportunity for oops (but I didn't see this happen in reality). Fix the problem by making transactions hold their own journal_head reference (in b_jcount). 
That way we don't have to remove journal_head explicitly via jbd2_journal_remove_journal_head() and instead just remove journal_head when b_jcount drops to zero. The result of this is that [__]jbd2_journal_refile_buffer(), [__]jbd2_journal_unfile_buffer(), and __jbd2_journal_remove_checkpoint() can free journal_head which needs modification of a few callers. Also we have to be careful because once journal_head is removed, buffer_head might be freed as well. So we have to get our own buffer_head reference where it matters. Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 4ecb7b16b278..d087c2e7b2aa 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1024,7 +1024,6 @@ struct journal_s /* Filing buffers */ extern void jbd2_journal_unfile_buffer(journal_t *, struct journal_head *); -extern void __jbd2_journal_unfile_buffer(struct journal_head *); extern void __jbd2_journal_refile_buffer(struct journal_head *); extern void jbd2_journal_refile_buffer(journal_t *, struct journal_head *); extern void __jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int); @@ -1165,7 +1164,6 @@ extern void jbd2_journal_release_jbd_inode(journal_t *journal, struct jbd2_in */ struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh); struct journal_head *jbd2_journal_grab_journal_head(struct buffer_head *bh); -void jbd2_journal_remove_journal_head(struct buffer_head *bh); void jbd2_journal_put_journal_head(struct journal_head *jh); /* -- cgit v1.2.3 From 09223371deac67d08ca0b70bd18787920284c967 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 14 Jun 2011 13:26:25 +0800 Subject: rcu: Use softirq to address performance regression Commit a26ac2455ffcf3(rcu: move TREE_RCU from softirq to kthread) introduced a performance regression. 
In an AIM7 test, this commit degraded performance by about 40%. The commit runs rcu callbacks in a kthread instead of softirq. We observed a high rate of context switches which is caused by this. Our test system has 64 CPUs and HZ is 1000, so we saw more than 64k context switches per second which is caused by RCU's per-CPU kthread. A trace showed that most of the time the RCU per-CPU kthread doesn't actually handle any callbacks, but instead just does a very small amount of work handling grace periods. This means that RCU's per-CPU kthreads are making the scheduler do quite a bit of work in order to allow a very small amount of RCU-related processing to be done. Alex Shi's analysis determined that this slowdown is due to lock contention within the scheduler. Unfortunately, as Peter Zijlstra points out, the scheduler's real-time semantics require global action, which means that this contention is inherent in real-time scheduling. (Yes, perhaps someone will come up with a workaround -- otherwise, -rt is not going to do well on large SMP systems -- but this patch will work around this issue in the meantime. And "the meantime" might well be forever.) This patch therefore re-introduces softirq processing to RCU, but only for core RCU work. RCU callbacks are still executed in kthread context, so that only a small amount of RCU work runs in softirq context in the common case. This should minimize ksoftirqd execution, allowing us to skip boosting of ksoftirqd for CONFIG_RCU_BOOST=y kernels. Signed-off-by: Shaohua Li Tested-by: "Alex,Shi" Signed-off-by: Paul E. 
McKenney --- include/linux/interrupt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 6c12989839d9..f6efed0039ed 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -414,6 +414,7 @@ enum TASKLET_SOFTIRQ, SCHED_SOFTIRQ, HRTIMER_SOFTIRQ, + RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */ NR_SOFTIRQS }; -- cgit v1.2.3 From 0b760113a3a155269a3fba93a409c640031dd68f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 31 May 2011 15:15:34 -0400 Subject: NLM: Don't hang forever on NLM unlock requests If the NLM daemon is killed on the NFS server, we can currently end up hanging forever on an 'unlock' request, instead of aborting. Basically, if the rpcbind request fails, or the server keeps returning garbage, we really want to quit instead of retrying. Tested-by: Vasily Averin Signed-off-by: Trond Myklebust Cc: stable@kernel.org --- include/linux/sunrpc/sched.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index f73c482ec9c6..fe2d8e6b923b 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -84,7 +84,8 @@ struct rpc_task { #endif unsigned char tk_priority : 2,/* Task priority */ tk_garb_retry : 2, - tk_cred_retry : 2; + tk_cred_retry : 2, + tk_rebind_retry : 2; }; #define tk_xprt tk_client->cl_xprt -- cgit v1.2.3 From c9c30dd5f73dccaa326a54dfcf490316946aea87 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sat, 11 Jun 2011 17:08:39 -0400 Subject: NFSv4.1: deprecate headerpadsz in CREATE_SESSION We don't support header padding yet so better off ditching it Reported-by: Sid Moore Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 
5e8444a11adf..00848d86ffb2 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -158,7 +158,6 @@ struct nfs_seqid; /* nfs41 sessions channel attributes */ struct nfs4_channel_attrs { - u32 headerpadsz; u32 max_rqst_sz; u32 max_resp_sz; u32 max_resp_sz_cached; -- cgit v1.2.3 From a59ec1e7ff98cc4365d5b1bff4e7102e86b5716b Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Wed, 15 Jun 2011 15:08:11 -0700 Subject: backlight: new driver for the ADP8870 backlight devices Signed-off-by: Michael Hennerich Signed-off-by: Mike Frysinger Cc: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/i2c/adp8870.h | 153 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 153 insertions(+) create mode 100644 include/linux/i2c/adp8870.h (limited to 'include/linux') diff --git a/include/linux/i2c/adp8870.h b/include/linux/i2c/adp8870.h new file mode 100644 index 000000000000..624dceccbd5b --- /dev/null +++ b/include/linux/i2c/adp8870.h @@ -0,0 +1,153 @@ +/* + * Definitions and platform data for Analog Devices + * Backlight drivers ADP8870 + * + * Copyright 2009-2010 Analog Devices Inc. + * + * Licensed under the GPL-2 or later. 
+ */ + +#ifndef __LINUX_I2C_ADP8870_H +#define __LINUX_I2C_ADP8870_H + +#define ID_ADP8870 8870 + +#define ADP8870_MAX_BRIGHTNESS 0x7F +#define FLAG_OFFT_SHIFT 8 + +/* + * LEDs subdevice platform data + */ + +#define ADP8870_LED_DIS_BLINK (0 << FLAG_OFFT_SHIFT) +#define ADP8870_LED_OFFT_600ms (1 << FLAG_OFFT_SHIFT) +#define ADP8870_LED_OFFT_1200ms (2 << FLAG_OFFT_SHIFT) +#define ADP8870_LED_OFFT_1800ms (3 << FLAG_OFFT_SHIFT) + +#define ADP8870_LED_ONT_200ms 0 +#define ADP8870_LED_ONT_600ms 1 +#define ADP8870_LED_ONT_800ms 2 +#define ADP8870_LED_ONT_1200ms 3 + +#define ADP8870_LED_D7 (7) +#define ADP8870_LED_D6 (6) +#define ADP8870_LED_D5 (5) +#define ADP8870_LED_D4 (4) +#define ADP8870_LED_D3 (3) +#define ADP8870_LED_D2 (2) +#define ADP8870_LED_D1 (1) + +/* + * Backlight subdevice platform data + */ + +#define ADP8870_BL_D7 (1 << 6) +#define ADP8870_BL_D6 (1 << 5) +#define ADP8870_BL_D5 (1 << 4) +#define ADP8870_BL_D4 (1 << 3) +#define ADP8870_BL_D3 (1 << 2) +#define ADP8870_BL_D2 (1 << 1) +#define ADP8870_BL_D1 (1 << 0) + +#define ADP8870_FADE_T_DIS 0 /* Fade Timer Disabled */ +#define ADP8870_FADE_T_300ms 1 /* 0.3 Sec */ +#define ADP8870_FADE_T_600ms 2 +#define ADP8870_FADE_T_900ms 3 +#define ADP8870_FADE_T_1200ms 4 +#define ADP8870_FADE_T_1500ms 5 +#define ADP8870_FADE_T_1800ms 6 +#define ADP8870_FADE_T_2100ms 7 +#define ADP8870_FADE_T_2400ms 8 +#define ADP8870_FADE_T_2700ms 9 +#define ADP8870_FADE_T_3000ms 10 +#define ADP8870_FADE_T_3500ms 11 +#define ADP8870_FADE_T_4000ms 12 +#define ADP8870_FADE_T_4500ms 13 +#define ADP8870_FADE_T_5000ms 14 +#define ADP8870_FADE_T_5500ms 15 /* 5.5 Sec */ + +#define ADP8870_FADE_LAW_LINEAR 0 +#define ADP8870_FADE_LAW_SQUARE 1 +#define ADP8870_FADE_LAW_CUBIC1 2 +#define ADP8870_FADE_LAW_CUBIC2 3 + +#define ADP8870_BL_AMBL_FILT_80ms 0 /* Light sensor filter time */ +#define ADP8870_BL_AMBL_FILT_160ms 1 +#define ADP8870_BL_AMBL_FILT_320ms 2 +#define ADP8870_BL_AMBL_FILT_640ms 3 +#define ADP8870_BL_AMBL_FILT_1280ms 4 +#define 
ADP8870_BL_AMBL_FILT_2560ms 5 +#define ADP8870_BL_AMBL_FILT_5120ms 6 +#define ADP8870_BL_AMBL_FILT_10240ms 7 /* 10.24 sec */ + +/* + * Blacklight current 0..30mA + */ +#define ADP8870_BL_CUR_mA(I) ((I * 127) / 30) + +/* + * L2 comparator current 0..1106uA + */ +#define ADP8870_L2_COMP_CURR_uA(I) ((I * 255) / 1106) + +/* + * L3 comparator current 0..551uA + */ +#define ADP8870_L3_COMP_CURR_uA(I) ((I * 255) / 551) + +/* + * L4 comparator current 0..275uA + */ +#define ADP8870_L4_COMP_CURR_uA(I) ((I * 255) / 275) + +/* + * L5 comparator current 0..138uA + */ +#define ADP8870_L5_COMP_CURR_uA(I) ((I * 255) / 138) + +struct adp8870_backlight_platform_data { + u8 bl_led_assign; /* 1 = Backlight 0 = Individual LED */ + u8 pwm_assign; /* 1 = Enables PWM mode */ + + u8 bl_fade_in; /* Backlight Fade-In Timer */ + u8 bl_fade_out; /* Backlight Fade-Out Timer */ + u8 bl_fade_law; /* fade-on/fade-off transfer characteristic */ + + u8 en_ambl_sens; /* 1 = enable ambient light sensor */ + u8 abml_filt; /* Light sensor filter time */ + + u8 l1_daylight_max; /* use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l1_daylight_dim; /* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l2_bright_max; /* use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l2_bright_dim; /* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l3_office_max; /* use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l3_office_dim; /* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l4_indoor_max; /* use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l4_indor_dim; /* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l5_dark_max; /* use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l5_dark_dim; /* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */ + + u8 l2_trip; /* use L2_COMP_CURR_uA(I) 0 <= I <= 1106 uA */ + u8 l2_hyst; /* use L2_COMP_CURR_uA(I) 0 <= I <= 1106 uA */ + u8 l3_trip; /* use L3_COMP_CURR_uA(I) 0 <= I <= 551 uA */ + u8 l3_hyst; /* use L3_COMP_CURR_uA(I) 0 <= I <= 551 uA */ + u8 l4_trip; /* use L4_COMP_CURR_uA(I) 0 <= I <= 275 uA */ + u8 l4_hyst; /* use 
L4_COMP_CURR_uA(I) 0 <= I <= 275 uA */ + u8 l5_trip; /* use L5_COMP_CURR_uA(I) 0 <= I <= 138 uA */ + u8 l5_hyst; /* use L6_COMP_CURR_uA(I) 0 <= I <= 138 uA */ + + /** + * Independent Current Sinks / LEDS + * Sinks not assigned to the Backlight can be exposed to + * user space using the LEDS CLASS interface + */ + + int num_leds; + struct led_info *leds; + u8 led_fade_in; /* LED Fade-In Timer */ + u8 led_fade_out; /* LED Fade-Out Timer */ + u8 led_fade_law; /* fade-on/fade-off transfer characteristic */ + u8 led_on_time; +}; + +#endif /* __LINUX_I2C_ADP8870_H */ -- cgit v1.2.3 From a433658c30974fc87ba3ff52d7e4e6299762aa3d Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 15 Jun 2011 15:08:13 -0700 Subject: vmscan,memcg: memcg aware swap token Currently, memcg reclaim can disable swap token even if the swap token mm doesn't belong in its memory cgroup. It's slightly risky. If an admin creates very small mem-cgroup and silly guy runs contentious heavy memory pressure workload, every tasks are going to lose swap token and then system may become unresponsive. That's bad. This patch adds 'memcg' parameter into disable_swap_token(). and if the parameter doesn't match swap token, VM doesn't disable it. 
[akpm@linux-foundation.org: coding-style fixes] Signed-off-by: KOSAKI Motohiro Reviewed-by: KAMEZAWA Hiroyuki Reviewed-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 6 ++++++ include/linux/swap.h | 8 ++------ 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 9724a38ee69d..50940da6adf3 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -84,6 +84,7 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem); extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page); extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); +extern struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm); static inline int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup) @@ -246,6 +247,11 @@ static inline struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) return NULL; } +static inline struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm) +{ + return NULL; +} + static inline int mm_match_cgroup(struct mm_struct *mm, struct mem_cgroup *mem) { return 1; diff --git a/include/linux/swap.h b/include/linux/swap.h index 384eb5fe530b..e70564647039 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -358,6 +358,7 @@ struct backing_dev_info; extern struct mm_struct *swap_token_mm; extern void grab_swap_token(struct mm_struct *); extern void __put_swap_token(struct mm_struct *); +extern void disable_swap_token(struct mem_cgroup *memcg); static inline int has_swap_token(struct mm_struct *mm) { @@ -370,11 +371,6 @@ static inline void put_swap_token(struct mm_struct *mm) __put_swap_token(mm); } -static inline void disable_swap_token(void) -{ - put_swap_token(swap_token_mm); -} - #ifdef CONFIG_CGROUP_MEM_RES_CTLR extern void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t 
ent, bool swapout); @@ -500,7 +496,7 @@ static inline int has_swap_token(struct mm_struct *mm) return 0; } -static inline void disable_swap_token(void) +static inline void disable_swap_token(struct mem_cgroup *memcg) { } -- cgit v1.2.3 From ac5622418bbff9cd3dc607aa57dfb4f62a7f2043 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 15 Jun 2011 15:08:17 -0700 Subject: kmsg_dump.h: fix build when CONFIG_PRINTK is disabled Fix when CONFIG_PRINTK is not enabled: include/linux/kmsg_dump.h:56: error: 'EINVAL' undeclared (first use in this function) include/linux/kmsg_dump.h:61: error: 'EINVAL' undeclared (first use in this function) Looks like commit 595dd3d8bf95 ("kmsg_dump: fix build for CONFIG_PRINTK=n") uses EINVAL without having the needed header file(s), but I'm sure that I build tested that patch also. oh well. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kmsg_dump.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h index 2a0d7d651dc3..ee0c952188de 100644 --- a/include/linux/kmsg_dump.h +++ b/include/linux/kmsg_dump.h @@ -12,6 +12,7 @@ #ifndef _LINUX_KMSG_DUMP_H #define _LINUX_KMSG_DUMP_H +#include <linux/errno.h> #include <linux/list.h> enum kmsg_dump_reason { -- cgit v1.2.3 From 32e45ff43eaf5c17f5a82c9ad358d515622c2562 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 15 Jun 2011 15:08:20 -0700 Subject: mm: increase RECLAIM_DISTANCE to 30 Recently, Robert Mueller reported (http://lkml.org/lkml/2010/9/12/236) that zone_reclaim_mode doesn't work properly on his new NUMA server (Dual Xeon E5520 + Intel S5520UR MB). He is using Cyrus IMAPd and it's built on a very traditional single-process model. * a master process which reads config files and manages the other processes * multiple imapd processes, one per connection * multiple pop3d processes, one per connection * multiple lmtpd processes, one per connection * periodical "cleanup" processes. 
There are thousands of independent processes. The problem is, recent Intel motherboards turn on zone_reclaim_mode by default and traditional prefork model software doesn't work well on it. Unfortunately, such models are still typical even in the 21st century. We can't ignore them. This patch raises the zone_reclaim_mode threshold to 30. 30 doesn't have any specific meaning, but 20 means that one-hop QPI/Hypertransport and such relatively cheap 2-4 socket machine are often used for traditional servers as above. The intention is that these machines don't use zone_reclaim_mode. Note: ia64 and Power have arch specific RECLAIM_DISTANCE definitions. This patch doesn't change such high-end NUMA machine behavior. Dave Hansen said: : I know specifically of pieces of x86 hardware that set the information : in the BIOS to '21' *specifically* so they'll get the zone_reclaim_mode : behavior which that implies. : : They've done performance testing and run very large and scary benchmarks : to make sure that they _want_ this turned on. What this means for them : is that they'll probably be de-optimized, at least on newer versions of : the kernel. : : If you want to do this for particular systems, maybe _that_'s what we : should do. Have a list of specific configurations that need the : defaults overridden either because they're buggy, or they have an : unusual hardware configuration not really reflected in the distance : table. And later said: : The original change in the hardware tables was for the benefit of a : benchmark. Said benchmark isn't going to get run on mainline until the : next batch of enterprise distros drops, at which point the hardware where : this was done will be irrelevant for the benchmark. I'm sure any new : hardware will just set this distance to another yet arbitrary value to : make the kernel do what it wants. :) : : Also, when the hardware got _set_ to this initially, I complained. So, I : guess I'm getting my way now, with this patch. I'm cool with it. 
Reported-by: Robert Mueller Signed-off-by: KOSAKI Motohiro Acked-by: Christoph Lameter Acked-by: David Rientjes Reviewed-by: KAMEZAWA Hiroyuki Cc: Benjamin Herrenschmidt Cc: "Luck, Tony" Acked-by: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/topology.h b/include/linux/topology.h index b91a40e847d2..fc839bfa7935 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -60,7 +60,7 @@ int arch_update_cpu_topology(void); * (in whatever arch specific measurement units returned by node_distance()) * then switch on zone reclaim on boot. */ -#define RECLAIM_DISTANCE 20 +#define RECLAIM_DISTANCE 30 #endif #ifndef PENALTY_FOR_NODE_WITH_CPUS #define PENALTY_FOR_NODE_WITH_CPUS (1) -- cgit v1.2.3 From ca39599c633fb02aceac31a7e67563612e4fe347 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 15 Jun 2011 15:08:27 -0700 Subject: BUILD_BUG_ON_ZERO: fix sparse breakage BUILD_BUG_ON_ZERO and BUILD_BUG_ON_NULL must return values, even in the CHECKER case otherwise various users of it become syntactically invalid. Signed-off-by: Dr. 
David Alan Gilbert Reviewed-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index fb0e7329fee1..953352a88336 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -671,8 +671,8 @@ struct sysinfo { #ifdef __CHECKER__ #define BUILD_BUG_ON_NOT_POWER_OF_2(n) -#define BUILD_BUG_ON_ZERO(e) -#define BUILD_BUG_ON_NULL(e) +#define BUILD_BUG_ON_ZERO(e) (0) +#define BUILD_BUG_ON_NULL(e) ((void*)0) #define BUILD_BUG_ON(condition) #else /* __CHECKER__ */ -- cgit v1.2.3 From bd5dc17be87b3a3073d50b23802647db3ae3fa8e Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Wed, 15 Jun 2011 15:08:28 -0700 Subject: uts: make default hostname configurable, rather than always using "(none)" The "hostname" tool falls back to setting the hostname to "localhost" if /etc/hostname does not exist. Distribution init scripts have the same fallback. However, if userspace never calls sethostname, such as when booting with init=/bin/sh, or otherwise booting a minimal system without the usual init scripts, the default hostname of "(none)" remains, unhelpfully appearing in various places such as prompts ("root@(none):~#") and logs. Furthermore, "(none)" doesn't typically resolve to anything useful. Make the default hostname configurable. This removes the need for the standard fallback, provides a useful default for systems that never call sethostname, and makes minimal systems that much more useful with less configuration. Distributions could choose to use "localhost" here to avoid the fallback, while embedded systems may wish to use a specific target hostname. 
Signed-off-by: Josh Triplett Acked-by: Linus Torvalds Acked-by: David Miller Cc: Serge Hallyn Cc: Kel Modderman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/uts.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/uts.h b/include/linux/uts.h index 73eb1ed36ec4..6ddbd86377de 100644 --- a/include/linux/uts.h +++ b/include/linux/uts.h @@ -9,7 +9,7 @@ #endif #ifndef UTS_NODENAME -#define UTS_NODENAME "(none)" /* set by sethostname() */ +#define UTS_NODENAME CONFIG_DEFAULT_HOSTNAME /* set by sethostname() */ #endif #ifndef UTS_DOMAINNAME -- cgit v1.2.3 From c001fb72a7b705f902bdfdd05b5d2408efe6f848 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 14 Jun 2011 17:05:11 -0700 Subject: gpio: add GPIOF_ values regardless of kconfig settings Make GPIOF_ defined values available even when neither GPIOLIB nor GENERIC_GPIO is enabled by moving them to <linux/gpio.h>. Fixes these build errors in linux-next: sound/soc/codecs/ak4641.c:524: error: 'GPIOF_OUT_INIT_LOW' undeclared (first use in this function) sound/soc/codecs/wm8915.c:2921: error: 'GPIOF_OUT_INIT_LOW' undeclared (first use in this function) Signed-off-by: Randy Dunlap Signed-off-by: Grant Likely --- include/linux/gpio.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio.h b/include/linux/gpio.h index 32d47e710661..17b5a0d80e42 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -3,6 +3,17 @@ /* see Documentation/gpio.txt */ +/* make these flag values available regardless of GPIO kconfig options */ +#define GPIOF_DIR_OUT (0 << 0) +#define GPIOF_DIR_IN (1 << 0) + +#define GPIOF_INIT_LOW (0 << 1) +#define GPIOF_INIT_HIGH (1 << 1) + +#define GPIOF_IN (GPIOF_DIR_IN) +#define GPIOF_OUT_INIT_LOW (GPIOF_DIR_OUT | GPIOF_INIT_LOW) +#define GPIOF_OUT_INIT_HIGH (GPIOF_DIR_OUT | GPIOF_INIT_HIGH) + #ifdef CONFIG_GENERIC_GPIO #include <asm/gpio.h> -- cgit v1.2.3 From 
b5199515c25cca622495eb9c6a8a1d275e775088 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 16 Jun 2011 16:22:08 +0200 Subject: clocksource: Make watchdog robust vs. interruption The clocksource watchdog code is interruptible and it has been observed that this can trigger false positives which disable the TSC. The reason is that an interrupt storm or a long running interrupt handler between the read of the watchdog source and the read of the TSC brings the two far enough apart that the delta is larger than the unstable threshold. Move both reads into a short interrupt disabled region to avoid that. Reported-and-tested-by: Vernon Mauery Signed-off-by: Thomas Gleixner Cc: stable@kernel.org --- include/linux/clocksource.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index d4646b48dc4a..18a1baf31f2d 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -188,6 +188,7 @@ struct clocksource { #ifdef CONFIG_CLOCKSOURCE_WATCHDOG /* Watchdog related data, used by the framework */ struct list_head wd_list; + cycle_t cs_last; cycle_t wd_last; #endif } ____cacheline_aligned; -- cgit v1.2.3 From d8ad7d1123a960cc9f276bd499f9325c6f5e1bd1 Mon Sep 17 00:00:00 2001 From: Takao Indoh Date: Tue, 29 Mar 2011 12:35:04 -0400 Subject: generic-ipi: Fix kexec boot crash by initializing call_single_queue before enabling interrupts There is a problem that kdump(2nd kernel) sometimes hangs up due to a pending IPI from 1st kernel. Kernel panic occurs because IPI comes before call_single_queue is initialized. To fix the crash, rename init_call_single_data() to call_function_init() and call it in start_kernel() so that call_single_queue can be initialized before enabling interrupts. The details of the crash are: (1) 2nd kernel boots up (2) A pending IPI from 1st kernel comes when irqs are first enabled in start_kernel(). 
(3) Kernel tries to handle the interrupt, but call_single_queue is not initialized yet at this point. As a result, in the generic_smp_call_function_single_interrupt(), NULL pointer dereference occurs when list_replace_init() tries to access &q->list.next. Therefore this patch changes the name of init_call_single_data() to call_function_init() and calls it before local_irq_enable() in start_kernel(). Signed-off-by: Takao Indoh Reviewed-by: WANG Cong Acked-by: Neil Horman Acked-by: Vivek Goyal Acked-by: Peter Zijlstra Cc: Milton Miller Cc: Jens Axboe Cc: Paul E. McKenney Cc: kexec@lists.infradead.org Link: http://lkml.kernel.org/r/D6CBEE2F420741indou.takao@jp.fujitsu.com Signed-off-by: Ingo Molnar --- include/linux/smp.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index 7ad824d510a2..8cc38d3bab0c 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -85,12 +85,15 @@ int smp_call_function_any(const struct cpumask *mask, * Generic and arch helpers */ #ifdef CONFIG_USE_GENERIC_SMP_HELPERS +void __init call_function_init(void); void generic_smp_call_function_single_interrupt(void); void generic_smp_call_function_interrupt(void); void ipi_call_lock(void); void ipi_call_unlock(void); void ipi_call_lock_irq(void); void ipi_call_unlock_irq(void); +#else +static inline void call_function_init(void) { } #endif /* @@ -134,7 +137,7 @@ static inline void smp_send_reschedule(int cpu) { } #define smp_prepare_boot_cpu() do {} while (0) #define smp_call_function_many(mask, func, info, wait) \ (up_smp_call_function(func, info)) -static inline void init_call_single_data(void) { } +static inline void call_function_init(void) { } static inline int smp_call_function_any(const struct cpumask *mask, smp_call_func_t func, -- cgit v1.2.3 From 879669961b11e7f40b518784863a259f735a72bf Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 17 Jun 2011 11:25:59 +0100 Subject: KEYS/DNS: Fix 
____call_usermodehelper() to not lose the session keyring ____call_usermodehelper() now erases any credentials set by the subprocess_info::init() function. The problem is that commit 17f60a7da150 ("capabilites: allow the application of capability limits to usermode helpers") creates and commits new credentials with prepare_kernel_cred() after the call to the init() function. This wipes all keyrings after umh_keys_init() is called. The best way to deal with this is to put the init() call just prior to the commit_creds() call, and pass the cred pointer to init(). That means that umh_keys_init() and suchlike can modify the credentials _before_ they are published and potentially in use by the rest of the system. This prevents request_key() from working as it is prevented from passing the session keyring it set up with the authorisation token to /sbin/request-key, and so the latter can't assume the authority to instantiate the key. This causes the in-kernel DNS resolver to fail with ENOKEY unconditionally. Signed-off-by: David Howells Acked-by: Eric Paris Tested-by: Jeff Layton Signed-off-by: Linus Torvalds --- include/linux/kmod.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kmod.h b/include/linux/kmod.h index d4a5c84c503d..0da38cf7db7b 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -45,7 +45,7 @@ static inline int request_module_nowait(const char *name, ...)
{ return -ENOSYS; #endif -struct key; +struct cred; struct file; enum umh_wait { @@ -62,7 +62,7 @@ struct subprocess_info { char **envp; enum umh_wait wait; int retval; - int (*init)(struct subprocess_info *info); + int (*init)(struct subprocess_info *info, struct cred *new); void (*cleanup)(struct subprocess_info *info); void *data; }; @@ -73,7 +73,7 @@ struct subprocess_info *call_usermodehelper_setup(char *path, char **argv, /* Set various pieces of state into the subprocess_info structure */ void call_usermodehelper_setfns(struct subprocess_info *info, - int (*init)(struct subprocess_info *info), + int (*init)(struct subprocess_info *info, struct cred *new), void (*cleanup)(struct subprocess_info *info), void *data); @@ -87,7 +87,7 @@ void call_usermodehelper_freeinfo(struct subprocess_info *info); static inline int call_usermodehelper_fns(char *path, char **argv, char **envp, enum umh_wait wait, - int (*init)(struct subprocess_info *info), + int (*init)(struct subprocess_info *info, struct cred *new), void (*cleanup)(struct subprocess_info *), void *data) { struct subprocess_info *info; -- cgit v1.2.3 From cca23d0b5350c9ca0473625c3f5879422ba534a6 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Sat, 18 Jun 2011 02:51:52 -0700 Subject: Input: sh_keysc - 8x8 MODE_6 fix According to the data sheet for G4, AP4 and AG5 KEYSC MODE_6 is 8x8 keys. Bump up MAXKEYS to 64 too. 
Signed-off-by: Magnus Damm Reviewed-by: Simon Horman Signed-off-by: Dmitry Torokhov --- include/linux/input/sh_keysc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/input/sh_keysc.h b/include/linux/input/sh_keysc.h index 649dc7f12925..5d253cd93691 100644 --- a/include/linux/input/sh_keysc.h +++ b/include/linux/input/sh_keysc.h @@ -1,7 +1,7 @@ #ifndef __SH_KEYSC_H__ #define __SH_KEYSC_H__ -#define SH_KEYSC_MAXKEYS 49 +#define SH_KEYSC_MAXKEYS 64 struct sh_keysc_info { enum { SH_KEYSC_MODE_1, SH_KEYSC_MODE_2, SH_KEYSC_MODE_3, -- cgit v1.2.3 From be98ca652faa6468916a9b7608befff215a8ca70 Mon Sep 17 00:00:00 2001 From: Manoj Iyer Date: Thu, 26 May 2011 11:19:05 -0500 Subject: mmc: Add PCI fixup quirks for Ricoh 1180:e823 reader Signed-off-by: Manoj Iyer Cc: Signed-off-by: Chris Ball --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index a311008af5e1..f8910e155566 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1537,6 +1537,7 @@ #define PCI_DEVICE_ID_RICOH_RL5C476 0x0476 #define PCI_DEVICE_ID_RICOH_RL5C478 0x0478 #define PCI_DEVICE_ID_RICOH_R5C822 0x0822 +#define PCI_DEVICE_ID_RICOH_R5CE823 0xe823 #define PCI_DEVICE_ID_RICOH_R5C832 0x0832 #define PCI_DEVICE_ID_RICOH_R5C843 0x0843 -- cgit v1.2.3 From 155d109b5f52ffd749219b27702462dcd9cf4f8d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 20 Jun 2011 13:23:14 +0200 Subject: block: add REQ_SECURE to REQ_COMMON_MASK Add REQ_SECURE flag to REQ_COMMON_MASK so that init_request_from_bio() can pass it to @req->cmd_flags. 
Signed-off-by: Namhyung Kim Acked-by: Adrian Hunter Cc: stable@kernel.org # 2.6.36 and newer Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 2a7cea53ca0d..6395692b2e7a 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -167,7 +167,7 @@ enum rq_flag_bits { (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) #define REQ_COMMON_MASK \ (REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_DISCARD | \ - REQ_NOIDLE | REQ_FLUSH | REQ_FUA) + REQ_NOIDLE | REQ_FLUSH | REQ_FUA | REQ_SECURE) #define REQ_CLONE_MASK REQ_COMMON_MASK #define REQ_RAHEAD (1 << __REQ_RAHEAD) -- cgit v1.2.3 From 482e0cd3dbaa70f2a2bead4b5f2c0d203ef654ba Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 19 Jun 2011 13:01:04 -0400 Subject: devcgroup_inode_permission: take "is it a device node" checks to inlined wrapper inode_permission() calls devcgroup_inode_permission() and almost all such calls are _not_ for device nodes; let's at least keep the common path straight... 
Signed-off-by: Al Viro --- include/linux/device_cgroup.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device_cgroup.h b/include/linux/device_cgroup.h index 0b0d9c39ed67..7aad1f440867 100644 --- a/include/linux/device_cgroup.h +++ b/include/linux/device_cgroup.h @@ -2,8 +2,16 @@ #include #ifdef CONFIG_CGROUP_DEVICE -extern int devcgroup_inode_permission(struct inode *inode, int mask); +extern int __devcgroup_inode_permission(struct inode *inode, int mask); extern int devcgroup_inode_mknod(int mode, dev_t dev); +static inline int devcgroup_inode_permission(struct inode *inode, int mask) +{ + if (likely(!inode->i_rdev)) + return 0; + if (!S_ISBLK(inode->i_mode) && !S_ISCHR(inode->i_mode)) + return 0; + return __devcgroup_inode_permission(inode, mask); +} #else static inline int devcgroup_inode_permission(struct inode *inode, int mask) { return 0; } -- cgit v1.2.3 From 19345cb299e8234006c5125151ab723e851a1d24 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sun, 19 Jun 2011 18:33:46 -0400 Subject: NFSv4.1: file layout must consider pg_bsize for coalescing Otherwise we end up overflowing the rpc buffer size on the receive end. 
Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- include/linux/nfs_page.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 3a34e80ae92f..25311b3bedf8 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -92,6 +92,9 @@ extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *, struct nfs_page *); extern void nfs_pageio_complete(struct nfs_pageio_descriptor *desc); extern void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t); +extern bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, + struct nfs_page *prev, + struct nfs_page *req); extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); extern int nfs_set_page_tag_locked(struct nfs_page *req); -- cgit v1.2.3 From 79568f5be06c91071697c065f01f3ebfbeb25a61 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 20 Jun 2011 20:13:49 -0700 Subject: vfs: i_state needs to be 'unsigned long' for now Commit 13e12d14e2dc ("vfs: reorganize 'struct inode' layout a bit") moved things around a bit and changed i_state to be unsigned int instead of unsigned long. That was to help structure layout for the 64-bit case, and shrink 'struct inode' a bit (admittedly that only happened when spinlock debugging was on and i_flags didn't pack with i_lock). However, Meelis Roos reports that this results in unaligned exceptions on sparc, and it turns out that the bit-locking primitives that we use for the I_NEW bit want to use the bitops. Which want 'unsigned long', not 'unsigned int'. We really should fix the bit locking code to not have that kind of requirement, but that's a much bigger change. So for now, revert that field back to 'unsigned long' (but keep the other re-ordering changes from the commit that caused this).
Andi points out that we have played games with this in 'struct page', so it's solvable with other hacks too, but since right now the struct inode size advantage only happens with some rare config options, it's not worth fighting. It _would_ be worth fixing the bitlocking code, though. Especially since there is no type safety in the bitlocking code (this never caused any warnings, and worked fine on x86-64, because the bitlocks take a 'void *' and x86-64 doesn't care that deeply about alignment). So it's currently a very easy problem to trigger by mistake and never notice. Reported-by: Meelis Roos Cc: Andi Kleen Cc: David Miller Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 1c777878f1ea..6e73e2e9ae33 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -744,7 +744,7 @@ struct inode { spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ unsigned int i_flags; - unsigned int i_state; + unsigned long i_state; #ifdef CONFIG_SECURITY void *i_security; #endif -- cgit v1.2.3 From f76b168b6f117a49d36307053e1acbe30580ea5b Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Sat, 18 Jun 2011 20:22:23 +0200 Subject: PM: Rename dev_pm_info.in_suspend to is_prepared This patch (as1473) renames the "in_suspend" field in struct dev_pm_info to "is_prepared", in preparation for an upcoming change. The new name is more descriptive of what the field really means. Signed-off-by: Alan Stern Signed-off-by: Rafael J. 
Wysocki Cc: stable@kernel.org --- include/linux/device.h | 4 ++-- include/linux/pm.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index c66111affca9..553fd37b173b 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -654,13 +654,13 @@ static inline int device_is_registered(struct device *dev) static inline void device_enable_async_suspend(struct device *dev) { - if (!dev->power.in_suspend) + if (!dev->power.is_prepared) dev->power.async_suspend = true; } static inline void device_disable_async_suspend(struct device *dev) { - if (!dev->power.in_suspend) + if (!dev->power.is_prepared) dev->power.async_suspend = false; } diff --git a/include/linux/pm.h b/include/linux/pm.h index 3160648ccdda..cc536bd80984 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -425,7 +425,7 @@ struct dev_pm_info { pm_message_t power_state; unsigned int can_wakeup:1; unsigned int async_suspend:1; - unsigned int in_suspend:1; /* Owned by the PM core */ + bool is_prepared:1; /* Owned by the PM core */ spinlock_t lock; #ifdef CONFIG_PM_SLEEP struct list_head entry; -- cgit v1.2.3 From 6d0e0e84f66d32c33511984dd3badd32364b863c Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Sat, 18 Jun 2011 22:42:09 +0200 Subject: PM: Fix async resume following suspend failure The PM core doesn't handle suspend failures correctly when it comes to asynchronously suspended devices. These devices are moved onto the dpm_suspended_list as soon as the corresponding async thread is started up, and they remain on the list even if they fail to suspend or the sleep transition is cancelled before they get suspended. As a result, when the PM core unwinds the transition, it tries to resume the devices even though they were never suspended. This patch (as1474) fixes the problem by adding a new "is_suspended" flag to dev_pm_info. Devices are resumed only if the flag is set. 
[rjw: * Moved the dev->power.is_suspended check into device_resume(), because we need to complete dev->power.completion and clear dev->power.is_prepared too for devices whose dev->power.is_suspended flags are unset. * Fixed __device_suspend() to avoid setting dev->power.is_suspended if async_error is different from zero.] Signed-off-by: Alan Stern Signed-off-by: Rafael J. Wysocki Cc: stable@kernel.org --- include/linux/pm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pm.h b/include/linux/pm.h index cc536bd80984..411e4f4be52b 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -426,6 +426,7 @@ struct dev_pm_info { unsigned int can_wakeup:1; unsigned int async_suspend:1; bool is_prepared:1; /* Owned by the PM core */ + bool is_suspended:1; /* Ditto */ spinlock_t lock; #ifdef CONFIG_PM_SLEEP struct list_head entry; -- cgit v1.2.3 From c6830c22603aaecf65405af23f6da2d55892f9cb Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Thu, 16 Jun 2011 17:28:07 +0900 Subject: Fix node_start/end_pfn() definition for mm/page_cgroup.c commit 21a3c96 uses node_start/end_pfn(nid) for detecting the start/end of nodes. But, it's not defined in linux/mmzone.h but defined in /arch/???/include/mmzone.h which is included only under CONFIG_NEED_MULTIPLE_NODES=y. Then, we see mm/page_cgroup.c: In function 'page_cgroup_init': mm/page_cgroup.c:308: error: implicit declaration of function 'node_start_pfn' mm/page_cgroup.c:309: error: implicit declaration of function 'node_end_pfn' So, fixing page_cgroup.c is an idea... But node_start_pfn()/node_end_pfn() is a very generic macro and should be implemented in the same manner for all archs. (m32r has different implementation...) This patch removes definitions of node_start/end_pfn() in each arch and defines a unified one in linux/mmzone.h. It's not under CONFIG_NEED_MULTIPLE_NODES, now.
A result of macro expansion is here (mm/page_cgroup.c) for !NUMA start_pfn = ((&contig_page_data)->node_start_pfn); end_pfn = ({ pg_data_t *__pgdat = (&contig_page_data); __pgdat->node_start_pfn + __pgdat->node_spanned_pages;}); for NUMA (x86-64) start_pfn = ((node_data[nid])->node_start_pfn); end_pfn = ({ pg_data_t *__pgdat = (node_data[nid]); __pgdat->node_start_pfn + __pgdat->node_spanned_pages;}); Changelog: - fixed to avoid using "nid" twice in node_end_pfn() macro. Reported-and-acked-by: Randy Dunlap Reported-and-tested-by: Ingo Molnar Acked-by: Mel Gorman Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index c928dac6cad0..9f7c3ebcbbad 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -647,6 +647,13 @@ typedef struct pglist_data { #endif #define nid_page_nr(nid, pagenr) pgdat_page_nr(NODE_DATA(nid),(pagenr)) +#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) + +#define node_end_pfn(nid) ({\ + pg_data_t *__pgdat = NODE_DATA(nid);\ + __pgdat->node_start_pfn + __pgdat->node_spanned_pages;\ +}) + #include extern struct mutex zonelists_mutex; -- cgit v1.2.3 From 4d258b25d947521c8b913154db61ec55198243f8 Mon Sep 17 00:00:00 2001 From: Vitaliy Ivanov Date: Mon, 27 Jun 2011 19:07:08 +0300 Subject: Fix some kernel-doc warnings Fix 'make htmldocs' warnings: Warning(/include/linux/hrtimer.h:153): No description found for parameter 'clockid' Warning(/include/linux/device.h:604): Excess struct/union/enum/typedef member 'of_match' description in 'device' Warning(/include/net/sock.h:349): Excess struct/union/enum/typedef member 'sk_rmem_alloc' description in 'sock' Signed-off-by: Vitaliy Ivanov Acked-by: Grant Likely Acked-by: David S. 
Miller Signed-off-by: Linus Torvalds --- include/linux/device.h | 1 - include/linux/hrtimer.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 553fd37b173b..e4f62d8896b7 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -530,7 +530,6 @@ struct device_dma_parameters { * @dma_mem: Internal for coherent mem override. * @archdata: For arch-specific additions. * @of_node: Associated device tree node. - * @of_match: Matching of_device_id from driver. * @devt: For creating the sysfs "dev". * @devres_lock: Spinlock to protect the resource of the device. * @devres_head: The resources list of the device. diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 51932e5acf7c..fd0dc30c9f15 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -135,6 +135,7 @@ struct hrtimer_sleeper { * @cpu_base: per cpu clock base * @index: clock type index for per_cpu support when moving a * timer to a base on another cpu. + * @clockid: clock id for per_cpu support * @active: red black tree root node for the active timers * @resolution: the resolution of the clock, in nanoseconds * @get_time: function to retrieve the current time of the clock -- cgit v1.2.3 From 072441e21ddcd1140606b7d4ef6eab579a86b0b3 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 27 Jun 2011 16:18:02 -0700 Subject: mm: move shmem prototypes to shmem_fs.h Before adding any more global entry points into shmem.c, gather such prototypes into shmem_fs.h. Remove mm's own declarations from swap.h, but for now leave the ones in mm.h: because shmem_file_setup() and shmem_zero_setup() are called from various places, and we should not force other subsystems to update immediately. 
Signed-off-by: Hugh Dickins Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/shmem_fs.h | 17 +++++++++++++++++ include/linux/swap.h | 10 ---------- 2 files changed, 17 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 2b7fec840517..cae65dc42bcc 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -5,6 +5,13 @@ #include #include +struct page; +struct file; +struct inode; +struct super_block; +struct user_struct; +struct vm_area_struct; + /* inode in-kernel data */ #define SHMEM_NR_DIRECT 16 @@ -45,7 +52,17 @@ static inline struct shmem_inode_info *SHMEM_I(struct inode *inode) return container_of(inode, struct shmem_inode_info, vfs_inode); } +/* + * Functions in mm/shmem.c called directly from elsewhere: + */ extern int init_tmpfs(void); extern int shmem_fill_super(struct super_block *sb, void *data, int silent); +extern struct file *shmem_file_setup(const char *name, + loff_t size, unsigned long flags); +extern int shmem_zero_setup(struct vm_area_struct *); +extern int shmem_lock(struct file *file, int lock, struct user_struct *user); +extern int shmem_unuse(swp_entry_t entry, struct page *page); +extern void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff, + struct page **pagep, swp_entry_t *ent); #endif diff --git a/include/linux/swap.h b/include/linux/swap.h index e70564647039..a273468f8285 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -300,16 +300,6 @@ static inline void scan_unevictable_unregister_node(struct node *node) extern int kswapd_run(int nid); extern void kswapd_stop(int nid); -#ifdef CONFIG_MMU -/* linux/mm/shmem.c */ -extern int shmem_unuse(swp_entry_t entry, struct page *page); -#endif /* CONFIG_MMU */ - -#ifdef CONFIG_CGROUP_MEM_RES_CTLR -extern void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff, - struct page **pagep, swp_entry_t *ent); -#endif - 
#ifdef CONFIG_SWAP /* linux/mm/page_io.c */ extern int swap_readpage(struct page *); -- cgit v1.2.3 From 94c1e62df4494b79782cb9c7279f827212d1de70 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 27 Jun 2011 16:18:03 -0700 Subject: tmpfs: take control of its truncate_range 2.6.35's new truncate convention gave tmpfs the opportunity to control its file truncation, no longer enforced from outside by vmtruncate(). We shall want to build upon that, to handle pagecache and swap together. Slightly redefine the ->truncate_range interface: let it now be called between the unmap_mapping_range()s, with the filesystem responsible for doing the truncate_inode_pages_range() from it - just as the filesystem is nowadays responsible for doing that from its ->setattr. Let's rename shmem_notify_change() to shmem_setattr(). Instead of calling the generic truncate_setsize(), bring that code in so we can call shmem_truncate_range() - which will later be updated to perform its own variant of truncate_inode_pages_range(). Remove the punch_hole unmap_mapping_range() from shmem_truncate_range(): now that the COW's unmap_mapping_range() comes after ->truncate_range, there is no need to call it a third time. Export shmem_truncate_range() and add it to the list in shmem_fs.h, so that i915_gem_object_truncate() can call it explicitly in future; get this patch in first, then update drm/i915 once this is available (until then, i915 will just be doing the truncate_inode_pages() twice). Though introduced five years ago, no other filesystem is implementing ->truncate_range, and its only other user is madvise(,,MADV_REMOVE): we expect to convert it to fallocate(,FALLOC_FL_PUNCH_HOLE,,) shortly, whereupon ->truncate_range can be removed from inode_operations - shmem_truncate_range() will help i915 across that transition too. 
Signed-off-by: Hugh Dickins Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/shmem_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index cae65dc42bcc..22a20af4d785 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -61,6 +61,7 @@ extern struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags); extern int shmem_zero_setup(struct vm_area_struct *); extern int shmem_lock(struct file *file, int lock, struct user_struct *user); +extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); extern int shmem_unuse(swp_entry_t entry, struct page *page); extern void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff, struct page **pagep, swp_entry_t *ent); -- cgit v1.2.3 From d9d90e5eb70e09903dadff42099b6c948f814050 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 27 Jun 2011 16:18:04 -0700 Subject: tmpfs: add shmem_read_mapping_page_gfp Although it is used (by i915) on nothing but tmpfs, read_cache_page_gfp() is unsuited to tmpfs, because it inserts a page into pagecache before calling the filesystem's ->readpage: tmpfs may have pages in swapcache which only it knows how to locate and switch to filecache. At present tmpfs provides a ->readpage method, and copes with this by copying pages; but soon we can simplify it by removing its ->readpage. Provide shmem_read_mapping_page_gfp() now, ready for that transition. Export shmem_read_mapping_page_gfp() and add it to the list in shmem_fs.h, with shmem_read_mapping_page() inline for the common mapping_gfp case. (shmem_read_mapping_page_gfp or shmem_read_cache_page_gfp? Generally the read_mapping_page functions use the mapping's ->readpage, and the read_cache_page functions use the supplied filler, so I think read_cache_page_gfp was slightly misnamed.)
Signed-off-by: Hugh Dickins Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/shmem_fs.h | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 22a20af4d785..aa08fa8fd79b 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -3,15 +3,9 @@ #include #include +#include #include -struct page; -struct file; -struct inode; -struct super_block; -struct user_struct; -struct vm_area_struct; - /* inode in-kernel data */ #define SHMEM_NR_DIRECT 16 @@ -61,9 +55,18 @@ extern struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags); extern int shmem_zero_setup(struct vm_area_struct *); extern int shmem_lock(struct file *file, int lock, struct user_struct *user); +extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, + pgoff_t index, gfp_t gfp_mask); extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); extern int shmem_unuse(swp_entry_t entry, struct page *page); extern void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff, struct page **pagep, swp_entry_t *ent); +static inline struct page *shmem_read_mapping_page( + struct address_space *mapping, pgoff_t index) +{ + return shmem_read_mapping_page_gfp(mapping, index, + mapping_gfp_mask(mapping)); +} + #endif -- cgit v1.2.3 From 507c5f1224014f9956e604ee8703b3bbea7da4a4 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 27 Jun 2011 16:18:07 -0700 Subject: include/linux/compat.h: declare compat_sys_sendmmsg() This is required for tilegx to be able to use the compat unistd.h header where compat_sys_sendmmsg() is now mentioned. 
Signed-off-by: Chris Metcalf Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compat.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index ddcb7db38e67..846bb1792572 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -467,6 +467,8 @@ asmlinkage long compat_sys_setsockopt(int fd, int level, int optname, char __user *optval, unsigned int optlen); asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags); +asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg, + unsigned vlen, unsigned int flags); asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags); asmlinkage long compat_sys_recv(int fd, void __user *buf, size_t len, -- cgit v1.2.3 From 08142579b6ca35883c1ed066a2681de6f6917062 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 27 Jun 2011 16:18:10 -0700 Subject: mm: fix assertion mapping->nrpages == 0 in end_writeback() Under heavy memory and filesystem load, users observe the assertion mapping->nrpages == 0 in end_writeback() trigger. This can be caused by page reclaim reclaiming the last page from a mapping in the following race: CPU0 CPU1 ... shrink_page_list() __remove_mapping() __delete_from_page_cache() radix_tree_delete() evict_inode() truncate_inode_pages() truncate_inode_pages_range() pagevec_lookup() - finds nothing end_writeback() mapping->nrpages != 0 -> BUG page->mapping = NULL mapping->nrpages-- Fix the problem by doing a reliable check of mapping->nrpages under mapping->tree_lock in end_writeback(). Analyzed by Jay , lost in LKML, and dug out by Miklos Szeredi . 
Cc: Jay Cc: Miklos Szeredi Signed-off-by: Jan Kara Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 6e73e2e9ae33..b5b979247863 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -639,6 +639,7 @@ struct address_space { struct prio_tree_root i_mmap; /* tree of private and shared mappings */ struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ struct mutex i_mmap_mutex; /* protect tree, count, list */ + /* Protected by tree_lock together with the radix tree */ unsigned long nrpages; /* number of total pages */ pgoff_t writeback_index;/* writeback starts here */ const struct address_space_operations *a_ops; /* methods */ -- cgit v1.2.3