From 05ba1f0823004e947748523782e9c2f07f3bff0d Mon Sep 17 00:00:00 2001 From: Anatol Pomozov Date: Sun, 22 Apr 2012 18:45:24 -0700 Subject: fuse: add FALLOCATE operation fallocate filesystem operation preallocates media space for the given file. If fallocate returns success then any subsequent write to the given range never fails with 'not enough space' error. Signed-off-by: Anatol Pomozov Signed-off-by: Miklos Szeredi --- include/linux/fuse.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fuse.h b/include/linux/fuse.h index 8f2ab8fef929..9303348965fb 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -54,6 +54,9 @@ * 7.18 * - add FUSE_IOCTL_DIR flag * - add FUSE_NOTIFY_DELETE + * + * 7.19 + * - add FUSE_FALLOCATE */ #ifndef _LINUX_FUSE_H @@ -85,7 +88,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 18 +#define FUSE_KERNEL_MINOR_VERSION 19 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -278,6 +281,7 @@ enum fuse_opcode { FUSE_POLL = 40, FUSE_NOTIFY_REPLY = 41, FUSE_BATCH_FORGET = 42, + FUSE_FALLOCATE = 43, /* CUSE specific operations */ CUSE_INIT = 4096, @@ -571,6 +575,14 @@ struct fuse_notify_poll_wakeup_out { __u64 kh; }; +struct fuse_fallocate_in { + __u64 fh; + __u64 offset; + __u64 length; + __u32 mode; + __u32 padding; +}; + struct fuse_in_header { __u32 len; __u32 opcode; -- cgit v1.2.3 From c3ba9698152b17fdc2c7cd0f7cbeb571e3367e9d Mon Sep 17 00:00:00 2001 From: Dan Magenheimer Date: Mon, 9 Apr 2012 17:06:54 -0600 Subject: mm: frontswap: add frontswap header file Frontswap is the alter ego of cleancache, the "yang" to cleancache's "yin"... and more precisely frontswap is the provider of anonymous pages to transcendent memory to nicely complement cleancache's providing of clean pagecache pages to transcendent memory. For optimal use of transcendent memory, both are necessary... because a kernel under memory pressure first reclaims clean pagecache pages and, when under more memory pressure, starts swapping anonymous pages. Frontswap and cleancache (which was merged at 3.0) are the "frontends" and the only necessary changes to the core kernel for transcendent memory; all other supporting code -- the "backends" -- is implemented as drivers. See the LWN.net article "Transcendent memory in a nutshell" for a detailed overview of frontswap and related kernel parts: https://lwn.net/Articles/454795/ Frontswap code was first posted publicly in January 2009 and on LKML in May 2009, and has remained functionally stable for nearly three years now. It is barely invasive, touching only the swap subsystem and adds less than 100 lines of code to existing swap subsystem code files. It has improved syntactically substantially between V1 and this posting of V14, thanks to the review of a few kernel developers, and has adapted easily to at least one major swap subsystem change. As of 3.4, there are three in-tree users of frontswap patiently waiting for this patchset and for CONFIG_FRONTSWAP to be enabled: zcache (staging driver merged at 2.6.39), Xen tmem (merged at 3.0 and 3.1) and RAMster (staging driver merged at 3.4). In addition, a RFC has been posted for a KVM backend. The frontswap patchset has been in linux-next since next-110603. Earlier versions of frontswap already ship in the Oracle Unbreakable Enterprise Kernel and SuSE SLES. This patch, 1of4, provides the header file for the core code for frontswap that interfaces between the hooks in the swap subsystem and a frontswap backend via frontswap_ops. --- New file added: include/linux/frontswap.h [v14: add support for writethrough, per suggestion by aarcange@redhat.com] [v14: rebase to 3.4-rc2] [v11: konrad.wilk@oracle.com: squashed s/flush/invalidate/ in] [v10: no change] [v9: akpm@linux-foundation.org: change "flush" to "invalidate", part 1] [v8: rebase to 3.0-rc4] [v7: rebase to 3.0-rc3] [v7: JBeulich@novell.com: new static inlines resolve to no-ops if not config'd] [v7: JBeulich@novell.com: avoid redundant shifts/divides for *_bit lib calls] [v6: rebase to 3.1-rc1] [v5: no change from v4] [v4: rebase to 2.6.39] Signed-off-by: Dan Magenheimer Reviewed-by: Andrew Morton Acked-by: Jan Beulich Acked-by: Seth Jennings Cc: Jeremy Fitzhardinge Cc: Hugh Dickins Cc: Johannes Weiner Cc: Nitin Gupta Cc: Matthew Wilcox Cc: Chris Mason Cc: Rik Riel [v15: int/bool on some functions] Signed-off-by: Konrad Wilk --- include/linux/frontswap.h | 127 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 include/linux/frontswap.h (limited to 'include/linux') diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h new file mode 100644 index 000000000000..68ff7af5c5fb --- /dev/null +++ b/include/linux/frontswap.h @@ -0,0 +1,127 @@ +#ifndef _LINUX_FRONTSWAP_H +#define _LINUX_FRONTSWAP_H + +#include +#include +#include + +struct frontswap_ops { + void (*init)(unsigned); + int (*put_page)(unsigned, pgoff_t, struct page *); + int (*get_page)(unsigned, pgoff_t, struct page *); + void (*invalidate_page)(unsigned, pgoff_t); + void (*invalidate_area)(unsigned); +}; + +extern bool frontswap_enabled; +extern struct frontswap_ops + frontswap_register_ops(struct frontswap_ops *ops); +extern void frontswap_shrink(unsigned long); +extern unsigned long frontswap_curr_pages(void); +extern void frontswap_writethrough(bool); + +extern void __frontswap_init(unsigned type); +extern int __frontswap_put_page(struct page *page); +extern int __frontswap_get_page(struct page *page); +extern void __frontswap_invalidate_page(unsigned, pgoff_t); +extern void __frontswap_invalidate_area(unsigned); + +#ifdef CONFIG_FRONTSWAP + +static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset) +{ + bool ret = false; + + if (frontswap_enabled && sis->frontswap_map) + ret = test_bit(offset, sis->frontswap_map); + return ret; +} + +static inline void frontswap_set(struct swap_info_struct *sis, pgoff_t offset) +{ + if (frontswap_enabled && sis->frontswap_map) + set_bit(offset, sis->frontswap_map); +} + +static inline void frontswap_clear(struct swap_info_struct *sis, pgoff_t offset) +{ + if (frontswap_enabled && sis->frontswap_map) + clear_bit(offset, sis->frontswap_map); +} + +static inline void frontswap_map_set(struct swap_info_struct *p, + unsigned long *map) +{ + p->frontswap_map = map; +} + +static inline unsigned long *frontswap_map_get(struct swap_info_struct *p) +{ + return p->frontswap_map; +} +#else +/* all inline routines become no-ops and all externs are ignored */ + +#define frontswap_enabled (0) + +static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset) +{ + return false; +} + +static inline void frontswap_set(struct swap_info_struct *sis, pgoff_t offset) +{ +} + +static inline void frontswap_clear(struct swap_info_struct *sis, pgoff_t offset) +{ +} + +static inline void frontswap_map_set(struct swap_info_struct *p, + unsigned long *map) +{ +} + +static inline unsigned long *frontswap_map_get(struct swap_info_struct *p) +{ + return NULL; +} +#endif + +static inline int frontswap_put_page(struct page *page) +{ + int ret = -1; + + if (frontswap_enabled) + ret = __frontswap_put_page(page); + return ret; +} + +static inline int frontswap_get_page(struct page *page) +{ + int ret = -1; + + if (frontswap_enabled) + ret = __frontswap_get_page(page); + return ret; +} + +static inline void frontswap_invalidate_page(unsigned type, pgoff_t offset) +{ + if (frontswap_enabled) + __frontswap_invalidate_page(type, offset); +} + +static inline void frontswap_invalidate_area(unsigned type) +{ + if (frontswap_enabled) + __frontswap_invalidate_area(type); +} + +static inline void frontswap_init(unsigned type) +{ + if (frontswap_enabled) + __frontswap_init(type); +} + +#endif /* _LINUX_FRONTSWAP_H */ -- cgit v1.2.3 From 38b5faf4b178d5279b1fca5d7dadc68881342660 Mon Sep 17 00:00:00 2001 From: Dan Magenheimer Date: Mon, 9 Apr 2012 17:08:06 -0600 Subject: mm: frontswap: core swap subsystem hooks and headers This patch, 2of4, contains the changes to the core swap subsystem. This includes: (1) makes available core swap data structures (swap_lock, swap_list and swap_info) that are needed by frontswap.c but we don't need to expose them to the dozens of files that include swap.h so we create a new swapfile.h just to extern-ify these and modify their declarations to non-static (2) adds frontswap-related elements to swap_info_struct. Frontswap_map points to vzalloc'ed one-bit-per-swap-page metadata that indicates whether the swap page is in frontswap or in the device and frontswap_pages counts how many pages are in frontswap. (3) adds hooks in the swap subsystem and extends try_to_unuse so that frontswap_shrink can do a "partial swapoff". Note that a failed frontswap_map allocation is safe... failure is noted by lack of "FS" in the subsequent printk. --- [v14: rebase to 3.4-rc2] [v10: no change] [v9: akpm@linux-foundation.org: mark some statics __read_mostly] [v9: akpm@linux-foundation.org: add clarifying comments] [v9: akpm@linux-foundation.org: no need to loop repeating try_to_unuse] [v9: error27@gmail.com: remove superfluous check for NULL] [v8: rebase to 3.0-rc4] [v8: kamezawa.hiroyu@jp.fujitsu.com: change counter to atomic_t to avoid races] [v8: kamezawa.hiroyu@jp.fujitsu.com: comment to clarify informational counters] [v7: rebase to 3.0-rc3] [v7: JBeulich@novell.com: add new swap struct elements only if config'd] [v6: rebase to 3.0-rc1] [v6: lliubbo@gmail.com: fix null pointer deref if vzalloc fails] [v6: konrad.wilk@oracl.com: various checks and code clarifications/comments] [v5: no change from v4] [v4: rebase to 2.6.39] Signed-off-by: Dan Magenheimer Reviewed-by: Kamezawa Hiroyuki Acked-by: Jan Beulich Acked-by: Seth Jennings Cc: Jeremy Fitzhardinge Cc: Hugh Dickins Cc: Johannes Weiner Cc: Nitin Gupta Cc: Matthew Wilcox Cc: Chris Mason Cc: Rik Riel Cc: Andrew Morton [v11: Rebased, fixed mm/swapfile.c context change] Signed-off-by: Konrad Rzeszutek Wilk --- include/linux/swap.h | 4 ++++ include/linux/swapfile.h | 13 +++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 include/linux/swapfile.h (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index b1fd5c7925fe..50a55e2d58ec 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -197,6 +197,10 @@ struct swap_info_struct { struct block_device *bdev; /* swap device or bdev of swap file */ struct file *swap_file; /* seldom referenced */ unsigned int old_block_size; /* seldom referenced */ +#ifdef CONFIG_FRONTSWAP + unsigned long *frontswap_map; /* frontswap in-use, one bit per page */ + atomic_t frontswap_pages; /* frontswap pages in-use counter */ +#endif }; struct swap_list_t { diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h new file mode 100644 index 000000000000..e282624e8c10 --- /dev/null +++ b/include/linux/swapfile.h @@ -0,0 +1,13 @@ +#ifndef _LINUX_SWAPFILE_H +#define _LINUX_SWAPFILE_H + +/* + * these were static in swapfile.c but frontswap.c needs them and we don't + * want to expose them to the dozens of source files that include swap.h + */ +extern spinlock_t swap_lock; +extern struct swap_list_t swap_list; +extern struct swap_info_struct *swap_info[]; +extern int try_to_unuse(unsigned int, bool, unsigned long); + +#endif /* _LINUX_SWAPFILE_H */ -- cgit v1.2.3 From 165c8aed5bbc6bdddbccae0ba9db451732558ff9 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 15 May 2012 11:32:15 -0400 Subject: frontswap: s/put_page/store/g s/get_page/load Sounds so much more natural. Suggested-by: Andrea Arcangeli Signed-off-by: Konrad Rzeszutek Wilk --- include/linux/frontswap.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h index 68ff7af5c5fb..0e4e2eec5c1d 100644 --- a/include/linux/frontswap.h +++ b/include/linux/frontswap.h @@ -7,8 +7,8 @@ struct frontswap_ops { void (*init)(unsigned); - int (*put_page)(unsigned, pgoff_t, struct page *); - int (*get_page)(unsigned, pgoff_t, struct page *); + int (*store)(unsigned, pgoff_t, struct page *); + int (*load)(unsigned, pgoff_t, struct page *); void (*invalidate_page)(unsigned, pgoff_t); void (*invalidate_area)(unsigned); }; @@ -21,8 +21,8 @@ extern unsigned long frontswap_curr_pages(void); extern void frontswap_writethrough(bool); extern void __frontswap_init(unsigned type); -extern int __frontswap_put_page(struct page *page); -extern int __frontswap_get_page(struct page *page); +extern int __frontswap_store(struct page *page); +extern int __frontswap_load(struct page *page); extern void __frontswap_invalidate_page(unsigned, pgoff_t); extern void __frontswap_invalidate_area(unsigned); @@ -88,21 +88,21 @@ static inline unsigned long *frontswap_map_get(struct swap_info_struct *p) } #endif -static inline int frontswap_put_page(struct page *page) +static inline int frontswap_store(struct page *page) { int ret = -1; if (frontswap_enabled) - ret = __frontswap_put_page(page); + ret = __frontswap_store(page); return ret; } -static inline int frontswap_get_page(struct page *page) +static inline int frontswap_load(struct page *page) { int ret = -1; if (frontswap_enabled) - ret = __frontswap_get_page(page); + ret = __frontswap_load(page); return ret; } -- cgit v1.2.3 From e5400321a6f15ce0fe77c8455954f213ef7dcc54 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 9 May 2012 23:39:34 +0900 Subject: clockevents: Make clockevents_config() a global symbol Make clockevents_config() into a global symbol to allow it to be used by compiled-in clockevent drivers. This is needed by drivers that want to update the timer frequency after registration time. Signed-off-by: Magnus Damm Tested-by: Simon Horman Cc: arnd@arndb.de Cc: johnstul@us.ibm.com Cc: rjw@sisk.pl Cc: lethal@linux-sh.org Cc: gregkh@linuxfoundation.org Cc: olof@lixom.net Cc: Magnus Damm Link: http://lkml.kernel.org/r/20120509143934.27521.46553.sendpatchset@w520 Signed-off-by: Thomas Gleixner --- include/linux/clockchips.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 81e803e90aa4..acba894374a1 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -132,6 +132,7 @@ extern u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt); extern void clockevents_register_device(struct clock_event_device *dev); +extern void clockevents_config(struct clock_event_device *dev, u32 freq); extern void clockevents_config_and_register(struct clock_event_device *dev, u32 freq, unsigned long min_delta, unsigned long max_delta); -- cgit v1.2.3 From 5aaa0b7a2ed5b12692c9ffb5222182bd558d3146 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 17 May 2012 17:15:29 +0200 Subject: sched/nohz: Fix rq->cpu_load calculations some more Follow up on commit 556061b00 ("sched/nohz: Fix rq->cpu_load[] calculations") since while that fixed the busy case it regressed the mostly idle case. Add a callback from the nohz exit to also age the rq->cpu_load[] array. This closes the hole where either there was no nohz load balance pass during the nohz, or there was a 'significant' amount of idle time between the last nohz balance and the nohz exit. So we'll update unconditionally from the tick to not insert any accidental 0 load periods while busy, and we try and catch up from nohz idle balance and nohz exit. Both these are still prone to missing a jiffy, but that has always been the case. Signed-off-by: Peter Zijlstra Cc: pjt@google.com Cc: Venkatesh Pallipadi Link: http://lkml.kernel.org/n/tip-kt0trz0apodbf84ucjfdbr1a@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index f45c0b280b5d..d61e5977e517 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -145,6 +145,7 @@ extern unsigned long this_cpu_load(void); extern void calc_global_load(unsigned long ticks); +extern void update_cpu_load_nohz(void); extern unsigned long get_parent_ip(unsigned long addr); -- cgit v1.2.3 From 29baa7478ba47d746e3625c91d3b2afbf46b4312 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 23 Apr 2012 12:11:21 +0200 Subject: sched: Move nr_cpus_allowed out of 'struct sched_rt_entity' Since nr_cpus_allowed is used outside of sched/rt.c and wants to be used outside of there more, move it to a more natural site. Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-kr61f02y9brwzkh6x53pdptm@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 2 +- include/linux/sched.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index e4baff5f7ff4..9e65eff6af3b 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -149,6 +149,7 @@ extern struct cred init_cred; .normal_prio = MAX_PRIO-20, \ .policy = SCHED_NORMAL, \ .cpus_allowed = CPU_MASK_ALL, \ + .nr_cpus_allowed= NR_CPUS, \ .mm = NULL, \ .active_mm = &init_mm, \ .se = { \ @@ -157,7 +158,6 @@ extern struct cred init_cred; .rt = { \ .run_list = LIST_HEAD_INIT(tsk.rt.run_list), \ .time_slice = RR_TIMESLICE, \ - .nr_cpus_allowed = NR_CPUS, \ }, \ .tasks = LIST_HEAD_INIT(tsk.tasks), \ INIT_PUSHABLE_TASKS(tsk) \ diff --git a/include/linux/sched.h b/include/linux/sched.h index d61e5977e517..0f50e78f7f44 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1188,7 +1188,6 @@ struct sched_rt_entity { struct list_head run_list; unsigned long timeout; unsigned int time_slice; - int nr_cpus_allowed; struct sched_rt_entity *back; #ifdef CONFIG_RT_GROUP_SCHED @@ -1253,6 +1252,7 @@ struct task_struct { #endif unsigned int policy; + int nr_cpus_allowed; cpumask_t cpus_allowed; #ifdef CONFIG_PREEMPT_RCU -- cgit v1.2.3 From 114067b69e7b2c691faace0e33db2f04096f668d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 31 May 2012 14:43:27 +0900 Subject: perf tools: Check if callchain is corrupted We faced segmentation fault on perf top -G at very high sampling rate due to a corrupted callchain. While the root cause was not revealed (I failed to figure it out), this patch tries to protect us from the segfault on such cases. Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Namhyung Kim Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Sunjin Yang Link: http://lkml.kernel.org/r/1338443007-24857-2-git-send-email-namhyung.kim@lge.com Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index f32578634d9d..1817d4015e5f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -555,6 +555,8 @@ enum perf_event_type { PERF_RECORD_MAX, /* non-ABI */ }; +#define PERF_MAX_STACK_DEPTH 255 + enum perf_callchain_context { PERF_CONTEXT_HV = (__u64)-32, PERF_CONTEXT_KERNEL = (__u64)-128, @@ -609,8 +611,6 @@ struct perf_guest_info_callbacks { #include #include -#define PERF_MAX_STACK_DEPTH 255 - struct perf_callchain_entry { __u64 nr; __u64 ip[PERF_MAX_STACK_DEPTH]; -- cgit v1.2.3 From ae58d1e406986f31d1e88b32f5ac601506c196d8 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Fri, 18 May 2012 09:29:34 -0600 Subject: i2c: Add generic I2C multiplexer using pinctrl API This is useful for SoCs whose I2C module's signals can be routed to different sets of pins at run-time, using the pinctrl API. +-----+ +-----+ | dev | | dev | +------------------------+ +-----+ +-----+ | SoC | | | | /----|------+--------+ | +---+ +------+ | child bus A, on first set of pins | |I2C|---|Pinmux| | | +---+ +------+ | child bus B, on second set of pins | \----|------+--------+--------+ | | | | | +------------------------+ +-----+ +-----+ +-----+ | dev | | dev | | dev | +-----+ +-----+ +-----+ Signed-off-by: Stephen Warren Acked-by: Linus Walleij Acked-by: Rob Herring Signed-off-by: Wolfram Sang --- include/linux/i2c-mux-pinctrl.h | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 include/linux/i2c-mux-pinctrl.h (limited to 'include/linux') diff --git a/include/linux/i2c-mux-pinctrl.h b/include/linux/i2c-mux-pinctrl.h new file mode 100644 index 000000000000..a65c86429e84 --- /dev/null +++ b/include/linux/i2c-mux-pinctrl.h @@ -0,0 +1,41 @@ +/* + * i2c-mux-pinctrl platform data + * + * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef _LINUX_I2C_MUX_PINCTRL_H +#define _LINUX_I2C_MUX_PINCTRL_H + +/** + * struct i2c_mux_pinctrl_platform_data - Platform data for i2c-mux-pinctrl + * @parent_bus_num: Parent I2C bus number + * @base_bus_num: Base I2C bus number for the child busses. 0 for dynamic. + * @bus_count: Number of child busses. Also the number of elements in + * @pinctrl_states + * @pinctrl_states: The names of the pinctrl state to select for each child bus + * @pinctrl_state_idle: The pinctrl state to select when no child bus is being + * accessed. If NULL, the most recently used pinctrl state will be left + * selected. + */ +struct i2c_mux_pinctrl_platform_data { + int parent_bus_num; + int base_bus_num; + int bus_count; + const char **pinctrl_states; + const char *pinctrl_state_idle; +}; + +#endif -- cgit v1.2.3 From 1549210fcc17e9ae20c09ac8cd4c48a8dfd431bd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Jun 2012 09:16:47 -0400 Subject: NFSv4: Fix an Oops in the open recovery code The open recovery code does not need to request a new value for the mdsthreshold, and so does not allocate a struct nfs4_threshold. The problem is that encode_getfattr_open() will still request an mdsthreshold, and so we end up Oopsing in decode_attr_mdsthreshold. This patch fixes encode_getfattr_open so that it doesn't request an mdsthreshold when the caller isn't asking for one. It also fixes decode_attr_mdsthreshold so that it errors if the server returns an mdsthreshold that we didn't ask for (instead of Oopsing). Signed-off-by: Trond Myklebust Cc: Andy Adamson --- include/linux/nfs_xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index d1a7bf51c326..7519baef025b 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -348,6 +348,7 @@ struct nfs_openargs { const struct qstr * name; const struct nfs_server *server; /* Needed for ID mapping */ const u32 * bitmask; + const u32 * open_bitmap; __u32 claim; struct nfs4_sequence_args seq_args; }; -- cgit v1.2.3 From fffaee365fded09f9ebf2db19066065fa54323c3 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 5 Jun 2012 21:36:33 +0400 Subject: radix-tree: fix contiguous iterator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes bug in macro radix_tree_for_each_contig(). If radix_tree_next_slot() sees NULL in next slot it returns NULL, but following radix_tree_next_chunk() switches iterating into next chunk. As result iterating becomes non-contiguous and breaks vfs "splice" and all its users. Signed-off-by: Konstantin Khlebnikov Reported-and-bisected-by: Hans de Bruin Reported-and-bisected-by: Ondrej Zary Reported-bisected-and-tested-by: Toralf Förster Link: https://lkml.org/lkml/2012/6/5/64 Cc: stable # 3.4.x Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 0d04cd69ab9b..ffc444c38b0a 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -368,8 +368,11 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags) iter->index++; if (likely(*slot)) return slot; - if (flags & RADIX_TREE_ITER_CONTIG) + if (flags & RADIX_TREE_ITER_CONTIG) { + /* forbid switching to the next chunk */ + iter->next_index = 0; break; + } } } return NULL; -- cgit v1.2.3 From 9bce008bae8b57bc7b007bcc2071d1247a527120 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Jun 2012 18:32:03 -0400 Subject: NFS: Fix a commit bug The new commit code fails to copy the verifier into the wb_verf field of _all_ the nfs_page structures; it only copies it into the first entry. The consequence is that most requests end up failing to match in nfs_commit_release. Fix is to copy the verifier into the req->wb_verf field in nfs_write_completion. Signed-off-by: Trond Myklebust Cc: Fred Isaman --- include/linux/nfs_xdr.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 7519baef025b..8aadd90b808a 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1237,6 +1237,7 @@ struct nfs_pgio_header { struct list_head rpc_list; atomic_t refcnt; struct nfs_page *req; + struct nfs_writeverf *verf; struct pnfs_layout_segment *lseg; loff_t io_start; const struct rpc_call_ops *mds_ops; @@ -1274,6 +1275,7 @@ struct nfs_write_data { struct nfs_write_header { struct nfs_pgio_header header; struct nfs_write_data rpc_data; + struct nfs_writeverf verf; }; struct nfs_mds_commit_info { -- cgit v1.2.3 From 2a0fe914a38745f5b03534c4e4f4056cbd6978b8 Mon Sep 17 00:00:00 2001 From: Yong Ding Date: Tue, 15 May 2012 13:09:43 +0800 Subject: mmc: sdio: fix setting card data bus width as 4-bit SDIO_CCCR_IF[1:0] in SDIO card is used for card data bus width setting as below: 00b: 1-bit bus 01b: Reserved 10b: 4-bit bus 11b: 8-bit bus (only for embedded SDIO) And sdio_enable_wide is for setting data bus width as 4-bit. But currently, it first reads the register, second OR' 1b with SDIO_CCCR_IF[1], and then writes it back. As we can see, this is based on such assumption that the SDIO_CCCR_IF[0] is always 0. Apparently, this is not right. Signed-off-by: Yong Ding Acked-by: Philip Rakity Signed-off-by: Chris Ball --- include/linux/mmc/sdio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/sdio.h b/include/linux/mmc/sdio.h index c9fe66c58f8f..17446d3c3602 100644 --- a/include/linux/mmc/sdio.h +++ b/include/linux/mmc/sdio.h @@ -98,7 +98,9 @@ #define SDIO_CCCR_IF 0x07 /* bus interface controls */ +#define SDIO_BUS_WIDTH_MASK 0x03 /* data bus width setting */ #define SDIO_BUS_WIDTH_1BIT 0x00 +#define SDIO_BUS_WIDTH_RESERVED 0x01 #define SDIO_BUS_WIDTH_4BIT 0x02 #define SDIO_BUS_ECSI 0x20 /* Enable continuous SPI interrupt */ #define SDIO_BUS_SCSI 0x40 /* Support continuous SPI interrupt */ -- cgit v1.2.3 From c1174876874dcf8986806e4dad3d7d07af20b439 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 31 May 2012 14:47:33 +0200 Subject: sched: Fix domain iteration Weird topologies can lead to asymmetric domain setups. This needs further consideration since these setups are typically non-minimal too. For now, make it work by adding an extra mask selecting which CPUs are allowed to iterate up. The topology that triggered it is the one from David Rientjes: 10 20 20 30 20 10 20 20 20 20 10 20 30 20 20 10 resulting in boxes that wouldn't even boot. Reported-by: David Rientjes Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-3p86l9cuaqnxz7uxsojmz5rm@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6029d8c54476..ac321d753470 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -876,6 +876,8 @@ struct sched_group_power { * Number of busy cpus in this group. */ atomic_t nr_busy_cpus; + + unsigned long cpumask[0]; /* iteration mask */ }; struct sched_group { @@ -900,6 +902,15 @@ static inline struct cpumask *sched_group_cpus(struct sched_group *sg) return to_cpumask(sg->cpumask); } +/* + * cpumask masking which cpus in the group are allowed to iterate up the domain + * tree. + */ +static inline struct cpumask *sched_group_mask(struct sched_group *sg) +{ + return to_cpumask(sg->sgp->cpumask); +} + /** * group_first_cpu - Returns the first cpu in the cpumask of a sched_group. * @group: The group whose first cpu is to be returned. -- cgit v1.2.3 From 0b0d9cf6ec7bab91977da2d71c09157f110f7c2e Mon Sep 17 00:00:00 2001 From: Arun Sharma Date: Fri, 20 Apr 2012 15:41:34 -0700 Subject: perf: Limit callchains to 127 Stack depth of 255 seems excessive, given that copy_from_user_nmi() could be slow. Signed-off-by: Arun Sharma Cc: Linus Torvalds Cc: linux-kernel@vger.kernel.org Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1334961696-19580-3-git-send-email-asharma@fb.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1817d4015e5f..45db49f64bb4 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -555,7 +555,7 @@ enum perf_event_type { PERF_RECORD_MAX, /* non-ABI */ }; -#define PERF_MAX_STACK_DEPTH 255 +#define PERF_MAX_STACK_DEPTH 127 enum perf_callchain_context { PERF_CONTEXT_HV = (__u64)-32, -- cgit v1.2.3 From aa9b16306e3243229580ff889cc59fd66bf77973 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 10 May 2012 16:41:44 -0700 Subject: rcu: Precompute RCU_FAST_NO_HZ timer offsets When a CPU is entering dyntick-idle mode, tick_nohz_stop_sched_tick() calls rcu_needs_cpu() see if RCU needs that CPU, and, if not, computes the next wakeup time based on the timer wheels. Only later, when actually entering the idle loop, rcu_prepare_for_idle() will be invoked. In some cases, rcu_prepare_for_idle() will post timers to wake the CPU back up. But all for naught: The next wakeup time for the CPU has already been computed, and posting a timer afterwards does not force that wakeup time to be recomputed. This means that rcu_prepare_for_idle()'s have no effect. This is not a problem on a busy system because something else will wake up the CPU soon enough. However, on lightly loaded systems, the CPU might stay asleep for a considerable length of time. If that CPU has a callback that the rest of the system is waiting on, the system might run very slowly or (in theory) even hang. This commit avoids this problem by having rcu_needs_cpu() give tick_nohz_stop_sched_tick() an estimate of when RCU will need the CPU to wake back up, which tick_nohz_stop_sched_tick() takes into account when programming the CPU's wakeup time. An alternative approach is for rcu_prepare_for_idle() to use hrtimers instead of normal timers, but timers are much more efficient than are hrtimers for frequently and repeatedly posting and cancelling a given timer, which is exactly what RCU_FAST_NO_HZ does. Reported-by: Pascal Chapperon Reported-by: Heiko Carstens Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Tested-by: Heiko Carstens Tested-by: Pascal Chapperon --- include/linux/rcutiny.h | 6 ++++-- include/linux/rcutree.h | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index adb5e5a38cae..854dc4c5c271 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -87,8 +87,9 @@ static inline void kfree_call_rcu(struct rcu_head *head, #ifdef CONFIG_TINY_RCU -static inline int rcu_needs_cpu(int cpu) +static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) { + *delta_jiffies = ULONG_MAX; return 0; } @@ -96,8 +97,9 @@ static inline int rcu_needs_cpu(int cpu) int rcu_preempt_needs_cpu(void); -static inline int rcu_needs_cpu(int cpu) +static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) { + *delta_jiffies = ULONG_MAX; return rcu_preempt_needs_cpu(); } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 3c6083cde4fc..952b79339304 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -32,7 +32,7 @@ extern void rcu_init(void); extern void rcu_note_context_switch(int cpu); -extern int rcu_needs_cpu(int cpu); +extern int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies); extern void rcu_cpu_stall_reset(void); /* -- cgit v1.2.3 From d1992b169d31f339dc5ea4e9f312567c8cf322a3 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Thu, 17 May 2012 22:35:46 +0000 Subject: netfilter: xt_HMARK: fix endianness and provide consistent hashing This patch addresses two issues: a) Fix usage of u32 and __be32 that causes endianess warnings via sparse. b) Ensure consistent hashing in a cluster that is composed of big and little endian systems. Thus, we obtain the same hash mark in an heterogeneous cluster. Reported-by: Dan Carpenter Signed-off-by: Hans Schillstrom Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/xt_HMARK.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/xt_HMARK.h b/include/linux/netfilter/xt_HMARK.h index abb1650940d2..826fc5807577 100644 --- a/include/linux/netfilter/xt_HMARK.h +++ b/include/linux/netfilter/xt_HMARK.h @@ -27,7 +27,12 @@ union hmark_ports { __u16 src; __u16 dst; } p16; + struct { + __be16 src; + __be16 dst; + } b16; __u32 v32; + __be32 b32; }; struct xt_hmark_info { -- cgit v1.2.3 From bafb282df29c1524b1617019adebd6d0c3eb7a47 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 7 Jun 2012 14:21:11 -0700 Subject: c/r: prctl: update prctl_set_mm_exe_file() after mm->num_exe_file_vmas removal A fix for commit b32dfe377102 ("c/r: prctl: add ability to set new mm_struct::exe_file"). After removing mm->num_exe_file_vmas kernel keeps mm->exe_file until final mmput(), it never becomes NULL while task is alive. We can check for other mapped files in mm instead of checking mm->num_exe_file_vmas, and mark mm with flag MMF_EXE_FILE_CHANGED in order to forbid second changing of mm->exe_file. Signed-off-by: Konstantin Khlebnikov Reviewed-by: Cyrill Gorcunov Cc: Oleg Nesterov Cc: Matt Helsley Cc: Kees Cook Cc: KOSAKI Motohiro Cc: Tejun Heo Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6029d8c54476..c688d4cc2e40 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -439,6 +439,7 @@ extern int get_dumpable(struct mm_struct *mm); /* leave room for more dump flags */ #define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ #define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */ +#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) -- cgit v1.2.3 From 300f786b2683f8bb1ec0afb6e1851183a479c86d Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 7 Jun 2012 14:21:12 -0700 Subject: c/r: prctl: add ability to get clear_tid_address Zero is written at clear_tid_address when the process exits. This functionality is used by pthread_join(). We already have sys_set_tid_address() to change this address for the current task but there is no way to obtain it from user space. Without the ability to find this address and dump it we can't restore pthread'ed apps which call pthread_join() once they have been restored. This patch introduces the PR_GET_TID_ADDRESS prctl option which allows the current process to obtain own clear_tid_address. This feature is available iif CONFIG_CHECKPOINT_RESTORE is set. [akpm@linux-foundation.org: fix prctl numbering] Signed-off-by: Andrew Vagin Signed-off-by: Cyrill Gorcunov Cc: Pedro Alves Cc: Oleg Nesterov Cc: Pavel Emelyanov Cc: Tejun Heo Acked-by: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/prctl.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/prctl.h b/include/linux/prctl.h index 711e0a30aacc..3988012255dc 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -127,8 +127,8 @@ #define PR_SET_PTRACER 0x59616d61 # define PR_SET_PTRACER_ANY ((unsigned long)-1) -#define PR_SET_CHILD_SUBREAPER 36 -#define PR_GET_CHILD_SUBREAPER 37 +#define PR_SET_CHILD_SUBREAPER 36 +#define PR_GET_CHILD_SUBREAPER 37 /* * If no_new_privs is set, then operations that grant new privileges (i.e. @@ -142,7 +142,9 @@ * asking selinux for a specific new context (e.g. with runcon) will result * in execve returning -EPERM. */ -#define PR_SET_NO_NEW_PRIVS 38 -#define PR_GET_NO_NEW_PRIVS 39 +#define PR_SET_NO_NEW_PRIVS 38 +#define PR_GET_NO_NEW_PRIVS 39 + +#define PR_GET_TID_ADDRESS 40 #endif /* _LINUX_PRCTL_H */ -- cgit v1.2.3 From ae82fdb1406ad41d68f07027fe31f2d35ba22a90 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 8 Jun 2012 14:58:13 +0930 Subject: module_param: stop double-calling parameters. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 026cee0086fe1df4cf74691cf273062cc769617d "params: _initcall-like kernel parameters" set old-style module parameters to level 0. And we call those level 0 calls where we used to, early in start_kernel(). We also loop through the initcall levels and call the levelled module_params before the corresponding initcall. Unfortunately level 0 is early_init(), so we call the standard module_param calls twice. (Turns out most things don't care, but at least ubi.mtd does). Change the level to -1 for standard module_param calls. Reported-by: Benoît Thébaudeau Signed-off-by: Rusty Russell Cc: stable@kernel.org --- include/linux/moduleparam.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 1b14d25162cb..d6a58065c09c 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -128,7 +128,7 @@ struct kparam_array * The ops can have NULL set or get functions. */ #define module_param_cb(name, ops, arg, perm) \ - __module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, 0) + __module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, -1) /** * _param_cb - general callback for a module/cmdline parameter @@ -192,7 +192,7 @@ struct kparam_array { (void *)set, (void *)get }; \ __module_param_call(MODULE_PARAM_PREFIX, \ name, &__param_ops_##name, arg, \ - (perm) + sizeof(__check_old_set_param(set))*0, 0) + (perm) + sizeof(__check_old_set_param(set))*0, -1) /* We don't get oldget: it's often a new-style param_get_uint, etc. */ static inline int @@ -272,7 +272,7 @@ static inline void __kernel_param_unlock(void) */ #define core_param(name, var, type, perm) \ param_check_##type(name, &(var)); \ - __module_param_call("", name, ¶m_ops_##type, &var, perm, 0) + __module_param_call("", name, ¶m_ops_##type, &var, perm, -1) #endif /* !MODULE */ /** @@ -290,7 +290,7 @@ static inline void __kernel_param_unlock(void) = { len, string }; \ __module_param_call(MODULE_PARAM_PREFIX, name, \ ¶m_ops_string, \ - .str = &__param_string_##name, perm, 0); \ + .str = &__param_string_##name, perm, -1); \ __MODULE_PARM_TYPE(name, "string") /** @@ -432,7 +432,7 @@ extern int param_set_bint(const char *val, const struct kernel_param *kp); __module_param_call(MODULE_PARAM_PREFIX, name, \ ¶m_array_ops, \ .arr = &__param_arr_##name, \ - perm, 0); \ + perm, -1); \ __MODULE_PARM_TYPE(name, "array of " #type) extern struct kernel_param_ops param_array_ops; -- cgit v1.2.3 From c8e9cf7bb240049117d2fa64d1540476c289396d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 7 Jun 2012 12:15:15 +0200 Subject: vga_switcheroo: Add a helper function to get the client state Add vga_switcheroo_get_client_state() to get the current state of the client. This is necessary to determine the proper initial state of audio clients in HD-audio driver. Acked-by: Dave Airlie Signed-off-by: Takashi Iwai --- include/linux/vga_switcheroo.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h index b455c7c212eb..b176342ca031 100644 --- a/include/linux/vga_switcheroo.h +++ b/include/linux/vga_switcheroo.h @@ -12,6 +12,9 @@ enum vga_switcheroo_state { VGA_SWITCHEROO_OFF, VGA_SWITCHEROO_ON, + /* below are referred only from vga_switcheroo_get_client_state() */ + VGA_SWITCHEROO_INIT, + VGA_SWITCHEROO_NOT_FOUND, }; enum vga_switcheroo_client_id { @@ -50,6 +53,8 @@ void vga_switcheroo_unregister_handler(void); int vga_switcheroo_process_delayed_switch(void); +int vga_switcheroo_get_client_state(struct pci_dev *dev); + #else static inline void vga_switcheroo_unregister_client(struct pci_dev *dev) {} @@ -62,5 +67,7 @@ static inline int vga_switcheroo_register_audio_client(struct pci_dev *pdev, int id, bool active) { return 0; } static inline void vga_switcheroo_unregister_handler(void) {} static inline int vga_switcheroo_process_delayed_switch(void) { return 0; } +static inline int vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_CLIENT_ON; } + #endif -- cgit v1.2.3 From 505cff00de9c303b95c204eb4544066e3e707911 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 8 Jun 2012 12:49:17 +0200 Subject: vga_switcheroo: Fix error without CONFIG_VGA_SWITCHEROO Fix a typo that is built only when CONFIG_VGA_SWITCHEROO=n. Signed-off-by: Takashi Iwai --- include/linux/vga_switcheroo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h index b176342ca031..60da41fe9dc2 100644 --- a/include/linux/vga_switcheroo.h +++ b/include/linux/vga_switcheroo.h @@ -67,7 +67,7 @@ static inline int vga_switcheroo_register_audio_client(struct pci_dev *pdev, int id, bool active) { return 0; } static inline void vga_switcheroo_unregister_handler(void) {} static inline int vga_switcheroo_process_delayed_switch(void) { return 0; } -static inline int vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_CLIENT_ON; } +static inline int vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_ON; } #endif -- cgit v1.2.3 From 8876d6b5f81f4e242a6660da22bbd92f17a8d058 Mon Sep 17 00:00:00 2001 From: Paul Pluzhnikov Date: Sat, 9 Jun 2012 07:53:03 -0700 Subject: net: Make linux/tcp.h C++ friendly (trivial) I originally sent this patch to , but Jiri Kosina did not feel that this is fully appropriate for the trivial tree. Using linux/tcp.h from C++ results in: cat t.cc #include int main() { } g++ -c t.cc In file included from t.cc:1: /usr/include/linux/tcp.h:72: error: '__u32 __fswab32(__u32)' cannot appear in a constant-expression /usr/include/linux/tcp.h:72: error: a function call cannot appear in a constant-expression ... Attached trivial patch fixes this problem. Tested: - the t.cc above compiles with g++ and - the following program generates the same output before/after the patch: #include #include int main () { #define P(a) printf("%s: %08x\n", #a, (int)a) P(TCP_FLAG_CWR); P(TCP_FLAG_ECE); P(TCP_FLAG_URG); P(TCP_FLAG_ACK); P(TCP_FLAG_PSH); P(TCP_FLAG_RST); P(TCP_FLAG_SYN); P(TCP_FLAG_FIN); P(TCP_RESERVED_BITS); P(TCP_DATA_OFFSET); #undef P return 0; } Signed-off-by: Paul Pluzhnikov Signed-off-by: David S. Miller --- include/linux/tcp.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 4c5b63283377..5f359dbfcdce 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -69,16 +69,16 @@ union tcp_word_hdr { #define tcp_flag_word(tp) ( ((union tcp_word_hdr *)(tp))->words [3]) enum { - TCP_FLAG_CWR = __cpu_to_be32(0x00800000), - TCP_FLAG_ECE = __cpu_to_be32(0x00400000), - TCP_FLAG_URG = __cpu_to_be32(0x00200000), - TCP_FLAG_ACK = __cpu_to_be32(0x00100000), - TCP_FLAG_PSH = __cpu_to_be32(0x00080000), - TCP_FLAG_RST = __cpu_to_be32(0x00040000), - TCP_FLAG_SYN = __cpu_to_be32(0x00020000), - TCP_FLAG_FIN = __cpu_to_be32(0x00010000), - TCP_RESERVED_BITS = __cpu_to_be32(0x0F000000), - TCP_DATA_OFFSET = __cpu_to_be32(0xF0000000) + TCP_FLAG_CWR = __constant_cpu_to_be32(0x00800000), + TCP_FLAG_ECE = __constant_cpu_to_be32(0x00400000), + TCP_FLAG_URG = __constant_cpu_to_be32(0x00200000), + TCP_FLAG_ACK = __constant_cpu_to_be32(0x00100000), + TCP_FLAG_PSH = __constant_cpu_to_be32(0x00080000), + TCP_FLAG_RST = __constant_cpu_to_be32(0x00040000), + TCP_FLAG_SYN = __constant_cpu_to_be32(0x00020000), + TCP_FLAG_FIN = __constant_cpu_to_be32(0x00010000), + TCP_RESERVED_BITS = __constant_cpu_to_be32(0x0F000000), + TCP_DATA_OFFSET = __constant_cpu_to_be32(0xF0000000) }; /* -- cgit v1.2.3 From 601722157b3f6be73623644eeae6f14940f0bd8f Mon Sep 17 00:00:00 2001 From: Qiao Zhou Date: Mon, 4 Jun 2012 10:41:03 +0800 Subject: ARM: MMP: add pxa910-ssp into ssp_id_table add pxa910-ssp into ssp_id_table, and fix pxa-ssp compiling issue under mach-mmp architect. Signed-off-by: Qiao Zhou Acked-by: Haojian Zhuang Signed-off-by: Mark Brown --- include/linux/pxa2xx_ssp.h | 1 + include/linux/spi/pxa2xx_spi.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h index 44835fb39793..f9fe15ec2b51 100644 --- a/include/linux/pxa2xx_ssp.h +++ b/include/linux/pxa2xx_ssp.h @@ -161,6 +161,7 @@ enum pxa_ssp_type { PXA25x_NSSP, /* pxa 255, 26x (including ASSP) */ PXA27x_SSP, PXA168_SSP, + PXA910_SSP, CE4100_SSP, }; diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h index d3e1075f7b60..c73d1445c77e 100644 --- a/include/linux/spi/pxa2xx_spi.h +++ b/include/linux/spi/pxa2xx_spi.h @@ -43,7 +43,7 @@ struct pxa2xx_spi_chip { void (*cs_control)(u32 command); }; -#ifdef CONFIG_ARCH_PXA +#if defined(CONFIG_ARCH_PXA) || defined(CONFIG_ARCH_MMP) #include #include -- cgit v1.2.3 From 972a55b62d592cfcd6d73577df8a52f1251ea9a7 Mon Sep 17 00:00:00 2001 From: Qiao Zhou Date: Mon, 4 Jun 2012 10:41:04 +0800 Subject: ASoC: fix pxa-ssp compiling issue under mach-mmp pxa-ssp.c uses API like cpu_is_pxa3xx(), cpu_is_pxa2xx(), which is defined under arch-pxa architecture, and drivers under mach-mmp can't find it. so just use ssp->type to replace that API. Signed-off-by: Qiao Zhou Acked-by: Haojian Zhuang Signed-off-by: Mark Brown --- include/linux/pxa2xx_ssp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h index f9fe15ec2b51..f36632061c66 100644 --- a/include/linux/pxa2xx_ssp.h +++ b/include/linux/pxa2xx_ssp.h @@ -160,6 +160,7 @@ enum pxa_ssp_type { PXA25x_SSP, /* pxa 210, 250, 255, 26x */ PXA25x_NSSP, /* pxa 255, 26x (including ASSP) */ PXA27x_SSP, + PXA3xx_SSP, PXA168_SSP, PXA910_SSP, CE4100_SSP, -- cgit v1.2.3 From 1761a110a9c36ad9ba26b104516677ad8cb38a08 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 7 Jun 2012 11:20:31 -0300 Subject: [media] Fix regression in ioctl numbering Yuck. The VIDIOC_(TRY_)DECODER_CMD ioctls already had ioctl numbers 96 and 97, and after merging the timings API I forgot to continue numbering from 98. So now we have two ioctls with number 96 and two with 97. With the new table-driver ioctl handling in v4l2-ioctl.c it is essential that each ioctl has its own unique number, so let's fix this quickly for 3.5. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/linux/videodev2.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index 370d11106c11..2039c5d3292e 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -2640,9 +2640,9 @@ struct v4l2_create_buffers { /* Experimental, these three ioctls may change over the next couple of kernel versions. */ -#define VIDIOC_ENUM_DV_TIMINGS _IOWR('V', 96, struct v4l2_enum_dv_timings) -#define VIDIOC_QUERY_DV_TIMINGS _IOR('V', 97, struct v4l2_dv_timings) -#define VIDIOC_DV_TIMINGS_CAP _IOWR('V', 98, struct v4l2_dv_timings_cap) +#define VIDIOC_ENUM_DV_TIMINGS _IOWR('V', 98, struct v4l2_enum_dv_timings) +#define VIDIOC_QUERY_DV_TIMINGS _IOR('V', 99, struct v4l2_dv_timings) +#define VIDIOC_DV_TIMINGS_CAP _IOWR('V', 100, struct v4l2_dv_timings_cap) /* Reminder: when adding new ioctls please add support for them to drivers/media/video/v4l2-compat-ioctl32.c as well! */ -- cgit v1.2.3 From 2177905ca7419c49910d47e38e44790affd918cc Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 11 Jun 2012 23:56:09 -0700 Subject: Input: fix input.h kernel-doc warning Fix kernel-doc warning in input.h: Warning(include/linux/input.h:140): No description found for parameter 'len' Signed-off-by: Randy Dunlap Signed-off-by: Dmitry Torokhov --- include/linux/input.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/input.h b/include/linux/input.h index a81671453575..2740d080ec6b 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -116,6 +116,7 @@ struct input_keymap_entry { /** * EVIOCGMTSLOTS(len) - get MT slot values + * @len: size of the data buffer in bytes * * The ioctl buffer argument should be binary equivalent to * -- cgit v1.2.3 From c2fb8a3fa25513de8fedb38509b1f15a5bbee47b Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 13 Jun 2012 11:20:19 -0400 Subject: USB: add NO_D3_DURING_SLEEP flag and revert 151b61284776be2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch (as1558) fixes a problem affecting several ASUS computers: The machine crashes or corrupts memory when going into suspend if the ehci-hcd driver is bound to any controllers. Users have been forced to unbind or unload ehci-hcd before putting their systems to sleep. After extensive testing, it was determined that the machines don't like going into suspend when any EHCI controllers are in the PCI D3 power state. Presumably this is a firmware bug, but there's nothing we can do about it except to avoid putting the controllers in D3 during system sleep. The patch adds a new flag to indicate whether the problem is present, and avoids changing the controller's power state if the flag is set. Runtime suspend is unaffected; this matters only for system suspend. However as a side effect, the controller will not respond to remote wakeup requests while the system is asleep. Hence USB wakeup is not functional -- but of course, this is already true in the current state of affairs. A similar patch has already been applied as commit 151b61284776be2d6f02d48c23c3625678960b97 (USB: EHCI: fix crash during suspend on ASUS computers). The patch supersedes that one and reverts it. There are two differences: The old patch added the flag at the USB level; this patch adds it at the PCI level. The old patch applied to all chipsets with the same vendor, subsystem vendor, and product IDs; this patch makes an exception for a known-good system (based on DMI information). Signed-off-by: Alan Stern Tested-by: Dâniel Fraga Tested-by: Andrey Rahmatullin Tested-by: Steven Rostedt Cc: stable Reviewed-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- include/linux/pci.h | 2 ++ include/linux/usb/hcd.h | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index d8c379dba6ad..fefb4e19bf6a 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -176,6 +176,8 @@ enum pci_dev_flags { PCI_DEV_FLAGS_NO_D3 = (__force pci_dev_flags_t) 2, /* Provide indication device is assigned by a Virtual Machine Manager */ PCI_DEV_FLAGS_ASSIGNED = (__force pci_dev_flags_t) 4, + /* Device causes system crash if in D3 during S3 sleep */ + PCI_DEV_FLAGS_NO_D3_DURING_SLEEP = (__force pci_dev_flags_t) 8, }; enum pci_irq_reroute_variant { diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 7f855d50cdf5..49b3ac29726a 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -126,8 +126,6 @@ struct usb_hcd { unsigned wireless:1; /* Wireless USB HCD */ unsigned authorized_default:1; unsigned has_tt:1; /* Integrated TT in root hub */ - unsigned broken_pci_sleep:1; /* Don't put the - controller in PCI-D3 for system sleep */ unsigned int irq; /* irq allocated */ void __iomem *regs; /* device memory/io */ -- cgit v1.2.3 From 201e4aca5aa179e6c69a4dcd36a3562e56b8d670 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Sat, 26 May 2012 06:07:49 -0700 Subject: pstore/ram: Should update old dmesg buffer before reading Without the update, we'll only see the new dmesg buffer after the reboot, but previously we could see it right away. Making an oops visible in pstore filesystem before reboot is a somewhat dubious feature, but removing it wasn't an intentional change, so let's restore it. For this we have to make persistent_ram_save_old() safe for calling multiple times, and also extern it. Signed-off-by: Anton Vorontsov Acked-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- include/linux/pstore_ram.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index 7ed7fd4dba49..4491e8ff36e6 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h @@ -75,6 +75,7 @@ struct persistent_ram_zone *persistent_ram_init_ringbuffer(struct device *dev, int persistent_ram_write(struct persistent_ram_zone *prz, const void *s, unsigned int count); +void persistent_ram_save_old(struct persistent_ram_zone *prz); size_t persistent_ram_old_size(struct persistent_ram_zone *prz); void *persistent_ram_old(struct persistent_ram_zone *prz); void persistent_ram_free_old(struct persistent_ram_zone *prz); -- cgit v1.2.3 From fce397930475f7efc712a1345dc0dad269a10544 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Sat, 26 May 2012 06:07:51 -0700 Subject: pstore/ram_core: Factor persistent_ram_zap() out of post_init() A handy function that we will use outside of ram_core soon. But so far just factor it out and start using it in post_init(). Signed-off-by: Anton Vorontsov Acked-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- include/linux/pstore_ram.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index 4491e8ff36e6..3b823d49a85a 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h @@ -69,6 +69,7 @@ struct persistent_ram_zone * __init persistent_ram_new(phys_addr_t start, size_t size, bool ecc); void persistent_ram_free(struct persistent_ram_zone *prz); +void persistent_ram_zap(struct persistent_ram_zone *prz); struct persistent_ram_zone *persistent_ram_init_ringbuffer(struct device *dev, bool ecc); -- cgit v1.2.3 From e2ae715d66bf4becfb85eb84b7150e23cf27df30 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Fri, 15 Jun 2012 14:07:51 +0200 Subject: kmsg - kmsg_dump() use iterator to receive log buffer content Provide an iterator to receive the log buffer content, and convert all kmsg_dump() users to it. The structured data in the kmsg buffer now contains binary data, which should no longer be copied verbatim to the kmsg_dump() users. The iterator should provide reliable access to the buffer data, and also supports proper log line-aware chunking of data while iterating. Signed-off-by: Kay Sievers Tested-by: Tony Luck Reported-by: Anton Vorontsov Tested-by: Anton Vorontsov Signed-off-by: Greg Kroah-Hartman --- include/linux/kmsg_dump.h | 45 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h index 35f7237ec972..af4eb5a39d9a 100644 --- a/include/linux/kmsg_dump.h +++ b/include/linux/kmsg_dump.h @@ -21,6 +21,7 @@ * is passed to the kernel. */ enum kmsg_dump_reason { + KMSG_DUMP_UNDEF, KMSG_DUMP_PANIC, KMSG_DUMP_OOPS, KMSG_DUMP_EMERG, @@ -31,23 +32,37 @@ enum kmsg_dump_reason { /** * struct kmsg_dumper - kernel crash message dumper structure - * @dump: The callback which gets called on crashes. The buffer is passed - * as two sections, where s1 (length l1) contains the older - * messages and s2 (length l2) contains the newer. * @list: Entry in the dumper list (private) + * @dump: Call into dumping code which will retrieve the data with + * through the record iterator + * @max_reason: filter for highest reason number that should be dumped * @registered: Flag that specifies if this is already registered */ struct kmsg_dumper { - void (*dump)(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason, - const char *s1, unsigned long l1, - const char *s2, unsigned long l2); struct list_head list; - int registered; + void (*dump)(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason); + enum kmsg_dump_reason max_reason; + bool active; + bool registered; + + /* private state of the kmsg iterator */ + u32 cur_idx; + u32 next_idx; + u64 cur_seq; + u64 next_seq; }; #ifdef CONFIG_PRINTK void kmsg_dump(enum kmsg_dump_reason reason); +bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, + char *line, size_t size, size_t *len); + +bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, + char *buf, size_t size, size_t *len); + +void kmsg_dump_rewind(struct kmsg_dumper *dumper); + int kmsg_dump_register(struct kmsg_dumper *dumper); int kmsg_dump_unregister(struct kmsg_dumper *dumper); @@ -56,6 +71,22 @@ static inline void kmsg_dump(enum kmsg_dump_reason reason) { } +bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, + const char *line, size_t size, size_t *len) +{ + return false; +} + +bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, + char *buf, size_t size, size_t *len) +{ + return false; +} + +void kmsg_dump_rewind(struct kmsg_dumper *dumper) +{ +} + static inline int kmsg_dump_register(struct kmsg_dumper *dumper) { return -EINVAL; -- cgit v1.2.3 From 62b1a8ab9b3660bb820d8dfe23148ed6cda38574 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Jun 2012 06:42:44 +0000 Subject: net: remove skb_orphan_try() Orphaning skb in dev_hard_start_xmit() makes bonding behavior unfriendly for applications sending big UDP bursts : Once packets pass the bonding device and come to real device, they might hit a full qdisc and be dropped. Without orphaning, the sender is automatically throttled because sk->sk_wmemalloc reaches sk->sk_sndbuf (assuming sk_sndbuf is not too big) We could try to defer the orphaning adding another test in dev_hard_start_xmit(), but all this seems of little gain, now that BQL tends to make packets more likely to be parked in Qdisc queues instead of NIC TX ring, in cases where performance matters. Reverts commits : fc6055a5ba31 net: Introduce skb_orphan_try() 87fd308cfc6b net: skb_tx_hash() fix relative to skb_orphan_try() and removes SKBTX_DRV_NEEDS_SK_REF flag Reported-and-bisected-by: Jean-Michel Hautbois Signed-off-by: Eric Dumazet Tested-by: Oliver Hartkopp Acked-by: Oliver Hartkopp Signed-off-by: David S. Miller --- include/linux/skbuff.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b534a1be540a..642cb7355df3 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -225,14 +225,11 @@ enum { /* device driver is going to provide hardware time stamp */ SKBTX_IN_PROGRESS = 1 << 2, - /* ensure the originating sk reference is available on driver level */ - SKBTX_DRV_NEEDS_SK_REF = 1 << 3, - /* device driver supports TX zero-copy buffers */ - SKBTX_DEV_ZEROCOPY = 1 << 4, + SKBTX_DEV_ZEROCOPY = 1 << 3, /* generate wifi status information (where possible) */ - SKBTX_WIFI_STATUS = 1 << 5, + SKBTX_WIFI_STATUS = 1 << 4, }; /* -- cgit v1.2.3 From 9b15b817f3d62409290fd56fe3cbb076a931bb0a Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 15 Jun 2012 17:55:50 -0700 Subject: swap: fix shmem swapping when more than 8 areas Minchan Kim reports that when a system has many swap areas, and tmpfs swaps out to the ninth or more, shmem_getpage_gfp()'s attempts to read back the page cannot locate it, and the read fails with -ENOMEM. Whoops. Yes, I blindly followed read_swap_header()'s pte_to_swp_entry( swp_entry_to_pte()) technique for determining maximum usable swap offset, without stopping to realize that that actually depends upon the pte swap encoding shifting swap offset to the higher bits and truncating it there. Whereas our radix_tree swap encoding leaves offset in the lower bits: it's swap "type" (that is, index of swap area) that was truncated. Fix it by reducing the SWP_TYPE_SHIFT() in swapops.h, and removing the broken radix_to_swp_entry(swp_to_radix_entry()) from read_swap_header(). This does not reduce the usable size of a swap area any further, it leaves it as claimed when making the original commit: no change from 3.0 on x86_64, nor on i386 without PAE; but 3.0's 512GB is reduced to 128GB per swapfile on i386 with PAE. It's not a change I would have risked five years ago, but with x86_64 supported for ten years, I believe it's appropriate now. Hmm, and what if some architecture implements its swap pte with offset encoded below type? That would equally break the maximum usable swap offset check. Happily, they all follow the same tradition of encoding offset above type, but I'll prepare a check on that for next. Reported-and-Reviewed-and-Tested-by: Minchan Kim Signed-off-by: Hugh Dickins Cc: stable@vger.kernel.org [3.1, 3.2, 3.3, 3.4] Signed-off-by: Linus Torvalds --- include/linux/swapops.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 792d16d9cbc7..47ead515c811 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -9,13 +9,15 @@ * get good packing density in that tree, so the index should be dense in * the low-order bits. * - * We arrange the `type' and `offset' fields so that `type' is at the five + * We arrange the `type' and `offset' fields so that `type' is at the seven * high-order bits of the swp_entry_t and `offset' is right-aligned in the - * remaining bits. + * remaining bits. Although `type' itself needs only five bits, we allow for + * shmem/tmpfs to shift it all up a further two bits: see swp_to_radix_entry(). * * swp_entry_t's are *never* stored anywhere in their arch-dependent format. */ -#define SWP_TYPE_SHIFT(e) (sizeof(e.val) * 8 - MAX_SWAPFILES_SHIFT) +#define SWP_TYPE_SHIFT(e) ((sizeof(e.val) * 8) - \ + (MAX_SWAPFILES_SHIFT + RADIX_TREE_EXCEPTIONAL_SHIFT)) #define SWP_OFFSET_MASK(e) ((1UL << SWP_TYPE_SHIFT(e)) - 1) /* -- cgit v1.2.3 From f8fee8f5acb5c3f82e02f2ae139a6f1e7b4eb583 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 15 Jun 2012 12:46:17 -0700 Subject: vga_switcheroo.h: fix pci_dev warning Fix warnings on some architectures/configs (not on x86): include/linux/vga_switcheroo.h:28:30: warning: 'struct pci_dev' declared inside parameter list [enabled by default] include/linux/vga_switcheroo.h:28:30: warning: its scope is only this definition or declaration, which is probably not what you want [enabled by default] Signed-off-by: Randy Dunlap Cc: Takashi Iwai Reported-by: Geert Uytterhoeven Signed-off-by: Dave Airlie --- include/linux/vga_switcheroo.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h index 60da41fe9dc2..d844b7790ea6 100644 --- a/include/linux/vga_switcheroo.h +++ b/include/linux/vga_switcheroo.h @@ -9,6 +9,8 @@ #include +struct pci_dev; + enum vga_switcheroo_state { VGA_SWITCHEROO_OFF, VGA_SWITCHEROO_ON, -- cgit v1.2.3 From 2a4c8994eeef50796015f8a2005e4a75c1929166 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 14 Jun 2012 13:08:38 -0400 Subject: NFSv4.1: Fix umount when filelayout DS is also the MDS Currently there is a 'chicken and egg' issue when the DS is also the mounted MDS. The nfs_match_client() reference from nfs4_set_ds_client bumps the cl_count, the nfs_client is not freed at umount, and nfs4_deviceid_purge_client is not called to dereference the MDS usage of a deviceid which holds a reference to the DS nfs_client. The result is the umount program returns, but the nfs_client is not freed, and the cl_session hearbeat continues. The MDS (and all other nfs mounts) lose their last nfs_client reference in nfs_free_server when the last nfs_server (fsid) is umounted. The file layout DS lose their last nfs_client reference in destroy_ds when the last deviceid referencing the data server is put and destroy_ds is called. This is triggered by a call to nfs4_deviceid_purge_client which removes references to a pNFS deviceid used by an MDS mount. The fix is to track how many pnfs enabled filesystems are mounted from this server, and then to purge the device id cache once that count reaches zero. Reported-by: Jorge Mora Reported-by: Andy Adamson Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index fbb78fb09bd2..f58325a1d8fb 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -25,6 +25,7 @@ struct nfs41_impl_id; */ struct nfs_client { atomic_t cl_count; + atomic_t cl_mds_count; int cl_cons_state; /* current construction state (-ve: init error) */ #define NFS_CS_READY 0 /* ready to be used */ #define NFS_CS_INITING 1 /* busy initialising */ -- cgit v1.2.3 From 93b3cca1ccd30b1ad290951a3fc7c10c73db7313 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 14 Jun 2012 10:54:28 -0400 Subject: ftrace: Make all inline tags also include notrace Commit 5963e317b1e9d2a ("ftrace/x86: Do not change stacks in DEBUG when calling lockdep") prevented lockdep calls from the int3 breakpoint handler from reseting the stack if a function that was called was in the process of being converted for tracing and had a breakpoint on it. The idea is, before calling the lockdep code, do a load_idt() to the special IDT that kept the breakpoint stack from reseting. This worked well as a quick fix for this kernel release, until a certain config caused a lockup in the function tracer start up tests. Investigating it, I found that the load_idt that was used to prevent the int3 from changing stacks was itself being traced! Even though the config had CONFIG_OPTIMIZE_INLINING disabled, and all 'inline' tags were set to always inline, there were still cases that it did not inline! This was caused by CONFIG_PARAVIRT_GUEST, where it would add a pointer to the native_load_idt() which made that function to be traced. Commit 45959ee7aa645815a ("ftrace: Do not function trace inlined functions") only touched the 'inline' tags when CONFIG_OPMITIZE_INLINING was enabled. PARAVIRT_GUEST shows that this was not enough and we need to also mark always_inline with notrace as well. Reported-by: Fengguang Wu Tested-by: Fengguang Wu Signed-off-by: Steven Rostedt --- include/linux/compiler-gcc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index e5834aa24b9e..6a6d7aefe12d 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -47,9 +47,9 @@ */ #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) || \ !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4) -# define inline inline __attribute__((always_inline)) -# define __inline__ __inline__ __attribute__((always_inline)) -# define __inline __inline __attribute__((always_inline)) +# define inline inline __attribute__((always_inline)) notrace +# define __inline__ __inline__ __attribute__((always_inline)) notrace +# define __inline __inline __attribute__((always_inline)) notrace #else /* A lot of inline functions can cause havoc with function tracing */ # define inline inline notrace -- cgit v1.2.3 From 246f6f2ff2c5cf46ded6d06f11f63e38bad880d1 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Tue, 19 Jun 2012 00:32:57 +0200 Subject: kmsg - kmsg_dump() fix CONFIG_PRINTK=n compilation Signed-off-by: Kay Sievers Cc: Stephen Rothwell Reported-by: Randy Dunlap Reported-by: Fengguang Wu Signed-off-by: Greg Kroah-Hartman --- include/linux/kmsg_dump.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h index af4eb5a39d9a..d6bd50110ec2 100644 --- a/include/linux/kmsg_dump.h +++ b/include/linux/kmsg_dump.h @@ -71,19 +71,19 @@ static inline void kmsg_dump(enum kmsg_dump_reason reason) { } -bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, - const char *line, size_t size, size_t *len) +static inline bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, + const char *line, size_t size, size_t *len) { return false; } -bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, - char *buf, size_t size, size_t *len) +static inline bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, + char *buf, size_t size, size_t *len) { return false; } -void kmsg_dump_rewind(struct kmsg_dumper *dumper) +static inline void kmsg_dump_rewind(struct kmsg_dumper *dumper) { } -- cgit v1.2.3 From abca7c4965845924f65d40e0aa1092bdd895e314 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Wed, 20 Jun 2012 12:52:56 -0700 Subject: mm: fix slab->page _count corruption when using slub On arches that do not support this_cpu_cmpxchg_double() slab_lock is used to do atomic cmpxchg() on double word which contains page->_count. The page count can be changed from get_page() or put_page() without taking slab_lock. That corrupts page counter. Fix it by moving page->_count out of cmpxchg_double data. So that slub does no change it while updating slub meta-data in struct page. [akpm@linux-foundation.org: use standard comment layout, tweak comment text] Reported-by: Amey Bhide Signed-off-by: Pravin B Shelar Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: Andrea Arcangeli Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index dad95bdd06d7..704a626d94a0 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -57,8 +57,18 @@ struct page { }; union { +#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \ + defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) /* Used for cmpxchg_double in slub */ unsigned long counters; +#else + /* + * Keep _count separate from slub cmpxchg_double data. + * As the rest of the double word is protected by + * slab_lock but _count is not. + */ + unsigned counters; +#endif struct { -- cgit v1.2.3 From 10d8935f46e5028847b179757ecbf9238b13d129 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 20 Jun 2012 12:53:02 -0700 Subject: Viresh has moved viresh.kumar@st.com email-id doesn't exist anymore as I have left the company. Replace ST's id with viresh.linux@gmail.com. It also updates .mailmap file to fix address for 'git shortlog' Signed-off-by: Viresh Kumar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmc/sdhci-spear.h | 2 +- include/linux/pata_arasan_cf_data.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/sdhci-spear.h b/include/linux/mmc/sdhci-spear.h index 5cdc96da9dd5..e78c0e236e9d 100644 --- a/include/linux/mmc/sdhci-spear.h +++ b/include/linux/mmc/sdhci-spear.h @@ -4,7 +4,7 @@ * SDHCI declarations specific to ST SPEAr platform * * Copyright (C) 2010 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/include/linux/pata_arasan_cf_data.h b/include/linux/pata_arasan_cf_data.h index a6ee9aa898bb..a7b4fc386e63 100644 --- a/include/linux/pata_arasan_cf_data.h +++ b/include/linux/pata_arasan_cf_data.h @@ -4,7 +4,7 @@ * Arasan Compact Flash host controller platform data header file * * Copyright (C) 2011 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any -- cgit v1.2.3 From d3decf3a0c1d28c80c76be170373f4c7a7217f09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ozan=20=C3=87a=C4=9Flayan?= Date: Thu, 14 Jun 2012 15:02:35 +0300 Subject: vga_switcheroo: Add include guard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Guard vga_switcheroo.h against multiple inclusion. Signed-off-by: Ozan Çağlayan Signed-off-by: Dave Airlie --- include/linux/vga_switcheroo.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h index d844b7790ea6..ddb419cf4530 100644 --- a/include/linux/vga_switcheroo.h +++ b/include/linux/vga_switcheroo.h @@ -7,6 +7,9 @@ * vga_switcheroo.h - Support for laptop with dual GPU using one set of outputs */ +#ifndef _LINUX_VGA_SWITCHEROO_H_ +#define _LINUX_VGA_SWITCHEROO_H_ + #include struct pci_dev; @@ -73,3 +76,4 @@ static inline int vga_switcheroo_get_client_state(struct pci_dev *dev) { return #endif +#endif /* _LINUX_VGA_SWITCHEROO_H_ */ -- cgit v1.2.3 From 87fac288083db40b5d5ab845393be268357c8827 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 30 Jun 2012 15:30:46 -0700 Subject: linux/irq.h: fix kernel-doc warning Fix kernel-doc warning. This struct member was removed in commit 875682648b89 ("irq: Remove irq_chip->release()") so remove its associated kernel-doc entry also. Warning(include/linux/irq.h:338): Excess struct/union/enum/typedef member 'release' description in 'irq_chip' Signed-off-by: Randy Dunlap Cc: Richard Weinberger Cc: Thomas Gleixner Signed-off-by: Linus Torvalds --- include/linux/irq.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 61f5cec031e0..a5261e3d2e3c 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -301,8 +301,6 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) * @irq_pm_shutdown: function called from core code on shutdown once per chip * @irq_print_chip: optional to print special chip info in show_interrupts * @flags: chip specific flags - * - * @release: release function solely used by UML */ struct irq_chip { const char *name; -- cgit v1.2.3 From cba6d0d64ee53772b285d0c0c288deefbeaf7775 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 2 Jul 2012 07:08:42 -0700 Subject: Revert "rcu: Move PREEMPT_RCU preemption to switch_to() invocation" This reverts commit 616c310e83b872024271c915c1b9ab505b9efad9. (Move PREEMPT_RCU preemption to switch_to() invocation). Testing by Sasha Levin showed that this can result in deadlock due to invoking the scheduler when one of the runqueue locks is held. Because this commit was simply a performance optimization, revert it. Reported-by: Sasha Levin Signed-off-by: Paul E. McKenney Tested-by: Sasha Levin --- include/linux/rcupdate.h | 1 - include/linux/rcutiny.h | 6 ++++++ include/linux/sched.h | 10 ---------- 3 files changed, 6 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 26d1a47591f1..9cac722b169c 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -184,7 +184,6 @@ static inline int rcu_preempt_depth(void) /* Internal to kernel */ extern void rcu_sched_qs(int cpu); extern void rcu_bh_qs(int cpu); -extern void rcu_preempt_note_context_switch(void); extern void rcu_check_callbacks(int cpu, int user); struct notifier_block; extern void rcu_idle_enter(void); diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 854dc4c5c271..4e56a9c69a35 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -87,6 +87,10 @@ static inline void kfree_call_rcu(struct rcu_head *head, #ifdef CONFIG_TINY_RCU +static inline void rcu_preempt_note_context_switch(void) +{ +} + static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) { *delta_jiffies = ULONG_MAX; @@ -95,6 +99,7 @@ static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) #else /* #ifdef CONFIG_TINY_RCU */ +void rcu_preempt_note_context_switch(void); int rcu_preempt_needs_cpu(void); static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) @@ -108,6 +113,7 @@ static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) static inline void rcu_note_context_switch(int cpu) { rcu_sched_qs(cpu); + rcu_preempt_note_context_switch(); } /* diff --git a/include/linux/sched.h b/include/linux/sched.h index 4059c0f33f07..06a4c5f4f55c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1871,22 +1871,12 @@ static inline void rcu_copy_process(struct task_struct *p) INIT_LIST_HEAD(&p->rcu_node_entry); } -static inline void rcu_switch_from(struct task_struct *prev) -{ - if (prev->rcu_read_lock_nesting != 0) - rcu_preempt_note_context_switch(); -} - #else static inline void rcu_copy_process(struct task_struct *p) { } -static inline void rcu_switch_from(struct task_struct *prev) -{ -} - #endif #ifdef CONFIG_SMP -- cgit v1.2.3 From d4db2935e4fffeba42540b0dc9d85e3036701221 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 29 Jun 2012 09:56:08 -0600 Subject: KVM: Pass kvm_irqfd to functions Prune this down to just the struct kvm_irqfd so we can avoid changing function definition for every flag or field we use. Signed-off-by: Alex Williamson Acked-by: Cornelia Huck Signed-off-by: Marcelo Tosatti --- include/linux/kvm_host.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c4464356b35b..96c158a37d3e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -815,7 +815,7 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {} #ifdef CONFIG_HAVE_KVM_EVENTFD void kvm_eventfd_init(struct kvm *kvm); -int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags); +int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args); void kvm_irqfd_release(struct kvm *kvm); void kvm_irq_routing_update(struct kvm *, struct kvm_irq_routing_table *); int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args); @@ -824,7 +824,7 @@ int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args); static inline void kvm_eventfd_init(struct kvm *kvm) {} -static inline int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags) +static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) { return -EINVAL; } -- cgit v1.2.3 From 5a081caa0414b9bbb82c17ffab9d6fe66edbb72f Mon Sep 17 00:00:00 2001 From: Ohad Ben-Cohen Date: Wed, 6 Jun 2012 10:09:25 +0300 Subject: rpmsg: avoid premature deallocation of endpoints When an inbound message arrives, the rpmsg core looks up its associated endpoint and invokes the registered callback. If a message arrives while its endpoint is being removed (because the rpmsg driver was removed, or a recovery of a remote processor has kicked in) we must ensure atomicity, i.e.: - Either the ept is removed before it is found or - The ept is found but will not be freed until the callback returns This is achieved by maintaining a per-ept reference count, which, when drops to zero, will trigger deallocation of the ept. With this in hand, it is now forbidden to directly deallocate epts once they have been added to the endpoints idr. Cc: stable Reported-by: Fernando Guzman Lugo Signed-off-by: Ohad Ben-Cohen --- include/linux/rpmsg.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h index a8e50e44203c..195f373590b8 100644 --- a/include/linux/rpmsg.h +++ b/include/linux/rpmsg.h @@ -38,6 +38,7 @@ #include #include #include +#include /* The feature bitmap for virtio rpmsg */ #define VIRTIO_RPMSG_F_NS 0 /* RP supports name service notifications */ @@ -120,6 +121,7 @@ typedef void (*rpmsg_rx_cb_t)(struct rpmsg_channel *, void *, int, void *, u32); /** * struct rpmsg_endpoint - binds a local rpmsg address to its user * @rpdev: rpmsg channel device + * @refcount: when this drops to zero, the ept is deallocated * @cb: rx callback handler * @addr: local rpmsg address * @priv: private data for the driver's use @@ -140,6 +142,7 @@ typedef void (*rpmsg_rx_cb_t)(struct rpmsg_channel *, void *, int, void *, u32); */ struct rpmsg_endpoint { struct rpmsg_channel *rpdev; + struct kref refcount; rpmsg_rx_cb_t cb; u32 addr; void *priv; -- cgit v1.2.3 From 15fd943af50dbc5f7f4de33835795c72595f7bf4 Mon Sep 17 00:00:00 2001 From: Ohad Ben-Cohen Date: Thu, 7 Jun 2012 15:39:35 +0300 Subject: rpmsg: make sure inflight messages don't invoke just-removed callbacks When inbound messages arrive, rpmsg core looks up their associated endpoint (by destination address) and then invokes their callback. We've made sure that endpoints will never be de-allocated after they were found by rpmsg core, but we also need to protect against the (rare) scenario where the rpmsg driver was just removed, and its callback function isn't available anymore. This is achieved by introducing a callback mutex, which must be taken before the callback is invoked, and, obviously, before it is removed. Cc: stable Reported-by: Fernando Guzman Lugo Signed-off-by: Ohad Ben-Cohen --- include/linux/rpmsg.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h index 195f373590b8..82a673905edb 100644 --- a/include/linux/rpmsg.h +++ b/include/linux/rpmsg.h @@ -39,6 +39,7 @@ #include #include #include +#include /* The feature bitmap for virtio rpmsg */ #define VIRTIO_RPMSG_F_NS 0 /* RP supports name service notifications */ @@ -123,6 +124,7 @@ typedef void (*rpmsg_rx_cb_t)(struct rpmsg_channel *, void *, int, void *, u32); * @rpdev: rpmsg channel device * @refcount: when this drops to zero, the ept is deallocated * @cb: rx callback handler + * @cb_lock: must be taken before accessing/changing @cb * @addr: local rpmsg address * @priv: private data for the driver's use * @@ -144,6 +146,7 @@ struct rpmsg_endpoint { struct rpmsg_channel *rpdev; struct kref refcount; rpmsg_rx_cb_t cb; + struct mutex cb_lock; u32 addr; void *priv; }; -- cgit v1.2.3 From 2dfd06036ba7ae8e7be2daf5a2fff1dac42390bf Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Wed, 27 Jun 2012 17:09:54 +0800 Subject: aio: make kiocb->private NUll in init_sync_kiocb() Ocfs2 uses kiocb.*private as a flag of unsigned long size. In commit a11f7e6 ocfs2: serialize unaligned aio, the unaligned io flag is involved in it to serialize the unaligned aio. As *private is not initialized in init_sync_kiocb() of do_sync_write(), this unaligned io flag may be unexpectly set in an aligned dio. And this will cause OCFS2_I(inode)->ip_unaligned_aio decreased to -1 in ocfs2_dio_end_io(), thus the following unaligned dio will hang forever at ocfs2_aiodio_wait() in ocfs2_file_aio_write(). Signed-off-by: Junxiao Bi Cc: stable@vger.kernel.org Acked-by: Jeff Moyer Signed-off-by: Joel Becker --- include/linux/aio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/aio.h b/include/linux/aio.h index 2314ad8b3c9c..b1a520ec8b59 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -140,6 +140,7 @@ struct kiocb { (x)->ki_dtor = NULL; \ (x)->ki_obj.tsk = tsk; \ (x)->ki_user_data = 0; \ + (x)->private = NULL; \ } while (0) #define AIO_RING_MAGIC 0xa10a10a1 -- cgit v1.2.3 From f567fde24640cf6f2d6416196bfc8b3fefc8e433 Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Wed, 20 Jun 2012 14:14:05 +0530 Subject: gpio: fix bits conflict for gpio flags The bit 2 and 3 in GPIO flag are allocated for the flag OPEN_DRAIN/OPEN_SOURCE. These bits are reused for the flag EXPORT/EXPORT_CHANGEABLE and so creating conflict. Fix this conflict by assigning bit 4 and 5 for the flag EXPORT/EXPORT_CHANGEABLE. Signed-off-by: Laxman Dewangan Signed-off-by: Linus Walleij --- include/linux/gpio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio.h b/include/linux/gpio.h index f07fc2d08159..2e31e8b3a190 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -22,8 +22,8 @@ /* Gpio pin is open source */ #define GPIOF_OPEN_SOURCE (1 << 3) -#define GPIOF_EXPORT (1 << 2) -#define GPIOF_EXPORT_CHANGEABLE (1 << 3) +#define GPIOF_EXPORT (1 << 4) +#define GPIOF_EXPORT_CHANGEABLE (1 << 5) #define GPIOF_EXPORT_DIR_FIXED (GPIOF_EXPORT) #define GPIOF_EXPORT_DIR_CHANGEABLE (GPIOF_EXPORT | GPIOF_EXPORT_CHANGEABLE) -- cgit v1.2.3 From 5167e8d5417bf5c322a703d2927daec727ea40dd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 22 Jun 2012 15:52:09 +0200 Subject: sched/nohz: Rewrite and fix load-avg computation -- again Thanks to Charles Wang for spotting the defects in the current code: - If we go idle during the sample window -- after sampling, we get a negative bias because we can negate our own sample. - If we wake up during the sample window we get a positive bias because we push the sample to a known active period. So rewrite the entire nohz load-avg muck once again, now adding copious documentation to the code. Reported-and-tested-by: Doug Smythies Reported-and-tested-by: Charles Wang Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Cc: stable@kernel.org Link: http://lkml.kernel.org/r/1340373782.18025.74.camel@twins [ minor edits ] Signed-off-by: Ingo Molnar --- include/linux/sched.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4059c0f33f07..20cb7497c59c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1909,6 +1909,14 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, } #endif +#ifdef CONFIG_NO_HZ +void calc_load_enter_idle(void); +void calc_load_exit_idle(void); +#else +static inline void calc_load_enter_idle(void) { } +static inline void calc_load_exit_idle(void) { } +#endif /* CONFIG_NO_HZ */ + #ifndef CONFIG_CPUMASK_OFFSTACK static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) { -- cgit v1.2.3 From c540521bba5d2f24bd2c0417157bfaf8b85e2eee Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 5 Jul 2012 11:23:24 -0700 Subject: security: Minor improvements to no_new_privs documentation The documentation didn't actually mention how to enable no_new_privs. This also adds a note about possible interactions between no_new_privs and LSMs (i.e. why teaching systemd to set no_new_privs is not necessarily a good idea), and it references the new docs from include/linux/prctl.h. Suggested-by: Rob Landley Signed-off-by: Andy Lutomirski Acked-by: Kees Cook Signed-off-by: James Morris --- include/linux/prctl.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/prctl.h b/include/linux/prctl.h index 3988012255dc..289760f424aa 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -141,6 +141,8 @@ * Changing LSM security domain is considered a new privilege. So, for example, * asking selinux for a specific new context (e.g. with runcon) will result * in execve returning -EPERM. + * + * See Documentation/prctl/no_new_privs.txt for more details. */ #define PR_SET_NO_NEW_PRIVS 38 #define PR_GET_NO_NEW_PRIVS 39 -- cgit v1.2.3 From dbf0e4c7257f8d684ec1a3c919853464293de66e Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 9 Jul 2012 11:09:21 -0400 Subject: PCI: EHCI: fix crash during suspend on ASUS computers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Quite a few ASUS computers experience a nasty problem, related to the EHCI controllers, when going into system suspend. It was observed that the problem didn't occur if the controllers were not put into the D3 power state before starting the suspend, and commit 151b61284776be2d6f02d48c23c3625678960b97 (USB: EHCI: fix crash during suspend on ASUS computers) was created to do this. It turned out this approach messed up other computers that didn't have the problem -- it prevented USB wakeup from working. Consequently commit c2fb8a3fa25513de8fedb38509b1f15a5bbee47b (USB: add NO_D3_DURING_SLEEP flag and revert 151b61284776be2) was merged; it reverted the earlier commit and added a whitelist of known good board names. Now we know the actual cause of the problem. Thanks to AceLan Kao for tracking it down. According to him, an engineer at ASUS explained that some of their BIOSes contain a bug that was added in an attempt to work around a problem in early versions of Windows. When the computer goes into S3 suspend, the BIOS tries to verify that the EHCI controllers were first quiesced by the OS. Nothing's wrong with this, but the BIOS does it by checking that the PCI COMMAND registers contain 0 without checking the controllers' power state. If the register isn't 0, the BIOS assumes the controller needs to be quiesced and tries to do so. This involves making various MMIO accesses to the controller, which don't work very well if the controller is already in D3. The end result is a system hang or memory corruption. Since the value in the PCI COMMAND register doesn't matter once the controller has been suspended, and since the value will be restored anyway when the controller is resumed, we can work around the BIOS bug simply by setting the register to 0 during system suspend. This patch (as1590) does so and also reverts the second commit mentioned above, which is now unnecessary. In theory we could do this for every PCI device. However to avoid introducing new problems, the patch restricts itself to EHCI host controllers. Finally the affected systems can suspend with USB wakeup working properly. Reference: https://bugzilla.kernel.org/show_bug.cgi?id=37632 Reference: https://bugzilla.kernel.org/show_bug.cgi?id=42728 Based-on-patch-by: AceLan Kao Signed-off-by: Alan Stern Tested-by: Dâniel Fraga Tested-by: Javier Marcet Tested-by: Andrey Rahmatullin Tested-by: Oleksij Rempel Tested-by: Pavel Pisa Cc: stable Acked-by: Bjorn Helgaas Acked-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- include/linux/pci.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index fefb4e19bf6a..d8c379dba6ad 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -176,8 +176,6 @@ enum pci_dev_flags { PCI_DEV_FLAGS_NO_D3 = (__force pci_dev_flags_t) 2, /* Provide indication device is assigned by a Virtual Machine Manager */ PCI_DEV_FLAGS_ASSIGNED = (__force pci_dev_flags_t) 4, - /* Device causes system crash if in D3 during S3 sleep */ - PCI_DEV_FLAGS_NO_D3_DURING_SLEEP = (__force pci_dev_flags_t) 8, }; enum pci_irq_reroute_variant { -- cgit v1.2.3 From f55a6faa384304c89cfef162768e88374d3312cb Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 10 Jul 2012 18:43:19 -0400 Subject: hrtimer: Provide clock_was_set_delayed() clock_was_set() cannot be called from hard interrupt context because it calls on_each_cpu(). For fixing the widely reported leap seconds issue it is necessary to call it from hard interrupt context, i.e. the timer tick code, which does the timekeeping updates. Provide a new function which denotes it in the hrtimer cpu base structure of the cpu on which it is called and raise the hrtimer softirq. We then execute the clock_was_set() notificiation from softirq context in run_hrtimer_softirq(). The hrtimer softirq is rarely used, so polling the flag there is not a performance issue. [ tglx: Made it depend on CONFIG_HIGH_RES_TIMERS. We really should get rid of all this ifdeffery ASAP ] Signed-off-by: John Stultz Reported-by: Jan Engelhardt Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra Acked-by: Prarit Bhargava Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1341960205-56738-2-git-send-email-johnstul@us.ibm.com Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index fd0dc30c9f15..c9ec9400ee5b 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -165,6 +165,7 @@ enum hrtimer_base_type { * @lock: lock protecting the base and associated clock bases * and timers * @active_bases: Bitfield to mark bases with active timers + * @clock_was_set: Indicates that clock was set from irq context. * @expires_next: absolute time of the next event which was scheduled * via clock_set_next_event() * @hres_active: State of high resolution mode @@ -177,7 +178,8 @@ enum hrtimer_base_type { */ struct hrtimer_cpu_base { raw_spinlock_t lock; - unsigned long active_bases; + unsigned int active_bases; + unsigned int clock_was_set; #ifdef CONFIG_HIGH_RES_TIMERS ktime_t expires_next; int hres_active; @@ -286,6 +288,8 @@ extern void hrtimer_peek_ahead_timers(void); # define MONOTONIC_RES_NSEC HIGH_RES_NSEC # define KTIME_MONOTONIC_RES KTIME_HIGH_RES +extern void clock_was_set_delayed(void); + #else # define MONOTONIC_RES_NSEC LOW_RES_NSEC @@ -306,6 +310,9 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer) { return 0; } + +static inline void clock_was_set_delayed(void) { } + #endif extern void clock_was_set(void); -- cgit v1.2.3 From f6c06abfb3972ad4914cef57d8348fcb2932bc3b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 10 Jul 2012 18:43:24 -0400 Subject: timekeeping: Provide hrtimer update function To finally fix the infamous leap second issue and other race windows caused by functions which change the offsets between the various time bases (CLOCK_MONOTONIC, CLOCK_REALTIME and CLOCK_BOOTTIME) we need a function which atomically gets the current monotonic time and updates the offsets of CLOCK_REALTIME and CLOCK_BOOTTIME with minimalistic overhead. The previous patch which provides ktime_t offsets allows us to make this function almost as cheap as ktime_get() which is going to be replaced in hrtimer_interrupt(). Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra Acked-by: Prarit Bhargava Cc: stable@vger.kernel.org Signed-off-by: John Stultz Link: http://lkml.kernel.org/r/1341960205-56738-7-git-send-email-johnstul@us.ibm.com Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index c9ec9400ee5b..cc07d2777bbe 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -327,6 +327,7 @@ extern ktime_t ktime_get(void); extern ktime_t ktime_get_real(void); extern ktime_t ktime_get_boottime(void); extern ktime_t ktime_get_monotonic_offset(void); +extern ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot); DECLARE_PER_CPU(struct tick_device, tick_cpu_device); -- cgit v1.2.3 From d8adde17e5f858427504725218c56aef90e90fc7 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 11 Jul 2012 14:01:52 -0700 Subject: memory hotplug: fix invalid memory access caused by stale kswapd pointer kswapd_stop() is called to destroy the kswapd work thread when all memory of a NUMA node has been offlined. But kswapd_stop() only terminates the work thread without resetting NODE_DATA(nid)->kswapd to NULL. The stale pointer will prevent kswapd_run() from creating a new work thread when adding memory to the memory-less NUMA node again. Eventually the stale pointer may cause invalid memory access. An example stack dump as below. It's reproduced with 2.6.32, but latest kernel has the same issue. BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] exit_creds+0x12/0x78 PGD 0 Oops: 0000 [#1] SMP last sysfs file: /sys/devices/system/memory/memory391/state CPU 11 Modules linked in: cpufreq_conservative cpufreq_userspace cpufreq_powersave acpi_cpufreq microcode fuse loop dm_mod tpm_tis rtc_cmos i2c_i801 rtc_core tpm serio_raw pcspkr sg tpm_bios igb i2c_core iTCO_wdt rtc_lib mptctl iTCO_vendor_support button dca bnx2 usbhid hid uhci_hcd ehci_hcd usbcore sd_mod crc_t10dif edd ext3 mbcache jbd fan ide_pci_generic ide_core ata_generic ata_piix libata thermal processor thermal_sys hwmon mptsas mptscsih mptbase scsi_transport_sas scsi_mod Pid: 7949, comm: sh Not tainted 2.6.32.12-qiuxishi-5-default #92 Tecal RH2285 RIP: 0010:exit_creds+0x12/0x78 RSP: 0018:ffff8806044f1d78 EFLAGS: 00010202 RAX: 0000000000000000 RBX: ffff880604f22140 RCX: 0000000000019502 RDX: 0000000000000000 RSI: 0000000000000202 RDI: 0000000000000000 RBP: ffff880604f22150 R08: 0000000000000000 R09: ffffffff81a4dc10 R10: 00000000000032a0 R11: ffff880006202500 R12: 0000000000000000 R13: 0000000000c40000 R14: 0000000000008000 R15: 0000000000000001 FS: 00007fbc03d066f0(0000) GS:ffff8800282e0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000000 CR3: 000000060f029000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process sh (pid: 7949, threadinfo ffff8806044f0000, task ffff880603d7c600) Stack: ffff880604f22140 ffffffff8103aac5 ffff880604f22140 ffffffff8104d21e ffff880006202500 0000000000008000 0000000000c38000 ffffffff810bd5b1 0000000000000000 ffff880603d7c600 00000000ffffdd29 0000000000000003 Call Trace: __put_task_struct+0x5d/0x97 kthread_stop+0x50/0x58 offline_pages+0x324/0x3da memory_block_change_state+0x179/0x1db store_mem_state+0x9e/0xbb sysfs_write_file+0xd0/0x107 vfs_write+0xad/0x169 sys_write+0x45/0x6e system_call_fastpath+0x16/0x1b Code: ff 4d 00 0f 94 c0 84 c0 74 08 48 89 ef e8 1f fd ff ff 5b 5d 31 c0 41 5c c3 53 48 8b 87 20 06 00 00 48 89 fb 48 8b bf 18 06 00 00 <8b> 00 48 c7 83 18 06 00 00 00 00 00 00 f0 ff 0f 0f 94 c0 84 c0 RIP exit_creds+0x12/0x78 RSP CR2: 0000000000000000 [akpm@linux-foundation.org: add pglist_data.kswapd locking comments] Signed-off-by: Xishi Qiu Signed-off-by: Jiang Liu Acked-by: KAMEZAWA Hiroyuki Acked-by: KOSAKI Motohiro Acked-by: Mel Gorman Acked-by: David Rientjes Reviewed-by: Minchan Kim Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 2427706f78b4..68c569fcbb66 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -694,7 +694,7 @@ typedef struct pglist_data { range, including holes */ int node_id; wait_queue_head_t kswapd_wait; - struct task_struct *kswapd; + struct task_struct *kswapd; /* Protected by lock_memory_hotplug() */ int kswapd_max_order; enum zone_type classzone_idx; } pg_data_t; -- cgit v1.2.3 From 99ab7b19440a72ebdf225f99b20f8ef40decee86 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 11 Jul 2012 14:02:53 -0700 Subject: mm: sparse: fix usemap allocation above node descriptor section After commit f5bf18fa22f8 ("bootmem/sparsemem: remove limit constraint in alloc_bootmem_section"), usemap allocations may easily be placed outside the optimal section that holds the node descriptor, even if there is space available in that section. This results in unnecessary hotplug dependencies that need to have the node unplugged before the section holding the usemap. The reason is that the bootmem allocator doesn't guarantee a linear search starting from the passed allocation goal but may start out at a much higher address absent an upper limit. Fix this by trying the allocation with the limit at the section end, then retry without if that fails. This keeps the fix from f5bf18fa22f8 of not panicking if the allocation does not fit in the section, but still makes sure to try to stay within the section at first. Signed-off-by: Yinghai Lu Signed-off-by: Johannes Weiner Cc: [3.3.x, 3.4.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bootmem.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 324fe08ea3b1..6d6795d46a75 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -91,6 +91,11 @@ extern void *__alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal); +void *___alloc_bootmem_node_nopanic(pg_data_t *pgdat, + unsigned long size, + unsigned long align, + unsigned long goal, + unsigned long limit); extern void *__alloc_bootmem_low(unsigned long size, unsigned long align, unsigned long goal); -- cgit v1.2.3 From 29f6738609e40227dabcc63bfb3b84b3726a75bd Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 11 Jul 2012 14:02:56 -0700 Subject: memblock: free allocated memblock_reserved_regions later memblock_free_reserved_regions() calls memblock_free(), but memblock_free() would double reserved.regions too, so we could free the old range for reserved.regions. Also tj said there is another bug which could be related to this. | I don't think we're saving any noticeable | amount by doing this "free - give it to page allocator - reserve | again" dancing. We should just allocate regions aligned to page | boundaries and free them later when memblock is no longer in use. in that case, when DEBUG_PAGEALLOC, will get panic: memblock_free: [0x0000102febc080-0x0000102febf080] memblock_free_reserved_regions+0x37/0x39 BUG: unable to handle kernel paging request at ffff88102febd948 IP: [] __next_free_mem_range+0x9b/0x155 PGD 4826063 PUD cf67a067 PMD cf7fa067 PTE 800000102febd160 Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC CPU 0 Pid: 0, comm: swapper Not tainted 3.5.0-rc2-next-20120614-sasha #447 RIP: 0010:[] [] __next_free_mem_range+0x9b/0x155 See the discussion at https://lkml.org/lkml/2012/6/13/469 So try to allocate with PAGE_SIZE alignment and free it later. Reported-by: Sasha Levin Acked-by: Tejun Heo Cc: Benjamin Herrenschmidt Signed-off-by: Yinghai Lu Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index a6bb10235148..19dc455b4f3d 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -50,9 +50,7 @@ phys_addr_t memblock_find_in_range_node(phys_addr_t start, phys_addr_t end, phys_addr_t size, phys_addr_t align, int nid); phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end, phys_addr_t size, phys_addr_t align); -int memblock_free_reserved_regions(void); -int memblock_reserve_reserved_regions(void); - +phys_addr_t get_allocated_memblock_reserved_regions_info(phys_addr_t *addr); void memblock_allow_resize(void); int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid); int memblock_add(phys_addr_t base, phys_addr_t size); -- cgit v1.2.3 From d9914cf66181b8aa0929775f5c6f675c6ebc3eb5 Mon Sep 17 00:00:00 2001 From: Michael Kerrisk Date: Tue, 17 Jul 2012 21:37:27 +0200 Subject: PM: Rename CAP_EPOLLWAKEUP to CAP_BLOCK_SUSPEND As discussed in http://thread.gmane.org/gmane.linux.kernel/1249726/focus=1288990, the capability introduced in 4d7e30d98939a0340022ccd49325a3d70f7e0238 to govern EPOLLWAKEUP seems misnamed: this capability is about governing the ability to suspend the system, not using a particular API flag (EPOLLWAKEUP). We should make the name of the capability more general to encourage reuse in related cases. (Whether or not this capability should also be used to govern the use of /sys/power/wake_lock is a question that needs to be separately resolved.) This patch renames the capability to CAP_BLOCK_SUSPEND. In order to ensure that the old capability name doesn't make it out into the wild, could you please apply and push up the tree to ensure that it is incorporated for the 3.5 release. Signed-off-by: Michael Kerrisk Acked-by: Serge Hallyn Signed-off-by: Rafael J. Wysocki --- include/linux/capability.h | 6 +++--- include/linux/eventpoll.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index 68d56effc328..d10b7ed595b1 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -360,11 +360,11 @@ struct cpu_vfs_cap_data { #define CAP_WAKE_ALARM 35 -/* Allow preventing system suspends while epoll events are pending */ +/* Allow preventing system suspends */ -#define CAP_EPOLLWAKEUP 36 +#define CAP_BLOCK_SUSPEND 36 -#define CAP_LAST_CAP CAP_EPOLLWAKEUP +#define CAP_LAST_CAP CAP_BLOCK_SUSPEND #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP) diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index 6f8be328770a..f4bb378ccf6a 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h @@ -34,7 +34,7 @@ * re-allowed until epoll_wait is called again after consuming the wakeup * event(s). * - * Requires CAP_EPOLLWAKEUP + * Requires CAP_BLOCK_SUSPEND */ #define EPOLLWAKEUP (1 << 29) -- cgit v1.2.3 From 5bdca4e0768d3e0f4efa43d9a2cc8210aeb91ab9 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 10 Jul 2012 11:53:34 -0700 Subject: libceph: fix messenger retry In ancient times, the messenger could both initiate and accept connections. An artifact if that was data structures to store/process an incoming ceph_msg_connect request and send an outgoing ceph_msg_connect_reply. Sadly, the negotiation code was referencing those structures and ignoring important information (like the peer's connect_seq) from the correct ones. Among other things, this fixes tight reconnect loops where the server sends RETRY_SESSION and we (the client) retries with the same connect_seq as last time. This bug pretty easily triggered by injecting socket failures on the MDS and running some fs workload like workunits/direct_io/test_sync_io. Signed-off-by: Sage Weil --- include/linux/ceph/messenger.h | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 2521a95fa6d9..44c87e731e9d 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -163,16 +163,8 @@ struct ceph_connection { /* connection negotiation temps */ char in_banner[CEPH_BANNER_MAX_LEN]; - union { - struct { /* outgoing connection */ - struct ceph_msg_connect out_connect; - struct ceph_msg_connect_reply in_reply; - }; - struct { /* incoming */ - struct ceph_msg_connect in_connect; - struct ceph_msg_connect_reply out_reply; - }; - }; + struct ceph_msg_connect out_connect; + struct ceph_msg_connect_reply in_reply; struct ceph_entity_addr actual_peer_addr; /* message out temps */ -- cgit v1.2.3 From eea03c20ae38a55405c0865ed9adfccc400e4c8e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 18 Jul 2012 18:15:46 -0700 Subject: Make wait_for_device_probe() also do scsi_complete_async_scans() Commit a7a20d103994 ("sd: limit the scope of the async probe domain") make the SCSI device probing run device discovery in it's own async domain. However, as a result, the partition detection was no longer synchronized by async_synchronize_full() (which, despite the name, only synchronizes the global async space, not all of them). Which in turn meant that "wait_for_device_probe()" would not wait for the SCSI partitions to be parsed. And "wait_for_device_probe()" was what the boot time init code relied on for mounting the root filesystem. Now, most people never noticed this, because not only is it timing-dependent, but modern distributions all use initrd. So the root filesystem isn't actually on a disk at all. And then before they actually mount the final disk filesystem, they will have loaded the scsi-wait-scan module, which not only does the expected wait_for_device_probe(), but also does scsi_complete_async_scans(). [ Side note: scsi_complete_async_scans() had also been partially broken, but that was fixed in commit 43a8d39d0137 ("fix async probe regression"), so that same commit a7a20d103994 had actually broken setups even if you used scsi-wait-scan explicitly ] Solve this problem by just moving the scsi_complete_async_scans() call into wait_for_device_probe(). Everybody who wants to wait for device probing to finish really wants the SCSI probing to complete, so there's no reason not to do this. So now "wait_for_device_probe()" really does what the name implies, and properly waits for device probing to finish. This also removes the now unnecessary extra calls to scsi_complete_async_scans(). Reported-and-tested-by: Artem S. Tashkinov Cc: Dan Williams Cc: Alan Stern Cc: James Bottomley Cc: Borislav Petkov Cc: linux-scsi Signed-off-by: Linus Torvalds --- include/linux/device.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 161d96241b1b..6de94151ff6f 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -865,8 +865,6 @@ extern int (*platform_notify_remove)(struct device *dev); extern struct device *get_device(struct device *dev); extern void put_device(struct device *dev); -extern void wait_for_device_probe(void); - #ifdef CONFIG_DEVTMPFS extern int devtmpfs_create_node(struct device *dev); extern int devtmpfs_delete_node(struct device *dev); -- cgit v1.2.3 From 533827c921c34310f63e859e1d6d0feec439657d Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Fri, 20 Jul 2012 17:28:07 -0700 Subject: printk: Implement some unlocked kmsg_dump functions If used from KDB, the locked variants are prone to deadlocks (suppose we got to the debugger w/ the logbuf lock held). So, we have to implement a few routines that grab no logbuf lock. Yet we don't need these functions in modules, so we don't export them. Signed-off-by: Anton Vorontsov Signed-off-by: Linus Torvalds --- include/linux/kmsg_dump.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h index d6bd50110ec2..2e7a1e032c71 100644 --- a/include/linux/kmsg_dump.h +++ b/include/linux/kmsg_dump.h @@ -55,12 +55,17 @@ struct kmsg_dumper { #ifdef CONFIG_PRINTK void kmsg_dump(enum kmsg_dump_reason reason); +bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog, + char *line, size_t size, size_t *len); + bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, char *line, size_t size, size_t *len); bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, char *buf, size_t size, size_t *len); +void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper); + void kmsg_dump_rewind(struct kmsg_dumper *dumper); int kmsg_dump_register(struct kmsg_dumper *dumper); @@ -71,6 +76,13 @@ static inline void kmsg_dump(enum kmsg_dump_reason reason) { } +static inline bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, + bool syslog, const char *line, + size_t size, size_t *len) +{ + return false; +} + static inline bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, const char *line, size_t size, size_t *len) { @@ -83,6 +95,10 @@ static inline bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, return false; } +static inline void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper) +{ +} + static inline void kmsg_dump_rewind(struct kmsg_dumper *dumper) { } -- cgit v1.2.3 From bff9d1865640dcb4d9711dcc50714e9a8b859453 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 21 Jul 2012 20:24:52 +0200 Subject: Remove SYSTEM_SUSPEND_DISK system state The SYSTEM_SUSPEND_DISK system state is never used, so drop it. Signed-off-by: Rafael J. Wysocki Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index e07f5e0c5df4..604382143bcf 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -377,7 +377,6 @@ extern enum system_states { SYSTEM_HALT, SYSTEM_POWER_OFF, SYSTEM_RESTART, - SYSTEM_SUSPEND_DISK, } system_state; #define TAINT_PROPRIETARY_MODULE 0 -- cgit v1.2.3