From 1922b0f2758badc0b971d1ebbd300bc6635a6aef Mon Sep 17 00:00:00 2001 From: AnilKumar Ch Date: Mon, 13 Aug 2012 20:36:05 +0530 Subject: mfd: Move tps65217 regulator plat data handling to regulator Regulator platform data handling was mistakenly added to MFD driver. So we will see build errors if we compile MFD drivers without CONFIG_REGULATOR. This patch moves regulator platform data handling from TPS65217 MFD driver to regulator driver. This makes MFD driver independent of REGULATOR framework so build error is fixed if CONFIG_REGULATOR is not set. drivers/built-in.o: In function `tps65217_probe': tps65217.c:(.devinit.text+0x13e37): undefined reference to `of_regulator_match' This patch also fix allocation size of tps65217 platform data. Current implementation allocates a struct tps65217_board for each regulator specified in the device tree. But the structure itself provides array of regulators so one instance of it is sufficient. Signed-off-by: AnilKumar Ch --- include/linux/mfd/tps65217.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/tps65217.h b/include/linux/mfd/tps65217.h index 12c06870829a..7cd83d826ed8 100644 --- a/include/linux/mfd/tps65217.h +++ b/include/linux/mfd/tps65217.h @@ -22,6 +22,9 @@ #include #include +/* TPS chip id list */ +#define TPS65217 0xF0 + /* I2C ID for TPS65217 part */ #define TPS65217_I2C_ID 0x24 @@ -248,13 +251,11 @@ struct tps_info { struct tps65217 { struct device *dev; struct tps65217_board *pdata; + unsigned int id; struct regulator_desc desc[TPS65217_NUM_REGULATOR]; struct regulator_dev *rdev[TPS65217_NUM_REGULATOR]; struct tps_info *info[TPS65217_NUM_REGULATOR]; struct regmap *regmap; - - /* Client devices */ - struct platform_device *regulator_pdev[TPS65217_NUM_REGULATOR]; }; static inline struct tps65217 *dev_to_tps65217(struct device *dev) @@ -262,6 +263,11 @@ static inline struct tps65217 *dev_to_tps65217(struct device *dev) return dev_get_drvdata(dev); } +static inline int tps65217_chip_id(struct tps65217 *tps65217) +{ + return tps65217->id; +} + int tps65217_reg_read(struct tps65217 *tps, unsigned int reg, unsigned int *val); int tps65217_reg_write(struct tps65217 *tps, unsigned int reg, -- cgit v1.2.3 From b969afc8b719bbe3f0842a694e6bf5e87f08868f Mon Sep 17 00:00:00 2001 From: Joachim Eastwood Date: Thu, 23 Aug 2012 18:14:54 +0200 Subject: ASoC: atmel-ssc: include linux/io.h for raw io Include linux/io.h for raw io operations in atmel-scc header. This fixes the following build error: CC [M] sound/soc/atmel/atmel_ssc_dai.o sound/soc/atmel/atmel_ssc_dai.c: In function 'atmel_ssc_interrupt': sound/soc/atmel/atmel_ssc_dai.c:171: error: implicit declaration of function '__raw_readl' sound/soc/atmel/atmel_ssc_dai.c: In function 'atmel_ssc_shutdown': sound/soc/atmel/atmel_ssc_dai.c:249: error: implicit declaration of function '__raw_writel' Signed-off-by: Joachim Eastwood Signed-off-by: Nicolas Ferre Acked-by: Jean-Christophe PLAGNIOL-VILLARD Signed-off-by: Mark Brown --- include/linux/atmel-ssc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/atmel-ssc.h b/include/linux/atmel-ssc.h index 06023393fba9..4eb31752e2b7 100644 --- a/include/linux/atmel-ssc.h +++ b/include/linux/atmel-ssc.h @@ -3,6 +3,7 @@ #include #include +#include struct ssc_device { struct list_head list; -- cgit v1.2.3 From a6fa941d94b411bbd2b6421ffbde6db3c93e65ab Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 20 Aug 2012 14:59:25 +0100 Subject: perf_event: Switch to internal refcount, fix race with close() Don't mess with file refcounts (or keep a reference to file, for that matter) in perf_event. Use explicit refcount of its own instead. Deal with the race between the final reference to event going away and new children getting created for it by use of atomic_long_inc_not_zero() in inherit_event(); just have the latter free what it had allocated and return NULL, that works out just fine (children of siblings of something doomed are created as singletons, same as if the child of leader had been created and immediately killed). Signed-off-by: Al Viro Cc: stable@kernel.org Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20120820135925.GG23464@ZenIV.linux.org.uk Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 7602ccb3f40e..ad04dfcd6f35 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -926,7 +926,7 @@ struct perf_event { struct hw_perf_event hw; struct perf_event_context *ctx; - struct file *filp; + atomic_long_t refcount; /* * These accumulate total time (in nanoseconds) that children -- cgit v1.2.3 From 500ad2d8b01390c98bc6dce068bccfa9534b8212 Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Thu, 2 Aug 2012 13:46:35 +0530 Subject: perf/hwpb: Invoke __perf_event_disable() if interrupts are already disabled While debugging a warning message on PowerPC while using hardware breakpoints, it was discovered that when perf_event_disable is invoked through hw_breakpoint_handler function with interrupts disabled, a subsequent IPI in the code path would trigger a WARN_ON_ONCE message in smp_call_function_single function. This patch calls __perf_event_disable() when interrupts are already disabled, instead of perf_event_disable(). Reported-by: Edjunior Barbosa Machado Signed-off-by: K.Prasad [naveen.n.rao@linux.vnet.ibm.com: v3: Check to make sure we target current task] Signed-off-by: Naveen N. Rao Acked-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20120802081635.5811.17737.stgit@localhost.localdomain [ Fixed build error on MIPS. ] Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ad04dfcd6f35..33ed9d605f91 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1296,6 +1296,7 @@ extern int perf_swevent_get_recursion_context(void); extern void perf_swevent_put_recursion_context(int rctx); extern void perf_event_enable(struct perf_event *event); extern void perf_event_disable(struct perf_event *event); +extern int __perf_event_disable(void *info); extern void perf_event_task_tick(void); #else static inline void @@ -1334,6 +1335,7 @@ static inline int perf_swevent_get_recursion_context(void) { return -1; } static inline void perf_swevent_put_recursion_context(int rctx) { } static inline void perf_event_enable(struct perf_event *event) { } static inline void perf_event_disable(struct perf_event *event) { } +static inline int __perf_event_disable(void *info) { return -1; } static inline void perf_event_task_tick(void) { } #endif -- cgit v1.2.3 From c3f52af3e03013db5237e339c817beaae5ec9e3a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 3 Sep 2012 14:56:02 -0400 Subject: NFS: Fix the initialisation of the readdir 'cookieverf' array When the NFS_COOKIEVERF helper macro was converted into a static inline function in commit 99fadcd764 (nfs: convert NFS_*(inode) helpers to static inline), we broke the initialisation of the readdir cookies, since that depended on doing a memset with an argument of 'sizeof(NFS_COOKIEVERF(inode))' which therefore changed from sizeof(be32 cookieverf[2]) to sizeof(be32 *). At this point, NFS_COOKIEVERF seems to be more of an obfuscation than a helper, so the best thing would be to just get rid of it. Also see: https://bugzilla.kernel.org/show_bug.cgi?id=46881 Reported-by: Andi Kleen Reported-by: David Binderman Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- include/linux/nfs_fs.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 1f8fc7f9bcd8..4b03f56e280e 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -265,11 +265,6 @@ static inline const struct nfs_rpc_ops *NFS_PROTO(const struct inode *inode) return NFS_SERVER(inode)->nfs_client->rpc_ops; } -static inline __be32 *NFS_COOKIEVERF(const struct inode *inode) -{ - return NFS_I(inode)->cookieverf; -} - static inline unsigned NFS_MINATTRTIMEO(const struct inode *inode) { struct nfs_server *nfss = NFS_SERVER(inode); -- cgit v1.2.3 From 1f1ea6c2d9d8c0be9ec56454b05315273b5de8ce Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 26 Aug 2012 11:44:43 -0700 Subject: NFSv4: Fix buffer overflow checking in __nfs4_get_acl_uncached Pass the checks made by decode_getacl back to __nfs4_get_acl_uncached so that it knows if the acl has been truncated. The current overflow checking is broken, resulting in Oopses on user-triggered nfs4_getfacl calls, and is opaque to the point where several attempts at fixing it have failed. This patch tries to clean up the code in addition to fixing the Oopses by ensuring that the overflow checks are performed in a single place (decode_getacl). If the overflow check failed, we will still be able to report the acl length, but at least we will no longer attempt to cache the acl or copy the truncated contents to user space. Reported-by: Sachin Prabhu Signed-off-by: Trond Myklebust Tested-by: Sachin Prabhu --- include/linux/nfs_xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index ac7c8ae254f2..be9cf3c7e79e 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -652,7 +652,7 @@ struct nfs_getaclargs { }; /* getxattr ACL interface flags */ -#define NFS4_ACL_LEN_REQUEST 0x0001 /* zero length getxattr buffer */ +#define NFS4_ACL_TRUNC 0x0001 /* ACL was truncated */ struct nfs_getaclres { size_t acl_len; size_t acl_data_offset; -- cgit v1.2.3 From 60e233a56609fd963c59e99bd75c663d63fa91b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Sun, 2 Sep 2012 15:41:34 +0200 Subject: kobject: fix oops with "input0: bad kobj_uevent_env content in show_uevent()" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fengguang Wu writes: > After the __devinit* removal series, I can still get kernel panic in > show_uevent(). So there are more sources of bug.. > > Debug patch: > > @@ -343,8 +343,11 @@ static ssize_t show_uevent(struct device > goto out; > > /* copy keys to file */ > - for (i = 0; i < env->envp_idx; i++) > + dev_err(dev, "uevent %d env[%d]: %s/.../%s\n", env->buflen, env->envp_idx, top_kobj->name, dev->kobj.name); > + for (i = 0; i < env->envp_idx; i++) { > + printk(KERN_ERR "uevent %d env[%d]: %s\n", (int)count, i, env->envp[i]); > count += sprintf(&buf[count], "%s\n", env->envp[i]); > + } > > Oops message, the env[] is again not properly initilized: > > [ 44.068623] input input0: uevent 61 env[805306368]: input0/.../input0 > [ 44.069552] uevent 0 env[0]: (null) This is a completely different CONFIG_HOTPLUG problem, only demonstrating another reason why CONFIG_HOTPLUG should go away. I had a hard time trying to disable it anyway ;-) The problem this time is lots of code assuming that a call to add_uevent_var() will guarantee that env->buflen > 0. This is not true if CONFIG_HOTPLUG is unset. So things like this end up overwriting env->envp_idx because the array index is -1: if (add_uevent_var(env, "MODALIAS=")) return -ENOMEM; len = input_print_modalias(&env->buf[env->buflen - 1], sizeof(env->buf) - env->buflen, dev, 0); Don't know what the best action is, given that there seem to be a *lot* of this around the kernel. This patch "fixes" the problem for me, but I don't know if it can be considered an appropriate fix. [ It is the correct fix for now, for 3.7 forcing CONFIG_HOTPLUG to always be on is the longterm fix, but it's too late for 3.6 and older kernels to resolve this that way - gregkh ] Reported-by: Fengguang Wu Signed-off-by: Bjørn Mork Tested-by: Fengguang Wu Cc: stable Signed-off-by: Greg Kroah-Hartman --- include/linux/kobject.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kobject.h b/include/linux/kobject.h index fc615a97e2d3..1e57449395b1 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -224,7 +224,7 @@ static inline int kobject_uevent_env(struct kobject *kobj, static inline __printf(2, 3) int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...) -{ return 0; } +{ return -ENOMEM; } static inline int kobject_action_type(const char *buf, size_t count, enum kobject_action *type) -- cgit v1.2.3 From f39c1bfb5a03e2d255451bff05be0d7255298fa4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 7 Sep 2012 11:08:50 -0400 Subject: SUNRPC: Fix a UDP transport regression Commit 43cedbf0e8dfb9c5610eb7985d5f21263e313802 (SUNRPC: Ensure that we grab the XPRT_LOCK before calling xprt_alloc_slot) is causing hangs in the case of NFS over UDP mounts. Since neither the UDP or the RDMA transport mechanism use dynamic slot allocation, we can skip grabbing the socket lock for those transports. Add a new rpc_xprt_op to allow switching between the TCP and UDP/RDMA case. Note that the NFSv4.1 back channel assigns the slot directly through rpc_run_bc_task, so we can ignore that case. Reported-by: Dick Streefland Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org [>= 3.1] --- include/linux/sunrpc/xprt.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index cff40aa7db62..bf8c49ff7530 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -114,6 +114,7 @@ struct rpc_xprt_ops { void (*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize); int (*reserve_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); + void (*alloc_slot)(struct rpc_xprt *xprt, struct rpc_task *task); void (*rpcbind)(struct rpc_task *task); void (*set_port)(struct rpc_xprt *xprt, unsigned short port); void (*connect)(struct rpc_task *task); @@ -281,6 +282,8 @@ void xprt_connect(struct rpc_task *task); void xprt_reserve(struct rpc_task *task); int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task); int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); +void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task); +void xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task); int xprt_prepare_transmit(struct rpc_task *task); void xprt_transmit(struct rpc_task *task); void xprt_end_transmit(struct rpc_task *task); -- cgit v1.2.3 From a8edc3bf05a3465726afdf635a820761fae0d50b Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Wed, 5 Sep 2012 22:50:48 +0000 Subject: net/mlx4_core: Put Firmware flow steering structures in common header files To allow for usage of the flow steering Firmware structures in more locations over the driver, such as the resource tracker, move them from mcg.c to common header files. Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index bd6c9fcdf2dd..244ba902ab72 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -796,6 +796,8 @@ enum mlx4_net_trans_rule_id { MLX4_NET_TRANS_RULE_NUM, /* should be last */ }; +extern const u16 __sw_id_hw[]; + enum mlx4_net_trans_promisc_mode { MLX4_FS_PROMISC_NONE = 0, MLX4_FS_PROMISC_UPLINK, -- cgit v1.2.3 From 7fb40f87c4195ec1728527f30bc744c47a45b366 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Wed, 5 Sep 2012 22:50:49 +0000 Subject: net/mlx4_core: Add security check / enforcement for flow steering rules set for VMs Since VFs may be mapped to VMs which aren't trusted entities, flow steering rules attached through the wrapper on behalf of VFs must be checked to make sure that their L2 specification relate to MAC address assigned to that VF, and add L2 specification if its missing. Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 244ba902ab72..6e1b0f973a03 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -798,6 +798,17 @@ enum mlx4_net_trans_rule_id { extern const u16 __sw_id_hw[]; +static inline int map_hw_to_sw_id(u16 header_id) +{ + + int i; + for (i = 0; i < MLX4_NET_TRANS_RULE_NUM; i++) { + if (header_id == __sw_id_hw[i]) + return i; + } + return -EINVAL; +} + enum mlx4_net_trans_promisc_mode { MLX4_FS_PROMISC_NONE = 0, MLX4_FS_PROMISC_UPLINK, -- cgit v1.2.3 From c076ada4e4aaf45e1a31ad6de7c6cce36081e045 Mon Sep 17 00:00:00 2001 From: Roland Stigge Date: Wed, 8 Aug 2012 09:42:32 +0200 Subject: i2c: pnx: Fix read transactions of >= 2 bytes On transactions with n>=2 bytes, the controller actually wrongly clocks in n+1 bytes. This is caused by the (wrong) assumption that RFE in the Status Register is 1 iff there is no byte already ordered (via a dummy TX byte). This lead to the implementation of synchronized byte ordering, e.g.: Dummy-TX - RX - Dummy-TX - RX - ... But since RFE actually stays high after some Dummy-TX, it rather looks like: Dummy-TX - Dummy-TX - RX - Dummy-TX - RX - (RX) The last RX byte is clocked in by the bus controller, but ignored by the kernel when filling the userspace buffer. This patch fixes the issue by asking for RX via Dummy-TX asynchronously. Introducing a separate counter for TX bytes. Signed-off-by: Roland Stigge Signed-off-by: Wolfram Sang --- include/linux/i2c-pnx.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/i2c-pnx.h b/include/linux/i2c-pnx.h index 1bc74afe7a35..49ed17fdf055 100644 --- a/include/linux/i2c-pnx.h +++ b/include/linux/i2c-pnx.h @@ -22,6 +22,7 @@ struct i2c_pnx_mif { struct timer_list timer; /* Timeout */ u8 * buf; /* Data buffer */ int len; /* Length of data buffer */ + int order; /* RX Bytes to order via TX */ }; struct i2c_pnx_algo_data { -- cgit v1.2.3 From 4b921eda53366b319602351ff4d7256fafa4bd1b Mon Sep 17 00:00:00 2001 From: Karsten Keil Date: Thu, 13 Sep 2012 04:36:20 +0000 Subject: mISDN: Fix wrong usage of flush_work_sync while holding locks It is a bad idea to hold a spinlock and call flush_work_sync. Move the workqueue cleanup outside the spinlock and use cancel_work_sync, on closing the channel this seems to be the more correct function. Remove the never used and constant return value of mISDN_freebchannel. Signed-off-by: Karsten Keil Cc: Signed-off-by: David S. Miller --- include/linux/mISDNhw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mISDNhw.h b/include/linux/mISDNhw.h index d0752eca9b44..9d96d5d4dfed 100644 --- a/include/linux/mISDNhw.h +++ b/include/linux/mISDNhw.h @@ -183,7 +183,7 @@ extern int mISDN_initbchannel(struct bchannel *, unsigned short, unsigned short); extern int mISDN_freedchannel(struct dchannel *); extern void mISDN_clear_bchannel(struct bchannel *); -extern int mISDN_freebchannel(struct bchannel *); +extern void mISDN_freebchannel(struct bchannel *); extern int mISDN_ctrl_bchannel(struct bchannel *, struct mISDN_ctrl_req *); extern void queue_ch_frame(struct mISDNchannel *, u_int, int, struct sk_buff *); -- cgit v1.2.3 From 0848c94fb4a5cc213a7fb0fb3a5721ad6e16f096 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 11 Sep 2012 15:16:36 +0800 Subject: mfd: core: Push irqdomain mapping out into devices Currently the MFD core supports remapping MFD cell interrupts using an irqdomain but only if the MFD is being instantiated using device tree and only if the device tree bindings use the pattern of registering IPs in the device tree with compatible properties. This will be actively harmful for drivers which support non-DT platforms and use this pattern for their DT bindings as it will mean that the core will silently change remapping behaviour and it is also limiting for drivers which don't do DT with this particular pattern. There is also a potential fragility if there are interrupts not associated with MFD cells and all the cells are omitted from the device tree for some reason. Instead change the code to take an IRQ domain as an optional argument, allowing drivers to take the decision about the parent domain for their interrupts. The one current user of this feature is ab8500-core, it has the domain lookup pushed out into the driver. Signed-off-by: Mark Brown Signed-off-by: Samuel Ortiz --- include/linux/mfd/core.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index 3a8435a8058f..cebe97ee98b8 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -16,6 +16,8 @@ #include +struct irq_domain; + /* * This struct describes the MFD part ("cell"). * After registration the copy of this structure will become the platform data @@ -98,7 +100,7 @@ static inline const struct mfd_cell *mfd_get_cell(struct platform_device *pdev) extern int mfd_add_devices(struct device *parent, int id, struct mfd_cell *cells, int n_devs, struct resource *mem_base, - int irq_base); + int irq_base, struct irq_domain *irq_domain); extern void mfd_remove_devices(struct device *parent); -- cgit v1.2.3 From 37407ea7f93864c2cfc03edf8f37872ec539ea2b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 16 Sep 2012 12:29:43 -0700 Subject: Revert "sched: Improve scalability via 'CPU buddies', which withstand random perturbations" This reverts commit 970e178985cadbca660feb02f4d2ee3a09f7fdda. Nikolay Ulyanitsky reported thatthe 3.6-rc5 kernel has a 15-20% performance drop on PostgreSQL 9.2 on his machine (running "pgbench"). Borislav Petkov was able to reproduce this, and bisected it to this commit 970e178985ca ("sched: Improve scalability via 'CPU buddies' ...") apparently because the new single-idle-buddy model simply doesn't find idle CPU's to reschedule on aggressively enough. Mike Galbraith suspects that it is likely due to the user-mode spinlocks in PostgreSQL not reacting well to preemption, but we don't really know the details - I'll just revert the commit for now. There are hopefully other approaches to improve scheduler scalability without it causing these kinds of downsides. Reported-by: Nikolay Ulyanitsky Bisected-by: Borislav Petkov Acked-by: Mike Galbraith Cc: Andrew Morton Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index b8c86648a2f9..23bddac4bad8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -954,7 +954,6 @@ struct sched_domain { unsigned int smt_gain; int flags; /* See SD_* */ int level; - int idle_buddy; /* cpu assigned to select_idle_sibling() */ /* Runtime fields. */ unsigned long last_balance; /* init to jiffies. units in jiffies */ -- cgit v1.2.3