From 6d3321e8e2b3bf6a5892e2ef673c7bf536e3f904 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 23 Jun 2011 11:19:26 -0700 Subject: x86, mtrr: lock stop machine during MTRR rendezvous sequence MTRR rendezvous sequence using stop_one_cpu_nowait() can potentially happen in parallel with another system wide rendezvous using stop_machine(). This can lead to deadlock (The order in which works are queued can be different on different cpu's. Some cpu's will be running the first rendezvous handler and others will be running the second rendezvous handler. Each set waiting for the other set to join for the system wide rendezvous, leading to a deadlock). MTRR rendezvous sequence is not implemented using stop_machine() as this gets called both from the process context aswell as the cpu online paths (where the cpu has not come online and the interrupts are disabled etc). stop_machine() works with only online cpus. For now, take the stop_machine mutex in the MTRR rendezvous sequence that gets called from an online cpu (here we are in the process context and can potentially sleep while taking the mutex). And the MTRR rendezvous that gets triggered during cpu online doesn't need to take this stop_machine lock (as the stop_machine() already ensures that there is no cpu hotplug going on in parallel by doing get_online_cpus()) TBD: Pursue a cleaner solution of extending the stop_machine() infrastructure to handle the case where the calling cpu is still not online and use this for MTRR rendezvous sequence. fixes: https://bugzilla.novell.com/show_bug.cgi?id=672008 Reported-by: Vadim Kotelnikov Signed-off-by: Suresh Siddha Link: http://lkml.kernel.org/r/20110623182056.807230326@sbsiddha-MOBL3.sc.intel.com Cc: stable@kernel.org # 2.6.35+, backport a week or two after this gets more testing in mainline Signed-off-by: H. Peter Anvin --- include/linux/stop_machine.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index 092dc9b1ce7d..14d3524d1274 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -27,6 +27,8 @@ struct cpu_stop_work { struct cpu_stop_done *done; }; +extern struct mutex stop_cpus_mutex; + int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg); void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg, struct cpu_stop_work *work_buf); -- cgit v1.2.3 From f740e6cd0cb5e7468e46831aeb4d9c30e03d5ebc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 23 Jun 2011 11:19:28 -0700 Subject: stop_machine: implement stop_machine_from_inactive_cpu() Currently, mtrr wants stop_machine functionality while a CPU is being brought up. As stop_machine() requires the calling CPU to be active, mtrr implements its own stop_machine using stop_one_cpu() on each online CPU. This doesn't only unnecessarily duplicate complex logic but also introduces a possibility of deadlock when it races against the generic stop_machine(). This patch implements stop_machine_from_inactive_cpu() to serve such use cases. Its functionality is basically the same as stop_machine(); however, it should be called from a CPU which isn't active and doesn't depend on working scheduling on the calling CPU. This is achieved by using busy loops for synchronization and open-coding stop_cpus queuing and waiting with direct invocation of fn() for local CPU inbetween. Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/r/20110623182056.982526827@sbsiddha-MOBL3.sc.intel.com Signed-off-by: Suresh Siddha Cc: Ingo Molnar Cc: Andrew Morton Cc: Linus Torvalds Cc: Peter Zijlstra Signed-off-by: H. Peter Anvin --- include/linux/stop_machine.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index 14d3524d1274..e0f2da25d751 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -126,15 +126,19 @@ int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus); */ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus); +int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data, + const struct cpumask *cpus); + #else /* CONFIG_STOP_MACHINE && CONFIG_SMP */ static inline int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) { + unsigned long flags; int ret; - local_irq_disable(); + local_irq_save(flags); ret = fn(data); - local_irq_enable(); + local_irq_restore(flags); return ret; } @@ -144,5 +148,11 @@ static inline int stop_machine(int (*fn)(void *), void *data, return __stop_machine(fn, data, cpus); } +static inline int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data, + const struct cpumask *cpus) +{ + return __stop_machine(fn, data, cpus); +} + #endif /* CONFIG_STOP_MACHINE && CONFIG_SMP */ #endif /* _LINUX_STOP_MACHINE */ -- cgit v1.2.3 From 192d8857427dd23707d5f0b86ca990c3af6f2d74 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 23 Jun 2011 11:19:29 -0700 Subject: x86, mtrr: use stop_machine APIs for doing MTRR rendezvous MTRR rendezvous sequence is not implemened using stop_machine() before, as this gets called both from the process context aswell as the cpu online paths (where the cpu has not come online and the interrupts are disabled etc). Now that we have a new stop_machine_from_inactive_cpu() API, use it for rendezvous during mtrr init of a logical processor that is coming online. For the rest (runtime MTRR modification, system boot, resume paths), use stop_machine() to implement the rendezvous sequence. This will consolidate and cleanup the code. Signed-off-by: Suresh Siddha Link: http://lkml.kernel.org/r/20110623182057.076997177@sbsiddha-MOBL3.sc.intel.com Signed-off-by: H. Peter Anvin --- include/linux/stop_machine.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index e0f2da25d751..4a9d0c7edc65 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -27,8 +27,6 @@ struct cpu_stop_work { struct cpu_stop_done *done; }; -extern struct mutex stop_cpus_mutex; - int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg); void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg, struct cpu_stop_work *work_buf); -- cgit v1.2.3