From 363ab6f1424cdea63e5d182312d60e19077b892a Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 12 May 2008 21:21:13 +0200 Subject: core: use performance variant for_each_cpu_mask_nr Change references from for_each_cpu_mask to for_each_cpu_mask_nr where appropriate Reviewed-by: Paul Jackson Reviewed-by: Christoph Lameter Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/cpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/cpu.c') diff --git a/kernel/cpu.c b/kernel/cpu.c index c77bc3a1c722..50ae922c6022 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -390,7 +390,7 @@ void __ref enable_nonboot_cpus(void) goto out; printk("Enabling non-boot CPUs ...\n"); - for_each_cpu_mask(cpu, frozen_cpus) { + for_each_cpu_mask_nr(cpu, frozen_cpus) { error = _cpu_up(cpu, 1); if (!error) { printk("CPU%d is up\n", cpu); -- cgit v1.2.3 From e761b7725234276a802322549cee5255305a0930 Mon Sep 17 00:00:00 2001 From: Max Krasnyansky Date: Tue, 15 Jul 2008 04:43:49 -0700 Subject: cpu hotplug, sched: Introduce cpu_active_map and redo sched domain managment (take 2) This is based on Linus' idea of creating cpu_active_map that prevents scheduler load balancer from migrating tasks to the cpu that is going down. It allows us to simplify domain management code and avoid unecessary domain rebuilds during cpu hotplug event handling. Please ignore the cpusets part for now. It needs some more work in order to avoid crazy lock nesting. Although I did simplfy and unify domain reinitialization logic. We now simply call partition_sched_domains() in all the cases. This means that we're using exact same code paths as in cpusets case and hence the test below cover cpusets too. Cpuset changes to make rebuild_sched_domains() callable from various contexts are in the separate patch (right next after this one). This not only boots but also easily handles while true; do make clean; make -j 8; done and while true; do on-off-cpu 1; done at the same time. (on-off-cpu 1 simple does echo 0/1 > /sys/.../cpu1/online thing). Suprisingly the box (dual-core Core2) is quite usable. In fact I'm typing this on right now in gnome-terminal and things are moving just fine. Also this is running with most of the debug features enabled (lockdep, mutex, etc) no BUG_ONs or lockdep complaints so far. I believe I addressed all of the Dmitry's comments for original Linus' version. I changed both fair and rt balancer to mask out non-active cpus. And replaced cpu_is_offline() with !cpu_active() in the main scheduler code where it made sense (to me). Signed-off-by: Max Krasnyanskiy Acked-by: Linus Torvalds Acked-by: Peter Zijlstra Acked-by: Gregory Haskins Cc: dmitry.adamushko@gmail.com Cc: pj@sgi.com Signed-off-by: Ingo Molnar --- kernel/cpu.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) (limited to 'kernel/cpu.c') diff --git a/kernel/cpu.c b/kernel/cpu.c index cfb1d43ab801..a1ac7ea245d7 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -64,6 +64,8 @@ void __init cpu_hotplug_init(void) cpu_hotplug.refcount = 0; } +cpumask_t cpu_active_map; + #ifdef CONFIG_HOTPLUG_CPU void get_online_cpus(void) @@ -291,11 +293,20 @@ int __ref cpu_down(unsigned int cpu) int err = 0; cpu_maps_update_begin(); - if (cpu_hotplug_disabled) + + if (cpu_hotplug_disabled) { err = -EBUSY; - else - err = _cpu_down(cpu, 0); + goto out; + } + + cpu_clear(cpu, cpu_active_map); + + err = _cpu_down(cpu, 0); + + if (cpu_online(cpu)) + cpu_set(cpu, cpu_active_map); +out: cpu_maps_update_done(); return err; } @@ -355,11 +366,18 @@ int __cpuinit cpu_up(unsigned int cpu) } cpu_maps_update_begin(); - if (cpu_hotplug_disabled) + + if (cpu_hotplug_disabled) { err = -EBUSY; - else - err = _cpu_up(cpu, 0); + goto out; + } + err = _cpu_up(cpu, 0); + + if (cpu_online(cpu)) + cpu_set(cpu, cpu_active_map); + +out: cpu_maps_update_done(); return err; } -- cgit v1.2.3 From 39b0fad7121eace85770e7a4c6dc35dfd2879768 Mon Sep 17 00:00:00 2001 From: Max Krasnyansky Date: Tue, 15 Jul 2008 20:56:26 -0700 Subject: cpu hotplug: Make cpu_active_map synchronization dependency clear This goes on top of the cpu_active_map (take 2) patch. Currently we depend on the stop_machine to provide nescessesary synchronization for the cpu_active_map updates. As Dmitry Adamushko pointed this is fragile and is not much clearer than the previous scheme. In other words we do not want to depend on the internal stop machine operation here. So make the synchronization rules clear by doing synchronize_sched() after clearing out cpu active bit. Tested on quad-Core2 with: while true; do for i in 1 2 3; do echo 0 > /sys/devices/system/cpu/cpu$i/online done for i in 1 2 3; do echo 1 > /sys/devices/system/cpu/cpu$i/online done done and stress -c 200 No lockdep, preempt or other complaints. Signed-off-by: Max Krasnyansky Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/cpu.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'kernel/cpu.c') diff --git a/kernel/cpu.c b/kernel/cpu.c index a1ac7ea245d7..033603c1d7c3 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -301,6 +301,16 @@ int __ref cpu_down(unsigned int cpu) cpu_clear(cpu, cpu_active_map); + /* + * Make sure the all cpus did the reschedule and are not + * using stale version of the cpu_active_map. + * This is not strictly necessary becuase stop_machine() + * that we run down the line already provides the required + * synchronization. But it's really a side effect and we do not + * want to depend on the innards of the stop_machine here. + */ + synchronize_sched(); + err = _cpu_down(cpu, 0); if (cpu_online(cpu)) -- cgit v1.2.3 From 3da1c84c00c7e5fa8348336bd8c342f9128b0f14 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 25 Jul 2008 01:47:50 -0700 Subject: workqueues: make get_online_cpus() useable for work->func() workqueue_cpu_callback(CPU_DEAD) flushes cwq->thread under cpu_maps_update_begin(). This means that the multithreaded workqueues can't use get_online_cpus() due to the possible deadlock, very bad and very old problem. Introduce the new state, CPU_POST_DEAD, which is called after cpu_hotplug_done() but before cpu_maps_update_done(). Change workqueue_cpu_callback() to use CPU_POST_DEAD instead of CPU_DEAD. This means that create/destroy functions can't rely on get_online_cpus() any longer and should take cpu_add_remove_lock instead. [akpm@linux-foundation.org: fix CONFIG_SMP=n] Signed-off-by: Oleg Nesterov Acked-by: Gautham R Shenoy Cc: Heiko Carstens Cc: Max Krasnyansky Cc: Paul Jackson Cc: Paul Menage Cc: Peter Zijlstra Cc: Vegard Nossum Cc: Martin Schwidefsky Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cpu.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'kernel/cpu.c') diff --git a/kernel/cpu.c b/kernel/cpu.c index 2cc409ce0a8f..10ba5f1004a5 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -285,6 +285,11 @@ out_allowed: set_cpus_allowed_ptr(current, &old_allowed); out_release: cpu_hotplug_done(); + if (!err) { + if (raw_notifier_call_chain(&cpu_chain, CPU_POST_DEAD | mod, + hcpu) == NOTIFY_BAD) + BUG(); + } return err; } -- cgit v1.2.3 From b8d317d10cca76cabe6b03ebfeb23cc99118b731 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Thu, 24 Jul 2008 18:21:29 -0700 Subject: cpumask: make cpumask_of_cpu_map generic If an arch doesn't define cpumask_of_cpu_map, create a generic statically-initialized one for them. This allows removal of the buggy cpumask_of_cpu() macro (&cpumask_of_cpu() gives address of out-of-scope var). An arch with NR_CPUS of 4096 probably wants to allocate this itself based on the actual number of CPUs, since otherwise they're using 2MB of rodata (1024 cpus means 128k). That's what CONFIG_HAVE_CPUMASK_OF_CPU_MAP is for (only x86/64 does so at the moment). In future as we support more CPUs, we'll need to resort to a get_cpu_map()/put_cpu_map() allocation scheme. Signed-off-by: Mike Travis Signed-off-by: Rusty Russell Cc: Andrew Morton Cc: Jack Steiner Signed-off-by: Ingo Molnar --- kernel/cpu.c | 109 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) (limited to 'kernel/cpu.c') diff --git a/kernel/cpu.c b/kernel/cpu.c index 10ba5f1004a5..fe31ff3d3809 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -461,3 +461,112 @@ out: #endif /* CONFIG_PM_SLEEP_SMP */ #endif /* CONFIG_SMP */ + +#ifndef CONFIG_HAVE_CPUMASK_OF_CPU_MAP +/* 64 bits of zeros, for initializers. */ +#if BITS_PER_LONG == 32 +#define Z64 0, 0 +#else +#define Z64 0 +#endif + +/* Initializer macros. */ +#define CMI0(n) { .bits = { 1UL << (n) } } +#define CMI(n, ...) { .bits = { __VA_ARGS__, 1UL << ((n) % BITS_PER_LONG) } } + +#define CMI8(n, ...) \ + CMI((n), __VA_ARGS__), CMI((n)+1, __VA_ARGS__), \ + CMI((n)+2, __VA_ARGS__), CMI((n)+3, __VA_ARGS__), \ + CMI((n)+4, __VA_ARGS__), CMI((n)+5, __VA_ARGS__), \ + CMI((n)+6, __VA_ARGS__), CMI((n)+7, __VA_ARGS__) + +#if BITS_PER_LONG == 32 +#define CMI64(n, ...) \ + CMI8((n), __VA_ARGS__), CMI8((n)+8, __VA_ARGS__), \ + CMI8((n)+16, __VA_ARGS__), CMI8((n)+24, __VA_ARGS__), \ + CMI8((n)+32, 0, __VA_ARGS__), CMI8((n)+40, 0, __VA_ARGS__), \ + CMI8((n)+48, 0, __VA_ARGS__), CMI8((n)+56, 0, __VA_ARGS__) +#else +#define CMI64(n, ...) \ + CMI8((n), __VA_ARGS__), CMI8((n)+8, __VA_ARGS__), \ + CMI8((n)+16, __VA_ARGS__), CMI8((n)+24, __VA_ARGS__), \ + CMI8((n)+32, __VA_ARGS__), CMI8((n)+40, __VA_ARGS__), \ + CMI8((n)+48, __VA_ARGS__), CMI8((n)+56, __VA_ARGS__) +#endif + +#define CMI256(n, ...) \ + CMI64((n), __VA_ARGS__), CMI64((n)+64, Z64, __VA_ARGS__), \ + CMI64((n)+128, Z64, Z64, __VA_ARGS__), \ + CMI64((n)+192, Z64, Z64, Z64, __VA_ARGS__) +#define Z256 Z64, Z64, Z64, Z64 + +#define CMI1024(n, ...) \ + CMI256((n), __VA_ARGS__), \ + CMI256((n)+256, Z256, __VA_ARGS__), \ + CMI256((n)+512, Z256, Z256, __VA_ARGS__), \ + CMI256((n)+768, Z256, Z256, Z256, __VA_ARGS__) +#define Z1024 Z256, Z256, Z256, Z256 + +/* We want this statically initialized, just to be safe. We try not + * to waste too much space, either. */ +static const cpumask_t cpumask_map[] = { + CMI0(0), CMI0(1), CMI0(2), CMI0(3), +#if NR_CPUS > 4 + CMI0(4), CMI0(5), CMI0(6), CMI0(7), +#endif +#if NR_CPUS > 8 + CMI0(8), CMI0(9), CMI0(10), CMI0(11), + CMI0(12), CMI0(13), CMI0(14), CMI0(15), +#endif +#if NR_CPUS > 16 + CMI0(16), CMI0(17), CMI0(18), CMI0(19), + CMI0(20), CMI0(21), CMI0(22), CMI0(23), + CMI0(24), CMI0(25), CMI0(26), CMI0(27), + CMI0(28), CMI0(29), CMI0(30), CMI0(31), +#endif +#if NR_CPUS > 32 +#if BITS_PER_LONG == 32 + CMI(32, 0), CMI(33, 0), CMI(34, 0), CMI(35, 0), + CMI(36, 0), CMI(37, 0), CMI(38, 0), CMI(39, 0), + CMI(40, 0), CMI(41, 0), CMI(42, 0), CMI(43, 0), + CMI(44, 0), CMI(45, 0), CMI(46, 0), CMI(47, 0), + CMI(48, 0), CMI(49, 0), CMI(50, 0), CMI(51, 0), + CMI(52, 0), CMI(53, 0), CMI(54, 0), CMI(55, 0), + CMI(56, 0), CMI(57, 0), CMI(58, 0), CMI(59, 0), + CMI(60, 0), CMI(61, 0), CMI(62, 0), CMI(63, 0), +#else + CMI0(32), CMI0(33), CMI0(34), CMI0(35), + CMI0(36), CMI0(37), CMI0(38), CMI0(39), + CMI0(40), CMI0(41), CMI0(42), CMI0(43), + CMI0(44), CMI0(45), CMI0(46), CMI0(47), + CMI0(48), CMI0(49), CMI0(50), CMI0(51), + CMI0(52), CMI0(53), CMI0(54), CMI0(55), + CMI0(56), CMI0(57), CMI0(58), CMI0(59), + CMI0(60), CMI0(61), CMI0(62), CMI0(63), +#endif /* BITS_PER_LONG == 64 */ +#endif +#if NR_CPUS > 64 + CMI64(64, Z64), +#endif +#if NR_CPUS > 128 + CMI64(128, Z64, Z64), CMI64(192, Z64, Z64, Z64), +#endif +#if NR_CPUS > 256 + CMI256(256, Z256), +#endif +#if NR_CPUS > 512 + CMI256(512, Z256, Z256), CMI256(768, Z256, Z256, Z256), +#endif +#if NR_CPUS > 1024 + CMI1024(1024, Z1024), +#endif +#if NR_CPUS > 2048 + CMI1024(2048, Z1024, Z1024), CMI1024(3072, Z1024, Z1024, Z1024), +#endif +#if NR_CPUS > 4096 +#error NR_CPUS too big. Fix initializers or set CONFIG_HAVE_CPUMASK_OF_CPU_MAP +#endif +}; + +const cpumask_t *cpumask_of_cpu_map = cpumask_map; +#endif /* !CONFIG_HAVE_CPUMASK_OF_CPU_MAP */ -- cgit v1.2.3 From 6524d938b3360504b43a1278b5a8403e85383d1a Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Thu, 24 Jul 2008 18:21:30 -0700 Subject: cpumask: put cpumask_of_cpu_map in the initdata section * Create the cpumask_of_cpu_map statically in the init data section using NR_CPUS but replace it during boot up with one sized by nr_cpu_ids (num possible cpus). Signed-off-by: Mike Travis Cc: Andrew Morton Cc: Jack Steiner Cc: Rusty Russell Signed-off-by: Ingo Molnar --- kernel/cpu.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'kernel/cpu.c') diff --git a/kernel/cpu.c b/kernel/cpu.c index fe31ff3d3809..9d4e1c28c053 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -462,7 +462,6 @@ out: #endif /* CONFIG_SMP */ -#ifndef CONFIG_HAVE_CPUMASK_OF_CPU_MAP /* 64 bits of zeros, for initializers. */ #if BITS_PER_LONG == 32 #define Z64 0, 0 @@ -509,7 +508,11 @@ out: /* We want this statically initialized, just to be safe. We try not * to waste too much space, either. */ -static const cpumask_t cpumask_map[] = { +static const cpumask_t cpumask_map[] +#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP +__initdata +#endif += { CMI0(0), CMI0(1), CMI0(2), CMI0(3), #if NR_CPUS > 4 CMI0(4), CMI0(5), CMI0(6), CMI0(7), @@ -569,4 +572,3 @@ static const cpumask_t cpumask_map[] = { }; const cpumask_t *cpumask_of_cpu_map = cpumask_map; -#endif /* !CONFIG_HAVE_CPUMASK_OF_CPU_MAP */ -- cgit v1.2.3 From 5a7a201c51c324876d00a54e7208af6af12d1ca4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 26 Jul 2008 16:50:47 +0200 Subject: cpumask: export cpumask_of_cpu_map fix: ERROR: "cpumask_of_cpu_map" [drivers/acpi/processor.ko] undefined! ERROR: "cpumask_of_cpu_map" [arch/x86/kernel/microcode.ko] undefined! ERROR: "cpumask_of_cpu_map" [arch/x86/kernel/cpu/cpufreq/speedstep-ich.ko] undefined! ERROR: "cpumask_of_cpu_map" [arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.ko] undefined! Signed-off-by: Ingo Molnar --- kernel/cpu.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/cpu.c') diff --git a/kernel/cpu.c b/kernel/cpu.c index 9d4e1c28c053..a35d8995dc8c 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -572,3 +572,5 @@ __initdata }; const cpumask_t *cpumask_of_cpu_map = cpumask_map; + +EXPORT_SYMBOL_GPL(cpumask_of_cpu_map); -- cgit v1.2.3 From ffdb5976c47609c862917d4c186ecbb5706d2dda Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 28 Jul 2008 12:16:28 -0500 Subject: Simplify stop_machine stop_machine creates a kthread which creates kernel threads. We can create those threads directly and simplify things a little. Some care must be taken with CPU hotunplug, which has special needs, but that code seems more robust than it was in the past. Signed-off-by: Rusty Russell Acked-by: Christian Borntraeger --- kernel/cpu.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'kernel/cpu.c') diff --git a/kernel/cpu.c b/kernel/cpu.c index 10ba5f1004a5..cf79bb911371 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -216,7 +216,6 @@ static int __ref take_cpu_down(void *_param) static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) { int err, nr_calls = 0; - struct task_struct *p; cpumask_t old_allowed, tmp; void *hcpu = (void *)(long)cpu; unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0; @@ -250,19 +249,15 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) cpu_clear(cpu, tmp); set_cpus_allowed_ptr(current, &tmp); - p = __stop_machine_run(take_cpu_down, &tcd_param, cpu); + err = __stop_machine_run(take_cpu_down, &tcd_param, cpu); - if (IS_ERR(p) || cpu_online(cpu)) { + if (err || cpu_online(cpu)) { /* CPU didn't die: tell everyone. Can't complain. */ if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod, hcpu) == NOTIFY_BAD) BUG(); - if (IS_ERR(p)) { - err = PTR_ERR(p); - goto out_allowed; - } - goto out_thread; + goto out_allowed; } /* Wait for it to sleep (leaving idle task). */ @@ -279,8 +274,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) check_for_tasks(cpu); -out_thread: - err = kthread_stop(p); out_allowed: set_cpus_allowed_ptr(current, &old_allowed); out_release: -- cgit v1.2.3 From 04321587584272f4e8b9818f319f40caf8eeee13 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 28 Jul 2008 12:16:29 -0500 Subject: Hotplug CPU: don't check cpu_online after take_cpu_down Akinobu points out that if take_cpu_down() succeeds, the cpu must be offline. Remove the cpu_online() check, and put a BUG_ON(). Quoting Akinobu Mita: Actually the cpu_online() check was necessary before appling this stop_machine: simplify patch. With old __stop_machine_run(), __stop_machine_run() could succeed (return !IS_ERR(p) value) even if take_cpu_down() returned non-zero value. The return value of take_cpu_down() was obtained through kthread_stop().. Signed-off-by: Rusty Russell Cc: "Akinobu Mita" --- kernel/cpu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/cpu.c') diff --git a/kernel/cpu.c b/kernel/cpu.c index cf79bb911371..53cf508f975a 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -250,8 +250,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) set_cpus_allowed_ptr(current, &tmp); err = __stop_machine_run(take_cpu_down, &tcd_param, cpu); - - if (err || cpu_online(cpu)) { + if (err) { /* CPU didn't die: tell everyone. Can't complain. */ if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod, hcpu) == NOTIFY_BAD) @@ -259,6 +258,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) goto out_allowed; } + BUG_ON(cpu_online(cpu)); /* Wait for it to sleep (leaving idle task). */ while (!idle_cpu(cpu)) -- cgit v1.2.3 From eeec4fad963490821348a331cca6102ae1c4a7a3 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 28 Jul 2008 12:16:30 -0500 Subject: stop_machine(): stop_machine_run() changed to use cpu mask Instead of a "cpu" arg with magic values NR_CPUS (any cpu) and ~0 (all cpus), pass a cpumask_t. Allow NULL for the common case (where we don't care which CPU the function is run on): temporary cpumask_t's are usually considered bad for stack space. This deprecates stop_machine_run, to be removed soon when all the callers are dead. Signed-off-by: Rusty Russell --- kernel/cpu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/cpu.c') diff --git a/kernel/cpu.c b/kernel/cpu.c index 53cf508f975a..29510d68338a 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -248,8 +248,9 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) cpus_setall(tmp); cpu_clear(cpu, tmp); set_cpus_allowed_ptr(current, &tmp); + tmp = cpumask_of_cpu(cpu); - err = __stop_machine_run(take_cpu_down, &tcd_param, cpu); + err = __stop_machine(take_cpu_down, &tcd_param, &tmp); if (err) { /* CPU didn't die: tell everyone. Can't complain. */ if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod, -- cgit v1.2.3 From e56b3bc7942982ac2589c942fb345e38bc7a341a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 28 Jul 2008 11:32:33 -0700 Subject: cpu masks: optimize and clean up cpumask_of_cpu() Clean up and optimize cpumask_of_cpu(), by sharing all the zero words. Instead of stupidly generating all possible i=0...NR_CPUS 2^i patterns creating a huge array of constant bitmasks, realize that the zero words can be shared. In other words, on a 64-bit architecture, we only ever need 64 of these arrays - with a different bit set in one single world (with enough zero words around it so that we can create any bitmask by just offsetting in that big array). And then we just put enough zeroes around it that we can point every single cpumask to be one of those things. So when we have 4k CPU's, instead of having 4k arrays (of 4k bits each, with one bit set in each array - 2MB memory total), we have exactly 64 arrays instead, each 8k bits in size (64kB total). And then we just point cpumask(n) to the right position (which we can calculate dynamically). Once we have the right arrays, getting "cpumask(n)" ends up being: static inline const cpumask_t *get_cpu_mask(unsigned int cpu) { const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG]; p -= cpu / BITS_PER_LONG; return (const cpumask_t *)p; } This brings other advantages and simplifications as well: - we are not wasting memory that is just filled with a single bit in various different places - we don't need all those games to re-create the arrays in some dense format, because they're already going to be dense enough. if we compile a kernel for up to 4k CPU's, "wasting" that 64kB of memory is a non-issue (especially since by doing this "overlapping" trick we probably get better cache behaviour anyway). [ mingo@elte.hu: Converted Linus's mails into a commit. See: http://lkml.org/lkml/2008/7/27/156 http://lkml.org/lkml/2008/7/28/320 Also applied a family filter - which also has the side-effect of leaving out the bits where Linus calls me an idio... Oh, never mind ;-) ] Signed-off-by: Ingo Molnar Cc: Rusty Russell Cc: Andrew Morton Cc: Al Viro Cc: Mike Travis Signed-off-by: Ingo Molnar --- kernel/cpu.c | 128 ++++++++++------------------------------------------------- 1 file changed, 20 insertions(+), 108 deletions(-) (limited to 'kernel/cpu.c') diff --git a/kernel/cpu.c b/kernel/cpu.c index a35d8995dc8c..06a8358bb418 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -462,115 +462,27 @@ out: #endif /* CONFIG_SMP */ -/* 64 bits of zeros, for initializers. */ -#if BITS_PER_LONG == 32 -#define Z64 0, 0 -#else -#define Z64 0 -#endif +/* + * cpu_bit_bitmap[] is a special, "compressed" data structure that + * represents all NR_CPUS bits binary values of 1< 4 - CMI0(4), CMI0(5), CMI0(6), CMI0(7), -#endif -#if NR_CPUS > 8 - CMI0(8), CMI0(9), CMI0(10), CMI0(11), - CMI0(12), CMI0(13), CMI0(14), CMI0(15), -#endif -#if NR_CPUS > 16 - CMI0(16), CMI0(17), CMI0(18), CMI0(19), - CMI0(20), CMI0(21), CMI0(22), CMI0(23), - CMI0(24), CMI0(25), CMI0(26), CMI0(27), - CMI0(28), CMI0(29), CMI0(30), CMI0(31), -#endif -#if NR_CPUS > 32 -#if BITS_PER_LONG == 32 - CMI(32, 0), CMI(33, 0), CMI(34, 0), CMI(35, 0), - CMI(36, 0), CMI(37, 0), CMI(38, 0), CMI(39, 0), - CMI(40, 0), CMI(41, 0), CMI(42, 0), CMI(43, 0), - CMI(44, 0), CMI(45, 0), CMI(46, 0), CMI(47, 0), - CMI(48, 0), CMI(49, 0), CMI(50, 0), CMI(51, 0), - CMI(52, 0), CMI(53, 0), CMI(54, 0), CMI(55, 0), - CMI(56, 0), CMI(57, 0), CMI(58, 0), CMI(59, 0), - CMI(60, 0), CMI(61, 0), CMI(62, 0), CMI(63, 0), -#else - CMI0(32), CMI0(33), CMI0(34), CMI0(35), - CMI0(36), CMI0(37), CMI0(38), CMI0(39), - CMI0(40), CMI0(41), CMI0(42), CMI0(43), - CMI0(44), CMI0(45), CMI0(46), CMI0(47), - CMI0(48), CMI0(49), CMI0(50), CMI0(51), - CMI0(52), CMI0(53), CMI0(54), CMI0(55), - CMI0(56), CMI0(57), CMI0(58), CMI0(59), - CMI0(60), CMI0(61), CMI0(62), CMI0(63), -#endif /* BITS_PER_LONG == 64 */ -#endif -#if NR_CPUS > 64 - CMI64(64, Z64), -#endif -#if NR_CPUS > 128 - CMI64(128, Z64, Z64), CMI64(192, Z64, Z64, Z64), -#endif -#if NR_CPUS > 256 - CMI256(256, Z256), -#endif -#if NR_CPUS > 512 - CMI256(512, Z256, Z256), CMI256(768, Z256, Z256, Z256), -#endif -#if NR_CPUS > 1024 - CMI1024(1024, Z1024), -#endif -#if NR_CPUS > 2048 - CMI1024(2048, Z1024, Z1024), CMI1024(3072, Z1024, Z1024, Z1024), -#endif -#if NR_CPUS > 4096 -#error NR_CPUS too big. Fix initializers or set CONFIG_HAVE_CPUMASK_OF_CPU_MAP +const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = { + + MASK_DECLARE_8(0), MASK_DECLARE_8(8), + MASK_DECLARE_8(16), MASK_DECLARE_8(24), +#if BITS_PER_LONG > 32 + MASK_DECLARE_8(32), MASK_DECLARE_8(40), + MASK_DECLARE_8(48), MASK_DECLARE_8(56), #endif }; - -const cpumask_t *cpumask_of_cpu_map = cpumask_map; - -EXPORT_SYMBOL_GPL(cpumask_of_cpu_map); +EXPORT_SYMBOL_GPL(cpu_bit_bitmap); -- cgit v1.2.3 From 279ef6bbb8308488398c8f33b04c760148428378 Mon Sep 17 00:00:00 2001 From: Dmitry Adamushko Date: Wed, 30 Jul 2008 12:34:04 +0200 Subject: sched, cpu hotplug: fix set_cpus_allowed() use in hotplug callbacks Mark Langsdorf reported: > One of my co-workers noticed that the powernow-k8 > driver no longer restarts when a CPU core is > hot-disabled and then hot-enabled on AMD quad-core > systems. > > The following comands work fine on 2.6.26 and fail > on 2.6.27-rc1: > > echo 0 > /sys/devices/system/cpu/cpu3/online > echo 1 > /sys/devices/system/cpu/cpu3/online > find /sys -name cpufreq > > For 2.6.26, the find will return a cpufreq > directory for each processor. In 2.6.27-rc1, > the cpu3 directory is missing. > > After digging through the code, the following > logic is failing when the core is hot-enabled > at runtime. The code works during the boot > sequence. > > cpumask_t = current->cpus_allowed; > set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); > if (smp_processor_id() != cpu) > return -ENODEV; So set the CPU active before calling the CPU_ONLINE notifier chain, there are a handful of notifiers that use set_cpus_allowed(). This fix also solves the problem with x86-microcode. I've sent alternative patches for microcode, but as this "rely on set_cpus_allowed_ptr() being workable in cpu-hotplug(CPU_ONLINE, ...)" assumption seems to be more broad than what we thought, perhaps this fix should be applied. With this patch we define that by the moment CPU_ONLINE is being sent, a 'cpu' is online and ready for tasks to be migrated onto it. Signed-off-by: Dmitry Adamushko Reported-by: Mark Langsdorf Tested-by: Mark Langsdorf Signed-off-by: Ingo Molnar --- kernel/cpu.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'kernel/cpu.c') diff --git a/kernel/cpu.c b/kernel/cpu.c index e202a68d1cc1..c977c339f559 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -349,6 +349,8 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen) goto out_notify; BUG_ON(!cpu_online(cpu)); + cpu_set(cpu, cpu_active_map); + /* Now call notifier in preparation. */ raw_notifier_call_chain(&cpu_chain, CPU_ONLINE | mod, hcpu); @@ -383,9 +385,6 @@ int __cpuinit cpu_up(unsigned int cpu) err = _cpu_up(cpu, 0); - if (cpu_online(cpu)) - cpu_set(cpu, cpu_active_map); - out: cpu_maps_update_done(); return err; -- cgit v1.2.3 From 3ee1062b4ee82a56294808a065b64f4bc6781a56 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 12 Aug 2008 15:08:40 -0700 Subject: cpu hotplug: s390 doesn't support additional_cpus anymore. s390 doesn't support the additional_cpus kernel parameter anymore since a long time. So we better update the code and documentation to reflect that. Cc: Martin Schwidefsky Signed-off-by: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/cpu.c') diff --git a/kernel/cpu.c b/kernel/cpu.c index c977c339f559..f17e9854c246 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -369,7 +369,7 @@ int __cpuinit cpu_up(unsigned int cpu) if (!cpu_isset(cpu, cpu_possible_map)) { printk(KERN_ERR "can't online cpu %d because it is not " "configured as may-hotadd at boot time\n", cpu); -#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) || defined(CONFIG_S390) +#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) printk(KERN_ERR "please check additional_cpus= boot " "parameter\n"); #endif -- cgit v1.2.3