From cf23422b9d76215316855253da491d4c9f294372 Mon Sep 17 00:00:00 2001 From: minskey guo Date: Mon, 24 May 2010 14:32:41 -0700 Subject: cpu/mem hotplug: enable CPUs online before local memory online Enable users to online CPUs even if the CPUs belongs to a numa node which doesn't have onlined local memory. The zonlists(pg_data_t.node_zonelists[]) of a numa node are created either in system boot/init period, or at the time of local memory online. For a numa node without onlined local memory, its zonelists are not initialized at present. As a result, any memory allocation operations executed by CPUs within this node will fail. In fact, an out-of-memory error is triggered when attempt to online CPUs before memory comes to online. This patch tries to create zonelists for such numa nodes, so that the memory allocation for this node can be fallback'ed to other nodes. [akpm@linux-foundation.org: remove unneeded export] [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: minskey guo Cc: Minchan Kim Cc: Yasunori Goto Cc: Andi Kleen Cc: Christoph Lameter Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cpu.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'kernel/cpu.c') diff --git a/kernel/cpu.c b/kernel/cpu.c index 545777574779..a3fbcc0a0abc 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -326,6 +326,12 @@ out_notify: int __cpuinit cpu_up(unsigned int cpu) { int err = 0; + +#ifdef CONFIG_MEMORY_HOTPLUG + int nid; + pg_data_t *pgdat; +#endif + if (!cpu_possible(cpu)) { printk(KERN_ERR "can't online cpu %d because it is not " "configured as may-hotadd at boot time\n", cpu); @@ -336,6 +342,25 @@ int __cpuinit cpu_up(unsigned int cpu) return -EINVAL; } +#ifdef CONFIG_MEMORY_HOTPLUG + nid = cpu_to_node(cpu); + if (!node_online(nid)) { + err = mem_online_node(nid); + if (err) + return err; + } + + pgdat = NODE_DATA(nid); + if (!pgdat) { + printk(KERN_ERR + "Can't online cpu %d due to NULL pgdat\n", cpu); + return -ENOMEM; + } + + if (pgdat->node_zonelists->_zonerefs->zone == NULL) + build_all_zonelists(); +#endif + cpu_maps_update_begin(); if (cpu_hotplug_disabled) { -- cgit v1.2.3 From 1f522509c77a5dea8dc384b735314f03908a6415 Mon Sep 17 00:00:00 2001 From: Haicheng Li Date: Mon, 24 May 2010 14:32:51 -0700 Subject: mem-hotplug: avoid multiple zones sharing same boot strapping boot_pageset For each new populated zone of hotadded node, need to update its pagesets with dynamically allocated per_cpu_pageset struct for all possible CPUs: 1) Detach zone->pageset from the shared boot_pageset at end of __build_all_zonelists(). 2) Use mutex to protect zone->pageset when it's still shared in onlined_pages() Otherwises, multiple zones of different nodes would share same boot strapping boot_pageset for same CPU, which will finally cause below kernel panic: ------------[ cut here ]------------ kernel BUG at mm/page_alloc.c:1239! invalid opcode: 0000 [#1] SMP ... Call Trace: [] __alloc_pages_nodemask+0x131/0x7b0 [] alloc_pages_current+0x87/0xd0 [] __page_cache_alloc+0x67/0x70 [] __do_page_cache_readahead+0x120/0x260 [] ra_submit+0x21/0x30 [] ondemand_readahead+0x166/0x2c0 [] page_cache_async_readahead+0x80/0xa0 [] generic_file_aio_read+0x364/0x670 [] nfs_file_read+0xca/0x130 [] do_sync_read+0xfa/0x140 [] vfs_read+0xb5/0x1a0 [] sys_read+0x51/0x80 [] system_call_fastpath+0x16/0x1b RIP [] get_page_from_freelist+0x883/0x900 RSP ---[ end trace 4bda28328b9990db ] [akpm@linux-foundation.org: merge fix] Signed-off-by: Haicheng Li Signed-off-by: Wu Fengguang Reviewed-by: Andi Kleen Reviewed-by: Christoph Lameter Cc: Mel Gorman Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/cpu.c') diff --git a/kernel/cpu.c b/kernel/cpu.c index a3fbcc0a0abc..3e8b3ba27175 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -358,7 +358,7 @@ int __cpuinit cpu_up(unsigned int cpu) } if (pgdat->node_zonelists->_zonerefs->zone == NULL) - build_all_zonelists(); + build_all_zonelists(NULL); #endif cpu_maps_update_begin(); -- cgit v1.2.3 From 4eaf3f64397c3db3c5785eee508270d62a9fabd9 Mon Sep 17 00:00:00 2001 From: Haicheng Li Date: Mon, 24 May 2010 14:32:52 -0700 Subject: mem-hotplug: fix potential race while building zonelist for new populated zone Add global mutex zonelists_mutex to fix the possible race: CPU0 CPU1 CPU2 (1) zone->present_pages += online_pages; (2) build_all_zonelists(); (3) alloc_page(); (4) free_page(); (5) build_all_zonelists(); (6) __build_all_zonelists(); (7) zone->pageset = alloc_percpu(); In step (3,4), zone->pageset still points to boot_pageset, so bad things may happen if 2+ nodes are in this state. Even if only 1 node is accessing the boot_pageset, (3) may still consume too much memory to fail the memory allocations in step (7). Besides, atomic operation ensures alloc_percpu() in step (7) will never fail since there is a new fresh memory block added in step(6). [haicheng.li@linux.intel.com: hold zonelists_mutex when build_all_zonelists] Signed-off-by: Haicheng Li Signed-off-by: Wu Fengguang Reviewed-by: Andi Kleen Cc: Christoph Lameter Cc: Mel Gorman Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cpu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'kernel/cpu.c') diff --git a/kernel/cpu.c b/kernel/cpu.c index 3e8b3ba27175..124ad9d6be16 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -357,8 +357,11 @@ int __cpuinit cpu_up(unsigned int cpu) return -ENOMEM; } - if (pgdat->node_zonelists->_zonerefs->zone == NULL) + if (pgdat->node_zonelists->_zonerefs->zone == NULL) { + mutex_lock(&zonelists_mutex); build_all_zonelists(NULL); + mutex_unlock(&zonelists_mutex); + } #endif cpu_maps_update_begin(); -- cgit v1.2.3 From e9fb7631ebcdc9467cbb736337546a42f7b7f28e Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Wed, 26 May 2010 14:43:28 -0700 Subject: cpu-hotplug: introduce cpu_notify(), __cpu_notify(), cpu_notify_nofail() No functional change. These are just wrappers of raw_cpu_notifier_call_chain. Signed-off-by: Akinobu Mita Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cpu.c | 54 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 23 deletions(-) (limited to 'kernel/cpu.c') diff --git a/kernel/cpu.c b/kernel/cpu.c index 124ad9d6be16..09207c772c25 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -134,6 +134,26 @@ int __ref register_cpu_notifier(struct notifier_block *nb) return ret; } +static int __cpu_notify(unsigned long val, void *v, int nr_to_call, + int *nr_calls) +{ + return __raw_notifier_call_chain(&cpu_chain, val, v, nr_to_call, + nr_calls); +} + +static int cpu_notify(unsigned long val, void *v) +{ + return __cpu_notify(val, v, -1, NULL); +} + +static void cpu_notify_nofail(unsigned long val, void *v) +{ + int err; + + err = cpu_notify(val, v); + BUG_ON(err == NOTIFY_BAD); +} + #ifdef CONFIG_HOTPLUG_CPU EXPORT_SYMBOL(register_cpu_notifier); @@ -181,8 +201,7 @@ static int __ref take_cpu_down(void *_param) if (err < 0) return err; - raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod, - param->hcpu); + cpu_notify(CPU_DYING | param->mod, param->hcpu); if (task_cpu(param->caller) == cpu) move_task_off_dead_cpu(cpu, param->caller); @@ -212,14 +231,12 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) cpu_hotplug_begin(); set_cpu_active(cpu, false); - err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod, - hcpu, -1, &nr_calls); + err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls); if (err == NOTIFY_BAD) { set_cpu_active(cpu, true); nr_calls--; - __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod, - hcpu, nr_calls, NULL); + __cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL); printk("%s: attempt to take down CPU %u failed\n", __func__, cpu); err = -EINVAL; @@ -230,9 +247,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) if (err) { set_cpu_active(cpu, true); /* CPU didn't die: tell everyone. Can't complain. */ - if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod, - hcpu) == NOTIFY_BAD) - BUG(); + cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu); goto out_release; } @@ -246,19 +261,14 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) __cpu_die(cpu); /* CPU is completely dead: tell everyone. Too late to complain. */ - if (raw_notifier_call_chain(&cpu_chain, CPU_DEAD | mod, - hcpu) == NOTIFY_BAD) - BUG(); + cpu_notify_nofail(CPU_DEAD | mod, hcpu); check_for_tasks(cpu); out_release: cpu_hotplug_done(); - if (!err) { - if (raw_notifier_call_chain(&cpu_chain, CPU_POST_DEAD | mod, - hcpu) == NOTIFY_BAD) - BUG(); - } + if (!err) + cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu); return err; } @@ -293,8 +303,7 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen) return -EINVAL; cpu_hotplug_begin(); - ret = __raw_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE | mod, hcpu, - -1, &nr_calls); + ret = __cpu_notify(CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls); if (ret == NOTIFY_BAD) { nr_calls--; printk("%s: attempt to bring up CPU %u failed\n", @@ -312,12 +321,11 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen) set_cpu_active(cpu, true); /* Now call notifier in preparation. */ - raw_notifier_call_chain(&cpu_chain, CPU_ONLINE | mod, hcpu); + cpu_notify(CPU_ONLINE | mod, hcpu); out_notify: if (ret != 0) - __raw_notifier_call_chain(&cpu_chain, - CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL); + __cpu_notify(CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL); cpu_hotplug_done(); return ret; @@ -481,7 +489,7 @@ void __cpuinit notify_cpu_starting(unsigned int cpu) if (frozen_cpus != NULL && cpumask_test_cpu(cpu, frozen_cpus)) val = CPU_STARTING_FROZEN; #endif /* CONFIG_PM_SLEEP_SMP */ - raw_notifier_call_chain(&cpu_chain, val, (void *)(long)cpu); + cpu_notify(val, (void *)(long)cpu); } #endif /* CONFIG_SMP */ -- cgit v1.2.3 From e6bde73b07edeb703d4c89c1daabc09c303de11f Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Wed, 26 May 2010 14:43:29 -0700 Subject: cpu-hotplug: return better errno on cpu hotplug failure Currently, onlining or offlining a CPU failure by one of the cpu notifiers error always cause -EINVAL error. (i.e. writing 0 or 1 to /sys/devices/system/cpu/cpuX/online gets EINVAL) To get better error reporting rather than always getting -EINVAL, This changes cpu_notify() to return -errno value with notifier_to_errno() and fix the callers. Now that cpu notifiers can return encapsulate errno value. Currently, all cpu hotplug notifiers return NOTIFY_OK, NOTIFY_BAD, or NOTIFY_DONE. So cpu_notify() can returns 0 or -EPERM with this change for now. (notifier_to_errno(NOTIFY_OK) == 0, notifier_to_errno(NOTIFY_DONE) == 0, notifier_to_errno(NOTIFY_BAD) == -EPERM) Forthcoming patches convert several cpu notifiers to return encapsulate errno value with notifier_from_errno(). Signed-off-by: Akinobu Mita Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cpu.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'kernel/cpu.c') diff --git a/kernel/cpu.c b/kernel/cpu.c index 09207c772c25..0690ac27a253 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -137,8 +137,12 @@ int __ref register_cpu_notifier(struct notifier_block *nb) static int __cpu_notify(unsigned long val, void *v, int nr_to_call, int *nr_calls) { - return __raw_notifier_call_chain(&cpu_chain, val, v, nr_to_call, + int ret; + + ret = __raw_notifier_call_chain(&cpu_chain, val, v, nr_to_call, nr_calls); + + return notifier_to_errno(ret); } static int cpu_notify(unsigned long val, void *v) @@ -151,7 +155,7 @@ static void cpu_notify_nofail(unsigned long val, void *v) int err; err = cpu_notify(val, v); - BUG_ON(err == NOTIFY_BAD); + BUG_ON(err); } #ifdef CONFIG_HOTPLUG_CPU @@ -232,14 +236,13 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) cpu_hotplug_begin(); set_cpu_active(cpu, false); err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls); - if (err == NOTIFY_BAD) { + if (err) { set_cpu_active(cpu, true); nr_calls--; __cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL); printk("%s: attempt to take down CPU %u failed\n", __func__, cpu); - err = -EINVAL; goto out_release; } @@ -304,11 +307,10 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen) cpu_hotplug_begin(); ret = __cpu_notify(CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls); - if (ret == NOTIFY_BAD) { + if (ret) { nr_calls--; printk("%s: attempt to bring up CPU %u failed\n", __func__, cpu); - ret = -EINVAL; goto out_notify; } -- cgit v1.2.3 From 79a6cdeb7eb54e3d2d4bb9fc5f0231b057882a87 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 26 May 2010 14:43:36 -0700 Subject: cpuhotplug: do not need cpu_hotplug_begin() when CONFIG_HOTPLUG_CPU=n Since when CONFIG_HOTPLUG_CPU=n, get_online_cpus() do nothing, so we don't need cpu_hotplug_begin() either. This patch moves cpu_hotplug_begin()/cpu_hotplug_done() into the code block of CONFIG_HOTPLUG_CPU=y. Signed-off-by: Lai Jiangshan Cc: Gautham R Shenoy Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cpu.c | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) (limited to 'kernel/cpu.c') diff --git a/kernel/cpu.c b/kernel/cpu.c index 0690ac27a253..63e8de13c948 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -20,6 +20,20 @@ /* Serializes the updates to cpu_online_mask, cpu_present_mask */ static DEFINE_MUTEX(cpu_add_remove_lock); +/* + * The following two API's must be used when attempting + * to serialize the updates to cpu_online_mask, cpu_present_mask. + */ +void cpu_maps_update_begin(void) +{ + mutex_lock(&cpu_add_remove_lock); +} + +void cpu_maps_update_done(void) +{ + mutex_unlock(&cpu_add_remove_lock); +} + static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain); /* If set, cpu_up and cpu_down will return -EBUSY and do nothing. @@ -27,6 +41,8 @@ static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain); */ static int cpu_hotplug_disabled; +#ifdef CONFIG_HOTPLUG_CPU + static struct { struct task_struct *active_writer; struct mutex lock; /* Synchronizes accesses to refcount, */ @@ -41,8 +57,6 @@ static struct { .refcount = 0, }; -#ifdef CONFIG_HOTPLUG_CPU - void get_online_cpus(void) { might_sleep(); @@ -67,22 +81,6 @@ void put_online_cpus(void) } EXPORT_SYMBOL_GPL(put_online_cpus); -#endif /* CONFIG_HOTPLUG_CPU */ - -/* - * The following two API's must be used when attempting - * to serialize the updates to cpu_online_mask, cpu_present_mask. - */ -void cpu_maps_update_begin(void) -{ - mutex_lock(&cpu_add_remove_lock); -} - -void cpu_maps_update_done(void) -{ - mutex_unlock(&cpu_add_remove_lock); -} - /* * This ensures that the hotplug operation can begin only when the * refcount goes to zero. @@ -124,6 +122,12 @@ static void cpu_hotplug_done(void) cpu_hotplug.active_writer = NULL; mutex_unlock(&cpu_hotplug.lock); } + +#else /* #if CONFIG_HOTPLUG_CPU */ +static void cpu_hotplug_begin(void) {} +static void cpu_hotplug_done(void) {} +#endif /* #esle #if CONFIG_HOTPLUG_CPU */ + /* Need to know about CPUs going up/down? */ int __ref register_cpu_notifier(struct notifier_block *nb) { -- cgit v1.2.3 From 00b9b0af5887fed54e899e3b7f5c2ccf5e739def Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 27 May 2010 10:32:08 -0700 Subject: Avoid warning when CPU hotplug isn't enabled Commit e9fb7631ebcd ("cpu-hotplug: introduce cpu_notify(), __cpu_notify(), cpu_notify_nofail()") also introduced this annoying warning: kernel/cpu.c:157: warning: 'cpu_notify_nofail' defined but not used when CONFIG_HOTPLUG_CPU wasn't set. So move that helper inside the #ifdef CONFIG_HOTPLUG_CPU region, and simplify it while at it. Signed-off-by: Linus Torvalds --- kernel/cpu.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'kernel/cpu.c') diff --git a/kernel/cpu.c b/kernel/cpu.c index 63e8de13c948..3097382eb44a 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -154,16 +154,13 @@ static int cpu_notify(unsigned long val, void *v) return __cpu_notify(val, v, -1, NULL); } +#ifdef CONFIG_HOTPLUG_CPU + static void cpu_notify_nofail(unsigned long val, void *v) { - int err; - - err = cpu_notify(val, v); - BUG_ON(err); + BUG_ON(cpu_notify(val, v)); } -#ifdef CONFIG_HOTPLUG_CPU - EXPORT_SYMBOL(register_cpu_notifier); void __ref unregister_cpu_notifier(struct notifier_block *nb) -- cgit v1.2.3 From e9a5f426b85e429bffaee4e0b086b1e742a39fa6 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 27 May 2010 22:16:22 +0200 Subject: CPU: Avoid using unititialized error variable in disable_nonboot_cpus() If there's only one CPU online when disable_nonboot_cpus() is called, the error variable will not be initialized and that may lead to erroneous behavior. Fix this issue by initializing error in disable_nonboot_cpus() as appropriate. Signed-off-by: Rafael J. Wysocki Signed-off-by: Linus Torvalds --- kernel/cpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/cpu.c') diff --git a/kernel/cpu.c b/kernel/cpu.c index 3097382eb44a..8b92539b4754 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -394,7 +394,7 @@ static cpumask_var_t frozen_cpus; int disable_nonboot_cpus(void) { - int cpu, first_cpu, error; + int cpu, first_cpu, error = 0; cpu_maps_update_begin(); first_cpu = cpumask_first(cpu_online_mask); -- cgit v1.2.3