From 02af61bb50f5d5f0322dbe5ab2a0d75808d25c7b Mon Sep 17 00:00:00 2001 From: Zhaolei Date: Fri, 10 Apr 2009 14:26:18 +0800 Subject: tracing, kmemtrace: Separate include/trace/kmemtrace.h to kmemtrace part and tracepoint part Impact: refactor code for future changes Current kmemtrace.h is used both as header file of kmemtrace and kmem's tracepoints definition. Tracepoints' definition file may be used by other code, and should only have definition of tracepoint. We can separate include/trace/kmemtrace.h into 2 files: include/linux/kmemtrace.h: header file for kmemtrace include/trace/kmem.h: definition of kmem tracepoints Signed-off-by: Zhao Lei Acked-by: Eduard - Gabriel Munteanu Acked-by: Pekka Enberg Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Tom Zanussi LKML-Reference: <49DEE68A.5040902@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- init/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'init/main.c') diff --git a/init/main.c b/init/main.c index 3585f073d636..eece40cd8a64 100644 --- a/init/main.c +++ b/init/main.c @@ -64,6 +64,7 @@ #include #include #include +#include #include #include @@ -71,7 +72,6 @@ #include #include #include -#include #ifdef CONFIG_X86_LOCAL_APIC #include -- cgit v1.2.3 From 3c7b4e6b8be4c16f1e6e5c558e33b7ff0db2dfaf Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 11 Jun 2009 13:22:39 +0100 Subject: kmemleak: Add the base support This patch adds the base support for the kernel memory leak detector. It traces the memory allocation/freeing in a way similar to the Boehm's conservative garbage collector, the difference being that the unreferenced objects are not freed but only shown in /sys/kernel/debug/kmemleak. Enabling this feature introduces an overhead to memory allocations. Signed-off-by: Catalin Marinas Cc: Ingo Molnar Acked-by: Pekka Enberg Cc: Andrew Morton Reviewed-by: Paul E. McKenney --- init/main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'init/main.c') diff --git a/init/main.c b/init/main.c index bb7dc57eee36..9d759d68d7a0 100644 --- a/init/main.c +++ b/init/main.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include #include @@ -603,6 +604,7 @@ asmlinkage void __init start_kernel(void) /* init some links before init_ISA_irqs() */ early_irq_init(); init_IRQ(); + prio_tree_init(); pidhash_init(); init_timers(); hrtimers_init(); @@ -654,6 +656,7 @@ asmlinkage void __init start_kernel(void) cpu_hotplug_init(); kmem_cache_init(); kmemtrace_init(); + kmemleak_init(); debug_objects_mem_init(); idr_init_cache(); setup_per_cpu_pageset(); @@ -663,7 +666,6 @@ asmlinkage void __init start_kernel(void) calibrate_delay(); pidmap_init(); pgtable_cache_init(); - prio_tree_init(); anon_vma_init(); #ifdef CONFIG_X86 if (efi_enabled) -- cgit v1.2.3 From 83b519e8b9572c319c8e0c615ee5dd7272856090 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Wed, 10 Jun 2009 19:40:04 +0300 Subject: slab: setup allocators earlier in the boot sequence This patch makes kmalloc() available earlier in the boot sequence so we can get rid of some bootmem allocations. The bulk of the changes are due to kmem_cache_init() being called with interrupts disabled which requires some changes to allocator boostrap code. Note: 32-bit x86 does WP protect test in mem_init() so we must setup traps before we call mem_init() during boot as reported by Ingo Molnar: We have a hard crash in the WP-protect code: [ 0.000000] Checking if this processor honours the WP bit even in supervisor mode...BUG: Int 14: CR2 ffcff000 [ 0.000000] EDI 00000188 ESI 00000ac7 EBP c17eaf9c ESP c17eaf8c [ 0.000000] EBX 000014e0 EDX 0000000e ECX 01856067 EAX 00000001 [ 0.000000] err 00000003 EIP c10135b1 CS 00000060 flg 00010002 [ 0.000000] Stack: c17eafa8 c17fd410 c16747bc c17eafc4 c17fd7e5 000011fd f8616000 c18237cc [ 0.000000] 00099800 c17bb000 c17eafec c17f1668 000001c5 c17f1322 c166e039 c1822bf0 [ 0.000000] c166e033 c153a014 c18237cc 00020800 c17eaff8 c17f106a 00020800 01ba5003 [ 0.000000] Pid: 0, comm: swapper Not tainted 2.6.30-tip-02161-g7a74539-dirty #52203 [ 0.000000] Call Trace: [ 0.000000] [] ? printk+0x14/0x16 [ 0.000000] [] ? do_test_wp_bit+0x19/0x23 [ 0.000000] [] ? test_wp_bit+0x26/0x64 [ 0.000000] [] ? mem_init+0x1ba/0x1d8 [ 0.000000] [] ? start_kernel+0x164/0x2f7 [ 0.000000] [] ? unknown_bootoption+0x0/0x19c [ 0.000000] [] ? __init_begin+0x6a/0x6f Acked-by: Johannes Weiner Acked-by Linus Torvalds Cc: Christoph Lameter Cc: Ingo Molnar Cc: Matt Mackall Cc: Nick Piggin Cc: Yinghai Lu Signed-off-by: Pekka Enberg --- init/main.c | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) (limited to 'init/main.c') diff --git a/init/main.c b/init/main.c index bb7dc57eee36..0ab82a453de5 100644 --- a/init/main.c +++ b/init/main.c @@ -574,6 +574,28 @@ asmlinkage void __init start_kernel(void) setup_nr_cpu_ids(); smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ + build_all_zonelists(); + page_alloc_init(); + + printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line); + parse_early_param(); + parse_args("Booting kernel", static_command_line, __start___param, + __stop___param - __start___param, + &unknown_bootoption); + /* + * These use large bootmem allocations and must precede + * kmem_cache_init() + */ + pidhash_init(); + vmalloc_init(); + vfs_caches_init_early(); + sort_main_extable(); + trap_init(); + /* + * Set up kernel memory allocators + */ + mem_init(); + kmem_cache_init(); /* * Set up the scheduler prior starting any interrupts (such as the * timer interrupt). Full topology setup happens at smp_init() @@ -585,25 +607,15 @@ asmlinkage void __init start_kernel(void) * fragile until we cpu_idle() for the first time. */ preempt_disable(); - build_all_zonelists(); - page_alloc_init(); - printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line); - parse_early_param(); - parse_args("Booting kernel", static_command_line, __start___param, - __stop___param - __start___param, - &unknown_bootoption); if (!irqs_disabled()) { printk(KERN_WARNING "start_kernel(): bug: interrupts were " "enabled *very* early, fixing it\n"); local_irq_disable(); } - sort_main_extable(); - trap_init(); rcu_init(); /* init some links before init_ISA_irqs() */ early_irq_init(); init_IRQ(); - pidhash_init(); init_timers(); hrtimers_init(); softirq_init(); @@ -645,14 +657,10 @@ asmlinkage void __init start_kernel(void) initrd_start = 0; } #endif - vmalloc_init(); - vfs_caches_init_early(); cpuset_init_early(); page_cgroup_init(); - mem_init(); enable_debug_pagealloc(); cpu_hotplug_init(); - kmem_cache_init(); kmemtrace_init(); debug_objects_mem_init(); idr_init_cache(); -- cgit v1.2.3 From 43ebdac42f16037263b52a5aeedcd1bfa4a9bb29 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Mon, 25 May 2009 15:01:35 +0300 Subject: vmalloc: use kzalloc() instead of alloc_bootmem() We can call vmalloc_init() after kmem_cache_init() and use kzalloc() instead of the bootmem allocator when initializing vmalloc data structures. Acked-by: Johannes Weiner Acked-by: Linus Torvalds Acked-by: Nick Piggin Cc: Ingo Molnar Cc: Yinghai Lu Signed-off-by: Pekka Enberg --- init/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'init/main.c') diff --git a/init/main.c b/init/main.c index 0ab82a453de5..6d38f9607d14 100644 --- a/init/main.c +++ b/init/main.c @@ -587,7 +587,6 @@ asmlinkage void __init start_kernel(void) * kmem_cache_init() */ pidhash_init(); - vmalloc_init(); vfs_caches_init_early(); sort_main_extable(); trap_init(); @@ -596,6 +595,7 @@ asmlinkage void __init start_kernel(void) */ mem_init(); kmem_cache_init(); + vmalloc_init(); /* * Set up the scheduler prior starting any interrupts (such as the * timer interrupt). Full topology setup happens at smp_init() -- cgit v1.2.3 From 444f478f65c7ca4606f9965b31feed13fe2bc9fa Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 11 Jun 2009 18:29:06 +0300 Subject: init: introduce mm_init() As suggested by Christoph Lameter, introduce mm_init() now that we initialize all the kernel memory allocations together. Cc: Christoph Lameter Signed-off-by: Pekka Enberg --- init/main.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'init/main.c') diff --git a/init/main.c b/init/main.c index 6d38f9607d14..7917695bf71e 100644 --- a/init/main.c +++ b/init/main.c @@ -533,6 +533,16 @@ void __init __weak thread_info_cache_init(void) { } +/* + * Set up kernel memory allocators + */ +static void __init mm_init(void) +{ + mem_init(); + kmem_cache_init(); + vmalloc_init(); +} + asmlinkage void __init start_kernel(void) { char * command_line; @@ -590,12 +600,7 @@ asmlinkage void __init start_kernel(void) vfs_caches_init_early(); sort_main_extable(); trap_init(); - /* - * Set up kernel memory allocators - */ - mem_init(); - kmem_cache_init(); - vmalloc_init(); + mm_init(); /* * Set up the scheduler prior starting any interrupts (such as the * timer interrupt). Full topology setup happens at smp_init() -- cgit v1.2.3 From ca371c0d7e23d0d0afae65fc83a0e91cf7399573 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Fri, 12 Jun 2009 10:33:53 +0300 Subject: memcg: fix page_cgroup fatal error in FLATMEM Now, SLAB is configured in very early stage and it can be used in init routine now. But replacing alloc_bootmem() in FLAT/DISCONTIGMEM's page_cgroup() initialization breaks the allocation, now. (Works well in SPARSEMEM case...it supports MEMORY_HOTPLUG and size of page_cgroup is in reasonable size (< 1 << MAX_ORDER.) This patch revive FLATMEM+memory cgroup by using alloc_bootmem. In future, We stop to support FLATMEM (if no users) or rewrite codes for flatmem completely.But this will adds more messy codes and overheads. Reported-by: Li Zefan Tested-by: Li Zefan Tested-by: Ingo Molnar Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Pekka Enberg --- init/main.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'init/main.c') diff --git a/init/main.c b/init/main.c index 5616661eac01..b3e8f14c568a 100644 --- a/init/main.c +++ b/init/main.c @@ -539,6 +539,11 @@ void __init __weak thread_info_cache_init(void) */ static void __init mm_init(void) { + /* + * page_cgroup requires countinous pages as memmap + * and it's bigger than MAX_ORDER unless SPARSEMEM. + */ + page_cgroup_init_flatmem(); mem_init(); kmem_cache_init(); vmalloc_init(); -- cgit v1.2.3 From 7e85ee0c1d15ca5f8bff0f514f158eba1742dd87 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Fri, 12 Jun 2009 14:03:06 +0300 Subject: slab,slub: don't enable interrupts during early boot As explained by Benjamin Herrenschmidt: Oh and btw, your patch alone doesn't fix powerpc, because it's missing a whole bunch of GFP_KERNEL's in the arch code... You would have to grep the entire kernel for things that check slab_is_available() and even then you'll be missing some. For example, slab_is_available() didn't always exist, and so in the early days on powerpc, we used a mem_init_done global that is set form mem_init() (not perfect but works in practice). And we still have code using that to do the test. Therefore, mask out __GFP_WAIT, __GFP_IO, and __GFP_FS in the slab allocators in early boot code to avoid enabling interrupts. Signed-off-by: Pekka Enberg --- init/main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'init/main.c') diff --git a/init/main.c b/init/main.c index b3e8f14c568a..f6204f712e7c 100644 --- a/init/main.c +++ b/init/main.c @@ -640,6 +640,7 @@ asmlinkage void __init start_kernel(void) "enabled early\n"); early_boot_irqs_on(); local_irq_enable(); + kmem_cache_init_late(); /* * HACK ALERT! This is early. We're enabling the console before -- cgit v1.2.3 From 4a7a16dc061e4c57bf288150f51bd4c2ace33723 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 12 Jun 2009 20:42:08 -0400 Subject: ACPI: move declaration acpi_early_init() to acpi.h Signed-off-by: Len Brown --- init/main.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'init/main.c') diff --git a/init/main.c b/init/main.c index d721dad05dd7..f1b9f0fdb1b4 100644 --- a/init/main.c +++ b/init/main.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -86,11 +87,6 @@ extern void sbus_init(void); extern void prio_tree_init(void); extern void radix_tree_init(void); extern void free_initmem(void); -#ifdef CONFIG_ACPI -extern void acpi_early_init(void); -#else -static inline void acpi_early_init(void) { } -#endif #ifndef CONFIG_DEBUG_RODATA static inline void mark_rodata_ro(void) { } #endif -- cgit v1.2.3 From dfec072ecd35ba6ecad2d51dde325253ac9a2936 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Fri, 4 Apr 2008 00:51:41 +0200 Subject: kmemcheck: add the kmemcheck core General description: kmemcheck is a patch to the linux kernel that detects use of uninitialized memory. It does this by trapping every read and write to memory that was allocated dynamically (e.g. using kmalloc()). If a memory address is read that has not previously been written to, a message is printed to the kernel log. Thanks to Andi Kleen for the set_memory_4k() solution. Andrew Morton suggested documenting the shadow member of struct page. Signed-off-by: Vegard Nossum Signed-off-by: Pekka Enberg [export kmemcheck_mark_initialized] [build fix for setup_max_cpus] Signed-off-by: Ingo Molnar [rebased for mainline inclusion] Signed-off-by: Vegard Nossum --- init/main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'init/main.c') diff --git a/init/main.c b/init/main.c index 5616661eac01..e3c335e47cd2 100644 --- a/init/main.c +++ b/init/main.c @@ -65,6 +65,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3 From 58568d2a8215cb6f55caf2332017d7bdff954e1c Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Tue, 16 Jun 2009 15:31:49 -0700 Subject: cpuset,mm: update tasks' mems_allowed in time Fix allocating page cache/slab object on the unallowed node when memory spread is set by updating tasks' mems_allowed after its cpuset's mems is changed. In order to update tasks' mems_allowed in time, we must modify the code of memory policy. Because the memory policy is applied in the process's context originally. After applying this patch, one task directly manipulates anothers mems_allowed, and we use alloc_lock in the task_struct to protect mems_allowed and memory policy of the task. But in the fast path, we didn't use lock to protect them, because adding a lock may lead to performance regression. But if we don't add a lock,the task might see no nodes when changing cpuset's mems_allowed to some non-overlapping set. In order to avoid it, we set all new allowed nodes, then clear newly disallowed ones. [lee.schermerhorn@hp.com: The rework of mpol_new() to extract the adjusting of the node mask to apply cpuset and mpol flags "context" breaks set_mempolicy() and mbind() with MPOL_PREFERRED and a NULL nodemask--i.e., explicit local allocation. Fix this by adding the check for MPOL_PREFERRED and empty node mask to mpol_new_mpolicy(). Remove the now unneeded 'nodes = NULL' from mpol_new(). Note that mpol_new_mempolicy() is always called with a non-NULL 'nodes' parameter now that it has been removed from mpol_new(). Therefore, we don't need to test nodes for NULL before testing it for 'empty'. However, just to be extra paranoid, add a VM_BUG_ON() to verify this assumption.] [lee.schermerhorn@hp.com: I don't think the function name 'mpol_new_mempolicy' is descriptive enough to differentiate it from mpol_new(). This function applies cpuset set context, usually constraining nodes to those allowed by the cpuset. However, when the 'RELATIVE_NODES flag is set, it also translates the nodes. So I settled on 'mpol_set_nodemask()', because the comment block for mpol_new() mentions that we need to call this function to "set nodes". Some additional minor line length, whitespace and typo cleanup.] Signed-off-by: Miao Xie Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Christoph Lameter Cc: Paul Menage Cc: Nick Piggin Cc: Yasunori Goto Cc: Pekka Enberg Cc: David Rientjes Signed-off-by: Lee Schermerhorn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'init/main.c') diff --git a/init/main.c b/init/main.c index f6204f712e7c..5e0d3f047eaf 100644 --- a/init/main.c +++ b/init/main.c @@ -670,7 +670,6 @@ asmlinkage void __init start_kernel(void) initrd_start = 0; } #endif - cpuset_init_early(); page_cgroup_init(); enable_debug_pagealloc(); cpu_hotplug_init(); @@ -867,6 +866,11 @@ static noinline int init_post(void) static int __init kernel_init(void * unused) { lock_kernel(); + + /* + * init can allocate pages on any node + */ + set_mems_allowed(node_possible_map); /* * init can run on any cpu. */ -- cgit v1.2.3 From c868d550115b9ccc0027c67265b9520790f05601 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 17 Jun 2009 13:48:39 +1000 Subject: mm: Move pgtable_cache_init() earlier Some architectures need to initialize SLAB caches to be able to allocate page tables. They do that from pgtable_cache_init() so the later should be called earlier now, best is before vmalloc_init(). Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Linus Torvalds --- init/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'init/main.c') diff --git a/init/main.c b/init/main.c index 7756ddad3c85..0e7aedeaa05f 100644 --- a/init/main.c +++ b/init/main.c @@ -547,6 +547,7 @@ static void __init mm_init(void) page_cgroup_init_flatmem(); mem_init(); kmem_cache_init(); + pgtable_cache_init(); vmalloc_init(); } @@ -684,7 +685,6 @@ asmlinkage void __init start_kernel(void) late_time_init(); calibrate_delay(); pidmap_init(); - pgtable_cache_init(); anon_vma_init(); #ifdef CONFIG_X86 if (efi_enabled) -- cgit v1.2.3 From b99b87f70c7785ab1e253c6220f4b0b57ce3a7f7 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Wed, 17 Jun 2009 16:28:03 -0700 Subject: kernel: constructor support Call constructors (gcc-generated initcall-like functions) during kernel start and module load. Constructors are e.g. used for gcov data initialization. Disable constructor support for usermode Linux to prevent conflicts with host glibc. Signed-off-by: Peter Oberparleiter Acked-by: Rusty Russell Acked-by: WANG Cong Cc: Sam Ravnborg Cc: Jeff Dike Cc: Andi Kleen Cc: Huang Ying Cc: Li Wei Cc: Michael Ellerman Cc: Ingo Molnar Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/main.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'init/main.c') diff --git a/init/main.c b/init/main.c index 0e7aedeaa05f..1a65fdd06318 100644 --- a/init/main.c +++ b/init/main.c @@ -720,6 +720,17 @@ asmlinkage void __init start_kernel(void) rest_init(); } +/* Call all constructor functions linked into the kernel. */ +static void __init do_ctors(void) +{ +#ifdef CONFIG_CONSTRUCTORS + ctor_fn_t *call = (ctor_fn_t *) __ctors_start; + + for (; call < (ctor_fn_t *) __ctors_end; call++) + (*call)(); +#endif +} + int initcall_debug; core_param(initcall_debug, initcall_debug, bool, 0644); @@ -800,6 +811,7 @@ static void __init do_basic_setup(void) usermodehelper_init(); driver_init(); init_irq_proc(); + do_ctors(); do_initcalls(); } -- cgit v1.2.3 From dcce284a259373f9e5570f2e33f79eca84fcf565 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 18 Jun 2009 13:24:12 +1000 Subject: mm: Extend gfp masking to the page allocator The page allocator also needs the masking of gfp flags during boot, so this moves it out of slab/slub and uses it with the page allocator as well. Signed-off-by: Benjamin Herrenschmidt Acked-by: Pekka Enberg Signed-off-by: Linus Torvalds --- init/main.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'init/main.c') diff --git a/init/main.c b/init/main.c index 1a65fdd06318..09131ec090c1 100644 --- a/init/main.c +++ b/init/main.c @@ -642,6 +642,10 @@ asmlinkage void __init start_kernel(void) "enabled early\n"); early_boot_irqs_on(); local_irq_enable(); + + /* Interrupts are enabled now so all GFP allocations are safe. */ + set_gfp_allowed_mask(__GFP_BITS_MASK); + kmem_cache_init_late(); /* -- cgit v1.2.3 From 31950eb66ff47c946fd9c65c2f8c94b6b7ba13fc Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 22 Jun 2009 21:18:12 -0700 Subject: mm/init: cpu_hotplug_init() must be initialized before SLAB SLAB uses get/put_online_cpus() which use a mutex which is itself only initialized when cpu_hotplug_init() is called. Currently we hang suring boot in SLAB due to doing that too late. Reported by James Bottomley and Sachin Sant (and possibly others). Debugged by Benjamin Herrenschmidt. This just removes the dynamic initialization of the data structures, and replaces it with a static one, avoiding this dependency entirely, and removing one unnecessary special initcall. Tested-by: Sachin Sant Tested-by: James Bottomley Tested-by: Benjamin Herrenschmidt Signed-off-by: Linus Torvalds --- init/main.c | 1 - 1 file changed, 1 deletion(-) (limited to 'init/main.c') diff --git a/init/main.c b/init/main.c index 09131ec090c1..4870dfeb9ee5 100644 --- a/init/main.c +++ b/init/main.c @@ -678,7 +678,6 @@ asmlinkage void __init start_kernel(void) #endif page_cgroup_init(); enable_debug_pagealloc(); - cpu_hotplug_init(); kmemtrace_init(); kmemleak_init(); debug_objects_mem_init(); -- cgit v1.2.3