From a7adb91b13c104e5ad950fbe1795aa2722f2ea0a Mon Sep 17 00:00:00 2001 From: Kristen Carlson Accardi Date: Tue, 22 Sep 2015 10:51:36 -0700 Subject: x86/cpufeatures: Correct spelling of the HWP_NOTIFY flag Because noitification just isn't right. Signed-off-by: Kristen Carlson Accardi Acked-by: Rafael J. Wysocki Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Cc: rjw@rjwysocki.net Link: http://lkml.kernel.org/r/1442944296-11737-1-git-send-email-kristen@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/scattered.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 3d423a101fae..608fb26c7254 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -37,7 +37,7 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) { X86_FEATURE_PLN, CR_EAX, 4, 0x00000006, 0 }, { X86_FEATURE_PTS, CR_EAX, 6, 0x00000006, 0 }, { X86_FEATURE_HWP, CR_EAX, 7, 0x00000006, 0 }, - { X86_FEATURE_HWP_NOITFY, CR_EAX, 8, 0x00000006, 0 }, + { X86_FEATURE_HWP_NOTIFY, CR_EAX, 8, 0x00000006, 0 }, { X86_FEATURE_HWP_ACT_WINDOW, CR_EAX, 9, 0x00000006, 0 }, { X86_FEATURE_HWP_EPP, CR_EAX,10, 0x00000006, 0 }, { X86_FEATURE_HWP_PKG_REQ, CR_EAX,11, 0x00000006, 0 }, -- cgit v1.2.3 From 1e034743e918d195d339af340ae933727c072bce Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 23 Sep 2015 12:02:57 +0200 Subject: x86/hyperv: Fix the build in the !CONFIG_KEXEC_CORE case Recent changes in the Hyper-V driver: b4370df2b1f5 ("Drivers: hv: vmbus: add special crash handler") broke the build when CONFIG_KEXEC_CORE is not set: arch/x86/built-in.o: In function `hv_machine_crash_shutdown': arch/x86/kernel/cpu/mshyperv.c:112: undefined reference to `native_machine_crash_shutdown' Decorate all kexec related code with #ifdef CONFIG_KEXEC_CORE. Reported-by: Jim Davis Reported-by: Stephen Hemminger Signed-off-by: Vitaly Kuznetsov Signed-off-by: Thomas Gleixner Cc: devel@linuxdriverproject.org Cc: K. Y. Srinivasan Cc: Haiyang Zhang Cc: Greg Kroah-Hartman Cc: Peter Zijlstra Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1443002577-25370-1-git-send-email-vkuznets@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mshyperv.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 381c8b9b3a33..20e242ea1bc4 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -34,11 +34,10 @@ struct ms_hyperv_info ms_hyperv; EXPORT_SYMBOL_GPL(ms_hyperv); -static void (*hv_kexec_handler)(void); -static void (*hv_crash_handler)(struct pt_regs *regs); - #if IS_ENABLED(CONFIG_HYPERV) static void (*vmbus_handler)(void); +static void (*hv_kexec_handler)(void); +static void (*hv_crash_handler)(struct pt_regs *regs); void hyperv_vector_handler(struct pt_regs *regs) { @@ -96,8 +95,8 @@ void hv_remove_crash_handler(void) hv_crash_handler = NULL; } EXPORT_SYMBOL_GPL(hv_remove_crash_handler); -#endif +#ifdef CONFIG_KEXEC_CORE static void hv_machine_shutdown(void) { if (kexec_in_progress && hv_kexec_handler) @@ -111,7 +110,8 @@ static void hv_machine_crash_shutdown(struct pt_regs *regs) hv_crash_handler(regs); native_machine_crash_shutdown(regs); } - +#endif /* CONFIG_KEXEC_CORE */ +#endif /* CONFIG_HYPERV */ static uint32_t __init ms_hyperv_platform(void) { @@ -186,8 +186,10 @@ static void __init ms_hyperv_init_platform(void) no_timer_check = 1; #endif +#if IS_ENABLED(CONFIG_HYPERV) && defined(CONFIG_KEXEC_CORE) machine_ops.shutdown = hv_machine_shutdown; machine_ops.crash_shutdown = hv_machine_crash_shutdown; +#endif mark_tsc_unstable("running on Hyper-V"); } -- cgit v1.2.3 From eddd3826a1a0190e5235703d1e666affa4d13b96 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Sep 2015 08:38:22 +0000 Subject: x86/process: Add proper bound checks in 64bit get_wchan() Dmitry Vyukov reported the following using trinity and the memory error detector AddressSanitizer (https://code.google.com/p/address-sanitizer/wiki/AddressSanitizerForKernel). [ 124.575597] ERROR: AddressSanitizer: heap-buffer-overflow on address ffff88002e280000 [ 124.576801] ffff88002e280000 is located 131938492886538 bytes to the left of 28857600-byte region [ffffffff81282e0a, ffffffff82e0830a) [ 124.578633] Accessed by thread T10915: [ 124.579295] inlined in describe_heap_address ./arch/x86/mm/asan/report.c:164 [ 124.579295] #0 ffffffff810dd277 in asan_report_error ./arch/x86/mm/asan/report.c:278 [ 124.580137] #1 ffffffff810dc6a0 in asan_check_region ./arch/x86/mm/asan/asan.c:37 [ 124.581050] #2 ffffffff810dd423 in __tsan_read8 ??:0 [ 124.581893] #3 ffffffff8107c093 in get_wchan ./arch/x86/kernel/process_64.c:444 The address checks in the 64bit implementation of get_wchan() are wrong in several ways: - The lower bound of the stack is not the start of the stack page. It's the start of the stack page plus sizeof (struct thread_info) - The upper bound must be: top_of_stack - TOP_OF_KERNEL_STACK_PADDING - 2 * sizeof(unsigned long). The 2 * sizeof(unsigned long) is required because the stack pointer points at the frame pointer. The layout on the stack is: ... IP FP ... IP FP. So we need to make sure that both IP and FP are in the bounds. Fix the bound checks and get rid of the mix of numeric constants, u64 and unsigned long. Making all unsigned long allows us to use the same function for 32bit as well. Use READ_ONCE() when accessing the stack. This does not prevent a concurrent wakeup of the task and the stack changing, but at least it avoids TOCTOU. Also check task state at the end of the loop. Again that does not prevent concurrent changes, but it avoids walking for nothing. Add proper comments while at it. Reported-by: Dmitry Vyukov Reported-by: Sasha Levin Based-on-patch-from: Wolfram Gloger Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Andrey Konovalov Cc: Kostya Serebryany Cc: Alexander Potapenko Cc: kasan-dev Cc: Denys Vlasenko Cc: Andi Kleen Cc: Wolfram Gloger Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20150930083302.694788319@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_64.c | 52 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 3c1bbcf12924..f1fd0889e8af 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -499,27 +499,59 @@ void set_personality_ia32(bool x32) } EXPORT_SYMBOL_GPL(set_personality_ia32); +/* + * Called from fs/proc with a reference on @p to find the function + * which called into schedule(). This needs to be done carefully + * because the task might wake up and we might look at a stack + * changing under us. + */ unsigned long get_wchan(struct task_struct *p) { - unsigned long stack; - u64 fp, ip; + unsigned long start, bottom, top, sp, fp, ip; int count = 0; if (!p || p == current || p->state == TASK_RUNNING) return 0; - stack = (unsigned long)task_stack_page(p); - if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE) + + start = (unsigned long)task_stack_page(p); + if (!start) + return 0; + + /* + * Layout of the stack page: + * + * ----------- topmax = start + THREAD_SIZE - sizeof(unsigned long) + * PADDING + * ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING + * stack + * ----------- bottom = start + sizeof(thread_info) + * thread_info + * ----------- start + * + * The tasks stack pointer points at the location where the + * framepointer is stored. The data on the stack is: + * ... IP FP ... IP FP + * + * We need to read FP and IP, so we need to adjust the upper + * bound by another unsigned long. + */ + top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; + top -= 2 * sizeof(unsigned long); + bottom = start + sizeof(struct thread_info); + + sp = READ_ONCE(p->thread.sp); + if (sp < bottom || sp > top) return 0; - fp = *(u64 *)(p->thread.sp); + + fp = READ_ONCE(*(unsigned long *)sp); do { - if (fp < (unsigned long)stack || - fp >= (unsigned long)stack+THREAD_SIZE) + if (fp < bottom || fp > top) return 0; - ip = *(u64 *)(fp+8); + ip = READ_ONCE(*(unsigned long *)(fp + sizeof(unsigned long))); if (!in_sched_functions(ip)) return ip; - fp = *(u64 *)fp; - } while (count++ < 16); + fp = READ_ONCE(*(unsigned long *)fp); + } while (count++ < 16 && p->state != TASK_RUNNING); return 0; } -- cgit v1.2.3 From 7ba78053aacb89998a052843e3c56983c31d57f0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Sep 2015 08:38:23 +0000 Subject: x86/process: Unify 32bit and 64bit implementations of get_wchan() The stack layout and the functionality is identical. Use the 64bit version for all of x86. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Andrey Konovalov Cc: Kostya Serebryany Cc: Alexander Potapenko Cc: kasan-dev Cc: Denys Vlasenko Cc: Andi Kleen Cc: Sasha Levin Cc: Wolfram Gloger Link: http://lkml.kernel.org/r/20150930083302.779694618@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process.c | 55 +++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/process_32.c | 28 ---------------------- arch/x86/kernel/process_64.c | 56 -------------------------------------------- 3 files changed, 55 insertions(+), 84 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 6d0e62ae8516..39e585a554b7 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -506,3 +506,58 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) return randomize_range(mm->brk, range_end, 0) ? : mm->brk; } +/* + * Called from fs/proc with a reference on @p to find the function + * which called into schedule(). This needs to be done carefully + * because the task might wake up and we might look at a stack + * changing under us. + */ +unsigned long get_wchan(struct task_struct *p) +{ + unsigned long start, bottom, top, sp, fp, ip; + int count = 0; + + if (!p || p == current || p->state == TASK_RUNNING) + return 0; + + start = (unsigned long)task_stack_page(p); + if (!start) + return 0; + + /* + * Layout of the stack page: + * + * ----------- topmax = start + THREAD_SIZE - sizeof(unsigned long) + * PADDING + * ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING + * stack + * ----------- bottom = start + sizeof(thread_info) + * thread_info + * ----------- start + * + * The tasks stack pointer points at the location where the + * framepointer is stored. The data on the stack is: + * ... IP FP ... IP FP + * + * We need to read FP and IP, so we need to adjust the upper + * bound by another unsigned long. + */ + top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; + top -= 2 * sizeof(unsigned long); + bottom = start + sizeof(struct thread_info); + + sp = READ_ONCE(p->thread.sp); + if (sp < bottom || sp > top) + return 0; + + fp = READ_ONCE(*(unsigned long *)sp); + do { + if (fp < bottom || fp > top) + return 0; + ip = READ_ONCE(*(unsigned long *)(fp + sizeof(unsigned long))); + if (!in_sched_functions(ip)) + return ip; + fp = READ_ONCE(*(unsigned long *)fp); + } while (count++ < 16 && p->state != TASK_RUNNING); + return 0; +} diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index c13df2c735f8..737527b40e5b 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -324,31 +324,3 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) return prev_p; } - -#define top_esp (THREAD_SIZE - sizeof(unsigned long)) -#define top_ebp (THREAD_SIZE - 2*sizeof(unsigned long)) - -unsigned long get_wchan(struct task_struct *p) -{ - unsigned long bp, sp, ip; - unsigned long stack_page; - int count = 0; - if (!p || p == current || p->state == TASK_RUNNING) - return 0; - stack_page = (unsigned long)task_stack_page(p); - sp = p->thread.sp; - if (!stack_page || sp < stack_page || sp > top_esp+stack_page) - return 0; - /* include/asm-i386/system.h:switch_to() pushes bp last. */ - bp = *(unsigned long *) sp; - do { - if (bp < stack_page || bp > top_ebp+stack_page) - return 0; - ip = *(unsigned long *) (bp+4); - if (!in_sched_functions(ip)) - return ip; - bp = *(unsigned long *) bp; - } while (count++ < 16); - return 0; -} - diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index f1fd0889e8af..b35921a670b2 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -499,62 +499,6 @@ void set_personality_ia32(bool x32) } EXPORT_SYMBOL_GPL(set_personality_ia32); -/* - * Called from fs/proc with a reference on @p to find the function - * which called into schedule(). This needs to be done carefully - * because the task might wake up and we might look at a stack - * changing under us. - */ -unsigned long get_wchan(struct task_struct *p) -{ - unsigned long start, bottom, top, sp, fp, ip; - int count = 0; - - if (!p || p == current || p->state == TASK_RUNNING) - return 0; - - start = (unsigned long)task_stack_page(p); - if (!start) - return 0; - - /* - * Layout of the stack page: - * - * ----------- topmax = start + THREAD_SIZE - sizeof(unsigned long) - * PADDING - * ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING - * stack - * ----------- bottom = start + sizeof(thread_info) - * thread_info - * ----------- start - * - * The tasks stack pointer points at the location where the - * framepointer is stored. The data on the stack is: - * ... IP FP ... IP FP - * - * We need to read FP and IP, so we need to adjust the upper - * bound by another unsigned long. - */ - top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; - top -= 2 * sizeof(unsigned long); - bottom = start + sizeof(struct thread_info); - - sp = READ_ONCE(p->thread.sp); - if (sp < bottom || sp > top) - return 0; - - fp = READ_ONCE(*(unsigned long *)sp); - do { - if (fp < bottom || fp > top) - return 0; - ip = READ_ONCE(*(unsigned long *)(fp + sizeof(unsigned long))); - if (!in_sched_functions(ip)) - return ip; - fp = READ_ONCE(*(unsigned long *)fp); - } while (count++ < 16 && p->state != TASK_RUNNING); - return 0; -} - long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) { int ret = 0; -- cgit v1.2.3 From e3c41e37b0f4b18cbd4dac76cbeece5a7558b909 Mon Sep 17 00:00:00 2001 From: "Lee, Chun-Yi" Date: Tue, 29 Sep 2015 20:58:57 +0800 Subject: x86/kexec: Fix kexec crash in syscall kexec_file_load() The original bug is a page fault crash that sometimes happens on big machines when preparing ELF headers: BUG: unable to handle kernel paging request at ffffc90613fc9000 IP: [] prepare_elf64_ram_headers_callback+0x165/0x260 The bug is caused by us under-counting the number of memory ranges and subsequently not allocating enough ELF header space for them. The bug is typically masked on smaller systems, because the ELF header allocation is rounded up to the next page. This patch modifies the code in fill_up_crash_elf_data() by using walk_system_ram_res() instead of walk_system_ram_range() to correctly count the max number of crash memory ranges. That's because the walk_system_ram_range() filters out small memory regions that reside in the same page, but walk_system_ram_res() does not. Here's how I found the bug: After tracing prepare_elf64_headers() and prepare_elf64_ram_headers_callback(), the code uses walk_system_ram_res() to fill-in crash memory regions information to the program header, so it counts those small memory regions that reside in a page area. But, when the kernel was using walk_system_ram_range() in fill_up_crash_elf_data() to count the number of crash memory regions, it filters out small regions. I printed those small memory regions, for example: kexec: Get nr_ram ranges. vaddr=0xffff880077592258 paddr=0x77592258, sz=0xdc0 Based on the code in walk_system_ram_range(), this memory region will be filtered out: pfn = (0x77592258 + 0x1000 - 1) >> 12 = 0x77593 end_pfn = (0x77592258 + 0xfc0 -1 + 1) >> 12 = 0x77593 end_pfn - pfn = 0x77593 - 0x77593 = 0 <=== if (end_pfn > pfn) is FALSE So, the max_nr_ranges that's counted by the kernel doesn't include small memory regions - causing us to under-allocate the required space. That causes the page fault crash that happens in a later code path when preparing ELF headers. This bug is not easy to reproduce on small machines that have few CPUs, because the allocated page aligned ELF buffer has more free space to cover those small memory regions' PT_LOAD headers. Signed-off-by: Lee, Chun-Yi Cc: Andy Lutomirski Cc: Baoquan He Cc: Jiang Liu Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Stephen Rothwell Cc: Takashi Iwai Cc: Thomas Gleixner Cc: Viresh Kumar Cc: Vivek Goyal Cc: kexec@lists.infradead.org Cc: linux-kernel@vger.kernel.org Cc: Link: http://lkml.kernel.org/r/1443531537-29436-1-git-send-email-jlee@suse.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/crash.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index e068d6683dba..74ca2fe7a0b3 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -185,10 +185,9 @@ void native_machine_crash_shutdown(struct pt_regs *regs) } #ifdef CONFIG_KEXEC_FILE -static int get_nr_ram_ranges_callback(unsigned long start_pfn, - unsigned long nr_pfn, void *arg) +static int get_nr_ram_ranges_callback(u64 start, u64 end, void *arg) { - int *nr_ranges = arg; + unsigned int *nr_ranges = arg; (*nr_ranges)++; return 0; @@ -214,7 +213,7 @@ static void fill_up_crash_elf_data(struct crash_elf_data *ced, ced->image = image; - walk_system_ram_range(0, -1, &nr_ranges, + walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); ced->max_nr_ranges = nr_ranges; -- cgit v1.2.3