From 50d3f7d55a4fa7c46a0fec709faeae636e5841fd Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 24 Jun 2016 15:09:37 -0700 Subject: UPSTREAM: Clarify naming of thread info/stack allocators We've had the thread info allocated together with the thread stack for most architectures for a long time (since the thread_info was split off from the task struct), but that is about to change. But the patches that move the thread info to be off-stack (and a part of the task struct instead) made it clear how confused the allocator and freeing functions are. Because the common case was that we share an allocation with the thread stack and the thread_info, the two pointers were identical. That identity then meant that we would have things like ti = alloc_thread_info_node(tsk, node); ... tsk->stack = ti; which certainly _worked_ (since stack and thread_info have the same value), but is rather confusing: why are we assigning a thread_info to the stack? And if we move the thread_info away, the "confusing" code just gets to be entirely bogus. So remove all this confusion, and make it clear that we are doing the stack allocation by renaming and clarifying the function names to be about the stack. The fact that the thread_info then shares the allocation is an implementation detail, and not really about the allocation itself. This is a pure renaming and type fix: we pass in the same pointer, it's just that we clarify what the pointer means. The ia64 code that actually only has one single allocation (for all of task_struct, thread_info and kernel thread stack) now looks a bit odd, but since "tsk->stack" is actually not even used there, that oddity doesn't matter. It would be a separate thing to clean that up, I intentionally left the ia64 changes as a pure brute-force renaming and type change. 
Acked-by: Andy Lutomirski Signed-off-by: Linus Torvalds Bug: 38331309 Change-Id: I870b5476fc900c9145134f9dd3ed18a32a490162 (cherry picked from commit b235beea9e996a4d36fed6cfef4801a3e7d7a9a5) Signed-off-by: Zubin Mithra --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index ad2c304b29b8..85cf2d2f02cb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2986,7 +2986,7 @@ static inline int object_is_on_stack(void *obj) return (obj >= stack) && (obj < (stack + THREAD_SIZE)); } -extern void thread_info_cache_init(void); +extern void thread_stack_cache_init(void); #ifdef CONFIG_DEBUG_STACK_USAGE static inline unsigned long stack_not_used(struct task_struct *p) -- cgit v1.2.3 From f707c0f98f32e79c148b6c4284fa1681b441ee6d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 24 Jun 2016 17:07:33 -0700 Subject: UPSTREAM: fix up initial thread stack pointer vs thread_info confusion The INIT_TASK() initializer was similarly confused about the stack vs thread_info allocation that the allocators had, and that were fixed in commit b235beea9e99 ("Clarify naming of thread info/stack allocators"). The task ->stack pointer only incidentally ends up having the same value as the thread_info, and in fact that will change. So fix the initial task struct initializer to point to 'init_stack' instead of 'init_thread_info', and make sure the ia64 definition for that exists. This actually makes the ia64 tsk->stack pointer be sensible for the initial task, but not for any other task. As mentioned in commit b235beea9e99, that whole pointer isn't actually used on ia64, since task_stack_page() there just points to the (single) allocation. All the other architectures seem to have copied the 'init_stack' definition, even if it tended to be generally unused. 
Signed-off-by: Linus Torvalds Bug: 38331309 Change-Id: Ia96e9225b07e38df2f4af2b9a7eb2aa972d8845a (cherry picked from commit 7f1a00b6fcd0e3c19beba2e92d157dc0c2cf3494) Signed-off-by: Zubin Mithra --- include/linux/init_task.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 1c1ff7e4faa4..9a0056499337 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -190,7 +190,7 @@ extern struct task_group root_task_group; #define INIT_TASK(tsk) \ { \ .state = 0, \ - .stack = &init_thread_info, \ + .stack = init_stack, \ .usage = ATOMIC_INIT(2), \ .flags = PF_KTHREAD, \ .prio = MAX_PRIO-20, \ -- cgit v1.2.3 From 8bc69d462ad300364c836616b249055ca7cb19e9 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 13 Sep 2016 14:29:24 -0700 Subject: UPSTREAM: sched/core: Allow putting thread_info into task_struct If an arch opts in by setting CONFIG_THREAD_INFO_IN_TASK_STRUCT, then thread_info is defined as a single 'u32 flags' and is the first entry of task_struct. thread_info::task is removed (it serves no purpose if thread_info is embedded in task_struct), and thread_info::cpu gets its own slot in task_struct. This is heavily based on a patch written by Linus. Originally-from: Linus Torvalds Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Jann Horn Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/a0898196f0476195ca02713691a5037a14f2aac5.1473801993.git.luto@kernel.org Signed-off-by: Ingo Molnar Bug: 38331309 Change-Id: I25e5a830f2ada5e74fa93661e97e5e701b1b70d2 (cherry picked from commit c65eacbe290b8141554c71b2c94489e73ade8c8d) Signed-off-by: Zubin Mithra --- include/linux/init_task.h | 9 +++++++++ include/linux/sched.h | 36 ++++++++++++++++++++++++++++++++++-- include/linux/thread_info.h | 15 +++++++++++++++ 3 files changed, 58 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 9a0056499337..021b1e9ff6cd 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -15,6 +15,8 @@ #include #include +#include + #ifdef CONFIG_SMP # define INIT_PUSHABLE_TASKS(tsk) \ .pushable_tasks = PLIST_NODE_INIT(tsk.pushable_tasks, MAX_PRIO), @@ -183,12 +185,19 @@ extern struct task_group root_task_group; # define INIT_KASAN(tsk) #endif +#ifdef CONFIG_THREAD_INFO_IN_TASK +# define INIT_TASK_TI(tsk) .thread_info = INIT_THREAD_INFO(tsk), +#else +# define INIT_TASK_TI(tsk) +#endif + /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. Base=0, limit=0x1fffff (=2MB) */ #define INIT_TASK(tsk) \ { \ + INIT_TASK_TI(tsk) \ .state = 0, \ .stack = init_stack, \ .usage = ATOMIC_INIT(2), \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 85cf2d2f02cb..d8c1b4340283 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1518,6 +1518,13 @@ struct tlbflush_unmap_batch { }; struct task_struct { +#ifdef CONFIG_THREAD_INFO_IN_TASK + /* + * For reasons of header soup (see current_thread_info()), this + * must be the first element of task_struct. 
+ */ + struct thread_info thread_info; +#endif volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ void *stack; atomic_t usage; @@ -1527,6 +1534,9 @@ struct task_struct { #ifdef CONFIG_SMP struct llist_node wake_entry; int on_cpu; +#ifdef CONFIG_THREAD_INFO_IN_TASK + unsigned int cpu; /* current CPU */ +#endif unsigned int wakee_flips; unsigned long wakee_flip_decay_ts; struct task_struct *last_wakee; @@ -2556,7 +2566,9 @@ extern void set_curr_task(int cpu, struct task_struct *p); void yield(void); union thread_union { +#ifndef CONFIG_THREAD_INFO_IN_TASK struct thread_info thread_info; +#endif unsigned long stack[THREAD_SIZE/sizeof(long)]; }; @@ -2946,10 +2958,26 @@ static inline void threadgroup_change_end(struct task_struct *tsk) cgroup_threadgroup_change_end(tsk); } -#ifndef __HAVE_THREAD_FUNCTIONS +#ifdef CONFIG_THREAD_INFO_IN_TASK + +static inline struct thread_info *task_thread_info(struct task_struct *task) +{ + return &task->thread_info; +} +static inline void *task_stack_page(const struct task_struct *task) +{ + return task->stack; +} +#define setup_thread_stack(new,old) do { } while(0) +static inline unsigned long *end_of_stack(const struct task_struct *task) +{ + return task->stack; +} + +#elif !defined(__HAVE_THREAD_FUNCTIONS) #define task_thread_info(task) ((struct thread_info *)(task)->stack) -#define task_stack_page(task) ((task)->stack) +#define task_stack_page(task) ((void *)(task)->stack) static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org) { @@ -3241,7 +3269,11 @@ static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume) static inline unsigned int task_cpu(const struct task_struct *p) { +#ifdef CONFIG_THREAD_INFO_IN_TASK + return p->cpu; +#else return task_thread_info(p)->cpu; +#endif } static inline int task_node(const struct task_struct *p) diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 4cf89517783a..8784cebd0f51 100644 --- 
a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -13,6 +13,21 @@ struct timespec; struct compat_timespec; +#ifdef CONFIG_THREAD_INFO_IN_TASK +struct thread_info { + u32 flags; /* low level flags */ +}; + +#define INIT_THREAD_INFO(tsk) \ +{ \ + .flags = 0, \ +} +#endif + +#ifdef CONFIG_THREAD_INFO_IN_TASK +#define current_thread_info() ((struct thread_info *)current) +#endif + /* * System call restart block. */ -- cgit v1.2.3 From 99cf9fa9a00606ee4d51d876af421362f1818160 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 15 Sep 2016 22:45:43 -0700 Subject: UPSTREAM: sched/core: Add try_get_task_stack() and put_task_stack() There are a few places in the kernel that access stack memory belonging to a different task. Before we can start freeing task stacks before the task_struct is freed, we need a way for those code paths to pin the stack. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jann Horn Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/17a434f50ad3d77000104f21666575e10a9c1fbd.1474003868.git.luto@kernel.org Signed-off-by: Ingo Molnar Bug: 38331309 Change-Id: I414853e9b72ecb0967d5e1cbfc77b4929bf3f4f5 (cherry picked from commit c6c314a613cd7d03fb97713e0d642b493de42e69) Signed-off-by: Zubin Mithra --- include/linux/sched.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index d8c1b4340283..0e6744bb2779 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2964,11 +2964,19 @@ static inline struct thread_info *task_thread_info(struct task_struct *task) { return &task->thread_info; } + +/* + * When accessing the stack of a non-current task that might exit, use + * try_get_task_stack() instead. task_stack_page will return a pointer + * that could get freed out from under you. 
+ */ static inline void *task_stack_page(const struct task_struct *task) { return task->stack; } + #define setup_thread_stack(new,old) do { } while(0) + static inline unsigned long *end_of_stack(const struct task_struct *task) { return task->stack; @@ -3004,6 +3012,14 @@ static inline unsigned long *end_of_stack(struct task_struct *p) } #endif + +static inline void *try_get_task_stack(struct task_struct *tsk) +{ + return task_stack_page(tsk); +} + +static inline void put_task_stack(struct task_struct *tsk) {} + #define task_stack_end_corrupted(task) \ (*(end_of_stack(task)) != STACK_END_MAGIC) -- cgit v1.2.3 From 264c551c4c77c9645a1c5a03735a71ed37348bc4 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 19 Oct 2016 19:28:12 +0100 Subject: UPSTREAM: thread_info: factor out restart_block Since commit f56141e3e2d9aabf ("all arches, signal: move restart_block to struct task_struct"), thread_info and restart_block have been logically distinct, yet struct restart_block is still defined in <linux/thread_info.h>. At least one architecture (erroneously) uses restart_block as part of its thread_info, and thus the definition of restart_block must come before the include of <asm/thread_info.h>. Subsequent patches in this series need to shuffle the order of includes and definitions in <linux/thread_info.h>, and will make this ordering fragile. This patch moves the definition of restart_block out to its own header. This serves as generic cleanup, logically separating thread_info and restart_block, and also makes it easier to avoid fragility. 
Signed-off-by: Mark Rutland Reviewed-by: Andy Lutomirski Cc: Andrew Morton Cc: Heiko Carstens Cc: Kees Cook Signed-off-by: Catalin Marinas Bug: 38331309 Change-Id: I4283c87072c092179e2b6c02cbf7248b4a1c2d22 (cherry picked from commit 53d74d056a4e306a72b8883d325b5d853c0618e6) Signed-off-by: Zubin Mithra --- include/linux/restart_block.h | 51 +++++++++++++++++++++++++++++++++++++++++++ include/linux/thread_info.h | 41 +--------------------------------- 2 files changed, 52 insertions(+), 40 deletions(-) create mode 100644 include/linux/restart_block.h (limited to 'include/linux') diff --git a/include/linux/restart_block.h b/include/linux/restart_block.h new file mode 100644 index 000000000000..0d905d8ec553 --- /dev/null +++ b/include/linux/restart_block.h @@ -0,0 +1,51 @@ +/* + * Common syscall restarting data + */ +#ifndef __LINUX_RESTART_BLOCK_H +#define __LINUX_RESTART_BLOCK_H + +#include +#include + +struct timespec; +struct compat_timespec; +struct pollfd; + +/* + * System call restart block. 
+ */ +struct restart_block { + long (*fn)(struct restart_block *); + union { + /* For futex_wait and futex_wait_requeue_pi */ + struct { + u32 __user *uaddr; + u32 val; + u32 flags; + u32 bitset; + u64 time; + u32 __user *uaddr2; + } futex; + /* For nanosleep */ + struct { + clockid_t clockid; + struct timespec __user *rmtp; +#ifdef CONFIG_COMPAT + struct compat_timespec __user *compat_rmtp; +#endif + u64 expires; + } nanosleep; + /* For poll */ + struct { + struct pollfd __user *ufds; + int nfds; + int has_timeout; + unsigned long tv_sec; + unsigned long tv_nsec; + } poll; + }; +}; + +extern long do_no_restart_syscall(struct restart_block *parm); + +#endif /* __LINUX_RESTART_BLOCK_H */ diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 8784cebd0f51..e8369b0d71e1 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -9,9 +9,7 @@ #include #include - -struct timespec; -struct compat_timespec; +#include #ifdef CONFIG_THREAD_INFO_IN_TASK struct thread_info { @@ -28,43 +26,6 @@ struct thread_info { #define current_thread_info() ((struct thread_info *)current) #endif -/* - * System call restart block. 
- */ -struct restart_block { - long (*fn)(struct restart_block *); - union { - /* For futex_wait and futex_wait_requeue_pi */ - struct { - u32 __user *uaddr; - u32 val; - u32 flags; - u32 bitset; - u64 time; - u32 __user *uaddr2; - } futex; - /* For nanosleep */ - struct { - clockid_t clockid; - struct timespec __user *rmtp; -#ifdef CONFIG_COMPAT - struct compat_timespec __user *compat_rmtp; -#endif - u64 expires; - } nanosleep; - /* For poll */ - struct { - struct pollfd __user *ufds; - int nfds; - int has_timeout; - unsigned long tv_sec; - unsigned long tv_nsec; - } poll; - }; -}; - -extern long do_no_restart_syscall(struct restart_block *parm); - #include #include -- cgit v1.2.3 From f2b8210f0a7c3f717b82880a1160aaa9255ceecf Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 19 Oct 2016 19:28:13 +0100 Subject: UPSTREAM: thread_info: include <current.h> for THREAD_INFO_IN_TASK When CONFIG_THREAD_INFO_IN_TASK is selected, the current_thread_info() macro relies on current having been defined prior to its use. However, not all users of current_thread_info() include <asm/current.h>, and thus current is not guaranteed to be defined. When CONFIG_THREAD_INFO_IN_TASK is not selected, it's possible that get_current() / current are based upon current_thread_info(), and <asm/current.h> includes <asm/thread_info.h>. Thus always including <asm/current.h> would result in circular dependences on some platforms. To ensure both cases work, this patch includes <asm/current.h>, but only when CONFIG_THREAD_INFO_IN_TASK is selected. 
Signed-off-by: Mark Rutland Acked-by: Heiko Carstens Reviewed-by: Andy Lutomirski Cc: Andrew Morton Cc: Kees Cook Signed-off-by: Catalin Marinas Bug: 38331309 Change-Id: Ia981a829798d60a54d4e3eb679d8e24b01228357 (cherry picked from commit dc3d2a679cd8631b8a570fc8ca5f4712d7d25698) Signed-off-by: Zubin Mithra --- include/linux/thread_info.h | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index e8369b0d71e1..8933ecc2bc9f 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -12,17 +12,12 @@ #include #ifdef CONFIG_THREAD_INFO_IN_TASK -struct thread_info { - u32 flags; /* low level flags */ -}; - -#define INIT_THREAD_INFO(tsk) \ -{ \ - .flags = 0, \ -} -#endif - -#ifdef CONFIG_THREAD_INFO_IN_TASK +/* + * For CONFIG_THREAD_INFO_IN_TASK kernels we need for the + * definition of current, but for !CONFIG_THREAD_INFO_IN_TASK kernels, + * including can cause a circular dependency on some platforms. + */ +#include #define current_thread_info() ((struct thread_info *)current) #endif -- cgit v1.2.3 From f1181047ff29d4d4d364435040bd347eb54483ca Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 2 Aug 2017 13:31:52 -0700 Subject: mm, mprotect: flush TLB if potentially racing with a parallel reclaim leaving stale TLB entries commit 3ea277194daaeaa84ce75180ec7c7a2075027a68 upstream. Stable note for 4.4: The upstream patch patches madvise(MADV_FREE) but 4.4 does not have support for that feature. The changelog is left as-is but the hunk related to madvise is omitted from the backport. Nadav Amit identified a theoretical race between page reclaim and mprotect due to TLB flushes being batched outside of the PTL being held. 
He described the race as follows: CPU0 CPU1 ---- ---- user accesses memory using RW PTE [PTE now cached in TLB] try_to_unmap_one() ==> ptep_get_and_clear() ==> set_tlb_ubc_flush_pending() mprotect(addr, PROT_READ) ==> change_pte_range() ==> [ PTE non-present - no flush ] user writes using cached RW PTE ... try_to_unmap_flush() The same type of race exists for reads when protecting for PROT_NONE and also exists for operations that can leave an old TLB entry behind such as munmap, mremap and madvise. For some operations like mprotect, it's not necessarily a data integrity issue but it is a correctness issue as there is a window where an mprotect that limits access still allows access. For munmap, it's potentially a data integrity issue although the race is massive as an munmap, mmap and return to userspace must all complete between the window when reclaim drops the PTL and flushes the TLB. However, it's theoretically possible so handle this issue by flushing the mm if reclaim is potentially currently batching TLB flushes. Other instances where a flush is required for a present pte should be ok as either the page lock is held preventing parallel reclaim or a page reference count is elevated preventing a parallel free leading to corruption. In the case of page_mkclean there isn't an obvious path that userspace could take advantage of without using the operations that are guarded by this patch. Other users such as gup as a race with reclaim looks just at PTEs. huge page variants should be ok as they don't race with reclaim. mincore only looks at PTEs. userfault also should be ok as if a parallel reclaim takes place, it will either fault the page back in or read some of the data before the flush occurs triggering a fault. Note that a variant of this patch was acked by Andy Lutomirski but this was for the x86 parts on top of his PCID work which didn't make the 4.13 merge window as expected. 
His ack is dropped from this version and there will be a follow-on patch on top of PCID that will include his ack. [akpm@linux-foundation.org: tweak comments] [akpm@linux-foundation.org: fix spello] Link: http://lkml.kernel.org/r/20170717155523.emckq2esjro6hf3z@suse.de Reported-by: Nadav Amit Signed-off-by: Mel Gorman Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/mm_types.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 2ccccbfcd532..36f4695aa604 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -503,6 +503,10 @@ struct mm_struct { * PROT_NONE or PROT_NUMA mapped page. */ bool tlb_flush_pending; +#endif +#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH + /* See flush_tlb_batched_pending() */ + bool tlb_flush_batched; #endif struct uprobes_state uprobes_state; #ifdef CONFIG_X86_INTEL_MPX -- cgit v1.2.3 From 9c83b97bdeabfea1f9c51c1f505ee14f13e4c628 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 10 Jan 2017 16:57:27 -0800 Subject: mm, slab: make sure that KMALLOC_MAX_SIZE will fit into MAX_ORDER [ Upstream commit bb1107f7c6052c863692a41f78c000db792334bf ] Andrey Konovalov has reported the following warning triggered by the syzkaller fuzzer. WARNING: CPU: 1 PID: 9935 at mm/page_alloc.c:3511 __alloc_pages_nodemask+0x159c/0x1e20 Kernel panic - not syncing: panic_on_warn set ... 
CPU: 1 PID: 9935 Comm: syz-executor0 Not tainted 4.9.0-rc7+ #34 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __alloc_pages_slowpath mm/page_alloc.c:3511 __alloc_pages_nodemask+0x159c/0x1e20 mm/page_alloc.c:3781 alloc_pages_current+0x1c7/0x6b0 mm/mempolicy.c:2072 alloc_pages include/linux/gfp.h:469 kmalloc_order+0x1f/0x70 mm/slab_common.c:1015 kmalloc_order_trace+0x1f/0x160 mm/slab_common.c:1026 kmalloc_large include/linux/slab.h:422 __kmalloc+0x210/0x2d0 mm/slub.c:3723 kmalloc include/linux/slab.h:495 ep_write_iter+0x167/0xb50 drivers/usb/gadget/legacy/inode.c:664 new_sync_write fs/read_write.c:499 __vfs_write+0x483/0x760 fs/read_write.c:512 vfs_write+0x170/0x4e0 fs/read_write.c:560 SYSC_write fs/read_write.c:607 SyS_write+0xfb/0x230 fs/read_write.c:599 entry_SYSCALL_64_fastpath+0x1f/0xc2 The issue is caused by a lack of size check for the request size in ep_write_iter which should be fixed. It, however, points to another problem, that SLUB defines KMALLOC_MAX_SIZE too large because its KMALLOC_SHIFT_MAX is (MAX_ORDER + PAGE_SHIFT) which means that the resulting page allocator request might be MAX_ORDER which is too large (see __alloc_pages_slowpath). The same applies to the SLOB allocator which allows even larger sizes. Make sure that they are capped properly and never request more than MAX_ORDER order. 
Link: http://lkml.kernel.org/r/20161220130659.16461-2-mhocko@kernel.org Signed-off-by: Michal Hocko Reported-by: Andrey Konovalov Acked-by: Christoph Lameter Cc: Alexei Starovoitov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/linux/slab.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 2037a861e367..8a2a9ffaf5de 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -203,7 +203,7 @@ size_t ksize(const void *); * (PAGE_SIZE*2). Larger requests are passed to the page allocator. */ #define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1) -#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT) +#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT - 1) #ifndef KMALLOC_SHIFT_LOW #define KMALLOC_SHIFT_LOW 3 #endif @@ -216,7 +216,7 @@ size_t ksize(const void *); * be allocated from the same page. */ #define KMALLOC_SHIFT_HIGH PAGE_SHIFT -#define KMALLOC_SHIFT_MAX 30 +#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT - 1) #ifndef KMALLOC_SHIFT_LOW #define KMALLOC_SHIFT_LOW 3 #endif -- cgit v1.2.3 From bbe660db23e41647366039c1860cee0891fe9903 Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Tue, 10 Jan 2017 16:57:54 -0800 Subject: signal: protect SIGNAL_UNKILLABLE from unintentional clearing. [ Upstream commit 2d39b3cd34e6d323720d4c61bd714f5ae202c022 ] Since commit 00cd5c37afd5 ("ptrace: permit ptracing of /sbin/init") we can now trace init processes. init is initially protected with SIGNAL_UNKILLABLE which will prevent fatal signals such as SIGSTOP, but there are a number of paths during tracing where SIGNAL_UNKILLABLE can be implicitly cleared. This can result in init becoming stoppable/killable after tracing. For example, running: while true; do kill -STOP 1; done & strace -p 1 and then stopping strace and the kill loop will result in init being left in state TASK_STOPPED. 
Sending SIGCONT to init will resume it, but init will now respond to future SIGSTOP signals rather than ignoring them. Make sure that when setting SIGNAL_STOP_CONTINUED/SIGNAL_STOP_STOPPED that we don't clear SIGNAL_UNKILLABLE. Link: http://lkml.kernel.org/r/20170104122017.25047-1-jamie.iles@oracle.com Signed-off-by: Jamie Iles Acked-by: Oleg Nesterov Cc: Alexander Viro Cc: Ingo Molnar Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/linux/sched.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 352213b360d7..eff7c1fad26f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -801,6 +801,16 @@ struct signal_struct { #define SIGNAL_UNKILLABLE 0x00000040 /* for init: ignore fatal signals */ +#define SIGNAL_STOP_MASK (SIGNAL_CLD_MASK | SIGNAL_STOP_STOPPED | \ + SIGNAL_STOP_CONTINUED) + +static inline void signal_set_stop_flags(struct signal_struct *sig, + unsigned int flags) +{ + WARN_ON(sig->flags & (SIGNAL_GROUP_EXIT|SIGNAL_GROUP_COREDUMP)); + sig->flags = (sig->flags & ~SIGNAL_STOP_MASK) | flags; +} + /* If true, all threads except ->group_exit_task have pending SIGKILL */ static inline int signal_group_exit(const struct signal_struct *sig) { -- cgit v1.2.3 From 34a08ae493f1970d5ce80dd3812b8dba4e5cbe22 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 23 Jul 2017 08:36:15 -0400 Subject: workqueue: implicit ordered attribute should be overridable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 0a94efb5acbb6980d7c9ab604372d93cd507e4d8 upstream. 5c0338c68706 ("workqueue: restore WQ_UNBOUND/max_active==1 to be ordered") automatically enabled ordered attribute for unbound workqueues w/ max_active == 1. 
Because ordered workqueues reject max_active and some attribute changes, this implicit ordered mode broke cases where the user creates an unbound workqueue w/ max_active == 1 and later explicitly changes the related attributes. This patch distinguishes explicit and implicit ordered setting and overrides from attribute changes if implicit. Signed-off-by: Tejun Heo Fixes: 5c0338c68706 ("workqueue: restore WQ_UNBOUND/max_active==1 to be ordered") Cc: Holger Hoffstätte Signed-off-by: Greg Kroah-Hartman --- include/linux/workqueue.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 0197358f1e81..262d5c95dfc8 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -311,6 +311,7 @@ enum { __WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */ __WQ_ORDERED = 1 << 17, /* internal: workqueue is ordered */ + __WQ_ORDERED_EXPLICIT = 1 << 18, /* internal: alloc_ordered_workqueue() */ WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ WQ_MAX_UNBOUND_PER_CPU = 4, /* 4 * #cpus for unbound wq */ @@ -408,7 +409,8 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active, * Pointer to the allocated workqueue on success, %NULL on failure. */ #define alloc_ordered_workqueue(fmt, flags, args...) \ - alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args) + alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | \ + __WQ_ORDERED_EXPLICIT | (flags), 1, ##args) #define create_workqueue(name) \ alloc_workqueue("%s", WQ_MEM_RECLAIM, 1, (name)) -- cgit v1.2.3 From c932c1b7730408e592a7f46229dcb325da28d4c4 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 28 Jul 2016 15:48:20 -0700 Subject: UPSTREAM: kdb: use task_cpu() instead of task_thread_info()->cpu commit e558af65be65713ef2e8b2aa637c6263caeed172 upstream. We'll need this cleanup to make the cpu field in thread_info be optional. 
Link: http://lkml.kernel.org/r/da298328dc77ea494576c2f20a934218e758a6fa.1468523549.git.luto@kernel.org Signed-off-by: Andy Lutomirski Cc: Jason Wessel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Change-Id: I0cd616f086f0eb54ed997ea153382fbf6188dba9 Signed-off-by: Amit Pundir --- include/linux/kdb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kdb.h b/include/linux/kdb.h index a19bcf9e762e..410decacff8f 100644 --- a/include/linux/kdb.h +++ b/include/linux/kdb.h @@ -177,7 +177,7 @@ extern int kdb_get_kbd_char(void); static inline int kdb_process_cpu(const struct task_struct *p) { - unsigned int cpu = task_thread_info(p)->cpu; + unsigned int cpu = task_cpu(p); if (cpu > num_possible_cpus()) cpu = 0; return cpu; -- cgit v1.2.3