diff options
Diffstat (limited to 'kernel/fork.c')
| -rw-r--r-- | kernel/fork.c | 212 |
1 files changed, 149 insertions, 63 deletions
diff --git a/kernel/fork.c b/kernel/fork.c index cf65139615a0..dbd9b8d7b7cc 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -74,6 +74,7 @@ #include <linux/uprobes.h> #include <linux/aio.h> #include <linux/compiler.h> +#include <linux/sysctl.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -88,6 +89,16 @@ #include <trace/events/task.h> /* + * Minimum number of threads to boot the kernel + */ +#define MIN_THREADS 20 + +/* + * Maximum number of threads + */ +#define MAX_THREADS FUTEX_TID_MASK + +/* * Protected counters by write_lock_irq(&tasklist_lock) */ unsigned long total_forks; /* Handle normal Linux uptimes. */ @@ -253,7 +264,35 @@ EXPORT_SYMBOL_GPL(__put_task_struct); void __init __weak arch_task_cache_init(void) { } -void __init fork_init(unsigned long mempages) +/* + * set_max_threads + */ +static void set_max_threads(unsigned int max_threads_suggested) +{ + u64 threads; + + /* + * The number of threads shall be limited such that the thread + * structures may only consume a small part of the available memory. + */ + if (fls64(totalram_pages) + fls64(PAGE_SIZE) > 64) + threads = MAX_THREADS; + else + threads = div64_u64((u64) totalram_pages * (u64) PAGE_SIZE, + (u64) THREAD_SIZE * 8UL); + + if (threads > max_threads_suggested) + threads = max_threads_suggested; + + max_threads = clamp_t(u64, threads, MIN_THREADS, MAX_THREADS); +} + +#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT +/* Initialized by the architecture: */ +int arch_task_struct_size __read_mostly; +#endif + +void __init fork_init(void) { #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR #ifndef ARCH_MIN_TASKALIGN @@ -261,25 +300,14 @@ void __init fork_init(unsigned long mempages) #endif /* create a slab on which task_structs can be allocated */ task_struct_cachep = - kmem_cache_create("task_struct", sizeof(struct task_struct), + kmem_cache_create("task_struct", arch_task_struct_size, ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL); #endif /* do the arch specific task caches init */ arch_task_cache_init(); - /* - * The default maximum number of threads is set to a safe - * value: the thread structures can take up at most half - * of memory. - */ - max_threads = mempages / (8 * THREAD_SIZE / PAGE_SIZE); - - /* - * we need to allow at least 20 threads to boot a system - */ - if (max_threads < 20) - max_threads = 20; + set_max_threads(MAX_THREADS); init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2; init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2; @@ -380,6 +408,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) */ down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING); + /* No ordering required: file already has been exposed. */ + RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm)); + mm->total_vm = oldmm->total_vm; mm->shared_vm = oldmm->shared_vm; mm->exec_vm = oldmm->exec_vm; @@ -505,7 +536,13 @@ static inline void mm_free_pgd(struct mm_struct *mm) pgd_free(mm, mm->pgd); } #else -#define dup_mmap(mm, oldmm) (0) +static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) +{ + down_write(&oldmm->mmap_sem); + RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm)); + up_write(&oldmm->mmap_sem); + return 0; +} #define mm_alloc_pgd(mm) (0) #define mm_free_pgd(mm) #endif /* CONFIG_MMU */ @@ -674,34 +711,53 @@ void mmput(struct mm_struct *mm) } EXPORT_SYMBOL_GPL(mmput); +/** + * set_mm_exe_file - change a reference to the mm's executable file + * + * This changes mm's executable file (shown as symlink /proc/[pid]/exe). + * + * Main users are mmput() and sys_execve(). Callers prevent concurrent + * invocations: in mmput() nobody alive left, in execve task is single + * threaded. sys_prctl(PR_SET_MM_MAP/EXE_FILE) also needs to set the + * mm->exe_file, but does so without using set_mm_exe_file() in order + * to do avoid the need for any locks. + */ void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) { + struct file *old_exe_file; + + /* + * It is safe to dereference the exe_file without RCU as + * this function is only called if nobody else can access + * this mm -- see comment above for justification. + */ + old_exe_file = rcu_dereference_raw(mm->exe_file); + if (new_exe_file) get_file(new_exe_file); - if (mm->exe_file) - fput(mm->exe_file); - mm->exe_file = new_exe_file; + rcu_assign_pointer(mm->exe_file, new_exe_file); + if (old_exe_file) + fput(old_exe_file); } +/** + * get_mm_exe_file - acquire a reference to the mm's executable file + * + * Returns %NULL if mm has no associated executable file. + * User must release file via fput(). + */ struct file *get_mm_exe_file(struct mm_struct *mm) { struct file *exe_file; - /* We need mmap_sem to protect against races with removal of exe_file */ - down_read(&mm->mmap_sem); - exe_file = mm->exe_file; - if (exe_file) - get_file(exe_file); - up_read(&mm->mmap_sem); + rcu_read_lock(); + exe_file = rcu_dereference(mm->exe_file); + if (exe_file && !get_file_rcu(exe_file)) + exe_file = NULL; + rcu_read_unlock(); return exe_file; } - -static void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm) -{ - /* It's safe to write the exe_file pointer without exe_file_lock because - * this is called during fork when the task is not yet in /proc */ - newmm->exe_file = get_mm_exe_file(oldmm); -} +EXPORT_SYMBOL(get_mm_exe_file); /** * get_task_mm - acquire a reference to the task's mm @@ -864,8 +920,6 @@ static struct mm_struct *dup_mm(struct task_struct *tsk) if (!mm_init(mm, tsk)) goto fail_nomem; - dup_mm_exe_file(oldmm, mm); - err = dup_mmap(mm, oldmm); if (err) goto free_pt; @@ -1042,10 +1096,7 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig) { unsigned long cpu_limit; - /* Thread group counters. */ - thread_group_cputime_init(sig); - - cpu_limit = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur); + cpu_limit = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur); if (cpu_limit != RLIM_INFINITY) { sig->cputime_expires.prof_exp = secs_to_cputime(cpu_limit); sig->cputimer.running = 1; @@ -1095,10 +1146,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) tty_audit_fork(sig); sched_autogroup_fork(sig); -#ifdef CONFIG_CGROUPS - init_rwsem(&sig->group_rwsem); -#endif - sig->oom_score_adj = current->signal->oom_score_adj; sig->oom_score_adj_min = current->signal->oom_score_adj_min; @@ -1192,7 +1239,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, unsigned long stack_size, int __user *child_tidptr, struct pid *pid, - int trace) + int trace, + unsigned long tls) { int retval; struct task_struct *p; @@ -1279,9 +1327,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (nr_threads >= max_threads) goto bad_fork_cleanup_count; - if (!try_module_get(task_thread_info(p)->exec_domain->module)) - goto bad_fork_cleanup_count; - delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER); p->flags |= PF_FORKNOEXEC; @@ -1350,6 +1395,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->hardirq_context = 0; p->softirq_context = 0; #endif + + p->pagefault_disabled = 0; + #ifdef CONFIG_LOCKDEP p->lockdep_depth = 0; /* no locks held yet */ p->curr_chain_key = 0; @@ -1401,15 +1449,16 @@ static struct task_struct *copy_process(unsigned long clone_flags, retval = copy_io(clone_flags, p); if (retval) goto bad_fork_cleanup_namespaces; - retval = copy_thread(clone_flags, stack_start, stack_size, p); + retval = copy_thread_tls(clone_flags, stack_start, stack_size, p, tls); if (retval) goto bad_fork_cleanup_io; if (pid != &init_struct_pid) { - retval = -ENOMEM; pid = alloc_pid(p->nsproxy->pid_ns_for_children); - if (!pid) + if (IS_ERR(pid)) { + retval = PTR_ERR(pid); goto bad_fork_cleanup_io; + } } p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; @@ -1590,7 +1639,6 @@ bad_fork_cleanup_threadgroup_lock: if (clone_flags & CLONE_THREAD) threadgroup_change_end(current); delayacct_tsk_free(p); - module_put(task_thread_info(p)->exec_domain->module); bad_fork_cleanup_count: atomic_dec(&p->cred->user->processes); exit_creds(p); @@ -1613,7 +1661,7 @@ static inline void init_idle_pids(struct pid_link *links) struct task_struct *fork_idle(int cpu) { struct task_struct *task; - task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0); + task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0, 0); if (!IS_ERR(task)) { init_idle_pids(task->pids); init_idle(task, cpu); @@ -1628,11 +1676,12 @@ struct task_struct *fork_idle(int cpu) * It copies the process, and if successful kick-starts * it and waits for it to finish using the VM if required. */ -long do_fork(unsigned long clone_flags, +long _do_fork(unsigned long clone_flags, unsigned long stack_start, unsigned long stack_size, int __user *parent_tidptr, - int __user *child_tidptr) + int __user *child_tidptr, + unsigned long tls) { struct task_struct *p; int trace = 0; @@ -1657,7 +1706,7 @@ long do_fork(unsigned long clone_flags, } p = copy_process(clone_flags, stack_start, stack_size, - child_tidptr, NULL, trace); + child_tidptr, NULL, trace, tls); /* * Do this prior waking up the new thread - the thread pointer * might get invalid after that point, if the thread exits quickly. @@ -1698,20 +1747,34 @@ long do_fork(unsigned long clone_flags, return nr; } +#ifndef CONFIG_HAVE_COPY_THREAD_TLS +/* For compatibility with architectures that call do_fork directly rather than + * using the syscall entry points below. */ +long do_fork(unsigned long clone_flags, + unsigned long stack_start, + unsigned long stack_size, + int __user *parent_tidptr, + int __user *child_tidptr) +{ + return _do_fork(clone_flags, stack_start, stack_size, + parent_tidptr, child_tidptr, 0); +} +#endif + /* * Create a kernel thread. */ pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) { - return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn, - (unsigned long)arg, NULL, NULL); + return _do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn, + (unsigned long)arg, NULL, NULL, 0); } #ifdef __ARCH_WANT_SYS_FORK SYSCALL_DEFINE0(fork) { #ifdef CONFIG_MMU - return do_fork(SIGCHLD, 0, 0, NULL, NULL); + return _do_fork(SIGCHLD, 0, 0, NULL, NULL, 0); #else /* can not support in nommu mode */ return -EINVAL; @@ -1722,8 +1785,8 @@ SYSCALL_DEFINE0(fork) #ifdef __ARCH_WANT_SYS_VFORK SYSCALL_DEFINE0(vfork) { - return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, - 0, NULL, NULL); + return _do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, + 0, NULL, NULL, 0); } #endif @@ -1731,27 +1794,27 @@ SYSCALL_DEFINE0(vfork) #ifdef CONFIG_CLONE_BACKWARDS SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, int __user *, parent_tidptr, - int, tls_val, + unsigned long, tls, int __user *, child_tidptr) #elif defined(CONFIG_CLONE_BACKWARDS2) SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags, int __user *, parent_tidptr, int __user *, child_tidptr, - int, tls_val) + unsigned long, tls) #elif defined(CONFIG_CLONE_BACKWARDS3) SYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp, int, stack_size, int __user *, parent_tidptr, int __user *, child_tidptr, - int, tls_val) + unsigned long, tls) #else SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, int __user *, parent_tidptr, int __user *, child_tidptr, - int, tls_val) + unsigned long, tls) #endif { - return do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr); + return _do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr, tls); } #endif @@ -2004,3 +2067,26 @@ int unshare_files(struct files_struct **displaced) task_unlock(task); return 0; } + +int sysctl_max_threads(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table t; + int ret; + int threads = max_threads; + int min = MIN_THREADS; + int max = MAX_THREADS; + + t = *table; + t.data = &threads; + t.extra1 = &min; + t.extra2 = &max; + + ret = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); + if (ret || !write) + return ret; + + set_max_threads(threads); + + return 0; +} |
