From 940fe4793a219375c4713a17c61b843720807c9d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 23 Jan 2014 15:55:36 -0800 Subject: proc: fix the potential use-after-free in first_tid() proc_task_readdir() verifies that the result of get_proc_task() is pid_alive() and thus its ->group_leader is fine too. However this is not necessarily true after rcu_read_unlock(), we need to recheck this again after first_tid() does rcu_read_lock(). Otherwise leader->thread_group.next (used by next_thread()) can be invalid if the rcu grace period expires in between. The race is subtle and unlikely, but still it is possible afaics. To simplify lets ignore the "likely" case when tid != 0, f_version can be cleared by proc_task_operations->llseek(). Suppose we have a main thread M and its subthread T. Suppose that f_pos == 3, iow first_tid() should return T. Now suppose that the following happens between rcu_read_unlock() and rcu_read_lock(): 1. T execs and becomes the new leader. This removes M from ->thread_group but next_thread(M) is still T. 2. T creates another thread X which does exec as well, T goes away. 3. X creates another subthread, this increments nr_threads. 4. first_tid() does next_thread(M) and returns the already dead T. Note also that we need 2. and 3. only because of get_nr_threads() check, and this check was supposed to be optimization only. Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Cc: Michal Hocko Cc: Sameer Nanda Cc: Sergey Dyasly Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/proc/base.c') diff --git a/fs/proc/base.c b/fs/proc/base.c index 03c8d747be48..f223a56e613c 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3109,6 +3109,9 @@ static struct task_struct *first_tid(struct task_struct *leader, pos = NULL; if (nr && nr >= get_nr_threads(leader)) goto out; + /* It could be unhashed before we take rcu lock */ + if (!pid_alive(leader)) + goto out; /* If we haven't found our starting place yet start * with the leader and walk nr threads forward. -- cgit v1.2.3 From c986c14a6a88427946dc77d7018a81b95b3d41b6 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 23 Jan 2014 15:55:38 -0800 Subject: proc: change first_tid() to use while_each_thread() rather than next_thread() Rerwrite the main loop to use while_each_thread() instead of next_thread(). We are going to fix or replace while_each_thread(), next_thread() should be avoided whenever possible. Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Cc: Michal Hocko Cc: Sameer Nanda Cc: Sergey Dyasly Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'fs/proc/base.c') diff --git a/fs/proc/base.c b/fs/proc/base.c index f223a56e613c..be8e17cabfc7 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3106,23 +3106,23 @@ static struct task_struct *first_tid(struct task_struct *leader, } /* If nr exceeds the number of threads there is nothing todo */ - pos = NULL; if (nr && nr >= get_nr_threads(leader)) - goto out; + goto fail; /* It could be unhashed before we take rcu lock */ if (!pid_alive(leader)) - goto out; + goto fail; /* If we haven't found our starting place yet start * with the leader and walk nr threads forward. */ - for (pos = leader; nr > 0; --nr) { - pos = next_thread(pos); - if (pos == leader) { - pos = NULL; - goto out; - } - } + pos = leader; + do { + if (nr-- <= 0) + goto found; + } while_each_thread(leader, pos); +fail: + pos = NULL; + goto out; found: get_task_struct(pos); out: -- cgit v1.2.3 From d855a4b79f49ea07d1827fc0591490a6a324148b Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 23 Jan 2014 15:55:39 -0800 Subject: proc: don't (ab)use ->group_leader in proc_task_readdir() paths proc_task_readdir() does not really need "leader", first_tid() has to revalidate it anyway. Just pass proc_pid(inode) to first_tid() instead, it can do pid_task(PIDTYPE_PID) itself and read ->group_leader only if necessary. The patch also extracts the "inode is dead" code from pid_delete_dentry(dentry) into the new trivial helper, proc_inode_is_dead(inode), proc_task_readdir() uses it to return -ENOENT if this dir was removed. This is a bit racy, but the race is very inlikely and the getdents() after openndir() can see the empty "." + ".." dir only once. Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Cc: Michal Hocko Cc: Sameer Nanda Cc: Sergey Dyasly Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 52 ++++++++++++++++++++++++---------------------------- 1 file changed, 24 insertions(+), 28 deletions(-) (limited to 'fs/proc/base.c') diff --git a/fs/proc/base.c b/fs/proc/base.c index be8e17cabfc7..9b423fec9732 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1658,13 +1658,18 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags) return 0; } +static inline bool proc_inode_is_dead(struct inode *inode) +{ + return !proc_pid(inode)->tasks[PIDTYPE_PID].first; +} + int pid_delete_dentry(const struct dentry *dentry) { /* Is the task we represent dead? * If so, then don't put the dentry on the lru list, * kill it immediately. */ - return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; + return proc_inode_is_dead(dentry->d_inode); } const struct dentry_operations pid_dentry_operations = @@ -3092,34 +3097,35 @@ out_no_task: * In the case of a seek we start with the leader and walk nr * threads past it. */ -static struct task_struct *first_tid(struct task_struct *leader, - int tid, int nr, struct pid_namespace *ns) +static struct task_struct *first_tid(struct pid *pid, int tid, + int nr, struct pid_namespace *ns) { - struct task_struct *pos; + struct task_struct *pos, *task; rcu_read_lock(); - /* Attempt to start with the pid of a thread */ + task = pid_task(pid, PIDTYPE_PID); + if (!task) + goto fail; + + /* Attempt to start with the tid of a thread */ if (tid && (nr > 0)) { pos = find_task_by_pid_ns(tid, ns); - if (pos && (pos->group_leader == leader)) + if (pos && same_thread_group(pos, task)) goto found; } /* If nr exceeds the number of threads there is nothing todo */ - if (nr && nr >= get_nr_threads(leader)) - goto fail; - /* It could be unhashed before we take rcu lock */ - if (!pid_alive(leader)) + if (nr && nr >= get_nr_threads(task)) goto fail; /* If we haven't found our starting place yet start * with the leader and walk nr threads forward. */ - pos = leader; + pos = task = task->group_leader; do { if (nr-- <= 0) goto found; - } while_each_thread(leader, pos); + } while_each_thread(task, pos); fail: pos = NULL; goto out; @@ -3155,25 +3161,16 @@ static struct task_struct *next_tid(struct task_struct *start) /* for the /proc/TGID/task/ directories */ static int proc_task_readdir(struct file *file, struct dir_context *ctx) { - struct task_struct *leader = NULL; - struct task_struct *task = get_proc_task(file_inode(file)); + struct inode *inode = file_inode(file); + struct task_struct *task; struct pid_namespace *ns; int tid; - if (!task) - return -ENOENT; - rcu_read_lock(); - if (pid_alive(task)) { - leader = task->group_leader; - get_task_struct(leader); - } - rcu_read_unlock(); - put_task_struct(task); - if (!leader) + if (proc_inode_is_dead(inode)) return -ENOENT; if (!dir_emit_dots(file, ctx)) - goto out; + return 0; /* f_version caches the tgid value that the last readdir call couldn't * return. lseek aka telldir automagically resets f_version to 0. @@ -3181,7 +3178,7 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx) ns = file->f_dentry->d_sb->s_fs_info; tid = (int)file->f_version; file->f_version = 0; - for (task = first_tid(leader, tid, ctx->pos - 2, ns); + for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns); task; task = next_tid(task), ctx->pos++) { char name[PROC_NUMBUF]; @@ -3197,8 +3194,7 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx) break; } } -out: - put_task_struct(leader); + return 0; } -- cgit v1.2.3 From 9f6e963f06c19a57a876cb77a9c87f6a56295b13 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 23 Jan 2014 15:55:40 -0800 Subject: proc: fix ->f_pos overflows in first_tid() 1. proc_task_readdir()->first_tid() path truncates f_pos to int, this is wrong even on 64bit. We could check that f_pos < PID_MAX or even INT_MAX in proc_task_readdir(), but this patch simply checks the potential overflow in first_tid(), this check is nop on 64bit. We do not care if it was negative and the new unsigned value is huge, all we need to ensure is that we never wrongly return !NULL. 2. Remove the 2nd "nr != 0" check before get_nr_threads(), nr_threads == 0 is not distinguishable from !pid_task() above. Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Cc: Michal Hocko Cc: Sameer Nanda Cc: Sergey Dyasly Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'fs/proc/base.c') diff --git a/fs/proc/base.c b/fs/proc/base.c index 9b423fec9732..51507065263b 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3097,10 +3097,14 @@ out_no_task: * In the case of a seek we start with the leader and walk nr * threads past it. */ -static struct task_struct *first_tid(struct pid *pid, int tid, - int nr, struct pid_namespace *ns) +static struct task_struct *first_tid(struct pid *pid, int tid, loff_t f_pos, + struct pid_namespace *ns) { struct task_struct *pos, *task; + unsigned long nr = f_pos; + + if (nr != f_pos) /* 32bit overflow? */ + return NULL; rcu_read_lock(); task = pid_task(pid, PIDTYPE_PID); @@ -3108,14 +3112,14 @@ static struct task_struct *first_tid(struct pid *pid, int tid, goto fail; /* Attempt to start with the tid of a thread */ - if (tid && (nr > 0)) { + if (tid && nr) { pos = find_task_by_pid_ns(tid, ns); if (pos && same_thread_group(pos, task)) goto found; } /* If nr exceeds the number of threads there is nothing todo */ - if (nr && nr >= get_nr_threads(task)) + if (nr >= get_nr_threads(task)) goto fail; /* If we haven't found our starting place yet start @@ -3123,7 +3127,7 @@ static struct task_struct *first_tid(struct pid *pid, int tid, */ pos = task = task->group_leader; do { - if (nr-- <= 0) + if (!nr--) goto found; } while_each_thread(task, pos); fail: -- cgit v1.2.3