summaryrefslogtreecommitdiff
path: root/ipc
diff options
context:
space:
mode:
Diffstat (limited to 'ipc')
-rw-r--r--ipc/mqueue.c19
-rw-r--r--ipc/namespace.c6
-rw-r--r--ipc/sem.c6
-rw-r--r--ipc/shm.c176
-rw-r--r--ipc/util.c12
5 files changed, 154 insertions, 65 deletions
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index a94880dc9094..0a1afa17bf5f 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -305,7 +305,7 @@ err:
static int mqueue_fill_super(struct super_block *sb, void *data, int silent)
{
struct inode *inode;
- struct ipc_namespace *ns = data;
+ struct ipc_namespace *ns = sb->s_fs_info;
sb->s_blocksize = PAGE_CACHE_SIZE;
sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
@@ -326,17 +326,14 @@ static struct dentry *mqueue_mount(struct file_system_type *fs_type,
int flags, const char *dev_name,
void *data)
{
- if (!(flags & MS_KERNMOUNT)) {
- struct ipc_namespace *ns = current->nsproxy->ipc_ns;
- /* Don't allow mounting unless the caller has CAP_SYS_ADMIN
- * over the ipc namespace.
- */
- if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
- return ERR_PTR(-EPERM);
-
- data = ns;
+ struct ipc_namespace *ns;
+ if (flags & MS_KERNMOUNT) {
+ ns = data;
+ data = NULL;
+ } else {
+ ns = current->nsproxy->ipc_ns;
}
- return mount_ns(fs_type, flags, data, mqueue_fill_super);
+ return mount_ns(fs_type, flags, data, ns, ns->user_ns, mqueue_fill_super);
}
static void init_once(void *foo)
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 068caf18d565..25b64530c042 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -166,10 +166,16 @@ static int ipcns_install(struct nsproxy *nsproxy, struct ns_common *new)
return 0;
}
+static struct user_namespace *ipcns_owner(struct ns_common *ns)
+{
+ return to_ipc_ns(ns)->user_ns;
+}
+
const struct proc_ns_operations ipcns_operations = {
.name = "ipc",
.type = CLONE_NEWIPC,
.get = ipcns_get,
.put = ipcns_put,
.install = ipcns_install,
+ .owner = ipcns_owner,
};
diff --git a/ipc/sem.c b/ipc/sem.c
index 9862c3d1c26d..9963ed351b43 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -2151,11 +2151,9 @@ void exit_sem(struct task_struct *tsk)
ipc_assert_locked_object(&sma->sem_perm);
list_del(&un->list_id);
- /* we are the last process using this ulp, acquiring ulp->lock
- * isn't required. Besides that, we are also protected against
- * IPC_RMID as we hold sma->sem_perm lock now
- */
+ spin_lock(&ulp->lock);
list_del_rcu(&un->list_proc);
+ spin_unlock(&ulp->lock);
/* perform adjustments registered in un */
for (i = 0; i < sma->sem_nsems; i++) {
diff --git a/ipc/shm.c b/ipc/shm.c
index 5095eb3a6a6c..41e5e712da1c 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -90,6 +90,7 @@ static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
{
struct shmid_kernel *shp;
shp = container_of(ipcp, struct shmid_kernel, shm_perm);
+ WARN_ON(ns != shp->ns);
if (shp->shm_nattch) {
shp->shm_perm.mode |= SHM_DEST;
@@ -180,10 +181,43 @@ static void shm_rcu_free(struct rcu_head *head)
ipc_rcu_free(head);
}
-static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
+/*
+ * It has to be called with shp locked.
+ * It must be called before ipc_rmid()
+ */
+static inline void shm_clist_rm(struct shmid_kernel *shp)
{
- list_del(&s->shm_clist);
- ipc_rmid(&shm_ids(ns), &s->shm_perm);
+ struct task_struct *creator;
+
+ /* ensure that shm_creator does not disappear */
+ rcu_read_lock();
+
+ /*
+ * A concurrent exit_shm may do a list_del_init() as well.
+ * Just do nothing if exit_shm already did the work
+ */
+ if (!list_empty(&shp->shm_clist)) {
+ /*
+ * shp->shm_creator is guaranteed to be valid *only*
+ * if shp->shm_clist is not empty.
+ */
+ creator = shp->shm_creator;
+
+ task_lock(creator);
+ /*
+ * list_del_init() is a nop if the entry was already removed
+ * from the list.
+ */
+ list_del_init(&shp->shm_clist);
+ task_unlock(creator);
+ }
+ rcu_read_unlock();
+}
+
+static inline void shm_rmid(struct shmid_kernel *s)
+{
+ shm_clist_rm(s);
+ ipc_rmid(&shm_ids(s->ns), &s->shm_perm);
}
@@ -238,7 +272,7 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
shm_file = shp->shm_file;
shp->shm_file = NULL;
ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
- shm_rmid(ns, shp);
+ shm_rmid(shp);
shm_unlock(shp);
if (!is_file_hugepages(shm_file))
shmem_lock(shm_file, 0, shp->mlock_user);
@@ -259,10 +293,10 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
*
* 2) sysctl kernel.shm_rmid_forced is set to 1.
*/
-static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
+static bool shm_may_destroy(struct shmid_kernel *shp)
{
return (shp->shm_nattch == 0) &&
- (ns->shm_rmid_forced ||
+ (shp->ns->shm_rmid_forced ||
(shp->shm_perm.mode & SHM_DEST));
}
@@ -293,7 +327,7 @@ static void shm_close(struct vm_area_struct *vma)
shp->shm_lprid = task_tgid_vnr(current);
shp->shm_dtim = get_seconds();
shp->shm_nattch--;
- if (shm_may_destroy(ns, shp))
+ if (shm_may_destroy(shp))
shm_destroy(ns, shp);
else
shm_unlock(shp);
@@ -314,10 +348,10 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data)
*
* As shp->* are changed under rwsem, it's safe to skip shp locking.
*/
- if (shp->shm_creator != NULL)
+ if (!list_empty(&shp->shm_clist))
return 0;
- if (shm_may_destroy(ns, shp)) {
+ if (shm_may_destroy(shp)) {
shm_lock_by_ptr(shp);
shm_destroy(ns, shp);
}
@@ -335,48 +369,97 @@ void shm_destroy_orphaned(struct ipc_namespace *ns)
/* Locking assumes this will only be called with task == current */
void exit_shm(struct task_struct *task)
{
- struct ipc_namespace *ns = task->nsproxy->ipc_ns;
- struct shmid_kernel *shp, *n;
+ for (;;) {
+ struct shmid_kernel *shp;
+ struct ipc_namespace *ns;
- if (list_empty(&task->sysvshm.shm_clist))
- return;
+ task_lock(task);
+
+ if (list_empty(&task->sysvshm.shm_clist)) {
+ task_unlock(task);
+ break;
+ }
+
+ shp = list_first_entry(&task->sysvshm.shm_clist, struct shmid_kernel,
+ shm_clist);
- /*
- * If kernel.shm_rmid_forced is not set then only keep track of
- * which shmids are orphaned, so that a later set of the sysctl
- * can clean them up.
- */
- if (!ns->shm_rmid_forced) {
- down_read(&shm_ids(ns).rwsem);
- list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist)
- shp->shm_creator = NULL;
/*
- * Only under read lock but we are only called on current
- * so no entry on the list will be shared.
+ * 1) Get pointer to the ipc namespace. It is worth to say
+ * that this pointer is guaranteed to be valid because
+ * shp lifetime is always shorter than namespace lifetime
+ * in which shp lives.
+ * We taken task_lock it means that shp won't be freed.
*/
- list_del(&task->sysvshm.shm_clist);
- up_read(&shm_ids(ns).rwsem);
- return;
- }
+ ns = shp->ns;
- /*
- * Destroy all already created segments, that were not yet mapped,
- * and mark any mapped as orphan to cover the sysctl toggling.
- * Destroy is skipped if shm_may_destroy() returns false.
- */
- down_write(&shm_ids(ns).rwsem);
- list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) {
- shp->shm_creator = NULL;
+ /*
+ * 2) If kernel.shm_rmid_forced is not set then only keep track of
+ * which shmids are orphaned, so that a later set of the sysctl
+ * can clean them up.
+ */
+ if (!ns->shm_rmid_forced)
+ goto unlink_continue;
- if (shm_may_destroy(ns, shp)) {
- shm_lock_by_ptr(shp);
- shm_destroy(ns, shp);
+ /*
+ * 3) get a reference to the namespace.
+ * The refcount could be already 0. If it is 0, then
+ * the shm objects will be free by free_ipc_work().
+ */
+ ns = get_ipc_ns_not_zero(ns);
+ if (!ns) {
+unlink_continue:
+ list_del_init(&shp->shm_clist);
+ task_unlock(task);
+ continue;
}
- }
- /* Remove the list head from any segments still attached. */
- list_del(&task->sysvshm.shm_clist);
- up_write(&shm_ids(ns).rwsem);
+ /*
+ * 4) get a reference to shp.
+ * This cannot fail: shm_clist_rm() is called before
+ * ipc_rmid(), thus the refcount cannot be 0.
+ */
+ WARN_ON(!ipc_rcu_getref(&shp->shm_perm));
+
+ /*
+ * 5) unlink the shm segment from the list of segments
+ * created by current.
+ * This must be done last. After unlinking,
+ * only the refcounts obtained above prevent IPC_RMID
+ * from destroying the segment or the namespace.
+ */
+ list_del_init(&shp->shm_clist);
+
+ task_unlock(task);
+
+ /*
+ * 6) we have all references
+ * Thus lock & if needed destroy shp.
+ */
+ down_write(&shm_ids(ns).rwsem);
+ shm_lock_by_ptr(shp);
+ /*
+ * rcu_read_lock was implicitly taken in shm_lock_by_ptr, it's
+ * safe to call ipc_rcu_putref here
+ */
+ ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
+
+ if (ipc_valid_object(&shp->shm_perm)) {
+ if (shm_may_destroy(shp))
+ shm_destroy(ns, shp);
+ else
+ shm_unlock(shp);
+ } else {
+ /*
+ * Someone else deleted the shp from namespace
+ * idr/kht while we have waited.
+ * Just unlock and continue.
+ */
+ shm_unlock(shp);
+ }
+
+ up_write(&shm_ids(ns).rwsem);
+ put_ipc_ns(ns); /* paired with get_ipc_ns_not_zero */
+ }
}
static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
@@ -607,7 +690,11 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
goto no_id;
}
+ shp->ns = ns;
+
+ task_lock(current);
list_add(&shp->shm_clist, &current->sysvshm.shm_clist);
+ task_unlock(current);
/*
* shmid gets reported as "inode#" in /proc/pid/maps.
@@ -1252,7 +1339,8 @@ out_nattch:
down_write(&shm_ids(ns).rwsem);
shp = shm_lock(ns, shmid);
shp->shm_nattch--;
- if (shm_may_destroy(ns, shp))
+
+ if (shm_may_destroy(shp))
shm_destroy(ns, shp);
else
shm_unlock(shp);
diff --git a/ipc/util.c b/ipc/util.c
index 0f401d94b7c6..7af476b6dcdd 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -756,21 +756,21 @@ static struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t pos,
total++;
}
+ ipc = NULL;
if (total >= ids->in_use)
- return NULL;
+ goto out;
for (; pos < IPCMNI; pos++) {
ipc = idr_find(&ids->ipcs_idr, pos);
if (ipc != NULL) {
- *new_pos = pos + 1;
rcu_read_lock();
ipc_lock_object(ipc);
- return ipc;
+ break;
}
}
-
- /* Out of range - return NULL to terminate iteration */
- return NULL;
+out:
+ *new_pos = pos + 1;
+ return ipc;
}
static void *sysvipc_proc_next(struct seq_file *s, void *it, loff_t *pos)