summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/binfmt_elf.c59
-rw-r--r--fs/exec.c11
-rw-r--r--fs/ext4/sysfs.c2
-rw-r--r--fs/fcntl.c14
-rw-r--r--fs/gfs2/glock.c11
-rw-r--r--fs/gfs2/incore.h1
-rw-r--r--fs/mount.h1
-rw-r--r--fs/namespace.c1
-rw-r--r--fs/open.c6
-rw-r--r--fs/pnode.c212
-rw-r--r--fs/sdcardfs/inode.c6
-rw-r--r--fs/sdcardfs/main.c47
12 files changed, 263 insertions, 108 deletions
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 6c031dd1bc4e..8a0243efd359 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -905,17 +905,60 @@ static int load_elf_binary(struct linux_binprm *bprm)
elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
vaddr = elf_ppnt->p_vaddr;
+ /*
+ * If we are loading ET_EXEC or we have already performed
+ * the ET_DYN load_addr calculations, proceed normally.
+ */
if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
elf_flags |= MAP_FIXED;
} else if (loc->elf_ex.e_type == ET_DYN) {
- /* Try and get dynamic programs out of the way of the
- * default mmap base, as well as whatever program they
- * might try to exec. This is because the brk will
- * follow the loader, and is not movable. */
- load_bias = ELF_ET_DYN_BASE - vaddr;
- if (current->flags & PF_RANDOMIZE)
- load_bias += arch_mmap_rnd();
- load_bias = ELF_PAGESTART(load_bias);
+ /*
+ * This logic is run once for the first LOAD Program
+ * Header for ET_DYN binaries to calculate the
+ * randomization (load_bias) for all the LOAD
+ * Program Headers, and to calculate the entire
+ * size of the ELF mapping (total_size). (Note that
+ * load_addr_set is set to true later once the
+ * initial mapping is performed.)
+ *
+ * There are effectively two types of ET_DYN
+ * binaries: programs (i.e. PIE: ET_DYN with INTERP)
+ * and loaders (ET_DYN without INTERP, since they
+ * _are_ the ELF interpreter). The loaders must
+ * be loaded away from programs since the program
+ * may otherwise collide with the loader (especially
+ * for ET_EXEC which does not have a randomized
+ * position). For example to handle invocations of
+ * "./ld.so someprog" to test out a new version of
+ * the loader, the subsequent program that the
+ * loader loads must avoid the loader itself, so
+ * they cannot share the same load range. Sufficient
+ * room for the brk must be allocated with the
+ * loader as well, since brk must be available with
+ * the loader.
+ *
+ * Therefore, programs are loaded offset from
+ * ELF_ET_DYN_BASE and loaders are loaded into the
+ * independently randomized mmap region (0 load_bias
+ * without MAP_FIXED).
+ */
+ if (elf_interpreter) {
+ load_bias = ELF_ET_DYN_BASE;
+ if (current->flags & PF_RANDOMIZE)
+ load_bias += arch_mmap_rnd();
+ elf_flags |= MAP_FIXED;
+ } else
+ load_bias = 0;
+
+ /*
+ * Since load_bias is used for all subsequent loading
+ * calculations, we must lower it by the first vaddr
+ * so that the remaining calculations based on the
+ * ELF vaddrs will be correctly offset. The result
+ * is then page aligned.
+ */
+ load_bias = ELF_PAGESTART(load_bias - vaddr);
+
total_size = total_mapping_size(elf_phdata,
loc->elf_ex.e_phnum);
if (!total_size) {
diff --git a/fs/exec.c b/fs/exec.c
index 073ae12b396e..0428c34d4773 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -206,8 +206,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
if (write) {
unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start;
- unsigned long ptr_size;
- struct rlimit *rlim;
+ unsigned long ptr_size, limit;
/*
* Since the stack will hold pointers to the strings, we
@@ -236,14 +235,16 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
return page;
/*
- * Limit to 1/4-th the stack size for the argv+env strings.
+ * Limit to 1/4 of the max stack size or 3/4 of _STK_LIM
+ * (whichever is smaller) for the argv+env strings.
* This ensures that:
* - the remaining binfmt code will not run out of stack space,
* - the program will have a reasonable amount of stack left
* to work from.
*/
- rlim = current->signal->rlim;
- if (size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4)
+ limit = _STK_LIM / 4 * 3;
+ limit = min(limit, rlimit(RLIMIT_STACK) / 4);
+ if (size > limit)
goto fail;
}
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 5d09ea585840..c2ee23acf359 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -100,7 +100,7 @@ static ssize_t reserved_clusters_store(struct ext4_attr *a,
int ret;
ret = kstrtoull(skip_spaces(buf), 0, &val);
- if (!ret || val >= clusters)
+ if (ret || val >= clusters)
return -EINVAL;
atomic64_set(&sbi->s_resv_clusters, val);
diff --git a/fs/fcntl.c b/fs/fcntl.c
index ee85cd4e136a..62376451bbce 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -740,16 +740,10 @@ static int __init fcntl_init(void)
* Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
* is defined as O_NONBLOCK on some platforms and not on others.
*/
- BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
- O_RDONLY | O_WRONLY | O_RDWR |
- O_CREAT | O_EXCL | O_NOCTTY |
- O_TRUNC | O_APPEND | /* O_NONBLOCK | */
- __O_SYNC | O_DSYNC | FASYNC |
- O_DIRECT | O_LARGEFILE | O_DIRECTORY |
- O_NOFOLLOW | O_NOATIME | O_CLOEXEC |
- __FMODE_EXEC | O_PATH | __O_TMPFILE |
- __FMODE_NONOTIFY
- ));
+ BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ !=
+ HWEIGHT32(
+ (VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) |
+ __FMODE_EXEC | __FMODE_NONOTIFY));
fasync_cache = kmem_cache_create("fasync_cache",
sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 9cd8c92b953d..070901e76653 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -80,9 +80,9 @@ static struct rhashtable_params ht_parms = {
static struct rhashtable gl_hash_table;
-void gfs2_glock_free(struct gfs2_glock *gl)
+static void gfs2_glock_dealloc(struct rcu_head *rcu)
{
- struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+ struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu);
if (gl->gl_ops->go_flags & GLOF_ASPACE) {
kmem_cache_free(gfs2_glock_aspace_cachep, gl);
@@ -90,6 +90,13 @@ void gfs2_glock_free(struct gfs2_glock *gl)
kfree(gl->gl_lksb.sb_lvbptr);
kmem_cache_free(gfs2_glock_cachep, gl);
}
+}
+
+void gfs2_glock_free(struct gfs2_glock *gl)
+{
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+
+ call_rcu(&gl->gl_rcu, gfs2_glock_dealloc);
if (atomic_dec_and_test(&sdp->sd_glock_disposal))
wake_up(&sdp->sd_glock_wait);
}
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index be519416c112..4a9077ec9313 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -367,6 +367,7 @@ struct gfs2_glock {
loff_t end;
} gl_vm;
};
+ struct rcu_head gl_rcu;
struct rhash_head gl_node;
};
diff --git a/fs/mount.h b/fs/mount.h
index 13a4ebbbaa74..37c64bbe840c 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -57,6 +57,7 @@ struct mount {
struct mnt_namespace *mnt_ns; /* containing namespace */
struct mountpoint *mnt_mp; /* where is it mounted */
struct hlist_node mnt_mp_list; /* list mounts with the same mountpoint */
+ struct list_head mnt_umounting; /* list entry for umount propagation */
#ifdef CONFIG_FSNOTIFY
struct hlist_head mnt_fsnotify_marks;
__u32 mnt_fsnotify_mask;
diff --git a/fs/namespace.c b/fs/namespace.c
index 0f52d90c356f..f32450c3e72c 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -237,6 +237,7 @@ static struct mount *alloc_vfsmnt(const char *name)
INIT_LIST_HEAD(&mnt->mnt_slave_list);
INIT_LIST_HEAD(&mnt->mnt_slave);
INIT_HLIST_NODE(&mnt->mnt_mp_list);
+ INIT_LIST_HEAD(&mnt->mnt_umounting);
#ifdef CONFIG_FSNOTIFY
INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
#endif
diff --git a/fs/open.c b/fs/open.c
index e70cca15c976..1fd96c5d3895 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -898,6 +898,12 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o
int lookup_flags = 0;
int acc_mode;
+ /*
+ * Clear out all open flags we don't know about so that we don't report
+ * them in fcntl(F_GETFD) or similar interfaces.
+ */
+ flags &= VALID_OPEN_FLAGS;
+
if (flags & (O_CREAT | __O_TMPFILE))
op->mode = (mode & S_IALLUGO) | S_IFREG;
else
diff --git a/fs/pnode.c b/fs/pnode.c
index e4e428d621e9..ddb846f878b8 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -24,6 +24,11 @@ static inline struct mount *first_slave(struct mount *p)
return list_entry(p->mnt_slave_list.next, struct mount, mnt_slave);
}
+static inline struct mount *last_slave(struct mount *p)
+{
+ return list_entry(p->mnt_slave_list.prev, struct mount, mnt_slave);
+}
+
static inline struct mount *next_slave(struct mount *p)
{
return list_entry(p->mnt_slave.next, struct mount, mnt_slave);
@@ -164,6 +169,19 @@ static struct mount *propagation_next(struct mount *m,
}
}
+static struct mount *skip_propagation_subtree(struct mount *m,
+ struct mount *origin)
+{
+ /*
+ * Advance m such that propagation_next will not return
+ * the slaves of m.
+ */
+ if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
+ m = last_slave(m);
+
+ return m;
+}
+
static struct mount *next_group(struct mount *m, struct mount *origin)
{
while (1) {
@@ -415,65 +433,104 @@ void propagate_mount_unlock(struct mount *mnt)
}
}
-/*
- * Mark all mounts that the MNT_LOCKED logic will allow to be unmounted.
- */
-static void mark_umount_candidates(struct mount *mnt)
+static void umount_one(struct mount *mnt, struct list_head *to_umount)
{
- struct mount *parent = mnt->mnt_parent;
- struct mount *m;
-
- BUG_ON(parent == mnt);
-
- for (m = propagation_next(parent, parent); m;
- m = propagation_next(m, parent)) {
- struct mount *child = __lookup_mnt(&m->mnt,
- mnt->mnt_mountpoint);
- if (!child || (child->mnt.mnt_flags & MNT_UMOUNT))
- continue;
- if (!IS_MNT_LOCKED(child) || IS_MNT_MARKED(m)) {
- SET_MNT_MARK(child);
- }
- }
+ CLEAR_MNT_MARK(mnt);
+ mnt->mnt.mnt_flags |= MNT_UMOUNT;
+ list_del_init(&mnt->mnt_child);
+ list_del_init(&mnt->mnt_umounting);
+ list_move_tail(&mnt->mnt_list, to_umount);
}
/*
* NOTE: unmounting 'mnt' naturally propagates to all other mounts its
* parent propagates to.
*/
-static void __propagate_umount(struct mount *mnt)
+static bool __propagate_umount(struct mount *mnt,
+ struct list_head *to_umount,
+ struct list_head *to_restore)
{
- struct mount *parent = mnt->mnt_parent;
- struct mount *m;
+ bool progress = false;
+ struct mount *child;
- BUG_ON(parent == mnt);
+ /*
+ * The state of the parent won't change if this mount is
+ * already unmounted or marked as without children.
+ */
+ if (mnt->mnt.mnt_flags & (MNT_UMOUNT | MNT_MARKED))
+ goto out;
- for (m = propagation_next(parent, parent); m;
- m = propagation_next(m, parent)) {
- struct mount *topper;
- struct mount *child = __lookup_mnt(&m->mnt,
- mnt->mnt_mountpoint);
- /*
- * umount the child only if the child has no children
- * and the child is marked safe to unmount.
- */
- if (!child || !IS_MNT_MARKED(child))
+ /* Verify topper is the only grandchild that has not been
+ * speculatively unmounted.
+ */
+ list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
+ if (child->mnt_mountpoint == mnt->mnt.mnt_root)
continue;
- CLEAR_MNT_MARK(child);
+ if (!list_empty(&child->mnt_umounting) && IS_MNT_MARKED(child))
+ continue;
+ /* Found a mounted child */
+ goto children;
+ }
- /* If there is exactly one mount covering all of child
- * replace child with that mount.
- */
- topper = find_topper(child);
- if (topper)
- mnt_change_mountpoint(child->mnt_parent, child->mnt_mp,
- topper);
+ /* Mark mounts that can be unmounted if not locked */
+ SET_MNT_MARK(mnt);
+ progress = true;
+
+ /* If a mount is without children and not locked umount it. */
+ if (!IS_MNT_LOCKED(mnt)) {
+ umount_one(mnt, to_umount);
+ } else {
+children:
+ list_move_tail(&mnt->mnt_umounting, to_restore);
+ }
+out:
+ return progress;
+}
+
+static void umount_list(struct list_head *to_umount,
+ struct list_head *to_restore)
+{
+ struct mount *mnt, *child, *tmp;
+ list_for_each_entry(mnt, to_umount, mnt_list) {
+ list_for_each_entry_safe(child, tmp, &mnt->mnt_mounts, mnt_child) {
+ /* topper? */
+ if (child->mnt_mountpoint == mnt->mnt.mnt_root)
+ list_move_tail(&child->mnt_umounting, to_restore);
+ else
+ umount_one(child, to_umount);
+ }
+ }
+}
- if (list_empty(&child->mnt_mounts)) {
- list_del_init(&child->mnt_child);
- child->mnt.mnt_flags |= MNT_UMOUNT;
- list_move_tail(&child->mnt_list, &mnt->mnt_list);
+static void restore_mounts(struct list_head *to_restore)
+{
+ /* Restore mounts to a clean working state */
+ while (!list_empty(to_restore)) {
+ struct mount *mnt, *parent;
+ struct mountpoint *mp;
+
+ mnt = list_first_entry(to_restore, struct mount, mnt_umounting);
+ CLEAR_MNT_MARK(mnt);
+ list_del_init(&mnt->mnt_umounting);
+
+ /* Should this mount be reparented? */
+ mp = mnt->mnt_mp;
+ parent = mnt->mnt_parent;
+ while (parent->mnt.mnt_flags & MNT_UMOUNT) {
+ mp = parent->mnt_mp;
+ parent = parent->mnt_parent;
}
+ if (parent != mnt->mnt_parent)
+ mnt_change_mountpoint(parent, mp, mnt);
+ }
+}
+
+static void cleanup_umount_visitations(struct list_head *visited)
+{
+ while (!list_empty(visited)) {
+ struct mount *mnt =
+ list_first_entry(visited, struct mount, mnt_umounting);
+ list_del_init(&mnt->mnt_umounting);
}
}
@@ -487,12 +544,69 @@ static void __propagate_umount(struct mount *mnt)
int propagate_umount(struct list_head *list)
{
struct mount *mnt;
+ LIST_HEAD(to_restore);
+ LIST_HEAD(to_umount);
+ LIST_HEAD(visited);
+
+ /* Find candidates for unmounting */
+ list_for_each_entry_reverse(mnt, list, mnt_list) {
+ struct mount *parent = mnt->mnt_parent;
+ struct mount *m;
+
+ /*
+ * If this mount has already been visited it is known that it's
+ * entire peer group and all of their slaves in the propagation
+ * tree for the mountpoint has already been visited and there is
+ * no need to visit them again.
+ */
+ if (!list_empty(&mnt->mnt_umounting))
+ continue;
+
+ list_add_tail(&mnt->mnt_umounting, &visited);
+ for (m = propagation_next(parent, parent); m;
+ m = propagation_next(m, parent)) {
+ struct mount *child = __lookup_mnt(&m->mnt,
+ mnt->mnt_mountpoint);
+ if (!child)
+ continue;
+
+ if (!list_empty(&child->mnt_umounting)) {
+ /*
+ * If the child has already been visited it is
+ * know that it's entire peer group and all of
+ * their slaves in the propgation tree for the
+ * mountpoint has already been visited and there
+ * is no need to visit this subtree again.
+ */
+ m = skip_propagation_subtree(m, parent);
+ continue;
+ } else if (child->mnt.mnt_flags & MNT_UMOUNT) {
+ /*
+ * We have come accross an partially unmounted
+ * mount in list that has not been visited yet.
+ * Remember it has been visited and continue
+ * about our merry way.
+ */
+ list_add_tail(&child->mnt_umounting, &visited);
+ continue;
+ }
+
+ /* Check the child and parents while progress is made */
+ while (__propagate_umount(child,
+ &to_umount, &to_restore)) {
+ /* Is the parent a umount candidate? */
+ child = child->mnt_parent;
+ if (list_empty(&child->mnt_umounting))
+ break;
+ }
+ }
+ }
- list_for_each_entry_reverse(mnt, list, mnt_list)
- mark_umount_candidates(mnt);
+ umount_list(&to_umount, &to_restore);
+ restore_mounts(&to_restore);
+ cleanup_umount_visitations(&visited);
+ list_splice_tail(&to_umount, list);
- list_for_each_entry(mnt, list, mnt_list)
- __propagate_umount(mnt);
return 0;
}
diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c
index 60fea424835f..103dc45a131f 100644
--- a/fs/sdcardfs/inode.c
+++ b/fs/sdcardfs/inode.c
@@ -766,13 +766,9 @@ static int sdcardfs_setattr(struct vfsmount *mnt, struct dentry *dentry, struct
* afterwards in the other cases: we fsstack_copy_inode_size from
* the lower level.
*/
- if (current->mm)
- down_write(&current->mm->mmap_sem);
if (ia->ia_valid & ATTR_SIZE) {
err = inode_newsize_ok(&tmp, ia->ia_size);
if (err) {
- if (current->mm)
- up_write(&current->mm->mmap_sem);
goto out;
}
truncate_setsize(inode, ia->ia_size);
@@ -795,8 +791,6 @@ static int sdcardfs_setattr(struct vfsmount *mnt, struct dentry *dentry, struct
err = notify_change2(lower_mnt, lower_dentry, &lower_ia, /* note: lower_ia */
NULL);
mutex_unlock(&d_inode(lower_dentry)->i_mutex);
- if (current->mm)
- up_write(&current->mm->mmap_sem);
if (err)
goto out;
diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c
index 3c5b51d49d21..80825b287836 100644
--- a/fs/sdcardfs/main.c
+++ b/fs/sdcardfs/main.c
@@ -364,41 +364,34 @@ out:
return err;
}
-/* A feature which supports mount_nodev() with options */
-static struct dentry *mount_nodev_with_options(struct vfsmount *mnt,
- struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data,
- int (*fill_super)(struct vfsmount *, struct super_block *,
- const char *, void *, int))
+struct sdcardfs_mount_private {
+ struct vfsmount *mnt;
+ const char *dev_name;
+ void *raw_data;
+};
+static int __sdcardfs_fill_super(
+ struct super_block *sb,
+ void *_priv, int silent)
{
- int error;
- struct super_block *s = sget(fs_type, NULL, set_anon_super, flags, NULL);
-
- if (IS_ERR(s))
- return ERR_CAST(s);
-
- s->s_flags = flags;
+ struct sdcardfs_mount_private *priv = _priv;
- error = fill_super(mnt, s, dev_name, data, flags & MS_SILENT ? 1 : 0);
- if (error) {
- deactivate_locked_super(s);
- return ERR_PTR(error);
- }
- s->s_flags |= MS_ACTIVE;
- return dget(s->s_root);
+ return sdcardfs_read_super(priv->mnt,
+ sb, priv->dev_name, priv->raw_data, silent);
}
static struct dentry *sdcardfs_mount(struct vfsmount *mnt,
struct file_system_type *fs_type, int flags,
const char *dev_name, void *raw_data)
{
- /*
- * dev_name is a lower_path_name,
- * raw_data is a option string.
- */
- return mount_nodev_with_options(mnt, fs_type, flags, dev_name,
- raw_data, sdcardfs_read_super);
+ struct sdcardfs_mount_private priv = {
+ .mnt = mnt,
+ .dev_name = dev_name,
+ .raw_data = raw_data
+ };
+
+ return mount_nodev(fs_type, flags,
+ &priv, __sdcardfs_fill_super);
}
static struct dentry *sdcardfs_mount_wrn(struct file_system_type *fs_type,
@@ -423,7 +416,7 @@ void sdcardfs_kill_sb(struct super_block *sb)
list_del(&sbi->list);
mutex_unlock(&sdcardfs_super_list_lock);
}
- generic_shutdown_super(sb);
+ kill_anon_super(sb);
}
static struct file_system_type sdcardfs_fs_type = {