summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/acl.c2
-rw-r--r--fs/Kconfig1
-rw-r--r--fs/Makefile7
-rw-r--r--fs/attr.c14
-rw-r--r--fs/binfmt_elf.c2
-rw-r--r--fs/coredump.c2
-rw-r--r--fs/dcache.c1
-rw-r--r--fs/eventpoll.c6
-rw-r--r--fs/exec.c4
-rw-r--r--fs/ext2/ext2.h3
-rw-r--r--fs/ext2/super.c25
-rw-r--r--fs/ext2/xattr.c143
-rw-r--r--fs/ext2/xattr.h21
-rw-r--r--fs/ext4/Kconfig1
-rw-r--r--fs/ext4/crypto_fname.c3
-rw-r--r--fs/ext4/crypto_key.c101
-rw-r--r--fs/ext4/ext4.h6
-rw-r--r--fs/ext4/ext4_crypto.h4
-rw-r--r--fs/ext4/inline.c14
-rw-r--r--fs/ext4/inode.c54
-rw-r--r--fs/ext4/ioctl.c6
-rw-r--r--fs/ext4/mballoc.c28
-rw-r--r--fs/ext4/readpage.c47
-rw-r--r--fs/ext4/super.c7
-rw-r--r--fs/ext4/xattr.c136
-rw-r--r--fs/ext4/xattr.h5
-rw-r--r--fs/f2fs/data.c41
-rw-r--r--fs/f2fs/inline.c18
-rw-r--r--fs/fs-writeback.c2
-rw-r--r--fs/fs_struct.c3
-rw-r--r--fs/fuse/dev.c10
-rw-r--r--fs/fuse/dir.c45
-rw-r--r--fs/fuse/fuse_i.h3
-rw-r--r--fs/inode.c6
-rw-r--r--fs/internal.h4
-rw-r--r--fs/mbcache2.c359
-rw-r--r--fs/mpage.c36
-rw-r--r--fs/namei.c179
-rw-r--r--fs/namespace.c32
-rw-r--r--fs/notify/fanotify/fanotify_user.c2
-rw-r--r--fs/notify/inotify/inotify_user.c17
-rw-r--r--fs/open.c37
-rw-r--r--fs/pnode.c29
-rw-r--r--fs/pnode.h1
-rw-r--r--fs/proc/base.c137
-rw-r--r--fs/proc/kcore.c31
-rw-r--r--fs/proc/task_mmu.c65
-rw-r--r--fs/proc_namespace.c8
-rw-r--r--fs/pstore/platform.c36
-rw-r--r--fs/pstore/pmsg.c35
-rw-r--r--fs/pstore/ram.c132
-rw-r--r--fs/pstore/ram_core.c47
-rw-r--r--fs/sdcardfs/Kconfig13
-rw-r--r--fs/sdcardfs/Makefile7
-rw-r--r--fs/sdcardfs/dentry.c177
-rw-r--r--fs/sdcardfs/derived_perm.c465
-rw-r--r--fs/sdcardfs/file.c433
-rw-r--r--fs/sdcardfs/inode.c912
-rw-r--r--fs/sdcardfs/lookup.c464
-rw-r--r--fs/sdcardfs/main.c486
-rw-r--r--fs/sdcardfs/mmap.c89
-rw-r--r--fs/sdcardfs/multiuser.h53
-rw-r--r--fs/sdcardfs/packagelist.c893
-rw-r--r--fs/sdcardfs/sdcardfs.h632
-rw-r--r--fs/sdcardfs/super.c278
-rw-r--r--fs/select.c8
-rw-r--r--fs/squashfs/Kconfig28
-rw-r--r--fs/squashfs/Makefile3
-rw-r--r--fs/squashfs/block.c546
-rw-r--r--fs/squashfs/cache.c73
-rw-r--r--fs/squashfs/decompressor.c55
-rw-r--r--fs/squashfs/file.c140
-rw-r--r--fs/squashfs/file_cache.c38
-rw-r--r--fs/squashfs/file_direct.c245
-rw-r--r--fs/squashfs/lz4_wrapper.c32
-rw-r--r--fs/squashfs/lzo_wrapper.c40
-rw-r--r--fs/squashfs/page_actor.c175
-rw-r--r--fs/squashfs/page_actor.h84
-rw-r--r--fs/squashfs/squashfs.h11
-rw-r--r--fs/squashfs/squashfs_fs_sb.h2
-rw-r--r--fs/squashfs/super.c7
-rw-r--r--fs/squashfs/xz_wrapper.c15
-rw-r--r--fs/squashfs/zlib_wrapper.c14
-rw-r--r--fs/super.c30
-rw-r--r--fs/sync.c1
-rw-r--r--fs/userfaultfd.c9
-rw-r--r--fs/utimes.c2
87 files changed, 7444 insertions, 974 deletions
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index 929b618da43b..c30c6ceac2c4 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -283,6 +283,7 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler,
case ACL_TYPE_ACCESS:
if (acl) {
struct iattr iattr;
+ struct posix_acl *old_acl = acl;
retval = posix_acl_update_mode(inode, &iattr.ia_mode, &acl);
if (retval)
@@ -293,6 +294,7 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler,
* by the mode bits. So don't
* update ACL.
*/
+ posix_acl_release(old_acl);
value = NULL;
size = 0;
}
diff --git a/fs/Kconfig b/fs/Kconfig
index 6ce72d8d1ee1..a5d2dc39ba07 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -199,6 +199,7 @@ if MISC_FILESYSTEMS
source "fs/adfs/Kconfig"
source "fs/affs/Kconfig"
source "fs/ecryptfs/Kconfig"
+source "fs/sdcardfs/Kconfig"
source "fs/hfs/Kconfig"
source "fs/hfsplus/Kconfig"
source "fs/befs/Kconfig"
diff --git a/fs/Makefile b/fs/Makefile
index 79f522575cba..dee237540bc0 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -3,7 +3,7 @@
#
# 14 Sep 2000, Christoph Hellwig <hch@infradead.org>
# Rewritten to use lists instead of if-statements.
-#
+#
obj-y := open.o read_write.o file_table.o super.o \
char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \
@@ -41,7 +41,7 @@ obj-$(CONFIG_COMPAT_BINFMT_ELF) += compat_binfmt_elf.o
obj-$(CONFIG_BINFMT_ELF_FDPIC) += binfmt_elf_fdpic.o
obj-$(CONFIG_BINFMT_FLAT) += binfmt_flat.o
-obj-$(CONFIG_FS_MBCACHE) += mbcache.o
+obj-$(CONFIG_FS_MBCACHE) += mbcache.o mbcache2.o
obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o
obj-$(CONFIG_NFS_COMMON) += nfs_common/
obj-$(CONFIG_COREDUMP) += coredump.o
@@ -59,7 +59,7 @@ obj-y += devpts/
obj-$(CONFIG_PROFILING) += dcookies.o
obj-$(CONFIG_DLM) += dlm/
-
+
# Do not add any filesystems before this line
obj-$(CONFIG_FSCACHE) += fscache/
obj-$(CONFIG_REISERFS_FS) += reiserfs/
@@ -81,6 +81,7 @@ obj-$(CONFIG_ISO9660_FS) += isofs/
obj-$(CONFIG_HFSPLUS_FS) += hfsplus/ # Before hfs to find wrapped HFS+
obj-$(CONFIG_HFS_FS) += hfs/
obj-$(CONFIG_ECRYPT_FS) += ecryptfs/
+obj-$(CONFIG_SDCARD_FS) += sdcardfs/
obj-$(CONFIG_VXFS_FS) += freevxfs/
obj-$(CONFIG_NFS_FS) += nfs/
obj-$(CONFIG_EXPORTFS) += exportfs/
diff --git a/fs/attr.c b/fs/attr.c
index d62f674a605f..c86b37c38fb7 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -187,7 +187,7 @@ EXPORT_SYMBOL(setattr_copy);
* the file open for write, as there can be no conflicting delegation in
* that case.
*/
-int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **delegated_inode)
+int notify_change2(struct vfsmount *mnt, struct dentry * dentry, struct iattr * attr, struct inode **delegated_inode)
{
struct inode *inode = dentry->d_inode;
umode_t mode = inode->i_mode;
@@ -211,7 +211,7 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de
return -EPERM;
if (!inode_owner_or_capable(inode)) {
- error = inode_permission(inode, MAY_WRITE);
+ error = inode_permission2(mnt, inode, MAY_WRITE);
if (error)
return error;
}
@@ -277,7 +277,9 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de
if (error)
return error;
- if (inode->i_op->setattr)
+ if (mnt && inode->i_op->setattr2)
+ error = inode->i_op->setattr2(mnt, dentry, attr);
+ else if (inode->i_op->setattr)
error = inode->i_op->setattr(dentry, attr);
else
error = simple_setattr(dentry, attr);
@@ -290,4 +292,10 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de
return error;
}
+EXPORT_SYMBOL(notify_change2);
+
+int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **delegated_inode)
+{
+ return notify_change2(NULL, dentry, attr, delegated_inode);
+}
EXPORT_SYMBOL(notify_change);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 3a93755e880f..0c52941dd62c 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -651,7 +651,7 @@ static unsigned long randomize_stack_top(unsigned long stack_top)
if ((current->flags & PF_RANDOMIZE) &&
!(current->personality & ADDR_NO_RANDOMIZE)) {
- random_variable = (unsigned long) get_random_int();
+ random_variable = get_random_long();
random_variable &= STACK_RND_MASK;
random_variable <<= PAGE_SHIFT;
}
diff --git a/fs/coredump.c b/fs/coredump.c
index 5d15c4975ba1..fe0a28da18a6 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -720,7 +720,7 @@ void do_coredump(const siginfo_t *siginfo)
goto close_fail;
if (!(cprm.file->f_mode & FMODE_CAN_WRITE))
goto close_fail;
- if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
+ if (do_truncate2(cprm.file->f_path.mnt, cprm.file->f_path.dentry, 0, 0, cprm.file))
goto close_fail;
}
diff --git a/fs/dcache.c b/fs/dcache.c
index 849c1c1e787b..7b8feb6d60c8 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -3026,6 +3026,7 @@ char *d_absolute_path(const struct path *path,
return ERR_PTR(error);
return res;
}
+EXPORT_SYMBOL(d_absolute_path);
/*
* same as __d_path but appends "(deleted)" for unlinked files.
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 1e009cad8d5c..3ab9c68b8bce 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -34,6 +34,7 @@
#include <linux/mutex.h>
#include <linux/anon_inodes.h>
#include <linux/device.h>
+#include <linux/freezer.h>
#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/mman.h>
@@ -1587,7 +1588,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
{
int res = 0, eavail, timed_out = 0;
unsigned long flags;
- long slack = 0;
+ u64 slack = 0;
wait_queue_t wait;
ktime_t expires, *to = NULL;
@@ -1634,7 +1635,8 @@ fetch_events:
}
spin_unlock_irqrestore(&ep->lock, flags);
- if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
+ if (!freezable_schedule_hrtimeout_range(to, slack,
+ HRTIMER_MODE_ABS))
timed_out = 1;
spin_lock_irqsave(&ep->lock, flags);
diff --git a/fs/exec.c b/fs/exec.c
index 3a6de10d3891..b1f5ddbf7d56 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1132,8 +1132,10 @@ EXPORT_SYMBOL(flush_old_exec);
void would_dump(struct linux_binprm *bprm, struct file *file)
{
struct inode *inode = file_inode(file);
- if (inode_permission(inode, MAY_READ) < 0) {
+
+ if (inode_permission2(file->f_path.mnt, inode, MAY_READ) < 0) {
struct user_namespace *old, *user_ns;
+
bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
/* Ensure mm->user_ns contains the executable */
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 4c69c94cafd8..f98ce7e60a0f 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -61,6 +61,8 @@ struct ext2_block_alloc_info {
#define rsv_start rsv_window._rsv_start
#define rsv_end rsv_window._rsv_end
+struct mb2_cache;
+
/*
* second extended-fs super-block data in memory
*/
@@ -111,6 +113,7 @@ struct ext2_sb_info {
* of the mount options.
*/
spinlock_t s_lock;
+ struct mb2_cache *s_mb_cache;
};
static inline spinlock_t *
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 748d35afc902..111a31761ffa 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -131,7 +131,10 @@ static void ext2_put_super (struct super_block * sb)
dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
- ext2_xattr_put_super(sb);
+ if (sbi->s_mb_cache) {
+ ext2_xattr_destroy_cache(sbi->s_mb_cache);
+ sbi->s_mb_cache = NULL;
+ }
if (!(sb->s_flags & MS_RDONLY)) {
struct ext2_super_block *es = sbi->s_es;
@@ -1104,6 +1107,14 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
ext2_msg(sb, KERN_ERR, "error: insufficient memory");
goto failed_mount3;
}
+
+#ifdef CONFIG_EXT2_FS_XATTR
+ sbi->s_mb_cache = ext2_xattr_create_cache();
+ if (!sbi->s_mb_cache) {
+ ext2_msg(sb, KERN_ERR, "Failed to create an mb_cache");
+ goto failed_mount3;
+ }
+#endif
/*
* set up enough so that it can read an inode
*/
@@ -1149,6 +1160,8 @@ cantfind_ext2:
sb->s_id);
goto failed_mount;
failed_mount3:
+ if (sbi->s_mb_cache)
+ ext2_xattr_destroy_cache(sbi->s_mb_cache);
percpu_counter_destroy(&sbi->s_freeblocks_counter);
percpu_counter_destroy(&sbi->s_freeinodes_counter);
percpu_counter_destroy(&sbi->s_dirs_counter);
@@ -1555,20 +1568,17 @@ MODULE_ALIAS_FS("ext2");
static int __init init_ext2_fs(void)
{
- int err = init_ext2_xattr();
- if (err)
- return err;
+ int err;
+
err = init_inodecache();
if (err)
- goto out1;
+ return err;
err = register_filesystem(&ext2_fs_type);
if (err)
goto out;
return 0;
out:
destroy_inodecache();
-out1:
- exit_ext2_xattr();
return err;
}
@@ -1576,7 +1586,6 @@ static void __exit exit_ext2_fs(void)
{
unregister_filesystem(&ext2_fs_type);
destroy_inodecache();
- exit_ext2_xattr();
}
MODULE_AUTHOR("Remy Card and others");
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index fa70848afa8f..24736c8b3d51 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -56,7 +56,7 @@
#include <linux/buffer_head.h>
#include <linux/init.h>
#include <linux/slab.h>
-#include <linux/mbcache.h>
+#include <linux/mbcache2.h>
#include <linux/quotaops.h>
#include <linux/rwsem.h>
#include <linux/security.h>
@@ -92,14 +92,12 @@
static int ext2_xattr_set2(struct inode *, struct buffer_head *,
struct ext2_xattr_header *);
-static int ext2_xattr_cache_insert(struct buffer_head *);
+static int ext2_xattr_cache_insert(struct mb2_cache *, struct buffer_head *);
static struct buffer_head *ext2_xattr_cache_find(struct inode *,
struct ext2_xattr_header *);
static void ext2_xattr_rehash(struct ext2_xattr_header *,
struct ext2_xattr_entry *);
-static struct mb_cache *ext2_xattr_cache;
-
static const struct xattr_handler *ext2_xattr_handler_map[] = {
[EXT2_XATTR_INDEX_USER] = &ext2_xattr_user_handler,
#ifdef CONFIG_EXT2_FS_POSIX_ACL
@@ -154,6 +152,7 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name,
size_t name_len, size;
char *end;
int error;
+ struct mb2_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache;
ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
name_index, name, buffer, (long)buffer_size);
@@ -198,7 +197,7 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_get",
goto found;
entry = next;
}
- if (ext2_xattr_cache_insert(bh))
+ if (ext2_xattr_cache_insert(ext2_mb_cache, bh))
ea_idebug(inode, "cache insert failed");
error = -ENODATA;
goto cleanup;
@@ -211,7 +210,7 @@ found:
le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize)
goto bad_block;
- if (ext2_xattr_cache_insert(bh))
+ if (ext2_xattr_cache_insert(ext2_mb_cache, bh))
ea_idebug(inode, "cache insert failed");
if (buffer) {
error = -ERANGE;
@@ -249,6 +248,7 @@ ext2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
char *end;
size_t rest = buffer_size;
int error;
+ struct mb2_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache;
ea_idebug(inode, "buffer=%p, buffer_size=%ld",
buffer, (long)buffer_size);
@@ -283,7 +283,7 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_list",
goto bad_block;
entry = next;
}
- if (ext2_xattr_cache_insert(bh))
+ if (ext2_xattr_cache_insert(ext2_mb_cache, bh))
ea_idebug(inode, "cache insert failed");
/* list the attribute names */
@@ -480,22 +480,23 @@ bad_block: ext2_error(sb, "ext2_xattr_set",
/* Here we know that we can set the new attribute. */
if (header) {
- struct mb_cache_entry *ce;
-
/* assert(header == HDR(bh)); */
- ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_bdev,
- bh->b_blocknr);
lock_buffer(bh);
if (header->h_refcount == cpu_to_le32(1)) {
+ __u32 hash = le32_to_cpu(header->h_hash);
+
ea_bdebug(bh, "modifying in-place");
- if (ce)
- mb_cache_entry_free(ce);
+ /*
+ * This must happen under buffer lock for
+ * ext2_xattr_set2() to reliably detect modified block
+ */
+ mb2_cache_entry_delete_block(EXT2_SB(sb)->s_mb_cache,
+ hash, bh->b_blocknr);
+
/* keep the buffer locked while modifying it. */
} else {
int offset;
- if (ce)
- mb_cache_entry_release(ce);
unlock_buffer(bh);
ea_bdebug(bh, "cloning");
header = kmalloc(bh->b_size, GFP_KERNEL);
@@ -623,6 +624,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
struct super_block *sb = inode->i_sb;
struct buffer_head *new_bh = NULL;
int error;
+ struct mb2_cache *ext2_mb_cache = EXT2_SB(sb)->s_mb_cache;
if (header) {
new_bh = ext2_xattr_cache_find(inode, header);
@@ -650,7 +652,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
don't need to change the reference count. */
new_bh = old_bh;
get_bh(new_bh);
- ext2_xattr_cache_insert(new_bh);
+ ext2_xattr_cache_insert(ext2_mb_cache, new_bh);
} else {
/* We need to allocate a new block */
ext2_fsblk_t goal = ext2_group_first_block_no(sb,
@@ -671,7 +673,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
memcpy(new_bh->b_data, header, new_bh->b_size);
set_buffer_uptodate(new_bh);
unlock_buffer(new_bh);
- ext2_xattr_cache_insert(new_bh);
+ ext2_xattr_cache_insert(ext2_mb_cache, new_bh);
ext2_xattr_update_super_block(sb);
}
@@ -704,19 +706,21 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
error = 0;
if (old_bh && old_bh != new_bh) {
- struct mb_cache_entry *ce;
-
/*
* If there was an old block and we are no longer using it,
* release the old block.
*/
- ce = mb_cache_entry_get(ext2_xattr_cache, old_bh->b_bdev,
- old_bh->b_blocknr);
lock_buffer(old_bh);
if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
+ __u32 hash = le32_to_cpu(HDR(old_bh)->h_hash);
+
+ /*
+ * This must happen under buffer lock for
+ * ext2_xattr_set2() to reliably detect freed block
+ */
+ mb2_cache_entry_delete_block(ext2_mb_cache,
+ hash, old_bh->b_blocknr);
/* Free the old block. */
- if (ce)
- mb_cache_entry_free(ce);
ea_bdebug(old_bh, "freeing");
ext2_free_blocks(inode, old_bh->b_blocknr, 1);
mark_inode_dirty(inode);
@@ -727,8 +731,6 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
} else {
/* Decrement the refcount only. */
le32_add_cpu(&HDR(old_bh)->h_refcount, -1);
- if (ce)
- mb_cache_entry_release(ce);
dquot_free_block_nodirty(inode, 1);
mark_inode_dirty(inode);
mark_buffer_dirty(old_bh);
@@ -754,7 +756,6 @@ void
ext2_xattr_delete_inode(struct inode *inode)
{
struct buffer_head *bh = NULL;
- struct mb_cache_entry *ce;
down_write(&EXT2_I(inode)->xattr_sem);
if (!EXT2_I(inode)->i_file_acl)
@@ -774,19 +775,22 @@ ext2_xattr_delete_inode(struct inode *inode)
EXT2_I(inode)->i_file_acl);
goto cleanup;
}
- ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_bdev, bh->b_blocknr);
lock_buffer(bh);
if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
- if (ce)
- mb_cache_entry_free(ce);
+ __u32 hash = le32_to_cpu(HDR(bh)->h_hash);
+
+ /*
+ * This must happen under buffer lock for ext2_xattr_set2() to
+ * reliably detect freed block
+ */
+ mb2_cache_entry_delete_block(EXT2_SB(inode->i_sb)->s_mb_cache,
+ hash, bh->b_blocknr);
ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1);
get_bh(bh);
bforget(bh);
unlock_buffer(bh);
} else {
le32_add_cpu(&HDR(bh)->h_refcount, -1);
- if (ce)
- mb_cache_entry_release(ce);
ea_bdebug(bh, "refcount now=%d",
le32_to_cpu(HDR(bh)->h_refcount));
unlock_buffer(bh);
@@ -803,18 +807,6 @@ cleanup:
}
/*
- * ext2_xattr_put_super()
- *
- * This is called when a file system is unmounted.
- */
-void
-ext2_xattr_put_super(struct super_block *sb)
-{
- mb_cache_shrink(sb->s_bdev);
-}
-
-
-/*
* ext2_xattr_cache_insert()
*
* Create a new entry in the extended attribute cache, and insert
@@ -823,28 +815,20 @@ ext2_xattr_put_super(struct super_block *sb)
* Returns 0, or a negative error number on failure.
*/
static int
-ext2_xattr_cache_insert(struct buffer_head *bh)
+ext2_xattr_cache_insert(struct mb2_cache *cache, struct buffer_head *bh)
{
__u32 hash = le32_to_cpu(HDR(bh)->h_hash);
- struct mb_cache_entry *ce;
int error;
- ce = mb_cache_entry_alloc(ext2_xattr_cache, GFP_NOFS);
- if (!ce)
- return -ENOMEM;
- error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash);
+ error = mb2_cache_entry_create(cache, GFP_NOFS, hash, bh->b_blocknr);
if (error) {
- mb_cache_entry_free(ce);
if (error == -EBUSY) {
ea_bdebug(bh, "already in cache (%d cache entries)",
atomic_read(&ext2_xattr_cache->c_entry_count));
error = 0;
}
- } else {
- ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash,
- atomic_read(&ext2_xattr_cache->c_entry_count));
- mb_cache_entry_release(ce);
- }
+ } else
+ ea_bdebug(bh, "inserting [%x]", (int)hash);
return error;
}
@@ -900,23 +884,17 @@ static struct buffer_head *
ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header)
{
__u32 hash = le32_to_cpu(header->h_hash);
- struct mb_cache_entry *ce;
+ struct mb2_cache_entry *ce;
+ struct mb2_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache;
if (!header->h_hash)
return NULL; /* never share */
ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
again:
- ce = mb_cache_entry_find_first(ext2_xattr_cache, inode->i_sb->s_bdev,
- hash);
+ ce = mb2_cache_entry_find_first(ext2_mb_cache, hash);
while (ce) {
struct buffer_head *bh;
- if (IS_ERR(ce)) {
- if (PTR_ERR(ce) == -EAGAIN)
- goto again;
- break;
- }
-
bh = sb_bread(inode->i_sb, ce->e_block);
if (!bh) {
ext2_error(inode->i_sb, "ext2_xattr_cache_find",
@@ -924,7 +902,21 @@ again:
inode->i_ino, (unsigned long) ce->e_block);
} else {
lock_buffer(bh);
- if (le32_to_cpu(HDR(bh)->h_refcount) >
+ /*
+ * We have to be careful about races with freeing or
+ * rehashing of xattr block. Once we hold buffer lock
+ * xattr block's state is stable so we can check
+ * whether the block got freed / rehashed or not.
+ * Since we unhash mbcache entry under buffer lock when
+ * freeing / rehashing xattr block, checking whether
+ * entry is still hashed is reliable.
+ */
+ if (hlist_bl_unhashed(&ce->e_hash_list)) {
+ mb2_cache_entry_put(ext2_mb_cache, ce);
+ unlock_buffer(bh);
+ brelse(bh);
+ goto again;
+ } else if (le32_to_cpu(HDR(bh)->h_refcount) >
EXT2_XATTR_REFCOUNT_MAX) {
ea_idebug(inode, "block %ld refcount %d>%d",
(unsigned long) ce->e_block,
@@ -933,13 +925,14 @@ again:
} else if (!ext2_xattr_cmp(header, HDR(bh))) {
ea_bdebug(bh, "b_count=%d",
atomic_read(&(bh->b_count)));
- mb_cache_entry_release(ce);
+ mb2_cache_entry_touch(ext2_mb_cache, ce);
+ mb2_cache_entry_put(ext2_mb_cache, ce);
return bh;
}
unlock_buffer(bh);
brelse(bh);
}
- ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash);
+ ce = mb2_cache_entry_find_next(ext2_mb_cache, ce);
}
return NULL;
}
@@ -1012,17 +1005,15 @@ static void ext2_xattr_rehash(struct ext2_xattr_header *header,
#undef BLOCK_HASH_SHIFT
-int __init
-init_ext2_xattr(void)
+#define HASH_BUCKET_BITS 10
+
+struct mb2_cache *ext2_xattr_create_cache(void)
{
- ext2_xattr_cache = mb_cache_create("ext2_xattr", 6);
- if (!ext2_xattr_cache)
- return -ENOMEM;
- return 0;
+ return mb2_cache_create(HASH_BUCKET_BITS);
}
-void
-exit_ext2_xattr(void)
+void ext2_xattr_destroy_cache(struct mb2_cache *cache)
{
- mb_cache_destroy(ext2_xattr_cache);
+ if (cache)
+ mb2_cache_destroy(cache);
}
diff --git a/fs/ext2/xattr.h b/fs/ext2/xattr.h
index 60edf298644e..6ea38aa9563a 100644
--- a/fs/ext2/xattr.h
+++ b/fs/ext2/xattr.h
@@ -53,6 +53,8 @@ struct ext2_xattr_entry {
#define EXT2_XATTR_SIZE(size) \
(((size) + EXT2_XATTR_ROUND) & ~EXT2_XATTR_ROUND)
+struct mb2_cache;
+
# ifdef CONFIG_EXT2_FS_XATTR
extern const struct xattr_handler ext2_xattr_user_handler;
@@ -65,10 +67,9 @@ extern int ext2_xattr_get(struct inode *, int, const char *, void *, size_t);
extern int ext2_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
extern void ext2_xattr_delete_inode(struct inode *);
-extern void ext2_xattr_put_super(struct super_block *);
-extern int init_ext2_xattr(void);
-extern void exit_ext2_xattr(void);
+extern struct mb2_cache *ext2_xattr_create_cache(void);
+extern void ext2_xattr_destroy_cache(struct mb2_cache *cache);
extern const struct xattr_handler *ext2_xattr_handlers[];
@@ -93,19 +94,7 @@ ext2_xattr_delete_inode(struct inode *inode)
{
}
-static inline void
-ext2_xattr_put_super(struct super_block *sb)
-{
-}
-
-static inline int
-init_ext2_xattr(void)
-{
- return 0;
-}
-
-static inline void
-exit_ext2_xattr(void)
+static inline void ext2_xattr_destroy_cache(struct mb2_cache *cache)
{
}
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index b46e9fc64196..3c8293215603 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -106,6 +106,7 @@ config EXT4_ENCRYPTION
select CRYPTO_ECB
select CRYPTO_XTS
select CRYPTO_CTS
+ select CRYPTO_HEH
select CRYPTO_CTR
select CRYPTO_SHA256
select KEYS
diff --git a/fs/ext4/crypto_fname.c b/fs/ext4/crypto_fname.c
index 2fbef8a14760..e2645ca9b95e 100644
--- a/fs/ext4/crypto_fname.c
+++ b/fs/ext4/crypto_fname.c
@@ -44,7 +44,8 @@ static void ext4_dir_crypt_complete(struct crypto_async_request *req, int res)
bool ext4_valid_filenames_enc_mode(uint32_t mode)
{
- return (mode == EXT4_ENCRYPTION_MODE_AES_256_CTS);
+ return (mode == EXT4_ENCRYPTION_MODE_AES_256_CTS ||
+ mode == EXT4_ENCRYPTION_MODE_AES_256_HEH);
}
static unsigned max_name_len(struct inode *inode)
diff --git a/fs/ext4/crypto_key.c b/fs/ext4/crypto_key.c
index 505f8afde57c..22096e31a720 100644
--- a/fs/ext4/crypto_key.c
+++ b/fs/ext4/crypto_key.c
@@ -29,16 +29,16 @@ static void derive_crypt_complete(struct crypto_async_request *req, int rc)
}
/**
- * ext4_derive_key_aes() - Derive a key using AES-128-ECB
+ * ext4_derive_key_v1() - Derive a key using AES-128-ECB
* @deriving_key: Encryption key used for derivation.
* @source_key: Source key to which to apply derivation.
* @derived_key: Derived key.
*
- * Return: Zero on success; non-zero otherwise.
+ * Return: 0 on success, -errno on failure
*/
-static int ext4_derive_key_aes(char deriving_key[EXT4_AES_128_ECB_KEY_SIZE],
- char source_key[EXT4_AES_256_XTS_KEY_SIZE],
- char derived_key[EXT4_AES_256_XTS_KEY_SIZE])
+static int ext4_derive_key_v1(const char deriving_key[EXT4_AES_128_ECB_KEY_SIZE],
+ const char source_key[EXT4_AES_256_XTS_KEY_SIZE],
+ char derived_key[EXT4_AES_256_XTS_KEY_SIZE])
{
int res = 0;
struct ablkcipher_request *req = NULL;
@@ -83,6 +83,91 @@ out:
return res;
}
+/**
+ * ext4_derive_key_v2() - Derive a key non-reversibly
+ * @nonce: the nonce associated with the file
+ * @master_key: the master key referenced by the file
+ * @derived_key: (output) the resulting derived key
+ *
+ * This function computes the following:
+ * derived_key[0:127] = AES-256-ENCRYPT(master_key[0:255], nonce)
+ * derived_key[128:255] = AES-256-ENCRYPT(master_key[0:255], nonce ^ 0x01)
+ * derived_key[256:383] = AES-256-ENCRYPT(master_key[256:511], nonce)
+ * derived_key[384:511] = AES-256-ENCRYPT(master_key[256:511], nonce ^ 0x01)
+ *
+ * 'nonce ^ 0x01' denotes flipping the low order bit of the last byte.
+ *
+ * Unlike the v1 algorithm, the v2 algorithm is "non-reversible", meaning that
+ * compromising a derived key does not also compromise the master key.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+static int ext4_derive_key_v2(const char nonce[EXT4_KEY_DERIVATION_NONCE_SIZE],
+ const char master_key[EXT4_MAX_KEY_SIZE],
+ char derived_key[EXT4_MAX_KEY_SIZE])
+{
+ const int noncelen = EXT4_KEY_DERIVATION_NONCE_SIZE;
+ struct crypto_cipher *tfm;
+ int err;
+ int i;
+
+ /*
+ * Since we only use each transform for a small number of encryptions,
+ * requesting just "aes" turns out to be significantly faster than
+ * "ecb(aes)", by about a factor of two.
+ */
+ tfm = crypto_alloc_cipher("aes", 0, 0);
+ if (IS_ERR(tfm))
+ return PTR_ERR(tfm);
+
+ BUILD_BUG_ON(4 * EXT4_KEY_DERIVATION_NONCE_SIZE != EXT4_MAX_KEY_SIZE);
+ BUILD_BUG_ON(2 * EXT4_AES_256_ECB_KEY_SIZE != EXT4_MAX_KEY_SIZE);
+ for (i = 0; i < 2; i++) {
+ memcpy(derived_key, nonce, noncelen);
+ memcpy(derived_key + noncelen, nonce, noncelen);
+ derived_key[2 * noncelen - 1] ^= 0x01;
+ err = crypto_cipher_setkey(tfm, master_key,
+ EXT4_AES_256_ECB_KEY_SIZE);
+ if (err)
+ break;
+ crypto_cipher_encrypt_one(tfm, derived_key, derived_key);
+ crypto_cipher_encrypt_one(tfm, derived_key + noncelen,
+ derived_key + noncelen);
+ master_key += EXT4_AES_256_ECB_KEY_SIZE;
+ derived_key += 2 * noncelen;
+ }
+ crypto_free_cipher(tfm);
+ return err;
+}
+
+/**
+ * ext4_derive_key() - Derive a per-file key from a nonce and master key
+ * @ctx: the encryption context associated with the file
+ * @master_key: the master key referenced by the file
+ * @derived_key: (output) the resulting derived key
+ *
+ * Return: 0 on success, -errno on failure
+ */
+static int ext4_derive_key(const struct ext4_encryption_context *ctx,
+ const char master_key[EXT4_MAX_KEY_SIZE],
+ char derived_key[EXT4_MAX_KEY_SIZE])
+{
+ BUILD_BUG_ON(EXT4_AES_128_ECB_KEY_SIZE != EXT4_KEY_DERIVATION_NONCE_SIZE);
+ BUILD_BUG_ON(EXT4_AES_256_XTS_KEY_SIZE != EXT4_MAX_KEY_SIZE);
+
+ /*
+ * Although the key derivation algorithm is logically independent of the
+ * choice of encryption modes, in this kernel it is bundled with HEH
+ * encryption of filenames, which is another crypto improvement that
+ * requires an on-disk format change and requires userspace to specify
+ * different encryption policies.
+ */
+ if (ctx->filenames_encryption_mode == EXT4_ENCRYPTION_MODE_AES_256_HEH)
+ return ext4_derive_key_v2(ctx->nonce, master_key, derived_key);
+ else
+ return ext4_derive_key_v1(ctx->nonce, master_key, derived_key);
+}
+
void ext4_free_crypt_info(struct ext4_crypt_info *ci)
{
if (!ci)
@@ -172,6 +257,9 @@ int ext4_get_encryption_info(struct inode *inode)
case EXT4_ENCRYPTION_MODE_AES_256_CTS:
cipher_str = "cts(cbc(aes))";
break;
+ case EXT4_ENCRYPTION_MODE_AES_256_HEH:
+ cipher_str = "heh(aes)";
+ break;
default:
printk_once(KERN_WARNING
"ext4: unsupported key mode %d (ino %u)\n",
@@ -220,8 +308,7 @@ int ext4_get_encryption_info(struct inode *inode)
up_read(&keyring_key->sem);
goto out;
}
- res = ext4_derive_key_aes(ctx.nonce, master_key->raw,
- raw_key);
+ res = ext4_derive_key(&ctx, master_key->raw, raw_key);
up_read(&keyring_key->sem);
if (res)
goto out;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 362d59b24f1d..2b3ac9fa9460 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -589,6 +589,7 @@ enum {
#define EXT4_ENCRYPTION_MODE_AES_256_GCM 2
#define EXT4_ENCRYPTION_MODE_AES_256_CBC 3
#define EXT4_ENCRYPTION_MODE_AES_256_CTS 4
+#define EXT4_ENCRYPTION_MODE_AES_256_HEH 126
#include "ext4_crypto.h"
@@ -1441,7 +1442,7 @@ struct ext4_sb_info {
struct list_head s_es_list; /* List of inodes with reclaimable extents */
long s_es_nr_inode;
struct ext4_es_stats s_es_stats;
- struct mb_cache *s_mb_cache;
+ struct mb2_cache *s_mb_cache;
spinlock_t s_es_lock ____cacheline_aligned_in_smp;
/* Ratelimit ext4 messages. */
@@ -2453,7 +2454,8 @@ extern int ext4_mb_add_groupinfo(struct super_block *sb,
ext4_group_t i, struct ext4_group_desc *desc);
extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
ext4_fsblk_t block, unsigned long count);
-extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
+extern int ext4_trim_fs(struct super_block *, struct fstrim_range *,
+ unsigned long blkdev_flags);
/* inode.c */
int ext4_inode_is_fast_symlink(struct inode *inode);
diff --git a/fs/ext4/ext4_crypto.h b/fs/ext4/ext4_crypto.h
index 1b17b05b9f4d..e52637d969db 100644
--- a/fs/ext4/ext4_crypto.h
+++ b/fs/ext4/ext4_crypto.h
@@ -58,8 +58,10 @@ struct ext4_encryption_context {
#define EXT4_XTS_TWEAK_SIZE 16
#define EXT4_AES_128_ECB_KEY_SIZE 16
#define EXT4_AES_256_GCM_KEY_SIZE 32
+#define EXT4_AES_256_ECB_KEY_SIZE 32
#define EXT4_AES_256_CBC_KEY_SIZE 32
#define EXT4_AES_256_CTS_KEY_SIZE 32
+#define EXT4_AES_256_HEH_KEY_SIZE 32
#define EXT4_AES_256_XTS_KEY_SIZE 64
#define EXT4_MAX_KEY_SIZE 64
@@ -120,6 +122,8 @@ static inline int ext4_encryption_key_size(int mode)
return EXT4_AES_256_CBC_KEY_SIZE;
case EXT4_ENCRYPTION_MODE_AES_256_CTS:
return EXT4_AES_256_CTS_KEY_SIZE;
+ case EXT4_ENCRYPTION_MODE_AES_256_HEH:
+ return EXT4_AES_256_HEH_KEY_SIZE;
default:
BUG();
}
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index dad8e7bdf0a6..bc7c082b7913 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -18,6 +18,7 @@
#include "ext4.h"
#include "xattr.h"
#include "truncate.h"
+#include <trace/events/android_fs.h>
#define EXT4_XATTR_SYSTEM_DATA "data"
#define EXT4_MIN_INLINE_DATA_SIZE ((sizeof(__le32) * EXT4_N_BLOCKS))
@@ -502,6 +503,17 @@ int ext4_readpage_inline(struct inode *inode, struct page *page)
return -EAGAIN;
}
+ if (trace_android_fs_dataread_start_enabled()) {
+ char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+ path = android_fstrace_get_pathname(pathbuf,
+ MAX_TRACE_PATHBUF_LEN,
+ inode);
+ trace_android_fs_dataread_start(inode, page_offset(page),
+ PAGE_SIZE, current->pid,
+ path, current->comm);
+ }
+
/*
* Current inline data can only exist in the 1st page,
* So for all the other pages, just set them uptodate.
@@ -513,6 +525,8 @@ int ext4_readpage_inline(struct inode *inode, struct page *page)
SetPageUptodate(page);
}
+ trace_android_fs_dataread_end(inode, page_offset(page), PAGE_SIZE);
+
up_read(&EXT4_I(inode)->xattr_sem);
unlock_page(page);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 817a937de733..8f917150ca69 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -44,6 +44,7 @@
#include "truncate.h"
#include <trace/events/ext4.h>
+#include <trace/events/android_fs.h>
#define MPAGE_DA_EXTENT_TAIL 0x01
@@ -1015,6 +1016,16 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
pgoff_t index;
unsigned from, to;
+ if (trace_android_fs_datawrite_start_enabled()) {
+ char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+ path = android_fstrace_get_pathname(pathbuf,
+ MAX_TRACE_PATHBUF_LEN,
+ inode);
+ trace_android_fs_datawrite_start(inode, pos, len,
+ current->pid, path,
+ current->comm);
+ }
trace_ext4_write_begin(inode, pos, len, flags);
/*
* Reserve one block more for addition to orphan list in case
@@ -1151,6 +1162,7 @@ static int ext4_write_end(struct file *file,
int ret = 0, ret2;
int i_size_changed = 0;
+ trace_android_fs_datawrite_end(inode, pos, len);
trace_ext4_write_end(inode, pos, len, copied);
if (ext4_test_inode_state(inode, EXT4_STATE_ORDERED_MODE)) {
ret = ext4_jbd2_file_inode(handle, inode);
@@ -1264,6 +1276,7 @@ static int ext4_journalled_write_end(struct file *file,
unsigned from, to;
int size_changed = 0;
+ trace_android_fs_datawrite_end(inode, pos, len);
trace_ext4_journalled_write_end(inode, pos, len, copied);
from = pos & (PAGE_CACHE_SIZE - 1);
to = from + len;
@@ -2739,6 +2752,16 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
len, flags, pagep, fsdata);
}
*fsdata = (void *)0;
+ if (trace_android_fs_datawrite_start_enabled()) {
+ char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+ path = android_fstrace_get_pathname(pathbuf,
+ MAX_TRACE_PATHBUF_LEN,
+ inode);
+ trace_android_fs_datawrite_start(inode, pos, len,
+ current->pid,
+ path, current->comm);
+ }
trace_ext4_da_write_begin(inode, pos, len, flags);
if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
@@ -2857,6 +2880,7 @@ static int ext4_da_write_end(struct file *file,
return ext4_write_end(file, mapping, pos,
len, copied, page, fsdata);
+ trace_android_fs_datawrite_end(inode, pos, len);
trace_ext4_da_write_end(inode, pos, len, copied);
start = pos & (PAGE_CACHE_SIZE - 1);
end = start + copied - 1;
@@ -3345,12 +3369,42 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
if (ext4_has_inline_data(inode))
return 0;
+ if (trace_android_fs_dataread_start_enabled() &&
+ (iov_iter_rw(iter) == READ)) {
+ char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+ path = android_fstrace_get_pathname(pathbuf,
+ MAX_TRACE_PATHBUF_LEN,
+ inode);
+ trace_android_fs_dataread_start(inode, offset, count,
+ current->pid, path,
+ current->comm);
+ }
+ if (trace_android_fs_datawrite_start_enabled() &&
+ (iov_iter_rw(iter) == WRITE)) {
+ char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+ path = android_fstrace_get_pathname(pathbuf,
+ MAX_TRACE_PATHBUF_LEN,
+ inode);
+ trace_android_fs_datawrite_start(inode, offset, count,
+ current->pid, path,
+ current->comm);
+ }
trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
ret = ext4_ext_direct_IO(iocb, iter, offset);
else
ret = ext4_ind_direct_IO(iocb, iter, offset);
trace_ext4_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), ret);
+
+ if (trace_android_fs_dataread_start_enabled() &&
+ (iov_iter_rw(iter) == READ))
+ trace_android_fs_dataread_end(inode, offset, count);
+ if (trace_android_fs_datawrite_start_enabled() &&
+ (iov_iter_rw(iter) == WRITE))
+ trace_android_fs_datawrite_end(inode, offset, count);
+
return ret;
}
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 789e2d6724a9..3a2594665b44 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -587,11 +587,13 @@ resizefs_out:
return err;
}
+ case FIDTRIM:
case FITRIM:
{
struct request_queue *q = bdev_get_queue(sb->s_bdev);
struct fstrim_range range;
int ret = 0;
+ int flags = cmd == FIDTRIM ? BLKDEV_DISCARD_SECURE : 0;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -599,13 +601,15 @@ resizefs_out:
if (!blk_queue_discard(q))
return -EOPNOTSUPP;
+ if ((flags & BLKDEV_DISCARD_SECURE) && !blk_queue_secdiscard(q))
+ return -EOPNOTSUPP;
if (copy_from_user(&range, (struct fstrim_range __user *)arg,
sizeof(range)))
return -EFAULT;
range.minlen = max((unsigned int)range.minlen,
q->limits.discard_granularity);
- ret = ext4_trim_fs(sb, &range);
+ ret = ext4_trim_fs(sb, &range, flags);
if (ret < 0)
return ret;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 84cd77663e1f..c2810503eb50 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2770,7 +2770,8 @@ int ext4_mb_release(struct super_block *sb)
}
static inline int ext4_issue_discard(struct super_block *sb,
- ext4_group_t block_group, ext4_grpblk_t cluster, int count)
+ ext4_group_t block_group, ext4_grpblk_t cluster, int count,
+ unsigned long flags)
{
ext4_fsblk_t discard_block;
@@ -2779,7 +2780,7 @@ static inline int ext4_issue_discard(struct super_block *sb,
count = EXT4_C2B(EXT4_SB(sb), count);
trace_ext4_discard_blocks(sb,
(unsigned long long) discard_block, count);
- return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
+ return sb_issue_discard(sb, discard_block, count, GFP_NOFS, flags);
}
/*
@@ -2801,7 +2802,7 @@ static void ext4_free_data_callback(struct super_block *sb,
if (test_opt(sb, DISCARD)) {
err = ext4_issue_discard(sb, entry->efd_group,
entry->efd_start_cluster,
- entry->efd_count);
+ entry->efd_count, 0);
if (err && err != -EOPNOTSUPP)
ext4_msg(sb, KERN_WARNING, "discard request in"
" group:%d block:%d count:%d failed"
@@ -4853,7 +4854,8 @@ do_more:
* them with group lock_held
*/
if (test_opt(sb, DISCARD)) {
- err = ext4_issue_discard(sb, block_group, bit, count);
+ err = ext4_issue_discard(sb, block_group, bit, count,
+ 0);
if (err && err != -EOPNOTSUPP)
ext4_msg(sb, KERN_WARNING, "discard request in"
" group:%d block:%d count:%lu failed"
@@ -5049,13 +5051,15 @@ error_return:
* @count: number of blocks to TRIM
* @group: alloc. group we are working with
* @e4b: ext4 buddy for the group
+ * @blkdev_flags: flags for the block device
*
* Trim "count" blocks starting at "start" in the "group". To assure that no
* one will allocate those blocks, mark it as used in buddy bitmap. This must
* be called with under the group lock.
*/
static int ext4_trim_extent(struct super_block *sb, int start, int count,
- ext4_group_t group, struct ext4_buddy *e4b)
+ ext4_group_t group, struct ext4_buddy *e4b,
+ unsigned long blkdev_flags)
__releases(bitlock)
__acquires(bitlock)
{
@@ -5076,7 +5080,7 @@ __acquires(bitlock)
*/
mb_mark_used(e4b, &ex);
ext4_unlock_group(sb, group);
- ret = ext4_issue_discard(sb, group, start, count);
+ ret = ext4_issue_discard(sb, group, start, count, blkdev_flags);
ext4_lock_group(sb, group);
mb_free_blocks(NULL, e4b, start, ex.fe_len);
return ret;
@@ -5089,6 +5093,7 @@ __acquires(bitlock)
* @start: first group block to examine
* @max: last group block to examine
* @minblocks: minimum extent block count
+ * @blkdev_flags: flags for the block device
*
* ext4_trim_all_free walks through group's buddy bitmap searching for free
* extents. When the free block is found, ext4_trim_extent is called to TRIM
@@ -5103,7 +5108,7 @@ __acquires(bitlock)
static ext4_grpblk_t
ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
ext4_grpblk_t start, ext4_grpblk_t max,
- ext4_grpblk_t minblocks)
+ ext4_grpblk_t minblocks, unsigned long blkdev_flags)
{
void *bitmap;
ext4_grpblk_t next, count = 0, free_count = 0;
@@ -5136,7 +5141,8 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
if ((next - start) >= minblocks) {
ret = ext4_trim_extent(sb, start,
- next - start, group, &e4b);
+ next - start, group, &e4b,
+ blkdev_flags);
if (ret && ret != -EOPNOTSUPP)
break;
ret = 0;
@@ -5178,6 +5184,7 @@ out:
* ext4_trim_fs() -- trim ioctl handle function
* @sb: superblock for filesystem
* @range: fstrim_range structure
+ * @blkdev_flags: flags for the block device
*
* start: First Byte to trim
* len: number of Bytes to trim from start
@@ -5186,7 +5193,8 @@ out:
* start to start+len. For each such a group ext4_trim_all_free function
* is invoked to trim all free space.
*/
-int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
+int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range,
+ unsigned long blkdev_flags)
{
struct ext4_group_info *grp;
ext4_group_t group, first_group, last_group;
@@ -5242,7 +5250,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
if (grp->bb_free >= minlen) {
cnt = ext4_trim_all_free(sb, group, first_cluster,
- end, minlen);
+ end, minlen, blkdev_flags);
if (cnt < 0) {
ret = cnt;
break;
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index 5dc5e95063de..1c5db9fd9c8f 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -45,6 +45,7 @@
#include <linux/cleancache.h>
#include "ext4.h"
+#include <trace/events/android_fs.h>
/*
* Call ext4_decrypt on every single page, reusing the encryption
@@ -86,6 +87,17 @@ static inline bool ext4_bio_encrypted(struct bio *bio)
#endif
}
+static void
+ext4_trace_read_completion(struct bio *bio)
+{
+ struct page *first_page = bio->bi_io_vec[0].bv_page;
+
+ if (first_page != NULL)
+ trace_android_fs_dataread_end(first_page->mapping->host,
+ page_offset(first_page),
+ bio->bi_iter.bi_size);
+}
+
/*
* I/O completion handler for multipage BIOs.
*
@@ -103,6 +115,9 @@ static void mpage_end_io(struct bio *bio)
struct bio_vec *bv;
int i;
+ if (trace_android_fs_dataread_start_enabled())
+ ext4_trace_read_completion(bio);
+
if (ext4_bio_encrypted(bio)) {
struct ext4_crypto_ctx *ctx = bio->bi_private;
@@ -130,6 +145,30 @@ static void mpage_end_io(struct bio *bio)
bio_put(bio);
}
+static void
+ext4_submit_bio_read(struct bio *bio)
+{
+ if (trace_android_fs_dataread_start_enabled()) {
+ struct page *first_page = bio->bi_io_vec[0].bv_page;
+
+ if (first_page != NULL) {
+ char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+ path = android_fstrace_get_pathname(pathbuf,
+ MAX_TRACE_PATHBUF_LEN,
+ first_page->mapping->host);
+ trace_android_fs_dataread_start(
+ first_page->mapping->host,
+ page_offset(first_page),
+ bio->bi_iter.bi_size,
+ current->pid,
+ path,
+ current->comm);
+ }
+ }
+ submit_bio(READ, bio);
+}
+
int ext4_mpage_readpages(struct address_space *mapping,
struct list_head *pages, struct page *page,
unsigned nr_pages)
@@ -271,7 +310,7 @@ int ext4_mpage_readpages(struct address_space *mapping,
*/
if (bio && (last_block_in_bio != blocks[0] - 1)) {
submit_and_realloc:
- submit_bio(READ, bio);
+ ext4_submit_bio_read(bio);
bio = NULL;
}
if (bio == NULL) {
@@ -303,14 +342,14 @@ int ext4_mpage_readpages(struct address_space *mapping,
if (((map.m_flags & EXT4_MAP_BOUNDARY) &&
(relative_block == map.m_len)) ||
(first_hole != blocks_per_page)) {
- submit_bio(READ, bio);
+ ext4_submit_bio_read(bio);
bio = NULL;
} else
last_block_in_bio = blocks[blocks_per_page - 1];
goto next_page;
confused:
if (bio) {
- submit_bio(READ, bio);
+ ext4_submit_bio_read(bio);
bio = NULL;
}
if (!PageUptodate(page))
@@ -323,6 +362,6 @@ int ext4_mpage_readpages(struct address_space *mapping,
}
BUG_ON(pages && !list_empty(pages));
if (bio)
- submit_bio(READ, bio);
+ ext4_submit_bio_read(bio);
return 0;
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 68345a9e59b8..bd8831bfbafe 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -816,7 +816,6 @@ static void ext4_put_super(struct super_block *sb)
ext4_release_system_zone(sb);
ext4_mb_release(sb);
ext4_ext_release(sb);
- ext4_xattr_put_super(sb);
if (!(sb->s_flags & MS_RDONLY) && !aborted) {
ext4_clear_feature_journal_needs_recovery(sb);
@@ -3833,7 +3832,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
no_journal:
if (ext4_mballoc_ready) {
- sbi->s_mb_cache = ext4_xattr_create_cache(sb->s_id);
+ sbi->s_mb_cache = ext4_xattr_create_cache();
if (!sbi->s_mb_cache) {
ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache");
goto failed_mount_wq;
@@ -4065,6 +4064,10 @@ failed_mount4:
if (EXT4_SB(sb)->rsv_conversion_wq)
destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
failed_mount_wq:
+ if (sbi->s_mb_cache) {
+ ext4_xattr_destroy_cache(sbi->s_mb_cache);
+ sbi->s_mb_cache = NULL;
+ }
if (sbi->s_journal) {
jbd2_journal_destroy(sbi->s_journal);
sbi->s_journal = NULL;
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 7c23363ecf19..b310ed81c10e 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -53,7 +53,7 @@
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/slab.h>
-#include <linux/mbcache.h>
+#include <linux/mbcache2.h>
#include <linux/quotaops.h>
#include "ext4_jbd2.h"
#include "ext4.h"
@@ -80,10 +80,10 @@
# define ea_bdebug(bh, fmt, ...) no_printk(fmt, ##__VA_ARGS__)
#endif
-static void ext4_xattr_cache_insert(struct mb_cache *, struct buffer_head *);
+static void ext4_xattr_cache_insert(struct mb2_cache *, struct buffer_head *);
static struct buffer_head *ext4_xattr_cache_find(struct inode *,
struct ext4_xattr_header *,
- struct mb_cache_entry **);
+ struct mb2_cache_entry **);
static void ext4_xattr_rehash(struct ext4_xattr_header *,
struct ext4_xattr_entry *);
static int ext4_xattr_list(struct dentry *dentry, char *buffer,
@@ -300,7 +300,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
struct ext4_xattr_entry *entry;
size_t size;
int error;
- struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
+ struct mb2_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
name_index, name, buffer, (long)buffer_size);
@@ -447,7 +447,7 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
struct inode *inode = d_inode(dentry);
struct buffer_head *bh = NULL;
int error;
- struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
+ struct mb2_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
ea_idebug(inode, "buffer=%p, buffer_size=%ld",
buffer, (long)buffer_size);
@@ -564,11 +564,8 @@ static void
ext4_xattr_release_block(handle_t *handle, struct inode *inode,
struct buffer_head *bh)
{
- struct mb_cache_entry *ce = NULL;
int error = 0;
- struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
- ce = mb_cache_entry_get(ext4_mb_cache, bh->b_bdev, bh->b_blocknr);
BUFFER_TRACE(bh, "get_write_access");
error = ext4_journal_get_write_access(handle, bh);
if (error)
@@ -576,9 +573,15 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
lock_buffer(bh);
if (BHDR(bh)->h_refcount == cpu_to_le32(1)) {
+ __u32 hash = le32_to_cpu(BHDR(bh)->h_hash);
+
ea_bdebug(bh, "refcount now=0; freeing");
- if (ce)
- mb_cache_entry_free(ce);
+ /*
+ * This must happen under buffer lock for
+ * ext4_xattr_block_set() to reliably detect freed block
+ */
+ mb2_cache_entry_delete_block(EXT4_GET_MB_CACHE(inode), hash,
+ bh->b_blocknr);
get_bh(bh);
unlock_buffer(bh);
ext4_free_blocks(handle, inode, bh, 0, 1,
@@ -586,8 +589,6 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
EXT4_FREE_BLOCKS_FORGET);
} else {
le32_add_cpu(&BHDR(bh)->h_refcount, -1);
- if (ce)
- mb_cache_entry_release(ce);
/*
* Beware of this ugliness: Releasing of xattr block references
* from different inodes can race and so we have to protect
@@ -800,17 +801,15 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
struct super_block *sb = inode->i_sb;
struct buffer_head *new_bh = NULL;
struct ext4_xattr_search *s = &bs->s;
- struct mb_cache_entry *ce = NULL;
+ struct mb2_cache_entry *ce = NULL;
int error = 0;
- struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
+ struct mb2_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
#define header(x) ((struct ext4_xattr_header *)(x))
if (i->value && i->value_len > sb->s_blocksize)
return -ENOSPC;
if (s->base) {
- ce = mb_cache_entry_get(ext4_mb_cache, bs->bh->b_bdev,
- bs->bh->b_blocknr);
BUFFER_TRACE(bs->bh, "get_write_access");
error = ext4_journal_get_write_access(handle, bs->bh);
if (error)
@@ -818,10 +817,15 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
lock_buffer(bs->bh);
if (header(s->base)->h_refcount == cpu_to_le32(1)) {
- if (ce) {
- mb_cache_entry_free(ce);
- ce = NULL;
- }
+ __u32 hash = le32_to_cpu(BHDR(bs->bh)->h_hash);
+
+ /*
+ * This must happen under buffer lock for
+ * ext4_xattr_block_set() to reliably detect modified
+ * block
+ */
+ mb2_cache_entry_delete_block(ext4_mb_cache, hash,
+ bs->bh->b_blocknr);
ea_bdebug(bs->bh, "modifying in-place");
error = ext4_xattr_set_entry(i, s);
if (!error) {
@@ -845,10 +849,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
int offset = (char *)s->here - bs->bh->b_data;
unlock_buffer(bs->bh);
- if (ce) {
- mb_cache_entry_release(ce);
- ce = NULL;
- }
ea_bdebug(bs->bh, "cloning");
s->base = kmalloc(bs->bh->b_size, GFP_NOFS);
error = -ENOMEM;
@@ -903,6 +903,31 @@ inserted:
if (error)
goto cleanup_dquot;
lock_buffer(new_bh);
+ /*
+ * We have to be careful about races with
+ * freeing or rehashing of xattr block. Once we
+ * hold buffer lock xattr block's state is
+ * stable so we can check whether the block got
+ * freed / rehashed or not. Since we unhash
+ * mbcache entry under buffer lock when freeing
+ * / rehashing xattr block, checking whether
+ * entry is still hashed is reliable.
+ */
+ if (hlist_bl_unhashed(&ce->e_hash_list)) {
+ /*
+ * Undo everything and check mbcache
+ * again.
+ */
+ unlock_buffer(new_bh);
+ dquot_free_block(inode,
+ EXT4_C2B(EXT4_SB(sb),
+ 1));
+ brelse(new_bh);
+ mb2_cache_entry_put(ext4_mb_cache, ce);
+ ce = NULL;
+ new_bh = NULL;
+ goto inserted;
+ }
le32_add_cpu(&BHDR(new_bh)->h_refcount, 1);
ea_bdebug(new_bh, "reusing; refcount now=%d",
le32_to_cpu(BHDR(new_bh)->h_refcount));
@@ -913,7 +938,8 @@ inserted:
if (error)
goto cleanup_dquot;
}
- mb_cache_entry_release(ce);
+ mb2_cache_entry_touch(ext4_mb_cache, ce);
+ mb2_cache_entry_put(ext4_mb_cache, ce);
ce = NULL;
} else if (bs->bh && s->base == bs->bh->b_data) {
/* We were modifying this block in-place. */
@@ -978,7 +1004,7 @@ getblk_failed:
cleanup:
if (ce)
- mb_cache_entry_release(ce);
+ mb2_cache_entry_put(ext4_mb_cache, ce);
brelse(new_bh);
if (!(bs->bh && s->base == bs->bh->b_data))
kfree(s->base);
@@ -1543,17 +1569,6 @@ cleanup:
}
/*
- * ext4_xattr_put_super()
- *
- * This is called when a file system is unmounted.
- */
-void
-ext4_xattr_put_super(struct super_block *sb)
-{
- mb_cache_shrink(sb->s_bdev);
-}
-
-/*
* ext4_xattr_cache_insert()
*
* Create a new entry in the extended attribute cache, and insert
@@ -1562,28 +1577,18 @@ ext4_xattr_put_super(struct super_block *sb)
* Returns 0, or a negative error number on failure.
*/
static void
-ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh)
+ext4_xattr_cache_insert(struct mb2_cache *ext4_mb_cache, struct buffer_head *bh)
{
__u32 hash = le32_to_cpu(BHDR(bh)->h_hash);
- struct mb_cache_entry *ce;
int error;
- ce = mb_cache_entry_alloc(ext4_mb_cache, GFP_NOFS);
- if (!ce) {
- ea_bdebug(bh, "out of memory");
- return;
- }
- error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash);
+ error = mb2_cache_entry_create(ext4_mb_cache, GFP_NOFS, hash,
+ bh->b_blocknr);
if (error) {
- mb_cache_entry_free(ce);
- if (error == -EBUSY) {
+ if (error == -EBUSY)
ea_bdebug(bh, "already in cache");
- error = 0;
- }
- } else {
+ } else
ea_bdebug(bh, "inserting [%x]", (int)hash);
- mb_cache_entry_release(ce);
- }
}
/*
@@ -1636,26 +1641,19 @@ ext4_xattr_cmp(struct ext4_xattr_header *header1,
*/
static struct buffer_head *
ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header,
- struct mb_cache_entry **pce)
+ struct mb2_cache_entry **pce)
{
__u32 hash = le32_to_cpu(header->h_hash);
- struct mb_cache_entry *ce;
- struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
+ struct mb2_cache_entry *ce;
+ struct mb2_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
if (!header->h_hash)
return NULL; /* never share */
ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
-again:
- ce = mb_cache_entry_find_first(ext4_mb_cache, inode->i_sb->s_bdev,
- hash);
+ ce = mb2_cache_entry_find_first(ext4_mb_cache, hash);
while (ce) {
struct buffer_head *bh;
- if (IS_ERR(ce)) {
- if (PTR_ERR(ce) == -EAGAIN)
- goto again;
- break;
- }
bh = sb_bread(inode->i_sb, ce->e_block);
if (!bh) {
EXT4_ERROR_INODE(inode, "block %lu read error",
@@ -1671,7 +1669,7 @@ again:
return bh;
}
brelse(bh);
- ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash);
+ ce = mb2_cache_entry_find_next(ext4_mb_cache, ce);
}
return NULL;
}
@@ -1746,15 +1744,15 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *header,
#define HASH_BUCKET_BITS 10
-struct mb_cache *
-ext4_xattr_create_cache(char *name)
+struct mb2_cache *
+ext4_xattr_create_cache(void)
{
- return mb_cache_create(name, HASH_BUCKET_BITS);
+ return mb2_cache_create(HASH_BUCKET_BITS);
}
-void ext4_xattr_destroy_cache(struct mb_cache *cache)
+void ext4_xattr_destroy_cache(struct mb2_cache *cache)
{
if (cache)
- mb_cache_destroy(cache);
+ mb2_cache_destroy(cache);
}
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index ddc0957760ba..10b0f7323ed6 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -108,7 +108,6 @@ extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_
extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
extern void ext4_xattr_delete_inode(handle_t *, struct inode *);
-extern void ext4_xattr_put_super(struct super_block *);
extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
struct ext4_inode *raw_inode, handle_t *handle);
@@ -124,8 +123,8 @@ extern int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
struct ext4_xattr_info *i,
struct ext4_xattr_ibody_find *is);
-extern struct mb_cache *ext4_xattr_create_cache(char *name);
-extern void ext4_xattr_destroy_cache(struct mb_cache *);
+extern struct mb2_cache *ext4_xattr_create_cache(void);
+extern void ext4_xattr_destroy_cache(struct mb2_cache *);
#ifdef CONFIG_EXT4_FS_SECURITY
extern int ext4_init_security(handle_t *handle, struct inode *inode,
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 972eab7ac071..8936044dee4c 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -26,6 +26,7 @@
#include "segment.h"
#include "trace.h"
#include <trace/events/f2fs.h>
+#include <trace/events/android_fs.h>
static void f2fs_read_end_io(struct bio *bio)
{
@@ -1401,6 +1402,16 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
struct dnode_of_data dn;
int err = 0;
+ if (trace_android_fs_datawrite_start_enabled()) {
+ char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+ path = android_fstrace_get_pathname(pathbuf,
+ MAX_TRACE_PATHBUF_LEN,
+ inode);
+ trace_android_fs_datawrite_start(inode, pos, len,
+ current->pid, path,
+ current->comm);
+ }
trace_f2fs_write_begin(inode, pos, len, flags);
f2fs_balance_fs(sbi);
@@ -1529,6 +1540,7 @@ static int f2fs_write_end(struct file *file,
{
struct inode *inode = page->mapping->host;
+ trace_android_fs_datawrite_end(inode, pos, len);
trace_f2fs_write_end(inode, pos, len, copied);
set_page_dirty(page);
@@ -1582,6 +1594,28 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
+ if (trace_android_fs_dataread_start_enabled() &&
+ (iov_iter_rw(iter) == READ)) {
+ char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+ path = android_fstrace_get_pathname(pathbuf,
+ MAX_TRACE_PATHBUF_LEN,
+ inode);
+ trace_android_fs_dataread_start(inode, offset,
+ count, current->pid, path,
+ current->comm);
+ }
+ if (trace_android_fs_datawrite_start_enabled() &&
+ (iov_iter_rw(iter) == WRITE)) {
+ char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+ path = android_fstrace_get_pathname(pathbuf,
+ MAX_TRACE_PATHBUF_LEN,
+ inode);
+ trace_android_fs_datawrite_start(inode, offset, count,
+ current->pid, path,
+ current->comm);
+ }
if (iov_iter_rw(iter) == WRITE) {
__allocate_data_blocks(inode, offset, count);
if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
@@ -1595,6 +1629,13 @@ out:
if (err < 0 && iov_iter_rw(iter) == WRITE)
f2fs_write_failed(mapping, offset + count);
+ if (trace_android_fs_dataread_start_enabled() &&
+ (iov_iter_rw(iter) == READ))
+ trace_android_fs_dataread_end(inode, offset, count);
+ if (trace_android_fs_datawrite_start_enabled() &&
+ (iov_iter_rw(iter) == WRITE))
+ trace_android_fs_datawrite_end(inode, offset, count);
+
trace_f2fs_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), err);
return err;
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index bda7126466c0..dbb2cc4df603 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -13,6 +13,7 @@
#include "f2fs.h"
#include "node.h"
+#include <trace/events/android_fs.h>
bool f2fs_may_inline_data(struct inode *inode)
{
@@ -84,14 +85,29 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page)
{
struct page *ipage;
+ if (trace_android_fs_dataread_start_enabled()) {
+ char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+ path = android_fstrace_get_pathname(pathbuf,
+ MAX_TRACE_PATHBUF_LEN,
+ inode);
+ trace_android_fs_dataread_start(inode, page_offset(page),
+ PAGE_SIZE, current->pid,
+ path, current->comm);
+ }
+
ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
if (IS_ERR(ipage)) {
+ trace_android_fs_dataread_end(inode, page_offset(page),
+ PAGE_SIZE);
unlock_page(page);
return PTR_ERR(ipage);
}
if (!f2fs_has_inline_data(inode)) {
f2fs_put_page(ipage, 1);
+ trace_android_fs_dataread_end(inode, page_offset(page),
+ PAGE_SIZE);
return -EAGAIN;
}
@@ -102,6 +118,8 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page)
SetPageUptodate(page);
f2fs_put_page(ipage, 1);
+ trace_android_fs_dataread_end(inode, page_offset(page),
+ PAGE_SIZE);
unlock_page(page);
return 0;
}
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 60d6fc2e0e4b..de11206dda63 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -2062,7 +2062,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
(dirtytime && (inode->i_state & I_DIRTY_INODE)))
return;
- if (unlikely(block_dump))
+ if (unlikely(block_dump > 1))
block_dump___mark_inode_dirty(inode);
spin_lock(&inode->i_lock);
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index 7dca743b2ce1..940c683561dd 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -44,6 +44,7 @@ void set_fs_pwd(struct fs_struct *fs, const struct path *path)
if (old_pwd.dentry)
path_put(&old_pwd);
}
+EXPORT_SYMBOL(set_fs_pwd);
static inline int replace_path(struct path *p, const struct path *old, const struct path *new)
{
@@ -89,6 +90,7 @@ void free_fs_struct(struct fs_struct *fs)
path_put(&fs->pwd);
kmem_cache_free(fs_cachep, fs);
}
+EXPORT_SYMBOL(free_fs_struct);
void exit_fs(struct task_struct *tsk)
{
@@ -127,6 +129,7 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
}
return fs;
}
+EXPORT_SYMBOL_GPL(copy_fs_struct);
int unshare_fs_struct(void)
{
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index d0cf1f010fbe..fbfec06b054d 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -13,12 +13,14 @@
#include <linux/poll.h>
#include <linux/uio.h>
#include <linux/miscdevice.h>
+#include <linux/namei.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/pipe_fs_i.h>
#include <linux/swap.h>
#include <linux/splice.h>
+#include <linux/freezer.h>
MODULE_ALIAS_MISCDEV(FUSE_MINOR);
MODULE_ALIAS("devname:fuse");
@@ -477,7 +479,9 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
* Either request is already in userspace, or it was forced.
* Wait it out.
*/
- wait_event(req->waitq, test_bit(FR_FINISHED, &req->flags));
+ while (!test_bit(FR_FINISHED, &req->flags))
+ wait_event_freezable(req->waitq,
+ test_bit(FR_FINISHED, &req->flags));
}
static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
@@ -1936,6 +1940,10 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
cs->move_pages = 0;
err = copy_out_args(cs, &req->out, nbytes);
+ if (req->in.h.opcode == FUSE_CANONICAL_PATH) {
+ req->out.h.error = kern_path((char *)req->out.args[0].value, 0,
+ req->canonical_path);
+ }
fuse_copy_finish(cs);
spin_lock(&fpq->lock);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 4b5f2c4e69c8..278caed7c367 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -267,6 +267,50 @@ invalid:
goto out;
}
+/*
+ * Get the canonical path. Since we must translate to a path, this must be done
+ * in the context of the userspace daemon, however, the userspace daemon cannot
+ * look up paths on its own. Instead, we handle the lookup as a special case
+ * inside of the write request.
+ */
+static void fuse_dentry_canonical_path(const struct path *path, struct path *canonical_path) {
+ struct inode *inode = path->dentry->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_req *req;
+ int err;
+ char *path_name;
+
+ req = fuse_get_req(fc, 1);
+ err = PTR_ERR(req);
+ if (IS_ERR(req))
+ goto default_path;
+
+ path_name = (char*)__get_free_page(GFP_KERNEL);
+ if (!path_name) {
+ fuse_put_request(fc, req);
+ goto default_path;
+ }
+
+ req->in.h.opcode = FUSE_CANONICAL_PATH;
+ req->in.h.nodeid = get_node_id(inode);
+ req->in.numargs = 0;
+ req->out.numargs = 1;
+ req->out.args[0].size = PATH_MAX;
+ req->out.args[0].value = path_name;
+ req->canonical_path = canonical_path;
+ req->out.argvar = 1;
+ fuse_request_send(fc, req);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+ free_page((unsigned long)path_name);
+ if (!err)
+ return;
+default_path:
+ canonical_path->dentry = path->dentry;
+ canonical_path->mnt = path->mnt;
+ path_get(canonical_path);
+}
+
static int invalid_nodeid(u64 nodeid)
{
return !nodeid || nodeid == FUSE_ROOT_ID;
@@ -274,6 +318,7 @@ static int invalid_nodeid(u64 nodeid)
const struct dentry_operations fuse_dentry_operations = {
.d_revalidate = fuse_dentry_revalidate,
+ .d_canonical_path = fuse_dentry_canonical_path,
};
int fuse_valid_type(int m)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 604cd42dafef..644687ae04bd 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -372,6 +372,9 @@ struct fuse_req {
/** Inode used in the request or NULL */
struct inode *inode;
+ /** Path used for completing d_canonical_path */
+ struct path *canonical_path;
+
/** AIO control block */
struct fuse_io_priv *io;
diff --git a/fs/inode.c b/fs/inode.c
index b0edef500590..e89f1271c545 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1715,7 +1715,7 @@ int dentry_needs_remove_privs(struct dentry *dentry)
}
EXPORT_SYMBOL(dentry_needs_remove_privs);
-static int __remove_privs(struct dentry *dentry, int kill)
+static int __remove_privs(struct vfsmount *mnt, struct dentry *dentry, int kill)
{
struct iattr newattrs;
@@ -1724,7 +1724,7 @@ static int __remove_privs(struct dentry *dentry, int kill)
* Note we call this on write, so notify_change will not
* encounter any conflicting delegations:
*/
- return notify_change(dentry, &newattrs, NULL);
+ return notify_change2(mnt, dentry, &newattrs, NULL);
}
/*
@@ -1746,7 +1746,7 @@ int file_remove_privs(struct file *file)
if (kill < 0)
return kill;
if (kill)
- error = __remove_privs(dentry, kill);
+ error = __remove_privs(file->f_path.mnt, dentry, kill);
if (!error)
inode_has_no_xattr(inode);
diff --git a/fs/internal.h b/fs/internal.h
index 71859c4d0b41..6387b35a1c0d 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -84,9 +84,11 @@ extern struct file *get_empty_filp(void);
* super.c
*/
extern int do_remount_sb(struct super_block *, int, void *, int);
+extern int do_remount_sb2(struct vfsmount *, struct super_block *, int,
+ void *, int);
extern bool trylock_super(struct super_block *sb);
extern struct dentry *mount_fs(struct file_system_type *,
- int, const char *, void *);
+ int, const char *, struct vfsmount *, void *);
extern struct super_block *user_get_super(dev_t);
/*
diff --git a/fs/mbcache2.c b/fs/mbcache2.c
new file mode 100644
index 000000000000..5c3e1a8c38f6
--- /dev/null
+++ b/fs/mbcache2.c
@@ -0,0 +1,359 @@
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/list_bl.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/mbcache2.h>
+
+/*
+ * Mbcache is a simple key-value store. Keys need not be unique, however
+ * key-value pairs are expected to be unique (we use this fact in
+ * mb2_cache_entry_delete_block()).
+ *
+ * Ext2 and ext4 use this cache for deduplication of extended attribute blocks.
+ * They use hash of a block contents as a key and block number as a value.
+ * That's why keys need not be unique (different xattr blocks may end up having
+ * the same hash). However block number always uniquely identifies a cache
+ * entry.
+ *
+ * We provide functions for creation and removal of entries, search by key,
+ * and a special "delete entry with given key-value pair" operation. Fixed
+ * size hash table is used for fast key lookups.
+ */
+
+struct mb2_cache {
+ /* Hash table of entries */
+ struct hlist_bl_head *c_hash;
+ /* log2 of hash table size */
+ int c_bucket_bits;
+ /* Protects c_lru_list, c_entry_count */
+ spinlock_t c_lru_list_lock;
+ struct list_head c_lru_list;
+ /* Number of entries in cache */
+ unsigned long c_entry_count;
+ struct shrinker c_shrink;
+};
+
+static struct kmem_cache *mb2_entry_cache;
+
+/*
+ * mb2_cache_entry_create - create entry in cache
+ * @cache - cache where the entry should be created
+ * @mask - gfp mask with which the entry should be allocated
+ * @key - key of the entry
+ * @block - block that contains data
+ *
+ * Creates entry in @cache with key @key and records that data is stored in
+ * block @block. The function returns -EBUSY if entry with the same key
+ * and for the same block already exists in cache. Otherwise 0 is returned.
+ */
+int mb2_cache_entry_create(struct mb2_cache *cache, gfp_t mask, u32 key,
+ sector_t block)
+{
+ struct mb2_cache_entry *entry, *dup;
+ struct hlist_bl_node *dup_node;
+ struct hlist_bl_head *head;
+
+ entry = kmem_cache_alloc(mb2_entry_cache, mask);
+ if (!entry)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&entry->e_lru_list);
+ /* One ref for hash, one ref returned */
+ atomic_set(&entry->e_refcnt, 1);
+ entry->e_key = key;
+ entry->e_block = block;
+ head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)];
+ entry->e_hash_list_head = head;
+ hlist_bl_lock(head);
+ hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
+ if (dup->e_key == key && dup->e_block == block) {
+ hlist_bl_unlock(head);
+ kmem_cache_free(mb2_entry_cache, entry);
+ return -EBUSY;
+ }
+ }
+ hlist_bl_add_head(&entry->e_hash_list, head);
+ hlist_bl_unlock(head);
+
+ spin_lock(&cache->c_lru_list_lock);
+ list_add_tail(&entry->e_lru_list, &cache->c_lru_list);
+ /* Grab ref for LRU list */
+ atomic_inc(&entry->e_refcnt);
+ cache->c_entry_count++;
+ spin_unlock(&cache->c_lru_list_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL(mb2_cache_entry_create);
+
+void __mb2_cache_entry_free(struct mb2_cache_entry *entry)
+{
+ kmem_cache_free(mb2_entry_cache, entry);
+}
+EXPORT_SYMBOL(__mb2_cache_entry_free);
+
+static struct mb2_cache_entry *__entry_find(struct mb2_cache *cache,
+ struct mb2_cache_entry *entry,
+ u32 key)
+{
+ struct mb2_cache_entry *old_entry = entry;
+ struct hlist_bl_node *node;
+ struct hlist_bl_head *head;
+
+ if (entry)
+ head = entry->e_hash_list_head;
+ else
+ head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)];
+ hlist_bl_lock(head);
+ if (entry && !hlist_bl_unhashed(&entry->e_hash_list))
+ node = entry->e_hash_list.next;
+ else
+ node = hlist_bl_first(head);
+ while (node) {
+ entry = hlist_bl_entry(node, struct mb2_cache_entry,
+ e_hash_list);
+ if (entry->e_key == key) {
+ atomic_inc(&entry->e_refcnt);
+ goto out;
+ }
+ node = node->next;
+ }
+ entry = NULL;
+out:
+ hlist_bl_unlock(head);
+ if (old_entry)
+ mb2_cache_entry_put(cache, old_entry);
+
+ return entry;
+}
+
+/*
+ * mb2_cache_entry_find_first - find the first entry in cache with given key
+ * @cache: cache where we should search
+ * @key: key to look for
+ *
+ * Search in @cache for entry with key @key. Grabs reference to the first
+ * entry found and returns the entry.
+ */
+struct mb2_cache_entry *mb2_cache_entry_find_first(struct mb2_cache *cache,
+ u32 key)
+{
+ return __entry_find(cache, NULL, key);
+}
+EXPORT_SYMBOL(mb2_cache_entry_find_first);
+
+/*
+ * mb2_cache_entry_find_next - find next entry in cache with the same
+ * @cache: cache where we should search
+ * @entry: entry to start search from
+ *
+ * Finds next entry in the hash chain which has the same key as @entry.
+ * If @entry is unhashed (which can happen when deletion of entry races
+ * with the search), finds the first entry in the hash chain. The function
+ * drops reference to @entry and returns with a reference to the found entry.
+ */
+struct mb2_cache_entry *mb2_cache_entry_find_next(struct mb2_cache *cache,
+ struct mb2_cache_entry *entry)
+{
+ return __entry_find(cache, entry, entry->e_key);
+}
+EXPORT_SYMBOL(mb2_cache_entry_find_next);
+
+/* mb2_cache_entry_delete_block - remove information about block from cache
+ * @cache - cache we work with
+ * @key - key of the entry to remove
+ * @block - block containing data for @key
+ *
+ * Remove entry from cache @cache with key @key with data stored in @block.
+ */
+void mb2_cache_entry_delete_block(struct mb2_cache *cache, u32 key,
+ sector_t block)
+{
+ struct hlist_bl_node *node;
+ struct hlist_bl_head *head;
+ struct mb2_cache_entry *entry;
+
+ head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)];
+ hlist_bl_lock(head);
+ hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
+ if (entry->e_key == key && entry->e_block == block) {
+ /* We keep hash list reference to keep entry alive */
+ hlist_bl_del_init(&entry->e_hash_list);
+ hlist_bl_unlock(head);
+ spin_lock(&cache->c_lru_list_lock);
+ if (!list_empty(&entry->e_lru_list)) {
+ list_del_init(&entry->e_lru_list);
+ cache->c_entry_count--;
+ atomic_dec(&entry->e_refcnt);
+ }
+ spin_unlock(&cache->c_lru_list_lock);
+ mb2_cache_entry_put(cache, entry);
+ return;
+ }
+ }
+ hlist_bl_unlock(head);
+}
+EXPORT_SYMBOL(mb2_cache_entry_delete_block);
+
+/* mb2_cache_entry_touch - cache entry got used
+ * @cache - cache the entry belongs to
+ * @entry - entry that got used
+ *
+ * Move entry in lru list to reflect the fact that it was used.
+ */
+void mb2_cache_entry_touch(struct mb2_cache *cache,
+ struct mb2_cache_entry *entry)
+{
+ spin_lock(&cache->c_lru_list_lock);
+ if (!list_empty(&entry->e_lru_list))
+ list_move_tail(&cache->c_lru_list, &entry->e_lru_list);
+ spin_unlock(&cache->c_lru_list_lock);
+}
+EXPORT_SYMBOL(mb2_cache_entry_touch);
+
+static unsigned long mb2_cache_count(struct shrinker *shrink,
+ struct shrink_control *sc)
+{
+ struct mb2_cache *cache = container_of(shrink, struct mb2_cache,
+ c_shrink);
+
+ return cache->c_entry_count;
+}
+
+/* Shrink number of entries in cache */
+static unsigned long mb2_cache_scan(struct shrinker *shrink,
+ struct shrink_control *sc)
+{
+ int nr_to_scan = sc->nr_to_scan;
+ struct mb2_cache *cache = container_of(shrink, struct mb2_cache,
+ c_shrink);
+ struct mb2_cache_entry *entry;
+ struct hlist_bl_head *head;
+ unsigned int shrunk = 0;
+
+ spin_lock(&cache->c_lru_list_lock);
+ while (nr_to_scan-- && !list_empty(&cache->c_lru_list)) {
+ entry = list_first_entry(&cache->c_lru_list,
+ struct mb2_cache_entry, e_lru_list);
+ list_del_init(&entry->e_lru_list);
+ cache->c_entry_count--;
+ /*
+ * We keep LRU list reference so that entry doesn't go away
+ * from under us.
+ */
+ spin_unlock(&cache->c_lru_list_lock);
+ head = entry->e_hash_list_head;
+ hlist_bl_lock(head);
+ if (!hlist_bl_unhashed(&entry->e_hash_list)) {
+ hlist_bl_del_init(&entry->e_hash_list);
+ atomic_dec(&entry->e_refcnt);
+ }
+ hlist_bl_unlock(head);
+ if (mb2_cache_entry_put(cache, entry))
+ shrunk++;
+ cond_resched();
+ spin_lock(&cache->c_lru_list_lock);
+ }
+ spin_unlock(&cache->c_lru_list_lock);
+
+ return shrunk;
+}
+
+/*
+ * mb2_cache_create - create cache
+ * @bucket_bits: log2 of the hash table size
+ *
+ * Create cache for keys with 2^bucket_bits hash entries.
+ */
+struct mb2_cache *mb2_cache_create(int bucket_bits)
+{
+ struct mb2_cache *cache;
+ int bucket_count = 1 << bucket_bits;
+ int i;
+
+ if (!try_module_get(THIS_MODULE))
+ return NULL;
+
+ cache = kzalloc(sizeof(struct mb2_cache), GFP_KERNEL);
+ if (!cache)
+ goto err_out;
+ cache->c_bucket_bits = bucket_bits;
+ INIT_LIST_HEAD(&cache->c_lru_list);
+ spin_lock_init(&cache->c_lru_list_lock);
+ cache->c_hash = kmalloc(bucket_count * sizeof(struct hlist_bl_head),
+ GFP_KERNEL);
+ if (!cache->c_hash) {
+ kfree(cache);
+ goto err_out;
+ }
+ for (i = 0; i < bucket_count; i++)
+ INIT_HLIST_BL_HEAD(&cache->c_hash[i]);
+
+ cache->c_shrink.count_objects = mb2_cache_count;
+ cache->c_shrink.scan_objects = mb2_cache_scan;
+ cache->c_shrink.seeks = DEFAULT_SEEKS;
+ register_shrinker(&cache->c_shrink);
+
+ return cache;
+
+err_out:
+ module_put(THIS_MODULE);
+ return NULL;
+}
+EXPORT_SYMBOL(mb2_cache_create);
+
+/*
+ * mb2_cache_destroy - destroy cache
+ * @cache: the cache to destroy
+ *
+ * Free all entries in cache and cache itself. Caller must make sure nobody
+ * (except shrinker) can reach @cache when calling this.
+ */
+void mb2_cache_destroy(struct mb2_cache *cache)
+{
+ struct mb2_cache_entry *entry, *next;
+
+ unregister_shrinker(&cache->c_shrink);
+
+ /*
+ * We don't bother with any locking. Cache must not be used at this
+ * point.
+ */
+ list_for_each_entry_safe(entry, next, &cache->c_lru_list, e_lru_list) {
+ if (!hlist_bl_unhashed(&entry->e_hash_list)) {
+ hlist_bl_del_init(&entry->e_hash_list);
+ atomic_dec(&entry->e_refcnt);
+ } else
+ WARN_ON(1);
+ list_del(&entry->e_lru_list);
+ WARN_ON(atomic_read(&entry->e_refcnt) != 1);
+ mb2_cache_entry_put(cache, entry);
+ }
+ kfree(cache->c_hash);
+ kfree(cache);
+ module_put(THIS_MODULE);
+}
+EXPORT_SYMBOL(mb2_cache_destroy);
+
+static int __init mb2cache_init(void)
+{
+ mb2_entry_cache = kmem_cache_create("mbcache",
+ sizeof(struct mb2_cache_entry), 0,
+ SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
+ BUG_ON(!mb2_entry_cache);
+ return 0;
+}
+
+static void __exit mb2cache_exit(void)
+{
+ kmem_cache_destroy(mb2_entry_cache);
+}
+
+module_init(mb2cache_init)
+module_exit(mb2cache_exit)
+
+MODULE_AUTHOR("Jan Kara <jack@suse.cz>");
+MODULE_DESCRIPTION("Meta block cache (for extended attributes)");
+MODULE_LICENSE("GPL");
diff --git a/fs/mpage.c b/fs/mpage.c
index 1480d3a18037..0fd48fdcc1b1 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -30,6 +30,14 @@
#include <linux/cleancache.h>
#include "internal.h"
+#define CREATE_TRACE_POINTS
+#include <trace/events/android_fs.h>
+
+EXPORT_TRACEPOINT_SYMBOL(android_fs_datawrite_start);
+EXPORT_TRACEPOINT_SYMBOL(android_fs_datawrite_end);
+EXPORT_TRACEPOINT_SYMBOL(android_fs_dataread_start);
+EXPORT_TRACEPOINT_SYMBOL(android_fs_dataread_end);
+
/*
* I/O completion handler for multipage BIOs.
*
@@ -47,6 +55,16 @@ static void mpage_end_io(struct bio *bio)
struct bio_vec *bv;
int i;
+ if (trace_android_fs_dataread_end_enabled() &&
+ (bio_data_dir(bio) == READ)) {
+ struct page *first_page = bio->bi_io_vec[0].bv_page;
+
+ if (first_page != NULL)
+ trace_android_fs_dataread_end(first_page->mapping->host,
+ page_offset(first_page),
+ bio->bi_iter.bi_size);
+ }
+
bio_for_each_segment_all(bv, bio, i) {
struct page *page = bv->bv_page;
page_endio(page, bio_data_dir(bio), bio->bi_error);
@@ -57,6 +75,24 @@ static void mpage_end_io(struct bio *bio)
static struct bio *mpage_bio_submit(int rw, struct bio *bio)
{
+ if (trace_android_fs_dataread_start_enabled() && (rw == READ)) {
+ struct page *first_page = bio->bi_io_vec[0].bv_page;
+
+ if (first_page != NULL) {
+ char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+ path = android_fstrace_get_pathname(pathbuf,
+ MAX_TRACE_PATHBUF_LEN,
+ first_page->mapping->host);
+ trace_android_fs_dataread_start(
+ first_page->mapping->host,
+ page_offset(first_page),
+ bio->bi_iter.bi_size,
+ current->pid,
+ path,
+ current->comm);
+ }
+ }
bio->bi_end_io = mpage_end_io;
guard_bio_eod(rw, bio);
submit_bio(rw, bio);
diff --git a/fs/namei.c b/fs/namei.c
index 0b0acba72a71..d185869dae93 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -373,9 +373,11 @@ EXPORT_SYMBOL(generic_permission);
* flag in inode->i_opflags, that says "this has not special
* permission function, use the fast case".
*/
-static inline int do_inode_permission(struct inode *inode, int mask)
+static inline int do_inode_permission(struct vfsmount *mnt, struct inode *inode, int mask)
{
if (unlikely(!(inode->i_opflags & IOP_FASTPERM))) {
+ if (likely(mnt && inode->i_op->permission2))
+ return inode->i_op->permission2(mnt, inode, mask);
if (likely(inode->i_op->permission))
return inode->i_op->permission(inode, mask);
@@ -399,7 +401,7 @@ static inline int do_inode_permission(struct inode *inode, int mask)
* This does not check for a read-only file system. You probably want
* inode_permission().
*/
-int __inode_permission(struct inode *inode, int mask)
+int __inode_permission2(struct vfsmount *mnt, struct inode *inode, int mask)
{
int retval;
@@ -411,7 +413,7 @@ int __inode_permission(struct inode *inode, int mask)
return -EACCES;
}
- retval = do_inode_permission(inode, mask);
+ retval = do_inode_permission(mnt, inode, mask);
if (retval)
return retval;
@@ -419,7 +421,14 @@ int __inode_permission(struct inode *inode, int mask)
if (retval)
return retval;
- return security_inode_permission(inode, mask);
+ retval = security_inode_permission(inode, mask);
+ return retval;
+}
+EXPORT_SYMBOL(__inode_permission2);
+
+int __inode_permission(struct inode *inode, int mask)
+{
+ return __inode_permission2(NULL, inode, mask);
}
EXPORT_SYMBOL(__inode_permission);
@@ -455,14 +464,20 @@ static int sb_permission(struct super_block *sb, struct inode *inode, int mask)
*
* When checking for MAY_APPEND, MAY_WRITE must also be set in @mask.
*/
-int inode_permission(struct inode *inode, int mask)
+int inode_permission2(struct vfsmount *mnt, struct inode *inode, int mask)
{
int retval;
retval = sb_permission(inode->i_sb, inode, mask);
if (retval)
return retval;
- return __inode_permission(inode, mask);
+ return __inode_permission2(mnt, inode, mask);
+}
+EXPORT_SYMBOL(inode_permission2);
+
+int inode_permission(struct inode *inode, int mask)
+{
+ return inode_permission2(NULL, inode, mask);
}
EXPORT_SYMBOL(inode_permission);
@@ -1645,13 +1660,13 @@ static int lookup_slow(struct nameidata *nd, struct path *path)
static inline int may_lookup(struct nameidata *nd)
{
if (nd->flags & LOOKUP_RCU) {
- int err = inode_permission(nd->inode, MAY_EXEC|MAY_NOT_BLOCK);
+ int err = inode_permission2(nd->path.mnt, nd->inode, MAY_EXEC|MAY_NOT_BLOCK);
if (err != -ECHILD)
return err;
if (unlazy_walk(nd, NULL, 0))
return -ECHILD;
}
- return inode_permission(nd->inode, MAY_EXEC);
+ return inode_permission2(nd->path.mnt, nd->inode, MAY_EXEC);
}
static inline int handle_dots(struct nameidata *nd, int type)
@@ -2005,11 +2020,12 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
nd->depth = 0;
if (flags & LOOKUP_ROOT) {
struct dentry *root = nd->root.dentry;
+ struct vfsmount *mnt = nd->root.mnt;
struct inode *inode = root->d_inode;
if (*s) {
if (!d_can_lookup(root))
return ERR_PTR(-ENOTDIR);
- retval = inode_permission(inode, MAY_EXEC);
+ retval = inode_permission2(mnt, inode, MAY_EXEC);
if (retval)
return ERR_PTR(retval);
}
@@ -2280,13 +2296,14 @@ EXPORT_SYMBOL(vfs_path_lookup);
/**
* lookup_one_len - filesystem helper to lookup single pathname component
* @name: pathname component to lookup
+ * @mnt: mount we are looking up on
* @base: base directory to lookup from
* @len: maximum length @len should be interpreted to
*
* Note that this routine is purely a helper for filesystem usage and should
* not be called by generic code.
*/
-struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
+struct dentry *lookup_one_len2(const char *name, struct vfsmount *mnt, struct dentry *base, int len)
{
struct qstr this;
unsigned int c;
@@ -2320,12 +2337,18 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
return ERR_PTR(err);
}
- err = inode_permission(base->d_inode, MAY_EXEC);
+ err = inode_permission2(mnt, base->d_inode, MAY_EXEC);
if (err)
return ERR_PTR(err);
return __lookup_hash(&this, base, 0);
}
+EXPORT_SYMBOL(lookup_one_len2);
+
+struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
+{
+ return lookup_one_len2(name, NULL, base, len);
+}
EXPORT_SYMBOL(lookup_one_len);
int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
@@ -2552,7 +2575,7 @@ EXPORT_SYMBOL(__check_sticky);
* 10. We don't allow removal of NFS sillyrenamed files; it's handled by
* nfs_async_unlink().
*/
-static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
+static int may_delete(struct vfsmount *mnt, struct inode *dir, struct dentry *victim, bool isdir)
{
struct inode *inode = d_backing_inode(victim);
int error;
@@ -2564,7 +2587,7 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
BUG_ON(victim->d_parent->d_inode != dir);
audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
- error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
+ error = inode_permission2(mnt, dir, MAY_WRITE | MAY_EXEC);
if (error)
return error;
if (IS_APPEND(dir))
@@ -2595,14 +2618,14 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
* 3. We should have write and exec permissions on dir
* 4. We can't do it if dir is immutable (done in permission())
*/
-static inline int may_create(struct inode *dir, struct dentry *child)
+static inline int may_create(struct vfsmount *mnt, struct inode *dir, struct dentry *child)
{
audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE);
if (child->d_inode)
return -EEXIST;
if (IS_DEADDIR(dir))
return -ENOENT;
- return inode_permission(dir, MAY_WRITE | MAY_EXEC);
+ return inode_permission2(mnt, dir, MAY_WRITE | MAY_EXEC);
}
/*
@@ -2649,10 +2672,10 @@ void unlock_rename(struct dentry *p1, struct dentry *p2)
}
EXPORT_SYMBOL(unlock_rename);
-int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
- bool want_excl)
+int vfs_create2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry,
+ umode_t mode, bool want_excl)
{
- int error = may_create(dir, dentry);
+ int error = may_create(mnt, dir, dentry);
if (error)
return error;
@@ -2668,11 +2691,19 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
fsnotify_create(dir, dentry);
return error;
}
+EXPORT_SYMBOL(vfs_create2);
+
+int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
+ bool want_excl)
+{
+ return vfs_create2(NULL, dir, dentry, mode, want_excl);
+}
EXPORT_SYMBOL(vfs_create);
static int may_open(struct path *path, int acc_mode, int flag)
{
struct dentry *dentry = path->dentry;
+ struct vfsmount *mnt = path->mnt;
struct inode *inode = dentry->d_inode;
int error;
@@ -2701,7 +2732,7 @@ static int may_open(struct path *path, int acc_mode, int flag)
break;
}
- error = inode_permission(inode, acc_mode);
+ error = inode_permission2(mnt, inode, acc_mode);
if (error)
return error;
@@ -2736,7 +2767,7 @@ static int handle_truncate(struct file *filp)
if (!error)
error = security_path_truncate(path);
if (!error) {
- error = do_truncate(path->dentry, 0,
+ error = do_truncate2(path->mnt, path->dentry, 0,
ATTR_MTIME|ATTR_CTIME|ATTR_OPEN,
filp);
}
@@ -2757,7 +2788,7 @@ static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode)
if (error)
return error;
- error = inode_permission(dir->dentry->d_inode, MAY_WRITE | MAY_EXEC);
+ error = inode_permission2(dir->mnt, dir->dentry->d_inode, MAY_WRITE | MAY_EXEC);
if (error)
return error;
@@ -2943,6 +2974,7 @@ static int lookup_open(struct nameidata *nd, struct path *path,
bool got_write, int *opened)
{
struct dentry *dir = nd->path.dentry;
+ struct vfsmount *mnt = nd->path.mnt;
struct inode *dir_inode = dir->d_inode;
struct dentry *dentry;
int error;
@@ -2990,7 +3022,7 @@ static int lookup_open(struct nameidata *nd, struct path *path,
error = security_path_mknod(&nd->path, dentry, mode, 0);
if (error)
goto out_dput;
- error = vfs_create(dir->d_inode, dentry, mode,
+ error = vfs_create2(mnt, dir->d_inode, dentry, mode,
nd->flags & LOOKUP_EXCL);
if (error)
goto out_dput;
@@ -3252,7 +3284,7 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags,
goto out;
dir = path.dentry->d_inode;
/* we want directory to be writable */
- error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
+ error = inode_permission2(path.mnt, dir, MAY_WRITE | MAY_EXEC);
if (error)
goto out2;
if (!dir->i_op->tmpfile) {
@@ -3486,9 +3518,9 @@ inline struct dentry *user_path_create(int dfd, const char __user *pathname,
}
EXPORT_SYMBOL(user_path_create);
-int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
+int vfs_mknod2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
{
- int error = may_create(dir, dentry);
+ int error = may_create(mnt, dir, dentry);
if (error)
return error;
@@ -3512,6 +3544,12 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
fsnotify_create(dir, dentry);
return error;
}
+EXPORT_SYMBOL(vfs_mknod2);
+
+int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
+{
+ return vfs_mknod2(NULL, dir, dentry, mode, dev);
+}
EXPORT_SYMBOL(vfs_mknod);
static int may_mknod(umode_t mode)
@@ -3554,10 +3592,10 @@ retry:
goto out;
switch (mode & S_IFMT) {
case 0: case S_IFREG:
- error = vfs_create(path.dentry->d_inode,dentry,mode,true);
+ error = vfs_create2(path.mnt, path.dentry->d_inode,dentry,mode,true);
break;
case S_IFCHR: case S_IFBLK:
- error = vfs_mknod(path.dentry->d_inode,dentry,mode,
+ error = vfs_mknod2(path.mnt, path.dentry->d_inode,dentry,mode,
new_decode_dev(dev));
break;
case S_IFIFO: case S_IFSOCK:
@@ -3578,9 +3616,9 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d
return sys_mknodat(AT_FDCWD, filename, mode, dev);
}
-int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+int vfs_mkdir2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry, umode_t mode)
{
- int error = may_create(dir, dentry);
+ int error = may_create(mnt, dir, dentry);
unsigned max_links = dir->i_sb->s_max_links;
if (error)
@@ -3602,6 +3640,12 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
fsnotify_mkdir(dir, dentry);
return error;
}
+EXPORT_SYMBOL(vfs_mkdir2);
+
+int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+ return vfs_mkdir2(NULL, dir, dentry, mode);
+}
EXPORT_SYMBOL(vfs_mkdir);
SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)
@@ -3620,7 +3664,7 @@ retry:
mode &= ~current_umask();
error = security_path_mkdir(&path, dentry, mode);
if (!error)
- error = vfs_mkdir(path.dentry->d_inode, dentry, mode);
+ error = vfs_mkdir2(path.mnt, path.dentry->d_inode, dentry, mode);
done_path_create(&path, dentry);
if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
@@ -3659,9 +3703,9 @@ void dentry_unhash(struct dentry *dentry)
}
EXPORT_SYMBOL(dentry_unhash);
-int vfs_rmdir(struct inode *dir, struct dentry *dentry)
+int vfs_rmdir2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry)
{
- int error = may_delete(dir, dentry, 1);
+ int error = may_delete(mnt, dir, dentry, 1);
if (error)
return error;
@@ -3696,6 +3740,12 @@ out:
d_delete(dentry);
return error;
}
+EXPORT_SYMBOL(vfs_rmdir2);
+
+int vfs_rmdir(struct inode *dir, struct dentry *dentry)
+{
+ return vfs_rmdir2(NULL, dir, dentry);
+}
EXPORT_SYMBOL(vfs_rmdir);
static long do_rmdir(int dfd, const char __user *pathname)
@@ -3741,7 +3791,7 @@ retry:
error = security_path_rmdir(&path, dentry);
if (error)
goto exit3;
- error = vfs_rmdir(path.dentry->d_inode, dentry);
+ error = vfs_rmdir2(path.mnt, path.dentry->d_inode, dentry);
exit3:
dput(dentry);
exit2:
@@ -3780,10 +3830,10 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname)
* be appropriate for callers that expect the underlying filesystem not
* to be NFS exported.
*/
-int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegated_inode)
+int vfs_unlink2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry, struct inode **delegated_inode)
{
struct inode *target = dentry->d_inode;
- int error = may_delete(dir, dentry, 0);
+ int error = may_delete(mnt, dir, dentry, 0);
if (error)
return error;
@@ -3818,6 +3868,12 @@ out:
return error;
}
+EXPORT_SYMBOL(vfs_unlink2);
+
+int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegated_inode)
+{
+ return vfs_unlink2(NULL, dir, dentry, delegated_inode);
+}
EXPORT_SYMBOL(vfs_unlink);
/*
@@ -3865,7 +3921,7 @@ retry_deleg:
error = security_path_unlink(&path, dentry);
if (error)
goto exit2;
- error = vfs_unlink(path.dentry->d_inode, dentry, &delegated_inode);
+ error = vfs_unlink2(path.mnt, path.dentry->d_inode, dentry, &delegated_inode);
exit2:
dput(dentry);
}
@@ -3915,9 +3971,9 @@ SYSCALL_DEFINE1(unlink, const char __user *, pathname)
return do_unlinkat(AT_FDCWD, pathname);
}
-int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
+int vfs_symlink2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry, const char *oldname)
{
- int error = may_create(dir, dentry);
+ int error = may_create(mnt, dir, dentry);
if (error)
return error;
@@ -3934,6 +3990,12 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
fsnotify_create(dir, dentry);
return error;
}
+EXPORT_SYMBOL(vfs_symlink2);
+
+int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
+{
+ return vfs_symlink2(NULL, dir, dentry, oldname);
+}
EXPORT_SYMBOL(vfs_symlink);
SYSCALL_DEFINE3(symlinkat, const char __user *, oldname,
@@ -3956,7 +4018,7 @@ retry:
error = security_path_symlink(&path, dentry, from->name);
if (!error)
- error = vfs_symlink(path.dentry->d_inode, dentry, from->name);
+ error = vfs_symlink2(path.mnt, path.dentry->d_inode, dentry, from->name);
done_path_create(&path, dentry);
if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
@@ -3991,7 +4053,7 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn
* be appropriate for callers that expect the underlying filesystem not
* to be NFS exported.
*/
-int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode)
+int vfs_link2(struct vfsmount *mnt, struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode)
{
struct inode *inode = old_dentry->d_inode;
unsigned max_links = dir->i_sb->s_max_links;
@@ -4000,7 +4062,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
if (!inode)
return -ENOENT;
- error = may_create(dir, new_dentry);
+ error = may_create(mnt, dir, new_dentry);
if (error)
return error;
@@ -4043,6 +4105,12 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
fsnotify_link(dir, inode, new_dentry);
return error;
}
+EXPORT_SYMBOL(vfs_link2);
+
+int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode)
+{
+ return vfs_link2(NULL, old_dentry, dir, new_dentry, delegated_inode);
+}
EXPORT_SYMBOL(vfs_link);
/*
@@ -4098,7 +4166,7 @@ retry:
error = security_path_link(old_path.dentry, &new_path, new_dentry);
if (error)
goto out_dput;
- error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry, &delegated_inode);
+ error = vfs_link2(old_path.mnt, old_path.dentry, new_path.dentry->d_inode, new_dentry, &delegated_inode);
out_dput:
done_path_create(&new_path, new_dentry);
if (delegated_inode) {
@@ -4173,7 +4241,8 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
* ->i_mutex on parents, which works but leads to some truly excessive
* locking].
*/
-int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+int vfs_rename2(struct vfsmount *mnt,
+ struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
struct inode **delegated_inode, unsigned int flags)
{
@@ -4192,19 +4261,19 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (vfs_select_inode(old_dentry, 0) == vfs_select_inode(new_dentry, 0))
return 0;
- error = may_delete(old_dir, old_dentry, is_dir);
+ error = may_delete(mnt, old_dir, old_dentry, is_dir);
if (error)
return error;
if (!target) {
- error = may_create(new_dir, new_dentry);
+ error = may_create(mnt, new_dir, new_dentry);
} else {
new_is_dir = d_is_dir(new_dentry);
if (!(flags & RENAME_EXCHANGE))
- error = may_delete(new_dir, new_dentry, is_dir);
+ error = may_delete(mnt, new_dir, new_dentry, is_dir);
else
- error = may_delete(new_dir, new_dentry, new_is_dir);
+ error = may_delete(mnt, new_dir, new_dentry, new_is_dir);
}
if (error)
return error;
@@ -4221,12 +4290,12 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
*/
if (new_dir != old_dir) {
if (is_dir) {
- error = inode_permission(source, MAY_WRITE);
+ error = inode_permission2(mnt, source, MAY_WRITE);
if (error)
return error;
}
if ((flags & RENAME_EXCHANGE) && new_is_dir) {
- error = inode_permission(target, MAY_WRITE);
+ error = inode_permission2(mnt, target, MAY_WRITE);
if (error)
return error;
}
@@ -4309,6 +4378,14 @@ out:
return error;
}
+EXPORT_SYMBOL(vfs_rename2);
+
+int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
+ struct inode **delegated_inode, unsigned int flags)
+{
+ return vfs_rename2(NULL, old_dir, old_dentry, new_dir, new_dentry, delegated_inode, flags);
+}
EXPORT_SYMBOL(vfs_rename);
SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
@@ -4422,7 +4499,7 @@ retry_deleg:
&new_path, new_dentry, flags);
if (error)
goto exit5;
- error = vfs_rename(old_path.dentry->d_inode, old_dentry,
+ error = vfs_rename2(old_path.mnt, old_path.dentry->d_inode, old_dentry,
new_path.dentry->d_inode, new_dentry,
&delegated_inode, flags);
exit5:
@@ -4467,7 +4544,7 @@ SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newna
int vfs_whiteout(struct inode *dir, struct dentry *dentry)
{
- int error = may_create(dir, dentry);
+ int error = may_create(NULL, dir, dentry);
if (error)
return error;
diff --git a/fs/namespace.c b/fs/namespace.c
index f26d18d69712..a22959c97384 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -580,6 +580,7 @@ int sb_prepare_remount_readonly(struct super_block *sb)
static void free_vfsmnt(struct mount *mnt)
{
+ kfree(mnt->mnt.data);
kfree_const(mnt->mnt_devname);
#ifdef CONFIG_SMP
free_percpu(mnt->mnt_pcp);
@@ -974,11 +975,21 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
if (!mnt)
return ERR_PTR(-ENOMEM);
+ mnt->mnt.data = NULL;
+ if (type->alloc_mnt_data) {
+ mnt->mnt.data = type->alloc_mnt_data();
+ if (!mnt->mnt.data) {
+ mnt_free_id(mnt);
+ free_vfsmnt(mnt);
+ return ERR_PTR(-ENOMEM);
+ }
+ }
if (flags & MS_KERNMOUNT)
mnt->mnt.mnt_flags = MNT_INTERNAL;
- root = mount_fs(type, flags, name, data);
+ root = mount_fs(type, flags, name, &mnt->mnt, data);
if (IS_ERR(root)) {
+ kfree(mnt->mnt.data);
mnt_free_id(mnt);
free_vfsmnt(mnt);
return ERR_CAST(root);
@@ -1006,6 +1017,14 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
if (!mnt)
return ERR_PTR(-ENOMEM);
+ if (sb->s_op->clone_mnt_data) {
+ mnt->mnt.data = sb->s_op->clone_mnt_data(old->mnt.data);
+ if (!mnt->mnt.data) {
+ err = -ENOMEM;
+ goto out_free;
+ }
+ }
+
if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE))
mnt->mnt_group_id = 0; /* not a peer of original */
else
@@ -1074,6 +1093,7 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
return mnt;
out_free:
+ kfree(mnt->mnt.data);
mnt_free_id(mnt);
free_vfsmnt(mnt);
return ERR_PTR(err);
@@ -2287,8 +2307,14 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
err = change_mount_flags(path->mnt, flags);
else if (!capable(CAP_SYS_ADMIN))
err = -EPERM;
- else
- err = do_remount_sb(sb, flags, data, 0);
+ else {
+ err = do_remount_sb2(path->mnt, sb, flags, data, 0);
+ namespace_lock();
+ lock_mount_hash();
+ propagate_remount(mnt);
+ unlock_mount_hash();
+ namespace_unlock();
+ }
if (!err) {
lock_mount_hash();
mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index a64313868d3a..2958e7a81f9c 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -488,7 +488,7 @@ static int fanotify_find_path(int dfd, const char __user *filename,
}
/* you can only watch an inode if you have read permissions on it */
- ret = inode_permission(path->dentry->d_inode, MAY_READ);
+ ret = inode_permission2(path->mnt, path->dentry->d_inode, MAY_READ);
if (ret)
path_put(path);
out:
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index b8d08d0d0a4d..4c5b43d15e6e 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -337,7 +337,7 @@ static int inotify_find_inode(const char __user *dirname, struct path *path, uns
if (error)
return error;
/* you can only watch an inode if you have read permissions on it */
- error = inode_permission(path->dentry->d_inode, MAY_READ);
+ error = inode_permission2(path->mnt, path->dentry->d_inode, MAY_READ);
if (error)
path_put(path);
return error;
@@ -702,6 +702,8 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
struct fsnotify_group *group;
struct inode *inode;
struct path path;
+ struct path alteredpath;
+ struct path *canonical_path = &path;
struct fd f;
int ret;
unsigned flags = 0;
@@ -741,13 +743,22 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
if (ret)
goto fput_and_out;
+ /* support stacked filesystems */
+ if(path.dentry && path.dentry->d_op) {
+ if (path.dentry->d_op->d_canonical_path) {
+ path.dentry->d_op->d_canonical_path(&path, &alteredpath);
+ canonical_path = &alteredpath;
+ path_put(&path);
+ }
+ }
+
/* inode held in place by reference to path; group by fget on fd */
- inode = path.dentry->d_inode;
+ inode = canonical_path->dentry->d_inode;
group = f.file->private_data;
/* create/update an inode mark */
ret = inotify_update_watch(group, inode, mask);
- path_put(&path);
+ path_put(canonical_path);
fput_and_out:
fdput(f);
return ret;
diff --git a/fs/open.c b/fs/open.c
index 157b9940dd73..e70cca15c976 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -34,8 +34,8 @@
#include "internal.h"
-int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
- struct file *filp)
+int do_truncate2(struct vfsmount *mnt, struct dentry *dentry, loff_t length,
+ unsigned int time_attrs, struct file *filp)
{
int ret;
struct iattr newattrs;
@@ -60,17 +60,24 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
mutex_lock(&dentry->d_inode->i_mutex);
/* Note any delegations or leases have already been broken: */
- ret = notify_change(dentry, &newattrs, NULL);
+ ret = notify_change2(mnt, dentry, &newattrs, NULL);
mutex_unlock(&dentry->d_inode->i_mutex);
return ret;
}
+int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
+ struct file *filp)
+{
+ return do_truncate2(NULL, dentry, length, time_attrs, filp);
+}
long vfs_truncate(struct path *path, loff_t length)
{
struct inode *inode;
+ struct vfsmount *mnt;
long error;
inode = path->dentry->d_inode;
+ mnt = path->mnt;
/* For directories it's -EISDIR, for other non-regulars - -EINVAL */
if (S_ISDIR(inode->i_mode))
@@ -82,7 +89,7 @@ long vfs_truncate(struct path *path, loff_t length)
if (error)
goto out;
- error = inode_permission(inode, MAY_WRITE);
+ error = inode_permission2(mnt, inode, MAY_WRITE);
if (error)
goto mnt_drop_write_and_out;
@@ -106,7 +113,7 @@ long vfs_truncate(struct path *path, loff_t length)
if (!error)
error = security_path_truncate(path);
if (!error)
- error = do_truncate(path->dentry, length, 0, NULL);
+ error = do_truncate2(mnt, path->dentry, length, 0, NULL);
put_write_and_out:
put_write_access(inode);
@@ -155,6 +162,7 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
{
struct inode *inode;
struct dentry *dentry;
+ struct vfsmount *mnt;
struct fd f;
int error;
@@ -171,6 +179,7 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
small = 0;
dentry = f.file->f_path.dentry;
+ mnt = f.file->f_path.mnt;
inode = dentry->d_inode;
error = -EINVAL;
if (!S_ISREG(inode->i_mode) || !(f.file->f_mode & FMODE_WRITE))
@@ -190,7 +199,7 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
if (!error)
error = security_path_truncate(&f.file->f_path);
if (!error)
- error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, f.file);
+ error = do_truncate2(mnt, dentry, length, ATTR_MTIME|ATTR_CTIME, f.file);
sb_end_write(inode->i_sb);
out_putf:
fdput(f);
@@ -340,6 +349,7 @@ SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode)
struct cred *override_cred;
struct path path;
struct inode *inode;
+ struct vfsmount *mnt;
int res;
unsigned int lookup_flags = LOOKUP_FOLLOW;
@@ -370,6 +380,7 @@ retry:
goto out;
inode = d_backing_inode(path.dentry);
+ mnt = path.mnt;
if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) {
/*
@@ -381,7 +392,7 @@ retry:
goto out_path_release;
}
- res = inode_permission(inode, mode | MAY_ACCESS);
+ res = inode_permission2(mnt, inode, mode | MAY_ACCESS);
/* SuS v2 requires we report a read only fs too */
if (res || !(mode & S_IWOTH) || special_file(inode->i_mode))
goto out_path_release;
@@ -425,7 +436,7 @@ retry:
if (error)
goto out;
- error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
+ error = inode_permission2(path.mnt, path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
if (error)
goto dput_and_out;
@@ -445,6 +456,7 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd)
{
struct fd f = fdget_raw(fd);
struct inode *inode;
+ struct vfsmount *mnt;
int error = -EBADF;
error = -EBADF;
@@ -452,12 +464,13 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd)
goto out;
inode = file_inode(f.file);
+ mnt = f.file->f_path.mnt;
error = -ENOTDIR;
if (!S_ISDIR(inode->i_mode))
goto out_putf;
- error = inode_permission(inode, MAY_EXEC | MAY_CHDIR);
+ error = inode_permission2(mnt, inode, MAY_EXEC | MAY_CHDIR);
if (!error)
set_fs_pwd(current->fs, &f.file->f_path);
out_putf:
@@ -476,7 +489,7 @@ retry:
if (error)
goto out;
- error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
+ error = inode_permission2(path.mnt, path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
if (error)
goto dput_and_out;
@@ -516,7 +529,7 @@ retry_deleg:
goto out_unlock;
newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
- error = notify_change(path->dentry, &newattrs, &delegated_inode);
+ error = notify_change2(path->mnt, path->dentry, &newattrs, &delegated_inode);
out_unlock:
mutex_unlock(&inode->i_mutex);
if (delegated_inode) {
@@ -596,7 +609,7 @@ retry_deleg:
mutex_lock(&inode->i_mutex);
error = security_path_chown(path, uid, gid);
if (!error)
- error = notify_change(path->dentry, &newattrs, &delegated_inode);
+ error = notify_change2(path->mnt, path->dentry, &newattrs, &delegated_inode);
mutex_unlock(&inode->i_mutex);
if (delegated_inode) {
error = break_deleg_wait(&delegated_inode);
diff --git a/fs/pnode.c b/fs/pnode.c
index b394ca5307ec..b5f97c605d98 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -495,3 +495,32 @@ int propagate_umount(struct list_head *list)
__propagate_umount(mnt);
return 0;
}
+
+/*
+ * Iterates over all slaves, and slaves of slaves.
+ */
+static struct mount *next_descendent(struct mount *root, struct mount *cur)
+{
+ if (!IS_MNT_NEW(cur) && !list_empty(&cur->mnt_slave_list))
+ return first_slave(cur);
+ do {
+ if (cur->mnt_slave.next != &cur->mnt_master->mnt_slave_list)
+ return next_slave(cur);
+ cur = cur->mnt_master;
+ } while (cur != root);
+ return NULL;
+}
+
+void propagate_remount(struct mount *mnt)
+{
+ struct mount *m = mnt;
+ struct super_block *sb = mnt->mnt.mnt_sb;
+
+ if (sb->s_op->copy_mnt_data) {
+ m = next_descendent(mnt, m);
+ while (m) {
+ sb->s_op->copy_mnt_data(m->mnt.data, mnt->mnt.data);
+ m = next_descendent(mnt, m);
+ }
+ }
+}
diff --git a/fs/pnode.h b/fs/pnode.h
index dc87e65becd2..a9a6576540ad 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -44,6 +44,7 @@ int propagate_mnt(struct mount *, struct mountpoint *, struct mount *,
int propagate_umount(struct list_head *);
int propagate_mount_busy(struct mount *, int);
void propagate_mount_unlock(struct mount *);
+void propagate_remount(struct mount *);
void mnt_release_group_id(struct mount *);
int get_dominating_id(struct mount *mnt, const struct path *root);
unsigned int mnt_get_count(struct mount *mnt);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index d2b8c754f627..0c9ea52ab399 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2240,6 +2240,92 @@ static const struct file_operations proc_timers_operations = {
.release = seq_release_private,
};
+static ssize_t timerslack_ns_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *offset)
+{
+ struct inode *inode = file_inode(file);
+ struct task_struct *p;
+ u64 slack_ns;
+ int err;
+
+ err = kstrtoull_from_user(buf, count, 10, &slack_ns);
+ if (err < 0)
+ return err;
+
+ p = get_proc_task(inode);
+ if (!p)
+ return -ESRCH;
+
+ if (p != current) {
+ if (!capable(CAP_SYS_NICE)) {
+ count = -EPERM;
+ goto out;
+ }
+
+ err = security_task_setscheduler(p);
+ if (err) {
+ count = err;
+ goto out;
+ }
+ }
+
+ task_lock(p);
+ if (slack_ns == 0)
+ p->timer_slack_ns = p->default_timer_slack_ns;
+ else
+ p->timer_slack_ns = slack_ns;
+ task_unlock(p);
+
+out:
+ put_task_struct(p);
+
+ return count;
+}
+
+static int timerslack_ns_show(struct seq_file *m, void *v)
+{
+ struct inode *inode = m->private;
+ struct task_struct *p;
+ int err = 0;
+
+ p = get_proc_task(inode);
+ if (!p)
+ return -ESRCH;
+
+ if (p != current) {
+
+ if (!capable(CAP_SYS_NICE)) {
+ err = -EPERM;
+ goto out;
+ }
+ err = security_task_getscheduler(p);
+ if (err)
+ goto out;
+ }
+
+ task_lock(p);
+ seq_printf(m, "%llu\n", p->timer_slack_ns);
+ task_unlock(p);
+
+out:
+ put_task_struct(p);
+
+ return err;
+}
+
+static int timerslack_ns_open(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, timerslack_ns_show, inode);
+}
+
+static const struct file_operations proc_pid_set_timerslack_ns_operations = {
+ .open = timerslack_ns_open,
+ .read = seq_read,
+ .write = timerslack_ns_write,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
static int proc_pident_instantiate(struct inode *dir,
struct dentry *dentry, struct task_struct *task, const void *ptr)
{
@@ -2790,8 +2876,8 @@ static const struct pid_entry tgid_base_stuff[] = {
ONE("cgroup", S_IRUGO, proc_cgroup_show),
#endif
ONE("oom_score", S_IRUGO, proc_oom_score),
- REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
- REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
+ REG("oom_adj", S_IRUSR, proc_oom_adj_operations),
+ REG("oom_score_adj", S_IRUSR, proc_oom_score_adj_operations),
#ifdef CONFIG_AUDITSYSCALL
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
REG("sessionid", S_IRUGO, proc_sessionid_operations),
@@ -2817,6 +2903,7 @@ static const struct pid_entry tgid_base_stuff[] = {
#ifdef CONFIG_CHECKPOINT_RESTORE
REG("timers", S_IRUGO, proc_timers_operations),
#endif
+ REG("timerslack_ns", S_IRUGO|S_IWUGO, proc_pid_set_timerslack_ns_operations),
};
static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
@@ -3074,6 +3161,44 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx)
}
/*
+ * proc_tid_comm_permission is a special permission function exclusively
+ * used for the node /proc/<pid>/task/<tid>/comm.
+ * It bypasses generic permission checks in the case where a task of the same
+ * task group attempts to access the node.
+ * The rational behind this is that glibc and bionic access this node for
+ * cross thread naming (pthread_set/getname_np(!self)). However, if
+ * PR_SET_DUMPABLE gets set to 0 this node among others becomes uid=0 gid=0,
+ * which locks out the cross thread naming implementation.
+ * This function makes sure that the node is always accessible for members of
+ * same thread group.
+ */
+static int proc_tid_comm_permission(struct inode *inode, int mask)
+{
+ bool is_same_tgroup;
+ struct task_struct *task;
+
+ task = get_proc_task(inode);
+ if (!task)
+ return -ESRCH;
+ is_same_tgroup = same_thread_group(current, task);
+ put_task_struct(task);
+
+ if (likely(is_same_tgroup && !(mask & MAY_EXEC))) {
+ /* This file (/proc/<pid>/task/<tid>/comm) can always be
+ * read or written by the members of the corresponding
+ * thread group.
+ */
+ return 0;
+ }
+
+ return generic_permission(inode, mask);
+}
+
+static const struct inode_operations proc_tid_comm_inode_operations = {
+ .permission = proc_tid_comm_permission,
+};
+
+/*
* Tasks
*/
static const struct pid_entry tid_base_stuff[] = {
@@ -3091,7 +3216,9 @@ static const struct pid_entry tid_base_stuff[] = {
#ifdef CONFIG_SCHED_DEBUG
REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
#endif
- REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
+ NOD("comm", S_IFREG|S_IRUGO|S_IWUSR,
+ &proc_tid_comm_inode_operations,
+ &proc_pid_set_comm_operations, {}),
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
ONE("syscall", S_IRUSR, proc_pid_syscall),
#endif
@@ -3138,8 +3265,8 @@ static const struct pid_entry tid_base_stuff[] = {
ONE("cgroup", S_IRUGO, proc_cgroup_show),
#endif
ONE("oom_score", S_IRUGO, proc_oom_score),
- REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
- REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
+ REG("oom_adj", S_IRUSR, proc_oom_adj_operations),
+ REG("oom_score_adj", S_IRUSR, proc_oom_score_adj_operations),
#ifdef CONFIG_AUDITSYSCALL
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
REG("sessionid", S_IRUGO, proc_sessionid_operations),
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 92e6726f6e37..21f198aa0961 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -430,6 +430,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
static ssize_t
read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
{
+ char *buf = file->private_data;
ssize_t acc = 0;
size_t size, tsz;
size_t elf_buflen;
@@ -500,23 +501,20 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
if (clear_user(buffer, tsz))
return -EFAULT;
} else if (is_vmalloc_or_module_addr((void *)start)) {
- char * elf_buf;
-
- elf_buf = kzalloc(tsz, GFP_KERNEL);
- if (!elf_buf)
- return -ENOMEM;
- vread(elf_buf, (char *)start, tsz);
+ vread(buf, (char *)start, tsz);
/* we have to zero-fill user buffer even if no read */
- if (copy_to_user(buffer, elf_buf, tsz)) {
- kfree(elf_buf);
+ if (copy_to_user(buffer, buf, tsz))
return -EFAULT;
- }
- kfree(elf_buf);
} else {
if (kern_addr_valid(start)) {
unsigned long n;
- n = copy_to_user(buffer, (char *)start, tsz);
+ /*
+ * Using bounce buffer to bypass the
+ * hardened user copy kernel text checks.
+ */
+ memcpy(buf, (char *) start, tsz);
+ n = copy_to_user(buffer, buf, tsz);
/*
* We cannot distinguish between fault on source
* and fault on destination. When this happens
@@ -549,6 +547,11 @@ static int open_kcore(struct inode *inode, struct file *filp)
{
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
+
+ filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!filp->private_data)
+ return -ENOMEM;
+
if (kcore_need_update)
kcore_update_ram();
if (i_size_read(inode) != proc_root_kcore->size) {
@@ -559,10 +562,16 @@ static int open_kcore(struct inode *inode, struct file *filp)
return 0;
}
+static int release_kcore(struct inode *inode, struct file *file)
+{
+ kfree(file->private_data);
+ return 0;
+}
static const struct file_operations proc_kcore_operations = {
.read = read_kcore,
.open = open_kcore,
+ .release = release_kcore,
.llseek = default_llseek,
};
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index db1a1427c27a..0160ae6c1096 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -116,6 +116,56 @@ static void release_task_mempolicy(struct proc_maps_private *priv)
}
#endif
+static void seq_print_vma_name(struct seq_file *m, struct vm_area_struct *vma)
+{
+ const char __user *name = vma_get_anon_name(vma);
+ struct mm_struct *mm = vma->vm_mm;
+
+ unsigned long page_start_vaddr;
+ unsigned long page_offset;
+ unsigned long num_pages;
+ unsigned long max_len = NAME_MAX;
+ int i;
+
+ page_start_vaddr = (unsigned long)name & PAGE_MASK;
+ page_offset = (unsigned long)name - page_start_vaddr;
+ num_pages = DIV_ROUND_UP(page_offset + max_len, PAGE_SIZE);
+
+ seq_puts(m, "[anon:");
+
+ for (i = 0; i < num_pages; i++) {
+ int len;
+ int write_len;
+ const char *kaddr;
+ long pages_pinned;
+ struct page *page;
+
+ pages_pinned = get_user_pages(current, mm, page_start_vaddr,
+ 1, 0, 0, &page, NULL);
+ if (pages_pinned < 1) {
+ seq_puts(m, "<fault>]");
+ return;
+ }
+
+ kaddr = (const char *)kmap(page);
+ len = min(max_len, PAGE_SIZE - page_offset);
+ write_len = strnlen(kaddr + page_offset, len);
+ seq_write(m, kaddr + page_offset, write_len);
+ kunmap(page);
+ put_page(page);
+
+ /* if strnlen hit a null terminator then we're done */
+ if (write_len != len)
+ break;
+
+ max_len -= len;
+ page_offset = 0;
+ page_start_vaddr += PAGE_SIZE;
+ }
+
+ seq_putc(m, ']');
+}
+
static void vma_stop(struct proc_maps_private *priv)
{
struct mm_struct *mm = priv->mm;
@@ -341,8 +391,15 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
goto done;
}
- if (is_stack(priv, vma, is_pid))
+ if (is_stack(priv, vma, is_pid)) {
name = "[stack]";
+ goto done;
+ }
+
+ if (vma_get_anon_name(vma)) {
+ seq_pad(m, ' ');
+ seq_print_vma_name(m, vma);
+ }
}
done:
@@ -667,6 +724,12 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
show_map_vma(m, vma, is_pid);
+ if (vma_get_anon_name(vma)) {
+ seq_puts(m, "Name: ");
+ seq_print_vma_name(m, vma);
+ seq_putc(m, '\n');
+ }
+
seq_printf(m,
"Size: %8lu kB\n"
"Rss: %8lu kB\n"
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 87645955990d..961e597acfc6 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -118,7 +118,9 @@ static int show_vfsmnt(struct seq_file *m, struct vfsmount *mnt)
if (err)
goto out;
show_mnt_opts(m, mnt);
- if (sb->s_op->show_options)
+ if (sb->s_op->show_options2)
+ err = sb->s_op->show_options2(mnt, m, mnt_path.dentry);
+ else if (sb->s_op->show_options)
err = sb->s_op->show_options(m, mnt_path.dentry);
seq_puts(m, " 0 0\n");
out:
@@ -178,7 +180,9 @@ static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt)
err = show_sb_opts(m, sb);
if (err)
goto out;
- if (sb->s_op->show_options)
+ if (sb->s_op->show_options2) {
+ err = sb->s_op->show_options2(mnt, m, mnt->mnt_root);
+ } else if (sb->s_op->show_options)
err = sb->s_op->show_options(m, mnt->mnt_root);
seq_putc(m, '\n');
out:
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 588461bb2dd4..40a0fe0a4e05 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -431,6 +431,40 @@ static int pstore_write_compat(enum pstore_type_id type,
size, psi);
}
+static int pstore_write_buf_user_compat(enum pstore_type_id type,
+ enum kmsg_dump_reason reason,
+ u64 *id, unsigned int part,
+ const char __user *buf,
+ bool compressed, size_t size,
+ struct pstore_info *psi)
+{
+ unsigned long flags = 0;
+ size_t i, bufsize = size;
+ long ret = 0;
+
+ if (unlikely(!access_ok(VERIFY_READ, buf, size)))
+ return -EFAULT;
+ if (bufsize > psinfo->bufsize)
+ bufsize = psinfo->bufsize;
+ spin_lock_irqsave(&psinfo->buf_lock, flags);
+ for (i = 0; i < size; ) {
+ size_t c = min(size - i, bufsize);
+
+ ret = __copy_from_user(psinfo->buf, buf + i, c);
+ if (unlikely(ret != 0)) {
+ ret = -EFAULT;
+ break;
+ }
+ ret = psi->write_buf(type, reason, id, part, psinfo->buf,
+ compressed, c, psi);
+ if (unlikely(ret < 0))
+ break;
+ i += c;
+ }
+ spin_unlock_irqrestore(&psinfo->buf_lock, flags);
+ return unlikely(ret < 0) ? ret : size;
+}
+
/*
* platform specific persistent storage driver registers with
* us here. If pstore is already mounted, call the platform
@@ -453,6 +487,8 @@ int pstore_register(struct pstore_info *psi)
if (!psi->write)
psi->write = pstore_write_compat;
+ if (!psi->write_buf_user)
+ psi->write_buf_user = pstore_write_buf_user_compat;
psinfo = psi;
mutex_init(&psinfo->read_mutex);
spin_unlock(&pstore_lock);
diff --git a/fs/pstore/pmsg.c b/fs/pstore/pmsg.c
index 7de20cd3797f..78f6176c020f 100644
--- a/fs/pstore/pmsg.c
+++ b/fs/pstore/pmsg.c
@@ -19,48 +19,25 @@
#include "internal.h"
static DEFINE_MUTEX(pmsg_lock);
-#define PMSG_MAX_BOUNCE_BUFFER_SIZE (2*PAGE_SIZE)
static ssize_t write_pmsg(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
- size_t i, buffer_size;
- char *buffer;
+ u64 id;
+ int ret;
if (!count)
return 0;
+ /* check outside lock, page in any data. write_buf_user also checks */
if (!access_ok(VERIFY_READ, buf, count))
return -EFAULT;
- buffer_size = count;
- if (buffer_size > PMSG_MAX_BOUNCE_BUFFER_SIZE)
- buffer_size = PMSG_MAX_BOUNCE_BUFFER_SIZE;
- buffer = vmalloc(buffer_size);
- if (!buffer)
- return -ENOMEM;
-
mutex_lock(&pmsg_lock);
- for (i = 0; i < count; ) {
- size_t c = min(count - i, buffer_size);
- u64 id;
- long ret;
-
- ret = __copy_from_user(buffer, buf + i, c);
- if (unlikely(ret != 0)) {
- mutex_unlock(&pmsg_lock);
- vfree(buffer);
- return -EFAULT;
- }
- psinfo->write_buf(PSTORE_TYPE_PMSG, 0, &id, 0, buffer, 0, c,
- psinfo);
-
- i += c;
- }
-
+ ret = psinfo->write_buf_user(PSTORE_TYPE_PMSG, 0, &id, 0, buf, 0, count,
+ psinfo);
mutex_unlock(&pmsg_lock);
- vfree(buffer);
- return count;
+ return ret ? ret : count;
}
static const struct file_operations pmsg_fops = {
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 905caba36529..8d1e5e2db6a1 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -34,6 +34,8 @@
#include <linux/slab.h>
#include <linux/compiler.h>
#include <linux/pstore_ram.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
#define RAMOOPS_KERNMSG_HDR "===="
#define MIN_MEM_SIZE 4096UL
@@ -329,6 +331,24 @@ static int notrace ramoops_pstore_write_buf(enum pstore_type_id type,
return 0;
}
+static int notrace ramoops_pstore_write_buf_user(enum pstore_type_id type,
+ enum kmsg_dump_reason reason,
+ u64 *id, unsigned int part,
+ const char __user *buf,
+ bool compressed, size_t size,
+ struct pstore_info *psi)
+{
+ if (type == PSTORE_TYPE_PMSG) {
+ struct ramoops_context *cxt = psi->data;
+
+ if (!cxt->mprz)
+ return -ENOMEM;
+ return persistent_ram_write_user(cxt->mprz, buf, size);
+ }
+
+ return -EINVAL;
+}
+
static int ramoops_pstore_erase(enum pstore_type_id type, u64 id, int count,
struct timespec time, struct pstore_info *psi)
{
@@ -367,6 +387,7 @@ static struct ramoops_context oops_cxt = {
.open = ramoops_pstore_open,
.read = ramoops_pstore_read,
.write_buf = ramoops_pstore_write_buf,
+ .write_buf_user = ramoops_pstore_write_buf_user,
.erase = ramoops_pstore_erase,
},
};
@@ -466,6 +487,97 @@ static int ramoops_init_prz(struct device *dev, struct ramoops_context *cxt,
return 0;
}
+void notrace ramoops_console_write_buf(const char *buf, size_t size)
+{
+ struct ramoops_context *cxt = &oops_cxt;
+ persistent_ram_write(cxt->cprz, buf, size);
+}
+
+static int ramoops_parse_dt_size(struct platform_device *pdev,
+ const char *propname, unsigned long *val)
+{
+ u64 val64;
+ int ret;
+
+ ret = of_property_read_u64(pdev->dev.of_node, propname, &val64);
+ if (ret == -EINVAL) {
+ *val = 0;
+ return 0;
+ } else if (ret != 0) {
+ dev_err(&pdev->dev, "failed to parse property %s: %d\n",
+ propname, ret);
+ return ret;
+ }
+
+ if (val64 > ULONG_MAX) {
+ dev_err(&pdev->dev, "invalid %s %llu\n", propname, val64);
+ return -EOVERFLOW;
+ }
+
+ *val = val64;
+ return 0;
+}
+
+static int ramoops_parse_dt(struct platform_device *pdev,
+ struct ramoops_platform_data *pdata)
+{
+ struct device_node *of_node = pdev->dev.of_node;
+ struct device_node *mem_region;
+ struct resource res;
+ u32 ecc_size;
+ int ret;
+
+ dev_dbg(&pdev->dev, "using Device Tree\n");
+
+ mem_region = of_parse_phandle(of_node, "memory-region", 0);
+ if (!mem_region) {
+ dev_err(&pdev->dev, "no memory-region phandle\n");
+ return -ENODEV;
+ }
+
+ ret = of_address_to_resource(mem_region, 0, &res);
+ of_node_put(mem_region);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to translate memory-region to resource: %d\n",
+ ret);
+ return ret;
+ }
+
+ pdata->mem_size = resource_size(&res);
+ pdata->mem_address = res.start;
+ pdata->mem_type = of_property_read_bool(of_node, "unbuffered");
+ pdata->dump_oops = !of_property_read_bool(of_node, "no-dump-oops");
+
+ ret = ramoops_parse_dt_size(pdev, "record-size", &pdata->record_size);
+ if (ret < 0)
+ return ret;
+
+ ret = ramoops_parse_dt_size(pdev, "console-size", &pdata->console_size);
+ if (ret < 0)
+ return ret;
+
+ ret = ramoops_parse_dt_size(pdev, "ftrace-size", &pdata->ftrace_size);
+ if (ret < 0)
+ return ret;
+
+ ret = ramoops_parse_dt_size(pdev, "pmsg-size", &pdata->pmsg_size);
+ if (ret < 0)
+ return ret;
+
+ ret = of_property_read_u32(of_node, "ecc-size", &ecc_size);
+ if (ret == 0) {
+ if (ecc_size > INT_MAX) {
+ dev_err(&pdev->dev, "invalid ecc-size %u\n", ecc_size);
+ return -EOVERFLOW;
+ }
+ pdata->ecc_info.ecc_size = ecc_size;
+ } else if (ret != -EINVAL) {
+ return ret;
+ }
+
+ return 0;
+}
+
static int ramoops_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
@@ -475,6 +587,18 @@ static int ramoops_probe(struct platform_device *pdev)
phys_addr_t paddr;
int err = -EINVAL;
+ if (dev->of_node && !pdata) {
+ pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
+ if (!pdata) {
+ err = -ENOMEM;
+ goto fail_out;
+ }
+
+ err = ramoops_parse_dt(pdev, pdata);
+ if (err < 0)
+ goto fail_out;
+ }
+
/* Only a single ramoops area allowed at a time, so fail extra
* probes.
*/
@@ -603,11 +727,17 @@ static int ramoops_remove(struct platform_device *pdev)
return 0;
}
+static const struct of_device_id dt_match[] = {
+ { .compatible = "ramoops" },
+ {}
+};
+
static struct platform_driver ramoops_driver = {
.probe = ramoops_probe,
.remove = ramoops_remove,
.driver = {
- .name = "ramoops",
+ .name = "ramoops",
+ .of_match_table = dt_match,
},
};
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index 364d2dffe5a6..3975deec02f8 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -17,15 +17,16 @@
#include <linux/device.h>
#include <linux/err.h>
#include <linux/errno.h>
-#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/io.h>
+#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/memblock.h>
+#include <linux/pstore_ram.h>
#include <linux/rslib.h>
#include <linux/slab.h>
+#include <linux/uaccess.h>
#include <linux/vmalloc.h>
-#include <linux/pstore_ram.h>
#include <asm/page.h>
struct persistent_ram_buffer {
@@ -267,6 +268,16 @@ static void notrace persistent_ram_update(struct persistent_ram_zone *prz,
persistent_ram_update_ecc(prz, start, count);
}
+static int notrace persistent_ram_update_user(struct persistent_ram_zone *prz,
+ const void __user *s, unsigned int start, unsigned int count)
+{
+ struct persistent_ram_buffer *buffer = prz->buffer;
+ int ret = unlikely(__copy_from_user(buffer->data + start, s, count)) ?
+ -EFAULT : 0;
+ persistent_ram_update_ecc(prz, start, count);
+ return ret;
+}
+
void persistent_ram_save_old(struct persistent_ram_zone *prz)
{
struct persistent_ram_buffer *buffer = prz->buffer;
@@ -320,6 +331,38 @@ int notrace persistent_ram_write(struct persistent_ram_zone *prz,
return count;
}
+int notrace persistent_ram_write_user(struct persistent_ram_zone *prz,
+ const void __user *s, unsigned int count)
+{
+ int rem, ret = 0, c = count;
+ size_t start;
+
+ if (unlikely(!access_ok(VERIFY_READ, s, count)))
+ return -EFAULT;
+ if (unlikely(c > prz->buffer_size)) {
+ s += c - prz->buffer_size;
+ c = prz->buffer_size;
+ }
+
+ buffer_size_add(prz, c);
+
+ start = buffer_start_add(prz, c);
+
+ rem = prz->buffer_size - start;
+ if (unlikely(rem < c)) {
+ ret = persistent_ram_update_user(prz, s, start, rem);
+ s += rem;
+ c -= rem;
+ start = 0;
+ }
+ if (likely(!ret))
+ ret = persistent_ram_update_user(prz, s, start, c);
+
+ persistent_ram_update_header_ecc(prz);
+
+ return unlikely(ret) ? ret : count;
+}
+
size_t persistent_ram_old_size(struct persistent_ram_zone *prz)
{
return prz->old_log_size;
diff --git a/fs/sdcardfs/Kconfig b/fs/sdcardfs/Kconfig
new file mode 100644
index 000000000000..a1c103316ac7
--- /dev/null
+++ b/fs/sdcardfs/Kconfig
@@ -0,0 +1,13 @@
+config SDCARD_FS
+ tristate "sdcard file system"
+ depends on CONFIGFS_FS
+ default n
+ help
+ Sdcardfs is based on Wrapfs file system.
+
+config SDCARD_FS_FADV_NOACTIVE
+ bool "sdcardfs fadvise noactive support"
+ depends on FADV_NOACTIVE
+ default y
+ help
+ Sdcardfs supports fadvise noactive mode.
diff --git a/fs/sdcardfs/Makefile b/fs/sdcardfs/Makefile
new file mode 100644
index 000000000000..b84fbb2b45a4
--- /dev/null
+++ b/fs/sdcardfs/Makefile
@@ -0,0 +1,7 @@
+SDCARDFS_VERSION="0.1"
+
+EXTRA_CFLAGS += -DSDCARDFS_VERSION=\"$(SDCARDFS_VERSION)\"
+
+obj-$(CONFIG_SDCARD_FS) += sdcardfs.o
+
+sdcardfs-y := dentry.o file.o inode.o main.o super.o lookup.o mmap.o packagelist.o derived_perm.o
diff --git a/fs/sdcardfs/dentry.c b/fs/sdcardfs/dentry.c
new file mode 100644
index 000000000000..7a19e77fce99
--- /dev/null
+++ b/fs/sdcardfs/dentry.c
@@ -0,0 +1,177 @@
+/*
+ * fs/sdcardfs/dentry.c
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ * Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009 Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed. It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#include "sdcardfs.h"
+#include "linux/ctype.h"
+
+/*
+ * returns: -ERRNO if error (returned to user)
+ * 0: tell VFS to invalidate dentry
+ * 1: dentry is valid
+ */
+static int sdcardfs_d_revalidate(struct dentry *dentry, unsigned int flags)
+{
+ int err = 1;
+ struct path parent_lower_path, lower_path;
+ struct dentry *parent_dentry = NULL;
+ struct dentry *parent_lower_dentry = NULL;
+ struct dentry *lower_cur_parent_dentry = NULL;
+ struct dentry *lower_dentry = NULL;
+
+ if (flags & LOOKUP_RCU)
+ return -ECHILD;
+
+ spin_lock(&dentry->d_lock);
+ if (IS_ROOT(dentry)) {
+ spin_unlock(&dentry->d_lock);
+ return 1;
+ }
+ spin_unlock(&dentry->d_lock);
+
+ /* check uninitialized obb_dentry and
+ * whether the base obbpath has been changed or not
+ */
+ if (is_obbpath_invalid(dentry)) {
+ d_drop(dentry);
+ return 0;
+ }
+
+ parent_dentry = dget_parent(dentry);
+ sdcardfs_get_lower_path(parent_dentry, &parent_lower_path);
+ sdcardfs_get_real_lower(dentry, &lower_path);
+ parent_lower_dentry = parent_lower_path.dentry;
+ lower_dentry = lower_path.dentry;
+ lower_cur_parent_dentry = dget_parent(lower_dentry);
+
+ if ((lower_dentry->d_flags & DCACHE_OP_REVALIDATE)) {
+ err = lower_dentry->d_op->d_revalidate(lower_dentry, flags);
+ if (err == 0) {
+ d_drop(dentry);
+ goto out;
+ }
+ }
+
+ spin_lock(&lower_dentry->d_lock);
+ if (d_unhashed(lower_dentry)) {
+ spin_unlock(&lower_dentry->d_lock);
+ d_drop(dentry);
+ err = 0;
+ goto out;
+ }
+ spin_unlock(&lower_dentry->d_lock);
+
+ if (parent_lower_dentry != lower_cur_parent_dentry) {
+ d_drop(dentry);
+ err = 0;
+ goto out;
+ }
+
+ if (dentry < lower_dentry) {
+ spin_lock(&dentry->d_lock);
+ spin_lock_nested(&lower_dentry->d_lock, DENTRY_D_LOCK_NESTED);
+ } else {
+ spin_lock(&lower_dentry->d_lock);
+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+ }
+
+ if (!qstr_case_eq(&dentry->d_name, &lower_dentry->d_name)) {
+ __d_drop(dentry);
+ err = 0;
+ }
+
+ if (dentry < lower_dentry) {
+ spin_unlock(&lower_dentry->d_lock);
+ spin_unlock(&dentry->d_lock);
+ } else {
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&lower_dentry->d_lock);
+ }
+
+out:
+ dput(parent_dentry);
+ dput(lower_cur_parent_dentry);
+ sdcardfs_put_lower_path(parent_dentry, &parent_lower_path);
+ sdcardfs_put_real_lower(dentry, &lower_path);
+ return err;
+}
+
+static void sdcardfs_d_release(struct dentry *dentry)
+{
+ /* release and reset the lower paths */
+ if (has_graft_path(dentry))
+ sdcardfs_put_reset_orig_path(dentry);
+ sdcardfs_put_reset_lower_path(dentry);
+ free_dentry_private_data(dentry);
+}
+
+static int sdcardfs_hash_ci(const struct dentry *dentry,
+ struct qstr *qstr)
+{
+ /*
+ * This function is copy of vfat_hashi.
+ * FIXME Should we support national language?
+ * Refer to vfat_hashi()
+ * struct nls_table *t = MSDOS_SB(dentry->d_sb)->nls_io;
+ */
+ const unsigned char *name;
+ unsigned int len;
+ unsigned long hash;
+
+ name = qstr->name;
+ len = qstr->len;
+
+ hash = init_name_hash();
+ while (len--)
+ hash = partial_name_hash(tolower(*name++), hash);
+ qstr->hash = end_name_hash(hash);
+
+ return 0;
+}
+
+/*
+ * Case insensitive compare of two vfat names.
+ */
+static int sdcardfs_cmp_ci(const struct dentry *parent,
+ const struct dentry *dentry,
+ unsigned int len, const char *str, const struct qstr *name)
+{
+ /* FIXME Should we support national language? */
+
+ if (name->len == len) {
+ if (str_n_case_eq(name->name, str, len))
+ return 0;
+ }
+ return 1;
+}
+
+static void sdcardfs_canonical_path(const struct path *path,
+ struct path *actual_path)
+{
+ sdcardfs_get_real_lower(path->dentry, actual_path);
+}
+
+const struct dentry_operations sdcardfs_ci_dops = {
+ .d_revalidate = sdcardfs_d_revalidate,
+ .d_release = sdcardfs_d_release,
+ .d_hash = sdcardfs_hash_ci,
+ .d_compare = sdcardfs_cmp_ci,
+ .d_canonical_path = sdcardfs_canonical_path,
+};
+
diff --git a/fs/sdcardfs/derived_perm.c b/fs/sdcardfs/derived_perm.c
new file mode 100644
index 000000000000..b4595aab5713
--- /dev/null
+++ b/fs/sdcardfs/derived_perm.c
@@ -0,0 +1,465 @@
+/*
+ * fs/sdcardfs/derived_perm.c
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ * Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009 Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed. It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#include "sdcardfs.h"
+
+/* copy derived state from parent inode */
+static void inherit_derived_state(struct inode *parent, struct inode *child)
+{
+ struct sdcardfs_inode_info *pi = SDCARDFS_I(parent);
+ struct sdcardfs_inode_info *ci = SDCARDFS_I(child);
+
+ ci->perm = PERM_INHERIT;
+ ci->userid = pi->userid;
+ ci->d_uid = pi->d_uid;
+ ci->under_android = pi->under_android;
+ ci->under_cache = pi->under_cache;
+ ci->under_obb = pi->under_obb;
+ set_top(ci, pi->top);
+}
+
+/* helper function for derived state */
+void setup_derived_state(struct inode *inode, perm_t perm, userid_t userid,
+ uid_t uid, bool under_android,
+ struct inode *top)
+{
+ struct sdcardfs_inode_info *info = SDCARDFS_I(inode);
+
+ info->perm = perm;
+ info->userid = userid;
+ info->d_uid = uid;
+ info->under_android = under_android;
+ info->under_cache = false;
+ info->under_obb = false;
+ set_top(info, top);
+}
+
+/* While renaming, there is a point where we want the path from dentry,
+ * but the name from newdentry
+ */
+void get_derived_permission_new(struct dentry *parent, struct dentry *dentry,
+ const struct qstr *name)
+{
+ struct sdcardfs_inode_info *info = SDCARDFS_I(d_inode(dentry));
+ struct sdcardfs_inode_info *parent_info = SDCARDFS_I(d_inode(parent));
+ appid_t appid;
+ unsigned long user_num;
+ int err;
+ struct qstr q_Android = QSTR_LITERAL("Android");
+ struct qstr q_data = QSTR_LITERAL("data");
+ struct qstr q_obb = QSTR_LITERAL("obb");
+ struct qstr q_media = QSTR_LITERAL("media");
+ struct qstr q_cache = QSTR_LITERAL("cache");
+
+ /* By default, each inode inherits from its parent.
+ * the properties are maintained on its private fields
+ * because the inode attributes will be modified with that of
+ * its lower inode.
+ * These values are used by our custom permission call instead
+ * of using the inode permissions.
+ */
+
+ inherit_derived_state(d_inode(parent), d_inode(dentry));
+
+ /* Files don't get special labels */
+ if (!S_ISDIR(d_inode(dentry)->i_mode))
+ return;
+ /* Derive custom permissions based on parent and current node */
+ switch (parent_info->perm) {
+ case PERM_INHERIT:
+ case PERM_ANDROID_PACKAGE_CACHE:
+ /* Already inherited above */
+ break;
+ case PERM_PRE_ROOT:
+ /* Legacy internal layout places users at top level */
+ info->perm = PERM_ROOT;
+ err = kstrtoul(name->name, 10, &user_num);
+ if (err)
+ info->userid = 0;
+ else
+ info->userid = user_num;
+ set_top(info, &info->vfs_inode);
+ break;
+ case PERM_ROOT:
+ /* Assume masked off by default. */
+ if (qstr_case_eq(name, &q_Android)) {
+ /* App-specific directories inside; let anyone traverse */
+ info->perm = PERM_ANDROID;
+ info->under_android = true;
+ set_top(info, &info->vfs_inode);
+ }
+ break;
+ case PERM_ANDROID:
+ if (qstr_case_eq(name, &q_data)) {
+ /* App-specific directories inside; let anyone traverse */
+ info->perm = PERM_ANDROID_DATA;
+ set_top(info, &info->vfs_inode);
+ } else if (qstr_case_eq(name, &q_obb)) {
+ /* App-specific directories inside; let anyone traverse */
+ info->perm = PERM_ANDROID_OBB;
+ info->under_obb = true;
+ set_top(info, &info->vfs_inode);
+ /* Single OBB directory is always shared */
+ } else if (qstr_case_eq(name, &q_media)) {
+ /* App-specific directories inside; let anyone traverse */
+ info->perm = PERM_ANDROID_MEDIA;
+ set_top(info, &info->vfs_inode);
+ }
+ break;
+ case PERM_ANDROID_OBB:
+ case PERM_ANDROID_DATA:
+ case PERM_ANDROID_MEDIA:
+ info->perm = PERM_ANDROID_PACKAGE;
+ appid = get_appid(name->name);
+ if (appid != 0 && !is_excluded(name->name, parent_info->userid))
+ info->d_uid = multiuser_get_uid(parent_info->userid, appid);
+ set_top(info, &info->vfs_inode);
+ break;
+ case PERM_ANDROID_PACKAGE:
+ if (qstr_case_eq(name, &q_cache)) {
+ info->perm = PERM_ANDROID_PACKAGE_CACHE;
+ info->under_cache = true;
+ }
+ break;
+ }
+}
+
+void get_derived_permission(struct dentry *parent, struct dentry *dentry)
+{
+ get_derived_permission_new(parent, dentry, &dentry->d_name);
+}
+
+static appid_t get_type(const char *name)
+{
+ const char *ext = strrchr(name, '.');
+ appid_t id;
+
+ if (ext && ext[0]) {
+ ext = &ext[1];
+ id = get_ext_gid(ext);
+ return id?:AID_MEDIA_RW;
+ }
+ return AID_MEDIA_RW;
+}
+
+void fixup_lower_ownership(struct dentry *dentry, const char *name)
+{
+ struct path path;
+ struct inode *inode;
+ struct inode *delegated_inode = NULL;
+ int error;
+ struct sdcardfs_inode_info *info;
+ struct sdcardfs_inode_info *info_top;
+ perm_t perm;
+ struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
+ uid_t uid = sbi->options.fs_low_uid;
+ gid_t gid = sbi->options.fs_low_gid;
+ struct iattr newattrs;
+
+ info = SDCARDFS_I(d_inode(dentry));
+ perm = info->perm;
+ if (info->under_obb) {
+ perm = PERM_ANDROID_OBB;
+ } else if (info->under_cache) {
+ perm = PERM_ANDROID_PACKAGE_CACHE;
+ } else if (perm == PERM_INHERIT) {
+ info_top = SDCARDFS_I(grab_top(info));
+ perm = info_top->perm;
+ release_top(info);
+ }
+
+ switch (perm) {
+ case PERM_ROOT:
+ case PERM_ANDROID:
+ case PERM_ANDROID_DATA:
+ case PERM_ANDROID_MEDIA:
+ case PERM_ANDROID_PACKAGE:
+ case PERM_ANDROID_PACKAGE_CACHE:
+ uid = multiuser_get_uid(info->userid, uid);
+ break;
+ case PERM_ANDROID_OBB:
+ uid = AID_MEDIA_OBB;
+ break;
+ case PERM_PRE_ROOT:
+ default:
+ break;
+ }
+ switch (perm) {
+ case PERM_ROOT:
+ case PERM_ANDROID:
+ case PERM_ANDROID_DATA:
+ case PERM_ANDROID_MEDIA:
+ if (S_ISDIR(d_inode(dentry)->i_mode))
+ gid = multiuser_get_uid(info->userid, AID_MEDIA_RW);
+ else
+ gid = multiuser_get_uid(info->userid, get_type(name));
+ break;
+ case PERM_ANDROID_OBB:
+ gid = AID_MEDIA_OBB;
+ break;
+ case PERM_ANDROID_PACKAGE:
+ if (uid_is_app(info->d_uid))
+ gid = multiuser_get_ext_gid(info->d_uid);
+ else
+ gid = multiuser_get_uid(info->userid, AID_MEDIA_RW);
+ break;
+ case PERM_ANDROID_PACKAGE_CACHE:
+ if (uid_is_app(info->d_uid))
+ gid = multiuser_get_ext_cache_gid(info->d_uid);
+ else
+ gid = multiuser_get_uid(info->userid, AID_MEDIA_RW);
+ break;
+ case PERM_PRE_ROOT:
+ default:
+ break;
+ }
+
+ sdcardfs_get_lower_path(dentry, &path);
+ inode = d_inode(path.dentry);
+ if (d_inode(path.dentry)->i_gid.val != gid || d_inode(path.dentry)->i_uid.val != uid) {
+retry_deleg:
+ newattrs.ia_valid = ATTR_GID | ATTR_UID | ATTR_FORCE;
+ newattrs.ia_uid = make_kuid(current_user_ns(), uid);
+ newattrs.ia_gid = make_kgid(current_user_ns(), gid);
+ if (!S_ISDIR(inode->i_mode))
+ newattrs.ia_valid |=
+ ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
+ mutex_lock(&inode->i_mutex);
+ error = security_path_chown(&path, newattrs.ia_uid, newattrs.ia_gid);
+ if (!error)
+ error = notify_change2(path.mnt, path.dentry, &newattrs, &delegated_inode);
+ mutex_unlock(&inode->i_mutex);
+ if (delegated_inode) {
+ error = break_deleg_wait(&delegated_inode);
+ if (!error)
+ goto retry_deleg;
+ }
+ if (error)
+ pr_debug("sdcardfs: Failed to touch up lower fs gid/uid for %s\n", name);
+ }
+ sdcardfs_put_lower_path(dentry, &path);
+}
+
+static int descendant_may_need_fixup(struct sdcardfs_inode_info *info, struct limit_search *limit)
+{
+ if (info->perm == PERM_ROOT)
+ return (limit->flags & BY_USERID)?info->userid == limit->userid:1;
+ if (info->perm == PERM_PRE_ROOT || info->perm == PERM_ANDROID)
+ return 1;
+ return 0;
+}
+
+static int needs_fixup(perm_t perm)
+{
+ if (perm == PERM_ANDROID_DATA || perm == PERM_ANDROID_OBB
+ || perm == PERM_ANDROID_MEDIA)
+ return 1;
+ return 0;
+}
+
+static void __fixup_perms_recursive(struct dentry *dentry, struct limit_search *limit, int depth)
+{
+ struct dentry *child;
+ struct sdcardfs_inode_info *info;
+
+ /*
+ * All paths will terminate their recursion on hitting PERM_ANDROID_OBB,
+ * PERM_ANDROID_MEDIA, or PERM_ANDROID_DATA. This happens at a depth of
+ * at most 3.
+ */
+ WARN(depth > 3, "%s: Max expected depth exceeded!\n", __func__);
+ spin_lock_nested(&dentry->d_lock, depth);
+ if (!d_inode(dentry)) {
+ spin_unlock(&dentry->d_lock);
+ return;
+ }
+ info = SDCARDFS_I(d_inode(dentry));
+
+ if (needs_fixup(info->perm)) {
+ list_for_each_entry(child, &dentry->d_subdirs, d_child) {
+ spin_lock_nested(&child->d_lock, depth + 1);
+ if (!(limit->flags & BY_NAME) || qstr_case_eq(&child->d_name, &limit->name)) {
+ if (d_inode(child)) {
+ get_derived_permission(dentry, child);
+ fixup_tmp_permissions(d_inode(child));
+ spin_unlock(&child->d_lock);
+ break;
+ }
+ }
+ spin_unlock(&child->d_lock);
+ }
+ } else if (descendant_may_need_fixup(info, limit)) {
+ list_for_each_entry(child, &dentry->d_subdirs, d_child) {
+ __fixup_perms_recursive(child, limit, depth + 1);
+ }
+ }
+ spin_unlock(&dentry->d_lock);
+}
+
+void fixup_perms_recursive(struct dentry *dentry, struct limit_search *limit)
+{
+ __fixup_perms_recursive(dentry, limit, 0);
+}
+
+/* main function for updating derived permission */
+inline void update_derived_permission_lock(struct dentry *dentry)
+{
+ struct dentry *parent;
+
+ if (!dentry || !d_inode(dentry)) {
+ pr_err("sdcardfs: %s: invalid dentry\n", __func__);
+ return;
+ }
+ /* FIXME:
+ * 1. need to check whether the dentry is updated or not
+ * 2. remove the root dentry update
+ */
+ if (!IS_ROOT(dentry)) {
+ parent = dget_parent(dentry);
+ if (parent) {
+ get_derived_permission(parent, dentry);
+ dput(parent);
+ }
+ }
+ fixup_tmp_permissions(d_inode(dentry));
+}
+
+int need_graft_path(struct dentry *dentry)
+{
+ int ret = 0;
+ struct dentry *parent = dget_parent(dentry);
+ struct sdcardfs_inode_info *parent_info = SDCARDFS_I(d_inode(parent));
+ struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
+ struct qstr obb = QSTR_LITERAL("obb");
+
+ if (parent_info->perm == PERM_ANDROID &&
+ qstr_case_eq(&dentry->d_name, &obb)) {
+
+ /* /Android/obb is the base obbpath of DERIVED_UNIFIED */
+ if (!(sbi->options.multiuser == false
+ && parent_info->userid == 0)) {
+ ret = 1;
+ }
+ }
+ dput(parent);
+ return ret;
+}
+
+int is_obbpath_invalid(struct dentry *dent)
+{
+ int ret = 0;
+ struct sdcardfs_dentry_info *di = SDCARDFS_D(dent);
+ struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dent->d_sb);
+ char *path_buf, *obbpath_s;
+ int need_put = 0;
+ struct path lower_path;
+
+ /* check the base obbpath has been changed.
+ * this routine can check an uninitialized obb dentry as well.
+ * regarding the uninitialized obb, refer to the sdcardfs_mkdir()
+ */
+ spin_lock(&di->lock);
+ if (di->orig_path.dentry) {
+ if (!di->lower_path.dentry) {
+ ret = 1;
+ } else {
+ path_get(&di->lower_path);
+
+ path_buf = kmalloc(PATH_MAX, GFP_ATOMIC);
+ if (!path_buf) {
+ ret = 1;
+ pr_err("sdcardfs: fail to allocate path_buf in %s.\n", __func__);
+ } else {
+ obbpath_s = d_path(&di->lower_path, path_buf, PATH_MAX);
+ if (d_unhashed(di->lower_path.dentry) ||
+ !str_case_eq(sbi->obbpath_s, obbpath_s)) {
+ ret = 1;
+ }
+ kfree(path_buf);
+ }
+
+ pathcpy(&lower_path, &di->lower_path);
+ need_put = 1;
+ }
+ }
+ spin_unlock(&di->lock);
+ if (need_put)
+ path_put(&lower_path);
+ return ret;
+}
+
+int is_base_obbpath(struct dentry *dentry)
+{
+ int ret = 0;
+ struct dentry *parent = dget_parent(dentry);
+ struct sdcardfs_inode_info *parent_info = SDCARDFS_I(d_inode(parent));
+ struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
+ struct qstr q_obb = QSTR_LITERAL("obb");
+
+ spin_lock(&SDCARDFS_D(dentry)->lock);
+ if (sbi->options.multiuser) {
+ if (parent_info->perm == PERM_PRE_ROOT &&
+ qstr_case_eq(&dentry->d_name, &q_obb)) {
+ ret = 1;
+ }
+ } else if (parent_info->perm == PERM_ANDROID &&
+ qstr_case_eq(&dentry->d_name, &q_obb)) {
+ ret = 1;
+ }
+ spin_unlock(&SDCARDFS_D(dentry)->lock);
+ return ret;
+}
+
+/* The lower_path will be stored to the dentry's orig_path
+ * and the base obbpath will be copyed to the lower_path variable.
+ * if an error returned, there's no change in the lower_path
+ * returns: -ERRNO if error (0: no error)
+ */
+int setup_obb_dentry(struct dentry *dentry, struct path *lower_path)
+{
+ int err = 0;
+ struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
+ struct path obbpath;
+
+ /* A local obb dentry must have its own orig_path to support rmdir
+ * and mkdir of itself. Usually, we expect that the sbi->obbpath
+ * is avaiable on this stage.
+ */
+ sdcardfs_set_orig_path(dentry, lower_path);
+
+ err = kern_path(sbi->obbpath_s,
+ LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &obbpath);
+
+ if (!err) {
+ /* the obbpath base has been found */
+ pathcpy(lower_path, &obbpath);
+ } else {
+ /* if the sbi->obbpath is not available, we can optionally
+ * setup the lower_path with its orig_path.
+ * but, the current implementation just returns an error
+ * because the sdcard daemon also regards this case as
+ * a lookup fail.
+ */
+ pr_info("sdcardfs: the sbi->obbpath is not available\n");
+ }
+ return err;
+}
+
+
diff --git a/fs/sdcardfs/file.c b/fs/sdcardfs/file.c
new file mode 100644
index 000000000000..6076c342dae6
--- /dev/null
+++ b/fs/sdcardfs/file.c
@@ -0,0 +1,433 @@
+/*
+ * fs/sdcardfs/file.c
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ * Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009 Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed. It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#include "sdcardfs.h"
+#ifdef CONFIG_SDCARD_FS_FADV_NOACTIVE
+#include <linux/backing-dev.h>
+#endif
+
+static ssize_t sdcardfs_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ int err;
+ struct file *lower_file;
+ struct dentry *dentry = file->f_path.dentry;
+#ifdef CONFIG_SDCARD_FS_FADV_NOACTIVE
+ struct backing_dev_info *bdi;
+#endif
+
+ lower_file = sdcardfs_lower_file(file);
+
+#ifdef CONFIG_SDCARD_FS_FADV_NOACTIVE
+ if (file->f_mode & FMODE_NOACTIVE) {
+ if (!(lower_file->f_mode & FMODE_NOACTIVE)) {
+ bdi = lower_file->f_mapping->backing_dev_info;
+ lower_file->f_ra.ra_pages = bdi->ra_pages * 2;
+ spin_lock(&lower_file->f_lock);
+ lower_file->f_mode |= FMODE_NOACTIVE;
+ spin_unlock(&lower_file->f_lock);
+ }
+ }
+#endif
+
+ err = vfs_read(lower_file, buf, count, ppos);
+ /* update our inode atime upon a successful lower read */
+ if (err >= 0)
+ fsstack_copy_attr_atime(d_inode(dentry),
+ file_inode(lower_file));
+
+ return err;
+}
+
+static ssize_t sdcardfs_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ int err;
+ struct file *lower_file;
+ struct dentry *dentry = file->f_path.dentry;
+
+ /* check disk space */
+ if (!check_min_free_space(dentry, count, 0)) {
+ pr_err("No minimum free space.\n");
+ return -ENOSPC;
+ }
+
+ lower_file = sdcardfs_lower_file(file);
+ err = vfs_write(lower_file, buf, count, ppos);
+ /* update our inode times+sizes upon a successful lower write */
+ if (err >= 0) {
+ fsstack_copy_inode_size(d_inode(dentry),
+ file_inode(lower_file));
+ fsstack_copy_attr_times(d_inode(dentry),
+ file_inode(lower_file));
+ }
+
+ return err;
+}
+
+static int sdcardfs_readdir(struct file *file, struct dir_context *ctx)
+{
+ int err;
+ struct file *lower_file = NULL;
+ struct dentry *dentry = file->f_path.dentry;
+
+ lower_file = sdcardfs_lower_file(file);
+
+ lower_file->f_pos = file->f_pos;
+ err = iterate_dir(lower_file, ctx);
+ file->f_pos = lower_file->f_pos;
+ if (err >= 0) /* copy the atime */
+ fsstack_copy_attr_atime(d_inode(dentry),
+ file_inode(lower_file));
+ return err;
+}
+
+static long sdcardfs_unlocked_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ long err = -ENOTTY;
+ struct file *lower_file;
+
+ lower_file = sdcardfs_lower_file(file);
+
+ /* XXX: use vfs_ioctl if/when VFS exports it */
+ if (!lower_file || !lower_file->f_op)
+ goto out;
+ if (lower_file->f_op->unlocked_ioctl)
+ err = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg);
+
+ /* some ioctls can change inode attributes (EXT2_IOC_SETFLAGS) */
+ if (!err)
+ sdcardfs_copy_and_fix_attrs(file_inode(file),
+ file_inode(lower_file));
+out:
+ return err;
+}
+
+#ifdef CONFIG_COMPAT
+static long sdcardfs_compat_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ long err = -ENOTTY;
+ struct file *lower_file;
+
+ lower_file = sdcardfs_lower_file(file);
+
+ /* XXX: use vfs_ioctl if/when VFS exports it */
+ if (!lower_file || !lower_file->f_op)
+ goto out;
+ if (lower_file->f_op->compat_ioctl)
+ err = lower_file->f_op->compat_ioctl(lower_file, cmd, arg);
+
+out:
+ return err;
+}
+#endif
+
+static int sdcardfs_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ int err = 0;
+ bool willwrite;
+ struct file *lower_file;
+ const struct vm_operations_struct *saved_vm_ops = NULL;
+
+ /* this might be deferred to mmap's writepage */
+ willwrite = ((vma->vm_flags | VM_SHARED | VM_WRITE) == vma->vm_flags);
+
+ /*
+ * File systems which do not implement ->writepage may use
+ * generic_file_readonly_mmap as their ->mmap op. If you call
+ * generic_file_readonly_mmap with VM_WRITE, you'd get an -EINVAL.
+ * But we cannot call the lower ->mmap op, so we can't tell that
+ * writeable mappings won't work. Therefore, our only choice is to
+ * check if the lower file system supports the ->writepage, and if
+ * not, return EINVAL (the same error that
+ * generic_file_readonly_mmap returns in that case).
+ */
+ lower_file = sdcardfs_lower_file(file);
+ if (willwrite && !lower_file->f_mapping->a_ops->writepage) {
+ err = -EINVAL;
+ pr_err("sdcardfs: lower file system does not support writeable mmap\n");
+ goto out;
+ }
+
+ /*
+ * find and save lower vm_ops.
+ *
+ * XXX: the VFS should have a cleaner way of finding the lower vm_ops
+ */
+ if (!SDCARDFS_F(file)->lower_vm_ops) {
+ err = lower_file->f_op->mmap(lower_file, vma);
+ if (err) {
+ pr_err("sdcardfs: lower mmap failed %d\n", err);
+ goto out;
+ }
+ saved_vm_ops = vma->vm_ops; /* save: came from lower ->mmap */
+ }
+
+ /*
+ * Next 3 lines are all I need from generic_file_mmap. I definitely
+ * don't want its test for ->readpage which returns -ENOEXEC.
+ */
+ file_accessed(file);
+ vma->vm_ops = &sdcardfs_vm_ops;
+
+ file->f_mapping->a_ops = &sdcardfs_aops; /* set our aops */
+ if (!SDCARDFS_F(file)->lower_vm_ops) /* save for our ->fault */
+ SDCARDFS_F(file)->lower_vm_ops = saved_vm_ops;
+ vma->vm_private_data = file;
+ get_file(lower_file);
+ vma->vm_file = lower_file;
+
+out:
+ return err;
+}
+
+static int sdcardfs_open(struct inode *inode, struct file *file)
+{
+ int err = 0;
+ struct file *lower_file = NULL;
+ struct path lower_path;
+ struct dentry *dentry = file->f_path.dentry;
+ struct dentry *parent = dget_parent(dentry);
+ struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
+ const struct cred *saved_cred = NULL;
+
+ /* don't open unhashed/deleted files */
+ if (d_unhashed(dentry)) {
+ err = -ENOENT;
+ goto out_err;
+ }
+
+ if (!check_caller_access_to_name(d_inode(parent), &dentry->d_name)) {
+ err = -EACCES;
+ goto out_err;
+ }
+
+ /* save current_cred and override it */
+ OVERRIDE_CRED(sbi, saved_cred, SDCARDFS_I(inode));
+
+ file->private_data =
+ kzalloc(sizeof(struct sdcardfs_file_info), GFP_KERNEL);
+ if (!SDCARDFS_F(file)) {
+ err = -ENOMEM;
+ goto out_revert_cred;
+ }
+
+ /* open lower object and link sdcardfs's file struct to lower's */
+ sdcardfs_get_lower_path(file->f_path.dentry, &lower_path);
+ lower_file = dentry_open(&lower_path, file->f_flags, current_cred());
+ path_put(&lower_path);
+ if (IS_ERR(lower_file)) {
+ err = PTR_ERR(lower_file);
+ lower_file = sdcardfs_lower_file(file);
+ if (lower_file) {
+ sdcardfs_set_lower_file(file, NULL);
+ fput(lower_file); /* fput calls dput for lower_dentry */
+ }
+ } else {
+ sdcardfs_set_lower_file(file, lower_file);
+ }
+
+ if (err)
+ kfree(SDCARDFS_F(file));
+ else
+ sdcardfs_copy_and_fix_attrs(inode, sdcardfs_lower_inode(inode));
+
+out_revert_cred:
+ REVERT_CRED(saved_cred);
+out_err:
+ dput(parent);
+ return err;
+}
+
+static int sdcardfs_flush(struct file *file, fl_owner_t id)
+{
+ int err = 0;
+ struct file *lower_file = NULL;
+
+ lower_file = sdcardfs_lower_file(file);
+ if (lower_file && lower_file->f_op && lower_file->f_op->flush) {
+ filemap_write_and_wait(file->f_mapping);
+ err = lower_file->f_op->flush(lower_file, id);
+ }
+
+ return err;
+}
+
+/* release all lower object references & free the file info structure */
+static int sdcardfs_file_release(struct inode *inode, struct file *file)
+{
+ struct file *lower_file;
+
+ lower_file = sdcardfs_lower_file(file);
+ if (lower_file) {
+ sdcardfs_set_lower_file(file, NULL);
+ fput(lower_file);
+ }
+
+ kfree(SDCARDFS_F(file));
+ return 0;
+}
+
+static int sdcardfs_fsync(struct file *file, loff_t start, loff_t end,
+ int datasync)
+{
+ int err;
+ struct file *lower_file;
+ struct path lower_path;
+ struct dentry *dentry = file->f_path.dentry;
+
+ err = __generic_file_fsync(file, start, end, datasync);
+ if (err)
+ goto out;
+
+ lower_file = sdcardfs_lower_file(file);
+ sdcardfs_get_lower_path(dentry, &lower_path);
+ err = vfs_fsync_range(lower_file, start, end, datasync);
+ sdcardfs_put_lower_path(dentry, &lower_path);
+out:
+ return err;
+}
+
+static int sdcardfs_fasync(int fd, struct file *file, int flag)
+{
+ int err = 0;
+ struct file *lower_file = NULL;
+
+ lower_file = sdcardfs_lower_file(file);
+ if (lower_file->f_op && lower_file->f_op->fasync)
+ err = lower_file->f_op->fasync(fd, lower_file, flag);
+
+ return err;
+}
+
+/*
+ * Sdcardfs cannot use generic_file_llseek as ->llseek, because it would
+ * only set the offset of the upper file. So we have to implement our
+ * own method to set both the upper and lower file offsets
+ * consistently.
+ */
+static loff_t sdcardfs_file_llseek(struct file *file, loff_t offset, int whence)
+{
+ int err;
+ struct file *lower_file;
+
+ err = generic_file_llseek(file, offset, whence);
+ if (err < 0)
+ goto out;
+
+ lower_file = sdcardfs_lower_file(file);
+ err = generic_file_llseek(lower_file, offset, whence);
+
+out:
+ return err;
+}
+
+/*
+ * Sdcardfs read_iter, redirect modified iocb to lower read_iter
+ */
+ssize_t sdcardfs_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+ int err;
+ struct file *file = iocb->ki_filp, *lower_file;
+
+ lower_file = sdcardfs_lower_file(file);
+ if (!lower_file->f_op->read_iter) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ get_file(lower_file); /* prevent lower_file from being released */
+ iocb->ki_filp = lower_file;
+ err = lower_file->f_op->read_iter(iocb, iter);
+ iocb->ki_filp = file;
+ fput(lower_file);
+ /* update upper inode atime as needed */
+ if (err >= 0 || err == -EIOCBQUEUED)
+ fsstack_copy_attr_atime(file->f_path.dentry->d_inode,
+ file_inode(lower_file));
+out:
+ return err;
+}
+
+/*
+ * Sdcardfs write_iter, redirect modified iocb to lower write_iter
+ */
+ssize_t sdcardfs_write_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+ int err;
+ struct file *file = iocb->ki_filp, *lower_file;
+
+ lower_file = sdcardfs_lower_file(file);
+ if (!lower_file->f_op->write_iter) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ get_file(lower_file); /* prevent lower_file from being released */
+ iocb->ki_filp = lower_file;
+ err = lower_file->f_op->write_iter(iocb, iter);
+ iocb->ki_filp = file;
+ fput(lower_file);
+ /* update upper inode times/sizes as needed */
+ if (err >= 0 || err == -EIOCBQUEUED) {
+ fsstack_copy_inode_size(file->f_path.dentry->d_inode,
+ file_inode(lower_file));
+ fsstack_copy_attr_times(file->f_path.dentry->d_inode,
+ file_inode(lower_file));
+ }
+out:
+ return err;
+}
+
+const struct file_operations sdcardfs_main_fops = {
+ .llseek = generic_file_llseek,
+ .read = sdcardfs_read,
+ .write = sdcardfs_write,
+ .unlocked_ioctl = sdcardfs_unlocked_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = sdcardfs_compat_ioctl,
+#endif
+ .mmap = sdcardfs_mmap,
+ .open = sdcardfs_open,
+ .flush = sdcardfs_flush,
+ .release = sdcardfs_file_release,
+ .fsync = sdcardfs_fsync,
+ .fasync = sdcardfs_fasync,
+ .read_iter = sdcardfs_read_iter,
+ .write_iter = sdcardfs_write_iter,
+};
+
+/* trimmed directory options */
+const struct file_operations sdcardfs_dir_fops = {
+ .llseek = sdcardfs_file_llseek,
+ .read = generic_read_dir,
+ .iterate = sdcardfs_readdir,
+ .unlocked_ioctl = sdcardfs_unlocked_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = sdcardfs_compat_ioctl,
+#endif
+ .open = sdcardfs_open,
+ .release = sdcardfs_file_release,
+ .flush = sdcardfs_flush,
+ .fsync = sdcardfs_fsync,
+ .fasync = sdcardfs_fasync,
+};
diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c
new file mode 100644
index 000000000000..f15cb11ca8fd
--- /dev/null
+++ b/fs/sdcardfs/inode.c
@@ -0,0 +1,912 @@
+/*
+ * fs/sdcardfs/inode.c
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ * Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009 Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed. It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#include "sdcardfs.h"
+#include <linux/fs_struct.h>
+#include <linux/ratelimit.h>
+
+/* Do not directly use this function. Use OVERRIDE_CRED() instead. */
+const struct cred *override_fsids(struct sdcardfs_sb_info *sbi, struct sdcardfs_inode_info *info)
+{
+ struct cred *cred;
+ const struct cred *old_cred;
+ uid_t uid;
+
+ cred = prepare_creds();
+ if (!cred)
+ return NULL;
+
+ if (info->under_obb)
+ uid = AID_MEDIA_OBB;
+ else
+ uid = multiuser_get_uid(info->userid, sbi->options.fs_low_uid);
+ cred->fsuid = make_kuid(&init_user_ns, uid);
+ cred->fsgid = make_kgid(&init_user_ns, sbi->options.fs_low_gid);
+
+ old_cred = override_creds(cred);
+
+ return old_cred;
+}
+
+/* Do not directly use this function, use REVERT_CRED() instead. */
+void revert_fsids(const struct cred *old_cred)
+{
+ const struct cred *cur_cred;
+
+ cur_cred = current->cred;
+ revert_creds(old_cred);
+ put_cred(cur_cred);
+}
+
+static int sdcardfs_create(struct inode *dir, struct dentry *dentry,
+ umode_t mode, bool want_excl)
+{
+ int err;
+ struct dentry *lower_dentry;
+ struct vfsmount *lower_dentry_mnt;
+ struct dentry *lower_parent_dentry = NULL;
+ struct path lower_path;
+ const struct cred *saved_cred = NULL;
+ struct fs_struct *saved_fs;
+ struct fs_struct *copied_fs;
+
+ if (!check_caller_access_to_name(dir, &dentry->d_name)) {
+ err = -EACCES;
+ goto out_eacces;
+ }
+
+ /* save current_cred and override it */
+ OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb), saved_cred, SDCARDFS_I(dir));
+
+ sdcardfs_get_lower_path(dentry, &lower_path);
+ lower_dentry = lower_path.dentry;
+ lower_dentry_mnt = lower_path.mnt;
+ lower_parent_dentry = lock_parent(lower_dentry);
+
+ /* set last 16bytes of mode field to 0664 */
+ mode = (mode & S_IFMT) | 00664;
+
+ /* temporarily change umask for lower fs write */
+ saved_fs = current->fs;
+ copied_fs = copy_fs_struct(current->fs);
+ if (!copied_fs) {
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+ current->fs = copied_fs;
+ current->fs->umask = 0;
+ err = vfs_create2(lower_dentry_mnt, d_inode(lower_parent_dentry), lower_dentry, mode, want_excl);
+ if (err)
+ goto out;
+
+ err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path, SDCARDFS_I(dir)->userid);
+ if (err)
+ goto out;
+ fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir));
+ fsstack_copy_inode_size(dir, d_inode(lower_parent_dentry));
+ fixup_lower_ownership(dentry, dentry->d_name.name);
+
+out:
+ current->fs = saved_fs;
+ free_fs_struct(copied_fs);
+out_unlock:
+ unlock_dir(lower_parent_dentry);
+ sdcardfs_put_lower_path(dentry, &lower_path);
+ REVERT_CRED(saved_cred);
+out_eacces:
+ return err;
+}
+
+#if 0
+static int sdcardfs_link(struct dentry *old_dentry, struct inode *dir,
+ struct dentry *new_dentry)
+{
+ struct dentry *lower_old_dentry;
+ struct dentry *lower_new_dentry;
+ struct dentry *lower_dir_dentry;
+ u64 file_size_save;
+ int err;
+ struct path lower_old_path, lower_new_path;
+
+ OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb));
+
+ file_size_save = i_size_read(d_inode(old_dentry));
+ sdcardfs_get_lower_path(old_dentry, &lower_old_path);
+ sdcardfs_get_lower_path(new_dentry, &lower_new_path);
+ lower_old_dentry = lower_old_path.dentry;
+ lower_new_dentry = lower_new_path.dentry;
+ lower_dir_dentry = lock_parent(lower_new_dentry);
+
+ err = vfs_link(lower_old_dentry, d_inode(lower_dir_dentry),
+ lower_new_dentry, NULL);
+ if (err || !d_inode(lower_new_dentry))
+ goto out;
+
+ err = sdcardfs_interpose(new_dentry, dir->i_sb, &lower_new_path);
+ if (err)
+ goto out;
+ fsstack_copy_attr_times(dir, d_inode(lower_new_dentry));
+ fsstack_copy_inode_size(dir, d_inode(lower_new_dentry));
+ set_nlink(d_inode(old_dentry),
+ sdcardfs_lower_inode(d_inode(old_dentry))->i_nlink);
+ i_size_write(d_inode(new_dentry), file_size_save);
+out:
+ unlock_dir(lower_dir_dentry);
+ sdcardfs_put_lower_path(old_dentry, &lower_old_path);
+ sdcardfs_put_lower_path(new_dentry, &lower_new_path);
+ REVERT_CRED();
+ return err;
+}
+#endif
+
+static int sdcardfs_unlink(struct inode *dir, struct dentry *dentry)
+{
+ int err;
+ struct dentry *lower_dentry;
+ struct vfsmount *lower_mnt;
+ struct inode *lower_dir_inode = sdcardfs_lower_inode(dir);
+ struct dentry *lower_dir_dentry;
+ struct path lower_path;
+ const struct cred *saved_cred = NULL;
+
+ if (!check_caller_access_to_name(dir, &dentry->d_name)) {
+ err = -EACCES;
+ goto out_eacces;
+ }
+
+ /* save current_cred and override it */
+ OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb), saved_cred, SDCARDFS_I(dir));
+
+ sdcardfs_get_lower_path(dentry, &lower_path);
+ lower_dentry = lower_path.dentry;
+ lower_mnt = lower_path.mnt;
+ dget(lower_dentry);
+ lower_dir_dentry = lock_parent(lower_dentry);
+
+ err = vfs_unlink2(lower_mnt, lower_dir_inode, lower_dentry, NULL);
+
+ /*
+ * Note: unlinking on top of NFS can cause silly-renamed files.
+ * Trying to delete such files results in EBUSY from NFS
+ * below. Silly-renamed files will get deleted by NFS later on, so
+ * we just need to detect them here and treat such EBUSY errors as
+ * if the upper file was successfully deleted.
+ */
+ if (err == -EBUSY && lower_dentry->d_flags & DCACHE_NFSFS_RENAMED)
+ err = 0;
+ if (err)
+ goto out;
+ fsstack_copy_attr_times(dir, lower_dir_inode);
+ fsstack_copy_inode_size(dir, lower_dir_inode);
+ set_nlink(d_inode(dentry),
+ sdcardfs_lower_inode(d_inode(dentry))->i_nlink);
+ d_inode(dentry)->i_ctime = dir->i_ctime;
+ d_drop(dentry); /* this is needed, else LTP fails (VFS won't do it) */
+out:
+ unlock_dir(lower_dir_dentry);
+ dput(lower_dentry);
+ sdcardfs_put_lower_path(dentry, &lower_path);
+ REVERT_CRED(saved_cred);
+out_eacces:
+ return err;
+}
+
+#if 0
+static int sdcardfs_symlink(struct inode *dir, struct dentry *dentry,
+ const char *symname)
+{
+ int err;
+ struct dentry *lower_dentry;
+ struct dentry *lower_parent_dentry = NULL;
+ struct path lower_path;
+
+ OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb));
+
+ sdcardfs_get_lower_path(dentry, &lower_path);
+ lower_dentry = lower_path.dentry;
+ lower_parent_dentry = lock_parent(lower_dentry);
+
+ err = vfs_symlink(d_inode(lower_parent_dentry), lower_dentry, symname);
+ if (err)
+ goto out;
+ err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path);
+ if (err)
+ goto out;
+ fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir));
+ fsstack_copy_inode_size(dir, d_inode(lower_parent_dentry));
+
+out:
+ unlock_dir(lower_parent_dentry);
+ sdcardfs_put_lower_path(dentry, &lower_path);
+ REVERT_CRED();
+ return err;
+}
+#endif
+
+static int touch(char *abs_path, mode_t mode)
+{
+ struct file *filp = filp_open(abs_path, O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW, mode);
+
+ if (IS_ERR(filp)) {
+ if (PTR_ERR(filp) == -EEXIST) {
+ return 0;
+ } else {
+ pr_err("sdcardfs: failed to open(%s): %ld\n",
+ abs_path, PTR_ERR(filp));
+ return PTR_ERR(filp);
+ }
+ }
+ filp_close(filp, current->files);
+ return 0;
+}
+
+static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+ int err;
+ int make_nomedia_in_obb = 0;
+ struct dentry *lower_dentry;
+ struct vfsmount *lower_mnt;
+ struct dentry *lower_parent_dentry = NULL;
+ struct path lower_path;
+ struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
+ const struct cred *saved_cred = NULL;
+ struct sdcardfs_inode_info *pi = SDCARDFS_I(dir);
+ int touch_err = 0;
+ struct fs_struct *saved_fs;
+ struct fs_struct *copied_fs;
+ struct qstr q_obb = QSTR_LITERAL("obb");
+ struct qstr q_data = QSTR_LITERAL("data");
+
+ if (!check_caller_access_to_name(dir, &dentry->d_name)) {
+ err = -EACCES;
+ goto out_eacces;
+ }
+
+ /* save current_cred and override it */
+ OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb), saved_cred, SDCARDFS_I(dir));
+
+ /* check disk space */
+ if (!check_min_free_space(dentry, 0, 1)) {
+ pr_err("sdcardfs: No minimum free space.\n");
+ err = -ENOSPC;
+ goto out_revert;
+ }
+
+ /* the lower_dentry is negative here */
+ sdcardfs_get_lower_path(dentry, &lower_path);
+ lower_dentry = lower_path.dentry;
+ lower_mnt = lower_path.mnt;
+ lower_parent_dentry = lock_parent(lower_dentry);
+
+ /* set last 16bytes of mode field to 0775 */
+ mode = (mode & S_IFMT) | 00775;
+
+ /* temporarily change umask for lower fs write */
+ saved_fs = current->fs;
+ copied_fs = copy_fs_struct(current->fs);
+ if (!copied_fs) {
+ err = -ENOMEM;
+ unlock_dir(lower_parent_dentry);
+ goto out_unlock;
+ }
+ current->fs = copied_fs;
+ current->fs->umask = 0;
+ err = vfs_mkdir2(lower_mnt, d_inode(lower_parent_dentry), lower_dentry, mode);
+
+ if (err) {
+ unlock_dir(lower_parent_dentry);
+ goto out;
+ }
+
+ /* if it is a local obb dentry, setup it with the base obbpath */
+ if (need_graft_path(dentry)) {
+
+ err = setup_obb_dentry(dentry, &lower_path);
+ if (err) {
+ /* if the sbi->obbpath is not available, the lower_path won't be
+ * changed by setup_obb_dentry() but the lower path is saved to
+ * its orig_path. this dentry will be revalidated later.
+ * but now, the lower_path should be NULL
+ */
+ sdcardfs_put_reset_lower_path(dentry);
+
+ /* the newly created lower path which saved to its orig_path or
+ * the lower_path is the base obbpath.
+ * therefore, an additional path_get is required
+ */
+ path_get(&lower_path);
+ } else
+ make_nomedia_in_obb = 1;
+ }
+
+ err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path, pi->userid);
+ if (err) {
+ unlock_dir(lower_parent_dentry);
+ goto out;
+ }
+
+ fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir));
+ fsstack_copy_inode_size(dir, d_inode(lower_parent_dentry));
+ /* update number of links on parent directory */
+ set_nlink(dir, sdcardfs_lower_inode(dir)->i_nlink);
+ fixup_lower_ownership(dentry, dentry->d_name.name);
+ unlock_dir(lower_parent_dentry);
+ if ((!sbi->options.multiuser) && (qstr_case_eq(&dentry->d_name, &q_obb))
+ && (pi->perm == PERM_ANDROID) && (pi->userid == 0))
+ make_nomedia_in_obb = 1;
+
+ /* When creating /Android/data and /Android/obb, mark them as .nomedia */
+ if (make_nomedia_in_obb ||
+ ((pi->perm == PERM_ANDROID) && (qstr_case_eq(&dentry->d_name, &q_data)))) {
+ REVERT_CRED(saved_cred);
+ OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb), saved_cred, SDCARDFS_I(d_inode(dentry)));
+ set_fs_pwd(current->fs, &lower_path);
+ touch_err = touch(".nomedia", 0664);
+ if (touch_err) {
+ pr_err("sdcardfs: failed to create .nomedia in %s: %d\n",
+ lower_path.dentry->d_name.name, touch_err);
+ goto out;
+ }
+ }
+out:
+ current->fs = saved_fs;
+ free_fs_struct(copied_fs);
+out_unlock:
+ sdcardfs_put_lower_path(dentry, &lower_path);
+out_revert:
+ REVERT_CRED(saved_cred);
+out_eacces:
+ return err;
+}
+
+static int sdcardfs_rmdir(struct inode *dir, struct dentry *dentry)
+{
+ struct dentry *lower_dentry;
+ struct dentry *lower_dir_dentry;
+ struct vfsmount *lower_mnt;
+ int err;
+ struct path lower_path;
+ const struct cred *saved_cred = NULL;
+
+ if (!check_caller_access_to_name(dir, &dentry->d_name)) {
+ err = -EACCES;
+ goto out_eacces;
+ }
+
+ /* save current_cred and override it */
+ OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb), saved_cred, SDCARDFS_I(dir));
+
+ /* sdcardfs_get_real_lower(): in case of remove an user's obb dentry
+ * the dentry on the original path should be deleted.
+ */
+ sdcardfs_get_real_lower(dentry, &lower_path);
+
+ lower_dentry = lower_path.dentry;
+ lower_mnt = lower_path.mnt;
+ lower_dir_dentry = lock_parent(lower_dentry);
+
+ err = vfs_rmdir2(lower_mnt, d_inode(lower_dir_dentry), lower_dentry);
+ if (err)
+ goto out;
+
+ d_drop(dentry); /* drop our dentry on success (why not VFS's job?) */
+ if (d_inode(dentry))
+ clear_nlink(d_inode(dentry));
+ fsstack_copy_attr_times(dir, d_inode(lower_dir_dentry));
+ fsstack_copy_inode_size(dir, d_inode(lower_dir_dentry));
+ set_nlink(dir, d_inode(lower_dir_dentry)->i_nlink);
+
+out:
+ unlock_dir(lower_dir_dentry);
+ sdcardfs_put_real_lower(dentry, &lower_path);
+ REVERT_CRED(saved_cred);
+out_eacces:
+ return err;
+}
+
+#if 0
+static int sdcardfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
+ dev_t dev)
+{
+ int err;
+ struct dentry *lower_dentry;
+ struct dentry *lower_parent_dentry = NULL;
+ struct path lower_path;
+
+ OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb));
+
+ sdcardfs_get_lower_path(dentry, &lower_path);
+ lower_dentry = lower_path.dentry;
+ lower_parent_dentry = lock_parent(lower_dentry);
+
+ err = vfs_mknod(d_inode(lower_parent_dentry), lower_dentry, mode, dev);
+ if (err)
+ goto out;
+
+ err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path);
+ if (err)
+ goto out;
+ fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir));
+ fsstack_copy_inode_size(dir, d_inode(lower_parent_dentry));
+
+out:
+ unlock_dir(lower_parent_dentry);
+ sdcardfs_put_lower_path(dentry, &lower_path);
+ REVERT_CRED();
+ return err;
+}
+#endif
+
+/*
+ * The locking rules in sdcardfs_rename are complex. We could use a simpler
+ * superblock-level name-space lock for renames and copy-ups.
+ */
+static int sdcardfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry)
+{
+ int err = 0;
+ struct dentry *lower_old_dentry = NULL;
+ struct dentry *lower_new_dentry = NULL;
+ struct dentry *lower_old_dir_dentry = NULL;
+ struct dentry *lower_new_dir_dentry = NULL;
+ struct vfsmount *lower_mnt = NULL;
+ struct dentry *trap = NULL;
+ struct path lower_old_path, lower_new_path;
+ const struct cred *saved_cred = NULL;
+
+ if (!check_caller_access_to_name(old_dir, &old_dentry->d_name) ||
+ !check_caller_access_to_name(new_dir, &new_dentry->d_name)) {
+ err = -EACCES;
+ goto out_eacces;
+ }
+
+ /* save current_cred and override it */
+ OVERRIDE_CRED(SDCARDFS_SB(old_dir->i_sb), saved_cred, SDCARDFS_I(new_dir));
+
+ sdcardfs_get_real_lower(old_dentry, &lower_old_path);
+ sdcardfs_get_lower_path(new_dentry, &lower_new_path);
+ lower_old_dentry = lower_old_path.dentry;
+ lower_new_dentry = lower_new_path.dentry;
+ lower_mnt = lower_old_path.mnt;
+ lower_old_dir_dentry = dget_parent(lower_old_dentry);
+ lower_new_dir_dentry = dget_parent(lower_new_dentry);
+
+ trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
+ /* source should not be ancestor of target */
+ if (trap == lower_old_dentry) {
+ err = -EINVAL;
+ goto out;
+ }
+ /* target should not be ancestor of source */
+ if (trap == lower_new_dentry) {
+ err = -ENOTEMPTY;
+ goto out;
+ }
+
+ err = vfs_rename2(lower_mnt,
+ d_inode(lower_old_dir_dentry), lower_old_dentry,
+ d_inode(lower_new_dir_dentry), lower_new_dentry,
+ NULL, 0);
+ if (err)
+ goto out;
+
+ /* Copy attrs from lower dir, but i_uid/i_gid */
+ sdcardfs_copy_and_fix_attrs(new_dir, d_inode(lower_new_dir_dentry));
+ fsstack_copy_inode_size(new_dir, d_inode(lower_new_dir_dentry));
+
+ if (new_dir != old_dir) {
+ sdcardfs_copy_and_fix_attrs(old_dir, d_inode(lower_old_dir_dentry));
+ fsstack_copy_inode_size(old_dir, d_inode(lower_old_dir_dentry));
+ }
+ get_derived_permission_new(new_dentry->d_parent, old_dentry, &new_dentry->d_name);
+ fixup_tmp_permissions(d_inode(old_dentry));
+ fixup_lower_ownership(old_dentry, new_dentry->d_name.name);
+ d_invalidate(old_dentry); /* Can't fixup ownership recursively :( */
+out:
+ unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
+ dput(lower_old_dir_dentry);
+ dput(lower_new_dir_dentry);
+ sdcardfs_put_real_lower(old_dentry, &lower_old_path);
+ sdcardfs_put_lower_path(new_dentry, &lower_new_path);
+ REVERT_CRED(saved_cred);
+out_eacces:
+ return err;
+}
+
+#if 0
+static int sdcardfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
+{
+ int err;
+ struct dentry *lower_dentry;
+ struct path lower_path;
+ /* XXX readlink does not requires overriding credential */
+
+ sdcardfs_get_lower_path(dentry, &lower_path);
+ lower_dentry = lower_path.dentry;
+ if (!d_inode(lower_dentry)->i_op ||
+ !d_inode(lower_dentry)->i_op->readlink) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = d_inode(lower_dentry)->i_op->readlink(lower_dentry,
+ buf, bufsiz);
+ if (err < 0)
+ goto out;
+ fsstack_copy_attr_atime(d_inode(dentry), d_inode(lower_dentry));
+
+out:
+ sdcardfs_put_lower_path(dentry, &lower_path);
+ return err;
+}
+#endif
+
+#if 0
+static const char *sdcardfs_follow_link(struct dentry *dentry, void **cookie)
+{
+ char *buf;
+ int len = PAGE_SIZE, err;
+ mm_segment_t old_fs;
+
+ /* This is freed by the put_link method assuming a successful call. */
+ buf = kmalloc(len, GFP_KERNEL);
+ if (!buf) {
+ buf = ERR_PTR(-ENOMEM);
+ return buf;
+ }
+
+ /* read the symlink, and then we will follow it */
+ old_fs = get_fs();
+ set_fs(KERNEL_DS);
+ err = sdcardfs_readlink(dentry, buf, len);
+ set_fs(old_fs);
+ if (err < 0) {
+ kfree(buf);
+ buf = ERR_PTR(err);
+ } else {
+ buf[err] = '\0';
+ }
+ return *cookie = buf;
+}
+#endif
+
+static int sdcardfs_permission_wrn(struct inode *inode, int mask)
+{
+ WARN_RATELIMIT(1, "sdcardfs does not support permission. Use permission2.\n");
+ return -EINVAL;
+}
+
+void copy_attrs(struct inode *dest, const struct inode *src)
+{
+ dest->i_mode = src->i_mode;
+ dest->i_uid = src->i_uid;
+ dest->i_gid = src->i_gid;
+ dest->i_rdev = src->i_rdev;
+ dest->i_atime = src->i_atime;
+ dest->i_mtime = src->i_mtime;
+ dest->i_ctime = src->i_ctime;
+ dest->i_blkbits = src->i_blkbits;
+ dest->i_flags = src->i_flags;
+#ifdef CONFIG_FS_POSIX_ACL
+ dest->i_acl = src->i_acl;
+#endif
+#ifdef CONFIG_SECURITY
+ dest->i_security = src->i_security;
+#endif
+}
+
+static int sdcardfs_permission(struct vfsmount *mnt, struct inode *inode, int mask)
+{
+ int err;
+ struct inode tmp;
+ struct inode *top = grab_top(SDCARDFS_I(inode));
+
+ if (!top) {
+ release_top(SDCARDFS_I(inode));
+ WARN(1, "Top value was null!\n");
+ return -EINVAL;
+ }
+
+ /*
+ * Permission check on sdcardfs inode.
+ * Calling process should have AID_SDCARD_RW permission
+ * Since generic_permission only needs i_mode, i_uid,
+ * i_gid, and i_sb, we can create a fake inode to pass
+ * this information down in.
+ *
+ * The underlying code may attempt to take locks in some
+ * cases for features we're not using, but if that changes,
+ * locks must be dealt with to avoid undefined behavior.
+ */
+ copy_attrs(&tmp, inode);
+ tmp.i_uid = make_kuid(&init_user_ns, SDCARDFS_I(top)->d_uid);
+ tmp.i_gid = make_kgid(&init_user_ns, get_gid(mnt, SDCARDFS_I(top)));
+ tmp.i_mode = (inode->i_mode & S_IFMT) | get_mode(mnt, SDCARDFS_I(top));
+ release_top(SDCARDFS_I(inode));
+ tmp.i_sb = inode->i_sb;
+ if (IS_POSIXACL(inode))
+ pr_warn("%s: This may be undefined behavior...\n", __func__);
+ err = generic_permission(&tmp, mask);
+ /* XXX
+ * Original sdcardfs code calls inode_permission(lower_inode,.. )
+ * for checking inode permission. But doing such things here seems
+ * duplicated work, because the functions called after this func,
+ * such as vfs_create, vfs_unlink, vfs_rename, and etc,
+ * does exactly same thing, i.e., they calls inode_permission().
+ * So we just let they do the things.
+ * If there are any security hole, just uncomment following if block.
+ */
+#if 0
+ if (!err) {
+ /*
+ * Permission check on lower_inode(=EXT4).
+ * we check it with AID_MEDIA_RW permission
+ */
+ struct inode *lower_inode;
+
+ OVERRIDE_CRED(SDCARDFS_SB(inode->sb));
+
+ lower_inode = sdcardfs_lower_inode(inode);
+ err = inode_permission(lower_inode, mask);
+
+ REVERT_CRED();
+ }
+#endif
+ return err;
+
+}
+
+static int sdcardfs_setattr_wrn(struct dentry *dentry, struct iattr *ia)
+{
+ WARN_RATELIMIT(1, "sdcardfs does not support setattr. User setattr2.\n");
+ return -EINVAL;
+}
+
+static int sdcardfs_setattr(struct vfsmount *mnt, struct dentry *dentry, struct iattr *ia)
+{
+ int err;
+ struct dentry *lower_dentry;
+ struct vfsmount *lower_mnt;
+ struct inode *inode;
+ struct inode *lower_inode;
+ struct path lower_path;
+ struct iattr lower_ia;
+ struct dentry *parent;
+ struct inode tmp;
+ struct inode *top;
+ const struct cred *saved_cred = NULL;
+
+ inode = d_inode(dentry);
+ top = grab_top(SDCARDFS_I(inode));
+
+ if (!top) {
+ release_top(SDCARDFS_I(inode));
+ return -EINVAL;
+ }
+
+ /*
+ * Permission check on sdcardfs inode.
+ * Calling process should have AID_SDCARD_RW permission
+ * Since generic_permission only needs i_mode, i_uid,
+ * i_gid, and i_sb, we can create a fake inode to pass
+ * this information down in.
+ *
+ * The underlying code may attempt to take locks in some
+ * cases for features we're not using, but if that changes,
+ * locks must be dealt with to avoid undefined behavior.
+ *
+ */
+ copy_attrs(&tmp, inode);
+ tmp.i_uid = make_kuid(&init_user_ns, SDCARDFS_I(top)->d_uid);
+ tmp.i_gid = make_kgid(&init_user_ns, get_gid(mnt, SDCARDFS_I(top)));
+ tmp.i_mode = (inode->i_mode & S_IFMT) | get_mode(mnt, SDCARDFS_I(top));
+ tmp.i_size = i_size_read(inode);
+ release_top(SDCARDFS_I(inode));
+ tmp.i_sb = inode->i_sb;
+
+ /*
+ * Check if user has permission to change inode. We don't check if
+ * this user can change the lower inode: that should happen when
+ * calling notify_change on the lower inode.
+ */
+ /* prepare our own lower struct iattr (with the lower file) */
+ memcpy(&lower_ia, ia, sizeof(lower_ia));
+ /* Allow touch updating timestamps. A previous permission check ensures
+ * we have write access. Changes to mode, owner, and group are ignored
+ */
+ ia->ia_valid |= ATTR_FORCE;
+ err = inode_change_ok(&tmp, ia);
+
+ if (!err) {
+ /* check the Android group ID */
+ parent = dget_parent(dentry);
+ if (!check_caller_access_to_name(d_inode(parent), &dentry->d_name))
+ err = -EACCES;
+ dput(parent);
+ }
+
+ if (err)
+ goto out_err;
+
+ /* save current_cred and override it */
+ OVERRIDE_CRED(SDCARDFS_SB(dentry->d_sb), saved_cred, SDCARDFS_I(inode));
+
+ sdcardfs_get_lower_path(dentry, &lower_path);
+ lower_dentry = lower_path.dentry;
+ lower_mnt = lower_path.mnt;
+ lower_inode = sdcardfs_lower_inode(inode);
+
+ if (ia->ia_valid & ATTR_FILE)
+ lower_ia.ia_file = sdcardfs_lower_file(ia->ia_file);
+
+ lower_ia.ia_valid &= ~(ATTR_UID | ATTR_GID | ATTR_MODE);
+
+ /*
+ * If shrinking, first truncate upper level to cancel writing dirty
+ * pages beyond the new eof; and also if its' maxbytes is more
+ * limiting (fail with -EFBIG before making any change to the lower
+ * level). There is no need to vmtruncate the upper level
+ * afterwards in the other cases: we fsstack_copy_inode_size from
+ * the lower level.
+ */
+ if (current->mm)
+ down_write(&current->mm->mmap_sem);
+ if (ia->ia_valid & ATTR_SIZE) {
+ err = inode_newsize_ok(&tmp, ia->ia_size);
+ if (err) {
+ if (current->mm)
+ up_write(&current->mm->mmap_sem);
+ goto out;
+ }
+ truncate_setsize(inode, ia->ia_size);
+ }
+
+ /*
+ * mode change is for clearing setuid/setgid bits. Allow lower fs
+ * to interpret this in its own way.
+ */
+ if (lower_ia.ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
+ lower_ia.ia_valid &= ~ATTR_MODE;
+
+ /* notify the (possibly copied-up) lower inode */
+ /*
+ * Note: we use d_inode(lower_dentry), because lower_inode may be
+ * unlinked (no inode->i_sb and i_ino==0. This happens if someone
+ * tries to open(), unlink(), then ftruncate() a file.
+ */
+ mutex_lock(&d_inode(lower_dentry)->i_mutex);
+ err = notify_change2(lower_mnt, lower_dentry, &lower_ia, /* note: lower_ia */
+ NULL);
+ mutex_unlock(&d_inode(lower_dentry)->i_mutex);
+ if (current->mm)
+ up_write(&current->mm->mmap_sem);
+ if (err)
+ goto out;
+
+ /* get attributes from the lower inode and update derived permissions */
+ sdcardfs_copy_and_fix_attrs(inode, lower_inode);
+
+ /*
+ * Not running fsstack_copy_inode_size(inode, lower_inode), because
+ * VFS should update our inode size, and notify_change on
+ * lower_inode should update its size.
+ */
+
+out:
+ sdcardfs_put_lower_path(dentry, &lower_path);
+ REVERT_CRED(saved_cred);
+out_err:
+ return err;
+}
+
+static int sdcardfs_fillattr(struct vfsmount *mnt,
+ struct inode *inode, struct kstat *stat)
+{
+ struct sdcardfs_inode_info *info = SDCARDFS_I(inode);
+ struct inode *top = grab_top(info);
+
+ if (!top)
+ return -EINVAL;
+
+ stat->dev = inode->i_sb->s_dev;
+ stat->ino = inode->i_ino;
+ stat->mode = (inode->i_mode & S_IFMT) | get_mode(mnt, SDCARDFS_I(top));
+ stat->nlink = inode->i_nlink;
+ stat->uid = make_kuid(&init_user_ns, SDCARDFS_I(top)->d_uid);
+ stat->gid = make_kgid(&init_user_ns, get_gid(mnt, SDCARDFS_I(top)));
+ stat->rdev = inode->i_rdev;
+ stat->size = i_size_read(inode);
+ stat->atime = inode->i_atime;
+ stat->mtime = inode->i_mtime;
+ stat->ctime = inode->i_ctime;
+ stat->blksize = (1 << inode->i_blkbits);
+ stat->blocks = inode->i_blocks;
+ release_top(info);
+ return 0;
+}
+
+static int sdcardfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
+ struct kstat *stat)
+{
+ struct kstat lower_stat;
+ struct path lower_path;
+ struct dentry *parent;
+ int err;
+
+ parent = dget_parent(dentry);
+ if (!check_caller_access_to_name(d_inode(parent), &dentry->d_name)) {
+ dput(parent);
+ return -EACCES;
+ }
+ dput(parent);
+
+ sdcardfs_get_lower_path(dentry, &lower_path);
+ err = vfs_getattr(&lower_path, &lower_stat);
+ if (err)
+ goto out;
+ sdcardfs_copy_and_fix_attrs(d_inode(dentry),
+ d_inode(lower_path.dentry));
+ err = sdcardfs_fillattr(mnt, d_inode(dentry), stat);
+ stat->blocks = lower_stat.blocks;
+out:
+ sdcardfs_put_lower_path(dentry, &lower_path);
+ return err;
+}
+
+const struct inode_operations sdcardfs_symlink_iops = {
+ .permission2 = sdcardfs_permission,
+ .setattr2 = sdcardfs_setattr,
+ /* XXX Following operations are implemented,
+ * but FUSE(sdcard) or FAT does not support them
+ * These methods are *NOT* perfectly tested.
+ .readlink = sdcardfs_readlink,
+ .follow_link = sdcardfs_follow_link,
+ .put_link = kfree_put_link,
+ */
+};
+
+const struct inode_operations sdcardfs_dir_iops = {
+ .create = sdcardfs_create,
+ .lookup = sdcardfs_lookup,
+ .permission = sdcardfs_permission_wrn,
+ .permission2 = sdcardfs_permission,
+ .unlink = sdcardfs_unlink,
+ .mkdir = sdcardfs_mkdir,
+ .rmdir = sdcardfs_rmdir,
+ .rename = sdcardfs_rename,
+ .setattr = sdcardfs_setattr_wrn,
+ .setattr2 = sdcardfs_setattr,
+ .getattr = sdcardfs_getattr,
+ /* XXX Following operations are implemented,
+ * but FUSE(sdcard) or FAT does not support them
+ * These methods are *NOT* perfectly tested.
+ .symlink = sdcardfs_symlink,
+ .link = sdcardfs_link,
+ .mknod = sdcardfs_mknod,
+ */
+};
+
+const struct inode_operations sdcardfs_main_iops = {
+ .permission = sdcardfs_permission_wrn,
+ .permission2 = sdcardfs_permission,
+ .setattr = sdcardfs_setattr_wrn,
+ .setattr2 = sdcardfs_setattr,
+ .getattr = sdcardfs_getattr,
+};
diff --git a/fs/sdcardfs/lookup.c b/fs/sdcardfs/lookup.c
new file mode 100644
index 000000000000..509d5fbcb472
--- /dev/null
+++ b/fs/sdcardfs/lookup.c
@@ -0,0 +1,464 @@
+/*
+ * fs/sdcardfs/lookup.c
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ * Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009 Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed. It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#include "sdcardfs.h"
+#include "linux/delay.h"
+
+/* The dentry cache is just so we have properly sized dentries */
+static struct kmem_cache *sdcardfs_dentry_cachep;
+
+int sdcardfs_init_dentry_cache(void)
+{
+ sdcardfs_dentry_cachep =
+ kmem_cache_create("sdcardfs_dentry",
+ sizeof(struct sdcardfs_dentry_info),
+ 0, SLAB_RECLAIM_ACCOUNT, NULL);
+
+ return sdcardfs_dentry_cachep ? 0 : -ENOMEM;
+}
+
+void sdcardfs_destroy_dentry_cache(void)
+{
+ kmem_cache_destroy(sdcardfs_dentry_cachep);
+}
+
+void free_dentry_private_data(struct dentry *dentry)
+{
+ if (!dentry || !dentry->d_fsdata)
+ return;
+ kmem_cache_free(sdcardfs_dentry_cachep, dentry->d_fsdata);
+ dentry->d_fsdata = NULL;
+}
+
+/* allocate new dentry private data */
+int new_dentry_private_data(struct dentry *dentry)
+{
+ struct sdcardfs_dentry_info *info = SDCARDFS_D(dentry);
+
+ /* use zalloc to init dentry_info.lower_path */
+ info = kmem_cache_zalloc(sdcardfs_dentry_cachep, GFP_ATOMIC);
+ if (!info)
+ return -ENOMEM;
+
+ spin_lock_init(&info->lock);
+ dentry->d_fsdata = info;
+
+ return 0;
+}
+
+struct inode_data {
+ struct inode *lower_inode;
+ userid_t id;
+};
+
+static int sdcardfs_inode_test(struct inode *inode, void *candidate_data/*void *candidate_lower_inode*/)
+{
+ struct inode *current_lower_inode = sdcardfs_lower_inode(inode);
+ userid_t current_userid = SDCARDFS_I(inode)->userid;
+
+ if (current_lower_inode == ((struct inode_data *)candidate_data)->lower_inode &&
+ current_userid == ((struct inode_data *)candidate_data)->id)
+ return 1; /* found a match */
+ else
+ return 0; /* no match */
+}
+
+static int sdcardfs_inode_set(struct inode *inode, void *lower_inode)
+{
+ /* we do actual inode initialization in sdcardfs_iget */
+ return 0;
+}
+
+struct inode *sdcardfs_iget(struct super_block *sb, struct inode *lower_inode, userid_t id)
+{
+ struct sdcardfs_inode_info *info;
+ struct inode_data data;
+ struct inode *inode; /* the new inode to return */
+
+ if (!igrab(lower_inode))
+ return ERR_PTR(-ESTALE);
+
+ data.id = id;
+ data.lower_inode = lower_inode;
+ inode = iget5_locked(sb, /* our superblock */
+ /*
+ * hashval: we use inode number, but we can
+ * also use "(unsigned long)lower_inode"
+ * instead.
+ */
+ lower_inode->i_ino, /* hashval */
+ sdcardfs_inode_test, /* inode comparison function */
+ sdcardfs_inode_set, /* inode init function */
+ &data); /* data passed to test+set fxns */
+ if (!inode) {
+ iput(lower_inode);
+ return ERR_PTR(-ENOMEM);
+ }
+ /* if found a cached inode, then just return it (after iput) */
+ if (!(inode->i_state & I_NEW)) {
+ iput(lower_inode);
+ return inode;
+ }
+
+ /* initialize new inode */
+ info = SDCARDFS_I(inode);
+
+ inode->i_ino = lower_inode->i_ino;
+ sdcardfs_set_lower_inode(inode, lower_inode);
+
+ inode->i_version++;
+
+ /* use different set of inode ops for symlinks & directories */
+ if (S_ISDIR(lower_inode->i_mode))
+ inode->i_op = &sdcardfs_dir_iops;
+ else if (S_ISLNK(lower_inode->i_mode))
+ inode->i_op = &sdcardfs_symlink_iops;
+ else
+ inode->i_op = &sdcardfs_main_iops;
+
+ /* use different set of file ops for directories */
+ if (S_ISDIR(lower_inode->i_mode))
+ inode->i_fop = &sdcardfs_dir_fops;
+ else
+ inode->i_fop = &sdcardfs_main_fops;
+
+ inode->i_mapping->a_ops = &sdcardfs_aops;
+
+ inode->i_atime.tv_sec = 0;
+ inode->i_atime.tv_nsec = 0;
+ inode->i_mtime.tv_sec = 0;
+ inode->i_mtime.tv_nsec = 0;
+ inode->i_ctime.tv_sec = 0;
+ inode->i_ctime.tv_nsec = 0;
+
+ /* properly initialize special inodes */
+ if (S_ISBLK(lower_inode->i_mode) || S_ISCHR(lower_inode->i_mode) ||
+ S_ISFIFO(lower_inode->i_mode) || S_ISSOCK(lower_inode->i_mode))
+ init_special_inode(inode, lower_inode->i_mode,
+ lower_inode->i_rdev);
+
+ /* all well, copy inode attributes */
+ sdcardfs_copy_and_fix_attrs(inode, lower_inode);
+ fsstack_copy_inode_size(inode, lower_inode);
+
+ unlock_new_inode(inode);
+ return inode;
+}
+
+/*
+ * Helper interpose routine, called directly by ->lookup to handle
+ * spliced dentries.
+ */
+static struct dentry *__sdcardfs_interpose(struct dentry *dentry,
+ struct super_block *sb,
+ struct path *lower_path,
+ userid_t id)
+{
+ struct inode *inode;
+ struct inode *lower_inode;
+ struct super_block *lower_sb;
+ struct dentry *ret_dentry;
+
+ lower_inode = d_inode(lower_path->dentry);
+ lower_sb = sdcardfs_lower_super(sb);
+
+ /* check that the lower file system didn't cross a mount point */
+ if (lower_inode->i_sb != lower_sb) {
+ ret_dentry = ERR_PTR(-EXDEV);
+ goto out;
+ }
+
+ /*
+ * We allocate our new inode below by calling sdcardfs_iget,
+ * which will initialize some of the new inode's fields
+ */
+
+ /* inherit lower inode number for sdcardfs's inode */
+ inode = sdcardfs_iget(sb, lower_inode, id);
+ if (IS_ERR(inode)) {
+ ret_dentry = ERR_CAST(inode);
+ goto out;
+ }
+
+ ret_dentry = d_splice_alias(inode, dentry);
+ dentry = ret_dentry ?: dentry;
+ update_derived_permission_lock(dentry);
+out:
+ return ret_dentry;
+}
+
+/*
+ * Connect an sdcardfs inode dentry/inode with several lower ones. This is
+ * the classic stackable file system "vnode interposition" action.
+ *
+ * @dentry: sdcardfs's dentry which interposes on lower one
+ * @sb: sdcardfs's super_block
+ * @lower_path: the lower path (caller does path_get/put)
+ */
+int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb,
+ struct path *lower_path, userid_t id)
+{
+ struct dentry *ret_dentry;
+
+ ret_dentry = __sdcardfs_interpose(dentry, sb, lower_path, id);
+ return PTR_ERR(ret_dentry);
+}
+
+struct sdcardfs_name_data {
+ struct dir_context ctx;
+ const struct qstr *to_find;
+ char *name;
+ bool found;
+};
+
+static int sdcardfs_name_match(struct dir_context *ctx, const char *name,
+ int namelen, loff_t offset, u64 ino, unsigned int d_type)
+{
+ struct sdcardfs_name_data *buf = container_of(ctx, struct sdcardfs_name_data, ctx);
+ struct qstr candidate = QSTR_INIT(name, namelen);
+
+ if (qstr_case_eq(buf->to_find, &candidate)) {
+ memcpy(buf->name, name, namelen);
+ buf->name[namelen] = 0;
+ buf->found = true;
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Main driver function for sdcardfs's lookup.
+ *
+ * Returns: NULL (ok), ERR_PTR if an error occurred.
+ * Fills in lower_parent_path with <dentry,mnt> on success.
+ */
+static struct dentry *__sdcardfs_lookup(struct dentry *dentry,
+ unsigned int flags, struct path *lower_parent_path, userid_t id)
+{
+ int err = 0;
+ struct vfsmount *lower_dir_mnt;
+ struct dentry *lower_dir_dentry = NULL;
+ struct dentry *lower_dentry;
+ const struct qstr *name;
+ struct path lower_path;
+ struct qstr dname;
+ struct dentry *ret_dentry = NULL;
+ struct sdcardfs_sb_info *sbi;
+
+ sbi = SDCARDFS_SB(dentry->d_sb);
+ /* must initialize dentry operations */
+ d_set_d_op(dentry, &sdcardfs_ci_dops);
+
+ if (IS_ROOT(dentry))
+ goto out;
+
+ name = &dentry->d_name;
+
+ /* now start the actual lookup procedure */
+ lower_dir_dentry = lower_parent_path->dentry;
+ lower_dir_mnt = lower_parent_path->mnt;
+
+ /* Use vfs_path_lookup to check if the dentry exists or not */
+ err = vfs_path_lookup(lower_dir_dentry, lower_dir_mnt, name->name, 0,
+ &lower_path);
+ /* check for other cases */
+ if (err == -ENOENT) {
+ struct file *file;
+ const struct cred *cred = current_cred();
+
+ struct sdcardfs_name_data buffer = {
+ .ctx.actor = sdcardfs_name_match,
+ .to_find = name,
+ .name = __getname(),
+ .found = false,
+ };
+
+ if (!buffer.name) {
+ err = -ENOMEM;
+ goto out;
+ }
+ file = dentry_open(lower_parent_path, O_RDONLY, cred);
+ if (IS_ERR(file)) {
+ err = PTR_ERR(file);
+ goto put_name;
+ }
+ err = iterate_dir(file, &buffer.ctx);
+ fput(file);
+ if (err)
+ goto put_name;
+
+ if (buffer.found)
+ err = vfs_path_lookup(lower_dir_dentry,
+ lower_dir_mnt,
+ buffer.name, 0,
+ &lower_path);
+ else
+ err = -ENOENT;
+put_name:
+ __putname(buffer.name);
+ }
+
+ /* no error: handle positive dentries */
+ if (!err) {
+ /* check if the dentry is an obb dentry
+ * if true, the lower_inode must be replaced with
+ * the inode of the graft path
+ */
+
+ if (need_graft_path(dentry)) {
+
+ /* setup_obb_dentry()
+ * The lower_path will be stored to the dentry's orig_path
+ * and the base obbpath will be copyed to the lower_path variable.
+ * if an error returned, there's no change in the lower_path
+ * returns: -ERRNO if error (0: no error)
+ */
+ err = setup_obb_dentry(dentry, &lower_path);
+
+ if (err) {
+ /* if the sbi->obbpath is not available, we can optionally
+ * setup the lower_path with its orig_path.
+ * but, the current implementation just returns an error
+ * because the sdcard daemon also regards this case as
+ * a lookup fail.
+ */
+ pr_info("sdcardfs: base obbpath is not available\n");
+ sdcardfs_put_reset_orig_path(dentry);
+ goto out;
+ }
+ }
+
+ sdcardfs_set_lower_path(dentry, &lower_path);
+ ret_dentry =
+ __sdcardfs_interpose(dentry, dentry->d_sb, &lower_path, id);
+ if (IS_ERR(ret_dentry)) {
+ err = PTR_ERR(ret_dentry);
+ /* path_put underlying path on error */
+ sdcardfs_put_reset_lower_path(dentry);
+ }
+ goto out;
+ }
+
+ /*
+ * We don't consider ENOENT an error, and we want to return a
+ * negative dentry.
+ */
+ if (err && err != -ENOENT)
+ goto out;
+
+ /* instatiate a new negative dentry */
+ dname.name = name->name;
+ dname.len = name->len;
+
+ /* See if the low-level filesystem might want
+ * to use its own hash
+ */
+ lower_dentry = d_hash_and_lookup(lower_dir_dentry, &dname);
+ if (IS_ERR(lower_dentry))
+ return lower_dentry;
+ if (!lower_dentry) {
+ /* We called vfs_path_lookup earlier, and did not get a negative
+ * dentry then. Don't confuse the lower filesystem by forcing
+ * one on it now...
+ */
+ err = -ENOENT;
+ goto out;
+ }
+
+ lower_path.dentry = lower_dentry;
+ lower_path.mnt = mntget(lower_dir_mnt);
+ sdcardfs_set_lower_path(dentry, &lower_path);
+
+ /*
+ * If the intent is to create a file, then don't return an error, so
+ * the VFS will continue the process of making this negative dentry
+ * into a positive one.
+ */
+ if (flags & (LOOKUP_CREATE|LOOKUP_RENAME_TARGET))
+ err = 0;
+
+out:
+ if (err)
+ return ERR_PTR(err);
+ return ret_dentry;
+}
+
+/*
+ * On success:
+ * fills dentry object appropriate values and returns NULL.
+ * On fail (== error)
+ * returns error ptr
+ *
+ * @dir : Parent inode. It is locked (dir->i_mutex)
+ * @dentry : Target dentry to lookup. we should set each of fields.
+ * (dentry->d_name is initialized already)
+ * @nd : nameidata of parent inode
+ */
+struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry,
+ unsigned int flags)
+{
+ struct dentry *ret = NULL, *parent;
+ struct path lower_parent_path;
+ int err = 0;
+ const struct cred *saved_cred = NULL;
+
+ parent = dget_parent(dentry);
+
+ if (!check_caller_access_to_name(d_inode(parent), &dentry->d_name)) {
+ ret = ERR_PTR(-EACCES);
+ goto out_err;
+ }
+
+ /* save current_cred and override it */
+ OVERRIDE_CRED_PTR(SDCARDFS_SB(dir->i_sb), saved_cred, SDCARDFS_I(dir));
+
+ sdcardfs_get_lower_path(parent, &lower_parent_path);
+
+ /* allocate dentry private data. We free it in ->d_release */
+ err = new_dentry_private_data(dentry);
+ if (err) {
+ ret = ERR_PTR(err);
+ goto out;
+ }
+
+ ret = __sdcardfs_lookup(dentry, flags, &lower_parent_path, SDCARDFS_I(dir)->userid);
+ if (IS_ERR(ret))
+ goto out;
+ if (ret)
+ dentry = ret;
+ if (d_inode(dentry)) {
+ fsstack_copy_attr_times(d_inode(dentry),
+ sdcardfs_lower_inode(d_inode(dentry)));
+ /* get derived permission */
+ get_derived_permission(parent, dentry);
+ fixup_tmp_permissions(d_inode(dentry));
+ fixup_lower_ownership(dentry, dentry->d_name.name);
+ }
+ /* update parent directory's atime */
+ fsstack_copy_attr_atime(d_inode(parent),
+ sdcardfs_lower_inode(d_inode(parent)));
+
+out:
+ sdcardfs_put_lower_path(parent, &lower_parent_path);
+ REVERT_CRED(saved_cred);
+out_err:
+ dput(parent);
+ return ret;
+}
diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c
new file mode 100644
index 000000000000..953d2156d2e9
--- /dev/null
+++ b/fs/sdcardfs/main.c
@@ -0,0 +1,486 @@
+/*
+ * fs/sdcardfs/main.c
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ * Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009 Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed. It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#include "sdcardfs.h"
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/parser.h>
+
+enum {
+ Opt_fsuid,
+ Opt_fsgid,
+ Opt_gid,
+ Opt_debug,
+ Opt_mask,
+ Opt_multiuser,
+ Opt_userid,
+ Opt_reserved_mb,
+ Opt_err,
+};
+
+static const match_table_t sdcardfs_tokens = {
+ {Opt_fsuid, "fsuid=%u"},
+ {Opt_fsgid, "fsgid=%u"},
+ {Opt_gid, "gid=%u"},
+ {Opt_debug, "debug"},
+ {Opt_mask, "mask=%u"},
+ {Opt_userid, "userid=%d"},
+ {Opt_multiuser, "multiuser"},
+ {Opt_reserved_mb, "reserved_mb=%u"},
+ {Opt_err, NULL}
+};
+
+static int parse_options(struct super_block *sb, char *options, int silent,
+ int *debug, struct sdcardfs_vfsmount_options *vfsopts,
+ struct sdcardfs_mount_options *opts)
+{
+ char *p;
+ substring_t args[MAX_OPT_ARGS];
+ int option;
+
+ /* by default, we use AID_MEDIA_RW as uid, gid */
+ opts->fs_low_uid = AID_MEDIA_RW;
+ opts->fs_low_gid = AID_MEDIA_RW;
+ vfsopts->mask = 0;
+ opts->multiuser = false;
+ opts->fs_user_id = 0;
+ vfsopts->gid = 0;
+ /* by default, 0MB is reserved */
+ opts->reserved_mb = 0;
+
+ *debug = 0;
+
+ if (!options)
+ return 0;
+
+ while ((p = strsep(&options, ",")) != NULL) {
+ int token;
+
+ if (!*p)
+ continue;
+
+ token = match_token(p, sdcardfs_tokens, args);
+
+ switch (token) {
+ case Opt_debug:
+ *debug = 1;
+ break;
+ case Opt_fsuid:
+ if (match_int(&args[0], &option))
+ return 0;
+ opts->fs_low_uid = option;
+ break;
+ case Opt_fsgid:
+ if (match_int(&args[0], &option))
+ return 0;
+ opts->fs_low_gid = option;
+ break;
+ case Opt_gid:
+ if (match_int(&args[0], &option))
+ return 0;
+ vfsopts->gid = option;
+ break;
+ case Opt_userid:
+ if (match_int(&args[0], &option))
+ return 0;
+ opts->fs_user_id = option;
+ break;
+ case Opt_mask:
+ if (match_int(&args[0], &option))
+ return 0;
+ vfsopts->mask = option;
+ break;
+ case Opt_multiuser:
+ opts->multiuser = true;
+ break;
+ case Opt_reserved_mb:
+ if (match_int(&args[0], &option))
+ return 0;
+ opts->reserved_mb = option;
+ break;
+ /* unknown option */
+ default:
+ if (!silent)
+ pr_err("Unrecognized mount option \"%s\" or missing value", p);
+ return -EINVAL;
+ }
+ }
+
+ if (*debug) {
+ pr_info("sdcardfs : options - debug:%d\n", *debug);
+ pr_info("sdcardfs : options - uid:%d\n",
+ opts->fs_low_uid);
+ pr_info("sdcardfs : options - gid:%d\n",
+ opts->fs_low_gid);
+ }
+
+ return 0;
+}
+
+int parse_options_remount(struct super_block *sb, char *options, int silent,
+ struct sdcardfs_vfsmount_options *vfsopts)
+{
+ char *p;
+ substring_t args[MAX_OPT_ARGS];
+ int option;
+ int debug;
+
+ if (!options)
+ return 0;
+
+ while ((p = strsep(&options, ",")) != NULL) {
+ int token;
+
+ if (!*p)
+ continue;
+
+ token = match_token(p, sdcardfs_tokens, args);
+
+ switch (token) {
+ case Opt_debug:
+ debug = 1;
+ break;
+ case Opt_gid:
+ if (match_int(&args[0], &option))
+ return 0;
+ vfsopts->gid = option;
+
+ break;
+ case Opt_mask:
+ if (match_int(&args[0], &option))
+ return 0;
+ vfsopts->mask = option;
+ break;
+ case Opt_multiuser:
+ case Opt_userid:
+ case Opt_fsuid:
+ case Opt_fsgid:
+ case Opt_reserved_mb:
+ pr_warn("Option \"%s\" can't be changed during remount\n", p);
+ break;
+ /* unknown option */
+ default:
+ if (!silent)
+ pr_err("Unrecognized mount option \"%s\" or missing value", p);
+ return -EINVAL;
+ }
+ }
+
+ if (debug) {
+ pr_info("sdcardfs : options - debug:%d\n", debug);
+ pr_info("sdcardfs : options - gid:%d\n", vfsopts->gid);
+ pr_info("sdcardfs : options - mask:%d\n", vfsopts->mask);
+ }
+
+ return 0;
+}
+
+#if 0
+/*
+ * our custom d_alloc_root work-alike
+ *
+ * we can't use d_alloc_root if we want to use our own interpose function
+ * unchanged, so we simply call our own "fake" d_alloc_root
+ */
+static struct dentry *sdcardfs_d_alloc_root(struct super_block *sb)
+{
+ struct dentry *ret = NULL;
+
+ if (sb) {
+ static const struct qstr name = {
+ .name = "/",
+ .len = 1
+ };
+
+ ret = d_alloc(NULL, &name);
+ if (ret) {
+ d_set_d_op(ret, &sdcardfs_ci_dops);
+ ret->d_sb = sb;
+ ret->d_parent = ret;
+ }
+ }
+ return ret;
+}
+#endif
+
+DEFINE_MUTEX(sdcardfs_super_list_lock);
+EXPORT_SYMBOL_GPL(sdcardfs_super_list_lock);
+LIST_HEAD(sdcardfs_super_list);
+EXPORT_SYMBOL_GPL(sdcardfs_super_list);
+
+/*
+ * There is no need to lock the sdcardfs_super_info's rwsem as there is no
+ * way anyone can have a reference to the superblock at this point in time.
+ */
+static int sdcardfs_read_super(struct vfsmount *mnt, struct super_block *sb,
+ const char *dev_name, void *raw_data, int silent)
+{
+ int err = 0;
+ int debug;
+ struct super_block *lower_sb;
+ struct path lower_path;
+ struct sdcardfs_sb_info *sb_info;
+ struct sdcardfs_vfsmount_options *mnt_opt = mnt->data;
+ struct inode *inode;
+
+ pr_info("sdcardfs version 2.0\n");
+
+ if (!dev_name) {
+ pr_err("sdcardfs: read_super: missing dev_name argument\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ pr_info("sdcardfs: dev_name -> %s\n", dev_name);
+ pr_info("sdcardfs: options -> %s\n", (char *)raw_data);
+ pr_info("sdcardfs: mnt -> %p\n", mnt);
+
+ /* parse lower path */
+ err = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
+ &lower_path);
+ if (err) {
+ pr_err("sdcardfs: error accessing lower directory '%s'\n", dev_name);
+ goto out;
+ }
+
+ /* allocate superblock private data */
+ sb->s_fs_info = kzalloc(sizeof(struct sdcardfs_sb_info), GFP_KERNEL);
+ if (!SDCARDFS_SB(sb)) {
+ pr_crit("sdcardfs: read_super: out of memory\n");
+ err = -ENOMEM;
+ goto out_free;
+ }
+
+ sb_info = sb->s_fs_info;
+ /* parse options */
+ err = parse_options(sb, raw_data, silent, &debug, mnt_opt, &sb_info->options);
+ if (err) {
+ pr_err("sdcardfs: invalid options\n");
+ goto out_freesbi;
+ }
+
+ /* set the lower superblock field of upper superblock */
+ lower_sb = lower_path.dentry->d_sb;
+ atomic_inc(&lower_sb->s_active);
+ sdcardfs_set_lower_super(sb, lower_sb);
+
+ /* inherit maxbytes from lower file system */
+ sb->s_maxbytes = lower_sb->s_maxbytes;
+
+ /*
+ * Our c/m/atime granularity is 1 ns because we may stack on file
+ * systems whose granularity is as good.
+ */
+ sb->s_time_gran = 1;
+
+ sb->s_magic = SDCARDFS_SUPER_MAGIC;
+ sb->s_op = &sdcardfs_sops;
+
+ /* get a new inode and allocate our root dentry */
+ inode = sdcardfs_iget(sb, d_inode(lower_path.dentry), 0);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ goto out_sput;
+ }
+ sb->s_root = d_make_root(inode);
+ if (!sb->s_root) {
+ err = -ENOMEM;
+ goto out_iput;
+ }
+ d_set_d_op(sb->s_root, &sdcardfs_ci_dops);
+
+ /* link the upper and lower dentries */
+ sb->s_root->d_fsdata = NULL;
+ err = new_dentry_private_data(sb->s_root);
+ if (err)
+ goto out_freeroot;
+
+ /* set the lower dentries for s_root */
+ sdcardfs_set_lower_path(sb->s_root, &lower_path);
+
+ /*
+ * No need to call interpose because we already have a positive
+ * dentry, which was instantiated by d_make_root. Just need to
+ * d_rehash it.
+ */
+ d_rehash(sb->s_root);
+
+ /* setup permission policy */
+ sb_info->obbpath_s = kzalloc(PATH_MAX, GFP_KERNEL);
+ mutex_lock(&sdcardfs_super_list_lock);
+ if (sb_info->options.multiuser) {
+ setup_derived_state(d_inode(sb->s_root), PERM_PRE_ROOT,
+ sb_info->options.fs_user_id, AID_ROOT,
+ false, d_inode(sb->s_root));
+ snprintf(sb_info->obbpath_s, PATH_MAX, "%s/obb", dev_name);
+ } else {
+ setup_derived_state(d_inode(sb->s_root), PERM_ROOT,
+ sb_info->options.fs_user_id, AID_ROOT,
+ false, d_inode(sb->s_root));
+ snprintf(sb_info->obbpath_s, PATH_MAX, "%s/Android/obb", dev_name);
+ }
+ fixup_tmp_permissions(d_inode(sb->s_root));
+ sb_info->sb = sb;
+ list_add(&sb_info->list, &sdcardfs_super_list);
+ mutex_unlock(&sdcardfs_super_list_lock);
+
+ if (!silent)
+ pr_info("sdcardfs: mounted on top of %s type %s\n",
+ dev_name, lower_sb->s_type->name);
+ goto out; /* all is well */
+
+ /* no longer needed: free_dentry_private_data(sb->s_root); */
+out_freeroot:
+ dput(sb->s_root);
+out_iput:
+ iput(inode);
+out_sput:
+ /* drop refs we took earlier */
+ atomic_dec(&lower_sb->s_active);
+out_freesbi:
+ kfree(SDCARDFS_SB(sb));
+ sb->s_fs_info = NULL;
+out_free:
+ path_put(&lower_path);
+
+out:
+ return err;
+}
+
+/* A feature which supports mount_nodev() with options */
+static struct dentry *mount_nodev_with_options(struct vfsmount *mnt,
+ struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *data,
+ int (*fill_super)(struct vfsmount *, struct super_block *,
+ const char *, void *, int))
+
+{
+ int error;
+ struct super_block *s = sget(fs_type, NULL, set_anon_super, flags, NULL);
+
+ if (IS_ERR(s))
+ return ERR_CAST(s);
+
+ s->s_flags = flags;
+
+ error = fill_super(mnt, s, dev_name, data, flags & MS_SILENT ? 1 : 0);
+ if (error) {
+ deactivate_locked_super(s);
+ return ERR_PTR(error);
+ }
+ s->s_flags |= MS_ACTIVE;
+ return dget(s->s_root);
+}
+
+static struct dentry *sdcardfs_mount(struct vfsmount *mnt,
+ struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *raw_data)
+{
+ /*
+ * dev_name is a lower_path_name,
+ * raw_data is a option string.
+ */
+ return mount_nodev_with_options(mnt, fs_type, flags, dev_name,
+ raw_data, sdcardfs_read_super);
+}
+
+static struct dentry *sdcardfs_mount_wrn(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *raw_data)
+{
+ WARN(1, "sdcardfs does not support mount. Use mount2.\n");
+ return ERR_PTR(-EINVAL);
+}
+
+void *sdcardfs_alloc_mnt_data(void)
+{
+ return kmalloc(sizeof(struct sdcardfs_vfsmount_options), GFP_KERNEL);
+}
+
+void sdcardfs_kill_sb(struct super_block *sb)
+{
+ struct sdcardfs_sb_info *sbi;
+
+ if (sb->s_magic == SDCARDFS_SUPER_MAGIC) {
+ sbi = SDCARDFS_SB(sb);
+ mutex_lock(&sdcardfs_super_list_lock);
+ list_del(&sbi->list);
+ mutex_unlock(&sdcardfs_super_list_lock);
+ }
+ generic_shutdown_super(sb);
+}
+
+static struct file_system_type sdcardfs_fs_type = {
+ .owner = THIS_MODULE,
+ .name = SDCARDFS_NAME,
+ .mount = sdcardfs_mount_wrn,
+ .mount2 = sdcardfs_mount,
+ .alloc_mnt_data = sdcardfs_alloc_mnt_data,
+ .kill_sb = sdcardfs_kill_sb,
+ .fs_flags = 0,
+};
+MODULE_ALIAS_FS(SDCARDFS_NAME);
+
+static int __init init_sdcardfs_fs(void)
+{
+ int err;
+
+ pr_info("Registering sdcardfs " SDCARDFS_VERSION "\n");
+
+ err = sdcardfs_init_inode_cache();
+ if (err)
+ goto out;
+ err = sdcardfs_init_dentry_cache();
+ if (err)
+ goto out;
+ err = packagelist_init();
+ if (err)
+ goto out;
+ err = register_filesystem(&sdcardfs_fs_type);
+out:
+ if (err) {
+ sdcardfs_destroy_inode_cache();
+ sdcardfs_destroy_dentry_cache();
+ packagelist_exit();
+ }
+ return err;
+}
+
+static void __exit exit_sdcardfs_fs(void)
+{
+ sdcardfs_destroy_inode_cache();
+ sdcardfs_destroy_dentry_cache();
+ packagelist_exit();
+ unregister_filesystem(&sdcardfs_fs_type);
+ pr_info("Completed sdcardfs module unload\n");
+}
+
+/* Original wrapfs authors */
+MODULE_AUTHOR("Erez Zadok, Filesystems and Storage Lab, Stony Brook University (http://www.fsl.cs.sunysb.edu/)");
+
+/* Original sdcardfs authors */
+MODULE_AUTHOR("Woojoong Lee, Daeho Jeong, Kitae Lee, Yeongjin Gil System Memory Lab., Samsung Electronics");
+
+/* Current maintainer */
+MODULE_AUTHOR("Daniel Rosenberg, Google");
+MODULE_DESCRIPTION("Sdcardfs " SDCARDFS_VERSION);
+MODULE_LICENSE("GPL");
+
+module_init(init_sdcardfs_fs);
+module_exit(exit_sdcardfs_fs);
diff --git a/fs/sdcardfs/mmap.c b/fs/sdcardfs/mmap.c
new file mode 100644
index 000000000000..b61f82275e7d
--- /dev/null
+++ b/fs/sdcardfs/mmap.c
@@ -0,0 +1,89 @@
+/*
+ * fs/sdcardfs/mmap.c
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ * Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009 Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed. It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#include "sdcardfs.h"
+
+static int sdcardfs_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ int err;
+ struct file *file;
+ const struct vm_operations_struct *lower_vm_ops;
+
+ file = (struct file *)vma->vm_private_data;
+ lower_vm_ops = SDCARDFS_F(file)->lower_vm_ops;
+ BUG_ON(!lower_vm_ops);
+
+ err = lower_vm_ops->fault(vma, vmf);
+ return err;
+}
+
+static void sdcardfs_vm_open(struct vm_area_struct *vma)
+{
+ struct file *file = (struct file *)vma->vm_private_data;
+
+ get_file(file);
+}
+
+static void sdcardfs_vm_close(struct vm_area_struct *vma)
+{
+ struct file *file = (struct file *)vma->vm_private_data;
+
+ fput(file);
+}
+
+static int sdcardfs_page_mkwrite(struct vm_area_struct *vma,
+ struct vm_fault *vmf)
+{
+ int err = 0;
+ struct file *file;
+ const struct vm_operations_struct *lower_vm_ops;
+
+ file = (struct file *)vma->vm_private_data;
+ lower_vm_ops = SDCARDFS_F(file)->lower_vm_ops;
+ BUG_ON(!lower_vm_ops);
+ if (!lower_vm_ops->page_mkwrite)
+ goto out;
+
+ err = lower_vm_ops->page_mkwrite(vma, vmf);
+out:
+ return err;
+}
+
+static ssize_t sdcardfs_direct_IO(struct kiocb *iocb,
+ struct iov_iter *iter, loff_t pos)
+{
+ /*
+ * This function should never be called directly. We need it
+ * to exist, to get past a check in open_check_o_direct(),
+ * which is called from do_last().
+ */
+ return -EINVAL;
+}
+
+const struct address_space_operations sdcardfs_aops = {
+ .direct_IO = sdcardfs_direct_IO,
+};
+
+const struct vm_operations_struct sdcardfs_vm_ops = {
+ .fault = sdcardfs_fault,
+ .page_mkwrite = sdcardfs_page_mkwrite,
+ .open = sdcardfs_vm_open,
+ .close = sdcardfs_vm_close,
+};
diff --git a/fs/sdcardfs/multiuser.h b/fs/sdcardfs/multiuser.h
new file mode 100644
index 000000000000..85341e753f8c
--- /dev/null
+++ b/fs/sdcardfs/multiuser.h
@@ -0,0 +1,53 @@
+/*
+ * fs/sdcardfs/multiuser.h
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ * Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009 Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed. It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#define AID_USER_OFFSET 100000 /* offset for uid ranges for each user */
+#define AID_APP_START 10000 /* first app user */
+#define AID_APP_END 19999 /* last app user */
+#define AID_CACHE_GID_START 20000 /* start of gids for apps to mark cached data */
+#define AID_EXT_GID_START 30000 /* start of gids for apps to mark external data */
+#define AID_EXT_CACHE_GID_START 40000 /* start of gids for apps to mark external cached data */
+#define AID_EXT_CACHE_GID_END 49999 /* end of gids for apps to mark external cached data */
+#define AID_SHARED_GID_START 50000 /* start of gids for apps in each user to share */
+
+typedef uid_t userid_t;
+typedef uid_t appid_t;
+
+static inline uid_t multiuser_get_uid(userid_t user_id, appid_t app_id)
+{
+ return (user_id * AID_USER_OFFSET) + (app_id % AID_USER_OFFSET);
+}
+
+static inline bool uid_is_app(uid_t uid)
+{
+ appid_t appid = uid % AID_USER_OFFSET;
+
+ return appid >= AID_APP_START && appid <= AID_APP_END;
+}
+
+static inline gid_t multiuser_get_ext_cache_gid(uid_t uid)
+{
+ return uid - AID_APP_START + AID_EXT_CACHE_GID_START;
+}
+
+static inline gid_t multiuser_get_ext_gid(uid_t uid)
+{
+ return uid - AID_APP_START + AID_EXT_GID_START;
+}
diff --git a/fs/sdcardfs/packagelist.c b/fs/sdcardfs/packagelist.c
new file mode 100644
index 000000000000..89196e31073e
--- /dev/null
+++ b/fs/sdcardfs/packagelist.c
@@ -0,0 +1,893 @@
+/*
+ * fs/sdcardfs/packagelist.c
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ * Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009 Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed. It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#include "sdcardfs.h"
+#include <linux/hashtable.h>
+#include <linux/ctype.h>
+#include <linux/delay.h>
+#include <linux/radix-tree.h>
+#include <linux/dcache.h>
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include <linux/configfs.h>
+
+struct hashtable_entry {
+ struct hlist_node hlist;
+ struct hlist_node dlist; /* for deletion cleanup */
+ struct qstr key;
+ atomic_t value;
+};
+
+static DEFINE_HASHTABLE(package_to_appid, 8);
+static DEFINE_HASHTABLE(package_to_userid, 8);
+static DEFINE_HASHTABLE(ext_to_groupid, 8);
+
+
+static struct kmem_cache *hashtable_entry_cachep;
+
+static unsigned int full_name_case_hash(const unsigned char *name, unsigned int len)
+{
+ unsigned long hash = init_name_hash();
+
+ while (len--)
+ hash = partial_name_hash(tolower(*name++), hash);
+ return end_name_hash(hash);
+}
+
+static inline void qstr_init(struct qstr *q, const char *name)
+{
+ q->name = name;
+ q->len = strlen(q->name);
+ q->hash = full_name_case_hash(q->name, q->len);
+}
+
+static inline int qstr_copy(const struct qstr *src, struct qstr *dest)
+{
+ dest->name = kstrdup(src->name, GFP_KERNEL);
+ dest->hash_len = src->hash_len;
+ return !!dest->name;
+}
+
+
+static appid_t __get_appid(const struct qstr *key)
+{
+ struct hashtable_entry *hash_cur;
+ unsigned int hash = key->hash;
+ appid_t ret_id;
+
+ rcu_read_lock();
+ hash_for_each_possible_rcu(package_to_appid, hash_cur, hlist, hash) {
+ if (qstr_case_eq(key, &hash_cur->key)) {
+ ret_id = atomic_read(&hash_cur->value);
+ rcu_read_unlock();
+ return ret_id;
+ }
+ }
+ rcu_read_unlock();
+ return 0;
+}
+
+appid_t get_appid(const char *key)
+{
+ struct qstr q;
+
+ qstr_init(&q, key);
+ return __get_appid(&q);
+}
+
+static appid_t __get_ext_gid(const struct qstr *key)
+{
+ struct hashtable_entry *hash_cur;
+ unsigned int hash = key->hash;
+ appid_t ret_id;
+
+ rcu_read_lock();
+ hash_for_each_possible_rcu(ext_to_groupid, hash_cur, hlist, hash) {
+ if (qstr_case_eq(key, &hash_cur->key)) {
+ ret_id = atomic_read(&hash_cur->value);
+ rcu_read_unlock();
+ return ret_id;
+ }
+ }
+ rcu_read_unlock();
+ return 0;
+}
+
+appid_t get_ext_gid(const char *key)
+{
+ struct qstr q;
+
+ qstr_init(&q, key);
+ return __get_ext_gid(&q);
+}
+
+static appid_t __is_excluded(const struct qstr *app_name, userid_t user)
+{
+ struct hashtable_entry *hash_cur;
+ unsigned int hash = app_name->hash;
+
+ rcu_read_lock();
+ hash_for_each_possible_rcu(package_to_userid, hash_cur, hlist, hash) {
+ if (atomic_read(&hash_cur->value) == user &&
+ qstr_case_eq(app_name, &hash_cur->key)) {
+ rcu_read_unlock();
+ return 1;
+ }
+ }
+ rcu_read_unlock();
+ return 0;
+}
+
+appid_t is_excluded(const char *key, userid_t user)
+{
+ struct qstr q;
+ qstr_init(&q, key);
+ return __is_excluded(&q, user);
+}
+
+/* Kernel has already enforced everything we returned through
+ * derive_permissions_locked(), so this is used to lock down access
+ * even further, such as enforcing that apps hold sdcard_rw.
+ */
+int check_caller_access_to_name(struct inode *parent_node, const struct qstr *name)
+{
+ struct qstr q_autorun = QSTR_LITERAL("autorun.inf");
+ struct qstr q__android_secure = QSTR_LITERAL(".android_secure");
+ struct qstr q_android_secure = QSTR_LITERAL("android_secure");
+
+ /* Always block security-sensitive files at root */
+ if (parent_node && SDCARDFS_I(parent_node)->perm == PERM_ROOT) {
+ if (qstr_case_eq(name, &q_autorun)
+ || qstr_case_eq(name, &q__android_secure)
+ || qstr_case_eq(name, &q_android_secure)) {
+ return 0;
+ }
+ }
+
+ /* Root always has access; access for any other UIDs should always
+ * be controlled through packages.list.
+ */
+ if (from_kuid(&init_user_ns, current_fsuid()) == 0)
+ return 1;
+
+ /* No extra permissions to enforce */
+ return 1;
+}
+
+/* This function is used when file opening. The open flags must be
+ * checked before calling check_caller_access_to_name()
+ */
+int open_flags_to_access_mode(int open_flags)
+{
+ if ((open_flags & O_ACCMODE) == O_RDONLY)
+ return 0; /* R_OK */
+ if ((open_flags & O_ACCMODE) == O_WRONLY)
+ return 1; /* W_OK */
+ /* Probably O_RDRW, but treat as default to be safe */
+ return 1; /* R_OK | W_OK */
+}
+
+static struct hashtable_entry *alloc_hashtable_entry(const struct qstr *key,
+ appid_t value)
+{
+ struct hashtable_entry *ret = kmem_cache_alloc(hashtable_entry_cachep,
+ GFP_KERNEL);
+ if (!ret)
+ return NULL;
+ INIT_HLIST_NODE(&ret->dlist);
+ INIT_HLIST_NODE(&ret->hlist);
+
+ if (!qstr_copy(key, &ret->key)) {
+ kmem_cache_free(hashtable_entry_cachep, ret);
+ return NULL;
+ }
+
+ atomic_set(&ret->value, value);
+ return ret;
+}
+
+static int insert_packagelist_appid_entry_locked(const struct qstr *key, appid_t value)
+{
+ struct hashtable_entry *hash_cur;
+ struct hashtable_entry *new_entry;
+ unsigned int hash = key->hash;
+
+ hash_for_each_possible_rcu(package_to_appid, hash_cur, hlist, hash) {
+ if (qstr_case_eq(key, &hash_cur->key)) {
+ atomic_set(&hash_cur->value, value);
+ return 0;
+ }
+ }
+ new_entry = alloc_hashtable_entry(key, value);
+ if (!new_entry)
+ return -ENOMEM;
+ hash_add_rcu(package_to_appid, &new_entry->hlist, hash);
+ return 0;
+}
+
+static int insert_ext_gid_entry_locked(const struct qstr *key, appid_t value)
+{
+ struct hashtable_entry *hash_cur;
+ struct hashtable_entry *new_entry;
+ unsigned int hash = key->hash;
+
+ /* An extension can only belong to one gid */
+ hash_for_each_possible_rcu(ext_to_groupid, hash_cur, hlist, hash) {
+ if (qstr_case_eq(key, &hash_cur->key))
+ return -EINVAL;
+ }
+ new_entry = alloc_hashtable_entry(key, value);
+ if (!new_entry)
+ return -ENOMEM;
+ hash_add_rcu(ext_to_groupid, &new_entry->hlist, hash);
+ return 0;
+}
+
+static int insert_userid_exclude_entry_locked(const struct qstr *key, userid_t value)
+{
+ struct hashtable_entry *hash_cur;
+ struct hashtable_entry *new_entry;
+ unsigned int hash = key->hash;
+
+ /* Only insert if not already present */
+ hash_for_each_possible_rcu(package_to_userid, hash_cur, hlist, hash) {
+ if (atomic_read(&hash_cur->value) == value &&
+ qstr_case_eq(key, &hash_cur->key))
+ return 0;
+ }
+ new_entry = alloc_hashtable_entry(key, value);
+ if (!new_entry)
+ return -ENOMEM;
+ hash_add_rcu(package_to_userid, &new_entry->hlist, hash);
+ return 0;
+}
+
+static void fixup_all_perms_name(const struct qstr *key)
+{
+ struct sdcardfs_sb_info *sbinfo;
+ struct limit_search limit = {
+ .flags = BY_NAME,
+ .name = QSTR_INIT(key->name, key->len),
+ };
+ list_for_each_entry(sbinfo, &sdcardfs_super_list, list) {
+ if (sbinfo_has_sdcard_magic(sbinfo))
+ fixup_perms_recursive(sbinfo->sb->s_root, &limit);
+ }
+}
+
+static void fixup_all_perms_name_userid(const struct qstr *key, userid_t userid)
+{
+ struct sdcardfs_sb_info *sbinfo;
+ struct limit_search limit = {
+ .flags = BY_NAME | BY_USERID,
+ .name = QSTR_INIT(key->name, key->len),
+ .userid = userid,
+ };
+ list_for_each_entry(sbinfo, &sdcardfs_super_list, list) {
+ if (sbinfo_has_sdcard_magic(sbinfo))
+ fixup_perms_recursive(sbinfo->sb->s_root, &limit);
+ }
+}
+
+static void fixup_all_perms_userid(userid_t userid)
+{
+ struct sdcardfs_sb_info *sbinfo;
+ struct limit_search limit = {
+ .flags = BY_USERID,
+ .userid = userid,
+ };
+ list_for_each_entry(sbinfo, &sdcardfs_super_list, list) {
+ if (sbinfo_has_sdcard_magic(sbinfo))
+ fixup_perms_recursive(sbinfo->sb->s_root, &limit);
+ }
+}
+
+static int insert_packagelist_entry(const struct qstr *key, appid_t value)
+{
+ int err;
+
+ mutex_lock(&sdcardfs_super_list_lock);
+ err = insert_packagelist_appid_entry_locked(key, value);
+ if (!err)
+ fixup_all_perms_name(key);
+ mutex_unlock(&sdcardfs_super_list_lock);
+
+ return err;
+}
+
+static int insert_ext_gid_entry(const struct qstr *key, appid_t value)
+{
+ int err;
+
+ mutex_lock(&sdcardfs_super_list_lock);
+ err = insert_ext_gid_entry_locked(key, value);
+ mutex_unlock(&sdcardfs_super_list_lock);
+
+ return err;
+}
+
+static int insert_userid_exclude_entry(const struct qstr *key, userid_t value)
+{
+ int err;
+
+ mutex_lock(&sdcardfs_super_list_lock);
+ err = insert_userid_exclude_entry_locked(key, value);
+ if (!err)
+ fixup_all_perms_name_userid(key, value);
+ mutex_unlock(&sdcardfs_super_list_lock);
+
+ return err;
+}
+
+static void free_hashtable_entry(struct hashtable_entry *entry)
+{
+ kfree(entry->key.name);
+ kmem_cache_free(hashtable_entry_cachep, entry);
+}
+
+static void remove_packagelist_entry_locked(const struct qstr *key)
+{
+ struct hashtable_entry *hash_cur;
+ unsigned int hash = key->hash;
+ struct hlist_node *h_t;
+ HLIST_HEAD(free_list);
+
+ hash_for_each_possible_rcu(package_to_userid, hash_cur, hlist, hash) {
+ if (qstr_case_eq(key, &hash_cur->key)) {
+ hash_del_rcu(&hash_cur->hlist);
+ hlist_add_head(&hash_cur->dlist, &free_list);
+ }
+ }
+ hash_for_each_possible_rcu(package_to_appid, hash_cur, hlist, hash) {
+ if (qstr_case_eq(key, &hash_cur->key)) {
+ hash_del_rcu(&hash_cur->hlist);
+ hlist_add_head(&hash_cur->dlist, &free_list);
+ break;
+ }
+ }
+ synchronize_rcu();
+ hlist_for_each_entry_safe(hash_cur, h_t, &free_list, dlist)
+ free_hashtable_entry(hash_cur);
+}
+
+static void remove_packagelist_entry(const struct qstr *key)
+{
+ mutex_lock(&sdcardfs_super_list_lock);
+ remove_packagelist_entry_locked(key);
+ fixup_all_perms_name(key);
+ mutex_unlock(&sdcardfs_super_list_lock);
+}
+
+static void remove_ext_gid_entry_locked(const struct qstr *key, gid_t group)
+{
+ struct hashtable_entry *hash_cur;
+ unsigned int hash = key->hash;
+
+ hash_for_each_possible_rcu(ext_to_groupid, hash_cur, hlist, hash) {
+ if (qstr_case_eq(key, &hash_cur->key) && atomic_read(&hash_cur->value) == group) {
+ hash_del_rcu(&hash_cur->hlist);
+ synchronize_rcu();
+ free_hashtable_entry(hash_cur);
+ break;
+ }
+ }
+}
+
+static void remove_ext_gid_entry(const struct qstr *key, gid_t group)
+{
+ mutex_lock(&sdcardfs_super_list_lock);
+ remove_ext_gid_entry_locked(key, group);
+ mutex_unlock(&sdcardfs_super_list_lock);
+}
+
+static void remove_userid_all_entry_locked(userid_t userid)
+{
+ struct hashtable_entry *hash_cur;
+ struct hlist_node *h_t;
+ HLIST_HEAD(free_list);
+ int i;
+
+ hash_for_each_rcu(package_to_userid, i, hash_cur, hlist) {
+ if (atomic_read(&hash_cur->value) == userid) {
+ hash_del_rcu(&hash_cur->hlist);
+ hlist_add_head(&hash_cur->dlist, &free_list);
+ }
+ }
+ synchronize_rcu();
+ hlist_for_each_entry_safe(hash_cur, h_t, &free_list, dlist) {
+ free_hashtable_entry(hash_cur);
+ }
+}
+
+static void remove_userid_all_entry(userid_t userid)
+{
+ mutex_lock(&sdcardfs_super_list_lock);
+ remove_userid_all_entry_locked(userid);
+ fixup_all_perms_userid(userid);
+ mutex_unlock(&sdcardfs_super_list_lock);
+}
+
+static void remove_userid_exclude_entry_locked(const struct qstr *key, userid_t userid)
+{
+ struct hashtable_entry *hash_cur;
+ unsigned int hash = key->hash;
+
+ hash_for_each_possible_rcu(package_to_userid, hash_cur, hlist, hash) {
+ if (qstr_case_eq(key, &hash_cur->key) &&
+ atomic_read(&hash_cur->value) == userid) {
+ hash_del_rcu(&hash_cur->hlist);
+ synchronize_rcu();
+ free_hashtable_entry(hash_cur);
+ break;
+ }
+ }
+}
+
+static void remove_userid_exclude_entry(const struct qstr *key, userid_t userid)
+{
+ mutex_lock(&sdcardfs_super_list_lock);
+ remove_userid_exclude_entry_locked(key, userid);
+ fixup_all_perms_name_userid(key, userid);
+ mutex_unlock(&sdcardfs_super_list_lock);
+}
+
+static void packagelist_destroy(void)
+{
+ struct hashtable_entry *hash_cur;
+ struct hlist_node *h_t;
+ HLIST_HEAD(free_list);
+ int i;
+
+ mutex_lock(&sdcardfs_super_list_lock);
+ hash_for_each_rcu(package_to_appid, i, hash_cur, hlist) {
+ hash_del_rcu(&hash_cur->hlist);
+ hlist_add_head(&hash_cur->dlist, &free_list);
+ }
+ hash_for_each_rcu(package_to_userid, i, hash_cur, hlist) {
+ hash_del_rcu(&hash_cur->hlist);
+ hlist_add_head(&hash_cur->dlist, &free_list);
+ }
+ synchronize_rcu();
+ hlist_for_each_entry_safe(hash_cur, h_t, &free_list, dlist)
+ free_hashtable_entry(hash_cur);
+ mutex_unlock(&sdcardfs_super_list_lock);
+ pr_info("sdcardfs: destroyed packagelist pkgld\n");
+}
+
+#define SDCARDFS_CONFIGFS_ATTR(_pfx, _name) \
+static struct configfs_attribute _pfx##attr_##_name = { \
+ .ca_name = __stringify(_name), \
+ .ca_mode = S_IRUGO | S_IWUGO, \
+ .ca_owner = THIS_MODULE, \
+ .show = _pfx##_name##_show, \
+ .store = _pfx##_name##_store, \
+}
+
+#define SDCARDFS_CONFIGFS_ATTR_RO(_pfx, _name) \
+static struct configfs_attribute _pfx##attr_##_name = { \
+ .ca_name = __stringify(_name), \
+ .ca_mode = S_IRUGO, \
+ .ca_owner = THIS_MODULE, \
+ .show = _pfx##_name##_show, \
+}
+
+#define SDCARDFS_CONFIGFS_ATTR_WO(_pfx, _name) \
+static struct configfs_attribute _pfx##attr_##_name = { \
+ .ca_name = __stringify(_name), \
+ .ca_mode = S_IWUGO, \
+ .ca_owner = THIS_MODULE, \
+ .store = _pfx##_name##_store, \
+}
+
+struct package_details {
+ struct config_item item;
+ struct qstr name;
+};
+
+static inline struct package_details *to_package_details(struct config_item *item)
+{
+ return item ? container_of(item, struct package_details, item) : NULL;
+}
+
+static ssize_t package_details_appid_show(struct config_item *item, char *page)
+{
+ return scnprintf(page, PAGE_SIZE, "%u\n", __get_appid(&to_package_details(item)->name));
+}
+
+static ssize_t package_details_appid_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ unsigned int tmp;
+ int ret;
+
+ ret = kstrtouint(page, 10, &tmp);
+ if (ret)
+ return ret;
+
+ ret = insert_packagelist_entry(&to_package_details(item)->name, tmp);
+
+ if (ret)
+ return ret;
+
+ return count;
+}
+
+static ssize_t package_details_excluded_userids_show(struct config_item *item,
+ char *page)
+{
+ struct package_details *package_details = to_package_details(item);
+ struct hashtable_entry *hash_cur;
+ unsigned int hash = package_details->name.hash;
+ int count = 0;
+
+ rcu_read_lock();
+ hash_for_each_possible_rcu(package_to_userid, hash_cur, hlist, hash) {
+ if (qstr_case_eq(&package_details->name, &hash_cur->key))
+ count += scnprintf(page + count, PAGE_SIZE - count,
+ "%d ", atomic_read(&hash_cur->value));
+ }
+ rcu_read_unlock();
+ if (count)
+ count--;
+ count += scnprintf(page + count, PAGE_SIZE - count, "\n");
+ return count;
+}
+
+static ssize_t package_details_excluded_userids_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ unsigned int tmp;
+ int ret;
+
+ ret = kstrtouint(page, 10, &tmp);
+ if (ret)
+ return ret;
+
+ ret = insert_userid_exclude_entry(&to_package_details(item)->name, tmp);
+
+ if (ret)
+ return ret;
+
+ return count;
+}
+
+static ssize_t package_details_clear_userid_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ unsigned int tmp;
+ int ret;
+
+ ret = kstrtouint(page, 10, &tmp);
+ if (ret)
+ return ret;
+ remove_userid_exclude_entry(&to_package_details(item)->name, tmp);
+ return count;
+}
+
+static void package_details_release(struct config_item *item)
+{
+ struct package_details *package_details = to_package_details(item);
+
+ pr_info("sdcardfs: removing %s\n", package_details->name.name);
+ remove_packagelist_entry(&package_details->name);
+ kfree(package_details->name.name);
+ kfree(package_details);
+}
+
+SDCARDFS_CONFIGFS_ATTR(package_details_, appid);
+SDCARDFS_CONFIGFS_ATTR(package_details_, excluded_userids);
+SDCARDFS_CONFIGFS_ATTR_WO(package_details_, clear_userid);
+
+static struct configfs_attribute *package_details_attrs[] = {
+ &package_details_attr_appid,
+ &package_details_attr_excluded_userids,
+ &package_details_attr_clear_userid,
+ NULL,
+};
+
+static struct configfs_item_operations package_details_item_ops = {
+ .release = package_details_release,
+};
+
+static struct config_item_type package_appid_type = {
+ .ct_item_ops = &package_details_item_ops,
+ .ct_attrs = package_details_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+struct extensions_value {
+ struct config_group group;
+ unsigned int num;
+};
+
+struct extension_details {
+ struct config_item item;
+ struct qstr name;
+ unsigned int num;
+};
+
+static inline struct extensions_value *to_extensions_value(struct config_item *item)
+{
+ return item ? container_of(to_config_group(item), struct extensions_value, group) : NULL;
+}
+
+static inline struct extension_details *to_extension_details(struct config_item *item)
+{
+ return item ? container_of(item, struct extension_details, item) : NULL;
+}
+
+static void extension_details_release(struct config_item *item)
+{
+ struct extension_details *extension_details = to_extension_details(item);
+
+ pr_info("sdcardfs: No longer mapping %s files to gid %d\n",
+ extension_details->name.name, extension_details->num);
+ remove_ext_gid_entry(&extension_details->name, extension_details->num);
+ kfree(extension_details->name.name);
+ kfree(extension_details);
+}
+
+static struct configfs_item_operations extension_details_item_ops = {
+ .release = extension_details_release,
+};
+
+static struct config_item_type extension_details_type = {
+ .ct_item_ops = &extension_details_item_ops,
+ .ct_owner = THIS_MODULE,
+};
+
+static struct config_item *extension_details_make_item(struct config_group *group, const char *name)
+{
+ struct extensions_value *extensions_value = to_extensions_value(&group->cg_item);
+ struct extension_details *extension_details = kzalloc(sizeof(struct extension_details), GFP_KERNEL);
+ const char *tmp;
+ int ret;
+
+ if (!extension_details)
+ return ERR_PTR(-ENOMEM);
+
+ tmp = kstrdup(name, GFP_KERNEL);
+ if (!tmp) {
+ kfree(extension_details);
+ return ERR_PTR(-ENOMEM);
+ }
+ qstr_init(&extension_details->name, tmp);
+ ret = insert_ext_gid_entry(&extension_details->name, extensions_value->num);
+
+ if (ret) {
+ kfree(extension_details->name.name);
+ kfree(extension_details);
+ return ERR_PTR(ret);
+ }
+ config_item_init_type_name(&extension_details->item, name, &extension_details_type);
+
+ return &extension_details->item;
+}
+
+static struct configfs_group_operations extensions_value_group_ops = {
+ .make_item = extension_details_make_item,
+};
+
+static struct config_item_type extensions_name_type = {
+ .ct_group_ops = &extensions_value_group_ops,
+ .ct_owner = THIS_MODULE,
+};
+
+static struct config_group *extensions_make_group(struct config_group *group, const char *name)
+{
+ struct extensions_value *extensions_value;
+ unsigned int tmp;
+ int ret;
+
+ extensions_value = kzalloc(sizeof(struct extensions_value), GFP_KERNEL);
+ if (!extensions_value)
+ return ERR_PTR(-ENOMEM);
+ ret = kstrtouint(name, 10, &tmp);
+ if (ret) {
+ kfree(extensions_value);
+ return ERR_PTR(ret);
+ }
+
+ extensions_value->num = tmp;
+ config_group_init_type_name(&extensions_value->group, name,
+ &extensions_name_type);
+ return &extensions_value->group;
+}
+
+static void extensions_drop_group(struct config_group *group, struct config_item *item)
+{
+ struct extensions_value *value = to_extensions_value(item);
+
+ pr_info("sdcardfs: No longer mapping any files to gid %d\n", value->num);
+ kfree(value);
+}
+
+static struct configfs_group_operations extensions_group_ops = {
+ .make_group = extensions_make_group,
+ .drop_item = extensions_drop_group,
+};
+
+static struct config_item_type extensions_type = {
+ .ct_group_ops = &extensions_group_ops,
+ .ct_owner = THIS_MODULE,
+};
+
+struct config_group extension_group = {
+ .cg_item = {
+ .ci_namebuf = "extensions",
+ .ci_type = &extensions_type,
+ },
+};
+
+static struct config_item *packages_make_item(struct config_group *group, const char *name)
+{
+ struct package_details *package_details;
+ const char *tmp;
+
+ package_details = kzalloc(sizeof(struct package_details), GFP_KERNEL);
+ if (!package_details)
+ return ERR_PTR(-ENOMEM);
+ tmp = kstrdup(name, GFP_KERNEL);
+ if (!tmp) {
+ kfree(package_details);
+ return ERR_PTR(-ENOMEM);
+ }
+ qstr_init(&package_details->name, tmp);
+ config_item_init_type_name(&package_details->item, name,
+ &package_appid_type);
+
+ return &package_details->item;
+}
+
+static ssize_t packages_list_show(struct config_item *item, char *page)
+{
+ struct hashtable_entry *hash_cur_app;
+ struct hashtable_entry *hash_cur_user;
+ int i;
+ int count = 0, written = 0;
+ const char errormsg[] = "<truncated>\n";
+ unsigned int hash;
+
+ rcu_read_lock();
+ hash_for_each_rcu(package_to_appid, i, hash_cur_app, hlist) {
+ written = scnprintf(page + count, PAGE_SIZE - sizeof(errormsg) - count, "%s %d\n",
+ hash_cur_app->key.name, atomic_read(&hash_cur_app->value));
+ hash = hash_cur_app->key.hash;
+ hash_for_each_possible_rcu(package_to_userid, hash_cur_user, hlist, hash) {
+ if (qstr_case_eq(&hash_cur_app->key, &hash_cur_user->key)) {
+ written += scnprintf(page + count + written - 1,
+ PAGE_SIZE - sizeof(errormsg) - count - written + 1,
+ " %d\n", atomic_read(&hash_cur_user->value)) - 1;
+ }
+ }
+ if (count + written == PAGE_SIZE - sizeof(errormsg) - 1) {
+ count += scnprintf(page + count, PAGE_SIZE - count, errormsg);
+ break;
+ }
+ count += written;
+ }
+ rcu_read_unlock();
+
+ return count;
+}
+
+static ssize_t packages_remove_userid_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ unsigned int tmp;
+ int ret;
+
+ ret = kstrtouint(page, 10, &tmp);
+ if (ret)
+ return ret;
+ remove_userid_all_entry(tmp);
+ return count;
+}
+
+static struct configfs_attribute packages_attr_packages_gid_list = {
+ .ca_name = "packages_gid.list",
+ .ca_mode = S_IRUGO,
+ .ca_owner = THIS_MODULE,
+ .show = packages_list_show,
+};
+
+SDCARDFS_CONFIGFS_ATTR_WO(packages_, remove_userid);
+
+static struct configfs_attribute *packages_attrs[] = {
+ &packages_attr_packages_gid_list,
+ &packages_attr_remove_userid,
+ NULL,
+};
+
+/*
+ * Note that, since no extra work is required on ->drop_item(),
+ * no ->drop_item() is provided.
+ */
+static struct configfs_group_operations packages_group_ops = {
+ .make_item = packages_make_item,
+};
+
+static struct config_item_type packages_type = {
+ .ct_group_ops = &packages_group_ops,
+ .ct_attrs = packages_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+struct config_group *sd_default_groups[] = {
+ &extension_group,
+ NULL,
+};
+
+static struct configfs_subsystem sdcardfs_packages = {
+ .su_group = {
+ .cg_item = {
+ .ci_namebuf = "sdcardfs",
+ .ci_type = &packages_type,
+ },
+ .default_groups = sd_default_groups,
+ },
+};
+
+static int configfs_sdcardfs_init(void)
+{
+ int ret, i;
+ struct configfs_subsystem *subsys = &sdcardfs_packages;
+
+ for (i = 0; sd_default_groups[i]; i++)
+ config_group_init(sd_default_groups[i]);
+ config_group_init(&subsys->su_group);
+ mutex_init(&subsys->su_mutex);
+ ret = configfs_register_subsystem(subsys);
+ if (ret) {
+ pr_err("Error %d while registering subsystem %s\n",
+ ret,
+ subsys->su_group.cg_item.ci_namebuf);
+ }
+ return ret;
+}
+
+static void configfs_sdcardfs_exit(void)
+{
+ configfs_unregister_subsystem(&sdcardfs_packages);
+}
+
+int packagelist_init(void)
+{
+ hashtable_entry_cachep =
+ kmem_cache_create("packagelist_hashtable_entry",
+ sizeof(struct hashtable_entry), 0, 0, NULL);
+ if (!hashtable_entry_cachep) {
+ pr_err("sdcardfs: failed creating pkgl_hashtable entry slab cache\n");
+ return -ENOMEM;
+ }
+
+ configfs_sdcardfs_init();
+ return 0;
+}
+
+void packagelist_exit(void)
+{
+ configfs_sdcardfs_exit();
+ packagelist_destroy();
+ kmem_cache_destroy(hashtable_entry_cachep);
+}
diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h
new file mode 100644
index 000000000000..2b67b9a8ef9f
--- /dev/null
+++ b/fs/sdcardfs/sdcardfs.h
@@ -0,0 +1,632 @@
+/*
+ * fs/sdcardfs/sdcardfs.h
+ *
+ * The sdcardfs v2.0
+ * This file system replaces the sdcard daemon on Android
+ * On version 2.0, some of the daemon functions have been ported
+ * to support the multi-user concepts of Android 4.4
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ * Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009 Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed. It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#ifndef _SDCARDFS_H_
+#define _SDCARDFS_H_
+
+#include <linux/dcache.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/aio.h>
+#include <linux/mm.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/seq_file.h>
+#include <linux/statfs.h>
+#include <linux/fs_stack.h>
+#include <linux/magic.h>
+#include <linux/uaccess.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/security.h>
+#include <linux/string.h>
+#include <linux/list.h>
+#include "multiuser.h"
+
+/* the file system name */
+#define SDCARDFS_NAME "sdcardfs"
+
+/* sdcardfs root inode number */
+#define SDCARDFS_ROOT_INO 1
+
+/* useful for tracking code reachability */
+#define UDBG pr_default("DBG:%s:%s:%d\n", __FILE__, __func__, __LINE__)
+
+#define SDCARDFS_DIRENT_SIZE 256
+
+/* temporary static uid settings for development */
+#define AID_ROOT 0 /* uid for accessing /mnt/sdcard & extSdcard */
+#define AID_MEDIA_RW 1023 /* internal media storage write access */
+
+#define AID_SDCARD_RW 1015 /* external storage write access */
+#define AID_SDCARD_R 1028 /* external storage read access */
+#define AID_SDCARD_PICS 1033 /* external storage photos access */
+#define AID_SDCARD_AV 1034 /* external storage audio/video access */
+#define AID_SDCARD_ALL 1035 /* access all users external storage */
+#define AID_MEDIA_OBB 1059 /* obb files */
+
+#define AID_SDCARD_IMAGE 1057
+
+#define AID_PACKAGE_INFO 1027
+
+
+/*
+ * Permissions are handled by our permission function.
+ * We don't want anyone who happens to look at our inode value to prematurely
+ * block access, so store more permissive values. These are probably never
+ * used.
+ */
+#define fixup_tmp_permissions(x) \
+ do { \
+ (x)->i_uid = make_kuid(&init_user_ns, SDCARDFS_I(x)->d_uid); \
+ (x)->i_gid = make_kgid(&init_user_ns, AID_SDCARD_RW); \
+ (x)->i_mode = ((x)->i_mode & S_IFMT) | 0775;\
+ } while (0)
+
+/* OVERRIDE_CRED() and REVERT_CRED()
+ * OVERRIDE_CRED()
+ * backup original task->cred
+ * and modifies task->cred->fsuid/fsgid to specified value.
+ * REVERT_CRED()
+ * restore original task->cred->fsuid/fsgid.
+ * These two macro should be used in pair, and OVERRIDE_CRED() should be
+ * placed at the beginning of a function, right after variable declaration.
+ */
+#define OVERRIDE_CRED(sdcardfs_sbi, saved_cred, info) \
+ do { \
+ saved_cred = override_fsids(sdcardfs_sbi, info); \
+ if (!saved_cred) \
+ return -ENOMEM; \
+ } while (0)
+
+#define OVERRIDE_CRED_PTR(sdcardfs_sbi, saved_cred, info) \
+ do { \
+ saved_cred = override_fsids(sdcardfs_sbi, info); \
+ if (!saved_cred) \
+ return ERR_PTR(-ENOMEM); \
+ } while (0)
+
+#define REVERT_CRED(saved_cred) revert_fsids(saved_cred)
+
+/* Android 5.0 support */
+
+/* Permission mode for a specific node. Controls how file permissions
+ * are derived for children nodes.
+ */
+typedef enum {
+ /* Nothing special; this node should just inherit from its parent. */
+ PERM_INHERIT,
+ /* This node is one level above a normal root; used for legacy layouts
+ * which use the first level to represent user_id.
+ */
+ PERM_PRE_ROOT,
+ /* This node is "/" */
+ PERM_ROOT,
+ /* This node is "/Android" */
+ PERM_ANDROID,
+ /* This node is "/Android/data" */
+ PERM_ANDROID_DATA,
+ /* This node is "/Android/obb" */
+ PERM_ANDROID_OBB,
+ /* This node is "/Android/media" */
+ PERM_ANDROID_MEDIA,
+ /* This node is "/Android/[data|media|obb]/[package]" */
+ PERM_ANDROID_PACKAGE,
+ /* This node is "/Android/[data|media|obb]/[package]/cache" */
+ PERM_ANDROID_PACKAGE_CACHE,
+} perm_t;
+
+struct sdcardfs_sb_info;
+struct sdcardfs_mount_options;
+struct sdcardfs_inode_info;
+
+/* Do not directly use this function. Use OVERRIDE_CRED() instead. */
+const struct cred *override_fsids(struct sdcardfs_sb_info *sbi, struct sdcardfs_inode_info *info);
+/* Do not directly use this function, use REVERT_CRED() instead. */
+void revert_fsids(const struct cred *old_cred);
+
+/* operations vectors defined in specific files */
+extern const struct file_operations sdcardfs_main_fops;
+extern const struct file_operations sdcardfs_dir_fops;
+extern const struct inode_operations sdcardfs_main_iops;
+extern const struct inode_operations sdcardfs_dir_iops;
+extern const struct inode_operations sdcardfs_symlink_iops;
+extern const struct super_operations sdcardfs_sops;
+extern const struct dentry_operations sdcardfs_ci_dops;
+extern const struct address_space_operations sdcardfs_aops, sdcardfs_dummy_aops;
+extern const struct vm_operations_struct sdcardfs_vm_ops;
+
+extern int sdcardfs_init_inode_cache(void);
+extern void sdcardfs_destroy_inode_cache(void);
+extern int sdcardfs_init_dentry_cache(void);
+extern void sdcardfs_destroy_dentry_cache(void);
+extern int new_dentry_private_data(struct dentry *dentry);
+extern void free_dentry_private_data(struct dentry *dentry);
+extern struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry,
+ unsigned int flags);
+extern struct inode *sdcardfs_iget(struct super_block *sb,
+ struct inode *lower_inode, userid_t id);
+extern int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb,
+ struct path *lower_path, userid_t id);
+
+/* file private data */
+struct sdcardfs_file_info {
+ struct file *lower_file;
+ const struct vm_operations_struct *lower_vm_ops;
+};
+
+/* sdcardfs inode data in memory */
+struct sdcardfs_inode_info {
+ struct inode *lower_inode;
+ /* state derived based on current position in hierachy */
+ perm_t perm;
+ userid_t userid;
+ uid_t d_uid;
+ bool under_android;
+ bool under_cache;
+ bool under_obb;
+ /* top folder for ownership */
+ struct inode *top;
+
+ struct inode vfs_inode;
+};
+
+
+/* sdcardfs dentry data in memory */
+struct sdcardfs_dentry_info {
+ spinlock_t lock; /* protects lower_path */
+ struct path lower_path;
+ struct path orig_path;
+};
+
+struct sdcardfs_mount_options {
+ uid_t fs_low_uid;
+ gid_t fs_low_gid;
+ userid_t fs_user_id;
+ bool multiuser;
+ unsigned int reserved_mb;
+};
+
+struct sdcardfs_vfsmount_options {
+ gid_t gid;
+ mode_t mask;
+};
+
+extern int parse_options_remount(struct super_block *sb, char *options, int silent,
+ struct sdcardfs_vfsmount_options *vfsopts);
+
+/* sdcardfs super-block data in memory */
+struct sdcardfs_sb_info {
+ struct super_block *sb;
+ struct super_block *lower_sb;
+ /* derived perm policy : some of options have been added
+ * to sdcardfs_mount_options (Android 4.4 support)
+ */
+ struct sdcardfs_mount_options options;
+ spinlock_t lock; /* protects obbpath */
+ char *obbpath_s;
+ struct path obbpath;
+ void *pkgl_id;
+ struct list_head list;
+};
+
+/*
+ * inode to private data
+ *
+ * Since we use containers and the struct inode is _inside_ the
+ * sdcardfs_inode_info structure, SDCARDFS_I will always (given a non-NULL
+ * inode pointer), return a valid non-NULL pointer.
+ */
+static inline struct sdcardfs_inode_info *SDCARDFS_I(const struct inode *inode)
+{
+ return container_of(inode, struct sdcardfs_inode_info, vfs_inode);
+}
+
+/* dentry to private data */
+#define SDCARDFS_D(dent) ((struct sdcardfs_dentry_info *)(dent)->d_fsdata)
+
+/* superblock to private data */
+#define SDCARDFS_SB(super) ((struct sdcardfs_sb_info *)(super)->s_fs_info)
+
+/* file to private Data */
+#define SDCARDFS_F(file) ((struct sdcardfs_file_info *)((file)->private_data))
+
+/* file to lower file */
+static inline struct file *sdcardfs_lower_file(const struct file *f)
+{
+ return SDCARDFS_F(f)->lower_file;
+}
+
+static inline void sdcardfs_set_lower_file(struct file *f, struct file *val)
+{
+ SDCARDFS_F(f)->lower_file = val;
+}
+
+/* inode to lower inode. */
+static inline struct inode *sdcardfs_lower_inode(const struct inode *i)
+{
+ return SDCARDFS_I(i)->lower_inode;
+}
+
+static inline void sdcardfs_set_lower_inode(struct inode *i, struct inode *val)
+{
+ SDCARDFS_I(i)->lower_inode = val;
+}
+
+/* superblock to lower superblock */
+static inline struct super_block *sdcardfs_lower_super(
+ const struct super_block *sb)
+{
+ return SDCARDFS_SB(sb)->lower_sb;
+}
+
+static inline void sdcardfs_set_lower_super(struct super_block *sb,
+ struct super_block *val)
+{
+ SDCARDFS_SB(sb)->lower_sb = val;
+}
+
+/* path based (dentry/mnt) macros */
+static inline void pathcpy(struct path *dst, const struct path *src)
+{
+ dst->dentry = src->dentry;
+ dst->mnt = src->mnt;
+}
+
+/* sdcardfs_get_pname functions calls path_get()
+ * therefore, the caller must call "proper" path_put functions
+ */
+#define SDCARDFS_DENT_FUNC(pname) \
+static inline void sdcardfs_get_##pname(const struct dentry *dent, \
+ struct path *pname) \
+{ \
+ spin_lock(&SDCARDFS_D(dent)->lock); \
+ pathcpy(pname, &SDCARDFS_D(dent)->pname); \
+ path_get(pname); \
+ spin_unlock(&SDCARDFS_D(dent)->lock); \
+ return; \
+} \
+static inline void sdcardfs_put_##pname(const struct dentry *dent, \
+ struct path *pname) \
+{ \
+ path_put(pname); \
+ return; \
+} \
+static inline void sdcardfs_set_##pname(const struct dentry *dent, \
+ struct path *pname) \
+{ \
+ spin_lock(&SDCARDFS_D(dent)->lock); \
+ pathcpy(&SDCARDFS_D(dent)->pname, pname); \
+ spin_unlock(&SDCARDFS_D(dent)->lock); \
+ return; \
+} \
+static inline void sdcardfs_reset_##pname(const struct dentry *dent) \
+{ \
+ spin_lock(&SDCARDFS_D(dent)->lock); \
+ SDCARDFS_D(dent)->pname.dentry = NULL; \
+ SDCARDFS_D(dent)->pname.mnt = NULL; \
+ spin_unlock(&SDCARDFS_D(dent)->lock); \
+ return; \
+} \
+static inline void sdcardfs_put_reset_##pname(const struct dentry *dent) \
+{ \
+ struct path pname; \
+ spin_lock(&SDCARDFS_D(dent)->lock); \
+ if (SDCARDFS_D(dent)->pname.dentry) { \
+ pathcpy(&pname, &SDCARDFS_D(dent)->pname); \
+ SDCARDFS_D(dent)->pname.dentry = NULL; \
+ SDCARDFS_D(dent)->pname.mnt = NULL; \
+ spin_unlock(&SDCARDFS_D(dent)->lock); \
+ path_put(&pname); \
+ } else \
+ spin_unlock(&SDCARDFS_D(dent)->lock); \
+ return; \
+}
+
+SDCARDFS_DENT_FUNC(lower_path)
+SDCARDFS_DENT_FUNC(orig_path)
+
+static inline bool sbinfo_has_sdcard_magic(struct sdcardfs_sb_info *sbinfo)
+{
+ return sbinfo && sbinfo->sb && sbinfo->sb->s_magic == SDCARDFS_SUPER_MAGIC;
+}
+
+/* grab a refererence if we aren't linking to ourself */
+static inline void set_top(struct sdcardfs_inode_info *info, struct inode *top)
+{
+ struct inode *old_top = NULL;
+
+ BUG_ON(IS_ERR_OR_NULL(top));
+ if (info->top && info->top != &info->vfs_inode)
+ old_top = info->top;
+ if (top != &info->vfs_inode)
+ igrab(top);
+ info->top = top;
+ iput(old_top);
+}
+
+static inline struct inode *grab_top(struct sdcardfs_inode_info *info)
+{
+ struct inode *top = info->top;
+
+ if (top)
+ return igrab(top);
+ else
+ return NULL;
+}
+
+static inline void release_top(struct sdcardfs_inode_info *info)
+{
+ iput(info->top);
+}
+
+static inline int get_gid(struct vfsmount *mnt, struct sdcardfs_inode_info *info)
+{
+ struct sdcardfs_vfsmount_options *opts = mnt->data;
+
+ if (opts->gid == AID_SDCARD_RW)
+ /* As an optimization, certain trusted system components only run
+ * as owner but operate across all users. Since we're now handing
+ * out the sdcard_rw GID only to trusted apps, we're okay relaxing
+ * the user boundary enforcement for the default view. The UIDs
+ * assigned to app directories are still multiuser aware.
+ */
+ return AID_SDCARD_RW;
+ else
+ return multiuser_get_uid(info->userid, opts->gid);
+}
+
+static inline int get_mode(struct vfsmount *mnt, struct sdcardfs_inode_info *info)
+{
+ int owner_mode;
+ int filtered_mode;
+ struct sdcardfs_vfsmount_options *opts = mnt->data;
+ int visible_mode = 0775 & ~opts->mask;
+
+
+ if (info->perm == PERM_PRE_ROOT) {
+ /* Top of multi-user view should always be visible to ensure
+ * secondary users can traverse inside.
+ */
+ visible_mode = 0711;
+ } else if (info->under_android) {
+ /* Block "other" access to Android directories, since only apps
+ * belonging to a specific user should be in there; we still
+ * leave +x open for the default view.
+ */
+ if (opts->gid == AID_SDCARD_RW)
+ visible_mode = visible_mode & ~0006;
+ else
+ visible_mode = visible_mode & ~0007;
+ }
+ owner_mode = info->lower_inode->i_mode & 0700;
+ filtered_mode = visible_mode & (owner_mode | (owner_mode >> 3) | (owner_mode >> 6));
+ return filtered_mode;
+}
+
+static inline int has_graft_path(const struct dentry *dent)
+{
+ int ret = 0;
+
+ spin_lock(&SDCARDFS_D(dent)->lock);
+ if (SDCARDFS_D(dent)->orig_path.dentry != NULL)
+ ret = 1;
+ spin_unlock(&SDCARDFS_D(dent)->lock);
+
+ return ret;
+}
+
+static inline void sdcardfs_get_real_lower(const struct dentry *dent,
+ struct path *real_lower)
+{
+ /* in case of a local obb dentry
+ * the orig_path should be returned
+ */
+ if (has_graft_path(dent))
+ sdcardfs_get_orig_path(dent, real_lower);
+ else
+ sdcardfs_get_lower_path(dent, real_lower);
+}
+
+static inline void sdcardfs_put_real_lower(const struct dentry *dent,
+ struct path *real_lower)
+{
+ if (has_graft_path(dent))
+ sdcardfs_put_orig_path(dent, real_lower);
+ else
+ sdcardfs_put_lower_path(dent, real_lower);
+}
+
+extern struct mutex sdcardfs_super_list_lock;
+extern struct list_head sdcardfs_super_list;
+
+/* for packagelist.c */
+extern appid_t get_appid(const char *app_name);
+extern appid_t get_ext_gid(const char *app_name);
+extern appid_t is_excluded(const char *app_name, userid_t userid);
+extern int check_caller_access_to_name(struct inode *parent_node, const struct qstr *name);
+extern int open_flags_to_access_mode(int open_flags);
+extern int packagelist_init(void);
+extern void packagelist_exit(void);
+
+/* for derived_perm.c */
+#define BY_NAME (1 << 0)
+#define BY_USERID (1 << 1)
+struct limit_search {
+ unsigned int flags;
+ struct qstr name;
+ userid_t userid;
+};
+
+extern void setup_derived_state(struct inode *inode, perm_t perm, userid_t userid,
+ uid_t uid, bool under_android, struct inode *top);
+extern void get_derived_permission(struct dentry *parent, struct dentry *dentry);
+extern void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, const struct qstr *name);
+extern void fixup_perms_recursive(struct dentry *dentry, struct limit_search *limit);
+
+extern void update_derived_permission_lock(struct dentry *dentry);
+void fixup_lower_ownership(struct dentry *dentry, const char *name);
+extern int need_graft_path(struct dentry *dentry);
+extern int is_base_obbpath(struct dentry *dentry);
+extern int is_obbpath_invalid(struct dentry *dentry);
+extern int setup_obb_dentry(struct dentry *dentry, struct path *lower_path);
+
+/* locking helpers */
+static inline struct dentry *lock_parent(struct dentry *dentry)
+{
+ struct dentry *dir = dget_parent(dentry);
+
+ mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
+ return dir;
+}
+
+static inline void unlock_dir(struct dentry *dir)
+{
+ mutex_unlock(&d_inode(dir)->i_mutex);
+ dput(dir);
+}
+
+static inline int prepare_dir(const char *path_s, uid_t uid, gid_t gid, mode_t mode)
+{
+ int err;
+ struct dentry *dent;
+ struct iattr attrs;
+ struct path parent;
+
+ dent = kern_path_locked(path_s, &parent);
+ if (IS_ERR(dent)) {
+ err = PTR_ERR(dent);
+ if (err == -EEXIST)
+ err = 0;
+ goto out_unlock;
+ }
+
+ err = vfs_mkdir2(parent.mnt, d_inode(parent.dentry), dent, mode);
+ if (err) {
+ if (err == -EEXIST)
+ err = 0;
+ goto out_dput;
+ }
+
+ attrs.ia_uid = make_kuid(&init_user_ns, uid);
+ attrs.ia_gid = make_kgid(&init_user_ns, gid);
+ attrs.ia_valid = ATTR_UID | ATTR_GID;
+ mutex_lock(&d_inode(dent)->i_mutex);
+ notify_change2(parent.mnt, dent, &attrs, NULL);
+ mutex_unlock(&d_inode(dent)->i_mutex);
+
+out_dput:
+ dput(dent);
+
+out_unlock:
+ /* parent dentry locked by lookup_create */
+ mutex_unlock(&d_inode(parent.dentry)->i_mutex);
+ path_put(&parent);
+ return err;
+}
+
+/*
+ * Return 1, if a disk has enough free space, otherwise 0.
+ * We assume that any files can not be overwritten.
+ */
+static inline int check_min_free_space(struct dentry *dentry, size_t size, int dir)
+{
+ int err;
+ struct path lower_path;
+ struct kstatfs statfs;
+ u64 avail;
+ struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
+
+ if (sbi->options.reserved_mb) {
+ /* Get fs stat of lower filesystem. */
+ sdcardfs_get_lower_path(dentry, &lower_path);
+ err = vfs_statfs(&lower_path, &statfs);
+ sdcardfs_put_lower_path(dentry, &lower_path);
+
+ if (unlikely(err))
+ return 0;
+
+ /* Invalid statfs informations. */
+ if (unlikely(statfs.f_bsize == 0))
+ return 0;
+
+ /* if you are checking directory, set size to f_bsize. */
+ if (unlikely(dir))
+ size = statfs.f_bsize;
+
+ /* available size */
+ avail = statfs.f_bavail * statfs.f_bsize;
+
+ /* not enough space */
+ if ((u64)size > avail)
+ return 0;
+
+ /* enough space */
+ if ((avail - size) > (sbi->options.reserved_mb * 1024 * 1024))
+ return 1;
+
+ return 0;
+ } else
+ return 1;
+}
+
+/*
+ * Copies attrs and maintains sdcardfs managed attrs
+ * Since our permission check handles all special permissions, set those to be open
+ */
+static inline void sdcardfs_copy_and_fix_attrs(struct inode *dest, const struct inode *src)
+{
+ dest->i_mode = (src->i_mode & S_IFMT) | S_IRWXU | S_IRWXG |
+ S_IROTH | S_IXOTH; /* 0775 */
+ dest->i_uid = make_kuid(&init_user_ns, SDCARDFS_I(dest)->d_uid);
+ dest->i_gid = make_kgid(&init_user_ns, AID_SDCARD_RW);
+ dest->i_rdev = src->i_rdev;
+ dest->i_atime = src->i_atime;
+ dest->i_mtime = src->i_mtime;
+ dest->i_ctime = src->i_ctime;
+ dest->i_blkbits = src->i_blkbits;
+ dest->i_flags = src->i_flags;
+ set_nlink(dest, src->i_nlink);
+}
+
+static inline bool str_case_eq(const char *s1, const char *s2)
+{
+ return !strcasecmp(s1, s2);
+}
+
+static inline bool str_n_case_eq(const char *s1, const char *s2, size_t len)
+{
+ return !strncasecmp(s1, s2, len);
+}
+
+static inline bool qstr_case_eq(const struct qstr *q1, const struct qstr *q2)
+{
+ return q1->len == q2->len && str_case_eq(q1->name, q2->name);
+}
+
+#define QSTR_LITERAL(string) QSTR_INIT(string, sizeof(string)-1)
+
+#endif /* not _SDCARDFS_H_ */
diff --git a/fs/sdcardfs/super.c b/fs/sdcardfs/super.c
new file mode 100644
index 000000000000..a3393e959c63
--- /dev/null
+++ b/fs/sdcardfs/super.c
@@ -0,0 +1,278 @@
+/*
+ * fs/sdcardfs/super.c
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ * Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009 Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed. It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#include "sdcardfs.h"
+
+/*
+ * The inode cache is used with alloc_inode for both our inode info and the
+ * vfs inode.
+ */
+static struct kmem_cache *sdcardfs_inode_cachep;
+
+/* final actions when unmounting a file system */
+static void sdcardfs_put_super(struct super_block *sb)
+{
+ struct sdcardfs_sb_info *spd;
+ struct super_block *s;
+
+ spd = SDCARDFS_SB(sb);
+ if (!spd)
+ return;
+
+ if (spd->obbpath_s) {
+ kfree(spd->obbpath_s);
+ path_put(&spd->obbpath);
+ }
+
+ /* decrement lower super references */
+ s = sdcardfs_lower_super(sb);
+ sdcardfs_set_lower_super(sb, NULL);
+ atomic_dec(&s->s_active);
+
+ kfree(spd);
+ sb->s_fs_info = NULL;
+}
+
+static int sdcardfs_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+ int err;
+ struct path lower_path;
+ u32 min_blocks;
+ struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
+
+ sdcardfs_get_lower_path(dentry, &lower_path);
+ err = vfs_statfs(&lower_path, buf);
+ sdcardfs_put_lower_path(dentry, &lower_path);
+
+ if (sbi->options.reserved_mb) {
+ /* Invalid statfs informations. */
+ if (buf->f_bsize == 0) {
+ pr_err("Returned block size is zero.\n");
+ return -EINVAL;
+ }
+
+ min_blocks = ((sbi->options.reserved_mb * 1024 * 1024)/buf->f_bsize);
+ buf->f_blocks -= min_blocks;
+
+ if (buf->f_bavail > min_blocks)
+ buf->f_bavail -= min_blocks;
+ else
+ buf->f_bavail = 0;
+
+ /* Make reserved blocks invisiable to media storage */
+ buf->f_bfree = buf->f_bavail;
+ }
+
+ /* set return buf to our f/s to avoid confusing user-level utils */
+ buf->f_type = SDCARDFS_SUPER_MAGIC;
+
+ return err;
+}
+
+/*
+ * @flags: numeric mount options
+ * @options: mount options string
+ */
+static int sdcardfs_remount_fs(struct super_block *sb, int *flags, char *options)
+{
+ int err = 0;
+
+ /*
+ * The VFS will take care of "ro" and "rw" flags among others. We
+ * can safely accept a few flags (RDONLY, MANDLOCK), and honor
+ * SILENT, but anything else left over is an error.
+ */
+ if ((*flags & ~(MS_RDONLY | MS_MANDLOCK | MS_SILENT)) != 0) {
+ pr_err("sdcardfs: remount flags 0x%x unsupported\n", *flags);
+ err = -EINVAL;
+ }
+
+ return err;
+}
+
+/*
+ * @mnt: mount point we are remounting
+ * @sb: superblock we are remounting
+ * @flags: numeric mount options
+ * @options: mount options string
+ */
+static int sdcardfs_remount_fs2(struct vfsmount *mnt, struct super_block *sb,
+ int *flags, char *options)
+{
+ int err = 0;
+
+ /*
+ * The VFS will take care of "ro" and "rw" flags among others. We
+ * can safely accept a few flags (RDONLY, MANDLOCK), and honor
+ * SILENT, but anything else left over is an error.
+ */
+ if ((*flags & ~(MS_RDONLY | MS_MANDLOCK | MS_SILENT | MS_REMOUNT)) != 0) {
+ pr_err("sdcardfs: remount flags 0x%x unsupported\n", *flags);
+ err = -EINVAL;
+ }
+ pr_info("Remount options were %s for vfsmnt %p.\n", options, mnt);
+ err = parse_options_remount(sb, options, *flags & ~MS_SILENT, mnt->data);
+
+
+ return err;
+}
+
+static void *sdcardfs_clone_mnt_data(void *data)
+{
+ struct sdcardfs_vfsmount_options *opt = kmalloc(sizeof(struct sdcardfs_vfsmount_options), GFP_KERNEL);
+ struct sdcardfs_vfsmount_options *old = data;
+
+ if (!opt)
+ return NULL;
+ opt->gid = old->gid;
+ opt->mask = old->mask;
+ return opt;
+}
+
+static void sdcardfs_copy_mnt_data(void *data, void *newdata)
+{
+ struct sdcardfs_vfsmount_options *old = data;
+ struct sdcardfs_vfsmount_options *new = newdata;
+
+ old->gid = new->gid;
+ old->mask = new->mask;
+}
+
+/*
+ * Called by iput() when the inode reference count reached zero
+ * and the inode is not hashed anywhere. Used to clear anything
+ * that needs to be, before the inode is completely destroyed and put
+ * on the inode free list.
+ */
+static void sdcardfs_evict_inode(struct inode *inode)
+{
+ struct inode *lower_inode;
+
+ truncate_inode_pages(&inode->i_data, 0);
+ clear_inode(inode);
+ /*
+ * Decrement a reference to a lower_inode, which was incremented
+ * by our read_inode when it was created initially.
+ */
+ lower_inode = sdcardfs_lower_inode(inode);
+ sdcardfs_set_lower_inode(inode, NULL);
+ set_top(SDCARDFS_I(inode), inode);
+ iput(lower_inode);
+}
+
+static struct inode *sdcardfs_alloc_inode(struct super_block *sb)
+{
+ struct sdcardfs_inode_info *i;
+
+ i = kmem_cache_alloc(sdcardfs_inode_cachep, GFP_KERNEL);
+ if (!i)
+ return NULL;
+
+ /* memset everything up to the inode to 0 */
+ memset(i, 0, offsetof(struct sdcardfs_inode_info, vfs_inode));
+
+ i->vfs_inode.i_version = 1;
+ return &i->vfs_inode;
+}
+
+static void sdcardfs_destroy_inode(struct inode *inode)
+{
+ kmem_cache_free(sdcardfs_inode_cachep, SDCARDFS_I(inode));
+}
+
+/* sdcardfs inode cache constructor */
+static void init_once(void *obj)
+{
+ struct sdcardfs_inode_info *i = obj;
+
+ inode_init_once(&i->vfs_inode);
+}
+
+int sdcardfs_init_inode_cache(void)
+{
+ int err = 0;
+
+ sdcardfs_inode_cachep =
+ kmem_cache_create("sdcardfs_inode_cache",
+ sizeof(struct sdcardfs_inode_info), 0,
+ SLAB_RECLAIM_ACCOUNT, init_once);
+ if (!sdcardfs_inode_cachep)
+ err = -ENOMEM;
+ return err;
+}
+
+/* sdcardfs inode cache destructor */
+void sdcardfs_destroy_inode_cache(void)
+{
+ kmem_cache_destroy(sdcardfs_inode_cachep);
+}
+
+/*
+ * Used only in nfs, to kill any pending RPC tasks, so that subsequent
+ * code can actually succeed and won't leave tasks that need handling.
+ */
+static void sdcardfs_umount_begin(struct super_block *sb)
+{
+ struct super_block *lower_sb;
+
+ lower_sb = sdcardfs_lower_super(sb);
+ if (lower_sb && lower_sb->s_op && lower_sb->s_op->umount_begin)
+ lower_sb->s_op->umount_begin(lower_sb);
+}
+
+static int sdcardfs_show_options(struct vfsmount *mnt, struct seq_file *m,
+ struct dentry *root)
+{
+ struct sdcardfs_sb_info *sbi = SDCARDFS_SB(root->d_sb);
+ struct sdcardfs_mount_options *opts = &sbi->options;
+ struct sdcardfs_vfsmount_options *vfsopts = mnt->data;
+
+ if (opts->fs_low_uid != 0)
+ seq_printf(m, ",fsuid=%u", opts->fs_low_uid);
+ if (opts->fs_low_gid != 0)
+ seq_printf(m, ",fsgid=%u", opts->fs_low_gid);
+ if (vfsopts->gid != 0)
+ seq_printf(m, ",gid=%u", vfsopts->gid);
+ if (opts->multiuser)
+ seq_puts(m, ",multiuser");
+ if (vfsopts->mask)
+ seq_printf(m, ",mask=%u", vfsopts->mask);
+ if (opts->fs_user_id)
+ seq_printf(m, ",userid=%u", opts->fs_user_id);
+ if (opts->reserved_mb != 0)
+ seq_printf(m, ",reserved=%uMB", opts->reserved_mb);
+
+ return 0;
+};
+
+const struct super_operations sdcardfs_sops = {
+ .put_super = sdcardfs_put_super,
+ .statfs = sdcardfs_statfs,
+ .remount_fs = sdcardfs_remount_fs,
+ .remount_fs2 = sdcardfs_remount_fs2,
+ .clone_mnt_data = sdcardfs_clone_mnt_data,
+ .copy_mnt_data = sdcardfs_copy_mnt_data,
+ .evict_inode = sdcardfs_evict_inode,
+ .umount_begin = sdcardfs_umount_begin,
+ .show_options2 = sdcardfs_show_options,
+ .alloc_inode = sdcardfs_alloc_inode,
+ .destroy_inode = sdcardfs_destroy_inode,
+ .drop_inode = generic_delete_inode,
+};
diff --git a/fs/select.c b/fs/select.c
index 015547330e88..09e71a00a9b8 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -70,9 +70,9 @@ static long __estimate_accuracy(struct timespec *tv)
return slack;
}
-long select_estimate_accuracy(struct timespec *tv)
+u64 select_estimate_accuracy(struct timespec *tv)
{
- unsigned long ret;
+ u64 ret;
struct timespec now;
/*
@@ -402,7 +402,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
struct poll_wqueues table;
poll_table *wait;
int retval, i, timed_out = 0;
- unsigned long slack = 0;
+ u64 slack = 0;
unsigned int busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0;
unsigned long busy_end = 0;
@@ -784,7 +784,7 @@ static int do_poll(unsigned int nfds, struct poll_list *list,
poll_table* pt = &wait->pt;
ktime_t expire, *to = NULL;
int timed_out = 0, count = 0;
- unsigned long slack = 0;
+ u64 slack = 0;
unsigned int busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0;
unsigned long busy_end = 0;
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
index ffb093e72b6c..6dd158a216f4 100644
--- a/fs/squashfs/Kconfig
+++ b/fs/squashfs/Kconfig
@@ -26,34 +26,6 @@ config SQUASHFS
If unsure, say N.
choice
- prompt "File decompression options"
- depends on SQUASHFS
- help
- Squashfs now supports two options for decompressing file
- data. Traditionally Squashfs has decompressed into an
- intermediate buffer and then memcopied it into the page cache.
- Squashfs now supports the ability to decompress directly into
- the page cache.
-
- If unsure, select "Decompress file data into an intermediate buffer"
-
-config SQUASHFS_FILE_CACHE
- bool "Decompress file data into an intermediate buffer"
- help
- Decompress file data into an intermediate buffer and then
- memcopy it into the page cache.
-
-config SQUASHFS_FILE_DIRECT
- bool "Decompress files directly into the page cache"
- help
- Directly decompress file data into the page cache.
- Doing so can significantly improve performance because
- it eliminates a memcpy and it also removes the lock contention
- on the single buffer.
-
-endchoice
-
-choice
prompt "Decompressor parallelisation options"
depends on SQUASHFS
help
diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile
index 246a6f329d89..fe51f1507ed1 100644
--- a/fs/squashfs/Makefile
+++ b/fs/squashfs/Makefile
@@ -5,8 +5,7 @@
obj-$(CONFIG_SQUASHFS) += squashfs.o
squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o
squashfs-y += namei.o super.o symlink.o decompressor.o
-squashfs-$(CONFIG_SQUASHFS_FILE_CACHE) += file_cache.o
-squashfs-$(CONFIG_SQUASHFS_FILE_DIRECT) += file_direct.o page_actor.o
+squashfs-y += file_direct.o page_actor.o
squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o
squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o
squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 0cea9b9236d0..2eb66decc5ab 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -28,9 +28,12 @@
#include <linux/fs.h>
#include <linux/vfs.h>
+#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/string.h>
+#include <linux/pagemap.h>
#include <linux/buffer_head.h>
+#include <linux/workqueue.h>
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
@@ -38,177 +41,434 @@
#include "decompressor.h"
#include "page_actor.h"
-/*
- * Read the metadata block length, this is stored in the first two
- * bytes of the metadata block.
- */
-static struct buffer_head *get_block_length(struct super_block *sb,
- u64 *cur_index, int *offset, int *length)
+static struct workqueue_struct *squashfs_read_wq;
+
+struct squashfs_read_request {
+ struct super_block *sb;
+ u64 index;
+ int length;
+ int compressed;
+ int offset;
+ u64 read_end;
+ struct squashfs_page_actor *output;
+ enum {
+ SQUASHFS_COPY,
+ SQUASHFS_DECOMPRESS,
+ SQUASHFS_METADATA,
+ } data_processing;
+ bool synchronous;
+
+ /*
+ * If the read is synchronous, it is possible to retrieve information
+ * about the request by setting these pointers.
+ */
+ int *res;
+ int *bytes_read;
+ int *bytes_uncompressed;
+
+ int nr_buffers;
+ struct buffer_head **bh;
+ struct work_struct offload;
+};
+
+struct squashfs_bio_request {
+ struct buffer_head **bh;
+ int nr_buffers;
+};
+
+static int squashfs_bio_submit(struct squashfs_read_request *req);
+
+int squashfs_init_read_wq(void)
{
- struct squashfs_sb_info *msblk = sb->s_fs_info;
- struct buffer_head *bh;
+ squashfs_read_wq = create_workqueue("SquashFS read wq");
+ return !!squashfs_read_wq;
+}
+
+void squashfs_destroy_read_wq(void)
+{
+ flush_workqueue(squashfs_read_wq);
+ destroy_workqueue(squashfs_read_wq);
+}
+
+static void free_read_request(struct squashfs_read_request *req, int error)
+{
+ if (!req->synchronous)
+ squashfs_page_actor_free(req->output, error);
+ if (req->res)
+ *(req->res) = error;
+ kfree(req->bh);
+ kfree(req);
+}
+
+static void squashfs_process_blocks(struct squashfs_read_request *req)
+{
+ int error = 0;
+ int bytes, i, length;
+ struct squashfs_sb_info *msblk = req->sb->s_fs_info;
+ struct squashfs_page_actor *actor = req->output;
+ struct buffer_head **bh = req->bh;
+ int nr_buffers = req->nr_buffers;
+
+ for (i = 0; i < nr_buffers; ++i) {
+ if (!bh[i])
+ continue;
+ wait_on_buffer(bh[i]);
+ if (!buffer_uptodate(bh[i]))
+ error = -EIO;
+ }
+ if (error)
+ goto cleanup;
+
+ if (req->data_processing == SQUASHFS_METADATA) {
+ /* Extract the length of the metadata block */
+ if (req->offset != msblk->devblksize - 1)
+ length = *((u16 *)(bh[0]->b_data + req->offset));
+ else {
+ length = bh[0]->b_data[req->offset];
+ length |= bh[1]->b_data[0] << 8;
+ }
+ req->compressed = SQUASHFS_COMPRESSED(length);
+ req->data_processing = req->compressed ? SQUASHFS_DECOMPRESS
+ : SQUASHFS_COPY;
+ length = SQUASHFS_COMPRESSED_SIZE(length);
+ if (req->index + length + 2 > req->read_end) {
+ for (i = 0; i < nr_buffers; ++i)
+ put_bh(bh[i]);
+ kfree(bh);
+ req->length = length;
+ req->index += 2;
+ squashfs_bio_submit(req);
+ return;
+ }
+ req->length = length;
+ req->offset = (req->offset + 2) % PAGE_SIZE;
+ if (req->offset < 2) {
+ put_bh(bh[0]);
+ ++bh;
+ --nr_buffers;
+ }
+ }
+ if (req->bytes_read)
+ *(req->bytes_read) = req->length;
- bh = sb_bread(sb, *cur_index);
- if (bh == NULL)
- return NULL;
-
- if (msblk->devblksize - *offset == 1) {
- *length = (unsigned char) bh->b_data[*offset];
- put_bh(bh);
- bh = sb_bread(sb, ++(*cur_index));
- if (bh == NULL)
- return NULL;
- *length |= (unsigned char) bh->b_data[0] << 8;
- *offset = 1;
- } else {
- *length = (unsigned char) bh->b_data[*offset] |
- (unsigned char) bh->b_data[*offset + 1] << 8;
- *offset += 2;
-
- if (*offset == msblk->devblksize) {
- put_bh(bh);
- bh = sb_bread(sb, ++(*cur_index));
- if (bh == NULL)
- return NULL;
- *offset = 0;
+ if (req->data_processing == SQUASHFS_COPY) {
+ squashfs_bh_to_actor(bh, nr_buffers, req->output, req->offset,
+ req->length, msblk->devblksize);
+ } else if (req->data_processing == SQUASHFS_DECOMPRESS) {
+ req->length = squashfs_decompress(msblk, bh, nr_buffers,
+ req->offset, req->length, actor);
+ if (req->length < 0) {
+ error = -EIO;
+ goto cleanup;
}
}
- return bh;
+ /* Last page may have trailing bytes not filled */
+ bytes = req->length % PAGE_SIZE;
+ if (bytes && actor->page[actor->pages - 1])
+ zero_user_segment(actor->page[actor->pages - 1], bytes,
+ PAGE_SIZE);
+
+cleanup:
+ if (req->bytes_uncompressed)
+ *(req->bytes_uncompressed) = req->length;
+ if (error) {
+ for (i = 0; i < nr_buffers; ++i)
+ if (bh[i])
+ put_bh(bh[i]);
+ }
+ free_read_request(req, error);
}
+static void read_wq_handler(struct work_struct *work)
+{
+ squashfs_process_blocks(container_of(work,
+ struct squashfs_read_request, offload));
+}
-/*
- * Read and decompress a metadata block or datablock. Length is non-zero
- * if a datablock is being read (the size is stored elsewhere in the
- * filesystem), otherwise the length is obtained from the first two bytes of
- * the metadata block. A bit in the length field indicates if the block
- * is stored uncompressed in the filesystem (usually because compression
- * generated a larger block - this does occasionally happen with compression
- * algorithms).
- */
-int squashfs_read_data(struct super_block *sb, u64 index, int length,
- u64 *next_index, struct squashfs_page_actor *output)
+static void squashfs_bio_end_io(struct bio *bio)
{
- struct squashfs_sb_info *msblk = sb->s_fs_info;
- struct buffer_head **bh;
- int offset = index & ((1 << msblk->devblksize_log2) - 1);
- u64 cur_index = index >> msblk->devblksize_log2;
- int bytes, compressed, b = 0, k = 0, avail, i;
+ int i;
+ int error = bio->bi_error;
+ struct squashfs_bio_request *bio_req = bio->bi_private;
+
+ bio_put(bio);
+
+ for (i = 0; i < bio_req->nr_buffers; ++i) {
+ if (!bio_req->bh[i])
+ continue;
+ if (!error)
+ set_buffer_uptodate(bio_req->bh[i]);
+ else
+ clear_buffer_uptodate(bio_req->bh[i]);
+ unlock_buffer(bio_req->bh[i]);
+ }
+ kfree(bio_req);
+}
+
+static int bh_is_optional(struct squashfs_read_request *req, int idx)
+{
+ int start_idx, end_idx;
+ struct squashfs_sb_info *msblk = req->sb->s_fs_info;
- bh = kcalloc(((output->length + msblk->devblksize - 1)
- >> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL);
- if (bh == NULL)
+ start_idx = (idx * msblk->devblksize - req->offset) / PAGE_CACHE_SIZE;
+ end_idx = ((idx + 1) * msblk->devblksize - req->offset + 1) / PAGE_CACHE_SIZE;
+ if (start_idx >= req->output->pages)
+ return 1;
+ if (start_idx < 0)
+ start_idx = end_idx;
+ if (end_idx >= req->output->pages)
+ end_idx = start_idx;
+ return !req->output->page[start_idx] && !req->output->page[end_idx];
+}
+
+static int actor_getblks(struct squashfs_read_request *req, u64 block)
+{
+ int i;
+
+ req->bh = kmalloc_array(req->nr_buffers, sizeof(*(req->bh)), GFP_NOIO);
+ if (!req->bh)
return -ENOMEM;
- if (length) {
+ for (i = 0; i < req->nr_buffers; ++i) {
/*
- * Datablock.
+ * When dealing with an uncompressed block, the actor may
+ * contains NULL pages. There's no need to read the buffers
+ * associated with these pages.
*/
- bytes = -offset;
- compressed = SQUASHFS_COMPRESSED_BLOCK(length);
- length = SQUASHFS_COMPRESSED_SIZE_BLOCK(length);
- if (next_index)
- *next_index = index + length;
-
- TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n",
- index, compressed ? "" : "un", length, output->length);
-
- if (length < 0 || length > output->length ||
- (index + length) > msblk->bytes_used)
- goto read_failure;
-
- for (b = 0; bytes < length; b++, cur_index++) {
- bh[b] = sb_getblk(sb, cur_index);
- if (bh[b] == NULL)
- goto block_release;
- bytes += msblk->devblksize;
+ if (!req->compressed && bh_is_optional(req, i)) {
+ req->bh[i] = NULL;
+ continue;
}
- ll_rw_block(READ, b, bh);
- } else {
- /*
- * Metadata block.
- */
- if ((index + 2) > msblk->bytes_used)
- goto read_failure;
+ req->bh[i] = sb_getblk(req->sb, block + i);
+ if (!req->bh[i]) {
+ while (--i) {
+ if (req->bh[i])
+ put_bh(req->bh[i]);
+ }
+ return -1;
+ }
+ }
+ return 0;
+}
- bh[0] = get_block_length(sb, &cur_index, &offset, &length);
- if (bh[0] == NULL)
- goto read_failure;
- b = 1;
+static int squashfs_bio_submit(struct squashfs_read_request *req)
+{
+ struct bio *bio = NULL;
+ struct buffer_head *bh;
+ struct squashfs_bio_request *bio_req = NULL;
+ int b = 0, prev_block = 0;
+ struct squashfs_sb_info *msblk = req->sb->s_fs_info;
- bytes = msblk->devblksize - offset;
- compressed = SQUASHFS_COMPRESSED(length);
- length = SQUASHFS_COMPRESSED_SIZE(length);
- if (next_index)
- *next_index = index + length + 2;
+ u64 read_start = round_down(req->index, msblk->devblksize);
+ u64 read_end = round_up(req->index + req->length, msblk->devblksize);
+ sector_t block = read_start >> msblk->devblksize_log2;
+ sector_t block_end = read_end >> msblk->devblksize_log2;
+ int offset = read_start - round_down(req->index, PAGE_SIZE);
+ int nr_buffers = block_end - block;
+ int blksz = msblk->devblksize;
+ int bio_max_pages = nr_buffers > BIO_MAX_PAGES ? BIO_MAX_PAGES
+ : nr_buffers;
- TRACE("Block @ 0x%llx, %scompressed size %d\n", index,
- compressed ? "" : "un", length);
+ /* Setup the request */
+ req->read_end = read_end;
+ req->offset = req->index - read_start;
+ req->nr_buffers = nr_buffers;
+ if (actor_getblks(req, block) < 0)
+ goto getblk_failed;
- if (length < 0 || length > output->length ||
- (index + length) > msblk->bytes_used)
- goto block_release;
+ /* Create and submit the BIOs */
+ for (b = 0; b < nr_buffers; ++b, offset += blksz) {
+ bh = req->bh[b];
+ if (!bh || !trylock_buffer(bh))
+ continue;
+ if (buffer_uptodate(bh)) {
+ unlock_buffer(bh);
+ continue;
+ }
+ offset %= PAGE_SIZE;
- for (; bytes < length; b++) {
- bh[b] = sb_getblk(sb, ++cur_index);
- if (bh[b] == NULL)
- goto block_release;
- bytes += msblk->devblksize;
+ /* Append the buffer to the current BIO if it is contiguous */
+ if (bio && bio_req && prev_block + 1 == b) {
+ if (bio_add_page(bio, bh->b_page, blksz, offset)) {
+ bio_req->nr_buffers += 1;
+ prev_block = b;
+ continue;
+ }
}
- ll_rw_block(READ, b - 1, bh + 1);
+
+ /* Otherwise, submit the current BIO and create a new one */
+ if (bio)
+ submit_bio(READ, bio);
+ bio_req = kcalloc(1, sizeof(struct squashfs_bio_request),
+ GFP_NOIO);
+ if (!bio_req)
+ goto req_alloc_failed;
+ bio_req->bh = &req->bh[b];
+ bio = bio_alloc(GFP_NOIO, bio_max_pages);
+ if (!bio)
+ goto bio_alloc_failed;
+ bio->bi_bdev = req->sb->s_bdev;
+ bio->bi_iter.bi_sector = (block + b)
+ << (msblk->devblksize_log2 - 9);
+ bio->bi_private = bio_req;
+ bio->bi_end_io = squashfs_bio_end_io;
+
+ bio_add_page(bio, bh->b_page, blksz, offset);
+ bio_req->nr_buffers += 1;
+ prev_block = b;
}
+ if (bio)
+ submit_bio(READ, bio);
- for (i = 0; i < b; i++) {
- wait_on_buffer(bh[i]);
- if (!buffer_uptodate(bh[i]))
- goto block_release;
+ if (req->synchronous)
+ squashfs_process_blocks(req);
+ else {
+ INIT_WORK(&req->offload, read_wq_handler);
+ schedule_work(&req->offload);
}
+ return 0;
- if (compressed) {
- length = squashfs_decompress(msblk, bh, b, offset, length,
- output);
- if (length < 0)
- goto read_failure;
- } else {
- /*
- * Block is uncompressed.
- */
- int in, pg_offset = 0;
- void *data = squashfs_first_page(output);
-
- for (bytes = length; k < b; k++) {
- in = min(bytes, msblk->devblksize - offset);
- bytes -= in;
- while (in) {
- if (pg_offset == PAGE_CACHE_SIZE) {
- data = squashfs_next_page(output);
- pg_offset = 0;
- }
- avail = min_t(int, in, PAGE_CACHE_SIZE -
- pg_offset);
- memcpy(data + pg_offset, bh[k]->b_data + offset,
- avail);
- in -= avail;
- pg_offset += avail;
- offset += avail;
- }
- offset = 0;
- put_bh(bh[k]);
- }
- squashfs_finish_page(output);
+bio_alloc_failed:
+ kfree(bio_req);
+req_alloc_failed:
+ unlock_buffer(bh);
+ while (--nr_buffers >= b)
+ if (req->bh[nr_buffers])
+ put_bh(req->bh[nr_buffers]);
+ while (--b >= 0)
+ if (req->bh[b])
+ wait_on_buffer(req->bh[b]);
+getblk_failed:
+ free_read_request(req, -ENOMEM);
+ return -ENOMEM;
+}
+
+static int read_metadata_block(struct squashfs_read_request *req,
+ u64 *next_index)
+{
+ int ret, error, bytes_read = 0, bytes_uncompressed = 0;
+ struct squashfs_sb_info *msblk = req->sb->s_fs_info;
+
+ if (req->index + 2 > msblk->bytes_used) {
+ free_read_request(req, -EINVAL);
+ return -EINVAL;
+ }
+ req->length = 2;
+
+ /* Do not read beyond the end of the device */
+ if (req->index + req->length > msblk->bytes_used)
+ req->length = msblk->bytes_used - req->index;
+ req->data_processing = SQUASHFS_METADATA;
+
+ /*
+ * Reading metadata is always synchronous because we don't know the
+ * length in advance and the function is expected to update
+ * 'next_index' and return the length.
+ */
+ req->synchronous = true;
+ req->res = &error;
+ req->bytes_read = &bytes_read;
+ req->bytes_uncompressed = &bytes_uncompressed;
+
+ TRACE("Metadata block @ 0x%llx, %scompressed size %d, src size %d\n",
+ req->index, req->compressed ? "" : "un", bytes_read,
+ req->output->length);
+
+ ret = squashfs_bio_submit(req);
+ if (ret)
+ return ret;
+ if (error)
+ return error;
+ if (next_index)
+ *next_index += 2 + bytes_read;
+ return bytes_uncompressed;
+}
+
+static int read_data_block(struct squashfs_read_request *req, int length,
+ u64 *next_index, bool synchronous)
+{
+ int ret, error = 0, bytes_uncompressed = 0, bytes_read = 0;
+
+ req->compressed = SQUASHFS_COMPRESSED_BLOCK(length);
+ req->length = length = SQUASHFS_COMPRESSED_SIZE_BLOCK(length);
+ req->data_processing = req->compressed ? SQUASHFS_DECOMPRESS
+ : SQUASHFS_COPY;
+
+ req->synchronous = synchronous;
+ if (synchronous) {
+ req->res = &error;
+ req->bytes_read = &bytes_read;
+ req->bytes_uncompressed = &bytes_uncompressed;
+ }
+
+ TRACE("Data block @ 0x%llx, %scompressed size %d, src size %d\n",
+ req->index, req->compressed ? "" : "un", req->length,
+ req->output->length);
+
+ ret = squashfs_bio_submit(req);
+ if (ret)
+ return ret;
+ if (synchronous)
+ ret = error ? error : bytes_uncompressed;
+ if (next_index)
+ *next_index += length;
+ return ret;
+}
+
+/*
+ * Read and decompress a metadata block or datablock. Length is non-zero
+ * if a datablock is being read (the size is stored elsewhere in the
+ * filesystem), otherwise the length is obtained from the first two bytes of
+ * the metadata block. A bit in the length field indicates if the block
+ * is stored uncompressed in the filesystem (usually because compression
+ * generated a larger block - this does occasionally happen with compression
+ * algorithms).
+ */
+static int __squashfs_read_data(struct super_block *sb, u64 index, int length,
+ u64 *next_index, struct squashfs_page_actor *output, bool sync)
+{
+ struct squashfs_read_request *req;
+
+ req = kcalloc(1, sizeof(struct squashfs_read_request), GFP_KERNEL);
+ if (!req) {
+ if (!sync)
+ squashfs_page_actor_free(output, -ENOMEM);
+ return -ENOMEM;
+ }
+
+ req->sb = sb;
+ req->index = index;
+ req->output = output;
+
+ if (next_index)
+ *next_index = index;
+
+ if (length)
+ length = read_data_block(req, length, next_index, sync);
+ else
+ length = read_metadata_block(req, next_index);
+
+ if (length < 0) {
+ ERROR("squashfs_read_data failed to read block 0x%llx\n",
+ (unsigned long long)index);
+ return -EIO;
}
- kfree(bh);
return length;
+}
-block_release:
- for (; k < b; k++)
- put_bh(bh[k]);
+int squashfs_read_data(struct super_block *sb, u64 index, int length,
+ u64 *next_index, struct squashfs_page_actor *output)
+{
+ return __squashfs_read_data(sb, index, length, next_index, output,
+ true);
+}
+
+int squashfs_read_data_async(struct super_block *sb, u64 index, int length,
+ u64 *next_index, struct squashfs_page_actor *output)
+{
-read_failure:
- ERROR("squashfs_read_data failed to read block 0x%llx\n",
- (unsigned long long) index);
- kfree(bh);
- return -EIO;
+ return __squashfs_read_data(sb, index, length, next_index, output,
+ false);
}
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index 1cb70a0b2168..6785d086ab38 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -209,17 +209,14 @@ void squashfs_cache_put(struct squashfs_cache_entry *entry)
*/
void squashfs_cache_delete(struct squashfs_cache *cache)
{
- int i, j;
+ int i;
if (cache == NULL)
return;
for (i = 0; i < cache->entries; i++) {
- if (cache->entry[i].data) {
- for (j = 0; j < cache->pages; j++)
- kfree(cache->entry[i].data[j]);
- kfree(cache->entry[i].data);
- }
+ if (cache->entry[i].page)
+ free_page_array(cache->entry[i].page, cache->pages);
kfree(cache->entry[i].actor);
}
@@ -236,7 +233,7 @@ void squashfs_cache_delete(struct squashfs_cache *cache)
struct squashfs_cache *squashfs_cache_init(char *name, int entries,
int block_size)
{
- int i, j;
+ int i;
struct squashfs_cache *cache = kzalloc(sizeof(*cache), GFP_KERNEL);
if (cache == NULL) {
@@ -268,22 +265,13 @@ struct squashfs_cache *squashfs_cache_init(char *name, int entries,
init_waitqueue_head(&cache->entry[i].wait_queue);
entry->cache = cache;
entry->block = SQUASHFS_INVALID_BLK;
- entry->data = kcalloc(cache->pages, sizeof(void *), GFP_KERNEL);
- if (entry->data == NULL) {
+ entry->page = alloc_page_array(cache->pages, GFP_KERNEL);
+ if (!entry->page) {
ERROR("Failed to allocate %s cache entry\n", name);
goto cleanup;
}
-
- for (j = 0; j < cache->pages; j++) {
- entry->data[j] = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
- if (entry->data[j] == NULL) {
- ERROR("Failed to allocate %s buffer\n", name);
- goto cleanup;
- }
- }
-
- entry->actor = squashfs_page_actor_init(entry->data,
- cache->pages, 0);
+ entry->actor = squashfs_page_actor_init(entry->page,
+ cache->pages, 0, NULL);
if (entry->actor == NULL) {
ERROR("Failed to allocate %s cache entry\n", name);
goto cleanup;
@@ -314,18 +302,20 @@ int squashfs_copy_data(void *buffer, struct squashfs_cache_entry *entry,
return min(length, entry->length - offset);
while (offset < entry->length) {
- void *buff = entry->data[offset / PAGE_CACHE_SIZE]
- + (offset % PAGE_CACHE_SIZE);
+ void *buff = kmap_atomic(entry->page[offset / PAGE_CACHE_SIZE])
+ + (offset % PAGE_CACHE_SIZE);
int bytes = min_t(int, entry->length - offset,
PAGE_CACHE_SIZE - (offset % PAGE_CACHE_SIZE));
if (bytes >= remaining) {
memcpy(buffer, buff, remaining);
+ kunmap_atomic(buff);
remaining = 0;
break;
}
memcpy(buffer, buff, bytes);
+ kunmap_atomic(buff);
buffer += bytes;
remaining -= bytes;
offset += bytes;
@@ -416,43 +406,38 @@ struct squashfs_cache_entry *squashfs_get_datablock(struct super_block *sb,
void *squashfs_read_table(struct super_block *sb, u64 block, int length)
{
int pages = (length + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
- int i, res;
- void *table, *buffer, **data;
+ struct page **page;
+ void *buff;
+ int res;
struct squashfs_page_actor *actor;
- table = buffer = kmalloc(length, GFP_KERNEL);
- if (table == NULL)
+ page = alloc_page_array(pages, GFP_KERNEL);
+ if (!page)
return ERR_PTR(-ENOMEM);
- data = kcalloc(pages, sizeof(void *), GFP_KERNEL);
- if (data == NULL) {
- res = -ENOMEM;
- goto failed;
- }
-
- actor = squashfs_page_actor_init(data, pages, length);
+ actor = squashfs_page_actor_init(page, pages, length, NULL);
if (actor == NULL) {
res = -ENOMEM;
- goto failed2;
+ goto failed;
}
- for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE)
- data[i] = buffer;
-
res = squashfs_read_data(sb, block, length |
SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, actor);
- kfree(data);
- kfree(actor);
-
if (res < 0)
- goto failed;
+ goto failed2;
- return table;
+ buff = kmalloc(length, GFP_KERNEL);
+ if (!buff)
+ goto failed2;
+ squashfs_actor_to_buf(actor, buff, length);
+ squashfs_page_actor_free(actor, 0);
+ free_page_array(page, pages);
+ return buff;
failed2:
- kfree(data);
+ squashfs_page_actor_free(actor, 0);
failed:
- kfree(table);
+ free_page_array(page, pages);
return ERR_PTR(res);
}
diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c
index e9034bf6e5ae..7de35bf297aa 100644
--- a/fs/squashfs/decompressor.c
+++ b/fs/squashfs/decompressor.c
@@ -24,7 +24,8 @@
#include <linux/types.h>
#include <linux/mutex.h>
#include <linux/slab.h>
-#include <linux/buffer_head.h>
+#include <linux/highmem.h>
+#include <linux/fs.h>
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
@@ -94,40 +95,44 @@ const struct squashfs_decompressor *squashfs_lookup_decompressor(int id)
static void *get_comp_opts(struct super_block *sb, unsigned short flags)
{
struct squashfs_sb_info *msblk = sb->s_fs_info;
- void *buffer = NULL, *comp_opts;
+ void *comp_opts, *buffer = NULL;
+ struct page *page;
struct squashfs_page_actor *actor = NULL;
int length = 0;
+ if (!SQUASHFS_COMP_OPTS(flags))
+ return squashfs_comp_opts(msblk, buffer, length);
+
/*
* Read decompressor specific options from file system if present
*/
- if (SQUASHFS_COMP_OPTS(flags)) {
- buffer = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
- if (buffer == NULL) {
- comp_opts = ERR_PTR(-ENOMEM);
- goto out;
- }
-
- actor = squashfs_page_actor_init(&buffer, 1, 0);
- if (actor == NULL) {
- comp_opts = ERR_PTR(-ENOMEM);
- goto out;
- }
-
- length = squashfs_read_data(sb,
- sizeof(struct squashfs_super_block), 0, NULL, actor);
-
- if (length < 0) {
- comp_opts = ERR_PTR(length);
- goto out;
- }
+
+ page = alloc_page(GFP_KERNEL);
+ if (!page)
+ return ERR_PTR(-ENOMEM);
+
+ actor = squashfs_page_actor_init(&page, 1, 0, NULL);
+ if (actor == NULL) {
+ comp_opts = ERR_PTR(-ENOMEM);
+ goto actor_error;
+ }
+
+ length = squashfs_read_data(sb,
+ sizeof(struct squashfs_super_block), 0, NULL, actor);
+
+ if (length < 0) {
+ comp_opts = ERR_PTR(length);
+ goto read_error;
}
+ buffer = kmap_atomic(page);
comp_opts = squashfs_comp_opts(msblk, buffer, length);
+ kunmap_atomic(buffer);
-out:
- kfree(actor);
- kfree(buffer);
+read_error:
+ squashfs_page_actor_free(actor, 0);
+actor_error:
+ __free_page(page);
return comp_opts;
}
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index e5c9689062ba..6f5ef8d7e55a 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -47,12 +47,16 @@
#include <linux/string.h>
#include <linux/pagemap.h>
#include <linux/mutex.h>
+#include <linux/mm_inline.h>
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
#include "squashfs_fs_i.h"
#include "squashfs.h"
+// Backported from 4.5
+#define lru_to_page(head) (list_entry((head)->prev, struct page, lru))
+
/*
* Locate cache slot in range [offset, index] for specified inode. If
* there's more than one return the slot closest to index.
@@ -438,6 +442,21 @@ static int squashfs_readpage_fragment(struct page *page)
return res;
}
+static int squashfs_readpages_fragment(struct page *page,
+ struct list_head *readahead_pages, struct address_space *mapping)
+{
+ if (!page) {
+ page = lru_to_page(readahead_pages);
+ list_del(&page->lru);
+ if (add_to_page_cache_lru(page, mapping, page->index,
+ mapping_gfp_constraint(mapping, GFP_KERNEL))) {
+ put_page(page);
+ return 0;
+ }
+ }
+ return squashfs_readpage_fragment(page);
+}
+
static int squashfs_readpage_sparse(struct page *page, int index, int file_end)
{
struct inode *inode = page->mapping->host;
@@ -450,54 +469,105 @@ static int squashfs_readpage_sparse(struct page *page, int index, int file_end)
return 0;
}
-static int squashfs_readpage(struct file *file, struct page *page)
+static int squashfs_readpages_sparse(struct page *page,
+ struct list_head *readahead_pages, int index, int file_end,
+ struct address_space *mapping)
{
- struct inode *inode = page->mapping->host;
+ if (!page) {
+ page = lru_to_page(readahead_pages);
+ list_del(&page->lru);
+ if (add_to_page_cache_lru(page, mapping, page->index,
+ mapping_gfp_constraint(mapping, GFP_KERNEL))) {
+ put_page(page);
+ return 0;
+ }
+ }
+ return squashfs_readpage_sparse(page, index, file_end);
+}
+
+static int __squashfs_readpages(struct file *file, struct page *page,
+ struct list_head *readahead_pages, unsigned int nr_pages,
+ struct address_space *mapping)
+{
+ struct inode *inode = mapping->host;
struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
- int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT);
int file_end = i_size_read(inode) >> msblk->block_log;
int res;
- void *pageaddr;
- TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n",
- page->index, squashfs_i(inode)->start);
+ do {
+ struct page *cur_page = page ? page
+ : lru_to_page(readahead_pages);
+ int page_index = cur_page->index;
+ int index = page_index >> (msblk->block_log - PAGE_CACHE_SHIFT);
+
+ if (page_index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
+ PAGE_CACHE_SHIFT))
+ return 1;
+
+ if (index < file_end || squashfs_i(inode)->fragment_block ==
+ SQUASHFS_INVALID_BLK) {
+ u64 block = 0;
+ int bsize = read_blocklist(inode, index, &block);
+
+ if (bsize < 0)
+ return -1;
+
+ if (bsize == 0) {
+ res = squashfs_readpages_sparse(page,
+ readahead_pages, index, file_end,
+ mapping);
+ } else {
+ res = squashfs_readpages_block(page,
+ readahead_pages, &nr_pages, mapping,
+ page_index, block, bsize);
+ }
+ } else {
+ res = squashfs_readpages_fragment(page,
+ readahead_pages, mapping);
+ }
+ if (res)
+ return 0;
+ page = NULL;
+ } while (readahead_pages && !list_empty(readahead_pages));
- if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
- PAGE_CACHE_SHIFT))
- goto out;
+ return 0;
+}
- if (index < file_end || squashfs_i(inode)->fragment_block ==
- SQUASHFS_INVALID_BLK) {
- u64 block = 0;
- int bsize = read_blocklist(inode, index, &block);
- if (bsize < 0)
- goto error_out;
+static int squashfs_readpage(struct file *file, struct page *page)
+{
+ int ret;
- if (bsize == 0)
- res = squashfs_readpage_sparse(page, index, file_end);
+ TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n",
+ page->index, squashfs_i(page->mapping->host)->start);
+
+ get_page(page);
+
+ ret = __squashfs_readpages(file, page, NULL, 1, page->mapping);
+ if (ret) {
+ flush_dcache_page(page);
+ if (ret < 0)
+ SetPageError(page);
else
- res = squashfs_readpage_block(page, block, bsize);
- } else
- res = squashfs_readpage_fragment(page);
-
- if (!res)
- return 0;
-
-error_out:
- SetPageError(page);
-out:
- pageaddr = kmap_atomic(page);
- memset(pageaddr, 0, PAGE_CACHE_SIZE);
- kunmap_atomic(pageaddr);
- flush_dcache_page(page);
- if (!PageError(page))
- SetPageUptodate(page);
- unlock_page(page);
+ SetPageUptodate(page);
+ zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+ unlock_page(page);
+ put_page(page);
+ }
return 0;
}
+static int squashfs_readpages(struct file *file, struct address_space *mapping,
+ struct list_head *pages, unsigned int nr_pages)
+{
+ TRACE("Entered squashfs_readpages, %u pages, first page index %lx\n",
+ nr_pages, lru_to_page(pages)->index);
+ __squashfs_readpages(file, NULL, pages, nr_pages, mapping);
+ return 0;
+}
+
const struct address_space_operations squashfs_aops = {
- .readpage = squashfs_readpage
+ .readpage = squashfs_readpage,
+ .readpages = squashfs_readpages,
};
diff --git a/fs/squashfs/file_cache.c b/fs/squashfs/file_cache.c
deleted file mode 100644
index f2310d2a2019..000000000000
--- a/fs/squashfs/file_cache.c
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 2013
- * Phillip Lougher <phillip@squashfs.org.uk>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- */
-
-#include <linux/fs.h>
-#include <linux/vfs.h>
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/pagemap.h>
-#include <linux/mutex.h>
-
-#include "squashfs_fs.h"
-#include "squashfs_fs_sb.h"
-#include "squashfs_fs_i.h"
-#include "squashfs.h"
-
-/* Read separately compressed datablock and memcopy into page cache */
-int squashfs_readpage_block(struct page *page, u64 block, int bsize)
-{
- struct inode *i = page->mapping->host;
- struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb,
- block, bsize);
- int res = buffer->error;
-
- if (res)
- ERROR("Unable to read page, block %llx, size %x\n", block,
- bsize);
- else
- squashfs_copy_cache(page, buffer, buffer->length, 0);
-
- squashfs_cache_put(buffer);
- return res;
-}
diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c
index 43e7a7eddac0..c97af4c6ccd0 100644
--- a/fs/squashfs/file_direct.c
+++ b/fs/squashfs/file_direct.c
@@ -13,6 +13,7 @@
#include <linux/string.h>
#include <linux/pagemap.h>
#include <linux/mutex.h>
+#include <linux/mm_inline.h>
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
@@ -20,157 +21,139 @@
#include "squashfs.h"
#include "page_actor.h"
-static int squashfs_read_cache(struct page *target_page, u64 block, int bsize,
- int pages, struct page **page);
-
-/* Read separately compressed datablock directly into page cache */
-int squashfs_readpage_block(struct page *target_page, u64 block, int bsize)
+// Backported from 4.5
+#define lru_to_page(head) (list_entry((head)->prev, struct page, lru))
+static void release_actor_pages(struct page **page, int pages, int error)
{
- struct inode *inode = target_page->mapping->host;
- struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+ int i;
- int file_end = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
- int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1;
- int start_index = target_page->index & ~mask;
- int end_index = start_index | mask;
- int i, n, pages, missing_pages, bytes, res = -ENOMEM;
+ for (i = 0; i < pages; i++) {
+ if (!page[i])
+ continue;
+ flush_dcache_page(page[i]);
+ if (!error)
+ SetPageUptodate(page[i]);
+ else {
+ SetPageError(page[i]);
+ zero_user_segment(page[i], 0, PAGE_CACHE_SIZE);
+ }
+ unlock_page(page[i]);
+ put_page(page[i]);
+ }
+ kfree(page);
+}
+
+/*
+ * Create a "page actor" which will kmap and kunmap the
+ * page cache pages appropriately within the decompressor
+ */
+static struct squashfs_page_actor *actor_from_page_cache(
+ unsigned int actor_pages, struct page *target_page,
+ struct list_head *rpages, unsigned int *nr_pages, int start_index,
+ struct address_space *mapping)
+{
struct page **page;
struct squashfs_page_actor *actor;
- void *pageaddr;
-
- if (end_index > file_end)
- end_index = file_end;
-
- pages = end_index - start_index + 1;
-
- page = kmalloc_array(pages, sizeof(void *), GFP_KERNEL);
- if (page == NULL)
- return res;
-
- /*
- * Create a "page actor" which will kmap and kunmap the
- * page cache pages appropriately within the decompressor
- */
- actor = squashfs_page_actor_init_special(page, pages, 0);
- if (actor == NULL)
- goto out;
-
- /* Try to grab all the pages covered by the Squashfs block */
- for (missing_pages = 0, i = 0, n = start_index; i < pages; i++, n++) {
- page[i] = (n == target_page->index) ? target_page :
- grab_cache_page_nowait(target_page->mapping, n);
+ int i, n;
+ gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL);
+
+ page = kmalloc_array(actor_pages, sizeof(void *), GFP_KERNEL);
+ if (!page)
+ return NULL;
+
+ for (i = 0, n = start_index; i < actor_pages; i++, n++) {
+ if (target_page == NULL && rpages && !list_empty(rpages)) {
+ struct page *cur_page = lru_to_page(rpages);
+
+ if (cur_page->index < start_index + actor_pages) {
+ list_del(&cur_page->lru);
+ --(*nr_pages);
+ if (add_to_page_cache_lru(cur_page, mapping,
+ cur_page->index, gfp))
+ put_page(cur_page);
+ else
+ target_page = cur_page;
+ } else
+ rpages = NULL;
+ }
- if (page[i] == NULL) {
- missing_pages++;
- continue;
+ if (target_page && target_page->index == n) {
+ page[i] = target_page;
+ target_page = NULL;
+ } else {
+ page[i] = grab_cache_page_nowait(mapping, n);
+ if (page[i] == NULL)
+ continue;
}
if (PageUptodate(page[i])) {
unlock_page(page[i]);
- page_cache_release(page[i]);
+ put_page(page[i]);
page[i] = NULL;
- missing_pages++;
}
}
- if (missing_pages) {
- /*
- * Couldn't get one or more pages, this page has either
- * been VM reclaimed, but others are still in the page cache
- * and uptodate, or we're racing with another thread in
- * squashfs_readpage also trying to grab them. Fall back to
- * using an intermediate buffer.
- */
- res = squashfs_read_cache(target_page, block, bsize, pages,
- page);
- if (res < 0)
- goto mark_errored;
-
- goto out;
+ actor = squashfs_page_actor_init(page, actor_pages, 0,
+ release_actor_pages);
+ if (!actor) {
+ release_actor_pages(page, actor_pages, -ENOMEM);
+ kfree(page);
+ return NULL;
}
-
- /* Decompress directly into the page cache buffers */
- res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor);
- if (res < 0)
- goto mark_errored;
-
- /* Last page may have trailing bytes not filled */
- bytes = res % PAGE_CACHE_SIZE;
- if (bytes) {
- pageaddr = kmap_atomic(page[pages - 1]);
- memset(pageaddr + bytes, 0, PAGE_CACHE_SIZE - bytes);
- kunmap_atomic(pageaddr);
- }
-
- /* Mark pages as uptodate, unlock and release */
- for (i = 0; i < pages; i++) {
- flush_dcache_page(page[i]);
- SetPageUptodate(page[i]);
- unlock_page(page[i]);
- if (page[i] != target_page)
- page_cache_release(page[i]);
- }
-
- kfree(actor);
- kfree(page);
-
- return 0;
-
-mark_errored:
- /* Decompression failed, mark pages as errored. Target_page is
- * dealt with by the caller
- */
- for (i = 0; i < pages; i++) {
- if (page[i] == NULL || page[i] == target_page)
- continue;
- flush_dcache_page(page[i]);
- SetPageError(page[i]);
- unlock_page(page[i]);
- page_cache_release(page[i]);
- }
-
-out:
- kfree(actor);
- kfree(page);
- return res;
+ return actor;
}
+int squashfs_readpages_block(struct page *target_page,
+ struct list_head *readahead_pages,
+ unsigned int *nr_pages,
+ struct address_space *mapping,
+ int page_index, u64 block, int bsize)
-static int squashfs_read_cache(struct page *target_page, u64 block, int bsize,
- int pages, struct page **page)
{
- struct inode *i = target_page->mapping->host;
- struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb,
- block, bsize);
- int bytes = buffer->length, res = buffer->error, n, offset = 0;
- void *pageaddr;
-
- if (res) {
- ERROR("Unable to read page, block %llx, size %x\n", block,
- bsize);
- goto out;
- }
-
- for (n = 0; n < pages && bytes > 0; n++,
- bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) {
- int avail = min_t(int, bytes, PAGE_CACHE_SIZE);
-
- if (page[n] == NULL)
- continue;
+ struct squashfs_page_actor *actor;
+ struct inode *inode = mapping->host;
+ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+ int start_index, end_index, file_end, actor_pages, res;
+ int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1;
- pageaddr = kmap_atomic(page[n]);
- squashfs_copy_data(pageaddr, buffer, offset, avail);
- memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail);
- kunmap_atomic(pageaddr);
- flush_dcache_page(page[n]);
- SetPageUptodate(page[n]);
- unlock_page(page[n]);
- if (page[n] != target_page)
- page_cache_release(page[n]);
+ /*
+ * If readpage() is called on an uncompressed datablock, we can just
+ * read the pages instead of fetching the whole block.
+ * This greatly improves the performance when a process keep doing
+ * random reads because we only fetch the necessary data.
+ * The readahead algorithm will take care of doing speculative reads
+ * if necessary.
+ * We can't read more than 1 block even if readahead provides use more
+ * pages because we don't know yet if the next block is compressed or
+ * not.
+ */
+ if (bsize && !SQUASHFS_COMPRESSED_BLOCK(bsize)) {
+ u64 block_end = block + msblk->block_size;
+
+ block += (page_index & mask) * PAGE_CACHE_SIZE;
+ actor_pages = (block_end - block) / PAGE_CACHE_SIZE;
+ if (*nr_pages < actor_pages)
+ actor_pages = *nr_pages;
+ start_index = page_index;
+ bsize = min_t(int, bsize, (PAGE_CACHE_SIZE * actor_pages)
+ | SQUASHFS_COMPRESSED_BIT_BLOCK);
+ } else {
+ file_end = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
+ start_index = page_index & ~mask;
+ end_index = start_index | mask;
+ if (end_index > file_end)
+ end_index = file_end;
+ actor_pages = end_index - start_index + 1;
}
-out:
- squashfs_cache_put(buffer);
- return res;
+ actor = actor_from_page_cache(actor_pages, target_page,
+ readahead_pages, nr_pages, start_index,
+ mapping);
+ if (!actor)
+ return -ENOMEM;
+
+ res = squashfs_read_data_async(inode->i_sb, block, bsize, NULL,
+ actor);
+ return res < 0 ? res : 0;
}
diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c
index c31e2bc9c081..df4fa3c7ddd0 100644
--- a/fs/squashfs/lz4_wrapper.c
+++ b/fs/squashfs/lz4_wrapper.c
@@ -94,39 +94,17 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
struct buffer_head **bh, int b, int offset, int length,
struct squashfs_page_actor *output)
{
- struct squashfs_lz4 *stream = strm;
- void *buff = stream->input, *data;
- int avail, i, bytes = length, res;
+ int res;
size_t dest_len = output->length;
+ struct squashfs_lz4 *stream = strm;
- for (i = 0; i < b; i++) {
- avail = min(bytes, msblk->devblksize - offset);
- memcpy(buff, bh[i]->b_data + offset, avail);
- buff += avail;
- bytes -= avail;
- offset = 0;
- put_bh(bh[i]);
- }
-
+ squashfs_bh_to_buf(bh, b, stream->input, offset, length,
+ msblk->devblksize);
res = lz4_decompress_unknownoutputsize(stream->input, length,
stream->output, &dest_len);
if (res)
return -EIO;
-
- bytes = dest_len;
- data = squashfs_first_page(output);
- buff = stream->output;
- while (data) {
- if (bytes <= PAGE_CACHE_SIZE) {
- memcpy(data, buff, bytes);
- break;
- }
- memcpy(data, buff, PAGE_CACHE_SIZE);
- buff += PAGE_CACHE_SIZE;
- bytes -= PAGE_CACHE_SIZE;
- data = squashfs_next_page(output);
- }
- squashfs_finish_page(output);
+ squashfs_buf_to_actor(stream->output, output, dest_len);
return dest_len;
}
diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c
index 244b9fbfff7b..2c844d53a59e 100644
--- a/fs/squashfs/lzo_wrapper.c
+++ b/fs/squashfs/lzo_wrapper.c
@@ -79,45 +79,19 @@ static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm,
struct buffer_head **bh, int b, int offset, int length,
struct squashfs_page_actor *output)
{
- struct squashfs_lzo *stream = strm;
- void *buff = stream->input, *data;
- int avail, i, bytes = length, res;
+ int res;
size_t out_len = output->length;
+ struct squashfs_lzo *stream = strm;
- for (i = 0; i < b; i++) {
- avail = min(bytes, msblk->devblksize - offset);
- memcpy(buff, bh[i]->b_data + offset, avail);
- buff += avail;
- bytes -= avail;
- offset = 0;
- put_bh(bh[i]);
- }
-
+ squashfs_bh_to_buf(bh, b, stream->input, offset, length,
+ msblk->devblksize);
res = lzo1x_decompress_safe(stream->input, (size_t)length,
stream->output, &out_len);
if (res != LZO_E_OK)
- goto failed;
+ return -EIO;
+ squashfs_buf_to_actor(stream->output, output, out_len);
- res = bytes = (int)out_len;
- data = squashfs_first_page(output);
- buff = stream->output;
- while (data) {
- if (bytes <= PAGE_CACHE_SIZE) {
- memcpy(data, buff, bytes);
- break;
- } else {
- memcpy(data, buff, PAGE_CACHE_SIZE);
- buff += PAGE_CACHE_SIZE;
- bytes -= PAGE_CACHE_SIZE;
- data = squashfs_next_page(output);
- }
- }
- squashfs_finish_page(output);
-
- return res;
-
-failed:
- return -EIO;
+ return out_len;
}
const struct squashfs_decompressor squashfs_lzo_comp_ops = {
diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c
index 5a1c11f56441..53863508e400 100644
--- a/fs/squashfs/page_actor.c
+++ b/fs/squashfs/page_actor.c
@@ -9,39 +9,11 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
+#include <linux/buffer_head.h>
#include "page_actor.h"
-/*
- * This file contains implementations of page_actor for decompressing into
- * an intermediate buffer, and for decompressing directly into the
- * page cache.
- *
- * Calling code should avoid sleeping between calls to squashfs_first_page()
- * and squashfs_finish_page().
- */
-
-/* Implementation of page_actor for decompressing into intermediate buffer */
-static void *cache_first_page(struct squashfs_page_actor *actor)
-{
- actor->next_page = 1;
- return actor->buffer[0];
-}
-
-static void *cache_next_page(struct squashfs_page_actor *actor)
-{
- if (actor->next_page == actor->pages)
- return NULL;
-
- return actor->buffer[actor->next_page++];
-}
-
-static void cache_finish_page(struct squashfs_page_actor *actor)
-{
- /* empty */
-}
-
-struct squashfs_page_actor *squashfs_page_actor_init(void **buffer,
- int pages, int length)
+struct squashfs_page_actor *squashfs_page_actor_init(struct page **page,
+ int pages, int length, void (*release_pages)(struct page **, int, int))
{
struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
@@ -49,52 +21,133 @@ struct squashfs_page_actor *squashfs_page_actor_init(void **buffer,
return NULL;
actor->length = length ? : pages * PAGE_CACHE_SIZE;
- actor->buffer = buffer;
+ actor->page = page;
actor->pages = pages;
actor->next_page = 0;
- actor->squashfs_first_page = cache_first_page;
- actor->squashfs_next_page = cache_next_page;
- actor->squashfs_finish_page = cache_finish_page;
+ actor->pageaddr = NULL;
+ actor->release_pages = release_pages;
return actor;
}
-/* Implementation of page_actor for decompressing directly into page cache. */
-static void *direct_first_page(struct squashfs_page_actor *actor)
+void squashfs_page_actor_free(struct squashfs_page_actor *actor, int error)
+{
+ if (!actor)
+ return;
+
+ if (actor->release_pages)
+ actor->release_pages(actor->page, actor->pages, error);
+ kfree(actor);
+}
+
+void squashfs_actor_to_buf(struct squashfs_page_actor *actor, void *buf,
+ int length)
{
- actor->next_page = 1;
- return actor->pageaddr = kmap_atomic(actor->page[0]);
+ void *pageaddr;
+ int pos = 0, avail, i;
+
+ for (i = 0; i < actor->pages && pos < length; ++i) {
+ avail = min_t(int, length - pos, PAGE_CACHE_SIZE);
+ if (actor->page[i]) {
+ pageaddr = kmap_atomic(actor->page[i]);
+ memcpy(buf + pos, pageaddr, avail);
+ kunmap_atomic(pageaddr);
+ }
+ pos += avail;
+ }
}
-static void *direct_next_page(struct squashfs_page_actor *actor)
+void squashfs_buf_to_actor(void *buf, struct squashfs_page_actor *actor,
+ int length)
{
- if (actor->pageaddr)
- kunmap_atomic(actor->pageaddr);
+ void *pageaddr;
+ int pos = 0, avail, i;
+
+ for (i = 0; i < actor->pages && pos < length; ++i) {
+ avail = min_t(int, length - pos, PAGE_CACHE_SIZE);
+ if (actor->page[i]) {
+ pageaddr = kmap_atomic(actor->page[i]);
+ memcpy(pageaddr, buf + pos, avail);
+ kunmap_atomic(pageaddr);
+ }
+ pos += avail;
+ }
+}
- return actor->pageaddr = actor->next_page == actor->pages ? NULL :
- kmap_atomic(actor->page[actor->next_page++]);
+void squashfs_bh_to_actor(struct buffer_head **bh, int nr_buffers,
+ struct squashfs_page_actor *actor, int offset, int length, int blksz)
+{
+ void *kaddr = NULL;
+ int bytes = 0, pgoff = 0, b = 0, p = 0, avail, i;
+
+ while (bytes < length) {
+ if (actor->page[p]) {
+ kaddr = kmap_atomic(actor->page[p]);
+ while (pgoff < PAGE_CACHE_SIZE && bytes < length) {
+ avail = min_t(int, blksz - offset,
+ PAGE_CACHE_SIZE - pgoff);
+ memcpy(kaddr + pgoff, bh[b]->b_data + offset,
+ avail);
+ pgoff += avail;
+ bytes += avail;
+ offset = (offset + avail) % blksz;
+ if (!offset) {
+ put_bh(bh[b]);
+ ++b;
+ }
+ }
+ kunmap_atomic(kaddr);
+ pgoff = 0;
+ } else {
+ for (i = 0; i < PAGE_CACHE_SIZE / blksz; ++i) {
+ if (bh[b])
+ put_bh(bh[b]);
+ ++b;
+ }
+ bytes += PAGE_CACHE_SIZE;
+ }
+ ++p;
+ }
}
-static void direct_finish_page(struct squashfs_page_actor *actor)
+void squashfs_bh_to_buf(struct buffer_head **bh, int nr_buffers, void *buf,
+ int offset, int length, int blksz)
{
- if (actor->pageaddr)
- kunmap_atomic(actor->pageaddr);
+ int i, avail, bytes = 0;
+
+ for (i = 0; i < nr_buffers && bytes < length; ++i) {
+ avail = min_t(int, length - bytes, blksz - offset);
+ if (bh[i]) {
+ memcpy(buf + bytes, bh[i]->b_data + offset, avail);
+ put_bh(bh[i]);
+ }
+ bytes += avail;
+ offset = 0;
+ }
}
-struct squashfs_page_actor *squashfs_page_actor_init_special(struct page **page,
- int pages, int length)
+void free_page_array(struct page **page, int nr_pages)
{
- struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
+ int i;
- if (actor == NULL)
- return NULL;
+ for (i = 0; i < nr_pages; ++i)
+ __free_page(page[i]);
+ kfree(page);
+}
- actor->length = length ? : pages * PAGE_CACHE_SIZE;
- actor->page = page;
- actor->pages = pages;
- actor->next_page = 0;
- actor->pageaddr = NULL;
- actor->squashfs_first_page = direct_first_page;
- actor->squashfs_next_page = direct_next_page;
- actor->squashfs_finish_page = direct_finish_page;
- return actor;
+struct page **alloc_page_array(int nr_pages, int gfp_mask)
+{
+ int i;
+ struct page **page;
+
+ page = kcalloc(nr_pages, sizeof(struct page *), gfp_mask);
+ if (!page)
+ return NULL;
+ for (i = 0; i < nr_pages; ++i) {
+ page[i] = alloc_page(gfp_mask);
+ if (!page[i]) {
+ free_page_array(page, i);
+ return NULL;
+ }
+ }
+ return page;
}
diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h
index 26dd82008b82..aa1ed790b5a3 100644
--- a/fs/squashfs/page_actor.h
+++ b/fs/squashfs/page_actor.h
@@ -5,77 +5,61 @@
* Phillip Lougher <phillip@squashfs.org.uk>
*
* This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
+ * the COPYING file in the top-level squashfsory.
*/
-#ifndef CONFIG_SQUASHFS_FILE_DIRECT
struct squashfs_page_actor {
- void **page;
+ struct page **page;
+ void *pageaddr;
int pages;
int length;
int next_page;
+ void (*release_pages)(struct page **, int, int);
};
-static inline struct squashfs_page_actor *squashfs_page_actor_init(void **page,
- int pages, int length)
-{
- struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
-
- if (actor == NULL)
- return NULL;
+extern struct squashfs_page_actor *squashfs_page_actor_init(struct page **,
+ int, int, void (*)(struct page **, int, int));
+extern void squashfs_page_actor_free(struct squashfs_page_actor *, int);
- actor->length = length ? : pages * PAGE_CACHE_SIZE;
- actor->page = page;
- actor->pages = pages;
- actor->next_page = 0;
- return actor;
-}
+extern void squashfs_actor_to_buf(struct squashfs_page_actor *, void *, int);
+extern void squashfs_buf_to_actor(void *, struct squashfs_page_actor *, int);
+extern void squashfs_bh_to_actor(struct buffer_head **, int,
+ struct squashfs_page_actor *, int, int, int);
+extern void squashfs_bh_to_buf(struct buffer_head **, int, void *, int, int,
+ int);
+/*
+ * Calling code should avoid sleeping between calls to squashfs_first_page()
+ * and squashfs_finish_page().
+ */
static inline void *squashfs_first_page(struct squashfs_page_actor *actor)
{
actor->next_page = 1;
- return actor->page[0];
+ return actor->pageaddr = actor->page[0] ? kmap_atomic(actor->page[0])
+ : NULL;
}
static inline void *squashfs_next_page(struct squashfs_page_actor *actor)
{
- return actor->next_page == actor->pages ? NULL :
- actor->page[actor->next_page++];
-}
+ if (!IS_ERR_OR_NULL(actor->pageaddr))
+ kunmap_atomic(actor->pageaddr);
-static inline void squashfs_finish_page(struct squashfs_page_actor *actor)
-{
- /* empty */
-}
-#else
-struct squashfs_page_actor {
- union {
- void **buffer;
- struct page **page;
- };
- void *pageaddr;
- void *(*squashfs_first_page)(struct squashfs_page_actor *);
- void *(*squashfs_next_page)(struct squashfs_page_actor *);
- void (*squashfs_finish_page)(struct squashfs_page_actor *);
- int pages;
- int length;
- int next_page;
-};
+ if (actor->next_page == actor->pages)
+ return actor->pageaddr = ERR_PTR(-ENODATA);
-extern struct squashfs_page_actor *squashfs_page_actor_init(void **, int, int);
-extern struct squashfs_page_actor *squashfs_page_actor_init_special(struct page
- **, int, int);
-static inline void *squashfs_first_page(struct squashfs_page_actor *actor)
-{
- return actor->squashfs_first_page(actor);
-}
-static inline void *squashfs_next_page(struct squashfs_page_actor *actor)
-{
- return actor->squashfs_next_page(actor);
+ actor->pageaddr = actor->page[actor->next_page] ?
+ kmap_atomic(actor->page[actor->next_page]) : NULL;
+ ++actor->next_page;
+ return actor->pageaddr;
}
+
static inline void squashfs_finish_page(struct squashfs_page_actor *actor)
{
- actor->squashfs_finish_page(actor);
+ if (!IS_ERR_OR_NULL(actor->pageaddr))
+ kunmap_atomic(actor->pageaddr);
}
-#endif
+
+extern struct page **alloc_page_array(int, int);
+extern void free_page_array(struct page **, int);
+
#endif
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
index 887d6d270080..6093579c6c5d 100644
--- a/fs/squashfs/squashfs.h
+++ b/fs/squashfs/squashfs.h
@@ -28,8 +28,14 @@
#define WARNING(s, args...) pr_warn("SQUASHFS: "s, ## args)
/* block.c */
+extern int squashfs_init_read_wq(void);
+extern void squashfs_destroy_read_wq(void);
extern int squashfs_read_data(struct super_block *, u64, int, u64 *,
struct squashfs_page_actor *);
+extern int squashfs_read_data(struct super_block *, u64, int, u64 *,
+ struct squashfs_page_actor *);
+extern int squashfs_read_data_async(struct super_block *, u64, int, u64 *,
+ struct squashfs_page_actor *);
/* cache.c */
extern struct squashfs_cache *squashfs_cache_init(char *, int, int);
@@ -70,8 +76,9 @@ extern __le64 *squashfs_read_fragment_index_table(struct super_block *,
void squashfs_copy_cache(struct page *, struct squashfs_cache_entry *, int,
int);
-/* file_xxx.c */
-extern int squashfs_readpage_block(struct page *, u64, int);
+/* file_direct.c */
+extern int squashfs_readpages_block(struct page *, struct list_head *,
+ unsigned int *, struct address_space *, int, u64, int);
/* id.c */
extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *);
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
index 1da565cb50c3..8a6995de0277 100644
--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h
@@ -49,7 +49,7 @@ struct squashfs_cache_entry {
int num_waiters;
wait_queue_head_t wait_queue;
struct squashfs_cache *cache;
- void **data;
+ struct page **page;
struct squashfs_page_actor *actor;
};
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 5056babe00df..61cd0b39ed0e 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -444,9 +444,15 @@ static int __init init_squashfs_fs(void)
if (err)
return err;
+ if (!squashfs_init_read_wq()) {
+ destroy_inodecache();
+ return -ENOMEM;
+ }
+
err = register_filesystem(&squashfs_fs_type);
if (err) {
destroy_inodecache();
+ squashfs_destroy_read_wq();
return err;
}
@@ -460,6 +466,7 @@ static void __exit exit_squashfs_fs(void)
{
unregister_filesystem(&squashfs_fs_type);
destroy_inodecache();
+ squashfs_destroy_read_wq();
}
diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c
index c609624e4b8a..14cd373e1897 100644
--- a/fs/squashfs/xz_wrapper.c
+++ b/fs/squashfs/xz_wrapper.c
@@ -55,7 +55,7 @@ static void *squashfs_xz_comp_opts(struct squashfs_sb_info *msblk,
struct comp_opts *opts;
int err = 0, n;
- opts = kmalloc(sizeof(*opts), GFP_KERNEL);
+ opts = kmalloc(sizeof(*opts), GFP_ATOMIC);
if (opts == NULL) {
err = -ENOMEM;
goto out2;
@@ -136,6 +136,7 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm,
enum xz_ret xz_err;
int avail, total = 0, k = 0;
struct squashfs_xz *stream = strm;
+ void *buf = NULL;
xz_dec_reset(stream->state);
stream->buf.in_pos = 0;
@@ -156,12 +157,20 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm,
if (stream->buf.out_pos == stream->buf.out_size) {
stream->buf.out = squashfs_next_page(output);
- if (stream->buf.out != NULL) {
+ if (!IS_ERR(stream->buf.out)) {
stream->buf.out_pos = 0;
total += PAGE_CACHE_SIZE;
}
}
+ if (!stream->buf.out) {
+ if (!buf) {
+ buf = kmalloc(PAGE_CACHE_SIZE, GFP_ATOMIC);
+ if (!buf)
+ goto out;
+ }
+ stream->buf.out = buf;
+ }
xz_err = xz_dec_run(stream->state, &stream->buf);
if (stream->buf.in_pos == stream->buf.in_size && k < b)
@@ -173,11 +182,13 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm,
if (xz_err != XZ_STREAM_END || k < b)
goto out;
+ kfree(buf);
return total + stream->buf.out_pos;
out:
for (; k < b; k++)
put_bh(bh[k]);
+ kfree(buf);
return -EIO;
}
diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c
index 8727caba6882..09c892b5308e 100644
--- a/fs/squashfs/zlib_wrapper.c
+++ b/fs/squashfs/zlib_wrapper.c
@@ -66,6 +66,7 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm,
struct buffer_head **bh, int b, int offset, int length,
struct squashfs_page_actor *output)
{
+ void *buf = NULL;
int zlib_err, zlib_init = 0, k = 0;
z_stream *stream = strm;
@@ -84,10 +85,19 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm,
if (stream->avail_out == 0) {
stream->next_out = squashfs_next_page(output);
- if (stream->next_out != NULL)
+ if (!IS_ERR(stream->next_out))
stream->avail_out = PAGE_CACHE_SIZE;
}
+ if (!stream->next_out) {
+ if (!buf) {
+ buf = kmalloc(PAGE_CACHE_SIZE, GFP_ATOMIC);
+ if (!buf)
+ goto out;
+ }
+ stream->next_out = buf;
+ }
+
if (!zlib_init) {
zlib_err = zlib_inflateInit(stream);
if (zlib_err != Z_OK) {
@@ -115,11 +125,13 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm,
if (k < b)
goto out;
+ kfree(buf);
return stream->total_out;
out:
for (; k < b; k++)
put_bh(bh[k]);
+ kfree(buf);
return -EIO;
}
diff --git a/fs/super.c b/fs/super.c
index d4d2591b77c8..c96434ea71e2 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -703,7 +703,8 @@ rescan:
}
/**
- * do_remount_sb - asks filesystem to change mount options.
+ * do_remount_sb2 - asks filesystem to change mount options.
+ * @mnt: mount we are looking at
* @sb: superblock in question
* @flags: numeric part of options
* @data: the rest of options
@@ -711,7 +712,7 @@ rescan:
*
* Alters the mount options of a mounted file system.
*/
-int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
+int do_remount_sb2(struct vfsmount *mnt, struct super_block *sb, int flags, void *data, int force)
{
int retval;
int remount_ro;
@@ -753,7 +754,16 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
}
}
- if (sb->s_op->remount_fs) {
+ if (mnt && sb->s_op->remount_fs2) {
+ retval = sb->s_op->remount_fs2(mnt, sb, &flags, data);
+ if (retval) {
+ if (!force)
+ goto cancel_readonly;
+ /* If forced remount, go ahead despite any errors */
+ WARN(1, "forced remount of a %s fs returned %i\n",
+ sb->s_type->name, retval);
+ }
+ } else if (sb->s_op->remount_fs) {
retval = sb->s_op->remount_fs(sb, &flags, data);
if (retval) {
if (!force)
@@ -785,12 +795,17 @@ cancel_readonly:
return retval;
}
+int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
+{
+ return do_remount_sb2(NULL, sb, flags, data, force);
+}
+
static void do_emergency_remount(struct work_struct *work)
{
struct super_block *sb, *p = NULL;
spin_lock(&sb_lock);
- list_for_each_entry(sb, &super_blocks, s_list) {
+ list_for_each_entry_reverse(sb, &super_blocks, s_list) {
if (hlist_unhashed(&sb->s_instances))
continue;
sb->s_count++;
@@ -1104,7 +1119,7 @@ struct dentry *mount_single(struct file_system_type *fs_type,
EXPORT_SYMBOL(mount_single);
struct dentry *
-mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
+mount_fs(struct file_system_type *type, int flags, const char *name, struct vfsmount *mnt, void *data)
{
struct dentry *root;
struct super_block *sb;
@@ -1121,7 +1136,10 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
goto out_free_secdata;
}
- root = type->mount(type, flags, name, data);
+ if (type->mount2)
+ root = type->mount2(mnt, type, flags, name, data);
+ else
+ root = type->mount(type, flags, name, data);
if (IS_ERR(root)) {
error = PTR_ERR(root);
goto out_free_secdata;
diff --git a/fs/sync.c b/fs/sync.c
index dd5d1711c7ac..452179e31c39 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -218,6 +218,7 @@ static int do_fsync(unsigned int fd, int datasync)
if (f.file) {
ret = vfs_fsync(f.file, datasync);
fdput(f);
+ inc_syscfs(current);
}
return ret;
}
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 66cdb44616d5..d473e6e07a7e 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -457,7 +457,8 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
new_flags, vma->anon_vma,
vma->vm_file, vma->vm_pgoff,
vma_policy(vma),
- NULL_VM_UFFD_CTX);
+ NULL_VM_UFFD_CTX,
+ vma_get_anon_name(vma));
if (prev)
vma = prev;
else
@@ -833,7 +834,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
prev = vma_merge(mm, prev, start, vma_end, new_flags,
vma->anon_vma, vma->vm_file, vma->vm_pgoff,
vma_policy(vma),
- ((struct vm_userfaultfd_ctx){ ctx }));
+ ((struct vm_userfaultfd_ctx){ ctx }),
+ vma_get_anon_name(vma));
if (prev) {
vma = prev;
goto next;
@@ -967,7 +969,8 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
prev = vma_merge(mm, prev, start, vma_end, new_flags,
vma->anon_vma, vma->vm_file, vma->vm_pgoff,
vma_policy(vma),
- NULL_VM_UFFD_CTX);
+ NULL_VM_UFFD_CTX,
+ vma_get_anon_name(vma));
if (prev) {
vma = prev;
goto next;
diff --git a/fs/utimes.c b/fs/utimes.c
index cb771c30d102..a35e909cf8e3 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -91,7 +91,7 @@ static int utimes_common(struct path *path, struct timespec *times)
}
retry_deleg:
mutex_lock(&inode->i_mutex);
- error = notify_change(path->dentry, &newattrs, &delegated_inode);
+ error = notify_change2(path->mnt, path->dentry, &newattrs, &delegated_inode);
mutex_unlock(&inode->i_mutex);
if (delegated_inode) {
error = break_deleg_wait(&delegated_inode);