summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig5
-rw-r--r--fs/aio.c5
-rw-r--r--fs/block_dev.c32
-rw-r--r--fs/btrfs/inode.c16
-rw-r--r--fs/buffer.c60
-rw-r--r--fs/cifs/file.c8
-rw-r--r--fs/compat_ioctl.c3
-rw-r--r--fs/dcache.c24
-rw-r--r--fs/direct-io.c14
-rw-r--r--fs/drop_caches.c2
-rw-r--r--fs/ecryptfs/Makefile2
-rw-r--r--fs/ecryptfs/crypto.c193
-rw-r--r--fs/ecryptfs/debug.c29
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h83
-rw-r--r--fs/ecryptfs/events.c393
-rw-r--r--fs/ecryptfs/file.c32
-rw-r--r--fs/ecryptfs/inode.c14
-rw-r--r--fs/ecryptfs/keystore.c94
-rw-r--r--fs/ecryptfs/main.c64
-rw-r--r--fs/ecryptfs/mmap.c6
-rw-r--r--fs/ecryptfs/super.c14
-rw-r--r--fs/ext2/namei.c6
-rw-r--r--fs/ext4/Kconfig10
-rw-r--r--fs/ext4/Makefile2
-rw-r--r--fs/ext4/crypto.c9
-rw-r--r--fs/ext4/crypto_key.c98
-rw-r--r--fs/ext4/dir.c7
-rw-r--r--fs/ext4/ext4.h35
-rw-r--r--fs/ext4/ext4_crypto.h7
-rw-r--r--fs/ext4/ext4_ice.c109
-rw-r--r--fs/ext4/ext4_ice.h104
-rw-r--r--fs/ext4/extents.c12
-rw-r--r--fs/ext4/inline.c10
-rw-r--r--fs/ext4/inode.c37
-rw-r--r--fs/ext4/namei.c21
-rw-r--r--fs/ext4/page-io.c7
-rw-r--r--fs/ext4/readpage.c16
-rw-r--r--fs/f2fs/namei.c12
-rw-r--r--fs/fat/fatent.c7
-rw-r--r--fs/fat/inode.c5
-rw-r--r--fs/file_table.c137
-rw-r--r--fs/fuse/Makefile2
-rw-r--r--fs/fuse/dev.c13
-rw-r--r--fs/fuse/dir.c3
-rw-r--r--fs/fuse/file.c83
-rw-r--r--fs/fuse/fuse_i.h11
-rw-r--r--fs/fuse/fuse_passthrough.h31
-rw-r--r--fs/fuse/inode.c8
-rw-r--r--fs/fuse/passthrough.c132
-rw-r--r--fs/gfs2/acl.c5
-rw-r--r--fs/gfs2/ops_fstype.c2
-rw-r--r--fs/inode.c6
-rw-r--r--fs/internal.h26
-rw-r--r--fs/jffs2/dir.c12
-rw-r--r--fs/jfs/namei.c12
-rw-r--r--fs/mbcache.c33
-rw-r--r--fs/namei.c16
-rw-r--r--fs/namespace.c2
-rw-r--r--fs/nilfs2/namei.c6
-rw-r--r--fs/nilfs2/super.c2
-rw-r--r--fs/proc/array.c6
-rw-r--r--fs/proc/base.c221
-rw-r--r--fs/proc/internal.h1
-rw-r--r--fs/proc/meminfo.c7
-rw-r--r--fs/proc/task_mmu.c238
-rw-r--r--fs/reiserfs/namei.c12
-rw-r--r--fs/sdcardfs/inode.c2
-rw-r--r--fs/sdcardfs/main.c2
-rw-r--r--fs/sdcardfs/super.c2
-rw-r--r--fs/super.c2
-rw-r--r--fs/timerfd.c32
-rw-r--r--fs/ubifs/key.h6
-rw-r--r--fs/ubifs/xattr.c4
-rw-r--r--fs/udf/namei.c6
-rw-r--r--fs/ufs/namei.c6
-rw-r--r--fs/xfs/xfs_acl.c4
76 files changed, 2430 insertions, 270 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 80af05163579..89ddd182f568 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -284,4 +284,9 @@ endif # NETWORK_FILESYSTEMS
source "fs/nls/Kconfig"
source "fs/dlm/Kconfig"
+config FILE_TABLE_DEBUG
+ bool "Enable FILE_TABLE_DEBUG"
+ help
+ This option enables debug of the open files using a global filetable
+
endmenu
diff --git a/fs/aio.c b/fs/aio.c
index c283eb03cb38..3fe07571f942 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -262,6 +262,7 @@ static int __init aio_setup(void)
aio_mnt = kern_mount(&aio_fs);
if (IS_ERR(aio_mnt))
panic("Failed to create aio fs mount.");
+ aio_mnt->mnt_flags |= MNT_NOEXEC;
kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
@@ -1336,7 +1337,7 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
{
struct kioctx *ioctx = NULL;
- unsigned long ctx;
+ unsigned long ctx = 0;
long ret;
ret = get_user(ctx, ctxp);
@@ -1469,6 +1470,7 @@ rw_common:
len = ret;
+ get_file(file);
if (rw == WRITE)
file_start_write(file);
@@ -1476,6 +1478,7 @@ rw_common:
if (rw == WRITE)
file_end_write(file);
+ fput(file);
kfree(iovec);
break;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 26bbaaefdff4..43b80ca84d9c 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -532,6 +532,7 @@ static void init_once(void *foo)
#ifdef CONFIG_SYSFS
INIT_LIST_HEAD(&bdev->bd_holder_disks);
#endif
+ bdev->bd_bdi = &noop_backing_dev_info;
inode_init_once(&ei->vfs_inode);
/* Initialize mutex for freeze. */
mutex_init(&bdev->bd_fsfreeze_mutex);
@@ -557,6 +558,12 @@ static void bdev_evict_inode(struct inode *inode)
}
list_del_init(&bdev->bd_list);
spin_unlock(&bdev_lock);
+ /* Detach inode from wb early as bdi_put() may free bdi->wb */
+ inode_detach_wb(inode);
+ if (bdev->bd_bdi != &noop_backing_dev_info) {
+ bdi_put(bdev->bd_bdi);
+ bdev->bd_bdi = &noop_backing_dev_info;
+ }
}
static const struct super_operations bdev_sops = {
@@ -623,6 +630,21 @@ static int bdev_set(struct inode *inode, void *data)
static LIST_HEAD(all_bdevs);
+/*
+ * Look up the block-device inode for @dev and, when one is cached, take it
+ * out of the inode hash so that it is evicted as soon as the last reference
+ * to it is dropped.
+ */
+void bdev_unhash_inode(dev_t dev)
+{
+	struct inode *bd_inode;
+
+	bd_inode = ilookup5(blockdev_superblock, hash(dev), bdev_test, &dev);
+	if (!bd_inode)
+		return;
+
+	remove_inode_hash(bd_inode);
+	/* Balance the reference obtained from ilookup5(). */
+	iput(bd_inode);
+}
+
struct block_device *bdget(dev_t dev)
{
struct block_device *bdev;
@@ -1201,6 +1223,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
bdev->bd_disk = disk;
bdev->bd_queue = disk->queue;
bdev->bd_contains = bdev;
+
bdev->bd_inode->i_flags = disk->fops->direct_access ? S_DAX : 0;
if (!partno) {
ret = -ENXIO;
@@ -1271,6 +1294,9 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
(bdev->bd_part->nr_sects % (PAGE_SIZE / 512)))
bdev->bd_inode->i_flags &= ~S_DAX;
}
+
+ if (bdev->bd_bdi == &noop_backing_dev_info)
+ bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info);
} else {
if (bdev->bd_contains == bdev) {
ret = 0;
@@ -1523,12 +1549,6 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
kill_bdev(bdev);
bdev_write_inode(bdev);
- /*
- * Detaching bdev inode from its wb in __destroy_inode()
- * is too late: the queue which embeds its bdi (along with
- * root wb) can be gone as soon as we put_disk() below.
- */
- inode_detach_wb(bdev->bd_inode);
}
if (bdev->bd_contains == bdev) {
if (disk->fops->release)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 383717ccecc7..d2c3edb50702 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6441,7 +6441,8 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
goto out_unlock_inode;
} else {
btrfs_update_inode(trans, root, inode);
- d_instantiate_new(dentry, inode);
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
}
out_unlock:
@@ -6516,7 +6517,8 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
goto out_unlock_inode;
BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
- d_instantiate_new(dentry, inode);
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
out_unlock:
btrfs_end_transaction(trans, root);
@@ -6659,7 +6661,12 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
if (err)
goto out_fail_inode;
- d_instantiate_new(dentry, inode);
+ d_instantiate(dentry, inode);
+ /*
+ * mkdir is special. We're unlocking after we call d_instantiate
+ * to avoid a race with nfsd calling d_instantiate.
+ */
+ unlock_new_inode(inode);
drop_on_err = 0;
out_fail:
@@ -9810,7 +9817,8 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
goto out_unlock_inode;
}
- d_instantiate_new(dentry, inode);
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
out_unlock:
btrfs_end_transaction(trans, root);
diff --git a/fs/buffer.c b/fs/buffer.c
index 6f7d519a093b..14ce7b24f32a 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -621,6 +621,18 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
}
EXPORT_SYMBOL(mark_buffer_dirty_inode);
+/*
+ * Record the current task as the one that dirtied @page.  The owner is only
+ * stored when CONFIG_BLK_DEV_IO_TRACE is enabled; otherwise this is a no-op.
+ */
+static inline void save_dirty_task(struct page *page)
+{
+#ifdef CONFIG_BLK_DEV_IO_TRACE
+	page->tsk_dirty = current;
+#endif
+}
+
/*
* Mark the page dirty, and set it dirty in the radix tree, and mark the inode
* dirty.
@@ -641,6 +653,7 @@ static void __set_page_dirty(struct page *page, struct address_space *mapping,
account_page_dirtied(page, mapping, memcg);
radix_tree_tag_set(&mapping->page_tree,
page_index(page), PAGECACHE_TAG_DIRTY);
+ save_dirty_task(page);
}
spin_unlock_irqrestore(&mapping->tree_lock, flags);
}
@@ -1466,12 +1479,48 @@ static bool has_bh_in_lru(int cpu, void *dummy)
return 0;
}
+/*
+ * Per-CPU callback: if @arg (a buffer_head) is cached in this CPU's bh LRU,
+ * release it with brelse() and clear the slot.  Only the first matching slot
+ * is touched.
+ */
+static void __evict_bh_lru(void *arg)
+{
+	struct buffer_head *victim = arg;
+	struct bh_lru *lru = &get_cpu_var(bh_lrus);
+	int slot = 0;
+
+	while (slot < BH_LRU_SIZE && lru->bhs[slot] != victim)
+		slot++;
+
+	if (slot < BH_LRU_SIZE) {
+		brelse(victim);
+		lru->bhs[slot] = NULL;
+	}
+
+	put_cpu_var(bh_lrus);
+}
+
+/*
+ * Predicate for on_each_cpu_cond(): report whether @arg (a buffer_head) is
+ * currently held in the bh LRU of @cpu.
+ */
+static bool bh_exists_in_lru(int cpu, void *arg)
+{
+	struct bh_lru *lru = per_cpu_ptr(&bh_lrus, cpu);
+	struct buffer_head *wanted = arg;
+	bool found = false;
+	int slot;
+
+	for (slot = 0; slot < BH_LRU_SIZE && !found; slot++)
+		found = (lru->bhs[slot] == wanted);
+
+	return found;
+}
void invalidate_bh_lrus(void)
{
on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1, GFP_KERNEL);
}
EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
+/*
+ * Evict @bh from the bh LRU of every CPU that has it cached.
+ * bh_exists_in_lru() selects the CPUs, __evict_bh_lru() runs on each of
+ * them, and the call waits for them to finish (wait argument is 1).
+ */
+static void evict_bh_lrus(struct buffer_head *bh)
+{
+ on_each_cpu_cond(bh_exists_in_lru, __evict_bh_lru, bh, 1, GFP_ATOMIC);
+}
+
void set_bh_page(struct buffer_head *bh,
struct page *page, unsigned long offset)
{
@@ -3192,8 +3241,15 @@ drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
do {
if (buffer_write_io_error(bh) && page->mapping)
set_bit(AS_EIO, &page->mapping->flags);
- if (buffer_busy(bh))
- goto failed;
+ if (buffer_busy(bh)) {
+ /*
+ * Check if the busy failure was due to an
+ * outstanding LRU reference
+ */
+ evict_bh_lrus(bh);
+ if (buffer_busy(bh))
+ goto failed;
+ }
bh = bh->b_this_page;
} while (bh != head);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 0141aba9eca6..79a1bad88931 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3406,13 +3406,13 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
* should have access to this page, we're safe to simply set
* PG_locked without checking it first.
*/
- __set_page_locked(page);
+ __SetPageLocked(page);
rc = add_to_page_cache_locked(page, mapping,
page->index, gfp);
/* give up if we can't stick it in the cache */
if (rc) {
- __clear_page_locked(page);
+ __ClearPageLocked(page);
return rc;
}
@@ -3433,9 +3433,9 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
if (*bytes + PAGE_CACHE_SIZE > rsize)
break;
- __set_page_locked(page);
+ __SetPageLocked(page);
if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
- __clear_page_locked(page);
+ __ClearPageLocked(page);
break;
}
list_move_tail(&page->lru, tmplist);
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index a52ca5cba015..42e014cdd59f 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -871,6 +871,9 @@ COMPATIBLE_IOCTL(TIOCGPTN)
COMPATIBLE_IOCTL(TIOCSPTLCK)
COMPATIBLE_IOCTL(TIOCSERGETLSR)
COMPATIBLE_IOCTL(TIOCSIG)
+COMPATIBLE_IOCTL(TIOCPMGET)
+COMPATIBLE_IOCTL(TIOCPMPUT)
+COMPATIBLE_IOCTL(TIOCPMACT)
#ifdef TIOCSRS485
COMPATIBLE_IOCTL(TIOCSRS485)
#endif
diff --git a/fs/dcache.c b/fs/dcache.c
index b056cf8271a5..86f52a555dec 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1398,7 +1398,7 @@ static enum d_walk_ret select_collect(void *_data, struct dentry *dentry)
goto out;
if (dentry->d_flags & DCACHE_SHRINK_LIST) {
- data->found++;
+ goto out;
} else {
if (dentry->d_flags & DCACHE_LRU_LIST)
d_lru_del(dentry);
@@ -1898,28 +1898,6 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
EXPORT_SYMBOL(d_instantiate_unique);
-/*
- * This should be equivalent to d_instantiate() + unlock_new_inode(),
- * with lockdep-related part of unlock_new_inode() done before
- * anything else. Use that instead of open-coding d_instantiate()/
- * unlock_new_inode() combinations.
- */
-void d_instantiate_new(struct dentry *entry, struct inode *inode)
-{
- BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
- BUG_ON(!inode);
- lockdep_annotate_inode_mutex_key(inode);
- security_d_instantiate(entry, inode);
- spin_lock(&inode->i_lock);
- __d_instantiate(entry, inode);
- WARN_ON(!(inode->i_state & I_NEW));
- inode->i_state &= ~I_NEW;
- smp_mb();
- wake_up_bit(&inode->i_state, __I_NEW);
- spin_unlock(&inode->i_lock);
-}
-EXPORT_SYMBOL(d_instantiate_new);
-
/**
* d_instantiate_no_diralias - instantiate a non-aliased dentry
* @entry: dentry to complete
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 44f49d86d714..da574a74a467 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -399,6 +399,7 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
if (dio->is_async && dio->rw == READ && dio->should_dirty)
bio_set_pages_dirty(bio);
+ bio->bi_dio_inode = dio->inode;
dio->bio_bdev = bio->bi_bdev;
if (sdio->submit_io) {
@@ -413,6 +414,19 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
sdio->logical_offset_in_bio = 0;
}
+/*
+ * Return the inode recorded in @bio->bi_dio_inode (set by dio_bio_submit()),
+ * or NULL when @bio itself is NULL.
+ */
+struct inode *dio_bio_get_inode(struct bio *bio)
+{
+	return bio ? bio->bi_dio_inode : NULL;
+}
+EXPORT_SYMBOL(dio_bio_get_inode);
+
/*
* Release any resources in case of a failure
*/
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index d72d52b90433..ddf319bcfccd 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -13,7 +13,7 @@
/* A global variable is a bit ugly, but it keeps the code simple */
int sysctl_drop_caches;
-static void drop_pagecache_sb(struct super_block *sb, void *unused)
+void drop_pagecache_sb(struct super_block *sb, void *unused)
{
struct inode *inode, *toput_inode = NULL;
diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile
index 49678a69947d..c29cdd20d08a 100644
--- a/fs/ecryptfs/Makefile
+++ b/fs/ecryptfs/Makefile
@@ -4,7 +4,7 @@
obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o
-ecryptfs-y := dentry.o file.o inode.o main.o super.o mmap.o read_write.o \
+ecryptfs-y := dentry.o file.o inode.o main.o super.o mmap.o read_write.o events.o \
crypto.o keystore.o kthread.o debug.o
ecryptfs-$(CONFIG_ECRYPT_FS_MESSAGING) += messaging.o miscdev.o
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 80d6901493cf..cf0186fd9bfe 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -35,6 +35,7 @@
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <asm/unaligned.h>
+#include <linux/ecryptfs.h>
#include "ecryptfs_kernel.h"
#define DECRYPT 0
@@ -350,9 +351,9 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
|| !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED));
if (unlikely(ecryptfs_verbosity > 0)) {
ecryptfs_printk(KERN_DEBUG, "Key size [%zd]; key:\n",
- crypt_stat->key_size);
+ ecryptfs_get_key_size_to_enc_data(crypt_stat));
ecryptfs_dump_hex(crypt_stat->key,
- crypt_stat->key_size);
+ ecryptfs_get_key_size_to_enc_data(crypt_stat));
}
init_completion(&ecr.completion);
@@ -371,7 +372,7 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
/* Consider doing this once, when the file is opened */
if (!(crypt_stat->flags & ECRYPTFS_KEY_SET)) {
rc = crypto_ablkcipher_setkey(crypt_stat->tfm, crypt_stat->key,
- crypt_stat->key_size);
+ ecryptfs_get_key_size_to_enc_data(crypt_stat));
if (rc) {
ecryptfs_printk(KERN_ERR,
"Error setting key; rc = [%d]\n",
@@ -466,6 +467,30 @@ out:
return rc;
}
+/*
+ * init_ecryption_parameters - find out who performs the crypto for a file
+ * @hw_crypt: set to the result of the provider's is_hw_crypt_cb() when the
+ *            cipher is supported and that callback exists, else false
+ * @cipher_supported: set to the result of the provider's
+ *                    is_cipher_supported_cb(@crypt_stat), else false
+ * @crypt_stat: cryptographic context of the inode being processed
+ *
+ * Both outputs default to false.  Nothing is written when either output
+ * pointer is NULL.
+ */
+static void init_ecryption_parameters(bool *hw_crypt, bool *cipher_supported,
+		struct ecryptfs_crypt_stat *crypt_stat)
+{
+	struct ecryptfs_events *events;
+
+	if (!hw_crypt || !cipher_supported)
+		return;
+
+	*cipher_supported = false;
+	*hw_crypt = false;
+
+	/*
+	 * Fetch the events table once so that the NULL check and every
+	 * callback lookup below use the same pointer, instead of calling
+	 * get_events() again between the check and the dereference.
+	 */
+	events = get_events();
+	if (!events || !events->is_cipher_supported_cb)
+		return;
+
+	*cipher_supported = events->is_cipher_supported_cb(crypt_stat);
+	if (!*cipher_supported)
+		return;
+
+	/*
+	 * The external algorithm applies.  The hardware-crypto query is
+	 * optional, so it is only called when the provider supplied it.
+	 */
+	if (events->is_hw_crypt_cb)
+		*hw_crypt = events->is_hw_crypt_cb();
+}
+
/**
* ecryptfs_encrypt_page
* @page: Page mapped from the eCryptfs inode for the file; contains
@@ -491,11 +516,18 @@ int ecryptfs_encrypt_page(struct page *page)
loff_t extent_offset;
loff_t lower_offset;
int rc = 0;
+ bool is_hw_crypt;
+ bool is_cipher_supported;
+
ecryptfs_inode = page->mapping->host;
crypt_stat =
&(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat);
BUG_ON(!(crypt_stat->flags & ECRYPTFS_ENCRYPTED));
+
+ init_ecryption_parameters(&is_hw_crypt,
+ &is_cipher_supported, crypt_stat);
+
enc_extent_page = alloc_page(GFP_USER);
if (!enc_extent_page) {
rc = -ENOMEM;
@@ -503,24 +535,51 @@ int ecryptfs_encrypt_page(struct page *page)
"encrypted extent\n");
goto out;
}
-
- for (extent_offset = 0;
- extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size);
- extent_offset++) {
- rc = crypt_extent(crypt_stat, enc_extent_page, page,
- extent_offset, ENCRYPT);
- if (rc) {
- printk(KERN_ERR "%s: Error encrypting extent; "
- "rc = [%d]\n", __func__, rc);
- goto out;
+ if (is_hw_crypt) {
+ /* no need for encryption */
+ } else {
+ for (extent_offset = 0;
+ extent_offset <
+ (PAGE_CACHE_SIZE / crypt_stat->extent_size);
+ extent_offset++) {
+
+ if (is_cipher_supported) {
+ if (!get_events()->encrypt_cb) {
+ rc = -EPERM;
+ goto out;
+ }
+ rc = get_events()->encrypt_cb(page,
+ enc_extent_page,
+ ecryptfs_inode_to_lower(
+ ecryptfs_inode),
+ extent_offset);
+ } else {
+ rc = crypt_extent(crypt_stat,
+ enc_extent_page, page,
+ extent_offset, ENCRYPT);
+ }
+ if (rc) {
+ ecryptfs_printk(KERN_ERR,
+ "%s: Error encrypting; rc = [%d]\n",
+ __func__, rc);
+ goto out;
+ }
}
}
lower_offset = lower_offset_for_page(crypt_stat, page);
- enc_extent_virt = kmap(enc_extent_page);
+ if (is_hw_crypt)
+ enc_extent_virt = kmap(page);
+ else
+ enc_extent_virt = kmap(enc_extent_page);
+
rc = ecryptfs_write_lower(ecryptfs_inode, enc_extent_virt, lower_offset,
PAGE_CACHE_SIZE);
- kunmap(enc_extent_page);
+ if (!is_hw_crypt)
+ kunmap(enc_extent_page);
+ else
+ kunmap(page);
+
if (rc < 0) {
ecryptfs_printk(KERN_ERR,
"Error attempting to write lower page; rc = [%d]\n",
@@ -559,6 +618,8 @@ int ecryptfs_decrypt_page(struct page *page)
unsigned long extent_offset;
loff_t lower_offset;
int rc = 0;
+ bool is_cipher_supported;
+ bool is_hw_crypt;
ecryptfs_inode = page->mapping->host;
crypt_stat =
@@ -577,13 +638,33 @@ int ecryptfs_decrypt_page(struct page *page)
goto out;
}
+ init_ecryption_parameters(&is_hw_crypt,
+ &is_cipher_supported, crypt_stat);
+
+ if (is_hw_crypt) {
+ rc = 0;
+ return rc;
+ }
+
for (extent_offset = 0;
extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size);
extent_offset++) {
- rc = crypt_extent(crypt_stat, page, page,
+ if (is_cipher_supported) {
+ if (!get_events()->decrypt_cb) {
+ rc = -EPERM;
+ goto out;
+ }
+
+ rc = get_events()->decrypt_cb(page, page,
+ ecryptfs_inode_to_lower(ecryptfs_inode),
+ extent_offset);
+
+ } else
+ rc = crypt_extent(crypt_stat, page, page,
extent_offset, DECRYPT);
+
if (rc) {
- printk(KERN_ERR "%s: Error encrypting extent; "
+ ecryptfs_printk(KERN_ERR, "%s: Error decrypting extent;"
"rc = [%d]\n", __func__, rc);
goto out;
}
@@ -612,7 +693,7 @@ int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat)
"Initializing cipher [%s]; strlen = [%d]; "
"key_size_bits = [%zd]\n",
crypt_stat->cipher, (int)strlen(crypt_stat->cipher),
- crypt_stat->key_size << 3);
+ ecryptfs_get_key_size_to_enc_data(crypt_stat) << 3);
mutex_lock(&crypt_stat->cs_tfm_mutex);
if (crypt_stat->tfm) {
rc = 0;
@@ -694,7 +775,7 @@ int ecryptfs_compute_root_iv(struct ecryptfs_crypt_stat *crypt_stat)
goto out;
}
rc = ecryptfs_calculate_md5(dst, crypt_stat, crypt_stat->key,
- crypt_stat->key_size);
+ ecryptfs_get_key_size_to_enc_data(crypt_stat));
if (rc) {
ecryptfs_printk(KERN_WARNING, "Error attempting to compute "
"MD5 while generating root IV\n");
@@ -721,6 +802,31 @@ static void ecryptfs_generate_new_key(struct ecryptfs_crypt_stat *crypt_stat)
}
}
+/*
+ * ecryptfs_generate_new_salt - append a fresh random salt to the session key
+ * @crypt_stat: cryptographic context whose key buffer receives the salt
+ *
+ * The salt length comes from ecryptfs_get_salt_size_for_cipher(); a length
+ * of zero means there is nothing to generate.  The salt is written into
+ * crypt_stat->key right after the first key_size bytes.
+ *
+ * Returns 0 on success or when no salt is needed.  Returns -EINVAL, after
+ * setting ECRYPTFS_SECURITY_WARNING, when ecryptfs_check_space_for_salt()
+ * rejects the key and salt sizes.
+ */
+static int ecryptfs_generate_new_salt(struct ecryptfs_crypt_stat *crypt_stat)
+{
+	size_t salt_len = ecryptfs_get_salt_size_for_cipher(crypt_stat);
+
+	if (!salt_len)
+		return 0;
+
+	if (ecryptfs_check_space_for_salt(crypt_stat->key_size, salt_len)) {
+		get_random_bytes(crypt_stat->key + crypt_stat->key_size,
+				 salt_len);
+		if (unlikely(ecryptfs_verbosity > 0)) {
+			ecryptfs_printk(KERN_DEBUG,
+					"Generated new session salt:\n");
+			ecryptfs_dump_hex(
+				crypt_stat->key + crypt_stat->key_size,
+				salt_len);
+		}
+		return 0;
+	}
+
+	ecryptfs_printk(KERN_WARNING, "not enough space for salt\n");
+	crypt_stat->flags |= ECRYPTFS_SECURITY_WARNING;
+	return -EINVAL;
+}
+
/**
* ecryptfs_copy_mount_wide_flags_to_inode_flags
* @crypt_stat: The inode's cryptographic context
@@ -823,7 +929,6 @@ int ecryptfs_new_file_context(struct inode *ecryptfs_inode)
struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
&ecryptfs_superblock_to_private(
ecryptfs_inode->i_sb)->mount_crypt_stat;
- int cipher_name_len;
int rc = 0;
ecryptfs_set_default_crypt_stat_vals(crypt_stat, mount_crypt_stat);
@@ -837,15 +942,19 @@ int ecryptfs_new_file_context(struct inode *ecryptfs_inode)
"to the inode key sigs; rc = [%d]\n", rc);
goto out;
}
- cipher_name_len =
- strlen(mount_crypt_stat->global_default_cipher_name);
- memcpy(crypt_stat->cipher,
+ strlcpy(crypt_stat->cipher,
mount_crypt_stat->global_default_cipher_name,
- cipher_name_len);
- crypt_stat->cipher[cipher_name_len] = '\0';
+ sizeof(crypt_stat->cipher));
+
+ strlcpy(crypt_stat->cipher_mode,
+ mount_crypt_stat->global_default_cipher_mode,
+ sizeof(crypt_stat->cipher_mode));
+
crypt_stat->key_size =
mount_crypt_stat->global_default_cipher_key_size;
ecryptfs_generate_new_key(crypt_stat);
+ ecryptfs_generate_new_salt(crypt_stat);
+
rc = ecryptfs_init_crypt_ctx(crypt_stat);
if (rc)
ecryptfs_printk(KERN_ERR, "Error initializing cryptographic "
@@ -971,7 +1080,8 @@ ecryptfs_cipher_code_str_map[] = {
{"twofish", RFC2440_CIPHER_TWOFISH},
{"cast6", RFC2440_CIPHER_CAST_6},
{"aes", RFC2440_CIPHER_AES_192},
- {"aes", RFC2440_CIPHER_AES_256}
+ {"aes", RFC2440_CIPHER_AES_256},
+ {"aes_xts", RFC2440_CIPHER_AES_XTS_256}
};
/**
@@ -999,6 +1109,11 @@ u8 ecryptfs_code_for_cipher_string(char *cipher_name, size_t key_bytes)
case 32:
code = RFC2440_CIPHER_AES_256;
}
+ } else if (strcmp(cipher_name, "aes_xts") == 0) {
+ switch (key_bytes) {
+ case 32:
+ code = RFC2440_CIPHER_AES_XTS_256;
+ }
} else {
for (i = 0; i < ARRAY_SIZE(ecryptfs_cipher_code_str_map); i++)
if (strcmp(cipher_name, map[i].cipher_str) == 0) {
@@ -1038,9 +1153,24 @@ int ecryptfs_read_and_validate_header_region(struct inode *inode)
u8 file_size[ECRYPTFS_SIZE_AND_MARKER_BYTES];
u8 *marker = file_size + ECRYPTFS_FILE_SIZE_BYTES;
int rc;
+ unsigned int ra_pages_org;
+ struct file *lower_file = NULL;
+
+ if (!inode)
+ return -EIO;
+ lower_file = ecryptfs_inode_to_private(inode)->lower_file;
+ if (!lower_file)
+ return -EIO;
+
+ /* temporarily disable the read-ahead mechanism */
+ ra_pages_org = lower_file->f_ra.ra_pages;
+ lower_file->f_ra.ra_pages = 0;
rc = ecryptfs_read_lower(file_size, 0, ECRYPTFS_SIZE_AND_MARKER_BYTES,
inode);
+ lower_file->f_ra.ra_pages = ra_pages_org;
+ /* restore the read-ahead mechanism */
+
if (rc < ECRYPTFS_SIZE_AND_MARKER_BYTES)
return rc >= 0 ? -EINVAL : rc;
rc = ecryptfs_validate_marker(marker);
@@ -1430,6 +1560,11 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry)
struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
&ecryptfs_superblock_to_private(
ecryptfs_dentry->d_sb)->mount_crypt_stat;
+ unsigned int ra_pages_org;
+ struct file *lower_file =
+ ecryptfs_inode_to_private(ecryptfs_inode)->lower_file;
+ if (!lower_file)
+ return -EIO;
ecryptfs_copy_mount_wide_flags_to_inode_flags(crypt_stat,
mount_crypt_stat);
@@ -1441,8 +1576,14 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry)
__func__);
goto out;
}
+ /* disable the read-ahead mechanism */
+ ra_pages_org = lower_file->f_ra.ra_pages;
+ lower_file->f_ra.ra_pages = 0;
+
rc = ecryptfs_read_lower(page_virt, 0, crypt_stat->extent_size,
ecryptfs_inode);
+ lower_file->f_ra.ra_pages = ra_pages_org; /* restore it back */
+
if (rc >= 0)
rc = ecryptfs_read_headers_virt(page_virt, crypt_stat,
ecryptfs_dentry,
diff --git a/fs/ecryptfs/debug.c b/fs/ecryptfs/debug.c
index 3d2bdf546ec6..0556af1adfb7 100644
--- a/fs/ecryptfs/debug.c
+++ b/fs/ecryptfs/debug.c
@@ -119,3 +119,32 @@ void ecryptfs_dump_hex(char *data, int bytes)
printk("\n");
}
+/*
+ * ecryptfs_dump_salt_hex - hex-dump the salt stored behind a key
+ * @data: buffer in which the salt starts @key_size bytes in
+ * @key_size: number of bytes preceding the salt in @data
+ * @crypt_stat: context used to look up the salt size for the cipher
+ *
+ * Prints nothing when the salt size is zero or when
+ * ecryptfs_check_space_for_salt() rejects the key and salt sizes.
+ */
+void ecryptfs_dump_salt_hex(char *data, int key_size,
+		const struct ecryptfs_crypt_stat *crypt_stat)
+{
+	size_t salt_len = ecryptfs_get_salt_size_for_cipher(crypt_stat);
+
+	if (!salt_len || !ecryptfs_check_space_for_salt(key_size, salt_len))
+		return;
+
+	ecryptfs_printk(KERN_DEBUG, "Decrypted session salt key:\n");
+	ecryptfs_dump_hex(data + key_size, salt_len);
+}
+
+
+/*
+ * ecryptfs_dump_cipher - log the cipher name and cipher mode of @stat
+ * @stat: cryptographic context to describe; a NULL pointer is ignored
+ *
+ * cipher_mode is an array embedded in struct ecryptfs_crypt_stat, and cipher
+ * is presumably declared the same way (confirm against the struct), so
+ * neither address can be NULL and no per-field check is needed.  Both lines
+ * are always printed.
+ */
+void ecryptfs_dump_cipher(struct ecryptfs_crypt_stat *stat)
+{
+	if (!stat)
+		return;
+
+	ecryptfs_printk(KERN_DEBUG, "ecryptfs cipher is %s\n", stat->cipher);
+	ecryptfs_printk(KERN_DEBUG, "ecryptfs cipher mode is %s\n",
+			stat->cipher_mode);
+}
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index eae9cdb8af46..f5908e91eb17 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -254,6 +254,7 @@ struct ecryptfs_crypt_stat {
struct mutex cs_tfm_mutex;
struct mutex cs_hash_tfm_mutex;
struct mutex cs_mutex;
+ unsigned char cipher_mode[ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1];
};
/* inode private data. */
@@ -354,6 +355,8 @@ struct ecryptfs_mount_crypt_stat {
unsigned char global_default_fn_cipher_name[
ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1];
char global_default_fnek_sig[ECRYPTFS_SIG_SIZE_HEX + 1];
+ unsigned char global_default_cipher_mode[ECRYPTFS_MAX_CIPHER_NAME_SIZE
+ + 1];
};
/* superblock private data. */
@@ -536,6 +539,53 @@ ecryptfs_dentry_to_lower_path(struct dentry *dentry)
return &((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path;
}
+/**
+ * ecryptfs_get_full_cipher - build the "<cipher>_<mode>" name
+ * @cipher: cipher name
+ * @mode: cipher mode; may be an empty string
+ * @final: buffer that receives the combined name
+ * @final_size: size of @final in bytes
+ *
+ * @final is always zeroed first.  With a non-empty @mode the combined name
+ * is formatted into @final (bounded by @final_size) and @final is returned.
+ * With an empty @mode, @cipher itself is returned and @final stays zeroed.
+ */
+static inline unsigned char *ecryptfs_get_full_cipher(
+	unsigned char *cipher, unsigned char *mode,
+	unsigned char *final, size_t final_size)
+{
+	memset(final, 0, final_size);
+
+	if (mode[0] == '\0')
+		return cipher;
+
+	snprintf(final, final_size, "%s_%s", cipher, mode);
+	return final;
+}
+
+/**
+ * ecryptfs_parse_full_cipher - split a "<cipher>[_<mode>]" string
+ * @s: string to parse; nothing is done when NULL
+ * @cipher: receives the text before the first '_' (all of @s when there is
+ *          no '_'); nothing is done when NULL
+ * @mode: optional; receives the text after the first '_'
+ *
+ * Each output that is written is NUL-terminated and limited to
+ * ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1 bytes.  @mode is written only when @s
+ * contains a '_'; otherwise it is left untouched.
+ */
+static inline void ecryptfs_parse_full_cipher(
+	char *s, char *cipher, char *mode)
+{
+	/* two names, +1 for '_' and +1 for the terminator */
+	char scratch[2*ECRYPTFS_MAX_CIPHER_NAME_SIZE+1+1];
+	char *rest = scratch;
+	char *head;
+
+	if (!s || !cipher)
+		return;
+
+	memset(scratch, 0, sizeof(scratch));
+	strlcpy(scratch, s, sizeof(scratch));
+
+	/* strsep() cuts at the first '_' and leaves the tail in rest */
+	head = strsep(&rest, "_");
+	strlcpy(cipher, head, ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1);
+
+	/* rest is NULL when no '_' was found, i.e. no mode was given */
+	if (rest && mode)
+		strlcpy(mode, rest, ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1);
+}
+
#define ecryptfs_printk(type, fmt, arg...) \
__ecryptfs_printk(type "%s: " fmt, __func__, ## arg);
__printf(1, 2)
@@ -584,6 +634,10 @@ int ecryptfs_encrypt_and_encode_filename(
const char *name, size_t name_size);
struct dentry *ecryptfs_lower_dentry(struct dentry *this_dentry);
void ecryptfs_dump_hex(char *data, int bytes);
+void ecryptfs_dump_salt_hex(char *data, int key_size,
+ const struct ecryptfs_crypt_stat *crypt_stat);
+extern void ecryptfs_dump_cipher(struct ecryptfs_crypt_stat *stat);
+
int virt_to_scatterlist(const void *addr, int size, struct scatterlist *sg,
int sg_size);
int ecryptfs_compute_root_iv(struct ecryptfs_crypt_stat *crypt_stat);
@@ -727,4 +781,33 @@ int ecryptfs_set_f_namelen(long *namelen, long lower_namelen,
int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat,
loff_t offset);
+void clean_inode_pages(struct address_space *mapping,
+ pgoff_t start, pgoff_t end);
+
+void ecryptfs_drop_pagecache_sb(struct super_block *sb, void *unused);
+
+void ecryptfs_free_events(void);
+
+void ecryptfs_freepage(struct page *page);
+
+struct ecryptfs_events *get_events(void);
+
+size_t ecryptfs_get_salt_size_for_cipher(
+ const struct ecryptfs_crypt_stat *crypt_stat);
+
+size_t ecryptfs_get_salt_size_for_cipher_mount(
+ const struct ecryptfs_mount_crypt_stat *mount_crypt_stat);
+
+size_t ecryptfs_get_key_size_to_enc_data(
+ const struct ecryptfs_crypt_stat *crypt_stat);
+
+size_t ecryptfs_get_key_size_to_store_key(
+ const struct ecryptfs_crypt_stat *crypt_stat);
+
+size_t ecryptfs_get_key_size_to_restore_key(size_t stored_key_size,
+ const struct ecryptfs_crypt_stat *crypt_stat);
+
+bool ecryptfs_check_space_for_salt(const size_t key_size,
+ const size_t salt_size);
+
#endif /* #ifndef ECRYPTFS_KERNEL_H */
diff --git a/fs/ecryptfs/events.c b/fs/ecryptfs/events.c
new file mode 100644
index 000000000000..12e26c683cf6
--- /dev/null
+++ b/fs/ecryptfs/events.c
@@ -0,0 +1,393 @@
+/**
+ * eCryptfs: Linux filesystem encryption layer
+ * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/string.h>
+#include <linux/ecryptfs.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <linux/random.h>
+#include "ecryptfs_kernel.h"
+
+/* Taken around every allocation, release and handle check of events_ptr. */
+static DEFINE_MUTEX(events_mutex);
+/* Callback table of the registered client; NULL while nobody is registered. */
+struct ecryptfs_events *events_ptr = NULL;
+/* Random token returned by registration and compared on unregistration. */
+static int handle;
+
+/**
+ * Release the registered callback table, if any, and leave the
+ * events interface in the "nobody registered" state.
+ */
+void ecryptfs_free_events(void)
+{
+	mutex_lock(&events_mutex);
+
+	/* kfree(NULL) is a no-op, so no explicit NULL test is required */
+	kfree(events_ptr);
+	events_ptr = NULL;
+
+	mutex_unlock(&events_mutex);
+}
+
+/**
+ * Register to ecryptfs events, by passing callback
+ * functions to be called upon events occurrence.
+ *
+ * Only one client can be registered at a time; the callbacks in @ops
+ * are copied, so @ops does not need to outlive this call.
+ *
+ * Returns a strictly positive handle to be passed to the unregister
+ * function, or a negative errno on failure: -EINVAL if @ops is NULL,
+ * -EPERM if a client is already registered, -ENOMEM if the callback
+ * table cannot be allocated.
+ */
+int ecryptfs_register_to_events(const struct ecryptfs_events *ops)
+{
+	int ret_value = 0;
+	unsigned int rnd = 0;
+
+	if (!ops)
+		return -EINVAL;
+
+	mutex_lock(&events_mutex);
+
+	if (events_ptr != NULL) {
+		ecryptfs_printk(KERN_ERR,
+			"already registered!\n");
+		ret_value = -EPERM;
+		goto out;
+	}
+	events_ptr =
+		kzalloc(sizeof(struct ecryptfs_events), GFP_KERNEL);
+
+	if (!events_ptr) {
+		ecryptfs_printk(KERN_ERR, "malloc failure\n");
+		ret_value = -ENOMEM;
+		goto out;
+	}
+	/* copy the callbacks */
+	events_ptr->open_cb = ops->open_cb;
+	events_ptr->release_cb = ops->release_cb;
+	events_ptr->encrypt_cb = ops->encrypt_cb;
+	events_ptr->decrypt_cb = ops->decrypt_cb;
+	events_ptr->is_cipher_supported_cb =
+		ops->is_cipher_supported_cb;
+	events_ptr->is_hw_crypt_cb = ops->is_hw_crypt_cb;
+	events_ptr->get_salt_key_size_cb = ops->get_salt_key_size_cb;
+
+	/*
+	 * The handle travels through the same return value as the negative
+	 * errno codes above. Drop the sign bit and avoid zero so that a
+	 * successful registration can never be mistaken for a failure.
+	 */
+	get_random_bytes(&rnd, sizeof(rnd));
+	handle = (int)(rnd >> 1);
+	if (handle == 0)
+		handle = 1;
+	ret_value = handle;
+
+out:
+	mutex_unlock(&events_mutex);
+	return ret_value;
+}
+
+/**
+ * Unregister from ecryptfs events.
+ *
+ * @user_handle must be the value returned by the register function.
+ *
+ * Returns 0 on success, -EINVAL when nothing is registered, or
+ * ECRYPTFS_INVALID_EVENTS_HANDLE when @user_handle does not match.
+ */
+int ecryptfs_unregister_from_events(int user_handle)
+{
+	int rc;
+
+	mutex_lock(&events_mutex);
+
+	if (events_ptr == NULL) {
+		rc = -EINVAL;
+	} else if (handle != user_handle) {
+		rc = ECRYPTFS_INVALID_EVENTS_HANDLE;
+	} else {
+		kfree(events_ptr);
+		events_ptr = NULL;
+		rc = 0;
+	}
+
+	mutex_unlock(&events_mutex);
+	return rc;
+}
+
+/**
+ * Tell whether the page index @offset falls inside the ecryptfs
+ * metadata area of a file.
+ *
+ * @data is the ecryptfs data handed to one of the registered
+ * callbacks; it is interpreted as a struct ecryptfs_crypt_stat.
+ *
+ * Returns true only when @data is non-NULL, the
+ * ECRYPTFS_METADATA_IN_XATTR flag is clear, and @offset is below
+ * metadata_size / PAGE_CACHE_SIZE.
+ */
+bool ecryptfs_is_page_in_metadata(const void *data, pgoff_t offset)
+{
+	const struct ecryptfs_crypt_stat *crypt_stat = data;
+
+	if (!crypt_stat) {
+		ecryptfs_printk(KERN_ERR, "ecryptfs_is_page_in_metadata: invalid data parameter\n");
+		return false;
+	}
+
+	/* with the xattr flag set, no page is reported as metadata */
+	if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
+		return false;
+
+	return offset < (crypt_stat->metadata_size/PAGE_CACHE_SIZE);
+}
+
+/**
+ * Given two ecryptfs data pointers, the function
+ * decides whether they are equal.
+ *
+ * Equality is identity: the two pointers are compared, the objects
+ * they point to are never dereferenced.
+ */
+inline bool ecryptfs_is_data_equal(const void *data1, const void *data2)
+{
+ /* pointer comparison only; contents are not inspected */
+ return data1 == data2;
+}
+
+/**
+ * Return the key_size field of the given ecryptfs data
+ * (a struct ecryptfs_crypt_stat), or 0 when @data is NULL.
+ */
+size_t ecryptfs_get_key_size(const void *data)
+{
+	const struct ecryptfs_crypt_stat *crypt_stat = data;
+
+	return crypt_stat ? crypt_stat->key_size : 0;
+}
+
+/**
+ * Return the salt size for the given ecryptfs data, as reported by
+ * ecryptfs_get_salt_size_for_cipher(). A NULL @data is logged and
+ * yields 0.
+ *
+ * !!! crypt_stat cipher name and mode must be initialized
+ */
+size_t ecryptfs_get_salt_size(const void *data)
+{
+	if (data)
+		return ecryptfs_get_salt_size_for_cipher(data);
+
+	ecryptfs_printk(KERN_ERR,
+		"ecryptfs_get_salt_size: invalid data parameter\n");
+	return 0;
+}
+
+/**
+ * Given ecryptfs data and cipher string, the function
+ * returns true if provided cipher and the one in ecryptfs match.
+ *
+ * @data:        ecryptfs data (a struct ecryptfs_crypt_stat)
+ * @cipher:      cipher name to compare against
+ * @cipher_size: number of bytes readable at @cipher; must be non-zero
+ *               and no larger than the internal full-cipher buffer
+ *
+ * At most @cipher_size bytes of @cipher are examined, so the caller's
+ * buffer does not have to be NUL-terminated.
+ *
+ * Returns false on invalid arguments, when the full cipher name cannot
+ * be built, or when the names differ.
+ */
+bool ecryptfs_cipher_match(const void *data,
+		const unsigned char *cipher, size_t cipher_size)
+{
+	unsigned char final[2*ECRYPTFS_MAX_CIPHER_NAME_SIZE+1];
+	const unsigned char *ecryptfs_cipher = NULL;
+	struct ecryptfs_crypt_stat *stat = NULL;
+	size_t cipher_len;
+
+	/* ecryptfs_printk() already prefixes every message with __func__ */
+	if (!data || !cipher) {
+		ecryptfs_printk(KERN_ERR, "invalid data parameter\n");
+		return false;
+	}
+
+	if (!cipher_size || cipher_size > sizeof(final)) {
+		ecryptfs_printk(KERN_ERR, "invalid cipher_size\n");
+		return false;
+	}
+
+	stat = (struct ecryptfs_crypt_stat *)data;
+	ecryptfs_cipher = ecryptfs_get_full_cipher(stat->cipher,
+			stat->cipher_mode,
+			final, sizeof(final));
+
+	if (!ecryptfs_cipher) {
+		ecryptfs_printk(KERN_ERR,
+			"internal error while parsing cipher\n");
+		return false;
+	}
+
+	/* never read beyond the cipher_size bytes the caller vouched for */
+	cipher_len = strnlen((const char *)cipher, cipher_size);
+
+	if (strlen((const char *)ecryptfs_cipher) != cipher_len ||
+	    memcmp(ecryptfs_cipher, cipher, cipher_len)) {
+		if (ecryptfs_verbosity > 0)
+			ecryptfs_dump_cipher(stat);
+
+		return false;
+	}
+
+	return true;
+}
+
+/**
+ * Return a pointer to the key array of the given ecryptfs data
+ * (a struct ecryptfs_crypt_stat). A NULL @data is logged and
+ * yields NULL.
+ */
+const unsigned char *ecryptfs_get_key(const void *data)
+{
+	const struct ecryptfs_crypt_stat *crypt_stat = data;
+
+	if (crypt_stat)
+		return crypt_stat->key;
+
+	ecryptfs_printk(KERN_ERR,
+		"ecryptfs_get_key: invalid data parameter\n");
+	return NULL;
+}
+
+/**
+ * Given ecryptfs data, the function
+ * returns file encryption salt.
+ *
+ * The salt shares the key array with the regular key and is stored
+ * directly behind it, so it starts key_size bytes into the array.
+ * Its length is given by ecryptfs_get_salt_size().
+ *
+ * Returns NULL (after logging) when @data is NULL.
+ */
+const unsigned char *ecryptfs_get_salt(const void *data)
+{
+	struct ecryptfs_crypt_stat *stat = NULL;
+
+	if (!data) {
+		ecryptfs_printk(KERN_ERR,
+			"ecryptfs_get_salt: invalid data parameter\n");
+		return NULL;
+	}
+	stat = (struct ecryptfs_crypt_stat *)data;
+
+	/*
+	 * Skip over the key, not over the salt: offsetting by the salt
+	 * size only lands on the salt when both sizes happen to be equal.
+	 */
+	return stat->key + stat->key_size;
+}
+
+/**
+ * Returns ecryptfs events pointer: the callback table of the
+ * registered client, or NULL when nobody is registered.
+ *
+ * The pointer is read without taking events_mutex.
+ */
+inline struct ecryptfs_events *get_events(void)
+{
+ return events_ptr;
+}
+
+/**
+ * If external crypto module requires salt in addition to key,
+ * we store it as part of key array (if there is enough space).
+ *
+ * Returns true when @key_size + @salt_size does not exceed
+ * ECRYPTFS_MAX_KEY_BYTES, i.e. when both fit into the array
+ * allocated for the regular key.
+ */
+bool ecryptfs_check_space_for_salt(const size_t key_size,
+		const size_t salt_size)
+{
+	return (salt_size + key_size) <= ECRYPTFS_MAX_KEY_BYTES;
+}
+
+/*
+ * A salt used by an external crypto module lives in the same array as
+ * the regular key, but only that module consumes it; every internal
+ * crypto operation must ignore it.
+ *
+ * Returns the key size to use for data encryption and all other
+ * general purposes: crypt_stat->key_size, or 0 for a NULL crypt_stat.
+ */
+size_t ecryptfs_get_key_size_to_enc_data(
+		const struct ecryptfs_crypt_stat *crypt_stat)
+{
+	return crypt_stat ? crypt_stat->key_size : 0;
+}
+
+/*
+ * A salt used by an external crypto module lives in the same array as
+ * the regular key. Only that module consumes it, yet it has to be
+ * saved and restored (in encrypted form) in the ecryptfs header
+ * together with the regular key.
+ *
+ * Returns the number of bytes to store persistently: key size plus
+ * salt size when both fit into the key array, otherwise (with a
+ * warning) the key size alone. Returns 0 for a NULL crypt_stat.
+ *
+ * !!! crypt_stat cipher name and mode must be initialized
+ */
+size_t ecryptfs_get_key_size_to_store_key(
+		const struct ecryptfs_crypt_stat *crypt_stat)
+{
+	size_t salt_len;
+
+	if (!crypt_stat)
+		return 0;
+
+	salt_len = ecryptfs_get_salt_size(crypt_stat);
+
+	if (ecryptfs_check_space_for_salt(crypt_stat->key_size, salt_len))
+		return crypt_stat->key_size + salt_len;
+
+	ecryptfs_printk(KERN_WARNING,
+		"ecryptfs_get_key_size_to_store_key: not enough space for salt\n");
+	return crypt_stat->key_size;
+}
+
+/*
+ * A salt used by an external crypto module lives in the same array as
+ * the regular key. Only that module consumes it, yet it has to be
+ * saved and restored (in encrypted form) in the ecryptfs header
+ * together with the regular key.
+ *
+ * Given the size read back from storage, returns the size of the
+ * regular key: @stored_key_size minus the salt size. When the salt
+ * size is not smaller than @stored_key_size a warning is logged and
+ * @stored_key_size is returned unchanged. Returns 0 for a NULL
+ * crypt_stat.
+ *
+ * !!! crypt_stat cipher name and mode must be initialized
+ */
+size_t ecryptfs_get_key_size_to_restore_key(size_t stored_key_size,
+		const struct ecryptfs_crypt_stat *crypt_stat)
+{
+	size_t salt_len;
+
+	if (!crypt_stat)
+		return 0;
+
+	salt_len = ecryptfs_get_salt_size_for_cipher(crypt_stat);
+
+	if (salt_len < stored_key_size)
+		return stored_key_size - salt_len;
+
+	ecryptfs_printk(KERN_WARNING,
+		"ecryptfs_get_key_size_to_restore_key: salt %zu >= stred size %zu\n",
+		salt_len, stored_key_size);
+
+	return stored_key_size;
+}
+
+/**
+ * Given crypt_stat, the function returns appropriate salt size.
+ *
+ * The size comes from the registered get_salt_key_size_cb callback;
+ * 0 is returned when no client or no such callback is registered.
+ */
+size_t ecryptfs_get_salt_size_for_cipher(
+		const struct ecryptfs_crypt_stat *crypt_stat)
+{
+	struct ecryptfs_events *events = get_events();
+
+	if (events && events->get_salt_key_size_cb)
+		return events->get_salt_key_size_cb(crypt_stat);
+
+	return 0;
+}
+
+/**
+ * Given mount_crypt_stat, the function returns appropriate salt size.
+ *
+ * The size comes from the registered get_salt_key_size_cb callback;
+ * 0 is returned when no client or no such callback is registered.
+ *
+ * NOTE(review): the same callback receives a struct ecryptfs_crypt_stat
+ * in ecryptfs_get_salt_size_for_cipher() but a mount crypt stat here;
+ * confirm the callback can tell the two apart.
+ */
+size_t ecryptfs_get_salt_size_for_cipher_mount(
+		const struct ecryptfs_mount_crypt_stat *crypt_stat)
+{
+	struct ecryptfs_events *events = get_events();
+
+	if (events && events->get_salt_key_size_cb)
+		return events->get_salt_key_size_cb(crypt_stat);
+
+	return 0;
+}
+
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 27794b137b24..c93fe5fce41e 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -31,6 +31,7 @@
#include <linux/security.h>
#include <linux/compat.h>
#include <linux/fs_stack.h>
+#include <linux/ecryptfs.h>
#include "ecryptfs_kernel.h"
/**
@@ -196,6 +197,9 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
int rc = 0;
struct ecryptfs_crypt_stat *crypt_stat = NULL;
struct dentry *ecryptfs_dentry = file->f_path.dentry;
+ int ret;
+
+
/* Private value of ecryptfs_dentry allocated in
* ecryptfs_lookup() */
struct ecryptfs_file_info *file_info;
@@ -235,12 +239,39 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
}
ecryptfs_set_file_lower(
file, ecryptfs_inode_to_private(inode)->lower_file);
+ if (d_is_dir(ecryptfs_dentry)) {
+ ecryptfs_printk(KERN_DEBUG, "This is a directory\n");
+ mutex_lock(&crypt_stat->cs_mutex);
+ crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
+ mutex_unlock(&crypt_stat->cs_mutex);
+ rc = 0;
+ goto out;
+ }
+
rc = read_or_initialize_metadata(ecryptfs_dentry);
if (rc)
goto out_put;
ecryptfs_printk(KERN_DEBUG, "inode w/ addr = [0x%p], i_ino = "
"[0x%.16lx] size: [0x%.16llx]\n", inode, inode->i_ino,
(unsigned long long)i_size_read(inode));
+
+ if (get_events() && get_events()->open_cb) {
+
+ ret = vfs_fsync(file, false);
+
+ if (ret)
+ ecryptfs_printk(KERN_ERR,
+ "failed to sync file ret = %d.\n", ret);
+
+ get_events()->open_cb(ecryptfs_inode_to_lower(inode),
+ crypt_stat);
+
+ if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) {
+ truncate_inode_pages(inode->i_mapping, 0);
+ truncate_inode_pages(
+ ecryptfs_inode_to_lower(inode)->i_mapping, 0);
+ }
+ }
goto out;
out_put:
ecryptfs_put_lower_file(inode);
@@ -307,6 +338,7 @@ static int ecryptfs_release(struct inode *inode, struct file *file)
ecryptfs_put_lower_file(inode);
kmem_cache_free(ecryptfs_file_info_cache,
ecryptfs_file_to_private(file));
+
return 0;
}
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 844d0c4da84f..7f277e843ea5 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -261,12 +261,15 @@ out:
*
* Returns zero on success; non-zero on error condition
*/
+
+
static int
ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
umode_t mode, bool excl)
{
struct inode *ecryptfs_inode;
int rc;
+ struct ecryptfs_crypt_stat *crypt_stat;
ecryptfs_inode = ecryptfs_do_create(directory_inode, ecryptfs_dentry,
mode);
@@ -276,6 +279,7 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
rc = PTR_ERR(ecryptfs_inode);
goto out;
}
+
/* At this point, a file exists on "disk"; we need to make sure
* that this on disk file is prepared to be an ecryptfs file */
rc = ecryptfs_initialize_file(ecryptfs_dentry, ecryptfs_inode);
@@ -287,7 +291,15 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
iput(ecryptfs_inode);
goto out;
}
- d_instantiate_new(ecryptfs_dentry, ecryptfs_inode);
+
+ crypt_stat = &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat;
+ if (get_events() && get_events()->open_cb)
+ get_events()->open_cb(
+ ecryptfs_inode_to_lower(ecryptfs_inode),
+ crypt_stat);
+
+ unlock_new_inode(ecryptfs_inode);
+ d_instantiate(ecryptfs_dentry, ecryptfs_inode);
out:
return rc;
}
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 20632ee51ae5..ea3d99ebb6ee 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -315,7 +315,8 @@ write_tag_66_packet(char *signature, u8 cipher_code,
* | File Encryption Key Size | 1 or 2 bytes |
* | File Encryption Key | arbitrary |
*/
- data_len = (5 + ECRYPTFS_SIG_SIZE_HEX + crypt_stat->key_size);
+ data_len = (5 + ECRYPTFS_SIG_SIZE_HEX +
+ ecryptfs_get_key_size_to_store_key(crypt_stat));
*packet = kmalloc(data_len, GFP_KERNEL);
message = *packet;
if (!message) {
@@ -335,8 +336,9 @@ write_tag_66_packet(char *signature, u8 cipher_code,
memcpy(&message[i], signature, ECRYPTFS_SIG_SIZE_HEX);
i += ECRYPTFS_SIG_SIZE_HEX;
/* The encrypted key includes 1 byte cipher code and 2 byte checksum */
- rc = ecryptfs_write_packet_length(&message[i], crypt_stat->key_size + 3,
- &packet_size_len);
+ rc = ecryptfs_write_packet_length(&message[i],
+ ecryptfs_get_key_size_to_store_key(crypt_stat) + 3,
+ &packet_size_len);
if (rc) {
ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet "
"header; cannot generate packet length\n");
@@ -344,9 +346,10 @@ write_tag_66_packet(char *signature, u8 cipher_code,
}
i += packet_size_len;
message[i++] = cipher_code;
- memcpy(&message[i], crypt_stat->key, crypt_stat->key_size);
- i += crypt_stat->key_size;
- for (j = 0; j < crypt_stat->key_size; j++)
+ memcpy(&message[i], crypt_stat->key,
+ ecryptfs_get_key_size_to_store_key(crypt_stat));
+ i += ecryptfs_get_key_size_to_store_key(crypt_stat);
+ for (j = 0; j < ecryptfs_get_key_size_to_store_key(crypt_stat); j++)
checksum += crypt_stat->key[j];
message[i++] = (checksum / 256) % 256;
message[i++] = (checksum % 256);
@@ -925,6 +928,7 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
struct ecryptfs_parse_tag_70_packet_silly_stack *s;
struct key *auth_tok_key = NULL;
int rc = 0;
+ char full_cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE];
(*packet_size) = 0;
(*filename_size) = 0;
@@ -984,12 +988,13 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
s->fnek_sig_hex[ECRYPTFS_SIG_SIZE_HEX] = '\0';
(*packet_size) += ECRYPTFS_SIG_SIZE;
s->cipher_code = data[(*packet_size)++];
- rc = ecryptfs_cipher_code_to_string(s->cipher_string, s->cipher_code);
+ rc = ecryptfs_cipher_code_to_string(full_cipher, s->cipher_code);
if (rc) {
printk(KERN_WARNING "%s: Cipher code [%d] is invalid\n",
__func__, s->cipher_code);
goto out;
}
+ ecryptfs_parse_full_cipher(full_cipher, s->cipher_string, 0);
rc = ecryptfs_find_auth_tok_for_sig(&auth_tok_key,
&s->auth_tok, mount_crypt_stat,
s->fnek_sig_hex);
@@ -1158,6 +1163,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
char *payload = NULL;
size_t payload_len = 0;
int rc;
+ char full_cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE];
rc = ecryptfs_get_auth_tok_sig(&auth_tok_sig, auth_tok);
if (rc) {
@@ -1191,21 +1197,31 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
rc);
goto out;
}
- auth_tok->session_key.flags |= ECRYPTFS_CONTAINS_DECRYPTED_KEY;
- memcpy(crypt_stat->key, auth_tok->session_key.decrypted_key,
- auth_tok->session_key.decrypted_key_size);
- crypt_stat->key_size = auth_tok->session_key.decrypted_key_size;
- rc = ecryptfs_cipher_code_to_string(crypt_stat->cipher, cipher_code);
+
+ rc = ecryptfs_cipher_code_to_string(full_cipher, cipher_code);
if (rc) {
ecryptfs_printk(KERN_ERR, "Cipher code [%d] is invalid\n",
cipher_code)
- goto out;
+ goto out;
}
+
+ auth_tok->session_key.flags |= ECRYPTFS_CONTAINS_DECRYPTED_KEY;
+ memcpy(crypt_stat->key, auth_tok->session_key.decrypted_key,
+ auth_tok->session_key.decrypted_key_size);
+ crypt_stat->key_size = ecryptfs_get_key_size_to_restore_key(
+ auth_tok->session_key.decrypted_key_size, crypt_stat);
+
+ ecryptfs_parse_full_cipher(full_cipher,
+ crypt_stat->cipher, crypt_stat->cipher_mode);
+
crypt_stat->flags |= ECRYPTFS_KEY_VALID;
if (ecryptfs_verbosity > 0) {
ecryptfs_printk(KERN_DEBUG, "Decrypted session key:\n");
ecryptfs_dump_hex(crypt_stat->key,
crypt_stat->key_size);
+
+ ecryptfs_dump_salt_hex(crypt_stat->key, crypt_stat->key_size,
+ crypt_stat);
}
out:
kfree(msg);
@@ -1387,6 +1403,7 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat,
struct ecryptfs_auth_tok_list_item *auth_tok_list_item;
size_t length_size;
int rc = 0;
+ char full_cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE];
(*packet_size) = 0;
(*new_auth_tok) = NULL;
@@ -1460,10 +1477,13 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat,
rc = -EINVAL;
goto out_free;
}
- rc = ecryptfs_cipher_code_to_string(crypt_stat->cipher,
+ rc = ecryptfs_cipher_code_to_string(full_cipher,
(u16)data[(*packet_size)]);
if (rc)
goto out_free;
+ ecryptfs_parse_full_cipher(full_cipher,
+ crypt_stat->cipher, crypt_stat->cipher_mode);
+
/* A little extra work to differentiate among the AES key
* sizes; see RFC2440 */
switch(data[(*packet_size)++]) {
@@ -1472,7 +1492,10 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat,
break;
default:
crypt_stat->key_size =
- (*new_auth_tok)->session_key.encrypted_key_size;
+ ecryptfs_get_key_size_to_restore_key(
+ (*new_auth_tok)->session_key.encrypted_key_size,
+ crypt_stat);
+
}
rc = ecryptfs_init_crypt_ctx(crypt_stat);
if (rc)
@@ -1720,7 +1743,7 @@ decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
mutex_lock(tfm_mutex);
rc = crypto_blkcipher_setkey(
desc.tfm, auth_tok->token.password.session_key_encryption_key,
- crypt_stat->key_size);
+ auth_tok->token.password.session_key_encryption_key_bytes);
if (unlikely(rc < 0)) {
mutex_unlock(tfm_mutex);
printk(KERN_ERR "Error setting key for crypto context\n");
@@ -1743,6 +1766,8 @@ decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
crypt_stat->key_size);
ecryptfs_dump_hex(crypt_stat->key,
crypt_stat->key_size);
+ ecryptfs_dump_salt_hex(crypt_stat->key, crypt_stat->key_size,
+ crypt_stat);
}
out:
return rc;
@@ -1979,12 +2004,17 @@ pki_encrypt_session_key(struct key *auth_tok_key,
size_t payload_len = 0;
struct ecryptfs_message *msg;
int rc;
+ unsigned char final[2*ECRYPTFS_MAX_CIPHER_NAME_SIZE+1];
rc = write_tag_66_packet(auth_tok->token.private_key.signature,
- ecryptfs_code_for_cipher_string(
- crypt_stat->cipher,
- crypt_stat->key_size),
- crypt_stat, &payload, &payload_len);
+ ecryptfs_code_for_cipher_string(
+ ecryptfs_get_full_cipher(
+ crypt_stat->cipher,
+ crypt_stat->cipher_mode,
+ final, sizeof(final)),
+ ecryptfs_get_key_size_to_enc_data(
+ crypt_stat)),
+ crypt_stat, &payload, &payload_len);
up_write(&(auth_tok_key->sem));
key_put(auth_tok_key);
if (rc) {
@@ -2042,7 +2072,7 @@ write_tag_1_packet(char *dest, size_t *remaining_bytes,
ecryptfs_from_hex(key_rec->sig, auth_tok->token.private_key.signature,
ECRYPTFS_SIG_SIZE);
encrypted_session_key_valid = 0;
- for (i = 0; i < crypt_stat->key_size; i++)
+ for (i = 0; i < ecryptfs_get_key_size_to_store_key(crypt_stat); i++)
encrypted_session_key_valid |=
auth_tok->session_key.encrypted_key[i];
if (encrypted_session_key_valid) {
@@ -2196,6 +2226,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes,
u8 cipher_code;
size_t packet_size_length;
size_t max_packet_size;
+ unsigned char final[2*ECRYPTFS_MAX_CIPHER_NAME_SIZE+1];
struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
crypt_stat->mount_crypt_stat;
struct blkcipher_desc desc = {
@@ -2228,13 +2259,14 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes,
mount_crypt_stat->global_default_cipher_key_size;
if (auth_tok->session_key.encrypted_key_size == 0)
auth_tok->session_key.encrypted_key_size =
- crypt_stat->key_size;
+ ecryptfs_get_key_size_to_store_key(crypt_stat);
if (crypt_stat->key_size == 24
&& strcmp("aes", crypt_stat->cipher) == 0) {
memset((crypt_stat->key + 24), 0, 8);
auth_tok->session_key.encrypted_key_size = 32;
} else
- auth_tok->session_key.encrypted_key_size = crypt_stat->key_size;
+ auth_tok->session_key.encrypted_key_size =
+ ecryptfs_get_key_size_to_store_key(crypt_stat);
key_rec->enc_key_size =
auth_tok->session_key.encrypted_key_size;
encrypted_session_key_valid = 0;
@@ -2258,8 +2290,8 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes,
auth_tok->token.password.
session_key_encryption_key_bytes);
memcpy(session_key_encryption_key,
- auth_tok->token.password.session_key_encryption_key,
- crypt_stat->key_size);
+ auth_tok->token.password.session_key_encryption_key,
+ auth_tok->token.password.session_key_encryption_key_bytes);
ecryptfs_printk(KERN_DEBUG,
"Cached session key encryption key:\n");
if (ecryptfs_verbosity > 0)
@@ -2292,7 +2324,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes,
}
mutex_lock(tfm_mutex);
rc = crypto_blkcipher_setkey(desc.tfm, session_key_encryption_key,
- crypt_stat->key_size);
+ auth_tok->token.password.session_key_encryption_key_bytes);
if (rc < 0) {
mutex_unlock(tfm_mutex);
ecryptfs_printk(KERN_ERR, "Error setting key for crypto "
@@ -2301,7 +2333,9 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes,
}
rc = 0;
ecryptfs_printk(KERN_DEBUG, "Encrypting [%zd] bytes of the key\n",
- crypt_stat->key_size);
+ crypt_stat->key_size);
+ ecryptfs_printk(KERN_DEBUG, "Encrypting [%zd] bytes of the salt key\n",
+ ecryptfs_get_salt_size_for_cipher(crypt_stat));
rc = crypto_blkcipher_encrypt(&desc, dst_sg, src_sg,
(*key_rec).enc_key_size);
mutex_unlock(tfm_mutex);
@@ -2350,8 +2384,10 @@ encrypted_session_key_set:
dest[(*packet_size)++] = 0x04; /* version 4 */
/* TODO: Break from RFC2440 so that arbitrary ciphers can be
* specified with strings */
- cipher_code = ecryptfs_code_for_cipher_string(crypt_stat->cipher,
- crypt_stat->key_size);
+ cipher_code = ecryptfs_code_for_cipher_string(
+ ecryptfs_get_full_cipher(crypt_stat->cipher,
+ crypt_stat->cipher_mode, final, sizeof(final)),
+ crypt_stat->key_size);
if (cipher_code == 0) {
ecryptfs_printk(KERN_WARNING, "Unable to generate code for "
"cipher [%s]\n", crypt_stat->cipher);
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 4f4d0474bee9..85f7a289bdac 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -156,16 +156,41 @@ int ecryptfs_get_lower_file(struct dentry *dentry, struct inode *inode)
void ecryptfs_put_lower_file(struct inode *inode)
{
+ int ret = 0;
struct ecryptfs_inode_info *inode_info;
+ bool clear_cache_needed = false;
inode_info = ecryptfs_inode_to_private(inode);
if (atomic_dec_and_mutex_lock(&inode_info->lower_file_count,
&inode_info->lower_file_mutex)) {
+
+ if (get_events() && get_events()->is_hw_crypt_cb &&
+ get_events()->is_hw_crypt_cb())
+ clear_cache_needed = true;
+
filemap_write_and_wait(inode->i_mapping);
+ if (clear_cache_needed) {
+ ret = vfs_fsync(inode_info->lower_file, false);
+
+ if (ret)
+ pr_err("failed to sync file ret = %d.\n", ret);
+ }
fput(inode_info->lower_file);
inode_info->lower_file = NULL;
mutex_unlock(&inode_info->lower_file_mutex);
+
+ if (clear_cache_needed) {
+ truncate_inode_pages_fill_zero(inode->i_mapping, 0);
+ truncate_inode_pages_fill_zero(
+ ecryptfs_inode_to_lower(inode)->i_mapping, 0);
+ }
+
+ if (get_events() && get_events()->release_cb)
+ get_events()->release_cb(
+ ecryptfs_inode_to_lower(inode));
}
+
+
}
enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig,
@@ -280,6 +305,7 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options,
char *cipher_key_bytes_src;
char *fn_cipher_key_bytes_src;
u8 cipher_code;
+ unsigned char final[2*ECRYPTFS_MAX_CIPHER_NAME_SIZE+1];
*check_ruid = 0;
@@ -309,12 +335,14 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options,
case ecryptfs_opt_ecryptfs_cipher:
cipher_name_src = args[0].from;
cipher_name_dst =
- mount_crypt_stat->
- global_default_cipher_name;
- strncpy(cipher_name_dst, cipher_name_src,
- ECRYPTFS_MAX_CIPHER_NAME_SIZE);
- cipher_name_dst[ECRYPTFS_MAX_CIPHER_NAME_SIZE] = '\0';
+ mount_crypt_stat->global_default_cipher_name;
+
+ ecryptfs_parse_full_cipher(cipher_name_src,
+ mount_crypt_stat->global_default_cipher_name,
+ mount_crypt_stat->global_default_cipher_mode);
+
cipher_name_set = 1;
+
break;
case ecryptfs_opt_ecryptfs_key_bytes:
cipher_key_bytes_src = args[0].from;
@@ -411,24 +439,35 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options,
strcpy(mount_crypt_stat->global_default_cipher_name,
ECRYPTFS_DEFAULT_CIPHER);
}
+
if ((mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)
&& !fn_cipher_name_set)
strcpy(mount_crypt_stat->global_default_fn_cipher_name,
mount_crypt_stat->global_default_cipher_name);
+
if (!cipher_key_bytes_set)
mount_crypt_stat->global_default_cipher_key_size = 0;
+
if ((mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)
&& !fn_cipher_key_bytes_set)
mount_crypt_stat->global_default_fn_cipher_key_bytes =
mount_crypt_stat->global_default_cipher_key_size;
cipher_code = ecryptfs_code_for_cipher_string(
- mount_crypt_stat->global_default_cipher_name,
+ ecryptfs_get_full_cipher(
+ mount_crypt_stat->global_default_cipher_name,
+ mount_crypt_stat->global_default_cipher_mode,
+ final, sizeof(final)),
mount_crypt_stat->global_default_cipher_key_size);
if (!cipher_code) {
- ecryptfs_printk(KERN_ERR,
- "eCryptfs doesn't support cipher: %s",
- mount_crypt_stat->global_default_cipher_name);
+ ecryptfs_printk(
+ KERN_ERR,
+ "eCryptfs doesn't support cipher: %s and key size %zu",
+ ecryptfs_get_full_cipher(
+ mount_crypt_stat->global_default_cipher_name,
+ mount_crypt_stat->global_default_cipher_mode,
+ final, sizeof(final)),
+ mount_crypt_stat->global_default_cipher_key_size);
rc = -EINVAL;
goto out;
}
@@ -488,6 +527,7 @@ static struct file_system_type ecryptfs_fs_type;
* @dev_name: The path to mount over
* @raw_data: The options passed into the kernel
*/
+
static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags,
const char *dev_name, void *raw_data)
{
@@ -557,6 +597,11 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
ecryptfs_set_superblock_lower(s, path.dentry->d_sb);
+
+ if (get_events() && get_events()->is_hw_crypt_cb &&
+ get_events()->is_hw_crypt_cb())
+ drop_pagecache_sb(ecryptfs_superblock_to_lower(s), 0);
+
/**
* Set the POSIX ACL flag based on whether they're enabled in the lower
* mount.
@@ -895,6 +940,7 @@ static void __exit ecryptfs_exit(void)
do_sysfs_unregistration();
unregister_filesystem(&ecryptfs_fs_type);
ecryptfs_free_kmem_caches();
+ ecryptfs_free_events();
}
MODULE_AUTHOR("Michael A. Halcrow <mhalcrow@us.ibm.com>");
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index caba848ac763..bdbc72d52438 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -552,10 +552,16 @@ static sector_t ecryptfs_bmap(struct address_space *mapping, sector_t block)
return rc;
}
+/*
+ * Zero the whole page (PAGE_CACHE_SIZE bytes from offset 0).
+ * Installed below as the .freepage operation of ecryptfs_aops.
+ */
+void ecryptfs_freepage(struct page *page)
+{
+ zero_user(page, 0, PAGE_CACHE_SIZE);
+}
+
const struct address_space_operations ecryptfs_aops = {
.writepage = ecryptfs_writepage,
.readpage = ecryptfs_readpage,
.write_begin = ecryptfs_write_begin,
.write_end = ecryptfs_write_end,
.bmap = ecryptfs_bmap,
+ .freepage = ecryptfs_freepage,
};
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index afa1b81c3418..25e436ddcf8e 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -69,6 +69,9 @@ static void ecryptfs_i_callback(struct rcu_head *head)
{
struct inode *inode = container_of(head, struct inode, i_rcu);
struct ecryptfs_inode_info *inode_info;
+ if (inode == NULL)
+ return;
+
inode_info = ecryptfs_inode_to_private(inode);
kmem_cache_free(ecryptfs_inode_info_cache, inode_info);
@@ -88,9 +91,12 @@ static void ecryptfs_destroy_inode(struct inode *inode)
struct ecryptfs_inode_info *inode_info;
inode_info = ecryptfs_inode_to_private(inode);
+
BUG_ON(inode_info->lower_file);
+
ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat);
call_rcu(&inode->i_rcu, ecryptfs_i_callback);
+
}
/**
@@ -149,6 +155,9 @@ static int ecryptfs_show_options(struct seq_file *m, struct dentry *root)
struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
&ecryptfs_superblock_to_private(sb)->mount_crypt_stat;
struct ecryptfs_global_auth_tok *walker;
+ unsigned char final[2*ECRYPTFS_MAX_CIPHER_NAME_SIZE+1];
+
+ memset(final, 0, sizeof(final));
mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex);
list_for_each_entry(walker,
@@ -162,7 +171,10 @@ static int ecryptfs_show_options(struct seq_file *m, struct dentry *root)
mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex);
seq_printf(m, ",ecryptfs_cipher=%s",
- mount_crypt_stat->global_default_cipher_name);
+ ecryptfs_get_full_cipher(
+ mount_crypt_stat->global_default_cipher_name,
+ mount_crypt_stat->global_default_cipher_mode,
+ final, sizeof(final)));
if (mount_crypt_stat->global_default_cipher_key_size)
seq_printf(m, ",ecryptfs_key_bytes=%zd",
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index da3d40ef1668..3267a80dbbe2 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -40,7 +40,8 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode)
{
int err = ext2_add_link(dentry, inode);
if (!err) {
- d_instantiate_new(dentry, inode);
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
return 0;
}
inode_dec_link_count(inode);
@@ -266,7 +267,8 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
if (err)
goto out_fail;
- d_instantiate_new(dentry, inode);
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
out:
return err;
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index 3c8293215603..ebaff5ab93da 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -118,10 +118,16 @@ config EXT4_ENCRYPTION
decrypted pages in the page cache.
config EXT4_FS_ENCRYPTION
- bool
- default y
+ bool "Ext4 FS Encryption"
+ default n
depends on EXT4_ENCRYPTION
+config EXT4_FS_ICE_ENCRYPTION
+ bool "Ext4 Encryption with ICE support"
+ default n
+ depends on EXT4_FS_ENCRYPTION
+ depends on PFK
+
config EXT4_DEBUG
bool "EXT4 debugging support"
depends on EXT4_FS
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index f52cf54f0cbc..1cabbd9a9229 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -14,3 +14,5 @@ ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o
ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o
ext4-$(CONFIG_EXT4_FS_ENCRYPTION) += crypto_policy.o crypto.o \
crypto_key.o crypto_fname.o
+
+ext4-$(CONFIG_EXT4_FS_ICE_ENCRYPTION) += ext4_ice.o
diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c
index f240cef8b326..f5099a3386ec 100644
--- a/fs/ext4/crypto.c
+++ b/fs/ext4/crypto.c
@@ -389,14 +389,12 @@ int ext4_decrypt(struct page *page)
page->index, page, page, GFP_NOFS);
}
-int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex)
+int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk,
+ ext4_fsblk_t pblk, ext4_lblk_t len)
{
struct ext4_crypto_ctx *ctx;
struct page *ciphertext_page = NULL;
struct bio *bio;
- ext4_lblk_t lblk = le32_to_cpu(ex->ee_block);
- ext4_fsblk_t pblk = ext4_ext_pblock(ex);
- unsigned int len = ext4_ext_get_actual_len(ex);
int ret, err = 0;
#if 0
@@ -459,7 +457,8 @@ errout:
bool ext4_valid_contents_enc_mode(uint32_t mode)
{
- return (mode == EXT4_ENCRYPTION_MODE_AES_256_XTS);
+ return (mode == EXT4_ENCRYPTION_MODE_AES_256_XTS ||
+ mode == EXT4_ENCRYPTION_MODE_PRIVATE);
}
/**
diff --git a/fs/ext4/crypto_key.c b/fs/ext4/crypto_key.c
index 14ae7781f2a8..d3d6b28ce9b9 100644
--- a/fs/ext4/crypto_key.c
+++ b/fs/ext4/crypto_key.c
@@ -15,6 +15,7 @@
#include <uapi/linux/keyctl.h>
#include "ext4.h"
+#include "ext4_ice.h"
#include "xattr.h"
static void derive_crypt_complete(struct crypto_async_request *req, int rc)
@@ -173,6 +174,8 @@ void ext4_free_crypt_info(struct ext4_crypt_info *ci)
if (!ci)
return;
+ if (ci->ci_keyring_key)
+ key_put(ci->ci_keyring_key);
crypto_free_ablkcipher(ci->ci_ctfm);
kmem_cache_free(ext4_crypt_info_cachep, ci);
}
@@ -194,7 +197,13 @@ void ext4_free_encryption_info(struct inode *inode,
ext4_free_crypt_info(ci);
}
-int ext4_get_encryption_info(struct inode *inode)
+/*
+ * Contents-encryption mode used when no encryption context could be read
+ * from the inode and the caller has to fill one in: the private mode when
+ * ext4_is_ice_enabled() reports ICE support, AES-256-XTS otherwise.
+ */
+static int ext4_default_data_encryption_mode(void)
+{
+	if (ext4_is_ice_enabled())
+		return EXT4_ENCRYPTION_MODE_PRIVATE;
+
+	return EXT4_ENCRYPTION_MODE_AES_256_XTS;
+}
+
+int _ext4_get_encryption_info(struct inode *inode)
{
struct ext4_inode_info *ei = EXT4_I(inode);
struct ext4_crypt_info *crypt_info;
@@ -207,24 +216,32 @@ int ext4_get_encryption_info(struct inode *inode)
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct crypto_ablkcipher *ctfm;
const char *cipher_str;
- char raw_key[EXT4_MAX_KEY_SIZE];
- char mode;
+ int for_fname = 0;
+ int mode;
int res;
- if (ei->i_crypt_info)
- return 0;
-
res = ext4_init_crypto();
if (res)
return res;
+retry:
+ crypt_info = ACCESS_ONCE(ei->i_crypt_info);
+ if (crypt_info) {
+ if (!crypt_info->ci_keyring_key ||
+ key_validate(crypt_info->ci_keyring_key) == 0)
+ return 0;
+ ext4_free_encryption_info(inode, crypt_info);
+ goto retry;
+ }
+
res = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION,
EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
&ctx, sizeof(ctx));
if (res < 0) {
if (!DUMMY_ENCRYPTION_ENABLED(sbi))
return res;
- ctx.contents_encryption_mode = EXT4_ENCRYPTION_MODE_AES_256_XTS;
+ ctx.contents_encryption_mode =
+ ext4_default_data_encryption_mode();
ctx.filenames_encryption_mode =
EXT4_ENCRYPTION_MODE_AES_256_CTS;
ctx.flags = 0;
@@ -240,14 +257,15 @@ int ext4_get_encryption_info(struct inode *inode)
crypt_info->ci_data_mode = ctx.contents_encryption_mode;
crypt_info->ci_filename_mode = ctx.filenames_encryption_mode;
crypt_info->ci_ctfm = NULL;
+ crypt_info->ci_keyring_key = NULL;
memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor,
sizeof(crypt_info->ci_master_key));
- if (S_ISREG(inode->i_mode))
- mode = crypt_info->ci_data_mode;
- else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
- mode = crypt_info->ci_filename_mode;
- else
+ if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+ for_fname = 1;
+ else if (!S_ISREG(inode->i_mode))
BUG();
+ mode = for_fname ? crypt_info->ci_filename_mode :
+ crypt_info->ci_data_mode;
switch (mode) {
case EXT4_ENCRYPTION_MODE_AES_256_XTS:
cipher_str = "xts(aes)";
@@ -255,6 +273,8 @@ int ext4_get_encryption_info(struct inode *inode)
case EXT4_ENCRYPTION_MODE_AES_256_CTS:
cipher_str = "cts(cbc(aes))";
break;
+ case EXT4_ENCRYPTION_MODE_PRIVATE:
+ cipher_str = "bugon";
case EXT4_ENCRYPTION_MODE_AES_256_HEH:
cipher_str = "heh(aes)";
break;
@@ -266,7 +286,7 @@ int ext4_get_encryption_info(struct inode *inode)
goto out;
}
if (DUMMY_ENCRYPTION_ENABLED(sbi)) {
- memset(raw_key, 0x42, EXT4_AES_256_XTS_KEY_SIZE);
+ memset(crypt_info->ci_raw_key, 0x42, EXT4_AES_256_XTS_KEY_SIZE);
goto got_key;
}
memcpy(full_key_descriptor, EXT4_KEY_DESC_PREFIX,
@@ -282,6 +302,7 @@ int ext4_get_encryption_info(struct inode *inode)
keyring_key = NULL;
goto out;
}
+ crypt_info->ci_keyring_key = keyring_key;
if (keyring_key->type != &key_type_logon) {
printk_once(KERN_WARNING
"ext4: key type must be logon\n");
@@ -312,36 +333,49 @@ int ext4_get_encryption_info(struct inode *inode)
up_read(&keyring_key->sem);
goto out;
}
- res = ext4_derive_key(&ctx, master_key->raw, raw_key);
+ res = ext4_derive_key(&ctx, master_key->raw,
+ crypt_info->ci_raw_key);
up_read(&keyring_key->sem);
if (res)
goto out;
got_key:
- ctfm = crypto_alloc_ablkcipher(cipher_str, 0, 0);
- if (!ctfm || IS_ERR(ctfm)) {
- res = ctfm ? PTR_ERR(ctfm) : -ENOMEM;
- printk(KERN_DEBUG
- "%s: error %d (inode %u) allocating crypto tfm\n",
- __func__, res, (unsigned) inode->i_ino);
+ if (for_fname ||
+ (crypt_info->ci_data_mode != EXT4_ENCRYPTION_MODE_PRIVATE)) {
+ ctfm = crypto_alloc_ablkcipher(cipher_str, 0, 0);
+ if (!ctfm || IS_ERR(ctfm)) {
+ res = ctfm ? PTR_ERR(ctfm) : -ENOMEM;
+ pr_debug("%s: error %d (inode %u) allocating crypto tfm\n",
+ __func__, res, (unsigned) inode->i_ino);
+ goto out;
+ }
+ crypt_info->ci_ctfm = ctfm;
+ crypto_ablkcipher_clear_flags(ctfm, ~0);
+ crypto_tfm_set_flags(crypto_ablkcipher_tfm(ctfm),
+ CRYPTO_TFM_REQ_WEAK_KEY);
+ res = crypto_ablkcipher_setkey(ctfm, crypt_info->ci_raw_key,
+ ext4_encryption_key_size(mode));
+ if (res)
+ goto out;
+ memzero_explicit(crypt_info->ci_raw_key,
+ sizeof(crypt_info->ci_raw_key));
+ } else if (!ext4_is_ice_enabled()) {
+ pr_warn("%s: ICE support not available\n",
+ __func__);
+ res = -EINVAL;
goto out;
}
- crypt_info->ci_ctfm = ctfm;
- crypto_ablkcipher_clear_flags(ctfm, ~0);
- crypto_tfm_set_flags(crypto_ablkcipher_tfm(ctfm),
- CRYPTO_TFM_REQ_WEAK_KEY);
- res = crypto_ablkcipher_setkey(ctfm, raw_key,
- ext4_encryption_key_size(mode));
- if (res)
- goto out;
+ if (cmpxchg(&ei->i_crypt_info, NULL, crypt_info) != NULL) {
+ ext4_free_crypt_info(crypt_info);
+ goto retry;
+ }
+ return 0;
- if (cmpxchg(&ei->i_crypt_info, NULL, crypt_info) == NULL)
- crypt_info = NULL;
out:
if (res == -ENOKEY)
res = 0;
- key_put(keyring_key);
+ memzero_explicit(crypt_info->ci_raw_key,
+ sizeof(crypt_info->ci_raw_key));
ext4_free_crypt_info(crypt_info);
- memzero_explicit(raw_key, sizeof(raw_key));
return res;
}
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 8eb768bbf5b5..e452f9a9f174 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -161,8 +161,11 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
index, 1);
file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
bh = ext4_bread(NULL, inode, map.m_lblk, 0);
- if (IS_ERR(bh))
- return PTR_ERR(bh);
+ if (IS_ERR(bh)) {
+ err = PTR_ERR(bh);
+ bh = NULL;
+ goto errout;
+ }
}
if (!bh) {
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 977a74234287..c161f9a9e7e1 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -589,6 +589,7 @@ enum {
#define EXT4_ENCRYPTION_MODE_AES_256_GCM 2
#define EXT4_ENCRYPTION_MODE_AES_256_CBC 3
#define EXT4_ENCRYPTION_MODE_AES_256_CTS 4
+#define EXT4_ENCRYPTION_MODE_PRIVATE 127
#define EXT4_ENCRYPTION_MODE_AES_256_HEH 126
#include "ext4_crypto.h"
@@ -2265,7 +2266,8 @@ struct page *ext4_encrypt(struct inode *inode,
struct page *plaintext_page,
gfp_t gfp_flags);
int ext4_decrypt(struct page *page);
-int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex);
+int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk,
+ ext4_fsblk_t pblk, ext4_lblk_t len);
extern const struct dentry_operations ext4_encrypted_d_ops;
#ifdef CONFIG_EXT4_FS_ENCRYPTION
@@ -2329,17 +2331,37 @@ static inline void ext4_fname_free_filename(struct ext4_filename *fname) { }
/* crypto_key.c */
void ext4_free_crypt_info(struct ext4_crypt_info *ci);
void ext4_free_encryption_info(struct inode *inode, struct ext4_crypt_info *ci);
+int _ext4_get_encryption_info(struct inode *inode);
#ifdef CONFIG_EXT4_FS_ENCRYPTION
int ext4_has_encryption_key(struct inode *inode);
-int ext4_get_encryption_info(struct inode *inode);
+/*
+ * Fast-path wrapper around _ext4_get_encryption_info().
+ *
+ * Returns 0 immediately when the inode already carries crypt info and its
+ * keyring key (if one is attached) is not flagged invalidated, revoked or
+ * dead.  In every other case the slow path is taken and its result is
+ * returned.
+ */
+static inline int ext4_get_encryption_info(struct inode *inode)
+{
+	const unsigned long stale_mask = (1 << KEY_FLAG_INVALIDATED) |
+					 (1 << KEY_FLAG_REVOKED) |
+					 (1 << KEY_FLAG_DEAD);
+	struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info;
+
+	/* cached info with no key, or with a key that is still good */
+	if (ci && (!ci->ci_keyring_key ||
+		   !(ci->ci_keyring_key->flags & stale_mask)))
+		return 0;
+
+	return _ext4_get_encryption_info(inode);
+}
static inline struct ext4_crypt_info *ext4_encryption_info(struct inode *inode)
{
return EXT4_I(inode)->i_crypt_info;
}
+/*
+ * Non-zero only for a regular file whose attached crypt info has its data
+ * mode set to EXT4_ENCRYPTION_MODE_PRIVATE; 0 for every other inode,
+ * including one with no crypt info attached.
+ */
+static inline int ext4_using_hardware_encryption(struct inode *inode)
+{
+	struct ext4_crypt_info *ci;
+
+	if (!S_ISREG(inode->i_mode))
+		return 0;
+
+	ci = ext4_encryption_info(inode);
+	return ci && ci->ci_data_mode == EXT4_ENCRYPTION_MODE_PRIVATE;
+}
+
#else
static inline int ext4_has_encryption_key(struct inode *inode)
{
@@ -2353,6 +2375,10 @@ static inline struct ext4_crypt_info *ext4_encryption_info(struct inode *inode)
{
return NULL;
}
+static inline int ext4_using_hardware_encryption(struct inode *inode)
+{
+ return 0;
+}
#endif
@@ -2509,6 +2535,8 @@ extern int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
extern qsize_t *ext4_get_reserved_space(struct inode *inode);
extern void ext4_da_update_reserve_space(struct inode *inode,
int used, int quota_claim);
+extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk,
+ ext4_fsblk_t pblk, ext4_lblk_t len);
/* indirect.c */
extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
@@ -3011,8 +3039,7 @@ extern int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
struct page *page);
extern int ext4_try_add_inline_entry(handle_t *handle,
struct ext4_filename *fname,
- struct dentry *dentry,
- struct inode *inode);
+ struct inode *dir, struct inode *inode);
extern int ext4_try_create_inline_dir(handle_t *handle,
struct inode *parent,
struct inode *inode);
diff --git a/fs/ext4/ext4_crypto.h b/fs/ext4/ext4_crypto.h
index e52637d969db..e28cc5aab04a 100644
--- a/fs/ext4/ext4_crypto.h
+++ b/fs/ext4/ext4_crypto.h
@@ -12,6 +12,7 @@
#define _EXT4_CRYPTO_H
#include <linux/fs.h>
+#include <linux/pfk.h>
#define EXT4_KEY_DESCRIPTOR_SIZE 8
@@ -63,6 +64,7 @@ struct ext4_encryption_context {
#define EXT4_AES_256_CTS_KEY_SIZE 32
#define EXT4_AES_256_HEH_KEY_SIZE 32
#define EXT4_AES_256_XTS_KEY_SIZE 64
+#define EXT4_PRIVATE_KEY_SIZE 64
#define EXT4_MAX_KEY_SIZE 64
#define EXT4_KEY_DESC_PREFIX "ext4:"
@@ -80,9 +82,13 @@ struct ext4_crypt_info {
char ci_filename_mode;
char ci_flags;
struct crypto_ablkcipher *ci_ctfm;
+ struct key *ci_keyring_key;
char ci_master_key[EXT4_KEY_DESCRIPTOR_SIZE];
+ char ci_raw_key[EXT4_MAX_KEY_SIZE];
};
+
+
#define EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001
#define EXT4_WRITE_PATH_FL 0x00000002
@@ -115,6 +121,7 @@ static inline int ext4_encryption_key_size(int mode)
{
switch (mode) {
case EXT4_ENCRYPTION_MODE_AES_256_XTS:
+ case EXT4_ENCRYPTION_MODE_PRIVATE:
return EXT4_AES_256_XTS_KEY_SIZE;
case EXT4_ENCRYPTION_MODE_AES_256_GCM:
return EXT4_AES_256_GCM_KEY_SIZE;
diff --git a/fs/ext4/ext4_ice.c b/fs/ext4/ext4_ice.c
new file mode 100644
index 000000000000..d85bcb8ea1ba
--- /dev/null
+++ b/fs/ext4/ext4_ice.c
@@ -0,0 +1,109 @@
+/* Copyright (c) 2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include "ext4_ice.h"
+#include "ext4_crypto.h"
+
+
+/*
+ * Returns a pointer to the first byte of ci_raw_key in the inode's crypt
+ * info, or NULL when the inode is NULL or has no crypt info attached.
+ * The returned pointer aliases the crypt info; nothing is copied.
+ */
+char *ext4_get_ice_encryption_key(const struct inode *inode)
+{
+	struct ext4_crypt_info *ci;
+
+	ci = inode ? ext4_encryption_info((struct inode *)inode) : NULL;
+
+	return ci ? ci->ci_raw_key : NULL;
+}
+
+/*
+ * Returns a pointer to the salt, which lives in ci_raw_key directly after
+ * the key bytes (offset ext4_get_ice_encryption_key_size()), or NULL when
+ * the inode is NULL or has no crypt info attached.
+ */
+char *ext4_get_ice_encryption_salt(const struct inode *inode)
+{
+	struct ext4_crypt_info *ci;
+
+	ci = inode ? ext4_encryption_info((struct inode *)inode) : NULL;
+	if (!ci)
+		return NULL;
+
+	return ci->ci_raw_key + ext4_get_ice_encryption_key_size(inode);
+}
+
+/*
+ * Returns non-zero if the inode's contents-encryption mode is
+ * EXT4_ENCRYPTION_MODE_PRIVATE.  Returns 0 for a NULL inode or an inode
+ * with no crypt info attached.
+ *
+ * NOTE(review): the name says "AES XTS", but the only mode tested here is
+ * the private one; presumably that mode is AES-XTS carried out by the ICE
+ * hardware -- confirm against the PFK/ICE driver.
+ */
+int ext4_is_aes_xts_cipher(const struct inode *inode)
+{
+	struct ext4_crypt_info *ci;
+
+	/* tolerate NULL like the key/salt accessors in this file do */
+	if (!inode)
+		return 0;
+
+	ci = ext4_encryption_info((struct inode *)inode);
+	if (!ci)
+		return 0;
+
+	return (ci->ci_data_mode == EXT4_ENCRYPTION_MODE_PRIVATE);
+}
+
+/*
+ * Compares the ICE encryption material of two inodes.
+ *
+ * Returns 1 when both pointers name the same inode, when neither inode is
+ * to be processed by ICE, or when both are and their keys and salts are
+ * byte-for-byte identical.  Returns 0 when either pointer is NULL, when
+ * only one of the inodes is to be processed by ICE, or when any key/salt
+ * pointer is missing or the sizes or contents differ.
+ */
+int ext4_is_ice_encryption_info_equal(const struct inode *inode1,
+	const struct inode *inode2)
+{
+	char *key1, *key2, *salt1, *salt2;
+	size_t key_len, salt_len;
+	int ice1, ice2;
+
+	if (!inode1 || !inode2)
+		return 0;
+
+	if (inode1 == inode2)
+		return 1;
+
+	ice1 = ext4_should_be_processed_by_ice(inode1);
+	ice2 = ext4_should_be_processed_by_ice(inode2);
+
+	/* neither belongs to ice, so for our purposes they are equal */
+	if (!ice1 && !ice2)
+		return 1;
+
+	/* exactly one belongs to ice -> not equal */
+	if (!ice1 || !ice2)
+		return 0;
+
+	key1 = ext4_get_ice_encryption_key(inode1);
+	key2 = ext4_get_ice_encryption_key(inode2);
+	salt1 = ext4_get_ice_encryption_salt(inode1);
+	salt2 = ext4_get_ice_encryption_salt(inode2);
+
+	/* key and salt are expected to be present by this point */
+	if (!key1 || !key2 || !salt1 || !salt2)
+		return 0;
+
+	key_len = ext4_get_ice_encryption_key_size(inode1);
+	if (key_len != ext4_get_ice_encryption_key_size(inode2))
+		return 0;
+
+	salt_len = ext4_get_ice_encryption_salt_size(inode1);
+	if (salt_len != ext4_get_ice_encryption_salt_size(inode2))
+		return 0;
+
+	if (memcmp(key1, key2, key_len) != 0)
+		return 0;
+
+	return memcmp(salt1, salt2, salt_len) == 0;
+}
diff --git a/fs/ext4/ext4_ice.h b/fs/ext4/ext4_ice.h
new file mode 100644
index 000000000000..5257edabd6b2
--- /dev/null
+++ b/fs/ext4/ext4_ice.h
@@ -0,0 +1,104 @@
+/* Copyright (c) 2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _EXT4_ICE_H
+#define _EXT4_ICE_H
+
+#include "ext4.h"
+#include "ext4_crypto.h"
+
+#ifdef CONFIG_EXT4_FS_ICE_ENCRYPTION
+/*
+ * Non-zero when the inode is an encrypted inode that also satisfies
+ * ext4_using_hardware_encryption(); 0 otherwise.
+ */
+static inline int ext4_should_be_processed_by_ice(const struct inode *inode)
+{
+	/* the helpers below take a non-const inode pointer */
+	struct inode *ino = (struct inode *)inode;
+
+	return ext4_encrypted_inode(ino) ?
+		ext4_using_hardware_encryption(ino) : 0;
+}
+
+static inline int ext4_is_ice_enabled(void)
+{
+ return 1;
+}
+
+int ext4_is_aes_xts_cipher(const struct inode *inode);
+
+char *ext4_get_ice_encryption_key(const struct inode *inode);
+char *ext4_get_ice_encryption_salt(const struct inode *inode);
+
+int ext4_is_ice_encryption_info_equal(const struct inode *inode1,
+ const struct inode *inode2);
+
+static inline size_t ext4_get_ice_encryption_key_size(
+ const struct inode *inode)
+{
+ return EXT4_AES_256_XTS_KEY_SIZE / 2;
+}
+
+static inline size_t ext4_get_ice_encryption_salt_size(
+ const struct inode *inode)
+{
+ return EXT4_AES_256_XTS_KEY_SIZE / 2;
+}
+
+#else
+static inline int ext4_should_be_processed_by_ice(const struct inode *inode)
+{
+ return 0;
+}
+static inline int ext4_is_ice_enabled(void)
+{
+ return 0;
+}
+
+static inline char *ext4_get_ice_encryption_key(const struct inode *inode)
+{
+ return NULL;
+}
+
+static inline char *ext4_get_ice_encryption_salt(const struct inode *inode)
+{
+ return NULL;
+}
+
+static inline size_t ext4_get_ice_encryption_key_size(
+ const struct inode *inode)
+{
+ return 0;
+}
+
+static inline size_t ext4_get_ice_encryption_salt_size(
+ const struct inode *inode)
+{
+ return 0;
+}
+
+static inline int ext4_is_xts_cipher(const struct inode *inode)
+{
+ return 0;
+}
+
+static inline int ext4_is_ice_encryption_info_equal(
+ const struct inode *inode1,
+ const struct inode *inode2)
+{
+ return 0;
+}
+
+static inline int ext4_is_aes_xts_cipher(const struct inode *inode)
+{
+ return 0;
+}
+
+#endif
+
+#endif /* _EXT4_ICE_H */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 1708597659a1..4a0b3a521399 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3133,19 +3133,11 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
{
ext4_fsblk_t ee_pblock;
unsigned int ee_len;
- int ret;
ee_len = ext4_ext_get_actual_len(ex);
ee_pblock = ext4_ext_pblock(ex);
-
- if (ext4_encrypted_inode(inode))
- return ext4_encrypted_zeroout(inode, ex);
-
- ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS);
- if (ret > 0)
- ret = 0;
-
- return ret;
+ return ext4_issue_zeroout(inode, le32_to_cpu(ex->ee_block), ee_pblock,
+ ee_len);
}
/*
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 99ed5b9f2b86..cab694540930 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -1021,12 +1021,11 @@ void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh,
*/
static int ext4_add_dirent_to_inline(handle_t *handle,
struct ext4_filename *fname,
- struct dentry *dentry,
+ struct inode *dir,
struct inode *inode,
struct ext4_iloc *iloc,
void *inline_start, int inline_size)
{
- struct inode *dir = d_inode(dentry->d_parent);
int err;
struct ext4_dir_entry_2 *de;
@@ -1270,12 +1269,11 @@ out:
* the new created block.
*/
int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
- struct dentry *dentry, struct inode *inode)
+ struct inode *dir, struct inode *inode)
{
int ret, inline_size, no_expand;
void *inline_start;
struct ext4_iloc iloc;
- struct inode *dir = d_inode(dentry->d_parent);
ret = ext4_get_inode_loc(dir, &iloc);
if (ret)
@@ -1289,7 +1287,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
EXT4_INLINE_DOTDOT_SIZE;
inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE;
- ret = ext4_add_dirent_to_inline(handle, fname, dentry, inode, &iloc,
+ ret = ext4_add_dirent_to_inline(handle, fname, dir, inode, &iloc,
inline_start, inline_size);
if (ret != -ENOSPC)
goto out;
@@ -1310,7 +1308,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
if (inline_size) {
inline_start = ext4_get_inline_xattr_pos(dir, &iloc);
- ret = ext4_add_dirent_to_inline(handle, fname, dentry,
+ ret = ext4_add_dirent_to_inline(handle, fname, dir,
inode, &iloc, inline_start,
inline_size);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 920665b94f11..12ca9da02fdc 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -42,6 +42,7 @@
#include "xattr.h"
#include "acl.h"
#include "truncate.h"
+#include "ext4_ice.h"
#include <trace/events/ext4.h>
#include <trace/events/android_fs.h>
@@ -389,6 +390,21 @@ static int __check_block_validity(struct inode *inode, const char *func,
return 0;
}
+/*
+ * Zero out @len blocks starting at physical block @pblk.
+ *
+ * Encrypted inodes are handed to ext4_encrypted_zeroout() (which also
+ * receives the logical block @lblk) and its result is returned as is.
+ * Otherwise sb_issue_zeroout() is used; a positive result from it is
+ * reported as 0, any other result is passed through.
+ */
+int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk,
+		       ext4_lblk_t len)
+{
+	int err;
+
+	if (ext4_encrypted_inode(inode))
+		return ext4_encrypted_zeroout(inode, lblk, pblk, len);
+
+	err = sb_issue_zeroout(inode->i_sb, pblk, len, GFP_NOFS);
+
+	return err > 0 ? 0 : err;
+}
+
#define check_block_validity(inode, map) \
__check_block_validity((inode), __func__, __LINE__, (map))
@@ -999,7 +1015,8 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
ll_rw_block(READ, 1, &bh);
*wait_bh++ = bh;
decrypt = ext4_encrypted_inode(inode) &&
- S_ISREG(inode->i_mode);
+ S_ISREG(inode->i_mode) &&
+ !ext4_is_ice_enabled();
}
}
/*
@@ -3296,7 +3313,9 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
get_block_func = ext4_get_block_write;
dio_flags = DIO_LOCKING;
}
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
+#if defined(CONFIG_EXT4_FS_ENCRYPTION) && \
+!defined(CONFIG_EXT4_FS_ICE_ENCRYPTION)
+
BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode));
#endif
if (IS_DAX(inode))
@@ -3363,7 +3382,9 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
size_t count = iov_iter_count(iter);
ssize_t ret;
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
+#if defined(CONFIG_EXT4_FS_ENCRYPTION) && \
+!defined(CONFIG_EXT4_FS_ICE_ENCRYPTION)
+
if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode))
return 0;
#endif
@@ -3563,7 +3584,8 @@ static int __ext4_block_zero_page_range(handle_t *handle,
if (!buffer_uptodate(bh))
goto unlock;
if (S_ISREG(inode->i_mode) &&
- ext4_encrypted_inode(inode)) {
+ ext4_encrypted_inode(inode) &&
+ !ext4_using_hardware_encryption(inode)) {
/* We expect the key to be set. */
BUG_ON(!ext4_has_encryption_key(inode));
BUG_ON(blocksize != PAGE_CACHE_SIZE);
@@ -3736,6 +3758,7 @@ int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
{
+#if 0
struct super_block *sb = inode->i_sb;
ext4_lblk_t first_block, stop_block;
struct address_space *mapping = inode->i_mapping;
@@ -3866,6 +3889,12 @@ out_dio:
out_mutex:
mutex_unlock(&inode->i_mutex);
return ret;
+#else
+ /*
+ * Disabled as per b/28760453
+ */
+ return -EOPNOTSUPP;
+#endif
}
int ext4_inode_attach_jinode(struct inode *inode)
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index aa08e129149d..c455a8b649a1 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -274,7 +274,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
struct ext4_filename *fname,
struct ext4_dir_entry_2 **res_dir);
static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
- struct dentry *dentry, struct inode *inode);
+ struct inode *dir, struct inode *inode);
/* checksumming functions */
void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
@@ -1951,10 +1951,9 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
* directory, and adds the dentry to the indexed directory.
*/
static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
- struct dentry *dentry,
+ struct inode *dir,
struct inode *inode, struct buffer_head *bh)
{
- struct inode *dir = d_inode(dentry->d_parent);
struct buffer_head *bh2;
struct dx_root *root;
struct dx_frame frames[2], *frame;
@@ -2107,8 +2106,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
return retval;
if (ext4_has_inline_data(dir)) {
- retval = ext4_try_add_inline_entry(handle, &fname,
- dentry, inode);
+ retval = ext4_try_add_inline_entry(handle, &fname, dir, inode);
if (retval < 0)
goto out;
if (retval == 1) {
@@ -2118,7 +2116,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
}
if (is_dx(dir)) {
- retval = ext4_dx_add_entry(handle, &fname, dentry, inode);
+ retval = ext4_dx_add_entry(handle, &fname, dir, inode);
if (!retval || (retval != ERR_BAD_DX_DIR))
goto out;
ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
@@ -2140,7 +2138,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
if (blocks == 1 && !dx_fallback &&
ext4_has_feature_dir_index(sb)) {
- retval = make_indexed_dir(handle, &fname, dentry,
+ retval = make_indexed_dir(handle, &fname, dir,
inode, bh);
bh = NULL; /* make_indexed_dir releases bh */
goto out;
@@ -2175,12 +2173,11 @@ out:
* Returns 0 for success, or a negative error value
*/
static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
- struct dentry *dentry, struct inode *inode)
+ struct inode *dir, struct inode *inode)
{
struct dx_frame frames[2], *frame;
struct dx_entry *entries, *at;
struct buffer_head *bh;
- struct inode *dir = d_inode(dentry->d_parent);
struct super_block *sb = dir->i_sb;
struct ext4_dir_entry_2 *de;
int err;
@@ -2431,7 +2428,8 @@ static int ext4_add_nondir(handle_t *handle,
int err = ext4_add_entry(handle, dentry, inode);
if (!err) {
ext4_mark_inode_dirty(handle, inode);
- d_instantiate_new(dentry, inode);
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
return 0;
}
drop_nlink(inode);
@@ -2670,7 +2668,8 @@ out_clear_inode:
err = ext4_mark_inode_dirty(handle, dir);
if (err)
goto out_clear_inode;
- d_instantiate_new(dentry, inode);
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
if (IS_DIRSYNC(dir))
ext4_handle_sync(handle);
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 6ca56f5f72b5..978141e8b800 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -28,6 +28,7 @@
#include "ext4_jbd2.h"
#include "xattr.h"
#include "acl.h"
+#include "ext4_ice.h"
static struct kmem_cache *io_end_cachep;
@@ -489,7 +490,11 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
gfp_t gfp_flags = GFP_NOFS;
retry_encrypt:
- data_page = ext4_encrypt(inode, page, gfp_flags);
+
+ if (!ext4_using_hardware_encryption(inode))
+ data_page = ext4_encrypt(inode, page, gfp_flags);
+
+
if (IS_ERR(data_page)) {
ret = PTR_ERR(data_page);
if (ret == -ENOMEM && wbc->sync_mode == WB_SYNC_ALL) {
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index 783e33d839cf..99f1bd8c7f05 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -45,6 +45,7 @@
#include <linux/cleancache.h>
#include "ext4.h"
+#include "ext4_ice.h"
#include <trace/events/android_fs.h>
/*
@@ -63,12 +64,17 @@ static void completion_pages(struct work_struct *work)
bio_for_each_segment_all(bv, bio, i) {
struct page *page = bv->bv_page;
- int ret = ext4_decrypt(page);
- if (ret) {
- WARN_ON_ONCE(1);
- SetPageError(page);
- } else
+ if (ext4_is_ice_enabled()) {
SetPageUptodate(page);
+ } else {
+ int ret = ext4_decrypt(page);
+
+ if (ret) {
+ WARN_ON_ONCE(1);
+ SetPageError(page);
+ } else
+ SetPageUptodate(page);
+ }
unlock_page(page);
}
ext4_release_crypto_ctx(ctx);
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index e50f43fa6afc..f2cdf47b76b6 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -299,7 +299,8 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
f2fs_alloc_nid_done(sbi, ino);
- d_instantiate_new(dentry, inode);
+ d_instantiate(dentry, inode);
+ unlock_new_inode(inode);
if (IS_DIRSYNC(dir))
f2fs_sync_fs(sbi->sb, 1);
@@ -604,7 +605,8 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
err = page_symlink(inode, disk_link.name, disk_link.len);
err_out:
- d_instantiate_new(dentry, inode);
+ d_instantiate(dentry, inode);
+ unlock_new_inode(inode);
/*
* Let's flush symlink data in order to avoid broken symlink as much as
@@ -667,7 +669,8 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
f2fs_alloc_nid_done(sbi, inode->i_ino);
- d_instantiate_new(dentry, inode);
+ d_instantiate(dentry, inode);
+ unlock_new_inode(inode);
if (IS_DIRSYNC(dir))
f2fs_sync_fs(sbi->sb, 1);
@@ -721,7 +724,8 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
f2fs_alloc_nid_done(sbi, inode->i_ino);
- d_instantiate_new(dentry, inode);
+ d_instantiate(dentry, inode);
+ unlock_new_inode(inode);
if (IS_DIRSYNC(dir))
f2fs_sync_fs(sbi->sb, 1);
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index e3fc477728b3..85358940fb75 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -92,7 +92,8 @@ static int fat12_ent_bread(struct super_block *sb, struct fat_entry *fatent,
err_brelse:
brelse(bhs[0]);
err:
- fat_msg(sb, KERN_ERR, "FAT read failed (blocknr %llu)", (llu)blocknr);
+ fat_msg_ratelimit(sb, KERN_ERR,
+ "FAT read failed (blocknr %llu)", (llu)blocknr);
return -EIO;
}
@@ -105,8 +106,8 @@ static int fat_ent_bread(struct super_block *sb, struct fat_entry *fatent,
fatent->fat_inode = MSDOS_SB(sb)->fat_inode;
fatent->bhs[0] = sb_bread(sb, blocknr);
if (!fatent->bhs[0]) {
- fat_msg(sb, KERN_ERR, "FAT read failed (blocknr %llu)",
- (llu)blocknr);
+ fat_msg_ratelimit(sb, KERN_ERR,
+ "FAT read failed (blocknr %llu)", (llu)blocknr);
return -EIO;
}
fatent->nr_bhs = 1;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index c81cfb79a339..1ac142ad7d1b 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -768,8 +768,9 @@ retry:
fat_get_blknr_offset(sbi, i_pos, &blocknr, &offset);
bh = sb_bread(sb, blocknr);
if (!bh) {
- fat_msg(sb, KERN_ERR, "unable to read inode block "
- "for updating (i_pos %lld)", i_pos);
+ fat_msg_ratelimit(sb, KERN_ERR,
+ "unable to read inode block for updating (i_pos %lld)",
+ i_pos);
return -EIO;
}
spin_lock(&sbi->inode_hash_lock);
diff --git a/fs/file_table.c b/fs/file_table.c
index ad17e05ebf95..b4baa0de4988 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -41,6 +41,141 @@ static struct kmem_cache *filp_cachep __read_mostly;
static struct percpu_counter nr_files __cacheline_aligned_in_smp;
+#ifdef CONFIG_FILE_TABLE_DEBUG
+#include <linux/hashtable.h>
+#include <mount.h>
+static DEFINE_MUTEX(global_files_lock);
+static DEFINE_HASHTABLE(global_files_hashtable, 10);
+
+struct global_filetable_lookup_key {
+ struct work_struct work;
+ uintptr_t value;
+};
+
+void global_filetable_print_warning_once(void)
+{
+ pr_err_once("\n**********************************************************\n");
+ pr_err_once("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
+ pr_err_once("** **\n");
+ pr_err_once("** VFS FILE TABLE DEBUG is enabled . **\n");
+ pr_err_once("** Allocating extra memory and slowing access to files **\n");
+ pr_err_once("** **\n");
+ pr_err_once("** This means that this is a DEBUG kernel and it is **\n");
+ pr_err_once("** unsafe for production use. **\n");
+ pr_err_once("** **\n");
+ pr_err_once("** If you see this message and you are not debugging **\n");
+ pr_err_once("** the kernel, report this immediately to your vendor! **\n");
+ pr_err_once("** **\n");
+ pr_err_once("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
+ pr_err_once("**********************************************************\n");
+}
+
+/*
+ * Start tracking @filp in the global file hashtable, keyed by the address
+ * of the struct mount it was opened on.  Files whose dentry has an empty
+ * inline name are not tracked.  Takes global_files_lock.
+ */
+void global_filetable_add(struct file *filp)
+{
+	struct mount *mnt;
+
+	/*
+	 * d_iname is an array embedded in the dentry, so it can never be
+	 * NULL; only an empty name needs to be filtered out.
+	 */
+	if (filp->f_path.dentry->d_iname[0] == '\0')
+		return;
+
+	mnt = real_mount(filp->f_path.mnt);
+
+	mutex_lock(&global_files_lock);
+	hash_add(global_files_hashtable, &filp->f_hash, (uintptr_t)mnt);
+	mutex_unlock(&global_files_lock);
+}
+
+void global_filetable_del(struct file *filp)
+{
+ mutex_lock(&global_files_lock);
+ hash_del(&filp->f_hash);
+ mutex_unlock(&global_files_lock);
+}
+
+/*
+ * Log one tracked file: a running index (*count is incremented), the
+ * address of the struct file, its path and its reference count.  When
+ * d_path() cannot produce the full path, the dentry's inline name is
+ * printed instead.
+ *
+ * @path_buffer is scratch space for d_path() and is used as PAGE_SIZE
+ * bytes long.
+ */
+static void global_print_file(struct file *filp, char *path_buffer, int *count)
+{
+	char *pathname;
+
+	pathname = d_path(&filp->f_path, path_buffer, PAGE_SIZE);
+	/*
+	 * The address is printed with %p.  %pa is meant for a phys_addr_t
+	 * passed by reference; giving it &filp makes printk read
+	 * sizeof(phys_addr_t) bytes from a pointer-sized stack slot, which
+	 * is wrong on any configuration where the two sizes differ.
+	 */
+	if (IS_ERR(pathname))
+		pr_err("VFS: File %d Address : %p partial filename: %s ref_count=%ld\n",
+			++(*count), filp, filp->f_path.dentry->d_iname,
+			atomic_long_read(&filp->f_count));
+	else
+		pr_err("VFS: File %d Address : %p full filepath: %s ref_count=%ld\n",
+			++(*count), filp, pathname,
+			atomic_long_read(&filp->f_count));
+}
+
+/*
+ * Dump the tracked files to the kernel log in two passes, both under
+ * global_files_lock:
+ *   1. entries in the hash bucket for @lookup_mnt whose mount matches it;
+ *   2. every tracked entry whose f_count is 0.
+ *
+ * A scratch page is allocated for d_path(); if that allocation fails the
+ * dump is skipped after logging a single error line.
+ */
+static void global_filetable_print(uintptr_t lookup_mnt)
+{
+	struct hlist_node *tmp;
+	struct file *filp;
+	struct mount *mnt;
+	int index;
+	int count = 0;
+	char *path_buffer = (char *)__get_free_page(GFP_TEMPORARY);
+
+	/* d_path() writes into this page; never hand it a NULL buffer */
+	if (!path_buffer) {
+		pr_err("VFS: unable to allocate buffer for file table dump\n");
+		return;
+	}
+
+	mutex_lock(&global_files_lock);
+	pr_err("\n**********************************************************\n");
+	pr_err("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
+
+	pr_err("\n");
+	pr_err("VFS: The following files hold a reference to the mount\n");
+	pr_err("\n");
+	/* the bucket can hold other mounts too, hence the explicit compare */
+	hash_for_each_possible_safe(global_files_hashtable, filp, tmp, f_hash,
+				    lookup_mnt) {
+		mnt = real_mount(filp->f_path.mnt);
+		if ((uintptr_t)mnt == lookup_mnt)
+			global_print_file(filp, path_buffer, &count);
+	}
+	pr_err("\n");
+	pr_err("VFS: Found total of %d open files\n", count);
+	pr_err("\n");
+
+	count = 0;
+	pr_err("\n");
+	pr_err("VFS: The following files need to cleaned up\n");
+	pr_err("\n");
+	hash_for_each_safe(global_files_hashtable, index, tmp, filp, f_hash) {
+		if (atomic_long_read(&filp->f_count) == 0)
+			global_print_file(filp, path_buffer, &count);
+	}
+
+	pr_err("\n");
+	pr_err("VFS: Found total of %d files awaiting clean-up\n", count);
+	pr_err("\n");
+	pr_err("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
+	pr_err("\n**********************************************************\n");
+
+	mutex_unlock(&global_files_lock);
+	free_page((unsigned long)path_buffer);
+}
+
+/*
+ * Work callback: recover the lookup key that embeds @work, copy out the
+ * mount value, free the key, then dump the file table for that value.
+ */
+static void global_filetable_print_work_fn(struct work_struct *work)
+{
+	struct global_filetable_lookup_key *req =
+		container_of(work, struct global_filetable_lookup_key, work);
+	uintptr_t target = req->value;
+
+	/* the request is not needed once its value has been copied */
+	kfree(req);
+	global_filetable_print(target);
+}
+
+/*
+ * Queue a file table dump for @mnt on the system workqueue.  The request
+ * is heap-allocated here and freed by the work function; if the
+ * allocation fails nothing is queued.
+ */
+void global_filetable_delayed_print(struct mount *mnt)
+{
+	struct global_filetable_lookup_key *req;
+
+	req = kzalloc(sizeof(*req), GFP_KERNEL);
+	if (!req)
+		return;
+
+	req->value = (uintptr_t)mnt;
+	INIT_WORK(&req->work, global_filetable_print_work_fn);
+	schedule_work(&req->work);
+}
+#endif /* CONFIG_FILE_TABLE_DEBUG */
+
static void file_free_rcu(struct rcu_head *head)
{
struct file *f = container_of(head, struct file, f_u.fu_rcuhead);
@@ -219,6 +354,7 @@ static void __fput(struct file *file)
put_write_access(inode);
__mnt_drop_write(mnt);
}
+ global_filetable_del(file);
file->f_path.dentry = NULL;
file->f_path.mnt = NULL;
file->f_inode = NULL;
@@ -314,6 +450,7 @@ void __init files_init(void)
filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
percpu_counter_init(&nr_files, 0, GFP_KERNEL);
+ global_filetable_print_warning_once();
}
/*
diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
index e95eeb445e58..3805040bee46 100644
--- a/fs/fuse/Makefile
+++ b/fs/fuse/Makefile
@@ -5,4 +5,4 @@
obj-$(CONFIG_FUSE_FS) += fuse.o
obj-$(CONFIG_CUSE) += cuse.o
-fuse-objs := dev.o dir.o file.o inode.o control.o
+fuse-objs := dev.o dir.o file.o inode.o control.o passthrough.o
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index e98a75233f68..c6e4e4c0221b 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -7,6 +7,7 @@
*/
#include "fuse_i.h"
+#include "fuse_passthrough.h"
#include <linux/init.h>
#include <linux/module.h>
@@ -592,9 +593,14 @@ ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args)
args->out.numargs * sizeof(struct fuse_arg));
fuse_request_send(fc, req);
ret = req->out.h.error;
- if (!ret && args->out.argvar) {
- BUG_ON(args->out.numargs != 1);
- ret = req->out.args[0].size;
+ if (!ret) {
+ if (args->out.argvar) {
+ BUG_ON(args->out.numargs != 1);
+ ret = req->out.args[0].size;
+ }
+
+ if (req->passthrough_filp != NULL)
+ args->out.passthrough_filp = req->passthrough_filp;
}
fuse_put_request(fc, req);
@@ -1974,6 +1980,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
}
fuse_copy_finish(cs);
+ fuse_setup_passthrough(fc, req);
spin_lock(&fpq->lock);
clear_bit(FR_LOCKED, &req->flags);
if (!fpq->connected)
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 62fcec0f6d01..d3c77413dd56 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -473,6 +473,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
args.out.args[0].value = &outentry;
args.out.args[1].size = sizeof(outopen);
args.out.args[1].value = &outopen;
+ args.out.passthrough_filp = NULL;
err = fuse_simple_request(fc, &args);
if (err)
goto out_free_ff;
@@ -484,6 +485,8 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
ff->fh = outopen.fh;
ff->nodeid = outentry.nodeid;
ff->open_flags = outopen.open_flags;
+ if (args.out.passthrough_filp != NULL)
+ ff->passthrough_filp = args.out.passthrough_filp;
inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
&outentry.attr, entry_attr_timeout(&outentry), 0);
if (!inode) {
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 7014318f6d18..5ff580633a63 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -7,6 +7,7 @@
*/
#include "fuse_i.h"
+#include "fuse_passthrough.h"
#include <linux/pagemap.h>
#include <linux/slab.h>
@@ -21,8 +22,10 @@
static const struct file_operations fuse_direct_io_file_operations;
static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
- int opcode, struct fuse_open_out *outargp)
+ int opcode, struct fuse_open_out *outargp,
+ struct file **passthrough_filpp)
{
+ int ret_val;
struct fuse_open_in inarg;
FUSE_ARGS(args);
@@ -38,8 +41,14 @@ static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
args.out.numargs = 1;
args.out.args[0].size = sizeof(*outargp);
args.out.args[0].value = outargp;
+ args.out.passthrough_filp = NULL;
- return fuse_simple_request(fc, &args);
+ ret_val = fuse_simple_request(fc, &args);
+
+ if (args.out.passthrough_filp != NULL)
+ *passthrough_filpp = args.out.passthrough_filp;
+
+ return ret_val;
}
struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
@@ -50,6 +59,10 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
if (unlikely(!ff))
return NULL;
+ ff->passthrough_filp = NULL;
+ ff->passthrough_enabled = 0;
+ if (fc->passthrough)
+ ff->passthrough_enabled = 1;
ff->fc = fc;
ff->reserved_req = fuse_request_alloc(0);
if (unlikely(!ff->reserved_req)) {
@@ -118,6 +131,7 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
bool isdir)
{
struct fuse_file *ff;
+ struct file *passthrough_filp = NULL;
int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
ff = fuse_file_alloc(fc);
@@ -130,10 +144,12 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
struct fuse_open_out outarg;
int err;
- err = fuse_send_open(fc, nodeid, file, opcode, &outarg);
+ err = fuse_send_open(fc, nodeid, file, opcode, &outarg,
+ &(passthrough_filp));
if (!err) {
ff->fh = outarg.fh;
ff->open_flags = outarg.open_flags;
+ ff->passthrough_filp = passthrough_filp;
} else if (err != -ENOSYS || isdir) {
fuse_file_free(ff);
@@ -253,6 +269,8 @@ void fuse_release_common(struct file *file, int opcode)
if (unlikely(!ff))
return;
+ fuse_passthrough_release(ff);
+
req = ff->reserved_req;
fuse_prepare_release(ff, file->f_flags, opcode);
@@ -884,6 +902,43 @@ static int fuse_readpages_fill(void *_data, struct page *page)
return -EIO;
}
+#ifdef CONFIG_CMA
+ if (is_cma_pageblock(page)) {
+ struct page *oldpage = page, *newpage;
+ int err;
+
+ /* make sure that old page is not free in-between the calls */
+ page_cache_get(oldpage);
+
+ newpage = alloc_page(GFP_HIGHUSER);
+ if (!newpage) {
+ page_cache_release(oldpage);
+ return -ENOMEM;
+ }
+
+ err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL);
+ if (err) {
+ __free_page(newpage);
+ page_cache_release(oldpage);
+ return err;
+ }
+
+ /*
+ * Decrement the count on new page to make page cache the only
+ * owner of it
+ */
+ lock_page(newpage);
+ put_page(newpage);
+
+ lru_cache_add_file(newpage);
+
+ /* finally release the old page and swap pointers */
+ unlock_page(oldpage);
+ page_cache_release(oldpage);
+ page = newpage;
+ }
+#endif
+
page_cache_get(page);
req->pages[req->num_pages] = page;
req->page_descs[req->num_pages].length = PAGE_SIZE;
@@ -929,8 +984,10 @@ out:
static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
+ ssize_t ret_val;
struct inode *inode = iocb->ki_filp->f_mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_file *ff = iocb->ki_filp->private_data;
/*
* In auto invalidate mode, always update attributes on read.
@@ -945,7 +1002,12 @@ static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
return err;
}
- return generic_file_read_iter(iocb, to);
+ if (ff && ff->passthrough_enabled && ff->passthrough_filp)
+ ret_val = fuse_passthrough_read_iter(iocb, to);
+ else
+ ret_val = generic_file_read_iter(iocb, to);
+
+ return ret_val;
}
static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff,
@@ -1177,6 +1239,7 @@ static ssize_t fuse_perform_write(struct file *file,
static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
+ struct fuse_file *ff = file->private_data;
struct address_space *mapping = file->f_mapping;
ssize_t written = 0;
ssize_t written_buffered = 0;
@@ -1210,8 +1273,14 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (err)
goto out;
+ if (ff && ff->passthrough_enabled && ff->passthrough_filp) {
+ written = fuse_passthrough_write_iter(iocb, from);
+ goto out;
+ }
+
if (iocb->ki_flags & IOCB_DIRECT) {
loff_t pos = iocb->ki_pos;
+
written = generic_file_direct_write(iocb, from, pos);
if (written < 0 || !iov_iter_count(from))
goto out;
@@ -2082,6 +2151,9 @@ static const struct vm_operations_struct fuse_file_vm_ops = {
static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
{
+ struct fuse_file *ff = file->private_data;
+
+ ff->passthrough_enabled = 0;
if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
fuse_link_write_file(file);
@@ -2092,6 +2164,9 @@ static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
static int fuse_direct_mmap(struct file *file, struct vm_area_struct *vma)
{
+ struct fuse_file *ff = file->private_data;
+
+ ff->passthrough_enabled = 0;
/* Can't provide the coherency needed for MAP_SHARED */
if (vma->vm_flags & VM_MAYSHARE)
return -ENODEV;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index d40fad13cf43..5a8cef204cda 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -158,6 +158,10 @@ struct fuse_file {
/** Has flock been performed on this file? */
bool flock:1;
+
+ /* the read write file */
+ struct file *passthrough_filp;
+ bool passthrough_enabled;
};
/** One input argument of a request */
@@ -237,6 +241,7 @@ struct fuse_args {
unsigned argvar:1;
unsigned numargs;
struct fuse_arg args[2];
+ struct file *passthrough_filp;
} out;
};
@@ -387,6 +392,9 @@ struct fuse_req {
/** Request is stolen from fuse_file->reserved_req */
struct file *stolen_file;
+
+ /** fuse passthrough file */
+ struct file *passthrough_filp;
};
struct fuse_iqueue {
@@ -544,6 +552,9 @@ struct fuse_conn {
/** write-back cache policy (default is write-through) */
unsigned writeback_cache:1;
+ /** passthrough IO. */
+ unsigned passthrough:1;
+
/*
* The following bitfields are only for optimization purposes
* and hence races in setting them will not cause malfunction
diff --git a/fs/fuse/fuse_passthrough.h b/fs/fuse/fuse_passthrough.h
new file mode 100644
index 000000000000..62f12c12ffec
--- /dev/null
+++ b/fs/fuse/fuse_passthrough.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _FS_FUSE_PASSTHROUGH_H
+#define _FS_FUSE_PASSTHROUGH_H
+
+#include "fuse_i.h"
+
+#include <linux/fuse.h>
+#include <linux/file.h>
+
+/*
+ * Inspect a completed OPEN/CREATE request and, when passthrough is enabled
+ * on the connection, set req->passthrough_filp to the lower file named by
+ * the reply (NULL otherwise).
+ */
+void fuse_setup_passthrough(struct fuse_conn *fc, struct fuse_req *req);
+
+/* Forward a read on a FUSE file to its passthrough (lower) file. */
+ssize_t fuse_passthrough_read_iter(struct kiocb *iocb, struct iov_iter *to);
+
+/* Forward a write on a FUSE file to its passthrough (lower) file. */
+ssize_t fuse_passthrough_write_iter(struct kiocb *iocb, struct iov_iter *from);
+
+/* Drop the reference held in ff->passthrough_filp, if any, and clear it. */
+void fuse_passthrough_release(struct fuse_file *ff);
+
+#endif /* _FS_FUSE_PASSTHROUGH_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 4b2eb65be0d4..ca9c492a1885 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -857,6 +857,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
fc->conn_error = 1;
else {
unsigned long ra_pages;
+ struct super_block *sb = fc->sb;
process_init_limits(fc, arg);
@@ -895,6 +896,13 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
fc->async_dio = 1;
if (arg->flags & FUSE_WRITEBACK_CACHE)
fc->writeback_cache = 1;
+ if (arg->flags & FUSE_PASSTHROUGH) {
+ fc->passthrough = 1;
+ /* Prevent further stacking */
+ sb->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;
+ pr_info("FUSE: Pass through is enabled [%s : %d]!\n",
+ current->comm, current->pid);
+ }
if (arg->time_gran && arg->time_gran <= 1000000000)
fc->sb->s_time_gran = arg->time_gran;
} else {
diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c
new file mode 100644
index 000000000000..785af63acabd
--- /dev/null
+++ b/fs/fuse/passthrough.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "fuse_passthrough.h"
+
+#include <linux/aio.h>
+#include <linux/fs_stack.h>
+
+/*
+ * Resolve the lower file for a passthrough-capable OPEN/CREATE reply.
+ *
+ * @fc:  connection the request belongs to; passthrough must be enabled on it.
+ * @req: request whose reply has just been copied in.
+ *
+ * On return req->passthrough_filp is either NULL (passthrough not used for
+ * this request) or a referenced struct file obtained from the fd supplied in
+ * the reply's fuse_open_out.  The reference is later handed to the opener by
+ * fuse_simple_request().
+ */
+void fuse_setup_passthrough(struct fuse_conn *fc, struct fuse_req *req)
+{
+	int daemon_fd, fs_stack_depth;
+	unsigned open_out_index;
+	struct file *passthrough_filp;
+	struct inode *passthrough_inode;
+	struct super_block *passthrough_sb;
+	struct fuse_open_out *open_out;
+
+	req->passthrough_filp = NULL;
+
+	if (!(fc->passthrough))
+		return;
+
+	if ((req->in.h.opcode != FUSE_OPEN) &&
+	    (req->in.h.opcode != FUSE_CREATE))
+		return;
+
+	/*
+	 * fuse_simple_request() only passes req->passthrough_filp on to the
+	 * opener when the reply's error is zero.  Taking a file reference
+	 * for a failed request would therefore leave it with no owner to
+	 * drop it, so do not look at the reply payload at all in that case.
+	 */
+	if (req->out.h.error)
+		return;
+
+	/* fuse_open_out is the last output argument: index 0 or 1. */
+	open_out_index = req->in.numargs - 1;
+
+	BUG_ON(open_out_index != 0 && open_out_index != 1);
+	BUG_ON(req->out.args[open_out_index].size != sizeof(*open_out));
+
+	open_out = req->out.args[open_out_index].value;
+
+	/* A negative fd means no passthrough was requested for this file. */
+	daemon_fd = (int)open_out->passthrough_fd;
+	if (daemon_fd < 0)
+		return;
+
+	passthrough_filp = fget_raw(daemon_fd);
+	if (!passthrough_filp)
+		return;
+
+	passthrough_inode = file_inode(passthrough_filp);
+	passthrough_sb = passthrough_inode->i_sb;
+	fs_stack_depth = passthrough_sb->s_stack_depth + 1;
+
+	/* If we reached the stacking limit go through regular io */
+	if (fs_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
+		/* Release the passthrough file. */
+		fput(passthrough_filp);
+		pr_err("FUSE: maximum fs stacking depth exceeded, cannot use passthrough for this file\n");
+		return;
+	}
+	req->passthrough_filp = passthrough_filp;
+}
+
+/*
+ * Common body of the passthrough read and write paths.
+ *
+ * @iocb:     I/O control block issued against the FUSE file.
+ * @iter:     data source (write) or destination (read).
+ * @do_write: non-zero for a write, zero for a read.
+ *
+ * The iocb is temporarily pointed at the lower file, the lower file's
+ * ->read_iter/->write_iter is called, and selected inode attributes are
+ * copied back to the FUSE inode on success (or -EIOCBQUEUED).
+ *
+ * Returns whatever the lower file operation returned, or -EIO when the lower
+ * file does not implement the needed operation.  On every path the iocb is
+ * pointed back at the FUSE file and the temporary reference on the lower
+ * file is dropped.
+ */
+static ssize_t fuse_passthrough_read_write_iter(struct kiocb *iocb,
+						struct iov_iter *iter,
+						int do_write)
+{
+	ssize_t ret_val;
+	struct fuse_file *ff;
+	struct file *fuse_file, *passthrough_filp;
+	struct inode *fuse_inode, *passthrough_inode;
+	struct fuse_conn *fc;
+
+	ff = iocb->ki_filp->private_data;
+	fuse_file = iocb->ki_filp;
+	passthrough_filp = ff->passthrough_filp;
+	fc = ff->fc;
+
+	/* lock passthrough file to prevent it from being released */
+	get_file(passthrough_filp);
+	iocb->ki_filp = passthrough_filp;
+	fuse_inode = fuse_file->f_path.dentry->d_inode;
+	passthrough_inode = file_inode(passthrough_filp);
+
+	if (do_write) {
+		if (!passthrough_filp->f_op->write_iter) {
+			/* Must still undo get_file() and the ki_filp swap. */
+			ret_val = -EIO;
+			goto out;
+		}
+		ret_val = passthrough_filp->f_op->write_iter(iocb, iter);
+
+		if (ret_val >= 0 || ret_val == -EIOCBQUEUED) {
+			/* The size copy is done under fc->lock. */
+			spin_lock(&fc->lock);
+			fsstack_copy_inode_size(fuse_inode, passthrough_inode);
+			spin_unlock(&fc->lock);
+			fsstack_copy_attr_times(fuse_inode, passthrough_inode);
+		}
+	} else {
+		if (!passthrough_filp->f_op->read_iter) {
+			/* Must still undo get_file() and the ki_filp swap. */
+			ret_val = -EIO;
+			goto out;
+		}
+		ret_val = passthrough_filp->f_op->read_iter(iocb, iter);
+		if (ret_val >= 0 || ret_val == -EIOCBQUEUED)
+			fsstack_copy_attr_atime(fuse_inode, passthrough_inode);
+	}
+
+out:
+	iocb->ki_filp = fuse_file;
+
+	/* unlock passthrough file */
+	fput(passthrough_filp);
+
+	return ret_val;
+}
+
+/* Read entry point: runs the shared passthrough helper with do_write = 0. */
+ssize_t fuse_passthrough_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+ return fuse_passthrough_read_write_iter(iocb, to, 0);
+}
+
+/* Write entry point: runs the shared passthrough helper with do_write = 1. */
+ssize_t fuse_passthrough_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+ return fuse_passthrough_read_write_iter(iocb, from, 1);
+}
+
+/*
+ * Give up the lower file attached to @ff, if there is one, and clear the
+ * pointer so that a second call is a no-op.
+ */
+void fuse_passthrough_release(struct fuse_file *ff)
+{
+	if (ff->passthrough_filp) {
+		/* Release the passthrough file. */
+		fput(ff->passthrough_filp);
+		ff->passthrough_filp = NULL;
+	}
+}
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index ff0ac96a8e7b..db4c867369b5 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -78,8 +78,11 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
if (type == ACL_TYPE_ACCESS) {
umode_t mode = inode->i_mode;
-
+ struct posix_acl *old_acl = acl;
error = posix_acl_update_mode(inode, &inode->i_mode, &acl);
+
+ if (!acl)
+ posix_acl_release(old_acl);
if (error)
return error;
if (mode != inode->i_mode)
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index d9178388cf48..ad23a9257418 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1222,7 +1222,7 @@ static int set_gfs2_super(struct super_block *s, void *data)
* We set the bdi here to the queue backing, file systems can
* overwrite this in ->fill_super()
*/
- s->s_bdi = &bdev_get_queue(s->s_bdev)->backing_dev_info;
+ s->s_bdi = bdev_get_queue(s->s_bdev)->backing_dev_info;
return 0;
}
diff --git a/fs/inode.c b/fs/inode.c
index b30cbe7c1baa..48185da625ce 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -154,6 +154,12 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
inode->i_rdev = 0;
inode->dirtied_when = 0;
+#ifdef CONFIG_CGROUP_WRITEBACK
+ inode->i_wb_frn_winner = 0;
+ inode->i_wb_frn_avg_time = 0;
+ inode->i_wb_frn_history = 0;
+#endif
+
if (security_inode_alloc(inode))
goto out;
spin_lock_init(&inode->i_lock);
diff --git a/fs/internal.h b/fs/internal.h
index 6387b35a1c0d..1b93a3929b16 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -153,3 +153,29 @@ extern void mnt_pin_kill(struct mount *m);
* fs/nsfs.c
*/
extern struct dentry_operations ns_dentry_operations;
+
+/*
+ * Global file table debugging hooks.  With CONFIG_FILE_TABLE_DEBUG the real
+ * implementations are declared here; without it every hook is an empty
+ * inline so callers need no #ifdefs.
+ */
+#ifdef CONFIG_FILE_TABLE_DEBUG
+/* Called from files_init(). */
+void global_filetable_print_warning_once(void);
+/* Called for a file that was opened successfully. */
+void global_filetable_add(struct file *filp);
+/* Called from __fput() before the file's path is cleared. */
+void global_filetable_del(struct file *filp);
+/* Called from do_umount() when the unmount failed with -EBUSY. */
+void global_filetable_delayed_print(struct mount *mnt);
+
+#else /* i.e NOT CONFIG_FILE_TABLE_DEBUG */
+
+static inline void global_filetable_print_warning_once(void)
+{
+}
+
+static inline void global_filetable_add(struct file *filp)
+{
+}
+
+static inline void global_filetable_del(struct file *filp)
+{
+}
+
+static inline void global_filetable_delayed_print(struct mount *mnt)
+{
+}
+
+#endif /* CONFIG_FILE_TABLE_DEBUG */
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index e27317169697..30c4c9ebb693 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -207,7 +207,8 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry,
__func__, inode->i_ino, inode->i_mode, inode->i_nlink,
f->inocache->pino_nlink, inode->i_mapping->nrpages);
- d_instantiate_new(dentry, inode);
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
return 0;
fail:
@@ -427,7 +428,8 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
mutex_unlock(&dir_f->sem);
jffs2_complete_reservation(c);
- d_instantiate_new(dentry, inode);
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
return 0;
fail:
@@ -571,7 +573,8 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, umode_t mode
mutex_unlock(&dir_f->sem);
jffs2_complete_reservation(c);
- d_instantiate_new(dentry, inode);
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
return 0;
fail:
@@ -742,7 +745,8 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, umode_t mode
mutex_unlock(&dir_f->sem);
jffs2_complete_reservation(c);
- d_instantiate_new(dentry, inode);
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
return 0;
fail:
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index f217ae750adb..9d7551f5c32a 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -178,7 +178,8 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode,
unlock_new_inode(ip);
iput(ip);
} else {
- d_instantiate_new(dentry, ip);
+ unlock_new_inode(ip);
+ d_instantiate(dentry, ip);
}
out2:
@@ -312,7 +313,8 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
unlock_new_inode(ip);
iput(ip);
} else {
- d_instantiate_new(dentry, ip);
+ unlock_new_inode(ip);
+ d_instantiate(dentry, ip);
}
out2:
@@ -1056,7 +1058,8 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
unlock_new_inode(ip);
iput(ip);
} else {
- d_instantiate_new(dentry, ip);
+ unlock_new_inode(ip);
+ d_instantiate(dentry, ip);
}
out2:
@@ -1440,7 +1443,8 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
unlock_new_inode(ip);
iput(ip);
} else {
- d_instantiate_new(dentry, ip);
+ unlock_new_inode(ip);
+ d_instantiate(dentry, ip);
}
out1:
diff --git a/fs/mbcache.c b/fs/mbcache.c
index 187477ded6b3..de509271d031 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -222,8 +222,19 @@ __mb_cache_entry_release(struct mb_cache_entry *ce)
* then reacquire the lock in the proper order.
*/
spin_lock(&mb_cache_spinlock);
- if (list_empty(&ce->e_lru_list))
- list_add_tail(&ce->e_lru_list, &mb_cache_lru_list);
+ /*
+ * Evaluate the conditions under global lock mb_cache_spinlock,
+ * to check if mb_cache_entry_get() is running now
+ * and has already deleted the entry from mb_cache_lru_list
+ * and incremented ce->e_refcnt to prevent further additions
+ * to mb_cache_lru_list.
+ */
+ if (!(ce->e_used || ce->e_queued ||
+ atomic_read(&ce->e_refcnt))) {
+ if (list_empty(&ce->e_lru_list))
+ list_add_tail(&ce->e_lru_list,
+ &mb_cache_lru_list);
+ }
spin_unlock(&mb_cache_spinlock);
}
__spin_unlock_mb_cache_entry(ce);
@@ -262,7 +273,6 @@ mb_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
list_del_init(&ce->e_lru_list);
if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))
continue;
- spin_unlock(&mb_cache_spinlock);
/* Prevent any find or get operation on the entry */
hlist_bl_lock(ce->e_block_hash_p);
hlist_bl_lock(ce->e_index_hash_p);
@@ -271,10 +281,10 @@ mb_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
!list_empty(&ce->e_lru_list)) {
hlist_bl_unlock(ce->e_index_hash_p);
hlist_bl_unlock(ce->e_block_hash_p);
- spin_lock(&mb_cache_spinlock);
continue;
}
__mb_cache_entry_unhash_unlock(ce);
+ spin_unlock(&mb_cache_spinlock);
list_add_tail(&ce->e_lru_list, &free_list);
spin_lock(&mb_cache_spinlock);
}
@@ -516,7 +526,6 @@ mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags)
if (ce->e_used || ce->e_queued ||
atomic_read(&ce->e_refcnt))
continue;
- spin_unlock(&mb_cache_spinlock);
/*
* Prevent any find or get operation on the
* entry.
@@ -530,13 +539,13 @@ mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags)
hlist_bl_unlock(ce->e_index_hash_p);
hlist_bl_unlock(ce->e_block_hash_p);
l = &mb_cache_lru_list;
- spin_lock(&mb_cache_spinlock);
continue;
}
mb_assert(list_empty(&ce->e_lru_list));
mb_assert(!(ce->e_used || ce->e_queued ||
atomic_read(&ce->e_refcnt)));
__mb_cache_entry_unhash_unlock(ce);
+ spin_unlock(&mb_cache_spinlock);
goto found;
}
}
@@ -670,6 +679,7 @@ mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev,
cache->c_bucket_bits);
block_hash_p = &cache->c_block_hash[bucket];
/* First serialize access to the block corresponding hash chain. */
+ spin_lock(&mb_cache_spinlock);
hlist_bl_lock(block_hash_p);
hlist_bl_for_each_entry(ce, l, block_hash_p, e_block_list) {
mb_assert(ce->e_block_hash_p == block_hash_p);
@@ -678,9 +688,11 @@ mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev,
* Prevent a free from removing the entry.
*/
atomic_inc(&ce->e_refcnt);
+ if (!list_empty(&ce->e_lru_list))
+ list_del_init(&ce->e_lru_list);
hlist_bl_unlock(block_hash_p);
+ spin_unlock(&mb_cache_spinlock);
__spin_lock_mb_cache_entry(ce);
- atomic_dec(&ce->e_refcnt);
if (ce->e_used > 0) {
DEFINE_WAIT(wait);
while (ce->e_used > 0) {
@@ -695,13 +707,9 @@ mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev,
finish_wait(&mb_cache_queue, &wait);
}
ce->e_used += 1 + MB_CACHE_WRITER;
+ atomic_dec(&ce->e_refcnt);
__spin_unlock_mb_cache_entry(ce);
- if (!list_empty(&ce->e_lru_list)) {
- spin_lock(&mb_cache_spinlock);
- list_del_init(&ce->e_lru_list);
- spin_unlock(&mb_cache_spinlock);
- }
if (!__mb_cache_entry_is_block_hashed(ce)) {
__mb_cache_entry_release(ce);
return NULL;
@@ -710,6 +718,7 @@ mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev,
}
}
hlist_bl_unlock(block_hash_p);
+ spin_unlock(&mb_cache_spinlock);
return NULL;
}
diff --git a/fs/namei.c b/fs/namei.c
index 5b56423e2e0b..ea6050b6134a 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2692,8 +2692,14 @@ int vfs_create2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry,
if (error)
return error;
error = dir->i_op->create(dir, dentry, mode, want_excl);
+ if (error)
+ return error;
+ error = security_inode_post_create(dir, dentry, mode);
+ if (error)
+ return error;
if (!error)
fsnotify_create(dir, dentry);
+
return error;
}
EXPORT_SYMBOL(vfs_create2);
@@ -3378,6 +3384,8 @@ out2:
error = -ESTALE;
}
file = ERR_PTR(error);
+ } else {
+ global_filetable_add(file);
}
return file;
}
@@ -3545,8 +3553,16 @@ int vfs_mknod2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry, u
return error;
error = dir->i_op->mknod(dir, dentry, mode, dev);
+ if (error)
+ return error;
+
+ error = security_inode_post_create(dir, dentry, mode);
+ if (error)
+ return error;
+
if (!error)
fsnotify_create(dir, dentry);
+
return error;
}
EXPORT_SYMBOL(vfs_mknod2);
diff --git a/fs/namespace.c b/fs/namespace.c
index cee965df8d7e..e6585f285234 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1625,6 +1625,8 @@ static int do_umount(struct mount *mnt, int flags)
out:
unlock_mount_hash();
namespace_unlock();
+ if (retval == -EBUSY)
+ global_filetable_delayed_print(mnt);
return retval;
}
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index cd7f5b0abe84..c9a1a491aa91 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -50,7 +50,8 @@ static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode)
{
int err = nilfs_add_link(dentry, inode);
if (!err) {
- d_instantiate_new(dentry, inode);
+ d_instantiate(dentry, inode);
+ unlock_new_inode(inode);
return 0;
}
inode_dec_link_count(inode);
@@ -245,7 +246,8 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
goto out_fail;
nilfs_mark_inode_dirty(inode);
- d_instantiate_new(dentry, inode);
+ d_instantiate(dentry, inode);
+ unlock_new_inode(inode);
out:
if (!err)
err = nilfs_transaction_commit(dir->i_sb);
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 354013ea22ec..67c6c650b21e 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1079,7 +1079,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_time_gran = 1;
sb->s_max_links = NILFS_LINK_MAX;
- sb->s_bdi = &bdev_get_queue(sb->s_bdev)->backing_dev_info;
+ sb->s_bdi = bdev_get_queue(sb->s_bdev)->backing_dev_info;
err = load_nilfs(nilfs, sb);
if (err)
diff --git a/fs/proc/array.c b/fs/proc/array.c
index cb71cbae606d..161441f52ebf 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -172,15 +172,15 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
seq_printf(m,
"State:\t%s\n"
"Tgid:\t%d\n"
- "Ngid:\t%d\n"
"Pid:\t%d\n"
"PPid:\t%d\n"
"TracerPid:\t%d\n"
"Uid:\t%d\t%d\t%d\t%d\n"
"Gid:\t%d\t%d\t%d\t%d\n"
+ "Ngid:\t%d\n"
"FDSize:\t%d\nGroups:\t",
get_task_state(p),
- tgid, ngid, pid_nr_ns(pid, ns), ppid, tpid,
+ tgid, pid_nr_ns(pid, ns), ppid, tpid,
from_kuid_munged(user_ns, cred->uid),
from_kuid_munged(user_ns, cred->euid),
from_kuid_munged(user_ns, cred->suid),
@@ -189,7 +189,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
from_kgid_munged(user_ns, cred->egid),
from_kgid_munged(user_ns, cred->sgid),
from_kgid_munged(user_ns, cred->fsgid),
- max_fds);
+ ngid, max_fds);
group_info = cred->group_info;
for (g = 0; g < group_info->ngroups; g++)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index d780651a760c..ee9b7f0ea938 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1042,15 +1042,20 @@ static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
int oom_adj = OOM_ADJUST_MIN;
size_t len;
unsigned long flags;
+ int mult = 1;
if (!task)
return -ESRCH;
if (lock_task_sighand(task, &flags)) {
- if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX)
+ if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX) {
oom_adj = OOM_ADJUST_MAX;
- else
- oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) /
- OOM_SCORE_ADJ_MAX;
+ } else {
+ if (task->signal->oom_score_adj < 0)
+ mult = -1;
+ oom_adj = roundup(mult * task->signal->oom_score_adj *
+ -OOM_DISABLE, OOM_SCORE_ADJ_MAX) /
+ OOM_SCORE_ADJ_MAX * mult;
+ }
unlock_task_sighand(task, &flags);
}
put_task_struct(task);
@@ -1434,6 +1439,204 @@ static const struct file_operations proc_pid_sched_operations = {
#endif
+/*
+ * Per-task scheduler settings that can be read and written through procfs:
+ */
+
+#ifdef CONFIG_SMP
+
+/*
+ * seq_file show callback: prints sched_get_wake_up_idle() for the task
+ * behind the inode stored in m->private as a decimal followed by a newline.
+ * Returns -ESRCH when the task can no longer be looked up.
+ */
+static int sched_wake_up_idle_show(struct seq_file *m, void *v)
+{
+	struct inode *proc_inode = m->private;
+	struct task_struct *task = get_proc_task(proc_inode);
+
+	if (task == NULL)
+		return -ESRCH;
+
+	seq_printf(m, "%d\n", sched_get_wake_up_idle(task));
+	put_task_struct(task);
+
+	return 0;
+}
+
+/*
+ * Write handler: parses an integer from at most PROC_NUMBUF - 1 bytes of
+ * user data and passes it to sched_set_wake_up_idle() for the task behind
+ * @file.
+ *
+ * Returns the (possibly truncated) byte count on success, or a negative
+ * errno: -EFAULT on a failed user copy, the kstrtoint() error on bad input,
+ * -ESRCH if the task is gone, or a negative result from the setter.
+ */
+static ssize_t
+sched_wake_up_idle_write(struct file *file, const char __user *buf,
+			 size_t count, loff_t *offset)
+{
+	char kbuf[PROC_NUMBUF];
+	struct task_struct *task;
+	int value, err;
+
+	/* Zero first so the copied text is always NUL-terminated. */
+	memset(kbuf, 0, sizeof(kbuf));
+	if (count >= sizeof(kbuf))
+		count = sizeof(kbuf) - 1;
+	if (copy_from_user(kbuf, buf, count))
+		return -EFAULT;
+
+	err = kstrtoint(strstrip(kbuf), 0, &value);
+	if (!err) {
+		task = get_proc_task(file_inode(file));
+		if (!task)
+			return -ESRCH;
+
+		err = sched_set_wake_up_idle(task, value);
+		put_task_struct(task);
+	}
+
+	return err < 0 ? err : count;
+}
+
+/* Open handler: binds the show callback and passes the inode as its data. */
+static int sched_wake_up_idle_open(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, sched_wake_up_idle_show, inode);
+}
+
+/* File operations behind the per-process "sched_wake_up_idle" proc entry. */
+static const struct file_operations proc_pid_sched_wake_up_idle_operations = {
+ .open = sched_wake_up_idle_open,
+ .read = seq_read,
+ .write = sched_wake_up_idle_write,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_SCHED_HMP
+
+/*
+ * seq_file show callback: prints sched_get_init_task_load() for the task
+ * behind the inode stored in m->private as a decimal followed by a newline.
+ * Returns -ESRCH when the task can no longer be looked up.
+ */
+static int sched_init_task_load_show(struct seq_file *m, void *v)
+{
+	struct inode *proc_inode = m->private;
+	struct task_struct *task = get_proc_task(proc_inode);
+
+	if (task == NULL)
+		return -ESRCH;
+
+	seq_printf(m, "%d\n", sched_get_init_task_load(task));
+	put_task_struct(task);
+
+	return 0;
+}
+
+/*
+ * Write handler: parses an integer from at most PROC_NUMBUF - 1 bytes of
+ * user data and passes it to sched_set_init_task_load() for the task behind
+ * @file.
+ *
+ * Returns the (possibly truncated) byte count on success, or a negative
+ * errno: -EFAULT on a failed user copy, the kstrtoint() error on bad input,
+ * -ESRCH if the task is gone, or a negative result from the setter.
+ */
+static ssize_t
+sched_init_task_load_write(struct file *file, const char __user *buf,
+			   size_t count, loff_t *offset)
+{
+	char kbuf[PROC_NUMBUF];
+	struct task_struct *task;
+	int value, err;
+
+	/* Zero first so the copied text is always NUL-terminated. */
+	memset(kbuf, 0, sizeof(kbuf));
+	if (count >= sizeof(kbuf))
+		count = sizeof(kbuf) - 1;
+	if (copy_from_user(kbuf, buf, count))
+		return -EFAULT;
+
+	err = kstrtoint(strstrip(kbuf), 0, &value);
+	if (!err) {
+		task = get_proc_task(file_inode(file));
+		if (!task)
+			return -ESRCH;
+
+		err = sched_set_init_task_load(task, value);
+		put_task_struct(task);
+	}
+
+	return err < 0 ? err : count;
+}
+
+/* Open handler: binds the show callback and passes the inode as its data. */
+static int sched_init_task_load_open(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, sched_init_task_load_show, inode);
+}
+
+/* File operations behind the per-process "sched_init_task_load" proc entry. */
+static const struct file_operations proc_pid_sched_init_task_load_operations = {
+ .open = sched_init_task_load_open,
+ .read = seq_read,
+ .write = sched_init_task_load_write,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+/*
+ * seq_file show callback: prints sched_get_group_id() for the task behind
+ * the inode stored in m->private as a decimal followed by a newline.
+ * Returns -ESRCH when the task can no longer be looked up.
+ */
+static int sched_group_id_show(struct seq_file *m, void *v)
+{
+	struct inode *proc_inode = m->private;
+	struct task_struct *task = get_proc_task(proc_inode);
+
+	if (task == NULL)
+		return -ESRCH;
+
+	seq_printf(m, "%d\n", sched_get_group_id(task));
+	put_task_struct(task);
+
+	return 0;
+}
+
+/*
+ * Write handler: parses an integer from at most PROC_NUMBUF - 1 bytes of
+ * user data and passes it to sched_set_group_id() for the task behind @file.
+ *
+ * Returns the (possibly truncated) byte count on success, or a negative
+ * errno: -EFAULT on a failed user copy, the kstrtoint() error on bad input,
+ * -ESRCH if the task is gone, or a negative result from the setter.
+ */
+static ssize_t
+sched_group_id_write(struct file *file, const char __user *buf,
+		     size_t count, loff_t *offset)
+{
+	char kbuf[PROC_NUMBUF];
+	struct task_struct *task;
+	int value, err;
+
+	/* Zero first so the copied text is always NUL-terminated. */
+	memset(kbuf, 0, sizeof(kbuf));
+	if (count >= sizeof(kbuf))
+		count = sizeof(kbuf) - 1;
+	if (copy_from_user(kbuf, buf, count))
+		return -EFAULT;
+
+	err = kstrtoint(strstrip(kbuf), 0, &value);
+	if (!err) {
+		task = get_proc_task(file_inode(file));
+		if (!task)
+			return -ESRCH;
+
+		err = sched_set_group_id(task, value);
+		put_task_struct(task);
+	}
+
+	return err < 0 ? err : count;
+}
+
+/* Open handler: binds the show callback and passes the inode as its data. */
+static int sched_group_id_open(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, sched_group_id_show, inode);
+}
+
+/*
+ * File operations behind the per-process "sched_group_id" proc entry.
+ * NOTE(review): the entry is registered with S_IRUGO|S_IWUGO, i.e. mode bits
+ * that allow writes by any user, and the write handler itself performs no
+ * permission check - confirm this is intended.
+ */
+static const struct file_operations proc_pid_sched_group_id_operations = {
+ .open = sched_group_id_open,
+ .read = seq_read,
+ .write = sched_group_id_write,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+#endif /* CONFIG_SCHED_HMP */
+
#ifdef CONFIG_SCHED_AUTOGROUP
/*
* Print out autogroup related information:
@@ -2874,6 +3077,13 @@ static const struct pid_entry tgid_base_stuff[] = {
ONE("status", S_IRUGO, proc_pid_status),
ONE("personality", S_IRUSR, proc_pid_personality),
ONE("limits", S_IRUGO, proc_pid_limits),
+#ifdef CONFIG_SMP
+ REG("sched_wake_up_idle", S_IRUGO|S_IWUSR, proc_pid_sched_wake_up_idle_operations),
+#endif
+#ifdef CONFIG_SCHED_HMP
+ REG("sched_init_task_load", S_IRUGO|S_IWUSR, proc_pid_sched_init_task_load_operations),
+ REG("sched_group_id", S_IRUGO|S_IWUGO, proc_pid_sched_group_id_operations),
+#endif
#ifdef CONFIG_SCHED_DEBUG
REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
#endif
@@ -2898,6 +3108,9 @@ static const struct pid_entry tgid_base_stuff[] = {
REG("mounts", S_IRUGO, proc_mounts_operations),
REG("mountinfo", S_IRUGO, proc_mountinfo_operations),
REG("mountstats", S_IRUSR, proc_mountstats_operations),
+#ifdef CONFIG_PROCESS_RECLAIM
+ REG("reclaim", S_IWUSR, proc_reclaim_operations),
+#endif
#ifdef CONFIG_PROC_PAGE_MONITOR
REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
REG("smaps", S_IRUGO, proc_pid_smaps_operations),
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index fa4dd182bfaa..1f01beda3bf3 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -209,6 +209,7 @@ struct pde_opener {
extern const struct inode_operations proc_link_inode_operations;
extern const struct inode_operations proc_pid_link_inode_operations;
+extern const struct file_operations proc_reclaim_operations;
extern void proc_init_inodecache(void);
extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index df4661abadc4..5e5c443591ea 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -76,6 +76,13 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
available += global_page_state(NR_SLAB_RECLAIMABLE) -
min(global_page_state(NR_SLAB_RECLAIMABLE) / 2, wmark_low);
+ /*
+ * Part of the kernel memory, which can be released under memory
+ * pressure.
+ */
+ available += global_page_state(NR_INDIRECTLY_RECLAIMABLE_BYTES) >>
+ PAGE_SHIFT;
+
if (available < 0)
available = 0;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index fdbb9df83bc5..99c4ffaa43a8 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -14,6 +14,8 @@
#include <linux/swapops.h>
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
+#include <linux/mm_inline.h>
+#include <linux/ctype.h>
#include <asm/elf.h>
#include <asm/uaccess.h>
@@ -382,7 +384,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
name = "[stack]";
goto done;
}
-
if (vma_get_anon_name(vma)) {
seq_pad(m, ' ');
seq_print_vma_name(m, vma);
@@ -1415,6 +1416,241 @@ const struct file_operations proc_pagemap_operations = {
};
#endif /* CONFIG_PROC_PAGE_MONITOR */
+#ifdef CONFIG_PROCESS_RECLAIM
+/*
+ * pmd_entry callback used by the process-reclaim page walks.
+ *
+ * Scans the PTEs covering [addr, end) in the VMA stored in the walk's
+ * reclaim_param, isolates the present, normal pages it finds from the LRU in
+ * batches of up to SWAP_CLUSTER_MAX, and hands each batch to
+ * reclaim_pages_from_list(). rp->nr_scanned counts isolated pages,
+ * rp->nr_reclaimed accumulates what reclaim_pages_from_list() returns, and
+ * rp->nr_to_reclaim is decremented by the same amount (never below zero).
+ *
+ * Always returns 0.
+ */
+static int reclaim_pte_range(pmd_t *pmd, unsigned long addr,
+ unsigned long end, struct mm_walk *walk)
+{
+ struct reclaim_param *rp = walk->private;
+ struct vm_area_struct *vma = rp->vma;
+ pte_t *pte, ptent;
+ spinlock_t *ptl;
+ struct page *page;
+ LIST_HEAD(page_list);
+ int isolated;
+ int reclaimed;
+
+ split_huge_page_pmd(vma, addr, pmd);
+ /* Nothing to do if the pmd is unstable or the budget is already spent. */
+ if (pmd_trans_unstable(pmd) || !rp->nr_to_reclaim)
+ return 0;
+cont:
+ /* Start a new batch; the PTE lock is held only while isolating pages. */
+ isolated = 0;
+ pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+ for (; addr != end; pte++, addr += PAGE_SIZE) {
+ ptent = *pte;
+ if (!pte_present(ptent))
+ continue;
+
+ page = vm_normal_page(vma, addr, ptent);
+ if (!page)
+ continue;
+
+ /* A non-zero return means the page could not be isolated; skip it. */
+ if (isolate_lru_page(page))
+ continue;
+
+ list_add(&page->lru, &page_list);
+ /*
+ * NOTE(review): presumably NR_ISOLATED_ANON + 1 is the file counter, so
+ * page_is_file_cache() picks the matching one -- confirm against mmzone.h.
+ */
+ inc_zone_page_state(page, NR_ISOLATED_ANON +
+ page_is_file_cache(page));
+ isolated++;
+ rp->nr_scanned++;
+ /*
+ * The break skips the loop increment, so pte/addr still refer to the
+ * page just isolated when the batch is cut short here.
+ */
+ if ((isolated >= SWAP_CLUSTER_MAX) || !rp->nr_to_reclaim)
+ break;
+ }
+ pte_unmap_unlock(pte - 1, ptl);
+ reclaimed = reclaim_pages_from_list(&page_list, vma);
+ rp->nr_reclaimed += reclaimed;
+ rp->nr_to_reclaim -= reclaimed;
+ if (rp->nr_to_reclaim < 0)
+ rp->nr_to_reclaim = 0;
+
+ /* More budget and more PTEs left: go round again with a fresh batch. */
+ if (rp->nr_to_reclaim && (addr != end))
+ goto cont;
+
+ cond_resched();
+ return 0;
+}
+
+/* Request kinds accepted by reclaim_write(), selected by the written text. */
+enum reclaim_type {
+ RECLAIM_FILE, /* "file": only VMAs that have a vm_file */
+ RECLAIM_ANON, /* "anon": only VMAs without a vm_file */
+ RECLAIM_ALL, /* "all": every VMA */
+ RECLAIM_RANGE, /* input starting with a digit: "<start> <len>" range */
+};
+
+/*
+ * reclaim_task_anon - reclaim pages from a task's anonymous mappings
+ * @task: task whose address space is walked
+ * @nr_to_reclaim: upper bound on the number of pages to reclaim
+ *
+ * Walks every VMA of @task that is not hugetlb, has no backing file and is
+ * not VM_LOCKED, running reclaim_pte_range() over it until the budget is
+ * used up.
+ *
+ * Returns the reclaim_param used for the walk; nr_scanned and nr_reclaimed
+ * hold the totals. If the task has no mm, nothing is walked and both
+ * counters are zero.
+ */
+struct reclaim_param reclaim_task_anon(struct task_struct *task,
+				       int nr_to_reclaim)
+{
+	struct mm_struct *mm;
+	struct vm_area_struct *vma;
+	struct mm_walk reclaim_walk = {};
+	/*
+	 * Zero the whole struct: it is returned by value even on the early
+	 * exit below, so no field may be left indeterminate.
+	 */
+	struct reclaim_param rp = {};
+
+	rp.nr_to_reclaim = nr_to_reclaim;
+
+	get_task_struct(task);
+	mm = get_task_mm(task);
+	if (!mm)
+		goto out;
+
+	reclaim_walk.mm = mm;
+	reclaim_walk.pmd_entry = reclaim_pte_range;
+	reclaim_walk.private = &rp;
+
+	down_read(&mm->mmap_sem);
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (is_vm_hugetlb_page(vma))
+			continue;
+
+		/* Anonymous memory only: skip file-backed mappings. */
+		if (vma->vm_file)
+			continue;
+
+		if (vma->vm_flags & VM_LOCKED)
+			continue;
+
+		/* Budget exhausted by an earlier VMA. */
+		if (!rp.nr_to_reclaim)
+			break;
+
+		rp.vma = vma;
+		walk_page_range(vma->vm_start, vma->vm_end,
+				&reclaim_walk);
+	}
+
+	flush_tlb_mm(mm);
+	up_read(&mm->mmap_sem);
+	mmput(mm);
+out:
+	put_task_struct(task);
+	return rp;
+}
+
+/*
+ * Write handler for the per-task "reclaim" proc entry.
+ *
+ * Accepted input (surrounding whitespace is stripped):
+ *   "file"          - walk only VMAs that have a backing file
+ *   "anon"          - walk only VMAs without a backing file
+ *   "all"           - walk every VMA
+ *   "<start> <len>" - walk only [start, start + len); both values are parsed
+ *                     with memparse(), start must be page aligned and len is
+ *                     rounded up to a whole number of pages
+ * hugetlb VMAs are always skipped.
+ *
+ * Returns the number of bytes consumed (at most sizeof(buffer) - 1), -EFAULT
+ * if the user buffer cannot be read, -EINVAL for malformed input, or -ESRCH
+ * if the task is gone. A task without an mm is treated as success.
+ */
+static ssize_t reclaim_write(struct file *file, const char __user *buf,
+				size_t count, loff_t *ppos)
+{
+	struct task_struct *task;
+	char buffer[200];
+	struct mm_struct *mm;
+	struct vm_area_struct *vma;
+	enum reclaim_type type;
+	char *type_buf;
+	struct mm_walk reclaim_walk = {};
+	unsigned long start = 0;
+	unsigned long end = 0;
+	/*
+	 * Zeroed so that nr_scanned, which reclaim_pte_range() increments,
+	 * starts from a defined value.
+	 */
+	struct reclaim_param rp = {};
+
+	memset(buffer, 0, sizeof(buffer));
+	if (count > sizeof(buffer) - 1)
+		count = sizeof(buffer) - 1;
+
+	if (copy_from_user(buffer, buf, count))
+		return -EFAULT;
+
+	type_buf = strstrip(buffer);
+	if (!strcmp(type_buf, "file"))
+		type = RECLAIM_FILE;
+	else if (!strcmp(type_buf, "anon"))
+		type = RECLAIM_ANON;
+	else if (!strcmp(type_buf, "all"))
+		type = RECLAIM_ALL;
+	else if (isdigit(*type_buf))
+		type = RECLAIM_RANGE;
+	else
+		return -EINVAL;
+
+	if (type == RECLAIM_RANGE) {
+		char *token;
+		unsigned long long len, len_in, tmp;
+
+		token = strsep(&type_buf, " ");
+		if (!token)
+			return -EINVAL;
+		tmp = memparse(token, &token);
+		/* start must be page aligned and fit in an unsigned long. */
+		if ((tmp & ~PAGE_MASK) || tmp > ULONG_MAX)
+			return -EINVAL;
+		start = tmp;
+
+		token = strsep(&type_buf, " ");
+		if (!token)
+			return -EINVAL;
+		len_in = memparse(token, &token);
+		len = (len_in + ~PAGE_MASK) & PAGE_MASK;
+		if (len > ULONG_MAX)
+			return -EINVAL;
+		/*
+		 * Check to see whether len was rounded up from small -ve
+		 * to zero.
+		 */
+		if (len_in && !len)
+			return -EINVAL;
+
+		end = start + len;
+		if (end < start)
+			return -EINVAL;
+	}
+
+	task = get_proc_task(file->f_path.dentry->d_inode);
+	if (!task)
+		return -ESRCH;
+
+	mm = get_task_mm(task);
+	if (!mm)
+		goto out;
+
+	reclaim_walk.mm = mm;
+	reclaim_walk.pmd_entry = reclaim_pte_range;
+
+	/*
+	 * "No limit". reclaim_pte_range() subtracts from this budget and
+	 * clamps it at zero when it goes negative, so it must start as a
+	 * large positive value: ~0 would read as -1 in a signed field and end
+	 * the walk after the first batch.
+	 */
+	rp.nr_to_reclaim = INT_MAX;
+	reclaim_walk.private = &rp;
+
+	down_read(&mm->mmap_sem);
+	if (type == RECLAIM_RANGE) {
+		/*
+		 * Advance in the for-header so that skipping a hugetlb VMA
+		 * still moves on to the next one instead of spinning forever
+		 * with mmap_sem held.
+		 */
+		for (vma = find_vma(mm, start); vma; vma = vma->vm_next) {
+			if (vma->vm_start > end)
+				break;
+			if (is_vm_hugetlb_page(vma))
+				continue;
+
+			rp.vma = vma;
+			/* Clip the walk to the part of the VMA inside the range. */
+			walk_page_range(max(vma->vm_start, start),
+					min(vma->vm_end, end),
+					&reclaim_walk);
+		}
+	} else {
+		for (vma = mm->mmap; vma; vma = vma->vm_next) {
+			if (is_vm_hugetlb_page(vma))
+				continue;
+
+			if (type == RECLAIM_ANON && vma->vm_file)
+				continue;
+
+			if (type == RECLAIM_FILE && !vma->vm_file)
+				continue;
+
+			rp.vma = vma;
+			walk_page_range(vma->vm_start, vma->vm_end,
+					&reclaim_walk);
+		}
+	}
+
+	flush_tlb_mm(mm);
+	up_read(&mm->mmap_sem);
+	mmput(mm);
+out:
+	put_task_struct(task);
+	return count;
+}
+
+/*
+ * File operations for the per-task "reclaim" proc entry: write-only, handled
+ * by reclaim_write(); seeking is a no-op.
+ */
+const struct file_operations proc_reclaim_operations = {
+ .write = reclaim_write,
+ .llseek = noop_llseek,
+};
+#endif
+
#ifdef CONFIG_NUMA
struct numa_maps {
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index eb611bdd4725..3ebc70167e41 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -687,7 +687,8 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mod
reiserfs_update_inode_transaction(inode);
reiserfs_update_inode_transaction(dir);
- d_instantiate_new(dentry, inode);
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
retval = journal_end(&th);
out_failed:
@@ -770,7 +771,8 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode
goto out_failed;
}
- d_instantiate_new(dentry, inode);
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
retval = journal_end(&th);
out_failed:
@@ -869,7 +871,8 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
/* the above add_entry did not update dir's stat data */
reiserfs_update_sd(&th, dir);
- d_instantiate_new(dentry, inode);
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
retval = journal_end(&th);
out_failed:
reiserfs_write_unlock(dir->i_sb);
@@ -1183,7 +1186,8 @@ static int reiserfs_symlink(struct inode *parent_dir,
goto out_failed;
}
- d_instantiate_new(dentry, inode);
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
retval = journal_end(&th);
out_failed:
reiserfs_write_unlock(parent_dir->i_sb);
diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c
index 6c0039284ae0..ab0952f13510 100644
--- a/fs/sdcardfs/inode.c
+++ b/fs/sdcardfs/inode.c
@@ -525,7 +525,7 @@ static const char *sdcardfs_follow_link(struct dentry *dentry, void **cookie)
static int sdcardfs_permission_wrn(struct inode *inode, int mask)
{
- WARN_RATELIMIT(1, "sdcardfs does not support permission. Use permission2.\n");
+ pr_debug("sdcardfs does not support permission. Use permission2.\n");
return -EINVAL;
}
diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c
index ba52af8644cc..3151ec9cf7e9 100644
--- a/fs/sdcardfs/main.c
+++ b/fs/sdcardfs/main.c
@@ -270,7 +270,7 @@ static int sdcardfs_read_super(struct vfsmount *mnt, struct super_block *sb,
pr_info("sdcardfs: dev_name -> %s\n", dev_name);
pr_info("sdcardfs: options -> %s\n", (char *)raw_data);
- pr_info("sdcardfs: mnt -> %p\n", mnt);
+ pr_info("sdcardfs: mnt -> %pK\n", mnt);
/* parse lower path */
err = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
diff --git a/fs/sdcardfs/super.c b/fs/sdcardfs/super.c
index 140696ed3ed3..7693b0e8efef 100644
--- a/fs/sdcardfs/super.c
+++ b/fs/sdcardfs/super.c
@@ -144,7 +144,7 @@ static int sdcardfs_remount_fs2(struct vfsmount *mnt, struct super_block *sb,
pr_err("sdcardfs: remount flags 0x%x unsupported\n", *flags);
err = -EINVAL;
}
- pr_info("Remount options were %s for vfsmnt %p.\n", options, mnt);
+ pr_info("Remount options were %s for vfsmnt %pK.\n", options, mnt);
err = parse_options_remount(sb, options, *flags & ~MS_SILENT, mnt->data);
diff --git a/fs/super.c b/fs/super.c
index 652c9e6b6b92..bc7ae0f327d0 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -972,7 +972,7 @@ static int set_bdev_super(struct super_block *s, void *data)
* We set the bdi here to the queue backing, file systems can
* overwrite this in ->fill_super()
*/
- s->s_bdi = &bdev_get_queue(s->s_bdev)->backing_dev_info;
+ s->s_bdi = bdev_get_queue(s->s_bdev)->backing_dev_info;
return 0;
}
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 1327a02ec778..0548c572839c 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -50,7 +50,8 @@ static DEFINE_SPINLOCK(cancel_lock);
static inline bool isalarm(struct timerfd_ctx *ctx)
{
return ctx->clockid == CLOCK_REALTIME_ALARM ||
- ctx->clockid == CLOCK_BOOTTIME_ALARM;
+ ctx->clockid == CLOCK_BOOTTIME_ALARM ||
+ ctx->clockid == CLOCK_POWEROFF_ALARM;
}
/*
@@ -142,7 +143,8 @@ static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags)
{
spin_lock(&ctx->cancel_lock);
if ((ctx->clockid == CLOCK_REALTIME ||
- ctx->clockid == CLOCK_REALTIME_ALARM) &&
+ ctx->clockid == CLOCK_REALTIME_ALARM ||
+ ctx->clockid == CLOCK_POWEROFF_ALARM) &&
(flags & TFD_TIMER_ABSTIME) && (flags & TFD_TIMER_CANCEL_ON_SET)) {
if (!ctx->might_cancel) {
ctx->might_cancel = true;
@@ -174,6 +176,7 @@ static int timerfd_setup(struct timerfd_ctx *ctx, int flags,
enum hrtimer_mode htmode;
ktime_t texp;
int clockid = ctx->clockid;
+ enum alarmtimer_type type;
htmode = (flags & TFD_TIMER_ABSTIME) ?
HRTIMER_MODE_ABS: HRTIMER_MODE_REL;
@@ -184,10 +187,8 @@ static int timerfd_setup(struct timerfd_ctx *ctx, int flags,
ctx->tintv = timespec_to_ktime(ktmr->it_interval);
if (isalarm(ctx)) {
- alarm_init(&ctx->t.alarm,
- ctx->clockid == CLOCK_REALTIME_ALARM ?
- ALARM_REALTIME : ALARM_BOOTTIME,
- timerfd_alarmproc);
+ type = clock2alarm(ctx->clockid);
+ alarm_init(&ctx->t.alarm, type, timerfd_alarmproc);
} else {
hrtimer_init(&ctx->t.tmr, clockid, htmode);
hrtimer_set_expires(&ctx->t.tmr, texp);
@@ -387,6 +388,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
{
int ufd;
struct timerfd_ctx *ctx;
+ enum alarmtimer_type type;
/* Check the TFD_* constants for consistency. */
BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC);
@@ -397,7 +399,8 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
clockid != CLOCK_REALTIME &&
clockid != CLOCK_REALTIME_ALARM &&
clockid != CLOCK_BOOTTIME &&
- clockid != CLOCK_BOOTTIME_ALARM))
+ clockid != CLOCK_BOOTTIME_ALARM &&
+ clockid != CLOCK_POWEROFF_ALARM))
return -EINVAL;
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
@@ -408,13 +411,12 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
spin_lock_init(&ctx->cancel_lock);
ctx->clockid = clockid;
- if (isalarm(ctx))
- alarm_init(&ctx->t.alarm,
- ctx->clockid == CLOCK_REALTIME_ALARM ?
- ALARM_REALTIME : ALARM_BOOTTIME,
- timerfd_alarmproc);
- else
+ if (isalarm(ctx)) {
+ type = clock2alarm(ctx->clockid);
+ alarm_init(&ctx->t.alarm, type, timerfd_alarmproc);
+ } else {
hrtimer_init(&ctx->t.tmr, clockid, HRTIMER_MODE_ABS);
+ }
ctx->moffs = ktime_mono_to_real((ktime_t){ .tv64 = 0 });
@@ -486,6 +488,10 @@ static int do_timerfd_settime(int ufd, int flags,
ret = timerfd_setup(ctx, flags, new);
spin_unlock_irq(&ctx->wqh.lock);
+
+ if (ctx->clockid == CLOCK_POWEROFF_ALARM)
+ set_power_on_alarm();
+
fdput(f);
return ret;
}
diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h
index 92a8491a8f8c..c0a95e393347 100644
--- a/fs/ubifs/key.h
+++ b/fs/ubifs/key.h
@@ -34,6 +34,12 @@
* node. We use "r5" hash borrowed from reiserfs.
*/
+/*
+ * Lots of the key helpers take a struct ubifs_info *c as their first
+ * parameter, but none of them currently uses it. It is there for future
+ * extensions that support different c->key_format values. Right now there is
+ * only one key type, UBIFS_SIMPLE_KEY_FMT.
+ */
+
#ifndef __UBIFS_KEY_H__
#define __UBIFS_KEY_H__
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index b5bf23b34241..de6f82d4eda2 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -268,7 +268,7 @@ static int check_namespace(const struct qstr *nm)
if (!strncmp(nm->name, XATTR_TRUSTED_PREFIX,
XATTR_TRUSTED_PREFIX_LEN)) {
- if (nm->name[sizeof(XATTR_TRUSTED_PREFIX) - 1] == '\0')
+ if (nm->name[XATTR_TRUSTED_PREFIX_LEN] == '\0')
return -EINVAL;
type = TRUSTED_XATTR;
} else if (!strncmp(nm->name, XATTR_USER_PREFIX,
@@ -278,7 +278,7 @@ static int check_namespace(const struct qstr *nm)
type = USER_XATTR;
} else if (!strncmp(nm->name, XATTR_SECURITY_PREFIX,
XATTR_SECURITY_PREFIX_LEN)) {
- if (nm->name[sizeof(XATTR_SECURITY_PREFIX) - 1] == '\0')
+ if (nm->name[XATTR_SECURITY_PREFIX_LEN] == '\0')
return -EINVAL;
type = SECURITY_XATTR;
} else
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index f34c545f4e54..c97b5a8d1e24 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -611,7 +611,8 @@ static int udf_add_nondir(struct dentry *dentry, struct inode *inode)
if (fibh.sbh != fibh.ebh)
brelse(fibh.ebh);
brelse(fibh.sbh);
- d_instantiate_new(dentry, inode);
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
return 0;
}
@@ -721,7 +722,8 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
inc_nlink(dir);
dir->i_ctime = dir->i_mtime = current_fs_time(dir->i_sb);
mark_inode_dirty(dir);
- d_instantiate_new(dentry, inode);
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
if (fibh.sbh != fibh.ebh)
brelse(fibh.ebh);
brelse(fibh.sbh);
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 2ec7689c25cf..47966554317c 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -38,7 +38,8 @@ static inline int ufs_add_nondir(struct dentry *dentry, struct inode *inode)
{
int err = ufs_add_link(dentry, inode);
if (!err) {
- d_instantiate_new(dentry, inode);
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
return 0;
}
inode_dec_link_count(inode);
@@ -190,7 +191,8 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
if (err)
goto out_fail;
- d_instantiate_new(dentry, inode);
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
return 0;
out_fail:
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index c5101a3295d8..62ba66e1c598 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -289,8 +289,10 @@ xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
if (type == ACL_TYPE_ACCESS) {
umode_t mode;
-
+ struct posix_acl *old_acl = acl;
error = posix_acl_update_mode(inode, &mode, &acl);
+ if (!acl)
+ posix_acl_release(old_acl);
if (error)
return error;
error = xfs_set_mode(inode, mode);