diff options
Diffstat (limited to 'fs')
64 files changed, 2350 insertions, 212 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index 80af05163579..89ddd182f568 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -284,4 +284,9 @@ endif # NETWORK_FILESYSTEMS source "fs/nls/Kconfig" source "fs/dlm/Kconfig" +config FILE_TABLE_DEBUG + bool "Enable FILE_TABLE_DEBUG" + help + This option enables debug of the open files using a global filetable + endmenu @@ -261,6 +261,7 @@ static int __init aio_setup(void) aio_mnt = kern_mount(&aio_fs); if (IS_ERR(aio_mnt)) panic("Failed to create aio fs mount."); + aio_mnt->mnt_flags |= MNT_NOEXEC; kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); @@ -1321,7 +1322,7 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr, SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp) { struct kioctx *ioctx = NULL; - unsigned long ctx; + unsigned long ctx = 0; long ret; ret = get_user(ctx, ctxp); diff --git a/fs/block_dev.c b/fs/block_dev.c index 26bbaaefdff4..43b80ca84d9c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -532,6 +532,7 @@ static void init_once(void *foo) #ifdef CONFIG_SYSFS INIT_LIST_HEAD(&bdev->bd_holder_disks); #endif + bdev->bd_bdi = &noop_backing_dev_info; inode_init_once(&ei->vfs_inode); /* Initialize mutex for freeze. */ mutex_init(&bdev->bd_fsfreeze_mutex); @@ -557,6 +558,12 @@ static void bdev_evict_inode(struct inode *inode) } list_del_init(&bdev->bd_list); spin_unlock(&bdev_lock); + /* Detach inode from wb early as bdi_put() may free bdi->wb */ + inode_detach_wb(inode); + if (bdev->bd_bdi != &noop_backing_dev_info) { + bdi_put(bdev->bd_bdi); + bdev->bd_bdi = &noop_backing_dev_info; + } } static const struct super_operations bdev_sops = { @@ -623,6 +630,21 @@ static int bdev_set(struct inode *inode, void *data) static LIST_HEAD(all_bdevs); +/* + * If there is a bdev inode for this device, unhash it so that it gets evicted + * as soon as last inode reference is dropped. 
+ */ +void bdev_unhash_inode(dev_t dev) +{ + struct inode *inode; + + inode = ilookup5(blockdev_superblock, hash(dev), bdev_test, &dev); + if (inode) { + remove_inode_hash(inode); + iput(inode); + } +} + struct block_device *bdget(dev_t dev) { struct block_device *bdev; @@ -1201,6 +1223,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) bdev->bd_disk = disk; bdev->bd_queue = disk->queue; bdev->bd_contains = bdev; + bdev->bd_inode->i_flags = disk->fops->direct_access ? S_DAX : 0; if (!partno) { ret = -ENXIO; @@ -1271,6 +1294,9 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) (bdev->bd_part->nr_sects % (PAGE_SIZE / 512))) bdev->bd_inode->i_flags &= ~S_DAX; } + + if (bdev->bd_bdi == &noop_backing_dev_info) + bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info); } else { if (bdev->bd_contains == bdev) { ret = 0; @@ -1523,12 +1549,6 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) kill_bdev(bdev); bdev_write_inode(bdev); - /* - * Detaching bdev inode from its wb in __destroy_inode() - * is too late: the queue which embeds its bdi (along with - * root wb) can be gone as soon as we put_disk() below. - */ - inode_detach_wb(bdev->bd_inode); } if (bdev->bd_contains == bdev) { if (disk->fops->release) diff --git a/fs/buffer.c b/fs/buffer.c index 6f7d519a093b..14ce7b24f32a 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -621,6 +621,18 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode) } EXPORT_SYMBOL(mark_buffer_dirty_inode); +#ifdef CONFIG_BLK_DEV_IO_TRACE +static inline void save_dirty_task(struct page *page) +{ + /* Save the task that is dirtying this page */ + page->tsk_dirty = current; +} +#else +static inline void save_dirty_task(struct page *page) +{ +} +#endif + /* * Mark the page dirty, and set it dirty in the radix tree, and mark the inode * dirty. 
@@ -641,6 +653,7 @@ static void __set_page_dirty(struct page *page, struct address_space *mapping, account_page_dirtied(page, mapping, memcg); radix_tree_tag_set(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); + save_dirty_task(page); } spin_unlock_irqrestore(&mapping->tree_lock, flags); } @@ -1466,12 +1479,48 @@ static bool has_bh_in_lru(int cpu, void *dummy) return 0; } +static void __evict_bh_lru(void *arg) +{ + struct bh_lru *b = &get_cpu_var(bh_lrus); + struct buffer_head *bh = arg; + int i; + + for (i = 0; i < BH_LRU_SIZE; i++) { + if (b->bhs[i] == bh) { + brelse(b->bhs[i]); + b->bhs[i] = NULL; + goto out; + } + } +out: + put_cpu_var(bh_lrus); +} + +static bool bh_exists_in_lru(int cpu, void *arg) +{ + struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu); + struct buffer_head *bh = arg; + int i; + + for (i = 0; i < BH_LRU_SIZE; i++) { + if (b->bhs[i] == bh) + return 1; + } + + return 0; + +} void invalidate_bh_lrus(void) { on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1, GFP_KERNEL); } EXPORT_SYMBOL_GPL(invalidate_bh_lrus); +static void evict_bh_lrus(struct buffer_head *bh) +{ + on_each_cpu_cond(bh_exists_in_lru, __evict_bh_lru, bh, 1, GFP_ATOMIC); +} + void set_bh_page(struct buffer_head *bh, struct page *page, unsigned long offset) { @@ -3192,8 +3241,15 @@ drop_buffers(struct page *page, struct buffer_head **buffers_to_free) do { if (buffer_write_io_error(bh) && page->mapping) set_bit(AS_EIO, &page->mapping->flags); - if (buffer_busy(bh)) - goto failed; + if (buffer_busy(bh)) { + /* + * Check if the busy failure was due to an + * outstanding LRU reference + */ + evict_bh_lrus(bh); + if (buffer_busy(bh)) + goto failed; + } bh = bh->b_this_page; } while (bh != head); diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index dcf26537c935..b4967f7aaad0 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -871,6 +871,9 @@ COMPATIBLE_IOCTL(TIOCGPTN) COMPATIBLE_IOCTL(TIOCSPTLCK) COMPATIBLE_IOCTL(TIOCSERGETLSR) COMPATIBLE_IOCTL(TIOCSIG) 
+COMPATIBLE_IOCTL(TIOCPMGET) +COMPATIBLE_IOCTL(TIOCPMPUT) +COMPATIBLE_IOCTL(TIOCPMACT) #ifdef TIOCSRS485 COMPATIBLE_IOCTL(TIOCSRS485) #endif diff --git a/fs/dcache.c b/fs/dcache.c index 5bf7b4a188e9..ba56a39a3b74 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1392,7 +1392,7 @@ static enum d_walk_ret select_collect(void *_data, struct dentry *dentry) goto out; if (dentry->d_flags & DCACHE_SHRINK_LIST) { - data->found++; + goto out; } else { if (dentry->d_flags & DCACHE_LRU_LIST) d_lru_del(dentry); diff --git a/fs/direct-io.c b/fs/direct-io.c index 44f49d86d714..da574a74a467 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -399,6 +399,7 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio) if (dio->is_async && dio->rw == READ && dio->should_dirty) bio_set_pages_dirty(bio); + bio->bi_dio_inode = dio->inode; dio->bio_bdev = bio->bi_bdev; if (sdio->submit_io) { @@ -413,6 +414,19 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio) sdio->logical_offset_in_bio = 0; } +struct inode *dio_bio_get_inode(struct bio *bio) +{ + struct inode *inode = NULL; + + if (bio == NULL) + return NULL; + + inode = bio->bi_dio_inode; + + return inode; +} +EXPORT_SYMBOL(dio_bio_get_inode); + /* * Release any resources in case of a failure */ diff --git a/fs/drop_caches.c b/fs/drop_caches.c index d72d52b90433..ddf319bcfccd 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -13,7 +13,7 @@ /* A global variable is a bit ugly, but it keeps the code simple */ int sysctl_drop_caches; -static void drop_pagecache_sb(struct super_block *sb, void *unused) +void drop_pagecache_sb(struct super_block *sb, void *unused) { struct inode *inode, *toput_inode = NULL; diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile index 49678a69947d..c29cdd20d08a 100644 --- a/fs/ecryptfs/Makefile +++ b/fs/ecryptfs/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o -ecryptfs-y := dentry.o file.o inode.o main.o super.o mmap.o read_write.o \ 
+ecryptfs-y := dentry.o file.o inode.o main.o super.o mmap.o read_write.o events.o \ crypto.o keystore.o kthread.o debug.o ecryptfs-$(CONFIG_ECRYPT_FS_MESSAGING) += messaging.o miscdev.o diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 80d6901493cf..cf0186fd9bfe 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -35,6 +35,7 @@ #include <linux/scatterlist.h> #include <linux/slab.h> #include <asm/unaligned.h> +#include <linux/ecryptfs.h> #include "ecryptfs_kernel.h" #define DECRYPT 0 @@ -350,9 +351,9 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED)); if (unlikely(ecryptfs_verbosity > 0)) { ecryptfs_printk(KERN_DEBUG, "Key size [%zd]; key:\n", - crypt_stat->key_size); + ecryptfs_get_key_size_to_enc_data(crypt_stat)); ecryptfs_dump_hex(crypt_stat->key, - crypt_stat->key_size); + ecryptfs_get_key_size_to_enc_data(crypt_stat)); } init_completion(&ecr.completion); @@ -371,7 +372,7 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, /* Consider doing this once, when the file is opened */ if (!(crypt_stat->flags & ECRYPTFS_KEY_SET)) { rc = crypto_ablkcipher_setkey(crypt_stat->tfm, crypt_stat->key, - crypt_stat->key_size); + ecryptfs_get_key_size_to_enc_data(crypt_stat)); if (rc) { ecryptfs_printk(KERN_ERR, "Error setting key; rc = [%d]\n", @@ -466,6 +467,30 @@ out: return rc; } +static void init_ecryption_parameters(bool *hw_crypt, bool *cipher_supported, + struct ecryptfs_crypt_stat *crypt_stat) +{ + if (!hw_crypt || !cipher_supported) + return; + + *cipher_supported = false; + *hw_crypt = false; + + if (get_events() && get_events()->is_cipher_supported_cb) { + *cipher_supported = + get_events()->is_cipher_supported_cb(crypt_stat); + if (*cipher_supported) { + + /** + * we should apply external algorythm + * assume that is_hw_crypt() cbck is supplied + */ + if (get_events()->is_hw_crypt_cb) + *hw_crypt = get_events()->is_hw_crypt_cb(); + } + } +} + 
/** * ecryptfs_encrypt_page * @page: Page mapped from the eCryptfs inode for the file; contains @@ -491,11 +516,18 @@ int ecryptfs_encrypt_page(struct page *page) loff_t extent_offset; loff_t lower_offset; int rc = 0; + bool is_hw_crypt; + bool is_cipher_supported; + ecryptfs_inode = page->mapping->host; crypt_stat = &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat); BUG_ON(!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)); + + init_ecryption_parameters(&is_hw_crypt, + &is_cipher_supported, crypt_stat); + enc_extent_page = alloc_page(GFP_USER); if (!enc_extent_page) { rc = -ENOMEM; @@ -503,24 +535,51 @@ int ecryptfs_encrypt_page(struct page *page) "encrypted extent\n"); goto out; } - - for (extent_offset = 0; - extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size); - extent_offset++) { - rc = crypt_extent(crypt_stat, enc_extent_page, page, - extent_offset, ENCRYPT); - if (rc) { - printk(KERN_ERR "%s: Error encrypting extent; " - "rc = [%d]\n", __func__, rc); - goto out; + if (is_hw_crypt) { + /* no need for encryption */ + } else { + for (extent_offset = 0; + extent_offset < + (PAGE_CACHE_SIZE / crypt_stat->extent_size); + extent_offset++) { + + if (is_cipher_supported) { + if (!get_events()->encrypt_cb) { + rc = -EPERM; + goto out; + } + rc = get_events()->encrypt_cb(page, + enc_extent_page, + ecryptfs_inode_to_lower( + ecryptfs_inode), + extent_offset); + } else { + rc = crypt_extent(crypt_stat, + enc_extent_page, page, + extent_offset, ENCRYPT); + } + if (rc) { + ecryptfs_printk(KERN_ERR, + "%s: Error encrypting; rc = [%d]\n", + __func__, rc); + goto out; + } } } lower_offset = lower_offset_for_page(crypt_stat, page); - enc_extent_virt = kmap(enc_extent_page); + if (is_hw_crypt) + enc_extent_virt = kmap(page); + else + enc_extent_virt = kmap(enc_extent_page); + rc = ecryptfs_write_lower(ecryptfs_inode, enc_extent_virt, lower_offset, PAGE_CACHE_SIZE); - kunmap(enc_extent_page); + if (!is_hw_crypt) + kunmap(enc_extent_page); + else + kunmap(page); + if (rc < 
0) { ecryptfs_printk(KERN_ERR, "Error attempting to write lower page; rc = [%d]\n", @@ -559,6 +618,8 @@ int ecryptfs_decrypt_page(struct page *page) unsigned long extent_offset; loff_t lower_offset; int rc = 0; + bool is_cipher_supported; + bool is_hw_crypt; ecryptfs_inode = page->mapping->host; crypt_stat = @@ -577,13 +638,33 @@ int ecryptfs_decrypt_page(struct page *page) goto out; } + init_ecryption_parameters(&is_hw_crypt, + &is_cipher_supported, crypt_stat); + + if (is_hw_crypt) { + rc = 0; + return rc; + } + for (extent_offset = 0; extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size); extent_offset++) { - rc = crypt_extent(crypt_stat, page, page, + if (is_cipher_supported) { + if (!get_events()->decrypt_cb) { + rc = -EPERM; + goto out; + } + + rc = get_events()->decrypt_cb(page, page, + ecryptfs_inode_to_lower(ecryptfs_inode), + extent_offset); + + } else + rc = crypt_extent(crypt_stat, page, page, extent_offset, DECRYPT); + if (rc) { - printk(KERN_ERR "%s: Error encrypting extent; " + ecryptfs_printk(KERN_ERR, "%s: Error decrypting extent;" "rc = [%d]\n", __func__, rc); goto out; } @@ -612,7 +693,7 @@ int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat) "Initializing cipher [%s]; strlen = [%d]; " "key_size_bits = [%zd]\n", crypt_stat->cipher, (int)strlen(crypt_stat->cipher), - crypt_stat->key_size << 3); + ecryptfs_get_key_size_to_enc_data(crypt_stat) << 3); mutex_lock(&crypt_stat->cs_tfm_mutex); if (crypt_stat->tfm) { rc = 0; @@ -694,7 +775,7 @@ int ecryptfs_compute_root_iv(struct ecryptfs_crypt_stat *crypt_stat) goto out; } rc = ecryptfs_calculate_md5(dst, crypt_stat, crypt_stat->key, - crypt_stat->key_size); + ecryptfs_get_key_size_to_enc_data(crypt_stat)); if (rc) { ecryptfs_printk(KERN_WARNING, "Error attempting to compute " "MD5 while generating root IV\n"); @@ -721,6 +802,31 @@ static void ecryptfs_generate_new_key(struct ecryptfs_crypt_stat *crypt_stat) } } +static int ecryptfs_generate_new_salt(struct ecryptfs_crypt_stat 
*crypt_stat) +{ + size_t salt_size = 0; + + salt_size = ecryptfs_get_salt_size_for_cipher(crypt_stat); + + if (0 == salt_size) + return 0; + + if (!ecryptfs_check_space_for_salt(crypt_stat->key_size, salt_size)) { + ecryptfs_printk(KERN_WARNING, "not enough space for salt\n"); + crypt_stat->flags |= ECRYPTFS_SECURITY_WARNING; + return -EINVAL; + } + + get_random_bytes(crypt_stat->key + crypt_stat->key_size, salt_size); + if (unlikely(ecryptfs_verbosity > 0)) { + ecryptfs_printk(KERN_DEBUG, "Generated new session salt:\n"); + ecryptfs_dump_hex(crypt_stat->key + crypt_stat->key_size, + salt_size); + } + + return 0; +} + /** * ecryptfs_copy_mount_wide_flags_to_inode_flags * @crypt_stat: The inode's cryptographic context @@ -823,7 +929,6 @@ int ecryptfs_new_file_context(struct inode *ecryptfs_inode) struct ecryptfs_mount_crypt_stat *mount_crypt_stat = &ecryptfs_superblock_to_private( ecryptfs_inode->i_sb)->mount_crypt_stat; - int cipher_name_len; int rc = 0; ecryptfs_set_default_crypt_stat_vals(crypt_stat, mount_crypt_stat); @@ -837,15 +942,19 @@ int ecryptfs_new_file_context(struct inode *ecryptfs_inode) "to the inode key sigs; rc = [%d]\n", rc); goto out; } - cipher_name_len = - strlen(mount_crypt_stat->global_default_cipher_name); - memcpy(crypt_stat->cipher, + strlcpy(crypt_stat->cipher, mount_crypt_stat->global_default_cipher_name, - cipher_name_len); - crypt_stat->cipher[cipher_name_len] = '\0'; + sizeof(crypt_stat->cipher)); + + strlcpy(crypt_stat->cipher_mode, + mount_crypt_stat->global_default_cipher_mode, + sizeof(crypt_stat->cipher_mode)); + crypt_stat->key_size = mount_crypt_stat->global_default_cipher_key_size; ecryptfs_generate_new_key(crypt_stat); + ecryptfs_generate_new_salt(crypt_stat); + rc = ecryptfs_init_crypt_ctx(crypt_stat); if (rc) ecryptfs_printk(KERN_ERR, "Error initializing cryptographic " @@ -971,7 +1080,8 @@ ecryptfs_cipher_code_str_map[] = { {"twofish", RFC2440_CIPHER_TWOFISH}, {"cast6", RFC2440_CIPHER_CAST_6}, {"aes", 
RFC2440_CIPHER_AES_192}, - {"aes", RFC2440_CIPHER_AES_256} + {"aes", RFC2440_CIPHER_AES_256}, + {"aes_xts", RFC2440_CIPHER_AES_XTS_256} }; /** @@ -999,6 +1109,11 @@ u8 ecryptfs_code_for_cipher_string(char *cipher_name, size_t key_bytes) case 32: code = RFC2440_CIPHER_AES_256; } + } else if (strcmp(cipher_name, "aes_xts") == 0) { + switch (key_bytes) { + case 32: + code = RFC2440_CIPHER_AES_XTS_256; + } } else { for (i = 0; i < ARRAY_SIZE(ecryptfs_cipher_code_str_map); i++) if (strcmp(cipher_name, map[i].cipher_str) == 0) { @@ -1038,9 +1153,24 @@ int ecryptfs_read_and_validate_header_region(struct inode *inode) u8 file_size[ECRYPTFS_SIZE_AND_MARKER_BYTES]; u8 *marker = file_size + ECRYPTFS_FILE_SIZE_BYTES; int rc; + unsigned int ra_pages_org; + struct file *lower_file = NULL; + + if (!inode) + return -EIO; + lower_file = ecryptfs_inode_to_private(inode)->lower_file; + if (!lower_file) + return -EIO; + + /*disable read a head mechanism for a while */ + ra_pages_org = lower_file->f_ra.ra_pages; + lower_file->f_ra.ra_pages = 0; rc = ecryptfs_read_lower(file_size, 0, ECRYPTFS_SIZE_AND_MARKER_BYTES, inode); + lower_file->f_ra.ra_pages = ra_pages_org; + /* restore read a head mechanism */ + if (rc < ECRYPTFS_SIZE_AND_MARKER_BYTES) return rc >= 0 ? 
-EINVAL : rc; rc = ecryptfs_validate_marker(marker); @@ -1430,6 +1560,11 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry) struct ecryptfs_mount_crypt_stat *mount_crypt_stat = &ecryptfs_superblock_to_private( ecryptfs_dentry->d_sb)->mount_crypt_stat; + unsigned int ra_pages_org; + struct file *lower_file = + ecryptfs_inode_to_private(ecryptfs_inode)->lower_file; + if (!lower_file) + return -EIO; ecryptfs_copy_mount_wide_flags_to_inode_flags(crypt_stat, mount_crypt_stat); @@ -1441,8 +1576,14 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry) __func__); goto out; } + /*disable read a head mechanism */ + ra_pages_org = lower_file->f_ra.ra_pages; + lower_file->f_ra.ra_pages = 0; + rc = ecryptfs_read_lower(page_virt, 0, crypt_stat->extent_size, ecryptfs_inode); + lower_file->f_ra.ra_pages = ra_pages_org; /* restore it back */ + if (rc >= 0) rc = ecryptfs_read_headers_virt(page_virt, crypt_stat, ecryptfs_dentry, diff --git a/fs/ecryptfs/debug.c b/fs/ecryptfs/debug.c index 3d2bdf546ec6..0556af1adfb7 100644 --- a/fs/ecryptfs/debug.c +++ b/fs/ecryptfs/debug.c @@ -119,3 +119,32 @@ void ecryptfs_dump_hex(char *data, int bytes) printk("\n"); } +void ecryptfs_dump_salt_hex(char *data, int key_size, + const struct ecryptfs_crypt_stat *crypt_stat) +{ + size_t salt_size = ecryptfs_get_salt_size_for_cipher(crypt_stat); + + if (0 == salt_size) + return; + + if (!ecryptfs_check_space_for_salt(key_size, salt_size)) + return; + + ecryptfs_printk(KERN_DEBUG, "Decrypted session salt key:\n"); + ecryptfs_dump_hex(data + key_size, salt_size); +} + +void ecryptfs_dump_cipher(struct ecryptfs_crypt_stat *stat) +{ + if (!stat) + return; + + if (stat->cipher) + ecryptfs_printk(KERN_DEBUG, + "ecryptfs cipher is %s\n", stat->cipher); + + if (stat->cipher_mode) + ecryptfs_printk(KERN_DEBUG, "ecryptfs cipher mode is %s\n", + stat->cipher_mode); + +} diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index eae9cdb8af46..f5908e91eb17 100644 --- 
a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -254,6 +254,7 @@ struct ecryptfs_crypt_stat { struct mutex cs_tfm_mutex; struct mutex cs_hash_tfm_mutex; struct mutex cs_mutex; + unsigned char cipher_mode[ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1]; }; /* inode private data. */ @@ -354,6 +355,8 @@ struct ecryptfs_mount_crypt_stat { unsigned char global_default_fn_cipher_name[ ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1]; char global_default_fnek_sig[ECRYPTFS_SIG_SIZE_HEX + 1]; + unsigned char global_default_cipher_mode[ECRYPTFS_MAX_CIPHER_NAME_SIZE + + 1]; }; /* superblock private data. */ @@ -536,6 +539,53 @@ ecryptfs_dentry_to_lower_path(struct dentry *dentry) return &((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path; } +/** + * Given a cipher and mode strings, the function + * concatenates them to create a new string of + * <cipher>_<mode> format. + */ +static inline unsigned char *ecryptfs_get_full_cipher( + unsigned char *cipher, unsigned char *mode, + unsigned char *final, size_t final_size) +{ + memset(final, 0, final_size); + + if (strlen(mode) > 0) { + snprintf(final, final_size, "%s_%s", cipher, mode); + return final; + } + + return cipher; +} + +/** + * Given a <cipher>[_<mode>] formatted string, the function + * extracts cipher string and/or mode string. + * Note: the passed cipher and/or mode strings will be null-terminated. + */ +static inline void ecryptfs_parse_full_cipher( + char *s, char *cipher, char *mode) +{ + char input[2*ECRYPTFS_MAX_CIPHER_NAME_SIZE+1+1]; + /* +1 for '_'; +1 for '\0' */ + char *p; + char *input_p = input; + + if (s == NULL || cipher == NULL) + return; + + memset(input, 0, sizeof(input)); + strlcpy(input, s, sizeof(input)); + + p = strsep(&input_p, "_"); + strlcpy(cipher, p, ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1); + + + /* check if mode is specified */ + if (input_p != NULL && mode != NULL) + strlcpy(mode, input_p, ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1); +} + #define ecryptfs_printk(type, fmt, arg...) 
\ __ecryptfs_printk(type "%s: " fmt, __func__, ## arg); __printf(1, 2) @@ -584,6 +634,10 @@ int ecryptfs_encrypt_and_encode_filename( const char *name, size_t name_size); struct dentry *ecryptfs_lower_dentry(struct dentry *this_dentry); void ecryptfs_dump_hex(char *data, int bytes); +void ecryptfs_dump_salt_hex(char *data, int key_size, + const struct ecryptfs_crypt_stat *crypt_stat); +extern void ecryptfs_dump_cipher(struct ecryptfs_crypt_stat *stat); + int virt_to_scatterlist(const void *addr, int size, struct scatterlist *sg, int sg_size); int ecryptfs_compute_root_iv(struct ecryptfs_crypt_stat *crypt_stat); @@ -727,4 +781,33 @@ int ecryptfs_set_f_namelen(long *namelen, long lower_namelen, int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat, loff_t offset); +void clean_inode_pages(struct address_space *mapping, + pgoff_t start, pgoff_t end); + +void ecryptfs_drop_pagecache_sb(struct super_block *sb, void *unused); + +void ecryptfs_free_events(void); + +void ecryptfs_freepage(struct page *page); + +struct ecryptfs_events *get_events(void); + +size_t ecryptfs_get_salt_size_for_cipher( + const struct ecryptfs_crypt_stat *crypt_stat); + +size_t ecryptfs_get_salt_size_for_cipher_mount( + const struct ecryptfs_mount_crypt_stat *mount_crypt_stat); + +size_t ecryptfs_get_key_size_to_enc_data( + const struct ecryptfs_crypt_stat *crypt_stat); + +size_t ecryptfs_get_key_size_to_store_key( + const struct ecryptfs_crypt_stat *crypt_stat); + +size_t ecryptfs_get_key_size_to_restore_key(size_t stored_key_size, + const struct ecryptfs_crypt_stat *crypt_stat); + +bool ecryptfs_check_space_for_salt(const size_t key_size, + const size_t salt_size); + #endif /* #ifndef ECRYPTFS_KERNEL_H */ diff --git a/fs/ecryptfs/events.c b/fs/ecryptfs/events.c new file mode 100644 index 000000000000..12e26c683cf6 --- /dev/null +++ b/fs/ecryptfs/events.c @@ -0,0 +1,393 @@ +/** + * eCryptfs: Linux filesystem encryption layer + * Copyright (c) 2015-2016, The Linux Foundation. 
All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/string.h> +#include <linux/ecryptfs.h> +#include <linux/mutex.h> +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/pagemap.h> +#include <linux/random.h> +#include "ecryptfs_kernel.h" + +static DEFINE_MUTEX(events_mutex); +struct ecryptfs_events *events_ptr = NULL; +static int handle; + +void ecryptfs_free_events(void) +{ + mutex_lock(&events_mutex); + if (events_ptr != NULL) { + kfree(events_ptr); + events_ptr = NULL; + } + + mutex_unlock(&events_mutex); +} + +/** + * Register to ecryptfs events, by passing callback + * functions to be called upon events occurrence. + * The function returns a handle to be passed + * to unregister function. 
+ */ +int ecryptfs_register_to_events(const struct ecryptfs_events *ops) +{ + int ret_value = 0; + + if (!ops) + return -EINVAL; + + mutex_lock(&events_mutex); + + if (events_ptr != NULL) { + ecryptfs_printk(KERN_ERR, + "already registered!\n"); + ret_value = -EPERM; + goto out; + } + events_ptr = + kzalloc(sizeof(struct ecryptfs_events), GFP_KERNEL); + + if (!events_ptr) { + ecryptfs_printk(KERN_ERR, "malloc failure\n"); + ret_value = -ENOMEM; + goto out; + } + /* copy the callbacks */ + events_ptr->open_cb = ops->open_cb; + events_ptr->release_cb = ops->release_cb; + events_ptr->encrypt_cb = ops->encrypt_cb; + events_ptr->decrypt_cb = ops->decrypt_cb; + events_ptr->is_cipher_supported_cb = + ops->is_cipher_supported_cb; + events_ptr->is_hw_crypt_cb = ops->is_hw_crypt_cb; + events_ptr->get_salt_key_size_cb = ops->get_salt_key_size_cb; + + get_random_bytes(&handle, sizeof(handle)); + ret_value = handle; + +out: + mutex_unlock(&events_mutex); + return ret_value; +} + +/** + * Unregister from ecryptfs events. + */ +int ecryptfs_unregister_from_events(int user_handle) +{ + int ret_value = 0; + + mutex_lock(&events_mutex); + + if (!events_ptr) { + ret_value = -EINVAL; + goto out; + } + if (user_handle != handle) { + ret_value = ECRYPTFS_INVALID_EVENTS_HANDLE; + goto out; + } + + kfree(events_ptr); + events_ptr = NULL; + +out: + mutex_unlock(&events_mutex); + return ret_value; +} + +/** + * This function decides whether the passed file offset + * belongs to ecryptfs metadata or not. + * The caller must pass ecryptfs data, which was received in one + * of the callback invocations. 
+ */ +bool ecryptfs_is_page_in_metadata(const void *data, pgoff_t offset) +{ + + struct ecryptfs_crypt_stat *stat = NULL; + bool ret = true; + + if (!data) { + ecryptfs_printk(KERN_ERR, "ecryptfs_is_page_in_metadata: invalid data parameter\n"); + ret = false; + goto end; + } + stat = (struct ecryptfs_crypt_stat *)data; + + if (stat->flags & ECRYPTFS_METADATA_IN_XATTR) { + ret = false; + goto end; + } + + if (offset >= (stat->metadata_size/PAGE_CACHE_SIZE)) { + ret = false; + goto end; + } +end: + return ret; +} + +/** + * Given two ecryptfs data, the function + * decides whether they are equal. + */ +inline bool ecryptfs_is_data_equal(const void *data1, const void *data2) +{ + /* pointer comparison*/ + return data1 == data2; +} + +/** + * Given ecryptfs data, the function + * returns appropriate key size. + */ +size_t ecryptfs_get_key_size(const void *data) +{ + + struct ecryptfs_crypt_stat *stat = NULL; + + if (!data) + return 0; + + stat = (struct ecryptfs_crypt_stat *)data; + return stat->key_size; +} + +/** + * Given ecryptfs data, the function + * returns appropriate salt size. + * + * !!! crypt_stat cipher name and mode must be initialized + */ +size_t ecryptfs_get_salt_size(const void *data) +{ + if (!data) { + ecryptfs_printk(KERN_ERR, + "ecryptfs_get_salt_size: invalid data parameter\n"); + return 0; + } + + return ecryptfs_get_salt_size_for_cipher(data); + +} + +/** + * Given ecryptfs data and cipher string, the function + * returns true if provided cipher and the one in ecryptfs match. 
+ */ +bool ecryptfs_cipher_match(const void *data, + const unsigned char *cipher, size_t cipher_size) +{ + unsigned char final[2*ECRYPTFS_MAX_CIPHER_NAME_SIZE+1+1]; /* "<cipher>_<mode>": two names, the '_' separator and the NUL */ + const unsigned char *ecryptfs_cipher = NULL; + struct ecryptfs_crypt_stat *stat = NULL; + + if (!data || !cipher) { + ecryptfs_printk(KERN_ERR, + "ecryptfs_get_cipher: invalid data parameter\n"); + return false; + } + + if (!cipher_size || cipher_size > sizeof(final)) { + ecryptfs_printk(KERN_ERR, + "ecryptfs_get_cipher: cipher_size\n"); + return false; + } + + stat = (struct ecryptfs_crypt_stat *)data; /* data is the crypt_stat handed to the callbacks */ + ecryptfs_cipher = ecryptfs_get_full_cipher(stat->cipher, + stat->cipher_mode, + final, sizeof(final)); + + if (!ecryptfs_cipher) { + ecryptfs_printk(KERN_ERR, + "ecryptfs_get_cipher: internal error while parsing cipher\n"); + return false; + } + + if (strcmp(ecryptfs_cipher, cipher)) { + if (ecryptfs_verbosity > 0) + ecryptfs_dump_cipher(stat); + + return false; + } + + return true; +} + +/** + * Given ecryptfs data, the function + * returns file encryption key. + */ +const unsigned char *ecryptfs_get_key(const void *data) +{ + + struct ecryptfs_crypt_stat *stat = NULL; + + if (!data) { + ecryptfs_printk(KERN_ERR, + "ecryptfs_get_key: invalid data parameter\n"); + return NULL; + } + stat = (struct ecryptfs_crypt_stat *)data; + return stat->key; +} + +/** + * Given ecryptfs data, the function + * returns file encryption salt. 
+ */ +const unsigned char *ecryptfs_get_salt(const void *data) +{ + struct ecryptfs_crypt_stat *stat = NULL; + + if (!data) { + ecryptfs_printk(KERN_ERR, + "ecryptfs_get_salt: invalid data parameter\n"); + return NULL; + } + stat = (struct ecryptfs_crypt_stat *)data; + return stat->key + stat->key_size; /* salt is stored right after the key_size bytes of key */ +} + +/** + * Returns ecryptfs events pointer + */ +inline struct ecryptfs_events *get_events(void) +{ + return events_ptr; +} + +/** + * If external crypto module requires salt in addition to key, + * we store it as part of key array (if there is enough space) + * Checks whether a salt key can fit into array allocated for + * regular key + */ +bool ecryptfs_check_space_for_salt(const size_t key_size, + const size_t salt_size) +{ + if ((salt_size + key_size) > ECRYPTFS_MAX_KEY_BYTES) + return false; + + return true; +} + +/* + * If there is salt that is used by external crypto module, it is stored + * in the same array where regular key is. Salt is going to be used by + * external crypto module only, so for all internal crypto operations salt + * should be ignored. + * + * Get key size in cases where it is going to be used for data encryption + * or for all other general purposes + */ +size_t ecryptfs_get_key_size_to_enc_data( + const struct ecryptfs_crypt_stat *crypt_stat) +{ + if (!crypt_stat) + return 0; + + return crypt_stat->key_size; +} + +/* + * If there is salt that is used by external crypto module, it is stored + * in the same array where regular key is. Salt is going to be used by + * external crypto module only, but we still need to save and restore it + * (in encrypted form) as part of ecryptfs header along with the regular + * key. + * + * Get key size in cases where it is going to be stored persistently + * + * !!! 
crypt_stat cipher name and mode must be initialized + */ +size_t ecryptfs_get_key_size_to_store_key( + const struct ecryptfs_crypt_stat *crypt_stat) +{ + size_t salt_size = 0; + + if (!crypt_stat) + return 0; + + salt_size = ecryptfs_get_salt_size(crypt_stat); + + if (!ecryptfs_check_space_for_salt(crypt_stat->key_size, salt_size)) { + ecryptfs_printk(KERN_WARNING, + "ecryptfs_get_key_size_to_store_key: not enough space for salt\n"); + return crypt_stat->key_size; + } + + return crypt_stat->key_size + salt_size; +} + +/* + * If there is salt that is used by external crypto module, it is stored + * in the same array where regular key is. Salt is going to be used by + * external crypto module only, but we still need to save and restore it + * (in encrypted form) as part of ecryptfs header along with the regular + * key. + * + * Get key size in cases where it is going to be restored from storage + * + * !!! crypt_stat cipher name and mode must be initialized + */ +size_t ecryptfs_get_key_size_to_restore_key(size_t stored_key_size, + const struct ecryptfs_crypt_stat *crypt_stat) +{ + size_t salt_size = 0; + + if (!crypt_stat) + return 0; + + salt_size = ecryptfs_get_salt_size_for_cipher(crypt_stat); + + if (salt_size >= stored_key_size) { + ecryptfs_printk(KERN_WARNING, + "ecryptfs_get_key_size_to_restore_key: salt %zu >= stred size %zu\n", + salt_size, stored_key_size); + + return stored_key_size; + } + + return stored_key_size - salt_size; +} + +/** + * Given crypt_stat, the function returns appropriate salt size. + */ +size_t ecryptfs_get_salt_size_for_cipher( + const struct ecryptfs_crypt_stat *crypt_stat) +{ + if (!get_events() || !(get_events()->get_salt_key_size_cb)) + return 0; + + return get_events()->get_salt_key_size_cb(crypt_stat); +} + +/** + * Given mount_crypt_stat, the function returns appropriate salt size. 
+ */ +size_t ecryptfs_get_salt_size_for_cipher_mount( + const struct ecryptfs_mount_crypt_stat *crypt_stat) +{ + if (!get_events() || !(get_events()->get_salt_key_size_cb)) + return 0; + + return get_events()->get_salt_key_size_cb(crypt_stat); +} + diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 27794b137b24..c93fe5fce41e 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -31,6 +31,7 @@ #include <linux/security.h> #include <linux/compat.h> #include <linux/fs_stack.h> +#include <linux/ecryptfs.h> #include "ecryptfs_kernel.h" /** @@ -196,6 +197,9 @@ static int ecryptfs_open(struct inode *inode, struct file *file) int rc = 0; struct ecryptfs_crypt_stat *crypt_stat = NULL; struct dentry *ecryptfs_dentry = file->f_path.dentry; + int ret; + + /* Private value of ecryptfs_dentry allocated in * ecryptfs_lookup() */ struct ecryptfs_file_info *file_info; @@ -235,12 +239,39 @@ static int ecryptfs_open(struct inode *inode, struct file *file) } ecryptfs_set_file_lower( file, ecryptfs_inode_to_private(inode)->lower_file); + if (d_is_dir(ecryptfs_dentry)) { + ecryptfs_printk(KERN_DEBUG, "This is a directory\n"); + mutex_lock(&crypt_stat->cs_mutex); + crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); + mutex_unlock(&crypt_stat->cs_mutex); + rc = 0; + goto out; + } + rc = read_or_initialize_metadata(ecryptfs_dentry); if (rc) goto out_put; ecryptfs_printk(KERN_DEBUG, "inode w/ addr = [0x%p], i_ino = " "[0x%.16lx] size: [0x%.16llx]\n", inode, inode->i_ino, (unsigned long long)i_size_read(inode)); + + if (get_events() && get_events()->open_cb) { + + ret = vfs_fsync(file, false); + + if (ret) + ecryptfs_printk(KERN_ERR, + "failed to sync file ret = %d.\n", ret); + + get_events()->open_cb(ecryptfs_inode_to_lower(inode), + crypt_stat); + + if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) { + truncate_inode_pages(inode->i_mapping, 0); + truncate_inode_pages( + ecryptfs_inode_to_lower(inode)->i_mapping, 0); + } + } goto out; out_put: ecryptfs_put_lower_file(inode); @@ 
-307,6 +338,7 @@ static int ecryptfs_release(struct inode *inode, struct file *file) ecryptfs_put_lower_file(inode); kmem_cache_free(ecryptfs_file_info_cache, ecryptfs_file_to_private(file)); + return 0; } diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index e2e47ba5d313..cb3ecf442d96 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -261,12 +261,15 @@ out: * * Returns zero on success; non-zero on error condition */ + + static int ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry, umode_t mode, bool excl) { struct inode *ecryptfs_inode; int rc; + struct ecryptfs_crypt_stat *crypt_stat; ecryptfs_inode = ecryptfs_do_create(directory_inode, ecryptfs_dentry, mode); @@ -276,6 +279,7 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry, rc = PTR_ERR(ecryptfs_inode); goto out; } + /* At this point, a file exists on "disk"; we need to make sure * that this on disk file is prepared to be an ecryptfs file */ rc = ecryptfs_initialize_file(ecryptfs_dentry, ecryptfs_inode); @@ -288,6 +292,13 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry, goto out; } unlock_new_inode(ecryptfs_inode); + + crypt_stat = &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat; + if (get_events() && get_events()->open_cb) + get_events()->open_cb( + ecryptfs_inode_to_lower(ecryptfs_inode), + crypt_stat); + d_instantiate(ecryptfs_dentry, ecryptfs_inode); out: return rc; diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 20632ee51ae5..ea3d99ebb6ee 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -315,7 +315,8 @@ write_tag_66_packet(char *signature, u8 cipher_code, * | File Encryption Key Size | 1 or 2 bytes | * | File Encryption Key | arbitrary | */ - data_len = (5 + ECRYPTFS_SIG_SIZE_HEX + crypt_stat->key_size); + data_len = (5 + ECRYPTFS_SIG_SIZE_HEX + + ecryptfs_get_key_size_to_store_key(crypt_stat)); *packet = kmalloc(data_len, GFP_KERNEL); message = 
*packet; if (!message) { @@ -335,8 +336,9 @@ write_tag_66_packet(char *signature, u8 cipher_code, memcpy(&message[i], signature, ECRYPTFS_SIG_SIZE_HEX); i += ECRYPTFS_SIG_SIZE_HEX; /* The encrypted key includes 1 byte cipher code and 2 byte checksum */ - rc = ecryptfs_write_packet_length(&message[i], crypt_stat->key_size + 3, - &packet_size_len); + rc = ecryptfs_write_packet_length(&message[i], + ecryptfs_get_key_size_to_store_key(crypt_stat) + 3, + &packet_size_len); if (rc) { ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet " "header; cannot generate packet length\n"); @@ -344,9 +346,10 @@ write_tag_66_packet(char *signature, u8 cipher_code, } i += packet_size_len; message[i++] = cipher_code; - memcpy(&message[i], crypt_stat->key, crypt_stat->key_size); - i += crypt_stat->key_size; - for (j = 0; j < crypt_stat->key_size; j++) + memcpy(&message[i], crypt_stat->key, + ecryptfs_get_key_size_to_store_key(crypt_stat)); + i += ecryptfs_get_key_size_to_store_key(crypt_stat); + for (j = 0; j < ecryptfs_get_key_size_to_store_key(crypt_stat); j++) checksum += crypt_stat->key[j]; message[i++] = (checksum / 256) % 256; message[i++] = (checksum % 256); @@ -925,6 +928,7 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size, struct ecryptfs_parse_tag_70_packet_silly_stack *s; struct key *auth_tok_key = NULL; int rc = 0; + char full_cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE]; (*packet_size) = 0; (*filename_size) = 0; @@ -984,12 +988,13 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size, s->fnek_sig_hex[ECRYPTFS_SIG_SIZE_HEX] = '\0'; (*packet_size) += ECRYPTFS_SIG_SIZE; s->cipher_code = data[(*packet_size)++]; - rc = ecryptfs_cipher_code_to_string(s->cipher_string, s->cipher_code); + rc = ecryptfs_cipher_code_to_string(full_cipher, s->cipher_code); if (rc) { printk(KERN_WARNING "%s: Cipher code [%d] is invalid\n", __func__, s->cipher_code); goto out; } + ecryptfs_parse_full_cipher(full_cipher, s->cipher_string, 0); rc = 
ecryptfs_find_auth_tok_for_sig(&auth_tok_key, &s->auth_tok, mount_crypt_stat, s->fnek_sig_hex); @@ -1158,6 +1163,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, char *payload = NULL; size_t payload_len = 0; int rc; + char full_cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE]; rc = ecryptfs_get_auth_tok_sig(&auth_tok_sig, auth_tok); if (rc) { @@ -1191,21 +1197,31 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, rc); goto out; } - auth_tok->session_key.flags |= ECRYPTFS_CONTAINS_DECRYPTED_KEY; - memcpy(crypt_stat->key, auth_tok->session_key.decrypted_key, - auth_tok->session_key.decrypted_key_size); - crypt_stat->key_size = auth_tok->session_key.decrypted_key_size; - rc = ecryptfs_cipher_code_to_string(crypt_stat->cipher, cipher_code); + + rc = ecryptfs_cipher_code_to_string(full_cipher, cipher_code); if (rc) { ecryptfs_printk(KERN_ERR, "Cipher code [%d] is invalid\n", cipher_code) - goto out; + goto out; } + + auth_tok->session_key.flags |= ECRYPTFS_CONTAINS_DECRYPTED_KEY; + memcpy(crypt_stat->key, auth_tok->session_key.decrypted_key, + auth_tok->session_key.decrypted_key_size); + crypt_stat->key_size = ecryptfs_get_key_size_to_restore_key( + auth_tok->session_key.decrypted_key_size, crypt_stat); + + ecryptfs_parse_full_cipher(full_cipher, + crypt_stat->cipher, crypt_stat->cipher_mode); + crypt_stat->flags |= ECRYPTFS_KEY_VALID; if (ecryptfs_verbosity > 0) { ecryptfs_printk(KERN_DEBUG, "Decrypted session key:\n"); ecryptfs_dump_hex(crypt_stat->key, crypt_stat->key_size); + + ecryptfs_dump_salt_hex(crypt_stat->key, crypt_stat->key_size, + crypt_stat); } out: kfree(msg); @@ -1387,6 +1403,7 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat, struct ecryptfs_auth_tok_list_item *auth_tok_list_item; size_t length_size; int rc = 0; + char full_cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE]; (*packet_size) = 0; (*new_auth_tok) = NULL; @@ -1460,10 +1477,13 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat, rc = -EINVAL; 
goto out_free; } - rc = ecryptfs_cipher_code_to_string(crypt_stat->cipher, + rc = ecryptfs_cipher_code_to_string(full_cipher, (u16)data[(*packet_size)]); if (rc) goto out_free; + ecryptfs_parse_full_cipher(full_cipher, + crypt_stat->cipher, crypt_stat->cipher_mode); + /* A little extra work to differentiate among the AES key * sizes; see RFC2440 */ switch(data[(*packet_size)++]) { @@ -1472,7 +1492,10 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat, break; default: crypt_stat->key_size = - (*new_auth_tok)->session_key.encrypted_key_size; + ecryptfs_get_key_size_to_restore_key( + (*new_auth_tok)->session_key.encrypted_key_size, + crypt_stat); + } rc = ecryptfs_init_crypt_ctx(crypt_stat); if (rc) @@ -1720,7 +1743,7 @@ decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, mutex_lock(tfm_mutex); rc = crypto_blkcipher_setkey( desc.tfm, auth_tok->token.password.session_key_encryption_key, - crypt_stat->key_size); + auth_tok->token.password.session_key_encryption_key_bytes); if (unlikely(rc < 0)) { mutex_unlock(tfm_mutex); printk(KERN_ERR "Error setting key for crypto context\n"); @@ -1743,6 +1766,8 @@ decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, crypt_stat->key_size); ecryptfs_dump_hex(crypt_stat->key, crypt_stat->key_size); + ecryptfs_dump_salt_hex(crypt_stat->key, crypt_stat->key_size, + crypt_stat); } out: return rc; @@ -1979,12 +2004,17 @@ pki_encrypt_session_key(struct key *auth_tok_key, size_t payload_len = 0; struct ecryptfs_message *msg; int rc; + unsigned char final[2*ECRYPTFS_MAX_CIPHER_NAME_SIZE+1]; rc = write_tag_66_packet(auth_tok->token.private_key.signature, - ecryptfs_code_for_cipher_string( - crypt_stat->cipher, - crypt_stat->key_size), - crypt_stat, &payload, &payload_len); + ecryptfs_code_for_cipher_string( + ecryptfs_get_full_cipher( + crypt_stat->cipher, + crypt_stat->cipher_mode, + final, sizeof(final)), + ecryptfs_get_key_size_to_enc_data( + crypt_stat)), + crypt_stat, &payload, 
&payload_len); up_write(&(auth_tok_key->sem)); key_put(auth_tok_key); if (rc) { @@ -2042,7 +2072,7 @@ write_tag_1_packet(char *dest, size_t *remaining_bytes, ecryptfs_from_hex(key_rec->sig, auth_tok->token.private_key.signature, ECRYPTFS_SIG_SIZE); encrypted_session_key_valid = 0; - for (i = 0; i < crypt_stat->key_size; i++) + for (i = 0; i < ecryptfs_get_key_size_to_store_key(crypt_stat); i++) encrypted_session_key_valid |= auth_tok->session_key.encrypted_key[i]; if (encrypted_session_key_valid) { @@ -2196,6 +2226,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes, u8 cipher_code; size_t packet_size_length; size_t max_packet_size; + unsigned char final[2*ECRYPTFS_MAX_CIPHER_NAME_SIZE+1]; struct ecryptfs_mount_crypt_stat *mount_crypt_stat = crypt_stat->mount_crypt_stat; struct blkcipher_desc desc = { @@ -2228,13 +2259,14 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes, mount_crypt_stat->global_default_cipher_key_size; if (auth_tok->session_key.encrypted_key_size == 0) auth_tok->session_key.encrypted_key_size = - crypt_stat->key_size; + ecryptfs_get_key_size_to_store_key(crypt_stat); if (crypt_stat->key_size == 24 && strcmp("aes", crypt_stat->cipher) == 0) { memset((crypt_stat->key + 24), 0, 8); auth_tok->session_key.encrypted_key_size = 32; } else - auth_tok->session_key.encrypted_key_size = crypt_stat->key_size; + auth_tok->session_key.encrypted_key_size = + ecryptfs_get_key_size_to_store_key(crypt_stat); key_rec->enc_key_size = auth_tok->session_key.encrypted_key_size; encrypted_session_key_valid = 0; @@ -2258,8 +2290,8 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes, auth_tok->token.password. 
session_key_encryption_key_bytes); memcpy(session_key_encryption_key, - auth_tok->token.password.session_key_encryption_key, - crypt_stat->key_size); + auth_tok->token.password.session_key_encryption_key, + auth_tok->token.password.session_key_encryption_key_bytes); ecryptfs_printk(KERN_DEBUG, "Cached session key encryption key:\n"); if (ecryptfs_verbosity > 0) @@ -2292,7 +2324,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes, } mutex_lock(tfm_mutex); rc = crypto_blkcipher_setkey(desc.tfm, session_key_encryption_key, - crypt_stat->key_size); + auth_tok->token.password.session_key_encryption_key_bytes); if (rc < 0) { mutex_unlock(tfm_mutex); ecryptfs_printk(KERN_ERR, "Error setting key for crypto " @@ -2301,7 +2333,9 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes, } rc = 0; ecryptfs_printk(KERN_DEBUG, "Encrypting [%zd] bytes of the key\n", - crypt_stat->key_size); + crypt_stat->key_size); + ecryptfs_printk(KERN_DEBUG, "Encrypting [%zd] bytes of the salt key\n", + ecryptfs_get_salt_size_for_cipher(crypt_stat)); rc = crypto_blkcipher_encrypt(&desc, dst_sg, src_sg, (*key_rec).enc_key_size); mutex_unlock(tfm_mutex); @@ -2350,8 +2384,10 @@ encrypted_session_key_set: dest[(*packet_size)++] = 0x04; /* version 4 */ /* TODO: Break from RFC2440 so that arbitrary ciphers can be * specified with strings */ - cipher_code = ecryptfs_code_for_cipher_string(crypt_stat->cipher, - crypt_stat->key_size); + cipher_code = ecryptfs_code_for_cipher_string( + ecryptfs_get_full_cipher(crypt_stat->cipher, + crypt_stat->cipher_mode, final, sizeof(final)), + crypt_stat->key_size); if (cipher_code == 0) { ecryptfs_printk(KERN_WARNING, "Unable to generate code for " "cipher [%s]\n", crypt_stat->cipher); diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 4f4d0474bee9..85f7a289bdac 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -156,16 +156,41 @@ int ecryptfs_get_lower_file(struct dentry *dentry, struct inode *inode) void ecryptfs_put_lower_file(struct 
inode *inode) { + int ret = 0; struct ecryptfs_inode_info *inode_info; + bool clear_cache_needed = false; inode_info = ecryptfs_inode_to_private(inode); if (atomic_dec_and_mutex_lock(&inode_info->lower_file_count, &inode_info->lower_file_mutex)) { + + if (get_events() && get_events()->is_hw_crypt_cb && + get_events()->is_hw_crypt_cb()) + clear_cache_needed = true; + filemap_write_and_wait(inode->i_mapping); + if (clear_cache_needed) { + ret = vfs_fsync(inode_info->lower_file, false); + + if (ret) + pr_err("failed to sync file ret = %d.\n", ret); + } fput(inode_info->lower_file); inode_info->lower_file = NULL; mutex_unlock(&inode_info->lower_file_mutex); + + if (clear_cache_needed) { + truncate_inode_pages_fill_zero(inode->i_mapping, 0); + truncate_inode_pages_fill_zero( + ecryptfs_inode_to_lower(inode)->i_mapping, 0); + } + + if (get_events() && get_events()->release_cb) + get_events()->release_cb( + ecryptfs_inode_to_lower(inode)); } + + } enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig, @@ -280,6 +305,7 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options, char *cipher_key_bytes_src; char *fn_cipher_key_bytes_src; u8 cipher_code; + unsigned char final[2*ECRYPTFS_MAX_CIPHER_NAME_SIZE+1]; *check_ruid = 0; @@ -309,12 +335,14 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options, case ecryptfs_opt_ecryptfs_cipher: cipher_name_src = args[0].from; cipher_name_dst = - mount_crypt_stat-> - global_default_cipher_name; - strncpy(cipher_name_dst, cipher_name_src, - ECRYPTFS_MAX_CIPHER_NAME_SIZE); - cipher_name_dst[ECRYPTFS_MAX_CIPHER_NAME_SIZE] = '\0'; + mount_crypt_stat->global_default_cipher_name; + + ecryptfs_parse_full_cipher(cipher_name_src, + mount_crypt_stat->global_default_cipher_name, + mount_crypt_stat->global_default_cipher_mode); + cipher_name_set = 1; + break; case ecryptfs_opt_ecryptfs_key_bytes: cipher_key_bytes_src = args[0].from; @@ -411,24 +439,35 @@ static int ecryptfs_parse_options(struct 
ecryptfs_sb_info *sbi, char *options, strcpy(mount_crypt_stat->global_default_cipher_name, ECRYPTFS_DEFAULT_CIPHER); } + if ((mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) && !fn_cipher_name_set) strcpy(mount_crypt_stat->global_default_fn_cipher_name, mount_crypt_stat->global_default_cipher_name); + if (!cipher_key_bytes_set) mount_crypt_stat->global_default_cipher_key_size = 0; + if ((mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) && !fn_cipher_key_bytes_set) mount_crypt_stat->global_default_fn_cipher_key_bytes = mount_crypt_stat->global_default_cipher_key_size; cipher_code = ecryptfs_code_for_cipher_string( - mount_crypt_stat->global_default_cipher_name, + ecryptfs_get_full_cipher( + mount_crypt_stat->global_default_cipher_name, + mount_crypt_stat->global_default_cipher_mode, + final, sizeof(final)), mount_crypt_stat->global_default_cipher_key_size); if (!cipher_code) { - ecryptfs_printk(KERN_ERR, - "eCryptfs doesn't support cipher: %s", - mount_crypt_stat->global_default_cipher_name); + ecryptfs_printk( + KERN_ERR, + "eCryptfs doesn't support cipher: %s and key size %zu", + ecryptfs_get_full_cipher( + mount_crypt_stat->global_default_cipher_name, + mount_crypt_stat->global_default_cipher_mode, + final, sizeof(final)), + mount_crypt_stat->global_default_cipher_key_size); rc = -EINVAL; goto out; } @@ -488,6 +527,7 @@ static struct file_system_type ecryptfs_fs_type; * @dev_name: The path to mount over * @raw_data: The options passed into the kernel */ + static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data) { @@ -557,6 +597,11 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags ecryptfs_set_superblock_lower(s, path.dentry->d_sb); + + if (get_events() && get_events()->is_hw_crypt_cb && + get_events()->is_hw_crypt_cb()) + drop_pagecache_sb(ecryptfs_superblock_to_lower(s), 0); + /** * Set the POSIX ACL flag based on whether they're enabled in 
the lower * mount. @@ -895,6 +940,7 @@ static void __exit ecryptfs_exit(void) do_sysfs_unregistration(); unregister_filesystem(&ecryptfs_fs_type); ecryptfs_free_kmem_caches(); + ecryptfs_free_events(); } MODULE_AUTHOR("Michael A. Halcrow <mhalcrow@us.ibm.com>"); diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index caba848ac763..bdbc72d52438 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -552,10 +552,16 @@ static sector_t ecryptfs_bmap(struct address_space *mapping, sector_t block) return rc; } +void ecryptfs_freepage(struct page *page) +{ + zero_user(page, 0, PAGE_CACHE_SIZE); +} + const struct address_space_operations ecryptfs_aops = { .writepage = ecryptfs_writepage, .readpage = ecryptfs_readpage, .write_begin = ecryptfs_write_begin, .write_end = ecryptfs_write_end, .bmap = ecryptfs_bmap, + .freepage = ecryptfs_freepage, }; diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index afa1b81c3418..25e436ddcf8e 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c @@ -69,6 +69,9 @@ static void ecryptfs_i_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); struct ecryptfs_inode_info *inode_info; + if (inode == NULL) + return; + inode_info = ecryptfs_inode_to_private(inode); kmem_cache_free(ecryptfs_inode_info_cache, inode_info); @@ -88,9 +91,12 @@ static void ecryptfs_destroy_inode(struct inode *inode) struct ecryptfs_inode_info *inode_info; inode_info = ecryptfs_inode_to_private(inode); + BUG_ON(inode_info->lower_file); + ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat); call_rcu(&inode->i_rcu, ecryptfs_i_callback); + } /** @@ -149,6 +155,9 @@ static int ecryptfs_show_options(struct seq_file *m, struct dentry *root) struct ecryptfs_mount_crypt_stat *mount_crypt_stat = &ecryptfs_superblock_to_private(sb)->mount_crypt_stat; struct ecryptfs_global_auth_tok *walker; + unsigned char final[2*ECRYPTFS_MAX_CIPHER_NAME_SIZE+1]; + + memset(final, 0, sizeof(final)); 
mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex); list_for_each_entry(walker, @@ -162,7 +171,10 @@ static int ecryptfs_show_options(struct seq_file *m, struct dentry *root) mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex); seq_printf(m, ",ecryptfs_cipher=%s", - mount_crypt_stat->global_default_cipher_name); + ecryptfs_get_full_cipher( + mount_crypt_stat->global_default_cipher_name, + mount_crypt_stat->global_default_cipher_mode, + final, sizeof(final))); if (mount_crypt_stat->global_default_cipher_key_size) seq_printf(m, ",ecryptfs_key_bytes=%zd", diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index 3c8293215603..ebaff5ab93da 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -118,10 +118,16 @@ config EXT4_ENCRYPTION decrypted pages in the page cache. config EXT4_FS_ENCRYPTION - bool - default y + bool "Ext4 FS Encryption" + default n depends on EXT4_ENCRYPTION +config EXT4_FS_ICE_ENCRYPTION + bool "Ext4 Encryption with ICE support" + default n + depends on EXT4_FS_ENCRYPTION + depends on PFK + config EXT4_DEBUG bool "EXT4 debugging support" depends on EXT4_FS diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index f52cf54f0cbc..1cabbd9a9229 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile @@ -14,3 +14,5 @@ ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o ext4-$(CONFIG_EXT4_FS_ENCRYPTION) += crypto_policy.o crypto.o \ crypto_key.o crypto_fname.o + +ext4-$(CONFIG_EXT4_FS_ICE_ENCRYPTION) += ext4_ice.o diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c index f240cef8b326..f5099a3386ec 100644 --- a/fs/ext4/crypto.c +++ b/fs/ext4/crypto.c @@ -389,14 +389,12 @@ int ext4_decrypt(struct page *page) page->index, page, page, GFP_NOFS); } -int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex) +int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk, + ext4_fsblk_t pblk, ext4_lblk_t len) { struct ext4_crypto_ctx *ctx; struct page *ciphertext_page = NULL; struct bio *bio; - 
ext4_lblk_t lblk = le32_to_cpu(ex->ee_block); - ext4_fsblk_t pblk = ext4_ext_pblock(ex); - unsigned int len = ext4_ext_get_actual_len(ex); int ret, err = 0; #if 0 @@ -459,7 +457,8 @@ errout: bool ext4_valid_contents_enc_mode(uint32_t mode) { - return (mode == EXT4_ENCRYPTION_MODE_AES_256_XTS); + return (mode == EXT4_ENCRYPTION_MODE_AES_256_XTS || + mode == EXT4_ENCRYPTION_MODE_PRIVATE); } /** diff --git a/fs/ext4/crypto_key.c b/fs/ext4/crypto_key.c index 14ae7781f2a8..d3d6b28ce9b9 100644 --- a/fs/ext4/crypto_key.c +++ b/fs/ext4/crypto_key.c @@ -15,6 +15,7 @@ #include <uapi/linux/keyctl.h> #include "ext4.h" +#include "ext4_ice.h" #include "xattr.h" static void derive_crypt_complete(struct crypto_async_request *req, int rc) @@ -173,6 +174,8 @@ void ext4_free_crypt_info(struct ext4_crypt_info *ci) if (!ci) return; + if (ci->ci_keyring_key) + key_put(ci->ci_keyring_key); crypto_free_ablkcipher(ci->ci_ctfm); kmem_cache_free(ext4_crypt_info_cachep, ci); } @@ -194,7 +197,13 @@ void ext4_free_encryption_info(struct inode *inode, ext4_free_crypt_info(ci); } -int ext4_get_encryption_info(struct inode *inode) +static int ext4_default_data_encryption_mode(void) +{ + return ext4_is_ice_enabled() ? 
EXT4_ENCRYPTION_MODE_PRIVATE : + EXT4_ENCRYPTION_MODE_AES_256_XTS; +} + +int _ext4_get_encryption_info(struct inode *inode) { struct ext4_inode_info *ei = EXT4_I(inode); struct ext4_crypt_info *crypt_info; @@ -207,24 +216,32 @@ int ext4_get_encryption_info(struct inode *inode) struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct crypto_ablkcipher *ctfm; const char *cipher_str; - char raw_key[EXT4_MAX_KEY_SIZE]; - char mode; + int for_fname = 0; + int mode; int res; - if (ei->i_crypt_info) - return 0; - res = ext4_init_crypto(); if (res) return res; +retry: + crypt_info = ACCESS_ONCE(ei->i_crypt_info); + if (crypt_info) { + if (!crypt_info->ci_keyring_key || + key_validate(crypt_info->ci_keyring_key) == 0) + return 0; + ext4_free_encryption_info(inode, crypt_info); + goto retry; + } + res = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION, EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx, sizeof(ctx)); if (res < 0) { if (!DUMMY_ENCRYPTION_ENABLED(sbi)) return res; - ctx.contents_encryption_mode = EXT4_ENCRYPTION_MODE_AES_256_XTS; + ctx.contents_encryption_mode = + ext4_default_data_encryption_mode(); ctx.filenames_encryption_mode = EXT4_ENCRYPTION_MODE_AES_256_CTS; ctx.flags = 0; @@ -240,14 +257,15 @@ int ext4_get_encryption_info(struct inode *inode) crypt_info->ci_data_mode = ctx.contents_encryption_mode; crypt_info->ci_filename_mode = ctx.filenames_encryption_mode; crypt_info->ci_ctfm = NULL; + crypt_info->ci_keyring_key = NULL; memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor, sizeof(crypt_info->ci_master_key)); - if (S_ISREG(inode->i_mode)) - mode = crypt_info->ci_data_mode; - else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - mode = crypt_info->ci_filename_mode; - else + if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) + for_fname = 1; + else if (!S_ISREG(inode->i_mode)) BUG(); + mode = for_fname ? 
crypt_info->ci_filename_mode : + crypt_info->ci_data_mode; switch (mode) { case EXT4_ENCRYPTION_MODE_AES_256_XTS: cipher_str = "xts(aes)"; @@ -255,6 +273,8 @@ int ext4_get_encryption_info(struct inode *inode) case EXT4_ENCRYPTION_MODE_AES_256_CTS: cipher_str = "cts(cbc(aes))"; break; + case EXT4_ENCRYPTION_MODE_PRIVATE: + cipher_str = "bugon"; case EXT4_ENCRYPTION_MODE_AES_256_HEH: cipher_str = "heh(aes)"; break; @@ -266,7 +286,7 @@ int ext4_get_encryption_info(struct inode *inode) goto out; } if (DUMMY_ENCRYPTION_ENABLED(sbi)) { - memset(raw_key, 0x42, EXT4_AES_256_XTS_KEY_SIZE); + memset(crypt_info->ci_raw_key, 0x42, EXT4_AES_256_XTS_KEY_SIZE); goto got_key; } memcpy(full_key_descriptor, EXT4_KEY_DESC_PREFIX, @@ -282,6 +302,7 @@ int ext4_get_encryption_info(struct inode *inode) keyring_key = NULL; goto out; } + crypt_info->ci_keyring_key = keyring_key; if (keyring_key->type != &key_type_logon) { printk_once(KERN_WARNING "ext4: key type must be logon\n"); @@ -312,36 +333,49 @@ int ext4_get_encryption_info(struct inode *inode) up_read(&keyring_key->sem); goto out; } - res = ext4_derive_key(&ctx, master_key->raw, raw_key); + res = ext4_derive_key(&ctx, master_key->raw, + crypt_info->ci_raw_key); up_read(&keyring_key->sem); if (res) goto out; got_key: - ctfm = crypto_alloc_ablkcipher(cipher_str, 0, 0); - if (!ctfm || IS_ERR(ctfm)) { - res = ctfm ? PTR_ERR(ctfm) : -ENOMEM; - printk(KERN_DEBUG - "%s: error %d (inode %u) allocating crypto tfm\n", - __func__, res, (unsigned) inode->i_ino); + if (for_fname || + (crypt_info->ci_data_mode != EXT4_ENCRYPTION_MODE_PRIVATE)) { + ctfm = crypto_alloc_ablkcipher(cipher_str, 0, 0); + if (!ctfm || IS_ERR(ctfm)) { + res = ctfm ? 
PTR_ERR(ctfm) : -ENOMEM; + pr_debug("%s: error %d (inode %u) allocating crypto tfm\n", + __func__, res, (unsigned) inode->i_ino); + goto out; + } + crypt_info->ci_ctfm = ctfm; + crypto_ablkcipher_clear_flags(ctfm, ~0); + crypto_tfm_set_flags(crypto_ablkcipher_tfm(ctfm), + CRYPTO_TFM_REQ_WEAK_KEY); + res = crypto_ablkcipher_setkey(ctfm, crypt_info->ci_raw_key, + ext4_encryption_key_size(mode)); + if (res) + goto out; + memzero_explicit(crypt_info->ci_raw_key, + sizeof(crypt_info->ci_raw_key)); + } else if (!ext4_is_ice_enabled()) { + pr_warn("%s: ICE support not available\n", + __func__); + res = -EINVAL; goto out; } - crypt_info->ci_ctfm = ctfm; - crypto_ablkcipher_clear_flags(ctfm, ~0); - crypto_tfm_set_flags(crypto_ablkcipher_tfm(ctfm), - CRYPTO_TFM_REQ_WEAK_KEY); - res = crypto_ablkcipher_setkey(ctfm, raw_key, - ext4_encryption_key_size(mode)); - if (res) - goto out; + if (cmpxchg(&ei->i_crypt_info, NULL, crypt_info) != NULL) { + ext4_free_crypt_info(crypt_info); + goto retry; + } + return 0; - if (cmpxchg(&ei->i_crypt_info, NULL, crypt_info) == NULL) - crypt_info = NULL; out: if (res == -ENOKEY) res = 0; - key_put(keyring_key); + memzero_explicit(crypt_info->ci_raw_key, + sizeof(crypt_info->ci_raw_key)); ext4_free_crypt_info(crypt_info); - memzero_explicit(raw_key, sizeof(raw_key)); return res; } diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 6d17f31a31d7..33f5e2a50cf8 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -163,8 +163,11 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) index, 1); file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; bh = ext4_bread(NULL, inode, map.m_lblk, 0); - if (IS_ERR(bh)) - return PTR_ERR(bh); + if (IS_ERR(bh)) { + err = PTR_ERR(bh); + bh = NULL; + goto errout; + } } if (!bh) { diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 6edacb849e48..abc9e169cb44 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -589,6 +589,7 @@ enum { #define EXT4_ENCRYPTION_MODE_AES_256_GCM 2 #define 
EXT4_ENCRYPTION_MODE_AES_256_CBC 3 #define EXT4_ENCRYPTION_MODE_AES_256_CTS 4 +#define EXT4_ENCRYPTION_MODE_PRIVATE 127 #define EXT4_ENCRYPTION_MODE_AES_256_HEH 126 #include "ext4_crypto.h" @@ -2270,7 +2271,8 @@ struct page *ext4_encrypt(struct inode *inode, struct page *plaintext_page, gfp_t gfp_flags); int ext4_decrypt(struct page *page); -int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex); +int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk, + ext4_fsblk_t pblk, ext4_lblk_t len); extern const struct dentry_operations ext4_encrypted_d_ops; #ifdef CONFIG_EXT4_FS_ENCRYPTION @@ -2334,17 +2336,37 @@ static inline void ext4_fname_free_filename(struct ext4_filename *fname) { } /* crypto_key.c */ void ext4_free_crypt_info(struct ext4_crypt_info *ci); void ext4_free_encryption_info(struct inode *inode, struct ext4_crypt_info *ci); +int _ext4_get_encryption_info(struct inode *inode); #ifdef CONFIG_EXT4_FS_ENCRYPTION int ext4_has_encryption_key(struct inode *inode); -int ext4_get_encryption_info(struct inode *inode); +static inline int ext4_get_encryption_info(struct inode *inode) +{ + struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info; + + if (!ci || + (ci->ci_keyring_key && + (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) | + (1 << KEY_FLAG_REVOKED) | + (1 << KEY_FLAG_DEAD))))) + return _ext4_get_encryption_info(inode); + return 0; +} static inline struct ext4_crypt_info *ext4_encryption_info(struct inode *inode) { return EXT4_I(inode)->i_crypt_info; } +static inline int ext4_using_hardware_encryption(struct inode *inode) +{ + struct ext4_crypt_info *ci = ext4_encryption_info(inode); + + return S_ISREG(inode->i_mode) && ci && + ci->ci_data_mode == EXT4_ENCRYPTION_MODE_PRIVATE; +} + #else static inline int ext4_has_encryption_key(struct inode *inode) { @@ -2358,6 +2380,10 @@ static inline struct ext4_crypt_info *ext4_encryption_info(struct inode *inode) { return NULL; } +static inline int 
ext4_using_hardware_encryption(struct inode *inode) +{ + return 0; +} #endif @@ -2514,6 +2540,8 @@ extern int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf); extern qsize_t *ext4_get_reserved_space(struct inode *inode); extern void ext4_da_update_reserve_space(struct inode *inode, int used, int quota_claim); +extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk, + ext4_fsblk_t pblk, ext4_lblk_t len); /* indirect.c */ extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, @@ -3016,8 +3044,7 @@ extern int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos, struct page *page); extern int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, - struct dentry *dentry, - struct inode *inode); + struct inode *dir, struct inode *inode); extern int ext4_try_create_inline_dir(handle_t *handle, struct inode *parent, struct inode *inode); diff --git a/fs/ext4/ext4_crypto.h b/fs/ext4/ext4_crypto.h index e52637d969db..e28cc5aab04a 100644 --- a/fs/ext4/ext4_crypto.h +++ b/fs/ext4/ext4_crypto.h @@ -12,6 +12,7 @@ #define _EXT4_CRYPTO_H #include <linux/fs.h> +#include <linux/pfk.h> #define EXT4_KEY_DESCRIPTOR_SIZE 8 @@ -63,6 +64,7 @@ struct ext4_encryption_context { #define EXT4_AES_256_CTS_KEY_SIZE 32 #define EXT4_AES_256_HEH_KEY_SIZE 32 #define EXT4_AES_256_XTS_KEY_SIZE 64 +#define EXT4_PRIVATE_KEY_SIZE 64 #define EXT4_MAX_KEY_SIZE 64 #define EXT4_KEY_DESC_PREFIX "ext4:" @@ -80,9 +82,13 @@ struct ext4_crypt_info { char ci_filename_mode; char ci_flags; struct crypto_ablkcipher *ci_ctfm; + struct key *ci_keyring_key; char ci_master_key[EXT4_KEY_DESCRIPTOR_SIZE]; + char ci_raw_key[EXT4_MAX_KEY_SIZE]; }; + + #define EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001 #define EXT4_WRITE_PATH_FL 0x00000002 @@ -115,6 +121,7 @@ static inline int ext4_encryption_key_size(int mode) { switch (mode) { case EXT4_ENCRYPTION_MODE_AES_256_XTS: + case EXT4_ENCRYPTION_MODE_PRIVATE: return EXT4_AES_256_XTS_KEY_SIZE; case 
EXT4_ENCRYPTION_MODE_AES_256_GCM: return EXT4_AES_256_GCM_KEY_SIZE; diff --git a/fs/ext4/ext4_ice.c b/fs/ext4/ext4_ice.c new file mode 100644 index 000000000000..d85bcb8ea1ba --- /dev/null +++ b/fs/ext4/ext4_ice.c @@ -0,0 +1,109 @@ +/* Copyright (c) 2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "ext4_ice.h" +#include "ext4_crypto.h" + + +/* + * Retrieves encryption key from the inode + */ +char *ext4_get_ice_encryption_key(const struct inode *inode) +{ + struct ext4_crypt_info *ci = NULL; + + if (!inode) + return NULL; + + ci = ext4_encryption_info((struct inode *)inode); + if (!ci) + return NULL; + + return &(ci->ci_raw_key[0]); +} + +/* + * Retrieves encryption salt from the inode + */ +char *ext4_get_ice_encryption_salt(const struct inode *inode) +{ + struct ext4_crypt_info *ci = NULL; + + if (!inode) + return NULL; + + ci = ext4_encryption_info((struct inode *)inode); + if (!ci) + return NULL; + + return &(ci->ci_raw_key[ext4_get_ice_encryption_key_size(inode)]); +} + +/* + * returns true if the cipher mode in inode is AES XTS + */ +int ext4_is_aes_xts_cipher(const struct inode *inode) +{ + struct ext4_crypt_info *ci = NULL; + + ci = ext4_encryption_info((struct inode *)inode); + if (!ci) + return 0; + + return (ci->ci_data_mode == EXT4_ENCRYPTION_MODE_PRIVATE); +} + +/* + * returns true if encryption info in both inodes is equal + */ +int ext4_is_ice_encryption_info_equal(const struct inode *inode1, + const struct inode *inode2) +{ + char *key1 = NULL; + char *key2 = NULL; + char 
*salt1 = NULL; + char *salt2 = NULL; + + if (!inode1 || !inode2) + return 0; + + if (inode1 == inode2) + return 1; + + /* both do not belong to ice, so we don't care, they are equal for us */ + if (!ext4_should_be_processed_by_ice(inode1) && + !ext4_should_be_processed_by_ice(inode2)) + return 1; + + /* one belongs to ice, the other does not -> not equal */ + if (ext4_should_be_processed_by_ice(inode1) ^ + ext4_should_be_processed_by_ice(inode2)) + return 0; + + key1 = ext4_get_ice_encryption_key(inode1); + key2 = ext4_get_ice_encryption_key(inode2); + salt1 = ext4_get_ice_encryption_salt(inode1); + salt2 = ext4_get_ice_encryption_salt(inode2); + + /* key and salt should not be null by this point */ + if (!key1 || !key2 || !salt1 || !salt2 || + (ext4_get_ice_encryption_key_size(inode1) != + ext4_get_ice_encryption_key_size(inode2)) || + (ext4_get_ice_encryption_salt_size(inode1) != + ext4_get_ice_encryption_salt_size(inode2))) + return 0; + + return ((memcmp(key1, key2, + ext4_get_ice_encryption_key_size(inode1)) == 0) && + (memcmp(salt1, salt2, + ext4_get_ice_encryption_salt_size(inode1)) == 0)); +} diff --git a/fs/ext4/ext4_ice.h b/fs/ext4/ext4_ice.h new file mode 100644 index 000000000000..5257edabd6b2 --- /dev/null +++ b/fs/ext4/ext4_ice.h @@ -0,0 +1,104 @@ +/* Copyright (c) 2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#ifndef _EXT4_ICE_H +#define _EXT4_ICE_H + +#include "ext4.h" +#include "ext4_crypto.h" + +#ifdef CONFIG_EXT4_FS_ICE_ENCRYPTION +static inline int ext4_should_be_processed_by_ice(const struct inode *inode) +{ + if (!ext4_encrypted_inode((struct inode *)inode)) + return 0; + + return ext4_using_hardware_encryption((struct inode *)inode); +} + +static inline int ext4_is_ice_enabled(void) +{ + return 1; +} + +int ext4_is_aes_xts_cipher(const struct inode *inode); + +char *ext4_get_ice_encryption_key(const struct inode *inode); +char *ext4_get_ice_encryption_salt(const struct inode *inode); + +int ext4_is_ice_encryption_info_equal(const struct inode *inode1, + const struct inode *inode2); + +static inline size_t ext4_get_ice_encryption_key_size( + const struct inode *inode) +{ + return EXT4_AES_256_XTS_KEY_SIZE / 2; +} + +static inline size_t ext4_get_ice_encryption_salt_size( + const struct inode *inode) +{ + return EXT4_AES_256_XTS_KEY_SIZE / 2; +} + +#else +static inline int ext4_should_be_processed_by_ice(const struct inode *inode) +{ + return 0; +} +static inline int ext4_is_ice_enabled(void) +{ + return 0; +} + +static inline char *ext4_get_ice_encryption_key(const struct inode *inode) +{ + return NULL; +} + +static inline char *ext4_get_ice_encryption_salt(const struct inode *inode) +{ + return NULL; +} + +static inline size_t ext4_get_ice_encryption_key_size( + const struct inode *inode) +{ + return 0; +} + +static inline size_t ext4_get_ice_encryption_salt_size( + const struct inode *inode) +{ + return 0; +} + +static inline int ext4_is_xts_cipher(const struct inode *inode) +{ + return 0; +} + +static inline int ext4_is_ice_encryption_info_equal( + const struct inode *inode1, + const struct inode *inode2) +{ + return 0; +} + +static inline int ext4_is_aes_xts_cipher(const struct inode *inode) +{ + return 0; +} + +#endif + +#endif /* _EXT4_ICE_H */ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 403c4bae3e18..cfb978fd3ec4 100644 --- 
a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3127,19 +3127,11 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) { ext4_fsblk_t ee_pblock; unsigned int ee_len; - int ret; ee_len = ext4_ext_get_actual_len(ex); ee_pblock = ext4_ext_pblock(ex); - - if (ext4_encrypted_inode(inode)) - return ext4_encrypted_zeroout(inode, ex); - - ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS); - if (ret > 0) - ret = 0; - - return ret; + return ext4_issue_zeroout(inode, le32_to_cpu(ex->ee_block), ee_pblock, + ee_len); } /* diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index bc7c082b7913..280d67fe33a7 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1018,12 +1018,11 @@ void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh, */ static int ext4_add_dirent_to_inline(handle_t *handle, struct ext4_filename *fname, - struct dentry *dentry, + struct inode *dir, struct inode *inode, struct ext4_iloc *iloc, void *inline_start, int inline_size) { - struct inode *dir = d_inode(dentry->d_parent); int err; struct ext4_dir_entry_2 *de; @@ -1267,12 +1266,11 @@ out: * the new created block. 
*/ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, - struct dentry *dentry, struct inode *inode) + struct inode *dir, struct inode *inode) { int ret, inline_size; void *inline_start; struct ext4_iloc iloc; - struct inode *dir = d_inode(dentry->d_parent); ret = ext4_get_inode_loc(dir, &iloc); if (ret) @@ -1286,7 +1284,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, EXT4_INLINE_DOTDOT_SIZE; inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE; - ret = ext4_add_dirent_to_inline(handle, fname, dentry, inode, &iloc, + ret = ext4_add_dirent_to_inline(handle, fname, dir, inode, &iloc, inline_start, inline_size); if (ret != -ENOSPC) goto out; @@ -1307,7 +1305,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, if (inline_size) { inline_start = ext4_get_inline_xattr_pos(dir, &iloc); - ret = ext4_add_dirent_to_inline(handle, fname, dentry, + ret = ext4_add_dirent_to_inline(handle, fname, dir, inode, &iloc, inline_start, inline_size); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c5b56aa719c8..78701445348f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -42,6 +42,7 @@ #include "xattr.h" #include "acl.h" #include "truncate.h" +#include "ext4_ice.h" #include <trace/events/ext4.h> #include <trace/events/android_fs.h> @@ -389,6 +390,21 @@ static int __check_block_validity(struct inode *inode, const char *func, return 0; } +int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk, + ext4_lblk_t len) +{ + int ret; + + if (ext4_encrypted_inode(inode)) + return ext4_encrypted_zeroout(inode, lblk, pblk, len); + + ret = sb_issue_zeroout(inode->i_sb, pblk, len, GFP_NOFS); + if (ret > 0) + ret = 0; + + return ret; +} + #define check_block_validity(inode, map) \ __check_block_validity((inode), __func__, __LINE__, (map)) @@ -999,7 +1015,8 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, ll_rw_block(READ, 1, &bh); 
*wait_bh++ = bh; decrypt = ext4_encrypted_inode(inode) && - S_ISREG(inode->i_mode); + S_ISREG(inode->i_mode) && + !ext4_is_ice_enabled(); } } /* @@ -3306,7 +3323,9 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, get_block_func = ext4_get_block_write; dio_flags = DIO_LOCKING; } -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#if defined(CONFIG_EXT4_FS_ENCRYPTION) && \ +!defined(CONFIG_EXT4_FS_ICE_ENCRYPTION) + BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)); #endif if (IS_DAX(inode)) @@ -3373,7 +3392,9 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter, size_t count = iov_iter_count(iter); ssize_t ret; -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#if defined(CONFIG_EXT4_FS_ENCRYPTION) && \ +!defined(CONFIG_EXT4_FS_ICE_ENCRYPTION) + if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)) return 0; #endif @@ -3573,7 +3594,8 @@ static int __ext4_block_zero_page_range(handle_t *handle, if (!buffer_uptodate(bh)) goto unlock; if (S_ISREG(inode->i_mode) && - ext4_encrypted_inode(inode)) { + ext4_encrypted_inode(inode) && + !ext4_using_hardware_encryption(inode)) { /* We expect the key to be set. 
*/ BUG_ON(!ext4_has_encryption_key(inode)); BUG_ON(blocksize != PAGE_CACHE_SIZE); @@ -3746,6 +3768,7 @@ int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset, int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) { +#if 0 struct super_block *sb = inode->i_sb; ext4_lblk_t first_block, stop_block; struct address_space *mapping = inode->i_mapping; @@ -3876,6 +3899,12 @@ out_dio: out_mutex: mutex_unlock(&inode->i_mutex); return ret; +#else + /* + * Disabled as per b/28760453 + */ + return -EOPNOTSUPP; +#endif } int ext4_inode_attach_jinode(struct inode *inode) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 32960b3ecd4f..b9324d0ff218 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -273,7 +273,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, struct ext4_filename *fname, struct ext4_dir_entry_2 **res_dir); static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname, - struct dentry *dentry, struct inode *inode); + struct inode *dir, struct inode *inode); /* checksumming functions */ void initialize_dirent_tail(struct ext4_dir_entry_tail *t, @@ -1949,10 +1949,9 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname, * directory, and adds the dentry to the indexed directory. 
*/ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname, - struct dentry *dentry, + struct inode *dir, struct inode *inode, struct buffer_head *bh) { - struct inode *dir = d_inode(dentry->d_parent); struct buffer_head *bh2; struct dx_root *root; struct dx_frame frames[2], *frame; @@ -2105,8 +2104,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, return retval; if (ext4_has_inline_data(dir)) { - retval = ext4_try_add_inline_entry(handle, &fname, - dentry, inode); + retval = ext4_try_add_inline_entry(handle, &fname, dir, inode); if (retval < 0) goto out; if (retval == 1) { @@ -2116,7 +2114,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, } if (is_dx(dir)) { - retval = ext4_dx_add_entry(handle, &fname, dentry, inode); + retval = ext4_dx_add_entry(handle, &fname, dir, inode); if (!retval || (retval != ERR_BAD_DX_DIR)) goto out; ext4_clear_inode_flag(dir, EXT4_INODE_INDEX); @@ -2138,7 +2136,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, if (blocks == 1 && !dx_fallback && ext4_has_feature_dir_index(sb)) { - retval = make_indexed_dir(handle, &fname, dentry, + retval = make_indexed_dir(handle, &fname, dir, inode, bh); bh = NULL; /* make_indexed_dir releases bh */ goto out; @@ -2173,12 +2171,11 @@ out: * Returns 0 for success, or a negative error value */ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname, - struct dentry *dentry, struct inode *inode) + struct inode *dir, struct inode *inode) { struct dx_frame frames[2], *frame; struct dx_entry *entries, *at; struct buffer_head *bh; - struct inode *dir = d_inode(dentry->d_parent); struct super_block *sb = dir->i_sb; struct ext4_dir_entry_2 *de; int err; diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 6ca56f5f72b5..978141e8b800 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -28,6 +28,7 @@ #include "ext4_jbd2.h" #include "xattr.h" #include "acl.h" +#include "ext4_ice.h" static struct 
kmem_cache *io_end_cachep; @@ -489,7 +490,11 @@ int ext4_bio_write_page(struct ext4_io_submit *io, gfp_t gfp_flags = GFP_NOFS; retry_encrypt: - data_page = ext4_encrypt(inode, page, gfp_flags); + + if (!ext4_using_hardware_encryption(inode)) + data_page = ext4_encrypt(inode, page, gfp_flags); + + if (IS_ERR(data_page)) { ret = PTR_ERR(data_page); if (ret == -ENOMEM && wbc->sync_mode == WB_SYNC_ALL) { diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index 783e33d839cf..99f1bd8c7f05 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -45,6 +45,7 @@ #include <linux/cleancache.h> #include "ext4.h" +#include "ext4_ice.h" #include <trace/events/android_fs.h> /* @@ -63,12 +64,17 @@ static void completion_pages(struct work_struct *work) bio_for_each_segment_all(bv, bio, i) { struct page *page = bv->bv_page; - int ret = ext4_decrypt(page); - if (ret) { - WARN_ON_ONCE(1); - SetPageError(page); - } else + if (ext4_is_ice_enabled()) { SetPageUptodate(page); + } else { + int ret = ext4_decrypt(page); + + if (ret) { + WARN_ON_ONCE(1); + SetPageError(page); + } else + SetPageUptodate(page); + } unlock_page(page); } ext4_release_crypto_ctx(ctx); diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index 8226557130a2..6abd78629140 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -92,7 +92,8 @@ static int fat12_ent_bread(struct super_block *sb, struct fat_entry *fatent, err_brelse: brelse(bhs[0]); err: - fat_msg(sb, KERN_ERR, "FAT read failed (blocknr %llu)", (llu)blocknr); + fat_msg_ratelimit(sb, KERN_ERR, + "FAT read failed (blocknr %llu)", (llu)blocknr); return -EIO; } @@ -105,8 +106,8 @@ static int fat_ent_bread(struct super_block *sb, struct fat_entry *fatent, fatent->fat_inode = MSDOS_SB(sb)->fat_inode; fatent->bhs[0] = sb_bread(sb, blocknr); if (!fatent->bhs[0]) { - fat_msg(sb, KERN_ERR, "FAT read failed (blocknr %llu)", - (llu)blocknr); + fat_msg_ratelimit(sb, KERN_ERR, + "FAT read failed (blocknr %llu)", (llu)blocknr); return -EIO; } fatent->nr_bhs = 1; diff 
--git a/fs/fat/inode.c b/fs/fat/inode.c index cf644d52c0cf..a6c21fba6e9f 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -760,8 +760,9 @@ retry: fat_get_blknr_offset(sbi, i_pos, &blocknr, &offset); bh = sb_bread(sb, blocknr); if (!bh) { - fat_msg(sb, KERN_ERR, "unable to read inode block " - "for updating (i_pos %lld)", i_pos); + fat_msg_ratelimit(sb, KERN_ERR, + "unable to read inode block for updating (i_pos %lld)", + i_pos); return -EIO; } spin_lock(&sbi->inode_hash_lock); diff --git a/fs/file_table.c b/fs/file_table.c index ad17e05ebf95..b4baa0de4988 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -41,6 +41,141 @@ static struct kmem_cache *filp_cachep __read_mostly; static struct percpu_counter nr_files __cacheline_aligned_in_smp; +#ifdef CONFIG_FILE_TABLE_DEBUG +#include <linux/hashtable.h> +#include <mount.h> +static DEFINE_MUTEX(global_files_lock); +static DEFINE_HASHTABLE(global_files_hashtable, 10); + +struct global_filetable_lookup_key { + struct work_struct work; + uintptr_t value; +}; + +void global_filetable_print_warning_once(void) +{ + pr_err_once("\n**********************************************************\n"); + pr_err_once("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); + pr_err_once("** **\n"); + pr_err_once("** VFS FILE TABLE DEBUG is enabled . **\n"); + pr_err_once("** Allocating extra memory and slowing access to files **\n"); + pr_err_once("** **\n"); + pr_err_once("** This means that this is a DEBUG kernel and it is **\n"); + pr_err_once("** unsafe for production use. **\n"); + pr_err_once("** **\n"); + pr_err_once("** If you see this message and you are not debugging **\n"); + pr_err_once("** the kernel, report this immediately to your vendor! 
**\n"); + pr_err_once("** **\n"); + pr_err_once("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); + pr_err_once("**********************************************************\n"); +} + +void global_filetable_add(struct file *filp) +{ + struct mount *mnt; + + if (filp->f_path.dentry->d_iname == NULL || + strlen(filp->f_path.dentry->d_iname) == 0) + return; + + mnt = real_mount(filp->f_path.mnt); + + mutex_lock(&global_files_lock); + hash_add(global_files_hashtable, &filp->f_hash, (uintptr_t)mnt); + mutex_unlock(&global_files_lock); +} + +void global_filetable_del(struct file *filp) +{ + mutex_lock(&global_files_lock); + hash_del(&filp->f_hash); + mutex_unlock(&global_files_lock); +} + +/* + * Log one tracked file and bump *count. Prints the full path when d_path() + * can build it into path_buffer (PAGE_SIZE bytes); otherwise falls back to + * the dentry's inline name. path_buffer may be NULL when the caller's page + * allocation failed, in which case only the inline name is printed. + */ +static void global_print_file(struct file *filp, char *path_buffer, int *count) +{ + char *pathname = ERR_PTR(-ENOMEM); + + /* never hand a NULL buffer to d_path(); keep the error fallback instead */ + if (path_buffer) + pathname = d_path(&filp->f_path, path_buffer, PAGE_SIZE); + + if (IS_ERR(pathname)) + pr_err("VFS: File %d Address : %pa partial filename: %s ref_count=%ld\n", + ++(*count), &filp, filp->f_path.dentry->d_iname, + atomic_long_read(&filp->f_count)); + else + pr_err("VFS: File %d Address : %pa full filepath: %s ref_count=%ld\n", + ++(*count), &filp, pathname, + atomic_long_read(&filp->f_count)); +} + +static void global_filetable_print(uintptr_t lookup_mnt) +{ + struct hlist_node *tmp; + struct file *filp; + struct mount *mnt; + int index; + int count = 0; + char *path_buffer = (char *)__get_free_page(GFP_TEMPORARY); + + mutex_lock(&global_files_lock); + pr_err("\n**********************************************************\n"); + pr_err("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); + + pr_err("\n"); + pr_err("VFS: The following files hold a reference to the mount\n"); + pr_err("\n"); + hash_for_each_possible_safe(global_files_hashtable, filp, tmp, f_hash, + lookup_mnt) { + mnt = real_mount(filp->f_path.mnt); + if ((uintptr_t)mnt == lookup_mnt) + global_print_file(filp, path_buffer, &count); + } + pr_err("\n"); + pr_err("VFS: Found total of %d open files\n", count);
+ pr_err("\n"); + + count = 0; + pr_err("\n"); + pr_err("VFS: The following files need to cleaned up\n"); + pr_err("\n"); + hash_for_each_safe(global_files_hashtable, index, tmp, filp, f_hash) { + if (atomic_long_read(&filp->f_count) == 0) + global_print_file(filp, path_buffer, &count); + } + + pr_err("\n"); + pr_err("VFS: Found total of %d files awaiting clean-up\n", count); + pr_err("\n"); + pr_err("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); + pr_err("\n**********************************************************\n"); + + mutex_unlock(&global_files_lock); + free_page((unsigned long)path_buffer); +} + +static void global_filetable_print_work_fn(struct work_struct *work) +{ + struct global_filetable_lookup_key *key; + uintptr_t lookup_mnt; + + key = container_of(work, struct global_filetable_lookup_key, work); + lookup_mnt = key->value; + kfree(key); + global_filetable_print(lookup_mnt); +} + +void global_filetable_delayed_print(struct mount *mnt) +{ + struct global_filetable_lookup_key *key; + + key = kzalloc(sizeof(*key), GFP_KERNEL); + if (key == NULL) + return; + key->value = (uintptr_t)mnt; + INIT_WORK(&key->work, global_filetable_print_work_fn); + schedule_work(&key->work); +} +#endif /* CONFIG_FILE_TABLE_DEBUG */ + static void file_free_rcu(struct rcu_head *head) { struct file *f = container_of(head, struct file, f_u.fu_rcuhead); @@ -219,6 +354,7 @@ static void __fput(struct file *file) put_write_access(inode); __mnt_drop_write(mnt); } + global_filetable_del(file); file->f_path.dentry = NULL; file->f_path.mnt = NULL; file->f_inode = NULL; @@ -314,6 +450,7 @@ void __init files_init(void) filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); percpu_counter_init(&nr_files, 0, GFP_KERNEL); + global_filetable_print_warning_once(); } /* diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile index e95eeb445e58..3805040bee46 100644 --- a/fs/fuse/Makefile +++ b/fs/fuse/Makefile @@ -5,4 +5,4 @@ 
obj-$(CONFIG_FUSE_FS) += fuse.o obj-$(CONFIG_CUSE) += cuse.o -fuse-objs := dev.o dir.o file.o inode.o control.o +fuse-objs := dev.o dir.o file.o inode.o control.o passthrough.o diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index fbfec06b054d..ca7d46de5ca3 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -7,6 +7,7 @@ */ #include "fuse_i.h" +#include "fuse_passthrough.h" #include <linux/init.h> #include <linux/module.h> @@ -574,9 +575,14 @@ ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args) args->out.numargs * sizeof(struct fuse_arg)); fuse_request_send(fc, req); ret = req->out.h.error; - if (!ret && args->out.argvar) { - BUG_ON(args->out.numargs != 1); - ret = req->out.args[0].size; + if (!ret) { + if (args->out.argvar) { + BUG_ON(args->out.numargs != 1); + ret = req->out.args[0].size; + } + + if (req->passthrough_filp != NULL) + args->out.passthrough_filp = req->passthrough_filp; } fuse_put_request(fc, req); @@ -1946,6 +1952,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud, } fuse_copy_finish(cs); + fuse_setup_passthrough(fc, req); spin_lock(&fpq->lock); clear_bit(FR_LOCKED, &req->flags); if (!fpq->connected) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index bfa274c06666..0dede8a66816 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -473,6 +473,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, args.out.args[0].value = &outentry; args.out.args[1].size = sizeof(outopen); args.out.args[1].value = &outopen; + args.out.passthrough_filp = NULL; err = fuse_simple_request(fc, &args); if (err) goto out_free_ff; @@ -484,6 +485,8 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, ff->fh = outopen.fh; ff->nodeid = outentry.nodeid; ff->open_flags = outopen.open_flags; + if (args.out.passthrough_filp != NULL) + ff->passthrough_filp = args.out.passthrough_filp; inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation, &outentry.attr, entry_attr_timeout(&outentry), 0); if (!inode) { diff --git 
a/fs/fuse/file.c b/fs/fuse/file.c index 1a063cbfe503..f0de8fe294f4 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -7,6 +7,7 @@ */ #include "fuse_i.h" +#include "fuse_passthrough.h" #include <linux/pagemap.h> #include <linux/slab.h> @@ -21,8 +22,10 @@ static const struct file_operations fuse_direct_io_file_operations; static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file, - int opcode, struct fuse_open_out *outargp) + int opcode, struct fuse_open_out *outargp, + struct file **passthrough_filpp) { + int ret_val; struct fuse_open_in inarg; FUSE_ARGS(args); @@ -38,8 +41,14 @@ static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file, args.out.numargs = 1; args.out.args[0].size = sizeof(*outargp); args.out.args[0].value = outargp; + args.out.passthrough_filp = NULL; - return fuse_simple_request(fc, &args); + ret_val = fuse_simple_request(fc, &args); + + if (args.out.passthrough_filp != NULL) + *passthrough_filpp = args.out.passthrough_filp; + + return ret_val; } struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) @@ -50,6 +59,10 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) if (unlikely(!ff)) return NULL; + ff->passthrough_filp = NULL; + ff->passthrough_enabled = 0; + if (fc->passthrough) + ff->passthrough_enabled = 1; ff->fc = fc; ff->reserved_req = fuse_request_alloc(0); if (unlikely(!ff->reserved_req)) { @@ -118,6 +131,7 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, bool isdir) { struct fuse_file *ff; + struct file *passthrough_filp = NULL; int opcode = isdir ? 
FUSE_OPENDIR : FUSE_OPEN; ff = fuse_file_alloc(fc); @@ -130,10 +144,12 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, struct fuse_open_out outarg; int err; - err = fuse_send_open(fc, nodeid, file, opcode, &outarg); + err = fuse_send_open(fc, nodeid, file, opcode, &outarg, + &(passthrough_filp)); if (!err) { ff->fh = outarg.fh; ff->open_flags = outarg.open_flags; + ff->passthrough_filp = passthrough_filp; } else if (err != -ENOSYS || isdir) { fuse_file_free(ff); @@ -253,6 +269,8 @@ void fuse_release_common(struct file *file, int opcode) if (unlikely(!ff)) return; + fuse_passthrough_release(ff); + req = ff->reserved_req; fuse_prepare_release(ff, file->f_flags, opcode); @@ -883,6 +901,43 @@ static int fuse_readpages_fill(void *_data, struct page *page) return -EIO; } +#ifdef CONFIG_CMA + if (is_cma_pageblock(page)) { + struct page *oldpage = page, *newpage; + int err; + + /* make sure that old page is not free in-between the calls */ + page_cache_get(oldpage); + + newpage = alloc_page(GFP_HIGHUSER); + if (!newpage) { + page_cache_release(oldpage); + return -ENOMEM; + } + + err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL); + if (err) { + __free_page(newpage); + page_cache_release(oldpage); + return err; + } + + /* + * Decrement the count on new page to make page cache the only + * owner of it + */ + lock_page(newpage); + put_page(newpage); + + lru_cache_add_file(newpage); + + /* finally release the old page and swap pointers */ + unlock_page(oldpage); + page_cache_release(oldpage); + page = newpage; + } +#endif + page_cache_get(page); req->pages[req->num_pages] = page; req->page_descs[req->num_pages].length = PAGE_SIZE; @@ -928,8 +983,10 @@ out: static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to) { + ssize_t ret_val; struct inode *inode = iocb->ki_filp->f_mapping->host; struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_file *ff = iocb->ki_filp->private_data; /* * In auto invalidate mode, always 
update attributes on read. @@ -944,7 +1001,12 @@ static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to) return err; } - return generic_file_read_iter(iocb, to); + if (ff && ff->passthrough_enabled && ff->passthrough_filp) + ret_val = fuse_passthrough_read_iter(iocb, to); + else + ret_val = generic_file_read_iter(iocb, to); + + return ret_val; } static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff, @@ -1176,6 +1238,7 @@ static ssize_t fuse_perform_write(struct file *file, static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; + struct fuse_file *ff = file->private_data; struct address_space *mapping = file->f_mapping; ssize_t written = 0; ssize_t written_buffered = 0; @@ -1209,8 +1272,14 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (err) goto out; + if (ff && ff->passthrough_enabled && ff->passthrough_filp) { + written = fuse_passthrough_write_iter(iocb, from); + goto out; + } + if (iocb->ki_flags & IOCB_DIRECT) { loff_t pos = iocb->ki_pos; + written = generic_file_direct_write(iocb, from, pos); if (written < 0 || !iov_iter_count(from)) goto out; @@ -2081,6 +2150,9 @@ static const struct vm_operations_struct fuse_file_vm_ops = { static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) { + struct fuse_file *ff = file->private_data; + + ff->passthrough_enabled = 0; if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) fuse_link_write_file(file); @@ -2091,6 +2163,9 @@ static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) static int fuse_direct_mmap(struct file *file, struct vm_area_struct *vma) { + struct fuse_file *ff = file->private_data; + + ff->passthrough_enabled = 0; /* Can't provide the coherency needed for MAP_SHARED */ if (vma->vm_flags & VM_MAYSHARE) return -ENODEV; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 644687ae04bd..1cc0dce47a2f 100644 --- a/fs/fuse/fuse_i.h 
+++ b/fs/fuse/fuse_i.h @@ -158,6 +158,10 @@ struct fuse_file { /** Has flock been performed on this file? */ bool flock:1; + + /* the read write file */ + struct file *passthrough_filp; + bool passthrough_enabled; }; /** One input argument of a request */ @@ -237,6 +241,7 @@ struct fuse_args { unsigned argvar:1; unsigned numargs; struct fuse_arg args[2]; + struct file *passthrough_filp; } out; }; @@ -386,6 +391,9 @@ struct fuse_req { /** Request is stolen from fuse_file->reserved_req */ struct file *stolen_file; + + /** fuse passthrough file */ + struct file *passthrough_filp; }; struct fuse_iqueue { @@ -543,6 +551,9 @@ struct fuse_conn { /** write-back cache policy (default is write-through) */ unsigned writeback_cache:1; + /** passthrough IO. */ + unsigned passthrough:1; + /* * The following bitfields are only for optimization purposes * and hence races in setting them will not cause malfunction diff --git a/fs/fuse/fuse_passthrough.h b/fs/fuse/fuse_passthrough.h new file mode 100644 index 000000000000..62f12c12ffec --- /dev/null +++ b/fs/fuse/fuse_passthrough.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#ifndef _FS_FUSE_PASSTHROUGH_H +#define _FS_FUSE_PASSTHROUGH_H + +#include "fuse_i.h" + +#include <linux/fuse.h> +#include <linux/file.h> + +void fuse_setup_passthrough(struct fuse_conn *fc, struct fuse_req *req); + +ssize_t fuse_passthrough_read_iter(struct kiocb *iocb, struct iov_iter *to); + +ssize_t fuse_passthrough_write_iter(struct kiocb *iocb, struct iov_iter *from); + +void fuse_passthrough_release(struct fuse_file *ff); + +#endif /* _FS_FUSE_PASSTHROUGH_H */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 0d5e8e59b390..43bb5eb17ad2 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -860,6 +860,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->conn_error = 1; else { unsigned long ra_pages; + struct super_block *sb = fc->sb; process_init_limits(fc, arg); @@ -898,6 +899,13 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->async_dio = 1; if (arg->flags & FUSE_WRITEBACK_CACHE) fc->writeback_cache = 1; + if (arg->flags & FUSE_PASSTHROUGH) { + fc->passthrough = 1; + /* Prevent further stacking */ + sb->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH; + pr_info("FUSE: Pass through is enabled [%s : %d]!\n", + current->comm, current->pid); + } if (arg->time_gran && arg->time_gran <= 1000000000) fc->sb->s_time_gran = arg->time_gran; } else { diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c new file mode 100644 index 000000000000..785af63acabd --- /dev/null +++ b/fs/fuse/passthrough.c @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + */ + +#include "fuse_passthrough.h" + +#include <linux/aio.h> +#include <linux/fs_stack.h> + +void fuse_setup_passthrough(struct fuse_conn *fc, struct fuse_req *req) +{ + int daemon_fd, fs_stack_depth; + unsigned open_out_index; + struct file *passthrough_filp; + struct inode *passthrough_inode; + struct super_block *passthrough_sb; + struct fuse_open_out *open_out; + + req->passthrough_filp = NULL; + + if (!(fc->passthrough)) + return; + + if ((req->in.h.opcode != FUSE_OPEN) && + (req->in.h.opcode != FUSE_CREATE)) + return; + + open_out_index = req->in.numargs - 1; + + BUG_ON(open_out_index != 0 && open_out_index != 1); + BUG_ON(req->out.args[open_out_index].size != sizeof(*open_out)); + + open_out = req->out.args[open_out_index].value; + + daemon_fd = (int)open_out->passthrough_fd; + if (daemon_fd < 0) + return; + + passthrough_filp = fget_raw(daemon_fd); + if (!passthrough_filp) + return; + + passthrough_inode = file_inode(passthrough_filp); + passthrough_sb = passthrough_inode->i_sb; + fs_stack_depth = passthrough_sb->s_stack_depth + 1; + + /* If we reached the stacking limit go through regular io */ + if (fs_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { + /* Release the passthrough file. 
*/ + fput(passthrough_filp); + pr_err("FUSE: maximum fs stacking depth exceeded, cannot use passthrough for this file\n"); + return; + } + req->passthrough_filp = passthrough_filp; +} + +static ssize_t fuse_passthrough_read_write_iter(struct kiocb *iocb, + struct iov_iter *iter, int do_write) +{ + ssize_t ret_val; + struct fuse_file *ff; + struct file *fuse_file, *passthrough_filp; + struct inode *fuse_inode, *passthrough_inode; + struct fuse_conn *fc; + + ff = iocb->ki_filp->private_data; + fuse_file = iocb->ki_filp; + passthrough_filp = ff->passthrough_filp; + fc = ff->fc; + + /* lock passthrough file to prevent it from being released */ + get_file(passthrough_filp); + iocb->ki_filp = passthrough_filp; + fuse_inode = fuse_file->f_path.dentry->d_inode; + passthrough_inode = file_inode(passthrough_filp); + + if (do_write) { + if (!passthrough_filp->f_op->write_iter) + return -EIO; + ret_val = passthrough_filp->f_op->write_iter(iocb, iter); + + if (ret_val >= 0 || ret_val == -EIOCBQUEUED) { + spin_lock(&fc->lock); + fsstack_copy_inode_size(fuse_inode, passthrough_inode); + spin_unlock(&fc->lock); + fsstack_copy_attr_times(fuse_inode, passthrough_inode); + } + } else { + if (!passthrough_filp->f_op->read_iter) + return -EIO; + ret_val = passthrough_filp->f_op->read_iter(iocb, iter); + if (ret_val >= 0 || ret_val == -EIOCBQUEUED) + fsstack_copy_attr_atime(fuse_inode, passthrough_inode); + } + + iocb->ki_filp = fuse_file; + + /* unlock passthrough file */ + fput(passthrough_filp); + + return ret_val; +} + +ssize_t fuse_passthrough_read_iter(struct kiocb *iocb, struct iov_iter *to) +{ + return fuse_passthrough_read_write_iter(iocb, to, 0); +} + +ssize_t fuse_passthrough_write_iter(struct kiocb *iocb, struct iov_iter *from) +{ + return fuse_passthrough_read_write_iter(iocb, from, 1); +} + +void fuse_passthrough_release(struct fuse_file *ff) +{ + if (!(ff->passthrough_filp)) + return; + + /* Release the passthrough file. 
*/ + fput(ff->passthrough_filp); + ff->passthrough_filp = NULL; +} diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index ff0ac96a8e7b..db4c867369b5 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -78,8 +78,11 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type) if (type == ACL_TYPE_ACCESS) { umode_t mode = inode->i_mode; - + struct posix_acl *old_acl = acl; error = posix_acl_update_mode(inode, &inode->i_mode, &acl); + + if (!acl) + posix_acl_release(old_acl); if (error) return error; if (mode != inode->i_mode) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index baab99b69d8a..de3e91817228 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1222,7 +1222,7 @@ static int set_gfs2_super(struct super_block *s, void *data) * We set the bdi here to the queue backing, file systems can * overwrite this in ->fill_super() */ - s->s_bdi = &bdev_get_queue(s->s_bdev)->backing_dev_info; + s->s_bdi = bdev_get_queue(s->s_bdev)->backing_dev_info; return 0; } diff --git a/fs/inode.c b/fs/inode.c index 5f6faa64ee9d..bd16497b3bba 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -154,6 +154,12 @@ int inode_init_always(struct super_block *sb, struct inode *inode) inode->i_rdev = 0; inode->dirtied_when = 0; +#ifdef CONFIG_CGROUP_WRITEBACK + inode->i_wb_frn_winner = 0; + inode->i_wb_frn_avg_time = 0; + inode->i_wb_frn_history = 0; +#endif + if (security_inode_alloc(inode)) goto out; spin_lock_init(&inode->i_lock); diff --git a/fs/internal.h b/fs/internal.h index 6387b35a1c0d..1b93a3929b16 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -153,3 +153,29 @@ extern void mnt_pin_kill(struct mount *m); * fs/nsfs.c */ extern struct dentry_operations ns_dentry_operations; + +#ifdef CONFIG_FILE_TABLE_DEBUG +void global_filetable_print_warning_once(void); +void global_filetable_add(struct file *filp); +void global_filetable_del(struct file *filp); +void global_filetable_delayed_print(struct mount *mnt); + +#else /* i.e NOT CONFIG_FILE_TABLE_DEBUG */ + +static 
inline void global_filetable_print_warning_once(void) +{ +} + +static inline void global_filetable_add(struct file *filp) +{ +} + +static inline void global_filetable_del(struct file *filp) +{ +} + +static inline void global_filetable_delayed_print(struct mount *mnt) +{ +} + +#endif /* CONFIG_FILE_TABLE_DEBUG */ diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 624a57a9c4aa..c6a499b7547e 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -275,11 +275,11 @@ loop: goto loop; end_loop: - write_unlock(&journal->j_state_lock); del_timer_sync(&journal->j_commit_timer); journal->j_task = NULL; wake_up(&journal->j_wait_done_commit); jbd_debug(1, "Journal thread exiting.\n"); + write_unlock(&journal->j_state_lock); return 0; } diff --git a/fs/mbcache.c b/fs/mbcache.c index 187477ded6b3..de509271d031 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -222,8 +222,19 @@ __mb_cache_entry_release(struct mb_cache_entry *ce) * then reacquire the lock in the proper order. */ spin_lock(&mb_cache_spinlock); - if (list_empty(&ce->e_lru_list)) - list_add_tail(&ce->e_lru_list, &mb_cache_lru_list); + /* + * Evaluate the conditions under global lock mb_cache_spinlock, + * to check if mb_cache_entry_get() is running now + * and has already deleted the entry from mb_cache_lru_list + * and incremented ce->e_refcnt to prevent further additions + * to mb_cache_lru_list. 
+ */ + if (!(ce->e_used || ce->e_queued || + atomic_read(&ce->e_refcnt))) { + if (list_empty(&ce->e_lru_list)) + list_add_tail(&ce->e_lru_list, + &mb_cache_lru_list); + } spin_unlock(&mb_cache_spinlock); } __spin_unlock_mb_cache_entry(ce); @@ -262,7 +273,6 @@ mb_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) list_del_init(&ce->e_lru_list); if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt)) continue; - spin_unlock(&mb_cache_spinlock); /* Prevent any find or get operation on the entry */ hlist_bl_lock(ce->e_block_hash_p); hlist_bl_lock(ce->e_index_hash_p); @@ -271,10 +281,10 @@ mb_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) !list_empty(&ce->e_lru_list)) { hlist_bl_unlock(ce->e_index_hash_p); hlist_bl_unlock(ce->e_block_hash_p); - spin_lock(&mb_cache_spinlock); continue; } __mb_cache_entry_unhash_unlock(ce); + spin_unlock(&mb_cache_spinlock); list_add_tail(&ce->e_lru_list, &free_list); spin_lock(&mb_cache_spinlock); } @@ -516,7 +526,6 @@ mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags) if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt)) continue; - spin_unlock(&mb_cache_spinlock); /* * Prevent any find or get operation on the * entry. @@ -530,13 +539,13 @@ mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags) hlist_bl_unlock(ce->e_index_hash_p); hlist_bl_unlock(ce->e_block_hash_p); l = &mb_cache_lru_list; - spin_lock(&mb_cache_spinlock); continue; } mb_assert(list_empty(&ce->e_lru_list)); mb_assert(!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))); __mb_cache_entry_unhash_unlock(ce); + spin_unlock(&mb_cache_spinlock); goto found; } } @@ -670,6 +679,7 @@ mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev, cache->c_bucket_bits); block_hash_p = &cache->c_block_hash[bucket]; /* First serialize access to the block corresponding hash chain. 
*/ + spin_lock(&mb_cache_spinlock); hlist_bl_lock(block_hash_p); hlist_bl_for_each_entry(ce, l, block_hash_p, e_block_list) { mb_assert(ce->e_block_hash_p == block_hash_p); @@ -678,9 +688,11 @@ mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev, * Prevent a free from removing the entry. */ atomic_inc(&ce->e_refcnt); + if (!list_empty(&ce->e_lru_list)) + list_del_init(&ce->e_lru_list); hlist_bl_unlock(block_hash_p); + spin_unlock(&mb_cache_spinlock); __spin_lock_mb_cache_entry(ce); - atomic_dec(&ce->e_refcnt); if (ce->e_used > 0) { DEFINE_WAIT(wait); while (ce->e_used > 0) { @@ -695,13 +707,9 @@ mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev, finish_wait(&mb_cache_queue, &wait); } ce->e_used += 1 + MB_CACHE_WRITER; + atomic_dec(&ce->e_refcnt); __spin_unlock_mb_cache_entry(ce); - if (!list_empty(&ce->e_lru_list)) { - spin_lock(&mb_cache_spinlock); - list_del_init(&ce->e_lru_list); - spin_unlock(&mb_cache_spinlock); - } if (!__mb_cache_entry_is_block_hashed(ce)) { __mb_cache_entry_release(ce); return NULL; @@ -710,6 +718,7 @@ mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev, } } hlist_bl_unlock(block_hash_p); + spin_unlock(&mb_cache_spinlock); return NULL; } diff --git a/fs/namei.c b/fs/namei.c index c54aaa759ed1..1f2e81c76021 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2690,8 +2690,14 @@ int vfs_create2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry, if (error) return error; error = dir->i_op->create(dir, dentry, mode, want_excl); + if (error) + return error; + error = security_inode_post_create(dir, dentry, mode); + if (error) + return error; if (!error) fsnotify_create(dir, dentry); + return error; } EXPORT_SYMBOL(vfs_create2); @@ -3376,6 +3382,8 @@ out2: error = -ESTALE; } file = ERR_PTR(error); + } else { + global_filetable_add(file); } return file; } @@ -3543,8 +3551,16 @@ int vfs_mknod2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry, u return error; error = 
dir->i_op->mknod(dir, dentry, mode, dev); + if (error) + return error; + + error = security_inode_post_create(dir, dentry, mode); + if (error) + return error; + if (!error) fsnotify_create(dir, dentry); + return error; } EXPORT_SYMBOL(vfs_mknod2); diff --git a/fs/namespace.c b/fs/namespace.c index 7e14bf1c851c..adbe44dda88f 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1594,6 +1594,8 @@ static int do_umount(struct mount *mnt, int flags) } unlock_mount_hash(); namespace_unlock(); + if (retval == -EBUSY) + global_filetable_delayed_print(mnt); return retval; } diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 354013ea22ec..67c6c650b21e 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1079,7 +1079,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_time_gran = 1; sb->s_max_links = NILFS_LINK_MAX; - sb->s_bdi = &bdev_get_queue(sb->s_bdev)->backing_dev_info; + sb->s_bdi = bdev_get_queue(sb->s_bdev)->backing_dev_info; err = load_nilfs(nilfs, sb); if (err) diff --git a/fs/proc/array.c b/fs/proc/array.c index b6c00ce0e29e..d5c6f5b38617 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -171,15 +171,15 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, seq_printf(m, "State:\t%s\n" "Tgid:\t%d\n" - "Ngid:\t%d\n" "Pid:\t%d\n" "PPid:\t%d\n" "TracerPid:\t%d\n" "Uid:\t%d\t%d\t%d\t%d\n" "Gid:\t%d\t%d\t%d\t%d\n" + "Ngid:\t%d\n" "FDSize:\t%d\nGroups:\t", get_task_state(p), - tgid, ngid, pid_nr_ns(pid, ns), ppid, tpid, + tgid, pid_nr_ns(pid, ns), ppid, tpid, from_kuid_munged(user_ns, cred->uid), from_kuid_munged(user_ns, cred->euid), from_kuid_munged(user_ns, cred->suid), @@ -188,7 +188,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, from_kgid_munged(user_ns, cred->egid), from_kgid_munged(user_ns, cred->sgid), from_kgid_munged(user_ns, cred->fsgid), - max_fds); + ngid, max_fds); group_info = cred->group_info; for (g = 0; g < group_info->ngroups; g++) diff --git 
a/fs/proc/base.c b/fs/proc/base.c index deafb880368b..cdd820771425 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1018,15 +1018,20 @@ static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count, int oom_adj = OOM_ADJUST_MIN; size_t len; unsigned long flags; + int mult = 1; if (!task) return -ESRCH; if (lock_task_sighand(task, &flags)) { - if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX) + if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX) { oom_adj = OOM_ADJUST_MAX; - else - oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) / - OOM_SCORE_ADJ_MAX; + } else { + if (task->signal->oom_score_adj < 0) + mult = -1; + oom_adj = roundup(mult * task->signal->oom_score_adj * + -OOM_DISABLE, OOM_SCORE_ADJ_MAX) / + OOM_SCORE_ADJ_MAX * mult; + } unlock_task_sighand(task, &flags); } put_task_struct(task); @@ -1410,6 +1415,204 @@ static const struct file_operations proc_pid_sched_operations = { #endif +/* + * Print out various scheduling related per-task fields: + */ + +#ifdef CONFIG_SMP + +static int sched_wake_up_idle_show(struct seq_file *m, void *v) +{ + struct inode *inode = m->private; + struct task_struct *p; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + seq_printf(m, "%d\n", sched_get_wake_up_idle(p)); + + put_task_struct(p); + + return 0; +} + +static ssize_t +sched_wake_up_idle_write(struct file *file, const char __user *buf, + size_t count, loff_t *offset) +{ + struct inode *inode = file_inode(file); + struct task_struct *p; + char buffer[PROC_NUMBUF]; + int wake_up_idle, err; + + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) { + err = -EFAULT; + goto out; + } + + err = kstrtoint(strstrip(buffer), 0, &wake_up_idle); + if (err) + goto out; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + err = sched_set_wake_up_idle(p, wake_up_idle); + + put_task_struct(p); + +out: + return err < 0 ? 
err : count; +} + +static int sched_wake_up_idle_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, sched_wake_up_idle_show, inode); +} + +static const struct file_operations proc_pid_sched_wake_up_idle_operations = { + .open = sched_wake_up_idle_open, + .read = seq_read, + .write = sched_wake_up_idle_write, + .llseek = seq_lseek, + .release = single_release, +}; + +#endif /* CONFIG_SMP */ + +#ifdef CONFIG_SCHED_HMP + +static int sched_init_task_load_show(struct seq_file *m, void *v) +{ + struct inode *inode = m->private; + struct task_struct *p; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + seq_printf(m, "%d\n", sched_get_init_task_load(p)); + + put_task_struct(p); + + return 0; +} + +static ssize_t +sched_init_task_load_write(struct file *file, const char __user *buf, + size_t count, loff_t *offset) +{ + struct inode *inode = file_inode(file); + struct task_struct *p; + char buffer[PROC_NUMBUF]; + int init_task_load, err; + + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) { + err = -EFAULT; + goto out; + } + + err = kstrtoint(strstrip(buffer), 0, &init_task_load); + if (err) + goto out; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + err = sched_set_init_task_load(p, init_task_load); + + put_task_struct(p); + +out: + return err < 0 ? 
err : count; +} + +static int sched_init_task_load_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, sched_init_task_load_show, inode); +} + +static const struct file_operations proc_pid_sched_init_task_load_operations = { + .open = sched_init_task_load_open, + .read = seq_read, + .write = sched_init_task_load_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static int sched_group_id_show(struct seq_file *m, void *v) +{ + struct inode *inode = m->private; + struct task_struct *p; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + seq_printf(m, "%d\n", sched_get_group_id(p)); + + put_task_struct(p); + + return 0; +} + +static ssize_t +sched_group_id_write(struct file *file, const char __user *buf, + size_t count, loff_t *offset) +{ + struct inode *inode = file_inode(file); + struct task_struct *p; + char buffer[PROC_NUMBUF]; + int group_id, err; + + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) { + err = -EFAULT; + goto out; + } + + err = kstrtoint(strstrip(buffer), 0, &group_id); + if (err) + goto out; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + err = sched_set_group_id(p, group_id); + + put_task_struct(p); + +out: + return err < 0 ? 
err : count; +} + +static int sched_group_id_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, sched_group_id_show, inode); +} + +static const struct file_operations proc_pid_sched_group_id_operations = { + .open = sched_group_id_open, + .read = seq_read, + .write = sched_group_id_write, + .llseek = seq_lseek, + .release = single_release, +}; + +#endif /* CONFIG_SCHED_HMP */ + #ifdef CONFIG_SCHED_AUTOGROUP /* * Print out autogroup related information: @@ -2825,6 +3028,13 @@ static const struct pid_entry tgid_base_stuff[] = { ONE("status", S_IRUGO, proc_pid_status), ONE("personality", S_IRUSR, proc_pid_personality), ONE("limits", S_IRUGO, proc_pid_limits), +#ifdef CONFIG_SMP + REG("sched_wake_up_idle", S_IRUGO|S_IWUSR, proc_pid_sched_wake_up_idle_operations), +#endif +#ifdef CONFIG_SCHED_HMP + REG("sched_init_task_load", S_IRUGO|S_IWUSR, proc_pid_sched_init_task_load_operations), + REG("sched_group_id", S_IRUGO|S_IWUGO, proc_pid_sched_group_id_operations), +#endif #ifdef CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), #endif @@ -2849,6 +3059,9 @@ static const struct pid_entry tgid_base_stuff[] = { REG("mounts", S_IRUGO, proc_mounts_operations), REG("mountinfo", S_IRUGO, proc_mountinfo_operations), REG("mountstats", S_IRUSR, proc_mountstats_operations), +#ifdef CONFIG_PROCESS_RECLAIM + REG("reclaim", S_IWUSR, proc_reclaim_operations), +#endif #ifdef CONFIG_PROC_PAGE_MONITOR REG("clear_refs", S_IWUSR, proc_clear_refs_operations), REG("smaps", S_IRUGO, proc_pid_smaps_operations), diff --git a/fs/proc/internal.h b/fs/proc/internal.h index aa2781095bd1..ef2b01533c97 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -209,6 +209,7 @@ struct pde_opener { extern const struct inode_operations proc_link_inode_operations; extern const struct inode_operations proc_pid_link_inode_operations; +extern const struct file_operations proc_reclaim_operations; extern void proc_init_inodecache(void); extern struct inode 
*proc_get_inode(struct super_block *, struct proc_dir_entry *); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index fa0af59ebaea..f6c512a550e5 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -14,6 +14,8 @@ #include <linux/swapops.h> #include <linux/mmu_notifier.h> #include <linux/page_idle.h> +#include <linux/mm_inline.h> +#include <linux/ctype.h> #include <asm/elf.h> #include <asm/uaccess.h> @@ -391,7 +393,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) name = "[stack]"; goto done; } - if (vma_get_anon_name(vma)) { seq_pad(m, ' '); seq_print_vma_name(m, vma); @@ -1424,6 +1425,241 @@ const struct file_operations proc_pagemap_operations = { }; #endif /* CONFIG_PROC_PAGE_MONITOR */ +#ifdef CONFIG_PROCESS_RECLAIM +static int reclaim_pte_range(pmd_t *pmd, unsigned long addr, + unsigned long end, struct mm_walk *walk) +{ + struct reclaim_param *rp = walk->private; + struct vm_area_struct *vma = rp->vma; + pte_t *pte, ptent; + spinlock_t *ptl; + struct page *page; + LIST_HEAD(page_list); + int isolated; + int reclaimed; + + split_huge_page_pmd(vma, addr, pmd); + if (pmd_trans_unstable(pmd) || !rp->nr_to_reclaim) + return 0; +cont: + isolated = 0; + pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); + for (; addr != end; pte++, addr += PAGE_SIZE) { + ptent = *pte; + if (!pte_present(ptent)) + continue; + + page = vm_normal_page(vma, addr, ptent); + if (!page) + continue; + + if (isolate_lru_page(page)) + continue; + + list_add(&page->lru, &page_list); + inc_zone_page_state(page, NR_ISOLATED_ANON + + page_is_file_cache(page)); + isolated++; + rp->nr_scanned++; + if ((isolated >= SWAP_CLUSTER_MAX) || !rp->nr_to_reclaim) + break; + } + pte_unmap_unlock(pte - 1, ptl); + reclaimed = reclaim_pages_from_list(&page_list, vma); + rp->nr_reclaimed += reclaimed; + rp->nr_to_reclaim -= reclaimed; + if (rp->nr_to_reclaim < 0) + rp->nr_to_reclaim = 0; + + if (rp->nr_to_reclaim && (addr != end)) + goto cont; + + cond_resched(); + 
return 0; +} + +enum reclaim_type { + RECLAIM_FILE, + RECLAIM_ANON, + RECLAIM_ALL, + RECLAIM_RANGE, +}; + +struct reclaim_param reclaim_task_anon(struct task_struct *task, + int nr_to_reclaim) +{ + struct mm_struct *mm; + struct vm_area_struct *vma; + struct mm_walk reclaim_walk = {}; + struct reclaim_param rp; + + rp.nr_reclaimed = 0; + rp.nr_scanned = 0; + get_task_struct(task); + mm = get_task_mm(task); + if (!mm) + goto out; + + reclaim_walk.mm = mm; + reclaim_walk.pmd_entry = reclaim_pte_range; + + rp.nr_to_reclaim = nr_to_reclaim; + reclaim_walk.private = &rp; + + down_read(&mm->mmap_sem); + for (vma = mm->mmap; vma; vma = vma->vm_next) { + if (is_vm_hugetlb_page(vma)) + continue; + + if (vma->vm_file) + continue; + + if (vma->vm_flags & VM_LOCKED) + continue; + + if (!rp.nr_to_reclaim) + break; + + rp.vma = vma; + walk_page_range(vma->vm_start, vma->vm_end, + &reclaim_walk); + } + + flush_tlb_mm(mm); + up_read(&mm->mmap_sem); + mmput(mm); +out: + put_task_struct(task); + return rp; +} + +static ssize_t reclaim_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct task_struct *task; + char buffer[200]; + struct mm_struct *mm; + struct vm_area_struct *vma; + enum reclaim_type type; + char *type_buf; + struct mm_walk reclaim_walk = {}; + unsigned long start = 0; + unsigned long end = 0; + struct reclaim_param rp; + + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + + if (copy_from_user(buffer, buf, count)) + return -EFAULT; + + type_buf = strstrip(buffer); + if (!strcmp(type_buf, "file")) + type = RECLAIM_FILE; + else if (!strcmp(type_buf, "anon")) + type = RECLAIM_ANON; + else if (!strcmp(type_buf, "all")) + type = RECLAIM_ALL; + else if (isdigit(*type_buf)) + type = RECLAIM_RANGE; + else + goto out_err; + + if (type == RECLAIM_RANGE) { + char *token; + unsigned long long len, len_in, tmp; + token = strsep(&type_buf, " "); + if (!token) + goto out_err; + tmp = 
memparse(token, &token); + if (tmp & ~PAGE_MASK || tmp > ULONG_MAX) + goto out_err; + start = tmp; + + token = strsep(&type_buf, " "); + if (!token) + goto out_err; + len_in = memparse(token, &token); + len = (len_in + ~PAGE_MASK) & PAGE_MASK; + if (len > ULONG_MAX) + goto out_err; + /* + * Check to see whether len was rounded up from small -ve + * to zero. + */ + if (len_in && !len) + goto out_err; + + end = start + len; + if (end < start) + goto out_err; + } + + task = get_proc_task(file->f_path.dentry->d_inode); + if (!task) + return -ESRCH; + + mm = get_task_mm(task); + if (!mm) + goto out; + + reclaim_walk.mm = mm; + reclaim_walk.pmd_entry = reclaim_pte_range; + + rp.nr_to_reclaim = ~0; + rp.nr_reclaimed = 0; + reclaim_walk.private = &rp; + + down_read(&mm->mmap_sem); + if (type == RECLAIM_RANGE) { + vma = find_vma(mm, start); + while (vma) { + if (vma->vm_start > end) + break; + if (is_vm_hugetlb_page(vma)) + continue; + + rp.vma = vma; + walk_page_range(max(vma->vm_start, start), + min(vma->vm_end, end), + &reclaim_walk); + vma = vma->vm_next; + } + } else { + for (vma = mm->mmap; vma; vma = vma->vm_next) { + if (is_vm_hugetlb_page(vma)) + continue; + + if (type == RECLAIM_ANON && vma->vm_file) + continue; + + if (type == RECLAIM_FILE && !vma->vm_file) + continue; + + rp.vma = vma; + walk_page_range(vma->vm_start, vma->vm_end, + &reclaim_walk); + } + } + + flush_tlb_mm(mm); + up_read(&mm->mmap_sem); + mmput(mm); +out: + put_task_struct(task); + return count; + +out_err: + return -EINVAL; +} + +const struct file_operations proc_reclaim_operations = { + .write = reclaim_write, + .llseek = noop_llseek, +}; +#endif + #ifdef CONFIG_NUMA struct numa_maps { diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c index 2a4520a63993..293cec31a5e8 100644 --- a/fs/sdcardfs/inode.c +++ b/fs/sdcardfs/inode.c @@ -596,7 +596,7 @@ static const char *sdcardfs_follow_link(struct dentry *dentry, void **cookie) static int sdcardfs_permission_wrn(struct inode *inode, int mask) { 
- WARN_RATELIMIT(1, "sdcardfs does not support permission. Use permission2.\n"); + pr_debug("sdcardfs does not support permission. Use permission2.\n"); return -EINVAL; } diff --git a/fs/super.c b/fs/super.c index c96434ea71e2..cbd4fab271d4 100644 --- a/fs/super.c +++ b/fs/super.c @@ -968,7 +968,7 @@ static int set_bdev_super(struct super_block *s, void *data) * We set the bdi here to the queue backing, file systems can * overwrite this in ->fill_super() */ - s->s_bdi = &bdev_get_queue(s->s_bdev)->backing_dev_info; + s->s_bdi = bdev_get_queue(s->s_bdev)->backing_dev_info; return 0; } diff --git a/fs/timerfd.c b/fs/timerfd.c index 1327a02ec778..0548c572839c 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -50,7 +50,8 @@ static DEFINE_SPINLOCK(cancel_lock); static inline bool isalarm(struct timerfd_ctx *ctx) { return ctx->clockid == CLOCK_REALTIME_ALARM || - ctx->clockid == CLOCK_BOOTTIME_ALARM; + ctx->clockid == CLOCK_BOOTTIME_ALARM || + ctx->clockid == CLOCK_POWEROFF_ALARM; } /* @@ -142,7 +143,8 @@ static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags) { spin_lock(&ctx->cancel_lock); if ((ctx->clockid == CLOCK_REALTIME || - ctx->clockid == CLOCK_REALTIME_ALARM) && + ctx->clockid == CLOCK_REALTIME_ALARM || + ctx->clockid == CLOCK_POWEROFF_ALARM) && (flags & TFD_TIMER_ABSTIME) && (flags & TFD_TIMER_CANCEL_ON_SET)) { if (!ctx->might_cancel) { ctx->might_cancel = true; @@ -174,6 +176,7 @@ static int timerfd_setup(struct timerfd_ctx *ctx, int flags, enum hrtimer_mode htmode; ktime_t texp; int clockid = ctx->clockid; + enum alarmtimer_type type; htmode = (flags & TFD_TIMER_ABSTIME) ? HRTIMER_MODE_ABS: HRTIMER_MODE_REL; @@ -184,10 +187,8 @@ static int timerfd_setup(struct timerfd_ctx *ctx, int flags, ctx->tintv = timespec_to_ktime(ktmr->it_interval); if (isalarm(ctx)) { - alarm_init(&ctx->t.alarm, - ctx->clockid == CLOCK_REALTIME_ALARM ? 
- ALARM_REALTIME : ALARM_BOOTTIME, - timerfd_alarmproc); + type = clock2alarm(ctx->clockid); + alarm_init(&ctx->t.alarm, type, timerfd_alarmproc); } else { hrtimer_init(&ctx->t.tmr, clockid, htmode); hrtimer_set_expires(&ctx->t.tmr, texp); @@ -387,6 +388,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) { int ufd; struct timerfd_ctx *ctx; + enum alarmtimer_type type; /* Check the TFD_* constants for consistency. */ BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC); @@ -397,7 +399,8 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) clockid != CLOCK_REALTIME && clockid != CLOCK_REALTIME_ALARM && clockid != CLOCK_BOOTTIME && - clockid != CLOCK_BOOTTIME_ALARM)) + clockid != CLOCK_BOOTTIME_ALARM && + clockid != CLOCK_POWEROFF_ALARM)) return -EINVAL; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); @@ -408,13 +411,12 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) spin_lock_init(&ctx->cancel_lock); ctx->clockid = clockid; - if (isalarm(ctx)) - alarm_init(&ctx->t.alarm, - ctx->clockid == CLOCK_REALTIME_ALARM ? - ALARM_REALTIME : ALARM_BOOTTIME, - timerfd_alarmproc); - else + if (isalarm(ctx)) { + type = clock2alarm(ctx->clockid); + alarm_init(&ctx->t.alarm, type, timerfd_alarmproc); + } else { hrtimer_init(&ctx->t.tmr, clockid, HRTIMER_MODE_ABS); + } ctx->moffs = ktime_mono_to_real((ktime_t){ .tv64 = 0 }); @@ -486,6 +488,10 @@ static int do_timerfd_settime(int ufd, int flags, ret = timerfd_setup(ctx, flags, new); spin_unlock_irq(&ctx->wqh.lock); + + if (ctx->clockid == CLOCK_POWEROFF_ALARM) + set_power_on_alarm(); + fdput(f); return ret; } diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h index 92a8491a8f8c..c0a95e393347 100644 --- a/fs/ubifs/key.h +++ b/fs/ubifs/key.h @@ -34,6 +34,12 @@ * node. We use "r5" hash borrowed from reiserfs. */ +/* + * Lot's of the key helpers require a struct ubifs_info *c as the first parameter. + * But we are not using it at all currently. That's designed for future extensions of + * different c->key_format. 
But right now, there is only one key type, UBIFS_SIMPLE_KEY_FMT. + */ + #ifndef __UBIFS_KEY_H__ #define __UBIFS_KEY_H__ diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index b5bf23b34241..de6f82d4eda2 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -268,7 +268,7 @@ static int check_namespace(const struct qstr *nm) if (!strncmp(nm->name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) { - if (nm->name[sizeof(XATTR_TRUSTED_PREFIX) - 1] == '\0') + if (nm->name[XATTR_TRUSTED_PREFIX_LEN] == '\0') return -EINVAL; type = TRUSTED_XATTR; } else if (!strncmp(nm->name, XATTR_USER_PREFIX, @@ -278,7 +278,7 @@ static int check_namespace(const struct qstr *nm) type = USER_XATTR; } else if (!strncmp(nm->name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) { - if (nm->name[sizeof(XATTR_SECURITY_PREFIX) - 1] == '\0') + if (nm->name[XATTR_SECURITY_PREFIX_LEN] == '\0') return -EINVAL; type = SECURITY_XATTR; } else diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index c5101a3295d8..62ba66e1c598 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -289,8 +289,10 @@ xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) if (type == ACL_TYPE_ACCESS) { umode_t mode; - + struct posix_acl *old_acl = acl; error = posix_acl_update_mode(inode, &mode, &acl); + if (!acl) + posix_acl_release(old_acl); if (error) return error; error = xfs_set_mode(inode, mode); |