summaryrefslogtreecommitdiff
path: root/fs/ext4
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/Kconfig54
-rw-r--r--fs/ext4/crypto_fname.c5
-rw-r--r--fs/ext4/crypto_key.c4
-rw-r--r--fs/ext4/crypto_policy.c17
-rw-r--r--fs/ext4/ext4.h4
-rw-r--r--fs/ext4/extents.c6
-rw-r--r--fs/ext4/file.c68
-rw-r--r--fs/ext4/ialloc.c6
-rw-r--r--fs/ext4/indirect.c1
-rw-r--r--fs/ext4/inode.c49
-rw-r--r--fs/ext4/ioctl.c1
-rw-r--r--fs/ext4/mballoc.c16
-rw-r--r--fs/ext4/migrate.c17
-rw-r--r--fs/ext4/mmp.c48
-rw-r--r--fs/ext4/namei.c63
-rw-r--r--fs/ext4/page-io.c26
-rw-r--r--fs/ext4/readpage.c8
-rw-r--r--fs/ext4/super.c68
18 files changed, 334 insertions, 127 deletions
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index bf8bc8aba471..47728da7702c 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -1,5 +1,38 @@
+# Ext3 configs are here for backward compatibility with old configs which may
+# have EXT3_FS set but not EXT4_FS set and thus would result in non-bootable
+# kernels after the removal of ext3 driver.
+config EXT3_FS
+ tristate "The Extended 3 (ext3) filesystem"
+ # These must match EXT4_FS selects...
+ select EXT4_FS
+ select JBD2
+ select CRC16
+ select CRYPTO
+ select CRYPTO_CRC32C
+ help
+ This config option is here only for backward compatibility. ext3
+ filesystem is now handled by the ext4 driver.
+
+config EXT3_FS_POSIX_ACL
+ bool "Ext3 POSIX Access Control Lists"
+ depends on EXT3_FS
+ select EXT4_FS_POSIX_ACL
+ select FS_POSIX_ACL
+ help
+ This config option is here only for backward compatibility. ext3
+ filesystem is now handled by the ext4 driver.
+
+config EXT3_FS_SECURITY
+ bool "Ext3 Security Labels"
+ depends on EXT3_FS
+ select EXT4_FS_SECURITY
+ help
+ This config option is here only for backward compatibility. ext3
+ filesystem is now handled by the ext4 driver.
+
config EXT4_FS
tristate "The Extended 4 (ext4) filesystem"
+ # Please update EXT3_FS selects when changing these
select JBD2
select CRC16
select CRYPTO
@@ -16,26 +49,27 @@ config EXT4_FS
up fsck time. For more information, please see the web pages at
http://ext4.wiki.kernel.org.
- The ext4 filesystem will support mounting an ext3
- filesystem; while there will be some performance gains from
- the delayed allocation and inode table readahead, the best
- performance gains will require enabling ext4 features in the
- filesystem, or formatting a new filesystem as an ext4
- filesystem initially.
+ The ext4 filesystem supports mounting an ext3 filesystem; while there
+ are some performance gains from the delayed allocation and inode
+ table readahead, the best performance gains require enabling ext4
+ features in the filesystem using tune2fs, or formatting a new
+ filesystem as an ext4 filesystem initially. Without explicit enabling
+ of ext4 features, the on disk filesystem format stays fully backward
+ compatible.
To compile this file system support as a module, choose M here. The
module will be called ext4.
If unsure, say N.
-config EXT4_USE_FOR_EXT23
+config EXT4_USE_FOR_EXT2
bool "Use ext4 for ext2/ext3 file systems"
depends on EXT4_FS
- depends on EXT3_FS=n || EXT2_FS=n
+ depends on EXT2_FS=n
default y
help
- Allow the ext4 file system driver code to be used for ext2 or
- ext3 file system mounts. This allows users to reduce their
+ Allow the ext4 file system driver code to be used for ext2
+ file system mounts. This allows users to reduce their
compiled kernel size by using one file system driver for
ext2, ext3, and ext4 file systems.
diff --git a/fs/ext4/crypto_fname.c b/fs/ext4/crypto_fname.c
index 7dc4eb55913c..847f919c84d9 100644
--- a/fs/ext4/crypto_fname.c
+++ b/fs/ext4/crypto_fname.c
@@ -19,7 +19,6 @@
#include <linux/gfp.h>
#include <linux/kernel.h>
#include <linux/key.h>
-#include <linux/key.h>
#include <linux/list.h>
#include <linux/mempool.h>
#include <linux/random.h>
@@ -329,6 +328,10 @@ int _ext4_fname_disk_to_usr(struct inode *inode,
return oname->len;
}
}
+ if (iname->len < EXT4_CRYPTO_BLOCK_SIZE) {
+ EXT4_ERROR_INODE(inode, "encrypted inode too small");
+ return -EUCLEAN;
+ }
if (EXT4_I(inode)->i_crypt_info)
return ext4_fname_decrypt(inode, iname, oname);
diff --git a/fs/ext4/crypto_key.c b/fs/ext4/crypto_key.c
index 442d24e8efc0..1d510c11b100 100644
--- a/fs/ext4/crypto_key.c
+++ b/fs/ext4/crypto_key.c
@@ -30,7 +30,7 @@ static void derive_crypt_complete(struct crypto_async_request *req, int rc)
/**
* ext4_derive_key_aes() - Derive a key using AES-128-ECB
- * @deriving_key: Encryption key used for derivatio.
+ * @deriving_key: Encryption key used for derivation.
* @source_key: Source key to which to apply derivation.
* @derived_key: Derived key.
*
@@ -220,6 +220,8 @@ retry:
BUG_ON(master_key->size != EXT4_AES_256_XTS_KEY_SIZE);
res = ext4_derive_key_aes(ctx.nonce, master_key->raw,
raw_key);
+ if (res)
+ goto out;
got_key:
ctfm = crypto_alloc_ablkcipher(cipher_str, 0, 0);
if (!ctfm || IS_ERR(ctfm)) {
diff --git a/fs/ext4/crypto_policy.c b/fs/ext4/crypto_policy.c
index 02c4e5df7afb..a640ec2c4b13 100644
--- a/fs/ext4/crypto_policy.c
+++ b/fs/ext4/crypto_policy.c
@@ -12,6 +12,7 @@
#include <linux/string.h>
#include <linux/types.h>
+#include "ext4_jbd2.h"
#include "ext4.h"
#include "xattr.h"
@@ -49,7 +50,8 @@ static int ext4_create_encryption_context_from_policy(
struct inode *inode, const struct ext4_encryption_policy *policy)
{
struct ext4_encryption_context ctx;
- int res = 0;
+ handle_t *handle;
+ int res, res2;
res = ext4_convert_inline_data(inode);
if (res)
@@ -78,11 +80,22 @@ static int ext4_create_encryption_context_from_policy(
BUILD_BUG_ON(sizeof(ctx.nonce) != EXT4_KEY_DERIVATION_NONCE_SIZE);
get_random_bytes(ctx.nonce, EXT4_KEY_DERIVATION_NONCE_SIZE);
+ handle = ext4_journal_start(inode, EXT4_HT_MISC,
+ ext4_jbd2_credits_xattr(inode));
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
res = ext4_xattr_set(inode, EXT4_XATTR_INDEX_ENCRYPTION,
EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx,
sizeof(ctx), 0);
- if (!res)
+ if (!res) {
ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
+ res = ext4_mark_inode_dirty(handle, inode);
+ if (res)
+ EXT4_ERROR_INODE(inode, "Failed to mark inode dirty");
+ }
+ res2 = ext4_journal_stop(handle);
+ if (!res)
+ res = res2;
return res;
}
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index f5e9f04220c1..fd1f28be5296 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -187,7 +187,7 @@ typedef struct ext4_io_end {
} ext4_io_end_t;
struct ext4_io_submit {
- int io_op;
+ struct writeback_control *io_wbc;
struct bio *io_bio;
ext4_io_end_t *io_end;
sector_t io_next_block;
@@ -2272,6 +2272,8 @@ struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
int ext4_get_block_write(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create);
+int ext4_get_block_dax(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create);
int ext4_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create);
int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index aadb72828834..2553aa8b608d 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -504,7 +504,7 @@ __read_extent_tree_block(const char *function, unsigned int line,
struct buffer_head *bh;
int err;
- bh = sb_getblk(inode->i_sb, pblk);
+ bh = sb_getblk_gfp(inode->i_sb, pblk, __GFP_MOVABLE | GFP_NOFS);
if (unlikely(!bh))
return ERR_PTR(-ENOMEM);
@@ -1089,7 +1089,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
err = -EIO;
goto cleanup;
}
- bh = sb_getblk(inode->i_sb, newblock);
+ bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS);
if (unlikely(!bh)) {
err = -ENOMEM;
goto cleanup;
@@ -1283,7 +1283,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
if (newblock == 0)
return err;
- bh = sb_getblk(inode->i_sb, newblock);
+ bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS);
if (unlikely(!bh))
return -ENOMEM;
lock_buffer(bh);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index bc313ac5d3fa..113837e7ba98 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -22,6 +22,7 @@
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/path.h>
+#include <linux/dax.h>
#include <linux/quotaops.h>
#include <linux/pagevec.h>
#include <linux/uio.h>
@@ -195,7 +196,7 @@ out:
static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate)
{
struct inode *inode = bh->b_assoc_map->host;
- /* XXX: breaks on 32-bit > 16GB. Is that even supported? */
+ /* XXX: breaks on 32-bit > 16TB. Is that even supported? */
loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits;
int err;
if (!uptodate)
@@ -206,17 +207,74 @@ static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate)
static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
- return dax_fault(vma, vmf, ext4_get_block, ext4_end_io_unwritten);
- /* Is this the right get_block? */
+ int result;
+ handle_t *handle = NULL;
+ struct super_block *sb = file_inode(vma->vm_file)->i_sb;
+ bool write = vmf->flags & FAULT_FLAG_WRITE;
+
+ if (write) {
+ sb_start_pagefault(sb);
+ file_update_time(vma->vm_file);
+ handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
+ EXT4_DATA_TRANS_BLOCKS(sb));
+ }
+
+ if (IS_ERR(handle))
+ result = VM_FAULT_SIGBUS;
+ else
+ result = __dax_fault(vma, vmf, ext4_get_block_dax,
+ ext4_end_io_unwritten);
+
+ if (write) {
+ if (!IS_ERR(handle))
+ ext4_journal_stop(handle);
+ sb_end_pagefault(sb);
+ }
+
+ return result;
+}
+
+static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
+ pmd_t *pmd, unsigned int flags)
+{
+ int result;
+ handle_t *handle = NULL;
+ struct inode *inode = file_inode(vma->vm_file);
+ struct super_block *sb = inode->i_sb;
+ bool write = flags & FAULT_FLAG_WRITE;
+
+ if (write) {
+ sb_start_pagefault(sb);
+ file_update_time(vma->vm_file);
+ handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
+ ext4_chunk_trans_blocks(inode,
+ PMD_SIZE / PAGE_SIZE));
+ }
+
+ if (IS_ERR(handle))
+ result = VM_FAULT_SIGBUS;
+ else
+ result = __dax_pmd_fault(vma, addr, pmd, flags,
+ ext4_get_block_dax, ext4_end_io_unwritten);
+
+ if (write) {
+ if (!IS_ERR(handle))
+ ext4_journal_stop(handle);
+ sb_end_pagefault(sb);
+ }
+
+ return result;
}
static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
- return dax_mkwrite(vma, vmf, ext4_get_block, ext4_end_io_unwritten);
+ return dax_mkwrite(vma, vmf, ext4_get_block_dax,
+ ext4_end_io_unwritten);
}
static const struct vm_operations_struct ext4_dax_vm_ops = {
.fault = ext4_dax_fault,
+ .pmd_fault = ext4_dax_pmd_fault,
.page_mkwrite = ext4_dax_mkwrite,
.pfn_mkwrite = dax_pfn_mkwrite,
};
@@ -244,7 +302,7 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
file_accessed(file);
if (IS_DAX(file_inode(file))) {
vma->vm_ops = &ext4_dax_vm_ops;
- vma->vm_flags |= VM_MIXEDMAP;
+ vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
} else {
vma->vm_ops = &ext4_file_vm_ops;
}
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 173c1ae21395..619bfc1fda8c 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -721,7 +721,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
struct ext4_group_desc *gdp = NULL;
struct ext4_inode_info *ei;
struct ext4_sb_info *sbi;
- int ret2, err = 0;
+ int ret2, err;
struct inode *ret;
ext4_group_t i;
ext4_group_t flex_group;
@@ -769,7 +769,9 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
inode->i_gid = dir->i_gid;
} else
inode_init_owner(inode, dir, mode);
- dquot_initialize(inode);
+ err = dquot_initialize(inode);
+ if (err)
+ goto out;
if (!goal)
goal = sbi->s_inode_goal;
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 4f6ac499f09e..2468261748b2 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -22,6 +22,7 @@
#include "ext4_jbd2.h"
#include "truncate.h"
+#include <linux/dax.h>
#include <linux/uio.h>
#include <trace/events/ext4.h>
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 41f8e55afcd1..612fbcf76b5c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -22,6 +22,7 @@
#include <linux/time.h>
#include <linux/highuid.h>
#include <linux/pagemap.h>
+#include <linux/dax.h>
#include <linux/quotaops.h>
#include <linux/string.h>
#include <linux/buffer_head.h>
@@ -1323,7 +1324,7 @@ static void ext4_da_page_release_reservation(struct page *page,
unsigned int offset,
unsigned int length)
{
- int to_release = 0;
+ int to_release = 0, contiguous_blks = 0;
struct buffer_head *head, *bh;
unsigned int curr_off = 0;
struct inode *inode = page->mapping->host;
@@ -1344,14 +1345,23 @@ static void ext4_da_page_release_reservation(struct page *page,
if ((offset <= curr_off) && (buffer_delay(bh))) {
to_release++;
+ contiguous_blks++;
clear_buffer_delay(bh);
+ } else if (contiguous_blks) {
+ lblk = page->index <<
+ (PAGE_CACHE_SHIFT - inode->i_blkbits);
+ lblk += (curr_off >> inode->i_blkbits) -
+ contiguous_blks;
+ ext4_es_remove_extent(inode, lblk, contiguous_blks);
+ contiguous_blks = 0;
}
curr_off = next_off;
} while ((bh = bh->b_this_page) != head);
- if (to_release) {
+ if (contiguous_blks) {
lblk = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
- ext4_es_remove_extent(inode, lblk, to_release);
+ lblk += (curr_off >> inode->i_blkbits) - contiguous_blks;
+ ext4_es_remove_extent(inode, lblk, contiguous_blks);
}
/* If we have released all the blocks belonging to a cluster, then we
@@ -3011,6 +3021,17 @@ static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
EXT4_GET_BLOCKS_NO_LOCK);
}
+int ext4_get_block_dax(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
+{
+ int flags = EXT4_GET_BLOCKS_PRE_IO | EXT4_GET_BLOCKS_UNWRIT_EXT;
+ if (create)
+ flags |= EXT4_GET_BLOCKS_CREATE;
+ ext4_debug("ext4_get_block_dax: inode %lu, create flag %d\n",
+ inode->i_ino, create);
+ return _ext4_get_block(inode, iblock, bh_result, flags);
+}
+
static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
ssize_t size, void *private)
{
@@ -4344,7 +4365,12 @@ static void ext4_update_other_inodes_time(struct super_block *sb,
int inode_size = EXT4_INODE_SIZE(sb);
oi.orig_ino = orig_ino;
- ino = (orig_ino & ~(inodes_per_block - 1)) + 1;
+ /*
+ * Calculate the first inode in the inode table block. Inode
+ * numbers are one-based. That is, the first inode in a block
+ * (assuming 4k blocks and 256 byte inodes) is (n*16 + 1).
+ */
+ ino = ((orig_ino - 1) & ~(inodes_per_block - 1)) + 1;
for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) {
if (ino == orig_ino)
continue;
@@ -4647,8 +4673,11 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
if (error)
return error;
- if (is_quota_modification(inode, attr))
- dquot_initialize(inode);
+ if (is_quota_modification(inode, attr)) {
+ error = dquot_initialize(inode);
+ if (error)
+ return error;
+ }
if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) ||
(ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) {
handle_t *handle;
@@ -4711,6 +4740,14 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
error = ext4_orphan_add(handle, inode);
orphan = 1;
}
+ /*
+ * Update c/mtime on truncate up, ext4_truncate() will
+ * update c/mtime in shrink case below
+ */
+ if (!shrink) {
+ inode->i_mtime = ext4_current_time(inode);
+ inode->i_ctime = inode->i_mtime;
+ }
down_write(&EXT4_I(inode)->i_data_sem);
EXT4_I(inode)->i_disksize = attr->ia_size;
rc = ext4_mark_inode_dirty(handle, inode);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index cb8451246b30..1346cfa355d0 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -755,7 +755,6 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
return err;
}
case EXT4_IOC_MOVE_EXT:
- case FITRIM:
case EXT4_IOC_RESIZE_FS:
case EXT4_IOC_PRECACHE_EXTENTS:
case EXT4_IOC_SET_ENCRYPTION_POLICY:
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index f6aedf88da43..34b610ea5030 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4816,18 +4816,12 @@ do_more:
/*
* blocks being freed are metadata. these blocks shouldn't
* be used until this transaction is committed
+ *
+ * We use __GFP_NOFAIL because ext4_free_blocks() is not allowed
+ * to fail.
*/
- retry:
- new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS);
- if (!new_entry) {
- /*
- * We use a retry loop because
- * ext4_free_blocks() is not allowed to fail.
- */
- cond_resched();
- congestion_wait(BLK_RW_ASYNC, HZ/50);
- goto retry;
- }
+ new_entry = kmem_cache_alloc(ext4_free_data_cachep,
+ GFP_NOFS|__GFP_NOFAIL);
new_entry->efd_start_cluster = bit;
new_entry->efd_group = block_group;
new_entry->efd_count = count_clusters;
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index b52374e42102..6163ad21cb0e 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -620,6 +620,7 @@ int ext4_ind_migrate(struct inode *inode)
struct ext4_inode_info *ei = EXT4_I(inode);
struct ext4_extent *ex;
unsigned int i, len;
+ ext4_lblk_t start, end;
ext4_fsblk_t blk;
handle_t *handle;
int ret;
@@ -633,6 +634,14 @@ int ext4_ind_migrate(struct inode *inode)
EXT4_FEATURE_RO_COMPAT_BIGALLOC))
return -EOPNOTSUPP;
+ /*
+ * In order to get correct extent info, force all delayed allocation
+ * blocks to be allocated, otherwise delayed allocation blocks may not
+ * be reflected and bypass the checks on extent header.
+ */
+ if (test_opt(inode->i_sb, DELALLOC))
+ ext4_alloc_da_blocks(inode);
+
handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1);
if (IS_ERR(handle))
return PTR_ERR(handle);
@@ -650,11 +659,13 @@ int ext4_ind_migrate(struct inode *inode)
goto errout;
}
if (eh->eh_entries == 0)
- blk = len = 0;
+ blk = len = start = end = 0;
else {
len = le16_to_cpu(ex->ee_len);
blk = ext4_ext_pblock(ex);
- if (len > EXT4_NDIR_BLOCKS) {
+ start = le32_to_cpu(ex->ee_block);
+ end = start + len - 1;
+ if (end >= EXT4_NDIR_BLOCKS) {
ret = -EOPNOTSUPP;
goto errout;
}
@@ -662,7 +673,7 @@ int ext4_ind_migrate(struct inode *inode)
ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
memset(ei->i_data, 0, sizeof(ei->i_data));
- for (i=0; i < len; i++)
+ for (i = start; i <= end; i++)
ei->i_data[i] = cpu_to_le32(blk++);
ext4_mark_inode_dirty(handle, inode);
errout:
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index 8313ca3324ec..6eb1a619890c 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -69,6 +69,7 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
ext4_fsblk_t mmp_block)
{
struct mmp_struct *mmp;
+ int ret;
if (*bh)
clear_buffer_uptodate(*bh);
@@ -76,33 +77,36 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
/* This would be sb_bread(sb, mmp_block), except we need to be sure
* that the MD RAID device cache has been bypassed, and that the read
* is not blocked in the elevator. */
- if (!*bh)
+ if (!*bh) {
*bh = sb_getblk(sb, mmp_block);
- if (!*bh)
- return -ENOMEM;
- if (*bh) {
- get_bh(*bh);
- lock_buffer(*bh);
- (*bh)->b_end_io = end_buffer_read_sync;
- submit_bh(READ_SYNC | REQ_META | REQ_PRIO, *bh);
- wait_on_buffer(*bh);
- if (!buffer_uptodate(*bh)) {
- brelse(*bh);
- *bh = NULL;
+ if (!*bh) {
+ ret = -ENOMEM;
+ goto warn_exit;
}
}
- if (unlikely(!*bh)) {
- ext4_warning(sb, "Error while reading MMP block %llu",
- mmp_block);
- return -EIO;
+
+ get_bh(*bh);
+ lock_buffer(*bh);
+ (*bh)->b_end_io = end_buffer_read_sync;
+ submit_bh(READ_SYNC | REQ_META | REQ_PRIO, *bh);
+ wait_on_buffer(*bh);
+ if (!buffer_uptodate(*bh)) {
+ brelse(*bh);
+ *bh = NULL;
+ ret = -EIO;
+ goto warn_exit;
}
mmp = (struct mmp_struct *)((*bh)->b_data);
- if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC ||
- !ext4_mmp_csum_verify(sb, mmp))
- return -EINVAL;
-
- return 0;
+ if (le32_to_cpu(mmp->mmp_magic) == EXT4_MMP_MAGIC &&
+ ext4_mmp_csum_verify(sb, mmp))
+ return 0;
+ ret = -EINVAL;
+
+warn_exit:
+ ext4_warning(sb, "Error %d while reading MMP block %llu",
+ ret, mmp_block);
+ return ret;
}
/*
@@ -111,7 +115,7 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
const char *function, unsigned int line, const char *msg)
{
- __ext4_warning(sb, function, line, msg);
+ __ext4_warning(sb, function, line, "%s", msg);
__ext4_warning(sb, function, line,
"MMP failure info: last update time: %llu, last update "
"node: %s, last update device: %s\n",
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 011dcfb5cce3..9f61e7679a6d 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2436,7 +2436,9 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
struct inode *inode;
int err, credits, retries = 0;
- dquot_initialize(dir);
+ err = dquot_initialize(dir);
+ if (err)
+ return err;
credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
@@ -2470,7 +2472,9 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry,
if (!new_valid_dev(rdev))
return -EINVAL;
- dquot_initialize(dir);
+ err = dquot_initialize(dir);
+ if (err)
+ return err;
credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
@@ -2499,7 +2503,9 @@ static int ext4_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
struct inode *inode;
int err, retries = 0;
- dquot_initialize(dir);
+ err = dquot_initialize(dir);
+ if (err)
+ return err;
retry:
inode = ext4_new_inode_start_handle(dir, mode,
@@ -2612,7 +2618,9 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
if (EXT4_DIR_LINK_MAX(dir))
return -EMLINK;
- dquot_initialize(dir);
+ err = dquot_initialize(dir);
+ if (err)
+ return err;
credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
@@ -2910,8 +2918,12 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
/* Initialize quotas before so that eventual writes go in
* separate transaction */
- dquot_initialize(dir);
- dquot_initialize(d_inode(dentry));
+ retval = dquot_initialize(dir);
+ if (retval)
+ return retval;
+ retval = dquot_initialize(d_inode(dentry));
+ if (retval)
+ return retval;
retval = -ENOENT;
bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
@@ -2980,8 +2992,12 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
trace_ext4_unlink_enter(dir, dentry);
/* Initialize quotas before so that eventual writes go
* in separate transaction */
- dquot_initialize(dir);
- dquot_initialize(d_inode(dentry));
+ retval = dquot_initialize(dir);
+ if (retval)
+ return retval;
+ retval = dquot_initialize(d_inode(dentry));
+ if (retval)
+ return retval;
retval = -ENOENT;
bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
@@ -3066,7 +3082,9 @@ static int ext4_symlink(struct inode *dir,
goto err_free_sd;
}
- dquot_initialize(dir);
+ err = dquot_initialize(dir);
+ if (err)
+ goto err_free_sd;
if ((disk_link.len > EXT4_N_BLOCKS * 4)) {
/*
@@ -3197,7 +3215,9 @@ static int ext4_link(struct dentry *old_dentry,
if (ext4_encrypted_inode(dir) &&
!ext4_is_child_context_consistent_with_parent(dir, inode))
return -EPERM;
- dquot_initialize(dir);
+ err = dquot_initialize(dir);
+ if (err)
+ return err;
retry:
handle = ext4_journal_start(dir, EXT4_HT_DIR,
@@ -3476,13 +3496,20 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
int credits;
u8 old_file_type;
- dquot_initialize(old.dir);
- dquot_initialize(new.dir);
+ retval = dquot_initialize(old.dir);
+ if (retval)
+ return retval;
+ retval = dquot_initialize(new.dir);
+ if (retval)
+ return retval;
/* Initialize quotas before so that eventual writes go
* in separate transaction */
- if (new.inode)
- dquot_initialize(new.inode);
+ if (new.inode) {
+ retval = dquot_initialize(new.inode);
+ if (retval)
+ return retval;
+ }
old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
if (IS_ERR(old.bh))
@@ -3678,8 +3705,12 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
new.inode)))
return -EPERM;
- dquot_initialize(old.dir);
- dquot_initialize(new.dir);
+ retval = dquot_initialize(old.dir);
+ if (retval)
+ return retval;
+ retval = dquot_initialize(new.dir);
+ if (retval)
+ return retval;
old.bh = ext4_find_entry(old.dir, &old.dentry->d_name,
&old.de, &old.inlined);
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 5602450f03f6..84ba4d2b3a35 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -61,7 +61,6 @@ static void buffer_io_error(struct buffer_head *bh)
static void ext4_finish_bio(struct bio *bio)
{
int i;
- int error = !test_bit(BIO_UPTODATE, &bio->bi_flags);
struct bio_vec *bvec;
bio_for_each_segment_all(bvec, bio, i) {
@@ -88,7 +87,7 @@ static void ext4_finish_bio(struct bio *bio)
}
#endif
- if (error) {
+ if (bio->bi_error) {
SetPageError(page);
set_bit(AS_EIO, &page->mapping->flags);
}
@@ -107,7 +106,7 @@ static void ext4_finish_bio(struct bio *bio)
continue;
}
clear_buffer_async_write(bh);
- if (error)
+ if (bio->bi_error)
buffer_io_error(bh);
} while ((bh = bh->b_this_page) != head);
bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
@@ -310,27 +309,25 @@ ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end)
}
/* BIO completion function for page writeback */
-static void ext4_end_bio(struct bio *bio, int error)
+static void ext4_end_bio(struct bio *bio)
{
ext4_io_end_t *io_end = bio->bi_private;
sector_t bi_sector = bio->bi_iter.bi_sector;
BUG_ON(!io_end);
bio->bi_end_io = NULL;
- if (test_bit(BIO_UPTODATE, &bio->bi_flags))
- error = 0;
- if (error) {
+ if (bio->bi_error) {
struct inode *inode = io_end->inode;
ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu "
"(offset %llu size %ld starting block %llu)",
- error, inode->i_ino,
+ bio->bi_error, inode->i_ino,
(unsigned long long) io_end->offset,
(long) io_end->size,
(unsigned long long)
bi_sector >> (inode->i_blkbits - 9));
- mapping_set_error(inode->i_mapping, error);
+ mapping_set_error(inode->i_mapping, bio->bi_error);
}
if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
@@ -357,8 +354,10 @@ void ext4_io_submit(struct ext4_io_submit *io)
struct bio *bio = io->io_bio;
if (bio) {
+ int io_op = io->io_wbc->sync_mode == WB_SYNC_ALL ?
+ WRITE_SYNC : WRITE;
bio_get(io->io_bio);
- submit_bio(io->io_op, io->io_bio);
+ submit_bio(io_op, io->io_bio);
bio_put(io->io_bio);
}
io->io_bio = NULL;
@@ -367,7 +366,7 @@ void ext4_io_submit(struct ext4_io_submit *io)
void ext4_io_submit_init(struct ext4_io_submit *io,
struct writeback_control *wbc)
{
- io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
+ io->io_wbc = wbc;
io->io_bio = NULL;
io->io_end = NULL;
}
@@ -375,12 +374,12 @@ void ext4_io_submit_init(struct ext4_io_submit *io,
static int io_submit_init_bio(struct ext4_io_submit *io,
struct buffer_head *bh)
{
- int nvecs = bio_get_nr_vecs(bh->b_bdev);
struct bio *bio;
- bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES));
+ bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
if (!bio)
return -ENOMEM;
+ wbc_init_bio(io->io_wbc, bio);
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
bio->bi_bdev = bh->b_bdev;
bio->bi_end_io = ext4_end_bio;
@@ -409,6 +408,7 @@ submit_and_retry:
ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
if (ret != bh->b_size)
goto submit_and_retry;
+ wbc_account_io(io->io_wbc, page, bh->b_size);
io->io_next_block++;
return 0;
}
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index ec3ef93a52db..e26803fb210d 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -98,7 +98,7 @@ static inline bool ext4_bio_encrypted(struct bio *bio)
* status of that page is hard. See end_buffer_async_read() for the details.
* There is no point in duplicating all that complexity.
*/
-static void mpage_end_io(struct bio *bio, int err)
+static void mpage_end_io(struct bio *bio)
{
struct bio_vec *bv;
int i;
@@ -106,7 +106,7 @@ static void mpage_end_io(struct bio *bio, int err)
if (ext4_bio_encrypted(bio)) {
struct ext4_crypto_ctx *ctx = bio->bi_private;
- if (err) {
+ if (bio->bi_error) {
ext4_release_crypto_ctx(ctx);
} else {
INIT_WORK(&ctx->r.work, completion_pages);
@@ -118,7 +118,7 @@ static void mpage_end_io(struct bio *bio, int err)
bio_for_each_segment_all(bv, bio, i) {
struct page *page = bv->bv_page;
- if (!err) {
+ if (!bio->bi_error) {
SetPageUptodate(page);
} else {
ClearPageUptodate(page);
@@ -284,7 +284,7 @@ int ext4_mpage_readpages(struct address_space *mapping,
goto set_error_page;
}
bio = bio_alloc(GFP_KERNEL,
- min_t(int, nr_pages, bio_get_nr_vecs(bdev)));
+ min_t(int, nr_pages, BIO_MAX_PAGES));
if (!bio) {
if (ctx)
ext4_release_crypto_ctx(ctx);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 58987b5c514b..a63c7b0a10cf 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -60,6 +60,7 @@ static struct ext4_lazy_init *ext4_li_info;
static struct mutex ext4_li_mtx;
static struct ext4_features *ext4_feat;
static int ext4_mballoc_ready;
+static struct ratelimit_state ext4_mount_msg_ratelimit;
static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
unsigned long journal_devnum);
@@ -84,7 +85,7 @@ static void ext4_unregister_li_request(struct super_block *sb);
static void ext4_clear_request_list(void);
static int ext4_reserve_clusters(struct ext4_sb_info *, ext4_fsblk_t);
-#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
+#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
static struct file_system_type ext2_fs_type = {
.owner = THIS_MODULE,
.name = "ext2",
@@ -100,7 +101,6 @@ MODULE_ALIAS("ext2");
#endif
-#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
static struct file_system_type ext3_fs_type = {
.owner = THIS_MODULE,
.name = "ext3",
@@ -111,9 +111,6 @@ static struct file_system_type ext3_fs_type = {
MODULE_ALIAS_FS("ext3");
MODULE_ALIAS("ext3");
#define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
-#else
-#define IS_EXT3_SB(sb) (0)
-#endif
static int ext4_verify_csum_type(struct super_block *sb,
struct ext4_super_block *es)
@@ -325,6 +322,22 @@ static void save_error_info(struct super_block *sb, const char *func,
ext4_commit_super(sb, 1);
}
+/*
+ * The del_gendisk() function uninitializes the disk-specific data
+ * structures, including the bdi structure, without telling anyone
+ * else. Once this happens, any attempt to call mark_buffer_dirty()
+ * (for example, by ext4_commit_super), will cause a kernel OOPS.
+ * This is a kludge to prevent these oops until we can put in a proper
+ * hook in del_gendisk() to inform the VFS and file system layers.
+ */
+static int block_device_ejected(struct super_block *sb)
+{
+ struct inode *bd_inode = sb->s_bdev->bd_inode;
+ struct backing_dev_info *bdi = inode_to_bdi(bd_inode);
+
+ return bdi->dev == NULL;
+}
+
static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
{
struct super_block *sb = journal->j_private;
@@ -1394,9 +1407,9 @@ static const struct mount_opts {
{Opt_stripe, 0, MOPT_GTE0},
{Opt_resuid, 0, MOPT_GTE0},
{Opt_resgid, 0, MOPT_GTE0},
- {Opt_journal_dev, 0, MOPT_GTE0},
- {Opt_journal_path, 0, MOPT_STRING},
- {Opt_journal_ioprio, 0, MOPT_GTE0},
+ {Opt_journal_dev, 0, MOPT_NO_EXT2 | MOPT_GTE0},
+ {Opt_journal_path, 0, MOPT_NO_EXT2 | MOPT_STRING},
+ {Opt_journal_ioprio, 0, MOPT_NO_EXT2 | MOPT_GTE0},
{Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
{Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
{Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA,
@@ -1763,10 +1776,10 @@ static inline void ext4_show_quota_options(struct seq_file *seq,
}
if (sbi->s_qf_names[USRQUOTA])
- seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
+ seq_show_option(seq, "usrjquota", sbi->s_qf_names[USRQUOTA]);
if (sbi->s_qf_names[GRPQUOTA])
- seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);
+ seq_show_option(seq, "grpjquota", sbi->s_qf_names[GRPQUOTA]);
#endif
}
@@ -3643,6 +3656,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
}
if (test_opt(sb, DELALLOC))
clear_opt(sb, DELALLOC);
+ } else {
+ sb->s_iflags |= SB_I_CGROUPWB;
}
sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
@@ -4275,9 +4290,10 @@ no_journal:
"the device does not support discard");
}
- ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
- "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
- *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
+ if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount"))
+ ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
+ "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
+ *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
if (es->s_error_count)
mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
@@ -4617,7 +4633,7 @@ static int ext4_commit_super(struct super_block *sb, int sync)
struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
int error = 0;
- if (!sbh)
+ if (!sbh || block_device_ejected(sb))
return error;
if (buffer_write_io_error(sbh)) {
/*
@@ -4665,7 +4681,8 @@ static int ext4_commit_super(struct super_block *sb, int sync)
ext4_superblock_csum_set(sb);
mark_buffer_dirty(sbh);
if (sync) {
- error = sync_dirty_buffer(sbh);
+ error = __sync_dirty_buffer(sbh,
+ test_opt(sb, BARRIER) ? WRITE_FUA : WRITE_SYNC);
if (error)
return error;
@@ -4833,10 +4850,11 @@ static int ext4_freeze(struct super_block *sb)
error = jbd2_journal_flush(journal);
if (error < 0)
goto out;
+
+ /* Journal blocked and flushed, clear needs_recovery flag. */
+ EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
}
- /* Journal blocked and flushed, clear needs_recovery flag. */
- EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
error = ext4_commit_super(sb, 1);
out:
if (journal)
@@ -4854,8 +4872,11 @@ static int ext4_unfreeze(struct super_block *sb)
if (sb->s_flags & MS_RDONLY)
return 0;
- /* Reset the needs_recovery flag before the fs is unlocked. */
- EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
+ if (EXT4_SB(sb)->s_journal) {
+ /* Reset the needs_recovery flag before the fs is unlocked. */
+ EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
+ }
+
ext4_commit_super(sb, 1);
return 0;
}
@@ -5500,7 +5521,7 @@ static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super);
}
-#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
+#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
static inline void register_as_ext2(void)
{
int err = register_filesystem(&ext2_fs_type);
@@ -5530,7 +5551,6 @@ static inline void unregister_as_ext2(void) { }
static inline int ext2_feature_set_ok(struct super_block *sb) { return 0; }
#endif
-#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
static inline void register_as_ext3(void)
{
int err = register_filesystem(&ext3_fs_type);
@@ -5556,11 +5576,6 @@ static inline int ext3_feature_set_ok(struct super_block *sb)
return 0;
return 1;
}
-#else
-static inline void register_as_ext3(void) { }
-static inline void unregister_as_ext3(void) { }
-static inline int ext3_feature_set_ok(struct super_block *sb) { return 0; }
-#endif
static struct file_system_type ext4_fs_type = {
.owner = THIS_MODULE,
@@ -5610,6 +5625,7 @@ static int __init ext4_init_fs(void)
{
int i, err;
+ ratelimit_state_init(&ext4_mount_msg_ratelimit, 30 * HZ, 64);
ext4_li_info = NULL;
mutex_init(&ext4_li_mtx);