summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/aio.c2
-rw-r--r--fs/btrfs/Makefile2
-rw-r--r--fs/btrfs/ctree.c14
-rw-r--r--fs/btrfs/ctree.h141
-rw-r--r--fs/btrfs/dev-replace.c2
-rw-r--r--fs/btrfs/disk-io.c88
-rw-r--r--fs/btrfs/extent-tree.c112
-rw-r--r--fs/btrfs/extent_io.c43
-rw-r--r--fs/btrfs/extent_io.h19
-rw-r--r--fs/btrfs/extent_map.c2
-rw-r--r--fs/btrfs/extent_map.h10
-rw-r--r--fs/btrfs/scrub.c2
-rw-r--r--fs/btrfs/send.c11
-rw-r--r--fs/btrfs/struct-funcs.c9
-rw-r--r--fs/btrfs/tree-checker.c649
-rw-r--r--fs/btrfs/tree-checker.h38
-rw-r--r--fs/btrfs/volumes.c139
-rw-r--r--fs/btrfs/volumes.h2
-rw-r--r--fs/cachefiles/rdwr.c3
-rw-r--r--fs/ceph/caps.c1
-rw-r--r--fs/cifs/connect.c53
-rw-r--r--fs/cifs/file.c16
-rw-r--r--fs/cifs/readdir.c9
-rw-r--r--fs/cifs/smb2file.c8
-rw-r--r--fs/cifs/smb2maperror.c4
-rw-r--r--fs/cifs/smb2ops.c6
-rw-r--r--fs/cifs/smb2pdu.c4
-rw-r--r--fs/cifs/transport.c2
-rw-r--r--fs/dcache.c6
-rw-r--r--fs/debugfs/inode.c7
-rw-r--r--fs/dlm/ast.c10
-rw-r--r--fs/dlm/lock.c17
-rw-r--r--fs/dlm/lockspace.c2
-rw-r--r--fs/eventpoll.c2
-rw-r--r--fs/exec.c9
-rw-r--r--fs/exportfs/expfs.c2
-rw-r--r--fs/ext4/inline.c11
-rw-r--r--fs/ext4/resize.c2
-rw-r--r--fs/ext4/super.c13
-rw-r--r--fs/f2fs/acl.c20
-rw-r--r--fs/f2fs/checkpoint.c33
-rw-r--r--fs/f2fs/data.c206
-rw-r--r--fs/f2fs/debug.c31
-rw-r--r--fs/f2fs/dir.c20
-rw-r--r--fs/f2fs/f2fs.h116
-rw-r--r--fs/f2fs/file.c47
-rw-r--r--fs/f2fs/gc.c81
-rw-r--r--fs/f2fs/inline.c20
-rw-r--r--fs/f2fs/inode.c22
-rw-r--r--fs/f2fs/namei.c8
-rw-r--r--fs/f2fs/node.c40
-rw-r--r--fs/f2fs/node.h2
-rw-r--r--fs/f2fs/recovery.c4
-rw-r--r--fs/f2fs/segment.c101
-rw-r--r--fs/f2fs/segment.h2
-rw-r--r--fs/f2fs/shrinker.c2
-rw-r--r--fs/f2fs/super.c175
-rw-r--r--fs/f2fs/sysfs.c27
-rw-r--r--fs/f2fs/xattr.c22
-rw-r--r--fs/fscache/object.c3
-rw-r--r--fs/fuse/dev.c5
-rw-r--r--fs/fuse/file.c2
-rw-r--r--fs/hfs/btree.c3
-rw-r--r--fs/hfsplus/btree.c3
-rw-r--r--fs/hugetlbfs/inode.c30
-rw-r--r--fs/jffs2/super.c3
-rw-r--r--fs/nfs/super.c3
-rw-r--r--fs/nfsd/nfsctl.c2
-rw-r--r--fs/ocfs2/buffer_head_io.c2
-rw-r--r--fs/ocfs2/export.c2
-rw-r--r--fs/ocfs2/localalloc.c9
-rw-r--r--fs/ocfs2/move_extents.c47
-rw-r--r--fs/proc/array.c2
-rw-r--r--fs/proc/base.c19
-rw-r--r--fs/proc/task_mmu.c2
-rw-r--r--fs/pstore/platform.c4
-rw-r--r--fs/pstore/ram_core.c5
-rw-r--r--fs/read_write.c4
-rw-r--r--fs/squashfs/Kconfig28
-rw-r--r--fs/squashfs/Makefile3
-rw-r--r--fs/squashfs/block.c549
-rw-r--r--fs/squashfs/cache.c73
-rw-r--r--fs/squashfs/decompressor.c55
-rw-r--r--fs/squashfs/file.c140
-rw-r--r--fs/squashfs/file_cache.c38
-rw-r--r--fs/squashfs/file_direct.c245
-rw-r--r--fs/squashfs/lz4_wrapper.c32
-rw-r--r--fs/squashfs/lzo_wrapper.c40
-rw-r--r--fs/squashfs/page_actor.c175
-rw-r--r--fs/squashfs/page_actor.h84
-rw-r--r--fs/squashfs/squashfs.h11
-rw-r--r--fs/squashfs/squashfs_fs_sb.h2
-rw-r--r--fs/squashfs/super.c7
-rw-r--r--fs/squashfs/xz_wrapper.c15
-rw-r--r--fs/squashfs/zlib_wrapper.c14
-rw-r--r--fs/super.c30
-rw-r--r--fs/sysv/inode.c2
-rw-r--r--fs/timerfd.c32
-rw-r--r--fs/udf/inode.c6
-rw-r--r--fs/userfaultfd.c14
-rw-r--r--fs/xfs/libxfs/xfs_attr.c9
101 files changed, 2631 insertions, 1574 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 3fe07571f942..e8810a263bf3 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -40,6 +40,7 @@
#include <linux/ramfs.h>
#include <linux/percpu-refcount.h>
#include <linux/mount.h>
+#include <linux/nospec.h>
#include <asm/kmap_types.h>
#include <asm/uaccess.h>
@@ -1064,6 +1065,7 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
if (!table || id >= table->nr)
goto out;
+ id = array_index_nospec(id, table->nr);
ctx = rcu_dereference(table->table[id]);
if (ctx && ctx->user_id == ctx_id) {
if (percpu_ref_tryget_live(&ctx->users))
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 6d1d0b93b1aa..c792df826e12 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -9,7 +9,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
export.o tree-log.o free-space-cache.o zlib.o lzo.o \
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
- uuid-tree.o props.o hash.o
+ uuid-tree.o props.o hash.o tree-checker.o
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 38ee08675468..8f4baa3cb992 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1726,20 +1726,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
return err;
}
-/*
- * The leaf data grows from end-to-front in the node.
- * this returns the address of the start of the last item,
- * which is the stop of the leaf data stack
- */
-static inline unsigned int leaf_data_end(struct btrfs_root *root,
- struct extent_buffer *leaf)
-{
- u32 nr = btrfs_header_nritems(leaf);
- if (nr == 0)
- return BTRFS_LEAF_DATA_SIZE(root);
- return btrfs_item_offset_nr(leaf, nr - 1);
-}
-
/*
* search for key in the extent_buffer. The items start at offset p,
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index e847573c6db0..4a91d3119e59 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -35,6 +35,7 @@
#include <linux/btrfs.h>
#include <linux/workqueue.h>
#include <linux/security.h>
+#include <linux/sizes.h>
#include "extent_io.h"
#include "extent_map.h"
#include "async-thread.h"
@@ -897,6 +898,7 @@ struct btrfs_balance_item {
#define BTRFS_FILE_EXTENT_INLINE 0
#define BTRFS_FILE_EXTENT_REG 1
#define BTRFS_FILE_EXTENT_PREALLOC 2
+#define BTRFS_FILE_EXTENT_TYPES 2
struct btrfs_file_extent_item {
/*
@@ -2283,7 +2285,7 @@ do { \
#define BTRFS_INODE_ROOT_ITEM_INIT (1 << 31)
struct btrfs_map_token {
- struct extent_buffer *eb;
+ const struct extent_buffer *eb;
char *kaddr;
unsigned long offset;
};
@@ -2314,18 +2316,19 @@ static inline void btrfs_init_map_token (struct btrfs_map_token *token)
sizeof(((type *)0)->member)))
#define DECLARE_BTRFS_SETGET_BITS(bits) \
-u##bits btrfs_get_token_##bits(struct extent_buffer *eb, void *ptr, \
- unsigned long off, \
- struct btrfs_map_token *token); \
-void btrfs_set_token_##bits(struct extent_buffer *eb, void *ptr, \
+u##bits btrfs_get_token_##bits(const struct extent_buffer *eb, \
+ const void *ptr, unsigned long off, \
+ struct btrfs_map_token *token); \
+void btrfs_set_token_##bits(struct extent_buffer *eb, const void *ptr, \
unsigned long off, u##bits val, \
struct btrfs_map_token *token); \
-static inline u##bits btrfs_get_##bits(struct extent_buffer *eb, void *ptr, \
+static inline u##bits btrfs_get_##bits(const struct extent_buffer *eb, \
+ const void *ptr, \
unsigned long off) \
{ \
return btrfs_get_token_##bits(eb, ptr, off, NULL); \
} \
-static inline void btrfs_set_##bits(struct extent_buffer *eb, void *ptr, \
+static inline void btrfs_set_##bits(struct extent_buffer *eb, void *ptr,\
unsigned long off, u##bits val) \
{ \
btrfs_set_token_##bits(eb, ptr, off, val, NULL); \
@@ -2337,7 +2340,8 @@ DECLARE_BTRFS_SETGET_BITS(32)
DECLARE_BTRFS_SETGET_BITS(64)
#define BTRFS_SETGET_FUNCS(name, type, member, bits) \
-static inline u##bits btrfs_##name(struct extent_buffer *eb, type *s) \
+static inline u##bits btrfs_##name(const struct extent_buffer *eb, \
+ const type *s) \
{ \
BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
return btrfs_get_##bits(eb, s, offsetof(type, member)); \
@@ -2348,7 +2352,8 @@ static inline void btrfs_set_##name(struct extent_buffer *eb, type *s, \
BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
btrfs_set_##bits(eb, s, offsetof(type, member), val); \
} \
-static inline u##bits btrfs_token_##name(struct extent_buffer *eb, type *s, \
+static inline u##bits btrfs_token_##name(const struct extent_buffer *eb,\
+ const type *s, \
struct btrfs_map_token *token) \
{ \
BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
@@ -2363,9 +2368,9 @@ static inline void btrfs_set_token_##name(struct extent_buffer *eb, \
}
#define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \
-static inline u##bits btrfs_##name(struct extent_buffer *eb) \
+static inline u##bits btrfs_##name(const struct extent_buffer *eb) \
{ \
- type *p = page_address(eb->pages[0]); \
+ const type *p = page_address(eb->pages[0]); \
u##bits res = le##bits##_to_cpu(p->member); \
return res; \
} \
@@ -2377,7 +2382,7 @@ static inline void btrfs_set_##name(struct extent_buffer *eb, \
}
#define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \
-static inline u##bits btrfs_##name(type *s) \
+static inline u##bits btrfs_##name(const type *s) \
{ \
return le##bits##_to_cpu(s->member); \
} \
@@ -2678,7 +2683,7 @@ static inline unsigned long btrfs_node_key_ptr_offset(int nr)
sizeof(struct btrfs_key_ptr) * nr;
}
-void btrfs_node_key(struct extent_buffer *eb,
+void btrfs_node_key(const struct extent_buffer *eb,
struct btrfs_disk_key *disk_key, int nr);
static inline void btrfs_set_node_key(struct extent_buffer *eb,
@@ -2707,28 +2712,28 @@ static inline struct btrfs_item *btrfs_item_nr(int nr)
return (struct btrfs_item *)btrfs_item_nr_offset(nr);
}
-static inline u32 btrfs_item_end(struct extent_buffer *eb,
+static inline u32 btrfs_item_end(const struct extent_buffer *eb,
struct btrfs_item *item)
{
return btrfs_item_offset(eb, item) + btrfs_item_size(eb, item);
}
-static inline u32 btrfs_item_end_nr(struct extent_buffer *eb, int nr)
+static inline u32 btrfs_item_end_nr(const struct extent_buffer *eb, int nr)
{
return btrfs_item_end(eb, btrfs_item_nr(nr));
}
-static inline u32 btrfs_item_offset_nr(struct extent_buffer *eb, int nr)
+static inline u32 btrfs_item_offset_nr(const struct extent_buffer *eb, int nr)
{
return btrfs_item_offset(eb, btrfs_item_nr(nr));
}
-static inline u32 btrfs_item_size_nr(struct extent_buffer *eb, int nr)
+static inline u32 btrfs_item_size_nr(const struct extent_buffer *eb, int nr)
{
return btrfs_item_size(eb, btrfs_item_nr(nr));
}
-static inline void btrfs_item_key(struct extent_buffer *eb,
+static inline void btrfs_item_key(const struct extent_buffer *eb,
struct btrfs_disk_key *disk_key, int nr)
{
struct btrfs_item *item = btrfs_item_nr(nr);
@@ -2764,8 +2769,8 @@ BTRFS_SETGET_STACK_FUNCS(stack_dir_name_len, struct btrfs_dir_item,
BTRFS_SETGET_STACK_FUNCS(stack_dir_transid, struct btrfs_dir_item,
transid, 64);
-static inline void btrfs_dir_item_key(struct extent_buffer *eb,
- struct btrfs_dir_item *item,
+static inline void btrfs_dir_item_key(const struct extent_buffer *eb,
+ const struct btrfs_dir_item *item,
struct btrfs_disk_key *key)
{
read_eb_member(eb, item, struct btrfs_dir_item, location, key);
@@ -2773,7 +2778,7 @@ static inline void btrfs_dir_item_key(struct extent_buffer *eb,
static inline void btrfs_set_dir_item_key(struct extent_buffer *eb,
struct btrfs_dir_item *item,
- struct btrfs_disk_key *key)
+ const struct btrfs_disk_key *key)
{
write_eb_member(eb, item, struct btrfs_dir_item, location, key);
}
@@ -2785,8 +2790,8 @@ BTRFS_SETGET_FUNCS(free_space_bitmaps, struct btrfs_free_space_header,
BTRFS_SETGET_FUNCS(free_space_generation, struct btrfs_free_space_header,
generation, 64);
-static inline void btrfs_free_space_key(struct extent_buffer *eb,
- struct btrfs_free_space_header *h,
+static inline void btrfs_free_space_key(const struct extent_buffer *eb,
+ const struct btrfs_free_space_header *h,
struct btrfs_disk_key *key)
{
read_eb_member(eb, h, struct btrfs_free_space_header, location, key);
@@ -2794,7 +2799,7 @@ static inline void btrfs_free_space_key(struct extent_buffer *eb,
static inline void btrfs_set_free_space_key(struct extent_buffer *eb,
struct btrfs_free_space_header *h,
- struct btrfs_disk_key *key)
+ const struct btrfs_disk_key *key)
{
write_eb_member(eb, h, struct btrfs_free_space_header, location, key);
}
@@ -2821,25 +2826,25 @@ static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk,
disk->objectid = cpu_to_le64(cpu->objectid);
}
-static inline void btrfs_node_key_to_cpu(struct extent_buffer *eb,
- struct btrfs_key *key, int nr)
+static inline void btrfs_node_key_to_cpu(const struct extent_buffer *eb,
+ struct btrfs_key *key, int nr)
{
struct btrfs_disk_key disk_key;
btrfs_node_key(eb, &disk_key, nr);
btrfs_disk_key_to_cpu(key, &disk_key);
}
-static inline void btrfs_item_key_to_cpu(struct extent_buffer *eb,
- struct btrfs_key *key, int nr)
+static inline void btrfs_item_key_to_cpu(const struct extent_buffer *eb,
+ struct btrfs_key *key, int nr)
{
struct btrfs_disk_key disk_key;
btrfs_item_key(eb, &disk_key, nr);
btrfs_disk_key_to_cpu(key, &disk_key);
}
-static inline void btrfs_dir_item_key_to_cpu(struct extent_buffer *eb,
- struct btrfs_dir_item *item,
- struct btrfs_key *key)
+static inline void btrfs_dir_item_key_to_cpu(const struct extent_buffer *eb,
+ const struct btrfs_dir_item *item,
+ struct btrfs_key *key)
{
struct btrfs_disk_key disk_key;
btrfs_dir_item_key(eb, item, &disk_key);
@@ -2872,7 +2877,7 @@ BTRFS_SETGET_STACK_FUNCS(stack_header_nritems, struct btrfs_header,
nritems, 32);
BTRFS_SETGET_STACK_FUNCS(stack_header_bytenr, struct btrfs_header, bytenr, 64);
-static inline int btrfs_header_flag(struct extent_buffer *eb, u64 flag)
+static inline int btrfs_header_flag(const struct extent_buffer *eb, u64 flag)
{
return (btrfs_header_flags(eb) & flag) == flag;
}
@@ -2891,7 +2896,7 @@ static inline int btrfs_clear_header_flag(struct extent_buffer *eb, u64 flag)
return (flags & flag) == flag;
}
-static inline int btrfs_header_backref_rev(struct extent_buffer *eb)
+static inline int btrfs_header_backref_rev(const struct extent_buffer *eb)
{
u64 flags = btrfs_header_flags(eb);
return flags >> BTRFS_BACKREF_REV_SHIFT;
@@ -2911,12 +2916,12 @@ static inline unsigned long btrfs_header_fsid(void)
return offsetof(struct btrfs_header, fsid);
}
-static inline unsigned long btrfs_header_chunk_tree_uuid(struct extent_buffer *eb)
+static inline unsigned long btrfs_header_chunk_tree_uuid(const struct extent_buffer *eb)
{
return offsetof(struct btrfs_header, chunk_tree_uuid);
}
-static inline int btrfs_is_leaf(struct extent_buffer *eb)
+static inline int btrfs_is_leaf(const struct extent_buffer *eb)
{
return btrfs_header_level(eb) == 0;
}
@@ -2950,12 +2955,12 @@ BTRFS_SETGET_STACK_FUNCS(root_stransid, struct btrfs_root_item,
BTRFS_SETGET_STACK_FUNCS(root_rtransid, struct btrfs_root_item,
rtransid, 64);
-static inline bool btrfs_root_readonly(struct btrfs_root *root)
+static inline bool btrfs_root_readonly(const struct btrfs_root *root)
{
return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_RDONLY)) != 0;
}
-static inline bool btrfs_root_dead(struct btrfs_root *root)
+static inline bool btrfs_root_dead(const struct btrfs_root *root)
{
return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_DEAD)) != 0;
}
@@ -3012,51 +3017,51 @@ BTRFS_SETGET_STACK_FUNCS(backup_num_devices, struct btrfs_root_backup,
/* struct btrfs_balance_item */
BTRFS_SETGET_FUNCS(balance_flags, struct btrfs_balance_item, flags, 64);
-static inline void btrfs_balance_data(struct extent_buffer *eb,
- struct btrfs_balance_item *bi,
+static inline void btrfs_balance_data(const struct extent_buffer *eb,
+ const struct btrfs_balance_item *bi,
struct btrfs_disk_balance_args *ba)
{
read_eb_member(eb, bi, struct btrfs_balance_item, data, ba);
}
static inline void btrfs_set_balance_data(struct extent_buffer *eb,
- struct btrfs_balance_item *bi,
- struct btrfs_disk_balance_args *ba)
+ struct btrfs_balance_item *bi,
+ const struct btrfs_disk_balance_args *ba)
{
write_eb_member(eb, bi, struct btrfs_balance_item, data, ba);
}
-static inline void btrfs_balance_meta(struct extent_buffer *eb,
- struct btrfs_balance_item *bi,
+static inline void btrfs_balance_meta(const struct extent_buffer *eb,
+ const struct btrfs_balance_item *bi,
struct btrfs_disk_balance_args *ba)
{
read_eb_member(eb, bi, struct btrfs_balance_item, meta, ba);
}
static inline void btrfs_set_balance_meta(struct extent_buffer *eb,
- struct btrfs_balance_item *bi,
- struct btrfs_disk_balance_args *ba)
+ struct btrfs_balance_item *bi,
+ const struct btrfs_disk_balance_args *ba)
{
write_eb_member(eb, bi, struct btrfs_balance_item, meta, ba);
}
-static inline void btrfs_balance_sys(struct extent_buffer *eb,
- struct btrfs_balance_item *bi,
+static inline void btrfs_balance_sys(const struct extent_buffer *eb,
+ const struct btrfs_balance_item *bi,
struct btrfs_disk_balance_args *ba)
{
read_eb_member(eb, bi, struct btrfs_balance_item, sys, ba);
}
static inline void btrfs_set_balance_sys(struct extent_buffer *eb,
- struct btrfs_balance_item *bi,
- struct btrfs_disk_balance_args *ba)
+ struct btrfs_balance_item *bi,
+ const struct btrfs_disk_balance_args *ba)
{
write_eb_member(eb, bi, struct btrfs_balance_item, sys, ba);
}
static inline void
btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu,
- struct btrfs_disk_balance_args *disk)
+ const struct btrfs_disk_balance_args *disk)
{
memset(cpu, 0, sizeof(*cpu));
@@ -3076,7 +3081,7 @@ btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu,
static inline void
btrfs_cpu_balance_args_to_disk(struct btrfs_disk_balance_args *disk,
- struct btrfs_balance_args *cpu)
+ const struct btrfs_balance_args *cpu)
{
memset(disk, 0, sizeof(*disk));
@@ -3144,7 +3149,7 @@ BTRFS_SETGET_STACK_FUNCS(super_magic, struct btrfs_super_block, magic, 64);
BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block,
uuid_tree_generation, 64);
-static inline int btrfs_super_csum_size(struct btrfs_super_block *s)
+static inline int btrfs_super_csum_size(const struct btrfs_super_block *s)
{
u16 t = btrfs_super_csum_type(s);
/*
@@ -3158,6 +3163,21 @@ static inline unsigned long btrfs_leaf_data(struct extent_buffer *l)
return offsetof(struct btrfs_leaf, items);
}
+/*
+ * The leaf data grows from end-to-front in the node.
+ * this returns the address of the start of the last item,
+ * which is the stop of the leaf data stack
+ */
+static inline unsigned int leaf_data_end(const struct btrfs_root *root,
+ const struct extent_buffer *leaf)
+{
+ u32 nr = btrfs_header_nritems(leaf);
+
+ if (nr == 0)
+ return BTRFS_LEAF_DATA_SIZE(root);
+ return btrfs_item_offset_nr(leaf, nr - 1);
+}
+
/* struct btrfs_file_extent_item */
BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8);
BTRFS_SETGET_STACK_FUNCS(stack_file_extent_disk_bytenr,
@@ -3174,7 +3194,7 @@ BTRFS_SETGET_STACK_FUNCS(stack_file_extent_compression,
struct btrfs_file_extent_item, compression, 8);
static inline unsigned long
-btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e)
+btrfs_file_extent_inline_start(const struct btrfs_file_extent_item *e)
{
return (unsigned long)e + BTRFS_FILE_EXTENT_INLINE_DATA_START;
}
@@ -3208,8 +3228,9 @@ BTRFS_SETGET_FUNCS(file_extent_other_encoding, struct btrfs_file_extent_item,
* size of any extent headers. If a file is compressed on disk, this is
* the compressed size
*/
-static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb,
- struct btrfs_item *e)
+static inline u32 btrfs_file_extent_inline_item_len(
+ const struct extent_buffer *eb,
+ struct btrfs_item *e)
{
return btrfs_item_size(eb, e) - BTRFS_FILE_EXTENT_INLINE_DATA_START;
}
@@ -3217,9 +3238,9 @@ static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb,
/* this returns the number of file bytes represented by the inline item.
* If an item is compressed, this is the uncompressed size
*/
-static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb,
- int slot,
- struct btrfs_file_extent_item *fi)
+static inline u32 btrfs_file_extent_inline_len(const struct extent_buffer *eb,
+ int slot,
+ const struct btrfs_file_extent_item *fi)
{
struct btrfs_map_token token;
@@ -3241,8 +3262,8 @@ static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb,
/* btrfs_dev_stats_item */
-static inline u64 btrfs_dev_stats_value(struct extent_buffer *eb,
- struct btrfs_dev_stats_item *ptr,
+static inline u64 btrfs_dev_stats_value(const struct extent_buffer *eb,
+ const struct btrfs_dev_stats_item *ptr,
int index)
{
u64 val;
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 176a27bc63aa..81e5bc62e8e3 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -620,7 +620,7 @@ static void btrfs_dev_replace_update_device_in_mapping_tree(
em = lookup_extent_mapping(em_tree, start, (u64)-1);
if (!em)
break;
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
for (i = 0; i < map->num_stripes; i++)
if (srcdev == map->stripes[i].dev)
map->stripes[i].dev = tgtdev;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 1f21c6c33228..78722aaffecd 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -49,6 +49,7 @@
#include "raid56.h"
#include "sysfs.h"
#include "qgroup.h"
+#include "tree-checker.h"
#ifdef CONFIG_X86
#include <asm/cpufeature.h>
@@ -522,72 +523,6 @@ static int check_tree_block_fsid(struct btrfs_fs_info *fs_info,
return ret;
}
-#define CORRUPT(reason, eb, root, slot) \
- btrfs_crit(root->fs_info, "corrupt leaf, %s: block=%llu," \
- "root=%llu, slot=%d", reason, \
- btrfs_header_bytenr(eb), root->objectid, slot)
-
-static noinline int check_leaf(struct btrfs_root *root,
- struct extent_buffer *leaf)
-{
- struct btrfs_key key;
- struct btrfs_key leaf_key;
- u32 nritems = btrfs_header_nritems(leaf);
- int slot;
-
- if (nritems == 0)
- return 0;
-
- /* Check the 0 item */
- if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) !=
- BTRFS_LEAF_DATA_SIZE(root)) {
- CORRUPT("invalid item offset size pair", leaf, root, 0);
- return -EIO;
- }
-
- /*
- * Check to make sure each items keys are in the correct order and their
- * offsets make sense. We only have to loop through nritems-1 because
- * we check the current slot against the next slot, which verifies the
- * next slot's offset+size makes sense and that the current's slot
- * offset is correct.
- */
- for (slot = 0; slot < nritems - 1; slot++) {
- btrfs_item_key_to_cpu(leaf, &leaf_key, slot);
- btrfs_item_key_to_cpu(leaf, &key, slot + 1);
-
- /* Make sure the keys are in the right order */
- if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) {
- CORRUPT("bad key order", leaf, root, slot);
- return -EIO;
- }
-
- /*
- * Make sure the offset and ends are right, remember that the
- * item data starts at the end of the leaf and grows towards the
- * front.
- */
- if (btrfs_item_offset_nr(leaf, slot) !=
- btrfs_item_end_nr(leaf, slot + 1)) {
- CORRUPT("slot offset bad", leaf, root, slot);
- return -EIO;
- }
-
- /*
- * Check to make sure that we don't point outside of the leaf,
- * just incase all the items are consistent to eachother, but
- * all point outside of the leaf.
- */
- if (btrfs_item_end_nr(leaf, slot) >
- BTRFS_LEAF_DATA_SIZE(root)) {
- CORRUPT("slot end outside of leaf", leaf, root, slot);
- return -EIO;
- }
- }
-
- return 0;
-}
-
static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
u64 phy_offset, struct page *page,
u64 start, u64 end, int mirror)
@@ -654,11 +589,14 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
* that we don't try and read the other copies of this block, just
* return -EIO.
*/
- if (found_level == 0 && check_leaf(root, eb)) {
+ if (found_level == 0 && btrfs_check_leaf_full(root, eb)) {
set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
ret = -EIO;
}
+ if (found_level > 0 && btrfs_check_node(root, eb))
+ ret = -EIO;
+
if (!ret)
set_extent_buffer_uptodate(eb);
err:
@@ -3958,7 +3896,13 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
buf->len,
root->fs_info->dirty_metadata_batch);
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
- if (btrfs_header_level(buf) == 0 && check_leaf(root, buf)) {
+ /*
+ * Since btrfs_mark_buffer_dirty() can be called with item pointer set
+ * but item data not updated.
+ * So here we should only check item pointers, not item data.
+ */
+ if (btrfs_header_level(buf) == 0 &&
+ btrfs_check_leaf_relaxed(root, buf)) {
btrfs_print_leaf(root, buf);
ASSERT(0);
}
@@ -4167,6 +4111,14 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info)
spin_lock(&fs_info->ordered_root_lock);
}
spin_unlock(&fs_info->ordered_root_lock);
+
+ /*
+ * We need this here because if we've been flipped read-only we won't
+ * get sync() from the umount, so we need to make sure any ordered
+ * extents that haven't had their dirty pages IO start writeout yet
+ * actually get run and error out properly.
+ */
+ btrfs_wait_ordered_roots(fs_info, -1);
}
static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 13ff0fdae03e..978bbfed5a2c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2342,7 +2342,13 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
ins.type = BTRFS_EXTENT_ITEM_KEY;
}
- BUG_ON(node->ref_mod != 1);
+ if (node->ref_mod != 1) {
+ btrfs_err(root->fs_info,
+ "btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu",
+ node->bytenr, node->ref_mod, node->action, ref_root,
+ parent);
+ return -EIO;
+ }
if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
BUG_ON(!extent_op || !extent_op->update_flags);
ret = alloc_reserved_tree_block(trans, root,
@@ -9481,6 +9487,8 @@ static int find_first_block_group(struct btrfs_root *root,
int ret = 0;
struct btrfs_key found_key;
struct extent_buffer *leaf;
+ struct btrfs_block_group_item bg;
+ u64 flags;
int slot;
ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
@@ -9502,7 +9510,47 @@ static int find_first_block_group(struct btrfs_root *root,
if (found_key.objectid >= key->objectid &&
found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
- ret = 0;
+ struct extent_map_tree *em_tree;
+ struct extent_map *em;
+
+ em_tree = &root->fs_info->mapping_tree.map_tree;
+ read_lock(&em_tree->lock);
+ em = lookup_extent_mapping(em_tree, found_key.objectid,
+ found_key.offset);
+ read_unlock(&em_tree->lock);
+ if (!em) {
+ btrfs_err(root->fs_info,
+ "logical %llu len %llu found bg but no related chunk",
+ found_key.objectid, found_key.offset);
+ ret = -ENOENT;
+ } else if (em->start != found_key.objectid ||
+ em->len != found_key.offset) {
+ btrfs_err(root->fs_info,
+ "block group %llu len %llu mismatch with chunk %llu len %llu",
+ found_key.objectid, found_key.offset,
+ em->start, em->len);
+ ret = -EUCLEAN;
+ } else {
+ read_extent_buffer(leaf, &bg,
+ btrfs_item_ptr_offset(leaf, slot),
+ sizeof(bg));
+ flags = btrfs_block_group_flags(&bg) &
+ BTRFS_BLOCK_GROUP_TYPE_MASK;
+
+ if (flags != (em->map_lookup->type &
+ BTRFS_BLOCK_GROUP_TYPE_MASK)) {
+ btrfs_err(root->fs_info,
+"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx",
+ found_key.objectid,
+ found_key.offset, flags,
+ (BTRFS_BLOCK_GROUP_TYPE_MASK &
+ em->map_lookup->type));
+ ret = -EUCLEAN;
+ } else {
+ ret = 0;
+ }
+ }
+ free_extent_map(em);
goto out;
}
path->slots[0]++;
@@ -9717,6 +9765,62 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
return cache;
}
+
+/*
+ * Iterate all chunks and verify that each of them has the corresponding block
+ * group
+ */
+static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
+ struct extent_map *em;
+ struct btrfs_block_group_cache *bg;
+ u64 start = 0;
+ int ret = 0;
+
+ while (1) {
+ read_lock(&map_tree->map_tree.lock);
+ /*
+ * lookup_extent_mapping will return the first extent map
+ * intersecting the range, so setting @len to 1 is enough to
+ * get the first chunk.
+ */
+ em = lookup_extent_mapping(&map_tree->map_tree, start, 1);
+ read_unlock(&map_tree->map_tree.lock);
+ if (!em)
+ break;
+
+ bg = btrfs_lookup_block_group(fs_info, em->start);
+ if (!bg) {
+ btrfs_err(fs_info,
+ "chunk start=%llu len=%llu doesn't have corresponding block group",
+ em->start, em->len);
+ ret = -EUCLEAN;
+ free_extent_map(em);
+ break;
+ }
+ if (bg->key.objectid != em->start ||
+ bg->key.offset != em->len ||
+ (bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) !=
+ (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
+ btrfs_err(fs_info,
+"chunk start=%llu len=%llu flags=0x%llx doesn't match block group start=%llu len=%llu flags=0x%llx",
+ em->start, em->len,
+ em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK,
+ bg->key.objectid, bg->key.offset,
+ bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
+ ret = -EUCLEAN;
+ free_extent_map(em);
+ btrfs_put_block_group(bg);
+ break;
+ }
+ start = em->start + em->len;
+ free_extent_map(em);
+ btrfs_put_block_group(bg);
+ }
+ return ret;
+}
+
int btrfs_read_block_groups(struct btrfs_root *root)
{
struct btrfs_path *path;
@@ -9903,7 +10007,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
}
init_global_block_rsv(info);
- ret = 0;
+ ret = check_chunk_block_group_mappings(info);
error:
btrfs_free_path(path);
return ret;
@@ -10388,7 +10492,7 @@ btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info,
* more device items and remove one chunk item), but this is done at
* btrfs_remove_chunk() through a call to check_system_chunk().
*/
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
num_items = 3 + map->num_stripes;
free_extent_map(em);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 7de8d545f4d6..573c9d62cfc9 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3847,8 +3847,10 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
struct block_device *bdev = fs_info->fs_devices->latest_bdev;
struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
u64 offset = eb->start;
+ u32 nritems;
unsigned long i, num_pages;
unsigned long bio_flags = 0;
+ unsigned long start, end;
int rw = (epd->sync_io ? WRITE_SYNC : WRITE) | REQ_META;
int ret = 0;
@@ -3858,6 +3860,23 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
if (btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID)
bio_flags = EXTENT_BIO_TREE_LOG;
+ /* set btree blocks beyond nritems with 0 to avoid stale content. */
+ nritems = btrfs_header_nritems(eb);
+ if (btrfs_header_level(eb) > 0) {
+ end = btrfs_node_key_ptr_offset(nritems);
+
+ memset_extent_buffer(eb, 0, end, eb->len - end);
+ } else {
+ /*
+ * leaf:
+ * header 0 1 2 .. N ... data_N .. data_2 data_1 data_0
+ */
+ start = btrfs_item_nr_offset(nritems);
+ end = btrfs_leaf_data(eb) +
+ leaf_data_end(fs_info->tree_root, eb);
+ memset_extent_buffer(eb, 0, start, end - start);
+ }
+
for (i = 0; i < num_pages; i++) {
struct page *p = eb->pages[i];
@@ -5351,9 +5370,8 @@ unlock_exit:
return ret;
}
-void read_extent_buffer(struct extent_buffer *eb, void *dstv,
- unsigned long start,
- unsigned long len)
+void read_extent_buffer(const struct extent_buffer *eb, void *dstv,
+ unsigned long start, unsigned long len)
{
size_t cur;
size_t offset;
@@ -5382,9 +5400,9 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv,
}
}
-int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv,
- unsigned long start,
- unsigned long len)
+int read_extent_buffer_to_user(const struct extent_buffer *eb,
+ void __user *dstv,
+ unsigned long start, unsigned long len)
{
size_t cur;
size_t offset;
@@ -5419,10 +5437,10 @@ int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv,
return ret;
}
-int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
- unsigned long min_len, char **map,
- unsigned long *map_start,
- unsigned long *map_len)
+int map_private_extent_buffer(const struct extent_buffer *eb,
+ unsigned long start, unsigned long min_len,
+ char **map, unsigned long *map_start,
+ unsigned long *map_len)
{
size_t offset = start & (PAGE_CACHE_SIZE - 1);
char *kaddr;
@@ -5457,9 +5475,8 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
return 0;
}
-int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
- unsigned long start,
- unsigned long len)
+int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
+ unsigned long start, unsigned long len)
{
size_t cur;
size_t offset;
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index f4c1ae11855f..751435967724 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -308,14 +308,13 @@ static inline void extent_buffer_get(struct extent_buffer *eb)
atomic_inc(&eb->refs);
}
-int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
- unsigned long start,
- unsigned long len);
-void read_extent_buffer(struct extent_buffer *eb, void *dst,
+int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
+ unsigned long start, unsigned long len);
+void read_extent_buffer(const struct extent_buffer *eb, void *dst,
unsigned long start,
unsigned long len);
-int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dst,
- unsigned long start,
+int read_extent_buffer_to_user(const struct extent_buffer *eb,
+ void __user *dst, unsigned long start,
unsigned long len);
void write_extent_buffer(struct extent_buffer *eb, const void *src,
unsigned long start, unsigned long len);
@@ -334,10 +333,10 @@ int set_extent_buffer_uptodate(struct extent_buffer *eb);
int clear_extent_buffer_uptodate(struct extent_buffer *eb);
int extent_buffer_uptodate(struct extent_buffer *eb);
int extent_buffer_under_io(struct extent_buffer *eb);
-int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
- unsigned long min_len, char **map,
- unsigned long *map_start,
- unsigned long *map_len);
+int map_private_extent_buffer(const struct extent_buffer *eb,
+ unsigned long offset, unsigned long min_len,
+ char **map, unsigned long *map_start,
+ unsigned long *map_len);
int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 6a98bddd8f33..84fb56d5c018 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -76,7 +76,7 @@ void free_extent_map(struct extent_map *em)
WARN_ON(extent_map_in_tree(em));
WARN_ON(!list_empty(&em->list));
if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags))
- kfree(em->bdev);
+ kfree(em->map_lookup);
kmem_cache_free(extent_map_cache, em);
}
}
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index b2991fd8583e..eb8b8fae036b 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -32,7 +32,15 @@ struct extent_map {
u64 block_len;
u64 generation;
unsigned long flags;
- struct block_device *bdev;
+ union {
+ struct block_device *bdev;
+
+ /*
+ * used for chunk mappings
+ * flags & EXTENT_FLAG_FS_MAPPING must be set
+ */
+ struct map_lookup *map_lookup;
+ };
atomic_t refs;
unsigned int compress_type;
struct list_head list;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 6dca9f937bf6..cc9ccc42f469 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3460,7 +3460,7 @@ static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
return ret;
}
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
if (em->start != chunk_offset)
goto out;
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 83c73738165e..40d1ab957fb6 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -3232,7 +3232,8 @@ static void free_pending_move(struct send_ctx *sctx, struct pending_dir_move *m)
kfree(m);
}
-static void tail_append_pending_moves(struct pending_dir_move *moves,
+static void tail_append_pending_moves(struct send_ctx *sctx,
+ struct pending_dir_move *moves,
struct list_head *stack)
{
if (list_empty(&moves->list)) {
@@ -3243,6 +3244,10 @@ static void tail_append_pending_moves(struct pending_dir_move *moves,
list_add_tail(&moves->list, stack);
list_splice_tail(&list, stack);
}
+ if (!RB_EMPTY_NODE(&moves->node)) {
+ rb_erase(&moves->node, &sctx->pending_dir_moves);
+ RB_CLEAR_NODE(&moves->node);
+ }
}
static int apply_children_dir_moves(struct send_ctx *sctx)
@@ -3257,7 +3262,7 @@ static int apply_children_dir_moves(struct send_ctx *sctx)
return 0;
INIT_LIST_HEAD(&stack);
- tail_append_pending_moves(pm, &stack);
+ tail_append_pending_moves(sctx, pm, &stack);
while (!list_empty(&stack)) {
pm = list_first_entry(&stack, struct pending_dir_move, list);
@@ -3268,7 +3273,7 @@ static int apply_children_dir_moves(struct send_ctx *sctx)
goto out;
pm = get_pending_dir_moves(sctx, parent_ino);
if (pm)
- tail_append_pending_moves(pm, &stack);
+ tail_append_pending_moves(sctx, pm, &stack);
}
return 0;
diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c
index b976597b0721..63ffd213b0b7 100644
--- a/fs/btrfs/struct-funcs.c
+++ b/fs/btrfs/struct-funcs.c
@@ -50,8 +50,8 @@ static inline void put_unaligned_le8(u8 val, void *p)
*/
#define DEFINE_BTRFS_SETGET_BITS(bits) \
-u##bits btrfs_get_token_##bits(struct extent_buffer *eb, void *ptr, \
- unsigned long off, \
+u##bits btrfs_get_token_##bits(const struct extent_buffer *eb, \
+ const void *ptr, unsigned long off, \
struct btrfs_map_token *token) \
{ \
unsigned long part_offset = (unsigned long)ptr; \
@@ -90,7 +90,8 @@ u##bits btrfs_get_token_##bits(struct extent_buffer *eb, void *ptr, \
return res; \
} \
void btrfs_set_token_##bits(struct extent_buffer *eb, \
- void *ptr, unsigned long off, u##bits val, \
+ const void *ptr, unsigned long off, \
+ u##bits val, \
struct btrfs_map_token *token) \
{ \
unsigned long part_offset = (unsigned long)ptr; \
@@ -133,7 +134,7 @@ DEFINE_BTRFS_SETGET_BITS(16)
DEFINE_BTRFS_SETGET_BITS(32)
DEFINE_BTRFS_SETGET_BITS(64)
-void btrfs_node_key(struct extent_buffer *eb,
+void btrfs_node_key(const struct extent_buffer *eb,
struct btrfs_disk_key *disk_key, int nr)
{
unsigned long ptr = btrfs_node_key_ptr_offset(nr);
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
new file mode 100644
index 000000000000..5b98f3c76ce4
--- /dev/null
+++ b/fs/btrfs/tree-checker.c
@@ -0,0 +1,649 @@
+/*
+ * Copyright (C) Qu Wenruo 2017. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program.
+ */
+
+/*
+ * The module is used to catch unexpected/corrupted tree block data.
+ * Such behavior can be caused either by a fuzzed image or bugs.
+ *
+ * The objective is to do leaf/node validation checks when tree block is read
+ * from disk, and check *every* possible member, so other code won't
+ * need to checking them again.
+ *
+ * Due to the potential and unwanted damage, every checker needs to be
+ * carefully reviewed otherwise so it does not prevent mount of valid images.
+ */
+
+#include "ctree.h"
+#include "tree-checker.h"
+#include "disk-io.h"
+#include "compression.h"
+#include "hash.h"
+#include "volumes.h"
+
+#define CORRUPT(reason, eb, root, slot) \
+ btrfs_crit(root->fs_info, \
+ "corrupt %s, %s: block=%llu, root=%llu, slot=%d", \
+ btrfs_header_level(eb) == 0 ? "leaf" : "node", \
+ reason, btrfs_header_bytenr(eb), root->objectid, slot)
+
+/*
+ * Error message should follow the following format:
+ * corrupt <type>: <identifier>, <reason>[, <bad_value>]
+ *
+ * @type: leaf or node
+ * @identifier: the necessary info to locate the leaf/node.
+ * It's recommened to decode key.objecitd/offset if it's
+ * meaningful.
+ * @reason: describe the error
+ * @bad_value: optional, it's recommened to output bad value and its
+ * expected value (range).
+ *
+ * Since comma is used to separate the components, only space is allowed
+ * inside each component.
+ */
+
+/*
+ * Append generic "corrupt leaf/node root=%llu block=%llu slot=%d: " to @fmt.
+ * Allows callers to customize the output.
+ */
+__printf(4, 5)
+static void generic_err(const struct btrfs_root *root,
+ const struct extent_buffer *eb, int slot,
+ const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list args;
+
+ va_start(args, fmt);
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ btrfs_crit(root->fs_info,
+ "corrupt %s: root=%llu block=%llu slot=%d, %pV",
+ btrfs_header_level(eb) == 0 ? "leaf" : "node",
+ root->objectid, btrfs_header_bytenr(eb), slot, &vaf);
+ va_end(args);
+}
+
+static int check_extent_data_item(struct btrfs_root *root,
+ struct extent_buffer *leaf,
+ struct btrfs_key *key, int slot)
+{
+ struct btrfs_file_extent_item *fi;
+ u32 sectorsize = root->sectorsize;
+ u32 item_size = btrfs_item_size_nr(leaf, slot);
+
+ if (!IS_ALIGNED(key->offset, sectorsize)) {
+ CORRUPT("unaligned key offset for file extent",
+ leaf, root, slot);
+ return -EUCLEAN;
+ }
+
+ fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
+
+ if (btrfs_file_extent_type(leaf, fi) > BTRFS_FILE_EXTENT_TYPES) {
+ CORRUPT("invalid file extent type", leaf, root, slot);
+ return -EUCLEAN;
+ }
+
+ /*
+ * Support for new compression/encrption must introduce incompat flag,
+ * and must be caught in open_ctree().
+ */
+ if (btrfs_file_extent_compression(leaf, fi) > BTRFS_COMPRESS_TYPES) {
+ CORRUPT("invalid file extent compression", leaf, root, slot);
+ return -EUCLEAN;
+ }
+ if (btrfs_file_extent_encryption(leaf, fi)) {
+ CORRUPT("invalid file extent encryption", leaf, root, slot);
+ return -EUCLEAN;
+ }
+ if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) {
+ /* Inline extent must have 0 as key offset */
+ if (key->offset) {
+ CORRUPT("inline extent has non-zero key offset",
+ leaf, root, slot);
+ return -EUCLEAN;
+ }
+
+ /* Compressed inline extent has no on-disk size, skip it */
+ if (btrfs_file_extent_compression(leaf, fi) !=
+ BTRFS_COMPRESS_NONE)
+ return 0;
+
+ /* Uncompressed inline extent size must match item size */
+ if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START +
+ btrfs_file_extent_ram_bytes(leaf, fi)) {
+ CORRUPT("plaintext inline extent has invalid size",
+ leaf, root, slot);
+ return -EUCLEAN;
+ }
+ return 0;
+ }
+
+ /* Regular or preallocated extent has fixed item size */
+ if (item_size != sizeof(*fi)) {
+ CORRUPT(
+ "regluar or preallocated extent data item size is invalid",
+ leaf, root, slot);
+ return -EUCLEAN;
+ }
+ if (!IS_ALIGNED(btrfs_file_extent_ram_bytes(leaf, fi), sectorsize) ||
+ !IS_ALIGNED(btrfs_file_extent_disk_bytenr(leaf, fi), sectorsize) ||
+ !IS_ALIGNED(btrfs_file_extent_disk_num_bytes(leaf, fi), sectorsize) ||
+ !IS_ALIGNED(btrfs_file_extent_offset(leaf, fi), sectorsize) ||
+ !IS_ALIGNED(btrfs_file_extent_num_bytes(leaf, fi), sectorsize)) {
+ CORRUPT(
+ "regular or preallocated extent data item has unaligned value",
+ leaf, root, slot);
+ return -EUCLEAN;
+ }
+
+ return 0;
+}
+
+static int check_csum_item(struct btrfs_root *root, struct extent_buffer *leaf,
+ struct btrfs_key *key, int slot)
+{
+ u32 sectorsize = root->sectorsize;
+ u32 csumsize = btrfs_super_csum_size(root->fs_info->super_copy);
+
+ if (key->objectid != BTRFS_EXTENT_CSUM_OBJECTID) {
+ CORRUPT("invalid objectid for csum item", leaf, root, slot);
+ return -EUCLEAN;
+ }
+ if (!IS_ALIGNED(key->offset, sectorsize)) {
+ CORRUPT("unaligned key offset for csum item", leaf, root, slot);
+ return -EUCLEAN;
+ }
+ if (!IS_ALIGNED(btrfs_item_size_nr(leaf, slot), csumsize)) {
+ CORRUPT("unaligned csum item size", leaf, root, slot);
+ return -EUCLEAN;
+ }
+ return 0;
+}
+
+/*
+ * Customized reported for dir_item, only important new info is key->objectid,
+ * which represents inode number
+ */
+__printf(4, 5)
+static void dir_item_err(const struct btrfs_root *root,
+ const struct extent_buffer *eb, int slot,
+ const char *fmt, ...)
+{
+ struct btrfs_key key;
+ struct va_format vaf;
+ va_list args;
+
+ btrfs_item_key_to_cpu(eb, &key, slot);
+ va_start(args, fmt);
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ btrfs_crit(root->fs_info,
+ "corrupt %s: root=%llu block=%llu slot=%d ino=%llu, %pV",
+ btrfs_header_level(eb) == 0 ? "leaf" : "node", root->objectid,
+ btrfs_header_bytenr(eb), slot, key.objectid, &vaf);
+ va_end(args);
+}
+
+static int check_dir_item(struct btrfs_root *root,
+ struct extent_buffer *leaf,
+ struct btrfs_key *key, int slot)
+{
+ struct btrfs_dir_item *di;
+ u32 item_size = btrfs_item_size_nr(leaf, slot);
+ u32 cur = 0;
+
+ di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
+ while (cur < item_size) {
+ u32 name_len;
+ u32 data_len;
+ u32 max_name_len;
+ u32 total_size;
+ u32 name_hash;
+ u8 dir_type;
+
+ /* header itself should not cross item boundary */
+ if (cur + sizeof(*di) > item_size) {
+ dir_item_err(root, leaf, slot,
+ "dir item header crosses item boundary, have %zu boundary %u",
+ cur + sizeof(*di), item_size);
+ return -EUCLEAN;
+ }
+
+ /* dir type check */
+ dir_type = btrfs_dir_type(leaf, di);
+ if (dir_type >= BTRFS_FT_MAX) {
+ dir_item_err(root, leaf, slot,
+ "invalid dir item type, have %u expect [0, %u)",
+ dir_type, BTRFS_FT_MAX);
+ return -EUCLEAN;
+ }
+
+ if (key->type == BTRFS_XATTR_ITEM_KEY &&
+ dir_type != BTRFS_FT_XATTR) {
+ dir_item_err(root, leaf, slot,
+ "invalid dir item type for XATTR key, have %u expect %u",
+ dir_type, BTRFS_FT_XATTR);
+ return -EUCLEAN;
+ }
+ if (dir_type == BTRFS_FT_XATTR &&
+ key->type != BTRFS_XATTR_ITEM_KEY) {
+ dir_item_err(root, leaf, slot,
+ "xattr dir type found for non-XATTR key");
+ return -EUCLEAN;
+ }
+ if (dir_type == BTRFS_FT_XATTR)
+ max_name_len = XATTR_NAME_MAX;
+ else
+ max_name_len = BTRFS_NAME_LEN;
+
+ /* Name/data length check */
+ name_len = btrfs_dir_name_len(leaf, di);
+ data_len = btrfs_dir_data_len(leaf, di);
+ if (name_len > max_name_len) {
+ dir_item_err(root, leaf, slot,
+ "dir item name len too long, have %u max %u",
+ name_len, max_name_len);
+ return -EUCLEAN;
+ }
+ if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(root)) {
+ dir_item_err(root, leaf, slot,
+ "dir item name and data len too long, have %u max %zu",
+ name_len + data_len,
+ BTRFS_MAX_XATTR_SIZE(root));
+ return -EUCLEAN;
+ }
+
+ if (data_len && dir_type != BTRFS_FT_XATTR) {
+ dir_item_err(root, leaf, slot,
+ "dir item with invalid data len, have %u expect 0",
+ data_len);
+ return -EUCLEAN;
+ }
+
+ total_size = sizeof(*di) + name_len + data_len;
+
+ /* header and name/data should not cross item boundary */
+ if (cur + total_size > item_size) {
+ dir_item_err(root, leaf, slot,
+ "dir item data crosses item boundary, have %u boundary %u",
+ cur + total_size, item_size);
+ return -EUCLEAN;
+ }
+
+ /*
+ * Special check for XATTR/DIR_ITEM, as key->offset is name
+ * hash, should match its name
+ */
+ if (key->type == BTRFS_DIR_ITEM_KEY ||
+ key->type == BTRFS_XATTR_ITEM_KEY) {
+ char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)];
+
+ read_extent_buffer(leaf, namebuf,
+ (unsigned long)(di + 1), name_len);
+ name_hash = btrfs_name_hash(namebuf, name_len);
+ if (key->offset != name_hash) {
+ dir_item_err(root, leaf, slot,
+ "name hash mismatch with key, have 0x%016x expect 0x%016llx",
+ name_hash, key->offset);
+ return -EUCLEAN;
+ }
+ }
+ cur += total_size;
+ di = (struct btrfs_dir_item *)((void *)di + total_size);
+ }
+ return 0;
+}
+
+__printf(4, 5)
+__cold
+static void block_group_err(const struct btrfs_fs_info *fs_info,
+ const struct extent_buffer *eb, int slot,
+ const char *fmt, ...)
+{
+ struct btrfs_key key;
+ struct va_format vaf;
+ va_list args;
+
+ btrfs_item_key_to_cpu(eb, &key, slot);
+ va_start(args, fmt);
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ btrfs_crit(fs_info,
+ "corrupt %s: root=%llu block=%llu slot=%d bg_start=%llu bg_len=%llu, %pV",
+ btrfs_header_level(eb) == 0 ? "leaf" : "node",
+ btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
+ key.objectid, key.offset, &vaf);
+ va_end(args);
+}
+
+static int check_block_group_item(struct btrfs_fs_info *fs_info,
+ struct extent_buffer *leaf,
+ struct btrfs_key *key, int slot)
+{
+ struct btrfs_block_group_item bgi;
+ u32 item_size = btrfs_item_size_nr(leaf, slot);
+ u64 flags;
+ u64 type;
+
+ /*
+ * Here we don't really care about alignment since extent allocator can
+ * handle it. We care more about the size, as if one block group is
+ * larger than maximum size, it's must be some obvious corruption.
+ */
+ if (key->offset > BTRFS_MAX_DATA_CHUNK_SIZE || key->offset == 0) {
+ block_group_err(fs_info, leaf, slot,
+ "invalid block group size, have %llu expect (0, %llu]",
+ key->offset, BTRFS_MAX_DATA_CHUNK_SIZE);
+ return -EUCLEAN;
+ }
+
+ if (item_size != sizeof(bgi)) {
+ block_group_err(fs_info, leaf, slot,
+ "invalid item size, have %u expect %zu",
+ item_size, sizeof(bgi));
+ return -EUCLEAN;
+ }
+
+ read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
+ sizeof(bgi));
+ if (btrfs_block_group_chunk_objectid(&bgi) !=
+ BTRFS_FIRST_CHUNK_TREE_OBJECTID) {
+ block_group_err(fs_info, leaf, slot,
+ "invalid block group chunk objectid, have %llu expect %llu",
+ btrfs_block_group_chunk_objectid(&bgi),
+ BTRFS_FIRST_CHUNK_TREE_OBJECTID);
+ return -EUCLEAN;
+ }
+
+ if (btrfs_block_group_used(&bgi) > key->offset) {
+ block_group_err(fs_info, leaf, slot,
+ "invalid block group used, have %llu expect [0, %llu)",
+ btrfs_block_group_used(&bgi), key->offset);
+ return -EUCLEAN;
+ }
+
+ flags = btrfs_block_group_flags(&bgi);
+ if (hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) > 1) {
+ block_group_err(fs_info, leaf, slot,
+"invalid profile flags, have 0x%llx (%lu bits set) expect no more than 1 bit set",
+ flags & BTRFS_BLOCK_GROUP_PROFILE_MASK,
+ hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK));
+ return -EUCLEAN;
+ }
+
+ type = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
+ if (type != BTRFS_BLOCK_GROUP_DATA &&
+ type != BTRFS_BLOCK_GROUP_METADATA &&
+ type != BTRFS_BLOCK_GROUP_SYSTEM &&
+ type != (BTRFS_BLOCK_GROUP_METADATA |
+ BTRFS_BLOCK_GROUP_DATA)) {
+ block_group_err(fs_info, leaf, slot,
+"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx",
+ type, hweight64(type),
+ BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA,
+ BTRFS_BLOCK_GROUP_SYSTEM,
+ BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA);
+ return -EUCLEAN;
+ }
+ return 0;
+}
+
+/*
+ * Common point to switch the item-specific validation.
+ */
+static int check_leaf_item(struct btrfs_root *root,
+ struct extent_buffer *leaf,
+ struct btrfs_key *key, int slot)
+{
+ int ret = 0;
+
+ switch (key->type) {
+ case BTRFS_EXTENT_DATA_KEY:
+ ret = check_extent_data_item(root, leaf, key, slot);
+ break;
+ case BTRFS_EXTENT_CSUM_KEY:
+ ret = check_csum_item(root, leaf, key, slot);
+ break;
+ case BTRFS_DIR_ITEM_KEY:
+ case BTRFS_DIR_INDEX_KEY:
+ case BTRFS_XATTR_ITEM_KEY:
+ ret = check_dir_item(root, leaf, key, slot);
+ break;
+ case BTRFS_BLOCK_GROUP_ITEM_KEY:
+ ret = check_block_group_item(root->fs_info, leaf, key, slot);
+ break;
+ }
+ return ret;
+}
+
+static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf,
+ bool check_item_data)
+{
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ /* No valid key type is 0, so all key should be larger than this key */
+ struct btrfs_key prev_key = {0, 0, 0};
+ struct btrfs_key key;
+ u32 nritems = btrfs_header_nritems(leaf);
+ int slot;
+
+ if (btrfs_header_level(leaf) != 0) {
+ generic_err(root, leaf, 0,
+ "invalid level for leaf, have %d expect 0",
+ btrfs_header_level(leaf));
+ return -EUCLEAN;
+ }
+
+ /*
+ * Extent buffers from a relocation tree have a owner field that
+ * corresponds to the subvolume tree they are based on. So just from an
+ * extent buffer alone we can not find out what is the id of the
+ * corresponding subvolume tree, so we can not figure out if the extent
+ * buffer corresponds to the root of the relocation tree or not. So
+ * skip this check for relocation trees.
+ */
+ if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) {
+ u64 owner = btrfs_header_owner(leaf);
+ struct btrfs_root *check_root;
+
+ /* These trees must never be empty */
+ if (owner == BTRFS_ROOT_TREE_OBJECTID ||
+ owner == BTRFS_CHUNK_TREE_OBJECTID ||
+ owner == BTRFS_EXTENT_TREE_OBJECTID ||
+ owner == BTRFS_DEV_TREE_OBJECTID ||
+ owner == BTRFS_FS_TREE_OBJECTID ||
+ owner == BTRFS_DATA_RELOC_TREE_OBJECTID) {
+ generic_err(root, leaf, 0,
+ "invalid root, root %llu must never be empty",
+ owner);
+ return -EUCLEAN;
+ }
+ key.objectid = owner;
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ key.offset = (u64)-1;
+
+ check_root = btrfs_get_fs_root(fs_info, &key, false);
+ /*
+ * The only reason we also check NULL here is that during
+ * open_ctree() some roots has not yet been set up.
+ */
+ if (!IS_ERR_OR_NULL(check_root)) {
+ struct extent_buffer *eb;
+
+ eb = btrfs_root_node(check_root);
+ /* if leaf is the root, then it's fine */
+ if (leaf != eb) {
+ CORRUPT("non-root leaf's nritems is 0",
+ leaf, check_root, 0);
+ free_extent_buffer(eb);
+ return -EUCLEAN;
+ }
+ free_extent_buffer(eb);
+ }
+ return 0;
+ }
+
+ if (nritems == 0)
+ return 0;
+
+ /*
+ * Check the following things to make sure this is a good leaf, and
+ * leaf users won't need to bother with similar sanity checks:
+ *
+ * 1) key ordering
+ * 2) item offset and size
+ * No overlap, no hole, all inside the leaf.
+ * 3) item content
+ * If possible, do comprehensive sanity check.
+ * NOTE: All checks must only rely on the item data itself.
+ */
+ for (slot = 0; slot < nritems; slot++) {
+ u32 item_end_expected;
+ int ret;
+
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+
+ /* Make sure the keys are in the right order */
+ if (btrfs_comp_cpu_keys(&prev_key, &key) >= 0) {
+ CORRUPT("bad key order", leaf, root, slot);
+ return -EUCLEAN;
+ }
+
+ /*
+ * Make sure the offset and ends are right, remember that the
+ * item data starts at the end of the leaf and grows towards the
+ * front.
+ */
+ if (slot == 0)
+ item_end_expected = BTRFS_LEAF_DATA_SIZE(root);
+ else
+ item_end_expected = btrfs_item_offset_nr(leaf,
+ slot - 1);
+ if (btrfs_item_end_nr(leaf, slot) != item_end_expected) {
+ CORRUPT("slot offset bad", leaf, root, slot);
+ return -EUCLEAN;
+ }
+
+ /*
+ * Check to make sure that we don't point outside of the leaf,
+ * just in case all the items are consistent to each other, but
+ * all point outside of the leaf.
+ */
+ if (btrfs_item_end_nr(leaf, slot) >
+ BTRFS_LEAF_DATA_SIZE(root)) {
+ CORRUPT("slot end outside of leaf", leaf, root, slot);
+ return -EUCLEAN;
+ }
+
+ /* Also check if the item pointer overlaps with btrfs item. */
+ if (btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item) >
+ btrfs_item_ptr_offset(leaf, slot)) {
+ CORRUPT("slot overlap with its data", leaf, root, slot);
+ return -EUCLEAN;
+ }
+
+ if (check_item_data) {
+ /*
+ * Check if the item size and content meet other
+ * criteria
+ */
+ ret = check_leaf_item(root, leaf, &key, slot);
+ if (ret < 0)
+ return ret;
+ }
+
+ prev_key.objectid = key.objectid;
+ prev_key.type = key.type;
+ prev_key.offset = key.offset;
+ }
+
+ return 0;
+}
+
+int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf)
+{
+ return check_leaf(root, leaf, true);
+}
+
+int btrfs_check_leaf_relaxed(struct btrfs_root *root,
+ struct extent_buffer *leaf)
+{
+ return check_leaf(root, leaf, false);
+}
+
+int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node)
+{
+ unsigned long nr = btrfs_header_nritems(node);
+ struct btrfs_key key, next_key;
+ int slot;
+ int level = btrfs_header_level(node);
+ u64 bytenr;
+ int ret = 0;
+
+ if (level <= 0 || level >= BTRFS_MAX_LEVEL) {
+ generic_err(root, node, 0,
+ "invalid level for node, have %d expect [1, %d]",
+ level, BTRFS_MAX_LEVEL - 1);
+ return -EUCLEAN;
+ }
+ if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root)) {
+ btrfs_crit(root->fs_info,
+"corrupt node: root=%llu block=%llu, nritems too %s, have %lu expect range [1,%zu]",
+ root->objectid, node->start,
+ nr == 0 ? "small" : "large", nr,
+ BTRFS_NODEPTRS_PER_BLOCK(root));
+ return -EUCLEAN;
+ }
+
+ for (slot = 0; slot < nr - 1; slot++) {
+ bytenr = btrfs_node_blockptr(node, slot);
+ btrfs_node_key_to_cpu(node, &key, slot);
+ btrfs_node_key_to_cpu(node, &next_key, slot + 1);
+
+ if (!bytenr) {
+ generic_err(root, node, slot,
+ "invalid NULL node pointer");
+ ret = -EUCLEAN;
+ goto out;
+ }
+ if (!IS_ALIGNED(bytenr, root->sectorsize)) {
+ generic_err(root, node, slot,
+ "unaligned pointer, have %llu should be aligned to %u",
+ bytenr, root->sectorsize);
+ ret = -EUCLEAN;
+ goto out;
+ }
+
+ if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) {
+ generic_err(root, node, slot,
+ "bad key order, current (%llu %u %llu) next (%llu %u %llu)",
+ key.objectid, key.type, key.offset,
+ next_key.objectid, next_key.type,
+ next_key.offset);
+ ret = -EUCLEAN;
+ goto out;
+ }
+ }
+out:
+ return ret;
+}
diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h
new file mode 100644
index 000000000000..3d53e8d6fda0
--- /dev/null
+++ b/fs/btrfs/tree-checker.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) Qu Wenruo 2017. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program.
+ */
+
+#ifndef __BTRFS_TREE_CHECKER__
+#define __BTRFS_TREE_CHECKER__
+
+#include "ctree.h"
+#include "extent_io.h"
+
+/*
+ * Comprehensive leaf checker.
+ * Will check not only the item pointers, but also every possible member
+ * in item data.
+ */
+int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf);
+
+/*
+ * Less strict leaf checker.
+ * Will only check item pointers, not reading item data.
+ */
+int btrfs_check_leaf_relaxed(struct btrfs_root *root,
+ struct extent_buffer *leaf);
+int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node);
+
+#endif
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index b4d63a9842fa..5e8fe8f3942d 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1184,7 +1184,7 @@ again:
struct map_lookup *map;
int i;
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
for (i = 0; i < map->num_stripes; i++) {
u64 end;
@@ -2757,7 +2757,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
free_extent_map(em);
return -EINVAL;
}
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
lock_chunks(root->fs_info->chunk_root);
check_system_chunk(trans, extent_root, map->type);
unlock_chunks(root->fs_info->chunk_root);
@@ -4540,7 +4540,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
if (type & BTRFS_BLOCK_GROUP_DATA) {
max_stripe_size = 1024 * 1024 * 1024;
- max_chunk_size = 10 * max_stripe_size;
+ max_chunk_size = BTRFS_MAX_DATA_CHUNK_SIZE;
if (!devs_max)
devs_max = BTRFS_MAX_DEVS(info->chunk_root);
} else if (type & BTRFS_BLOCK_GROUP_METADATA) {
@@ -4731,7 +4731,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
goto error;
}
set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
- em->bdev = (struct block_device *)map;
+ em->map_lookup = map;
em->start = start;
em->len = num_bytes;
em->block_start = 0;
@@ -4826,7 +4826,7 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
return -EINVAL;
}
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
item_size = btrfs_chunk_item_size(map->num_stripes);
stripe_size = em->orig_block_len;
@@ -4968,7 +4968,7 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
if (!em)
return 1;
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
for (i = 0; i < map->num_stripes; i++) {
if (map->stripes[i].dev->missing) {
miss_ndevs++;
@@ -5048,7 +5048,7 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
return 1;
}
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
ret = map->num_stripes;
else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
@@ -5091,7 +5091,7 @@ unsigned long btrfs_full_stripe_len(struct btrfs_root *root,
BUG_ON(!em);
BUG_ON(em->start > logical || em->start + em->len < logical);
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
len = map->stripe_len * nr_data_stripes(map);
free_extent_map(em);
@@ -5112,7 +5112,7 @@ int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree,
BUG_ON(!em);
BUG_ON(em->start > logical || em->start + em->len < logical);
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
ret = 1;
free_extent_map(em);
@@ -5271,7 +5271,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
return -EINVAL;
}
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
offset = logical - em->start;
stripe_len = map->stripe_len;
@@ -5813,7 +5813,7 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
free_extent_map(em);
return -EIO;
}
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
length = em->len;
rmap_len = map->stripe_len;
@@ -6208,6 +6208,101 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
return dev;
}
+/* Return -EIO if any error, otherwise return 0. */
+static int btrfs_check_chunk_valid(struct btrfs_root *root,
+ struct extent_buffer *leaf,
+ struct btrfs_chunk *chunk, u64 logical)
+{
+ u64 length;
+ u64 stripe_len;
+ u16 num_stripes;
+ u16 sub_stripes;
+ u64 type;
+ u64 features;
+ bool mixed = false;
+
+ length = btrfs_chunk_length(leaf, chunk);
+ stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
+ num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+ sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
+ type = btrfs_chunk_type(leaf, chunk);
+
+ if (!num_stripes) {
+ btrfs_err(root->fs_info, "invalid chunk num_stripes: %u",
+ num_stripes);
+ return -EIO;
+ }
+ if (!IS_ALIGNED(logical, root->sectorsize)) {
+ btrfs_err(root->fs_info,
+ "invalid chunk logical %llu", logical);
+ return -EIO;
+ }
+ if (btrfs_chunk_sector_size(leaf, chunk) != root->sectorsize) {
+ btrfs_err(root->fs_info, "invalid chunk sectorsize %u",
+ btrfs_chunk_sector_size(leaf, chunk));
+ return -EIO;
+ }
+ if (!length || !IS_ALIGNED(length, root->sectorsize)) {
+ btrfs_err(root->fs_info,
+ "invalid chunk length %llu", length);
+ return -EIO;
+ }
+ if (!is_power_of_2(stripe_len)) {
+ btrfs_err(root->fs_info, "invalid chunk stripe length: %llu",
+ stripe_len);
+ return -EIO;
+ }
+ if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) &
+ type) {
+ btrfs_err(root->fs_info, "unrecognized chunk type: %llu",
+ ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
+ BTRFS_BLOCK_GROUP_PROFILE_MASK) &
+ btrfs_chunk_type(leaf, chunk));
+ return -EIO;
+ }
+
+ if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) {
+ btrfs_err(root->fs_info, "missing chunk type flag: 0x%llx", type);
+ return -EIO;
+ }
+
+ if ((type & BTRFS_BLOCK_GROUP_SYSTEM) &&
+ (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) {
+ btrfs_err(root->fs_info,
+ "system chunk with data or metadata type: 0x%llx", type);
+ return -EIO;
+ }
+
+ features = btrfs_super_incompat_flags(root->fs_info->super_copy);
+ if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
+ mixed = true;
+
+ if (!mixed) {
+ if ((type & BTRFS_BLOCK_GROUP_METADATA) &&
+ (type & BTRFS_BLOCK_GROUP_DATA)) {
+ btrfs_err(root->fs_info,
+ "mixed chunk type in non-mixed mode: 0x%llx", type);
+ return -EIO;
+ }
+ }
+
+ if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) ||
+ (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) ||
+ (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
+ (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) ||
+ (type & BTRFS_BLOCK_GROUP_DUP && num_stripes > 2) ||
+ ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 &&
+ num_stripes != 1)) {
+ btrfs_err(root->fs_info,
+ "invalid num_stripes:sub_stripes %u:%u for profile %llu",
+ num_stripes, sub_stripes,
+ type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
+ return -EIO;
+ }
+
+ return 0;
+}
+
static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
struct extent_buffer *leaf,
struct btrfs_chunk *chunk)
@@ -6217,6 +6312,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
struct extent_map *em;
u64 logical;
u64 length;
+ u64 stripe_len;
u64 devid;
u8 uuid[BTRFS_UUID_SIZE];
int num_stripes;
@@ -6225,6 +6321,12 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
logical = key->offset;
length = btrfs_chunk_length(leaf, chunk);
+ stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
+ num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+
+ ret = btrfs_check_chunk_valid(root, leaf, chunk, logical);
+ if (ret)
+ return ret;
read_lock(&map_tree->map_tree.lock);
em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
@@ -6241,7 +6343,6 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
em = alloc_extent_map();
if (!em)
return -ENOMEM;
- num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
if (!map) {
free_extent_map(em);
@@ -6249,7 +6350,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
}
set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
- em->bdev = (struct block_device *)map;
+ em->map_lookup = map;
em->start = logical;
em->len = length;
em->orig_start = 0;
@@ -6473,6 +6574,7 @@ int btrfs_read_sys_array(struct btrfs_root *root)
u32 array_size;
u32 len = 0;
u32 cur_offset;
+ u64 type;
struct btrfs_key key;
ASSERT(BTRFS_SUPER_INFO_SIZE <= root->nodesize);
@@ -6539,6 +6641,15 @@ int btrfs_read_sys_array(struct btrfs_root *root)
break;
}
+ type = btrfs_chunk_type(sb, chunk);
+ if ((type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) {
+ btrfs_err(root->fs_info,
+ "invalid chunk type %llu in sys_array at offset %u",
+ type, cur_offset);
+ ret = -EIO;
+ break;
+ }
+
len = btrfs_chunk_item_size(num_stripes);
if (cur_offset + len > array_size)
goto out_short_read;
@@ -6948,7 +7059,7 @@ void btrfs_update_commit_device_bytes_used(struct btrfs_root *root,
/* In order to kick the device replace finish process */
lock_chunks(root);
list_for_each_entry(em, &transaction->pending_chunks, list) {
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
for (i = 0; i < map->num_stripes; i++) {
dev = map->stripes[i].dev;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index d5c84f6b1353..3c651df420be 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -24,6 +24,8 @@
#include <linux/btrfs.h>
#include "async-thread.h"
+#define BTRFS_MAX_DATA_CHUNK_SIZE (10ULL * SZ_1G)
+
extern struct mutex uuid_mutex;
#define BTRFS_STRIPE_LEN (64 * 1024)
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 5b68cf526887..c05ab2ec0fef 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -963,11 +963,8 @@ error:
void cachefiles_uncache_page(struct fscache_object *_object, struct page *page)
{
struct cachefiles_object *object;
- struct cachefiles_cache *cache;
object = container_of(_object, struct cachefiles_object, fscache);
- cache = container_of(object->fscache.cache,
- struct cachefiles_cache, cache);
_enter("%p,{%lu}", object, page->index);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 0e3de1bb6500..e7b54514d99a 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -3243,7 +3243,6 @@ retry:
tcap->cap_id = t_cap_id;
tcap->seq = t_seq - 1;
tcap->issue_seq = t_seq - 1;
- tcap->mseq = t_mseq;
tcap->issued |= issued;
tcap->implemented |= issued;
if (cap == ci->i_auth_cap)
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 1eeb4780c3ed..eacf57c24ca9 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -48,6 +48,7 @@
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
+#include "dns_resolve.h"
#include "ntlmssp.h"
#include "nterr.h"
#include "rfc1002pdu.h"
@@ -304,6 +305,53 @@ static int cifs_setup_volume_info(struct smb_vol *volume_info, char *mount_data,
const char *devname);
/*
+ * Resolve hostname and set ip addr in tcp ses. Useful for hostnames that may
+ * get their ip addresses changed at some point.
+ *
+ * This should be called with server->srv_mutex held.
+ */
+#ifdef CONFIG_CIFS_DFS_UPCALL
+static int reconn_set_ipaddr(struct TCP_Server_Info *server)
+{
+ int rc;
+ int len;
+ char *unc, *ipaddr = NULL;
+
+ if (!server->hostname)
+ return -EINVAL;
+
+ len = strlen(server->hostname) + 3;
+
+ unc = kmalloc(len, GFP_KERNEL);
+ if (!unc) {
+ cifs_dbg(FYI, "%s: failed to create UNC path\n", __func__);
+ return -ENOMEM;
+ }
+ snprintf(unc, len, "\\\\%s", server->hostname);
+
+ rc = dns_resolve_server_name_to_ip(unc, &ipaddr);
+ kfree(unc);
+
+ if (rc < 0) {
+ cifs_dbg(FYI, "%s: failed to resolve server part of %s to IP: %d\n",
+ __func__, server->hostname, rc);
+ return rc;
+ }
+
+ rc = cifs_convert_address((struct sockaddr *)&server->dstaddr, ipaddr,
+ strlen(ipaddr));
+ kfree(ipaddr);
+
+ return !rc ? -1 : 0;
+}
+#else
+static inline int reconn_set_ipaddr(struct TCP_Server_Info *server)
+{
+ return 0;
+}
+#endif
+
+/*
* cifs tcp session reconnection
*
* mark tcp session as reconnecting so temporarily locked
@@ -400,6 +448,11 @@ cifs_reconnect(struct TCP_Server_Info *server)
rc = generic_ip_connect(server);
if (rc) {
cifs_dbg(FYI, "reconnect error %d\n", rc);
+ rc = reconn_set_ipaddr(server);
+ if (rc) {
+ cifs_dbg(FYI, "%s: failed to resolve hostname: %d\n",
+ __func__, rc);
+ }
mutex_unlock(&server->srv_mutex);
msleep(3000);
} else {
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 79a1bad88931..e5357c7b5d4c 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1073,14 +1073,18 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
/*
* Accessing maxBuf is racy with cifs_reconnect - need to store value
- * and check it for zero before using.
+ * and check it before using.
*/
max_buf = tcon->ses->server->maxBuf;
- if (!max_buf) {
+ if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
free_xid(xid);
return -EINVAL;
}
+ BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
+ PAGE_SIZE);
+ max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
+ PAGE_SIZE);
max_num = (max_buf - sizeof(struct smb_hdr)) /
sizeof(LOCKING_ANDX_RANGE);
buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
@@ -1404,12 +1408,16 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
/*
* Accessing maxBuf is racy with cifs_reconnect - need to store value
- * and check it for zero before using.
+ * and check it before using.
*/
max_buf = tcon->ses->server->maxBuf;
- if (!max_buf)
+ if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
return -EINVAL;
+ BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
+ PAGE_SIZE);
+ max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
+ PAGE_SIZE);
max_num = (max_buf - sizeof(struct smb_hdr)) /
sizeof(LOCKING_ANDX_RANGE);
buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 57b039ebfb1f..43fa471c88d7 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -652,7 +652,14 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos,
/* scan and find it */
int i;
char *cur_ent;
- char *end_of_smb = cfile->srch_inf.ntwrk_buf_start +
+ char *end_of_smb;
+
+ if (cfile->srch_inf.ntwrk_buf_start == NULL) {
+ cifs_dbg(VFS, "ntwrk_buf_start is NULL during readdir\n");
+ return -EIO;
+ }
+
+ end_of_smb = cfile->srch_inf.ntwrk_buf_start +
server->ops->calc_smb_size(
cfile->srch_inf.ntwrk_buf_start);
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index b2aff0c6f22c..dee5250701de 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -123,12 +123,14 @@ smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
/*
* Accessing maxBuf is racy with cifs_reconnect - need to store value
- * and check it for zero before using.
+ * and check it before using.
*/
max_buf = tcon->ses->server->maxBuf;
- if (!max_buf)
+ if (max_buf < sizeof(struct smb2_lock_element))
return -EINVAL;
+ BUILD_BUG_ON(sizeof(struct smb2_lock_element) > PAGE_SIZE);
+ max_buf = min_t(unsigned int, max_buf, PAGE_SIZE);
max_num = max_buf / sizeof(struct smb2_lock_element);
buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL);
if (!buf)
@@ -265,6 +267,8 @@ smb2_push_mandatory_locks(struct cifsFileInfo *cfile)
return -EINVAL;
}
+ BUILD_BUG_ON(sizeof(struct smb2_lock_element) > PAGE_SIZE);
+ max_buf = min_t(unsigned int, max_buf, PAGE_SIZE);
max_num = max_buf / sizeof(struct smb2_lock_element);
buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL);
if (!buf) {
diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c
index 8257a5a97cc0..98c25b969ab8 100644
--- a/fs/cifs/smb2maperror.c
+++ b/fs/cifs/smb2maperror.c
@@ -377,8 +377,8 @@ static const struct status_to_posix_error smb2_error_map_table[] = {
{STATUS_NONEXISTENT_EA_ENTRY, -EIO, "STATUS_NONEXISTENT_EA_ENTRY"},
{STATUS_NO_EAS_ON_FILE, -ENODATA, "STATUS_NO_EAS_ON_FILE"},
{STATUS_EA_CORRUPT_ERROR, -EIO, "STATUS_EA_CORRUPT_ERROR"},
- {STATUS_FILE_LOCK_CONFLICT, -EIO, "STATUS_FILE_LOCK_CONFLICT"},
- {STATUS_LOCK_NOT_GRANTED, -EIO, "STATUS_LOCK_NOT_GRANTED"},
+ {STATUS_FILE_LOCK_CONFLICT, -EACCES, "STATUS_FILE_LOCK_CONFLICT"},
+ {STATUS_LOCK_NOT_GRANTED, -EACCES, "STATUS_LOCK_NOT_GRANTED"},
{STATUS_DELETE_PENDING, -ENOENT, "STATUS_DELETE_PENDING"},
{STATUS_CTL_FILE_NOT_SUPPORTED, -ENOSYS,
"STATUS_CTL_FILE_NOT_SUPPORTED"},
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 2725085a3f9f..eae3cdffaf7f 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -143,14 +143,14 @@ smb2_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size,
scredits = server->credits;
/* can deadlock with reopen */
- if (scredits == 1) {
+ if (scredits <= 8) {
*num = SMB2_MAX_BUFFER_SIZE;
*credits = 0;
break;
}
- /* leave one credit for a possible reopen */
- scredits--;
+ /* leave some credits for reopen and other ops */
+ scredits -= 8;
*num = min_t(unsigned int, size,
scredits * SMB2_MAX_BUFFER_SIZE);
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index f7111bb88ec1..5e21d58c49ef 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -2523,8 +2523,8 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
if (rc == -ENODATA && rsp->hdr.Status == STATUS_NO_MORE_FILES) {
srch_inf->endOfSearch = true;
rc = 0;
- }
- cifs_stats_fail_inc(tcon, SMB2_QUERY_DIRECTORY_HE);
+ } else
+ cifs_stats_fail_inc(tcon, SMB2_QUERY_DIRECTORY_HE);
goto qdir_exit;
}
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 54af10204e83..1cf0a336ec06 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -360,7 +360,7 @@ uncork:
if (rc < 0 && rc != -EINTR)
cifs_dbg(VFS, "Error %d sending data on socket to server\n",
rc);
- else
+ else if (rc > 0)
rc = 0;
return rc;
diff --git a/fs/dcache.c b/fs/dcache.c
index 86f52a555dec..2416ad64cc62 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1155,15 +1155,11 @@ static enum lru_status dentry_lru_isolate_shrink(struct list_head *item,
*/
void shrink_dcache_sb(struct super_block *sb)
{
- long freed;
-
do {
LIST_HEAD(dispose);
- freed = list_lru_walk(&sb->s_dentry_lru,
+ list_lru_walk(&sb->s_dentry_lru,
dentry_lru_isolate_shrink, &dispose, 1024);
-
- this_cpu_sub(nr_dentry_unused, freed);
shrink_dentry_list(&dispose);
cond_resched();
} while (list_lru_count(&sb->s_dentry_lru) > 0);
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index e49ba072bd64..22fe11baef2b 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -671,6 +671,13 @@ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
struct dentry *dentry = NULL, *trap;
struct name_snapshot old_name;
+ if (IS_ERR(old_dir))
+ return old_dir;
+ if (IS_ERR(new_dir))
+ return new_dir;
+ if (IS_ERR_OR_NULL(old_dentry))
+ return old_dentry;
+
trap = lock_rename(new_dir, old_dir);
/* Source or destination directories don't exist? */
if (d_really_is_negative(old_dir) || d_really_is_negative(new_dir))
diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c
index dcea1e37a1b7..f18619bc2e09 100644
--- a/fs/dlm/ast.c
+++ b/fs/dlm/ast.c
@@ -290,6 +290,8 @@ void dlm_callback_suspend(struct dlm_ls *ls)
flush_workqueue(ls->ls_callback_wq);
}
+#define MAX_CB_QUEUE 25
+
void dlm_callback_resume(struct dlm_ls *ls)
{
struct dlm_lkb *lkb, *safe;
@@ -300,15 +302,23 @@ void dlm_callback_resume(struct dlm_ls *ls)
if (!ls->ls_callback_wq)
return;
+more:
mutex_lock(&ls->ls_cb_mutex);
list_for_each_entry_safe(lkb, safe, &ls->ls_cb_delay, lkb_cb_list) {
list_del_init(&lkb->lkb_cb_list);
queue_work(ls->ls_callback_wq, &lkb->lkb_cb_work);
count++;
+ if (count == MAX_CB_QUEUE)
+ break;
}
mutex_unlock(&ls->ls_cb_mutex);
if (count)
log_rinfo(ls, "dlm_callback_resume %d", count);
+ if (count == MAX_CB_QUEUE) {
+ count = 0;
+ cond_resched();
+ goto more;
+ }
}
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 35502d4046f5..3a7f401e943c 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -1210,6 +1210,7 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
if (rv < 0) {
log_error(ls, "create_lkb idr error %d", rv);
+ dlm_free_lkb(lkb);
return rv;
}
@@ -4177,6 +4178,7 @@ static int receive_convert(struct dlm_ls *ls, struct dlm_message *ms)
(unsigned long long)lkb->lkb_recover_seq,
ms->m_header.h_nodeid, ms->m_lkid);
error = -ENOENT;
+ dlm_put_lkb(lkb);
goto fail;
}
@@ -4230,6 +4232,7 @@ static int receive_unlock(struct dlm_ls *ls, struct dlm_message *ms)
lkb->lkb_id, lkb->lkb_remid,
ms->m_header.h_nodeid, ms->m_lkid);
error = -ENOENT;
+ dlm_put_lkb(lkb);
goto fail;
}
@@ -5792,20 +5795,20 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
goto out;
}
}
-
- /* After ua is attached to lkb it will be freed by dlm_free_lkb().
- When DLM_IFL_USER is set, the dlm knows that this is a userspace
- lock and that lkb_astparam is the dlm_user_args structure. */
-
error = set_lock_args(mode, &ua->lksb, flags, namelen, timeout_cs,
fake_astfn, ua, fake_bastfn, &args);
- lkb->lkb_flags |= DLM_IFL_USER;
-
if (error) {
+ kfree(ua->lksb.sb_lvbptr);
+ ua->lksb.sb_lvbptr = NULL;
+ kfree(ua);
__put_lkb(ls, lkb);
goto out;
}
+ /* After ua is attached to lkb it will be freed by dlm_free_lkb().
+ When DLM_IFL_USER is set, the dlm knows that this is a userspace
+ lock and that lkb_astparam is the dlm_user_args structure. */
+ lkb->lkb_flags |= DLM_IFL_USER;
error = request_lock(ls, lkb, name, namelen, &args);
switch (error) {
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index f3e72787e7f9..30e4e01db35a 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -673,11 +673,11 @@ static int new_lockspace(const char *name, const char *cluster,
kfree(ls->ls_recover_buf);
out_lkbidr:
idr_destroy(&ls->ls_lkbidr);
+ out_rsbtbl:
for (i = 0; i < DLM_REMOVE_NAMES_MAX; i++) {
if (ls->ls_remove_names[i])
kfree(ls->ls_remove_names[i]);
}
- out_rsbtbl:
vfree(ls->ls_rsbtbl);
out_lsfree:
if (do_unreg)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 066df649a6b0..ac21caad6729 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1035,7 +1035,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
* semantics). All the events that happen during that period of time are
* chained in ep->ovflist and requeued later on.
*/
- if (unlikely(ep->ovflist != EP_UNACTIVE_PTR)) {
+ if (ep->ovflist != EP_UNACTIVE_PTR) {
if (epi->next == EP_UNACTIVE_PTR) {
epi->next = ep->ovflist;
ep->ovflist = epi;
diff --git a/fs/exec.c b/fs/exec.c
index 6bbea71c6d92..ab6e34e1de21 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -191,6 +191,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
{
struct page *page;
int ret;
+ unsigned int gup_flags = FOLL_FORCE;
#ifdef CONFIG_STACK_GROWSUP
if (write) {
@@ -199,8 +200,12 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
return NULL;
}
#endif
- ret = get_user_pages(current, bprm->mm, pos,
- 1, write, 1, &page, NULL);
+
+ if (write)
+ gup_flags |= FOLL_WRITE;
+
+ ret = get_user_pages(current, bprm->mm, pos, 1, gup_flags,
+ &page, NULL);
if (ret <= 0)
return NULL;
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 714cd37a6ba3..6599c6124552 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -76,7 +76,7 @@ static bool dentry_connected(struct dentry *dentry)
struct dentry *parent = dget_parent(dentry);
dput(dentry);
- if (IS_ROOT(dentry)) {
+ if (dentry == parent) {
dput(parent);
return false;
}
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index cab694540930..ec506c2733ee 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -715,8 +715,11 @@ int ext4_try_to_write_inline_data(struct address_space *mapping,
if (!PageUptodate(page)) {
ret = ext4_read_inline_page(inode, page);
- if (ret < 0)
+ if (ret < 0) {
+ unlock_page(page);
+ put_page(page);
goto out_up_read;
+ }
}
ret = 1;
@@ -1870,12 +1873,12 @@ int ext4_inline_data_fiemap(struct inode *inode,
physical += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data;
physical += offsetof(struct ext4_inode, i_block);
- if (physical)
- error = fiemap_fill_next_extent(fieinfo, start, physical,
- inline_len, flags);
brelse(iloc.bh);
out:
up_read(&EXT4_I(inode)->xattr_sem);
+ if (physical)
+ error = fiemap_fill_next_extent(fieinfo, start, physical,
+ inline_len, flags);
return (error < 0 ? error : 0);
}
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index bad13f049fb0..2fc1564f62dd 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1600,7 +1600,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
}
if (reserved_gdb || gdb_off == 0) {
- if (ext4_has_feature_resize_inode(sb) ||
+ if (!ext4_has_feature_resize_inode(sb) ||
!le16_to_cpu(es->s_reserved_gdt_blocks)) {
ext4_warning(sb,
"No reserved GDT blocks, can't resize");
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2cd426a4161d..0ec3689a8990 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1048,6 +1048,16 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
ext4_nfs_get_inode);
}
+static int ext4_nfs_commit_metadata(struct inode *inode)
+{
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL
+ };
+
+ trace_ext4_nfs_commit_metadata(inode);
+ return ext4_write_inode(inode, &wbc);
+}
+
/*
* Try to release metadata pages (indirect blocks, directories) which are
* mapped via the block device. Since these pages could have journal heads
@@ -1142,6 +1152,7 @@ static const struct export_operations ext4_export_ops = {
.fh_to_dentry = ext4_fh_to_dentry,
.fh_to_parent = ext4_fh_to_parent,
.get_parent = ext4_get_parent,
+ .commit_metadata = ext4_nfs_commit_metadata,
};
enum {
@@ -5187,9 +5198,9 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
qf_inode->i_flags |= S_NOQUOTA;
lockdep_set_quota_inode(qf_inode, I_DATA_SEM_QUOTA);
err = dquot_enable(qf_inode, type, format_id, flags);
- iput(qf_inode);
if (err)
lockdep_set_quota_inode(qf_inode, I_DATA_SEM_NORMAL);
+ iput(qf_inode);
return err;
}
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index f82f916e1f2c..7ebf7b958f9e 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -160,7 +160,7 @@ static void *f2fs_acl_to_disk(struct f2fs_sb_info *sbi,
return (void *)f2fs_acl;
fail:
- kfree(f2fs_acl);
+ kvfree(f2fs_acl);
return ERR_PTR(-EINVAL);
}
@@ -190,7 +190,7 @@ static struct posix_acl *__f2fs_get_acl(struct inode *inode, int type,
acl = NULL;
else
acl = ERR_PTR(retval);
- kfree(value);
+ kvfree(value);
if (!IS_ERR(acl))
set_cached_acl(inode, type, acl);
@@ -243,7 +243,7 @@ static int __f2fs_set_acl(struct inode *inode, int type,
error = f2fs_setxattr(inode, name_index, "", value, size, ipage, 0);
- kfree(value);
+ kvfree(value);
if (!error)
set_cached_acl(inode, type, acl);
@@ -355,12 +355,14 @@ static int f2fs_acl_create(struct inode *dir, umode_t *mode,
return PTR_ERR(p);
clone = f2fs_acl_clone(p, GFP_NOFS);
- if (!clone)
- goto no_mem;
+ if (!clone) {
+ ret = -ENOMEM;
+ goto release_acl;
+ }
ret = f2fs_acl_create_masq(clone, mode);
if (ret < 0)
- goto no_mem_clone;
+ goto release_clone;
if (ret == 0)
posix_acl_release(clone);
@@ -374,11 +376,11 @@ static int f2fs_acl_create(struct inode *dir, umode_t *mode,
return 0;
-no_mem_clone:
+release_clone:
posix_acl_release(clone);
-no_mem:
+release_acl:
posix_acl_release(p);
- return -ENOMEM;
+ return ret;
}
int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage,
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 4ed2d2a0bf02..940372ebfc60 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -44,7 +44,7 @@ repeat:
cond_resched();
goto repeat;
}
- f2fs_wait_on_page_writeback(page, META, true);
+ f2fs_wait_on_page_writeback(page, META, true, true);
if (!PageUptodate(page))
SetPageUptodate(page);
return page;
@@ -371,9 +371,8 @@ continue_unlock:
goto continue_unlock;
}
- f2fs_wait_on_page_writeback(page, META, true);
+ f2fs_wait_on_page_writeback(page, META, true, true);
- BUG_ON(PageWriteback(page));
if (!clear_page_dirty_for_io(page))
goto continue_unlock;
@@ -912,7 +911,7 @@ free_fail_no_cp:
f2fs_put_page(cp1, 1);
f2fs_put_page(cp2, 1);
fail_no_cp:
- kfree(sbi->ckpt);
+ kvfree(sbi->ckpt);
return -EINVAL;
}
@@ -1291,11 +1290,11 @@ static void commit_checkpoint(struct f2fs_sb_info *sbi,
struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
int err;
+ f2fs_wait_on_page_writeback(page, META, true, true);
+
memcpy(page_address(page), src, PAGE_SIZE);
- set_page_dirty(page);
- f2fs_wait_on_page_writeback(page, META, true);
- f2fs_bug_on(sbi, PageWriteback(page));
+ set_page_dirty(page);
if (unlikely(!clear_page_dirty_for_io(page)))
f2fs_bug_on(sbi, 1);
@@ -1329,11 +1328,9 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
int err;
/* Flush all the NAT/SIT pages */
- while (get_pages(sbi, F2FS_DIRTY_META)) {
- f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
- if (unlikely(f2fs_cp_error(sbi)))
- break;
- }
+ f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
+ f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) &&
+ !f2fs_cp_error(sbi));
/*
* modify checkpoint
@@ -1406,14 +1403,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
for (i = 0; i < nm_i->nat_bits_blocks; i++)
f2fs_update_meta_page(sbi, nm_i->nat_bits +
(i << F2FS_BLKSIZE_BITS), blk + i);
-
- /* Flush all the NAT BITS pages */
- while (get_pages(sbi, F2FS_DIRTY_META)) {
- f2fs_sync_meta_pages(sbi, META, LONG_MAX,
- FS_CP_META_IO);
- if (unlikely(f2fs_cp_error(sbi)))
- break;
- }
}
/* write out checkpoint buffer at block 0 */
@@ -1449,6 +1438,8 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* Here, we have one bio having CP pack except cp pack 2 page */
f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
+ f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) &&
+ !f2fs_cp_error(sbi));
/* wait for previous submitted meta pages writeback */
f2fs_wait_on_all_pages_writeback(sbi);
@@ -1466,7 +1457,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
* invalidate intermediate page cache borrowed from meta inode
* which are used for migration of encrypted inode's blocks.
*/
- if (f2fs_sb_has_encrypt(sbi->sb))
+ if (f2fs_sb_has_encrypt(sbi))
invalidate_mapping_pages(META_MAPPING(sbi),
MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 8286f2114c37..84b3ee71d175 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -142,6 +142,8 @@ static bool f2fs_bio_post_read_required(struct bio *bio)
static void f2fs_read_end_io(struct bio *bio)
{
+ struct page *first_page = bio->bi_io_vec[0].bv_page;
+
if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_READ_IO)) {
f2fs_show_injection_info(FAULT_READ_IO);
bio->bi_error = -EIO;
@@ -155,6 +157,13 @@ static void f2fs_read_end_io(struct bio *bio)
return;
}
+ if (first_page != NULL &&
+ __read_io_type(first_page) == F2FS_RD_DATA) {
+ trace_android_fs_dataread_end(first_page->mapping->host,
+ page_offset(first_page),
+ bio->bi_iter.bi_size);
+ }
+
__read_end_io(bio);
}
@@ -321,6 +330,32 @@ submit_io:
submit_bio(bio_op(bio), bio);
}
+static void __f2fs_submit_read_bio(struct f2fs_sb_info *sbi,
+ struct bio *bio, enum page_type type)
+{
+ if (trace_android_fs_dataread_start_enabled() && (type == DATA)) {
+ struct page *first_page = bio->bi_io_vec[0].bv_page;
+
+ if (first_page != NULL &&
+ __read_io_type(first_page) == F2FS_RD_DATA) {
+ char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+ path = android_fstrace_get_pathname(pathbuf,
+ MAX_TRACE_PATHBUF_LEN,
+ first_page->mapping->host);
+
+ trace_android_fs_dataread_start(
+ first_page->mapping->host,
+ page_offset(first_page),
+ bio->bi_iter.bi_size,
+ current->pid,
+ path,
+ current->comm);
+ }
+ }
+ __submit_bio(sbi, bio, type);
+}
+
static void __submit_merged_bio(struct f2fs_bio_info *io)
{
struct f2fs_io_info *fio = &io->fio;
@@ -370,29 +405,6 @@ static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode,
return false;
}
-static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode,
- struct page *page, nid_t ino,
- enum page_type type)
-{
- enum page_type btype = PAGE_TYPE_OF_BIO(type);
- enum temp_type temp;
- struct f2fs_bio_info *io;
- bool ret = false;
-
- for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
- io = sbi->write_io[btype] + temp;
-
- down_read(&io->io_rwsem);
- ret = __has_merged_page(io, inode, page, ino);
- up_read(&io->io_rwsem);
-
- /* TODO: use HOT temp only for meta pages now. */
- if (ret || btype == META)
- break;
- }
- return ret;
-}
-
static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
enum page_type type, enum temp_type temp)
{
@@ -418,13 +430,19 @@ static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
nid_t ino, enum page_type type, bool force)
{
enum temp_type temp;
-
- if (!force && !has_merged_page(sbi, inode, page, ino, type))
- return;
+ bool ret = true;
for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
+ if (!force) {
+ enum page_type btype = PAGE_TYPE_OF_BIO(type);
+ struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
- __f2fs_submit_merged_write(sbi, type, temp);
+ down_read(&io->io_rwsem);
+ ret = __has_merged_page(io, inode, page, ino);
+ up_read(&io->io_rwsem);
+ }
+ if (ret)
+ __f2fs_submit_merged_write(sbi, type, temp);
/* TODO: use HOT temp only for meta pages now. */
if (type >= META)
@@ -485,7 +503,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
inc_page_count(fio->sbi, is_read_io(fio->op) ?
__read_io_type(page): WB_DATA_TYPE(fio->page));
- __submit_bio(fio->sbi, bio, fio->type);
+ __f2fs_submit_read_bio(fio->sbi, bio, fio->type);
return 0;
}
@@ -615,7 +633,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page,
}
ClearPageError(page);
inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
- __submit_bio(F2FS_I_SB(inode), bio, DATA);
+ __f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
return 0;
}
@@ -641,7 +659,7 @@ static void __set_data_blkaddr(struct dnode_of_data *dn)
*/
void f2fs_set_data_blkaddr(struct dnode_of_data *dn)
{
- f2fs_wait_on_page_writeback(dn->node_page, NODE, true);
+ f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
__set_data_blkaddr(dn);
if (set_page_dirty(dn->node_page))
dn->node_changed = true;
@@ -671,7 +689,7 @@ int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
dn->ofs_in_node, count);
- f2fs_wait_on_page_writeback(dn->node_page, NODE, true);
+ f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
for (; count > 0; dn->ofs_in_node++) {
block_t blkaddr = datablock_addr(dn->inode,
@@ -955,6 +973,9 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
return err;
}
+ if (direct_io && allow_outplace_dio(inode, iocb, from))
+ return 0;
+
if (is_inode_flag_set(inode, FI_NO_PREALLOC))
return 0;
@@ -968,6 +989,7 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
map.m_next_pgofs = NULL;
map.m_next_extent = NULL;
map.m_seg_type = NO_CHECK_TYPE;
+ map.m_may_create = true;
if (direct_io) {
map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint);
@@ -1026,7 +1048,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
unsigned int maxblocks = map->m_len;
struct dnode_of_data dn;
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- int mode = create ? ALLOC_NODE : LOOKUP_NODE;
+ int mode = map->m_may_create ? ALLOC_NODE : LOOKUP_NODE;
pgoff_t pgofs, end_offset, end;
int err = 0, ofs = 1;
unsigned int ofs_in_node, last_ofs_in_node;
@@ -1046,6 +1068,10 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
end = pgofs + maxblocks;
if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
+ if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO &&
+ map->m_may_create)
+ goto next_dnode;
+
map->m_pblk = ei.blk + pgofs - ei.fofs;
map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
map->m_flags = F2FS_MAP_MAPPED;
@@ -1060,7 +1086,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
}
next_dnode:
- if (create)
+ if (map->m_may_create)
__do_map_lock(sbi, flag, true);
/* When reading holes, we need its node page */
@@ -1097,11 +1123,13 @@ next_block:
if (is_valid_data_blkaddr(sbi, blkaddr)) {
/* use out-place-update for driect IO under LFS mode */
- if (test_opt(sbi, LFS) && create &&
- flag == F2FS_GET_BLOCK_DIO) {
+ if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO &&
+ map->m_may_create) {
err = __allocate_data_block(&dn, map->m_seg_type);
- if (!err)
+ if (!err) {
+ blkaddr = dn.data_blkaddr;
set_inode_flag(inode, FI_APPEND_WRITE);
+ }
}
} else {
if (create) {
@@ -1207,7 +1235,7 @@ skip:
f2fs_put_dnode(&dn);
- if (create) {
+ if (map->m_may_create) {
__do_map_lock(sbi, flag, false);
f2fs_balance_fs(sbi, dn.node_changed);
}
@@ -1233,7 +1261,7 @@ sync_out:
}
f2fs_put_dnode(&dn);
unlock_out:
- if (create) {
+ if (map->m_may_create) {
__do_map_lock(sbi, flag, false);
f2fs_balance_fs(sbi, dn.node_changed);
}
@@ -1255,6 +1283,7 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len)
map.m_next_pgofs = NULL;
map.m_next_extent = NULL;
map.m_seg_type = NO_CHECK_TYPE;
+ map.m_may_create = false;
last_lblk = F2FS_BLK_ALIGN(pos + len);
while (map.m_lblk < last_lblk) {
@@ -1269,7 +1298,7 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len)
static int __get_data_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh, int create, int flag,
- pgoff_t *next_pgofs, int seg_type)
+ pgoff_t *next_pgofs, int seg_type, bool may_write)
{
struct f2fs_map_blocks map;
int err;
@@ -1279,6 +1308,7 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
map.m_next_pgofs = next_pgofs;
map.m_next_extent = NULL;
map.m_seg_type = seg_type;
+ map.m_may_create = may_write;
err = f2fs_map_blocks(inode, &map, create, flag);
if (!err) {
@@ -1295,16 +1325,25 @@ static int get_data_block(struct inode *inode, sector_t iblock,
{
return __get_data_block(inode, iblock, bh_result, create,
flag, next_pgofs,
- NO_CHECK_TYPE);
+ NO_CHECK_TYPE, create);
+}
+
+static int get_data_block_dio_write(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
+{
+ return __get_data_block(inode, iblock, bh_result, create,
+ F2FS_GET_BLOCK_DIO, NULL,
+ f2fs_rw_hint_to_seg_type(inode->i_write_hint),
+ true);
}
static int get_data_block_dio(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
return __get_data_block(inode, iblock, bh_result, create,
- F2FS_GET_BLOCK_DIO, NULL,
- f2fs_rw_hint_to_seg_type(
- inode->i_write_hint));
+ F2FS_GET_BLOCK_DIO, NULL,
+ f2fs_rw_hint_to_seg_type(inode->i_write_hint),
+ false);
}
static int get_data_block_bmap(struct inode *inode, sector_t iblock,
@@ -1316,7 +1355,7 @@ static int get_data_block_bmap(struct inode *inode, sector_t iblock,
return __get_data_block(inode, iblock, bh_result, create,
F2FS_GET_BLOCK_BMAP, NULL,
- NO_CHECK_TYPE);
+ NO_CHECK_TYPE, create);
}
static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
@@ -1523,6 +1562,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
map.m_next_pgofs = NULL;
map.m_next_extent = NULL;
map.m_seg_type = NO_CHECK_TYPE;
+ map.m_may_create = false;
for (; nr_pages; nr_pages--) {
if (pages) {
@@ -1592,7 +1632,7 @@ got_it:
if (bio && (last_block_in_bio != block_nr - 1 ||
!__same_bdev(F2FS_I_SB(inode), block_nr, bio))) {
submit_and_realloc:
- __submit_bio(F2FS_I_SB(inode), bio, DATA);
+ __f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
bio = NULL;
}
if (bio == NULL) {
@@ -1624,7 +1664,7 @@ set_error_page:
goto next_page;
confused:
if (bio) {
- __submit_bio(F2FS_I_SB(inode), bio, DATA);
+ __f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
bio = NULL;
}
unlock_page(page);
@@ -1634,7 +1674,7 @@ next_page:
}
BUG_ON(pages && !list_empty(pages));
if (bio)
- __submit_bio(F2FS_I_SB(inode), bio, DATA);
+ __f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
return 0;
}
@@ -1852,6 +1892,8 @@ got_it:
if (fio->need_lock == LOCK_REQ)
f2fs_unlock_op(fio->sbi);
err = f2fs_inplace_write_data(fio);
+ if (err && PageWriteback(page))
+ end_page_writeback(page);
trace_f2fs_do_write_data_page(fio->page, IPU);
set_inode_flag(inode, FI_UPDATE_WRITE);
return err;
@@ -2139,12 +2181,11 @@ continue_unlock:
if (PageWriteback(page)) {
if (wbc->sync_mode != WB_SYNC_NONE)
f2fs_wait_on_page_writeback(page,
- DATA, true);
+ DATA, true, true);
else
goto continue_unlock;
}
- BUG_ON(PageWriteback(page));
if (!clear_page_dirty_for_io(page))
goto continue_unlock;
@@ -2321,6 +2362,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
bool locked = false;
struct extent_info ei = {0,0,0};
int err = 0;
+ int flag;
/*
* we already allocated all the blocks, so we don't need to get
@@ -2330,9 +2372,15 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
!is_inode_flag_set(inode, FI_NO_PREALLOC))
return 0;
+ /* f2fs_lock_op avoids race between write CP and convert_inline_page */
+ if (f2fs_has_inline_data(inode) && pos + len > MAX_INLINE_DATA(inode))
+ flag = F2FS_GET_BLOCK_DEFAULT;
+ else
+ flag = F2FS_GET_BLOCK_PRE_AIO;
+
if (f2fs_has_inline_data(inode) ||
(pos & PAGE_MASK) >= i_size_read(inode)) {
- __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
+ __do_map_lock(sbi, flag, true);
locked = true;
}
restart:
@@ -2370,6 +2418,7 @@ restart:
f2fs_put_dnode(&dn);
__do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
true);
+ WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO);
locked = true;
goto restart;
}
@@ -2383,7 +2432,7 @@ out:
f2fs_put_dnode(&dn);
unlock_out:
if (locked)
- __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
+ __do_map_lock(sbi, flag, false);
return err;
}
@@ -2464,7 +2513,7 @@ repeat:
}
}
- f2fs_wait_on_page_writeback(page, DATA, false);
+ f2fs_wait_on_page_writeback(page, DATA, false, true);
if (len == PAGE_SIZE || PageUptodate(page))
return 0;
@@ -2556,6 +2605,53 @@ static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
return 0;
}
+static void f2fs_dio_end_io(struct bio *bio)
+{
+ struct f2fs_private_dio *dio = bio->bi_private;
+
+ dec_page_count(F2FS_I_SB(dio->inode),
+ dio->write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
+
+ bio->bi_private = dio->orig_private;
+ bio->bi_end_io = dio->orig_end_io;
+
+ kvfree(dio);
+
+ bio_endio(bio);
+}
+
+static void f2fs_dio_submit_bio(int rw, struct bio *bio, struct inode *inode,
+ loff_t file_offset)
+{
+ struct f2fs_private_dio *dio;
+ bool write = (rw == REQ_OP_WRITE);
+ int err;
+
+ dio = f2fs_kzalloc(F2FS_I_SB(inode),
+ sizeof(struct f2fs_private_dio), GFP_NOFS);
+ if (!dio) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ dio->inode = inode;
+ dio->orig_end_io = bio->bi_end_io;
+ dio->orig_private = bio->bi_private;
+ dio->write = write;
+
+ bio->bi_end_io = f2fs_dio_end_io;
+ bio->bi_private = dio;
+
+ inc_page_count(F2FS_I_SB(inode),
+ write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
+
+ submit_bio(rw, bio);
+ return;
+out:
+ bio->bi_error = -EIO;
+ bio_endio(bio);
+}
+
static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
loff_t offset)
{
@@ -2625,7 +2721,11 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
down_read(&fi->i_gc_rwsem[READ]);
}
- err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block_dio);
+ err = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
+ iter, offset,
+ rw == WRITE ? get_data_block_dio_write :
+ get_data_block_dio, NULL, f2fs_dio_submit_bio,
+ DIO_LOCKING | DIO_SKIP_HOLES);
if (do_opu)
up_read(&fi->i_gc_rwsem[READ]);
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 139b4d5c83d5..f05b37ef7182 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -53,6 +53,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->vw_cnt = atomic_read(&sbi->vw_cnt);
si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt);
si->max_vw_cnt = atomic_read(&sbi->max_vw_cnt);
+ si->nr_dio_read = get_pages(sbi, F2FS_DIO_READ);
+ si->nr_dio_write = get_pages(sbi, F2FS_DIO_WRITE);
si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA);
si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA);
si->nr_rd_data = get_pages(sbi, F2FS_RD_DATA);
@@ -62,7 +64,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->nr_flushed =
atomic_read(&SM_I(sbi)->fcc_info->issued_flush);
si->nr_flushing =
- atomic_read(&SM_I(sbi)->fcc_info->issing_flush);
+ atomic_read(&SM_I(sbi)->fcc_info->queued_flush);
si->flush_list_empty =
llist_empty(&SM_I(sbi)->fcc_info->issue_list);
}
@@ -70,7 +72,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->nr_discarded =
atomic_read(&SM_I(sbi)->dcc_info->issued_discard);
si->nr_discarding =
- atomic_read(&SM_I(sbi)->dcc_info->issing_discard);
+ atomic_read(&SM_I(sbi)->dcc_info->queued_discard);
si->nr_discard_cmd =
atomic_read(&SM_I(sbi)->dcc_info->discard_cmd_cnt);
si->undiscard_blks = SM_I(sbi)->dcc_info->undiscard_blks;
@@ -94,8 +96,10 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->free_secs = free_sections(sbi);
si->prefree_count = prefree_segments(sbi);
si->dirty_count = dirty_segments(sbi);
- si->node_pages = NODE_MAPPING(sbi)->nrpages;
- si->meta_pages = META_MAPPING(sbi)->nrpages;
+ if (sbi->node_inode)
+ si->node_pages = NODE_MAPPING(sbi)->nrpages;
+ if (sbi->meta_inode)
+ si->meta_pages = META_MAPPING(sbi)->nrpages;
si->nats = NM_I(sbi)->nat_cnt;
si->dirty_nats = NM_I(sbi)->dirty_nat_cnt;
si->sits = MAIN_SEGS(sbi);
@@ -173,7 +177,6 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
static void update_mem_info(struct f2fs_sb_info *sbi)
{
struct f2fs_stat_info *si = F2FS_STAT(sbi);
- unsigned npages;
int i;
if (si->base_mem)
@@ -197,7 +200,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi);
si->base_mem += SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi);
si->base_mem += SIT_VBLOCK_MAP_SIZE;
- if (sbi->segs_per_sec > 1)
+ if (__is_large_section(sbi))
si->base_mem += MAIN_SECS(sbi) * sizeof(struct sec_entry);
si->base_mem += __bitmap_size(sbi, SIT_BITMAP);
@@ -256,10 +259,14 @@ get_cache:
sizeof(struct extent_node);
si->page_mem = 0;
- npages = NODE_MAPPING(sbi)->nrpages;
- si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
- npages = META_MAPPING(sbi)->nrpages;
- si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
+ if (sbi->node_inode) {
+ unsigned npages = NODE_MAPPING(sbi)->nrpages;
+ si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
+ }
+ if (sbi->meta_inode) {
+ unsigned npages = META_MAPPING(sbi)->nrpages;
+ si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
+ }
}
static int stat_show(struct seq_file *s, void *v)
@@ -374,6 +381,8 @@ static int stat_show(struct seq_file *s, void *v)
seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n",
si->ext_tree, si->zombie_tree, si->ext_node);
seq_puts(s, "\nBalancing F2FS Async:\n");
+ seq_printf(s, " - DIO (R: %4d, W: %4d)\n",
+ si->nr_dio_read, si->nr_dio_write);
seq_printf(s, " - IO_R (Data: %4d, Node: %4d, Meta: %4d\n",
si->nr_rd_data, si->nr_rd_node, si->nr_rd_meta);
seq_printf(s, " - IO_W (CP: %4d, Data: %4d, Flush: (%4d %4d %4d), "
@@ -510,7 +519,7 @@ void f2fs_destroy_stats(struct f2fs_sb_info *sbi)
list_del(&si->stat_list);
mutex_unlock(&f2fs_stat_mutex);
- kfree(si);
+ kvfree(si);
}
int __init f2fs_create_root_stats(void)
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index eedc07b46a52..719ba4d92098 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -293,7 +293,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
{
enum page_type type = f2fs_has_inline_dentry(dir) ? NODE : DATA;
lock_page(page);
- f2fs_wait_on_page_writeback(page, type, true);
+ f2fs_wait_on_page_writeback(page, type, true, true);
de->ino = cpu_to_le32(inode->i_ino);
set_de_type(de, inode->i_mode);
set_page_dirty(page);
@@ -307,7 +307,7 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage)
{
struct f2fs_inode *ri;
- f2fs_wait_on_page_writeback(ipage, NODE, true);
+ f2fs_wait_on_page_writeback(ipage, NODE, true, true);
/* copy name info. to this inode page */
ri = F2FS_INODE(ipage);
@@ -550,7 +550,7 @@ start:
++level;
goto start;
add_dentry:
- f2fs_wait_on_page_writeback(dentry_page, DATA, true);
+ f2fs_wait_on_page_writeback(dentry_page, DATA, true, true);
if (inode) {
down_write(&F2FS_I(inode)->i_sem);
@@ -705,7 +705,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
return f2fs_delete_inline_entry(dentry, page, dir, inode);
lock_page(page);
- f2fs_wait_on_page_writeback(page, DATA, true);
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
dentry_blk = page_address(page);
bit_pos = dentry - dentry_blk->dentry;
@@ -808,6 +808,17 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
de_name.name = d->filename[bit_pos];
de_name.len = le16_to_cpu(de->name_len);
+ /* check memory boundary before moving forward */
+ bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
+ if (unlikely(bit_pos > d->max)) {
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: corrupted namelen=%d, run fsck to fix.",
+ __func__, le16_to_cpu(de->name_len));
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ err = -EINVAL;
+ goto out;
+ }
+
if (f2fs_encrypted_inode(d->inode)) {
int save_len = fstr->len;
@@ -830,7 +841,6 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
if (readdir_ra)
f2fs_ra_node_page(sbi, le32_to_cpu(de->ino));
- bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
ctx->pos = start_pos + bit_pos;
}
out:
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 85542cba3fa2..b401578f8fb4 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -69,7 +69,7 @@ struct f2fs_fault_info {
unsigned int inject_type;
};
-extern char *f2fs_fault_name[FAULT_MAX];
+extern const char *f2fs_fault_name[FAULT_MAX];
#define IS_FAULT_SET(fi, type) ((fi)->inject_type & (1 << (type)))
#endif
@@ -154,12 +154,13 @@ struct f2fs_mount_info {
#define F2FS_FEATURE_VERITY 0x0400 /* reserved */
#define F2FS_FEATURE_SB_CHKSUM 0x0800
-#define F2FS_HAS_FEATURE(sb, mask) \
- ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0)
-#define F2FS_SET_FEATURE(sb, mask) \
- (F2FS_SB(sb)->raw_super->feature |= cpu_to_le32(mask))
-#define F2FS_CLEAR_FEATURE(sb, mask) \
- (F2FS_SB(sb)->raw_super->feature &= ~cpu_to_le32(mask))
+#define __F2FS_HAS_FEATURE(raw_super, mask) \
+ ((raw_super->feature & cpu_to_le32(mask)) != 0)
+#define F2FS_HAS_FEATURE(sbi, mask) __F2FS_HAS_FEATURE(sbi->raw_super, mask)
+#define F2FS_SET_FEATURE(sbi, mask) \
+ (sbi->raw_super->feature |= cpu_to_le32(mask))
+#define F2FS_CLEAR_FEATURE(sbi, mask) \
+ (sbi->raw_super->feature &= ~cpu_to_le32(mask))
/* bio stuffs */
#define REQ_OP_READ READ
@@ -347,7 +348,7 @@ struct discard_cmd {
struct block_device *bdev; /* bdev */
unsigned short ref; /* reference count */
unsigned char state; /* state */
- unsigned char issuing; /* issuing discard */
+ unsigned char queued; /* queued discard */
int error; /* bio error */
spinlock_t lock; /* for state/bio_ref updating */
unsigned short bio_ref; /* bio reference count */
@@ -389,7 +390,7 @@ struct discard_cmd_control {
unsigned int undiscard_blks; /* # of undiscard blocks */
unsigned int next_pos; /* next discard position */
atomic_t issued_discard; /* # of issued discard */
- atomic_t issing_discard; /* # of issing discard */
+ atomic_t queued_discard; /* # of queued discard */
atomic_t discard_cmd_cnt; /* # of cached cmd count */
struct rb_root root; /* root of discard rb-tree */
bool rbtree_check; /* config for consistence check */
@@ -479,6 +480,7 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal,
#define F2FS_GOING_DOWN_METASYNC 0x1 /* going down with metadata */
#define F2FS_GOING_DOWN_NOSYNC 0x2 /* going down */
#define F2FS_GOING_DOWN_METAFLUSH 0x3 /* going down with meta flush */
+#define F2FS_GOING_DOWN_NEED_FSCK 0x4 /* going down to trigger fsck */
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/*
@@ -617,16 +619,8 @@ struct extent_info {
};
struct extent_node {
- struct rb_node rb_node;
- union {
- struct {
- unsigned int fofs;
- unsigned int len;
- u32 blk;
- };
- struct extent_info ei; /* extent info */
-
- };
+ struct rb_node rb_node; /* rb node located in rb-tree */
+ struct extent_info ei; /* extent info */
struct list_head list; /* node in global extent list of sbi */
struct extent_tree *et; /* extent tree pointer */
};
@@ -661,6 +655,7 @@ struct f2fs_map_blocks {
pgoff_t *m_next_pgofs; /* point next possible non-hole pgofs */
pgoff_t *m_next_extent; /* point to next possible extent */
int m_seg_type;
+ bool m_may_create; /* indicate it is from write path */
};
/* for flag in get_data_block */
@@ -949,7 +944,7 @@ struct flush_cmd_control {
struct task_struct *f2fs_issue_flush; /* flush thread */
wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */
atomic_t issued_flush; /* # of issued flushes */
- atomic_t issing_flush; /* # of issing flushes */
+ atomic_t queued_flush; /* # of queued flushes */
struct llist_head issue_list; /* list for command issue */
struct llist_node *dispatch_list; /* list for command dispatch */
};
@@ -1016,6 +1011,8 @@ enum count_type {
F2FS_RD_DATA,
F2FS_RD_NODE,
F2FS_RD_META,
+ F2FS_DIO_WRITE,
+ F2FS_DIO_READ,
NR_COUNT_TYPE,
};
@@ -1230,8 +1227,6 @@ struct f2fs_sb_info {
/* for bio operations */
struct f2fs_bio_info *write_io[NR_PAGE_TYPE]; /* for write bios */
- struct mutex wio_mutex[NR_PAGE_TYPE - 1][NR_TEMP_TYPE];
- /* bio ordering for NODE/DATA */
/* keep migration IO order for LFS mode */
struct rw_semaphore io_order_lock;
mempool_t *write_io_dummy; /* Dummy pages */
@@ -1323,6 +1318,7 @@ struct f2fs_sb_info {
struct f2fs_gc_kthread *gc_thread; /* GC thread */
unsigned int cur_victim_sec; /* current victim section num */
unsigned int gc_mode; /* current GC state */
+ unsigned int next_victim_seg[2]; /* next segment in victim section */
/* for skip statistic */
unsigned long long skipped_atomic_files[2]; /* FG_GC and BG_GC */
unsigned long long skipped_gc_rwsem; /* FG_GC only */
@@ -1332,6 +1328,8 @@ struct f2fs_sb_info {
/* maximum # of trials to find a victim segment for SSR and GC */
unsigned int max_victim_search;
+ /* migration granularity of garbage collection, unit: segment */
+ unsigned int migration_granularity;
/*
* for stat information.
@@ -1390,6 +1388,13 @@ struct f2fs_sb_info {
__u32 s_chksum_seed;
};
+struct f2fs_private_dio {
+ struct inode *inode;
+ void *orig_private;
+ bio_end_io_t *orig_end_io;
+ bool write;
+};
+
#ifdef CONFIG_F2FS_FAULT_INJECTION
#define f2fs_show_injection_info(type) \
printk_ratelimited("%sF2FS-fs : inject %s in %s of %pF\n", \
@@ -1668,12 +1673,16 @@ static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock)
{
unsigned long flags;
- set_sbi_flag(sbi, SBI_NEED_FSCK);
+ /*
+ * In order to re-enable nat_bits we need to call fsck.f2fs by
+ * set_sbi_flag(sbi, SBI_NEED_FSCK). But it may give huge cost,
+ * so let's rely on regular fsck or unclean shutdown.
+ */
if (lock)
spin_lock_irqsave(&sbi->cp_lock, flags);
__clear_ckpt_flags(F2FS_CKPT(sbi), CP_NAT_BITS_FLAG);
- kfree(NM_I(sbi)->nat_bits);
+ kvfree(NM_I(sbi)->nat_bits);
NM_I(sbi)->nat_bits = NULL;
if (lock)
spin_unlock_irqrestore(&sbi->cp_lock, flags);
@@ -2206,7 +2215,11 @@ static inline bool is_idle(struct f2fs_sb_info *sbi, int type)
{
if (get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_RD_NODE) ||
get_pages(sbi, F2FS_RD_META) || get_pages(sbi, F2FS_WB_DATA) ||
- get_pages(sbi, F2FS_WB_CP_DATA))
+ get_pages(sbi, F2FS_WB_CP_DATA) ||
+ get_pages(sbi, F2FS_DIO_READ) ||
+ get_pages(sbi, F2FS_DIO_WRITE) ||
+ atomic_read(&SM_I(sbi)->dcc_info->queued_discard) ||
+ atomic_read(&SM_I(sbi)->fcc_info->queued_flush))
return false;
return f2fs_time_over(sbi, type);
}
@@ -2430,6 +2443,7 @@ static inline void __mark_inode_dirty_flag(struct inode *inode,
case FI_NEW_INODE:
if (set)
return;
+ /* fall through */
case FI_DATA_EXIST:
case FI_INLINE_DOTS:
case FI_PIN_FILE:
@@ -2732,32 +2746,47 @@ static inline bool is_dot_dotdot(const struct qstr *str)
static inline bool f2fs_may_extent_tree(struct inode *inode)
{
- if (!test_opt(F2FS_I_SB(inode), EXTENT_CACHE) ||
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+
+ if (!test_opt(sbi, EXTENT_CACHE) ||
is_inode_flag_set(inode, FI_NO_EXTENT))
return false;
+ /*
+ * for recovered files during mount do not create extents
+ * if shrinker is not registered.
+ */
+ if (list_empty(&sbi->s_list))
+ return false;
+
return S_ISREG(inode->i_mode);
}
+static inline void *kvmalloc(size_t size, gfp_t flags)
+{
+ void *ret;
+
+ ret = kmalloc(size, flags | __GFP_NOWARN);
+ if (!ret)
+ ret = __vmalloc(size, flags, PAGE_KERNEL);
+ return ret;
+}
+
static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi,
size_t size, gfp_t flags)
{
+ void *ret;
+
if (time_to_inject(sbi, FAULT_KMALLOC)) {
f2fs_show_injection_info(FAULT_KMALLOC);
return NULL;
}
- return kmalloc(size, flags);
-}
-
-static inline void *kvmalloc(size_t size, gfp_t flags)
-{
- void *ret;
+ ret = kmalloc(size, flags);
+ if (ret)
+ return ret;
- ret = kmalloc(size, flags | __GFP_NOWARN);
- if (!ret)
- ret = __vmalloc(size, flags, PAGE_KERNEL);
- return ret;
+ return kvmalloc(size, flags);
}
static inline void *kvzalloc(size_t size, gfp_t flags)
@@ -2842,6 +2871,8 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi,
spin_unlock(&sbi->iostat_lock);
}
+#define __is_large_section(sbi) ((sbi)->segs_per_sec > 1)
+
#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO(fio->type) == META && \
(!is_read_io(fio->op) || fio->is_meta))
@@ -3086,7 +3117,7 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
struct f2fs_summary *sum, int type,
struct f2fs_io_info *fio, bool add_list);
void f2fs_wait_on_page_writeback(struct page *page,
- enum page_type type, bool ordered);
+ enum page_type type, bool ordered, bool locked);
void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr);
void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
block_t len);
@@ -3226,6 +3257,7 @@ struct f2fs_stat_info {
int total_count, utilization;
int bg_gc, nr_wb_cp_data, nr_wb_data;
int nr_rd_data, nr_rd_node, nr_rd_meta;
+ int nr_dio_read, nr_dio_write;
unsigned int io_skip_bggc, other_skip_bggc;
int nr_flushing, nr_flushed, flush_list_empty;
int nr_discarding, nr_discarded;
@@ -3537,9 +3569,9 @@ static inline bool f2fs_post_read_required(struct inode *inode)
}
#define F2FS_FEATURE_FUNCS(name, flagname) \
-static inline int f2fs_sb_has_##name(struct super_block *sb) \
+static inline int f2fs_sb_has_##name(struct f2fs_sb_info *sbi) \
{ \
- return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_##flagname); \
+ return F2FS_HAS_FEATURE(sbi, F2FS_FEATURE_##flagname); \
}
F2FS_FEATURE_FUNCS(encrypt, ENCRYPT);
@@ -3569,7 +3601,7 @@ static inline int get_blkz_type(struct f2fs_sb_info *sbi,
static inline bool f2fs_hw_should_discard(struct f2fs_sb_info *sbi)
{
- return f2fs_sb_has_blkzoned(sbi->sb);
+ return f2fs_sb_has_blkzoned(sbi);
}
static inline bool f2fs_hw_support_discard(struct f2fs_sb_info *sbi)
@@ -3644,7 +3676,7 @@ static inline bool f2fs_force_buffered_io(struct inode *inode,
* for blkzoned device, fallback direct IO to buffered IO, so
* all IOs can be serialized by log-structured write.
*/
- if (f2fs_sb_has_blkzoned(sbi->sb))
+ if (f2fs_sb_has_blkzoned(sbi))
return true;
if (test_opt(sbi, LFS) && (rw == WRITE) &&
block_unaligned_IO(inode, iocb, iter))
@@ -3667,7 +3699,7 @@ extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
static inline bool is_journalled_quota(struct f2fs_sb_info *sbi)
{
#ifdef CONFIG_QUOTA
- if (f2fs_sb_has_quota_ino(sbi->sb))
+ if (f2fs_sb_has_quota_ino(sbi))
return true;
if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] ||
F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] ||
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 3c61f9619be1..b44e02b8c677 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -85,7 +85,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
}
/* fill the page */
- f2fs_wait_on_page_writeback(page, DATA, false);
+ f2fs_wait_on_page_writeback(page, DATA, false, true);
/* wait for GCed page writeback via META_MAPPING */
f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);
@@ -219,6 +219,9 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
trace_f2fs_sync_file_enter(inode);
+ if (S_ISDIR(inode->i_mode))
+ goto go_write;
+
/* if fdatasync is triggered, let's do in-place-update */
if (datasync || get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks)
set_inode_flag(inode, FI_NEED_IPU);
@@ -578,7 +581,7 @@ static int truncate_partial_data_page(struct inode *inode, u64 from,
if (IS_ERR(page))
return PTR_ERR(page) == -ENOENT ? 0 : PTR_ERR(page);
truncate_out:
- f2fs_wait_on_page_writeback(page, DATA, true);
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
zero_user(page, offset, PAGE_SIZE - offset);
/* An encrypted inode should have a key and truncate the last page. */
@@ -700,7 +703,7 @@ int f2fs_getattr(struct vfsmount *mnt,
unsigned int flags;
if (f2fs_has_extra_attr(inode) &&
- f2fs_sb_has_inode_crtime(inode->i_sb) &&
+ f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) &&
F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) {
stat->result_mask |= STATX_BTIME;
stat->btime.tv_sec = fi->i_crtime.tv_sec;
@@ -899,7 +902,7 @@ static int fill_zero(struct inode *inode, pgoff_t index,
if (IS_ERR(page))
return PTR_ERR(page);
- f2fs_wait_on_page_writeback(page, DATA, true);
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
zero_user(page, start, len);
set_page_dirty(page);
f2fs_put_page(page, 1);
@@ -1503,7 +1506,8 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_map_blocks map = { .m_next_pgofs = NULL,
- .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE };
+ .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE,
+ .m_may_create = true };
pgoff_t pg_end;
loff_t new_size = i_size_read(inode);
loff_t off_end;
@@ -1746,10 +1750,12 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- if (!get_dirty_pages(inode))
- goto skip_flush;
-
- f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING,
+ /*
+ * Should wait end_io to count F2FS_WB_CP_DATA correctly by
+ * f2fs_is_atomic_file.
+ */
+ if (get_dirty_pages(inode))
+ f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING,
"Unexpected flush for atomic writes: ino=%lu, npages=%u",
inode->i_ino, get_dirty_pages(inode));
ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
@@ -1757,7 +1763,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
goto out;
}
-skip_flush:
+
set_inode_flag(inode, FI_ATOMIC_FILE);
clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
@@ -1962,6 +1968,13 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
f2fs_stop_checkpoint(sbi, false);
set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
break;
+ case F2FS_GOING_DOWN_NEED_FSCK:
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ /* do checkpoint only */
+ ret = f2fs_sync_fs(sb, 1);
+ if (ret)
+ goto out;
+ break;
default:
ret = -EINVAL;
goto out;
@@ -2030,7 +2043,7 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
- if (!f2fs_sb_has_encrypt(inode->i_sb))
+ if (!f2fs_sb_has_encrypt(F2FS_I_SB(inode)))
return -EOPNOTSUPP;
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
@@ -2040,7 +2053,7 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg)
{
- if (!f2fs_sb_has_encrypt(file_inode(filp)->i_sb))
+ if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
return -EOPNOTSUPP;
return fscrypt_ioctl_get_policy(filp, (void __user *)arg);
}
@@ -2051,7 +2064,7 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg)
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
int err;
- if (!f2fs_sb_has_encrypt(inode->i_sb))
+ if (!f2fs_sb_has_encrypt(sbi))
return -EOPNOTSUPP;
err = mnt_want_write_file(filp);
@@ -2155,7 +2168,7 @@ do_more:
}
ret = f2fs_gc(sbi, range.sync, true, GET_SEGNO(sbi, range.start));
- range.start += sbi->blocks_per_seg;
+ range.start += BLKS_PER_SEC(sbi);
if (range.start <= end)
goto do_more;
out:
@@ -2197,7 +2210,8 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
{
struct inode *inode = file_inode(filp);
struct f2fs_map_blocks map = { .m_next_extent = NULL,
- .m_seg_type = NO_CHECK_TYPE };
+ .m_seg_type = NO_CHECK_TYPE ,
+ .m_may_create = false };
struct extent_info ei = {0, 0, 0};
pgoff_t pg_start, pg_end, next_pgofs;
unsigned int blk_per_seg = sbi->blocks_per_seg;
@@ -2560,7 +2574,7 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
return -EFAULT;
if (sbi->s_ndevs <= 1 || sbi->s_ndevs - 1 <= range.dev_num ||
- sbi->segs_per_sec != 1) {
+ __is_large_section(sbi)) {
f2fs_msg(sbi->sb, KERN_WARNING,
"Can't flush %u in %d for segs_per_sec %u != 1\n",
range.dev_num, sbi->s_ndevs,
@@ -2711,6 +2725,7 @@ int f2fs_precache_extents(struct inode *inode)
map.m_next_pgofs = NULL;
map.m_next_extent = &m_next_extent;
map.m_seg_type = NO_CHECK_TYPE;
+ map.m_may_create = false;
end = F2FS_I_SB(inode)->max_file_blocks;
while (map.m_lblk < end) {
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 843430b55a73..3624d1336bd1 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -142,7 +142,7 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi)
"f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(gc_th->f2fs_gc_task)) {
err = PTR_ERR(gc_th->f2fs_gc_task);
- kfree(gc_th);
+ kvfree(gc_th);
sbi->gc_thread = NULL;
}
out:
@@ -155,7 +155,7 @@ void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi)
if (!gc_th)
return;
kthread_stop(gc_th->f2fs_gc_task);
- kfree(gc_th);
+ kvfree(gc_th);
sbi->gc_thread = NULL;
}
@@ -323,8 +323,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
p.min_cost = get_max_cost(sbi, &p);
if (*result != NULL_SEGNO) {
- if (IS_DATASEG(get_seg_entry(sbi, *result)->type) &&
- get_valid_blocks(sbi, *result, false) &&
+ if (get_valid_blocks(sbi, *result, false) &&
!sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result)))
p.min_segno = *result;
goto out;
@@ -333,6 +332,22 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
if (p.max_search == 0)
goto out;
+ if (__is_large_section(sbi) && p.alloc_mode == LFS) {
+ if (sbi->next_victim_seg[BG_GC] != NULL_SEGNO) {
+ p.min_segno = sbi->next_victim_seg[BG_GC];
+ *result = p.min_segno;
+ sbi->next_victim_seg[BG_GC] = NULL_SEGNO;
+ goto got_result;
+ }
+ if (gc_type == FG_GC &&
+ sbi->next_victim_seg[FG_GC] != NULL_SEGNO) {
+ p.min_segno = sbi->next_victim_seg[FG_GC];
+ *result = p.min_segno;
+ sbi->next_victim_seg[FG_GC] = NULL_SEGNO;
+ goto got_result;
+ }
+ }
+
last_victim = sm->last_victim[p.gc_mode];
if (p.alloc_mode == LFS && gc_type == FG_GC) {
p.min_segno = check_bg_victims(sbi);
@@ -395,6 +410,8 @@ next:
}
if (p.min_segno != NULL_SEGNO) {
got_it:
+ *result = (p.min_segno / p.ofs_unit) * p.ofs_unit;
+got_result:
if (p.alloc_mode == LFS) {
secno = GET_SEC_FROM_SEG(sbi, p.min_segno);
if (gc_type == FG_GC)
@@ -402,13 +419,13 @@ got_it:
else
set_bit(secno, dirty_i->victim_secmap);
}
- *result = (p.min_segno / p.ofs_unit) * p.ofs_unit;
+ }
+out:
+ if (p.min_segno != NULL_SEGNO)
trace_f2fs_get_victim(sbi->sb, type, gc_type, &p,
sbi->cur_victim_sec,
prefree_segments(sbi), free_segments(sbi));
- }
-out:
mutex_unlock(&dirty_i->seglist_lock);
return (p.min_segno == NULL_SEGNO) ? 0 : 1;
@@ -658,6 +675,14 @@ got_it:
fio.page = page;
fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;
+ /*
+ * don't cache encrypted data into meta inode until previous dirty
+ * data were writebacked to avoid racing between GC and flush.
+ */
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
+
+ f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);
+
fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(sbi),
dn.data_blkaddr,
FGP_LOCK | FGP_CREAT, GFP_NOFS);
@@ -743,7 +768,9 @@ static int move_data_block(struct inode *inode, block_t bidx,
* don't cache encrypted data into meta inode until previous dirty
* data were writebacked to avoid racing between GC and flush.
*/
- f2fs_wait_on_page_writeback(page, DATA, true);
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
+
+ f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);
err = f2fs_get_node_info(fio.sbi, dn.nid, &ni);
if (err)
@@ -802,8 +829,8 @@ static int move_data_block(struct inode *inode, block_t bidx,
}
write_page:
+ f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true, true);
set_page_dirty(fio.encrypted_page);
- f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true);
if (clear_page_dirty_for_io(fio.encrypted_page))
dec_page_count(fio.sbi, F2FS_DIRTY_META);
@@ -811,7 +838,7 @@ write_page:
ClearPageError(page);
/* allocate block address */
- f2fs_wait_on_page_writeback(dn.node_page, NODE, true);
+ f2fs_wait_on_page_writeback(dn.node_page, NODE, true, true);
fio.op = REQ_OP_WRITE;
fio.op_flags = REQ_SYNC | REQ_NOIDLE;
@@ -897,8 +924,9 @@ static int move_data_page(struct inode *inode, block_t bidx, int gc_type,
bool is_dirty = PageDirty(page);
retry:
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
+
set_page_dirty(page);
- f2fs_wait_on_page_writeback(page, DATA, true);
if (clear_page_dirty_for_io(page)) {
inode_dec_dirty_pages(inode);
f2fs_remove_dirty_inode(inode);
@@ -1093,15 +1121,18 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
struct blk_plug plug;
unsigned int segno = start_segno;
unsigned int end_segno = start_segno + sbi->segs_per_sec;
- int seg_freed = 0;
+ int seg_freed = 0, migrated = 0;
unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ?
SUM_TYPE_DATA : SUM_TYPE_NODE;
int submitted = 0;
+ if (__is_large_section(sbi))
+ end_segno = rounddown(end_segno, sbi->segs_per_sec);
+
/* readahead multi ssa blocks those have contiguous address */
- if (sbi->segs_per_sec > 1)
+ if (__is_large_section(sbi))
f2fs_ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno),
- sbi->segs_per_sec, META_SSA, true);
+ end_segno - segno, META_SSA, true);
/* reference all summary page */
while (segno < end_segno) {
@@ -1130,10 +1161,13 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
GET_SUM_BLOCK(sbi, segno));
f2fs_put_page(sum_page, 0);
- if (get_valid_blocks(sbi, segno, false) == 0 ||
- !PageUptodate(sum_page) ||
- unlikely(f2fs_cp_error(sbi)))
- goto next;
+ if (get_valid_blocks(sbi, segno, false) == 0)
+ goto freed;
+ if (__is_large_section(sbi) &&
+ migrated >= sbi->migration_granularity)
+ goto skip;
+ if (!PageUptodate(sum_page) || unlikely(f2fs_cp_error(sbi)))
+ goto skip;
sum = page_address(sum_page);
if (type != GET_SUM_TYPE((&sum->footer))) {
@@ -1141,7 +1175,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
"type [%d, %d] in SSA and SIT",
segno, type, GET_SUM_TYPE((&sum->footer)));
set_sbi_flag(sbi, SBI_NEED_FSCK);
- goto next;
+ goto skip;
}
/*
@@ -1160,10 +1194,15 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
stat_inc_seg_count(sbi, type, gc_type);
+freed:
if (gc_type == FG_GC &&
get_valid_blocks(sbi, segno, false) == 0)
seg_freed++;
-next:
+ migrated++;
+
+ if (__is_large_section(sbi) && segno + 1 < end_segno)
+ sbi->next_victim_seg[gc_type] = segno + 1;
+skip:
f2fs_put_page(sum_page, 0);
}
@@ -1307,7 +1346,7 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi)
sbi->gc_pin_file_threshold = DEF_GC_FAILED_PINNED_FILES;
/* give warm/cold data area from slower device */
- if (sbi->s_ndevs && sbi->segs_per_sec == 1)
+ if (sbi->s_ndevs && !__is_large_section(sbi))
SIT_I(sbi)->last_victim[ALLOC_NEXT] =
GET_SEGNO(sbi, FDEV(0).end_blk) + 1;
}
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index fdf4abcf2dab..b63b2ae1acc7 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -73,7 +73,7 @@ void f2fs_truncate_inline_inode(struct inode *inode,
addr = inline_data_addr(inode, ipage);
- f2fs_wait_on_page_writeback(ipage, NODE, true);
+ f2fs_wait_on_page_writeback(ipage, NODE, true, true);
memset(addr + from, 0, MAX_INLINE_DATA(inode) - from);
set_page_dirty(ipage);
@@ -179,7 +179,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
fio.old_blkaddr = dn->data_blkaddr;
set_inode_flag(dn->inode, FI_HOT_DATA);
f2fs_outplace_write_data(dn, &fio);
- f2fs_wait_on_page_writeback(page, DATA, true);
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
if (dirty) {
inode_dec_dirty_pages(dn->inode);
f2fs_remove_dirty_inode(dn->inode);
@@ -254,7 +254,7 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page)
f2fs_bug_on(F2FS_I_SB(inode), page->index);
- f2fs_wait_on_page_writeback(dn.inode_page, NODE, true);
+ f2fs_wait_on_page_writeback(dn.inode_page, NODE, true, true);
src_addr = kmap_atomic(page);
dst_addr = inline_data_addr(inode, dn.inode_page);
memcpy(dst_addr, src_addr, MAX_INLINE_DATA(inode));
@@ -295,7 +295,7 @@ process_inline:
ipage = f2fs_get_node_page(sbi, inode->i_ino);
f2fs_bug_on(sbi, IS_ERR(ipage));
- f2fs_wait_on_page_writeback(ipage, NODE, true);
+ f2fs_wait_on_page_writeback(ipage, NODE, true, true);
src_addr = inline_data_addr(inode, npage);
dst_addr = inline_data_addr(inode, ipage);
@@ -409,7 +409,7 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage,
goto out;
}
- f2fs_wait_on_page_writeback(page, DATA, true);
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
dentry_blk = page_address(page);
@@ -519,18 +519,18 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage,
stat_dec_inline_dir(dir);
clear_inode_flag(dir, FI_INLINE_DENTRY);
- kfree(backup_dentry);
+ kvfree(backup_dentry);
return 0;
recover:
lock_page(ipage);
- f2fs_wait_on_page_writeback(ipage, NODE, true);
+ f2fs_wait_on_page_writeback(ipage, NODE, true, true);
memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA(dir));
f2fs_i_depth_write(dir, 0);
f2fs_i_size_write(dir, MAX_INLINE_DATA(dir));
set_page_dirty(ipage);
f2fs_put_page(ipage, 1);
- kfree(backup_dentry);
+ kvfree(backup_dentry);
return err;
}
@@ -583,7 +583,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
}
}
- f2fs_wait_on_page_writeback(ipage, NODE, true);
+ f2fs_wait_on_page_writeback(ipage, NODE, true, true);
name_hash = f2fs_dentry_hash(new_name, NULL);
f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos);
@@ -615,7 +615,7 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page,
int i;
lock_page(page);
- f2fs_wait_on_page_writeback(page, NODE, true);
+ f2fs_wait_on_page_writeback(page, NODE, true, true);
inline_dentry = inline_data_addr(dir, page);
make_dentry_ptr_inline(dir, &d, inline_dentry);
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 1b1f22faff22..3e739e4986b8 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -103,7 +103,7 @@ static void __recover_inline_status(struct inode *inode, struct page *ipage)
while (start < end) {
if (*start++) {
- f2fs_wait_on_page_writeback(ipage, NODE, true);
+ f2fs_wait_on_page_writeback(ipage, NODE, true, true);
set_inode_flag(inode, FI_DATA_EXIST);
set_raw_inline(inode, F2FS_INODE(ipage));
@@ -118,7 +118,7 @@ static bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct page *page
{
struct f2fs_inode *ri = &F2FS_NODE(page)->i;
- if (!f2fs_sb_has_inode_chksum(sbi->sb))
+ if (!f2fs_sb_has_inode_chksum(sbi))
return false;
if (!IS_INODE(page) || !(ri->i_inline & F2FS_EXTRA_ATTR))
@@ -218,7 +218,7 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
return false;
}
- if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)
+ if (f2fs_sb_has_flexible_inline_xattr(sbi)
&& !f2fs_has_extra_attr(inode)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
@@ -228,7 +228,7 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
}
if (f2fs_has_extra_attr(inode) &&
- !f2fs_sb_has_extra_attr(sbi->sb)) {
+ !f2fs_sb_has_extra_attr(sbi)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
"%s: inode (ino=%lx) is with extra_attr, "
@@ -340,7 +340,7 @@ static int do_read_inode(struct inode *inode)
fi->i_extra_isize = f2fs_has_extra_attr(inode) ?
le16_to_cpu(ri->i_extra_isize) : 0;
- if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) {
+ if (f2fs_sb_has_flexible_inline_xattr(sbi)) {
fi->i_inline_xattr_size = le16_to_cpu(ri->i_inline_xattr_size);
} else if (f2fs_has_inline_xattr(inode) ||
f2fs_has_inline_dentry(inode)) {
@@ -390,14 +390,14 @@ static int do_read_inode(struct inode *inode)
if (fi->i_flags & F2FS_PROJINHERIT_FL)
set_inode_flag(inode, FI_PROJ_INHERIT);
- if (f2fs_has_extra_attr(inode) && f2fs_sb_has_project_quota(sbi->sb) &&
+ if (f2fs_has_extra_attr(inode) && f2fs_sb_has_project_quota(sbi) &&
F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_projid))
i_projid = (projid_t)le32_to_cpu(ri->i_projid);
else
i_projid = F2FS_DEF_PROJID;
fi->i_projid = make_kprojid(&init_user_ns, i_projid);
- if (f2fs_has_extra_attr(inode) && f2fs_sb_has_inode_crtime(sbi->sb) &&
+ if (f2fs_has_extra_attr(inode) && f2fs_sb_has_inode_crtime(sbi) &&
F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) {
fi->i_crtime.tv_sec = le64_to_cpu(ri->i_crtime);
fi->i_crtime.tv_nsec = le32_to_cpu(ri->i_crtime_nsec);
@@ -497,7 +497,7 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page)
struct f2fs_inode *ri;
struct extent_tree *et = F2FS_I(inode)->extent_tree;
- f2fs_wait_on_page_writeback(node_page, NODE, true);
+ f2fs_wait_on_page_writeback(node_page, NODE, true, true);
set_page_dirty(node_page);
f2fs_inode_synced(inode);
@@ -542,11 +542,11 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page)
if (f2fs_has_extra_attr(inode)) {
ri->i_extra_isize = cpu_to_le16(F2FS_I(inode)->i_extra_isize);
- if (f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(inode)->sb))
+ if (f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(inode)))
ri->i_inline_xattr_size =
cpu_to_le16(F2FS_I(inode)->i_inline_xattr_size);
- if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)->sb) &&
+ if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)) &&
F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize,
i_projid)) {
projid_t i_projid;
@@ -556,7 +556,7 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page)
ri->i_projid = cpu_to_le32(i_projid);
}
- if (f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)->sb) &&
+ if (f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) &&
F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize,
i_crtime)) {
ri->i_crtime =
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index f2cdf47b76b6..367302907d07 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -61,7 +61,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
goto fail;
}
- if (f2fs_sb_has_project_quota(sbi->sb) &&
+ if (f2fs_sb_has_project_quota(sbi) &&
(F2FS_I(dir)->i_flags & F2FS_PROJINHERIT_FL))
F2FS_I(inode)->i_projid = F2FS_I(dir)->i_projid;
else
@@ -79,7 +79,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
f2fs_may_encrypt(inode))
f2fs_set_encrypted_inode(inode);
- if (f2fs_sb_has_extra_attr(sbi->sb)) {
+ if (f2fs_sb_has_extra_attr(sbi)) {
set_inode_flag(inode, FI_EXTRA_ATTR);
F2FS_I(inode)->i_extra_isize = F2FS_TOTAL_EXTRA_ATTR_SIZE;
}
@@ -92,7 +92,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
if (f2fs_may_inline_dentry(inode))
set_inode_flag(inode, FI_INLINE_DENTRY);
- if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) {
+ if (f2fs_sb_has_flexible_inline_xattr(sbi)) {
f2fs_bug_on(sbi, !f2fs_has_extra_attr(inode));
if (f2fs_has_inline_xattr(inode))
xattr_size = F2FS_OPTION(sbi).inline_xattr_size;
@@ -634,7 +634,7 @@ out_f2fs_handle_failed_inode:
f2fs_handle_failed_inode(inode);
out_free_encrypted_link:
if (disk_link.name != (unsigned char *)symname)
- kfree(disk_link.name);
+ kvfree(disk_link.name);
return err;
}
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 08edc0930831..f6a1e1e0f891 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -826,6 +826,7 @@ static int truncate_node(struct dnode_of_data *dn)
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
struct node_info ni;
int err;
+ pgoff_t index;
err = f2fs_get_node_info(sbi, dn->nid, &ni);
if (err)
@@ -845,10 +846,11 @@ static int truncate_node(struct dnode_of_data *dn)
clear_node_page_dirty(dn->node_page);
set_sbi_flag(sbi, SBI_IS_DIRTY);
+ index = dn->node_page->index;
f2fs_put_page(dn->node_page, 1);
invalidate_mapping_pages(NODE_MAPPING(sbi),
- dn->node_page->index, dn->node_page->index);
+ index, index);
dn->node_page = NULL;
trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr);
@@ -1104,7 +1106,7 @@ skip_partial:
ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) {
lock_page(page);
BUG_ON(page->mapping != NODE_MAPPING(sbi));
- f2fs_wait_on_page_writeback(page, NODE, true);
+ f2fs_wait_on_page_writeback(page, NODE, true, true);
ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
set_page_dirty(page);
unlock_page(page);
@@ -1232,7 +1234,7 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs)
new_ni.version = 0;
set_node_addr(sbi, &new_ni, NEW_ADDR, false);
- f2fs_wait_on_page_writeback(page, NODE, true);
+ f2fs_wait_on_page_writeback(page, NODE, true, true);
fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
set_cold_node(page, S_ISDIR(dn->inode->i_mode));
if (!PageUptodate(page))
@@ -1598,10 +1600,10 @@ int f2fs_move_node_page(struct page *node_page, int gc_type)
.for_reclaim = 0,
};
+ f2fs_wait_on_page_writeback(node_page, NODE, true, true);
+
set_page_dirty(node_page);
- f2fs_wait_on_page_writeback(node_page, NODE, true);
- f2fs_bug_on(F2FS_P_SB(node_page), PageWriteback(node_page));
if (!clear_page_dirty_for_io(node_page)) {
err = -EAGAIN;
goto out_page;
@@ -1689,8 +1691,7 @@ continue_unlock:
goto continue_unlock;
}
- f2fs_wait_on_page_writeback(page, NODE, true);
- BUG_ON(PageWriteback(page));
+ f2fs_wait_on_page_writeback(page, NODE, true, true);
set_fsync_mark(page, 0);
set_dentry_mark(page, 0);
@@ -1741,7 +1742,7 @@ continue_unlock:
"Retry to write fsync mark: ino=%u, idx=%lx",
ino, last_page->index);
lock_page(last_page);
- f2fs_wait_on_page_writeback(last_page, NODE, true);
+ f2fs_wait_on_page_writeback(last_page, NODE, true, true);
set_page_dirty(last_page);
unlock_page(last_page);
goto retry;
@@ -1822,9 +1823,8 @@ continue_unlock:
goto lock_node;
}
- f2fs_wait_on_page_writeback(page, NODE, true);
+ f2fs_wait_on_page_writeback(page, NODE, true, true);
- BUG_ON(PageWriteback(page));
if (!clear_page_dirty_for_io(page))
goto continue_unlock;
@@ -1891,7 +1891,7 @@ int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi,
get_page(page);
spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
- f2fs_wait_on_page_writeback(page, NODE, true);
+ f2fs_wait_on_page_writeback(page, NODE, true, false);
if (TestClearPageError(page))
ret = -EIO;
@@ -2472,7 +2472,7 @@ void f2fs_recover_inline_xattr(struct inode *inode, struct page *page)
src_addr = inline_xattr_addr(inode, page);
inline_size = inline_xattr_size(inode);
- f2fs_wait_on_page_writeback(ipage, NODE, true);
+ f2fs_wait_on_page_writeback(ipage, NODE, true, true);
memcpy(dst_addr, src_addr, inline_size);
update_inode:
f2fs_update_inode(inode, ipage);
@@ -2566,17 +2566,17 @@ retry:
if (dst->i_inline & F2FS_EXTRA_ATTR) {
dst->i_extra_isize = src->i_extra_isize;
- if (f2fs_sb_has_flexible_inline_xattr(sbi->sb) &&
+ if (f2fs_sb_has_flexible_inline_xattr(sbi) &&
F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize),
i_inline_xattr_size))
dst->i_inline_xattr_size = src->i_inline_xattr_size;
- if (f2fs_sb_has_project_quota(sbi->sb) &&
+ if (f2fs_sb_has_project_quota(sbi) &&
F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize),
i_projid))
dst->i_projid = src->i_projid;
- if (f2fs_sb_has_inode_crtime(sbi->sb) &&
+ if (f2fs_sb_has_inode_crtime(sbi) &&
F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize),
i_crtime_nsec)) {
dst->i_crtime = src->i_crtime;
@@ -3118,17 +3118,17 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi)
for (i = 0; i < nm_i->nat_blocks; i++)
kvfree(nm_i->free_nid_bitmap[i]);
- kfree(nm_i->free_nid_bitmap);
+ kvfree(nm_i->free_nid_bitmap);
}
kvfree(nm_i->free_nid_count);
- kfree(nm_i->nat_bitmap);
- kfree(nm_i->nat_bits);
+ kvfree(nm_i->nat_bitmap);
+ kvfree(nm_i->nat_bits);
#ifdef CONFIG_F2FS_CHECK_FS
- kfree(nm_i->nat_bitmap_mir);
+ kvfree(nm_i->nat_bitmap_mir);
#endif
sbi->nm_info = NULL;
- kfree(nm_i);
+ kvfree(nm_i);
}
int __init f2fs_create_node_manager_caches(void)
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 1c73d879a9bc..e05af5df5648 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -361,7 +361,7 @@ static inline int set_nid(struct page *p, int off, nid_t nid, bool i)
{
struct f2fs_node *rn = F2FS_NODE(p);
- f2fs_wait_on_page_writeback(p, NODE, true);
+ f2fs_wait_on_page_writeback(p, NODE, true, true);
if (i)
rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid);
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 336273a81fba..aa3f5633a20c 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -250,7 +250,7 @@ static int recover_inode(struct inode *inode, struct page *page)
i_gid_write(inode, le32_to_cpu(raw->i_gid));
if (raw->i_inline & F2FS_EXTRA_ATTR) {
- if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)->sb) &&
+ if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)) &&
F2FS_FITS_IN_INODE(raw, le16_to_cpu(raw->i_extra_isize),
i_projid)) {
projid_t i_projid;
@@ -531,7 +531,7 @@ retry_dn:
goto out;
}
- f2fs_wait_on_page_writeback(dn.node_page, NODE, true);
+ f2fs_wait_on_page_writeback(dn.node_page, NODE, true, true);
err = f2fs_get_node_info(sbi, dn.nid, &ni);
if (err)
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index fb4be4629add..ccefcf8016c4 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -229,7 +229,7 @@ static int __revoke_inmem_pages(struct inode *inode,
lock_page(page);
- f2fs_wait_on_page_writeback(page, DATA, true);
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
if (recover) {
struct dnode_of_data dn;
@@ -387,8 +387,9 @@ static int __f2fs_commit_inmem_pages(struct inode *inode)
if (page->mapping == inode->i_mapping) {
trace_f2fs_commit_inmem_page(page, INMEM);
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
+
set_page_dirty(page);
- f2fs_wait_on_page_writeback(page, DATA, true);
if (clear_page_dirty_for_io(page)) {
inode_dec_dirty_pages(inode);
f2fs_remove_dirty_inode(inode);
@@ -620,14 +621,16 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
return 0;
if (!test_opt(sbi, FLUSH_MERGE)) {
+ atomic_inc(&fcc->queued_flush);
ret = submit_flush_wait(sbi, ino);
+ atomic_dec(&fcc->queued_flush);
atomic_inc(&fcc->issued_flush);
return ret;
}
- if (atomic_inc_return(&fcc->issing_flush) == 1 || sbi->s_ndevs > 1) {
+ if (atomic_inc_return(&fcc->queued_flush) == 1 || sbi->s_ndevs > 1) {
ret = submit_flush_wait(sbi, ino);
- atomic_dec(&fcc->issing_flush);
+ atomic_dec(&fcc->queued_flush);
atomic_inc(&fcc->issued_flush);
return ret;
@@ -646,14 +649,14 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
if (fcc->f2fs_issue_flush) {
wait_for_completion(&cmd.wait);
- atomic_dec(&fcc->issing_flush);
+ atomic_dec(&fcc->queued_flush);
} else {
struct llist_node *list;
list = llist_del_all(&fcc->issue_list);
if (!list) {
wait_for_completion(&cmd.wait);
- atomic_dec(&fcc->issing_flush);
+ atomic_dec(&fcc->queued_flush);
} else {
struct flush_cmd *tmp, *next;
@@ -662,7 +665,7 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
llist_for_each_entry_safe(tmp, next, list, llnode) {
if (tmp == &cmd) {
cmd.ret = ret;
- atomic_dec(&fcc->issing_flush);
+ atomic_dec(&fcc->queued_flush);
continue;
}
tmp->ret = ret;
@@ -691,7 +694,7 @@ int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi)
if (!fcc)
return -ENOMEM;
atomic_set(&fcc->issued_flush, 0);
- atomic_set(&fcc->issing_flush, 0);
+ atomic_set(&fcc->queued_flush, 0);
init_waitqueue_head(&fcc->flush_wait_queue);
init_llist_head(&fcc->issue_list);
SM_I(sbi)->fcc_info = fcc;
@@ -703,7 +706,7 @@ init_thread:
"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(fcc->f2fs_issue_flush)) {
err = PTR_ERR(fcc->f2fs_issue_flush);
- kfree(fcc);
+ kvfree(fcc);
SM_I(sbi)->fcc_info = NULL;
return err;
}
@@ -722,7 +725,7 @@ void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
kthread_stop(flush_thread);
}
if (free) {
- kfree(fcc);
+ kvfree(fcc);
SM_I(sbi)->fcc_info = NULL;
}
}
@@ -907,7 +910,7 @@ static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
dc->len = len;
dc->ref = 0;
dc->state = D_PREP;
- dc->issuing = 0;
+ dc->queued = 0;
dc->error = 0;
init_completion(&dc->wait);
list_add_tail(&dc->list, pend_list);
@@ -939,7 +942,7 @@ static void __detach_discard_cmd(struct discard_cmd_control *dcc,
struct discard_cmd *dc)
{
if (dc->state == D_DONE)
- atomic_sub(dc->issuing, &dcc->issing_discard);
+ atomic_sub(dc->queued, &dcc->queued_discard);
list_del(&dc->list);
rb_erase(&dc->rb_node, &dcc->root);
@@ -1223,12 +1226,12 @@ submit:
dc->bio_ref++;
spin_unlock_irqrestore(&dc->lock, flags);
- atomic_inc(&dcc->issing_discard);
- dc->issuing++;
+ atomic_inc(&dcc->queued_discard);
+ dc->queued++;
list_move_tail(&dc->list, wait_list);
/* sanity check on discard range */
- __check_sit_bitmap(sbi, start, start + len);
+ __check_sit_bitmap(sbi, lstart, lstart + len);
bio->bi_private = dc;
bio->bi_end_io = f2fs_submit_discard_endio;
@@ -1725,6 +1728,10 @@ static int issue_discard_thread(void *data)
if (dcc->discard_wake)
dcc->discard_wake = 0;
+ /* clean up pending candidates before going to sleep */
+ if (atomic_read(&dcc->queued_discard))
+ __wait_all_discard_cmd(sbi, NULL);
+
if (try_to_freeze())
continue;
if (f2fs_readonly(sbi->sb))
@@ -1810,7 +1817,7 @@ static int __issue_discard_async(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t blkstart, block_t blklen)
{
#ifdef CONFIG_BLK_DEV_ZONED
- if (f2fs_sb_has_blkzoned(sbi->sb) &&
+ if (f2fs_sb_has_blkzoned(sbi) &&
bdev_zoned_model(bdev) != BLK_ZONED_NONE)
return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
#endif
@@ -1958,7 +1965,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
unsigned int start = 0, end = -1;
unsigned int secno, start_segno;
bool force = (cpc->reason & CP_DISCARD);
- bool need_align = test_opt(sbi, LFS) && sbi->segs_per_sec > 1;
+ bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi);
mutex_lock(&dirty_i->seglist_lock);
@@ -1990,7 +1997,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
(end - 1) <= cpc->trim_end)
continue;
- if (!test_opt(sbi, LFS) || sbi->segs_per_sec == 1) {
+ if (!test_opt(sbi, LFS) || !__is_large_section(sbi)) {
f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
(end - start) << sbi->log_blocks_per_seg);
continue;
@@ -2022,7 +2029,7 @@ find_next:
sbi->blocks_per_seg, cur_pos);
len = next_pos - cur_pos;
- if (f2fs_sb_has_blkzoned(sbi->sb) ||
+ if (f2fs_sb_has_blkzoned(sbi) ||
(force && len < cpc->trim_minlen))
goto skip;
@@ -2070,7 +2077,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
INIT_LIST_HEAD(&dcc->fstrim_list);
mutex_init(&dcc->cmd_lock);
atomic_set(&dcc->issued_discard, 0);
- atomic_set(&dcc->issing_discard, 0);
+ atomic_set(&dcc->queued_discard, 0);
atomic_set(&dcc->discard_cmd_cnt, 0);
dcc->nr_discards = 0;
dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
@@ -2086,7 +2093,7 @@ init_thread:
"f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(dcc->f2fs_issue_discard)) {
err = PTR_ERR(dcc->f2fs_issue_discard);
- kfree(dcc);
+ kvfree(dcc);
SM_I(sbi)->dcc_info = NULL;
return err;
}
@@ -2103,7 +2110,7 @@ static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
f2fs_stop_discard_thread(sbi);
- kfree(dcc);
+ kvfree(dcc);
SM_I(sbi)->dcc_info = NULL;
}
@@ -2222,7 +2229,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
/* update total number of valid blocks to be written in ckpt area */
SIT_I(sbi)->written_valid_blocks += del;
- if (sbi->segs_per_sec > 1)
+ if (__is_large_section(sbi))
get_sec_entry(sbi, segno)->valid_blocks += del;
}
@@ -2488,7 +2495,7 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
{
/* if segs_per_sec is large than 1, we need to keep original policy. */
- if (sbi->segs_per_sec != 1)
+ if (__is_large_section(sbi))
return CURSEG_I(sbi, type)->segno;
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
@@ -2798,7 +2805,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
struct discard_policy dpolicy;
unsigned long long trimmed = 0;
int err = 0;
- bool need_align = test_opt(sbi, LFS) && sbi->segs_per_sec > 1;
+ bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi);
if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
return -EINVAL;
@@ -3349,16 +3356,18 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
}
void f2fs_wait_on_page_writeback(struct page *page,
- enum page_type type, bool ordered)
+ enum page_type type, bool ordered, bool locked)
{
if (PageWriteback(page)) {
struct f2fs_sb_info *sbi = F2FS_P_SB(page);
f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type);
- if (ordered)
+ if (ordered) {
wait_on_page_writeback(page);
- else
+ f2fs_bug_on(sbi, locked && PageWriteback(page));
+ } else {
wait_for_stable_page(page);
+ }
}
}
@@ -3375,7 +3384,7 @@ void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr)
cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
if (cpage) {
- f2fs_wait_on_page_writeback(cpage, DATA, true);
+ f2fs_wait_on_page_writeback(cpage, DATA, true, true);
f2fs_put_page(cpage, 1);
}
}
@@ -3957,7 +3966,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
if (!sit_i->tmp_map)
return -ENOMEM;
- if (sbi->segs_per_sec > 1) {
+ if (__is_large_section(sbi)) {
sit_i->sec_entries =
f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry),
MAIN_SECS(sbi)),
@@ -4112,7 +4121,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
se->valid_blocks;
}
- if (sbi->segs_per_sec > 1)
+ if (__is_large_section(sbi))
get_sec_entry(sbi, start)->valid_blocks +=
se->valid_blocks;
}
@@ -4156,7 +4165,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
sbi->discard_blks -= se->valid_blocks;
}
- if (sbi->segs_per_sec > 1) {
+ if (__is_large_section(sbi)) {
get_sec_entry(sbi, start)->valid_blocks +=
se->valid_blocks;
get_sec_entry(sbi, start)->valid_blocks -=
@@ -4391,7 +4400,7 @@ static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
destroy_victim_secmap(sbi);
SM_I(sbi)->dirty_info = NULL;
- kfree(dirty_i);
+ kvfree(dirty_i);
}
static void destroy_curseg(struct f2fs_sb_info *sbi)
@@ -4403,10 +4412,10 @@ static void destroy_curseg(struct f2fs_sb_info *sbi)
return;
SM_I(sbi)->curseg_array = NULL;
for (i = 0; i < NR_CURSEG_TYPE; i++) {
- kfree(array[i].sum_blk);
- kfree(array[i].journal);
+ kvfree(array[i].sum_blk);
+ kvfree(array[i].journal);
}
- kfree(array);
+ kvfree(array);
}
static void destroy_free_segmap(struct f2fs_sb_info *sbi)
@@ -4417,7 +4426,7 @@ static void destroy_free_segmap(struct f2fs_sb_info *sbi)
SM_I(sbi)->free_info = NULL;
kvfree(free_i->free_segmap);
kvfree(free_i->free_secmap);
- kfree(free_i);
+ kvfree(free_i);
}
static void destroy_sit_info(struct f2fs_sb_info *sbi)
@@ -4430,26 +4439,26 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
if (sit_i->sentries) {
for (start = 0; start < MAIN_SEGS(sbi); start++) {
- kfree(sit_i->sentries[start].cur_valid_map);
+ kvfree(sit_i->sentries[start].cur_valid_map);
#ifdef CONFIG_F2FS_CHECK_FS
- kfree(sit_i->sentries[start].cur_valid_map_mir);
+ kvfree(sit_i->sentries[start].cur_valid_map_mir);
#endif
- kfree(sit_i->sentries[start].ckpt_valid_map);
- kfree(sit_i->sentries[start].discard_map);
+ kvfree(sit_i->sentries[start].ckpt_valid_map);
+ kvfree(sit_i->sentries[start].discard_map);
}
}
- kfree(sit_i->tmp_map);
+ kvfree(sit_i->tmp_map);
kvfree(sit_i->sentries);
kvfree(sit_i->sec_entries);
kvfree(sit_i->dirty_sentries_bitmap);
SM_I(sbi)->sit_info = NULL;
- kfree(sit_i->sit_bitmap);
+ kvfree(sit_i->sit_bitmap);
#ifdef CONFIG_F2FS_CHECK_FS
- kfree(sit_i->sit_bitmap_mir);
+ kvfree(sit_i->sit_bitmap_mir);
#endif
- kfree(sit_i);
+ kvfree(sit_i);
}
void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
@@ -4465,7 +4474,7 @@ void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
destroy_free_segmap(sbi);
destroy_sit_info(sbi);
sbi->sm_info = NULL;
- kfree(sm_info);
+ kvfree(sm_info);
}
int __init f2fs_create_segment_manager_caches(void)
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index ab3465faddf1..a77f76f528b6 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -333,7 +333,7 @@ static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi,
* In order to get # of valid blocks in a section instantly from many
* segments, f2fs manages two counting structures separately.
*/
- if (use_section && sbi->segs_per_sec > 1)
+ if (use_section && __is_large_section(sbi))
return get_sec_entry(sbi, segno)->valid_blocks;
else
return get_seg_entry(sbi, segno)->valid_blocks;
diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
index 9e13db994fdf..a467aca29cfe 100644
--- a/fs/f2fs/shrinker.c
+++ b/fs/f2fs/shrinker.c
@@ -135,6 +135,6 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi)
f2fs_shrink_extent_tree(sbi, __count_extent_cache(sbi));
spin_lock(&f2fs_list_lock);
- list_del(&sbi->s_list);
+ list_del_init(&sbi->s_list);
spin_unlock(&f2fs_list_lock);
}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index c551d5336473..a9e7a1e62c66 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -38,7 +38,7 @@ static struct kmem_cache *f2fs_inode_cachep;
#ifdef CONFIG_F2FS_FAULT_INJECTION
-char *f2fs_fault_name[FAULT_MAX] = {
+const char *f2fs_fault_name[FAULT_MAX] = {
[FAULT_KMALLOC] = "kmalloc",
[FAULT_KVMALLOC] = "kvmalloc",
[FAULT_PAGE_ALLOC] = "page alloc",
@@ -259,7 +259,7 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype,
"quota options when quota turned on");
return -EINVAL;
}
- if (f2fs_sb_has_quota_ino(sb)) {
+ if (f2fs_sb_has_quota_ino(sbi)) {
f2fs_msg(sb, KERN_INFO,
"QUOTA feature is enabled, so ignore qf_name");
return 0;
@@ -289,7 +289,7 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype,
set_opt(sbi, QUOTA);
return 0;
errout:
- kfree(qname);
+ kvfree(qname);
return ret;
}
@@ -302,7 +302,7 @@ static int f2fs_clear_qf_name(struct super_block *sb, int qtype)
" when quota turned on");
return -EINVAL;
}
- kfree(F2FS_OPTION(sbi).s_qf_names[qtype]);
+ kvfree(F2FS_OPTION(sbi).s_qf_names[qtype]);
F2FS_OPTION(sbi).s_qf_names[qtype] = NULL;
return 0;
}
@@ -314,7 +314,7 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi)
* 'grpquota' mount options are allowed even without quota feature
* to support legacy quotas in quota files.
*/
- if (test_opt(sbi, PRJQUOTA) && !f2fs_sb_has_project_quota(sbi->sb)) {
+ if (test_opt(sbi, PRJQUOTA) && !f2fs_sb_has_project_quota(sbi)) {
f2fs_msg(sbi->sb, KERN_ERR, "Project quota feature not enabled. "
"Cannot enable project quota enforcement.");
return -1;
@@ -348,7 +348,7 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi)
}
}
- if (f2fs_sb_has_quota_ino(sbi->sb) && F2FS_OPTION(sbi).s_jquota_fmt) {
+ if (f2fs_sb_has_quota_ino(sbi) && F2FS_OPTION(sbi).s_jquota_fmt) {
f2fs_msg(sbi->sb, KERN_INFO,
"QUOTA feature is enabled, so ignore jquota_fmt");
F2FS_OPTION(sbi).s_jquota_fmt = 0;
@@ -399,10 +399,10 @@ static int parse_options(struct super_block *sb, char *options)
set_opt(sbi, BG_GC);
set_opt(sbi, FORCE_FG_GC);
} else {
- kfree(name);
+ kvfree(name);
return -EINVAL;
}
- kfree(name);
+ kvfree(name);
break;
case Opt_disable_roll_forward:
set_opt(sbi, DISABLE_ROLL_FORWARD);
@@ -417,7 +417,7 @@ static int parse_options(struct super_block *sb, char *options)
set_opt(sbi, DISCARD);
break;
case Opt_nodiscard:
- if (f2fs_sb_has_blkzoned(sb)) {
+ if (f2fs_sb_has_blkzoned(sbi)) {
f2fs_msg(sb, KERN_WARNING,
"discard is required for zoned block devices");
return -EINVAL;
@@ -566,11 +566,11 @@ static int parse_options(struct super_block *sb, char *options)
return -ENOMEM;
if (strlen(name) == 8 &&
!strncmp(name, "adaptive", 8)) {
- if (f2fs_sb_has_blkzoned(sb)) {
+ if (f2fs_sb_has_blkzoned(sbi)) {
f2fs_msg(sb, KERN_WARNING,
"adaptive mode is not allowed with "
"zoned block device feature");
- kfree(name);
+ kvfree(name);
return -EINVAL;
}
set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE);
@@ -578,10 +578,10 @@ static int parse_options(struct super_block *sb, char *options)
!strncmp(name, "lfs", 3)) {
set_opt_mode(sbi, F2FS_MOUNT_LFS);
} else {
- kfree(name);
+ kvfree(name);
return -EINVAL;
}
- kfree(name);
+ kvfree(name);
break;
case Opt_io_size_bits:
if (args->from && match_int(args, &arg))
@@ -714,10 +714,10 @@ static int parse_options(struct super_block *sb, char *options)
!strncmp(name, "fs-based", 8)) {
F2FS_OPTION(sbi).whint_mode = WHINT_MODE_FS;
} else {
- kfree(name);
+ kvfree(name);
return -EINVAL;
}
- kfree(name);
+ kvfree(name);
break;
case Opt_alloc:
name = match_strdup(&args[0]);
@@ -731,10 +731,10 @@ static int parse_options(struct super_block *sb, char *options)
!strncmp(name, "reuse", 5)) {
F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE;
} else {
- kfree(name);
+ kvfree(name);
return -EINVAL;
}
- kfree(name);
+ kvfree(name);
break;
case Opt_fsync:
name = match_strdup(&args[0]);
@@ -751,14 +751,14 @@ static int parse_options(struct super_block *sb, char *options)
F2FS_OPTION(sbi).fsync_mode =
FSYNC_MODE_NOBARRIER;
} else {
- kfree(name);
+ kvfree(name);
return -EINVAL;
}
- kfree(name);
+ kvfree(name);
break;
case Opt_test_dummy_encryption:
#ifdef CONFIG_F2FS_FS_ENCRYPTION
- if (!f2fs_sb_has_encrypt(sb)) {
+ if (!f2fs_sb_has_encrypt(sbi)) {
f2fs_msg(sb, KERN_ERR, "Encrypt feature is off");
return -EINVAL;
}
@@ -783,10 +783,10 @@ static int parse_options(struct super_block *sb, char *options)
!strncmp(name, "disable", 7)) {
set_opt(sbi, DISABLE_CHECKPOINT);
} else {
- kfree(name);
+ kvfree(name);
return -EINVAL;
}
- kfree(name);
+ kvfree(name);
break;
default:
f2fs_msg(sb, KERN_ERR,
@@ -799,13 +799,13 @@ static int parse_options(struct super_block *sb, char *options)
if (f2fs_check_quota_options(sbi))
return -EINVAL;
#else
- if (f2fs_sb_has_quota_ino(sbi->sb) && !f2fs_readonly(sbi->sb)) {
+ if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sbi->sb)) {
f2fs_msg(sbi->sb, KERN_INFO,
"Filesystem with quota feature cannot be mounted RDWR "
"without CONFIG_QUOTA");
return -EINVAL;
}
- if (f2fs_sb_has_project_quota(sbi->sb) && !f2fs_readonly(sbi->sb)) {
+ if (f2fs_sb_has_project_quota(sbi) && !f2fs_readonly(sbi->sb)) {
f2fs_msg(sb, KERN_ERR,
"Filesystem with project quota feature cannot be "
"mounted RDWR without CONFIG_QUOTA");
@@ -821,8 +821,8 @@ static int parse_options(struct super_block *sb, char *options)
}
if (test_opt(sbi, INLINE_XATTR_SIZE)) {
- if (!f2fs_sb_has_extra_attr(sb) ||
- !f2fs_sb_has_flexible_inline_xattr(sb)) {
+ if (!f2fs_sb_has_extra_attr(sbi) ||
+ !f2fs_sb_has_flexible_inline_xattr(sbi)) {
f2fs_msg(sb, KERN_ERR,
"extra_attr or flexible_inline_xattr "
"feature is off");
@@ -1017,10 +1017,10 @@ static void destroy_device_list(struct f2fs_sb_info *sbi)
for (i = 0; i < sbi->s_ndevs; i++) {
blkdev_put(FDEV(i).bdev, FMODE_EXCL);
#ifdef CONFIG_BLK_DEV_ZONED
- kfree(FDEV(i).blkz_type);
+ kvfree(FDEV(i).blkz_type);
#endif
}
- kfree(sbi->devs);
+ kvfree(sbi->devs);
}
static void f2fs_put_super(struct super_block *sb)
@@ -1058,9 +1058,6 @@ static void f2fs_put_super(struct super_block *sb)
f2fs_write_checkpoint(sbi, &cpc);
}
- /* f2fs_write_checkpoint can update stat informaion */
- f2fs_destroy_stats(sbi);
-
/*
* normally superblock is clean, so we need to release this.
* In addition, EIO will skip do checkpoint, we need this as well.
@@ -1078,32 +1075,41 @@ static void f2fs_put_super(struct super_block *sb)
f2fs_bug_on(sbi, sbi->fsync_node_num);
iput(sbi->node_inode);
+ sbi->node_inode = NULL;
+
iput(sbi->meta_inode);
+ sbi->meta_inode = NULL;
+
+ /*
+ * iput() can update stat information, if f2fs_write_checkpoint()
+ * above failed with error.
+ */
+ f2fs_destroy_stats(sbi);
/* destroy f2fs internal modules */
f2fs_destroy_node_manager(sbi);
f2fs_destroy_segment_manager(sbi);
- kfree(sbi->ckpt);
+ kvfree(sbi->ckpt);
f2fs_unregister_sysfs(sbi);
sb->s_fs_info = NULL;
if (sbi->s_chksum_driver)
crypto_free_shash(sbi->s_chksum_driver);
- kfree(sbi->raw_super);
+ kvfree(sbi->raw_super);
destroy_device_list(sbi);
if (sbi->write_io_dummy)
mempool_destroy(sbi->write_io_dummy);
#ifdef CONFIG_QUOTA
for (i = 0; i < MAXQUOTAS; i++)
- kfree(F2FS_OPTION(sbi).s_qf_names[i]);
+ kvfree(F2FS_OPTION(sbi).s_qf_names[i]);
#endif
destroy_percpu_info(sbi);
for (i = 0; i < NR_PAGE_TYPE; i++)
- kfree(sbi->write_io[i]);
- kfree(sbi);
+ kvfree(sbi->write_io[i]);
+ kvfree(sbi);
}
int f2fs_sync_fs(struct super_block *sb, int sync)
@@ -1432,7 +1438,7 @@ static void default_options(struct f2fs_sb_info *sbi)
clear_opt(sbi, DISABLE_CHECKPOINT);
set_opt(sbi, FLUSH_MERGE);
set_opt(sbi, DISCARD);
- if (f2fs_sb_has_blkzoned(sbi->sb))
+ if (f2fs_sb_has_blkzoned(sbi))
set_opt_mode(sbi, F2FS_MOUNT_LFS);
else
set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE);
@@ -1458,19 +1464,16 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
sbi->sb->s_flags |= MS_ACTIVE;
- mutex_lock(&sbi->gc_mutex);
f2fs_update_time(sbi, DISABLE_TIME);
while (!f2fs_time_over(sbi, DISABLE_TIME)) {
+ mutex_lock(&sbi->gc_mutex);
err = f2fs_gc(sbi, true, false, NULL_SEGNO);
if (err == -ENODATA)
break;
- if (err && err != -EAGAIN) {
- mutex_unlock(&sbi->gc_mutex);
+ if (err && err != -EAGAIN)
return err;
- }
}
- mutex_unlock(&sbi->gc_mutex);
err = sync_filesystem(sbi->sb);
if (err)
@@ -1532,7 +1535,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
GFP_KERNEL);
if (!org_mount_opt.s_qf_names[i]) {
for (j = 0; j < i; j++)
- kfree(org_mount_opt.s_qf_names[j]);
+ kvfree(org_mount_opt.s_qf_names[j]);
return -ENOMEM;
}
} else {
@@ -1576,7 +1579,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
sb->s_flags &= ~MS_RDONLY;
if (sb_any_quota_suspended(sb)) {
dquot_resume(sb, -1);
- } else if (f2fs_sb_has_quota_ino(sb)) {
+ } else if (f2fs_sb_has_quota_ino(sbi)) {
err = f2fs_enable_quotas(sb);
if (err)
goto restore_opts;
@@ -1652,7 +1655,7 @@ skip:
#ifdef CONFIG_QUOTA
/* Release old quota file names */
for (i = 0; i < MAXQUOTAS; i++)
- kfree(org_mount_opt.s_qf_names[i]);
+ kvfree(org_mount_opt.s_qf_names[i]);
#endif
/* Update the POSIXACL Flag */
sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
@@ -1673,7 +1676,7 @@ restore_opts:
#ifdef CONFIG_QUOTA
F2FS_OPTION(sbi).s_jquota_fmt = org_mount_opt.s_jquota_fmt;
for (i = 0; i < MAXQUOTAS; i++) {
- kfree(F2FS_OPTION(sbi).s_qf_names[i]);
+ kvfree(F2FS_OPTION(sbi).s_qf_names[i]);
F2FS_OPTION(sbi).s_qf_names[i] = org_mount_opt.s_qf_names[i];
}
#endif
@@ -1818,7 +1821,7 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly)
int enabled = 0;
int i, err;
- if (f2fs_sb_has_quota_ino(sbi->sb) && rdonly) {
+ if (f2fs_sb_has_quota_ino(sbi) && rdonly) {
err = f2fs_enable_quotas(sbi->sb);
if (err) {
f2fs_msg(sbi->sb, KERN_ERR,
@@ -1849,7 +1852,7 @@ static int f2fs_quota_enable(struct super_block *sb, int type, int format_id,
unsigned long qf_inum;
int err;
- BUG_ON(!f2fs_sb_has_quota_ino(sb));
+ BUG_ON(!f2fs_sb_has_quota_ino(F2FS_SB(sb)));
qf_inum = f2fs_qf_ino(sb, type);
if (!qf_inum)
@@ -1994,7 +1997,7 @@ static int f2fs_quota_off(struct super_block *sb, int type)
goto out_put;
err = dquot_quota_off(sb, type);
- if (err || f2fs_sb_has_quota_ino(sb))
+ if (err || f2fs_sb_has_quota_ino(F2FS_SB(sb)))
goto out_put;
inode_lock(inode);
@@ -2177,7 +2180,7 @@ static int f2fs_set_context(struct inode *inode, const void *ctx, size_t len,
* if LOST_FOUND feature is enabled.
*
*/
- if (f2fs_sb_has_lost_found(sbi->sb) &&
+ if (f2fs_sb_has_lost_found(sbi) &&
inode->i_ino == F2FS_ROOT_INO(sbi))
return -EPERM;
@@ -2400,7 +2403,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
__u32 crc = 0;
/* Check checksum_offset and crc in superblock */
- if (le32_to_cpu(raw_super->feature) & F2FS_FEATURE_SB_CHKSUM) {
+ if (__F2FS_HAS_FEATURE(raw_super, F2FS_FEATURE_SB_CHKSUM)) {
crc_offset = le32_to_cpu(raw_super->checksum_offset);
if (crc_offset !=
offsetof(struct f2fs_super_block, crc)) {
@@ -2500,10 +2503,10 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
return 1;
}
- if (segment_count > (le32_to_cpu(raw_super->block_count) >> 9)) {
+ if (segment_count > (le64_to_cpu(raw_super->block_count) >> 9)) {
f2fs_msg(sb, KERN_INFO,
- "Wrong segment_count / block_count (%u > %u)",
- segment_count, le32_to_cpu(raw_super->block_count));
+ "Wrong segment_count / block_count (%u > %llu)",
+ segment_count, le64_to_cpu(raw_super->block_count));
return 1;
}
@@ -2678,7 +2681,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
static void init_sb_info(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *raw_super = sbi->raw_super;
- int i, j;
+ int i;
sbi->log_sectors_per_block =
le32_to_cpu(raw_super->log_sectors_per_block);
@@ -2696,7 +2699,10 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
sbi->node_ino_num = le32_to_cpu(raw_super->node_ino);
sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino);
sbi->cur_victim_sec = NULL_SECNO;
+ sbi->next_victim_seg[BG_GC] = NULL_SEGNO;
+ sbi->next_victim_seg[FG_GC] = NULL_SEGNO;
sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH;
+ sbi->migration_granularity = sbi->segs_per_sec;
sbi->dir_level = DEF_DIR_LEVEL;
sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL;
@@ -2714,9 +2720,6 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
INIT_LIST_HEAD(&sbi->s_list);
mutex_init(&sbi->umount_mutex);
- for (i = 0; i < NR_PAGE_TYPE - 1; i++)
- for (j = HOT; j < NR_TEMP_TYPE; j++)
- mutex_init(&sbi->wio_mutex[i][j]);
init_rwsem(&sbi->io_order_lock);
spin_lock_init(&sbi->cp_lock);
@@ -2753,7 +2756,7 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
unsigned int n = 0;
int err = -EIO;
- if (!f2fs_sb_has_blkzoned(sbi->sb))
+ if (!f2fs_sb_has_blkzoned(sbi))
return 0;
if (sbi->blocks_per_blkz && sbi->blocks_per_blkz !=
@@ -2804,7 +2807,7 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
}
}
- kfree(zones);
+ kvfree(zones);
return err;
}
@@ -2864,7 +2867,7 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi,
/* No valid superblock */
if (!*raw_super)
- kfree(super);
+ kvfree(super);
else
err = 0;
@@ -2884,7 +2887,7 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
}
/* we should update superblock crc here */
- if (!recover && f2fs_sb_has_sb_chksum(sbi->sb)) {
+ if (!recover && f2fs_sb_has_sb_chksum(sbi)) {
crc = f2fs_crc32(sbi, F2FS_RAW_SUPER(sbi),
offsetof(struct f2fs_super_block, crc));
F2FS_RAW_SUPER(sbi)->crc = cpu_to_le32(crc);
@@ -2976,7 +2979,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
#ifdef CONFIG_BLK_DEV_ZONED
if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM &&
- !f2fs_sb_has_blkzoned(sbi->sb)) {
+ !f2fs_sb_has_blkzoned(sbi)) {
f2fs_msg(sbi->sb, KERN_ERR,
"Zoned block device feature not enabled\n");
return -EINVAL;
@@ -3072,7 +3075,7 @@ try_onemore:
sbi->raw_super = raw_super;
/* precompute checksum seed for metadata */
- if (f2fs_sb_has_inode_chksum(sb))
+ if (f2fs_sb_has_inode_chksum(sbi))
sbi->s_chksum_seed = f2fs_chksum(sbi, ~0, raw_super->uuid,
sizeof(raw_super->uuid));
@@ -3082,7 +3085,7 @@ try_onemore:
* devices, but mandatory for host-managed zoned block devices.
*/
#ifndef CONFIG_BLK_DEV_ZONED
- if (f2fs_sb_has_blkzoned(sb)) {
+ if (f2fs_sb_has_blkzoned(sbi)) {
f2fs_msg(sb, KERN_ERR,
"Zoned block device support is not enabled\n");
err = -EOPNOTSUPP;
@@ -3109,13 +3112,13 @@ try_onemore:
#ifdef CONFIG_QUOTA
sb->dq_op = &f2fs_quota_operations;
- if (f2fs_sb_has_quota_ino(sb))
+ if (f2fs_sb_has_quota_ino(sbi))
sb->s_qcop = &dquot_quotactl_sysfile_ops;
else
sb->s_qcop = &f2fs_quotactl_ops;
sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
- if (f2fs_sb_has_quota_ino(sbi->sb)) {
+ if (f2fs_sb_has_quota_ino(sbi)) {
for (i = 0; i < MAXQUOTAS; i++) {
if (f2fs_qf_ino(sbi->sb, i))
sbi->nquota_files++;
@@ -3266,30 +3269,30 @@ try_onemore:
f2fs_build_gc_manager(sbi);
+ err = f2fs_build_stats(sbi);
+ if (err)
+ goto free_nm;
+
/* get an inode for node space */
sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi));
if (IS_ERR(sbi->node_inode)) {
f2fs_msg(sb, KERN_ERR, "Failed to read node inode");
err = PTR_ERR(sbi->node_inode);
- goto free_nm;
+ goto free_stats;
}
- err = f2fs_build_stats(sbi);
- if (err)
- goto free_node_inode;
-
/* read root inode and dentry */
root = f2fs_iget(sb, F2FS_ROOT_INO(sbi));
if (IS_ERR(root)) {
f2fs_msg(sb, KERN_ERR, "Failed to read root inode");
err = PTR_ERR(root);
- goto free_stats;
+ goto free_node_inode;
}
if (!S_ISDIR(root->i_mode) || !root->i_blocks ||
!root->i_size || !root->i_nlink) {
iput(root);
err = -EINVAL;
- goto free_stats;
+ goto free_node_inode;
}
sb->s_root = d_make_root(root); /* allocate root dentry */
@@ -3304,7 +3307,7 @@ try_onemore:
#ifdef CONFIG_QUOTA
/* Enable quota usage during mount */
- if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb)) {
+ if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sb)) {
err = f2fs_enable_quotas(sb);
if (err)
f2fs_msg(sb, KERN_ERR,
@@ -3376,7 +3379,7 @@ skip_recovery:
if (err)
goto free_meta;
}
- kfree(options);
+ kvfree(options);
/* recover broken superblock */
if (recovery) {
@@ -3399,7 +3402,7 @@ skip_recovery:
free_meta:
#ifdef CONFIG_QUOTA
f2fs_truncate_quota_inode_pages(sb);
- if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb))
+ if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sb))
f2fs_quota_off_umount(sbi->sb);
#endif
/*
@@ -3413,41 +3416,43 @@ free_meta:
free_root_inode:
dput(sb->s_root);
sb->s_root = NULL;
-free_stats:
- f2fs_destroy_stats(sbi);
free_node_inode:
f2fs_release_ino_entry(sbi, true);
truncate_inode_pages_final(NODE_MAPPING(sbi));
iput(sbi->node_inode);
+ sbi->node_inode = NULL;
+free_stats:
+ f2fs_destroy_stats(sbi);
free_nm:
f2fs_destroy_node_manager(sbi);
free_sm:
f2fs_destroy_segment_manager(sbi);
free_devices:
destroy_device_list(sbi);
- kfree(sbi->ckpt);
+ kvfree(sbi->ckpt);
free_meta_inode:
make_bad_inode(sbi->meta_inode);
iput(sbi->meta_inode);
+ sbi->meta_inode = NULL;
free_io_dummy:
mempool_destroy(sbi->write_io_dummy);
free_percpu:
destroy_percpu_info(sbi);
free_bio_info:
for (i = 0; i < NR_PAGE_TYPE; i++)
- kfree(sbi->write_io[i]);
+ kvfree(sbi->write_io[i]);
free_options:
#ifdef CONFIG_QUOTA
for (i = 0; i < MAXQUOTAS; i++)
- kfree(F2FS_OPTION(sbi).s_qf_names[i]);
+ kvfree(F2FS_OPTION(sbi).s_qf_names[i]);
#endif
- kfree(options);
+ kvfree(options);
free_sb_buf:
- kfree(raw_super);
+ kvfree(raw_super);
free_sbi:
if (sbi->s_chksum_driver)
crypto_free_shash(sbi->s_chksum_driver);
- kfree(sbi);
+ kvfree(sbi);
/* give only one another chance */
if (retry) {
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index db89741b219b..948c1a211341 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -90,34 +90,34 @@ static ssize_t features_show(struct f2fs_attr *a,
if (!sb->s_bdev->bd_part)
return snprintf(buf, PAGE_SIZE, "0\n");
- if (f2fs_sb_has_encrypt(sb))
+ if (f2fs_sb_has_encrypt(sbi))
len += snprintf(buf, PAGE_SIZE - len, "%s",
"encryption");
- if (f2fs_sb_has_blkzoned(sb))
+ if (f2fs_sb_has_blkzoned(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "blkzoned");
- if (f2fs_sb_has_extra_attr(sb))
+ if (f2fs_sb_has_extra_attr(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "extra_attr");
- if (f2fs_sb_has_project_quota(sb))
+ if (f2fs_sb_has_project_quota(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "projquota");
- if (f2fs_sb_has_inode_chksum(sb))
+ if (f2fs_sb_has_inode_chksum(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "inode_checksum");
- if (f2fs_sb_has_flexible_inline_xattr(sb))
+ if (f2fs_sb_has_flexible_inline_xattr(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "flexible_inline_xattr");
- if (f2fs_sb_has_quota_ino(sb))
+ if (f2fs_sb_has_quota_ino(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "quota_ino");
- if (f2fs_sb_has_inode_crtime(sb))
+ if (f2fs_sb_has_inode_crtime(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "inode_crtime");
- if (f2fs_sb_has_lost_found(sb))
+ if (f2fs_sb_has_lost_found(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "lost_found");
- if (f2fs_sb_has_sb_chksum(sb))
+ if (f2fs_sb_has_sb_chksum(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "sb_checksum");
len += snprintf(buf + len, PAGE_SIZE - len, "\n");
@@ -246,6 +246,11 @@ out:
return count;
}
+ if (!strcmp(a->attr.name, "migration_granularity")) {
+ if (t == 0 || t > sbi->segs_per_sec)
+ return -EINVAL;
+ }
+
if (!strcmp(a->attr.name, "trim_sections"))
return -EINVAL;
@@ -406,6 +411,7 @@ F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, migration_granularity, migration_granularity);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]);
@@ -460,6 +466,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(min_hot_blocks),
ATTR_LIST(min_ssr_sections),
ATTR_LIST(max_victim_search),
+ ATTR_LIST(migration_granularity),
ATTR_LIST(dir_level),
ATTR_LIST(ram_thresh),
ATTR_LIST(ra_nid_pages),
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index c7c3c2a63a85..71b27c55988a 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -334,7 +334,7 @@ static int read_xattr_block(struct inode *inode, void *txattr_addr)
static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
unsigned int index, unsigned int len,
const char *name, struct f2fs_xattr_entry **xe,
- void **base_addr)
+ void **base_addr, int *base_size)
{
void *cur_addr, *txattr_addr, *last_addr = NULL;
nid_t xnid = F2FS_I(inode)->i_xattr_nid;
@@ -345,8 +345,8 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
if (!size && !inline_size)
return -ENODATA;
- txattr_addr = f2fs_kzalloc(F2FS_I_SB(inode),
- inline_size + size + XATTR_PADDING_SIZE, GFP_NOFS);
+ *base_size = inline_size + size + XATTR_PADDING_SIZE;
+ txattr_addr = f2fs_kzalloc(F2FS_I_SB(inode), *base_size, GFP_NOFS);
if (!txattr_addr)
return -ENOMEM;
@@ -358,8 +358,10 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
*xe = __find_inline_xattr(inode, txattr_addr, &last_addr,
index, len, name);
- if (*xe)
+ if (*xe) {
+ *base_size = inline_size;
goto check;
+ }
}
/* read from xattr node block */
@@ -461,7 +463,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
}
f2fs_wait_on_page_writeback(ipage ? ipage : in_page,
- NODE, true);
+ NODE, true, true);
/* no need to use xattr node block */
if (hsize <= inline_size) {
err = f2fs_truncate_xattr_node(inode);
@@ -485,7 +487,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
goto in_page_out;
}
f2fs_bug_on(sbi, new_nid);
- f2fs_wait_on_page_writeback(xpage, NODE, true);
+ f2fs_wait_on_page_writeback(xpage, NODE, true, true);
} else {
struct dnode_of_data dn;
set_new_dnode(&dn, inode, NULL, NULL, new_nid);
@@ -520,6 +522,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
int error = 0;
unsigned int size, len;
void *base_addr = NULL;
+ int base_size;
if (name == NULL)
return -EINVAL;
@@ -530,7 +533,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
down_read(&F2FS_I(inode)->i_xattr_sem);
error = lookup_all_xattrs(inode, ipage, index, len, name,
- &entry, &base_addr);
+ &entry, &base_addr, &base_size);
up_read(&F2FS_I(inode)->i_xattr_sem);
if (error)
return error;
@@ -544,6 +547,11 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
if (buffer) {
char *pval = entry->e_name + entry->e_name_len;
+
+ if (base_size - (pval - (char *)base_addr) < size) {
+ error = -ERANGE;
+ goto out;
+ }
memcpy(buffer, pval, size);
}
error = size;
diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index 7a182c87f378..ab1d7f35f6c2 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -715,6 +715,9 @@ static const struct fscache_state *fscache_drop_object(struct fscache_object *ob
if (awaken)
wake_up_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING);
+ if (test_and_clear_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags))
+ wake_up_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP);
+
/* Prevent a race with our last child, which has to signal EV_CLEARED
* before dropping our spinlock.
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index c6e4e4c0221b..0fc9e87802b5 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1751,7 +1751,6 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
req->in.h.nodeid = outarg->nodeid;
req->in.numargs = 2;
req->in.argpages = 1;
- req->page_descs[0].offset = offset;
req->end = fuse_retrieve_end;
index = outarg->offset >> PAGE_CACHE_SHIFT;
@@ -1766,6 +1765,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
req->pages[req->num_pages] = page;
+ req->page_descs[req->num_pages].offset = offset;
req->page_descs[req->num_pages].length = this_num;
req->num_pages++;
@@ -2091,10 +2091,13 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
ret = fuse_dev_do_write(fud, &cs, len);
+ pipe_lock(pipe);
for (idx = 0; idx < nbuf; idx++) {
struct pipe_buffer *buf = &bufs[idx];
buf->ops->release(pipe, buf);
}
+ pipe_unlock(pipe);
+
out:
kfree(bufs);
return ret;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 5ff580633a63..690bbb22ed6b 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1853,7 +1853,7 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req,
spin_unlock(&fc->lock);
dec_wb_stat(&bdi->wb, WB_WRITEBACK);
- dec_zone_page_state(page, NR_WRITEBACK_TEMP);
+ dec_zone_page_state(new_req->pages[0], NR_WRITEBACK_TEMP);
wb_writeout_inc(&bdi->wb);
fuse_writepage_free(fc, new_req);
fuse_request_free(new_req);
diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c
index 1ab19e660e69..1ff5774a5382 100644
--- a/fs/hfs/btree.c
+++ b/fs/hfs/btree.c
@@ -328,13 +328,14 @@ void hfs_bmap_free(struct hfs_bnode *node)
nidx -= len * 8;
i = node->next;
- hfs_bnode_put(node);
if (!i) {
/* panic */;
pr_crit("unable to free bnode %u. bmap not found!\n",
node->this);
+ hfs_bnode_put(node);
return;
}
+ hfs_bnode_put(node);
node = hfs_bnode_find(tree, i);
if (IS_ERR(node))
return;
diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c
index 3345c7553edc..7adc8a327e03 100644
--- a/fs/hfsplus/btree.c
+++ b/fs/hfsplus/btree.c
@@ -453,14 +453,15 @@ void hfs_bmap_free(struct hfs_bnode *node)
nidx -= len * 8;
i = node->next;
- hfs_bnode_put(node);
if (!i) {
/* panic */;
pr_crit("unable to free bnode %u. "
"bmap not found!\n",
node->this);
+ hfs_bnode_put(node);
return;
}
+ hfs_bnode_put(node);
node = hfs_bnode_find(tree, i);
if (IS_ERR(node))
return;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a17da8b57fc6..ab34f613fa85 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -118,6 +118,16 @@ static void huge_pagevec_release(struct pagevec *pvec)
pagevec_reinit(pvec);
}
+/*
+ * Mask used when checking the page offset value passed in via system
+ * calls. This value will be converted to a loff_t which is signed.
+ * Therefore, we want to check the upper PAGE_SHIFT + 1 bits of the
+ * value. The extra bit (- 1 in the shift value) is to take the sign
+ * bit into account.
+ */
+#define PGOFF_LOFFT_MAX \
+ (((1UL << (PAGE_SHIFT + 1)) - 1) << (BITS_PER_LONG - (PAGE_SHIFT + 1)))
+
static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
struct inode *inode = file_inode(file);
@@ -136,17 +146,31 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND;
vma->vm_ops = &hugetlb_vm_ops;
+ /*
+ * page based offset in vm_pgoff could be sufficiently large to
+ * overflow a loff_t when converted to byte offset. This can
+ * only happen on architectures where sizeof(loff_t) ==
+ * sizeof(unsigned long). So, only check in those instances.
+ */
+ if (sizeof(unsigned long) == sizeof(loff_t)) {
+ if (vma->vm_pgoff & PGOFF_LOFFT_MAX)
+ return -EINVAL;
+ }
+
+ /* must be huge page aligned */
if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
return -EINVAL;
vma_len = (loff_t)(vma->vm_end - vma->vm_start);
+ len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
+ /* check for overflow */
+ if (len < vma_len)
+ return -EINVAL;
mutex_lock(&inode->i_mutex);
file_accessed(file);
ret = -ENOMEM;
- len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
-
if (hugetlb_reserve_pages(inode,
vma->vm_pgoff >> huge_page_order(h),
len >> huge_page_shift(h), vma,
@@ -155,7 +179,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
ret = 0;
if (vma->vm_flags & VM_WRITE && inode->i_size < len)
- inode->i_size = len;
+ i_size_write(inode, len);
out:
mutex_unlock(&inode->i_mutex);
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 1544f530ccd0..023e7f32ee1b 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -101,7 +101,8 @@ static int jffs2_sync_fs(struct super_block *sb, int wait)
struct jffs2_sb_info *c = JFFS2_SB_INFO(sb);
#ifdef CONFIG_JFFS2_FS_WRITEBUFFER
- cancel_delayed_work_sync(&c->wbuf_dwork);
+ if (jffs2_is_writebuffered(c))
+ cancel_delayed_work_sync(&c->wbuf_dwork);
#endif
mutex_lock(&c->alloc_sem);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 62f358f67764..412fcfbc50e2 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2376,8 +2376,7 @@ static int nfs_compare_mount_options(const struct super_block *s, const struct n
goto Ebusy;
if (a->acdirmax != b->acdirmax)
goto Ebusy;
- if (b->auth_info.flavor_len > 0 &&
- clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor)
+ if (clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor)
goto Ebusy;
return 1;
Ebusy:
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 9690cb4dd588..03c7a4e7b6ba 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1106,6 +1106,8 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size)
case 'Y':
case 'y':
case '1':
+ if (nn->nfsd_serv)
+ return -EBUSY;
nfsd4_end_grace(nn);
break;
default:
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index 272269f1c310..9ee8bcfbf00f 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -146,7 +146,6 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
BUG();
}
- clear_buffer_uptodate(bh);
get_bh(bh); /* for end_buffer_read_sync() */
bh->b_end_io = end_buffer_read_sync;
submit_bh(READ, bh);
@@ -300,7 +299,6 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
continue;
}
- clear_buffer_uptodate(bh);
get_bh(bh); /* for end_buffer_read_sync() */
if (validate)
set_buffer_needs_validate(bh);
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 827fc9809bc2..3494e220b510 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -125,10 +125,10 @@ check_err:
check_gen:
if (handle->ih_generation != inode->i_generation) {
- iput(inode);
trace_ocfs2_get_dentry_generation((unsigned long long)blkno,
handle->ih_generation,
inode->i_generation);
+ iput(inode);
result = ERR_PTR(-ESTALE);
goto bail;
}
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 0a4457fb0711..85111d740c9d 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -345,13 +345,18 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
if (num_used
|| alloc->id1.bitmap1.i_used
|| alloc->id1.bitmap1.i_total
- || la->la_bm_off)
- mlog(ML_ERROR, "Local alloc hasn't been recovered!\n"
+ || la->la_bm_off) {
+ mlog(ML_ERROR, "inconsistent detected, clean journal with"
+ " unrecovered local alloc, please run fsck.ocfs2!\n"
"found = %u, set = %u, taken = %u, off = %u\n",
num_used, le32_to_cpu(alloc->id1.bitmap1.i_used),
le32_to_cpu(alloc->id1.bitmap1.i_total),
OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
+ status = -EINVAL;
+ goto bail;
+ }
+
osb->local_alloc_bh = alloc_bh;
osb->local_alloc_state = OCFS2_LA_ENABLED;
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 124471d26a73..c1a83c58456e 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -156,18 +156,14 @@ out:
}
/*
- * lock allocators, and reserving appropriate number of bits for
- * meta blocks and data clusters.
- *
- * in some cases, we don't need to reserve clusters, just let data_ac
- * be NULL.
+ * lock allocator, and reserve appropriate number of bits for
+ * meta blocks.
*/
-static int ocfs2_lock_allocators_move_extents(struct inode *inode,
+static int ocfs2_lock_meta_allocator_move_extents(struct inode *inode,
struct ocfs2_extent_tree *et,
u32 clusters_to_move,
u32 extents_to_split,
struct ocfs2_alloc_context **meta_ac,
- struct ocfs2_alloc_context **data_ac,
int extra_blocks,
int *credits)
{
@@ -192,13 +188,6 @@ static int ocfs2_lock_allocators_move_extents(struct inode *inode,
goto out;
}
- if (data_ac) {
- ret = ocfs2_reserve_clusters(osb, clusters_to_move, data_ac);
- if (ret) {
- mlog_errno(ret);
- goto out;
- }
- }
*credits += ocfs2_calc_extend_credits(osb->sb, et->et_root_el);
@@ -260,10 +249,10 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
}
}
- ret = ocfs2_lock_allocators_move_extents(inode, &context->et, *len, 1,
- &context->meta_ac,
- &context->data_ac,
- extra_blocks, &credits);
+ ret = ocfs2_lock_meta_allocator_move_extents(inode, &context->et,
+ *len, 1,
+ &context->meta_ac,
+ extra_blocks, &credits);
if (ret) {
mlog_errno(ret);
goto out;
@@ -286,6 +275,21 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
}
}
+ /*
+ * Make sure ocfs2_reserve_cluster is called after
+ * __ocfs2_flush_truncate_log, otherwise, dead lock may happen.
+ *
+ * If ocfs2_reserve_cluster is called
+ * before __ocfs2_flush_truncate_log, dead lock on global bitmap
+ * may happen.
+ *
+ */
+ ret = ocfs2_reserve_clusters(osb, *len, &context->data_ac);
+ if (ret) {
+ mlog_errno(ret);
+ goto out_unlock_mutex;
+ }
+
handle = ocfs2_start_trans(osb, credits);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
@@ -606,9 +610,10 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
}
}
- ret = ocfs2_lock_allocators_move_extents(inode, &context->et, len, 1,
- &context->meta_ac,
- NULL, extra_blocks, &credits);
+ ret = ocfs2_lock_meta_allocator_move_extents(inode, &context->et,
+ len, 1,
+ &context->meta_ac,
+ extra_blocks, &credits);
if (ret) {
mlog_errno(ret);
goto out;
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 161441f52ebf..015cdc615dfb 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -333,7 +333,7 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
#ifdef CONFIG_SECCOMP
seq_printf(m, "Seccomp:\t%d\n", p->seccomp.mode);
#endif
- seq_printf(m, "\nSpeculation_Store_Bypass:\t");
+ seq_printf(m, "Speculation_Store_Bypass:\t");
switch (arch_prctl_spec_ctrl_get(p, PR_SPEC_STORE_BYPASS)) {
case -EINVAL:
seq_printf(m, "unknown");
diff --git a/fs/proc/base.c b/fs/proc/base.c
index ee9b7f0ea938..86cc5554198f 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -255,7 +255,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
* Inherently racy -- command line shares address space
* with code and data.
*/
- rv = access_remote_vm(mm, arg_end - 1, &c, 1, 0);
+ rv = access_remote_vm(mm, arg_end - 1, &c, 1, FOLL_ANON);
if (rv <= 0)
goto out_free_page;
@@ -273,7 +273,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
int nr_read;
_count = min3(count, len, PAGE_SIZE);
- nr_read = access_remote_vm(mm, p, page, _count, 0);
+ nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON);
if (nr_read < 0)
rv = nr_read;
if (nr_read <= 0)
@@ -308,7 +308,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
bool final;
_count = min3(count, len, PAGE_SIZE);
- nr_read = access_remote_vm(mm, p, page, _count, 0);
+ nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON);
if (nr_read < 0)
rv = nr_read;
if (nr_read <= 0)
@@ -357,7 +357,7 @@ skip_argv:
bool final;
_count = min3(count, len, PAGE_SIZE);
- nr_read = access_remote_vm(mm, p, page, _count, 0);
+ nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON);
if (nr_read < 0)
rv = nr_read;
if (nr_read <= 0)
@@ -869,6 +869,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf,
unsigned long addr = *ppos;
ssize_t copied;
char *page;
+ unsigned int flags;
if (!mm)
return 0;
@@ -881,6 +882,11 @@ static ssize_t mem_rw(struct file *file, char __user *buf,
if (!atomic_inc_not_zero(&mm->mm_users))
goto free;
+ /* Maybe we should limit FOLL_FORCE to actual ptrace users? */
+ flags = FOLL_FORCE;
+ if (write)
+ flags |= FOLL_WRITE;
+
while (count > 0) {
int this_len = min_t(int, count, PAGE_SIZE);
@@ -889,7 +895,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf,
break;
}
- this_len = access_remote_vm(mm, addr, page, this_len, write);
+ this_len = access_remote_vm(mm, addr, page, this_len, flags);
if (!this_len) {
if (!copied)
copied = -EIO;
@@ -1001,8 +1007,7 @@ static ssize_t environ_read(struct file *file, char __user *buf,
max_len = min_t(size_t, PAGE_SIZE, count);
this_len = min(max_len, this_len);
- retval = access_remote_vm(mm, (env_start + src),
- page, this_len, 0);
+ retval = access_remote_vm(mm, (env_start + src), page, this_len, FOLL_ANON);
if (retval <= 0) {
ret = retval;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 99c4ffaa43a8..2df3b2358b0a 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -143,7 +143,7 @@ static void seq_print_vma_name(struct seq_file *m, struct vm_area_struct *vma)
struct page *page;
pages_pinned = get_user_pages(current, mm, page_start_vaddr,
- 1, 0, 0, &page, NULL);
+ 1, 0, &page, NULL);
if (pages_pinned < 1) {
seq_puts(m, "<fault>]");
return;
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 40a0fe0a4e05..6fbfa8189451 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -392,8 +392,8 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c)
} else {
spin_lock_irqsave(&psinfo->buf_lock, flags);
}
- memcpy(psinfo->buf, s, c);
- psinfo->write(PSTORE_TYPE_CONSOLE, 0, &id, 0, 0, 0, c, psinfo);
+ psinfo->write_buf(PSTORE_TYPE_CONSOLE, 0, &id, 0,
+ s, 0, c, psinfo);
spin_unlock_irqrestore(&psinfo->buf_lock, flags);
s += c;
c = e - s;
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index ecdb3baa1283..11e558efd61e 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -488,6 +488,11 @@ static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig,
sig ^= PERSISTENT_RAM_SIG;
if (prz->buffer->sig == sig) {
+ if (buffer_size(prz) == 0) {
+ pr_debug("found existing empty buffer\n");
+ return 0;
+ }
+
if (buffer_size(prz) > prz->buffer_size ||
buffer_start(prz) > buffer_size(prz))
pr_info("found existing invalid buffer, size %zu, start %zu\n",
diff --git a/fs/read_write.c b/fs/read_write.c
index bfd1a5dddf6e..16e554ba885d 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -363,8 +363,10 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos)
iter->type |= WRITE;
ret = file->f_op->write_iter(&kiocb, iter);
BUG_ON(ret == -EIOCBQUEUED);
- if (ret > 0)
+ if (ret > 0) {
*ppos = kiocb.ki_pos;
+ fsnotify_modify(file);
+ }
return ret;
}
EXPORT_SYMBOL(vfs_iter_write);
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
index 6dd158a216f4..ffb093e72b6c 100644
--- a/fs/squashfs/Kconfig
+++ b/fs/squashfs/Kconfig
@@ -26,6 +26,34 @@ config SQUASHFS
If unsure, say N.
choice
+ prompt "File decompression options"
+ depends on SQUASHFS
+ help
+ Squashfs now supports two options for decompressing file
+ data. Traditionally Squashfs has decompressed into an
+ intermediate buffer and then memcopied it into the page cache.
+ Squashfs now supports the ability to decompress directly into
+ the page cache.
+
+ If unsure, select "Decompress file data into an intermediate buffer"
+
+config SQUASHFS_FILE_CACHE
+ bool "Decompress file data into an intermediate buffer"
+ help
+ Decompress file data into an intermediate buffer and then
+ memcopy it into the page cache.
+
+config SQUASHFS_FILE_DIRECT
+ bool "Decompress files directly into the page cache"
+ help
+ Directly decompress file data into the page cache.
+ Doing so can significantly improve performance because
+ it eliminates a memcpy and it also removes the lock contention
+ on the single buffer.
+
+endchoice
+
+choice
prompt "Decompressor parallelisation options"
depends on SQUASHFS
help
diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile
index fe51f1507ed1..246a6f329d89 100644
--- a/fs/squashfs/Makefile
+++ b/fs/squashfs/Makefile
@@ -5,7 +5,8 @@
obj-$(CONFIG_SQUASHFS) += squashfs.o
squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o
squashfs-y += namei.o super.o symlink.o decompressor.o
-squashfs-y += file_direct.o page_actor.o
+squashfs-$(CONFIG_SQUASHFS_FILE_CACHE) += file_cache.o
+squashfs-$(CONFIG_SQUASHFS_FILE_DIRECT) += file_direct.o page_actor.o
squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o
squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o
squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index b3b95e2ae2ff..82bc942fc437 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -28,12 +28,9 @@
#include <linux/fs.h>
#include <linux/vfs.h>
-#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/string.h>
-#include <linux/pagemap.h>
#include <linux/buffer_head.h>
-#include <linux/workqueue.h>
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
@@ -41,381 +38,45 @@
#include "decompressor.h"
#include "page_actor.h"
-static struct workqueue_struct *squashfs_read_wq;
-
-struct squashfs_read_request {
- struct super_block *sb;
- u64 index;
- int length;
- int compressed;
- int offset;
- u64 read_end;
- struct squashfs_page_actor *output;
- enum {
- SQUASHFS_COPY,
- SQUASHFS_DECOMPRESS,
- SQUASHFS_METADATA,
- } data_processing;
- bool synchronous;
-
- /*
- * If the read is synchronous, it is possible to retrieve information
- * about the request by setting these pointers.
- */
- int *res;
- int *bytes_read;
- int *bytes_uncompressed;
-
- int nr_buffers;
- struct buffer_head **bh;
- struct work_struct offload;
-};
-
-struct squashfs_bio_request {
- struct buffer_head **bh;
- int nr_buffers;
-};
-
-static int squashfs_bio_submit(struct squashfs_read_request *req);
-
-int squashfs_init_read_wq(void)
-{
- squashfs_read_wq = create_workqueue("SquashFS read wq");
- return !!squashfs_read_wq;
-}
-
-void squashfs_destroy_read_wq(void)
-{
- flush_workqueue(squashfs_read_wq);
- destroy_workqueue(squashfs_read_wq);
-}
-
-static void free_read_request(struct squashfs_read_request *req, int error)
-{
- if (!req->synchronous)
- squashfs_page_actor_free(req->output, error);
- if (req->res)
- *(req->res) = error;
- kfree(req->bh);
- kfree(req);
-}
-
-static void squashfs_process_blocks(struct squashfs_read_request *req)
-{
- int error = 0;
- int bytes, i, length;
- struct squashfs_sb_info *msblk = req->sb->s_fs_info;
- struct squashfs_page_actor *actor = req->output;
- struct buffer_head **bh = req->bh;
- int nr_buffers = req->nr_buffers;
-
- for (i = 0; i < nr_buffers; ++i) {
- if (!bh[i])
- continue;
- wait_on_buffer(bh[i]);
- if (!buffer_uptodate(bh[i]))
- error = -EIO;
- }
- if (error)
- goto cleanup;
-
- if (req->data_processing == SQUASHFS_METADATA) {
- /* Extract the length of the metadata block */
- if (req->offset != msblk->devblksize - 1) {
- length = le16_to_cpup((__le16 *)
- (bh[0]->b_data + req->offset));
- } else {
- length = (unsigned char)bh[0]->b_data[req->offset];
- length |= (unsigned char)bh[1]->b_data[0] << 8;
- }
- req->compressed = SQUASHFS_COMPRESSED(length);
- req->data_processing = req->compressed ? SQUASHFS_DECOMPRESS
- : SQUASHFS_COPY;
- length = SQUASHFS_COMPRESSED_SIZE(length);
- if (req->index + length + 2 > req->read_end) {
- for (i = 0; i < nr_buffers; ++i)
- put_bh(bh[i]);
- kfree(bh);
- req->length = length;
- req->index += 2;
- squashfs_bio_submit(req);
- return;
- }
- req->length = length;
- req->offset = (req->offset + 2) % PAGE_SIZE;
- if (req->offset < 2) {
- put_bh(bh[0]);
- ++bh;
- --nr_buffers;
- }
- }
- if (req->bytes_read)
- *(req->bytes_read) = req->length;
-
- if (req->data_processing == SQUASHFS_COPY) {
- squashfs_bh_to_actor(bh, nr_buffers, req->output, req->offset,
- req->length, msblk->devblksize);
- } else if (req->data_processing == SQUASHFS_DECOMPRESS) {
- req->length = squashfs_decompress(msblk, bh, nr_buffers,
- req->offset, req->length, actor);
- if (req->length < 0) {
- error = -EIO;
- goto cleanup;
- }
- }
-
- /* Last page may have trailing bytes not filled */
- bytes = req->length % PAGE_SIZE;
- if (bytes && actor->page[actor->pages - 1])
- zero_user_segment(actor->page[actor->pages - 1], bytes,
- PAGE_SIZE);
-
-cleanup:
- if (req->bytes_uncompressed)
- *(req->bytes_uncompressed) = req->length;
- if (error) {
- for (i = 0; i < nr_buffers; ++i)
- if (bh[i])
- put_bh(bh[i]);
- }
- free_read_request(req, error);
-}
-
-static void read_wq_handler(struct work_struct *work)
-{
- squashfs_process_blocks(container_of(work,
- struct squashfs_read_request, offload));
-}
-
-static void squashfs_bio_end_io(struct bio *bio)
-{
- int i;
- int error = bio->bi_error;
- struct squashfs_bio_request *bio_req = bio->bi_private;
-
- bio_put(bio);
-
- for (i = 0; i < bio_req->nr_buffers; ++i) {
- if (!bio_req->bh[i])
- continue;
- if (!error)
- set_buffer_uptodate(bio_req->bh[i]);
- else
- clear_buffer_uptodate(bio_req->bh[i]);
- unlock_buffer(bio_req->bh[i]);
- }
- kfree(bio_req);
-}
-
-static int bh_is_optional(struct squashfs_read_request *req, int idx)
-{
- int start_idx, end_idx;
- struct squashfs_sb_info *msblk = req->sb->s_fs_info;
-
- start_idx = (idx * msblk->devblksize - req->offset) >> PAGE_SHIFT;
- end_idx = ((idx + 1) * msblk->devblksize - req->offset + 1) >> PAGE_SHIFT;
- if (start_idx >= req->output->pages)
- return 1;
- if (start_idx < 0)
- start_idx = end_idx;
- if (end_idx >= req->output->pages)
- end_idx = start_idx;
- return !req->output->page[start_idx] && !req->output->page[end_idx];
-}
-
-static int actor_getblks(struct squashfs_read_request *req, u64 block)
-{
- int i;
-
- req->bh = kmalloc_array(req->nr_buffers, sizeof(*(req->bh)), GFP_NOIO);
- if (!req->bh)
- return -ENOMEM;
-
- for (i = 0; i < req->nr_buffers; ++i) {
- /*
- * When dealing with an uncompressed block, the actor may
- * contains NULL pages. There's no need to read the buffers
- * associated with these pages.
- */
- if (!req->compressed && bh_is_optional(req, i)) {
- req->bh[i] = NULL;
- continue;
- }
- req->bh[i] = sb_getblk(req->sb, block + i);
- if (!req->bh[i]) {
- while (--i) {
- if (req->bh[i])
- put_bh(req->bh[i]);
- }
- return -1;
- }
- }
- return 0;
-}
-
-static int squashfs_bio_submit(struct squashfs_read_request *req)
+/*
+ * Read the metadata block length, this is stored in the first two
+ * bytes of the metadata block.
+ */
+static struct buffer_head *get_block_length(struct super_block *sb,
+ u64 *cur_index, int *offset, int *length)
{
- struct bio *bio = NULL;
+ struct squashfs_sb_info *msblk = sb->s_fs_info;
struct buffer_head *bh;
- struct squashfs_bio_request *bio_req = NULL;
- int b = 0, prev_block = 0;
- struct squashfs_sb_info *msblk = req->sb->s_fs_info;
-
- u64 read_start = round_down(req->index, msblk->devblksize);
- u64 read_end = round_up(req->index + req->length, msblk->devblksize);
- sector_t block = read_start >> msblk->devblksize_log2;
- sector_t block_end = read_end >> msblk->devblksize_log2;
- int offset = read_start - round_down(req->index, PAGE_SIZE);
- int nr_buffers = block_end - block;
- int blksz = msblk->devblksize;
- int bio_max_pages = nr_buffers > BIO_MAX_PAGES ? BIO_MAX_PAGES
- : nr_buffers;
- /* Setup the request */
- req->read_end = read_end;
- req->offset = req->index - read_start;
- req->nr_buffers = nr_buffers;
- if (actor_getblks(req, block) < 0)
- goto getblk_failed;
-
- /* Create and submit the BIOs */
- for (b = 0; b < nr_buffers; ++b, offset += blksz) {
- bh = req->bh[b];
- if (!bh || !trylock_buffer(bh))
- continue;
- if (buffer_uptodate(bh)) {
- unlock_buffer(bh);
- continue;
+ bh = sb_bread(sb, *cur_index);
+ if (bh == NULL)
+ return NULL;
+
+ if (msblk->devblksize - *offset == 1) {
+ *length = (unsigned char) bh->b_data[*offset];
+ put_bh(bh);
+ bh = sb_bread(sb, ++(*cur_index));
+ if (bh == NULL)
+ return NULL;
+ *length |= (unsigned char) bh->b_data[0] << 8;
+ *offset = 1;
+ } else {
+ *length = (unsigned char) bh->b_data[*offset] |
+ (unsigned char) bh->b_data[*offset + 1] << 8;
+ *offset += 2;
+
+ if (*offset == msblk->devblksize) {
+ put_bh(bh);
+ bh = sb_bread(sb, ++(*cur_index));
+ if (bh == NULL)
+ return NULL;
+ *offset = 0;
}
- offset %= PAGE_SIZE;
-
- /* Append the buffer to the current BIO if it is contiguous */
- if (bio && bio_req && prev_block + 1 == b) {
- if (bio_add_page(bio, bh->b_page, blksz, offset)) {
- bio_req->nr_buffers += 1;
- prev_block = b;
- continue;
- }
- }
-
- /* Otherwise, submit the current BIO and create a new one */
- if (bio)
- submit_bio(READ, bio);
- bio_req = kcalloc(1, sizeof(struct squashfs_bio_request),
- GFP_NOIO);
- if (!bio_req)
- goto req_alloc_failed;
- bio_req->bh = &req->bh[b];
- bio = bio_alloc(GFP_NOIO, bio_max_pages);
- if (!bio)
- goto bio_alloc_failed;
- bio->bi_bdev = req->sb->s_bdev;
- bio->bi_iter.bi_sector = (block + b)
- << (msblk->devblksize_log2 - 9);
- bio->bi_private = bio_req;
- bio->bi_end_io = squashfs_bio_end_io;
-
- bio_add_page(bio, bh->b_page, blksz, offset);
- bio_req->nr_buffers += 1;
- prev_block = b;
}
- if (bio)
- submit_bio(READ, bio);
- if (req->synchronous)
- squashfs_process_blocks(req);
- else {
- INIT_WORK(&req->offload, read_wq_handler);
- schedule_work(&req->offload);
- }
- return 0;
-
-bio_alloc_failed:
- kfree(bio_req);
-req_alloc_failed:
- unlock_buffer(bh);
- while (--nr_buffers >= b)
- if (req->bh[nr_buffers])
- put_bh(req->bh[nr_buffers]);
- while (--b >= 0)
- if (req->bh[b])
- wait_on_buffer(req->bh[b]);
-getblk_failed:
- free_read_request(req, -ENOMEM);
- return -ENOMEM;
+ return bh;
}
-static int read_metadata_block(struct squashfs_read_request *req,
- u64 *next_index)
-{
- int ret, error, bytes_read = 0, bytes_uncompressed = 0;
- struct squashfs_sb_info *msblk = req->sb->s_fs_info;
-
- if (req->index + 2 > msblk->bytes_used) {
- free_read_request(req, -EINVAL);
- return -EINVAL;
- }
- req->length = 2;
-
- /* Do not read beyond the end of the device */
- if (req->index + req->length > msblk->bytes_used)
- req->length = msblk->bytes_used - req->index;
- req->data_processing = SQUASHFS_METADATA;
-
- /*
- * Reading metadata is always synchronous because we don't know the
- * length in advance and the function is expected to update
- * 'next_index' and return the length.
- */
- req->synchronous = true;
- req->res = &error;
- req->bytes_read = &bytes_read;
- req->bytes_uncompressed = &bytes_uncompressed;
-
- TRACE("Metadata block @ 0x%llx, %scompressed size %d, src size %d\n",
- req->index, req->compressed ? "" : "un", bytes_read,
- req->output->length);
-
- ret = squashfs_bio_submit(req);
- if (ret)
- return ret;
- if (error)
- return error;
- if (next_index)
- *next_index += 2 + bytes_read;
- return bytes_uncompressed;
-}
-
-static int read_data_block(struct squashfs_read_request *req, int length,
- u64 *next_index, bool synchronous)
-{
- int ret, error = 0, bytes_uncompressed = 0, bytes_read = 0;
-
- req->compressed = SQUASHFS_COMPRESSED_BLOCK(length);
- req->length = length = SQUASHFS_COMPRESSED_SIZE_BLOCK(length);
- req->data_processing = req->compressed ? SQUASHFS_DECOMPRESS
- : SQUASHFS_COPY;
-
- req->synchronous = synchronous;
- if (synchronous) {
- req->res = &error;
- req->bytes_read = &bytes_read;
- req->bytes_uncompressed = &bytes_uncompressed;
- }
-
- TRACE("Data block @ 0x%llx, %scompressed size %d, src size %d\n",
- req->index, req->compressed ? "" : "un", req->length,
- req->output->length);
-
- ret = squashfs_bio_submit(req);
- if (ret)
- return ret;
- if (synchronous)
- ret = error ? error : bytes_uncompressed;
- if (next_index)
- *next_index += length;
- return ret;
-}
/*
* Read and decompress a metadata block or datablock. Length is non-zero
@@ -426,50 +87,130 @@ static int read_data_block(struct squashfs_read_request *req, int length,
* generated a larger block - this does occasionally happen with compression
* algorithms).
*/
-static int __squashfs_read_data(struct super_block *sb, u64 index, int length,
- u64 *next_index, struct squashfs_page_actor *output, bool sync)
+int squashfs_read_data(struct super_block *sb, u64 index, int length,
+ u64 *next_index, struct squashfs_page_actor *output)
{
- struct squashfs_read_request *req;
+ struct squashfs_sb_info *msblk = sb->s_fs_info;
+ struct buffer_head **bh;
+ int offset = index & ((1 << msblk->devblksize_log2) - 1);
+ u64 cur_index = index >> msblk->devblksize_log2;
+ int bytes, compressed, b = 0, k = 0, avail, i;
- req = kcalloc(1, sizeof(struct squashfs_read_request), GFP_KERNEL);
- if (!req) {
- if (!sync)
- squashfs_page_actor_free(output, -ENOMEM);
+ bh = kcalloc(((output->length + msblk->devblksize - 1)
+ >> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL);
+ if (bh == NULL)
return -ENOMEM;
- }
- req->sb = sb;
- req->index = index;
- req->output = output;
+ if (length) {
+ /*
+ * Datablock.
+ */
+ bytes = -offset;
+ compressed = SQUASHFS_COMPRESSED_BLOCK(length);
+ length = SQUASHFS_COMPRESSED_SIZE_BLOCK(length);
+ if (next_index)
+ *next_index = index + length;
+
+ TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n",
+ index, compressed ? "" : "un", length, output->length);
+
+ if (length < 0 || length > output->length ||
+ (index + length) > msblk->bytes_used)
+ goto read_failure;
+
+ for (b = 0; bytes < length; b++, cur_index++) {
+ bh[b] = sb_getblk(sb, cur_index);
+ if (bh[b] == NULL)
+ goto block_release;
+ bytes += msblk->devblksize;
+ }
+ ll_rw_block(READ, b, bh);
+ } else {
+ /*
+ * Metadata block.
+ */
+ if ((index + 2) > msblk->bytes_used)
+ goto read_failure;
+
+ bh[0] = get_block_length(sb, &cur_index, &offset, &length);
+ if (bh[0] == NULL)
+ goto read_failure;
+ b = 1;
+
+ bytes = msblk->devblksize - offset;
+ compressed = SQUASHFS_COMPRESSED(length);
+ length = SQUASHFS_COMPRESSED_SIZE(length);
+ if (next_index)
+ *next_index = index + length + 2;
- if (next_index)
- *next_index = index;
+ TRACE("Block @ 0x%llx, %scompressed size %d\n", index,
+ compressed ? "" : "un", length);
- if (length)
- length = read_data_block(req, length, next_index, sync);
- else
- length = read_metadata_block(req, next_index);
+ if (length < 0 || length > output->length ||
+ (index + length) > msblk->bytes_used)
+ goto block_release;
- if (length < 0) {
- ERROR("squashfs_read_data failed to read block 0x%llx\n",
- (unsigned long long)index);
- return -EIO;
+ for (; bytes < length; b++) {
+ bh[b] = sb_getblk(sb, ++cur_index);
+ if (bh[b] == NULL)
+ goto block_release;
+ bytes += msblk->devblksize;
+ }
+ ll_rw_block(READ, b - 1, bh + 1);
}
- return length;
-}
+ for (i = 0; i < b; i++) {
+ wait_on_buffer(bh[i]);
+ if (!buffer_uptodate(bh[i]))
+ goto block_release;
+ }
-int squashfs_read_data(struct super_block *sb, u64 index, int length,
- u64 *next_index, struct squashfs_page_actor *output)
-{
- return __squashfs_read_data(sb, index, length, next_index, output,
- true);
-}
+ if (compressed) {
+ if (!msblk->stream)
+ goto read_failure;
+ length = squashfs_decompress(msblk, bh, b, offset, length,
+ output);
+ if (length < 0)
+ goto read_failure;
+ } else {
+ /*
+ * Block is uncompressed.
+ */
+ int in, pg_offset = 0;
+ void *data = squashfs_first_page(output);
+
+ for (bytes = length; k < b; k++) {
+ in = min(bytes, msblk->devblksize - offset);
+ bytes -= in;
+ while (in) {
+ if (pg_offset == PAGE_CACHE_SIZE) {
+ data = squashfs_next_page(output);
+ pg_offset = 0;
+ }
+ avail = min_t(int, in, PAGE_CACHE_SIZE -
+ pg_offset);
+ memcpy(data + pg_offset, bh[k]->b_data + offset,
+ avail);
+ in -= avail;
+ pg_offset += avail;
+ offset += avail;
+ }
+ offset = 0;
+ put_bh(bh[k]);
+ }
+ squashfs_finish_page(output);
+ }
-int squashfs_read_data_async(struct super_block *sb, u64 index, int length,
- u64 *next_index, struct squashfs_page_actor *output)
-{
+ kfree(bh);
+ return length;
+
+block_release:
+ for (; k < b; k++)
+ put_bh(bh[k]);
- return __squashfs_read_data(sb, index, length, next_index, output,
- false);
+read_failure:
+ ERROR("squashfs_read_data failed to read block 0x%llx\n",
+ (unsigned long long) index);
+ kfree(bh);
+ return -EIO;
}
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index 7d78c3d28bbd..91ce49c05b7c 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -209,14 +209,17 @@ void squashfs_cache_put(struct squashfs_cache_entry *entry)
*/
void squashfs_cache_delete(struct squashfs_cache *cache)
{
- int i;
+ int i, j;
if (cache == NULL)
return;
for (i = 0; i < cache->entries; i++) {
- if (cache->entry[i].page)
- free_page_array(cache->entry[i].page, cache->pages);
+ if (cache->entry[i].data) {
+ for (j = 0; j < cache->pages; j++)
+ kfree(cache->entry[i].data[j]);
+ kfree(cache->entry[i].data);
+ }
kfree(cache->entry[i].actor);
}
@@ -233,7 +236,7 @@ void squashfs_cache_delete(struct squashfs_cache *cache)
struct squashfs_cache *squashfs_cache_init(char *name, int entries,
int block_size)
{
- int i;
+ int i, j;
struct squashfs_cache *cache = kzalloc(sizeof(*cache), GFP_KERNEL);
if (cache == NULL) {
@@ -265,13 +268,22 @@ struct squashfs_cache *squashfs_cache_init(char *name, int entries,
init_waitqueue_head(&cache->entry[i].wait_queue);
entry->cache = cache;
entry->block = SQUASHFS_INVALID_BLK;
- entry->page = alloc_page_array(cache->pages, GFP_KERNEL);
- if (!entry->page) {
+ entry->data = kcalloc(cache->pages, sizeof(void *), GFP_KERNEL);
+ if (entry->data == NULL) {
ERROR("Failed to allocate %s cache entry\n", name);
goto cleanup;
}
- entry->actor = squashfs_page_actor_init(entry->page,
- cache->pages, 0, NULL);
+
+ for (j = 0; j < cache->pages; j++) {
+ entry->data[j] = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
+ if (entry->data[j] == NULL) {
+ ERROR("Failed to allocate %s buffer\n", name);
+ goto cleanup;
+ }
+ }
+
+ entry->actor = squashfs_page_actor_init(entry->data,
+ cache->pages, 0);
if (entry->actor == NULL) {
ERROR("Failed to allocate %s cache entry\n", name);
goto cleanup;
@@ -302,20 +314,18 @@ int squashfs_copy_data(void *buffer, struct squashfs_cache_entry *entry,
return min(length, entry->length - offset);
while (offset < entry->length) {
- void *buff = kmap_atomic(entry->page[offset / PAGE_CACHE_SIZE])
- + (offset % PAGE_CACHE_SIZE);
+ void *buff = entry->data[offset / PAGE_CACHE_SIZE]
+ + (offset % PAGE_CACHE_SIZE);
int bytes = min_t(int, entry->length - offset,
PAGE_CACHE_SIZE - (offset % PAGE_CACHE_SIZE));
if (bytes >= remaining) {
memcpy(buffer, buff, remaining);
- kunmap_atomic(buff);
remaining = 0;
break;
}
memcpy(buffer, buff, bytes);
- kunmap_atomic(buff);
buffer += bytes;
remaining -= bytes;
offset += bytes;
@@ -409,38 +419,43 @@ struct squashfs_cache_entry *squashfs_get_datablock(struct super_block *sb,
void *squashfs_read_table(struct super_block *sb, u64 block, int length)
{
int pages = (length + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
- struct page **page;
- void *buff;
- int res;
+ int i, res;
+ void *table, *buffer, **data;
struct squashfs_page_actor *actor;
- page = alloc_page_array(pages, GFP_KERNEL);
- if (!page)
+ table = buffer = kmalloc(length, GFP_KERNEL);
+ if (table == NULL)
return ERR_PTR(-ENOMEM);
- actor = squashfs_page_actor_init(page, pages, length, NULL);
- if (actor == NULL) {
+ data = kcalloc(pages, sizeof(void *), GFP_KERNEL);
+ if (data == NULL) {
res = -ENOMEM;
goto failed;
}
+ actor = squashfs_page_actor_init(data, pages, length);
+ if (actor == NULL) {
+ res = -ENOMEM;
+ goto failed2;
+ }
+
+ for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE)
+ data[i] = buffer;
+
res = squashfs_read_data(sb, block, length |
SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, actor);
+ kfree(data);
+ kfree(actor);
+
if (res < 0)
- goto failed2;
+ goto failed;
- buff = kmalloc(length, GFP_KERNEL);
- if (!buff)
- goto failed2;
- squashfs_actor_to_buf(actor, buff, length);
- squashfs_page_actor_free(actor, 0);
- free_page_array(page, pages);
- return buff;
+ return table;
failed2:
- squashfs_page_actor_free(actor, 0);
+ kfree(data);
failed:
- free_page_array(page, pages);
+ kfree(table);
return ERR_PTR(res);
}
diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c
index 7de35bf297aa..e9034bf6e5ae 100644
--- a/fs/squashfs/decompressor.c
+++ b/fs/squashfs/decompressor.c
@@ -24,8 +24,7 @@
#include <linux/types.h>
#include <linux/mutex.h>
#include <linux/slab.h>
-#include <linux/highmem.h>
-#include <linux/fs.h>
+#include <linux/buffer_head.h>
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
@@ -95,44 +94,40 @@ const struct squashfs_decompressor *squashfs_lookup_decompressor(int id)
static void *get_comp_opts(struct super_block *sb, unsigned short flags)
{
struct squashfs_sb_info *msblk = sb->s_fs_info;
- void *comp_opts, *buffer = NULL;
- struct page *page;
+ void *buffer = NULL, *comp_opts;
struct squashfs_page_actor *actor = NULL;
int length = 0;
- if (!SQUASHFS_COMP_OPTS(flags))
- return squashfs_comp_opts(msblk, buffer, length);
-
/*
* Read decompressor specific options from file system if present
*/
-
- page = alloc_page(GFP_KERNEL);
- if (!page)
- return ERR_PTR(-ENOMEM);
-
- actor = squashfs_page_actor_init(&page, 1, 0, NULL);
- if (actor == NULL) {
- comp_opts = ERR_PTR(-ENOMEM);
- goto actor_error;
- }
-
- length = squashfs_read_data(sb,
- sizeof(struct squashfs_super_block), 0, NULL, actor);
-
- if (length < 0) {
- comp_opts = ERR_PTR(length);
- goto read_error;
+ if (SQUASHFS_COMP_OPTS(flags)) {
+ buffer = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
+ if (buffer == NULL) {
+ comp_opts = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ actor = squashfs_page_actor_init(&buffer, 1, 0);
+ if (actor == NULL) {
+ comp_opts = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ length = squashfs_read_data(sb,
+ sizeof(struct squashfs_super_block), 0, NULL, actor);
+
+ if (length < 0) {
+ comp_opts = ERR_PTR(length);
+ goto out;
+ }
}
- buffer = kmap_atomic(page);
comp_opts = squashfs_comp_opts(msblk, buffer, length);
- kunmap_atomic(buffer);
-read_error:
- squashfs_page_actor_free(actor, 0);
-actor_error:
- __free_page(page);
+out:
+ kfree(actor);
+ kfree(buffer);
return comp_opts;
}
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index 43d946617d2d..1ec7bae2751d 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -47,16 +47,12 @@
#include <linux/string.h>
#include <linux/pagemap.h>
#include <linux/mutex.h>
-#include <linux/mm_inline.h>
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
#include "squashfs_fs_i.h"
#include "squashfs.h"
-// Backported from 4.5
-#define lru_to_page(head) (list_entry((head)->prev, struct page, lru))
-
/*
* Locate cache slot in range [offset, index] for specified inode. If
* there's more than one return the slot closest to index.
@@ -446,21 +442,6 @@ static int squashfs_readpage_fragment(struct page *page)
return res;
}
-static int squashfs_readpages_fragment(struct page *page,
- struct list_head *readahead_pages, struct address_space *mapping)
-{
- if (!page) {
- page = lru_to_page(readahead_pages);
- list_del(&page->lru);
- if (add_to_page_cache_lru(page, mapping, page->index,
- mapping_gfp_constraint(mapping, GFP_KERNEL))) {
- put_page(page);
- return 0;
- }
- }
- return squashfs_readpage_fragment(page);
-}
-
static int squashfs_readpage_sparse(struct page *page, int index, int file_end)
{
struct inode *inode = page->mapping->host;
@@ -473,105 +454,54 @@ static int squashfs_readpage_sparse(struct page *page, int index, int file_end)
return 0;
}
-static int squashfs_readpages_sparse(struct page *page,
- struct list_head *readahead_pages, int index, int file_end,
- struct address_space *mapping)
-{
- if (!page) {
- page = lru_to_page(readahead_pages);
- list_del(&page->lru);
- if (add_to_page_cache_lru(page, mapping, page->index,
- mapping_gfp_constraint(mapping, GFP_KERNEL))) {
- put_page(page);
- return 0;
- }
- }
- return squashfs_readpage_sparse(page, index, file_end);
-}
-
-static int __squashfs_readpages(struct file *file, struct page *page,
- struct list_head *readahead_pages, unsigned int nr_pages,
- struct address_space *mapping)
+static int squashfs_readpage(struct file *file, struct page *page)
{
- struct inode *inode = mapping->host;
+ struct inode *inode = page->mapping->host;
struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+ int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT);
int file_end = i_size_read(inode) >> msblk->block_log;
int res;
-
- do {
- struct page *cur_page = page ? page
- : lru_to_page(readahead_pages);
- int page_index = cur_page->index;
- int index = page_index >> (msblk->block_log - PAGE_CACHE_SHIFT);
-
- if (page_index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
- PAGE_CACHE_SHIFT))
- return 1;
-
- if (index < file_end || squashfs_i(inode)->fragment_block ==
- SQUASHFS_INVALID_BLK) {
- u64 block = 0;
- int bsize = read_blocklist(inode, index, &block);
-
- if (bsize < 0)
- return -1;
-
- if (bsize == 0) {
- res = squashfs_readpages_sparse(page,
- readahead_pages, index, file_end,
- mapping);
- } else {
- res = squashfs_readpages_block(page,
- readahead_pages, &nr_pages, mapping,
- page_index, block, bsize);
- }
- } else {
- res = squashfs_readpages_fragment(page,
- readahead_pages, mapping);
- }
- if (res)
- return 0;
- page = NULL;
- } while (readahead_pages && !list_empty(readahead_pages));
-
- return 0;
-}
-
-static int squashfs_readpage(struct file *file, struct page *page)
-{
- int ret;
+ void *pageaddr;
TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n",
- page->index, squashfs_i(page->mapping->host)->start);
+ page->index, squashfs_i(inode)->start);
- get_page(page);
+ if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
+ PAGE_CACHE_SHIFT))
+ goto out;
- ret = __squashfs_readpages(file, page, NULL, 1, page->mapping);
- if (ret) {
- flush_dcache_page(page);
- if (ret < 0)
- SetPageError(page);
- else
- SetPageUptodate(page);
- zero_user_segment(page, 0, PAGE_CACHE_SIZE);
- unlock_page(page);
- put_page(page);
- }
+ if (index < file_end || squashfs_i(inode)->fragment_block ==
+ SQUASHFS_INVALID_BLK) {
+ u64 block = 0;
+ int bsize = read_blocklist(inode, index, &block);
+ if (bsize < 0)
+ goto error_out;
- return 0;
-}
+ if (bsize == 0)
+ res = squashfs_readpage_sparse(page, index, file_end);
+ else
+ res = squashfs_readpage_block(page, block, bsize);
+ } else
+ res = squashfs_readpage_fragment(page);
+
+ if (!res)
+ return 0;
+
+error_out:
+ SetPageError(page);
+out:
+ pageaddr = kmap_atomic(page);
+ memset(pageaddr, 0, PAGE_CACHE_SIZE);
+ kunmap_atomic(pageaddr);
+ flush_dcache_page(page);
+ if (!PageError(page))
+ SetPageUptodate(page);
+ unlock_page(page);
-static int squashfs_readpages(struct file *file, struct address_space *mapping,
- struct list_head *pages, unsigned int nr_pages)
-{
- TRACE("Entered squashfs_readpages, %u pages, first page index %lx\n",
- nr_pages, lru_to_page(pages)->index);
- __squashfs_readpages(file, NULL, pages, nr_pages, mapping);
return 0;
}
const struct address_space_operations squashfs_aops = {
- .readpage = squashfs_readpage,
- .readpages = squashfs_readpages,
+ .readpage = squashfs_readpage
};
diff --git a/fs/squashfs/file_cache.c b/fs/squashfs/file_cache.c
new file mode 100644
index 000000000000..f2310d2a2019
--- /dev/null
+++ b/fs/squashfs/file_cache.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2013
+ * Phillip Lougher <phillip@squashfs.org.uk>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/fs.h>
+#include <linux/vfs.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/pagemap.h>
+#include <linux/mutex.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "squashfs_fs_i.h"
+#include "squashfs.h"
+
+/* Read separately compressed datablock and memcopy into page cache */
+int squashfs_readpage_block(struct page *page, u64 block, int bsize)
+{
+ struct inode *i = page->mapping->host;
+ struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb,
+ block, bsize);
+ int res = buffer->error;
+
+ if (res)
+ ERROR("Unable to read page, block %llx, size %x\n", block,
+ bsize);
+ else
+ squashfs_copy_cache(page, buffer, buffer->length, 0);
+
+ squashfs_cache_put(buffer);
+ return res;
+}
diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c
index c97af4c6ccd0..43e7a7eddac0 100644
--- a/fs/squashfs/file_direct.c
+++ b/fs/squashfs/file_direct.c
@@ -13,7 +13,6 @@
#include <linux/string.h>
#include <linux/pagemap.h>
#include <linux/mutex.h>
-#include <linux/mm_inline.h>
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
@@ -21,139 +20,157 @@
#include "squashfs.h"
#include "page_actor.h"
-// Backported from 4.5
-#define lru_to_page(head) (list_entry((head)->prev, struct page, lru))
+static int squashfs_read_cache(struct page *target_page, u64 block, int bsize,
+ int pages, struct page **page);
-static void release_actor_pages(struct page **page, int pages, int error)
-{
- int i;
-
- for (i = 0; i < pages; i++) {
- if (!page[i])
- continue;
- flush_dcache_page(page[i]);
- if (!error)
- SetPageUptodate(page[i]);
- else {
- SetPageError(page[i]);
- zero_user_segment(page[i], 0, PAGE_CACHE_SIZE);
- }
- unlock_page(page[i]);
- put_page(page[i]);
- }
- kfree(page);
-}
+/* Read separately compressed datablock directly into page cache */
+int squashfs_readpage_block(struct page *target_page, u64 block, int bsize)
-/*
- * Create a "page actor" which will kmap and kunmap the
- * page cache pages appropriately within the decompressor
- */
-static struct squashfs_page_actor *actor_from_page_cache(
- unsigned int actor_pages, struct page *target_page,
- struct list_head *rpages, unsigned int *nr_pages, int start_index,
- struct address_space *mapping)
{
+ struct inode *inode = target_page->mapping->host;
+ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+
+ int file_end = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
+ int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1;
+ int start_index = target_page->index & ~mask;
+ int end_index = start_index | mask;
+ int i, n, pages, missing_pages, bytes, res = -ENOMEM;
struct page **page;
struct squashfs_page_actor *actor;
- int i, n;
- gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL);
-
- page = kmalloc_array(actor_pages, sizeof(void *), GFP_KERNEL);
- if (!page)
- return NULL;
-
- for (i = 0, n = start_index; i < actor_pages; i++, n++) {
- if (target_page == NULL && rpages && !list_empty(rpages)) {
- struct page *cur_page = lru_to_page(rpages);
-
- if (cur_page->index < start_index + actor_pages) {
- list_del(&cur_page->lru);
- --(*nr_pages);
- if (add_to_page_cache_lru(cur_page, mapping,
- cur_page->index, gfp))
- put_page(cur_page);
- else
- target_page = cur_page;
- } else
- rpages = NULL;
- }
+ void *pageaddr;
+
+ if (end_index > file_end)
+ end_index = file_end;
+
+ pages = end_index - start_index + 1;
- if (target_page && target_page->index == n) {
- page[i] = target_page;
- target_page = NULL;
- } else {
- page[i] = grab_cache_page_nowait(mapping, n);
- if (page[i] == NULL)
- continue;
+ page = kmalloc_array(pages, sizeof(void *), GFP_KERNEL);
+ if (page == NULL)
+ return res;
+
+ /*
+ * Create a "page actor" which will kmap and kunmap the
+ * page cache pages appropriately within the decompressor
+ */
+ actor = squashfs_page_actor_init_special(page, pages, 0);
+ if (actor == NULL)
+ goto out;
+
+ /* Try to grab all the pages covered by the Squashfs block */
+ for (missing_pages = 0, i = 0, n = start_index; i < pages; i++, n++) {
+ page[i] = (n == target_page->index) ? target_page :
+ grab_cache_page_nowait(target_page->mapping, n);
+
+ if (page[i] == NULL) {
+ missing_pages++;
+ continue;
}
if (PageUptodate(page[i])) {
unlock_page(page[i]);
- put_page(page[i]);
+ page_cache_release(page[i]);
page[i] = NULL;
+ missing_pages++;
}
}
- actor = squashfs_page_actor_init(page, actor_pages, 0,
- release_actor_pages);
- if (!actor) {
- release_actor_pages(page, actor_pages, -ENOMEM);
- kfree(page);
- return NULL;
+ if (missing_pages) {
+ /*
+ * Couldn't get one or more pages, this page has either
+ * been VM reclaimed, but others are still in the page cache
+ * and uptodate, or we're racing with another thread in
+ * squashfs_readpage also trying to grab them. Fall back to
+ * using an intermediate buffer.
+ */
+ res = squashfs_read_cache(target_page, block, bsize, pages,
+ page);
+ if (res < 0)
+ goto mark_errored;
+
+ goto out;
}
- return actor;
-}
-int squashfs_readpages_block(struct page *target_page,
- struct list_head *readahead_pages,
- unsigned int *nr_pages,
- struct address_space *mapping,
- int page_index, u64 block, int bsize)
+ /* Decompress directly into the page cache buffers */
+ res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor);
+ if (res < 0)
+ goto mark_errored;
+
+ /* Last page may have trailing bytes not filled */
+ bytes = res % PAGE_CACHE_SIZE;
+ if (bytes) {
+ pageaddr = kmap_atomic(page[pages - 1]);
+ memset(pageaddr + bytes, 0, PAGE_CACHE_SIZE - bytes);
+ kunmap_atomic(pageaddr);
+ }
-{
- struct squashfs_page_actor *actor;
- struct inode *inode = mapping->host;
- struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
- int start_index, end_index, file_end, actor_pages, res;
- int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1;
+ /* Mark pages as uptodate, unlock and release */
+ for (i = 0; i < pages; i++) {
+ flush_dcache_page(page[i]);
+ SetPageUptodate(page[i]);
+ unlock_page(page[i]);
+ if (page[i] != target_page)
+ page_cache_release(page[i]);
+ }
- /*
- * If readpage() is called on an uncompressed datablock, we can just
- * read the pages instead of fetching the whole block.
- * This greatly improves the performance when a process keep doing
- * random reads because we only fetch the necessary data.
- * The readahead algorithm will take care of doing speculative reads
- * if necessary.
- * We can't read more than 1 block even if readahead provides use more
- * pages because we don't know yet if the next block is compressed or
- * not.
+ kfree(actor);
+ kfree(page);
+
+ return 0;
+
+mark_errored:
+ /* Decompression failed, mark pages as errored. Target_page is
+ * dealt with by the caller
*/
- if (bsize && !SQUASHFS_COMPRESSED_BLOCK(bsize)) {
- u64 block_end = block + msblk->block_size;
-
- block += (page_index & mask) * PAGE_CACHE_SIZE;
- actor_pages = (block_end - block) / PAGE_CACHE_SIZE;
- if (*nr_pages < actor_pages)
- actor_pages = *nr_pages;
- start_index = page_index;
- bsize = min_t(int, bsize, (PAGE_CACHE_SIZE * actor_pages)
- | SQUASHFS_COMPRESSED_BIT_BLOCK);
- } else {
- file_end = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
- start_index = page_index & ~mask;
- end_index = start_index | mask;
- if (end_index > file_end)
- end_index = file_end;
- actor_pages = end_index - start_index + 1;
+ for (i = 0; i < pages; i++) {
+ if (page[i] == NULL || page[i] == target_page)
+ continue;
+ flush_dcache_page(page[i]);
+ SetPageError(page[i]);
+ unlock_page(page[i]);
+ page_cache_release(page[i]);
}
- actor = actor_from_page_cache(actor_pages, target_page,
- readahead_pages, nr_pages, start_index,
- mapping);
- if (!actor)
- return -ENOMEM;
+out:
+ kfree(actor);
+ kfree(page);
+ return res;
+}
+
+
+static int squashfs_read_cache(struct page *target_page, u64 block, int bsize,
+ int pages, struct page **page)
+{
+ struct inode *i = target_page->mapping->host;
+ struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb,
+ block, bsize);
+ int bytes = buffer->length, res = buffer->error, n, offset = 0;
+ void *pageaddr;
+
+ if (res) {
+ ERROR("Unable to read page, block %llx, size %x\n", block,
+ bsize);
+ goto out;
+ }
+
+ for (n = 0; n < pages && bytes > 0; n++,
+ bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) {
+ int avail = min_t(int, bytes, PAGE_CACHE_SIZE);
+
+ if (page[n] == NULL)
+ continue;
+
+ pageaddr = kmap_atomic(page[n]);
+ squashfs_copy_data(pageaddr, buffer, offset, avail);
+ memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail);
+ kunmap_atomic(pageaddr);
+ flush_dcache_page(page[n]);
+ SetPageUptodate(page[n]);
+ unlock_page(page[n]);
+ if (page[n] != target_page)
+ page_cache_release(page[n]);
+ }
- res = squashfs_read_data_async(inode->i_sb, block, bsize, NULL,
- actor);
- return res < 0 ? res : 0;
+out:
+ squashfs_cache_put(buffer);
+ return res;
}
diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c
index df4fa3c7ddd0..c31e2bc9c081 100644
--- a/fs/squashfs/lz4_wrapper.c
+++ b/fs/squashfs/lz4_wrapper.c
@@ -94,17 +94,39 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
struct buffer_head **bh, int b, int offset, int length,
struct squashfs_page_actor *output)
{
- int res;
- size_t dest_len = output->length;
struct squashfs_lz4 *stream = strm;
+ void *buff = stream->input, *data;
+ int avail, i, bytes = length, res;
+ size_t dest_len = output->length;
+
+ for (i = 0; i < b; i++) {
+ avail = min(bytes, msblk->devblksize - offset);
+ memcpy(buff, bh[i]->b_data + offset, avail);
+ buff += avail;
+ bytes -= avail;
+ offset = 0;
+ put_bh(bh[i]);
+ }
- squashfs_bh_to_buf(bh, b, stream->input, offset, length,
- msblk->devblksize);
res = lz4_decompress_unknownoutputsize(stream->input, length,
stream->output, &dest_len);
if (res)
return -EIO;
- squashfs_buf_to_actor(stream->output, output, dest_len);
+
+ bytes = dest_len;
+ data = squashfs_first_page(output);
+ buff = stream->output;
+ while (data) {
+ if (bytes <= PAGE_CACHE_SIZE) {
+ memcpy(data, buff, bytes);
+ break;
+ }
+ memcpy(data, buff, PAGE_CACHE_SIZE);
+ buff += PAGE_CACHE_SIZE;
+ bytes -= PAGE_CACHE_SIZE;
+ data = squashfs_next_page(output);
+ }
+ squashfs_finish_page(output);
return dest_len;
}
diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c
index 2c844d53a59e..244b9fbfff7b 100644
--- a/fs/squashfs/lzo_wrapper.c
+++ b/fs/squashfs/lzo_wrapper.c
@@ -79,19 +79,45 @@ static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm,
struct buffer_head **bh, int b, int offset, int length,
struct squashfs_page_actor *output)
{
- int res;
- size_t out_len = output->length;
struct squashfs_lzo *stream = strm;
+ void *buff = stream->input, *data;
+ int avail, i, bytes = length, res;
+ size_t out_len = output->length;
+
+ for (i = 0; i < b; i++) {
+ avail = min(bytes, msblk->devblksize - offset);
+ memcpy(buff, bh[i]->b_data + offset, avail);
+ buff += avail;
+ bytes -= avail;
+ offset = 0;
+ put_bh(bh[i]);
+ }
- squashfs_bh_to_buf(bh, b, stream->input, offset, length,
- msblk->devblksize);
res = lzo1x_decompress_safe(stream->input, (size_t)length,
stream->output, &out_len);
if (res != LZO_E_OK)
- return -EIO;
- squashfs_buf_to_actor(stream->output, output, out_len);
+ goto failed;
- return out_len;
+ res = bytes = (int)out_len;
+ data = squashfs_first_page(output);
+ buff = stream->output;
+ while (data) {
+ if (bytes <= PAGE_CACHE_SIZE) {
+ memcpy(data, buff, bytes);
+ break;
+ } else {
+ memcpy(data, buff, PAGE_CACHE_SIZE);
+ buff += PAGE_CACHE_SIZE;
+ bytes -= PAGE_CACHE_SIZE;
+ data = squashfs_next_page(output);
+ }
+ }
+ squashfs_finish_page(output);
+
+ return res;
+
+failed:
+ return -EIO;
}
const struct squashfs_decompressor squashfs_lzo_comp_ops = {
diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c
index 53863508e400..5a1c11f56441 100644
--- a/fs/squashfs/page_actor.c
+++ b/fs/squashfs/page_actor.c
@@ -9,145 +9,92 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
-#include <linux/buffer_head.h>
#include "page_actor.h"
-struct squashfs_page_actor *squashfs_page_actor_init(struct page **page,
- int pages, int length, void (*release_pages)(struct page **, int, int))
-{
- struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
-
- if (actor == NULL)
- return NULL;
+/*
+ * This file contains implementations of page_actor for decompressing into
+ * an intermediate buffer, and for decompressing directly into the
+ * page cache.
+ *
+ * Calling code should avoid sleeping between calls to squashfs_first_page()
+ * and squashfs_finish_page().
+ */
- actor->length = length ? : pages * PAGE_CACHE_SIZE;
- actor->page = page;
- actor->pages = pages;
- actor->next_page = 0;
- actor->pageaddr = NULL;
- actor->release_pages = release_pages;
- return actor;
+/* Implementation of page_actor for decompressing into intermediate buffer */
+static void *cache_first_page(struct squashfs_page_actor *actor)
+{
+ actor->next_page = 1;
+ return actor->buffer[0];
}
-void squashfs_page_actor_free(struct squashfs_page_actor *actor, int error)
+static void *cache_next_page(struct squashfs_page_actor *actor)
{
- if (!actor)
- return;
+ if (actor->next_page == actor->pages)
+ return NULL;
- if (actor->release_pages)
- actor->release_pages(actor->page, actor->pages, error);
- kfree(actor);
+ return actor->buffer[actor->next_page++];
}
-void squashfs_actor_to_buf(struct squashfs_page_actor *actor, void *buf,
- int length)
+static void cache_finish_page(struct squashfs_page_actor *actor)
{
- void *pageaddr;
- int pos = 0, avail, i;
-
- for (i = 0; i < actor->pages && pos < length; ++i) {
- avail = min_t(int, length - pos, PAGE_CACHE_SIZE);
- if (actor->page[i]) {
- pageaddr = kmap_atomic(actor->page[i]);
- memcpy(buf + pos, pageaddr, avail);
- kunmap_atomic(pageaddr);
- }
- pos += avail;
- }
+ /* empty */
}
-void squashfs_buf_to_actor(void *buf, struct squashfs_page_actor *actor,
- int length)
+struct squashfs_page_actor *squashfs_page_actor_init(void **buffer,
+ int pages, int length)
{
- void *pageaddr;
- int pos = 0, avail, i;
-
- for (i = 0; i < actor->pages && pos < length; ++i) {
- avail = min_t(int, length - pos, PAGE_CACHE_SIZE);
- if (actor->page[i]) {
- pageaddr = kmap_atomic(actor->page[i]);
- memcpy(pageaddr, buf + pos, avail);
- kunmap_atomic(pageaddr);
- }
- pos += avail;
- }
+ struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
+
+ if (actor == NULL)
+ return NULL;
+
+ actor->length = length ? : pages * PAGE_CACHE_SIZE;
+ actor->buffer = buffer;
+ actor->pages = pages;
+ actor->next_page = 0;
+ actor->squashfs_first_page = cache_first_page;
+ actor->squashfs_next_page = cache_next_page;
+ actor->squashfs_finish_page = cache_finish_page;
+ return actor;
}
-void squashfs_bh_to_actor(struct buffer_head **bh, int nr_buffers,
- struct squashfs_page_actor *actor, int offset, int length, int blksz)
+/* Implementation of page_actor for decompressing directly into page cache. */
+static void *direct_first_page(struct squashfs_page_actor *actor)
{
- void *kaddr = NULL;
- int bytes = 0, pgoff = 0, b = 0, p = 0, avail, i;
-
- while (bytes < length) {
- if (actor->page[p]) {
- kaddr = kmap_atomic(actor->page[p]);
- while (pgoff < PAGE_CACHE_SIZE && bytes < length) {
- avail = min_t(int, blksz - offset,
- PAGE_CACHE_SIZE - pgoff);
- memcpy(kaddr + pgoff, bh[b]->b_data + offset,
- avail);
- pgoff += avail;
- bytes += avail;
- offset = (offset + avail) % blksz;
- if (!offset) {
- put_bh(bh[b]);
- ++b;
- }
- }
- kunmap_atomic(kaddr);
- pgoff = 0;
- } else {
- for (i = 0; i < PAGE_CACHE_SIZE / blksz; ++i) {
- if (bh[b])
- put_bh(bh[b]);
- ++b;
- }
- bytes += PAGE_CACHE_SIZE;
- }
- ++p;
- }
+ actor->next_page = 1;
+ return actor->pageaddr = kmap_atomic(actor->page[0]);
}
-void squashfs_bh_to_buf(struct buffer_head **bh, int nr_buffers, void *buf,
- int offset, int length, int blksz)
+static void *direct_next_page(struct squashfs_page_actor *actor)
{
- int i, avail, bytes = 0;
-
- for (i = 0; i < nr_buffers && bytes < length; ++i) {
- avail = min_t(int, length - bytes, blksz - offset);
- if (bh[i]) {
- memcpy(buf + bytes, bh[i]->b_data + offset, avail);
- put_bh(bh[i]);
- }
- bytes += avail;
- offset = 0;
- }
+ if (actor->pageaddr)
+ kunmap_atomic(actor->pageaddr);
+
+ return actor->pageaddr = actor->next_page == actor->pages ? NULL :
+ kmap_atomic(actor->page[actor->next_page++]);
}
-void free_page_array(struct page **page, int nr_pages)
+static void direct_finish_page(struct squashfs_page_actor *actor)
{
- int i;
-
- for (i = 0; i < nr_pages; ++i)
- __free_page(page[i]);
- kfree(page);
+ if (actor->pageaddr)
+ kunmap_atomic(actor->pageaddr);
}
-struct page **alloc_page_array(int nr_pages, int gfp_mask)
+struct squashfs_page_actor *squashfs_page_actor_init_special(struct page **page,
+ int pages, int length)
{
- int i;
- struct page **page;
+ struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
- page = kcalloc(nr_pages, sizeof(struct page *), gfp_mask);
- if (!page)
+ if (actor == NULL)
return NULL;
- for (i = 0; i < nr_pages; ++i) {
- page[i] = alloc_page(gfp_mask);
- if (!page[i]) {
- free_page_array(page, i);
- return NULL;
- }
- }
- return page;
+
+ actor->length = length ? : pages * PAGE_CACHE_SIZE;
+ actor->page = page;
+ actor->pages = pages;
+ actor->next_page = 0;
+ actor->pageaddr = NULL;
+ actor->squashfs_first_page = direct_first_page;
+ actor->squashfs_next_page = direct_next_page;
+ actor->squashfs_finish_page = direct_finish_page;
+ return actor;
}
diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h
index aa1ed790b5a3..26dd82008b82 100644
--- a/fs/squashfs/page_actor.h
+++ b/fs/squashfs/page_actor.h
@@ -5,61 +5,77 @@
* Phillip Lougher <phillip@squashfs.org.uk>
*
* This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level squashfsory.
+ * the COPYING file in the top-level directory.
*/
+#ifndef CONFIG_SQUASHFS_FILE_DIRECT
struct squashfs_page_actor {
- struct page **page;
- void *pageaddr;
+ void **page;
int pages;
int length;
int next_page;
- void (*release_pages)(struct page **, int, int);
};
-extern struct squashfs_page_actor *squashfs_page_actor_init(struct page **,
- int, int, void (*)(struct page **, int, int));
-extern void squashfs_page_actor_free(struct squashfs_page_actor *, int);
+static inline struct squashfs_page_actor *squashfs_page_actor_init(void **page,
+ int pages, int length)
+{
+ struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
-extern void squashfs_actor_to_buf(struct squashfs_page_actor *, void *, int);
-extern void squashfs_buf_to_actor(void *, struct squashfs_page_actor *, int);
-extern void squashfs_bh_to_actor(struct buffer_head **, int,
- struct squashfs_page_actor *, int, int, int);
-extern void squashfs_bh_to_buf(struct buffer_head **, int, void *, int, int,
- int);
+ if (actor == NULL)
+ return NULL;
+
+ actor->length = length ? : pages * PAGE_CACHE_SIZE;
+ actor->page = page;
+ actor->pages = pages;
+ actor->next_page = 0;
+ return actor;
+}
-/*
- * Calling code should avoid sleeping between calls to squashfs_first_page()
- * and squashfs_finish_page().
- */
static inline void *squashfs_first_page(struct squashfs_page_actor *actor)
{
actor->next_page = 1;
- return actor->pageaddr = actor->page[0] ? kmap_atomic(actor->page[0])
- : NULL;
+ return actor->page[0];
}
static inline void *squashfs_next_page(struct squashfs_page_actor *actor)
{
- if (!IS_ERR_OR_NULL(actor->pageaddr))
- kunmap_atomic(actor->pageaddr);
-
- if (actor->next_page == actor->pages)
- return actor->pageaddr = ERR_PTR(-ENODATA);
-
- actor->pageaddr = actor->page[actor->next_page] ?
- kmap_atomic(actor->page[actor->next_page]) : NULL;
- ++actor->next_page;
- return actor->pageaddr;
+ return actor->next_page == actor->pages ? NULL :
+ actor->page[actor->next_page++];
}
static inline void squashfs_finish_page(struct squashfs_page_actor *actor)
{
- if (!IS_ERR_OR_NULL(actor->pageaddr))
- kunmap_atomic(actor->pageaddr);
+ /* empty */
}
+#else
+struct squashfs_page_actor {
+ union {
+ void **buffer;
+ struct page **page;
+ };
+ void *pageaddr;
+ void *(*squashfs_first_page)(struct squashfs_page_actor *);
+ void *(*squashfs_next_page)(struct squashfs_page_actor *);
+ void (*squashfs_finish_page)(struct squashfs_page_actor *);
+ int pages;
+ int length;
+ int next_page;
+};
-extern struct page **alloc_page_array(int, int);
-extern void free_page_array(struct page **, int);
-
+extern struct squashfs_page_actor *squashfs_page_actor_init(void **, int, int);
+extern struct squashfs_page_actor *squashfs_page_actor_init_special(struct page
+ **, int, int);
+static inline void *squashfs_first_page(struct squashfs_page_actor *actor)
+{
+ return actor->squashfs_first_page(actor);
+}
+static inline void *squashfs_next_page(struct squashfs_page_actor *actor)
+{
+ return actor->squashfs_next_page(actor);
+}
+static inline void squashfs_finish_page(struct squashfs_page_actor *actor)
+{
+ actor->squashfs_finish_page(actor);
+}
+#endif
#endif
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
index 6093579c6c5d..887d6d270080 100644
--- a/fs/squashfs/squashfs.h
+++ b/fs/squashfs/squashfs.h
@@ -28,14 +28,8 @@
#define WARNING(s, args...) pr_warn("SQUASHFS: "s, ## args)
/* block.c */
-extern int squashfs_init_read_wq(void);
-extern void squashfs_destroy_read_wq(void);
extern int squashfs_read_data(struct super_block *, u64, int, u64 *,
struct squashfs_page_actor *);
-extern int squashfs_read_data(struct super_block *, u64, int, u64 *,
- struct squashfs_page_actor *);
-extern int squashfs_read_data_async(struct super_block *, u64, int, u64 *,
- struct squashfs_page_actor *);
/* cache.c */
extern struct squashfs_cache *squashfs_cache_init(char *, int, int);
@@ -76,9 +70,8 @@ extern __le64 *squashfs_read_fragment_index_table(struct super_block *,
void squashfs_copy_cache(struct page *, struct squashfs_cache_entry *, int,
int);
-/* file_direct.c */
-extern int squashfs_readpages_block(struct page *, struct list_head *,
- unsigned int *, struct address_space *, int, u64, int);
+/* file_xxx.c */
+extern int squashfs_readpage_block(struct page *, u64, int);
/* id.c */
extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *);
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
index 3b767ce1e46d..ef69c31947bf 100644
--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h
@@ -49,7 +49,7 @@ struct squashfs_cache_entry {
int num_waiters;
wait_queue_head_t wait_queue;
struct squashfs_cache *cache;
- struct page **page;
+ void **data;
struct squashfs_page_actor *actor;
};
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 4ffbddb688aa..93aa3e23c845 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -445,15 +445,9 @@ static int __init init_squashfs_fs(void)
if (err)
return err;
- if (!squashfs_init_read_wq()) {
- destroy_inodecache();
- return -ENOMEM;
- }
-
err = register_filesystem(&squashfs_fs_type);
if (err) {
destroy_inodecache();
- squashfs_destroy_read_wq();
return err;
}
@@ -467,7 +461,6 @@ static void __exit exit_squashfs_fs(void)
{
unregister_filesystem(&squashfs_fs_type);
destroy_inodecache();
- squashfs_destroy_read_wq();
}
diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c
index 14cd373e1897..c609624e4b8a 100644
--- a/fs/squashfs/xz_wrapper.c
+++ b/fs/squashfs/xz_wrapper.c
@@ -55,7 +55,7 @@ static void *squashfs_xz_comp_opts(struct squashfs_sb_info *msblk,
struct comp_opts *opts;
int err = 0, n;
- opts = kmalloc(sizeof(*opts), GFP_ATOMIC);
+ opts = kmalloc(sizeof(*opts), GFP_KERNEL);
if (opts == NULL) {
err = -ENOMEM;
goto out2;
@@ -136,7 +136,6 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm,
enum xz_ret xz_err;
int avail, total = 0, k = 0;
struct squashfs_xz *stream = strm;
- void *buf = NULL;
xz_dec_reset(stream->state);
stream->buf.in_pos = 0;
@@ -157,20 +156,12 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm,
if (stream->buf.out_pos == stream->buf.out_size) {
stream->buf.out = squashfs_next_page(output);
- if (!IS_ERR(stream->buf.out)) {
+ if (stream->buf.out != NULL) {
stream->buf.out_pos = 0;
total += PAGE_CACHE_SIZE;
}
}
- if (!stream->buf.out) {
- if (!buf) {
- buf = kmalloc(PAGE_CACHE_SIZE, GFP_ATOMIC);
- if (!buf)
- goto out;
- }
- stream->buf.out = buf;
- }
xz_err = xz_dec_run(stream->state, &stream->buf);
if (stream->buf.in_pos == stream->buf.in_size && k < b)
@@ -182,13 +173,11 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm,
if (xz_err != XZ_STREAM_END || k < b)
goto out;
- kfree(buf);
return total + stream->buf.out_pos;
out:
for (; k < b; k++)
put_bh(bh[k]);
- kfree(buf);
return -EIO;
}
diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c
index 09c892b5308e..8727caba6882 100644
--- a/fs/squashfs/zlib_wrapper.c
+++ b/fs/squashfs/zlib_wrapper.c
@@ -66,7 +66,6 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm,
struct buffer_head **bh, int b, int offset, int length,
struct squashfs_page_actor *output)
{
- void *buf = NULL;
int zlib_err, zlib_init = 0, k = 0;
z_stream *stream = strm;
@@ -85,19 +84,10 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm,
if (stream->avail_out == 0) {
stream->next_out = squashfs_next_page(output);
- if (!IS_ERR(stream->next_out))
+ if (stream->next_out != NULL)
stream->avail_out = PAGE_CACHE_SIZE;
}
- if (!stream->next_out) {
- if (!buf) {
- buf = kmalloc(PAGE_CACHE_SIZE, GFP_ATOMIC);
- if (!buf)
- goto out;
- }
- stream->next_out = buf;
- }
-
if (!zlib_init) {
zlib_err = zlib_inflateInit(stream);
if (zlib_err != Z_OK) {
@@ -125,13 +115,11 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm,
if (k < b)
goto out;
- kfree(buf);
return stream->total_out;
out:
for (; k < b; k++)
put_bh(bh[k]);
- kfree(buf);
return -EIO;
}
diff --git a/fs/super.c b/fs/super.c
index bc7ae0f327d0..689ec96c43f8 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -118,13 +118,23 @@ static unsigned long super_cache_count(struct shrinker *shrink,
sb = container_of(shrink, struct super_block, s_shrink);
/*
- * Don't call trylock_super as it is a potential
- * scalability bottleneck. The counts could get updated
- * between super_cache_count and super_cache_scan anyway.
- * Call to super_cache_count with shrinker_rwsem held
- * ensures the safety of call to list_lru_shrink_count() and
- * s_op->nr_cached_objects().
+ * We don't call trylock_super() here as it is a scalability bottleneck,
+ * so we're exposed to partial setup state. The shrinker rwsem does not
+ * protect filesystem operations backing list_lru_shrink_count() or
+ * s_op->nr_cached_objects(). Counts can change between
+ * super_cache_count and super_cache_scan, so we really don't need locks
+ * here.
+ *
+ * However, if we are currently mounting the superblock, the underlying
+ * filesystem might be in a state of partial construction and hence it
+ * is dangerous to access it. trylock_super() uses a MS_BORN check to
+ * avoid this situation, so do the same here. The memory barrier is
+ * matched with the one in mount_fs() as we don't hold locks here.
*/
+ if (!(sb->s_flags & MS_BORN))
+ return 0;
+ smp_rmb();
+
if (sb->s_op && sb->s_op->nr_cached_objects)
total_objects = sb->s_op->nr_cached_objects(sb, sc);
@@ -1151,6 +1161,14 @@ mount_fs(struct file_system_type *type, int flags, const char *name, struct vfsm
sb = root->d_sb;
BUG_ON(!sb);
WARN_ON(!sb->s_bdi);
+
+ /*
+ * Write barrier is for super_cache_count(). We place it before setting
+ * MS_BORN as the data dependency between the two functions is the
+ * superblock structure contents that we just set up, not the MS_BORN
+ * flag.
+ */
+ smp_wmb();
sb->s_flags |= MS_BORN;
error = security_sb_kern_mount(sb, flags, secdata);
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 02fa1dcc5969..29f5b2e589a1 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -275,7 +275,7 @@ static int __sysv_write_inode(struct inode *inode, int wait)
}
}
brelse(bh);
- return 0;
+ return err;
}
int sysv_write_inode(struct inode *inode, struct writeback_control *wbc)
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 0548c572839c..1327a02ec778 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -50,8 +50,7 @@ static DEFINE_SPINLOCK(cancel_lock);
static inline bool isalarm(struct timerfd_ctx *ctx)
{
return ctx->clockid == CLOCK_REALTIME_ALARM ||
- ctx->clockid == CLOCK_BOOTTIME_ALARM ||
- ctx->clockid == CLOCK_POWEROFF_ALARM;
+ ctx->clockid == CLOCK_BOOTTIME_ALARM;
}
/*
@@ -143,8 +142,7 @@ static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags)
{
spin_lock(&ctx->cancel_lock);
if ((ctx->clockid == CLOCK_REALTIME ||
- ctx->clockid == CLOCK_REALTIME_ALARM ||
- ctx->clockid == CLOCK_POWEROFF_ALARM) &&
+ ctx->clockid == CLOCK_REALTIME_ALARM) &&
(flags & TFD_TIMER_ABSTIME) && (flags & TFD_TIMER_CANCEL_ON_SET)) {
if (!ctx->might_cancel) {
ctx->might_cancel = true;
@@ -176,7 +174,6 @@ static int timerfd_setup(struct timerfd_ctx *ctx, int flags,
enum hrtimer_mode htmode;
ktime_t texp;
int clockid = ctx->clockid;
- enum alarmtimer_type type;
htmode = (flags & TFD_TIMER_ABSTIME) ?
HRTIMER_MODE_ABS: HRTIMER_MODE_REL;
@@ -187,8 +184,10 @@ static int timerfd_setup(struct timerfd_ctx *ctx, int flags,
ctx->tintv = timespec_to_ktime(ktmr->it_interval);
if (isalarm(ctx)) {
- type = clock2alarm(ctx->clockid);
- alarm_init(&ctx->t.alarm, type, timerfd_alarmproc);
+ alarm_init(&ctx->t.alarm,
+ ctx->clockid == CLOCK_REALTIME_ALARM ?
+ ALARM_REALTIME : ALARM_BOOTTIME,
+ timerfd_alarmproc);
} else {
hrtimer_init(&ctx->t.tmr, clockid, htmode);
hrtimer_set_expires(&ctx->t.tmr, texp);
@@ -388,7 +387,6 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
{
int ufd;
struct timerfd_ctx *ctx;
- enum alarmtimer_type type;
/* Check the TFD_* constants for consistency. */
BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC);
@@ -399,8 +397,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
clockid != CLOCK_REALTIME &&
clockid != CLOCK_REALTIME_ALARM &&
clockid != CLOCK_BOOTTIME &&
- clockid != CLOCK_BOOTTIME_ALARM &&
- clockid != CLOCK_POWEROFF_ALARM))
+ clockid != CLOCK_BOOTTIME_ALARM))
return -EINVAL;
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
@@ -411,12 +408,13 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
spin_lock_init(&ctx->cancel_lock);
ctx->clockid = clockid;
- if (isalarm(ctx)) {
- type = clock2alarm(ctx->clockid);
- alarm_init(&ctx->t.alarm, type, timerfd_alarmproc);
- } else {
+ if (isalarm(ctx))
+ alarm_init(&ctx->t.alarm,
+ ctx->clockid == CLOCK_REALTIME_ALARM ?
+ ALARM_REALTIME : ALARM_BOOTTIME,
+ timerfd_alarmproc);
+ else
hrtimer_init(&ctx->t.tmr, clockid, HRTIMER_MODE_ABS);
- }
ctx->moffs = ktime_mono_to_real((ktime_t){ .tv64 = 0 });
@@ -488,10 +486,6 @@ static int do_timerfd_settime(int ufd, int flags,
ret = timerfd_setup(ctx, flags, new);
spin_unlock_irq(&ctx->wqh.lock);
-
- if (ctx->clockid == CLOCK_POWEROFF_ALARM)
- set_power_on_alarm();
-
fdput(f);
return ret;
}
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 0e659d9c69a1..613193c6bb42 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1364,6 +1364,12 @@ reread:
iinfo->i_alloc_type = le16_to_cpu(fe->icbTag.flags) &
ICBTAG_FLAG_AD_MASK;
+ if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_SHORT &&
+ iinfo->i_alloc_type != ICBTAG_FLAG_AD_LONG &&
+ iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
+ ret = -EIO;
+ goto out;
+ }
iinfo->i_unique = 0;
iinfo->i_lenEAttr = 0;
iinfo->i_lenExtents = 0;
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index d859d8bd1f96..e7cc0d860499 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -793,6 +793,18 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
goto out_unlock;
/*
+ * UFFDIO_COPY will fill file holes even without
+ * PROT_WRITE. This check enforces that if this is a
+ * MAP_SHARED, the process has write permission to the backing
+ * file. If VM_MAYWRITE is set it also enforces that on a
+ * MAP_SHARED vma: there is no F_WRITE_SEAL and no further
+ * F_WRITE_SEAL can be taken until the vma is destroyed.
+ */
+ ret = -EPERM;
+ if (unlikely(!(cur->vm_flags & VM_MAYWRITE)))
+ goto out_unlock;
+
+ /*
* Check that this vma isn't already owned by a
* different userfaultfd. We can't allow more than one
* userfaultfd to own a single vma simultaneously or we
@@ -817,6 +829,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
BUG_ON(vma->vm_ops);
BUG_ON(vma->vm_userfaultfd_ctx.ctx &&
vma->vm_userfaultfd_ctx.ctx != ctx);
+ WARN_ON(!(vma->vm_flags & VM_MAYWRITE));
/*
* Nothing to do: this vma is already registered into this
@@ -953,6 +966,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
cond_resched();
BUG_ON(vma->vm_ops);
+ WARN_ON(!(vma->vm_flags & VM_MAYWRITE));
/*
* Nothing to do: this vma is already registered into this
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index fb9636cc927c..5d8d12746e6e 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -528,7 +528,14 @@ xfs_attr_shortform_addname(xfs_da_args_t *args)
if (args->flags & ATTR_CREATE)
return retval;
retval = xfs_attr_shortform_remove(args);
- ASSERT(retval == 0);
+ if (retval)
+ return retval;
+ /*
+ * Since we have removed the old attr, clear ATTR_REPLACE so
+ * that the leaf format add routine won't trip over the attr
+ * not being around.
+ */
+ args->flags &= ~ATTR_REPLACE;
}
if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX ||