Diffstat (limited to 'fs')
-rw-r--r--  fs/Kconfig | 6
-rw-r--r--  fs/Makefile | 1
-rw-r--r--  fs/aio.c | 5
-rw-r--r--  fs/block_dev.c | 32
-rw-r--r--  fs/btrfs/extent_io.c | 19
-rw-r--r--  fs/btrfs/inode.c | 16
-rw-r--r--  fs/btrfs/tree-log.c | 2
-rw-r--r--  fs/buffer.c | 60
-rw-r--r--  fs/ceph/addr.c | 14
-rw-r--r--  fs/cifs/file.c | 8
-rw-r--r--  fs/compat_ioctl.c | 3
-rw-r--r--  fs/crypto/crypto.c | 47
-rw-r--r--  fs/crypto/fname.c | 32
-rw-r--r--  fs/crypto/fscrypt_private.h | 23
-rw-r--r--  fs/crypto/hooks.c | 5
-rw-r--r--  fs/crypto/keyinfo.c | 278
-rw-r--r--  fs/dcache.c | 24
-rw-r--r--  fs/direct-io.c | 14
-rw-r--r--  fs/drop_caches.c | 2
-rw-r--r--  fs/ecryptfs/Makefile | 2
-rw-r--r--  fs/ecryptfs/crypto.c | 193
-rw-r--r--  fs/ecryptfs/debug.c | 29
-rw-r--r--  fs/ecryptfs/ecryptfs_kernel.h | 83
-rw-r--r--  fs/ecryptfs/events.c | 393
-rw-r--r--  fs/ecryptfs/file.c | 32
-rw-r--r--  fs/ecryptfs/inode.c | 14
-rw-r--r--  fs/ecryptfs/keystore.c | 94
-rw-r--r--  fs/ecryptfs/main.c | 64
-rw-r--r--  fs/ecryptfs/mmap.c | 6
-rw-r--r--  fs/ecryptfs/super.c | 14
-rw-r--r--  fs/eventpoll.c | 10
-rw-r--r--  fs/ext2/namei.c | 6
-rw-r--r--  fs/ext4/Kconfig | 10
-rw-r--r--  fs/ext4/Makefile | 2
-rw-r--r--  fs/ext4/crypto.c | 19
-rw-r--r--  fs/ext4/crypto_fname.c | 6
-rw-r--r--  fs/ext4/crypto_key.c | 104
-rw-r--r--  fs/ext4/crypto_policy.c | 14
-rw-r--r--  fs/ext4/dir.c | 7
-rw-r--r--  fs/ext4/ext4.h | 40
-rw-r--r--  fs/ext4/ext4_crypto.h | 11
-rw-r--r--  fs/ext4/ext4_ice.c | 109
-rw-r--r--  fs/ext4/ext4_ice.h | 104
-rw-r--r--  fs/ext4/extents.c | 12
-rw-r--r--  fs/ext4/inline.c | 10
-rw-r--r--  fs/ext4/inode.c | 51
-rw-r--r--  fs/ext4/namei.c | 21
-rw-r--r--  fs/ext4/page-io.c | 7
-rw-r--r--  fs/ext4/readpage.c | 16
-rw-r--r--  fs/f2fs/acl.c | 22
-rw-r--r--  fs/f2fs/acl.h | 5
-rw-r--r--  fs/f2fs/checkpoint.c | 532
-rw-r--r--  fs/f2fs/data.c | 1213
-rw-r--r--  fs/f2fs/debug.c | 102
-rw-r--r--  fs/f2fs/dir.c | 146
-rw-r--r--  fs/f2fs/extent_cache.c | 85
-rw-r--r--  fs/f2fs/f2fs.h | 1080
-rw-r--r--  fs/f2fs/file.c | 846
-rw-r--r--  fs/f2fs/gc.c | 731
-rw-r--r--  fs/f2fs/gc.h | 7
-rw-r--r--  fs/f2fs/hash.c | 5
-rw-r--r--  fs/f2fs/inline.c | 152
-rw-r--r--  fs/f2fs/inode.c | 296
-rw-r--r--  fs/f2fs/namei.c | 163
-rw-r--r--  fs/f2fs/node.c | 833
-rw-r--r--  fs/f2fs/node.h | 16
-rw-r--r--  fs/f2fs/recovery.c | 265
-rw-r--r--  fs/f2fs/segment.c | 1289
-rw-r--r--  fs/f2fs/segment.h | 86
-rw-r--r--  fs/f2fs/shrinker.c | 11
-rw-r--r--  fs/f2fs/super.c | 1408
-rw-r--r--  fs/f2fs/sysfs.c | 178
-rw-r--r--  fs/f2fs/trace.c | 17
-rw-r--r--  fs/f2fs/trace.h | 5
-rw-r--r--  fs/f2fs/xattr.c | 133
-rw-r--r--  fs/f2fs/xattr.h | 13
-rw-r--r--  fs/fat/fatent.c | 7
-rw-r--r--  fs/fat/inode.c | 5
-rw-r--r--  fs/file_table.c | 137
-rw-r--r--  fs/fuse/Makefile | 2
-rw-r--r--  fs/fuse/dev.c | 13
-rw-r--r--  fs/fuse/dir.c | 3
-rw-r--r--  fs/fuse/file.c | 83
-rw-r--r--  fs/fuse/fuse_i.h | 11
-rw-r--r--  fs/fuse/fuse_passthrough.h | 31
-rw-r--r--  fs/fuse/inode.c | 8
-rw-r--r--  fs/fuse/passthrough.c | 132
-rw-r--r--  fs/gfs2/acl.c | 5
-rw-r--r--  fs/gfs2/aops.c | 20
-rw-r--r--  fs/gfs2/ops_fstype.c | 2
-rw-r--r--  fs/inode.c | 25
-rw-r--r--  fs/internal.h | 27
-rw-r--r--  fs/jffs2/dir.c | 12
-rw-r--r--  fs/jfs/namei.c | 12
-rw-r--r--  fs/mbcache.c | 33
-rw-r--r--  fs/namei.c | 106
-rw-r--r--  fs/namespace.c | 2
-rw-r--r--  fs/nilfs2/btree.c | 4
-rw-r--r--  fs/nilfs2/namei.c | 6
-rw-r--r--  fs/nilfs2/page.c | 7
-rw-r--r--  fs/nilfs2/segment.c | 12
-rw-r--r--  fs/nilfs2/super.c | 2
-rw-r--r--  fs/overlayfs/copy_up.c | 2
-rw-r--r--  fs/overlayfs/dir.c | 22
-rw-r--r--  fs/overlayfs/overlayfs.h | 1
-rw-r--r--  fs/overlayfs/readdir.c | 2
-rw-r--r--  fs/overlayfs/super.c | 30
-rw-r--r--  fs/proc/array.c | 189
-rw-r--r--  fs/proc/base.c | 223
-rw-r--r--  fs/proc/internal.h | 4
-rw-r--r--  fs/proc/stat.c | 49
-rw-r--r--  fs/proc/task_mmu.c | 423
-rw-r--r--  fs/pstore/inode.c | 31
-rw-r--r--  fs/pstore/platform.c | 5
-rw-r--r--  fs/pstore/ram.c | 2
-rw-r--r--  fs/pstore/ram_core.c | 4
-rw-r--r--  fs/reiserfs/namei.c | 12
-rw-r--r--  fs/sdcardfs/dentry.c | 7
-rw-r--r--  fs/sdcardfs/derived_perm.c | 7
-rw-r--r--  fs/sdcardfs/inode.c | 2
-rw-r--r--  fs/sdcardfs/main.c | 25
-rw-r--r--  fs/sdcardfs/sdcardfs.h | 2
-rw-r--r--  fs/sdcardfs/super.c | 6
-rw-r--r--  fs/sdfat/Kconfig | 126
-rw-r--r--  fs/sdfat/LICENSE | 339
-rw-r--r--  fs/sdfat/Makefile | 24
-rw-r--r--  fs/sdfat/README.md | 19
-rw-r--r--  fs/sdfat/amap_smart.c | 1314
-rw-r--r--  fs/sdfat/amap_smart.h | 137
-rw-r--r--  fs/sdfat/api.c | 636
-rw-r--r--  fs/sdfat/api.h | 409
-rw-r--r--  fs/sdfat/blkdev.c | 416
-rw-r--r--  fs/sdfat/cache.c | 846
-rw-r--r--  fs/sdfat/config.h | 146
-rw-r--r--  fs/sdfat/core.c | 3694
-rw-r--r--  fs/sdfat/core.h | 221
-rw-r--r--  fs/sdfat/core_exfat.c | 1560
-rw-r--r--  fs/sdfat/core_fat.c | 1465
-rw-r--r--  fs/sdfat/dfr.c | 1372
-rw-r--r--  fs/sdfat/dfr.h | 261
-rw-r--r--  fs/sdfat/extent.c | 351
-rw-r--r--  fs/sdfat/fatent.c | 420
-rw-r--r--  fs/sdfat/misc.c | 464
-rw-r--r--  fs/sdfat/mpage.c | 635
-rw-r--r--  fs/sdfat/nls.c | 478
-rw-r--r--  fs/sdfat/sdfat.c | 5255
-rw-r--r--  fs/sdfat/sdfat.h | 528
-rw-r--r--  fs/sdfat/sdfat_fs.h | 423
-rw-r--r--  fs/sdfat/statistics.c | 281
-rw-r--r--  fs/sdfat/upcase.h | 407
-rw-r--r--  fs/sdfat/version.h | 25
-rw-r--r--  fs/sdfat/xattr.c | 132
-rw-r--r--  fs/select.c | 3
-rw-r--r--  fs/seq_file.c | 57
-rw-r--r--  fs/squashfs/Kconfig | 28
-rw-r--r--  fs/squashfs/Makefile | 3
-rw-r--r--  fs/squashfs/block.c | 549
-rw-r--r--  fs/squashfs/cache.c | 73
-rw-r--r--  fs/squashfs/decompressor.c | 55
-rw-r--r--  fs/squashfs/file.c | 140
-rw-r--r--  fs/squashfs/file_cache.c | 38
-rw-r--r--  fs/squashfs/file_direct.c | 245
-rw-r--r--  fs/squashfs/lz4_wrapper.c | 32
-rw-r--r--  fs/squashfs/lzo_wrapper.c | 40
-rw-r--r--  fs/squashfs/page_actor.c | 175
-rw-r--r--  fs/squashfs/page_actor.h | 84
-rw-r--r--  fs/squashfs/squashfs.h | 11
-rw-r--r--  fs/squashfs/squashfs_fs_sb.h | 2
-rw-r--r--  fs/squashfs/super.c | 7
-rw-r--r--  fs/squashfs/xz_wrapper.c | 15
-rw-r--r--  fs/squashfs/zlib_wrapper.c | 14
-rw-r--r--  fs/super.c | 2
-rw-r--r--  fs/timerfd.c | 12
-rw-r--r--  fs/ubifs/key.h | 6
-rw-r--r--  fs/ubifs/xattr.c | 4
-rw-r--r--  fs/udf/namei.c | 6
-rw-r--r--  fs/ufs/namei.c | 6
-rw-r--r--  fs/userfaultfd.c | 14
-rw-r--r--  fs/xfs/xfs_acl.c | 4
179 files changed, 32778 insertions, 4699 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 80af05163579..4d90075f54bf 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -103,6 +103,7 @@ if BLOCK
menu "DOS/FAT/NT Filesystems"
source "fs/fat/Kconfig"
+source "fs/sdfat/Kconfig"
source "fs/ntfs/Kconfig"
endmenu
@@ -284,4 +285,9 @@ endif # NETWORK_FILESYSTEMS
source "fs/nls/Kconfig"
source "fs/dlm/Kconfig"
+config FILE_TABLE_DEBUG
+ bool "Enable FILE_TABLE_DEBUG"
+ help
+	  This option enables debugging of open files using a global file table.
+
endmenu
diff --git a/fs/Makefile b/fs/Makefile
index 4644db462ba9..debf0742b3c9 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -77,6 +77,7 @@ obj-$(CONFIG_HUGETLBFS) += hugetlbfs/
obj-$(CONFIG_CODA_FS) += coda/
obj-$(CONFIG_MINIX_FS) += minix/
obj-$(CONFIG_FAT_FS) += fat/
+obj-$(CONFIG_SDFAT_FS) += sdfat/
obj-$(CONFIG_BFS_FS) += bfs/
obj-$(CONFIG_ISO9660_FS) += isofs/
obj-$(CONFIG_HFSPLUS_FS) += hfsplus/ # Before hfs to find wrapped HFS+
diff --git a/fs/aio.c b/fs/aio.c
index 7187d03aa0bc..e8810a263bf3 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -263,6 +263,7 @@ static int __init aio_setup(void)
aio_mnt = kern_mount(&aio_fs);
if (IS_ERR(aio_mnt))
panic("Failed to create aio fs mount.");
+ aio_mnt->mnt_flags |= MNT_NOEXEC;
kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
@@ -1338,7 +1339,7 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
{
struct kioctx *ioctx = NULL;
- unsigned long ctx;
+ unsigned long ctx = 0;
long ret;
ret = get_user(ctx, ctxp);
@@ -1471,6 +1472,7 @@ rw_common:
len = ret;
+ get_file(file);
if (rw == WRITE)
file_start_write(file);
@@ -1478,6 +1480,7 @@ rw_common:
if (rw == WRITE)
file_end_write(file);
+ fput(file);
kfree(iovec);
break;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index a71d442ef7d0..83c007612a68 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -532,6 +532,7 @@ static void init_once(void *foo)
#ifdef CONFIG_SYSFS
INIT_LIST_HEAD(&bdev->bd_holder_disks);
#endif
+ bdev->bd_bdi = &noop_backing_dev_info;
inode_init_once(&ei->vfs_inode);
/* Initialize mutex for freeze. */
mutex_init(&bdev->bd_fsfreeze_mutex);
@@ -557,6 +558,12 @@ static void bdev_evict_inode(struct inode *inode)
}
list_del_init(&bdev->bd_list);
spin_unlock(&bdev_lock);
+ /* Detach inode from wb early as bdi_put() may free bdi->wb */
+ inode_detach_wb(inode);
+ if (bdev->bd_bdi != &noop_backing_dev_info) {
+ bdi_put(bdev->bd_bdi);
+ bdev->bd_bdi = &noop_backing_dev_info;
+ }
}
static const struct super_operations bdev_sops = {
@@ -623,6 +630,21 @@ static int bdev_set(struct inode *inode, void *data)
static LIST_HEAD(all_bdevs);
+/*
+ * If there is a bdev inode for this device, unhash it so that it gets evicted
+ * as soon as last inode reference is dropped.
+ */
+void bdev_unhash_inode(dev_t dev)
+{
+ struct inode *inode;
+
+ inode = ilookup5(blockdev_superblock, hash(dev), bdev_test, &dev);
+ if (inode) {
+ remove_inode_hash(inode);
+ iput(inode);
+ }
+}
+
struct block_device *bdget(dev_t dev)
{
struct block_device *bdev;
@@ -1199,6 +1221,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
bdev->bd_disk = disk;
bdev->bd_queue = disk->queue;
bdev->bd_contains = bdev;
+
bdev->bd_inode->i_flags = disk->fops->direct_access ? S_DAX : 0;
if (!partno) {
ret = -ENXIO;
@@ -1271,6 +1294,9 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
(bdev->bd_part->nr_sects % (PAGE_SIZE / 512)))
bdev->bd_inode->i_flags &= ~S_DAX;
}
+
+ if (bdev->bd_bdi == &noop_backing_dev_info)
+ bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info);
} else {
if (bdev->bd_contains == bdev) {
ret = 0;
@@ -1535,12 +1561,6 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
kill_bdev(bdev);
bdev_write_inode(bdev);
- /*
- * Detaching bdev inode from its wb in __destroy_inode()
- * is too late: the queue which embeds its bdi (along with
- * root wb) can be gone as soon as we put_disk() below.
- */
- inode_detach_wb(bdev->bd_inode);
}
if (bdev->bd_contains == bdev) {
if (disk->fops->release)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index b28bc7690d4b..42c745dbccfa 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3951,8 +3951,8 @@ retry:
if (wbc->sync_mode == WB_SYNC_ALL)
tag_pages_for_writeback(mapping, index, end);
while (!done && !nr_to_write_done && (index <= end) &&
- (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
- min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
+ (nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
+ tag))) {
unsigned i;
scanned = 1;
@@ -3962,11 +3962,6 @@ retry:
if (!PagePrivate(page))
continue;
- if (!wbc->range_cyclic && page->index > end) {
- done = 1;
- break;
- }
-
spin_lock(&mapping->private_lock);
if (!PagePrivate(page)) {
spin_unlock(&mapping->private_lock);
@@ -4099,8 +4094,8 @@ retry:
if (wbc->sync_mode == WB_SYNC_ALL)
tag_pages_for_writeback(mapping, index, end);
while (!done && !nr_to_write_done && (index <= end) &&
- (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
- min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
+ (nr_pages = pagevec_lookup_range_tag(&pvec, mapping,
+ &index, end, tag))) {
unsigned i;
scanned = 1;
@@ -4124,12 +4119,6 @@ retry:
continue;
}
- if (!wbc->range_cyclic && page->index > end) {
- done = 1;
- unlock_page(page);
- continue;
- }
-
if (wbc->sync_mode != WB_SYNC_NONE) {
if (PageWriteback(page))
flush_fn(data);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index da4ad006739d..ca30e3670b42 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6473,7 +6473,8 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
goto out_unlock_inode;
} else {
btrfs_update_inode(trans, root, inode);
- d_instantiate_new(dentry, inode);
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
}
out_unlock:
@@ -6548,7 +6549,8 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
goto out_unlock_inode;
BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
- d_instantiate_new(dentry, inode);
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
out_unlock:
btrfs_end_transaction(trans, root);
@@ -6691,7 +6693,12 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
if (err)
goto out_fail_inode;
- d_instantiate_new(dentry, inode);
+ d_instantiate(dentry, inode);
+ /*
+ * mkdir is special. We're unlocking after we call d_instantiate
+ * to avoid a race with nfsd calling d_instantiate.
+ */
+ unlock_new_inode(inode);
drop_on_err = 0;
out_fail:
@@ -9850,7 +9857,8 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
goto out_unlock_inode;
}
- d_instantiate_new(dentry, inode);
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
out_unlock:
btrfs_end_transaction(trans, root);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index ff4df1783219..0cbc7af54d0b 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3676,7 +3676,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
src_offset = btrfs_item_ptr_offset(src, start_slot + i);
- if ((i == (nr - 1)))
+ if (i == nr - 1)
last_key = ins_keys[i];
if (ins_keys[i].type == BTRFS_INODE_ITEM_KEY) {
diff --git a/fs/buffer.c b/fs/buffer.c
index f278e27bd8c0..8ee41694a5d4 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -621,6 +621,18 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
}
EXPORT_SYMBOL(mark_buffer_dirty_inode);
+#ifdef CONFIG_BLK_DEV_IO_TRACE
+static inline void save_dirty_task(struct page *page)
+{
+ /* Save the task that is dirtying this page */
+ page->tsk_dirty = current;
+}
+#else
+static inline void save_dirty_task(struct page *page)
+{
+}
+#endif
+
/*
* Mark the page dirty, and set it dirty in the radix tree, and mark the inode
* dirty.
@@ -641,6 +653,7 @@ static void __set_page_dirty(struct page *page, struct address_space *mapping,
account_page_dirtied(page, mapping, memcg);
radix_tree_tag_set(&mapping->page_tree,
page_index(page), PAGECACHE_TAG_DIRTY);
+ save_dirty_task(page);
}
spin_unlock_irqrestore(&mapping->tree_lock, flags);
}
@@ -1466,12 +1479,48 @@ static bool has_bh_in_lru(int cpu, void *dummy)
return 0;
}
+static void __evict_bh_lru(void *arg)
+{
+ struct bh_lru *b = &get_cpu_var(bh_lrus);
+ struct buffer_head *bh = arg;
+ int i;
+
+ for (i = 0; i < BH_LRU_SIZE; i++) {
+ if (b->bhs[i] == bh) {
+ brelse(b->bhs[i]);
+ b->bhs[i] = NULL;
+ goto out;
+ }
+ }
+out:
+ put_cpu_var(bh_lrus);
+}
+
+static bool bh_exists_in_lru(int cpu, void *arg)
+{
+ struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
+ struct buffer_head *bh = arg;
+ int i;
+
+ for (i = 0; i < BH_LRU_SIZE; i++) {
+ if (b->bhs[i] == bh)
+ return 1;
+ }
+
+ return 0;
+
+}
void invalidate_bh_lrus(void)
{
on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1, GFP_KERNEL);
}
EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
+static void evict_bh_lrus(struct buffer_head *bh)
+{
+ on_each_cpu_cond(bh_exists_in_lru, __evict_bh_lru, bh, 1, GFP_ATOMIC);
+}
+
void set_bh_page(struct buffer_head *bh,
struct page *page, unsigned long offset)
{
@@ -3199,8 +3248,15 @@ drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
do {
if (buffer_write_io_error(bh) && page->mapping)
set_bit(AS_EIO, &page->mapping->flags);
- if (buffer_busy(bh))
- goto failed;
+ if (buffer_busy(bh)) {
+ /*
+ * Check if the busy failure was due to an
+ * outstanding LRU reference
+ */
+ evict_bh_lrus(bh);
+ if (buffer_busy(bh))
+ goto failed;
+ }
bh = bh->b_this_page;
} while (bh != head);
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 26de74684c17..bec37093a3de 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -778,8 +778,7 @@ retry:
struct page **pages = NULL;
mempool_t *pool = NULL; /* Becomes non-null if mempool used */
struct page *page;
- int want;
- u64 offset, len;
+ u64 offset = 0, len = 0;
long writeback_stat;
next = 0;
@@ -788,14 +787,9 @@ retry:
get_more_pages:
first = -1;
- want = min(end - index,
- min((pgoff_t)PAGEVEC_SIZE,
- max_pages - (pgoff_t)locked_pages) - 1)
- + 1;
- pvec_pages = pagevec_lookup_tag(&pvec, mapping, &index,
- PAGECACHE_TAG_DIRTY,
- want);
- dout("pagevec_lookup_tag got %d\n", pvec_pages);
+ pvec_pages = pagevec_lookup_range_tag(&pvec, mapping, &index,
+ end, PAGECACHE_TAG_DIRTY);
+ dout("pagevec_lookup_range_tag got %d\n", pvec_pages);
if (!pvec_pages && !locked_pages)
break;
for (i = 0; i < pvec_pages && locked_pages < max_pages; i++) {
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 5bc617cb7721..62cc0c22db63 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3452,13 +3452,13 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
* should have access to this page, we're safe to simply set
* PG_locked without checking it first.
*/
- __set_page_locked(page);
+ __SetPageLocked(page);
rc = add_to_page_cache_locked(page, mapping,
page->index, gfp);
/* give up if we can't stick it in the cache */
if (rc) {
- __clear_page_locked(page);
+ __ClearPageLocked(page);
return rc;
}
@@ -3479,9 +3479,9 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
if (*bytes + PAGE_CACHE_SIZE > rsize)
break;
- __set_page_locked(page);
+ __SetPageLocked(page);
if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
- __clear_page_locked(page);
+ __ClearPageLocked(page);
break;
}
list_move_tail(&page->lru, tmplist);
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 5af973621c73..2acdc6ddefb5 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -871,6 +871,9 @@ COMPATIBLE_IOCTL(TIOCGPTN)
COMPATIBLE_IOCTL(TIOCSPTLCK)
COMPATIBLE_IOCTL(TIOCSERGETLSR)
COMPATIBLE_IOCTL(TIOCSIG)
+COMPATIBLE_IOCTL(TIOCPMGET)
+COMPATIBLE_IOCTL(TIOCPMPUT)
+COMPATIBLE_IOCTL(TIOCPMACT)
#ifdef TIOCSRS485
COMPATIBLE_IOCTL(TIOCSRS485)
#endif
diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index 0758d32ad01b..0f46cf550907 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -162,12 +162,8 @@ int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw,
}
req = skcipher_request_alloc(tfm, gfp_flags);
- if (!req) {
- printk_ratelimited(KERN_ERR
- "%s: crypto_request_alloc() failed\n",
- __func__);
+ if (!req)
return -ENOMEM;
- }
skcipher_request_set_callback(
req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
@@ -184,9 +180,10 @@ int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw,
res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
skcipher_request_free(req);
if (res) {
- printk_ratelimited(KERN_ERR
- "%s: crypto_skcipher_encrypt() returned %d\n",
- __func__, res);
+ fscrypt_err(inode->i_sb,
+ "%scryption failed for inode %lu, block %llu: %d",
+ (rw == FS_DECRYPT ? "de" : "en"),
+ inode->i_ino, lblk_num, res);
return res;
}
return 0;
@@ -332,7 +329,6 @@ static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags)
return 0;
}
- /* this should eventually be an flag in d_flags */
spin_lock(&dentry->d_lock);
cached_with_key = dentry->d_flags & DCACHE_ENCRYPTED_WITH_KEY;
spin_unlock(&dentry->d_lock);
@@ -359,7 +355,6 @@ static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags)
const struct dentry_operations fscrypt_d_ops = {
.d_revalidate = fscrypt_d_revalidate,
};
-EXPORT_SYMBOL(fscrypt_d_ops);
void fscrypt_restore_control_page(struct page *page)
{
@@ -428,13 +423,43 @@ fail:
return res;
}
+void fscrypt_msg(struct super_block *sb, const char *level,
+ const char *fmt, ...)
+{
+ static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
+ struct va_format vaf;
+ va_list args;
+
+ if (!__ratelimit(&rs))
+ return;
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ if (sb)
+ printk("%sfscrypt (%s): %pV\n", level, sb->s_id, &vaf);
+ else
+ printk("%sfscrypt: %pV\n", level, &vaf);
+ va_end(args);
+}
+
/**
* fscrypt_init() - Set up for fs encryption.
*/
static int __init fscrypt_init(void)
{
+ /*
+ * Use an unbound workqueue to allow bios to be decrypted in parallel
+ * even when they happen to complete on the same CPU. This sacrifices
+ * locality, but it's worthwhile since decryption is CPU-intensive.
+ *
+ * Also use a high-priority workqueue to prioritize decryption work,
+ * which blocks reads from completing, over regular application tasks.
+ */
fscrypt_read_workqueue = alloc_workqueue("fscrypt_read_queue",
- WQ_HIGHPRI, 0);
+ WQ_UNBOUND | WQ_HIGHPRI,
+ num_online_cpus());
if (!fscrypt_read_workqueue)
goto fail;
diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index b18fa323d1d9..1bdb9f226eec 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -58,11 +58,8 @@ int fname_encrypt(struct inode *inode, const struct qstr *iname,
/* Set up the encryption request */
req = skcipher_request_alloc(tfm, GFP_NOFS);
- if (!req) {
- printk_ratelimited(KERN_ERR
- "%s: skcipher_request_alloc() failed\n", __func__);
+ if (!req)
return -ENOMEM;
- }
skcipher_request_set_callback(req,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
crypto_req_done, &wait);
@@ -73,8 +70,9 @@ int fname_encrypt(struct inode *inode, const struct qstr *iname,
res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
skcipher_request_free(req);
if (res < 0) {
- printk_ratelimited(KERN_ERR
- "%s: Error (error code %d)\n", __func__, res);
+ fscrypt_err(inode->i_sb,
+ "Filename encryption failed for inode %lu: %d",
+ inode->i_ino, res);
return res;
}
@@ -95,23 +93,14 @@ static int fname_decrypt(struct inode *inode,
struct skcipher_request *req = NULL;
DECLARE_CRYPTO_WAIT(wait);
struct scatterlist src_sg, dst_sg;
- struct fscrypt_info *ci = inode->i_crypt_info;
- struct crypto_skcipher *tfm = ci->ci_ctfm;
+ struct crypto_skcipher *tfm = inode->i_crypt_info->ci_ctfm;
int res = 0;
char iv[FS_CRYPTO_BLOCK_SIZE];
- unsigned lim;
-
- lim = inode->i_sb->s_cop->max_namelen(inode);
- if (iname->len <= 0 || iname->len > lim)
- return -EIO;
/* Allocate request */
req = skcipher_request_alloc(tfm, GFP_NOFS);
- if (!req) {
- printk_ratelimited(KERN_ERR
- "%s: crypto_request_alloc() failed\n", __func__);
+ if (!req)
return -ENOMEM;
- }
skcipher_request_set_callback(req,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
crypto_req_done, &wait);
@@ -126,8 +115,9 @@ static int fname_decrypt(struct inode *inode,
res = crypto_wait_req(crypto_skcipher_decrypt(req), &wait);
skcipher_request_free(req);
if (res < 0) {
- printk_ratelimited(KERN_ERR
- "%s: Error (error code %d)\n", __func__, res);
+ fscrypt_err(inode->i_sb,
+ "Filename decryption failed for inode %lu: %d",
+ inode->i_ino, res);
return res;
}
@@ -340,12 +330,12 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname,
return 0;
}
ret = fscrypt_get_encryption_info(dir);
- if (ret && ret != -EOPNOTSUPP)
+ if (ret)
return ret;
if (dir->i_crypt_info) {
if (!fscrypt_fname_encrypted_size(dir, iname->len,
- dir->i_sb->s_cop->max_namelen(dir),
+ dir->i_sb->s_cop->max_namelen,
&fname->crypto_buf.len))
return -ENAMETOOLONG;
fname->crypto_buf.name = kmalloc(fname->crypto_buf.len,
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index fe6f6524c1aa..92c6c0ace1b1 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -17,15 +17,7 @@
/* Encryption parameters */
#define FS_IV_SIZE 16
-#define FS_AES_128_ECB_KEY_SIZE 16
-#define FS_AES_128_CBC_KEY_SIZE 16
-#define FS_AES_128_CTS_KEY_SIZE 16
-#define FS_AES_256_GCM_KEY_SIZE 32
-#define FS_AES_256_CBC_KEY_SIZE 32
-#define FS_AES_256_CTS_KEY_SIZE 32
-#define FS_AES_256_XTS_KEY_SIZE 64
-
-#define FS_KEY_DERIVATION_NONCE_SIZE 16
+#define FS_KEY_DERIVATION_NONCE_SIZE 16
/**
* Encryption context for inode
@@ -101,10 +93,6 @@ static inline bool fscrypt_valid_enc_modes(u32 contents_mode,
filenames_mode == FS_ENCRYPTION_MODE_AES_256_CTS)
return true;
- if (contents_mode == FS_ENCRYPTION_MODE_SPECK128_256_XTS &&
- filenames_mode == FS_ENCRYPTION_MODE_SPECK128_256_CTS)
- return true;
-
return false;
}
@@ -119,6 +107,15 @@ extern int fscrypt_do_page_crypto(const struct inode *inode,
gfp_t gfp_flags);
extern struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx,
gfp_t gfp_flags);
+extern const struct dentry_operations fscrypt_d_ops;
+
+extern void __printf(3, 4) __cold
+fscrypt_msg(struct super_block *sb, const char *level, const char *fmt, ...);
+
+#define fscrypt_warn(sb, fmt, ...) \
+ fscrypt_msg(sb, KERN_WARNING, fmt, ##__VA_ARGS__)
+#define fscrypt_err(sb, fmt, ...) \
+ fscrypt_msg(sb, KERN_ERR, fmt, ##__VA_ARGS__)
/* fname.c */
extern int fname_encrypt(struct inode *inode, const struct qstr *iname,
diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c
index bc010e4609ef..b5328a0c6364 100644
--- a/fs/crypto/hooks.c
+++ b/fs/crypto/hooks.c
@@ -39,8 +39,9 @@ int fscrypt_file_open(struct inode *inode, struct file *filp)
dir = dget_parent(file_dentry(filp));
if (IS_ENCRYPTED(d_inode(dir)) &&
!fscrypt_has_permitted_context(d_inode(dir), inode)) {
- pr_warn_ratelimited("fscrypt: inconsistent encryption contexts: %lu/%lu",
- d_inode(dir)->i_ino, inode->i_ino);
+ fscrypt_warn(inode->i_sb,
+ "inconsistent encryption contexts: %lu/%lu",
+ d_inode(dir)->i_ino, inode->i_ino);
err = -EPERM;
}
dput(dir);
diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c
index 472f69188a96..3cbe0eff8767 100644
--- a/fs/crypto/keyinfo.c
+++ b/fs/crypto/keyinfo.c
@@ -18,17 +18,16 @@
static struct crypto_shash *essiv_hash_tfm;
-/**
- * derive_key_aes() - Derive a key using AES-128-ECB
- * @deriving_key: Encryption key used for derivation.
- * @source_key: Source key to which to apply derivation.
- * @derived_raw_key: Derived raw key.
+/*
+ * Key derivation function. This generates the derived key by encrypting the
+ * master key with AES-128-ECB using the inode's nonce as the AES key.
*
- * Return: Zero on success; non-zero otherwise.
+ * The master key must be at least as long as the derived key. If the master
+ * key is longer, then only the first 'derived_keysize' bytes are used.
*/
-static int derive_key_aes(u8 deriving_key[FS_AES_128_ECB_KEY_SIZE],
- const struct fscrypt_key *source_key,
- u8 derived_raw_key[FS_MAX_KEY_SIZE])
+static int derive_key_aes(const u8 *master_key,
+ const struct fscrypt_context *ctx,
+ u8 *derived_key, unsigned int derived_keysize)
{
int res = 0;
struct skcipher_request *req = NULL;
@@ -50,14 +49,13 @@ static int derive_key_aes(u8 deriving_key[FS_AES_128_ECB_KEY_SIZE],
skcipher_request_set_callback(req,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
crypto_req_done, &wait);
- res = crypto_skcipher_setkey(tfm, deriving_key,
- FS_AES_128_ECB_KEY_SIZE);
+ res = crypto_skcipher_setkey(tfm, ctx->nonce, sizeof(ctx->nonce));
if (res < 0)
goto out;
- sg_init_one(&src_sg, source_key->raw, source_key->size);
- sg_init_one(&dst_sg, derived_raw_key, source_key->size);
- skcipher_request_set_crypt(req, &src_sg, &dst_sg, source_key->size,
+ sg_init_one(&src_sg, master_key, derived_keysize);
+ sg_init_one(&dst_sg, derived_key, derived_keysize);
+ skcipher_request_set_crypt(req, &src_sg, &dst_sg, derived_keysize,
NULL);
res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
out:
@@ -66,103 +64,137 @@ out:
return res;
}
-static int validate_user_key(struct fscrypt_info *crypt_info,
- struct fscrypt_context *ctx, u8 *raw_key,
- const char *prefix, int min_keysize)
+/*
+ * Search the current task's subscribed keyrings for a "logon" key with
+ * description prefix:descriptor, and if found acquire a read lock on it and
+ * return a pointer to its validated payload in *payload_ret.
+ */
+static struct key *
+find_and_lock_process_key(const char *prefix,
+ const u8 descriptor[FS_KEY_DESCRIPTOR_SIZE],
+ unsigned int min_keysize,
+ const struct fscrypt_key **payload_ret)
{
char *description;
- struct key *keyring_key;
- struct fscrypt_key *master_key;
+ struct key *key;
const struct user_key_payload *ukp;
- int res;
+ const struct fscrypt_key *payload;
description = kasprintf(GFP_NOFS, "%s%*phN", prefix,
- FS_KEY_DESCRIPTOR_SIZE,
- ctx->master_key_descriptor);
+ FS_KEY_DESCRIPTOR_SIZE, descriptor);
if (!description)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
- keyring_key = request_key(&key_type_logon, description, NULL);
+ key = request_key(&key_type_logon, description, NULL);
kfree(description);
- if (IS_ERR(keyring_key))
- return PTR_ERR(keyring_key);
- down_read(&keyring_key->sem);
-
- if (keyring_key->type != &key_type_logon) {
- printk_once(KERN_WARNING
- "%s: key type must be logon\n", __func__);
- res = -ENOKEY;
- goto out;
- }
- ukp = user_key_payload(keyring_key);
- if (!ukp) {
- /* key was revoked before we acquired its semaphore */
- res = -EKEYREVOKED;
- goto out;
+ if (IS_ERR(key))
+ return key;
+
+ down_read(&key->sem);
+ ukp = user_key_payload(key);
+
+ if (!ukp) /* was the key revoked before we acquired its semaphore? */
+ goto invalid;
+
+ payload = (const struct fscrypt_key *)ukp->data;
+
+ if (ukp->datalen != sizeof(struct fscrypt_key) ||
+ payload->size < 1 || payload->size > FS_MAX_KEY_SIZE) {
+ fscrypt_warn(NULL,
+ "key with description '%s' has invalid payload",
+ key->description);
+ goto invalid;
}
- if (ukp->datalen != sizeof(struct fscrypt_key)) {
- res = -EINVAL;
- goto out;
+
+ if (payload->size < min_keysize) {
+ fscrypt_warn(NULL,
+ "key with description '%s' is too short (got %u bytes, need %u+ bytes)",
+ key->description, payload->size, min_keysize);
+ goto invalid;
}
- master_key = (struct fscrypt_key *)ukp->data;
- BUILD_BUG_ON(FS_AES_128_ECB_KEY_SIZE != FS_KEY_DERIVATION_NONCE_SIZE);
-
- if (master_key->size < min_keysize || master_key->size > FS_MAX_KEY_SIZE
- || master_key->size % AES_BLOCK_SIZE != 0) {
- printk_once(KERN_WARNING
- "%s: key size incorrect: %d\n",
- __func__, master_key->size);
- res = -ENOKEY;
- goto out;
+
+ *payload_ret = payload;
+ return key;
+
+invalid:
+ up_read(&key->sem);
+ key_put(key);
+ return ERR_PTR(-ENOKEY);
+}
+
+/* Find the master key, then derive the inode's actual encryption key */
+static int find_and_derive_key(const struct inode *inode,
+ const struct fscrypt_context *ctx,
+ u8 *derived_key, unsigned int derived_keysize)
+{
+ struct key *key;
+ const struct fscrypt_key *payload;
+ int err;
+
+ key = find_and_lock_process_key(FS_KEY_DESC_PREFIX,
+ ctx->master_key_descriptor,
+ derived_keysize, &payload);
+ if (key == ERR_PTR(-ENOKEY) && inode->i_sb->s_cop->key_prefix) {
+ key = find_and_lock_process_key(inode->i_sb->s_cop->key_prefix,
+ ctx->master_key_descriptor,
+ derived_keysize, &payload);
}
- res = derive_key_aes(ctx->nonce, master_key, raw_key);
-out:
- up_read(&keyring_key->sem);
- key_put(keyring_key);
- return res;
+ if (IS_ERR(key))
+ return PTR_ERR(key);
+ err = derive_key_aes(payload->raw, ctx, derived_key, derived_keysize);
+ up_read(&key->sem);
+ key_put(key);
+ return err;
}
-static const struct {
+static struct fscrypt_mode {
+ const char *friendly_name;
const char *cipher_str;
int keysize;
+ bool logged_impl_name;
} available_modes[] = {
- [FS_ENCRYPTION_MODE_AES_256_XTS] = { "xts(aes)",
- FS_AES_256_XTS_KEY_SIZE },
- [FS_ENCRYPTION_MODE_AES_256_CTS] = { "cts(cbc(aes))",
- FS_AES_256_CTS_KEY_SIZE },
- [FS_ENCRYPTION_MODE_AES_128_CBC] = { "cbc(aes)",
- FS_AES_128_CBC_KEY_SIZE },
- [FS_ENCRYPTION_MODE_AES_128_CTS] = { "cts(cbc(aes))",
- FS_AES_128_CTS_KEY_SIZE },
- [FS_ENCRYPTION_MODE_SPECK128_256_XTS] = { "xts(speck128)", 64 },
- [FS_ENCRYPTION_MODE_SPECK128_256_CTS] = { "cts(cbc(speck128))", 32 },
+ [FS_ENCRYPTION_MODE_AES_256_XTS] = {
+ .friendly_name = "AES-256-XTS",
+ .cipher_str = "xts(aes)",
+ .keysize = 64,
+ },
+ [FS_ENCRYPTION_MODE_AES_256_CTS] = {
+ .friendly_name = "AES-256-CTS-CBC",
+ .cipher_str = "cts(cbc(aes))",
+ .keysize = 32,
+ },
+ [FS_ENCRYPTION_MODE_AES_128_CBC] = {
+ .friendly_name = "AES-128-CBC",
+ .cipher_str = "cbc(aes)",
+ .keysize = 16,
+ },
+ [FS_ENCRYPTION_MODE_AES_128_CTS] = {
+ .friendly_name = "AES-128-CTS-CBC",
+ .cipher_str = "cts(cbc(aes))",
+ .keysize = 16,
+ },
};
-static int determine_cipher_type(struct fscrypt_info *ci, struct inode *inode,
- const char **cipher_str_ret, int *keysize_ret)
+static struct fscrypt_mode *
+select_encryption_mode(const struct fscrypt_info *ci, const struct inode *inode)
{
- u32 mode;
-
if (!fscrypt_valid_enc_modes(ci->ci_data_mode, ci->ci_filename_mode)) {
- pr_warn_ratelimited("fscrypt: inode %lu uses unsupported encryption modes (contents mode %d, filenames mode %d)\n",
- inode->i_ino,
- ci->ci_data_mode, ci->ci_filename_mode);
- return -EINVAL;
+ fscrypt_warn(inode->i_sb,
+ "inode %lu uses unsupported encryption modes (contents mode %d, filenames mode %d)",
+ inode->i_ino, ci->ci_data_mode,
+ ci->ci_filename_mode);
+ return ERR_PTR(-EINVAL);
}
- if (S_ISREG(inode->i_mode)) {
- mode = ci->ci_data_mode;
- } else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) {
- mode = ci->ci_filename_mode;
- } else {
- WARN_ONCE(1, "fscrypt: filesystem tried to load encryption info for inode %lu, which is not encryptable (file type %d)\n",
- inode->i_ino, (inode->i_mode & S_IFMT));
- return -EINVAL;
- }
+ if (S_ISREG(inode->i_mode))
+ return &available_modes[ci->ci_data_mode];
+
+ if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+ return &available_modes[ci->ci_filename_mode];
- *cipher_str_ret = available_modes[mode].cipher_str;
- *keysize_ret = available_modes[mode].keysize;
- return 0;
+ WARN_ONCE(1, "fscrypt: filesystem tried to load encryption info for inode %lu, which is not encryptable (file type %d)\n",
+ inode->i_ino, (inode->i_mode & S_IFMT));
+ return ERR_PTR(-EINVAL);
}
static void put_crypt_info(struct fscrypt_info *ci)
@@ -185,8 +217,9 @@ static int derive_essiv_salt(const u8 *key, int keysize, u8 *salt)
tfm = crypto_alloc_shash("sha256", 0, 0);
if (IS_ERR(tfm)) {
- pr_warn_ratelimited("fscrypt: error allocating SHA-256 transform: %ld\n",
- PTR_ERR(tfm));
+ fscrypt_warn(NULL,
+ "error allocating SHA-256 transform: %ld",
+ PTR_ERR(tfm));
return PTR_ERR(tfm);
}
prev_tfm = cmpxchg(&essiv_hash_tfm, NULL, tfm);
@@ -246,8 +279,7 @@ int fscrypt_get_encryption_info(struct inode *inode)
struct fscrypt_info *crypt_info;
struct fscrypt_context ctx;
struct crypto_skcipher *ctfm;
- const char *cipher_str;
- int keysize;
+ struct fscrypt_mode *mode;
u8 *raw_key = NULL;
int res;
@@ -291,57 +323,59 @@ int fscrypt_get_encryption_info(struct inode *inode)
memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor,
sizeof(crypt_info->ci_master_key));
- res = determine_cipher_type(crypt_info, inode, &cipher_str, &keysize);
- if (res)
+ mode = select_encryption_mode(crypt_info, inode);
+ if (IS_ERR(mode)) {
+ res = PTR_ERR(mode);
goto out;
+ }
/*
* This cannot be a stack buffer because it is passed to the scatterlist
* crypto API as part of key derivation.
*/
res = -ENOMEM;
- raw_key = kmalloc(FS_MAX_KEY_SIZE, GFP_NOFS);
+ raw_key = kmalloc(mode->keysize, GFP_NOFS);
if (!raw_key)
goto out;
- res = validate_user_key(crypt_info, &ctx, raw_key, FS_KEY_DESC_PREFIX,
- keysize);
- if (res && inode->i_sb->s_cop->key_prefix) {
- int res2 = validate_user_key(crypt_info, &ctx, raw_key,
- inode->i_sb->s_cop->key_prefix,
- keysize);
- if (res2) {
- if (res2 == -ENOKEY)
- res = -ENOKEY;
- goto out;
- }
- } else if (res) {
+ res = find_and_derive_key(inode, &ctx, raw_key, mode->keysize);
+ if (res)
goto out;
- }
- ctfm = crypto_alloc_skcipher(cipher_str, 0, 0);
- if (!ctfm || IS_ERR(ctfm)) {
- res = ctfm ? PTR_ERR(ctfm) : -ENOMEM;
- pr_debug("%s: error %d (inode %lu) allocating crypto tfm\n",
- __func__, res, inode->i_ino);
+
+ ctfm = crypto_alloc_skcipher(mode->cipher_str, 0, 0);
+ if (IS_ERR(ctfm)) {
+ res = PTR_ERR(ctfm);
+ fscrypt_warn(inode->i_sb,
+ "error allocating '%s' transform for inode %lu: %d",
+ mode->cipher_str, inode->i_ino, res);
goto out;
}
+ if (unlikely(!mode->logged_impl_name)) {
+ /*
+ * fscrypt performance can vary greatly depending on which
+ * crypto algorithm implementation is used. Help people debug
+ * performance problems by logging the ->cra_driver_name the
+ * first time a mode is used. Note that multiple threads can
+ * race here, but it doesn't really matter.
+ */
+ mode->logged_impl_name = true;
+ pr_info("fscrypt: %s using implementation \"%s\"\n",
+ mode->friendly_name,
+ crypto_skcipher_alg(ctfm)->base.cra_driver_name);
+ }
crypt_info->ci_ctfm = ctfm;
- crypto_skcipher_clear_flags(ctfm, ~0);
crypto_skcipher_set_flags(ctfm, CRYPTO_TFM_REQ_WEAK_KEY);
- /*
- * if the provided key is longer than keysize, we use the first
- * keysize bytes of the derived key only
- */
- res = crypto_skcipher_setkey(ctfm, raw_key, keysize);
+ res = crypto_skcipher_setkey(ctfm, raw_key, mode->keysize);
if (res)
goto out;
if (S_ISREG(inode->i_mode) &&
crypt_info->ci_data_mode == FS_ENCRYPTION_MODE_AES_128_CBC) {
- res = init_essiv_generator(crypt_info, raw_key, keysize);
+ res = init_essiv_generator(crypt_info, raw_key, mode->keysize);
if (res) {
- pr_debug("%s: error %d (inode %lu) allocating essiv tfm\n",
- __func__, res, inode->i_ino);
+ fscrypt_warn(inode->i_sb,
+ "error initializing ESSIV generator for inode %lu: %d",
+ inode->i_ino, res);
goto out;
}
}
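
[Editorial note] The find_and_derive_key()/derive_key_aes() path above is fscrypt's entire KDF: the per-file key is simply the master key encrypted with AES-128-ECB, using the 16-byte nonce from the inode's encryption context as the AES key. As a reading aid only (not part of this patch), here is a minimal userspace model of that transform; it assumes OpenSSL's EVP API and relies on the derived key size being a multiple of the AES block size, which holds for every entry in available_modes[].

/* Userspace sketch of derive_key_aes(): AES-128-ECB keyed by the inode
 * nonce, applied across the master key. Illustrative only; the function
 * name is hypothetical. Build with: cc kdf.c -lcrypto
 */
#include <openssl/evp.h>

/* Returns 0 on success, -1 on failure; derived must hold keysize bytes. */
static int model_derive_key_aes(const unsigned char nonce[16],
				const unsigned char *master_key,
				unsigned char *derived, int keysize)
{
	EVP_CIPHER_CTX *ctx = EVP_CIPHER_CTX_new();
	int outl = 0, tmpl = 0, ok;

	if (!ctx)
		return -1;
	ok = EVP_EncryptInit_ex(ctx, EVP_aes_128_ecb(), NULL, nonce, NULL) &&
	     EVP_CIPHER_CTX_set_padding(ctx, 0) && /* keysize % 16 == 0 */
	     EVP_EncryptUpdate(ctx, derived, &outl, master_key, keysize) &&
	     EVP_EncryptFinal_ex(ctx, derived + outl, &tmpl);
	EVP_CIPHER_CTX_free(ctx);
	return ok ? 0 : -1;
}

For AES-256-XTS, feeding the first 64 bytes of the logon key's payload through this yields the same bytes the kernel passes to crypto_skcipher_setkey() above.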
diff --git a/fs/dcache.c b/fs/dcache.c
index 9aa4a81dc9b3..d065854deb23 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1394,7 +1394,7 @@ static enum d_walk_ret select_collect(void *_data, struct dentry *dentry)
goto out;
if (dentry->d_flags & DCACHE_SHRINK_LIST) {
- data->found++;
+ goto out;
} else {
if (dentry->d_flags & DCACHE_LRU_LIST)
d_lru_del(dentry);
@@ -1892,28 +1892,6 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
EXPORT_SYMBOL(d_instantiate_unique);
-/*
- * This should be equivalent to d_instantiate() + unlock_new_inode(),
- * with lockdep-related part of unlock_new_inode() done before
- * anything else. Use that instead of open-coding d_instantiate()/
- * unlock_new_inode() combinations.
- */
-void d_instantiate_new(struct dentry *entry, struct inode *inode)
-{
- BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
- BUG_ON(!inode);
- lockdep_annotate_inode_mutex_key(inode);
- spin_lock(&inode->i_lock);
- __d_instantiate(entry, inode);
- WARN_ON(!(inode->i_state & I_NEW));
- inode->i_state &= ~I_NEW;
- smp_mb();
- wake_up_bit(&inode->i_state, __I_NEW);
- spin_unlock(&inode->i_lock);
- security_d_instantiate(entry, inode);
-}
-EXPORT_SYMBOL(d_instantiate_new);
-
/**
* d_instantiate_no_diralias - instantiate a non-aliased dentry
* @entry: dentry to complete
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 49c06f3cd952..5afb6e260c84 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -399,6 +399,7 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
if (dio->is_async && dio->rw == READ && dio->should_dirty)
bio_set_pages_dirty(bio);
+ bio->bi_dio_inode = dio->inode;
dio->bio_bdev = bio->bi_bdev;
if (sdio->submit_io) {
@@ -413,6 +414,19 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
sdio->logical_offset_in_bio = 0;
}
+struct inode *dio_bio_get_inode(struct bio *bio)
+{
+ struct inode *inode = NULL;
+
+ if (bio == NULL)
+ return NULL;
+
+ inode = bio->bi_dio_inode;
+
+ return inode;
+}
+EXPORT_SYMBOL(dio_bio_get_inode);
+
/*
* Release any resources in case of a failure
*/
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 280460fef066..44de6b82b16f 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -13,7 +13,7 @@
/* A global variable is a bit ugly, but it keeps the code simple */
int sysctl_drop_caches;
-static void drop_pagecache_sb(struct super_block *sb, void *unused)
+void drop_pagecache_sb(struct super_block *sb, void *unused)
{
struct inode *inode, *toput_inode = NULL;
diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile
index 49678a69947d..c29cdd20d08a 100644
--- a/fs/ecryptfs/Makefile
+++ b/fs/ecryptfs/Makefile
@@ -4,7 +4,7 @@
obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o
-ecryptfs-y := dentry.o file.o inode.o main.o super.o mmap.o read_write.o \
+ecryptfs-y := dentry.o file.o inode.o main.o super.o mmap.o read_write.o events.o \
crypto.o keystore.o kthread.o debug.o
ecryptfs-$(CONFIG_ECRYPT_FS_MESSAGING) += messaging.o miscdev.o
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index f246f1760ba2..27d1db57f578 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -35,6 +35,7 @@
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <asm/unaligned.h>
+#include <linux/ecryptfs.h>
#include "ecryptfs_kernel.h"
#define DECRYPT 0
@@ -350,9 +351,9 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
|| !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED));
if (unlikely(ecryptfs_verbosity > 0)) {
ecryptfs_printk(KERN_DEBUG, "Key size [%zd]; key:\n",
- crypt_stat->key_size);
+ ecryptfs_get_key_size_to_enc_data(crypt_stat));
ecryptfs_dump_hex(crypt_stat->key,
- crypt_stat->key_size);
+ ecryptfs_get_key_size_to_enc_data(crypt_stat));
}
init_completion(&ecr.completion);
@@ -371,7 +372,7 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
/* Consider doing this once, when the file is opened */
if (!(crypt_stat->flags & ECRYPTFS_KEY_SET)) {
rc = crypto_ablkcipher_setkey(crypt_stat->tfm, crypt_stat->key,
- crypt_stat->key_size);
+ ecryptfs_get_key_size_to_enc_data(crypt_stat));
if (rc) {
ecryptfs_printk(KERN_ERR,
"Error setting key; rc = [%d]\n",
@@ -466,6 +467,30 @@ out:
return rc;
}
+static void init_ecryption_parameters(bool *hw_crypt, bool *cipher_supported,
+ struct ecryptfs_crypt_stat *crypt_stat)
+{
+ if (!hw_crypt || !cipher_supported)
+ return;
+
+ *cipher_supported = false;
+ *hw_crypt = false;
+
+ if (get_events() && get_events()->is_cipher_supported_cb) {
+ *cipher_supported =
+ get_events()->is_cipher_supported_cb(crypt_stat);
+ if (*cipher_supported) {
+
+ /**
+ * we should apply an external algorithm;
+ * assume that the is_hw_crypt() callback is supplied
+ */
+ if (get_events()->is_hw_crypt_cb)
+ *hw_crypt = get_events()->is_hw_crypt_cb();
+ }
+ }
+}
+
/**
* ecryptfs_encrypt_page
* @page: Page mapped from the eCryptfs inode for the file; contains
@@ -491,11 +516,18 @@ int ecryptfs_encrypt_page(struct page *page)
loff_t extent_offset;
loff_t lower_offset;
int rc = 0;
+ bool is_hw_crypt;
+ bool is_cipher_supported;
+
ecryptfs_inode = page->mapping->host;
crypt_stat =
&(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat);
BUG_ON(!(crypt_stat->flags & ECRYPTFS_ENCRYPTED));
+
+ init_ecryption_parameters(&is_hw_crypt,
+ &is_cipher_supported, crypt_stat);
+
enc_extent_page = alloc_page(GFP_USER);
if (!enc_extent_page) {
rc = -ENOMEM;
@@ -503,24 +535,51 @@ int ecryptfs_encrypt_page(struct page *page)
"encrypted extent\n");
goto out;
}
-
- for (extent_offset = 0;
- extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size);
- extent_offset++) {
- rc = crypt_extent(crypt_stat, enc_extent_page, page,
- extent_offset, ENCRYPT);
- if (rc) {
- printk(KERN_ERR "%s: Error encrypting extent; "
- "rc = [%d]\n", __func__, rc);
- goto out;
+ if (is_hw_crypt) {
+ /* no need for encryption */
+ } else {
+ for (extent_offset = 0;
+ extent_offset <
+ (PAGE_CACHE_SIZE / crypt_stat->extent_size);
+ extent_offset++) {
+
+ if (is_cipher_supported) {
+ if (!get_events()->encrypt_cb) {
+ rc = -EPERM;
+ goto out;
+ }
+ rc = get_events()->encrypt_cb(page,
+ enc_extent_page,
+ ecryptfs_inode_to_lower(
+ ecryptfs_inode),
+ extent_offset);
+ } else {
+ rc = crypt_extent(crypt_stat,
+ enc_extent_page, page,
+ extent_offset, ENCRYPT);
+ }
+ if (rc) {
+ ecryptfs_printk(KERN_ERR,
+ "%s: Error encrypting; rc = [%d]\n",
+ __func__, rc);
+ goto out;
+ }
}
}
lower_offset = lower_offset_for_page(crypt_stat, page);
- enc_extent_virt = kmap(enc_extent_page);
+ if (is_hw_crypt)
+ enc_extent_virt = kmap(page);
+ else
+ enc_extent_virt = kmap(enc_extent_page);
+
rc = ecryptfs_write_lower(ecryptfs_inode, enc_extent_virt, lower_offset,
PAGE_CACHE_SIZE);
- kunmap(enc_extent_page);
+ if (!is_hw_crypt)
+ kunmap(enc_extent_page);
+ else
+ kunmap(page);
+
if (rc < 0) {
ecryptfs_printk(KERN_ERR,
"Error attempting to write lower page; rc = [%d]\n",
@@ -559,6 +618,8 @@ int ecryptfs_decrypt_page(struct page *page)
unsigned long extent_offset;
loff_t lower_offset;
int rc = 0;
+ bool is_cipher_supported;
+ bool is_hw_crypt;
ecryptfs_inode = page->mapping->host;
crypt_stat =
@@ -577,13 +638,33 @@ int ecryptfs_decrypt_page(struct page *page)
goto out;
}
+ init_ecryption_parameters(&is_hw_crypt,
+ &is_cipher_supported, crypt_stat);
+
+ if (is_hw_crypt) {
+ rc = 0;
+ return rc;
+ }
+
for (extent_offset = 0;
extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size);
extent_offset++) {
- rc = crypt_extent(crypt_stat, page, page,
+ if (is_cipher_supported) {
+ if (!get_events()->decrypt_cb) {
+ rc = -EPERM;
+ goto out;
+ }
+
+ rc = get_events()->decrypt_cb(page, page,
+ ecryptfs_inode_to_lower(ecryptfs_inode),
+ extent_offset);
+
+ } else
+ rc = crypt_extent(crypt_stat, page, page,
extent_offset, DECRYPT);
+
if (rc) {
- printk(KERN_ERR "%s: Error encrypting extent; "
+ ecryptfs_printk(KERN_ERR, "%s: Error decrypting extent;"
"rc = [%d]\n", __func__, rc);
goto out;
}
@@ -612,7 +693,7 @@ int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat)
"Initializing cipher [%s]; strlen = [%d]; "
"key_size_bits = [%zd]\n",
crypt_stat->cipher, (int)strlen(crypt_stat->cipher),
- crypt_stat->key_size << 3);
+ ecryptfs_get_key_size_to_enc_data(crypt_stat) << 3);
mutex_lock(&crypt_stat->cs_tfm_mutex);
if (crypt_stat->tfm) {
rc = 0;
@@ -694,7 +775,7 @@ int ecryptfs_compute_root_iv(struct ecryptfs_crypt_stat *crypt_stat)
goto out;
}
rc = ecryptfs_calculate_md5(dst, crypt_stat, crypt_stat->key,
- crypt_stat->key_size);
+ ecryptfs_get_key_size_to_enc_data(crypt_stat));
if (rc) {
ecryptfs_printk(KERN_WARNING, "Error attempting to compute "
"MD5 while generating root IV\n");
@@ -721,6 +802,31 @@ static void ecryptfs_generate_new_key(struct ecryptfs_crypt_stat *crypt_stat)
}
}
+static int ecryptfs_generate_new_salt(struct ecryptfs_crypt_stat *crypt_stat)
+{
+ size_t salt_size = 0;
+
+ salt_size = ecryptfs_get_salt_size_for_cipher(crypt_stat);
+
+ if (0 == salt_size)
+ return 0;
+
+ if (!ecryptfs_check_space_for_salt(crypt_stat->key_size, salt_size)) {
+ ecryptfs_printk(KERN_WARNING, "not enough space for salt\n");
+ crypt_stat->flags |= ECRYPTFS_SECURITY_WARNING;
+ return -EINVAL;
+ }
+
+ get_random_bytes(crypt_stat->key + crypt_stat->key_size, salt_size);
+ if (unlikely(ecryptfs_verbosity > 0)) {
+ ecryptfs_printk(KERN_DEBUG, "Generated new session salt:\n");
+ ecryptfs_dump_hex(crypt_stat->key + crypt_stat->key_size,
+ salt_size);
+ }
+
+ return 0;
+}
+
/**
* ecryptfs_copy_mount_wide_flags_to_inode_flags
* @crypt_stat: The inode's cryptographic context
@@ -823,7 +929,6 @@ int ecryptfs_new_file_context(struct inode *ecryptfs_inode)
struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
&ecryptfs_superblock_to_private(
ecryptfs_inode->i_sb)->mount_crypt_stat;
- int cipher_name_len;
int rc = 0;
ecryptfs_set_default_crypt_stat_vals(crypt_stat, mount_crypt_stat);
@@ -837,15 +942,19 @@ int ecryptfs_new_file_context(struct inode *ecryptfs_inode)
"to the inode key sigs; rc = [%d]\n", rc);
goto out;
}
- cipher_name_len =
- strlen(mount_crypt_stat->global_default_cipher_name);
- memcpy(crypt_stat->cipher,
+ strlcpy(crypt_stat->cipher,
mount_crypt_stat->global_default_cipher_name,
- cipher_name_len);
- crypt_stat->cipher[cipher_name_len] = '\0';
+ sizeof(crypt_stat->cipher));
+
+ strlcpy(crypt_stat->cipher_mode,
+ mount_crypt_stat->global_default_cipher_mode,
+ sizeof(crypt_stat->cipher_mode));
+
crypt_stat->key_size =
mount_crypt_stat->global_default_cipher_key_size;
ecryptfs_generate_new_key(crypt_stat);
+ ecryptfs_generate_new_salt(crypt_stat);
+
rc = ecryptfs_init_crypt_ctx(crypt_stat);
if (rc)
ecryptfs_printk(KERN_ERR, "Error initializing cryptographic "
@@ -971,7 +1080,8 @@ ecryptfs_cipher_code_str_map[] = {
{"twofish", RFC2440_CIPHER_TWOFISH},
{"cast6", RFC2440_CIPHER_CAST_6},
{"aes", RFC2440_CIPHER_AES_192},
- {"aes", RFC2440_CIPHER_AES_256}
+ {"aes", RFC2440_CIPHER_AES_256},
+ {"aes_xts", RFC2440_CIPHER_AES_XTS_256}
};
/**
@@ -999,6 +1109,11 @@ u8 ecryptfs_code_for_cipher_string(char *cipher_name, size_t key_bytes)
case 32:
code = RFC2440_CIPHER_AES_256;
}
+ } else if (strcmp(cipher_name, "aes_xts") == 0) {
+ switch (key_bytes) {
+ case 32:
+ code = RFC2440_CIPHER_AES_XTS_256;
+ }
} else {
for (i = 0; i < ARRAY_SIZE(ecryptfs_cipher_code_str_map); i++)
if (strcmp(cipher_name, map[i].cipher_str) == 0) {
@@ -1038,9 +1153,24 @@ int ecryptfs_read_and_validate_header_region(struct inode *inode)
u8 file_size[ECRYPTFS_SIZE_AND_MARKER_BYTES];
u8 *marker = file_size + ECRYPTFS_FILE_SIZE_BYTES;
int rc;
+ unsigned int ra_pages_org;
+ struct file *lower_file = NULL;
+
+ if (!inode)
+ return -EIO;
+ lower_file = ecryptfs_inode_to_private(inode)->lower_file;
+ if (!lower_file)
+ return -EIO;
+
+ /* disable the read-ahead mechanism for a while */
+ ra_pages_org = lower_file->f_ra.ra_pages;
+ lower_file->f_ra.ra_pages = 0;
rc = ecryptfs_read_lower(file_size, 0, ECRYPTFS_SIZE_AND_MARKER_BYTES,
inode);
+ lower_file->f_ra.ra_pages = ra_pages_org;
+ /* restore the read-ahead mechanism */
+
if (rc < 0)
return rc;
else if (rc < ECRYPTFS_SIZE_AND_MARKER_BYTES)
@@ -1434,6 +1564,11 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry)
struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
&ecryptfs_superblock_to_private(
ecryptfs_dentry->d_sb)->mount_crypt_stat;
+ unsigned int ra_pages_org;
+ struct file *lower_file =
+ ecryptfs_inode_to_private(ecryptfs_inode)->lower_file;
+ if (!lower_file)
+ return -EIO;
ecryptfs_copy_mount_wide_flags_to_inode_flags(crypt_stat,
mount_crypt_stat);
@@ -1445,8 +1580,14 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry)
__func__);
goto out;
}
+ /* disable the read-ahead mechanism */
+ ra_pages_org = lower_file->f_ra.ra_pages;
+ lower_file->f_ra.ra_pages = 0;
+
rc = ecryptfs_read_lower(page_virt, 0, crypt_stat->extent_size,
ecryptfs_inode);
+ lower_file->f_ra.ra_pages = ra_pages_org; /* restore it back */
+
if (rc >= 0)
rc = ecryptfs_read_headers_virt(page_virt, crypt_stat,
ecryptfs_dentry,
diff --git a/fs/ecryptfs/debug.c b/fs/ecryptfs/debug.c
index 3d2bdf546ec6..ea2c4a97fddc 100644
--- a/fs/ecryptfs/debug.c
+++ b/fs/ecryptfs/debug.c
@@ -119,3 +119,32 @@ void ecryptfs_dump_hex(char *data, int bytes)
printk("\n");
}
+void ecryptfs_dump_salt_hex(char *data, int key_size,
+ const struct ecryptfs_crypt_stat *crypt_stat)
+{
+ size_t salt_size = ecryptfs_get_salt_size_for_cipher(crypt_stat);
+
+ if (0 == salt_size)
+ return;
+
+ if (!ecryptfs_check_space_for_salt(key_size, salt_size))
+ return;
+
+ ecryptfs_printk(KERN_DEBUG, "Decrypted session salt key:\n");
+ ecryptfs_dump_hex(data + key_size, salt_size);
+}
+
+void ecryptfs_dump_cipher(struct ecryptfs_crypt_stat *stat)
+{
+ if (!stat)
+ return;
+
+ if (stat->cipher != NULL)
+ ecryptfs_printk(KERN_DEBUG,
+ "ecryptfs cipher is %s\n", stat->cipher);
+
+ if (stat->cipher_mode != NULL)
+ ecryptfs_printk(KERN_DEBUG, "ecryptfs cipher mode is %s\n",
+ stat->cipher_mode);
+
+}
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index eae9cdb8af46..f5908e91eb17 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -254,6 +254,7 @@ struct ecryptfs_crypt_stat {
struct mutex cs_tfm_mutex;
struct mutex cs_hash_tfm_mutex;
struct mutex cs_mutex;
+ unsigned char cipher_mode[ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1];
};
/* inode private data. */
@@ -354,6 +355,8 @@ struct ecryptfs_mount_crypt_stat {
unsigned char global_default_fn_cipher_name[
ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1];
char global_default_fnek_sig[ECRYPTFS_SIG_SIZE_HEX + 1];
+ unsigned char global_default_cipher_mode[ECRYPTFS_MAX_CIPHER_NAME_SIZE
+ + 1];
};
/* superblock private data. */
@@ -536,6 +539,53 @@ ecryptfs_dentry_to_lower_path(struct dentry *dentry)
return &((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path;
}
+/**
+ * Given cipher and mode strings, the function
+ * concatenates them to create a new string of
+ * <cipher>_<mode> format.
+ */
+static inline unsigned char *ecryptfs_get_full_cipher(
+ unsigned char *cipher, unsigned char *mode,
+ unsigned char *final, size_t final_size)
+{
+ memset(final, 0, final_size);
+
+ if (strlen(mode) > 0) {
+ snprintf(final, final_size, "%s_%s", cipher, mode);
+ return final;
+ }
+
+ return cipher;
+}
+
+/**
+ * Given a <cipher>[_<mode>] formatted string, the function
+ * extracts cipher string and/or mode string.
+ * Note: the passed cipher and/or mode strings will be null-terminated.
+ */
+static inline void ecryptfs_parse_full_cipher(
+ char *s, char *cipher, char *mode)
+{
+ char input[2*ECRYPTFS_MAX_CIPHER_NAME_SIZE+1+1];
+ /* +1 for '_'; +1 for '\0' */
+ char *p;
+ char *input_p = input;
+
+ if (s == NULL || cipher == NULL)
+ return;
+
+ memset(input, 0, sizeof(input));
+ strlcpy(input, s, sizeof(input));
+
+ p = strsep(&input_p, "_");
+ strlcpy(cipher, p, ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1);
+
+
+ /* check if mode is specified */
+ if (input_p != NULL && mode != NULL)
+ strlcpy(mode, input_p, ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1);
+}
+
#define ecryptfs_printk(type, fmt, arg...) \
__ecryptfs_printk(type "%s: " fmt, __func__, ## arg);
__printf(1, 2)
@@ -584,6 +634,10 @@ int ecryptfs_encrypt_and_encode_filename(
const char *name, size_t name_size);
struct dentry *ecryptfs_lower_dentry(struct dentry *this_dentry);
void ecryptfs_dump_hex(char *data, int bytes);
+void ecryptfs_dump_salt_hex(char *data, int key_size,
+ const struct ecryptfs_crypt_stat *crypt_stat);
+extern void ecryptfs_dump_cipher(struct ecryptfs_crypt_stat *stat);
+
int virt_to_scatterlist(const void *addr, int size, struct scatterlist *sg,
int sg_size);
int ecryptfs_compute_root_iv(struct ecryptfs_crypt_stat *crypt_stat);
@@ -727,4 +781,33 @@ int ecryptfs_set_f_namelen(long *namelen, long lower_namelen,
int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat,
loff_t offset);
+void clean_inode_pages(struct address_space *mapping,
+ pgoff_t start, pgoff_t end);
+
+void ecryptfs_drop_pagecache_sb(struct super_block *sb, void *unused);
+
+void ecryptfs_free_events(void);
+
+void ecryptfs_freepage(struct page *page);
+
+struct ecryptfs_events *get_events(void);
+
+size_t ecryptfs_get_salt_size_for_cipher(
+ const struct ecryptfs_crypt_stat *crypt_stat);
+
+size_t ecryptfs_get_salt_size_for_cipher_mount(
+ const struct ecryptfs_mount_crypt_stat *mount_crypt_stat);
+
+size_t ecryptfs_get_key_size_to_enc_data(
+ const struct ecryptfs_crypt_stat *crypt_stat);
+
+size_t ecryptfs_get_key_size_to_store_key(
+ const struct ecryptfs_crypt_stat *crypt_stat);
+
+size_t ecryptfs_get_key_size_to_restore_key(size_t stored_key_size,
+ const struct ecryptfs_crypt_stat *crypt_stat);
+
+bool ecryptfs_check_space_for_salt(const size_t key_size,
+ const size_t salt_size);
+
#endif /* #ifndef ECRYPTFS_KERNEL_H */
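
[Editorial note] The <cipher>[_<mode>] naming convention handled by the two inline helpers above round-trips as in this standalone sketch (illustrative only, not part of the patch: strlcpy() is swapped for snprintf(), main() is scaffolding, and the ECRYPTFS_MAX_CIPHER_NAME_SIZE value is assumed):

#define _DEFAULT_SOURCE /* for strsep() on glibc */
#include <stdio.h>
#include <string.h>

#define ECRYPTFS_MAX_CIPHER_NAME_SIZE 31 /* assumed value */

int main(void)
{
	char full[2 * ECRYPTFS_MAX_CIPHER_NAME_SIZE + 2];
	char scratch[sizeof(full)];
	char cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1] = "";
	char mode[ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1] = "";
	char *p, *input = scratch;

	/* build: "aes" + "xts" -> "aes_xts"; an empty mode yields plain "aes" */
	snprintf(full, sizeof(full), "%s_%s", "aes", "xts");

	/* parse back: strsep() cuts at the first '_' (it modifies scratch) */
	snprintf(scratch, sizeof(scratch), "%s", full);
	p = strsep(&input, "_");
	snprintf(cipher, sizeof(cipher), "%s", p);
	if (input) /* a mode component followed the '_' */
		snprintf(mode, sizeof(mode), "%s", input);

	printf("full=%s cipher=%s mode=%s\n", full, cipher, mode);
	return 0; /* prints: full=aes_xts cipher=aes mode=xts */
}

Under this convention, "aes_xts" is also the string matched by the new RFC2440_CIPHER_AES_XTS_256 case added to ecryptfs_code_for_cipher_string() in crypto.c above.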
diff --git a/fs/ecryptfs/events.c b/fs/ecryptfs/events.c
new file mode 100644
index 000000000000..12e26c683cf6
--- /dev/null
+++ b/fs/ecryptfs/events.c
@@ -0,0 +1,393 @@
+/**
+ * eCryptfs: Linux filesystem encryption layer
+ * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/string.h>
+#include <linux/ecryptfs.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <linux/random.h>
+#include "ecryptfs_kernel.h"
+
+static DEFINE_MUTEX(events_mutex);
+struct ecryptfs_events *events_ptr = NULL;
+static int handle;
+
+void ecryptfs_free_events(void)
+{
+ mutex_lock(&events_mutex);
+ if (events_ptr != NULL) {
+ kfree(events_ptr);
+ events_ptr = NULL;
+ }
+
+ mutex_unlock(&events_mutex);
+}
+
+/**
+ * Register for ecryptfs events by passing callback
+ * functions to be called upon event occurrence.
+ * The function returns a handle to be passed
+ * to the unregister function.
+ */
+int ecryptfs_register_to_events(const struct ecryptfs_events *ops)
+{
+ int ret_value = 0;
+
+ if (!ops)
+ return -EINVAL;
+
+ mutex_lock(&events_mutex);
+
+ if (events_ptr != NULL) {
+ ecryptfs_printk(KERN_ERR,
+ "already registered!\n");
+ ret_value = -EPERM;
+ goto out;
+ }
+ events_ptr =
+ kzalloc(sizeof(struct ecryptfs_events), GFP_KERNEL);
+
+ if (!events_ptr) {
+ ecryptfs_printk(KERN_ERR, "malloc failure\n");
+ ret_value = -ENOMEM;
+ goto out;
+ }
+ /* copy the callbacks */
+ events_ptr->open_cb = ops->open_cb;
+ events_ptr->release_cb = ops->release_cb;
+ events_ptr->encrypt_cb = ops->encrypt_cb;
+ events_ptr->decrypt_cb = ops->decrypt_cb;
+ events_ptr->is_cipher_supported_cb =
+ ops->is_cipher_supported_cb;
+ events_ptr->is_hw_crypt_cb = ops->is_hw_crypt_cb;
+ events_ptr->get_salt_key_size_cb = ops->get_salt_key_size_cb;
+
+ get_random_bytes(&handle, sizeof(handle));
+ ret_value = handle;
+
+out:
+ mutex_unlock(&events_mutex);
+ return ret_value;
+}
+
+/**
+ * Unregister from ecryptfs events.
+ */
+int ecryptfs_unregister_from_events(int user_handle)
+{
+ int ret_value = 0;
+
+ mutex_lock(&events_mutex);
+
+ if (!events_ptr) {
+ ret_value = -EINVAL;
+ goto out;
+ }
+ if (user_handle != handle) {
+ ret_value = ECRYPTFS_INVALID_EVENTS_HANDLE;
+ goto out;
+ }
+
+ kfree(events_ptr);
+ events_ptr = NULL;
+
+out:
+ mutex_unlock(&events_mutex);
+ return ret_value;
+}
+
+/**
+ * Decides whether the passed page offset
+ * falls within the ecryptfs metadata.
+ * The caller must pass the ecryptfs data that was received in
+ * one of the callback invocations.
+ */
+bool ecryptfs_is_page_in_metadata(const void *data, pgoff_t offset)
+{
+ struct ecryptfs_crypt_stat *stat = NULL;
+ bool ret = true;
+
+ if (!data) {
+ ecryptfs_printk(KERN_ERR, "ecryptfs_is_page_in_metadata: invalid data parameter\n");
+ ret = false;
+ goto end;
+ }
+ stat = (struct ecryptfs_crypt_stat *)data;
+
+ if (stat->flags & ECRYPTFS_METADATA_IN_XATTR) {
+ ret = false;
+ goto end;
+ }
+
+ if (offset >= (stat->metadata_size / PAGE_CACHE_SIZE)) {
+ ret = false;
+ goto end;
+ }
+end:
+ return ret;
+}
+
+/**
+ * Given two ecryptfs data pointers, the function
+ * decides whether they are equal.
+ */
+inline bool ecryptfs_is_data_equal(const void *data1, const void *data2)
+{
+ /* pointer comparison */
+ return data1 == data2;
+}
+
+/**
+ * Given ecryptfs data, the function
+ * returns the appropriate key size.
+ */
+size_t ecryptfs_get_key_size(const void *data)
+{
+ struct ecryptfs_crypt_stat *stat = NULL;
+
+ if (!data)
+ return 0;
+
+ stat = (struct ecryptfs_crypt_stat *)data;
+ return stat->key_size;
+}
+
+/**
+ * Given ecryptfs data, the function
+ * returns the appropriate salt size.
+ *
+ * !!! crypt_stat cipher name and mode must be initialized
+ */
+size_t ecryptfs_get_salt_size(const void *data)
+{
+ if (!data) {
+ ecryptfs_printk(KERN_ERR,
+ "ecryptfs_get_salt_size: invalid data parameter\n");
+ return 0;
+ }
+
+ return ecryptfs_get_salt_size_for_cipher(data);
+}
+
+/**
+ * Given ecryptfs data and cipher string, the function
+ * returns true if provided cipher and the one in ecryptfs match.
+ */
+bool ecryptfs_cipher_match(const void *data,
+ const unsigned char *cipher, size_t cipher_size)
+{
+ unsigned char final[2*ECRYPTFS_MAX_CIPHER_NAME_SIZE+1];
+ const unsigned char *ecryptfs_cipher = NULL;
+ struct ecryptfs_crypt_stat *stat = NULL;
+
+ if (!data || !cipher) {
+ ecryptfs_printk(KERN_ERR,
+ "ecryptfs_get_cipher: invalid data parameter\n");
+ return false;
+ }
+
+ if (!cipher_size || cipher_size > sizeof(final)) {
+ ecryptfs_printk(KERN_ERR,
+ "ecryptfs_cipher_match: invalid cipher_size\n");
+ return false;
+ }
+
+ stat = (struct ecryptfs_crypt_stat *)data;
+ ecryptfs_cipher = ecryptfs_get_full_cipher(stat->cipher,
+ stat->cipher_mode,
+ final, sizeof(final));
+
+ if (!ecryptfs_cipher) {
+ ecryptfs_printk(KERN_ERR,
+ "ecryptfs_get_cipher: internal error while parsing cipher\n");
+ return false;
+ }
+
+ if (strcmp(ecryptfs_cipher, cipher)) {
+ if (ecryptfs_verbosity > 0)
+ ecryptfs_dump_cipher(stat);
+
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * Given ecryptfs data, the function
+ * returns the file encryption key.
+ */
+const unsigned char *ecryptfs_get_key(const void *data)
+{
+ struct ecryptfs_crypt_stat *stat = NULL;
+
+ if (!data) {
+ ecryptfs_printk(KERN_ERR,
+ "ecryptfs_get_key: invalid data parameter\n");
+ return NULL;
+ }
+ stat = (struct ecryptfs_crypt_stat *)data;
+ return stat->key;
+}
+
+/**
+ * Given ecryptfs data, the function
+ * returns the file encryption salt.
+ */
+const unsigned char *ecryptfs_get_salt(const void *data)
+{
+ struct ecryptfs_crypt_stat *stat = NULL;
+
+ if (!data) {
+ ecryptfs_printk(KERN_ERR,
+ "ecryptfs_get_salt: invalid data parameter\n");
+ return NULL;
+ }
+ stat = (struct ecryptfs_crypt_stat *)data;
+ return stat->key + ecryptfs_get_key_size(data);
+}
+
+/**
+ * Returns ecryptfs events pointer
+ */
+inline struct ecryptfs_events *get_events(void)
+{
+ return events_ptr;
+}
+
+/**
+ * If the external crypto module requires a salt in addition to the
+ * key, the salt is stored in the same array as the key (when there
+ * is enough space). Checks whether the salt fits into the array
+ * allocated for the regular key.
+ */
+bool ecryptfs_check_space_for_salt(const size_t key_size,
+ const size_t salt_size)
+{
+ if ((salt_size + key_size) > ECRYPTFS_MAX_KEY_BYTES)
+ return false;
+
+ return true;
+}
+
+/*
+ * If there is salt that is used by external crypto module, it is stored
+ * in the same array where regular key is. Salt is going to be used by
+ * external crypto module only, so for all internal crypto operations salt
+ * should be ignored.
+ *
+ * Get key size in cases where it is going to be used for data encryption
+ * or for all other general purposes
+ */
+size_t ecryptfs_get_key_size_to_enc_data(
+ const struct ecryptfs_crypt_stat *crypt_stat)
+{
+ if (!crypt_stat)
+ return 0;
+
+ return crypt_stat->key_size;
+}
+
+/*
+ * If there is salt that is used by external crypto module, it is stored
+ * in the same array where regular key is. Salt is going to be used by
+ * external crypto module only, but we still need to save and restore it
+ * (in encrypted form) as part of ecryptfs header along with the regular
+ * key.
+ *
+ * Get key size in cases where it is going to be stored persistently
+ *
+ * !!! crypt_stat cipher name and mode must be initialized
+ */
+size_t ecryptfs_get_key_size_to_store_key(
+ const struct ecryptfs_crypt_stat *crypt_stat)
+{
+ size_t salt_size = 0;
+
+ if (!crypt_stat)
+ return 0;
+
+ salt_size = ecryptfs_get_salt_size(crypt_stat);
+
+ if (!ecryptfs_check_space_for_salt(crypt_stat->key_size, salt_size)) {
+ ecryptfs_printk(KERN_WARNING,
+ "ecryptfs_get_key_size_to_store_key: not enough space for salt\n");
+ return crypt_stat->key_size;
+ }
+
+ return crypt_stat->key_size + salt_size;
+}
+
+/*
+ * If there is salt that is used by external crypto module, it is stored
+ * in the same array where regular key is. Salt is going to be used by
+ * external crypto module only, but we still need to save and restore it
+ * (in encrypted form) as part of ecryptfs header along with the regular
+ * key.
+ *
+ * Get key size in cases where it is going to be restored from storage
+ *
+ * !!! crypt_stat cipher name and mode must be initialized
+ */
+size_t ecryptfs_get_key_size_to_restore_key(size_t stored_key_size,
+ const struct ecryptfs_crypt_stat *crypt_stat)
+{
+ size_t salt_size = 0;
+
+ if (!crypt_stat)
+ return 0;
+
+ salt_size = ecryptfs_get_salt_size_for_cipher(crypt_stat);
+
+ if (salt_size >= stored_key_size) {
+ ecryptfs_printk(KERN_WARNING,
+ "ecryptfs_get_key_size_to_restore_key: salt %zu >= stred size %zu\n",
+ salt_size, stored_key_size);
+
+ return stored_key_size;
+ }
+
+ return stored_key_size - salt_size;
+}
+
+/**
+ * Given crypt_stat, the function returns the appropriate salt size.
+ */
+size_t ecryptfs_get_salt_size_for_cipher(
+ const struct ecryptfs_crypt_stat *crypt_stat)
+{
+ if (!get_events() || !(get_events()->get_salt_key_size_cb))
+ return 0;
+
+ return get_events()->get_salt_key_size_cb(crypt_stat);
+}
+
+/**
+ * Given mount_crypt_stat, the function returns the appropriate salt size.
+ */
+size_t ecryptfs_get_salt_size_for_cipher_mount(
+ const struct ecryptfs_mount_crypt_stat *mount_crypt_stat)
+{
+ if (!get_events() || !(get_events()->get_salt_key_size_cb))
+ return 0;
+
+ return get_events()->get_salt_key_size_cb(mount_crypt_stat);
+}
+
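Taken together, events.c defines a single-consumer registration API. Below is a minimal sketch of how an external crypto module might use it; the my_hw_* names are hypothetical, and the get_salt_key_size_cb signature is assumed from the call sites above:

/* Hypothetical consumer of the events API above (sketch only). */
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/ecryptfs.h>

static bool my_hw_is_hw_crypt(void)
{
	return true;	/* pretend hardware crypto is present */
}

static size_t my_hw_salt_size(const void *data)
{
	return 32;	/* illustrative fixed salt size */
}

static const struct ecryptfs_events my_hw_ops = {
	.is_hw_crypt_cb		= my_hw_is_hw_crypt,
	.get_salt_key_size_cb	= my_hw_salt_size,
	/* open_cb, release_cb, encrypt_cb, decrypt_cb omitted */
};

static int my_hw_handle;

static int __init my_hw_init(void)
{
	my_hw_handle = ecryptfs_register_to_events(&my_hw_ops);
	if (my_hw_handle == -EINVAL || my_hw_handle == -EPERM ||
	    my_hw_handle == -ENOMEM)
		return my_hw_handle;
	return 0;
}

static void __exit my_hw_exit(void)
{
	/* Pass back the handle returned at registration time. */
	ecryptfs_unregister_from_events(my_hw_handle);
}

module_init(my_hw_init);
module_exit(my_hw_exit);
MODULE_LICENSE("GPL v2");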
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 27794b137b24..c93fe5fce41e 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -31,6 +31,7 @@
#include <linux/security.h>
#include <linux/compat.h>
#include <linux/fs_stack.h>
+#include <linux/ecryptfs.h>
#include "ecryptfs_kernel.h"
/**
@@ -196,6 +197,9 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
int rc = 0;
struct ecryptfs_crypt_stat *crypt_stat = NULL;
struct dentry *ecryptfs_dentry = file->f_path.dentry;
+ int ret;
+
/* Private value of ecryptfs_dentry allocated in
* ecryptfs_lookup() */
struct ecryptfs_file_info *file_info;
@@ -235,12 +239,39 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
}
ecryptfs_set_file_lower(
file, ecryptfs_inode_to_private(inode)->lower_file);
+ if (d_is_dir(ecryptfs_dentry)) {
+ ecryptfs_printk(KERN_DEBUG, "This is a directory\n");
+ mutex_lock(&crypt_stat->cs_mutex);
+ crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
+ mutex_unlock(&crypt_stat->cs_mutex);
+ rc = 0;
+ goto out;
+ }
+
rc = read_or_initialize_metadata(ecryptfs_dentry);
if (rc)
goto out_put;
ecryptfs_printk(KERN_DEBUG, "inode w/ addr = [0x%p], i_ino = "
"[0x%.16lx] size: [0x%.16llx]\n", inode, inode->i_ino,
(unsigned long long)i_size_read(inode));
+
+ if (get_events() && get_events()->open_cb) {
+
+ ret = vfs_fsync(file, false);
+
+ if (ret)
+ ecryptfs_printk(KERN_ERR,
+ "failed to sync file ret = %d.\n", ret);
+
+ get_events()->open_cb(ecryptfs_inode_to_lower(inode),
+ crypt_stat);
+
+ if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) {
+ truncate_inode_pages(inode->i_mapping, 0);
+ truncate_inode_pages(
+ ecryptfs_inode_to_lower(inode)->i_mapping, 0);
+ }
+ }
goto out;
out_put:
ecryptfs_put_lower_file(inode);
@@ -307,6 +338,7 @@ static int ecryptfs_release(struct inode *inode, struct file *file)
ecryptfs_put_lower_file(inode);
kmem_cache_free(ecryptfs_file_info_cache,
ecryptfs_file_to_private(file));
+
return 0;
}
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 34897aeb4a66..9e143b0db9e8 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -261,12 +261,15 @@ out:
*
* Returns zero on success; non-zero on error condition
*/
static int
ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
umode_t mode, bool excl)
{
struct inode *ecryptfs_inode;
int rc;
+ struct ecryptfs_crypt_stat *crypt_stat;
ecryptfs_inode = ecryptfs_do_create(directory_inode, ecryptfs_dentry,
mode);
@@ -276,6 +279,7 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
rc = PTR_ERR(ecryptfs_inode);
goto out;
}
+
/* At this point, a file exists on "disk"; we need to make sure
* that this on disk file is prepared to be an ecryptfs file */
rc = ecryptfs_initialize_file(ecryptfs_dentry, ecryptfs_inode);
@@ -287,7 +291,15 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
iput(ecryptfs_inode);
goto out;
}
- d_instantiate_new(ecryptfs_dentry, ecryptfs_inode);
+
+ crypt_stat = &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat;
+ if (get_events() && get_events()->open_cb)
+ get_events()->open_cb(
+ ecryptfs_inode_to_lower(ecryptfs_inode),
+ crypt_stat);
+
+ unlock_new_inode(ecryptfs_inode);
+ d_instantiate(ecryptfs_dentry, ecryptfs_inode);
out:
return rc;
}
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 37920394c64c..ac9b48aee73f 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -315,7 +315,8 @@ write_tag_66_packet(char *signature, u8 cipher_code,
* | File Encryption Key Size | 1 or 2 bytes |
* | File Encryption Key | arbitrary |
*/
- data_len = (5 + ECRYPTFS_SIG_SIZE_HEX + crypt_stat->key_size);
+ data_len = (5 + ECRYPTFS_SIG_SIZE_HEX +
+ ecryptfs_get_key_size_to_store_key(crypt_stat));
*packet = kmalloc(data_len, GFP_KERNEL);
message = *packet;
if (!message) {
@@ -335,8 +336,9 @@ write_tag_66_packet(char *signature, u8 cipher_code,
memcpy(&message[i], signature, ECRYPTFS_SIG_SIZE_HEX);
i += ECRYPTFS_SIG_SIZE_HEX;
/* The encrypted key includes 1 byte cipher code and 2 byte checksum */
- rc = ecryptfs_write_packet_length(&message[i], crypt_stat->key_size + 3,
- &packet_size_len);
+ rc = ecryptfs_write_packet_length(&message[i],
+ ecryptfs_get_key_size_to_store_key(crypt_stat) + 3,
+ &packet_size_len);
if (rc) {
ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet "
"header; cannot generate packet length\n");
@@ -344,9 +346,10 @@ write_tag_66_packet(char *signature, u8 cipher_code,
}
i += packet_size_len;
message[i++] = cipher_code;
- memcpy(&message[i], crypt_stat->key, crypt_stat->key_size);
- i += crypt_stat->key_size;
- for (j = 0; j < crypt_stat->key_size; j++)
+ memcpy(&message[i], crypt_stat->key,
+ ecryptfs_get_key_size_to_store_key(crypt_stat));
+ i += ecryptfs_get_key_size_to_store_key(crypt_stat);
+ for (j = 0; j < ecryptfs_get_key_size_to_store_key(crypt_stat); j++)
checksum += crypt_stat->key[j];
message[i++] = (checksum / 256) % 256;
message[i++] = (checksum % 256);
@@ -925,6 +928,7 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
struct ecryptfs_parse_tag_70_packet_silly_stack *s;
struct key *auth_tok_key = NULL;
int rc = 0;
+ char full_cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE];
(*packet_size) = 0;
(*filename_size) = 0;
@@ -984,12 +988,13 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
s->fnek_sig_hex[ECRYPTFS_SIG_SIZE_HEX] = '\0';
(*packet_size) += ECRYPTFS_SIG_SIZE;
s->cipher_code = data[(*packet_size)++];
- rc = ecryptfs_cipher_code_to_string(s->cipher_string, s->cipher_code);
+ rc = ecryptfs_cipher_code_to_string(full_cipher, s->cipher_code);
if (rc) {
printk(KERN_WARNING "%s: Cipher code [%d] is invalid\n",
__func__, s->cipher_code);
goto out;
}
+ ecryptfs_parse_full_cipher(full_cipher, s->cipher_string, NULL);
rc = ecryptfs_find_auth_tok_for_sig(&auth_tok_key,
&s->auth_tok, mount_crypt_stat,
s->fnek_sig_hex);
@@ -1158,6 +1163,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
char *payload = NULL;
size_t payload_len = 0;
int rc;
+ char full_cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE];
rc = ecryptfs_get_auth_tok_sig(&auth_tok_sig, auth_tok);
if (rc) {
@@ -1191,21 +1197,31 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
rc);
goto out;
}
- auth_tok->session_key.flags |= ECRYPTFS_CONTAINS_DECRYPTED_KEY;
- memcpy(crypt_stat->key, auth_tok->session_key.decrypted_key,
- auth_tok->session_key.decrypted_key_size);
- crypt_stat->key_size = auth_tok->session_key.decrypted_key_size;
- rc = ecryptfs_cipher_code_to_string(crypt_stat->cipher, cipher_code);
+
+ rc = ecryptfs_cipher_code_to_string(full_cipher, cipher_code);
if (rc) {
ecryptfs_printk(KERN_ERR, "Cipher code [%d] is invalid\n",
cipher_code);
- goto out;
+ goto out;
}
+
+ auth_tok->session_key.flags |= ECRYPTFS_CONTAINS_DECRYPTED_KEY;
+ memcpy(crypt_stat->key, auth_tok->session_key.decrypted_key,
+ auth_tok->session_key.decrypted_key_size);
+ crypt_stat->key_size = ecryptfs_get_key_size_to_restore_key(
+ auth_tok->session_key.decrypted_key_size, crypt_stat);
+
+ ecryptfs_parse_full_cipher(full_cipher,
+ crypt_stat->cipher, crypt_stat->cipher_mode);
+
crypt_stat->flags |= ECRYPTFS_KEY_VALID;
if (ecryptfs_verbosity > 0) {
ecryptfs_printk(KERN_DEBUG, "Decrypted session key:\n");
ecryptfs_dump_hex(crypt_stat->key,
crypt_stat->key_size);
+
+ ecryptfs_dump_salt_hex(crypt_stat->key, crypt_stat->key_size,
+ crypt_stat);
}
out:
kfree(msg);
@@ -1387,6 +1403,7 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat,
struct ecryptfs_auth_tok_list_item *auth_tok_list_item;
size_t length_size;
int rc = 0;
+ char full_cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE];
(*packet_size) = 0;
(*new_auth_tok) = NULL;
@@ -1460,10 +1477,13 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat,
rc = -EINVAL;
goto out_free;
}
- rc = ecryptfs_cipher_code_to_string(crypt_stat->cipher,
+ rc = ecryptfs_cipher_code_to_string(full_cipher,
(u16)data[(*packet_size)]);
if (rc)
goto out_free;
+ ecryptfs_parse_full_cipher(full_cipher,
+ crypt_stat->cipher, crypt_stat->cipher_mode);
+
/* A little extra work to differentiate among the AES key
* sizes; see RFC2440 */
switch(data[(*packet_size)++]) {
@@ -1472,7 +1492,10 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat,
break;
default:
crypt_stat->key_size =
- (*new_auth_tok)->session_key.encrypted_key_size;
+ ecryptfs_get_key_size_to_restore_key(
+ (*new_auth_tok)->session_key.encrypted_key_size,
+ crypt_stat);
+
}
rc = ecryptfs_init_crypt_ctx(crypt_stat);
if (rc)
@@ -1720,7 +1743,7 @@ decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
mutex_lock(tfm_mutex);
rc = crypto_blkcipher_setkey(
desc.tfm, auth_tok->token.password.session_key_encryption_key,
- crypt_stat->key_size);
+ auth_tok->token.password.session_key_encryption_key_bytes);
if (unlikely(rc < 0)) {
mutex_unlock(tfm_mutex);
printk(KERN_ERR "Error setting key for crypto context\n");
@@ -1743,6 +1766,8 @@ decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
crypt_stat->key_size);
ecryptfs_dump_hex(crypt_stat->key,
crypt_stat->key_size);
+ ecryptfs_dump_salt_hex(crypt_stat->key, crypt_stat->key_size,
+ crypt_stat);
}
out:
return rc;
@@ -1979,12 +2004,17 @@ pki_encrypt_session_key(struct key *auth_tok_key,
size_t payload_len = 0;
struct ecryptfs_message *msg;
int rc;
+ unsigned char final[2*ECRYPTFS_MAX_CIPHER_NAME_SIZE+1];
rc = write_tag_66_packet(auth_tok->token.private_key.signature,
- ecryptfs_code_for_cipher_string(
- crypt_stat->cipher,
- crypt_stat->key_size),
- crypt_stat, &payload, &payload_len);
+ ecryptfs_code_for_cipher_string(
+ ecryptfs_get_full_cipher(
+ crypt_stat->cipher,
+ crypt_stat->cipher_mode,
+ final, sizeof(final)),
+ ecryptfs_get_key_size_to_enc_data(
+ crypt_stat)),
+ crypt_stat, &payload, &payload_len);
up_write(&(auth_tok_key->sem));
key_put(auth_tok_key);
if (rc) {
@@ -2042,7 +2072,7 @@ write_tag_1_packet(char *dest, size_t *remaining_bytes,
ecryptfs_from_hex(key_rec->sig, auth_tok->token.private_key.signature,
ECRYPTFS_SIG_SIZE);
encrypted_session_key_valid = 0;
- for (i = 0; i < crypt_stat->key_size; i++)
+ for (i = 0; i < ecryptfs_get_key_size_to_store_key(crypt_stat); i++)
encrypted_session_key_valid |=
auth_tok->session_key.encrypted_key[i];
if (encrypted_session_key_valid) {
@@ -2196,6 +2226,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes,
u8 cipher_code;
size_t packet_size_length;
size_t max_packet_size;
+ unsigned char final[2*ECRYPTFS_MAX_CIPHER_NAME_SIZE+1];
struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
crypt_stat->mount_crypt_stat;
struct blkcipher_desc desc = {
@@ -2228,13 +2259,14 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes,
mount_crypt_stat->global_default_cipher_key_size;
if (auth_tok->session_key.encrypted_key_size == 0)
auth_tok->session_key.encrypted_key_size =
- crypt_stat->key_size;
+ ecryptfs_get_key_size_to_store_key(crypt_stat);
if (crypt_stat->key_size == 24
&& strcmp("aes", crypt_stat->cipher) == 0) {
memset((crypt_stat->key + 24), 0, 8);
auth_tok->session_key.encrypted_key_size = 32;
} else
- auth_tok->session_key.encrypted_key_size = crypt_stat->key_size;
+ auth_tok->session_key.encrypted_key_size =
+ ecryptfs_get_key_size_to_store_key(crypt_stat);
key_rec->enc_key_size =
auth_tok->session_key.encrypted_key_size;
encrypted_session_key_valid = 0;
@@ -2258,8 +2290,8 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes,
auth_tok->token.password.
session_key_encryption_key_bytes);
memcpy(session_key_encryption_key,
- auth_tok->token.password.session_key_encryption_key,
- crypt_stat->key_size);
+ auth_tok->token.password.session_key_encryption_key,
+ auth_tok->token.password.session_key_encryption_key_bytes);
ecryptfs_printk(KERN_DEBUG,
"Cached session key encryption key:\n");
if (ecryptfs_verbosity > 0)
@@ -2292,7 +2324,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes,
}
mutex_lock(tfm_mutex);
rc = crypto_blkcipher_setkey(desc.tfm, session_key_encryption_key,
- crypt_stat->key_size);
+ auth_tok->token.password.session_key_encryption_key_bytes);
if (rc < 0) {
mutex_unlock(tfm_mutex);
ecryptfs_printk(KERN_ERR, "Error setting key for crypto "
@@ -2301,7 +2333,9 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes,
}
rc = 0;
ecryptfs_printk(KERN_DEBUG, "Encrypting [%zd] bytes of the key\n",
- crypt_stat->key_size);
+ crypt_stat->key_size);
+ ecryptfs_printk(KERN_DEBUG, "Encrypting [%zd] bytes of the salt key\n",
+ ecryptfs_get_salt_size_for_cipher(crypt_stat));
rc = crypto_blkcipher_encrypt(&desc, dst_sg, src_sg,
(*key_rec).enc_key_size);
mutex_unlock(tfm_mutex);
@@ -2350,8 +2384,10 @@ encrypted_session_key_set:
dest[(*packet_size)++] = 0x04; /* version 4 */
/* TODO: Break from RFC2440 so that arbitrary ciphers can be
* specified with strings */
- cipher_code = ecryptfs_code_for_cipher_string(crypt_stat->cipher,
- crypt_stat->key_size);
+ cipher_code = ecryptfs_code_for_cipher_string(
+ ecryptfs_get_full_cipher(crypt_stat->cipher,
+ crypt_stat->cipher_mode, final, sizeof(final)),
+ crypt_stat->key_size);
if (cipher_code == 0) {
ecryptfs_printk(KERN_WARNING, "Unable to generate code for "
"cipher [%s]\n", crypt_stat->cipher);
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index cd2a3199a814..7d63228b9657 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -156,16 +156,41 @@ int ecryptfs_get_lower_file(struct dentry *dentry, struct inode *inode)
void ecryptfs_put_lower_file(struct inode *inode)
{
+ int ret = 0;
struct ecryptfs_inode_info *inode_info;
+ bool clear_cache_needed = false;
inode_info = ecryptfs_inode_to_private(inode);
if (atomic_dec_and_mutex_lock(&inode_info->lower_file_count,
&inode_info->lower_file_mutex)) {
+
+ if (get_events() && get_events()->is_hw_crypt_cb &&
+ get_events()->is_hw_crypt_cb())
+ clear_cache_needed = true;
+
filemap_write_and_wait(inode->i_mapping);
+ if (clear_cache_needed) {
+ ret = vfs_fsync(inode_info->lower_file, false);
+
+ if (ret)
+ pr_err("failed to sync file ret = %d.\n", ret);
+ }
fput(inode_info->lower_file);
inode_info->lower_file = NULL;
mutex_unlock(&inode_info->lower_file_mutex);
+
+ if (clear_cache_needed) {
+ truncate_inode_pages_fill_zero(inode->i_mapping, 0);
+ truncate_inode_pages_fill_zero(
+ ecryptfs_inode_to_lower(inode)->i_mapping, 0);
+ }
+
+ if (get_events() && get_events()->release_cb)
+ get_events()->release_cb(
+ ecryptfs_inode_to_lower(inode));
}
}
enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig,
@@ -280,6 +305,7 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options,
char *cipher_key_bytes_src;
char *fn_cipher_key_bytes_src;
u8 cipher_code;
+ unsigned char final[2*ECRYPTFS_MAX_CIPHER_NAME_SIZE+1];
*check_ruid = 0;
@@ -309,12 +335,14 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options,
case ecryptfs_opt_ecryptfs_cipher:
cipher_name_src = args[0].from;
cipher_name_dst =
- mount_crypt_stat->
- global_default_cipher_name;
- strncpy(cipher_name_dst, cipher_name_src,
- ECRYPTFS_MAX_CIPHER_NAME_SIZE);
- cipher_name_dst[ECRYPTFS_MAX_CIPHER_NAME_SIZE] = '\0';
+ mount_crypt_stat->global_default_cipher_name;
+
+ ecryptfs_parse_full_cipher(cipher_name_src,
+ mount_crypt_stat->global_default_cipher_name,
+ mount_crypt_stat->global_default_cipher_mode);
+
cipher_name_set = 1;
+
break;
case ecryptfs_opt_ecryptfs_key_bytes:
cipher_key_bytes_src = args[0].from;
@@ -411,24 +439,35 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options,
strcpy(mount_crypt_stat->global_default_cipher_name,
ECRYPTFS_DEFAULT_CIPHER);
}
+
if ((mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)
&& !fn_cipher_name_set)
strcpy(mount_crypt_stat->global_default_fn_cipher_name,
mount_crypt_stat->global_default_cipher_name);
+
if (!cipher_key_bytes_set)
mount_crypt_stat->global_default_cipher_key_size = 0;
+
if ((mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)
&& !fn_cipher_key_bytes_set)
mount_crypt_stat->global_default_fn_cipher_key_bytes =
mount_crypt_stat->global_default_cipher_key_size;
cipher_code = ecryptfs_code_for_cipher_string(
- mount_crypt_stat->global_default_cipher_name,
+ ecryptfs_get_full_cipher(
+ mount_crypt_stat->global_default_cipher_name,
+ mount_crypt_stat->global_default_cipher_mode,
+ final, sizeof(final)),
mount_crypt_stat->global_default_cipher_key_size);
if (!cipher_code) {
- ecryptfs_printk(KERN_ERR,
- "eCryptfs doesn't support cipher: %s",
- mount_crypt_stat->global_default_cipher_name);
+ ecryptfs_printk(
+ KERN_ERR,
+ "eCryptfs doesn't support cipher: %s and key size %zu",
+ ecryptfs_get_full_cipher(
+ mount_crypt_stat->global_default_cipher_name,
+ mount_crypt_stat->global_default_cipher_mode,
+ final, sizeof(final)),
+ mount_crypt_stat->global_default_cipher_key_size);
rc = -EINVAL;
goto out;
}
@@ -488,6 +527,7 @@ static struct file_system_type ecryptfs_fs_type;
* @dev_name: The path to mount over
* @raw_data: The options passed into the kernel
*/
+
static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags,
const char *dev_name, void *raw_data)
{
@@ -563,6 +603,11 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
ecryptfs_set_superblock_lower(s, path.dentry->d_sb);
+
+ if (get_events() && get_events()->is_hw_crypt_cb &&
+ get_events()->is_hw_crypt_cb())
+ drop_pagecache_sb(ecryptfs_superblock_to_lower(s), NULL);
+
/**
* Set the POSIX ACL flag based on whether they're enabled in the lower
* mount.
@@ -901,6 +946,7 @@ static void __exit ecryptfs_exit(void)
do_sysfs_unregistration();
unregister_filesystem(&ecryptfs_fs_type);
ecryptfs_free_kmem_caches();
+ ecryptfs_free_events();
}
MODULE_AUTHOR("Michael A. Halcrow <mhalcrow@us.ibm.com>");
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index caba848ac763..bdbc72d52438 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -552,10 +552,16 @@ static sector_t ecryptfs_bmap(struct address_space *mapping, sector_t block)
return rc;
}
+void ecryptfs_freepage(struct page *page)
+{
+ zero_user(page, 0, PAGE_CACHE_SIZE);
+}
+
const struct address_space_operations ecryptfs_aops = {
.writepage = ecryptfs_writepage,
.readpage = ecryptfs_readpage,
.write_begin = ecryptfs_write_begin,
.write_end = ecryptfs_write_end,
.bmap = ecryptfs_bmap,
+ .freepage = ecryptfs_freepage,
};
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index afa1b81c3418..25e436ddcf8e 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -69,6 +69,9 @@ static void ecryptfs_i_callback(struct rcu_head *head)
{
struct inode *inode = container_of(head, struct inode, i_rcu);
struct ecryptfs_inode_info *inode_info;
+ if (inode == NULL)
+ return;
+
inode_info = ecryptfs_inode_to_private(inode);
kmem_cache_free(ecryptfs_inode_info_cache, inode_info);
@@ -88,9 +91,12 @@ static void ecryptfs_destroy_inode(struct inode *inode)
struct ecryptfs_inode_info *inode_info;
inode_info = ecryptfs_inode_to_private(inode);
+
BUG_ON(inode_info->lower_file);
+
ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat);
call_rcu(&inode->i_rcu, ecryptfs_i_callback);
+
}
/**
@@ -149,6 +155,9 @@ static int ecryptfs_show_options(struct seq_file *m, struct dentry *root)
struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
&ecryptfs_superblock_to_private(sb)->mount_crypt_stat;
struct ecryptfs_global_auth_tok *walker;
+ unsigned char final[2*ECRYPTFS_MAX_CIPHER_NAME_SIZE+1];
+
+ memset(final, 0, sizeof(final));
mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex);
list_for_each_entry(walker,
@@ -162,7 +171,10 @@ static int ecryptfs_show_options(struct seq_file *m, struct dentry *root)
mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex);
seq_printf(m, ",ecryptfs_cipher=%s",
- mount_crypt_stat->global_default_cipher_name);
+ ecryptfs_get_full_cipher(
+ mount_crypt_stat->global_default_cipher_name,
+ mount_crypt_stat->global_default_cipher_mode,
+ final, sizeof(final)));
if (mount_crypt_stat->global_default_cipher_key_size)
seq_printf(m, ",ecryptfs_key_bytes=%zd",
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index e6a26429d70c..b484d4500687 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1235,14 +1235,22 @@ static int ep_create_wakeup_source(struct epitem *epi)
{
struct name_snapshot n;
struct wakeup_source *ws;
+ char task_comm_buf[TASK_COMM_LEN];
+ char buf[64];
+
+ get_task_comm(task_comm_buf, current);
if (!epi->ep->ws) {
- epi->ep->ws = wakeup_source_register("eventpoll");
+ snprintf(buf, sizeof(buf), "epoll_%.*s_epollfd",
+ (int)sizeof(task_comm_buf), task_comm_buf);
+ epi->ep->ws = wakeup_source_register(buf);
if (!epi->ep->ws)
return -ENOMEM;
}
take_dentry_name_snapshot(&n, epi->ffd.file->f_path.dentry);
+ snprintf(buf, sizeof(buf), "epoll_%.*s_file:%s",
+ (int)sizeof(task_comm_buf), task_comm_buf, n.name);
- ws = wakeup_source_register(n.name);
+ ws = wakeup_source_register(buf);
release_dentry_name_snapshot(&n);
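For reference, a sketch of the wakeup-source names the two snprintf() calls above produce; the task and dentry names here are made up:

/* Sketch: resulting wakeup source names (illustrative inputs). */
#include <linux/kernel.h>
#include <linux/sched.h>

static void epoll_ws_name_example(void)
{
	char buf[64];

	/* task comm "netd", per-epollfd source */
	snprintf(buf, sizeof(buf), "epoll_%.*s_epollfd",
		 (int)TASK_COMM_LEN, "netd");
	/* buf is now "epoll_netd_epollfd" */

	/* same task polling a file whose dentry is named "socket" */
	snprintf(buf, sizeof(buf), "epoll_%.*s_file:%s",
		 (int)TASK_COMM_LEN, "netd", "socket");
	/* buf is now "epoll_netd_file:socket" */
}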
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index da3d40ef1668..3267a80dbbe2 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -40,7 +40,8 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode)
{
int err = ext2_add_link(dentry, inode);
if (!err) {
- d_instantiate_new(dentry, inode);
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
return 0;
}
inode_dec_link_count(inode);
@@ -266,7 +267,8 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
if (err)
goto out_fail;
- d_instantiate_new(dentry, inode);
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
out:
return err;
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index 3c8293215603..ebaff5ab93da 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -118,10 +118,16 @@ config EXT4_ENCRYPTION
decrypted pages in the page cache.
config EXT4_FS_ENCRYPTION
- bool
- default y
+ bool "Ext4 FS Encryption"
+ default n
depends on EXT4_ENCRYPTION
+config EXT4_FS_ICE_ENCRYPTION
+ bool "Ext4 Encryption with ICE support"
+ default n
+ depends on EXT4_FS_ENCRYPTION
+ depends on PFK
+
config EXT4_DEBUG
bool "EXT4 debugging support"
depends on EXT4_FS
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index f52cf54f0cbc..1cabbd9a9229 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -14,3 +14,5 @@ ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o
ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o
ext4-$(CONFIG_EXT4_FS_ENCRYPTION) += crypto_policy.o crypto.o \
crypto_key.o crypto_fname.o
+
+ext4-$(CONFIG_EXT4_FS_ICE_ENCRYPTION) += ext4_ice.o
diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c
index f6096ee77662..f5099a3386ec 100644
--- a/fs/ext4/crypto.c
+++ b/fs/ext4/crypto.c
@@ -389,14 +389,12 @@ int ext4_decrypt(struct page *page)
page->index, page, page, GFP_NOFS);
}
-int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex)
+int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk,
+ ext4_fsblk_t pblk, ext4_lblk_t len)
{
struct ext4_crypto_ctx *ctx;
struct page *ciphertext_page = NULL;
struct bio *bio;
- ext4_lblk_t lblk = le32_to_cpu(ex->ee_block);
- ext4_fsblk_t pblk = ext4_ext_pblock(ex);
- unsigned int len = ext4_ext_get_actual_len(ex);
int ret, err = 0;
#if 0
@@ -457,17 +455,10 @@ errout:
return err;
}
-bool ext4_valid_enc_modes(uint32_t contents_mode, uint32_t filenames_mode)
+bool ext4_valid_contents_enc_mode(uint32_t mode)
{
- if (contents_mode == EXT4_ENCRYPTION_MODE_AES_256_XTS) {
- return (filenames_mode == EXT4_ENCRYPTION_MODE_AES_256_CTS ||
- filenames_mode == EXT4_ENCRYPTION_MODE_AES_256_HEH);
- }
-
- if (contents_mode == EXT4_ENCRYPTION_MODE_SPECK128_256_XTS)
- return filenames_mode == EXT4_ENCRYPTION_MODE_SPECK128_256_CTS;
-
- return false;
+ return (mode == EXT4_ENCRYPTION_MODE_AES_256_XTS ||
+ mode == EXT4_ENCRYPTION_MODE_PRIVATE);
}
/**
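With the validation split into two predicates (the filenames half is added in crypto_fname.c just below), the policy check in crypto_policy.c reduces to a sketch like the following; any valid contents mode may pair with any valid filenames mode:

/* Sketch: mode validation as crypto_policy.c applies it after this split. */
static bool policy_modes_ok(uint32_t contents, uint32_t filenames)
{
	/* contents:  AES-256-XTS or PRIVATE (hardware/ICE) */
	/* filenames: AES-256-CTS or AES-256-HEH            */
	return ext4_valid_contents_enc_mode(contents) &&
	       ext4_valid_filenames_enc_mode(filenames);
}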
diff --git a/fs/ext4/crypto_fname.c b/fs/ext4/crypto_fname.c
index 5e5afb6ef71a..026716bdbbfc 100644
--- a/fs/ext4/crypto_fname.c
+++ b/fs/ext4/crypto_fname.c
@@ -42,6 +42,12 @@ static void ext4_dir_crypt_complete(struct crypto_async_request *req, int res)
complete(&ecr->completion);
}
+bool ext4_valid_filenames_enc_mode(uint32_t mode)
+{
+ return (mode == EXT4_ENCRYPTION_MODE_AES_256_CTS ||
+ mode == EXT4_ENCRYPTION_MODE_AES_256_HEH);
+}
+
static unsigned max_name_len(struct inode *inode)
{
return S_ISLNK(inode->i_mode) ? inode->i_sb->s_blocksize :
diff --git a/fs/ext4/crypto_key.c b/fs/ext4/crypto_key.c
index 68225223ffd8..d3d6b28ce9b9 100644
--- a/fs/ext4/crypto_key.c
+++ b/fs/ext4/crypto_key.c
@@ -15,6 +15,7 @@
#include <uapi/linux/keyctl.h>
#include "ext4.h"
+#include "ext4_ice.h"
#include "xattr.h"
static void derive_crypt_complete(struct crypto_async_request *req, int rc)
@@ -173,6 +174,8 @@ void ext4_free_crypt_info(struct ext4_crypt_info *ci)
if (!ci)
return;
+ if (ci->ci_keyring_key)
+ key_put(ci->ci_keyring_key);
crypto_free_ablkcipher(ci->ci_ctfm);
kmem_cache_free(ext4_crypt_info_cachep, ci);
}
@@ -194,7 +197,13 @@ void ext4_free_encryption_info(struct inode *inode,
ext4_free_crypt_info(ci);
}
-int ext4_get_encryption_info(struct inode *inode)
+static int ext4_default_data_encryption_mode(void)
+{
+ return ext4_is_ice_enabled() ? EXT4_ENCRYPTION_MODE_PRIVATE :
+ EXT4_ENCRYPTION_MODE_AES_256_XTS;
+}
+
+int _ext4_get_encryption_info(struct inode *inode)
{
struct ext4_inode_info *ei = EXT4_I(inode);
struct ext4_crypt_info *crypt_info;
@@ -207,24 +216,32 @@ int ext4_get_encryption_info(struct inode *inode)
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct crypto_ablkcipher *ctfm;
const char *cipher_str;
- char raw_key[EXT4_MAX_KEY_SIZE];
- char mode;
+ int for_fname = 0;
+ int mode;
int res;
- if (ei->i_crypt_info)
- return 0;
-
res = ext4_init_crypto();
if (res)
return res;
+retry:
+ crypt_info = ACCESS_ONCE(ei->i_crypt_info);
+ if (crypt_info) {
+ if (!crypt_info->ci_keyring_key ||
+ key_validate(crypt_info->ci_keyring_key) == 0)
+ return 0;
+ ext4_free_encryption_info(inode, crypt_info);
+ goto retry;
+ }
+
res = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION,
EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
&ctx, sizeof(ctx));
if (res < 0) {
if (!DUMMY_ENCRYPTION_ENABLED(sbi))
return res;
- ctx.contents_encryption_mode = EXT4_ENCRYPTION_MODE_AES_256_XTS;
+ ctx.contents_encryption_mode =
+ ext4_default_data_encryption_mode();
ctx.filenames_encryption_mode =
EXT4_ENCRYPTION_MODE_AES_256_CTS;
ctx.flags = 0;
@@ -240,14 +257,15 @@ int ext4_get_encryption_info(struct inode *inode)
crypt_info->ci_data_mode = ctx.contents_encryption_mode;
crypt_info->ci_filename_mode = ctx.filenames_encryption_mode;
crypt_info->ci_ctfm = NULL;
+ crypt_info->ci_keyring_key = NULL;
memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor,
sizeof(crypt_info->ci_master_key));
- if (S_ISREG(inode->i_mode))
- mode = crypt_info->ci_data_mode;
- else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
- mode = crypt_info->ci_filename_mode;
- else
+ if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+ for_fname = 1;
+ else if (!S_ISREG(inode->i_mode))
BUG();
+ mode = for_fname ? crypt_info->ci_filename_mode :
+ crypt_info->ci_data_mode;
switch (mode) {
case EXT4_ENCRYPTION_MODE_AES_256_XTS:
cipher_str = "xts(aes)";
@@ -255,15 +273,11 @@ int ext4_get_encryption_info(struct inode *inode)
case EXT4_ENCRYPTION_MODE_AES_256_CTS:
cipher_str = "cts(cbc(aes))";
break;
+ case EXT4_ENCRYPTION_MODE_PRIVATE:
+ cipher_str = "bugon";
+ break;
case EXT4_ENCRYPTION_MODE_AES_256_HEH:
cipher_str = "heh(aes)";
break;
- case EXT4_ENCRYPTION_MODE_SPECK128_256_XTS:
- cipher_str = "xts(speck128)";
- break;
- case EXT4_ENCRYPTION_MODE_SPECK128_256_CTS:
- cipher_str = "cts(cbc(speck128))";
- break;
default:
printk_once(KERN_WARNING
"ext4: unsupported key mode %d (ino %u)\n",
@@ -272,7 +286,7 @@ int ext4_get_encryption_info(struct inode *inode)
goto out;
}
if (DUMMY_ENCRYPTION_ENABLED(sbi)) {
- memset(raw_key, 0x42, EXT4_AES_256_XTS_KEY_SIZE);
+ memset(crypt_info->ci_raw_key, 0x42, EXT4_AES_256_XTS_KEY_SIZE);
goto got_key;
}
memcpy(full_key_descriptor, EXT4_KEY_DESC_PREFIX,
@@ -288,6 +302,7 @@ int ext4_get_encryption_info(struct inode *inode)
keyring_key = NULL;
goto out;
}
+ crypt_info->ci_keyring_key = keyring_key;
if (keyring_key->type != &key_type_logon) {
printk_once(KERN_WARNING
"ext4: key type must be logon\n");
@@ -318,36 +333,49 @@ int ext4_get_encryption_info(struct inode *inode)
up_read(&keyring_key->sem);
goto out;
}
- res = ext4_derive_key(&ctx, master_key->raw, raw_key);
+ res = ext4_derive_key(&ctx, master_key->raw,
+ crypt_info->ci_raw_key);
up_read(&keyring_key->sem);
if (res)
goto out;
got_key:
- ctfm = crypto_alloc_ablkcipher(cipher_str, 0, 0);
- if (!ctfm || IS_ERR(ctfm)) {
- res = ctfm ? PTR_ERR(ctfm) : -ENOMEM;
- printk(KERN_DEBUG
- "%s: error %d (inode %u) allocating crypto tfm\n",
- __func__, res, (unsigned) inode->i_ino);
+ if (for_fname ||
+ (crypt_info->ci_data_mode != EXT4_ENCRYPTION_MODE_PRIVATE)) {
+ ctfm = crypto_alloc_ablkcipher(cipher_str, 0, 0);
+ if (!ctfm || IS_ERR(ctfm)) {
+ res = ctfm ? PTR_ERR(ctfm) : -ENOMEM;
+ pr_debug("%s: error %d (inode %u) allocating crypto tfm\n",
+ __func__, res, (unsigned) inode->i_ino);
+ goto out;
+ }
+ crypt_info->ci_ctfm = ctfm;
+ crypto_ablkcipher_clear_flags(ctfm, ~0);
+ crypto_tfm_set_flags(crypto_ablkcipher_tfm(ctfm),
+ CRYPTO_TFM_REQ_WEAK_KEY);
+ res = crypto_ablkcipher_setkey(ctfm, crypt_info->ci_raw_key,
+ ext4_encryption_key_size(mode));
+ if (res)
+ goto out;
+ memzero_explicit(crypt_info->ci_raw_key,
+ sizeof(crypt_info->ci_raw_key));
+ } else if (!ext4_is_ice_enabled()) {
+ pr_warn("%s: ICE support not available\n",
+ __func__);
+ res = -EINVAL;
goto out;
}
- crypt_info->ci_ctfm = ctfm;
- crypto_ablkcipher_clear_flags(ctfm, ~0);
- crypto_tfm_set_flags(crypto_ablkcipher_tfm(ctfm),
- CRYPTO_TFM_REQ_WEAK_KEY);
- res = crypto_ablkcipher_setkey(ctfm, raw_key,
- ext4_encryption_key_size(mode));
- if (res)
- goto out;
+ if (cmpxchg(&ei->i_crypt_info, NULL, crypt_info) != NULL) {
+ ext4_free_crypt_info(crypt_info);
+ goto retry;
+ }
+ return 0;
- if (cmpxchg(&ei->i_crypt_info, NULL, crypt_info) == NULL)
- crypt_info = NULL;
out:
if (res == -ENOKEY)
res = 0;
- key_put(keyring_key);
+ memzero_explicit(crypt_info->ci_raw_key,
+ sizeof(crypt_info->ci_raw_key));
ext4_free_crypt_info(crypt_info);
- memzero_explicit(raw_key, sizeof(raw_key));
return res;
}
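The retry/cmpxchg structure in _ext4_get_encryption_info() above is a standard lock-free publish-once pattern. Stripped of the ext4 specifics, the skeleton it follows looks like the sketch below, with a hypothetical build_crypt_info() standing in for the xattr/keyring work:

/* Sketch: publish-once with retry, as used by _ext4_get_encryption_info(). */
static int publish_crypt_info(struct ext4_inode_info *ei)
{
	struct ext4_crypt_info *ci;

retry:
	ci = ACCESS_ONCE(ei->i_crypt_info);
	if (ci)
		return 0;			/* already published */

	ci = build_crypt_info();		/* hypothetical: slow setup */
	if (!ci)
		return -ENOMEM;

	/* Only one thread wins the NULL -> ci transition. */
	if (cmpxchg(&ei->i_crypt_info, NULL, ci) != NULL) {
		ext4_free_crypt_info(ci);	/* lost the race: discard */
		goto retry;
	}
	return 0;
}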
diff --git a/fs/ext4/crypto_policy.c b/fs/ext4/crypto_policy.c
index d4b6f3c06ff0..77bd7bfb6329 100644
--- a/fs/ext4/crypto_policy.c
+++ b/fs/ext4/crypto_policy.c
@@ -60,12 +60,16 @@ static int ext4_create_encryption_context_from_policy(
ctx.format = EXT4_ENCRYPTION_CONTEXT_FORMAT_V1;
memcpy(ctx.master_key_descriptor, policy->master_key_descriptor,
EXT4_KEY_DESCRIPTOR_SIZE);
- if (!ext4_valid_enc_modes(policy->contents_encryption_mode,
- policy->filenames_encryption_mode)) {
+ if (!ext4_valid_contents_enc_mode(policy->contents_encryption_mode)) {
printk(KERN_WARNING
- "%s: Invalid encryption modes (contents %d, filenames %d)\n",
- __func__, policy->contents_encryption_mode,
- policy->filenames_encryption_mode);
+ "%s: Invalid contents encryption mode %d\n", __func__,
+ policy->contents_encryption_mode);
+ return -EINVAL;
+ }
+ if (!ext4_valid_filenames_enc_mode(policy->filenames_encryption_mode)) {
+ printk(KERN_WARNING
+ "%s: Invalid filenames encryption mode %d\n", __func__,
+ policy->filenames_encryption_mode);
return -EINVAL;
}
if (policy->flags & ~EXT4_POLICY_FLAGS_VALID)
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 7b626e942987..3dc54352c9e7 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -168,8 +168,11 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
index, 1);
file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
bh = ext4_bread(NULL, inode, map.m_lblk, 0);
- if (IS_ERR(bh))
- return PTR_ERR(bh);
+ if (IS_ERR(bh)) {
+ err = PTR_ERR(bh);
+ bh = NULL;
+ goto errout;
+ }
}
if (!bh) {
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 6998e3eaab4c..969beec3ff7e 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -589,8 +589,7 @@ enum {
#define EXT4_ENCRYPTION_MODE_AES_256_GCM 2
#define EXT4_ENCRYPTION_MODE_AES_256_CBC 3
#define EXT4_ENCRYPTION_MODE_AES_256_CTS 4
-#define EXT4_ENCRYPTION_MODE_SPECK128_256_XTS 7
-#define EXT4_ENCRYPTION_MODE_SPECK128_256_CTS 8
+#define EXT4_ENCRYPTION_MODE_PRIVATE 127
#define EXT4_ENCRYPTION_MODE_AES_256_HEH 126
#include "ext4_crypto.h"
@@ -2273,7 +2272,7 @@ int ext4_get_policy(struct inode *inode,
/* crypto.c */
extern struct kmem_cache *ext4_crypt_info_cachep;
-bool ext4_valid_enc_modes(uint32_t contents_mode, uint32_t filenames_mode);
+bool ext4_valid_contents_enc_mode(uint32_t mode);
uint32_t ext4_validate_encryption_key_size(uint32_t mode, uint32_t size);
extern struct workqueue_struct *ext4_read_workqueue;
struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode,
@@ -2284,7 +2283,8 @@ struct page *ext4_encrypt(struct inode *inode,
struct page *plaintext_page,
gfp_t gfp_flags);
int ext4_decrypt(struct page *page);
-int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex);
+int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk,
+ ext4_fsblk_t pblk, ext4_lblk_t len);
extern const struct dentry_operations ext4_encrypted_d_ops;
#ifdef CONFIG_EXT4_FS_ENCRYPTION
@@ -2304,6 +2304,7 @@ static inline int ext4_sb_has_crypto(struct super_block *sb)
#endif
/* crypto_fname.c */
+bool ext4_valid_filenames_enc_mode(uint32_t mode);
u32 ext4_fname_crypto_round_up(u32 size, u32 blksize);
unsigned ext4_fname_encrypted_size(struct inode *inode, u32 ilen);
int ext4_fname_crypto_alloc_buffer(struct inode *inode,
@@ -2347,17 +2348,37 @@ static inline void ext4_fname_free_filename(struct ext4_filename *fname) { }
/* crypto_key.c */
void ext4_free_crypt_info(struct ext4_crypt_info *ci);
void ext4_free_encryption_info(struct inode *inode, struct ext4_crypt_info *ci);
+int _ext4_get_encryption_info(struct inode *inode);
#ifdef CONFIG_EXT4_FS_ENCRYPTION
int ext4_has_encryption_key(struct inode *inode);
-int ext4_get_encryption_info(struct inode *inode);
+static inline int ext4_get_encryption_info(struct inode *inode)
+{
+ struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info;
+
+ if (!ci ||
+ (ci->ci_keyring_key &&
+ (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) |
+ (1 << KEY_FLAG_REVOKED) |
+ (1 << KEY_FLAG_DEAD)))))
+ return _ext4_get_encryption_info(inode);
+ return 0;
+}
static inline struct ext4_crypt_info *ext4_encryption_info(struct inode *inode)
{
return EXT4_I(inode)->i_crypt_info;
}
+static inline int ext4_using_hardware_encryption(struct inode *inode)
+{
+ struct ext4_crypt_info *ci = ext4_encryption_info(inode);
+
+ return S_ISREG(inode->i_mode) && ci &&
+ ci->ci_data_mode == EXT4_ENCRYPTION_MODE_PRIVATE;
+}
+
#else
static inline int ext4_has_encryption_key(struct inode *inode)
{
@@ -2371,6 +2392,10 @@ static inline struct ext4_crypt_info *ext4_encryption_info(struct inode *inode)
{
return NULL;
}
+static inline int ext4_using_hardware_encryption(struct inode *inode)
+{
+ return 0;
+}
#endif
@@ -2542,6 +2567,8 @@ extern int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
extern qsize_t *ext4_get_reserved_space(struct inode *inode);
extern void ext4_da_update_reserve_space(struct inode *inode,
int used, int quota_claim);
+extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk,
+ ext4_fsblk_t pblk, ext4_lblk_t len);
/* indirect.c */
extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
@@ -3045,8 +3072,7 @@ extern int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
struct page *page);
extern int ext4_try_add_inline_entry(handle_t *handle,
struct ext4_filename *fname,
- struct dentry *dentry,
- struct inode *inode);
+ struct inode *dir, struct inode *inode);
extern int ext4_try_create_inline_dir(handle_t *handle,
struct inode *parent,
struct inode *inode);
diff --git a/fs/ext4/ext4_crypto.h b/fs/ext4/ext4_crypto.h
index f7ba3be9d7ac..e28cc5aab04a 100644
--- a/fs/ext4/ext4_crypto.h
+++ b/fs/ext4/ext4_crypto.h
@@ -12,6 +12,7 @@
#define _EXT4_CRYPTO_H
#include <linux/fs.h>
+#include <linux/pfk.h>
#define EXT4_KEY_DESCRIPTOR_SIZE 8
@@ -63,6 +64,7 @@ struct ext4_encryption_context {
#define EXT4_AES_256_CTS_KEY_SIZE 32
#define EXT4_AES_256_HEH_KEY_SIZE 32
#define EXT4_AES_256_XTS_KEY_SIZE 64
+#define EXT4_PRIVATE_KEY_SIZE 64
#define EXT4_MAX_KEY_SIZE 64
#define EXT4_KEY_DESC_PREFIX "ext4:"
@@ -80,9 +82,13 @@ struct ext4_crypt_info {
char ci_filename_mode;
char ci_flags;
struct crypto_ablkcipher *ci_ctfm;
+ struct key *ci_keyring_key;
char ci_master_key[EXT4_KEY_DESCRIPTOR_SIZE];
+ char ci_raw_key[EXT4_MAX_KEY_SIZE];
};
+
#define EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001
#define EXT4_WRITE_PATH_FL 0x00000002
@@ -115,6 +121,7 @@ static inline int ext4_encryption_key_size(int mode)
{
switch (mode) {
case EXT4_ENCRYPTION_MODE_AES_256_XTS:
+ case EXT4_ENCRYPTION_MODE_PRIVATE:
return EXT4_AES_256_XTS_KEY_SIZE;
case EXT4_ENCRYPTION_MODE_AES_256_GCM:
return EXT4_AES_256_GCM_KEY_SIZE;
@@ -124,10 +131,6 @@ static inline int ext4_encryption_key_size(int mode)
return EXT4_AES_256_CTS_KEY_SIZE;
case EXT4_ENCRYPTION_MODE_AES_256_HEH:
return EXT4_AES_256_HEH_KEY_SIZE;
- case EXT4_ENCRYPTION_MODE_SPECK128_256_XTS:
- return 64;
- case EXT4_ENCRYPTION_MODE_SPECK128_256_CTS:
- return 32;
default:
BUG();
}
diff --git a/fs/ext4/ext4_ice.c b/fs/ext4/ext4_ice.c
new file mode 100644
index 000000000000..d85bcb8ea1ba
--- /dev/null
+++ b/fs/ext4/ext4_ice.c
@@ -0,0 +1,109 @@
+/* Copyright (c) 2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include "ext4_ice.h"
+#include "ext4_crypto.h"
+
+/*
+ * Retrieves encryption key from the inode
+ */
+char *ext4_get_ice_encryption_key(const struct inode *inode)
+{
+ struct ext4_crypt_info *ci = NULL;
+
+ if (!inode)
+ return NULL;
+
+ ci = ext4_encryption_info((struct inode *)inode);
+ if (!ci)
+ return NULL;
+
+ return &(ci->ci_raw_key[0]);
+}
+
+/*
+ * Retrieves encryption salt from the inode
+ */
+char *ext4_get_ice_encryption_salt(const struct inode *inode)
+{
+ struct ext4_crypt_info *ci = NULL;
+
+ if (!inode)
+ return NULL;
+
+ ci = ext4_encryption_info((struct inode *)inode);
+ if (!ci)
+ return NULL;
+
+ return &(ci->ci_raw_key[ext4_get_ice_encryption_key_size(inode)]);
+}
+
+/*
+ * returns true if the inode uses the PRIVATE cipher mode (hardware AES XTS)
+ */
+int ext4_is_aes_xts_cipher(const struct inode *inode)
+{
+ struct ext4_crypt_info *ci = NULL;
+
+ ci = ext4_encryption_info((struct inode *)inode);
+ if (!ci)
+ return 0;
+
+ return (ci->ci_data_mode == EXT4_ENCRYPTION_MODE_PRIVATE);
+}
+
+/*
+ * returns true if encryption info in both inodes is equal
+ */
+int ext4_is_ice_encryption_info_equal(const struct inode *inode1,
+ const struct inode *inode2)
+{
+ char *key1 = NULL;
+ char *key2 = NULL;
+ char *salt1 = NULL;
+ char *salt2 = NULL;
+
+ if (!inode1 || !inode2)
+ return 0;
+
+ if (inode1 == inode2)
+ return 1;
+
+ /* neither inode belongs to ICE, so for our purposes they are equal */
+ if (!ext4_should_be_processed_by_ice(inode1) &&
+ !ext4_should_be_processed_by_ice(inode2))
+ return 1;
+
+ /* one belongs to ice, the other does not -> not equal */
+ if (ext4_should_be_processed_by_ice(inode1) ^
+ ext4_should_be_processed_by_ice(inode2))
+ return 0;
+
+ key1 = ext4_get_ice_encryption_key(inode1);
+ key2 = ext4_get_ice_encryption_key(inode2);
+ salt1 = ext4_get_ice_encryption_salt(inode1);
+ salt2 = ext4_get_ice_encryption_salt(inode2);
+
+ /* key and salt should not be null by this point */
+ if (!key1 || !key2 || !salt1 || !salt2 ||
+ (ext4_get_ice_encryption_key_size(inode1) !=
+ ext4_get_ice_encryption_key_size(inode2)) ||
+ (ext4_get_ice_encryption_salt_size(inode1) !=
+ ext4_get_ice_encryption_salt_size(inode2)))
+ return 0;
+
+ return ((memcmp(key1, key2,
+ ext4_get_ice_encryption_key_size(inode1)) == 0) &&
+ (memcmp(salt1, salt2,
+ ext4_get_ice_encryption_salt_size(inode1)) == 0));
+}
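The accessors above hand out the two halves of the 64-byte EXT4_PRIVATE_KEY_SIZE buffer: a 32-byte key followed by a 32-byte salt (see the inline size helpers in ext4_ice.h below). A sketch of how a storage-layer consumer might use the pair; ice_program_key() is a placeholder, not a real API:

/* Sketch: hypothetical consumer of the ICE key/salt accessors. */
static int program_ice_for_inode(const struct inode *inode)
{
	char *key  = ext4_get_ice_encryption_key(inode);
	char *salt = ext4_get_ice_encryption_salt(inode);
	size_t ksz = ext4_get_ice_encryption_key_size(inode);	/* 32 */
	size_t ssz = ext4_get_ice_encryption_salt_size(inode);	/* 32 */

	if (!key || !salt)
		return -EINVAL;

	/* Placeholder for the real PFK/ICE key-programming hook. */
	return ice_program_key(key, ksz, salt, ssz);
}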
diff --git a/fs/ext4/ext4_ice.h b/fs/ext4/ext4_ice.h
new file mode 100644
index 000000000000..5257edabd6b2
--- /dev/null
+++ b/fs/ext4/ext4_ice.h
@@ -0,0 +1,104 @@
+/* Copyright (c) 2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _EXT4_ICE_H
+#define _EXT4_ICE_H
+
+#include "ext4.h"
+#include "ext4_crypto.h"
+
+#ifdef CONFIG_EXT4_FS_ICE_ENCRYPTION
+static inline int ext4_should_be_processed_by_ice(const struct inode *inode)
+{
+ if (!ext4_encrypted_inode((struct inode *)inode))
+ return 0;
+
+ return ext4_using_hardware_encryption((struct inode *)inode);
+}
+
+static inline int ext4_is_ice_enabled(void)
+{
+ return 1;
+}
+
+int ext4_is_aes_xts_cipher(const struct inode *inode);
+
+char *ext4_get_ice_encryption_key(const struct inode *inode);
+char *ext4_get_ice_encryption_salt(const struct inode *inode);
+
+int ext4_is_ice_encryption_info_equal(const struct inode *inode1,
+ const struct inode *inode2);
+
+static inline size_t ext4_get_ice_encryption_key_size(
+ const struct inode *inode)
+{
+ return EXT4_AES_256_XTS_KEY_SIZE / 2;
+}
+
+static inline size_t ext4_get_ice_encryption_salt_size(
+ const struct inode *inode)
+{
+ return EXT4_AES_256_XTS_KEY_SIZE / 2;
+}
+
+#else
+static inline int ext4_should_be_processed_by_ice(const struct inode *inode)
+{
+ return 0;
+}
+static inline int ext4_is_ice_enabled(void)
+{
+ return 0;
+}
+
+static inline char *ext4_get_ice_encryption_key(const struct inode *inode)
+{
+ return NULL;
+}
+
+static inline char *ext4_get_ice_encryption_salt(const struct inode *inode)
+{
+ return NULL;
+}
+
+static inline size_t ext4_get_ice_encryption_key_size(
+ const struct inode *inode)
+{
+ return 0;
+}
+
+static inline size_t ext4_get_ice_encryption_salt_size(
+ const struct inode *inode)
+{
+ return 0;
+}
+
+static inline int ext4_is_xts_cipher(const struct inode *inode)
+{
+ return 0;
+}
+
+static inline int ext4_is_ice_encryption_info_equal(
+ const struct inode *inode1,
+ const struct inode *inode2)
+{
+ return 0;
+}
+
+static inline int ext4_is_aes_xts_cipher(const struct inode *inode)
+{
+ return 0;
+}
+
+#endif
+
+#endif /* _EXT4_ICE_H */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 07ae78ba27a1..4feac4bf90f4 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3156,19 +3156,11 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
{
ext4_fsblk_t ee_pblock;
unsigned int ee_len;
- int ret;
ee_len = ext4_ext_get_actual_len(ex);
ee_pblock = ext4_ext_pblock(ex);
-
- if (ext4_encrypted_inode(inode))
- return ext4_encrypted_zeroout(inode, ex);
-
- ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS);
- if (ret > 0)
- ret = 0;
-
- return ret;
+ return ext4_issue_zeroout(inode, le32_to_cpu(ex->ee_block), ee_pblock,
+ ee_len);
}
/*
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 792649dad953..1016a8ddd3b0 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -1024,12 +1024,11 @@ void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh,
*/
static int ext4_add_dirent_to_inline(handle_t *handle,
struct ext4_filename *fname,
- struct dentry *dentry,
+ struct inode *dir,
struct inode *inode,
struct ext4_iloc *iloc,
void *inline_start, int inline_size)
{
- struct inode *dir = d_inode(dentry->d_parent);
int err;
struct ext4_dir_entry_2 *de;
@@ -1273,12 +1272,11 @@ out:
* the new created block.
*/
int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
- struct dentry *dentry, struct inode *inode)
+ struct inode *dir, struct inode *inode)
{
int ret, inline_size, no_expand;
void *inline_start;
struct ext4_iloc iloc;
- struct inode *dir = d_inode(dentry->d_parent);
ret = ext4_get_inode_loc(dir, &iloc);
if (ret)
@@ -1292,7 +1290,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
EXT4_INLINE_DOTDOT_SIZE;
inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE;
- ret = ext4_add_dirent_to_inline(handle, fname, dentry, inode, &iloc,
+ ret = ext4_add_dirent_to_inline(handle, fname, dir, inode, &iloc,
inline_start, inline_size);
if (ret != -ENOSPC)
goto out;
@@ -1313,7 +1311,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
if (inline_size) {
inline_start = ext4_get_inline_xattr_pos(dir, &iloc);
- ret = ext4_add_dirent_to_inline(handle, fname, dentry,
+ ret = ext4_add_dirent_to_inline(handle, fname, dir,
inode, &iloc, inline_start,
inline_size);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 1c27c8993505..4fbede7d7e2f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -42,6 +42,7 @@
#include "xattr.h"
#include "acl.h"
#include "truncate.h"
+#include "ext4_ice.h"
#include <trace/events/ext4.h>
#include <trace/events/android_fs.h>
@@ -392,6 +393,21 @@ static int __check_block_validity(struct inode *inode, const char *func,
return 0;
}
+int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk,
+ ext4_lblk_t len)
+{
+ int ret;
+
+ if (ext4_encrypted_inode(inode))
+ return ext4_encrypted_zeroout(inode, lblk, pblk, len);
+
+ ret = sb_issue_zeroout(inode->i_sb, pblk, len, GFP_NOFS);
+ if (ret > 0)
+ ret = 0;
+
+ return ret;
+}
+
#define check_block_validity(inode, map) \
__check_block_validity((inode), __func__, __LINE__, (map))
@@ -1002,7 +1018,8 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
ll_rw_block(READ, 1, &bh);
*wait_bh++ = bh;
decrypt = ext4_encrypted_inode(inode) &&
- S_ISREG(inode->i_mode);
+ S_ISREG(inode->i_mode) &&
+ !ext4_is_ice_enabled();
}
}
/*
@@ -2414,8 +2431,8 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
mpd->map.m_len = 0;
mpd->next_page = index;
while (index <= end) {
- nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
- min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
+ nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
+ tag);
if (nr_pages == 0)
goto out;
@@ -2423,16 +2440,6 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
struct page *page = pvec.pages[i];
/*
- * At this point, the page may be truncated or
- * invalidated (changing page->mapping to NULL), or
- * even swizzled back from swapper_space to tmpfs file
- * mapping. However, page->index will not change
- * because we have a reference on the page.
- */
- if (page->index > end)
- goto out;
-
- /*
* Accumulated enough dirty pages? This doesn't apply
* to WB_SYNC_ALL mode. For integrity sync we have to
* keep going because someone may be concurrently
@@ -3309,7 +3316,9 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
get_block_func = ext4_get_block_write;
dio_flags = DIO_LOCKING;
}
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
+#if defined(CONFIG_EXT4_FS_ENCRYPTION) && \
+!defined(CONFIG_EXT4_FS_ICE_ENCRYPTION)
+
BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode));
#endif
if (IS_DAX(inode))
@@ -3383,7 +3392,9 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
return 0;
}
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
+#if defined(CONFIG_EXT4_FS_ENCRYPTION) && \
+!defined(CONFIG_EXT4_FS_ICE_ENCRYPTION)
+
if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode))
return 0;
#endif
@@ -3583,7 +3594,8 @@ static int __ext4_block_zero_page_range(handle_t *handle,
if (!buffer_uptodate(bh))
goto unlock;
if (S_ISREG(inode->i_mode) &&
- ext4_encrypted_inode(inode)) {
+ ext4_encrypted_inode(inode) &&
+ !ext4_using_hardware_encryption(inode)) {
/* We expect the key to be set. */
BUG_ON(!ext4_has_encryption_key(inode));
BUG_ON(blocksize != PAGE_CACHE_SIZE);
@@ -3756,6 +3768,7 @@ int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
{
+#if 0
struct super_block *sb = inode->i_sb;
ext4_lblk_t first_block, stop_block;
struct address_space *mapping = inode->i_mapping;
@@ -3895,6 +3908,12 @@ out_dio:
out_mutex:
mutex_unlock(&inode->i_mutex);
return ret;
+#else
+ /*
+ * Disabled as per b/28760453
+ */
+ return -EOPNOTSUPP;
+#endif
}
int ext4_inode_attach_jinode(struct inode *inode)
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 8cd2a7e1eef1..313a61626d2d 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -274,7 +274,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
struct ext4_filename *fname,
struct ext4_dir_entry_2 **res_dir);
static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
- struct dentry *dentry, struct inode *inode);
+ struct inode *dir, struct inode *inode);
/* checksumming functions */
void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
@@ -1931,10 +1931,9 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
* directory, and adds the dentry to the indexed directory.
*/
static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
- struct dentry *dentry,
+ struct inode *dir,
struct inode *inode, struct buffer_head *bh)
{
- struct inode *dir = d_inode(dentry->d_parent);
struct buffer_head *bh2;
struct dx_root *root;
struct dx_frame frames[2], *frame;
@@ -2087,8 +2086,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
return retval;
if (ext4_has_inline_data(dir)) {
- retval = ext4_try_add_inline_entry(handle, &fname,
- dentry, inode);
+ retval = ext4_try_add_inline_entry(handle, &fname, dir, inode);
if (retval < 0)
goto out;
if (retval == 1) {
@@ -2098,7 +2096,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
}
if (is_dx(dir)) {
- retval = ext4_dx_add_entry(handle, &fname, dentry, inode);
+ retval = ext4_dx_add_entry(handle, &fname, dir, inode);
if (!retval || (retval != ERR_BAD_DX_DIR))
goto out;
/* Can we just ignore htree data? */
@@ -2127,7 +2125,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
if (blocks == 1 && !dx_fallback &&
ext4_has_feature_dir_index(sb)) {
- retval = make_indexed_dir(handle, &fname, dentry,
+ retval = make_indexed_dir(handle, &fname, dir,
inode, bh);
bh = NULL; /* make_indexed_dir releases bh */
goto out;
@@ -2162,12 +2160,11 @@ out:
* Returns 0 for success, or a negative error value
*/
static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
- struct dentry *dentry, struct inode *inode)
+ struct inode *dir, struct inode *inode)
{
struct dx_frame frames[2], *frame;
struct dx_entry *entries, *at;
struct buffer_head *bh;
- struct inode *dir = d_inode(dentry->d_parent);
struct super_block *sb = dir->i_sb;
struct ext4_dir_entry_2 *de;
int err;
@@ -2418,7 +2415,8 @@ static int ext4_add_nondir(handle_t *handle,
int err = ext4_add_entry(handle, dentry, inode);
if (!err) {
ext4_mark_inode_dirty(handle, inode);
- d_instantiate_new(dentry, inode);
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
return 0;
}
drop_nlink(inode);
@@ -2657,7 +2655,8 @@ out_clear_inode:
err = ext4_mark_inode_dirty(handle, dir);
if (err)
goto out_clear_inode;
- d_instantiate_new(dentry, inode);
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
if (IS_DIRSYNC(dir))
ext4_handle_sync(handle);
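
The two namei.c hunks above swap d_instantiate_new() back to the open-coded unlock_new_inode() + d_instantiate() pair; this tree predates the helper, which upstream later introduced to fuse the two steps under i_lock so a not-yet-instantiated inode is never exposed. For comparison, a simplified sketch of upstream's d_instantiate_new() from fs/dcache.c (sanity checks elided, shown only to illustrate the equivalence):

	void d_instantiate_new(struct dentry *entry, struct inode *inode)
	{
		security_d_instantiate(entry, inode);
		spin_lock(&inode->i_lock);
		__d_instantiate(entry, inode);
		WARN_ON(!(inode->i_state & I_NEW));
		inode->i_state &= ~I_NEW;
		smp_mb();
		wake_up_bit(&inode->i_state, __I_NEW);
		spin_unlock(&inode->i_lock);
	}
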
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 6ca56f5f72b5..978141e8b800 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -28,6 +28,7 @@
#include "ext4_jbd2.h"
#include "xattr.h"
#include "acl.h"
+#include "ext4_ice.h"
static struct kmem_cache *io_end_cachep;
@@ -489,7 +490,11 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
gfp_t gfp_flags = GFP_NOFS;
retry_encrypt:
- data_page = ext4_encrypt(inode, page, gfp_flags);
+ if (!ext4_using_hardware_encryption(inode))
+ data_page = ext4_encrypt(inode, page, gfp_flags);
+
if (IS_ERR(data_page)) {
ret = PTR_ERR(data_page);
if (ret == -ENOMEM && wbc->sync_mode == WB_SYNC_ALL) {
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index 783e33d839cf..99f1bd8c7f05 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -45,6 +45,7 @@
#include <linux/cleancache.h>
#include "ext4.h"
+#include "ext4_ice.h"
#include <trace/events/android_fs.h>
/*
@@ -63,12 +64,17 @@ static void completion_pages(struct work_struct *work)
bio_for_each_segment_all(bv, bio, i) {
struct page *page = bv->bv_page;
- int ret = ext4_decrypt(page);
- if (ret) {
- WARN_ON_ONCE(1);
- SetPageError(page);
- } else
+ if (ext4_is_ice_enabled()) {
SetPageUptodate(page);
+ } else {
+ int ret = ext4_decrypt(page);
+
+ if (ret) {
+ WARN_ON_ONCE(1);
+ SetPageError(page);
+ } else
+ SetPageUptodate(page);
+ }
unlock_page(page);
}
ext4_release_crypto_ctx(ctx);
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 92bb650b9fa1..3d7b46d82929 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/acl.c
*
@@ -7,10 +8,6 @@
* Portions of this code from linux/fs/ext2/acl.c
*
* Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/f2fs_fs.h>
#include "f2fs.h"
@@ -53,6 +50,9 @@ static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t size)
struct f2fs_acl_entry *entry = (struct f2fs_acl_entry *)(hdr + 1);
const char *end = value + size;
+ if (size < sizeof(struct f2fs_acl_header))
+ return ERR_PTR(-EINVAL);
+
if (hdr->a_version != cpu_to_le32(F2FS_ACL_VERSION))
return ERR_PTR(-EINVAL);
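
The added guard rejects xattr values shorter than the fixed ACL header before hdr->a_version is ever dereferenced. A standalone toy model of the same bounds check (the struct and return codes are stand-ins, not the kernel types):

	#include <errno.h>
	#include <stdint.h>
	#include <string.h>

	struct acl_header { uint32_t a_version; };	/* stand-in for f2fs_acl_header */

	static int parse_acl(const char *value, size_t size)
	{
		struct acl_header hdr;

		if (size < sizeof(hdr))
			return -EINVAL;	/* short buffer would be over-read */
		memcpy(&hdr, value, sizeof(hdr));
		return hdr.a_version == 1 ? 0 : -EINVAL;
	}
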
@@ -160,7 +160,7 @@ static void *f2fs_acl_to_disk(struct f2fs_sb_info *sbi,
return (void *)f2fs_acl;
fail:
- kfree(f2fs_acl);
+ kvfree(f2fs_acl);
return ERR_PTR(-EINVAL);
}
@@ -190,7 +190,7 @@ static struct posix_acl *__f2fs_get_acl(struct inode *inode, int type,
acl = NULL;
else
acl = ERR_PTR(retval);
- kfree(value);
+ kvfree(value);
if (!IS_ERR(acl))
set_cached_acl(inode, type, acl);
@@ -243,7 +243,7 @@ static int __f2fs_set_acl(struct inode *inode, int type,
error = f2fs_setxattr(inode, name_index, "", value, size, ipage, 0);
- kfree(value);
+ kvfree(value);
if (!error)
set_cached_acl(inode, type, acl);
@@ -288,7 +288,7 @@ static int f2fs_acl_create_masq(struct posix_acl *acl, umode_t *mode_p)
/* assert(atomic_read(acl->a_refcount) == 1); */
FOREACH_ACL_ENTRY(pa, acl, pe) {
- switch(pa->e_tag) {
+ switch (pa->e_tag) {
case ACL_USER_OBJ:
pa->e_perm &= (mode >> 6) | ~S_IRWXO;
mode &= (pa->e_perm << 6) | ~S_IRWXU;
@@ -329,7 +329,7 @@ static int f2fs_acl_create_masq(struct posix_acl *acl, umode_t *mode_p)
}
*mode_p = (*mode_p & ~S_IRWXUGO) | mode;
- return not_equiv;
+ return not_equiv;
}
static int f2fs_acl_create(struct inode *dir, umode_t *mode,
@@ -399,12 +399,16 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage,
error = __f2fs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl,
ipage);
posix_acl_release(default_acl);
+ } else {
+ inode->i_default_acl = NULL;
}
if (acl) {
if (!error)
error = __f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl,
ipage);
posix_acl_release(acl);
+ } else {
+ inode->i_acl = NULL;
}
return error;
diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h
index 2c685185c24d..b96823c59b15 100644
--- a/fs/f2fs/acl.h
+++ b/fs/f2fs/acl.h
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/acl.h
*
@@ -7,10 +8,6 @@
* Portions of this code from linux/fs/ext2/acl.h
*
* Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#ifndef __F2FS_ACL_H__
#define __F2FS_ACL_H__
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 494253a253f8..7d7d5dae1500 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/checkpoint.c
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/bio.h>
@@ -24,10 +21,11 @@
#include <trace/events/f2fs.h>
static struct kmem_cache *ino_entry_slab;
-struct kmem_cache *inode_entry_slab;
+struct kmem_cache *f2fs_inode_entry_slab;
void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io)
{
+ f2fs_build_fault_attr(sbi, 0, 0);
set_ckpt_flags(sbi, CP_ERROR_FLAG);
if (!end_io)
f2fs_flush_merged_writes(sbi);
@@ -36,7 +34,7 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io)
/*
* We guarantee no failure on the returned page.
*/
-struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
+struct page *f2fs_grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
struct address_space *mapping = META_MAPPING(sbi);
struct page *page = NULL;
@@ -46,7 +44,7 @@ repeat:
cond_resched();
goto repeat;
}
- f2fs_wait_on_page_writeback(page, META, true);
+ f2fs_wait_on_page_writeback(page, META, true, true);
if (!PageUptodate(page))
SetPageUptodate(page);
return page;
@@ -68,8 +66,9 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
.old_blkaddr = index,
.new_blkaddr = index,
.encrypted_page = NULL,
- .is_meta = is_meta,
+ .is_por = !is_meta,
};
+ int err;
if (unlikely(!is_meta))
fio.op_flags &= ~REQ_META;
@@ -84,9 +83,10 @@ repeat:
fio.page = page;
- if (f2fs_submit_page_bio(&fio)) {
+ err = f2fs_submit_page_bio(&fio);
+ if (err) {
f2fs_put_page(page, 1);
- goto repeat;
+ return ERR_PTR(err);
}
lock_page(page);
@@ -95,29 +95,67 @@ repeat:
goto repeat;
}
- /*
- * if there is any IO error when accessing device, make our filesystem
- * readonly and make sure do not write checkpoint with non-uptodate
- * meta page.
- */
- if (unlikely(!PageUptodate(page)))
- f2fs_stop_checkpoint(sbi, false);
+ if (unlikely(!PageUptodate(page))) {
+ f2fs_put_page(page, 1);
+ return ERR_PTR(-EIO);
+ }
out:
return page;
}
-struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
+struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
return __get_meta_page(sbi, index, true);
}
+struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index)
+{
+ struct page *page;
+ int count = 0;
+
+retry:
+ page = __get_meta_page(sbi, index, true);
+ if (IS_ERR(page)) {
+ if (PTR_ERR(page) == -EIO &&
+ ++count <= DEFAULT_RETRY_IO_COUNT)
+ goto retry;
+ f2fs_stop_checkpoint(sbi, false);
+ }
+ return page;
+}
+
/* for POR only */
-struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index)
+struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
return __get_meta_page(sbi, index, false);
}
-bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
+static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr,
+ int type)
+{
+ struct seg_entry *se;
+ unsigned int segno, offset;
+ bool exist;
+
+ if (type != DATA_GENERIC_ENHANCE && type != DATA_GENERIC_ENHANCE_READ)
+ return true;
+
+ segno = GET_SEGNO(sbi, blkaddr);
+ offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
+ se = get_seg_entry(sbi, segno);
+
+ exist = f2fs_test_bit(offset, se->cur_valid_map);
+ if (!exist && type == DATA_GENERIC_ENHANCE) {
+ f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d",
+ blkaddr, exist);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ WARN_ON(1);
+ }
+ return exist;
+}
+
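
__is_bitmap_valid() cross-checks a block address against the owning segment's validity bitmap, so a DATA_GENERIC_ENHANCE lookup that points at an unallocated block is caught early. A runnable toy of that bit test (segment geometry is a stand-in for the kernel's cur_valid_map, and this uses LSB-first bit order where the kernel's f2fs_test_bit() differs):

	#include <stdbool.h>
	#include <stdint.h>

	#define BLKS_PER_SEG 512	/* assumption: f2fs default segment size */

	static bool blk_valid(const uint8_t *seg_bitmap, unsigned int blkoff)
	{
		return blkoff < BLKS_PER_SEG &&
		       (seg_bitmap[blkoff >> 3] & (1u << (blkoff & 7)));
	}
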
+bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type)
{
switch (type) {
case META_NAT:
@@ -141,6 +179,25 @@ bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
blkaddr < MAIN_BLKADDR(sbi)))
return false;
break;
+ case DATA_GENERIC:
+ case DATA_GENERIC_ENHANCE:
+ case DATA_GENERIC_ENHANCE_READ:
+ if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
+ blkaddr < MAIN_BLKADDR(sbi))) {
+ f2fs_warn(sbi, "access invalid blkaddr:%u",
+ blkaddr);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ WARN_ON(1);
+ return false;
+ } else {
+ return __is_bitmap_valid(sbi, blkaddr, type);
+ }
+ break;
+ case META_GENERIC:
+ if (unlikely(blkaddr < SEG0_BLKADDR(sbi) ||
+ blkaddr >= MAIN_BLKADDR(sbi)))
+ return false;
+ break;
default:
BUG();
}
@@ -151,7 +208,7 @@ bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
/*
* Readahead CP/NAT/SIT/SSA pages
*/
-int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
+int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
int type, bool sync)
{
struct page *page;
@@ -164,7 +221,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
REQ_RAHEAD,
.encrypted_page = NULL,
.in_list = false,
- .is_meta = (type != META_POR),
+ .is_por = (type == META_POR),
};
struct blk_plug plug;
@@ -174,7 +231,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
blk_start_plug(&plug);
for (; nrpages-- > 0; blkno++) {
- if (!is_valid_blkaddr(sbi, blkno, type))
+ if (!f2fs_is_valid_blkaddr(sbi, blkno, type))
goto out;
switch (type) {
@@ -220,7 +277,7 @@ out:
return blkno - start;
}
-void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
+void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
{
struct page *page;
bool readahead = false;
@@ -231,7 +288,7 @@ void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
f2fs_put_page(page, 0);
if (readahead)
- ra_meta_pages(sbi, index, BIO_MAX_PAGES, META_POR, true);
+ f2fs_ra_meta_pages(sbi, index, BIO_MAX_PAGES, META_POR, true);
}
static int __f2fs_write_meta_page(struct page *page,
@@ -242,22 +299,18 @@ static int __f2fs_write_meta_page(struct page *page,
trace_f2fs_writepage(page, META);
- if (unlikely(f2fs_cp_error(sbi))) {
- dec_page_count(sbi, F2FS_DIRTY_META);
- unlock_page(page);
- return 0;
- }
+ if (unlikely(f2fs_cp_error(sbi)))
+ goto redirty_out;
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
goto redirty_out;
if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0))
goto redirty_out;
- write_meta_page(sbi, page, io_type);
+ f2fs_do_write_meta_page(sbi, page, io_type);
dec_page_count(sbi, F2FS_DIRTY_META);
if (wbc->for_reclaim)
- f2fs_submit_merged_write_cond(sbi, page->mapping->host,
- 0, page->index, META);
+ f2fs_submit_merged_write_cond(sbi, NULL, page, 0, META);
unlock_page(page);
@@ -287,8 +340,9 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
goto skip_write;
/* collect a number of dirty meta pages and write together */
- if (wbc->for_kupdate ||
- get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
+ if (wbc->sync_mode != WB_SYNC_ALL &&
+ get_pages(sbi, F2FS_DIRTY_META) <
+ nr_pages_to_skip(sbi, META))
goto skip_write;
/* if locked failed, cp will flush dirty pages instead */
@@ -297,7 +351,7 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
trace_f2fs_writepages(mapping->host, wbc, META);
diff = nr_pages_to_write(sbi, META, wbc);
- written = sync_meta_pages(sbi, META, wbc->nr_to_write, FS_META_IO);
+ written = f2fs_sync_meta_pages(sbi, META, wbc->nr_to_write, FS_META_IO);
mutex_unlock(&sbi->cp_mutex);
wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
return 0;
@@ -308,13 +362,14 @@ skip_write:
return 0;
}
-long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
+long f2fs_sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
long nr_to_write, enum iostat_type io_type)
{
struct address_space *mapping = META_MAPPING(sbi);
- pgoff_t index = 0, end = ULONG_MAX, prev = ULONG_MAX;
+ pgoff_t index = 0, prev = ULONG_MAX;
struct pagevec pvec;
long nwritten = 0;
+ int nr_pages;
struct writeback_control wbc = {
.for_reclaim = 0,
};
@@ -324,13 +379,9 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
blk_start_plug(&plug);
- while (index <= end) {
- int i, nr_pages;
- nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
- PAGECACHE_TAG_DIRTY,
- min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
- if (unlikely(nr_pages == 0))
- break;
+ while ((nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+ PAGECACHE_TAG_DIRTY))) {
+ int i;
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
@@ -354,9 +405,8 @@ continue_unlock:
goto continue_unlock;
}
- f2fs_wait_on_page_writeback(page, META, true);
+ f2fs_wait_on_page_writeback(page, META, true, true);
- BUG_ON(PageWriteback(page));
if (!clear_page_dirty_for_io(page))
goto continue_unlock;
@@ -390,7 +440,7 @@ static int f2fs_set_meta_page_dirty(struct page *page)
if (!PageDirty(page)) {
__set_page_dirty_nobuffers(page);
inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META);
- SetPagePrivate(page);
+ f2fs_set_page_private(page, 0);
f2fs_trace_pid(page);
return 1;
}
@@ -461,20 +511,20 @@ static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
spin_unlock(&im->ino_lock);
}
-void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
+void f2fs_add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
/* add new dirty ino entry into list */
__add_ino_entry(sbi, ino, 0, type);
}
-void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
+void f2fs_remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
/* remove dirty ino entry from list */
__remove_ino_entry(sbi, ino, type);
}
/* mode should be APPEND_INO or UPDATE_INO */
-bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
+bool f2fs_exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
{
struct inode_management *im = &sbi->im[mode];
struct ino_entry *e;
@@ -485,7 +535,7 @@ bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
return e ? true : false;
}
-void release_ino_entry(struct f2fs_sb_info *sbi, bool all)
+void f2fs_release_ino_entry(struct f2fs_sb_info *sbi, bool all)
{
struct ino_entry *e, *tmp;
int i;
@@ -504,13 +554,13 @@ void release_ino_entry(struct f2fs_sb_info *sbi, bool all)
}
}
-void set_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
+void f2fs_set_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
unsigned int devidx, int type)
{
__add_ino_entry(sbi, ino, devidx, type);
}
-bool is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
+bool f2fs_is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
unsigned int devidx, int type)
{
struct inode_management *im = &sbi->im[type];
@@ -525,20 +575,19 @@ bool is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
return is_dirty;
}
-int acquire_orphan_inode(struct f2fs_sb_info *sbi)
+int f2fs_acquire_orphan_inode(struct f2fs_sb_info *sbi)
{
struct inode_management *im = &sbi->im[ORPHAN_INO];
int err = 0;
spin_lock(&im->ino_lock);
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_ORPHAN)) {
spin_unlock(&im->ino_lock);
f2fs_show_injection_info(FAULT_ORPHAN);
return -ENOSPC;
}
-#endif
+
if (unlikely(im->ino_num >= sbi->max_orphans))
err = -ENOSPC;
else
@@ -548,7 +597,7 @@ int acquire_orphan_inode(struct f2fs_sb_info *sbi)
return err;
}
-void release_orphan_inode(struct f2fs_sb_info *sbi)
+void f2fs_release_orphan_inode(struct f2fs_sb_info *sbi)
{
struct inode_management *im = &sbi->im[ORPHAN_INO];
@@ -558,14 +607,14 @@ void release_orphan_inode(struct f2fs_sb_info *sbi)
spin_unlock(&im->ino_lock);
}
-void add_orphan_inode(struct inode *inode)
+void f2fs_add_orphan_inode(struct inode *inode)
{
/* add new orphan ino entry into list */
__add_ino_entry(F2FS_I_SB(inode), inode->i_ino, 0, ORPHAN_INO);
- update_inode_page(inode);
+ f2fs_update_inode_page(inode);
}
-void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
+void f2fs_remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
/* remove orphan entry from orphan list */
__remove_ino_entry(sbi, ino, ORPHAN_INO);
@@ -575,12 +624,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
struct inode *inode;
struct node_info ni;
- int err = acquire_orphan_inode(sbi);
-
- if (err)
- goto err_out;
-
- __add_ino_entry(sbi, ino, 0, ORPHAN_INO);
+ int err;
inode = f2fs_iget_retry(sbi->sb, ino);
if (IS_ERR(inode)) {
@@ -593,34 +637,35 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
}
err = dquot_initialize(inode);
- if (err)
+ if (err) {
+ iput(inode);
goto err_out;
+ }
- dquot_initialize(inode);
clear_nlink(inode);
/* truncate all the data during iput */
iput(inode);
- get_node_info(sbi, ino, &ni);
+ err = f2fs_get_node_info(sbi, ino, &ni);
+ if (err)
+ goto err_out;
/* ENOMEM was fully retried in f2fs_evict_inode. */
if (ni.blk_addr != NULL_ADDR) {
err = -EIO;
goto err_out;
}
- __remove_ino_entry(sbi, ino, ORPHAN_INO);
return 0;
err_out:
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: orphan failed (ino=%x), run fsck to fix.",
- __func__, ino);
+ f2fs_warn(sbi, "%s: orphan failed (ino=%x), run fsck to fix.",
+ __func__, ino);
return err;
}
-int recover_orphan_inodes(struct f2fs_sb_info *sbi)
+int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi)
{
block_t start_blk, orphan_blocks, i, j;
unsigned int s_flags = sbi->sb->s_flags;
@@ -632,8 +677,13 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG))
return 0;
+ if (bdev_read_only(sbi->sb->s_bdev)) {
+ f2fs_info(sbi, "write access unavailable, skipping orphan cleanup");
+ return 0;
+ }
+
if (s_flags & MS_RDONLY) {
- f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs");
+ f2fs_info(sbi, "orphan cleanup on readonly fs");
sbi->sb->s_flags &= ~MS_RDONLY;
}
@@ -641,19 +691,28 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
/* Needed for iput() to work correctly and not trash data */
sbi->sb->s_flags |= MS_ACTIVE;
- /* Turn on quotas so that they are updated correctly */
+ /*
+ * Turn on quotas which were not enabled for read-only mounts if
+ * filesystem has quota feature, so that they are updated correctly.
+ */
quota_enabled = f2fs_enable_quota_files(sbi, s_flags & MS_RDONLY);
#endif
start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi);
- ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP, true);
+ f2fs_ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP, true);
for (i = 0; i < orphan_blocks; i++) {
- struct page *page = get_meta_page(sbi, start_blk + i);
+ struct page *page;
struct f2fs_orphan_block *orphan_blk;
+ page = f2fs_get_meta_page(sbi, start_blk + i);
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ goto out;
+ }
+
orphan_blk = (struct f2fs_orphan_block *)page_address(page);
for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
@@ -668,6 +727,8 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
/* clear Orphan Flag */
clear_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG);
out:
+ set_sbi_flag(sbi, SBI_IS_RECOVERED);
+
#ifdef CONFIG_QUOTA
/* Turn quotas off */
if (quota_enabled)
@@ -701,7 +762,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
/* loop for each orphan inode entry and write them in journal block */
list_for_each_entry(orphan, head, list) {
if (!page) {
- page = grab_meta_page(sbi, start_blk++);
+ page = f2fs_grab_meta_page(sbi, start_blk++);
orphan_blk =
(struct f2fs_orphan_block *)page_address(page);
memset(orphan_blk, 0, sizeof(*orphan_blk));
@@ -735,27 +796,46 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
}
}
+static __u32 f2fs_checkpoint_chksum(struct f2fs_sb_info *sbi,
+ struct f2fs_checkpoint *ckpt)
+{
+ unsigned int chksum_ofs = le32_to_cpu(ckpt->checksum_offset);
+ __u32 chksum;
+
+ chksum = f2fs_crc32(sbi, ckpt, chksum_ofs);
+ if (chksum_ofs < CP_CHKSUM_OFFSET) {
+ chksum_ofs += sizeof(chksum);
+ chksum = f2fs_chksum(sbi, chksum, (__u8 *)ckpt + chksum_ofs,
+ F2FS_BLKSIZE - chksum_ofs);
+ }
+ return chksum;
+}
+
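
f2fs_checkpoint_chksum() covers the whole checkpoint block but skips the 4-byte checksum field itself whenever checksum_offset sits before CP_CHKSUM_OFFSET (the layout used alongside a large cp_payload); when the offset equals CP_CHKSUM_OFFSET only the leading range is summed, which degenerates to the same thing. A runnable toy of that split coverage (the rolling sum stands in for the kernel's f2fs_crc32/f2fs_chksum pair):

	#include <stddef.h>
	#include <stdint.h>

	#define BLKSIZE 4096	/* stand-in for F2FS_BLKSIZE */

	static uint32_t chksum_skip_field(const uint8_t *blk, size_t ofs)
	{
		uint32_t sum = 0;
		size_t i;

		for (i = 0; i < BLKSIZE; i++) {
			if (i >= ofs && i < ofs + sizeof(uint32_t))
				continue;	/* skip the checksum field itself */
			sum = sum * 31 + blk[i];
		}
		return sum;
	}
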
static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,
struct f2fs_checkpoint **cp_block, struct page **cp_page,
unsigned long long *version)
{
- unsigned long blk_size = sbi->blocksize;
size_t crc_offset = 0;
- __u32 crc = 0;
+ __u32 crc;
+
+ *cp_page = f2fs_get_meta_page(sbi, cp_addr);
+ if (IS_ERR(*cp_page))
+ return PTR_ERR(*cp_page);
- *cp_page = get_meta_page(sbi, cp_addr);
*cp_block = (struct f2fs_checkpoint *)page_address(*cp_page);
crc_offset = le32_to_cpu((*cp_block)->checksum_offset);
- if (crc_offset > (blk_size - sizeof(__le32))) {
- f2fs_msg(sbi->sb, KERN_WARNING,
- "invalid crc_offset: %zu", crc_offset);
+ if (crc_offset < CP_MIN_CHKSUM_OFFSET ||
+ crc_offset > CP_CHKSUM_OFFSET) {
+ f2fs_put_page(*cp_page, 1);
+ f2fs_warn(sbi, "invalid crc_offset: %zu", crc_offset);
return -EINVAL;
}
- crc = cur_cp_crc(*cp_block);
- if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) {
- f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value");
+ crc = f2fs_checkpoint_chksum(sbi, *cp_block);
+ if (crc != cur_cp_crc(*cp_block)) {
+ f2fs_put_page(*cp_page, 1);
+ f2fs_warn(sbi, "invalid crc value");
return -EINVAL;
}
@@ -774,14 +854,21 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
err = get_checkpoint_version(sbi, cp_addr, &cp_block,
&cp_page_1, version);
if (err)
- goto invalid_cp1;
+ return NULL;
+
+ if (le32_to_cpu(cp_block->cp_pack_total_block_count) >
+ sbi->blocks_per_seg) {
+ f2fs_warn(sbi, "invalid cp_pack_total_block_count:%u",
+ le32_to_cpu(cp_block->cp_pack_total_block_count));
+ goto invalid_cp;
+ }
pre_version = *version;
cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
err = get_checkpoint_version(sbi, cp_addr, &cp_block,
&cp_page_2, version);
if (err)
- goto invalid_cp2;
+ goto invalid_cp;
cur_version = *version;
if (cur_version == pre_version) {
@@ -789,14 +876,13 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
f2fs_put_page(cp_page_2, 1);
return cp_page_1;
}
-invalid_cp2:
f2fs_put_page(cp_page_2, 1);
-invalid_cp1:
+invalid_cp:
f2fs_put_page(cp_page_1, 1);
return NULL;
}
-int get_valid_checkpoint(struct f2fs_sb_info *sbi)
+int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi)
{
struct f2fs_checkpoint *cp_block;
struct f2fs_super_block *fsb = sbi->raw_super;
@@ -807,8 +893,10 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
unsigned int cp_blks = 1 + __cp_payload(sbi);
block_t cp_blk_no;
int i;
+ int err;
- sbi->ckpt = f2fs_kzalloc(sbi, cp_blks * blk_size, GFP_KERNEL);
+ sbi->ckpt = f2fs_kzalloc(sbi, array_size(blk_size, cp_blks),
+ GFP_KERNEL);
if (!sbi->ckpt)
return -ENOMEM;
/*
@@ -833,21 +921,24 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
} else if (cp2) {
cur_page = cp2;
} else {
+ err = -EFSCORRUPTED;
goto fail_no_cp;
}
cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
memcpy(sbi->ckpt, cp_block, blk_size);
- /* Sanity checking of checkpoint */
- if (sanity_check_ckpt(sbi))
- goto free_fail_no_cp;
-
if (cur_page == cp1)
sbi->cur_cp_pack = 1;
else
sbi->cur_cp_pack = 2;
+ /* Sanity checking of checkpoint */
+ if (f2fs_sanity_check_ckpt(sbi)) {
+ err = -EFSCORRUPTED;
+ goto free_fail_no_cp;
+ }
+
if (cp_blks <= 1)
goto done;
@@ -859,7 +950,11 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
void *sit_bitmap_ptr;
unsigned char *ckpt = (unsigned char *)sbi->ckpt;
- cur_page = get_meta_page(sbi, cp_blk_no + i);
+ cur_page = f2fs_get_meta_page(sbi, cp_blk_no + i);
+ if (IS_ERR(cur_page)) {
+ err = PTR_ERR(cur_page);
+ goto free_fail_no_cp;
+ }
sit_bitmap_ptr = page_address(cur_page);
memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size);
f2fs_put_page(cur_page, 1);
@@ -873,8 +968,8 @@ free_fail_no_cp:
f2fs_put_page(cp1, 1);
f2fs_put_page(cp2, 1);
fail_no_cp:
- kfree(sbi->ckpt);
- return -EINVAL;
+ kvfree(sbi->ckpt);
+ return err;
}
static void __add_dirty_inode(struct inode *inode, enum inode_type type)
@@ -904,7 +999,7 @@ static void __remove_dirty_inode(struct inode *inode, enum inode_type type)
stat_dec_dirty_inode(F2FS_I_SB(inode), type);
}
-void update_dirty_page(struct inode *inode, struct page *page)
+void f2fs_update_dirty_page(struct inode *inode, struct page *page)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;
@@ -919,11 +1014,11 @@ void update_dirty_page(struct inode *inode, struct page *page)
inode_inc_dirty_pages(inode);
spin_unlock(&sbi->inode_lock[type]);
- SetPagePrivate(page);
+ f2fs_set_page_private(page, 0);
f2fs_trace_pid(page);
}
-void remove_dirty_inode(struct inode *inode)
+void f2fs_remove_dirty_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;
@@ -940,7 +1035,7 @@ void remove_dirty_inode(struct inode *inode)
spin_unlock(&sbi->inode_lock[type]);
}
-int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
+int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
{
struct list_head *head;
struct inode *inode;
@@ -971,22 +1066,18 @@ retry:
if (inode) {
unsigned long cur_ino = inode->i_ino;
- if (is_dir)
- F2FS_I(inode)->cp_task = current;
+ F2FS_I(inode)->cp_task = current;
filemap_fdatawrite(inode->i_mapping);
- if (is_dir)
- F2FS_I(inode)->cp_task = NULL;
+ F2FS_I(inode)->cp_task = NULL;
iput(inode);
/* We need to give the cpu to other writers. */
- if (ino == cur_ino) {
- congestion_wait(BLK_RW_ASYNC, HZ/50);
+ if (ino == cur_ino)
cond_resched();
- } else {
+ else
ino = cur_ino;
- }
} else {
/*
* We should submit bio, since it exists several
@@ -1023,7 +1114,7 @@ int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi)
/* it's on eviction */
if (is_inode_flag_set(inode, FI_DIRTY_INODE))
- update_inode_page(inode);
+ f2fs_update_inode_page(inode);
iput(inode);
}
}
@@ -1043,6 +1134,28 @@ static void __prepare_cp_block(struct f2fs_sb_info *sbi)
ckpt->next_free_nid = cpu_to_le32(last_nid);
}
+static bool __need_flush_quota(struct f2fs_sb_info *sbi)
+{
+ bool ret = false;
+
+ if (!is_journalled_quota(sbi))
+ return false;
+
+ down_write(&sbi->quota_sem);
+ if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) {
+ ret = false;
+ } else if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) {
+ ret = false;
+ } else if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH)) {
+ clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
+ ret = true;
+ } else if (get_pages(sbi, F2FS_DIRTY_QDATA)) {
+ ret = true;
+ }
+ up_write(&sbi->quota_sem);
+ return ret;
+}
+
/*
* Freeze all the FS-operations for checkpoint.
*/
@@ -1054,20 +1167,40 @@ static int block_operations(struct f2fs_sb_info *sbi)
.for_reclaim = 0,
};
struct blk_plug plug;
- int err = 0;
+ int err = 0, cnt = 0;
blk_start_plug(&plug);
-retry_flush_dents:
+retry_flush_quotas:
f2fs_lock_all(sbi);
+ if (__need_flush_quota(sbi)) {
+ int locked;
+
+ if (++cnt > DEFAULT_RETRY_QUOTA_FLUSH_COUNT) {
+ set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
+ set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
+ goto retry_flush_dents;
+ }
+ f2fs_unlock_all(sbi);
+
+ /* only failed during mount/umount/freeze/quotactl */
+ locked = down_read_trylock(&sbi->sb->s_umount);
+ f2fs_quota_sync(sbi->sb, -1);
+ if (locked)
+ up_read(&sbi->sb->s_umount);
+ cond_resched();
+ goto retry_flush_quotas;
+ }
+
+retry_flush_dents:
/* write all the dirty dentry pages */
if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
f2fs_unlock_all(sbi);
- err = sync_dirty_inodes(sbi, DIR_INODE);
+ err = f2fs_sync_dirty_inodes(sbi, DIR_INODE);
if (err)
goto out;
cond_resched();
- goto retry_flush_dents;
+ goto retry_flush_quotas;
}
/*
@@ -1083,7 +1216,7 @@ retry_flush_dents:
if (err)
goto out;
cond_resched();
- goto retry_flush_dents;
+ goto retry_flush_quotas;
}
retry_flush_nodes:
@@ -1091,7 +1224,9 @@ retry_flush_nodes:
if (get_pages(sbi, F2FS_DIRTY_NODES)) {
up_write(&sbi->node_write);
- err = sync_node_pages(sbi, &wbc, false, FS_CP_NODE_IO);
+ atomic_inc(&sbi->wb_sync_req[NODE]);
+ err = f2fs_sync_node_pages(sbi, &wbc, false, FS_CP_NODE_IO);
+ atomic_dec(&sbi->wb_sync_req[NODE]);
if (err) {
up_write(&sbi->node_change);
f2fs_unlock_all(sbi);
@@ -1118,7 +1253,7 @@ static void unblock_operations(struct f2fs_sb_info *sbi)
f2fs_unlock_all(sbi);
}
-static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
+void f2fs_wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
{
DEFINE_WAIT(wait);
@@ -1128,6 +1263,9 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
if (!get_pages(sbi, F2FS_WB_CP_DATA))
break;
+ if (unlikely(f2fs_cp_error(sbi)))
+ break;
+
io_schedule_timeout(5*HZ);
}
finish_wait(&sbi->cp_wait, &wait);
@@ -1166,9 +1304,28 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
else
__clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
- if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
+ if (is_sbi_flag_set(sbi, SBI_NEED_FSCK) ||
+ is_sbi_flag_set(sbi, SBI_IS_RESIZEFS))
__set_ckpt_flags(ckpt, CP_FSCK_FLAG);
+ if (is_sbi_flag_set(sbi, SBI_CP_DISABLED))
+ __set_ckpt_flags(ckpt, CP_DISABLED_FLAG);
+ else
+ __clear_ckpt_flags(ckpt, CP_DISABLED_FLAG);
+
+ if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK))
+ __set_ckpt_flags(ckpt, CP_DISABLED_QUICK_FLAG);
+ else
+ __clear_ckpt_flags(ckpt, CP_DISABLED_QUICK_FLAG);
+
+ if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH))
+ __set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
+ else
+ __clear_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
+
+ if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR))
+ __set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
+
/* set this flag to activate crc|cp_ver for recovery */
__set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG);
__clear_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG);
@@ -1185,24 +1342,28 @@ static void commit_checkpoint(struct f2fs_sb_info *sbi,
/*
* pagevec_lookup_tag and lock_page again will take
- * some extra time. Therefore, update_meta_pages and
- * sync_meta_pages are combined in this function.
+ * some extra time. Therefore, f2fs_update_meta_pages and
+ * f2fs_sync_meta_pages are combined in this function.
*/
- struct page *page = grab_meta_page(sbi, blk_addr);
+ struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
int err;
+ f2fs_wait_on_page_writeback(page, META, true, true);
+
memcpy(page_address(page), src, PAGE_SIZE);
- set_page_dirty(page);
- f2fs_wait_on_page_writeback(page, META, true);
- f2fs_bug_on(sbi, PageWriteback(page));
+ set_page_dirty(page);
if (unlikely(!clear_page_dirty_for_io(page)))
f2fs_bug_on(sbi, 1);
/* writeout cp pack 2 page */
err = __f2fs_write_meta_page(page, &wbc, FS_CP_META_IO);
- f2fs_bug_on(sbi, err);
+ if (unlikely(err && f2fs_cp_error(sbi))) {
+ f2fs_put_page(page, 1);
+ return;
+ }
+ f2fs_bug_on(sbi, err);
f2fs_put_page(page, 0);
/* submit checkpoint (with barrier if NOBARRIER is not set) */
@@ -1225,17 +1386,15 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
int err;
/* Flush all the NAT/SIT pages */
- while (get_pages(sbi, F2FS_DIRTY_META)) {
- sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
- if (unlikely(f2fs_cp_error(sbi)))
- return -EIO;
- }
+ f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
+ f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) &&
+ !f2fs_cp_error(sbi));
/*
* modify checkpoint
* version number is already updated
*/
- ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi));
+ ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi, true));
ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
ckpt->cur_node_segno[i] =
@@ -1255,7 +1414,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
}
/* 2 cp + n data seg summary + orphan inode blocks */
- data_sum_blocks = npages_for_summary_flush(sbi, false);
+ data_sum_blocks = f2fs_npages_for_summary_flush(sbi, false);
spin_lock_irqsave(&sbi->cp_lock, flags);
if (data_sum_blocks < NR_CURSEG_DATA_TYPE)
__set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
@@ -1283,7 +1442,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));
- crc32 = f2fs_crc32(sbi, ckpt, le32_to_cpu(ckpt->checksum_offset));
+ crc32 = f2fs_checkpoint_chksum(sbi, ckpt);
*((__le32 *)((unsigned char *)ckpt +
le32_to_cpu(ckpt->checksum_offset)))
= cpu_to_le32(crc32);
@@ -1300,22 +1459,15 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
blk = start_blk + sbi->blocks_per_seg - nm_i->nat_bits_blocks;
for (i = 0; i < nm_i->nat_bits_blocks; i++)
- update_meta_page(sbi, nm_i->nat_bits +
+ f2fs_update_meta_page(sbi, nm_i->nat_bits +
(i << F2FS_BLKSIZE_BITS), blk + i);
-
- /* Flush all the NAT BITS pages */
- while (get_pages(sbi, F2FS_DIRTY_META)) {
- sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
- if (unlikely(f2fs_cp_error(sbi)))
- return -EIO;
- }
}
/* write out checkpoint buffer at block 0 */
- update_meta_page(sbi, ckpt, start_blk++);
+ f2fs_update_meta_page(sbi, ckpt, start_blk++);
for (i = 1; i < 1 + cp_payload_blks; i++)
- update_meta_page(sbi, (char *)ckpt + i * F2FS_BLKSIZE,
+ f2fs_update_meta_page(sbi, (char *)ckpt + i * F2FS_BLKSIZE,
start_blk++);
if (orphan_num) {
@@ -1323,7 +1475,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
start_blk += orphan_blocks;
}
- write_data_summaries(sbi, start_blk);
+ f2fs_write_data_summaries(sbi, start_blk);
start_blk += data_sum_blocks;
/* Record write statistics in the hot node summary */
@@ -1334,7 +1486,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
seg_i->journal->info.kbytes_written = cpu_to_le64(kbytes_written);
if (__remain_node_summaries(cpc->reason)) {
- write_node_summaries(sbi, start_blk);
+ f2fs_write_node_summaries(sbi, start_blk);
start_blk += NR_CURSEG_NODE_TYPE;
}
@@ -1343,13 +1495,12 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
percpu_counter_set(&sbi->alloc_valid_block_count, 0);
/* Here, we have one bio having CP pack except cp pack 2 page */
- sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
+ f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
+ f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) &&
+ !f2fs_cp_error(sbi));
/* wait for previous submitted meta pages writeback */
- wait_on_all_pages_writeback(sbi);
-
- if (unlikely(f2fs_cp_error(sbi)))
- return -EIO;
+ f2fs_wait_on_all_pages_writeback(sbi);
/* flush all device cache */
err = f2fs_flush_device_cache(sbi);
@@ -1358,15 +1509,28 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* barrier and flush checkpoint cp pack 2 page if it can */
commit_checkpoint(sbi, ckpt, start_blk);
- wait_on_all_pages_writeback(sbi);
+ f2fs_wait_on_all_pages_writeback(sbi);
- release_ino_entry(sbi, false);
+ /*
+ * invalidate intermediate page cache borrowed from meta inode
+ * which are used for migration of encrypted inode's blocks.
+ */
+ if (f2fs_sb_has_encrypt(sbi))
+ invalidate_mapping_pages(META_MAPPING(sbi),
+ MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1);
- if (unlikely(f2fs_cp_error(sbi)))
- return -EIO;
+ f2fs_release_ino_entry(sbi, false);
+
+ f2fs_reset_fsync_node_info(sbi);
clear_sbi_flag(sbi, SBI_IS_DIRTY);
clear_sbi_flag(sbi, SBI_NEED_CP);
+ clear_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
+
+ spin_lock(&sbi->stat_lock);
+ sbi->unusable_block_count = 0;
+ spin_unlock(&sbi->stat_lock);
+
__set_cp_next_pack(sbi);
/*
@@ -1379,18 +1543,26 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_DENTS));
- return 0;
+ return unlikely(f2fs_cp_error(sbi)) ? -EIO : 0;
}
/*
* We guarantee that this checkpoint procedure will not fail.
*/
-int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
unsigned long long ckpt_ver;
int err = 0;
+ if (f2fs_readonly(sbi->sb) || f2fs_hw_is_readonly(sbi))
+ return -EROFS;
+
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+ if (cpc->reason != CP_PAUSE)
+ return 0;
+ f2fs_warn(sbi, "Start checkpoint disabled!");
+ }
mutex_lock(&sbi->cp_mutex);
if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
@@ -1401,10 +1573,6 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
err = -EIO;
goto out;
}
- if (f2fs_readonly(sbi->sb)) {
- err = -EROFS;
- goto out;
- }
trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");
@@ -1418,7 +1586,7 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* this is the case of multiple fstrims without any changes */
if (cpc->reason & CP_DISCARD) {
- if (!exist_trim_candidates(sbi, cpc)) {
+ if (!f2fs_exist_trim_candidates(sbi, cpc)) {
unblock_operations(sbi);
goto out;
}
@@ -1426,8 +1594,8 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
if (NM_I(sbi)->dirty_nat_cnt == 0 &&
SIT_I(sbi)->dirty_sentries == 0 &&
prefree_segments(sbi) == 0) {
- flush_sit_entries(sbi, cpc);
- clear_prefree_segments(sbi, cpc);
+ f2fs_flush_sit_entries(sbi, cpc);
+ f2fs_clear_prefree_segments(sbi, cpc);
unblock_operations(sbi);
goto out;
}
@@ -1442,22 +1610,24 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);
/* write cached NAT/SIT entries to NAT/SIT area */
- flush_nat_entries(sbi, cpc);
- flush_sit_entries(sbi, cpc);
+ err = f2fs_flush_nat_entries(sbi, cpc);
+ if (err)
+ goto stop;
+
+ f2fs_flush_sit_entries(sbi, cpc);
/* unlock all the fs_lock[] in do_checkpoint() */
err = do_checkpoint(sbi, cpc);
if (err)
- release_discard_addrs(sbi);
+ f2fs_release_discard_addrs(sbi);
else
- clear_prefree_segments(sbi, cpc);
-
+ f2fs_clear_prefree_segments(sbi, cpc);
+stop:
unblock_operations(sbi);
stat_inc_cp_count(sbi->stat_info);
if (cpc->reason & CP_RECOVERY)
- f2fs_msg(sbi->sb, KERN_NOTICE,
- "checkpoint: version = %llx", ckpt_ver);
+ f2fs_notice(sbi, "checkpoint: version = %llx", ckpt_ver);
/* do checkpoint periodically */
f2fs_update_time(sbi, CP_TIME);
@@ -1467,7 +1637,7 @@ out:
return err;
}
-void init_ino_entry_info(struct f2fs_sb_info *sbi)
+void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi)
{
int i;
@@ -1485,23 +1655,23 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi)
F2FS_ORPHANS_PER_BLOCK;
}
-int __init create_checkpoint_caches(void)
+int __init f2fs_create_checkpoint_caches(void)
{
ino_entry_slab = f2fs_kmem_cache_create("f2fs_ino_entry",
sizeof(struct ino_entry));
if (!ino_entry_slab)
return -ENOMEM;
- inode_entry_slab = f2fs_kmem_cache_create("f2fs_inode_entry",
+ f2fs_inode_entry_slab = f2fs_kmem_cache_create("f2fs_inode_entry",
sizeof(struct inode_entry));
- if (!inode_entry_slab) {
+ if (!f2fs_inode_entry_slab) {
kmem_cache_destroy(ino_entry_slab);
return -ENOMEM;
}
return 0;
}
-void destroy_checkpoint_caches(void)
+void f2fs_destroy_checkpoint_caches(void)
{
kmem_cache_destroy(ino_entry_slab);
- kmem_cache_destroy(inode_entry_slab);
+ kmem_cache_destroy(f2fs_inode_entry_slab);
}
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 3d846b027fa1..568aabab15a4 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/data.c
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
@@ -17,6 +14,7 @@
#include <linux/pagevec.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
+#include <linux/swap.h>
#include <linux/prefetch.h>
#include <linux/uio.h>
#include <linux/cleancache.h>
@@ -48,11 +46,30 @@ static bool __is_cp_guaranteed(struct page *page)
if (inode->i_ino == F2FS_META_INO(sbi) ||
inode->i_ino == F2FS_NODE_INO(sbi) ||
S_ISDIR(inode->i_mode) ||
+ (S_ISREG(inode->i_mode) &&
+ (f2fs_is_atomic_file(inode) || IS_NOQUOTA(inode))) ||
is_cold_data(page))
return true;
return false;
}
+static enum count_type __read_io_type(struct page *page)
+{
+ struct address_space *mapping = page_file_mapping(page);
+
+ if (mapping) {
+ struct inode *inode = mapping->host;
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+
+ if (inode->i_ino == F2FS_META_INO(sbi))
+ return F2FS_RD_META;
+
+ if (inode->i_ino == F2FS_NODE_INO(sbi))
+ return F2FS_RD_NODE;
+ }
+ return F2FS_RD_DATA;
+}
+
/* postprocessing steps for read bios */
enum bio_post_read_step {
STEP_INITIAL = 0,
@@ -78,10 +95,12 @@ static void __read_end_io(struct bio *bio)
/* PG_error was set if any post_read step failed */
if (bio->bi_error || PageError(page)) {
ClearPageUptodate(page);
- SetPageError(page);
+ /* will re-read again later */
+ ClearPageError(page);
} else {
SetPageUptodate(page);
}
+ dec_page_count(F2FS_P_SB(page), __read_io_type(page));
unlock_page(page);
}
if (bio->bi_private)
@@ -124,12 +143,12 @@ static bool f2fs_bio_post_read_required(struct bio *bio)
static void f2fs_read_end_io(struct bio *bio)
{
-#ifdef CONFIG_F2FS_FAULT_INJECTION
- if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) {
- f2fs_show_injection_info(FAULT_IO);
+ struct page *first_page = bio->bi_io_vec[0].bv_page;
+
+ if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_READ_IO)) {
+ f2fs_show_injection_info(FAULT_READ_IO);
bio->bi_error = -EIO;
}
-#endif
if (f2fs_bio_post_read_required(bio)) {
struct bio_post_read_ctx *ctx = bio->bi_private;
@@ -139,6 +158,13 @@ static void f2fs_read_end_io(struct bio *bio)
return;
}
+ if (first_page != NULL &&
+ __read_io_type(first_page) == F2FS_RD_DATA) {
+ trace_android_fs_dataread_end(first_page->mapping->host,
+ page_offset(first_page),
+ bio->bi_iter.bi_size);
+ }
+
__read_end_io(bio);
}
@@ -148,6 +174,11 @@ static void f2fs_write_end_io(struct bio *bio)
struct bio_vec *bvec;
int i;
+ if (time_to_inject(sbi, FAULT_WRITE_IO)) {
+ f2fs_show_injection_info(FAULT_WRITE_IO);
+ bio->bi_error = -EIO;
+ }
+
bio_for_each_segment_all(bvec, bio, i) {
struct page *page = bvec->bv_page;
enum count_type type = WB_DATA_TYPE(page);
@@ -175,6 +206,8 @@ static void f2fs_write_end_io(struct bio *bio)
page->index != nid_of_node(page));
dec_page_count(sbi, type);
+ if (f2fs_in_warm_node_list(sbi, page))
+ f2fs_del_fsync_node_entry(sbi, page);
clear_cold_data(page);
end_page_writeback(page);
}
@@ -194,12 +227,14 @@ struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
struct block_device *bdev = sbi->sb->s_bdev;
int i;
- for (i = 0; i < sbi->s_ndevs; i++) {
- if (FDEV(i).start_blk <= blk_addr &&
- FDEV(i).end_blk >= blk_addr) {
- blk_addr -= FDEV(i).start_blk;
- bdev = FDEV(i).bdev;
- break;
+ if (f2fs_is_multi_device(sbi)) {
+ for (i = 0; i < sbi->s_ndevs; i++) {
+ if (FDEV(i).start_blk <= blk_addr &&
+ FDEV(i).end_blk >= blk_addr) {
+ blk_addr -= FDEV(i).start_blk;
+ bdev = FDEV(i).bdev;
+ break;
+ }
}
}
if (bio) {
@@ -213,6 +248,9 @@ int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
{
int i;
+ if (!f2fs_is_multi_device(sbi))
+ return 0;
+
for (i = 0; i < sbi->s_ndevs; i++)
if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
return i;
@@ -244,7 +282,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
} else {
bio->bi_end_io = f2fs_write_end_io;
bio->bi_private = sbi;
- bio->bi_write_hint = io_type_to_rw_hint(sbi, type, temp);
+ bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, type, temp);
}
if (wbc)
wbc_init_bio(wbc, bio);
@@ -261,7 +299,7 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi,
if (type != DATA && type != NODE)
goto submit_io;
- if (f2fs_sb_has_blkzoned(sbi->sb) && current->plug)
+ if (test_opt(sbi, LFS) && current->plug)
blk_finish_plug(current->plug);
start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS;
@@ -274,9 +312,10 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi,
for (; start < F2FS_IO_SIZE(sbi); start++) {
struct page *page =
mempool_alloc(sbi->write_io_dummy,
- GFP_NOIO | __GFP_ZERO | __GFP_NOFAIL);
+ GFP_NOIO | __GFP_NOFAIL);
f2fs_bug_on(sbi, !page);
+ zero_user_segment(page, 0, PAGE_SIZE);
SetPagePrivate(page);
set_page_private(page, (unsigned long)DUMMY_WRITTEN_PAGE);
lock_page(page);
@@ -298,6 +337,32 @@ submit_io:
submit_bio(bio_op(bio), bio);
}
+static void __f2fs_submit_read_bio(struct f2fs_sb_info *sbi,
+ struct bio *bio, enum page_type type)
+{
+ if (trace_android_fs_dataread_start_enabled() && (type == DATA)) {
+ struct page *first_page = bio->bi_io_vec[0].bv_page;
+
+ if (first_page != NULL &&
+ __read_io_type(first_page) == F2FS_RD_DATA) {
+ char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+ path = android_fstrace_get_pathname(pathbuf,
+ MAX_TRACE_PATHBUF_LEN,
+ first_page->mapping->host);
+
+ trace_android_fs_dataread_start(
+ first_page->mapping->host,
+ page_offset(first_page),
+ bio->bi_iter.bi_size,
+ current->pid,
+ path,
+ current->comm);
+ }
+ }
+ __submit_bio(sbi, bio, type);
+}
+
static void __submit_merged_bio(struct f2fs_bio_info *io)
{
struct f2fs_io_info *fio = &io->fio;
@@ -316,31 +381,30 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
io->bio = NULL;
}
-static bool __has_merged_page(struct f2fs_bio_info *io,
- struct inode *inode, nid_t ino, pgoff_t idx)
+static bool __has_merged_page(struct bio *bio, struct inode *inode,
+ struct page *page, nid_t ino)
{
struct bio_vec *bvec;
struct page *target;
int i;
- if (!io->bio)
+ if (!bio)
return false;
- if (!inode && !ino)
+ if (!inode && !page && !ino)
return true;
- bio_for_each_segment_all(bvec, io->bio, i) {
+ bio_for_each_segment_all(bvec, bio, i) {
if (bvec->bv_page->mapping)
target = bvec->bv_page;
else
target = fscrypt_control_page(bvec->bv_page);
- if (idx != target->index)
- continue;
-
if (inode && inode == target->mapping->host)
return true;
+ if (page && page == target)
+ return true;
if (ino && ino == ino_of_node(target))
return true;
}
@@ -348,28 +412,6 @@ static bool __has_merged_page(struct f2fs_bio_info *io,
return false;
}
-static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode,
- nid_t ino, pgoff_t idx, enum page_type type)
-{
- enum page_type btype = PAGE_TYPE_OF_BIO(type);
- enum temp_type temp;
- struct f2fs_bio_info *io;
- bool ret = false;
-
- for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
- io = sbi->write_io[btype] + temp;
-
- down_read(&io->io_rwsem);
- ret = __has_merged_page(io, inode, ino, idx);
- up_read(&io->io_rwsem);
-
- /* TODO: use HOT temp only for meta pages now. */
- if (ret || btype == META)
- break;
- }
- return ret;
-}
-
static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
enum page_type type, enum temp_type temp)
{
@@ -391,17 +433,23 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
}
static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
- struct inode *inode, nid_t ino, pgoff_t idx,
- enum page_type type, bool force)
+ struct inode *inode, struct page *page,
+ nid_t ino, enum page_type type, bool force)
{
enum temp_type temp;
-
- if (!force && !has_merged_page(sbi, inode, ino, idx, type))
- return;
+ bool ret = true;
for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
+ if (!force) {
+ enum page_type btype = PAGE_TYPE_OF_BIO(type);
+ struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
- __f2fs_submit_merged_write(sbi, type, temp);
+ down_read(&io->io_rwsem);
+ ret = __has_merged_page(io->bio, inode, page, ino);
+ up_read(&io->io_rwsem);
+ }
+ if (ret)
+ __f2fs_submit_merged_write(sbi, type, temp);
/* TODO: use HOT temp only for meta pages now. */
if (type >= META)
@@ -411,14 +459,14 @@ static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type)
{
- __submit_merged_write_cond(sbi, NULL, 0, 0, type, true);
+ __submit_merged_write_cond(sbi, NULL, NULL, 0, type, true);
}
void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
- struct inode *inode, nid_t ino, pgoff_t idx,
- enum page_type type)
+ struct inode *inode, struct page *page,
+ nid_t ino, enum page_type type)
{
- __submit_merged_write_cond(sbi, inode, ino, idx, type, false);
+ __submit_merged_write_cond(sbi, inode, page, ino, type, false);
}
void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
@@ -438,7 +486,11 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
struct page *page = fio->encrypted_page ?
fio->encrypted_page : fio->page;
- verify_block_addr(fio, fio->new_blkaddr);
+ if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
+ fio->is_por ? META_POR : (__is_meta_io(fio) ?
+ META_GENERIC : DATA_GENERIC_ENHANCE)))
+ return -EFSCORRUPTED;
+
trace_f2fs_submit_page_bio(page, fio);
f2fs_trace_ios(fio, 0);
@@ -450,22 +502,80 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
bio_put(bio);
return -EFAULT;
}
+
+ if (fio->io_wbc && !is_read_io(fio->op))
+ wbc_account_io(fio->io_wbc, page, PAGE_SIZE);
+
bio_set_op_attrs(bio, fio->op, fio->op_flags);
- __submit_bio(fio->sbi, bio, fio->type);
+ inc_page_count(fio->sbi, is_read_io(fio->op) ?
+ __read_io_type(page): WB_DATA_TYPE(fio->page));
- if (!is_read_io(fio->op))
- inc_page_count(fio->sbi, WB_DATA_TYPE(fio->page));
+ __f2fs_submit_read_bio(fio->sbi, bio, fio->type);
return 0;
}
-int f2fs_submit_page_write(struct f2fs_io_info *fio)
+int f2fs_merge_page_bio(struct f2fs_io_info *fio)
+{
+ struct bio *bio = *fio->bio;
+ struct page *page = fio->encrypted_page ?
+ fio->encrypted_page : fio->page;
+
+ if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
+ __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
+ return -EFSCORRUPTED;
+
+ trace_f2fs_submit_page_bio(page, fio);
+ f2fs_trace_ios(fio, 0);
+
+ if (bio && (*fio->last_block + 1 != fio->new_blkaddr ||
+ !__same_bdev(fio->sbi, fio->new_blkaddr, bio))) {
+ __submit_bio(fio->sbi, bio, fio->type);
+ bio = NULL;
+ }
+alloc_new:
+ if (!bio) {
+ bio = __bio_alloc(fio->sbi, fio->new_blkaddr, fio->io_wbc,
+ BIO_MAX_PAGES, false, fio->type, fio->temp);
+ bio_set_op_attrs(bio, fio->op, fio->op_flags);
+ }
+
+ if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
+ __submit_bio(fio->sbi, bio, fio->type);
+ bio = NULL;
+ goto alloc_new;
+ }
+
+ if (fio->io_wbc)
+ wbc_account_io(fio->io_wbc, page, PAGE_SIZE);
+
+ inc_page_count(fio->sbi, WB_DATA_TYPE(page));
+
+ *fio->last_block = fio->new_blkaddr;
+ *fio->bio = bio;
+
+ return 0;
+}
+
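
f2fs_merge_page_bio() keeps one bio open per caller and flushes it whenever the next block is not physically contiguous or crosses a device boundary, then retries the page against a fresh bio. A runnable toy of that batching rule (printf stands in for bio submission; block numbers are illustrative):

	#include <stdio.h>

	struct batch { unsigned long start, len; };

	static void flush(struct batch *b)
	{
		if (b->len)
			printf("submit [%lu, +%lu)\n", b->start, b->len);
		b->len = 0;
	}

	static void add_block(struct batch *b, unsigned long blk)
	{
		if (b->len && b->start + b->len != blk)
			flush(b);	/* not contiguous: submit and start over */
		if (!b->len)
			b->start = blk;
		b->len++;
	}

	int main(void)
	{
		struct batch b = { 0, 0 };
		unsigned long blks[] = { 10, 11, 12, 40, 41 };
		unsigned long i;

		for (i = 0; i < sizeof(blks) / sizeof(blks[0]); i++)
			add_block(&b, blks[i]);
		flush(&b);	/* prints [10, +3) then [40, +2) */
		return 0;
	}
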
+static void f2fs_submit_ipu_bio(struct f2fs_sb_info *sbi, struct bio **bio,
+ struct page *page)
+{
+ if (!bio)
+ return;
+
+ if (!__has_merged_page(*bio, NULL, page, 0))
+ return;
+
+ __submit_bio(sbi, *bio, DATA);
+ *bio = NULL;
+}
+
+void f2fs_submit_page_write(struct f2fs_io_info *fio)
{
struct f2fs_sb_info *sbi = fio->sbi;
enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
struct page *bio_page;
- int err = 0;
f2fs_bug_on(sbi, is_read_io(fio->op));
@@ -475,7 +585,7 @@ next:
spin_lock(&io->io_lock);
if (list_empty(&io->io_list)) {
spin_unlock(&io->io_lock);
- goto out_fail;
+ goto out;
}
fio = list_first_entry(&io->io_list,
struct f2fs_io_info, list);
@@ -483,9 +593,7 @@ next:
spin_unlock(&io->io_lock);
}
- if (fio->old_blkaddr != NEW_ADDR)
- verify_block_addr(fio, fio->old_blkaddr);
- verify_block_addr(fio, fio->new_blkaddr);
+ verify_fio_blkaddr(fio);
bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
@@ -502,9 +610,9 @@ alloc_new:
if (io->bio == NULL) {
if ((fio->type == DATA || fio->type == NODE) &&
fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
- err = -EAGAIN;
dec_page_count(sbi, WB_DATA_TYPE(bio_page));
- goto out_fail;
+ fio->retry = true;
+ goto skip;
}
io->bio = __bio_alloc(sbi, fio->new_blkaddr, fio->io_wbc,
BIO_MAX_PAGES, false,
@@ -524,16 +632,18 @@ alloc_new:
f2fs_trace_ios(fio, 0);
trace_f2fs_submit_page_write(fio->page, fio);
-
+skip:
if (fio->in_list)
goto next;
-out_fail:
+out:
+ if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
+ f2fs_is_checkpoint_ready(sbi))
+ __submit_merged_bio(io);
up_write(&io->io_rwsem);
- return err;
}
static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
- unsigned nr_pages)
+ unsigned nr_pages, unsigned op_flag)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct bio *bio;
@@ -545,7 +655,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
return ERR_PTR(-ENOMEM);
f2fs_target_device(sbi, blkaddr, bio);
bio->bi_end_io = f2fs_read_end_io;
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
+ bio_set_op_attrs(bio, REQ_OP_READ, op_flag);
if (f2fs_encrypted_file(inode))
post_read_steps |= 1 << STEP_DECRYPT;
@@ -558,9 +668,6 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
ctx->bio = bio;
ctx->enabled_steps = post_read_steps;
bio->bi_private = ctx;
-
- /* wait the page to be moved by cleaning */
- f2fs_wait_on_block_writeback(sbi, blkaddr);
}
return bio;
@@ -570,16 +677,23 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
static int f2fs_submit_page_read(struct inode *inode, struct page *page,
block_t blkaddr)
{
- struct bio *bio = f2fs_grab_read_bio(inode, blkaddr, 1);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct bio *bio;
+ bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0);
if (IS_ERR(bio))
return PTR_ERR(bio);
+ /* wait for GCed page writeback via META_MAPPING */
+ f2fs_wait_on_block_writeback(inode, blkaddr);
+
if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
bio_put(bio);
return -EFAULT;
}
- __submit_bio(F2FS_I_SB(inode), bio, DATA);
+ ClearPageError(page);
+ inc_page_count(sbi, F2FS_RD_DATA);
+ __f2fs_submit_read_bio(sbi, bio, DATA);
return 0;
}
@@ -603,9 +717,9 @@ static void __set_data_blkaddr(struct dnode_of_data *dn)
* ->node_page
* update block addresses in the node page
*/
-void set_data_blkaddr(struct dnode_of_data *dn)
+void f2fs_set_data_blkaddr(struct dnode_of_data *dn)
{
- f2fs_wait_on_page_writeback(dn->node_page, NODE, true);
+ f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
__set_data_blkaddr(dn);
if (set_page_dirty(dn->node_page))
dn->node_changed = true;
@@ -614,12 +728,12 @@ void set_data_blkaddr(struct dnode_of_data *dn)
void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
{
dn->data_blkaddr = blkaddr;
- set_data_blkaddr(dn);
+ f2fs_set_data_blkaddr(dn);
f2fs_update_extent_cache(dn);
}
/* dn->ofs_in_node will be returned with up-to-date last block pointer */
-int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
+int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
int err;
@@ -635,7 +749,7 @@ int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
dn->ofs_in_node, count);
- f2fs_wait_on_page_writeback(dn->node_page, NODE, true);
+ f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
for (; count > 0; dn->ofs_in_node++) {
block_t blkaddr = datablock_addr(dn->inode,
@@ -653,12 +767,12 @@ int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
}
/* Should keep dn->ofs_in_node unchanged */
-int reserve_new_block(struct dnode_of_data *dn)
+int f2fs_reserve_new_block(struct dnode_of_data *dn)
{
unsigned int ofs_in_node = dn->ofs_in_node;
int ret;
- ret = reserve_new_blocks(dn, 1);
+ ret = f2fs_reserve_new_blocks(dn, 1);
dn->ofs_in_node = ofs_in_node;
return ret;
}
@@ -668,12 +782,12 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
bool need_put = dn->inode_page ? false : true;
int err;
- err = get_dnode_of_data(dn, index, ALLOC_NODE);
+ err = f2fs_get_dnode_of_data(dn, index, ALLOC_NODE);
if (err)
return err;
if (dn->data_blkaddr == NULL_ADDR)
- err = reserve_new_block(dn);
+ err = f2fs_reserve_new_block(dn);
if (err || need_put)
f2fs_put_dnode(dn);
return err;
@@ -692,7 +806,7 @@ int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
return f2fs_reserve_block(dn, index);
}
-struct page *get_read_data_page(struct inode *inode, pgoff_t index,
+struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
int op_flags, bool for_write)
{
struct address_space *mapping = inode->i_mapping;
@@ -707,11 +821,16 @@ struct page *get_read_data_page(struct inode *inode, pgoff_t index,
if (f2fs_lookup_extent_cache(inode, index, &ei)) {
dn.data_blkaddr = ei.blk + index - ei.fofs;
+ if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr,
+ DATA_GENERIC_ENHANCE_READ)) {
+ err = -EFSCORRUPTED;
+ goto put_err;
+ }
goto got_it;
}
set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
+ err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
if (err)
goto put_err;
f2fs_put_dnode(&dn);
@@ -720,6 +839,13 @@ struct page *get_read_data_page(struct inode *inode, pgoff_t index,
err = -ENOENT;
goto put_err;
}
+ if (dn.data_blkaddr != NEW_ADDR &&
+ !f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
+ dn.data_blkaddr,
+ DATA_GENERIC_ENHANCE)) {
+ err = -EFSCORRUPTED;
+ goto put_err;
+ }
got_it:
if (PageUptodate(page)) {
unlock_page(page);
@@ -730,7 +856,8 @@ got_it:
* A new dentry page is allocated but cannot be written, since its
* new inode page couldn't be allocated due to -ENOSPC.
* In such a case, its blkaddr can remain NEW_ADDR.
- * see, f2fs_add_link -> get_new_data_page -> init_inode_metadata.
+ * see, f2fs_add_link -> f2fs_get_new_data_page ->
+ * f2fs_init_inode_metadata.
*/
if (dn.data_blkaddr == NEW_ADDR) {
zero_user_segment(page, 0, PAGE_SIZE);
@@ -750,7 +877,7 @@ put_err:
return ERR_PTR(err);
}
-struct page *find_data_page(struct inode *inode, pgoff_t index)
+struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index)
{
struct address_space *mapping = inode->i_mapping;
struct page *page;
@@ -760,7 +887,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index)
return page;
f2fs_put_page(page, 0);
- page = get_read_data_page(inode, index, REQ_SYNC, false);
+ page = f2fs_get_read_data_page(inode, index, REQ_SYNC, false);
if (IS_ERR(page))
return page;
@@ -780,13 +907,13 @@ struct page *find_data_page(struct inode *inode, pgoff_t index)
* Because the callers, functions in dir.c and GC, should be able to know
* whether this page exists or not.
*/
-struct page *get_lock_data_page(struct inode *inode, pgoff_t index,
+struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
bool for_write)
{
struct address_space *mapping = inode->i_mapping;
struct page *page;
repeat:
- page = get_read_data_page(inode, index, REQ_SYNC, for_write);
+ page = f2fs_get_read_data_page(inode, index, REQ_SYNC, for_write);
if (IS_ERR(page))
return page;
@@ -812,7 +939,7 @@ repeat:
* Note that, ipage is set only by make_empty_dir, and if any error occur,
* ipage should be released by this function.
*/
-struct page *get_new_data_page(struct inode *inode,
+struct page *f2fs_get_new_data_page(struct inode *inode,
struct page *ipage, pgoff_t index, bool new_i_size)
{
struct address_space *mapping = inode->i_mapping;
@@ -851,7 +978,7 @@ struct page *get_new_data_page(struct inode *inode,
/* if ipage exists, blkaddr should be NEW_ADDR */
f2fs_bug_on(F2FS_I_SB(inode), ipage);
- page = get_lock_data_page(inode, index, true);
+ page = f2fs_get_lock_data_page(inode, index, true);
if (IS_ERR(page))
return page;
}
@@ -867,35 +994,39 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
struct f2fs_summary sum;
struct node_info ni;
- pgoff_t fofs;
+ block_t old_blkaddr;
blkcnt_t count = 1;
int err;
if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
return -EPERM;
+ err = f2fs_get_node_info(sbi, dn->nid, &ni);
+ if (err)
+ return err;
+
dn->data_blkaddr = datablock_addr(dn->inode,
dn->node_page, dn->ofs_in_node);
- if (dn->data_blkaddr == NEW_ADDR)
+ if (dn->data_blkaddr != NULL_ADDR)
goto alloc;
if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
return err;
alloc:
- get_node_info(sbi, dn->nid, &ni);
set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
-
- allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr,
+ old_blkaddr = dn->data_blkaddr;
+ f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
&sum, seg_type, NULL, false);
- set_data_blkaddr(dn);
-
- /* update i_size */
- fofs = start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
- dn->ofs_in_node;
- if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_SHIFT))
- f2fs_i_size_write(dn->inode,
- ((loff_t)(fofs + 1) << PAGE_SHIFT));
+ if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
+ invalidate_mapping_pages(META_MAPPING(sbi),
+ old_blkaddr, old_blkaddr);
+ f2fs_set_data_blkaddr(dn);
+
+ /*
+ * i_size will be updated by direct_IO. Otherwise, we'll get stale
+ * data from unwritten block via dio_read.
+ */
return 0;
}
@@ -914,6 +1045,9 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
return err;
}
+ if (direct_io && allow_outplace_dio(inode, iocb, from))
+ return 0;
+
if (is_inode_flag_set(inode, FI_NO_PREALLOC))
return 0;
@@ -927,10 +1061,11 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
map.m_next_pgofs = NULL;
map.m_next_extent = NULL;
map.m_seg_type = NO_CHECK_TYPE;
+ map.m_may_create = true;
if (direct_io) {
- map.m_seg_type = rw_hint_to_seg_type(iocb->ki_hint);
- flag = f2fs_force_buffered_io(inode, WRITE) ?
+ map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint);
+ flag = f2fs_force_buffered_io(inode, iocb, from) ?
F2FS_GET_BLOCK_PRE_AIO :
F2FS_GET_BLOCK_PRE_DIO;
goto map_blocks;
@@ -955,7 +1090,7 @@ map_blocks:
return err;
}
-static inline void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
+void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
{
if (flag == F2FS_GET_BLOCK_PRE_AIO) {
if (lock)
@@ -985,7 +1120,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
unsigned int maxblocks = map->m_len;
struct dnode_of_data dn;
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- int mode = create ? ALLOC_NODE : LOOKUP_NODE;
+ int mode = map->m_may_create ? ALLOC_NODE : LOOKUP_NODE;
pgoff_t pgofs, end_offset, end;
int err = 0, ofs = 1;
unsigned int ofs_in_node, last_ofs_in_node;
@@ -1005,21 +1140,30 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
end = pgofs + maxblocks;
if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
+ if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO &&
+ map->m_may_create)
+ goto next_dnode;
+
map->m_pblk = ei.blk + pgofs - ei.fofs;
map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
map->m_flags = F2FS_MAP_MAPPED;
if (map->m_next_extent)
*map->m_next_extent = pgofs + map->m_len;
+
+ /* for hardware encryption, but to avoid potential issues in the future */
+ if (flag == F2FS_GET_BLOCK_DIO)
+ f2fs_wait_on_block_writeback_range(inode,
+ map->m_pblk, map->m_len);
goto out;
}
next_dnode:
- if (create)
+ if (map->m_may_create)
__do_map_lock(sbi, flag, true);
/* When reading holes, we need its node page */
set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = get_dnode_of_data(&dn, pgofs, mode);
+ err = f2fs_get_dnode_of_data(&dn, pgofs, mode);
if (err) {
if (flag == F2FS_GET_BLOCK_BMAP)
map->m_pblk = 0;
@@ -1027,10 +1171,10 @@ next_dnode:
err = 0;
if (map->m_next_pgofs)
*map->m_next_pgofs =
- get_next_page_offset(&dn, pgofs);
+ f2fs_get_next_page_offset(&dn, pgofs);
if (map->m_next_extent)
*map->m_next_extent =
- get_next_page_offset(&dn, pgofs);
+ f2fs_get_next_page_offset(&dn, pgofs);
}
goto unlock_out;
}
@@ -1043,7 +1187,23 @@ next_dnode:
next_block:
blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node);
- if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) {
+ if (__is_valid_data_blkaddr(blkaddr) &&
+ !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
+ err = -EFSCORRUPTED;
+ goto sync_out;
+ }
+
+ if (__is_valid_data_blkaddr(blkaddr)) {
+ /* use out-of-place update for direct IO under LFS mode */
+ if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO &&
+ map->m_may_create) {
+ err = __allocate_data_block(&dn, map->m_seg_type);
+ if (!err) {
+ blkaddr = dn.data_blkaddr;
+ set_inode_flag(inode, FI_APPEND_WRITE);
+ }
+ }
+ } else {
if (create) {
if (unlikely(f2fs_cp_error(sbi))) {
err = -EIO;
@@ -1055,6 +1215,8 @@ next_block:
last_ofs_in_node = dn.ofs_in_node;
}
} else {
+ WARN_ON(flag != F2FS_GET_BLOCK_PRE_DIO &&
+ flag != F2FS_GET_BLOCK_DIO);
err = __allocate_data_block(&dn,
map->m_seg_type);
if (!err)
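
The preceding hunk makes direct-IO writes under LFS mode allocate a fresh block even when a valid mapping already exists, since a log-structured layout forbids overwriting in place. A toy user-space model of that placement decision (illustrative names, not kernel API):

#include <stdbool.h>
#include <stdio.h>

/* Toy model of the write-placement decision above; not kernel code. */
enum placement { IN_PLACE, OUT_OF_PLACE };

static enum placement dio_write_placement(bool lfs_mode, bool mapped)
{
	if (lfs_mode)
		return OUT_OF_PLACE;	/* log-structured: never overwrite */
	return mapped ? IN_PLACE : OUT_OF_PLACE;
}

int main(void)
{
	printf("LFS, mapped block:      %s\n",
		dio_write_placement(true, true) == OUT_OF_PLACE ?
			"out-of-place" : "in-place");
	printf("adaptive, mapped block: %s\n",
		dio_write_placement(false, true) == OUT_OF_PLACE ?
			"out-of-place" : "in-place");
	return 0;
}
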
@@ -1116,7 +1278,7 @@ skip:
(pgofs == end || dn.ofs_in_node == end_offset)) {
dn.ofs_in_node = ofs_in_node;
- err = reserve_new_blocks(&dn, prealloc);
+ err = f2fs_reserve_new_blocks(&dn, prealloc);
if (err)
goto sync_out;
@@ -1145,13 +1307,19 @@ skip:
f2fs_put_dnode(&dn);
- if (create) {
+ if (map->m_may_create) {
__do_map_lock(sbi, flag, false);
f2fs_balance_fs(sbi, dn.node_changed);
}
goto next_dnode;
sync_out:
+
+ /* for hardware encryption, but to avoid potential issues in the future */
+ if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED)
+ f2fs_wait_on_block_writeback_range(inode,
+ map->m_pblk, map->m_len);
+
if (flag == F2FS_GET_BLOCK_PRECACHE) {
if (map->m_flags & F2FS_MAP_MAPPED) {
unsigned int ofs = start_pgofs - map->m_lblk;
@@ -1165,7 +1333,7 @@ sync_out:
}
f2fs_put_dnode(&dn);
unlock_out:
- if (create) {
+ if (map->m_may_create) {
__do_map_lock(sbi, flag, false);
f2fs_balance_fs(sbi, dn.node_changed);
}
@@ -1187,6 +1355,7 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len)
map.m_next_pgofs = NULL;
map.m_next_extent = NULL;
map.m_seg_type = NO_CHECK_TYPE;
+ map.m_may_create = false;
last_lblk = F2FS_BLK_ALIGN(pos + len);
while (map.m_lblk < last_lblk) {
@@ -1201,7 +1370,7 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len)
static int __get_data_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh, int create, int flag,
- pgoff_t *next_pgofs, int seg_type)
+ pgoff_t *next_pgofs, int seg_type, bool may_write)
{
struct f2fs_map_blocks map;
int err;
@@ -1211,6 +1380,7 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
map.m_next_pgofs = next_pgofs;
map.m_next_extent = NULL;
map.m_seg_type = seg_type;
+ map.m_may_create = may_write;
err = f2fs_map_blocks(inode, &map, create, flag);
if (!err) {
@@ -1227,16 +1397,25 @@ static int get_data_block(struct inode *inode, sector_t iblock,
{
return __get_data_block(inode, iblock, bh_result, create,
flag, next_pgofs,
- NO_CHECK_TYPE);
+ NO_CHECK_TYPE, create);
+}
+
+static int get_data_block_dio_write(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
+{
+ return __get_data_block(inode, iblock, bh_result, create,
+ F2FS_GET_BLOCK_DIO, NULL,
+ f2fs_rw_hint_to_seg_type(inode->i_write_hint),
+ true);
}
static int get_data_block_dio(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
return __get_data_block(inode, iblock, bh_result, create,
- F2FS_GET_BLOCK_DEFAULT, NULL,
- rw_hint_to_seg_type(
- inode->i_write_hint));
+ F2FS_GET_BLOCK_DIO, NULL,
+ f2fs_rw_hint_to_seg_type(inode->i_write_hint),
+ false);
}
static int get_data_block_bmap(struct inode *inode, sector_t iblock,
@@ -1248,7 +1427,7 @@ static int get_data_block_bmap(struct inode *inode, sector_t iblock,
return __get_data_block(inode, iblock, bh_result, create,
F2FS_GET_BLOCK_BMAP, NULL,
- NO_CHECK_TYPE);
+ NO_CHECK_TYPE, create);
}
static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
@@ -1280,7 +1459,11 @@ static int f2fs_xattr_fiemap(struct inode *inode,
if (!page)
return -ENOMEM;
- get_node_info(sbi, inode->i_ino, &ni);
+ err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
+ if (err) {
+ f2fs_put_page(page, 1);
+ return err;
+ }
phys = (__u64)blk_to_logical(inode, ni.blk_addr);
offset = offsetof(struct f2fs_inode, i_addr) +
@@ -1307,7 +1490,11 @@ static int f2fs_xattr_fiemap(struct inode *inode,
if (!page)
return -ENOMEM;
- get_node_info(sbi, xnid, &ni);
+ err = f2fs_get_node_info(sbi, xnid, &ni);
+ if (err) {
+ f2fs_put_page(page, 1);
+ return err;
+ }
phys = (__u64)blk_to_logical(inode, ni.blk_addr);
len = inode->i_sb->s_blocksize;
@@ -1416,24 +1603,137 @@ out:
return ret;
}
+static int f2fs_read_single_page(struct inode *inode, struct page *page,
+ unsigned nr_pages,
+ struct f2fs_map_blocks *map,
+ struct bio **bio_ret,
+ sector_t *last_block_in_bio,
+ bool is_readahead)
+{
+ struct bio *bio = *bio_ret;
+ const unsigned blkbits = inode->i_blkbits;
+ const unsigned blocksize = 1 << blkbits;
+ sector_t block_in_file;
+ sector_t last_block;
+ sector_t last_block_in_file;
+ sector_t block_nr;
+ int ret = 0;
+
+ block_in_file = (sector_t)page_index(page);
+ last_block = block_in_file + nr_pages;
+ last_block_in_file = (i_size_read(inode) + blocksize - 1) >>
+ blkbits;
+ if (last_block > last_block_in_file)
+ last_block = last_block_in_file;
+
+ /* just zero out the page which is beyond EOF */
+ if (block_in_file >= last_block)
+ goto zero_out;
+ /*
+ * Map blocks using the previous result first.
+ */
+ if ((map->m_flags & F2FS_MAP_MAPPED) &&
+ block_in_file > map->m_lblk &&
+ block_in_file < (map->m_lblk + map->m_len))
+ goto got_it;
+
+ /*
+ * Then do more f2fs_map_blocks() calls until we are
+ * done with this page.
+ */
+ map->m_lblk = block_in_file;
+ map->m_len = last_block - block_in_file;
+
+ ret = f2fs_map_blocks(inode, map, 0, F2FS_GET_BLOCK_DEFAULT);
+ if (ret)
+ goto out;
+got_it:
+ if ((map->m_flags & F2FS_MAP_MAPPED)) {
+ block_nr = map->m_pblk + block_in_file - map->m_lblk;
+ SetPageMappedToDisk(page);
+
+ if (!PageUptodate(page) && (!PageSwapCache(page) &&
+ !cleancache_get_page(page))) {
+ SetPageUptodate(page);
+ goto confused;
+ }
+
+ if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
+ DATA_GENERIC_ENHANCE_READ)) {
+ ret = -EFSCORRUPTED;
+ goto out;
+ }
+ } else {
+zero_out:
+ zero_user_segment(page, 0, PAGE_SIZE);
+ if (!PageUptodate(page))
+ SetPageUptodate(page);
+ unlock_page(page);
+ goto out;
+ }
+
+ /*
+ * This page will go to BIO. Do we need to send this
+ * BIO off first?
+ */
+ if (bio && (*last_block_in_bio != block_nr - 1 ||
+ !__same_bdev(F2FS_I_SB(inode), block_nr, bio))) {
+submit_and_realloc:
+ __f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
+ bio = NULL;
+ }
+ if (bio == NULL) {
+ bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
+ is_readahead ? REQ_RAHEAD : 0);
+ if (IS_ERR(bio)) {
+ ret = PTR_ERR(bio);
+ bio = NULL;
+ goto out;
+ }
+ }
+
+ /*
+ * If the page is under writeback, we need to wait for
+ * its completion to see the correct decrypted data.
+ */
+ f2fs_wait_on_block_writeback(inode, block_nr);
+
+ if (bio_add_page(bio, page, blocksize, 0) < blocksize)
+ goto submit_and_realloc;
+
+ inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
+ ClearPageError(page);
+ *last_block_in_bio = block_nr;
+ goto out;
+confused:
+ if (bio) {
+ __f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
+ bio = NULL;
+ }
+ unlock_page(page);
+out:
+ *bio_ret = bio;
+ return ret;
+}
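
f2fs_read_single_page() keeps the f2fs_map_blocks() result in the caller's struct so consecutive pages can reuse one mapping instead of re-walking the node tree per page. A self-contained model of the reuse test (hypothetical types, user-space):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Toy model of reusing one f2fs_map_blocks() result across pages. */
struct map_result {
	uint64_t lblk;		/* first mapped logical block */
	uint64_t len;		/* mapped extent length in blocks */
	bool mapped;
};

static bool can_reuse(const struct map_result *m, uint64_t block_in_file)
{
	/* strictly greater-than, mirroring the kernel check above */
	return m->mapped &&
		block_in_file > m->lblk &&
		block_in_file < m->lblk + m->len;
}

int main(void)
{
	struct map_result m = { .lblk = 100, .len = 16, .mapped = true };

	printf("block 101 reuses mapping: %d\n", can_reuse(&m, 101)); /* 1 */
	printf("block 116 reuses mapping: %d\n", can_reuse(&m, 116)); /* 0 */
	return 0;
}
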
+
/*
* This function was originally taken from fs/mpage.c, and customized for f2fs.
* Major change was from block_size == page_size in f2fs by default.
+ *
+ * Note that the aops->readpages() function is ONLY used for read-ahead. If
+ * this function ever deviates from doing just read-ahead, it should either
+ * use ->readpage() or do the necessary surgery to decouple ->readpages()
+ * from read-ahead.
*/
static int f2fs_mpage_readpages(struct address_space *mapping,
struct list_head *pages, struct page *page,
- unsigned nr_pages)
+ unsigned nr_pages, bool is_readahead)
{
struct bio *bio = NULL;
sector_t last_block_in_bio = 0;
struct inode *inode = mapping->host;
- const unsigned blkbits = inode->i_blkbits;
- const unsigned blocksize = 1 << blkbits;
- sector_t block_in_file;
- sector_t last_block;
- sector_t last_block_in_file;
- sector_t block_nr;
struct f2fs_map_blocks map;
+ int ret = 0;
map.m_pblk = 0;
map.m_lblk = 0;
@@ -1442,6 +1742,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
map.m_next_pgofs = NULL;
map.m_next_extent = NULL;
map.m_seg_type = NO_CHECK_TYPE;
+ map.m_may_create = false;
for (; nr_pages; nr_pages--) {
if (pages) {
@@ -1450,103 +1751,30 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
prefetchw(&page->flags);
list_del(&page->lru);
if (add_to_page_cache_lru(page, mapping,
- page->index, GFP_KERNEL))
+ page_index(page), GFP_KERNEL))
goto next_page;
}
- block_in_file = (sector_t)page->index;
- last_block = block_in_file + nr_pages;
- last_block_in_file = (i_size_read(inode) + blocksize - 1) >>
- blkbits;
- if (last_block > last_block_in_file)
- last_block = last_block_in_file;
-
- /*
- * Map blocks using the previous result first.
- */
- if ((map.m_flags & F2FS_MAP_MAPPED) &&
- block_in_file > map.m_lblk &&
- block_in_file < (map.m_lblk + map.m_len))
- goto got_it;
-
- /*
- * Then do more f2fs_map_blocks() calls until we are
- * done with this page.
- */
- map.m_flags = 0;
-
- if (block_in_file < last_block) {
- map.m_lblk = block_in_file;
- map.m_len = last_block - block_in_file;
-
- if (f2fs_map_blocks(inode, &map, 0,
- F2FS_GET_BLOCK_DEFAULT))
- goto set_error_page;
- }
-got_it:
- if ((map.m_flags & F2FS_MAP_MAPPED)) {
- block_nr = map.m_pblk + block_in_file - map.m_lblk;
- SetPageMappedToDisk(page);
-
- if (!PageUptodate(page) && !cleancache_get_page(page)) {
- SetPageUptodate(page);
- goto confused;
- }
- } else {
+ ret = f2fs_read_single_page(inode, page, nr_pages, &map, &bio,
+ &last_block_in_bio, is_readahead);
+ if (ret) {
+ SetPageError(page);
zero_user_segment(page, 0, PAGE_SIZE);
- if (!PageUptodate(page))
- SetPageUptodate(page);
unlock_page(page);
- goto next_page;
- }
-
- /*
- * This page will go to BIO. Do we need to send this
- * BIO off first?
- */
- if (bio && (last_block_in_bio != block_nr - 1 ||
- !__same_bdev(F2FS_I_SB(inode), block_nr, bio))) {
-submit_and_realloc:
- __submit_bio(F2FS_I_SB(inode), bio, DATA);
- bio = NULL;
- }
- if (bio == NULL) {
- bio = f2fs_grab_read_bio(inode, block_nr, nr_pages);
- if (IS_ERR(bio)) {
- bio = NULL;
- goto set_error_page;
- }
- }
-
- if (bio_add_page(bio, page, blocksize, 0) < blocksize)
- goto submit_and_realloc;
-
- last_block_in_bio = block_nr;
- goto next_page;
-set_error_page:
- SetPageError(page);
- zero_user_segment(page, 0, PAGE_SIZE);
- unlock_page(page);
- goto next_page;
-confused:
- if (bio) {
- __submit_bio(F2FS_I_SB(inode), bio, DATA);
- bio = NULL;
}
- unlock_page(page);
next_page:
if (pages)
put_page(page);
}
BUG_ON(pages && !list_empty(pages));
if (bio)
- __submit_bio(F2FS_I_SB(inode), bio, DATA);
- return 0;
+ __f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
+ return pages ? 0 : ret;
}
static int f2fs_read_data_page(struct file *file, struct page *page)
{
- struct inode *inode = page->mapping->host;
+ struct inode *inode = page_file_mapping(page)->host;
int ret = -EAGAIN;
trace_f2fs_readpage(page, DATA);
@@ -1555,7 +1783,8 @@ static int f2fs_read_data_page(struct file *file, struct page *page)
if (f2fs_has_inline_data(inode))
ret = f2fs_read_inline_data(inode, page);
if (ret == -EAGAIN)
- ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1);
+ ret = f2fs_mpage_readpages(page_file_mapping(page),
+ NULL, page, 1, false);
return ret;
}
@@ -1572,34 +1801,43 @@ static int f2fs_read_data_pages(struct file *file,
if (f2fs_has_inline_data(inode))
return 0;
- return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages);
+ return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages, true);
}
static int encrypt_one_page(struct f2fs_io_info *fio)
{
struct inode *inode = fio->page->mapping->host;
+ struct page *mpage;
gfp_t gfp_flags = GFP_NOFS;
if (!f2fs_encrypted_file(inode))
return 0;
/* wait for GCed page writeback via META_MAPPING */
- f2fs_wait_on_block_writeback(fio->sbi, fio->old_blkaddr);
+ f2fs_wait_on_block_writeback(inode, fio->old_blkaddr);
retry_encrypt:
fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page,
PAGE_SIZE, 0, fio->page->index, gfp_flags);
- if (!IS_ERR(fio->encrypted_page))
- return 0;
+ if (IS_ERR(fio->encrypted_page)) {
+ /* flush pending IOs and wait for a while in the ENOMEM case */
+ if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
+ f2fs_flush_merged_writes(fio->sbi);
+ congestion_wait(BLK_RW_ASYNC, HZ/50);
+ gfp_flags |= __GFP_NOFAIL;
+ goto retry_encrypt;
+ }
+ return PTR_ERR(fio->encrypted_page);
+ }
- /* flush pending IOs and wait for a while in the ENOMEM case */
- if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
- f2fs_flush_merged_writes(fio->sbi);
- congestion_wait(BLK_RW_ASYNC, HZ/50);
- gfp_flags |= __GFP_NOFAIL;
- goto retry_encrypt;
+ mpage = find_lock_page(META_MAPPING(fio->sbi), fio->old_blkaddr);
+ if (mpage) {
+ if (PageUptodate(mpage))
+ memcpy(page_address(mpage),
+ page_address(fio->encrypted_page), PAGE_SIZE);
+ f2fs_put_page(mpage, 1);
}
- return PTR_ERR(fio->encrypted_page);
+ return 0;
}
static inline bool check_inplace_update_policy(struct inode *inode,
@@ -1610,12 +1848,12 @@ static inline bool check_inplace_update_policy(struct inode *inode,
if (policy & (0x1 << F2FS_IPU_FORCE))
return true;
- if (policy & (0x1 << F2FS_IPU_SSR) && need_SSR(sbi))
+ if (policy & (0x1 << F2FS_IPU_SSR) && f2fs_need_SSR(sbi))
return true;
if (policy & (0x1 << F2FS_IPU_UTIL) &&
utilization(sbi) > SM_I(sbi)->min_ipu_util)
return true;
- if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && need_SSR(sbi) &&
+ if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && f2fs_need_SSR(sbi) &&
utilization(sbi) > SM_I(sbi)->min_ipu_util)
return true;
@@ -1633,10 +1871,14 @@ static inline bool check_inplace_update_policy(struct inode *inode,
is_inode_flag_set(inode, FI_NEED_IPU))
return true;
+ if (unlikely(fio && is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
+ !f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
+ return true;
+
return false;
}
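
check_inplace_update_policy() tests one bit per policy in the sbi's IPU policy word; the new clause forces in-place updates while checkpointing is disabled, for data not covered by the last checkpoint. A user-space model of the bit tests (bit positions are illustrative):

#include <stdbool.h>
#include <stdio.h>

/* Toy model of the IPU policy bit tests; bit positions are illustrative. */
enum { IPU_FORCE, IPU_SSR, IPU_UTIL, IPU_SSR_UTIL };

static bool should_ipu(unsigned int policy, bool need_ssr,
		       unsigned int util, unsigned int min_util)
{
	if (policy & (1u << IPU_FORCE))
		return true;
	if ((policy & (1u << IPU_SSR)) && need_ssr)
		return true;
	if ((policy & (1u << IPU_UTIL)) && util > min_util)
		return true;
	if ((policy & (1u << IPU_SSR_UTIL)) && need_ssr && util > min_util)
		return true;
	return false;
}

int main(void)
{
	/* SSR-only policy: IPU kicks in once SSR allocation is needed */
	printf("%d\n", should_ipu(1u << IPU_SSR, true, 30, 70));   /* 1 */
	printf("%d\n", should_ipu(1u << IPU_UTIL, false, 30, 70)); /* 0 */
	return 0;
}
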
-bool should_update_inplace(struct inode *inode, struct f2fs_io_info *fio)
+bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio)
{
if (f2fs_is_pinned_file(inode))
return true;
@@ -1648,7 +1890,7 @@ bool should_update_inplace(struct inode *inode, struct f2fs_io_info *fio)
return check_inplace_update_policy(inode, fio);
}
-bool should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
+bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -1656,6 +1898,8 @@ bool should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
return true;
if (S_ISDIR(inode->i_mode))
return true;
+ if (IS_NOQUOTA(inode))
+ return true;
if (f2fs_is_atomic_file(inode))
return true;
if (fio) {
@@ -1663,6 +1907,9 @@ bool should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
return true;
if (IS_ATOMIC_WRITTEN_PAGE(fio->page))
return true;
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
+ f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
+ return true;
}
return false;
}
@@ -1671,27 +1918,19 @@ static inline bool need_inplace_update(struct f2fs_io_info *fio)
{
struct inode *inode = fio->page->mapping->host;
- if (should_update_outplace(inode, fio))
+ if (f2fs_should_update_outplace(inode, fio))
return false;
- return should_update_inplace(inode, fio);
-}
-
-static inline bool valid_ipu_blkaddr(struct f2fs_io_info *fio)
-{
- if (fio->old_blkaddr == NEW_ADDR)
- return false;
- if (fio->old_blkaddr == NULL_ADDR)
- return false;
- return true;
+ return f2fs_should_update_inplace(inode, fio);
}
-int do_write_data_page(struct f2fs_io_info *fio)
+int f2fs_do_write_data_page(struct f2fs_io_info *fio)
{
struct page *page = fio->page;
struct inode *inode = page->mapping->host;
struct dnode_of_data dn;
struct extent_info ei = {0,0,0};
+ struct node_info ni;
bool ipu_force = false;
int err = 0;
@@ -1700,18 +1939,20 @@ int do_write_data_page(struct f2fs_io_info *fio)
f2fs_lookup_extent_cache(inode, page->index, &ei)) {
fio->old_blkaddr = ei.blk + page->index - ei.fofs;
- if (valid_ipu_blkaddr(fio)) {
- ipu_force = true;
- fio->need_lock = LOCK_DONE;
- goto got_it;
- }
+ if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
+ DATA_GENERIC_ENHANCE))
+ return -EFSCORRUPTED;
+
+ ipu_force = true;
+ fio->need_lock = LOCK_DONE;
+ goto got_it;
}
/* Deadlock between page->lock and f2fs_lock_op */
if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi))
return -EAGAIN;
- err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
+ err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
if (err)
goto out;
@@ -1720,14 +1961,23 @@ int do_write_data_page(struct f2fs_io_info *fio)
/* This page is already truncated */
if (fio->old_blkaddr == NULL_ADDR) {
ClearPageUptodate(page);
+ clear_cold_data(page);
goto out_writepage;
}
got_it:
+ if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
+ !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
+ DATA_GENERIC_ENHANCE)) {
+ err = -EFSCORRUPTED;
+ goto out_writepage;
+ }
/*
* If current allocation needs SSR,
* it had better use in-place writes for updated data.
*/
- if (ipu_force || (valid_ipu_blkaddr(fio) && need_inplace_update(fio))) {
+ if (ipu_force ||
+ (__is_valid_data_blkaddr(fio->old_blkaddr) &&
+ need_inplace_update(fio))) {
err = encrypt_one_page(fio);
if (err)
goto out_writepage;
@@ -1737,9 +1987,17 @@ got_it:
f2fs_put_dnode(&dn);
if (fio->need_lock == LOCK_REQ)
f2fs_unlock_op(fio->sbi);
- err = rewrite_data_page(fio);
+ err = f2fs_inplace_write_data(fio);
+ if (err) {
+ if (f2fs_encrypted_file(inode))
+ fscrypt_pullback_bio_page(&fio->encrypted_page,
+ true);
+ if (PageWriteback(page))
+ end_page_writeback(page);
+ } else {
+ set_inode_flag(inode, FI_UPDATE_WRITE);
+ }
trace_f2fs_do_write_data_page(fio->page, IPU);
- set_inode_flag(inode, FI_UPDATE_WRITE);
return err;
}
@@ -1751,6 +2009,12 @@ got_it:
fio->need_lock = LOCK_REQ;
}
+ err = f2fs_get_node_info(fio->sbi, dn.nid, &ni);
+ if (err)
+ goto out_writepage;
+
+ fio->version = ni.version;
+
err = encrypt_one_page(fio);
if (err)
goto out_writepage;
@@ -1759,7 +2023,7 @@ got_it:
ClearPageError(page);
/* LFS mode write path */
- write_data_page(&dn, fio);
+ f2fs_outplace_write_data(&dn, fio);
trace_f2fs_do_write_data_page(page, OPU);
set_inode_flag(inode, FI_APPEND_WRITE);
if (page->index == 0)
@@ -1773,6 +2037,8 @@ out:
}
static int __write_data_page(struct page *page, bool *submitted,
+ struct bio **bio,
+ sector_t *last_block,
struct writeback_control *wbc,
enum iostat_type io_type)
{
@@ -1798,6 +2064,8 @@ static int __write_data_page(struct page *page, bool *submitted,
.need_lock = LOCK_RETRY,
.io_type = io_type,
.io_wbc = wbc,
+ .bio = bio,
+ .last_block = last_block,
};
trace_f2fs_writepage(page, DATA);
@@ -1805,6 +2073,12 @@ static int __write_data_page(struct page *page, bool *submitted,
/* we should bypass data pages to let the kworker jobs proceed */
if (unlikely(f2fs_cp_error(sbi))) {
mapping_set_error(page->mapping, -EIO);
+ /*
+ * don't drop any dirty dentry pages, to keep the latest
+ * directory structure.
+ */
+ if (S_ISDIR(inode->i_mode))
+ goto redirty_out;
goto out;
}
@@ -1829,13 +2103,13 @@ write:
/* we should not write 0'th page having journal header */
if (f2fs_is_volatile_file(inode) && (!page->index ||
(!wbc->for_reclaim &&
- available_free_memory(sbi, BASE_CHECK))))
+ f2fs_available_free_memory(sbi, BASE_CHECK))))
goto redirty_out;
/* Dentry blocks are controlled by checkpoint */
if (S_ISDIR(inode->i_mode)) {
fio.need_lock = LOCK_DONE;
- err = do_write_data_page(&fio);
+ err = f2fs_do_write_data_page(&fio);
goto done;
}
@@ -1854,10 +2128,10 @@ write:
}
if (err == -EAGAIN) {
- err = do_write_data_page(&fio);
+ err = f2fs_do_write_data_page(&fio);
if (err == -EAGAIN) {
fio.need_lock = LOCK_REQ;
- err = do_write_data_page(&fio);
+ err = f2fs_do_write_data_page(&fio);
}
}
@@ -1876,21 +2150,27 @@ done:
out:
inode_dec_dirty_pages(inode);
- if (err)
+ if (err) {
ClearPageUptodate(page);
+ clear_cold_data(page);
+ }
if (wbc->for_reclaim) {
- f2fs_submit_merged_write_cond(sbi, inode, 0, page->index, DATA);
+ f2fs_submit_merged_write_cond(sbi, NULL, page, 0, DATA);
clear_inode_flag(inode, FI_HOT_DATA);
- remove_dirty_inode(inode);
+ f2fs_remove_dirty_inode(inode);
submitted = NULL;
}
unlock_page(page);
- if (!S_ISDIR(inode->i_mode))
+ if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) &&
+ !F2FS_I(inode)->cp_task) {
+ f2fs_submit_ipu_bio(sbi, bio, page);
f2fs_balance_fs(sbi, need_balance_fs);
+ }
if (unlikely(f2fs_cp_error(sbi))) {
+ f2fs_submit_ipu_bio(sbi, bio, page);
f2fs_submit_merged_write(sbi, DATA);
submitted = NULL;
}
@@ -1917,7 +2197,7 @@ redirty_out:
static int f2fs_write_data_page(struct page *page,
struct writeback_control *wbc)
{
- return __write_data_page(page, NULL, wbc, FS_DATA_IO);
+ return __write_data_page(page, NULL, NULL, NULL, wbc, FS_DATA_IO);
}
/*
@@ -1932,15 +2212,18 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
int ret = 0;
int done = 0;
struct pagevec pvec;
+ struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
+ struct bio *bio = NULL;
+ sector_t last_block;
int nr_pages;
pgoff_t uninitialized_var(writeback_index);
pgoff_t index;
pgoff_t end; /* Inclusive */
pgoff_t done_index;
- pgoff_t last_idx = ULONG_MAX;
int cycled;
int range_whole = 0;
int tag;
+ int nwritten = 0;
pagevec_init(&pvec, 0);
@@ -1976,8 +2259,8 @@ retry:
while (!done && (index <= end)) {
int i;
- nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
- min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1);
+ nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
+ tag);
if (nr_pages == 0)
break;
@@ -1985,7 +2268,9 @@ retry:
struct page *page = pvec.pages[i];
bool submitted = false;
- if (page->index > end) {
+ /* give a priority to WB_SYNC threads */
+ if (atomic_read(&sbi->wb_sync_req[DATA]) &&
+ wbc->sync_mode == WB_SYNC_NONE) {
done = 1;
break;
}
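
The relocated check lets WB_SYNC_NONE writeback yield as soon as any WB_SYNC_ALL writer is active, rather than only when its own quota expires. The gate is an atomic counter incremented around sync writeback; a minimal C11 model:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Toy model of sbi->wb_sync_req[DATA]; not kernel code. */
static atomic_int wb_sync_req;

static void sync_writeback_enter(void) { atomic_fetch_add(&wb_sync_req, 1); }
static void sync_writeback_exit(void)  { atomic_fetch_sub(&wb_sync_req, 1); }

/* background (WB_SYNC_NONE) writeback yields while a sync writer runs */
static bool background_should_yield(void)
{
	return atomic_load(&wb_sync_req) > 0;
}

int main(void)
{
	printf("%d\n", background_should_yield()); /* 0: no sync writer */
	sync_writeback_enter();
	printf("%d\n", background_should_yield()); /* 1: yield to sync */
	sync_writeback_exit();
	return 0;
}
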
@@ -2006,18 +2291,20 @@ continue_unlock:
}
if (PageWriteback(page)) {
- if (wbc->sync_mode != WB_SYNC_NONE)
+ if (wbc->sync_mode != WB_SYNC_NONE) {
f2fs_wait_on_page_writeback(page,
- DATA, true);
- else
+ DATA, true, true);
+ f2fs_submit_ipu_bio(sbi, &bio, page);
+ } else {
goto continue_unlock;
+ }
}
- BUG_ON(PageWriteback(page));
if (!clear_page_dirty_for_io(page))
goto continue_unlock;
- ret = __write_data_page(page, &submitted, wbc, io_type);
+ ret = __write_data_page(page, &submitted, &bio,
+ &last_block, wbc, io_type);
if (unlikely(ret)) {
/*
* keep nr_to_write, since vfs uses this to
@@ -2041,12 +2328,10 @@ continue_unlock:
done = 1;
break;
} else if (submitted) {
- last_idx = page->index;
+ nwritten++;
}
- /* give a priority to WB_SYNC threads */
- if ((atomic_read(&F2FS_M_SB(mapping)->wb_sync_req) ||
- --wbc->nr_to_write <= 0) &&
+ if (--wbc->nr_to_write <= 0 &&
wbc->sync_mode == WB_SYNC_NONE) {
done = 1;
break;
@@ -2065,14 +2350,34 @@ continue_unlock:
if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
mapping->writeback_index = done_index;
- if (last_idx != ULONG_MAX)
+ if (nwritten)
f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host,
- 0, last_idx, DATA);
+ NULL, 0, DATA);
+ /* submit cached bio of IPU write */
+ if (bio)
+ __submit_bio(sbi, bio, DATA);
return ret;
}
-int __f2fs_write_data_pages(struct address_space *mapping,
+static inline bool __should_serialize_io(struct inode *inode,
+ struct writeback_control *wbc)
+{
+ if (!S_ISREG(inode->i_mode))
+ return false;
+ if (IS_NOQUOTA(inode))
+ return false;
+ /* to avoid deadlock in the data flush path */
+ if (F2FS_I(inode)->cp_task)
+ return false;
+ if (wbc->sync_mode != WB_SYNC_ALL)
+ return true;
+ if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
+ return true;
+ return false;
+}
+
+static int __f2fs_write_data_pages(struct address_space *mapping,
struct writeback_control *wbc,
enum iostat_type io_type)
{
@@ -2080,6 +2385,7 @@ int __f2fs_write_data_pages(struct address_space *mapping,
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct blk_plug plug;
int ret;
+ bool locked = false;
/* deal with chardevs and other special file */
if (!mapping->a_ops->writepage)
@@ -2093,9 +2399,10 @@ int __f2fs_write_data_pages(struct address_space *mapping,
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
goto skip_write;
- if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
+ if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) &&
+ wbc->sync_mode == WB_SYNC_NONE &&
get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
- available_free_memory(sbi, DIRTY_DENTS))
+ f2fs_available_free_memory(sbi, DIRTY_DENTS))
goto skip_write;
/* skip writing during file defragment */
@@ -2106,22 +2413,30 @@ int __f2fs_write_data_pages(struct address_space *mapping,
/* to avoid splitting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
if (wbc->sync_mode == WB_SYNC_ALL)
- atomic_inc(&sbi->wb_sync_req);
- else if (atomic_read(&sbi->wb_sync_req))
+ atomic_inc(&sbi->wb_sync_req[DATA]);
+ else if (atomic_read(&sbi->wb_sync_req[DATA]))
goto skip_write;
+ if (__should_serialize_io(inode, wbc)) {
+ mutex_lock(&sbi->writepages);
+ locked = true;
+ }
+
blk_start_plug(&plug);
ret = f2fs_write_cache_pages(mapping, wbc, io_type);
blk_finish_plug(&plug);
+ if (locked)
+ mutex_unlock(&sbi->writepages);
+
if (wbc->sync_mode == WB_SYNC_ALL)
- atomic_dec(&sbi->wb_sync_req);
+ atomic_dec(&sbi->wb_sync_req[DATA]);
/*
* if some pages were truncated, we cannot rely on its mapping->host
* to detect pending bios.
*/
- remove_dirty_inode(inode);
+ f2fs_remove_dirty_inode(inode);
return ret;
skip_write:
@@ -2146,10 +2461,15 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to)
loff_t i_size = i_size_read(inode);
if (to > i_size) {
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
down_write(&F2FS_I(inode)->i_mmap_sem);
+
truncate_pagecache(inode, i_size);
- truncate_blocks(inode, i_size, true);
+ if (!IS_NOQUOTA(inode))
+ f2fs_truncate_blocks(inode, i_size, true);
+
up_write(&F2FS_I(inode)->i_mmap_sem);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
}
}
@@ -2164,6 +2484,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
bool locked = false;
struct extent_info ei = {0,0,0};
int err = 0;
+ int flag;
/*
* we already allocated all the blocks, so we don't need to get
@@ -2173,14 +2494,20 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
!is_inode_flag_set(inode, FI_NO_PREALLOC))
return 0;
+ /* f2fs_lock_op avoids race between write CP and convert_inline_page */
+ if (f2fs_has_inline_data(inode) && pos + len > MAX_INLINE_DATA(inode))
+ flag = F2FS_GET_BLOCK_DEFAULT;
+ else
+ flag = F2FS_GET_BLOCK_PRE_AIO;
+
if (f2fs_has_inline_data(inode) ||
(pos & PAGE_MASK) >= i_size_read(inode)) {
- __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
+ __do_map_lock(sbi, flag, true);
locked = true;
}
restart:
/* check inline_data */
- ipage = get_node_page(sbi, inode->i_ino);
+ ipage = f2fs_get_node_page(sbi, inode->i_ino);
if (IS_ERR(ipage)) {
err = PTR_ERR(ipage);
goto unlock_out;
@@ -2190,7 +2517,7 @@ restart:
if (f2fs_has_inline_data(inode)) {
if (pos + len <= MAX_INLINE_DATA(inode)) {
- read_inline_data(page, ipage);
+ f2fs_do_read_inline_data(page, ipage);
set_inode_flag(inode, FI_DATA_EXIST);
if (inode->i_nlink)
set_inline_node(ipage);
@@ -2208,11 +2535,12 @@ restart:
dn.data_blkaddr = ei.blk + index - ei.fofs;
} else {
/* hole case */
- err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
+ err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
if (err || dn.data_blkaddr == NULL_ADDR) {
f2fs_put_dnode(&dn);
__do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
true);
+ WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO);
locked = true;
goto restart;
}
@@ -2226,7 +2554,7 @@ out:
f2fs_put_dnode(&dn);
unlock_out:
if (locked)
- __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
+ __do_map_lock(sbi, flag, false);
return err;
}
@@ -2254,8 +2582,13 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
}
trace_f2fs_write_begin(inode, pos, len, flags);
- if (f2fs_is_atomic_file(inode) &&
- !available_free_memory(sbi, INMEM_PAGES)) {
+ err = f2fs_is_checkpoint_ready(sbi);
+ if (err)
+ goto fail;
+
+ if ((f2fs_is_atomic_file(inode) &&
+ !f2fs_available_free_memory(sbi, INMEM_PAGES)) ||
+ is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) {
err = -ENOMEM;
drop_atomic = true;
goto fail;
@@ -2290,7 +2623,8 @@ repeat:
if (err)
goto fail;
- if (need_balance && has_not_enough_free_secs(sbi, 0, 0)) {
+ if (need_balance && !IS_NOQUOTA(inode) &&
+ has_not_enough_free_secs(sbi, 0, 0)) {
unlock_page(page);
f2fs_balance_fs(sbi, true);
lock_page(page);
@@ -2301,11 +2635,7 @@ repeat:
}
}
- f2fs_wait_on_page_writeback(page, DATA, false);
-
- /* wait for GCed page writeback via META_MAPPING */
- if (f2fs_post_read_required(inode))
- f2fs_wait_on_block_writeback(sbi, blkaddr);
+ f2fs_wait_on_page_writeback(page, DATA, false, true);
if (len == PAGE_SIZE || PageUptodate(page))
return 0;
@@ -2319,6 +2649,11 @@ repeat:
zero_user_segment(page, 0, PAGE_SIZE);
SetPageUptodate(page);
} else {
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
+ DATA_GENERIC_ENHANCE_READ)) {
+ err = -EFSCORRUPTED;
+ goto fail;
+ }
err = f2fs_submit_page_read(inode, page, blkaddr);
if (err)
goto fail;
@@ -2339,7 +2674,7 @@ fail:
f2fs_put_page(page, 1);
f2fs_write_failed(mapping, pos + len);
if (drop_atomic)
- drop_inmem_pages_all(sbi);
+ f2fs_drop_inmem_pages_all(sbi, false);
return err;
}
@@ -2380,15 +2715,65 @@ unlock_out:
static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
loff_t offset)
{
- unsigned blocksize_mask = inode->i_sb->s_blocksize - 1;
+ unsigned i_blkbits = READ_ONCE(inode->i_blkbits);
+ unsigned blkbits = i_blkbits;
+ unsigned blocksize_mask = (1 << blkbits) - 1;
+ unsigned long align = offset | iov_iter_alignment(iter);
+ struct block_device *bdev = inode->i_sb->s_bdev;
+
+ if (align & blocksize_mask) {
+ if (bdev)
+ blkbits = blksize_bits(bdev_logical_block_size(bdev));
+ blocksize_mask = (1 << blkbits) - 1;
+ if (align & blocksize_mask)
+ return -EINVAL;
+ return 1;
+ }
+ return 0;
+}
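
The rewritten check_direct_IO() distinguishes three outcomes: 0 when the request is aligned to the filesystem block size, 1 when it is aligned only to the device's logical block size (the caller then returns 0 bytes so the write falls back to buffered IO), and -EINVAL otherwise. A standalone model of the two-level mask test:

#include <stdio.h>

/* Toy model of the two-level direct-IO alignment check; not kernel code. */
static int check_dio_alignment(unsigned long off_or_align,
			       unsigned fs_blkbits, unsigned dev_blkbits)
{
	unsigned long fs_mask = (1ul << fs_blkbits) - 1;
	unsigned long dev_mask = (1ul << dev_blkbits) - 1;

	if (!(off_or_align & fs_mask))
		return 0;	/* fully aligned: DIO proceeds */
	if (!(off_or_align & dev_mask))
		return 1;	/* device-aligned only: fall back to buffered */
	return -22;		/* -EINVAL: unusable alignment */
}

int main(void)
{
	/* 4K fs blocks, 512-byte device sectors */
	printf("%d\n", check_dio_alignment(8192, 12, 9)); /* 0 */
	printf("%d\n", check_dio_alignment(1024, 12, 9)); /* 1 */
	printf("%d\n", check_dio_alignment(100, 12, 9));  /* -22 */
	return 0;
}
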
- if (offset & blocksize_mask)
- return -EINVAL;
+static void f2fs_dio_end_io(struct bio *bio)
+{
+ struct f2fs_private_dio *dio = bio->bi_private;
- if (iov_iter_alignment(iter) & blocksize_mask)
- return -EINVAL;
+ dec_page_count(F2FS_I_SB(dio->inode),
+ dio->write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
- return 0;
+ bio->bi_private = dio->orig_private;
+ bio->bi_end_io = dio->orig_end_io;
+
+ kvfree(dio);
+
+ bio_endio(bio);
+}
+
+static void f2fs_dio_submit_bio(int rw, struct bio *bio, struct inode *inode,
+ loff_t file_offset)
+{
+ struct f2fs_private_dio *dio;
+ bool write = (rw == REQ_OP_WRITE);
+
+ dio = f2fs_kzalloc(F2FS_I_SB(inode),
+ sizeof(struct f2fs_private_dio), GFP_NOFS);
+ if (!dio)
+ goto out;
+
+ dio->inode = inode;
+ dio->orig_end_io = bio->bi_end_io;
+ dio->orig_private = bio->bi_private;
+ dio->write = write;
+
+ bio->bi_end_io = f2fs_dio_end_io;
+ bio->bi_private = dio;
+
+ inc_page_count(F2FS_I_SB(inode),
+ write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
+
+ submit_bio(rw, bio);
+ return;
+out:
+ bio->bi_error = -EIO;
+ bio_endio(bio);
}
static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
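
f2fs_dio_submit_bio() wraps each bio so in-flight DIO can be counted: it saves the original completion callback and private pointer, substitutes its own, and f2fs_dio_end_io() restores both before chaining to the original. The same save-substitute-restore pattern in plain C:

#include <stdio.h>
#include <stdlib.h>

/* Toy model of hijacking a completion callback; not kernel code. */
struct request {
	void (*end_io)(struct request *);
	void *private;
};

struct wrapper {
	void (*orig_end_io)(struct request *);
	void *orig_private;
};

static void wrapped_end_io(struct request *rq)
{
	struct wrapper *w = rq->private;

	/* account the completion here, then restore and chain */
	rq->end_io = w->orig_end_io;
	rq->private = w->orig_private;
	free(w);
	rq->end_io(rq);
}

static void original_end_io(struct request *rq)
{
	printf("original completion, private=%p\n", rq->private);
}

int main(void)
{
	struct request rq = { .end_io = original_end_io, .private = NULL };
	struct wrapper *w = malloc(sizeof(*w));

	w->orig_end_io = rq.end_io;
	w->orig_private = rq.private;
	rq.end_io = wrapped_end_io;
	rq.private = w;

	rq.end_io(&rq);	/* simulate completion */
	return 0;
}
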
@@ -2397,17 +2782,19 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
struct address_space *mapping = iocb->ki_filp->f_mapping;
struct inode *inode = mapping->host;
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
size_t count = iov_iter_count(iter);
int rw = iov_iter_rw(iter);
int err;
enum rw_hint hint = iocb->ki_hint;
int whint_mode = F2FS_OPTION(sbi).whint_mode;
+ bool do_opu;
err = check_direct_IO(inode, iter, offset);
if (err)
- return err;
+ return err < 0 ? err : 0;
- if (f2fs_force_buffered_io(inode, rw))
+ if (f2fs_force_buffered_io(inode, iocb, iter))
return 0;
if (trace_android_fs_dataread_start_enabled() &&
@@ -2432,22 +2819,42 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
current->pid, path,
current->comm);
}
+
+ do_opu = allow_outplace_dio(inode, iocb, iter);
+
trace_f2fs_direct_IO_enter(inode, offset, count, rw);
if (rw == WRITE && whint_mode == WHINT_MODE_OFF)
iocb->ki_hint = WRITE_LIFE_NOT_SET;
- if (!down_read_trylock(&F2FS_I(inode)->dio_rwsem[rw])) {
- if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (!down_read_trylock(&fi->i_gc_rwsem[rw])) {
+ iocb->ki_hint = hint;
+ err = -EAGAIN;
+ goto out;
+ }
+ if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) {
+ up_read(&fi->i_gc_rwsem[rw]);
iocb->ki_hint = hint;
err = -EAGAIN;
goto out;
}
- down_read(&F2FS_I(inode)->dio_rwsem[rw]);
+ } else {
+ down_read(&fi->i_gc_rwsem[rw]);
+ if (do_opu)
+ down_read(&fi->i_gc_rwsem[READ]);
}
- err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block_dio);
- up_read(&F2FS_I(inode)->dio_rwsem[rw]);
+ err = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
+ iter, offset,
+ rw == WRITE ? get_data_block_dio_write :
+ get_data_block_dio, NULL, f2fs_dio_submit_bio,
+ DIO_LOCKING | DIO_SKIP_HOLES);
+
+ if (do_opu)
+ up_read(&fi->i_gc_rwsem[READ]);
+
+ up_read(&fi->i_gc_rwsem[rw]);
if (rw == WRITE) {
if (whint_mode == WHINT_MODE_OFF)
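
Under IOCB_NOWAIT the code above must take up to two semaphores without sleeping and release the first if the second trylock fails, since an out-of-place DIO write holds both i_gc_rwsem[WRITE] and i_gc_rwsem[READ]. A pthread sketch of the same acquire-or-back-out sequence:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Toy model of nonblocking dual-lock acquisition with backout. */
static pthread_rwlock_t gc_lock[2] = {
	PTHREAD_RWLOCK_INITIALIZER, PTHREAD_RWLOCK_INITIALIZER
};

static bool try_lock_for_dio(int rw, bool do_opu)
{
	if (pthread_rwlock_tryrdlock(&gc_lock[rw]) != 0)
		return false;		/* would block: report -EAGAIN */
	if (do_opu && rw != 0 && pthread_rwlock_tryrdlock(&gc_lock[0]) != 0) {
		pthread_rwlock_unlock(&gc_lock[rw]);	/* back out */
		return false;
	}
	return true;
}

int main(void)
{
	/* index 1 = WRITE, 0 = READ, mirroring the hunk above */
	if (try_lock_for_dio(1, true))
		printf("both locks taken, OPU DIO may proceed\n");
	return 0;
}
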
@@ -2455,7 +2862,8 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
if (err > 0) {
f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
err);
- set_inode_flag(inode, FI_UPDATE_WRITE);
+ if (!do_opu)
+ set_inode_flag(inode, FI_UPDATE_WRITE);
} else if (err < 0) {
f2fs_write_failed(mapping, offset + count);
}
@@ -2490,16 +2898,16 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset,
dec_page_count(sbi, F2FS_DIRTY_NODES);
} else {
inode_dec_dirty_pages(inode);
- remove_dirty_inode(inode);
+ f2fs_remove_dirty_inode(inode);
}
}
- /* This is atomic written page, keep Private */
+ clear_cold_data(page);
+
if (IS_ATOMIC_WRITTEN_PAGE(page))
- return drop_inmem_page(inode, page);
+ return f2fs_drop_inmem_page(inode, page);
- set_page_private(page, 0);
- ClearPagePrivate(page);
+ f2fs_clear_page_private(page);
}
int f2fs_release_page(struct page *page, gfp_t wait)
@@ -2512,24 +2920,25 @@ int f2fs_release_page(struct page *page, gfp_t wait)
if (IS_ATOMIC_WRITTEN_PAGE(page))
return 0;
- set_page_private(page, 0);
- ClearPagePrivate(page);
+ clear_cold_data(page);
+ f2fs_clear_page_private(page);
return 1;
}
static int f2fs_set_data_page_dirty(struct page *page)
{
- struct address_space *mapping = page->mapping;
- struct inode *inode = mapping->host;
+ struct inode *inode = page_file_mapping(page)->host;
trace_f2fs_set_page_dirty(page, DATA);
if (!PageUptodate(page))
SetPageUptodate(page);
+ if (PageSwapCache(page))
+ return __set_page_dirty_nobuffers(page);
if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) {
if (!IS_ATOMIC_WRITTEN_PAGE(page)) {
- register_inmem_page(inode, page);
+ f2fs_register_inmem_page(inode, page);
return 1;
}
/*
@@ -2541,7 +2950,7 @@ static int f2fs_set_data_page_dirty(struct page *page)
if (!PageDirty(page)) {
__set_page_dirty_nobuffers(page);
- update_dirty_page(inode, page);
+ f2fs_update_dirty_page(inode, page);
return 1;
}
return 0;
@@ -2581,12 +2990,8 @@ int f2fs_migrate_page(struct address_space *mapping,
return -EAGAIN;
}
- /*
- * A reference is expected if PagePrivate set when move mapping,
- * however F2FS breaks this for maintaining dirty page counts when
- * truncating pages. So here adjusting the 'extra_count' make it work.
- */
- extra_count = (atomic_written ? 1 : 0) - page_has_private(page);
+ /* one extra reference was held for atomic_write page */
+ extra_count = atomic_written ? 1 : 0;
rc = migrate_page_move_mapping(mapping, newpage,
page, NULL, mode, extra_count);
if (rc != MIGRATEPAGE_SUCCESS) {
@@ -2607,9 +3012,10 @@ int f2fs_migrate_page(struct address_space *mapping,
get_page(newpage);
}
- if (PagePrivate(page))
- SetPagePrivate(newpage);
- set_page_private(newpage, page_private(page));
+ if (PagePrivate(page)) {
+ f2fs_set_page_private(newpage, page_private(page));
+ f2fs_clear_page_private(page);
+ }
migrate_page_copy(newpage, page);
@@ -2617,6 +3023,126 @@ int f2fs_migrate_page(struct address_space *mapping,
}
#endif
+#ifdef CONFIG_SWAP
+/* Copied from generic_swapfile_activate() to check for any holes */
+static int check_swap_activate(struct file *swap_file, unsigned int max)
+{
+ struct address_space *mapping = swap_file->f_mapping;
+ struct inode *inode = mapping->host;
+ unsigned blocks_per_page;
+ unsigned long page_no;
+ unsigned blkbits;
+ sector_t probe_block;
+ sector_t last_block;
+ sector_t lowest_block = -1;
+ sector_t highest_block = 0;
+
+ blkbits = inode->i_blkbits;
+ blocks_per_page = PAGE_SIZE >> blkbits;
+
+ /*
+ * Map all the blocks into the extent list. This code doesn't try
+ * to be very smart.
+ */
+ probe_block = 0;
+ page_no = 0;
+ last_block = i_size_read(inode) >> blkbits;
+ while ((probe_block + blocks_per_page) <= last_block && page_no < max) {
+ unsigned block_in_page;
+ sector_t first_block;
+
+ cond_resched();
+
+ first_block = bmap(inode, probe_block);
+ if (first_block == 0)
+ goto bad_bmap;
+
+ /*
+ * It must be PAGE_SIZE aligned on-disk
+ */
+ if (first_block & (blocks_per_page - 1)) {
+ probe_block++;
+ goto reprobe;
+ }
+
+ for (block_in_page = 1; block_in_page < blocks_per_page;
+ block_in_page++) {
+ sector_t block;
+
+ block = bmap(inode, probe_block + block_in_page);
+ if (block == 0)
+ goto bad_bmap;
+ if (block != first_block + block_in_page) {
+ /* Discontiguity */
+ probe_block++;
+ goto reprobe;
+ }
+ }
+
+ first_block >>= (PAGE_SHIFT - blkbits);
+ if (page_no) { /* exclude the header page */
+ if (first_block < lowest_block)
+ lowest_block = first_block;
+ if (first_block > highest_block)
+ highest_block = first_block;
+ }
+
+ page_no++;
+ probe_block += blocks_per_page;
+reprobe:
+ continue;
+ }
+ return 0;
+
+bad_bmap:
+ pr_err("swapon: swapfile has holes\n");
+ return -EINVAL;
+}
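
In the loop above, only a hole (bmap() returning 0) is fatal; a cluster that is misaligned or discontiguous on disk just advances the probe by one block and retries. A user-space model over a fake block map:

#include <stdio.h>

/* Toy model of the bmap() probe; not kernel code. */
#define BLOCKS_PER_PAGE 2	/* pretend 8K pages over 4K blocks */

/* fake bmap(): logical block -> physical block, 0 means a hole */
static unsigned long bmap_of(const unsigned long *map, unsigned long lblk)
{
	return map[lblk];
}

/* returns usable swap pages, or -1 if the file has a hole */
static long probe_swap_extents(const unsigned long *map, unsigned long nblocks)
{
	unsigned long probe = 0;
	long pages = 0;

	while (probe + BLOCKS_PER_PAGE <= nblocks) {
		unsigned long first = bmap_of(map, probe);
		unsigned long i;

		if (first == 0)
			return -1;	/* hole: swapon must fail */
		/* a page cluster must be aligned and contiguous on disk;
		 * otherwise skip one block and re-probe, as the kernel does */
		if (first & (BLOCKS_PER_PAGE - 1))
			goto reprobe;
		for (i = 1; i < BLOCKS_PER_PAGE; i++) {
			unsigned long b = bmap_of(map, probe + i);

			if (b == 0)
				return -1;
			if (b != first + i)
				goto reprobe;
		}
		pages++;
		probe += BLOCKS_PER_PAGE;
		continue;
reprobe:
		probe++;
	}
	return pages;
}

int main(void)
{
	unsigned long ok[]   = { 100, 101, 200, 201 };
	unsigned long hole[] = { 100, 101, 0, 201 };

	printf("contiguous file: %ld pages\n", probe_swap_extents(ok, 4));
	printf("file with hole:  %ld\n", probe_swap_extents(hole, 4));
	return 0;
}
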
+
+static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
+ sector_t *span)
+{
+ struct inode *inode = file_inode(file);
+ int ret;
+
+ if (!S_ISREG(inode->i_mode))
+ return -EINVAL;
+
+ if (f2fs_readonly(F2FS_I_SB(inode)->sb))
+ return -EROFS;
+
+ ret = f2fs_convert_inline_inode(inode);
+ if (ret)
+ return ret;
+
+ ret = check_swap_activate(file, sis->max);
+ if (ret)
+ return ret;
+
+ set_inode_flag(inode, FI_PIN_FILE);
+ f2fs_precache_extents(inode);
+ f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
+ return 0;
+}
+
+static void f2fs_swap_deactivate(struct file *file)
+{
+ struct inode *inode = file_inode(file);
+
+ clear_inode_flag(inode, FI_PIN_FILE);
+}
+#else
+static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
+ sector_t *span)
+{
+ return -EOPNOTSUPP;
+}
+
+static void f2fs_swap_deactivate(struct file *file)
+{
+}
+#endif
+
const struct address_space_operations f2fs_dblock_aops = {
.readpage = f2fs_read_data_page,
.readpages = f2fs_read_data_pages,
@@ -2629,11 +3155,24 @@ const struct address_space_operations f2fs_dblock_aops = {
.releasepage = f2fs_release_page,
.direct_IO = f2fs_direct_IO,
.bmap = f2fs_bmap,
+ .swap_activate = f2fs_swap_activate,
+ .swap_deactivate = f2fs_swap_deactivate,
#ifdef CONFIG_MIGRATION
.migratepage = f2fs_migrate_page,
#endif
};
+void f2fs_clear_radix_tree_dirty_tag(struct page *page)
+{
+ struct address_space *mapping = page_mapping(page);
+ unsigned long flags;
+
+ spin_lock_irqsave(&mapping->tree_lock, flags);
+ radix_tree_tag_clear(&mapping->page_tree, page_index(page),
+ PAGECACHE_TAG_DIRTY);
+ spin_unlock_irqrestore(&mapping->tree_lock, flags);
+}
+
int __init f2fs_init_post_read_processing(void)
{
bio_post_read_ctx_cache = KMEM_CACHE(bio_post_read_ctx, 0);
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index a66107b5cfff..9cadcf9f1ecf 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* f2fs debugging statistics
*
@@ -5,10 +6,6 @@
* http://www.samsung.com/
* Copyright (c) 2012 Linux Foundation
* Copyright (c) 2012 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/fs.h>
@@ -30,8 +27,15 @@ static DEFINE_MUTEX(f2fs_stat_mutex);
static void update_general_status(struct f2fs_sb_info *sbi)
{
struct f2fs_stat_info *si = F2FS_STAT(sbi);
+ struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
int i;
+ /* these will be changed if online resize is done */
+ si->main_area_segs = le32_to_cpu(raw_super->segment_count_main);
+ si->main_area_sections = le32_to_cpu(raw_super->section_count);
+ si->main_area_zones = si->main_area_sections /
+ le32_to_cpu(raw_super->secs_per_zone);
+
/* validation check of the segment numbers */
si->hit_largest = atomic64_read(&sbi->read_hit_largest);
si->hit_cached = atomic64_read(&sbi->read_hit_cached);
@@ -56,13 +60,18 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->vw_cnt = atomic_read(&sbi->vw_cnt);
si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt);
si->max_vw_cnt = atomic_read(&sbi->max_vw_cnt);
+ si->nr_dio_read = get_pages(sbi, F2FS_DIO_READ);
+ si->nr_dio_write = get_pages(sbi, F2FS_DIO_WRITE);
si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA);
si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA);
+ si->nr_rd_data = get_pages(sbi, F2FS_RD_DATA);
+ si->nr_rd_node = get_pages(sbi, F2FS_RD_NODE);
+ si->nr_rd_meta = get_pages(sbi, F2FS_RD_META);
if (SM_I(sbi) && SM_I(sbi)->fcc_info) {
si->nr_flushed =
atomic_read(&SM_I(sbi)->fcc_info->issued_flush);
si->nr_flushing =
- atomic_read(&SM_I(sbi)->fcc_info->issing_flush);
+ atomic_read(&SM_I(sbi)->fcc_info->queued_flush);
si->flush_list_empty =
llist_empty(&SM_I(sbi)->fcc_info->issue_list);
}
@@ -70,7 +79,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->nr_discarded =
atomic_read(&SM_I(sbi)->dcc_info->issued_discard);
si->nr_discarding =
- atomic_read(&SM_I(sbi)->dcc_info->issing_discard);
+ atomic_read(&SM_I(sbi)->dcc_info->queued_discard);
si->nr_discard_cmd =
atomic_read(&SM_I(sbi)->dcc_info->discard_cmd_cnt);
si->undiscard_blks = SM_I(sbi)->dcc_info->undiscard_blks;
@@ -94,8 +103,10 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->free_secs = free_sections(sbi);
si->prefree_count = prefree_segments(sbi);
si->dirty_count = dirty_segments(sbi);
- si->node_pages = NODE_MAPPING(sbi)->nrpages;
- si->meta_pages = META_MAPPING(sbi)->nrpages;
+ if (sbi->node_inode)
+ si->node_pages = NODE_MAPPING(sbi)->nrpages;
+ if (sbi->meta_inode)
+ si->meta_pages = META_MAPPING(sbi)->nrpages;
si->nats = NM_I(sbi)->nat_cnt;
si->dirty_nats = NM_I(sbi)->dirty_nat_cnt;
si->sits = MAIN_SEGS(sbi);
@@ -104,6 +115,10 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->avail_nids = NM_I(sbi)->available_nids;
si->alloc_nids = NM_I(sbi)->nid_cnt[PREALLOC_NID];
si->bg_gc = sbi->bg_gc;
+ si->io_skip_bggc = sbi->io_skip_bggc;
+ si->other_skip_bggc = sbi->other_skip_bggc;
+ si->skipped_atomic_files[BG_GC] = sbi->skipped_atomic_files[BG_GC];
+ si->skipped_atomic_files[FG_GC] = sbi->skipped_atomic_files[FG_GC];
si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg)
* 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
/ 2;
@@ -119,6 +134,9 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->curzone[i] = GET_ZONE_FROM_SEC(sbi, si->cursec[i]);
}
+ for (i = META_CP; i < META_MAX; i++)
+ si->meta_count[i] = atomic_read(&sbi->meta_count[i]);
+
for (i = 0; i < 2; i++) {
si->segment_count[i] = sbi->segment_count[i];
si->block_count[i] = sbi->block_count[i];
@@ -166,7 +184,6 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
static void update_mem_info(struct f2fs_sb_info *sbi)
{
struct f2fs_stat_info *si = F2FS_STAT(sbi);
- unsigned npages;
int i;
if (si->base_mem)
@@ -188,10 +205,9 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
si->base_mem += MAIN_SEGS(sbi) * sizeof(struct seg_entry);
si->base_mem += f2fs_bitmap_size(MAIN_SEGS(sbi));
si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi);
- if (f2fs_discard_en(sbi))
- si->base_mem += SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi);
+ si->base_mem += SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi);
si->base_mem += SIT_VBLOCK_MAP_SIZE;
- if (sbi->segs_per_sec > 1)
+ if (__is_large_section(sbi))
si->base_mem += MAIN_SECS(sbi) * sizeof(struct sec_entry);
si->base_mem += __bitmap_size(sbi, SIT_BITMAP);
@@ -213,7 +229,8 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
si->base_mem += sizeof(struct f2fs_nm_info);
si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS);
- si->base_mem += NM_I(sbi)->nat_blocks * NAT_ENTRY_BITMAP_SIZE;
+ si->base_mem += NM_I(sbi)->nat_blocks *
+ f2fs_bitmap_size(NAT_ENTRY_PER_BLOCK);
si->base_mem += NM_I(sbi)->nat_blocks / 8;
si->base_mem += NM_I(sbi)->nat_blocks * sizeof(unsigned short);
@@ -249,10 +266,14 @@ get_cache:
sizeof(struct extent_node);
si->page_mem = 0;
- npages = NODE_MAPPING(sbi)->nrpages;
- si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
- npages = META_MAPPING(sbi)->nrpages;
- si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
+ if (sbi->node_inode) {
+ unsigned npages = NODE_MAPPING(sbi)->nrpages;
+ si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
+ }
+ if (sbi->meta_inode) {
+ unsigned npages = META_MAPPING(sbi)->nrpages;
+ si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
+ }
}
static int stat_show(struct seq_file *s, void *v)
@@ -268,7 +289,8 @@ static int stat_show(struct seq_file *s, void *v)
seq_printf(s, "\n=====[ partition info(%pg). #%d, %s, CP: %s]=====\n",
si->sbi->sb->s_bdev, i++,
f2fs_readonly(si->sbi->sb) ? "RO": "RW",
- f2fs_cp_error(si->sbi) ? "Error": "Good");
+ is_set_ckpt_flags(si->sbi, CP_DISABLED_FLAG) ?
+ "Disabled": (f2fs_cp_error(si->sbi) ? "Error": "Good"));
seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ",
si->sit_area_segs, si->nat_area_segs);
seq_printf(s, "[SSA: %d] [MAIN: %d",
@@ -330,6 +352,13 @@ static int stat_show(struct seq_file *s, void *v)
si->prefree_count, si->free_segs, si->free_secs);
seq_printf(s, "CP calls: %d (BG: %d)\n",
si->cp_count, si->bg_cp_count);
+ seq_printf(s, " - cp blocks : %u\n", si->meta_count[META_CP]);
+ seq_printf(s, " - sit blocks : %u\n",
+ si->meta_count[META_SIT]);
+ seq_printf(s, " - nat blocks : %u\n",
+ si->meta_count[META_NAT]);
+ seq_printf(s, " - ssa blocks : %u\n",
+ si->meta_count[META_SSA]);
seq_printf(s, "GC calls: %d (BG: %d)\n",
si->call_count, si->bg_gc);
seq_printf(s, " - data segments : %d (%d)\n",
@@ -342,6 +371,12 @@ static int stat_show(struct seq_file *s, void *v)
si->bg_data_blks);
seq_printf(s, " - node blocks : %d (%d)\n", si->node_blks,
si->bg_node_blks);
+ seq_printf(s, "Skipped : atomic write %llu (%llu)\n",
+ si->skipped_atomic_files[BG_GC] +
+ si->skipped_atomic_files[FG_GC],
+ si->skipped_atomic_files[BG_GC]);
+ seq_printf(s, "BG skip : IO: %u, Other: %u\n",
+ si->io_skip_bggc, si->other_skip_bggc);
seq_puts(s, "\nExtent Cache:\n");
seq_printf(s, " - Hit Count: L1-1:%llu L1-2:%llu L2:%llu\n",
si->hit_largest, si->hit_cached,
@@ -353,7 +388,11 @@ static int stat_show(struct seq_file *s, void *v)
seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n",
si->ext_tree, si->zombie_tree, si->ext_node);
seq_puts(s, "\nBalancing F2FS Async:\n");
- seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: (%4d %4d %4d), "
+ seq_printf(s, " - DIO (R: %4d, W: %4d)\n",
+ si->nr_dio_read, si->nr_dio_write);
seq_printf(s, " - IO_R (Data: %4d, Node: %4d, Meta: %4d)\n",
+ si->nr_rd_data, si->nr_rd_node, si->nr_rd_meta);
+ seq_printf(s, " - IO_W (CP: %4d, Data: %4d, Flush: (%4d %4d %4d), "
"Discard: (%4d %4d)) cmd: %4d undiscard:%4u\n",
si->nr_wb_cp_data, si->nr_wb_data,
si->nr_flushing, si->nr_flushed,
@@ -438,6 +477,7 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
struct f2fs_stat_info *si;
+ int i;
si = f2fs_kzalloc(sbi, sizeof(struct f2fs_stat_info), GFP_KERNEL);
if (!si)
@@ -463,6 +503,8 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
atomic_set(&sbi->inline_inode, 0);
atomic_set(&sbi->inline_dir, 0);
atomic_set(&sbi->inplace_count, 0);
+ for (i = META_CP; i < META_MAX; i++)
+ atomic_set(&sbi->meta_count[i], 0);
atomic_set(&sbi->aw_cnt, 0);
atomic_set(&sbi->vw_cnt, 0);
@@ -484,33 +526,19 @@ void f2fs_destroy_stats(struct f2fs_sb_info *sbi)
list_del(&si->stat_list);
mutex_unlock(&f2fs_stat_mutex);
- kfree(si);
+ kvfree(si);
}
-int __init f2fs_create_root_stats(void)
+void __init f2fs_create_root_stats(void)
{
- struct dentry *file;
-
f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL);
- if (!f2fs_debugfs_root)
- return -ENOMEM;
- file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root,
- NULL, &stat_fops);
- if (!file) {
- debugfs_remove(f2fs_debugfs_root);
- f2fs_debugfs_root = NULL;
- return -ENOMEM;
- }
-
- return 0;
+ debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root, NULL,
+ &stat_fops);
}
void f2fs_destroy_root_stats(void)
{
- if (!f2fs_debugfs_root)
- return;
-
debugfs_remove_recursive(f2fs_debugfs_root);
f2fs_debugfs_root = NULL;
}
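
The debug.c conversion above follows the kernel-wide debugfs cleanup: debugfs_create_dir()/debugfs_create_file() failures are non-fatal (the status file is simply absent), so f2fs_create_root_stats() can return void and callers need no unwind path for it. A heavily condensed sketch of the resulting caller side, assuming the call site in init_f2fs_fs() in fs/f2fs/super.c (the real function registers many more components than shown):

/*
 * Condensed sketch, not the full init path: with the void return,
 * no error branch is needed for the stats registration itself.
 */
static int __init init_f2fs_fs(void)
{
	int err;

	err = f2fs_create_extent_cache();
	if (err)
		return err;
	f2fs_create_root_stats();	/* void: debugfs failure is non-fatal */
	err = register_filesystem(&f2fs_fs_type);
	if (err) {
		f2fs_destroy_root_stats();
		f2fs_destroy_extent_cache();
	}
	return err;
}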
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index b7ad932a05ab..d01f376291a3 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/dir.c
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
@@ -60,12 +57,12 @@ static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = {
[S_IFLNK >> S_SHIFT] = F2FS_FT_SYMLINK,
};
-void set_de_type(struct f2fs_dir_entry *de, umode_t mode)
+static void set_de_type(struct f2fs_dir_entry *de, umode_t mode)
{
de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
}
-unsigned char get_de_type(struct f2fs_dir_entry *de)
+unsigned char f2fs_get_de_type(struct f2fs_dir_entry *de)
{
if (de->file_type < F2FS_FT_MAX)
return f2fs_filetype_table[de->file_type];
@@ -97,14 +94,14 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page);
make_dentry_ptr_block(NULL, &d, dentry_blk);
- de = find_target_dentry(fname, namehash, max_slots, &d);
+ de = f2fs_find_target_dentry(fname, namehash, max_slots, &d);
if (de)
*res_page = dentry_page;
return de;
}
-struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *fname,
+struct f2fs_dir_entry *f2fs_find_target_dentry(struct fscrypt_name *fname,
f2fs_hash_t namehash, int *max_slots,
struct f2fs_dentry_ptr *d)
{
@@ -171,7 +168,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
for (; bidx < end_block; bidx++) {
/* no need to allocate new dentry pages to all the indices */
- dentry_page = find_data_page(dir, bidx);
+ dentry_page = f2fs_find_data_page(dir, bidx);
if (IS_ERR(dentry_page)) {
if (PTR_ERR(dentry_page) == -ENOENT) {
room = true;
@@ -210,7 +207,7 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
if (f2fs_has_inline_dentry(dir)) {
*res_page = NULL;
- de = find_in_inline_dir(dir, fname, res_page);
+ de = f2fs_find_in_inline_dir(dir, fname, res_page);
goto out;
}
@@ -221,9 +218,8 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
max_depth = F2FS_I(dir)->i_current_depth;
if (unlikely(max_depth > MAX_DIR_HASH_DEPTH)) {
- f2fs_msg(F2FS_I_SB(dir)->sb, KERN_WARNING,
- "Corrupted max_depth of %lu: %u",
- dir->i_ino, max_depth);
+ f2fs_warn(F2FS_I_SB(dir), "Corrupted max_depth of %lu: %u",
+ dir->i_ino, max_depth);
max_depth = MAX_DIR_HASH_DEPTH;
f2fs_i_depth_write(dir, max_depth);
}
@@ -296,7 +292,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
{
enum page_type type = f2fs_has_inline_dentry(dir) ? NODE : DATA;
lock_page(page);
- f2fs_wait_on_page_writeback(page, type, true);
+ f2fs_wait_on_page_writeback(page, type, true, true);
de->ino = cpu_to_le32(inode->i_ino);
set_de_type(de, inode->i_mode);
set_page_dirty(page);
@@ -310,7 +306,7 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage)
{
struct f2fs_inode *ri;
- f2fs_wait_on_page_writeback(ipage, NODE, true);
+ f2fs_wait_on_page_writeback(ipage, NODE, true, true);
/* copy name info. to this inode page */
ri = F2FS_INODE(ipage);
@@ -319,7 +315,7 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage)
set_page_dirty(ipage);
}
-void do_make_empty_dir(struct inode *inode, struct inode *parent,
+void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent,
struct f2fs_dentry_ptr *d)
{
struct qstr dot = QSTR_INIT(".", 1);
@@ -340,23 +336,23 @@ static int make_empty_dir(struct inode *inode,
struct f2fs_dentry_ptr d;
if (f2fs_has_inline_dentry(inode))
- return make_empty_inline_dir(inode, parent, page);
+ return f2fs_make_empty_inline_dir(inode, parent, page);
- dentry_page = get_new_data_page(inode, page, 0, true);
+ dentry_page = f2fs_get_new_data_page(inode, page, 0, true);
if (IS_ERR(dentry_page))
return PTR_ERR(dentry_page);
dentry_blk = page_address(dentry_page);
make_dentry_ptr_block(NULL, &d, dentry_blk);
- do_make_empty_dir(inode, parent, &d);
+ f2fs_do_make_empty_dir(inode, parent, &d);
set_page_dirty(dentry_page);
f2fs_put_page(dentry_page, 1);
return 0;
}
-struct page *init_inode_metadata(struct inode *inode, struct inode *dir,
+struct page *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir,
const struct qstr *new_name, const struct qstr *orig_name,
struct page *dpage)
{
@@ -365,7 +361,7 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir,
int err;
if (is_inode_flag_set(inode, FI_NEW_INODE)) {
- page = new_inode_page(inode);
+ page = f2fs_new_inode_page(inode);
if (IS_ERR(page))
return page;
@@ -395,7 +391,7 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir,
goto put_error;
}
} else {
- page = get_node_page(F2FS_I_SB(dir), inode->i_ino);
+ page = f2fs_get_node_page(F2FS_I_SB(dir), inode->i_ino);
if (IS_ERR(page))
return page;
}
@@ -418,19 +414,19 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir,
* we should remove this inode from orphan list.
*/
if (inode->i_nlink == 0)
- remove_orphan_inode(F2FS_I_SB(dir), inode->i_ino);
+ f2fs_remove_orphan_inode(F2FS_I_SB(dir), inode->i_ino);
f2fs_i_links_write(inode, true);
}
return page;
put_error:
clear_nlink(inode);
- update_inode(inode, page);
+ f2fs_update_inode(inode, page);
f2fs_put_page(page, 1);
return ERR_PTR(err);
}
-void update_parent_metadata(struct inode *dir, struct inode *inode,
+void f2fs_update_parent_metadata(struct inode *dir, struct inode *inode,
unsigned int current_depth)
{
if (inode && is_inode_flag_set(inode, FI_NEW_INODE)) {
@@ -448,7 +444,7 @@ void update_parent_metadata(struct inode *dir, struct inode *inode,
clear_inode_flag(inode, FI_INC_LINK);
}
-int room_for_filename(const void *bitmap, int slots, int max_slots)
+int f2fs_room_for_filename(const void *bitmap, int slots, int max_slots)
{
int bit_start = 0;
int zero_start, zero_end;
@@ -517,12 +513,11 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name,
}
start:
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH)) {
f2fs_show_injection_info(FAULT_DIR_DEPTH);
return -ENOSPC;
}
-#endif
+
if (unlikely(current_depth == MAX_DIR_HASH_DEPTH))
return -ENOSPC;
@@ -537,12 +532,12 @@ start:
(le32_to_cpu(dentry_hash) % nbucket));
for (block = bidx; block <= (bidx + nblock - 1); block++) {
- dentry_page = get_new_data_page(dir, NULL, block, true);
+ dentry_page = f2fs_get_new_data_page(dir, NULL, block, true);
if (IS_ERR(dentry_page))
return PTR_ERR(dentry_page);
dentry_blk = page_address(dentry_page);
- bit_pos = room_for_filename(&dentry_blk->dentry_bitmap,
+ bit_pos = f2fs_room_for_filename(&dentry_blk->dentry_bitmap,
slots, NR_DENTRY_IN_BLOCK);
if (bit_pos < NR_DENTRY_IN_BLOCK)
goto add_dentry;
@@ -554,11 +549,11 @@ start:
++level;
goto start;
add_dentry:
- f2fs_wait_on_page_writeback(dentry_page, DATA, true);
+ f2fs_wait_on_page_writeback(dentry_page, DATA, true, true);
if (inode) {
down_write(&F2FS_I(inode)->i_sem);
- page = init_inode_metadata(inode, dir, new_name,
+ page = f2fs_init_inode_metadata(inode, dir, new_name,
orig_name, NULL);
if (IS_ERR(page)) {
err = PTR_ERR(page);
@@ -576,7 +571,7 @@ add_dentry:
f2fs_put_page(page, 1);
}
- update_parent_metadata(dir, inode, current_depth);
+ f2fs_update_parent_metadata(dir, inode, current_depth);
fail:
if (inode)
up_write(&F2FS_I(inode)->i_sem);
@@ -586,7 +581,7 @@ fail:
return err;
}
-int __f2fs_do_add_link(struct inode *dir, struct fscrypt_name *fname,
+int f2fs_add_dentry(struct inode *dir, struct fscrypt_name *fname,
struct inode *inode, nid_t ino, umode_t mode)
{
struct qstr new_name;
@@ -610,7 +605,7 @@ int __f2fs_do_add_link(struct inode *dir, struct fscrypt_name *fname,
* Caller should grab and release a rwsem by calling f2fs_lock_op() and
* f2fs_unlock_op().
*/
-int __f2fs_add_link(struct inode *dir, const struct qstr *name,
+int f2fs_do_add_link(struct inode *dir, const struct qstr *name,
struct inode *inode, nid_t ino, umode_t mode)
{
struct fscrypt_name fname;
@@ -639,7 +634,7 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name,
} else if (IS_ERR(page)) {
err = PTR_ERR(page);
} else {
- err = __f2fs_do_add_link(dir, &fname, inode, ino, mode);
+ err = f2fs_add_dentry(dir, &fname, inode, ino, mode);
}
fscrypt_free_filename(&fname);
return err;
@@ -651,7 +646,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir)
int err = 0;
down_write(&F2FS_I(inode)->i_sem);
- page = init_inode_metadata(inode, dir, NULL, NULL, NULL);
+ page = f2fs_init_inode_metadata(inode, dir, NULL, NULL, NULL);
if (IS_ERR(page)) {
err = PTR_ERR(page);
goto fail;
@@ -659,9 +654,9 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir)
f2fs_put_page(page, 1);
clear_inode_flag(inode, FI_NEW_INODE);
+ f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
fail:
up_write(&F2FS_I(inode)->i_sem);
- f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
return err;
}
@@ -683,9 +678,9 @@ void f2fs_drop_nlink(struct inode *dir, struct inode *inode)
up_write(&F2FS_I(inode)->i_sem);
if (inode->i_nlink == 0)
- add_orphan_inode(inode);
+ f2fs_add_orphan_inode(inode);
else
- release_orphan_inode(sbi);
+ f2fs_release_orphan_inode(sbi);
}
/*
@@ -698,20 +693,18 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
struct f2fs_dentry_block *dentry_blk;
unsigned int bit_pos;
int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
- struct address_space *mapping = page_mapping(page);
- unsigned long flags;
int i;
f2fs_update_time(F2FS_I_SB(dir), REQ_TIME);
if (F2FS_OPTION(F2FS_I_SB(dir)).fsync_mode == FSYNC_MODE_STRICT)
- add_ino_entry(F2FS_I_SB(dir), dir->i_ino, TRANS_DIR_INO);
+ f2fs_add_ino_entry(F2FS_I_SB(dir), dir->i_ino, TRANS_DIR_INO);
if (f2fs_has_inline_dentry(dir))
return f2fs_delete_inline_entry(dentry, page, dir, inode);
lock_page(page);
- f2fs_wait_on_page_writeback(page, DATA, true);
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
dentry_blk = page_address(page);
bit_pos = dentry - dentry_blk->dentry;
@@ -731,17 +724,14 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
f2fs_drop_nlink(dir, inode);
if (bit_pos == NR_DENTRY_IN_BLOCK &&
- !truncate_hole(dir, page->index, page->index + 1)) {
- spin_lock_irqsave(&mapping->tree_lock, flags);
- radix_tree_tag_clear(&mapping->page_tree, page_index(page),
- PAGECACHE_TAG_DIRTY);
- spin_unlock_irqrestore(&mapping->tree_lock, flags);
-
+ !f2fs_truncate_hole(dir, page->index, page->index + 1)) {
+ f2fs_clear_radix_tree_dirty_tag(page);
clear_page_dirty_for_io(page);
- ClearPagePrivate(page);
+ f2fs_clear_page_private(page);
ClearPageUptodate(page);
+ clear_cold_data(page);
inode_dec_dirty_pages(dir);
- remove_dirty_inode(dir);
+ f2fs_remove_dirty_inode(dir);
}
f2fs_put_page(page, 1);
}
@@ -758,7 +748,7 @@ bool f2fs_empty_dir(struct inode *dir)
return f2fs_empty_inline_dir(dir);
for (bidx = 0; bidx < nblock; bidx++) {
- dentry_page = get_lock_data_page(dir, bidx, false);
+ dentry_page = f2fs_get_lock_data_page(dir, bidx, false);
if (IS_ERR(dentry_page)) {
if (PTR_ERR(dentry_page) == -ENOENT)
continue;
@@ -791,9 +781,15 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
struct f2fs_dir_entry *de = NULL;
struct fscrypt_str de_name = FSTR_INIT(NULL, 0);
struct f2fs_sb_info *sbi = F2FS_I_SB(d->inode);
+ struct blk_plug plug;
+ bool readdir_ra = sbi->readdir_ra == 1;
+ int err = 0;
bit_pos = ((unsigned long)ctx->pos % d->max);
+ if (readdir_ra)
+ blk_start_plug(&plug);
+
while (bit_pos < d->max) {
bit_pos = find_next_bit_le(d->bitmap, d->max, bit_pos);
if (bit_pos >= d->max)
@@ -803,10 +799,14 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
if (de->name_len == 0) {
bit_pos++;
ctx->pos = start_pos + bit_pos;
+ printk_ratelimited(
+ "%s, invalid namelen(0), ino:%u, run fsck to fix.",
+ KERN_WARNING, le32_to_cpu(de->ino));
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
continue;
}
- d_type = get_de_type(de);
+ d_type = f2fs_get_de_type(de);
de_name.name = d->filename[bit_pos];
de_name.len = le16_to_cpu(de->name_len);
@@ -815,37 +815,41 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
if (unlikely(bit_pos > d->max ||
le16_to_cpu(de->name_len) > F2FS_NAME_LEN)) {
- f2fs_msg(F2FS_I_SB(d->inode)->sb, KERN_WARNING,
- "%s: corrupted namelen=%d, run fsck to fix.",
- __func__, le16_to_cpu(de->name_len));
- set_sbi_flag(F2FS_I_SB(d->inode)->sb->s_fs_info, SBI_NEED_FSCK);
- return -EINVAL;
+ f2fs_warn(sbi, "%s: corrupted namelen=%d, run fsck to fix.",
+ __func__, le16_to_cpu(de->name_len));
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ err = -EFSCORRUPTED;
+ goto out;
}
if (f2fs_encrypted_inode(d->inode)) {
int save_len = fstr->len;
- int err;
err = fscrypt_fname_disk_to_usr(d->inode,
- (u32)de->hash_code, 0,
- &de_name, fstr);
+ (u32)le32_to_cpu(de->hash_code),
+ 0, &de_name, fstr);
if (err)
- return err;
+ goto out;
de_name = *fstr;
fstr->len = save_len;
}
if (!dir_emit(ctx, de_name.name, de_name.len,
- le32_to_cpu(de->ino), d_type))
- return 1;
+ le32_to_cpu(de->ino), d_type)) {
+ err = 1;
+ goto out;
+ }
- if (sbi->readdir_ra == 1)
- ra_node_page(sbi, le32_to_cpu(de->ino));
+ if (readdir_ra)
+ f2fs_ra_node_page(sbi, le32_to_cpu(de->ino));
ctx->pos = start_pos + bit_pos;
}
- return 0;
+out:
+ if (readdir_ra)
+ blk_finish_plug(&plug);
+ return err;
}
static int f2fs_readdir(struct file *file, struct dir_context *ctx)
@@ -890,7 +894,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
page_cache_sync_readahead(inode->i_mapping, ra, file, n,
min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES));
- dentry_page = get_lock_data_page(inode, n, false);
+ dentry_page = f2fs_find_data_page(inode, n);
if (IS_ERR(dentry_page)) {
err = PTR_ERR(dentry_page);
if (err == -ENOENT) {
@@ -908,11 +912,11 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
err = f2fs_fill_dentries(ctx, &d,
n * NR_DENTRY_IN_BLOCK, &fstr);
if (err) {
- f2fs_put_page(dentry_page, 1);
+ f2fs_put_page(dentry_page, 0);
break;
}
- f2fs_put_page(dentry_page, 1);
+ f2fs_put_page(dentry_page, 0);
}
out_free:
fscrypt_fname_free_buffer(&fstr);
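
Two behavioural changes in dir.c are easy to miss among the renames: f2fs_readdir() now fetches dentry pages with f2fs_find_data_page(), which returns the page unlocked, so the matching f2fs_put_page() calls pass 0 for the unlock argument; and f2fs_fill_dentries() brackets its per-entry node readahead with a block plug so the f2fs_ra_node_page() submissions reach the device as merged requests. A condensed sketch of the plugging shape (the wrapper name is hypothetical and the emission/validation logic is elided):

/*
 * Hypothetical wrapper illustrating the plug usage added above;
 * blk_start_plug()/blk_finish_plug() batch the readahead bios
 * issued by f2fs_ra_node_page() inside the loop.
 */
static void fill_dentries_with_ra(struct f2fs_sb_info *sbi,
				struct f2fs_dentry_ptr *d, bool readdir_ra)
{
	struct blk_plug plug;
	unsigned long bit_pos = 0;

	if (readdir_ra)
		blk_start_plug(&plug);

	while (bit_pos < d->max) {
		struct f2fs_dir_entry *de;

		bit_pos = find_next_bit_le(d->bitmap, d->max, bit_pos);
		if (bit_pos >= d->max)
			break;
		de = &d->dentry[bit_pos];
		if (de->name_len == 0) {	/* skip corrupted slot */
			bit_pos++;
			continue;
		}
		/* ... name decryption and dir_emit() elided ... */
		if (readdir_ra)
			f2fs_ra_node_page(sbi, le32_to_cpu(de->ino));
		bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
	}

	if (readdir_ra)
		blk_finish_plug(&plug);
}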
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index d5a861bf2b42..a77022601bed 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* f2fs extent cache support
*
@@ -5,10 +6,6 @@
* Copyright (c) 2015 Samsung Electronics
* Authors: Jaegeuk Kim <jaegeuk@kernel.org>
* Chao Yu <chao2.yu@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/fs.h>
@@ -49,7 +46,7 @@ static struct rb_entry *__lookup_rb_tree_slow(struct rb_root *root,
return NULL;
}
-struct rb_entry *__lookup_rb_tree(struct rb_root *root,
+struct rb_entry *f2fs_lookup_rb_tree(struct rb_root *root,
struct rb_entry *cached_re, unsigned int ofs)
{
struct rb_entry *re;
@@ -61,7 +58,7 @@ struct rb_entry *__lookup_rb_tree(struct rb_root *root,
return re;
}
-struct rb_node **__lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
+struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
struct rb_root *root, struct rb_node **parent,
unsigned int ofs)
{
@@ -92,7 +89,7 @@ struct rb_node **__lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
* in order to simplify the insertion afterwards.
* tree must stay unchanged between lookup and insertion.
*/
-struct rb_entry *__lookup_rb_tree_ret(struct rb_root *root,
+struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root *root,
struct rb_entry *cached_re,
unsigned int ofs,
struct rb_entry **prev_entry,
@@ -159,7 +156,7 @@ lookup_neighbors:
return re;
}
-bool __check_rb_tree_consistence(struct f2fs_sb_info *sbi,
+bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
struct rb_root *root)
{
#ifdef CONFIG_F2FS_CHECK_FS
@@ -178,10 +175,9 @@ bool __check_rb_tree_consistence(struct f2fs_sb_info *sbi,
next_re = rb_entry(next, struct rb_entry, rb_node);
if (cur_re->ofs + cur_re->len > next_re->ofs) {
- f2fs_msg(sbi->sb, KERN_INFO, "inconsistent rbtree, "
- "cur(%u, %u) next(%u, %u)",
- cur_re->ofs, cur_re->len,
- next_re->ofs, next_re->len);
+ f2fs_info(sbi, "inconsistent rbtree, cur(%u, %u) next(%u, %u)",
+ cur_re->ofs, cur_re->len,
+ next_re->ofs, next_re->len);
return false;
}
@@ -308,14 +304,13 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
return count - atomic_read(&et->node_cnt);
}
-static void __drop_largest_extent(struct inode *inode,
+static void __drop_largest_extent(struct extent_tree *et,
pgoff_t fofs, unsigned int len)
{
- struct extent_info *largest = &F2FS_I(inode)->extent_tree->largest;
-
- if (fofs < largest->fofs + largest->len && fofs + len > largest->fofs) {
- largest->len = 0;
- f2fs_mark_inode_dirty_sync(inode, true);
+ if (fofs < et->largest.fofs + et->largest.len &&
+ fofs + len > et->largest.fofs) {
+ et->largest.len = 0;
+ et->largest_updated = true;
}
}
@@ -390,7 +385,7 @@ static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
goto out;
}
- en = (struct extent_node *)__lookup_rb_tree(&et->root,
+ en = (struct extent_node *)f2fs_lookup_rb_tree(&et->root,
(struct rb_entry *)et->cached_en, pgofs);
if (!en)
goto out;
@@ -416,12 +411,11 @@ out:
return ret;
}
-static struct extent_node *__try_merge_extent_node(struct inode *inode,
+static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
struct extent_tree *et, struct extent_info *ei,
struct extent_node *prev_ex,
struct extent_node *next_ex)
{
- struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct extent_node *en = NULL;
if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei)) {
@@ -443,7 +437,7 @@ static struct extent_node *__try_merge_extent_node(struct inode *inode,
if (!en)
return NULL;
- __try_update_largest_extent(inode, et, en);
+ __try_update_largest_extent(et, en);
spin_lock(&sbi->extent_lock);
if (!list_empty(&en->list)) {
@@ -454,12 +448,11 @@ static struct extent_node *__try_merge_extent_node(struct inode *inode,
return en;
}
-static struct extent_node *__insert_extent_tree(struct inode *inode,
+static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
struct extent_tree *et, struct extent_info *ei,
struct rb_node **insert_p,
struct rb_node *insert_parent)
{
- struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct rb_node **p;
struct rb_node *parent = NULL;
struct extent_node *en = NULL;
@@ -470,13 +463,13 @@ static struct extent_node *__insert_extent_tree(struct inode *inode,
goto do_insert;
}
- p = __lookup_rb_tree_for_insert(sbi, &et->root, &parent, ei->fofs);
+ p = f2fs_lookup_rb_tree_for_insert(sbi, &et->root, &parent, ei->fofs);
do_insert:
en = __attach_extent_node(sbi, et, ei, parent, p);
if (!en)
return NULL;
- __try_update_largest_extent(inode, et, en);
+ __try_update_largest_extent(et, en);
/* update in global extent list */
spin_lock(&sbi->extent_lock);
@@ -497,6 +490,7 @@ static void f2fs_update_extent_tree_range(struct inode *inode,
struct rb_node **insert_p = NULL, *insert_parent = NULL;
unsigned int end = fofs + len;
unsigned int pos = (unsigned int)fofs;
+ bool updated = false;
if (!et)
return;
@@ -517,10 +511,10 @@ static void f2fs_update_extent_tree_range(struct inode *inode,
* drop largest extent before lookup, in case it's already
* been shrunk from extent tree
*/
- __drop_largest_extent(inode, fofs, len);
+ __drop_largest_extent(et, fofs, len);
/* 1. lookup first extent node in range [fofs, fofs + len - 1] */
- en = (struct extent_node *)__lookup_rb_tree_ret(&et->root,
+ en = (struct extent_node *)f2fs_lookup_rb_tree_ret(&et->root,
(struct rb_entry *)et->cached_en, fofs,
(struct rb_entry **)&prev_en,
(struct rb_entry **)&next_en,
@@ -550,7 +544,7 @@ static void f2fs_update_extent_tree_range(struct inode *inode,
set_extent_info(&ei, end,
end - dei.fofs + dei.blk,
org_end - end);
- en1 = __insert_extent_tree(inode, et, &ei,
+ en1 = __insert_extent_tree(sbi, et, &ei,
NULL, NULL);
next_en = en1;
} else {
@@ -570,7 +564,7 @@ static void f2fs_update_extent_tree_range(struct inode *inode,
}
if (parts)
- __try_update_largest_extent(inode, et, en);
+ __try_update_largest_extent(et, en);
else
__release_extent_node(sbi, et, en);
@@ -590,15 +584,16 @@ static void f2fs_update_extent_tree_range(struct inode *inode,
if (blkaddr) {
set_extent_info(&ei, fofs, blkaddr, len);
- if (!__try_merge_extent_node(inode, et, &ei, prev_en, next_en))
- __insert_extent_tree(inode, et, &ei,
+ if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
+ __insert_extent_tree(sbi, et, &ei,
insert_p, insert_parent);
/* give up extent_cache, if split and small updates happen */
if (dei.len >= 1 &&
prev.len < F2FS_MIN_EXTENT_LEN &&
et->largest.len < F2FS_MIN_EXTENT_LEN) {
- __drop_largest_extent(inode, 0, UINT_MAX);
+ et->largest.len = 0;
+ et->largest_updated = true;
set_inode_flag(inode, FI_NO_EXTENT);
}
}
@@ -606,7 +601,15 @@ static void f2fs_update_extent_tree_range(struct inode *inode,
if (is_inode_flag_set(inode, FI_NO_EXTENT))
__free_extent_tree(sbi, et);
+ if (et->largest_updated) {
+ et->largest_updated = false;
+ updated = true;
+ }
+
write_unlock(&et->lock);
+
+ if (updated)
+ f2fs_mark_inode_dirty_sync(inode, true);
}
unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
@@ -705,6 +708,7 @@ void f2fs_drop_extent_tree(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct extent_tree *et = F2FS_I(inode)->extent_tree;
+ bool updated = false;
if (!f2fs_may_extent_tree(inode))
return;
@@ -713,8 +717,13 @@ void f2fs_drop_extent_tree(struct inode *inode)
write_lock(&et->lock);
__free_extent_tree(sbi, et);
- __drop_largest_extent(inode, 0, UINT_MAX);
+ if (et->largest.len) {
+ et->largest.len = 0;
+ updated = true;
+ }
write_unlock(&et->lock);
+ if (updated)
+ f2fs_mark_inode_dirty_sync(inode, true);
}
void f2fs_destroy_extent_tree(struct inode *inode)
@@ -773,7 +782,7 @@ void f2fs_update_extent_cache(struct dnode_of_data *dn)
else
blkaddr = dn->data_blkaddr;
- fofs = start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
+ fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
dn->ofs_in_node;
f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, 1);
}
@@ -788,7 +797,7 @@ void f2fs_update_extent_cache_range(struct dnode_of_data *dn,
f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, len);
}
-void init_extent_cache_info(struct f2fs_sb_info *sbi)
+void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi)
{
INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO);
mutex_init(&sbi->extent_tree_lock);
@@ -800,7 +809,7 @@ void init_extent_cache_info(struct f2fs_sb_info *sbi)
atomic_set(&sbi->total_ext_node, 0);
}
-int __init create_extent_cache(void)
+int __init f2fs_create_extent_cache(void)
{
extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
sizeof(struct extent_tree));
@@ -815,7 +824,7 @@ int __init create_extent_cache(void)
return 0;
}
-void destroy_extent_cache(void)
+void f2fs_destroy_extent_cache(void)
{
kmem_cache_destroy(extent_node_slab);
kmem_cache_destroy(extent_tree_slab);
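
The common thread of the extent-cache changes above is that f2fs_mark_inode_dirty_sync() must no longer be called while et->lock is held: the helpers now take sbi or the extent_tree instead of the inode, latch et->largest_updated under the lock, and dirty the inode only after write_unlock(). The pattern, condensed from the reshaped f2fs_drop_extent_tree() in this diff (unrelated lines elided):

	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree;
	bool updated = false;

	write_lock(&et->lock);
	__free_extent_tree(sbi, et);
	if (et->largest.len) {		/* shrink largest extent in place */
		et->largest.len = 0;
		updated = true;		/* remember the change for later */
	}
	write_unlock(&et->lock);

	if (updated)			/* dirty the inode without the lock */
		f2fs_mark_inode_dirty_sync(inode, true);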
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 97c17b3d984c..ae98cdf21f55 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1,16 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/f2fs.h
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#ifndef _LINUX_F2FS_H
#define _LINUX_F2FS_H
+#include <linux/uio.h>
#include <linux/types.h>
#include <linux/page-flags.h>
#include <linux/buffer_head.h>
@@ -26,6 +24,7 @@
#include <linux/quotaops.h>
#include <crypto/hash.h>
#include <linux/writeback.h>
+#include <linux/overflow.h>
#define __FS_HAS_ENCRYPTION IS_ENABLED(CONFIG_F2FS_FS_ENCRYPTION)
#include <linux/fscrypt.h>
@@ -42,7 +41,6 @@
} while (0)
#endif
-#ifdef CONFIG_F2FS_FAULT_INJECTION
enum {
FAULT_KMALLOC,
FAULT_KVMALLOC,
@@ -55,18 +53,23 @@ enum {
FAULT_DIR_DEPTH,
FAULT_EVICT_INODE,
FAULT_TRUNCATE,
- FAULT_IO,
+ FAULT_READ_IO,
FAULT_CHECKPOINT,
+ FAULT_DISCARD,
+ FAULT_WRITE_IO,
FAULT_MAX,
};
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+#define F2FS_ALL_FAULT_TYPE ((1 << FAULT_MAX) - 1)
+
struct f2fs_fault_info {
atomic_t inject_ops;
unsigned int inject_rate;
unsigned int inject_type;
};
-extern char *fault_name[FAULT_MAX];
+extern const char *f2fs_fault_name[FAULT_MAX];
#define IS_FAULT_SET(fi, type) ((fi)->inject_type & (1 << (type)))
#endif
@@ -98,6 +101,7 @@ extern char *fault_name[FAULT_MAX];
#define F2FS_MOUNT_QUOTA 0x00400000
#define F2FS_MOUNT_INLINE_XATTR_SIZE 0x00800000
#define F2FS_MOUNT_RESERVE_ROOT 0x01000000
+#define F2FS_MOUNT_DISABLE_CHECKPOINT 0x02000000
#define F2FS_OPTION(sbi) ((sbi)->mount_opt)
#define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option)
@@ -135,6 +139,9 @@ struct f2fs_mount_info {
int alloc_mode; /* segment allocation policy */
int fsync_mode; /* fsync policy */
bool test_dummy_encryption; /* test dummy encryption */
+ block_t unusable_cap; /* Amount of space allowed to be
+ * unusable when disabling checkpoint
+ */
};
#define F2FS_FEATURE_ENCRYPT 0x0001
@@ -148,13 +155,15 @@ struct f2fs_mount_info {
#define F2FS_FEATURE_INODE_CRTIME 0x0100
#define F2FS_FEATURE_LOST_FOUND 0x0200
#define F2FS_FEATURE_VERITY 0x0400 /* reserved */
+#define F2FS_FEATURE_SB_CHKSUM 0x0800
-#define F2FS_HAS_FEATURE(sb, mask) \
- ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0)
-#define F2FS_SET_FEATURE(sb, mask) \
- (F2FS_SB(sb)->raw_super->feature |= cpu_to_le32(mask))
-#define F2FS_CLEAR_FEATURE(sb, mask) \
- (F2FS_SB(sb)->raw_super->feature &= ~cpu_to_le32(mask))
+#define __F2FS_HAS_FEATURE(raw_super, mask) \
+ ((raw_super->feature & cpu_to_le32(mask)) != 0)
+#define F2FS_HAS_FEATURE(sbi, mask) __F2FS_HAS_FEATURE(sbi->raw_super, mask)
+#define F2FS_SET_FEATURE(sbi, mask) \
+ (sbi->raw_super->feature |= cpu_to_le32(mask))
+#define F2FS_CLEAR_FEATURE(sbi, mask) \
+ (sbi->raw_super->feature &= ~cpu_to_le32(mask))
/* bio stuffs */
#define REQ_OP_READ READ
@@ -237,15 +246,19 @@ enum {
#define CP_RECOVERY 0x00000008
#define CP_DISCARD 0x00000010
#define CP_TRIMMED 0x00000020
+#define CP_PAUSE 0x00000040
#define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi)
#define DEF_MAX_DISCARD_REQUEST 8 /* issue 8 discards per round */
-#define DEF_MAX_DISCARD_LEN 512 /* Max. 2MB per discard */
#define DEF_MIN_DISCARD_ISSUE_TIME 50 /* 50 ms, if exists */
+#define DEF_MID_DISCARD_ISSUE_TIME 500 /* 500 ms, if device busy */
#define DEF_MAX_DISCARD_ISSUE_TIME 60000 /* 60 s, if no candidates */
#define DEF_DISCARD_URGENT_UTIL 80 /* do more discard over 80% */
#define DEF_CP_INTERVAL 60 /* 60 secs */
#define DEF_IDLE_INTERVAL 5 /* 5 secs */
+#define DEF_DISABLE_INTERVAL 5 /* 5 secs */
+#define DEF_DISABLE_QUICK_INTERVAL 1 /* 1 sec */
+#define DEF_UMOUNT_DISCARD_TIMEOUT 5 /* 5 secs */
struct cp_control {
int reason;
@@ -255,14 +268,24 @@ struct cp_control {
};
/*
- * For CP/NAT/SIT/SSA readahead
+ * indicate meta/data type
*/
enum {
META_CP,
META_NAT,
META_SIT,
META_SSA,
+ META_MAX,
META_POR,
+ DATA_GENERIC, /* check range only */
+ DATA_GENERIC_ENHANCE, /* strong check on range and segment bitmap */
+ DATA_GENERIC_ENHANCE_READ, /*
+ * strong check on range and segment
+ * bitmap but no warning due to race
+ * condition of read on truncated area
+ * by extent_cache
+ */
+ META_GENERIC,
};
/* for the list of ino */
@@ -287,6 +310,12 @@ struct inode_entry {
struct inode *inode; /* vfs inode pointer */
};
+struct fsync_node_entry {
+ struct list_head list; /* list head */
+ struct page *page; /* warm node page pointer */
+ unsigned int seq_id; /* sequence id */
+};
+
/* for the bitmap indicate blocks to be discarded */
struct discard_entry {
struct list_head list; /* list head */
@@ -300,12 +329,13 @@ struct discard_entry {
/* max discard pend list number */
#define MAX_PLIST_NUM 512
#define plist_idx(blk_num) ((blk_num) >= MAX_PLIST_NUM ? \
- (MAX_PLIST_NUM - 1) : (blk_num - 1))
+ (MAX_PLIST_NUM - 1) : ((blk_num) - 1))
enum {
- D_PREP,
- D_SUBMIT,
- D_DONE,
+ D_PREP, /* initial */
+ D_PARTIAL, /* partially submitted */
+ D_SUBMIT, /* all submitted */
+ D_DONE, /* finished */
};
struct discard_info {
@@ -330,7 +360,10 @@ struct discard_cmd {
struct block_device *bdev; /* bdev */
unsigned short ref; /* reference count */
unsigned char state; /* state */
+ unsigned char queued; /* queued discard */
int error; /* bio error */
+ spinlock_t lock; /* for state/bio_ref updating */
+ unsigned short bio_ref; /* bio reference count */
};
enum {
@@ -344,12 +377,15 @@ enum {
struct discard_policy {
int type; /* type of discard */
unsigned int min_interval; /* used for candidates exist */
+ unsigned int mid_interval; /* used for device busy */
unsigned int max_interval; /* used for candidates not exist */
unsigned int max_requests; /* # of discards issued per round */
unsigned int io_aware_gran; /* minimum granularity at which discards ignore the I/O-aware check */
bool io_aware; /* issue discard in idle time */
bool sync; /* submit discard with REQ_SYNC flag */
+ bool ordered; /* issue discard by lba order */
unsigned int granularity; /* discard granularity */
+ int timeout; /* discard timeout for put_super */
};
struct discard_cmd_control {
@@ -365,10 +401,12 @@ struct discard_cmd_control {
unsigned int max_discards; /* max. discards to be issued */
unsigned int discard_granularity; /* discard granularity */
unsigned int undiscard_blks; /* # of undiscard blocks */
+ unsigned int next_pos; /* next discard position */
atomic_t issued_discard; /* # of issued discard */
- atomic_t issing_discard; /* # of issing discard */
+ atomic_t queued_discard; /* # of queued discard */
atomic_t discard_cmd_cnt; /* # of cached cmd count */
struct rb_root root; /* root of discard rb-tree */
+ bool rbtree_check; /* config for consistency check */
};
/* for the list of fsync inodes, used only during recovery */
@@ -441,6 +479,7 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal,
#define F2FS_IOC_SET_PIN_FILE _IOW(F2FS_IOCTL_MAGIC, 13, __u32)
#define F2FS_IOC_GET_PIN_FILE _IOR(F2FS_IOCTL_MAGIC, 14, __u32)
#define F2FS_IOC_PRECACHE_EXTENTS _IO(F2FS_IOCTL_MAGIC, 15)
+#define F2FS_IOC_RESIZE_FS _IOW(F2FS_IOCTL_MAGIC, 16, __u64)
#define F2FS_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY
#define F2FS_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY
@@ -455,6 +494,7 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal,
#define F2FS_GOING_DOWN_METASYNC 0x1 /* going down with metadata */
#define F2FS_GOING_DOWN_NOSYNC 0x2 /* going down */
#define F2FS_GOING_DOWN_METAFLUSH 0x3 /* going down with meta flush */
+#define F2FS_GOING_DOWN_NEED_FSCK 0x4 /* going down to trigger fsck */
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/*
@@ -490,7 +530,6 @@ struct f2fs_flush_device {
/* for inline stuff */
#define DEF_INLINE_RESERVED_SIZE 1
-#define DEF_MIN_INLINE_SIZE 1
static inline int get_extra_isize(struct inode *inode);
static inline int get_inline_xattr_addrs(struct inode *inode);
#define MAX_INLINE_DATA(inode) (sizeof(__le32) * \
@@ -502,8 +541,8 @@ static inline int get_inline_xattr_addrs(struct inode *inode);
#define NR_INLINE_DENTRY(inode) (MAX_INLINE_DATA(inode) * BITS_PER_BYTE / \
((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \
BITS_PER_BYTE + 1))
-#define INLINE_DENTRY_BITMAP_SIZE(inode) ((NR_INLINE_DENTRY(inode) + \
- BITS_PER_BYTE - 1) / BITS_PER_BYTE)
+#define INLINE_DENTRY_BITMAP_SIZE(inode) \
+ DIV_ROUND_UP(NR_INLINE_DENTRY(inode), BITS_PER_BYTE)
#define INLINE_RESERVED_SIZE(inode) (MAX_INLINE_DATA(inode) - \
((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \
NR_INLINE_DENTRY(inode) + \
@@ -565,13 +604,15 @@ enum {
*/
};
+#define DEFAULT_RETRY_IO_COUNT 8 /* maximum retry read IO count */
+
+/* maximum retry quota flush count */
+#define DEFAULT_RETRY_QUOTA_FLUSH_COUNT 8
+
#define F2FS_LINK_MAX 0xffffffff /* maximum link count per file */
#define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */
-/* vector size for gang look-up from extent cache that consists of radix tree */
-#define EXT_TREE_VEC_SIZE 64
-
/* for in-memory extent cache entry */
#define F2FS_MIN_EXTENT_LEN 64 /* minimum extent length */
@@ -591,16 +632,8 @@ struct extent_info {
};
struct extent_node {
- struct rb_node rb_node;
- union {
- struct {
- unsigned int fofs;
- unsigned int len;
- u32 blk;
- };
- struct extent_info ei; /* extent info */
-
- };
+ struct rb_node rb_node; /* rb node located in rb-tree */
+ struct extent_info ei; /* extent info */
struct list_head list; /* node in global extent list of sbi */
struct extent_tree *et; /* extent tree pointer */
};
@@ -613,6 +646,7 @@ struct extent_tree {
struct list_head list; /* to be used by sbi->zombie_list */
rwlock_t lock; /* protect extent info rb-tree */
atomic_t node_cnt; /* # of extent node in rb-tree*/
+ bool largest_updated; /* largest extent updated */
};
/*
@@ -634,6 +668,7 @@ struct f2fs_map_blocks {
pgoff_t *m_next_pgofs; /* point next possible non-hole pgofs */
pgoff_t *m_next_extent; /* point to next possible extent */
int m_seg_type;
+ bool m_may_create; /* indicate it is from write path */
};
/* for flag in get_data_block */
@@ -641,6 +676,7 @@ enum {
F2FS_GET_BLOCK_DEFAULT,
F2FS_GET_BLOCK_FIEMAP,
F2FS_GET_BLOCK_BMAP,
+ F2FS_GET_BLOCK_DIO,
F2FS_GET_BLOCK_PRE_DIO,
F2FS_GET_BLOCK_PRE_AIO,
F2FS_GET_BLOCK_PRECACHE,
@@ -657,6 +693,8 @@ enum {
#define FADVISE_HOT_BIT 0x20
#define FADVISE_VERITY_BIT 0x40 /* reserved */
+#define FADVISE_MODIFIABLE_BITS (FADVISE_COLD_BIT | FADVISE_HOT_BIT)
+
#define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT)
#define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT)
#define file_set_cold(inode) set_file(inode, FADVISE_COLD_BIT)
@@ -676,15 +714,20 @@ enum {
#define DEF_DIR_LEVEL 0
+enum {
+ GC_FAILURE_PIN,
+ GC_FAILURE_ATOMIC,
+ MAX_GC_FAILURE
+};
+
struct f2fs_inode_info {
struct inode vfs_inode; /* serve a vfs inode */
unsigned long i_flags; /* keep an inode flags for ioctl */
unsigned char i_advise; /* use to give file attribute hints */
unsigned char i_dir_level; /* use for dentry level for large dir */
- union {
- unsigned int i_current_depth; /* only for directory depth */
- unsigned short i_gc_failures; /* only for regular file */
- };
+ unsigned int i_current_depth; /* only for directory depth */
+ /* for gc failure statistic */
+ unsigned int i_gc_failures[MAX_GC_FAILURE];
unsigned int i_pino; /* parent inode number */
umode_t i_acl_mode; /* keep file acl mode temporarily */
@@ -712,7 +755,9 @@ struct f2fs_inode_info {
struct task_struct *inmem_task; /* store inmemory task */
struct mutex inmem_lock; /* lock for inmemory pages */
struct extent_tree *extent_tree; /* cached extent_tree entry */
- struct rw_semaphore dio_rwsem[2];/* avoid racing between dio and gc */
+
+ /* avoid racing between foreground op and gc */
+ struct rw_semaphore i_gc_rwsem[2];
struct rw_semaphore i_mmap_sem;
struct rw_semaphore i_xattr_sem; /* avoid racing between reading and changing EAs */
@@ -748,22 +793,22 @@ static inline void set_extent_info(struct extent_info *ei, unsigned int fofs,
}
static inline bool __is_discard_mergeable(struct discard_info *back,
- struct discard_info *front)
+ struct discard_info *front, unsigned int max_len)
{
return (back->lstart + back->len == front->lstart) &&
- (back->len + front->len < DEF_MAX_DISCARD_LEN);
+ (back->len + front->len <= max_len);
}
static inline bool __is_discard_back_mergeable(struct discard_info *cur,
- struct discard_info *back)
+ struct discard_info *back, unsigned int max_len)
{
- return __is_discard_mergeable(back, cur);
+ return __is_discard_mergeable(back, cur, max_len);
}
static inline bool __is_discard_front_mergeable(struct discard_info *cur,
- struct discard_info *front)
+ struct discard_info *front, unsigned int max_len)
{
- return __is_discard_mergeable(cur, front);
+ return __is_discard_mergeable(cur, front, max_len);
}
static inline bool __is_extent_mergeable(struct extent_info *back,
@@ -786,12 +831,12 @@ static inline bool __is_front_mergeable(struct extent_info *cur,
}
extern void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync);
-static inline void __try_update_largest_extent(struct inode *inode,
- struct extent_tree *et, struct extent_node *en)
+static inline void __try_update_largest_extent(struct extent_tree *et,
+ struct extent_node *en)
{
if (en->ei.len > et->largest.len) {
et->largest = en->ei;
- f2fs_mark_inode_dirty_sync(inode, true);
+ et->largest_updated = true;
}
}
@@ -818,6 +863,7 @@ struct f2fs_nm_info {
struct radix_tree_root nat_set_root;/* root of the nat set cache */
struct rw_semaphore nat_tree_lock; /* protect nat_tree_lock */
struct list_head nat_entries; /* cached nat entry list (clean) */
+ spinlock_t nat_list_lock; /* protect clean nat entry list */
unsigned int nat_cnt; /* the # of cached nat entries */
unsigned int dirty_nat_cnt; /* total num of nat entries in set */
unsigned int nat_blocks; /* # of nat blocks */
@@ -911,7 +957,7 @@ struct flush_cmd_control {
struct task_struct *f2fs_issue_flush; /* flush thread */
wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */
atomic_t issued_flush; /* # of issued flushes */
- atomic_t issing_flush; /* # of issing flushes */
+ atomic_t queued_flush; /* # of queued flushes */
struct llist_head issue_list; /* list for command issue */
struct llist_node *dispatch_list; /* list for command dispatch */
};
@@ -944,6 +990,7 @@ struct f2fs_sm_info {
unsigned int ipu_policy; /* in-place-update policy */
unsigned int min_ipu_util; /* in-place-update threshold */
unsigned int min_fsync_blocks; /* threshold for fsync */
+ unsigned int min_seq_blocks; /* threshold for sequential blocks */
unsigned int min_hot_blocks; /* threshold for hot block allocation */
unsigned int min_ssr_sections; /* threshold to trigger SSR allocation */
@@ -974,6 +1021,11 @@ enum count_type {
F2FS_DIRTY_IMETA,
F2FS_WB_CP_DATA,
F2FS_WB_DATA,
+ F2FS_RD_DATA,
+ F2FS_RD_NODE,
+ F2FS_RD_META,
+ F2FS_DIO_WRITE,
+ F2FS_DIO_READ,
NR_COUNT_TYPE,
};
@@ -1061,9 +1113,13 @@ struct f2fs_io_info {
bool submitted; /* indicate IO submission */
int need_lock; /* indicate we need to lock cp_rwsem */
bool in_list; /* indicate fio is in io_list */
- bool is_meta; /* indicate borrow meta inode mapping or not */
+ bool is_por; /* indicate IO is from recovery or not */
+ bool retry; /* need to reallocate block address */
enum iostat_type io_type; /* io type */
struct writeback_control *io_wbc; /* writeback control */
+ struct bio **bio; /* bio for ipu */
+ sector_t *last_block; /* last block number in bio */
+ unsigned char version; /* version of the node */
};
#define is_read_io(rw) ((rw) == READ)
@@ -1086,8 +1142,8 @@ struct f2fs_dev_info {
block_t start_blk;
block_t end_blk;
#ifdef CONFIG_BLK_DEV_ZONED
- unsigned int nr_blkz; /* Total number of zones */
- u8 *blkz_type; /* Array of zones type */
+ unsigned int nr_blkz; /* Total number of zones */
+ unsigned long *blkz_seq; /* Bitmap indicating sequential zones */
#endif
};
@@ -1115,15 +1171,34 @@ enum {
SBI_POR_DOING, /* recovery is doing or not */
SBI_NEED_SB_WRITE, /* need to recover superblock */
SBI_NEED_CP, /* need to checkpoint */
+ SBI_IS_SHUTDOWN, /* shutdown by ioctl */
+ SBI_IS_RECOVERED, /* recovered orphan/data */
+ SBI_CP_DISABLED, /* CP was disabled last mount */
+ SBI_CP_DISABLED_QUICK, /* CP was disabled quickly */
+ SBI_QUOTA_NEED_FLUSH, /* need to flush quota info in CP */
+ SBI_QUOTA_SKIP_FLUSH, /* skip flushing quota in current CP */
+ SBI_QUOTA_NEED_REPAIR, /* quota file may be corrupted */
+ SBI_IS_RESIZEFS, /* resizefs is in process */
};
enum {
CP_TIME,
REQ_TIME,
+ DISCARD_TIME,
+ GC_TIME,
+ DISABLE_TIME,
+ UMOUNT_DISCARD_TIMEOUT,
MAX_TIME,
};
enum {
+ GC_NORMAL,
+ GC_IDLE_CB,
+ GC_IDLE_GREEDY,
+ GC_URGENT,
+};
+
+enum {
WHINT_MODE_OFF, /* not pass down write hints */
WHINT_MODE_USER, /* try to pass down hints given by users */
WHINT_MODE_FS, /* pass down hints with F2FS policy */
@@ -1154,6 +1229,7 @@ struct f2fs_sb_info {
struct rw_semaphore sb_lock; /* lock for raw super block */
int valid_super_block; /* valid super block no */
unsigned long s_flag; /* flags for sbi */
+ struct mutex writepages; /* mutex for writepages() */
#ifdef CONFIG_BLK_DEV_ZONED
unsigned int blocks_per_blkz; /* F2FS blocks per zone */
@@ -1169,8 +1245,8 @@ struct f2fs_sb_info {
/* for bio operations */
struct f2fs_bio_info *write_io[NR_PAGE_TYPE]; /* for write bios */
- struct mutex wio_mutex[NR_PAGE_TYPE - 1][NR_TEMP_TYPE];
- /* bio ordering for NODE/DATA */
+ /* keep migration IO order for LFS mode */
+ struct rw_semaphore io_order_lock;
mempool_t *write_io_dummy; /* Dummy pages */
/* for checkpoint */
@@ -1188,12 +1264,18 @@ struct f2fs_sb_info {
struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */
+ spinlock_t fsync_node_lock; /* for node entry lock */
+ struct list_head fsync_node_list; /* node list head */
+ unsigned int fsync_seg_id; /* sequence id */
+ unsigned int fsync_node_num; /* number of node entries */
+
/* for orphan inode, use 0'th array */
unsigned int max_orphans; /* max orphan inodes */
/* for inode management */
struct list_head inode_list[NR_INODE_TYPE]; /* dirty inode list */
spinlock_t inode_lock[NR_INODE_TYPE]; /* for dirty inode list lock */
+ struct mutex flush_lock; /* for flush exclusion */
/* for extent tree cache */
struct radix_tree_root extent_tree_root;/* cache extent cache entries */
@@ -1217,11 +1299,11 @@ struct f2fs_sb_info {
unsigned int segs_per_sec; /* segments per section */
unsigned int secs_per_zone; /* sections per zone */
unsigned int total_sections; /* total section count */
+ struct mutex resize_mutex; /* for resize exclusion */
unsigned int total_node_count; /* total node block count */
unsigned int total_valid_node_count; /* valid node block count */
loff_t max_file_blocks; /* max block index of file */
int dir_level; /* directory level */
- unsigned int trigger_ssr_threshold; /* threshold to trigger ssr */
int readdir_ra; /* readahead inode in readdir */
block_t user_block_count; /* # of user blocks */
@@ -1231,9 +1313,11 @@ struct f2fs_sb_info {
block_t reserved_blocks; /* configurable reserved blocks */
block_t current_reserved_blocks; /* current reserved blocks */
- unsigned int nquota_files; /* # of quota sysfile */
+ /* Additional tracking for no checkpoint mode */
+ block_t unusable_block_count; /* # of blocks saved by last cp */
- u32 s_next_generation; /* for NFS support */
+ unsigned int nquota_files; /* # of quota sysfile */
+ struct rw_semaphore quota_sem; /* blocking cp for flags */
/* # of pages, see count_type */
atomic_t nr_pages[NR_COUNT_TYPE];
@@ -1241,7 +1325,7 @@ struct f2fs_sb_info {
struct percpu_counter alloc_valid_block_count;
/* writeback control */
- atomic_t wb_sync_req; /* count # of WB_SYNC threads */
+ atomic_t wb_sync_req[META]; /* count # of WB_SYNC threads */
/* valid inode count */
struct percpu_counter total_valid_inode_count;
@@ -1252,15 +1336,19 @@ struct f2fs_sb_info {
struct mutex gc_mutex; /* mutex for GC */
struct f2fs_gc_kthread *gc_thread; /* GC thread */
unsigned int cur_victim_sec; /* current victim section num */
-
- /* threshold for converting bg victims for fg */
- u64 fggc_threshold;
+ unsigned int gc_mode; /* current GC state */
+ unsigned int next_victim_seg[2]; /* next segment in victim section */
+ /* for skip statistic */
+ unsigned long long skipped_atomic_files[2]; /* FG_GC and BG_GC */
+ unsigned long long skipped_gc_rwsem; /* FG_GC only */
/* threshold for gc trials on pinned files */
u64 gc_pin_file_threshold;
/* maximum # of trials to find a victim segment for SSR and GC */
unsigned int max_victim_search;
+ /* migration granularity of garbage collection, unit: segment */
+ unsigned int migration_granularity;
/*
* for stat information.
@@ -1268,6 +1356,7 @@ struct f2fs_sb_info {
*/
#ifdef CONFIG_F2FS_STAT_FS
struct f2fs_stat_info *stat_info; /* FS status information */
+ atomic_t meta_count[META_MAX]; /* # of meta blocks */
unsigned int segment_count[2]; /* # of allocated segments */
unsigned int block_count[2]; /* # of allocated blocks */
atomic_t inplace_count; /* # of inplace update */
@@ -1283,6 +1372,8 @@ struct f2fs_sb_info {
atomic_t max_aw_cnt; /* max # of atomic writes */
atomic_t max_vw_cnt; /* max # of volatile writes */
int bg_gc; /* background gc calls */
+ unsigned int io_skip_bggc; /* skip background gc for in-flight IO */
+ unsigned int other_skip_bggc; /* skip background gc for other reasons */
unsigned int ndirty_inode[NR_INODE_TYPE]; /* # of dirty inodes */
#endif
spinlock_t stat_lock; /* lock for stat operations */
@@ -1316,10 +1407,17 @@ struct f2fs_sb_info {
__u32 s_chksum_seed;
};
+struct f2fs_private_dio {
+ struct inode *inode;
+ void *orig_private;
+ bio_end_io_t *orig_end_io;
+ bool write;
+};
+
#ifdef CONFIG_F2FS_FAULT_INJECTION
-#define f2fs_show_injection_info(type) \
- printk("%sF2FS-fs : inject %s in %s of %pF\n", \
- KERN_INFO, fault_name[type], \
+#define f2fs_show_injection_info(type) \
+ printk_ratelimited("%sF2FS-fs : inject %s in %s of %pF\n", \
+ KERN_INFO, f2fs_fault_name[type], \
__func__, __builtin_return_address(0))
static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
{
@@ -1338,8 +1436,25 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
}
return false;
}
+#else
+#define f2fs_show_injection_info(type) do { } while (0)
+static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
+{
+ return false;
+}
#endif
+/*
+ * Test if the mounted volume is a multi-device volume.
+ * - For a single regular disk volume, sbi->s_ndevs is 0.
+ * - For a single zoned disk volume, sbi->s_ndevs is 1.
+ * - For a multi-device volume, sbi->s_ndevs is always 2 or more.
+ */
+static inline bool f2fs_is_multi_device(struct f2fs_sb_info *sbi)
+{
+ return sbi->s_ndevs > 1;
+}
+
/* For write statistics. Suppose the sector size is 512 bytes,
 * and the return value is in kbytes. s is a struct f2fs_sb_info *.
*/
@@ -1349,7 +1464,15 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type)
{
- sbi->last_time[type] = jiffies;
+ unsigned long now = jiffies;
+
+ sbi->last_time[type] = now;
+
+ /* DISCARD_TIME and GC_TIME are based on REQ_TIME */
+ if (type == REQ_TIME) {
+ sbi->last_time[DISCARD_TIME] = now;
+ sbi->last_time[GC_TIME] = now;
+ }
}
static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type)
@@ -1359,16 +1482,18 @@ static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type)
return time_after(jiffies, sbi->last_time[type] + interval);
}
-static inline bool is_idle(struct f2fs_sb_info *sbi)
+static inline unsigned int f2fs_time_to_wait(struct f2fs_sb_info *sbi,
+ int type)
{
- struct block_device *bdev = sbi->sb->s_bdev;
- struct request_queue *q = bdev_get_queue(bdev);
- struct request_list *rl = &q->root_rl;
+ unsigned long interval = sbi->interval_time[type] * HZ;
+ unsigned int wait_ms = 0;
+ long delta;
- if (rl->count[BLK_RW_SYNC] || rl->count[BLK_RW_ASYNC])
- return 0;
+ delta = (sbi->last_time[type] + interval) - jiffies;
+ if (delta > 0)
+ wait_ms = jiffies_to_msecs(delta);
- return f2fs_time_over(sbi, REQ_TIME);
+ return wait_ms;
}
/*
@@ -1435,7 +1560,7 @@ static inline struct f2fs_sb_info *F2FS_M_SB(struct address_space *mapping)
static inline struct f2fs_sb_info *F2FS_P_SB(struct page *page)
{
- return F2FS_M_SB(page->mapping);
+ return F2FS_M_SB(page_file_mapping(page));
}
static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi)
@@ -1578,12 +1703,16 @@ static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock)
{
unsigned long flags;
- set_sbi_flag(sbi, SBI_NEED_FSCK);
+ /*
+ * In order to re-enable nat_bits we need to trigger fsck.f2fs via
+ * set_sbi_flag(sbi, SBI_NEED_FSCK). But that may be very costly,
+ * so let's rely on a regular fsck or an unclean shutdown instead.
+ */
if (lock)
spin_lock_irqsave(&sbi->cp_lock, flags);
__clear_ckpt_flags(F2FS_CKPT(sbi), CP_NAT_BITS_FLAG);
- kfree(NM_I(sbi)->nat_bits);
+ kvfree(NM_I(sbi)->nat_bits);
NM_I(sbi)->nat_bits = NULL;
if (lock)
spin_unlock_irqrestore(&sbi->cp_lock, flags);
@@ -1645,18 +1774,6 @@ static inline bool __exist_node_summaries(struct f2fs_sb_info *sbi)
}
/*
- * Check whether the given nid is within node id range.
- */
-static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
-{
- if (unlikely(nid < F2FS_ROOT_INO(sbi)))
- return -EINVAL;
- if (unlikely(nid >= NM_I(sbi)->max_nid))
- return -EINVAL;
- return 0;
-}
-
-/*
* Check whether the inode has blocks or not
*/
static inline int F2FS_HAS_BLOCKS(struct inode *inode)
@@ -1702,13 +1819,12 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
if (ret)
return ret;
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_BLOCK)) {
f2fs_show_injection_info(FAULT_BLOCK);
release = *count;
goto enospc;
}
-#endif
+
/*
* let's increase this in prior to actual block count change in order
* for f2fs_sync_file to avoid data races when deciding checkpoint.
@@ -1722,7 +1838,12 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
if (!__allow_reserved_blocks(sbi, inode, true))
avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;
-
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+ if (avail_user_block_count > sbi->unusable_block_count)
+ avail_user_block_count -= sbi->unusable_block_count;
+ else
+ avail_user_block_count = 0;
+ }
if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
diff = sbi->total_valid_block_count - avail_user_block_count;
if (diff > *count)
@@ -1732,22 +1853,38 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
sbi->total_valid_block_count -= diff;
if (!*count) {
spin_unlock(&sbi->stat_lock);
- percpu_counter_sub(&sbi->alloc_valid_block_count, diff);
goto enospc;
}
}
spin_unlock(&sbi->stat_lock);
- if (unlikely(release))
+ if (unlikely(release)) {
+ percpu_counter_sub(&sbi->alloc_valid_block_count, release);
dquot_release_reservation_block(inode, release);
+ }
f2fs_i_blocks_write(inode, *count, true, true);
return 0;
enospc:
+ percpu_counter_sub(&sbi->alloc_valid_block_count, release);
dquot_release_reservation_block(inode, release);
return -ENOSPC;
}
+__printf(2, 3)
+void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...);
+
+#define f2fs_err(sbi, fmt, ...) \
+ f2fs_printk(sbi, KERN_ERR fmt, ##__VA_ARGS__)
+#define f2fs_warn(sbi, fmt, ...) \
+ f2fs_printk(sbi, KERN_WARNING fmt, ##__VA_ARGS__)
+#define f2fs_notice(sbi, fmt, ...) \
+ f2fs_printk(sbi, KERN_NOTICE fmt, ##__VA_ARGS__)
+#define f2fs_info(sbi, fmt, ...) \
+ f2fs_printk(sbi, KERN_INFO fmt, ##__VA_ARGS__)
+#define f2fs_debug(sbi, fmt, ...) \
+ f2fs_printk(sbi, KERN_DEBUG fmt, ##__VA_ARGS__)
+
static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
struct inode *inode,
block_t count)
@@ -1756,13 +1893,20 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
spin_lock(&sbi->stat_lock);
f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count);
- f2fs_bug_on(sbi, inode->i_blocks < sectors);
sbi->total_valid_block_count -= (block_t)count;
if (sbi->reserved_blocks &&
sbi->current_reserved_blocks < sbi->reserved_blocks)
sbi->current_reserved_blocks = min(sbi->reserved_blocks,
sbi->current_reserved_blocks + count);
spin_unlock(&sbi->stat_lock);
+ if (unlikely(inode->i_blocks < sectors)) {
+ f2fs_warn(sbi, "Inconsistent i_blocks, ino:%lu, iblocks:%llu, sectors:%llu",
+ inode->i_ino,
+ (unsigned long long)inode->i_blocks,
+ (unsigned long long)sectors);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ return;
+ }
f2fs_i_blocks_write(inode, count, false, true);
}
@@ -1770,11 +1914,12 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
{
atomic_inc(&sbi->nr_pages[count_type]);
- if (count_type == F2FS_DIRTY_DATA || count_type == F2FS_INMEM_PAGES ||
- count_type == F2FS_WB_CP_DATA || count_type == F2FS_WB_DATA)
- return;
-
- set_sbi_flag(sbi, SBI_IS_DIRTY);
+ if (count_type == F2FS_DIRTY_DENTS ||
+ count_type == F2FS_DIRTY_NODES ||
+ count_type == F2FS_DIRTY_META ||
+ count_type == F2FS_DIRTY_QDATA ||
+ count_type == F2FS_DIRTY_IMETA)
+ set_sbi_flag(sbi, SBI_IS_DIRTY);
}
static inline void inode_inc_dirty_pages(struct inode *inode)
@@ -1859,7 +2004,11 @@ static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)
if (is_set_ckpt_flags(sbi, CP_LARGE_NAT_BITMAP_FLAG)) {
offset = (flag == SIT_BITMAP) ?
le32_to_cpu(ckpt->nat_ver_bitmap_bytesize) : 0;
- return &ckpt->sit_nat_version_bitmap + offset;
+ /*
+ * if large_nat_bitmap feature is enabled, leave checksum
+ * protection for all nat/sit bitmaps.
+ */
+ return &ckpt->sit_nat_version_bitmap + offset + sizeof(__le32);
}
if (__cp_payload(sbi) > 0) {
@@ -1906,21 +2055,25 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
struct inode *inode, bool is_inode)
{
block_t valid_block_count;
- unsigned int valid_node_count;
- bool quota = inode && !is_inode;
+ unsigned int valid_node_count, user_block_count;
+ int err;
- if (quota) {
- int ret = dquot_reserve_block(inode, 1);
- if (ret)
- return ret;
+ if (is_inode) {
+ if (inode) {
+ err = dquot_alloc_inode(inode);
+ if (err)
+ return err;
+ }
+ } else {
+ err = dquot_reserve_block(inode, 1);
+ if (err)
+ return err;
}
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_BLOCK)) {
f2fs_show_injection_info(FAULT_BLOCK);
goto enospc;
}
-#endif
spin_lock(&sbi->stat_lock);
@@ -1929,8 +2082,11 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
if (!__allow_reserved_blocks(sbi, inode, false))
valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks;
+ user_block_count = sbi->user_block_count;
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+ user_block_count -= sbi->unusable_block_count;
- if (unlikely(valid_block_count > sbi->user_block_count)) {
+ if (unlikely(valid_block_count > user_block_count)) {
spin_unlock(&sbi->stat_lock);
goto enospc;
}
@@ -1956,8 +2112,12 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
return 0;
enospc:
- if (quota)
+ if (is_inode) {
+ if (inode)
+ dquot_free_inode(inode);
+ } else {
dquot_release_reservation_block(inode, 1);
+ }
return -ENOSPC;
}
@@ -1968,7 +2128,6 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
f2fs_bug_on(sbi, !sbi->total_valid_block_count);
f2fs_bug_on(sbi, !sbi->total_valid_node_count);
- f2fs_bug_on(sbi, !is_inode && !inode->i_blocks);
sbi->total_valid_node_count--;
sbi->total_valid_block_count--;
@@ -1978,8 +2137,18 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
spin_unlock(&sbi->stat_lock);
- if (!is_inode)
+ if (is_inode) {
+ dquot_free_inode(inode);
+ } else {
+ if (unlikely(inode->i_blocks == 0)) {
+ f2fs_warn(sbi, "Inconsistent i_blocks, ino:%lu, iblocks:%llu",
+ inode->i_ino,
+ (unsigned long long)inode->i_blocks);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ return;
+ }
f2fs_i_blocks_write(inode, 1, false, true);
+ }
}
static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi)
@@ -2005,17 +2174,23 @@ static inline s64 valid_inode_count(struct f2fs_sb_info *sbi)
static inline struct page *f2fs_grab_cache_page(struct address_space *mapping,
pgoff_t index, bool for_write)
{
-#ifdef CONFIG_F2FS_FAULT_INJECTION
- struct page *page = find_lock_page(mapping, index);
+ struct page *page;
- if (page)
- return page;
+ if (IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION)) {
+ if (!for_write)
+ page = find_get_page_flags(mapping, index,
+ FGP_LOCK | FGP_ACCESSED);
+ else
+ page = find_lock_page(mapping, index);
+ if (page)
+ return page;
- if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) {
- f2fs_show_injection_info(FAULT_PAGE_ALLOC);
- return NULL;
+ if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) {
+ f2fs_show_injection_info(FAULT_PAGE_ALLOC);
+ return NULL;
+ }
}
-#endif
+
if (!for_write)
return grab_cache_page(mapping, index);
return grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS);
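Several hunks in this patch replace #ifdef CONFIG_F2FS_FAULT_INJECTION guards with an IS_ENABLED() check, which keeps the fault-injection branch visible to the compiler in every configuration. A minimal standalone sketch of the difference; CONFIG_DEMO and the simplified IS_ENABLED() below are stand-ins for the real kernel machinery:

	#include <stdio.h>

	#define CONFIG_DEMO 1			/* stand-in for a CONFIG_* option */
	#define IS_ENABLED(option) (option)	/* kernel macro also handles =m/undefined */

	int main(void)
	{
		/* unlike #ifdef, the disabled branch is still parsed and
		 * type-checked, then discarded by the optimizer */
		if (IS_ENABLED(CONFIG_DEMO))
			printf("fault injection path compiled in\n");
		return 0;
	}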
@@ -2025,12 +2200,11 @@ static inline struct page *f2fs_pagecache_get_page(
struct address_space *mapping, pgoff_t index,
int fgp_flags, gfp_t gfp_mask)
{
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_GET)) {
f2fs_show_injection_info(FAULT_PAGE_GET);
return NULL;
}
-#endif
+
return pagecache_get_page(mapping, index, fgp_flags, gfp_mask);
}
@@ -2095,15 +2269,37 @@ static inline struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi,
bio = bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages);
return bio;
}
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_ALLOC_BIO)) {
f2fs_show_injection_info(FAULT_ALLOC_BIO);
return NULL;
}
-#endif
+
return bio_alloc(GFP_KERNEL, npages);
}
+static inline bool is_idle(struct f2fs_sb_info *sbi, int type)
+{
+ if (sbi->gc_mode == GC_URGENT)
+ return true;
+
+ if (get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_RD_NODE) ||
+ get_pages(sbi, F2FS_RD_META) || get_pages(sbi, F2FS_WB_DATA) ||
+ get_pages(sbi, F2FS_WB_CP_DATA) ||
+ get_pages(sbi, F2FS_DIO_READ) ||
+ get_pages(sbi, F2FS_DIO_WRITE))
+ return false;
+
+ if (type != DISCARD_TIME && SM_I(sbi) && SM_I(sbi)->dcc_info &&
+ atomic_read(&SM_I(sbi)->dcc_info->queued_discard))
+ return false;
+
+ if (SM_I(sbi) && SM_I(sbi)->fcc_info &&
+ atomic_read(&SM_I(sbi)->fcc_info->queued_flush))
+ return false;
+
+ return f2fs_time_over(sbi, type);
+}
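is_idle() gates background work on the absence of in-flight reads, writebacks, direct IO, and queued discard/flush commands. A hedged sketch of a hypothetical caller; try_background_discard() is illustrative and not part of the patch:

	/* sketch only: gate a background batch on device idleness */
	static void try_background_discard(struct f2fs_sb_info *sbi)
	{
		if (!is_idle(sbi, DISCARD_TIME))
			return;		/* IO in flight, or interval not elapsed */
		/* ... issue one batch of queued discard commands ... */
	}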
+
static inline void f2fs_radix_tree_insert(struct radix_tree_root *root,
unsigned long index, void *item)
{
@@ -2214,9 +2410,27 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr)
*addr ^= mask;
}
-#define F2FS_REG_FLMASK (~(FS_DIRSYNC_FL | FS_TOPDIR_FL))
-#define F2FS_OTHER_FLMASK (FS_NODUMP_FL | FS_NOATIME_FL)
-#define F2FS_FL_INHERITED (FS_PROJINHERIT_FL)
+/*
+ * On-disk inode flags (f2fs_inode::i_flags)
+ */
+#define F2FS_SYNC_FL 0x00000008 /* Synchronous updates */
+#define F2FS_IMMUTABLE_FL 0x00000010 /* Immutable file */
+#define F2FS_APPEND_FL 0x00000020 /* writes to file may only append */
+#define F2FS_NODUMP_FL 0x00000040 /* do not dump file */
+#define F2FS_NOATIME_FL 0x00000080 /* do not update atime */
+#define F2FS_INDEX_FL 0x00001000 /* hash-indexed directory */
+#define F2FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
+#define F2FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
+
+/* Flags that should be inherited by new inodes from their parent. */
+#define F2FS_FL_INHERITED (F2FS_SYNC_FL | F2FS_NODUMP_FL | F2FS_NOATIME_FL | \
+ F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL)
+
+/* Flags that are appropriate for regular files (all but dir-specific ones). */
+#define F2FS_REG_FLMASK (~(F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL))
+
+/* Flags that are appropriate for non-directories/regular files. */
+#define F2FS_OTHER_FLMASK (F2FS_NODUMP_FL | F2FS_NOATIME_FL)
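The new F2FS_* masks mirror the old FS_* semantics: directories keep every flag, regular files drop the directory-only bits, and everything else keeps only NODUMP and NOATIME. A self-contained demonstration of the masking arithmetic:

	#include <stdio.h>
	#include <stdint.h>

	#define F2FS_SYNC_FL		0x00000008
	#define F2FS_NODUMP_FL		0x00000040
	#define F2FS_NOATIME_FL		0x00000080
	#define F2FS_DIRSYNC_FL		0x00010000
	#define F2FS_PROJINHERIT_FL	0x20000000

	#define F2FS_REG_FLMASK		(~(F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL))
	#define F2FS_OTHER_FLMASK	(F2FS_NODUMP_FL | F2FS_NOATIME_FL)

	int main(void)
	{
		uint32_t parent = F2FS_SYNC_FL | F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL;

		printf("reg:   0x%08x\n", parent & F2FS_REG_FLMASK);	/* 0x00000008 */
		printf("other: 0x%08x\n", parent & F2FS_OTHER_FLMASK);	/* 0x00000000 */
		return 0;
	}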
static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags)
{
@@ -2259,6 +2473,7 @@ enum {
FI_EXTRA_ATTR, /* indicate file has extra attribute */
FI_PROJ_INHERIT, /* indicate file inherits projectid */
FI_PIN_FILE, /* indicate file should not be gced */
+ FI_ATOMIC_REVOKE_REQUEST, /* request to drop atomic data */
};
static inline void __mark_inode_dirty_flag(struct inode *inode,
@@ -2271,6 +2486,7 @@ static inline void __mark_inode_dirty_flag(struct inode *inode,
case FI_NEW_INODE:
if (set)
return;
+ /* fall through */
case FI_DATA_EXIST:
case FI_INLINE_DOTS:
case FI_PIN_FILE:
@@ -2357,7 +2573,7 @@ static inline void f2fs_i_depth_write(struct inode *inode, unsigned int depth)
static inline void f2fs_i_gc_failures_write(struct inode *inode,
unsigned int count)
{
- F2FS_I(inode)->i_gc_failures = count;
+ F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN] = count;
f2fs_mark_inode_dirty_sync(inode, true);
}
@@ -2423,9 +2639,18 @@ static inline int f2fs_has_inline_xattr(struct inode *inode)
return is_inode_flag_set(inode, FI_INLINE_XATTR);
}
+#define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a))
+
static inline unsigned int addrs_per_inode(struct inode *inode)
{
- return CUR_ADDRS_PER_INODE(inode) - get_inline_xattr_addrs(inode);
+ unsigned int addrs = CUR_ADDRS_PER_INODE(inode) -
+ get_inline_xattr_addrs(inode);
+ return ALIGN_DOWN(addrs, 1);
+}
+
+static inline unsigned int addrs_per_block(struct inode *inode)
+{
+ return ALIGN_DOWN(DEF_ADDRS_PER_BLOCK, 1);
}
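ALIGN_DOWN(x, 1) is the identity, so addrs_per_inode() and addrs_per_block() are numerically unchanged here; the alignment hook apparently only matters once a later change rounds to a larger granularity. A standalone check of the arithmetic (note the mask-based macro assumes a power-of-two alignment):

	#include <stdio.h>

	#define __ALIGN_MASK(x, mask)	(((x) + (mask)) & ~(mask))
	#define __ALIGN_KERNEL(x, a)	__ALIGN_MASK(x, (a) - 1)
	#define ALIGN_DOWN(x, a)	__ALIGN_KERNEL((x) - ((a) - 1), (a))

	int main(void)
	{
		printf("%u\n", ALIGN_DOWN(923u, 1u));	/* 923: align-to-1 is the identity */
		printf("%u\n", ALIGN_DOWN(923u, 4u));	/* 920: rounded down to a multiple of 4 */
		return 0;
	}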
static inline void *inline_xattr_addr(struct inode *inode, struct page *page)
@@ -2438,7 +2663,9 @@ static inline void *inline_xattr_addr(struct inode *inode, struct page *page)
static inline int inline_xattr_size(struct inode *inode)
{
- return get_inline_xattr_addrs(inode) * sizeof(__le32);
+ if (f2fs_has_inline_xattr(inode))
+ return get_inline_xattr_addrs(inode) * sizeof(__le32);
+ return 0;
}
static inline int f2fs_has_inline_data(struct inode *inode)
@@ -2573,33 +2800,47 @@ static inline bool is_dot_dotdot(const struct qstr *str)
static inline bool f2fs_may_extent_tree(struct inode *inode)
{
- if (!test_opt(F2FS_I_SB(inode), EXTENT_CACHE) ||
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+
+ if (!test_opt(sbi, EXTENT_CACHE) ||
is_inode_flag_set(inode, FI_NO_EXTENT))
return false;
+ /*
+ * for files recovered during mount, do not create extent
+ * caches until the shrinker has been registered.
+ */
+ if (list_empty(&sbi->s_list))
+ return false;
+
return S_ISREG(inode->i_mode);
}
+static inline void *kvmalloc(size_t size, gfp_t flags)
+{
+ void *ret;
+
+ ret = kmalloc(size, flags | __GFP_NOWARN);
+ if (!ret)
+ ret = __vmalloc(size, flags, PAGE_KERNEL);
+ return ret;
+}
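This local kvmalloc() helper tries a physically contiguous kmalloc() first (with the allocation-failure warning suppressed) and falls back to vmalloc() for larger or fragmented requests. A hedged usage sketch; the buffer name and size are illustrative:

	/* sketch only: allocate a large bitmap, tolerating fragmentation */
	void *bitmap = kvmalloc(bitmap_bytes, GFP_KERNEL);
	if (!bitmap)
		return -ENOMEM;
	/* ... a kvmalloc'ed buffer must be released with kvfree() ... */
	kvfree(bitmap);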
+
static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi,
size_t size, gfp_t flags)
{
-#ifdef CONFIG_F2FS_FAULT_INJECTION
+ void *ret;
+
if (time_to_inject(sbi, FAULT_KMALLOC)) {
f2fs_show_injection_info(FAULT_KMALLOC);
return NULL;
}
-#endif
- return kmalloc(size, flags);
-}
-static inline void *kvmalloc(size_t size, gfp_t flags)
-{
- void *ret;
+ ret = kmalloc(size, flags);
+ if (ret)
+ return ret;
- ret = kmalloc(size, flags | __GFP_NOWARN);
- if (!ret)
- ret = __vmalloc(size, flags, PAGE_KERNEL);
- return ret;
+ return kvmalloc(size, flags);
}
static inline void *kvzalloc(size_t size, gfp_t flags)
@@ -2621,12 +2862,11 @@ static inline void *f2fs_kzalloc(struct f2fs_sb_info *sbi,
static inline void *f2fs_kvmalloc(struct f2fs_sb_info *sbi,
size_t size, gfp_t flags)
{
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_KVMALLOC)) {
f2fs_show_injection_info(FAULT_KVMALLOC);
return NULL;
}
-#endif
+
return kvmalloc(size, flags);
}
@@ -2646,7 +2886,7 @@ static inline int get_inline_xattr_addrs(struct inode *inode)
return F2FS_I(inode)->i_inline_xattr_size;
}
-#define get_inode_mode(i) \
+#define f2fs_get_inode_mode(i) \
((is_inode_flag_set(i, FI_ACL_MODE)) ? \
(F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
@@ -2656,9 +2896,9 @@ static inline int get_inline_xattr_addrs(struct inode *inode)
#define F2FS_OLD_ATTRIBUTE_SIZE (offsetof(struct f2fs_inode, i_addr))
#define F2FS_FITS_IN_INODE(f2fs_inode, extra_isize, field) \
- ((offsetof(typeof(*f2fs_inode), field) + \
+ ((offsetof(typeof(*(f2fs_inode)), field) + \
sizeof((f2fs_inode)->field)) \
- <= (F2FS_OLD_ATTRIBUTE_SIZE + extra_isize)) \
+ <= (F2FS_OLD_ATTRIBUTE_SIZE + (extra_isize)))
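The extra parentheses around f2fs_inode and extra_isize are the standard macro-hygiene fix: without them, an expression argument regroups under operator precedence. A tiny standalone illustration of the failure mode:

	#include <stdio.h>

	#define BAD(x)	(x * 2)
	#define GOOD(x)	((x) * 2)

	int main(void)
	{
		printf("%d\n", BAD(1 + 2));	/* 5: expands to 1 + 2 * 2 */
		printf("%d\n", GOOD(1 + 2));	/* 6: parentheses keep the grouping */
		return 0;
	}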
static inline void f2fs_reset_iostat(struct f2fs_sb_info *sbi)
{
@@ -2685,18 +2925,62 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi,
spin_unlock(&sbi->iostat_lock);
}
+#define __is_large_section(sbi) ((sbi)->segs_per_sec > 1)
+
+#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO((fio)->type) == META)
+
+bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type);
+static inline void verify_blkaddr(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type)
+{
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type)) {
+ f2fs_err(sbi, "invalid blkaddr: %u, type: %d, run fsck to fix.",
+ blkaddr, type);
+ f2fs_bug_on(sbi, 1);
+ }
+}
+
+static inline bool __is_valid_data_blkaddr(block_t blkaddr)
+{
+ if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
+ return false;
+ return true;
+}
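NULL_ADDR marks a hole and NEW_ADDR marks a block that is reserved but not yet on disk; only everything else is a real on-disk address. A standalone rendition — the two sentinel values below (0 and all-ones) match the usual f2fs definitions but are restated here as assumptions:

	#include <stdio.h>

	#define NULL_ADDR 0u		/* hole */
	#define NEW_ADDR  0xffffffffu	/* reserved, not yet allocated on disk */

	static int is_valid_data_blkaddr(unsigned int blkaddr)
	{
		return blkaddr != NEW_ADDR && blkaddr != NULL_ADDR;
	}

	int main(void)
	{
		printf("%d %d %d\n", is_valid_data_blkaddr(NULL_ADDR),	/* 0 */
				     is_valid_data_blkaddr(NEW_ADDR),	/* 0 */
				     is_valid_data_blkaddr(1234));	/* 1 */
		return 0;
	}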
+
+static inline void f2fs_set_page_private(struct page *page,
+ unsigned long data)
+{
+ if (PagePrivate(page))
+ return;
+
+ get_page(page);
+ SetPagePrivate(page);
+ set_page_private(page, data);
+}
+
+static inline void f2fs_clear_page_private(struct page *page)
+{
+ if (!PagePrivate(page))
+ return;
+
+ set_page_private(page, 0);
+ ClearPagePrivate(page);
+ f2fs_put_page(page, 0);
+}
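f2fs_set_page_private() takes an extra page reference alongside SetPagePrivate(), and f2fs_clear_page_private() drops it, so the two must always be paired. A hedged sketch of the lifecycle; the private value is illustrative:

	/* sketch only: the page holds one extra reference while private is set */
	f2fs_set_page_private(page, 1UL);	/* get_page() + SetPagePrivate() */
	/* ... the page cannot be released while the private data is live ... */
	f2fs_clear_page_private(page);		/* ClearPagePrivate() + put_page() */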
+
/*
* file.c
*/
int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
-void truncate_data_blocks(struct dnode_of_data *dn);
-int truncate_blocks(struct inode *inode, u64 from, bool lock);
+void f2fs_truncate_data_blocks(struct dnode_of_data *dn);
+int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock);
int f2fs_truncate(struct inode *inode);
int f2fs_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat);
int f2fs_setattr(struct dentry *dentry, struct iattr *attr);
-int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end);
-void truncate_data_blocks_range(struct dnode_of_data *dn, int count);
+int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end);
+void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count);
int f2fs_precache_extents(struct inode *inode);
long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
@@ -2710,38 +2994,37 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page);
void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page);
struct inode *f2fs_iget(struct super_block *sb, unsigned long ino);
struct inode *f2fs_iget_retry(struct super_block *sb, unsigned long ino);
-int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink);
-void update_inode(struct inode *inode, struct page *node_page);
-void update_inode_page(struct inode *inode);
+int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink);
+void f2fs_update_inode(struct inode *inode, struct page *node_page);
+void f2fs_update_inode_page(struct inode *inode);
int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc);
void f2fs_evict_inode(struct inode *inode);
-void handle_failed_inode(struct inode *inode);
+void f2fs_handle_failed_inode(struct inode *inode);
/*
* namei.c
*/
-int update_extension_list(struct f2fs_sb_info *sbi, const char *name,
+int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name,
bool hot, bool set);
struct dentry *f2fs_get_parent(struct dentry *child);
/*
* dir.c
*/
-void set_de_type(struct f2fs_dir_entry *de, umode_t mode);
-unsigned char get_de_type(struct f2fs_dir_entry *de);
-struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *fname,
+unsigned char f2fs_get_de_type(struct f2fs_dir_entry *de);
+struct f2fs_dir_entry *f2fs_find_target_dentry(struct fscrypt_name *fname,
f2fs_hash_t namehash, int *max_slots,
struct f2fs_dentry_ptr *d);
int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
unsigned int start_pos, struct fscrypt_str *fstr);
-void do_make_empty_dir(struct inode *inode, struct inode *parent,
+void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent,
struct f2fs_dentry_ptr *d);
-struct page *init_inode_metadata(struct inode *inode, struct inode *dir,
+struct page *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir,
const struct qstr *new_name,
const struct qstr *orig_name, struct page *dpage);
-void update_parent_metadata(struct inode *dir, struct inode *inode,
+void f2fs_update_parent_metadata(struct inode *dir, struct inode *inode,
unsigned int current_depth);
-int room_for_filename(const void *bitmap, int slots, int max_slots);
+int f2fs_room_for_filename(const void *bitmap, int slots, int max_slots);
void f2fs_drop_nlink(struct inode *dir, struct inode *inode);
struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
struct fscrypt_name *fname, struct page **res_page);
@@ -2758,9 +3041,9 @@ void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d,
int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name,
const struct qstr *orig_name,
struct inode *inode, nid_t ino, umode_t mode);
-int __f2fs_do_add_link(struct inode *dir, struct fscrypt_name *fname,
+int f2fs_add_dentry(struct inode *dir, struct fscrypt_name *fname,
struct inode *inode, nid_t ino, umode_t mode);
-int __f2fs_add_link(struct inode *dir, const struct qstr *name,
+int f2fs_do_add_link(struct inode *dir, const struct qstr *name,
struct inode *inode, nid_t ino, umode_t mode);
void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
struct inode *dir, struct inode *inode);
@@ -2769,7 +3052,7 @@ bool f2fs_empty_dir(struct inode *dir);
static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
{
- return __f2fs_add_link(d_inode(dentry->d_parent), &dentry->d_name,
+ return f2fs_do_add_link(d_inode(dentry->d_parent), &dentry->d_name,
inode, inode->i_ino, inode->i_mode);
}
@@ -2779,12 +3062,11 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
int f2fs_inode_dirtied(struct inode *inode, bool sync);
void f2fs_inode_synced(struct inode *inode);
int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
+int f2fs_quota_sync(struct super_block *sb, int type);
void f2fs_quota_off_umount(struct super_block *sb);
int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
int f2fs_sync_fs(struct super_block *sb, int sync);
-extern __printf(3, 4)
-void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...);
-int sanity_check_ckpt(struct f2fs_sb_info *sbi);
+int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi);
/*
* hash.c
@@ -2798,138 +3080,161 @@ f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info,
struct dnode_of_data;
struct node_info;
-bool available_free_memory(struct f2fs_sb_info *sbi, int type);
-int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid);
-bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid);
-bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino);
-void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni);
-pgoff_t get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs);
-int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode);
-int truncate_inode_blocks(struct inode *inode, pgoff_t from);
-int truncate_xattr_node(struct inode *inode);
-int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino);
-int remove_inode_page(struct inode *inode);
-struct page *new_inode_page(struct inode *inode);
-struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs);
-void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid);
-struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid);
-struct page *get_node_page_ra(struct page *parent, int start);
-void move_node_page(struct page *node_page, int gc_type);
-int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
- struct writeback_control *wbc, bool atomic);
-int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc,
+int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid);
+bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type);
+bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct page *page);
+void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi);
+void f2fs_del_fsync_node_entry(struct f2fs_sb_info *sbi, struct page *page);
+void f2fs_reset_fsync_node_info(struct f2fs_sb_info *sbi);
+int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid);
+bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid);
+bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino);
+int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
+ struct node_info *ni);
+pgoff_t f2fs_get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs);
+int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode);
+int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from);
+int f2fs_truncate_xattr_node(struct inode *inode);
+int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi,
+ unsigned int seq_id);
+int f2fs_remove_inode_page(struct inode *inode);
+struct page *f2fs_new_inode_page(struct inode *inode);
+struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs);
+void f2fs_ra_node_page(struct f2fs_sb_info *sbi, nid_t nid);
+struct page *f2fs_get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid);
+struct page *f2fs_get_node_page_ra(struct page *parent, int start);
+int f2fs_move_node_page(struct page *node_page, int gc_type);
+int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
+ struct writeback_control *wbc, bool atomic,
+ unsigned int *seq_id);
+int f2fs_sync_node_pages(struct f2fs_sb_info *sbi,
+ struct writeback_control *wbc,
bool do_balance, enum iostat_type io_type);
-void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount);
-bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid);
-void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid);
-void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid);
-int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink);
-void recover_inline_xattr(struct inode *inode, struct page *page);
-int recover_xattr_data(struct inode *inode, struct page *page);
-int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page);
-void restore_node_summary(struct f2fs_sb_info *sbi,
+int f2fs_build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount);
+bool f2fs_alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid);
+void f2fs_alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid);
+void f2fs_alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid);
+int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink);
+void f2fs_recover_inline_xattr(struct inode *inode, struct page *page);
+int f2fs_recover_xattr_data(struct inode *inode, struct page *page);
+int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page);
+int f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
unsigned int segno, struct f2fs_summary_block *sum);
-void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
-int build_node_manager(struct f2fs_sb_info *sbi);
-void destroy_node_manager(struct f2fs_sb_info *sbi);
-int __init create_node_manager_caches(void);
-void destroy_node_manager_caches(void);
+int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
+int f2fs_build_node_manager(struct f2fs_sb_info *sbi);
+void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi);
+int __init f2fs_create_node_manager_caches(void);
+void f2fs_destroy_node_manager_caches(void);
/*
* segment.c
*/
-bool need_SSR(struct f2fs_sb_info *sbi);
-void register_inmem_page(struct inode *inode, struct page *page);
-void drop_inmem_pages_all(struct f2fs_sb_info *sbi);
-void drop_inmem_pages(struct inode *inode);
-void drop_inmem_page(struct inode *inode, struct page *page);
-int commit_inmem_pages(struct inode *inode);
+bool f2fs_need_SSR(struct f2fs_sb_info *sbi);
+void f2fs_register_inmem_page(struct inode *inode, struct page *page);
+void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure);
+void f2fs_drop_inmem_pages(struct inode *inode);
+void f2fs_drop_inmem_page(struct inode *inode, struct page *page);
+int f2fs_commit_inmem_pages(struct inode *inode);
void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need);
void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi);
int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino);
-int create_flush_cmd_control(struct f2fs_sb_info *sbi);
+int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi);
int f2fs_flush_device_cache(struct f2fs_sb_info *sbi);
-void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free);
-void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr);
-bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr);
-void drop_discard_cmd(struct f2fs_sb_info *sbi);
-void stop_discard_thread(struct f2fs_sb_info *sbi);
-bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi);
-void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc);
-void release_discard_addrs(struct f2fs_sb_info *sbi);
-int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
-void allocate_new_segments(struct f2fs_sb_info *sbi);
+void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free);
+void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr);
+bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr);
+void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi);
+void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi);
+bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi);
+void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
+ struct cp_control *cpc);
+void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi);
+block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi);
+int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable);
+void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
+int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
+void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
+ unsigned int start, unsigned int end);
+void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi);
int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range);
-bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc);
-struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno);
-void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr);
-void write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
+bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
+ struct cp_control *cpc);
+struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno);
+void f2fs_update_meta_page(struct f2fs_sb_info *sbi, void *src,
+ block_t blk_addr);
+void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
enum iostat_type io_type);
-void write_node_page(unsigned int nid, struct f2fs_io_info *fio);
-void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio);
-int rewrite_data_page(struct f2fs_io_info *fio);
-void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
+void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio);
+void f2fs_outplace_write_data(struct dnode_of_data *dn,
+ struct f2fs_io_info *fio);
+int f2fs_inplace_write_data(struct f2fs_io_info *fio);
+void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
block_t old_blkaddr, block_t new_blkaddr,
bool recover_curseg, bool recover_newaddr);
void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
block_t old_addr, block_t new_addr,
unsigned char version, bool recover_curseg,
bool recover_newaddr);
-void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
+void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
block_t old_blkaddr, block_t *new_blkaddr,
struct f2fs_summary *sum, int type,
struct f2fs_io_info *fio, bool add_list);
void f2fs_wait_on_page_writeback(struct page *page,
- enum page_type type, bool ordered);
-void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr);
-void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk);
-void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk);
-int lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
+ enum page_type type, bool ordered, bool locked);
+void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr);
+void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
+ block_t len);
+void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk);
+void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk);
+int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
unsigned int val, int alloc);
-void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
-int build_segment_manager(struct f2fs_sb_info *sbi);
-void destroy_segment_manager(struct f2fs_sb_info *sbi);
-int __init create_segment_manager_caches(void);
-void destroy_segment_manager_caches(void);
-int rw_hint_to_seg_type(enum rw_hint hint);
-enum rw_hint io_type_to_rw_hint(struct f2fs_sb_info *sbi, enum page_type type,
- enum temp_type temp);
+void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
+int f2fs_build_segment_manager(struct f2fs_sb_info *sbi);
+void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi);
+int __init f2fs_create_segment_manager_caches(void);
+void f2fs_destroy_segment_manager_caches(void);
+int f2fs_rw_hint_to_seg_type(enum rw_hint hint);
+enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
+ enum page_type type, enum temp_type temp);
/*
* checkpoint.c
*/
void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io);
-struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
-struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
-struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index);
-bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type);
-int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
+struct page *f2fs_grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
+struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
+struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index);
+struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index);
+bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type);
+int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
int type, bool sync);
-void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index);
-long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
+void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index);
+long f2fs_sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
long nr_to_write, enum iostat_type io_type);
-void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type);
-void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type);
-void release_ino_entry(struct f2fs_sb_info *sbi, bool all);
-bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode);
-void set_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
+void f2fs_add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type);
+void f2fs_remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type);
+void f2fs_release_ino_entry(struct f2fs_sb_info *sbi, bool all);
+bool f2fs_exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode);
+void f2fs_set_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
unsigned int devidx, int type);
-bool is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
+bool f2fs_is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
unsigned int devidx, int type);
int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi);
-int acquire_orphan_inode(struct f2fs_sb_info *sbi);
-void release_orphan_inode(struct f2fs_sb_info *sbi);
-void add_orphan_inode(struct inode *inode);
-void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino);
-int recover_orphan_inodes(struct f2fs_sb_info *sbi);
-int get_valid_checkpoint(struct f2fs_sb_info *sbi);
-void update_dirty_page(struct inode *inode, struct page *page);
-void remove_dirty_inode(struct inode *inode);
-int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type);
-int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc);
-void init_ino_entry_info(struct f2fs_sb_info *sbi);
-int __init create_checkpoint_caches(void);
-void destroy_checkpoint_caches(void);
+int f2fs_acquire_orphan_inode(struct f2fs_sb_info *sbi);
+void f2fs_release_orphan_inode(struct f2fs_sb_info *sbi);
+void f2fs_add_orphan_inode(struct inode *inode);
+void f2fs_remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino);
+int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi);
+int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi);
+void f2fs_update_dirty_page(struct inode *inode, struct page *page);
+void f2fs_remove_dirty_inode(struct inode *inode);
+int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type);
+void f2fs_wait_on_all_pages_writeback(struct f2fs_sb_info *sbi);
+int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc);
+void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi);
+int __init f2fs_create_checkpoint_caches(void);
+void f2fs_destroy_checkpoint_caches(void);
/*
* data.c
@@ -2938,38 +3243,37 @@ int f2fs_init_post_read_processing(void);
void f2fs_destroy_post_read_processing(void);
void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type);
void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
- struct inode *inode, nid_t ino, pgoff_t idx,
- enum page_type type);
+ struct inode *inode, struct page *page,
+ nid_t ino, enum page_type type);
void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi);
int f2fs_submit_page_bio(struct f2fs_io_info *fio);
-int f2fs_submit_page_write(struct f2fs_io_info *fio);
+int f2fs_merge_page_bio(struct f2fs_io_info *fio);
+void f2fs_submit_page_write(struct f2fs_io_info *fio);
struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
block_t blk_addr, struct bio *bio);
int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr);
-void set_data_blkaddr(struct dnode_of_data *dn);
+void f2fs_set_data_blkaddr(struct dnode_of_data *dn);
void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr);
-int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count);
-int reserve_new_block(struct dnode_of_data *dn);
+int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count);
+int f2fs_reserve_new_block(struct dnode_of_data *dn);
int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index);
int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from);
int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index);
-struct page *get_read_data_page(struct inode *inode, pgoff_t index,
+struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
int op_flags, bool for_write);
-struct page *find_data_page(struct inode *inode, pgoff_t index);
-struct page *get_lock_data_page(struct inode *inode, pgoff_t index,
+struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index);
+struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
bool for_write);
-struct page *get_new_data_page(struct inode *inode,
+struct page *f2fs_get_new_data_page(struct inode *inode,
struct page *ipage, pgoff_t index, bool new_i_size);
-int do_write_data_page(struct f2fs_io_info *fio);
+int f2fs_do_write_data_page(struct f2fs_io_info *fio);
+void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock);
int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
int create, int flag);
int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len);
-bool should_update_inplace(struct inode *inode, struct f2fs_io_info *fio);
-bool should_update_outplace(struct inode *inode, struct f2fs_io_info *fio);
-int __f2fs_write_data_pages(struct address_space *mapping,
- struct writeback_control *wbc,
- enum iostat_type io_type);
+bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio);
+bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio);
void f2fs_invalidate_page(struct page *page, unsigned int offset,
unsigned int length);
int f2fs_release_page(struct page *page, gfp_t wait);
@@ -2978,22 +3282,24 @@ int f2fs_migrate_page(struct address_space *mapping, struct page *newpage,
struct page *page, enum migrate_mode mode);
#endif
bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len);
+void f2fs_clear_radix_tree_dirty_tag(struct page *page);
/*
* gc.c
*/
-int start_gc_thread(struct f2fs_sb_info *sbi);
-void stop_gc_thread(struct f2fs_sb_info *sbi);
-block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
+int f2fs_start_gc_thread(struct f2fs_sb_info *sbi);
+void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi);
+block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background,
unsigned int segno);
-void build_gc_manager(struct f2fs_sb_info *sbi);
+void f2fs_build_gc_manager(struct f2fs_sb_info *sbi);
+int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count);
/*
* recovery.c
*/
-int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only);
-bool space_for_roll_forward(struct f2fs_sb_info *sbi);
+int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only);
+bool f2fs_space_for_roll_forward(struct f2fs_sb_info *sbi);
/*
* debug.c
@@ -3015,6 +3321,9 @@ struct f2fs_stat_info {
int free_nids, avail_nids, alloc_nids;
int total_count, utilization;
int bg_gc, nr_wb_cp_data, nr_wb_data;
+ int nr_rd_data, nr_rd_node, nr_rd_meta;
+ int nr_dio_read, nr_dio_write;
+ unsigned int io_skip_bggc, other_skip_bggc;
int nr_flushing, nr_flushed, flush_list_empty;
int nr_discarding, nr_discarded;
int nr_discard_cmd;
@@ -3031,10 +3340,12 @@ struct f2fs_stat_info {
int bg_node_segs, bg_data_segs;
int tot_blks, data_blks, node_blks;
int bg_data_blks, bg_node_blks;
+ unsigned long long skipped_atomic_files[2];
int curseg[NR_CURSEG_TYPE];
int cursec[NR_CURSEG_TYPE];
int curzone[NR_CURSEG_TYPE];
+ unsigned int meta_count[META_MAX];
unsigned int segment_count[2];
unsigned int block_count[2];
unsigned int inplace_count;
@@ -3050,6 +3361,8 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
#define stat_inc_bg_cp_count(si) ((si)->bg_cp_count++)
#define stat_inc_call_count(si) ((si)->call_count++)
#define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++)
+#define stat_io_skip_bggc_count(sbi) ((sbi)->io_skip_bggc++)
+#define stat_other_skip_bggc_count(sbi) ((sbi)->other_skip_bggc++)
#define stat_inc_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]++)
#define stat_dec_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]--)
#define stat_inc_total_hit(sbi) (atomic64_inc(&(sbi)->total_hit_ext))
@@ -3086,6 +3399,17 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
if (f2fs_has_inline_dentry(inode)) \
(atomic_dec(&F2FS_I_SB(inode)->inline_dir)); \
} while (0)
+#define stat_inc_meta_count(sbi, blkaddr) \
+ do { \
+ if (blkaddr < SIT_I(sbi)->sit_base_addr) \
+ atomic_inc(&(sbi)->meta_count[META_CP]); \
+ else if (blkaddr < NM_I(sbi)->nat_blkaddr) \
+ atomic_inc(&(sbi)->meta_count[META_SIT]); \
+ else if (blkaddr < SM_I(sbi)->ssa_blkaddr) \
+ atomic_inc(&(sbi)->meta_count[META_NAT]); \
+ else if (blkaddr < SM_I(sbi)->main_blkaddr) \
+ atomic_inc(&(sbi)->meta_count[META_SSA]); \
+ } while (0)
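stat_inc_meta_count() classifies a meta block by where it falls in the on-disk layout, which orders the regions CP < SIT < NAT < SSA < main area. A standalone model of the comparison chain; the boundary addresses are made up for illustration:

	#include <stdio.h>

	/* hypothetical region boundaries, in block addresses */
	enum { SIT_BASE = 512, NAT_BASE = 1024, SSA_BASE = 4096, MAIN_BASE = 8192 };

	static const char *classify(unsigned int blkaddr)
	{
		if (blkaddr < SIT_BASE)		return "CP";
		if (blkaddr < NAT_BASE)		return "SIT";
		if (blkaddr < SSA_BASE)		return "NAT";
		if (blkaddr < MAIN_BASE)	return "SSA";
		return "MAIN";
	}

	int main(void)
	{
		printf("%s %s %s\n", classify(100), classify(2000), classify(9000));
		/* prints: CP NAT MAIN */
		return 0;
	}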
#define stat_inc_seg_type(sbi, curseg) \
((sbi)->segment_count[(curseg)->alloc_type]++)
#define stat_inc_block_count(sbi, curseg) \
@@ -3148,13 +3472,15 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
int f2fs_build_stats(struct f2fs_sb_info *sbi);
void f2fs_destroy_stats(struct f2fs_sb_info *sbi);
-int __init f2fs_create_root_stats(void);
+void __init f2fs_create_root_stats(void);
void f2fs_destroy_root_stats(void);
#else
#define stat_inc_cp_count(si) do { } while (0)
#define stat_inc_bg_cp_count(si) do { } while (0)
#define stat_inc_call_count(si) do { } while (0)
#define stat_inc_bggc_count(si) do { } while (0)
+#define stat_io_skip_bggc_count(sbi) do { } while (0)
+#define stat_other_skip_bggc_count(sbi) do { } while (0)
#define stat_inc_dirty_inode(sbi, type) do { } while (0)
#define stat_dec_dirty_inode(sbi, type) do { } while (0)
#define stat_inc_total_hit(sb) do { } while (0)
@@ -3173,6 +3499,7 @@ void f2fs_destroy_root_stats(void);
#define stat_inc_volatile_write(inode) do { } while (0)
#define stat_dec_volatile_write(inode) do { } while (0)
#define stat_update_max_volatile_write(inode) do { } while (0)
+#define stat_inc_meta_count(sbi, blkaddr) do { } while (0)
#define stat_inc_seg_type(sbi, curseg) do { } while (0)
#define stat_inc_block_count(sbi, curseg) do { } while (0)
#define stat_inc_inplace_blocks(sbi) do { } while (0)
@@ -3183,7 +3510,7 @@ void f2fs_destroy_root_stats(void);
static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; }
static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { }
-static inline int __init f2fs_create_root_stats(void) { return 0; }
+static inline void __init f2fs_create_root_stats(void) { }
static inline void f2fs_destroy_root_stats(void) { }
#endif
@@ -3197,29 +3524,31 @@ extern const struct inode_operations f2fs_dir_inode_operations;
extern const struct inode_operations f2fs_symlink_inode_operations;
extern const struct inode_operations f2fs_encrypted_symlink_inode_operations;
extern const struct inode_operations f2fs_special_inode_operations;
-extern struct kmem_cache *inode_entry_slab;
+extern struct kmem_cache *f2fs_inode_entry_slab;
/*
* inline.c
*/
bool f2fs_may_inline_data(struct inode *inode);
bool f2fs_may_inline_dentry(struct inode *inode);
-void read_inline_data(struct page *page, struct page *ipage);
-void truncate_inline_inode(struct inode *inode, struct page *ipage, u64 from);
+void f2fs_do_read_inline_data(struct page *page, struct page *ipage);
+void f2fs_truncate_inline_inode(struct inode *inode,
+ struct page *ipage, u64 from);
int f2fs_read_inline_data(struct inode *inode, struct page *page);
int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page);
int f2fs_convert_inline_inode(struct inode *inode);
int f2fs_write_inline_data(struct inode *inode, struct page *page);
-bool recover_inline_data(struct inode *inode, struct page *npage);
-struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir,
+bool f2fs_recover_inline_data(struct inode *inode, struct page *npage);
+struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
struct fscrypt_name *fname, struct page **res_page);
-int make_empty_inline_dir(struct inode *inode, struct inode *parent,
+int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent,
struct page *ipage);
int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
const struct qstr *orig_name,
struct inode *inode, nid_t ino, umode_t mode);
-void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page,
- struct inode *dir, struct inode *inode);
+void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry,
+ struct page *page, struct inode *dir,
+ struct inode *inode);
bool f2fs_empty_inline_dir(struct inode *dir);
int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx,
struct fscrypt_str *fstr);
@@ -3240,17 +3569,17 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi);
/*
* extent_cache.c
*/
-struct rb_entry *__lookup_rb_tree(struct rb_root *root,
+struct rb_entry *f2fs_lookup_rb_tree(struct rb_root *root,
struct rb_entry *cached_re, unsigned int ofs);
-struct rb_node **__lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
+struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
struct rb_root *root, struct rb_node **parent,
unsigned int ofs);
-struct rb_entry *__lookup_rb_tree_ret(struct rb_root *root,
+struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root *root,
struct rb_entry *cached_re, unsigned int ofs,
struct rb_entry **prev_entry, struct rb_entry **next_entry,
struct rb_node ***insert_p, struct rb_node **insert_parent,
bool force);
-bool __check_rb_tree_consistence(struct f2fs_sb_info *sbi,
+bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
struct rb_root *root);
unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink);
bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext);
@@ -3262,9 +3591,9 @@ bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
void f2fs_update_extent_cache(struct dnode_of_data *dn);
void f2fs_update_extent_cache_range(struct dnode_of_data *dn,
pgoff_t fofs, block_t blkaddr, unsigned int len);
-void init_extent_cache_info(struct f2fs_sb_info *sbi);
-int __init create_extent_cache(void);
-void destroy_extent_cache(void);
+void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi);
+int __init f2fs_create_extent_cache(void);
+void f2fs_destroy_extent_cache(void);
/*
* sysfs.c
@@ -3291,7 +3620,7 @@ static inline void f2fs_set_encrypted_inode(struct inode *inode)
{
#ifdef CONFIG_F2FS_FS_ENCRYPTION
file_set_encrypt(inode);
- inode->i_flags |= S_ENCRYPTED;
+ f2fs_set_inode_flags(inode);
#endif
}
@@ -3305,9 +3634,9 @@ static inline bool f2fs_post_read_required(struct inode *inode)
}
#define F2FS_FEATURE_FUNCS(name, flagname) \
-static inline int f2fs_sb_has_##name(struct super_block *sb) \
+static inline int f2fs_sb_has_##name(struct f2fs_sb_info *sbi) \
{ \
- return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_##flagname); \
+ return F2FS_HAS_FEATURE(sbi, F2FS_FEATURE_##flagname); \
}
F2FS_FEATURE_FUNCS(encrypt, ENCRYPT);
@@ -3319,28 +3648,66 @@ F2FS_FEATURE_FUNCS(flexible_inline_xattr, FLEXIBLE_INLINE_XATTR);
F2FS_FEATURE_FUNCS(quota_ino, QUOTA_INO);
F2FS_FEATURE_FUNCS(inode_crtime, INODE_CRTIME);
F2FS_FEATURE_FUNCS(lost_found, LOST_FOUND);
+F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM);
#ifdef CONFIG_BLK_DEV_ZONED
-static inline int get_blkz_type(struct f2fs_sb_info *sbi,
- struct block_device *bdev, block_t blkaddr)
+static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
+ block_t blkaddr)
{
unsigned int zno = blkaddr >> sbi->log_blocks_per_blkz;
+
+ return test_bit(zno, FDEV(devi).blkz_seq);
+}
+#endif
+
+static inline bool f2fs_hw_should_discard(struct f2fs_sb_info *sbi)
+{
+ return f2fs_sb_has_blkzoned(sbi);
+}
+
+static inline bool f2fs_bdev_support_discard(struct block_device *bdev)
+{
+ return blk_queue_discard(bdev_get_queue(bdev)) ||
+#ifdef CONFIG_BLK_DEV_ZONED
+ bdev_is_zoned(bdev);
+#else
+ 0;
+#endif
+}
+
+static inline bool f2fs_hw_support_discard(struct f2fs_sb_info *sbi)
+{
int i;
+ if (!f2fs_is_multi_device(sbi))
+ return f2fs_bdev_support_discard(sbi->sb->s_bdev);
+
for (i = 0; i < sbi->s_ndevs; i++)
- if (FDEV(i).bdev == bdev)
- return FDEV(i).blkz_type[zno];
- return -EINVAL;
+ if (f2fs_bdev_support_discard(FDEV(i).bdev))
+ return true;
+ return false;
}
-#endif
-static inline bool f2fs_discard_en(struct f2fs_sb_info *sbi)
+static inline bool f2fs_realtime_discard_enable(struct f2fs_sb_info *sbi)
{
- struct request_queue *q = bdev_get_queue(sbi->sb->s_bdev);
+ return (test_opt(sbi, DISCARD) && f2fs_hw_support_discard(sbi)) ||
+ f2fs_hw_should_discard(sbi);
+}
+
+static inline bool f2fs_hw_is_readonly(struct f2fs_sb_info *sbi)
+{
+ int i;
- return blk_queue_discard(q) || f2fs_sb_has_blkzoned(sbi->sb);
+ if (!f2fs_is_multi_device(sbi))
+ return bdev_read_only(sbi->sb->s_bdev);
+
+ for (i = 0; i < sbi->s_ndevs; i++)
+ if (bdev_read_only(FDEV(i).bdev))
+ return true;
+ return false;
}
+
static inline void set_opt_mode(struct f2fs_sb_info *sbi, unsigned int mt)
{
clear_opt(sbi, ADAPTIVE);
@@ -3363,15 +3730,78 @@ static inline bool f2fs_may_encrypt(struct inode *inode)
return (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode));
#else
- return 0;
+ return false;
#endif
}
-static inline bool f2fs_force_buffered_io(struct inode *inode, int rw)
+static inline int block_unaligned_IO(struct inode *inode,
+ struct kiocb *iocb, struct iov_iter *iter)
+{
+ unsigned int i_blkbits = READ_ONCE(inode->i_blkbits);
+ unsigned int blocksize_mask = (1 << i_blkbits) - 1;
+ loff_t offset = iocb->ki_pos;
+ unsigned long align = offset | iov_iter_alignment(iter);
+
+ return align & blocksize_mask;
+}
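block_unaligned_IO() folds the file offset and the iterator's address/length alignment into one value and tests the low block-size bits, so a single misaligned segment marks the whole request unaligned. A standalone check of the arithmetic, with iov_iter_alignment() modeled by a plain value:

	#include <stdio.h>

	static int unaligned(unsigned long long pos, unsigned long iter_align,
			     unsigned int blkbits)
	{
		unsigned long mask = (1UL << blkbits) - 1;
		return (pos | iter_align) & mask;
	}

	int main(void)
	{
		printf("%d\n", unaligned(4096, 4096, 12));	/* 0: aligned to the 4K block */
		printf("%d\n", unaligned(4608, 4096, 12));	/* 512: offset lands mid-block */
		return 0;
	}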
+
+static inline int allow_outplace_dio(struct inode *inode,
+ struct kiocb *iocb, struct iov_iter *iter)
{
- return (f2fs_post_read_required(inode) ||
- (rw == WRITE && test_opt(F2FS_I_SB(inode), LFS)) ||
- F2FS_I_SB(inode)->s_ndevs);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ int rw = iov_iter_rw(iter);
+
+ return (test_opt(sbi, LFS) && (rw == WRITE) &&
+ !block_unaligned_IO(inode, iocb, iter));
}
+static inline bool f2fs_force_buffered_io(struct inode *inode,
+ struct kiocb *iocb, struct iov_iter *iter)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ int rw = iov_iter_rw(iter);
+
+ if (f2fs_post_read_required(inode))
+ return true;
+ if (f2fs_is_multi_device(sbi))
+ return true;
+ /*
+ * for a blkzoned device, fall back from direct IO to buffered IO
+ * so that all IOs can be serialized by the log-structured write.
+ */
+ if (f2fs_sb_has_blkzoned(sbi))
+ return true;
+ if (test_opt(sbi, LFS) && (rw == WRITE) &&
+ block_unaligned_IO(inode, iocb, iter))
+ return true;
+ if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED) &&
+ !(inode->i_flags & S_SWAPFILE))
+ return true;
+
+ return false;
+}
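Every condition here routes direct IO back through the page cache: post-read processing (encryption), multi-device layouts, zoned block devices, unaligned LFS writes, and the disabled-checkpoint window. A hedged sketch of how a ->direct_IO style entry point might consume it; the convention of returning 0 to request buffered fallback is assumed:

	/* sketch only, not the patch's implementation */
	static ssize_t f2fs_direct_IO_sketch(struct kiocb *iocb, struct iov_iter *iter)
	{
		struct inode *inode = file_inode(iocb->ki_filp);

		if (f2fs_force_buffered_io(inode, iocb, iter))
			return 0;	/* 0 asks the VFS to fall back to buffered IO */

		/* ... perform the real block-device direct IO here ... */
		return -EIO;	/* placeholder */
	}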
+
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
+ unsigned int type);
+#else
+#define f2fs_build_fault_attr(sbi, rate, type) do { } while (0)
+#endif
+
+static inline bool is_journalled_quota(struct f2fs_sb_info *sbi)
+{
+#ifdef CONFIG_QUOTA
+ if (f2fs_sb_has_quota_ino(sbi))
+ return true;
+ if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] ||
+ F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] ||
+ F2FS_OPTION(sbi).s_qf_names[PRJQUOTA])
+ return true;
#endif
+ return false;
+}
+
+#define EFSBADCRC EBADMSG /* Bad CRC detected */
+#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */
+
+#endif /* _LINUX_F2FS_H */
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index eadd80ca1f27..aa8a31be2eb2 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/file.c
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
@@ -44,6 +41,8 @@ static int f2fs_filemap_fault(struct vm_area_struct *vma,
err = filemap_fault(vma, vmf);
up_read(&F2FS_I(inode)->i_mmap_sem);
+ trace_f2fs_filemap_fault(inode, vmf->pgoff, (unsigned long)err);
+
return err;
}
@@ -53,7 +52,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
struct page *page = vmf->page;
struct inode *inode = file_inode(vma->vm_file);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct dnode_of_data dn;
+ struct dnode_of_data dn = { .node_changed = false };
int err;
if (unlikely(f2fs_cp_error(sbi))) {
@@ -65,19 +64,6 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
f2fs_bug_on(sbi, f2fs_has_inline_data(inode));
- /* block allocation */
- f2fs_lock_op(sbi);
- set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = f2fs_reserve_block(&dn, page->index);
- if (err) {
- f2fs_unlock_op(sbi);
- goto out;
- }
- f2fs_put_dnode(&dn);
- f2fs_unlock_op(sbi);
-
- f2fs_balance_fs(sbi, dn.node_changed);
-
file_update_time(vma->vm_file);
down_read(&F2FS_I(inode)->i_mmap_sem);
lock_page(page);
@@ -89,16 +75,34 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
goto out_sem;
}
+ /* block allocation */
+ __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ err = f2fs_get_block(&dn, page->index);
+ f2fs_put_dnode(&dn);
+ __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
+ if (err) {
+ unlock_page(page);
+ goto out_sem;
+ }
+
+ /* fill the page */
+ f2fs_wait_on_page_writeback(page, DATA, false, true);
+
+ /* wait for GCed page writeback via META_MAPPING */
+ f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);
+
/*
* check to see if the page is mapped already (no holes)
*/
if (PageMappedToDisk(page))
- goto mapped;
+ goto out_sem;
/* page is wholly or partially inside EOF */
if (((loff_t)(page->index + 1) << PAGE_SHIFT) >
i_size_read(inode)) {
- unsigned offset;
+ loff_t offset;
+
offset = i_size_read(inode) & ~PAGE_MASK;
zero_user_segment(page, offset, PAGE_SIZE);
}
@@ -107,21 +111,15 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
SetPageUptodate(page);
f2fs_update_iostat(sbi, APP_MAPPED_IO, F2FS_BLKSIZE);
+ f2fs_update_time(sbi, REQ_TIME);
trace_f2fs_vm_page_mkwrite(page, DATA);
-mapped:
- /* fill the page */
- f2fs_wait_on_page_writeback(page, DATA, false);
-
- /* wait for GCed page writeback via META_MAPPING */
- if (f2fs_post_read_required(inode))
- f2fs_wait_on_block_writeback(sbi, dn.data_blkaddr);
-
out_sem:
up_read(&F2FS_I(inode)->i_mmap_sem);
-out:
+
+ f2fs_balance_fs(sbi, dn.node_changed);
+
sb_end_pagefault(inode->i_sb);
- f2fs_update_time(sbi, REQ_TIME);
err:
return block_page_mkwrite_return(err);
}
@@ -160,17 +158,18 @@ static inline enum cp_reason_type need_do_checkpoint(struct inode *inode)
cp_reason = CP_SB_NEED_CP;
else if (file_wrong_pino(inode))
cp_reason = CP_WRONG_PINO;
- else if (!space_for_roll_forward(sbi))
+ else if (!f2fs_space_for_roll_forward(sbi))
cp_reason = CP_NO_SPC_ROLL;
- else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
+ else if (!f2fs_is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
cp_reason = CP_NODE_NEED_CP;
else if (test_opt(sbi, FASTBOOT))
cp_reason = CP_FASTBOOT_MODE;
else if (F2FS_OPTION(sbi).active_logs == 2)
cp_reason = CP_SPEC_LOG_NUM;
else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT &&
- need_dentry_mark(sbi, inode->i_ino) &&
- exist_written_data(sbi, F2FS_I(inode)->i_pino, TRANS_DIR_INO))
+ f2fs_need_dentry_mark(sbi, inode->i_ino) &&
+ f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino,
+ TRANS_DIR_INO))
cp_reason = CP_RECOVER_DIR;
return cp_reason;
@@ -181,7 +180,7 @@ static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino)
struct page *i = find_get_page(NODE_MAPPING(sbi), ino);
bool ret = false;
/* but we must make sure no inode updates are pending */
- if ((i && PageDirty(i)) || need_inode_block_update(sbi, ino))
+ if ((i && PageDirty(i)) || f2fs_need_inode_block_update(sbi, ino))
ret = true;
f2fs_put_page(i, 0);
return ret;
@@ -214,8 +213,10 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
.nr_to_write = LONG_MAX,
.for_reclaim = 0,
};
+ unsigned int seq_id = 0;
- if (unlikely(f2fs_readonly(inode->i_sb)))
+ if (unlikely(f2fs_readonly(inode->i_sb) ||
+ is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
return 0;
trace_f2fs_sync_file_enter(inode);
@@ -244,14 +245,14 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
* if there is no written data, don't waste time writing recovery info.
*/
if (!is_inode_flag_set(inode, FI_APPEND_WRITE) &&
- !exist_written_data(sbi, ino, APPEND_INO)) {
+ !f2fs_exist_written_data(sbi, ino, APPEND_INO)) {
/* it may call write_inode just prior to fsync */
if (need_inode_page_update(sbi, ino))
goto go_write;
if (is_inode_flag_set(inode, FI_UPDATE_WRITE) ||
- exist_written_data(sbi, ino, UPDATE_INO))
+ f2fs_exist_written_data(sbi, ino, UPDATE_INO))
goto flush_out;
goto out;
}
@@ -278,7 +279,9 @@ go_write:
goto out;
}
sync_nodes:
- ret = fsync_node_pages(sbi, inode, &wbc, atomic);
+ atomic_inc(&sbi->wb_sync_req[NODE]);
+ ret = f2fs_fsync_node_pages(sbi, inode, &wbc, atomic, &seq_id);
+ atomic_dec(&sbi->wb_sync_req[NODE]);
if (ret)
goto out;
@@ -288,7 +291,7 @@ sync_nodes:
goto out;
}
- if (need_inode_block_update(sbi, ino)) {
+ if (f2fs_need_inode_block_update(sbi, ino)) {
f2fs_mark_inode_dirty_sync(inode, true);
f2fs_write_inode(inode, NULL);
goto sync_nodes;
@@ -303,21 +306,21 @@ sync_nodes:
* given fsync mark.
*/
if (!atomic) {
- ret = wait_on_node_pages_writeback(sbi, ino);
+ ret = f2fs_wait_on_node_pages_writeback(sbi, seq_id);
if (ret)
goto out;
}
/* once recovery info is written, don't need to track this */
- remove_ino_entry(sbi, ino, APPEND_INO);
+ f2fs_remove_ino_entry(sbi, ino, APPEND_INO);
clear_inode_flag(inode, FI_APPEND_WRITE);
flush_out:
if (!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER)
ret = f2fs_issue_flush(sbi, inode->i_ino);
if (!ret) {
- remove_ino_entry(sbi, ino, UPDATE_INO);
+ f2fs_remove_ino_entry(sbi, ino, UPDATE_INO);
clear_inode_flag(inode, FI_UPDATE_WRITE);
- remove_ino_entry(sbi, ino, FLUSH_INO);
+ f2fs_remove_ino_entry(sbi, ino, FLUSH_INO);
}
f2fs_update_time(sbi, REQ_TIME);
out:
@@ -336,28 +339,29 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
static pgoff_t __get_first_dirty_index(struct address_space *mapping,
pgoff_t pgofs, int whence)
{
- struct pagevec pvec;
+ struct page *page;
int nr_pages;
if (whence != SEEK_DATA)
return 0;
/* find first dirty page index */
- pagevec_init(&pvec, 0);
- nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs,
- PAGECACHE_TAG_DIRTY, 1);
- pgofs = nr_pages ? pvec.pages[0]->index : ULONG_MAX;
- pagevec_release(&pvec);
+ nr_pages = find_get_pages_tag(mapping, &pgofs, PAGECACHE_TAG_DIRTY,
+ 1, &page);
+ if (!nr_pages)
+ return ULONG_MAX;
+ pgofs = page->index;
+ put_page(page);
return pgofs;
}
-static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs,
- int whence)
+static bool __found_offset(struct f2fs_sb_info *sbi, block_t blkaddr,
+ pgoff_t dirty, pgoff_t pgofs, int whence)
{
switch (whence) {
case SEEK_DATA:
if ((blkaddr == NEW_ADDR && dirty == pgofs) ||
- (blkaddr != NEW_ADDR && blkaddr != NULL_ADDR))
+ __is_valid_data_blkaddr(blkaddr))
return true;
break;
case SEEK_HOLE:
@@ -397,13 +401,13 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
for (; data_ofs < isize; data_ofs = (loff_t)pgofs << PAGE_SHIFT) {
set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE);
+ err = f2fs_get_dnode_of_data(&dn, pgofs, LOOKUP_NODE);
if (err && err != -ENOENT) {
goto fail;
} else if (err == -ENOENT) {
/* direct node does not exists */
if (whence == SEEK_DATA) {
- pgofs = get_next_page_offset(&dn, pgofs);
+ pgofs = f2fs_get_next_page_offset(&dn, pgofs);
continue;
} else {
goto found;
@@ -417,10 +421,19 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
dn.ofs_in_node++, pgofs++,
data_ofs = (loff_t)pgofs << PAGE_SHIFT) {
block_t blkaddr;
+
blkaddr = datablock_addr(dn.inode,
dn.node_page, dn.ofs_in_node);
- if (__found_offset(blkaddr, dirty, pgofs, whence)) {
+ if (__is_valid_data_blkaddr(blkaddr) &&
+ !f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
+ blkaddr, DATA_GENERIC_ENHANCE)) {
+ f2fs_put_dnode(&dn);
+ goto fail;
+ }
+
+ if (__found_offset(F2FS_I_SB(inode), blkaddr, dirty,
+ pgofs, whence)) {
f2fs_put_dnode(&dn);
goto found;
}
@@ -491,7 +504,7 @@ static int f2fs_file_open(struct inode *inode, struct file *filp)
return dquot_file_open(inode, filp);
}
-void truncate_data_blocks_range(struct dnode_of_data *dn, int count)
+void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
struct f2fs_node *raw_node;
@@ -507,12 +520,19 @@ void truncate_data_blocks_range(struct dnode_of_data *dn, int count)
for (; count > 0; count--, addr++, dn->ofs_in_node++) {
block_t blkaddr = le32_to_cpu(*addr);
+
if (blkaddr == NULL_ADDR)
continue;
dn->data_blkaddr = NULL_ADDR;
- set_data_blkaddr(dn);
- invalidate_blocks(sbi, blkaddr);
+ f2fs_set_data_blkaddr(dn);
+
+ if (__is_valid_data_blkaddr(blkaddr) &&
+ !f2fs_is_valid_blkaddr(sbi, blkaddr,
+ DATA_GENERIC_ENHANCE))
+ continue;
+
+ f2fs_invalidate_blocks(sbi, blkaddr);
if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page))
clear_inode_flag(dn->inode, FI_FIRST_BLOCK_WRITTEN);
nr_free++;
@@ -524,7 +544,7 @@ void truncate_data_blocks_range(struct dnode_of_data *dn, int count)
* once we invalidate valid blkaddr in range [ofs, ofs + count],
* we will invalidate all blkaddr in the whole range.
*/
- fofs = start_bidx_of_node(ofs_of_node(dn->node_page),
+ fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page),
dn->inode) + ofs;
f2fs_update_extent_cache_range(dn, fofs, 0, len);
dec_valid_block_count(sbi, dn->inode, nr_free);
@@ -536,15 +556,15 @@ void truncate_data_blocks_range(struct dnode_of_data *dn, int count)
dn->ofs_in_node, nr_free);
}
-void truncate_data_blocks(struct dnode_of_data *dn)
+void f2fs_truncate_data_blocks(struct dnode_of_data *dn)
{
- truncate_data_blocks_range(dn, ADDRS_PER_BLOCK);
+ f2fs_truncate_data_blocks_range(dn, ADDRS_PER_BLOCK(dn->inode));
}
static int truncate_partial_data_page(struct inode *inode, u64 from,
bool cache_only)
{
- unsigned offset = from & (PAGE_SIZE - 1);
+ loff_t offset = from & (PAGE_SIZE - 1);
pgoff_t index = from >> PAGE_SHIFT;
struct address_space *mapping = inode->i_mapping;
struct page *page;
@@ -560,11 +580,11 @@ static int truncate_partial_data_page(struct inode *inode, u64 from,
return 0;
}
- page = get_lock_data_page(inode, index, true);
+ page = f2fs_get_lock_data_page(inode, index, true);
if (IS_ERR(page))
return PTR_ERR(page) == -ENOENT ? 0 : PTR_ERR(page);
truncate_out:
- f2fs_wait_on_page_writeback(page, DATA, true);
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
zero_user(page, offset, PAGE_SIZE - offset);
/* An encrypted inode should have a key and truncate the last page. */
@@ -575,7 +595,7 @@ truncate_out:
return 0;
}
-int truncate_blocks(struct inode *inode, u64 from, bool lock)
+int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct dnode_of_data dn;
@@ -594,21 +614,21 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
if (lock)
f2fs_lock_op(sbi);
- ipage = get_node_page(sbi, inode->i_ino);
+ ipage = f2fs_get_node_page(sbi, inode->i_ino);
if (IS_ERR(ipage)) {
err = PTR_ERR(ipage);
goto out;
}
if (f2fs_has_inline_data(inode)) {
- truncate_inline_inode(inode, ipage, from);
+ f2fs_truncate_inline_inode(inode, ipage, from);
f2fs_put_page(ipage, 1);
truncate_page = true;
goto out;
}
set_new_dnode(&dn, inode, ipage, NULL, 0);
- err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE_RA);
+ err = f2fs_get_dnode_of_data(&dn, free_from, LOOKUP_NODE_RA);
if (err) {
if (err == -ENOENT)
goto free_next;
@@ -621,13 +641,13 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
f2fs_bug_on(sbi, count < 0);
if (dn.ofs_in_node || IS_INODE(dn.node_page)) {
- truncate_data_blocks_range(&dn, count);
+ f2fs_truncate_data_blocks_range(&dn, count);
free_from += count;
}
f2fs_put_dnode(&dn);
free_next:
- err = truncate_inode_blocks(inode, free_from);
+ err = f2fs_truncate_inode_blocks(inode, free_from);
out:
if (lock)
f2fs_unlock_op(sbi);
@@ -653,12 +673,11 @@ int f2fs_truncate(struct inode *inode)
trace_f2fs_truncate(inode);
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE)) {
f2fs_show_injection_info(FAULT_TRUNCATE);
return -EIO;
}
-#endif
+
/* we should check inline_data size */
if (!f2fs_may_inline_data(inode)) {
err = f2fs_convert_inline_inode(inode);
@@ -666,7 +685,7 @@ int f2fs_truncate(struct inode *inode)
return err;
}
- err = truncate_blocks(inode, i_size_read(inode), true);
+ err = f2fs_truncate_blocks(inode, i_size_read(inode), true);
if (err)
return err;
@@ -685,27 +704,26 @@ int f2fs_getattr(struct vfsmount *mnt,
unsigned int flags;
if (f2fs_has_extra_attr(inode) &&
- f2fs_sb_has_inode_crtime(inode->i_sb) &&
+ f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) &&
F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) {
stat->result_mask |= STATX_BTIME;
stat->btime.tv_sec = fi->i_crtime.tv_sec;
stat->btime.tv_nsec = fi->i_crtime.tv_nsec;
}
- flags = fi->i_flags & (FS_FL_USER_VISIBLE | FS_PROJINHERIT_FL);
- if (flags & FS_APPEND_FL)
+ flags = fi->i_flags;
+ if (flags & F2FS_APPEND_FL)
stat->attributes |= STATX_ATTR_APPEND;
- if (flags & FS_COMPR_FL)
+ if (flags & F2FS_COMPR_FL)
stat->attributes |= STATX_ATTR_COMPRESSED;
- if (f2fs_encrypted_inode(inode))
+ if (IS_ENCRYPTED(inode))
stat->attributes |= STATX_ATTR_ENCRYPTED;
- if (flags & FS_IMMUTABLE_FL)
+ if (flags & F2FS_IMMUTABLE_FL)
stat->attributes |= STATX_ATTR_IMMUTABLE;
- if (flags & FS_NODUMP_FL)
+ if (flags & F2FS_NODUMP_FL)
stat->attributes |= STATX_ATTR_NODUMP;
stat->attributes_mask |= (STATX_ATTR_APPEND |
- STATX_ATTR_COMPRESSED |
STATX_ATTR_ENCRYPTED |
STATX_ATTR_IMMUTABLE |
STATX_ATTR_NODUMP);
@@ -755,7 +773,6 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
int err;
- bool size_changed = false;
if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
return -EIO;
@@ -777,28 +794,47 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
!uid_eq(attr->ia_uid, inode->i_uid)) ||
(attr->ia_valid & ATTR_GID &&
!gid_eq(attr->ia_gid, inode->i_gid))) {
+ f2fs_lock_op(F2FS_I_SB(inode));
err = dquot_transfer(inode, attr);
- if (err)
+ if (err) {
+ set_sbi_flag(F2FS_I_SB(inode),
+ SBI_QUOTA_NEED_REPAIR);
+ f2fs_unlock_op(F2FS_I_SB(inode));
return err;
+ }
+ /*
+ * update uid/gid under lock_op(), so that dquot and inode can
+ * be updated atomically.
+ */
+ if (attr->ia_valid & ATTR_UID)
+ inode->i_uid = attr->ia_uid;
+ if (attr->ia_valid & ATTR_GID)
+ inode->i_gid = attr->ia_gid;
+ f2fs_mark_inode_dirty_sync(inode, true);
+ f2fs_unlock_op(F2FS_I_SB(inode));
}
if (attr->ia_valid & ATTR_SIZE) {
- if (attr->ia_size <= i_size_read(inode)) {
- down_write(&F2FS_I(inode)->i_mmap_sem);
- truncate_setsize(inode, attr->ia_size);
+ bool to_smaller = (attr->ia_size <= i_size_read(inode));
+
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ down_write(&F2FS_I(inode)->i_mmap_sem);
+
+ truncate_setsize(inode, attr->ia_size);
+
+ if (to_smaller)
err = f2fs_truncate(inode);
- up_write(&F2FS_I(inode)->i_mmap_sem);
- if (err)
- return err;
- } else {
- /*
- * do not trim all blocks after i_size if target size is
- * larger than i_size.
- */
- down_write(&F2FS_I(inode)->i_mmap_sem);
- truncate_setsize(inode, attr->ia_size);
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ /*
+ * do not trim all blocks after i_size if target size is
+ * larger than i_size.
+ */
+ up_write(&F2FS_I(inode)->i_mmap_sem);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ if (err)
+ return err;
+
+ if (!to_smaller) {
/* should convert inline inode here */
if (!f2fs_may_inline_data(inode)) {
err = f2fs_convert_inline_inode(inode);
@@ -811,14 +847,12 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
down_write(&F2FS_I(inode)->i_sem);
F2FS_I(inode)->last_disk_size = i_size_read(inode);
up_write(&F2FS_I(inode)->i_sem);
-
- size_changed = true;
}
__setattr_copy(inode, attr);
if (attr->ia_valid & ATTR_MODE) {
- err = posix_acl_chmod(inode, get_inode_mode(inode));
+ err = posix_acl_chmod(inode, f2fs_get_inode_mode(inode));
if (err || is_inode_flag_set(inode, FI_ACL_MODE)) {
inode->i_mode = F2FS_I(inode)->i_acl_mode;
clear_inode_flag(inode, FI_ACL_MODE);
@@ -826,7 +860,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
}
/* file size may have changed here */
- f2fs_mark_inode_dirty_sync(inode, size_changed);
+ f2fs_mark_inode_dirty_sync(inode, true);
/* inode change will produce dirty node pages flushed by checkpoint */
f2fs_balance_fs(F2FS_I_SB(inode), true);
@@ -860,20 +894,20 @@ static int fill_zero(struct inode *inode, pgoff_t index,
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
- page = get_new_data_page(inode, NULL, index, false);
+ page = f2fs_get_new_data_page(inode, NULL, index, false);
f2fs_unlock_op(sbi);
if (IS_ERR(page))
return PTR_ERR(page);
- f2fs_wait_on_page_writeback(page, DATA, true);
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
zero_user(page, start, len);
set_page_dirty(page);
f2fs_put_page(page, 1);
return 0;
}
-int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
+int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
{
int err;
@@ -882,10 +916,11 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
pgoff_t end_offset, count;
set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = get_dnode_of_data(&dn, pg_start, LOOKUP_NODE);
+ err = f2fs_get_dnode_of_data(&dn, pg_start, LOOKUP_NODE);
if (err) {
if (err == -ENOENT) {
- pg_start = get_next_page_offset(&dn, pg_start);
+ pg_start = f2fs_get_next_page_offset(&dn,
+ pg_start);
continue;
}
return err;
@@ -896,7 +931,7 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
f2fs_bug_on(F2FS_I_SB(inode), count == 0 || count > end_offset);
- truncate_data_blocks_range(&dn, count);
+ f2fs_truncate_data_blocks_range(&dn, count);
f2fs_put_dnode(&dn);
pg_start += count;
@@ -947,14 +982,19 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
blk_start = (loff_t)pg_start << PAGE_SHIFT;
blk_end = (loff_t)pg_end << PAGE_SHIFT;
+
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
down_write(&F2FS_I(inode)->i_mmap_sem);
+
truncate_inode_pages_range(mapping, blk_start,
blk_end - 1);
f2fs_lock_op(sbi);
- ret = truncate_hole(inode, pg_start, pg_end);
+ ret = f2fs_truncate_hole(inode, pg_start, pg_end);
f2fs_unlock_op(sbi);
+
up_write(&F2FS_I(inode)->i_mmap_sem);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
}
}
@@ -970,13 +1010,14 @@ static int __read_out_blkaddrs(struct inode *inode, block_t *blkaddr,
next_dnode:
set_new_dnode(&dn, inode, NULL, NULL, 0);
- ret = get_dnode_of_data(&dn, off, LOOKUP_NODE_RA);
+ ret = f2fs_get_dnode_of_data(&dn, off, LOOKUP_NODE_RA);
if (ret && ret != -ENOENT) {
return ret;
} else if (ret == -ENOENT) {
if (dn.max_level == 0)
return -ENOENT;
- done = min((pgoff_t)ADDRS_PER_BLOCK - dn.ofs_in_node, len);
+ done = min((pgoff_t)ADDRS_PER_BLOCK(inode) - dn.ofs_in_node,
+ len);
blkaddr += done;
do_replace += done;
goto next;
@@ -987,7 +1028,15 @@ next_dnode:
for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) {
*blkaddr = datablock_addr(dn.inode,
dn.node_page, dn.ofs_in_node);
- if (!is_checkpointed_data(sbi, *blkaddr)) {
+
+ if (__is_valid_data_blkaddr(*blkaddr) &&
+ !f2fs_is_valid_blkaddr(sbi, *blkaddr,
+ DATA_GENERIC_ENHANCE)) {
+ f2fs_put_dnode(&dn);
+ return -EFSCORRUPTED;
+ }
+
+ if (!f2fs_is_checkpointed_data(sbi, *blkaddr)) {
if (test_opt(sbi, LFS)) {
f2fs_put_dnode(&dn);
@@ -1020,10 +1069,10 @@ static int __roll_back_blkaddrs(struct inode *inode, block_t *blkaddr,
continue;
set_new_dnode(&dn, inode, NULL, NULL, 0);
- ret = get_dnode_of_data(&dn, off + i, LOOKUP_NODE_RA);
+ ret = f2fs_get_dnode_of_data(&dn, off + i, LOOKUP_NODE_RA);
if (ret) {
dec_valid_block_count(sbi, inode, 1);
- invalidate_blocks(sbi, *blkaddr);
+ f2fs_invalidate_blocks(sbi, *blkaddr);
} else {
f2fs_update_data_blkaddr(&dn, *blkaddr);
}
@@ -1053,18 +1102,23 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
pgoff_t ilen;
set_new_dnode(&dn, dst_inode, NULL, NULL, 0);
- ret = get_dnode_of_data(&dn, dst + i, ALLOC_NODE);
+ ret = f2fs_get_dnode_of_data(&dn, dst + i, ALLOC_NODE);
if (ret)
return ret;
- get_node_info(sbi, dn.nid, &ni);
+ ret = f2fs_get_node_info(sbi, dn.nid, &ni);
+ if (ret) {
+ f2fs_put_dnode(&dn);
+ return ret;
+ }
+
ilen = min((pgoff_t)
ADDRS_PER_PAGE(dn.node_page, dst_inode) -
dn.ofs_in_node, len - i);
do {
dn.data_blkaddr = datablock_addr(dn.inode,
dn.node_page, dn.ofs_in_node);
- truncate_data_blocks_range(&dn, 1);
+ f2fs_truncate_data_blocks_range(&dn, 1);
if (do_replace[i]) {
f2fs_i_blocks_write(src_inode,
@@ -1087,10 +1141,11 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
} else {
struct page *psrc, *pdst;
- psrc = get_lock_data_page(src_inode, src + i, true);
+ psrc = f2fs_get_lock_data_page(src_inode,
+ src + i, true);
if (IS_ERR(psrc))
return PTR_ERR(psrc);
- pdst = get_new_data_page(dst_inode, NULL, dst + i,
+ pdst = f2fs_get_new_data_page(dst_inode, NULL, dst + i,
true);
if (IS_ERR(pdst)) {
f2fs_put_page(psrc, 1);
@@ -1101,7 +1156,8 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
f2fs_put_page(pdst, 1);
f2fs_put_page(psrc, 1);
- ret = truncate_hole(src_inode, src + i, src + i + 1);
+ ret = f2fs_truncate_hole(src_inode,
+ src + i, src + i + 1);
if (ret)
return ret;
i++;
@@ -1120,15 +1176,17 @@ static int __exchange_data_block(struct inode *src_inode,
int ret;
while (len) {
- olen = min((pgoff_t)4 * ADDRS_PER_BLOCK, len);
+ olen = min((pgoff_t)4 * ADDRS_PER_BLOCK(src_inode), len);
src_blkaddr = f2fs_kvzalloc(F2FS_I_SB(src_inode),
- sizeof(block_t) * olen, GFP_KERNEL);
+ array_size(olen, sizeof(block_t)),
+ GFP_KERNEL);
if (!src_blkaddr)
return -ENOMEM;
do_replace = f2fs_kvzalloc(F2FS_I_SB(src_inode),
- sizeof(int) * olen, GFP_KERNEL);
+ array_size(olen, sizeof(int)),
+ GFP_KERNEL);
if (!do_replace) {
kvfree(src_blkaddr);
return -ENOMEM;
@@ -1154,31 +1212,39 @@ static int __exchange_data_block(struct inode *src_inode,
return 0;
roll_back:
- __roll_back_blkaddrs(src_inode, src_blkaddr, do_replace, src, len);
+ __roll_back_blkaddrs(src_inode, src_blkaddr, do_replace, src, olen);
kvfree(src_blkaddr);
kvfree(do_replace);
return ret;
}
-static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end)
+static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- pgoff_t nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;
+ pgoff_t nrpages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
+ pgoff_t start = offset >> PAGE_SHIFT;
+ pgoff_t end = (offset + len) >> PAGE_SHIFT;
int ret;
f2fs_balance_fs(sbi, true);
- f2fs_lock_op(sbi);
- f2fs_drop_extent_tree(inode);
+ /* avoid gc operation during block exchange */
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ down_write(&F2FS_I(inode)->i_mmap_sem);
+ f2fs_lock_op(sbi);
+ f2fs_drop_extent_tree(inode);
+ truncate_pagecache(inode, offset);
ret = __exchange_data_block(inode, inode, end, start, nrpages - end, true);
f2fs_unlock_op(sbi);
+
+ up_write(&F2FS_I(inode)->i_mmap_sem);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
return ret;
}
static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
{
- pgoff_t pg_start, pg_end;
loff_t new_size;
int ret;
@@ -1193,37 +1259,27 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
if (ret)
return ret;
- pg_start = offset >> PAGE_SHIFT;
- pg_end = (offset + len) >> PAGE_SHIFT;
-
- /* avoid gc operation during block exchange */
- down_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
-
- down_write(&F2FS_I(inode)->i_mmap_sem);
/* write out all dirty pages from offset */
ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
if (ret)
- goto out_unlock;
-
- truncate_pagecache(inode, offset);
+ return ret;
- ret = f2fs_do_collapse(inode, pg_start, pg_end);
+ ret = f2fs_do_collapse(inode, offset, len);
if (ret)
- goto out_unlock;
+ return ret;
/* write out all moved pages, if possible */
+ down_write(&F2FS_I(inode)->i_mmap_sem);
filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
truncate_pagecache(inode, offset);
new_size = i_size_read(inode) - len;
truncate_pagecache(inode, new_size);
- ret = truncate_blocks(inode, new_size, true);
+ ret = f2fs_truncate_blocks(inode, new_size, true);
+ up_write(&F2FS_I(inode)->i_mmap_sem);
if (!ret)
f2fs_i_size_write(inode, new_size);
-out_unlock:
- up_write(&F2FS_I(inode)->i_mmap_sem);
- up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
return ret;
}
@@ -1243,7 +1299,7 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start,
}
dn->ofs_in_node = ofs_in_node;
- ret = reserve_new_blocks(dn, count);
+ ret = f2fs_reserve_new_blocks(dn, count);
if (ret)
return ret;
@@ -1252,7 +1308,7 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start,
dn->data_blkaddr = datablock_addr(dn->inode,
dn->node_page, dn->ofs_in_node);
/*
- * reserve_new_blocks will not guarantee entire block
+ * f2fs_reserve_new_blocks will not guarantee entire block
* allocation.
*/
if (dn->data_blkaddr == NULL_ADDR) {
@@ -1260,9 +1316,9 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start,
break;
}
if (dn->data_blkaddr != NEW_ADDR) {
- invalidate_blocks(sbi, dn->data_blkaddr);
+ f2fs_invalidate_blocks(sbi, dn->data_blkaddr);
dn->data_blkaddr = NEW_ADDR;
- set_data_blkaddr(dn);
+ f2fs_set_data_blkaddr(dn);
}
}
@@ -1289,12 +1345,9 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
if (ret)
return ret;
- down_write(&F2FS_I(inode)->i_mmap_sem);
ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1);
if (ret)
- goto out_sem;
-
- truncate_pagecache_range(inode, offset, offset + len - 1);
+ return ret;
pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
@@ -1306,7 +1359,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
ret = fill_zero(inode, pg_start, off_start,
off_end - off_start);
if (ret)
- goto out_sem;
+ return ret;
new_size = max_t(loff_t, new_size, offset + len);
} else {
@@ -1314,7 +1367,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
ret = fill_zero(inode, pg_start++, off_start,
PAGE_SIZE - off_start);
if (ret)
- goto out_sem;
+ return ret;
new_size = max_t(loff_t, new_size,
(loff_t)pg_start << PAGE_SHIFT);
@@ -1325,12 +1378,21 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
unsigned int end_offset;
pgoff_t end;
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ down_write(&F2FS_I(inode)->i_mmap_sem);
+
+ truncate_pagecache_range(inode,
+ (loff_t)index << PAGE_SHIFT,
+ ((loff_t)pg_end << PAGE_SHIFT) - 1);
+
f2fs_lock_op(sbi);
set_new_dnode(&dn, inode, NULL, NULL, 0);
- ret = get_dnode_of_data(&dn, index, ALLOC_NODE);
+ ret = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE);
if (ret) {
f2fs_unlock_op(sbi);
+ up_write(&F2FS_I(inode)->i_mmap_sem);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
goto out;
}
@@ -1339,7 +1401,10 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
ret = f2fs_do_zero_range(&dn, index, end);
f2fs_put_dnode(&dn);
+
f2fs_unlock_op(sbi);
+ up_write(&F2FS_I(inode)->i_mmap_sem);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
f2fs_balance_fs(sbi, dn.node_changed);
@@ -1367,9 +1432,6 @@ out:
else
f2fs_i_size_write(inode, new_size);
}
-out_sem:
- up_write(&F2FS_I(inode)->i_mmap_sem);
-
return ret;
}
@@ -1398,25 +1460,26 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
f2fs_balance_fs(sbi, true);
- /* avoid gc operation during block exchange */
- down_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
-
down_write(&F2FS_I(inode)->i_mmap_sem);
- ret = truncate_blocks(inode, i_size_read(inode), true);
+ ret = f2fs_truncate_blocks(inode, i_size_read(inode), true);
+ up_write(&F2FS_I(inode)->i_mmap_sem);
if (ret)
- goto out;
+ return ret;
/* write out all dirty pages from offset */
ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
if (ret)
- goto out;
-
- truncate_pagecache(inode, offset);
+ return ret;
pg_start = offset >> PAGE_SHIFT;
pg_end = (offset + len) >> PAGE_SHIFT;
delta = pg_end - pg_start;
- idx = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;
+ idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
+
+ /* avoid gc operation during block exchange */
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ down_write(&F2FS_I(inode)->i_mmap_sem);
+ truncate_pagecache(inode, offset);
while (!ret && idx > pg_start) {
nr = idx - pg_start;
@@ -1431,16 +1494,17 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
idx + delta, nr, false);
f2fs_unlock_op(sbi);
}
+ up_write(&F2FS_I(inode)->i_mmap_sem);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
/* write out all moved pages, if possible */
+ down_write(&F2FS_I(inode)->i_mmap_sem);
filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
truncate_pagecache(inode, offset);
+ up_write(&F2FS_I(inode)->i_mmap_sem);
if (!ret)
f2fs_i_size_write(inode, new_size);
-out:
- up_write(&F2FS_I(inode)->i_mmap_sem);
- up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
return ret;
}
@@ -1449,7 +1513,8 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_map_blocks map = { .m_next_pgofs = NULL,
- .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE };
+ .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE,
+ .m_may_create = true };
pgoff_t pg_end;
loff_t new_size = i_size_read(inode);
loff_t off_end;
@@ -1473,7 +1538,12 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
if (off_end)
map.m_len++;
- err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO);
+ if (f2fs_is_pinned_file(inode))
+ map.m_seg_type = CURSEG_COLD_DATA;
+
+ err = f2fs_map_blocks(inode, &map, 1, (f2fs_is_pinned_file(inode) ?
+ F2FS_GET_BLOCK_PRE_DIO :
+ F2FS_GET_BLOCK_PRE_AIO));
if (err) {
pgoff_t last_off;
@@ -1483,7 +1553,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
last_off = map.m_lblk + map.m_len - 1;
/* update new size to the failed position */
- new_size = (last_off == pg_end) ? offset + len:
+ new_size = (last_off == pg_end) ? offset + len :
(loff_t)(last_off + 1) << PAGE_SHIFT;
} else {
new_size = ((loff_t)pg_end << PAGE_SHIFT) + off_end;
@@ -1563,13 +1633,13 @@ static int f2fs_release_file(struct inode *inode, struct file *filp)
/* any remaining atomic pages should be discarded */
if (f2fs_is_atomic_file(inode))
- drop_inmem_pages(inode);
+ f2fs_drop_inmem_pages(inode);
if (f2fs_is_volatile_file(inode)) {
- clear_inode_flag(inode, FI_VOLATILE_FILE);
- stat_dec_volatile_write(inode);
set_inode_flag(inode, FI_DROP_CACHE);
filemap_fdatawrite(inode->i_mapping);
clear_inode_flag(inode, FI_DROP_CACHE);
+ clear_inode_flag(inode, FI_VOLATILE_FILE);
+ stat_dec_volatile_write(inode);
}
return 0;
}
@@ -1586,63 +1656,156 @@ static int f2fs_file_flush(struct file *file, fl_owner_t id)
*/
if (f2fs_is_atomic_file(inode) &&
F2FS_I(inode)->inmem_task == current)
- drop_inmem_pages(inode);
+ f2fs_drop_inmem_pages(inode);
+ return 0;
+}
+
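+/* set the masked subset of on-disk i_flags and mark the inode dirty */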
+static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
+{
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+
+ /* Is it quota file? Do not allow user to mess with it */
+ if (IS_NOQUOTA(inode))
+ return -EPERM;
+
+ fi->i_flags = iflags | (fi->i_flags & ~mask);
+
+ if (fi->i_flags & F2FS_PROJINHERIT_FL)
+ set_inode_flag(inode, FI_PROJ_INHERIT);
+ else
+ clear_inode_flag(inode, FI_PROJ_INHERIT);
+
+ inode->i_ctime = current_time(inode);
+ f2fs_set_inode_flags(inode);
+ f2fs_mark_inode_dirty_sync(inode, true);
return 0;
}
+/* FS_IOC_GETFLAGS and FS_IOC_SETFLAGS support */
+
+/*
+ * To make a new on-disk f2fs i_flag gettable via FS_IOC_GETFLAGS, add an entry
+ * for it to f2fs_fsflags_map[], and add its FS_*_FL equivalent to
+ * F2FS_GETTABLE_FS_FL. To also make it settable via FS_IOC_SETFLAGS, also add
+ * its FS_*_FL equivalent to F2FS_SETTABLE_FS_FL.
+ */
+
+static const struct {
+ u32 iflag;
+ u32 fsflag;
+} f2fs_fsflags_map[] = {
+ { F2FS_SYNC_FL, FS_SYNC_FL },
+ { F2FS_IMMUTABLE_FL, FS_IMMUTABLE_FL },
+ { F2FS_APPEND_FL, FS_APPEND_FL },
+ { F2FS_NODUMP_FL, FS_NODUMP_FL },
+ { F2FS_NOATIME_FL, FS_NOATIME_FL },
+ { F2FS_INDEX_FL, FS_INDEX_FL },
+ { F2FS_DIRSYNC_FL, FS_DIRSYNC_FL },
+ { F2FS_PROJINHERIT_FL, FS_PROJINHERIT_FL },
+};
+
+#define F2FS_GETTABLE_FS_FL ( \
+ FS_SYNC_FL | \
+ FS_IMMUTABLE_FL | \
+ FS_APPEND_FL | \
+ FS_NODUMP_FL | \
+ FS_NOATIME_FL | \
+ FS_INDEX_FL | \
+ FS_DIRSYNC_FL | \
+ FS_PROJINHERIT_FL | \
+ FS_ENCRYPT_FL | \
+ FS_INLINE_DATA_FL | \
+ FS_NOCOW_FL)
+
+#define F2FS_SETTABLE_FS_FL ( \
+ FS_SYNC_FL | \
+ FS_IMMUTABLE_FL | \
+ FS_APPEND_FL | \
+ FS_NODUMP_FL | \
+ FS_NOATIME_FL | \
+ FS_DIRSYNC_FL | \
+ FS_PROJINHERIT_FL)
+
+/* Convert f2fs on-disk i_flags to FS_IOC_{GET,SET}FLAGS flags */
+static inline u32 f2fs_iflags_to_fsflags(u32 iflags)
+{
+ u32 fsflags = 0;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++)
+ if (iflags & f2fs_fsflags_map[i].iflag)
+ fsflags |= f2fs_fsflags_map[i].fsflag;
+
+ return fsflags;
+}
+
+/* Convert FS_IOC_{GET,SET}FLAGS flags to f2fs on-disk i_flags */
+static inline u32 f2fs_fsflags_to_iflags(u32 fsflags)
+{
+ u32 iflags = 0;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++)
+ if (fsflags & f2fs_fsflags_map[i].fsflag)
+ iflags |= f2fs_fsflags_map[i].iflag;
+
+ return iflags;
+}
+
static int f2fs_ioc_getflags(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
struct f2fs_inode_info *fi = F2FS_I(inode);
- unsigned int flags = fi->i_flags & FS_FL_USER_VISIBLE;
- return put_user(flags, (int __user *)arg);
+ u32 fsflags = f2fs_iflags_to_fsflags(fi->i_flags);
+
+ if (IS_ENCRYPTED(inode))
+ fsflags |= FS_ENCRYPT_FL;
+ if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode))
+ fsflags |= FS_INLINE_DATA_FL;
+ if (is_inode_flag_set(inode, FI_PIN_FILE))
+ fsflags |= FS_NOCOW_FL;
+
+ fsflags &= F2FS_GETTABLE_FS_FL;
+
+ return put_user(fsflags, (int __user *)arg);
}
static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
struct f2fs_inode_info *fi = F2FS_I(inode);
- unsigned int flags;
- unsigned int oldflags;
+ u32 fsflags, old_fsflags;
+ u32 iflags;
int ret;
if (!inode_owner_or_capable(inode))
return -EACCES;
- if (get_user(flags, (int __user *)arg))
+ if (get_user(fsflags, (int __user *)arg))
return -EFAULT;
+ if (fsflags & ~F2FS_GETTABLE_FS_FL)
+ return -EOPNOTSUPP;
+ fsflags &= F2FS_SETTABLE_FS_FL;
+
+ iflags = f2fs_fsflags_to_iflags(fsflags);
+ if (f2fs_mask_flags(inode->i_mode, iflags) != iflags)
+ return -EOPNOTSUPP;
+
ret = mnt_want_write_file(filp);
if (ret)
return ret;
inode_lock(inode);
- /* Is it quota file? Do not allow user to mess with it */
- if (IS_NOQUOTA(inode)) {
- ret = -EPERM;
- goto unlock_out;
- }
-
- flags = f2fs_mask_flags(inode->i_mode, flags);
-
- oldflags = fi->i_flags;
-
- if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
- if (!capable(CAP_LINUX_IMMUTABLE)) {
- ret = -EPERM;
- goto unlock_out;
- }
- }
-
- flags = flags & FS_FL_USER_MODIFIABLE;
- flags |= oldflags & ~FS_FL_USER_MODIFIABLE;
- fi->i_flags = flags;
+ old_fsflags = f2fs_iflags_to_fsflags(fi->i_flags);
+ ret = vfs_ioc_setflags_prepare(inode, old_fsflags, fsflags);
+ if (ret)
+ goto out;
- inode->i_ctime = current_time(inode);
- f2fs_set_inode_flags(inode);
- f2fs_mark_inode_dirty_sync(inode, false);
-unlock_out:
+ ret = f2fs_setflags_common(inode, iflags,
+ f2fs_fsflags_to_iflags(F2FS_SETTABLE_FS_FL));
+out:
inode_unlock(inode);
mnt_drop_write_file(filp);
return ret;
@@ -1658,6 +1821,8 @@ static int f2fs_ioc_getversion(struct file *filp, unsigned long arg)
static int f2fs_ioc_start_atomic_write(struct file *filp)
{
struct inode *inode = file_inode(filp);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
int ret;
if (!inode_owner_or_capable(inode))
@@ -1672,31 +1837,42 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
inode_lock(inode);
- if (f2fs_is_atomic_file(inode))
+ if (f2fs_is_atomic_file(inode)) {
+ if (is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST))
+ ret = -EINVAL;
goto out;
+ }
ret = f2fs_convert_inline_inode(inode);
if (ret)
goto out;
- set_inode_flag(inode, FI_ATOMIC_FILE);
- set_inode_flag(inode, FI_HOT_DATA);
- f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- if (!get_dirty_pages(inode))
- goto inc_stat;
-
- f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING,
- "Unexpected flush for atomic writes: ino=%lu, npages=%u",
- inode->i_ino, get_dirty_pages(inode));
+ /*
+ * Wait for end_io so that F2FS_WB_CP_DATA is counted correctly
+ * by f2fs_is_atomic_file.
+ */
+ if (get_dirty_pages(inode))
+ f2fs_warn(F2FS_I_SB(inode), "Unexpected flush for atomic writes: ino=%lu, npages=%u",
+ inode->i_ino, get_dirty_pages(inode));
ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
if (ret) {
- clear_inode_flag(inode, FI_ATOMIC_FILE);
- clear_inode_flag(inode, FI_HOT_DATA);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
goto out;
}
-inc_stat:
+ spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
+ if (list_empty(&fi->inmem_ilist))
+ list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]);
+ spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
+
+ /* add the inode to inmem_list first, then set FI_ATOMIC_FILE */
+ set_inode_flag(inode, FI_ATOMIC_FILE);
+ clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+
+ f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
F2FS_I(inode)->inmem_task = current;
stat_inc_atomic_write(inode);
stat_update_max_atomic_write(inode);
@@ -1718,29 +1894,31 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
if (ret)
return ret;
- inode_lock(inode);
+ f2fs_balance_fs(F2FS_I_SB(inode), true);
- down_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
+ inode_lock(inode);
- if (f2fs_is_volatile_file(inode))
+ if (f2fs_is_volatile_file(inode)) {
+ ret = -EINVAL;
goto err_out;
+ }
if (f2fs_is_atomic_file(inode)) {
- ret = commit_inmem_pages(inode);
+ ret = f2fs_commit_inmem_pages(inode);
if (ret)
goto err_out;
ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
- if (!ret) {
- clear_inode_flag(inode, FI_ATOMIC_FILE);
- clear_inode_flag(inode, FI_HOT_DATA);
- stat_dec_atomic_write(inode);
- }
+ if (!ret)
+ f2fs_drop_inmem_pages(inode);
} else {
ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false);
}
err_out:
- up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
+ if (is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) {
+ clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
+ ret = -EINVAL;
+ }
inode_unlock(inode);
mnt_drop_write_file(filp);
return ret;
@@ -1825,13 +2003,15 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp)
inode_lock(inode);
if (f2fs_is_atomic_file(inode))
- drop_inmem_pages(inode);
+ f2fs_drop_inmem_pages(inode);
if (f2fs_is_volatile_file(inode)) {
clear_inode_flag(inode, FI_VOLATILE_FILE);
stat_dec_volatile_write(inode);
ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
}
+ clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
+
inode_unlock(inode);
mnt_drop_write_file(filp);
@@ -1845,7 +2025,7 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct super_block *sb = sbi->sb;
__u32 in;
- int ret;
+ int ret = 0;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -1853,9 +2033,11 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
if (get_user(in, (__u32 __user *)arg))
return -EFAULT;
- ret = mnt_want_write_file(filp);
- if (ret)
- return ret;
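+ /*
+ * FULLSYNC freezes the filesystem via freeze_bdev(), so also taking
+ * mnt_want_write_file() here could deadlock on sb_writers.
+ */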
+ if (in != F2FS_GOING_DOWN_FULLSYNC) {
+ ret = mnt_want_write_file(filp);
+ if (ret)
+ return ret;
+ }
switch (in) {
case F2FS_GOING_DOWN_FULLSYNC:
@@ -1866,6 +2048,7 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
}
if (sb) {
f2fs_stop_checkpoint(sbi, false);
+ set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
thaw_bdev(sb->s_bdev, sb);
}
break;
@@ -1875,28 +2058,42 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
if (ret)
goto out;
f2fs_stop_checkpoint(sbi, false);
+ set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
break;
case F2FS_GOING_DOWN_NOSYNC:
f2fs_stop_checkpoint(sbi, false);
+ set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
break;
case F2FS_GOING_DOWN_METAFLUSH:
- sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO);
+ f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO);
f2fs_stop_checkpoint(sbi, false);
+ set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
break;
+ case F2FS_GOING_DOWN_NEED_FSCK:
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ set_sbi_flag(sbi, SBI_CP_DISABLED_QUICK);
+ set_sbi_flag(sbi, SBI_IS_DIRTY);
+ /* do checkpoint only */
+ ret = f2fs_sync_fs(sb, 1);
+ goto out;
default:
ret = -EINVAL;
goto out;
}
- stop_gc_thread(sbi);
- stop_discard_thread(sbi);
+ f2fs_stop_gc_thread(sbi);
+ f2fs_stop_discard_thread(sbi);
- drop_discard_cmd(sbi);
+ f2fs_drop_discard_cmd(sbi);
clear_opt(sbi, DISCARD);
f2fs_update_time(sbi, REQ_TIME);
out:
- mnt_drop_write_file(filp);
+ if (in != F2FS_GOING_DOWN_FULLSYNC)
+ mnt_drop_write_file(filp);
+
+ trace_f2fs_shutdown(sbi, in, ret);
+
return ret;
}
@@ -1911,7 +2108,7 @@ static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (!blk_queue_discard(q))
+ if (!f2fs_hw_support_discard(F2FS_SB(sb)))
return -EOPNOTSUPP;
if (copy_from_user(&range, (struct fstrim_range __user *)arg,
@@ -1950,7 +2147,7 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
- if (!f2fs_sb_has_encrypt(inode->i_sb))
+ if (!f2fs_sb_has_encrypt(F2FS_I_SB(inode)))
return -EOPNOTSUPP;
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
@@ -1960,7 +2157,7 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg)
{
- if (!f2fs_sb_has_encrypt(file_inode(filp)->i_sb))
+ if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
return -EOPNOTSUPP;
return fscrypt_ioctl_get_policy(filp, (void __user *)arg);
}
@@ -1971,7 +2168,7 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg)
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
int err;
- if (!f2fs_sb_has_encrypt(inode->i_sb))
+ if (!f2fs_sb_has_encrypt(sbi))
return -EOPNOTSUPP;
err = mnt_want_write_file(filp);
@@ -2055,15 +2252,15 @@ static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg)
if (f2fs_readonly(sbi->sb))
return -EROFS;
+ end = range.start + range.len;
+ if (range.start < MAIN_BLKADDR(sbi) || end >= MAX_BLKADDR(sbi))
+ return -EINVAL;
+
ret = mnt_want_write_file(filp);
if (ret)
return ret;
- end = range.start + range.len;
- if (range.start < MAIN_BLKADDR(sbi) || end >= MAX_BLKADDR(sbi)) {
- ret = -EINVAL;
- goto out;
- }
do_more:
if (!range.sync) {
if (!mutex_trylock(&sbi->gc_mutex)) {
@@ -2075,7 +2272,7 @@ do_more:
}
ret = f2fs_gc(sbi, range.sync, true, GET_SEGNO(sbi, range.start));
- range.start += sbi->blocks_per_seg;
+ range.start += BLKS_PER_SEC(sbi);
if (range.start <= end)
goto do_more;
out:
@@ -2095,6 +2292,11 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg)
if (f2fs_readonly(sbi->sb))
return -EROFS;
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+ f2fs_info(sbi, "Skipping Checkpoint. Checkpoints currently disabled.");
+ return -EINVAL;
+ }
+
ret = mnt_want_write_file(filp);
if (ret)
return ret;
@@ -2111,8 +2313,9 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
{
struct inode *inode = file_inode(filp);
struct f2fs_map_blocks map = { .m_next_extent = NULL,
- .m_seg_type = NO_CHECK_TYPE };
- struct extent_info ei = {0,0,0};
+ .m_seg_type = NO_CHECK_TYPE,
+ .m_may_create = false };
+ struct extent_info ei = {0, 0, 0};
pgoff_t pg_start, pg_end, next_pgofs;
unsigned int blk_per_seg = sbi->blocks_per_seg;
unsigned int total = 0, sec_num;
@@ -2121,7 +2324,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
int err;
/* if in-place-update policy is enabled, don't waste time here */
- if (should_update_inplace(inode, NULL))
+ if (f2fs_should_update_inplace(inode, NULL))
return -EINVAL;
pg_start = range->start >> PAGE_SHIFT;
@@ -2179,7 +2382,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
if (!fragmented)
goto out;
- sec_num = (total + BLKS_PER_SEC(sbi) - 1) / BLKS_PER_SEC(sbi);
+ sec_num = DIV_ROUND_UP(total, BLKS_PER_SEC(sbi));
/*
* make sure there are enough free sections for LFS allocation; this can
@@ -2216,7 +2419,7 @@ do_map:
while (idx < map.m_lblk + map.m_len && cnt < blk_per_seg) {
struct page *page;
- page = get_lock_data_page(inode, idx, true);
+ page = f2fs_get_lock_data_page(inode, idx, true);
if (IS_ERR(page)) {
err = PTR_ERR(page);
goto clear_out;
@@ -2327,15 +2530,10 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
}
inode_lock(src);
- down_write(&F2FS_I(src)->dio_rwsem[WRITE]);
if (src != dst) {
ret = -EBUSY;
if (!inode_trylock(dst))
goto out;
- if (!down_write_trylock(&F2FS_I(dst)->dio_rwsem[WRITE])) {
- inode_unlock(dst);
- goto out;
- }
}
ret = -EINVAL;
@@ -2380,6 +2578,14 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
goto out_unlock;
f2fs_balance_fs(sbi, true);
+
+ down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
+ if (src != dst) {
+ ret = -EBUSY;
+ if (!down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE]))
+ goto out_src;
+ }
+
f2fs_lock_op(sbi);
ret = __exchange_data_block(src, dst, pos_in >> F2FS_BLKSIZE_BITS,
pos_out >> F2FS_BLKSIZE_BITS,
@@ -2392,13 +2598,15 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
f2fs_i_size_write(dst, dst_osize);
}
f2fs_unlock_op(sbi);
+
+ if (src != dst)
+ up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]);
+out_src:
+ up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
out_unlock:
- if (src != dst) {
- up_write(&F2FS_I(dst)->dio_rwsem[WRITE]);
+ if (src != dst)
inode_unlock(dst);
- }
out:
- up_write(&F2FS_I(src)->dio_rwsem[WRITE]);
inode_unlock(src);
return ret;
}
@@ -2461,16 +2669,17 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
if (f2fs_readonly(sbi->sb))
return -EROFS;
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+ return -EINVAL;
+
if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg,
sizeof(range)))
return -EFAULT;
- if (sbi->s_ndevs <= 1 || sbi->s_ndevs - 1 <= range.dev_num ||
- sbi->segs_per_sec != 1) {
- f2fs_msg(sbi->sb, KERN_WARNING,
- "Can't flush %u in %d for segs_per_sec %u != 1\n",
- range.dev_num, sbi->s_ndevs,
- sbi->segs_per_sec);
+ if (!f2fs_is_multi_device(sbi) || sbi->s_ndevs - 1 <= range.dev_num ||
+ __is_large_section(sbi)) {
+ f2fs_warn(sbi, "Can't flush %u in %d for segs_per_sec %u != 1",
+ range.dev_num, sbi->s_ndevs, sbi->segs_per_sec);
return -EINVAL;
}
@@ -2525,12 +2734,13 @@ int f2fs_pin_file_control(struct inode *inode, bool inc)
/* Use i_gc_failures for normal file as a risk signal. */
if (inc)
- f2fs_i_gc_failures_write(inode, fi->i_gc_failures + 1);
+ f2fs_i_gc_failures_write(inode,
+ fi->i_gc_failures[GC_FAILURE_PIN] + 1);
- if (fi->i_gc_failures > sbi->gc_pin_file_threshold) {
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: Enable GC = ino %lx after %x GC trials\n",
- __func__, inode->i_ino, fi->i_gc_failures);
+ if (fi->i_gc_failures[GC_FAILURE_PIN] > sbi->gc_pin_file_threshold) {
+ f2fs_warn(sbi, "%s: Enable GC = ino %lx after %x GC trials",
+ __func__, inode->i_ino,
+ fi->i_gc_failures[GC_FAILURE_PIN]);
clear_inode_flag(inode, FI_PIN_FILE);
return -EAGAIN;
}
@@ -2543,9 +2753,6 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
__u32 pin;
int ret = 0;
- if (!inode_owner_or_capable(inode))
- return -EACCES;
-
if (get_user(pin, (__u32 __user *)arg))
return -EFAULT;
@@ -2561,14 +2768,14 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
inode_lock(inode);
- if (should_update_outplace(inode, NULL)) {
+ if (f2fs_should_update_outplace(inode, NULL)) {
ret = -EINVAL;
goto out;
}
if (!pin) {
clear_inode_flag(inode, FI_PIN_FILE);
- F2FS_I(inode)->i_gc_failures = 1;
+ f2fs_i_gc_failures_write(inode, 0);
goto done;
}
@@ -2581,7 +2788,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
goto out;
set_inode_flag(inode, FI_PIN_FILE);
- ret = F2FS_I(inode)->i_gc_failures;
+ ret = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN];
done:
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
out:
@@ -2596,7 +2803,7 @@ static int f2fs_ioc_get_pin_file(struct file *filp, unsigned long arg)
__u32 pin = 0;
if (is_inode_flag_set(inode, FI_PIN_FILE))
- pin = F2FS_I(inode)->i_gc_failures;
+ pin = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN];
return put_user(pin, (u32 __user *)arg);
}
@@ -2615,14 +2822,15 @@ int f2fs_precache_extents(struct inode *inode)
map.m_next_pgofs = NULL;
map.m_next_extent = &m_next_extent;
map.m_seg_type = NO_CHECK_TYPE;
+ map.m_may_create = false;
end = F2FS_I_SB(inode)->max_file_blocks;
while (map.m_lblk < end) {
map.m_len = end - map.m_lblk;
- down_write(&fi->dio_rwsem[WRITE]);
+ down_write(&fi->i_gc_rwsem[WRITE]);
err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_PRECACHE);
- up_write(&fi->dio_rwsem[WRITE]);
+ up_write(&fi->i_gc_rwsem[WRITE]);
if (err)
return err;
@@ -2637,6 +2845,27 @@ static int f2fs_ioc_precache_extents(struct file *filp, unsigned long arg)
return f2fs_precache_extents(file_inode(filp));
}
+static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));
+ __u64 block_count;
+ int ret;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (f2fs_readonly(sbi->sb))
+ return -EROFS;
+
+ if (copy_from_user(&block_count, (void __user *)arg,
+ sizeof(block_count)))
+ return -EFAULT;
+
+ ret = f2fs_resize_fs(sbi, block_count);
+
+ return ret;
+}
+
long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp)))))
@@ -2689,6 +2918,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return f2fs_ioc_set_pin_file(filp, arg);
case F2FS_IOC_PRECACHE_EXTENTS:
return f2fs_ioc_precache_extents(filp, arg);
+ case F2FS_IOC_RESIZE_FS:
+ return f2fs_ioc_resize_fs(filp, arg);
default:
return -ENOTTY;
}
@@ -2698,18 +2929,23 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
- struct blk_plug plug;
ssize_t ret;
- if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
- return -EIO;
+ if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
+ ret = -EIO;
+ goto out;
+ }
- if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
- return -EINVAL;
+ if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT)) {
+ ret = -EINVAL;
+ goto out;
+ }
if (!inode_trylock(inode)) {
- if (iocb->ki_flags & IOCB_NOWAIT)
- return -EAGAIN;
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ ret = -EAGAIN;
+ goto out;
+ }
inode_lock(inode);
}
@@ -2722,16 +2958,16 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (iov_iter_fault_in_readable(from, iov_iter_count(from)))
set_inode_flag(inode, FI_NO_PREALLOC);
- if ((iocb->ki_flags & IOCB_NOWAIT) &&
- (iocb->ki_flags & IOCB_DIRECT)) {
- if (!f2fs_overwrite_io(inode, iocb->ki_pos,
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (!f2fs_overwrite_io(inode, iocb->ki_pos,
iov_iter_count(from)) ||
- f2fs_has_inline_data(inode) ||
- f2fs_force_buffered_io(inode, WRITE)) {
- inode_unlock(inode);
- return -EAGAIN;
- }
-
+ f2fs_has_inline_data(inode) ||
+ f2fs_force_buffered_io(inode, iocb, from)) {
+ clear_inode_flag(inode, FI_NO_PREALLOC);
+ inode_unlock(inode);
+ ret = -EAGAIN;
+ goto out;
+ }
} else {
preallocated = true;
target_size = iocb->ki_pos + iov_iter_count(from);
@@ -2740,12 +2976,11 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (err) {
clear_inode_flag(inode, FI_NO_PREALLOC);
inode_unlock(inode);
- return err;
+ ret = err;
+ goto out;
}
}
- blk_start_plug(&plug);
ret = __generic_file_write_iter(iocb, from);
- blk_finish_plug(&plug);
clear_inode_flag(inode, FI_NO_PREALLOC);
/* if we couldn't write data, we should deallocate blocks. */
@@ -2756,7 +2991,9 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret);
}
inode_unlock(inode);
-
+out:
+ trace_f2fs_file_write_iter(inode, iocb->ki_pos,
+ iov_iter_count(from), ret);
if (ret > 0) {
ssize_t err;
@@ -2799,6 +3036,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case F2FS_IOC_GET_PIN_FILE:
case F2FS_IOC_SET_PIN_FILE:
case F2FS_IOC_PRECACHE_EXTENTS:
+ case F2FS_IOC_RESIZE_FS:
break;
default:
return -ENOIOCTLCMD;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index d9b799d38fc6..bac60511c9a2 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/gc.c
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/module.h>
@@ -43,25 +40,28 @@ static int gc_thread_func(void *data)
if (gc_th->gc_wake)
gc_th->gc_wake = 0;
- if (try_to_freeze())
+ if (try_to_freeze()) {
+ stat_other_skip_bggc_count(sbi);
continue;
+ }
if (kthread_should_stop())
break;
if (sbi->sb->s_writers.frozen >= SB_FREEZE_WRITE) {
increase_sleep_time(gc_th, &wait_ms);
+ stat_other_skip_bggc_count(sbi);
continue;
}
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
f2fs_show_injection_info(FAULT_CHECKPOINT);
f2fs_stop_checkpoint(sbi, false);
}
-#endif
- if (!sb_start_write_trylock(sbi->sb))
+ if (!sb_start_write_trylock(sbi->sb)) {
+ stat_other_skip_bggc_count(sbi);
continue;
+ }
/*
* [GC triggering condition]
@@ -76,18 +76,21 @@ static int gc_thread_func(void *data)
* invalidated soon after by user update or deletion.
* So, I'd like to wait some time to collect dirty segments.
*/
- if (gc_th->gc_urgent) {
+ if (sbi->gc_mode == GC_URGENT) {
wait_ms = gc_th->urgent_sleep_time;
mutex_lock(&sbi->gc_mutex);
goto do_gc;
}
- if (!mutex_trylock(&sbi->gc_mutex))
+ if (!mutex_trylock(&sbi->gc_mutex)) {
+ stat_other_skip_bggc_count(sbi);
goto next;
+ }
- if (!is_idle(sbi)) {
+ if (!is_idle(sbi, GC_TIME)) {
increase_sleep_time(gc_th, &wait_ms);
mutex_unlock(&sbi->gc_mutex);
+ stat_io_skip_bggc_count(sbi);
goto next;
}
@@ -114,7 +117,7 @@ next:
return 0;
}
-int start_gc_thread(struct f2fs_sb_info *sbi)
+int f2fs_start_gc_thread(struct f2fs_sb_info *sbi)
{
struct f2fs_gc_kthread *gc_th;
dev_t dev = sbi->sb->s_bdev->bd_dev;
@@ -131,8 +134,6 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME;
gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME;
- gc_th->gc_idle = 0;
- gc_th->gc_urgent = 0;
gc_th->gc_wake = 0;
sbi->gc_thread = gc_th;
@@ -141,38 +142,36 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
"f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(gc_th->f2fs_gc_task)) {
err = PTR_ERR(gc_th->f2fs_gc_task);
- kfree(gc_th);
+ kvfree(gc_th);
sbi->gc_thread = NULL;
}
out:
return err;
}
-void stop_gc_thread(struct f2fs_sb_info *sbi)
+void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi)
{
struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
if (!gc_th)
return;
kthread_stop(gc_th->f2fs_gc_task);
- kfree(gc_th);
+ kvfree(gc_th);
sbi->gc_thread = NULL;
}
-static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
+static int select_gc_type(struct f2fs_sb_info *sbi, int gc_type)
{
int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY;
- if (!gc_th)
- return gc_mode;
-
- if (gc_th->gc_idle) {
- if (gc_th->gc_idle == 1)
- gc_mode = GC_CB;
- else if (gc_th->gc_idle == 2)
- gc_mode = GC_GREEDY;
- }
- if (gc_th->gc_urgent)
+ switch (sbi->gc_mode) {
+ case GC_IDLE_CB:
+ gc_mode = GC_CB;
+ break;
+ case GC_IDLE_GREEDY:
+ case GC_URGENT:
gc_mode = GC_GREEDY;
+ break;
+ }
return gc_mode;
}
@@ -187,7 +186,7 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
p->max_search = dirty_i->nr_dirty[type];
p->ofs_unit = 1;
} else {
- p->gc_mode = select_gc_type(sbi->gc_thread, gc_type);
+ p->gc_mode = select_gc_type(sbi, gc_type);
p->dirty_segmap = dirty_i->dirty_segmap[DIRTY];
p->max_search = dirty_i->nr_dirty[DIRTY];
p->ofs_unit = sbi->segs_per_sec;
@@ -195,7 +194,7 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
/* we need to check every dirty segment in the FG_GC case */
if (gc_type != FG_GC &&
- (sbi->gc_thread && !sbi->gc_thread->gc_urgent) &&
+ (sbi->gc_mode != GC_URGENT) &&
p->max_search > sbi->max_victim_search)
p->max_search = sbi->max_victim_search;
@@ -234,10 +233,6 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
for_each_set_bit(secno, dirty_i->victim_secmap, MAIN_SECS(sbi)) {
if (sec_usage_check(sbi, secno))
continue;
-
- if (no_fggc_candidate(sbi, secno))
- continue;
-
clear_bit(secno, dirty_i->victim_secmap);
return GET_SEG_FROM_SEC(sbi, secno);
}
@@ -316,10 +311,11 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
struct sit_info *sm = SIT_I(sbi);
struct victim_sel_policy p;
unsigned int secno, last_victim;
- unsigned int last_segment = MAIN_SEGS(sbi);
+ unsigned int last_segment;
unsigned int nsearched = 0;
mutex_lock(&dirty_i->seglist_lock);
+ last_segment = MAIN_SECS(sbi) * sbi->segs_per_sec;
p.alloc_mode = alloc_mode;
select_policy(sbi, gc_type, type, &p);
@@ -328,8 +324,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
p.min_cost = get_max_cost(sbi, &p);
if (*result != NULL_SEGNO) {
- if (IS_DATASEG(get_seg_entry(sbi, *result)->type) &&
- get_valid_blocks(sbi, *result, false) &&
+ if (get_valid_blocks(sbi, *result, false) &&
!sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result)))
p.min_segno = *result;
goto out;
@@ -338,6 +333,22 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
if (p.max_search == 0)
goto out;
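+ /*
+ * in large-section mode, resume with the victim that a previous
+ * GC round selected but has not finished migrating yet
+ */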
+ if (__is_large_section(sbi) && p.alloc_mode == LFS) {
+ if (sbi->next_victim_seg[BG_GC] != NULL_SEGNO) {
+ p.min_segno = sbi->next_victim_seg[BG_GC];
+ *result = p.min_segno;
+ sbi->next_victim_seg[BG_GC] = NULL_SEGNO;
+ goto got_result;
+ }
+ if (gc_type == FG_GC &&
+ sbi->next_victim_seg[FG_GC] != NULL_SEGNO) {
+ p.min_segno = sbi->next_victim_seg[FG_GC];
+ *result = p.min_segno;
+ sbi->next_victim_seg[FG_GC] = NULL_SEGNO;
+ goto got_result;
+ }
+ }
+
last_victim = sm->last_victim[p.gc_mode];
if (p.alloc_mode == LFS && gc_type == FG_GC) {
p.min_segno = check_bg_victims(sbi);
@@ -375,10 +386,12 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
if (sec_usage_check(sbi, secno))
goto next;
- if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
+ /* Don't touch checkpointed data */
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
+ get_ckpt_valid_blocks(sbi, segno) &&
+ p.alloc_mode != SSR))
goto next;
- if (gc_type == FG_GC && p.alloc_mode == LFS &&
- no_fggc_candidate(sbi, secno))
+ if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
goto next;
cost = get_gc_cost(sbi, segno, &p);
@@ -393,12 +406,15 @@ next:
sm->last_victim[p.gc_mode] = last_victim + 1;
else
sm->last_victim[p.gc_mode] = segno + 1;
- sm->last_victim[p.gc_mode] %= MAIN_SEGS(sbi);
+ sm->last_victim[p.gc_mode] %=
+ (MAIN_SECS(sbi) * sbi->segs_per_sec);
break;
}
}
if (p.min_segno != NULL_SEGNO) {
got_it:
+ *result = (p.min_segno / p.ofs_unit) * p.ofs_unit;
+got_result:
if (p.alloc_mode == LFS) {
secno = GET_SEC_FROM_SEG(sbi, p.min_segno);
if (gc_type == FG_GC)
@@ -406,13 +422,13 @@ got_it:
else
set_bit(secno, dirty_i->victim_secmap);
}
- *result = (p.min_segno / p.ofs_unit) * p.ofs_unit;
+ }
+out:
+ if (p.min_segno != NULL_SEGNO)
trace_f2fs_get_victim(sbi->sb, type, gc_type, &p,
sbi->cur_victim_sec,
prefree_segments(sbi), free_segments(sbi));
- }
-out:
mutex_unlock(&dirty_i->seglist_lock);
return (p.min_segno == NULL_SEGNO) ? 0 : 1;
@@ -440,7 +456,7 @@ static void add_gc_inode(struct gc_inode_list *gc_list, struct inode *inode)
iput(inode);
return;
}
- new_ie = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
+ new_ie = f2fs_kmem_cache_alloc(f2fs_inode_entry_slab, GFP_NOFS);
new_ie->inode = inode;
f2fs_radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie);
@@ -454,7 +470,7 @@ static void put_gc_inode(struct gc_inode_list *gc_list)
radix_tree_delete(&gc_list->iroot, ie->inode->i_ino);
iput(ie->inode);
list_del(&ie->list);
- kmem_cache_free(inode_entry_slab, ie);
+ kmem_cache_free(f2fs_inode_entry_slab, ie);
}
}
@@ -477,65 +493,81 @@ static int check_valid_map(struct f2fs_sb_info *sbi,
* On validity, copy that node with cold status; otherwise (an invalid
* node) ignore it.
*/
-static void gc_node_segment(struct f2fs_sb_info *sbi,
+static int gc_node_segment(struct f2fs_sb_info *sbi,
struct f2fs_summary *sum, unsigned int segno, int gc_type)
{
struct f2fs_summary *entry;
block_t start_addr;
int off;
int phase = 0;
+ bool fggc = (gc_type == FG_GC);
+ int submitted = 0;
start_addr = START_BLOCK(sbi, segno);
next_step:
entry = sum;
+ if (fggc && phase == 2)
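+ /* hold off background node writeback while FG_GC migrates node blocks */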
+ atomic_inc(&sbi->wb_sync_req[NODE]);
+
for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
nid_t nid = le32_to_cpu(entry->nid);
struct page *node_page;
struct node_info ni;
+ int err;
/* stop BG_GC if there are not enough free sections. */
if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0))
- return;
+ return submitted;
if (check_valid_map(sbi, segno, off) == 0)
continue;
if (phase == 0) {
- ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), 1,
+ f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), 1,
META_NAT, true);
continue;
}
if (phase == 1) {
- ra_node_page(sbi, nid);
+ f2fs_ra_node_page(sbi, nid);
continue;
}
/* phase == 2 */
- node_page = get_node_page(sbi, nid);
+ node_page = f2fs_get_node_page(sbi, nid);
if (IS_ERR(node_page))
continue;
- /* block may become invalid during get_node_page */
+ /* block may become invalid during f2fs_get_node_page */
if (check_valid_map(sbi, segno, off) == 0) {
f2fs_put_page(node_page, 1);
continue;
}
- get_node_info(sbi, nid, &ni);
+ if (f2fs_get_node_info(sbi, nid, &ni)) {
+ f2fs_put_page(node_page, 1);
+ continue;
+ }
+
if (ni.blk_addr != start_addr + off) {
f2fs_put_page(node_page, 1);
continue;
}
- move_node_page(node_page, gc_type);
+ err = f2fs_move_node_page(node_page, gc_type);
+ if (!err && gc_type == FG_GC)
+ submitted++;
stat_inc_node_blk_count(sbi, 1, gc_type);
}
if (++phase < 3)
goto next_step;
+
+ if (fggc)
+ atomic_dec(&sbi->wb_sync_req[NODE]);
+ return submitted;
}
/*
@@ -545,7 +577,7 @@ next_step:
* as indirect or double indirect node blocks, are given, it must be a caller's
* bug.
*/
-block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode)
+block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode)
{
unsigned int indirect_blks = 2 * NIDS_PER_BLOCK + 4;
unsigned int bidx;
@@ -562,7 +594,7 @@ block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode)
int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1);
bidx = node_ofs - 5 - dec;
}
- return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(inode);
+ return bidx * ADDRS_PER_BLOCK(inode) + ADDRS_PER_INODE(inode);
}
static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
@@ -576,16 +608,18 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
nid = le32_to_cpu(sum->nid);
ofs_in_node = le16_to_cpu(sum->ofs_in_node);
- node_page = get_node_page(sbi, nid);
+ node_page = f2fs_get_node_page(sbi, nid);
if (IS_ERR(node_page))
return false;
- get_node_info(sbi, nid, dni);
+ if (f2fs_get_node_info(sbi, nid, dni)) {
+ f2fs_put_page(node_page, 1);
+ return false;
+ }
if (sum->version != dni->version) {
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: valid data with mismatched node version.",
- __func__);
+ f2fs_warn(sbi, "%s: valid data with mismatched node version.",
+ __func__);
set_sbi_flag(sbi, SBI_NEED_FSCK);
}
@@ -598,12 +632,95 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
return true;
}
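+/*
+ * Read one data block ahead through META_MAPPING so that a following
+ * GC move of this (post-read, e.g. encrypted) block finds the source
+ * data already cached and up-to-date.
+ */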
+static int ra_data_block(struct inode *inode, pgoff_t index)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct address_space *mapping = inode->i_mapping;
+ struct dnode_of_data dn;
+ struct page *page;
+ struct extent_info ei = {0, 0, 0};
+ struct f2fs_io_info fio = {
+ .sbi = sbi,
+ .ino = inode->i_ino,
+ .type = DATA,
+ .temp = COLD,
+ .op = REQ_OP_READ,
+ .op_flags = 0,
+ .encrypted_page = NULL,
+ .in_list = false,
+ .retry = false,
+ };
+ int err;
+
+ page = f2fs_grab_cache_page(mapping, index, true);
+ if (!page)
+ return -ENOMEM;
+
+ if (f2fs_lookup_extent_cache(inode, index, &ei)) {
+ dn.data_blkaddr = ei.blk + index - ei.fofs;
+ if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr,
+ DATA_GENERIC_ENHANCE_READ))) {
+ err = -EFSCORRUPTED;
+ goto put_page;
+ }
+ goto got_it;
+ }
+
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
+ if (err)
+ goto put_page;
+ f2fs_put_dnode(&dn);
+
+ if (!__is_valid_data_blkaddr(dn.data_blkaddr)) {
+ err = -ENOENT;
+ goto put_page;
+ }
+ if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr,
+ DATA_GENERIC_ENHANCE))) {
+ err = -EFSCORRUPTED;
+ goto put_page;
+ }
+got_it:
+ /* read page */
+ fio.page = page;
+ fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;
+
+ /*
+ * don't cache encrypted data into the meta inode until previous dirty
+ * data has been written back, to avoid racing between GC and flush.
+ */
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
+
+ f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);
+
+ fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(sbi),
+ dn.data_blkaddr,
+ FGP_LOCK | FGP_CREAT, GFP_NOFS);
+ if (!fio.encrypted_page) {
+ err = -ENOMEM;
+ goto put_page;
+ }
+
+ err = f2fs_submit_page_bio(&fio);
+ if (err)
+ goto put_encrypted_page;
+ f2fs_put_page(fio.encrypted_page, 0);
+ f2fs_put_page(page, 1);
+ return 0;
+put_encrypted_page:
+ f2fs_put_page(fio.encrypted_page, 1);
+put_page:
+ f2fs_put_page(page, 1);
+ return err;
+}
+
/*
* Move data block via META_MAPPING while keeping locked data page.
* This can be used to move blocks, aka LBAs, directly on disk.
*/
-static void move_data_block(struct inode *inode, block_t bidx,
- unsigned int segno, int off)
+static int move_data_block(struct inode *inode, block_t bidx,
+ int gc_type, unsigned int segno, int off)
{
struct f2fs_io_info fio = {
.sbi = F2FS_I_SB(inode),
@@ -614,37 +731,47 @@ static void move_data_block(struct inode *inode, block_t bidx,
.op_flags = REQ_SYNC,
.encrypted_page = NULL,
.in_list = false,
+ .retry = false,
};
struct dnode_of_data dn;
struct f2fs_summary sum;
struct node_info ni;
- struct page *page;
+ struct page *page, *mpage;
block_t newaddr;
- int err;
+ int err = 0;
+ bool lfs_mode = test_opt(fio.sbi, LFS);
/* do not read out */
page = f2fs_grab_cache_page(inode->i_mapping, bidx, false);
if (!page)
- return;
+ return -ENOMEM;
- if (!check_valid_map(F2FS_I_SB(inode), segno, off))
+ if (!check_valid_map(F2FS_I_SB(inode), segno, off)) {
+ err = -ENOENT;
goto out;
+ }
- if (f2fs_is_atomic_file(inode))
+ if (f2fs_is_atomic_file(inode)) {
+ F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC]++;
+ F2FS_I_SB(inode)->skipped_atomic_files[gc_type]++;
+ err = -EAGAIN;
goto out;
+ }
if (f2fs_is_pinned_file(inode)) {
f2fs_pin_file_control(inode, true);
+ err = -EAGAIN;
goto out;
}
set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = get_dnode_of_data(&dn, bidx, LOOKUP_NODE);
+ err = f2fs_get_dnode_of_data(&dn, bidx, LOOKUP_NODE);
if (err)
goto out;
if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
ClearPageUptodate(page);
+ err = -ENOENT;
goto put_out;
}
@@ -652,43 +779,66 @@ static void move_data_block(struct inode *inode, block_t bidx,
* don't cache encrypted data into meta inode until previous dirty
* data have been written back, to avoid racing between GC and flush.
*/
- f2fs_wait_on_page_writeback(page, DATA, true);
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
+
+ f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);
+
+ err = f2fs_get_node_info(fio.sbi, dn.nid, &ni);
+ if (err)
+ goto put_out;
- get_node_info(fio.sbi, dn.nid, &ni);
set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);
/* read page */
fio.page = page;
fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;
- allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr,
+ if (lfs_mode)
+ down_write(&fio.sbi->io_order_lock);
+
+ mpage = f2fs_grab_cache_page(META_MAPPING(fio.sbi),
+ fio.old_blkaddr, false);
+ if (!mpage) {
+ err = -ENOMEM;
+ goto up_out;
+ }
+
+ fio.encrypted_page = mpage;
+
+ /* read source block in mpage */
+ if (!PageUptodate(mpage)) {
+ err = f2fs_submit_page_bio(&fio);
+ if (err) {
+ f2fs_put_page(mpage, 1);
+ goto up_out;
+ }
+ lock_page(mpage);
+ if (unlikely(mpage->mapping != META_MAPPING(fio.sbi) ||
+ !PageUptodate(mpage))) {
+ err = -EIO;
+ f2fs_put_page(mpage, 1);
+ goto up_out;
+ }
+ }
+
+ f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr,
&sum, CURSEG_COLD_DATA, NULL, false);
fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(fio.sbi),
newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS);
if (!fio.encrypted_page) {
err = -ENOMEM;
+ f2fs_put_page(mpage, 1);
goto recover_block;
}
- err = f2fs_submit_page_bio(&fio);
- if (err)
- goto put_page_out;
-
- /* write page */
- lock_page(fio.encrypted_page);
-
- if (unlikely(fio.encrypted_page->mapping != META_MAPPING(fio.sbi))) {
- err = -EIO;
- goto put_page_out;
- }
- if (unlikely(!PageUptodate(fio.encrypted_page))) {
- err = -EIO;
- goto put_page_out;
- }
+ /* write target block */
+ f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true, true);
+ memcpy(page_address(fio.encrypted_page),
+ page_address(mpage), PAGE_SIZE);
+ f2fs_put_page(mpage, 1);
+ invalidate_mapping_pages(META_MAPPING(fio.sbi),
+ fio.old_blkaddr, fio.old_blkaddr);
set_page_dirty(fio.encrypted_page);
- f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true);
if (clear_page_dirty_for_io(fio.encrypted_page))
dec_page_count(fio.sbi, F2FS_DIRTY_META);
@@ -696,13 +846,14 @@ static void move_data_block(struct inode *inode, block_t bidx,
ClearPageError(page);
/* allocate block address */
- f2fs_wait_on_page_writeback(dn.node_page, NODE, true);
+ f2fs_wait_on_page_writeback(dn.node_page, NODE, true, true);
fio.op = REQ_OP_WRITE;
fio.op_flags = REQ_SYNC | REQ_NOIDLE;
fio.new_blkaddr = newaddr;
- err = f2fs_submit_page_write(&fio);
- if (err) {
+ f2fs_submit_page_write(&fio);
+ if (fio.retry) {
+ err = -EAGAIN;
if (PageWriteback(fio.encrypted_page))
end_page_writeback(fio.encrypted_page);
goto put_page_out;
@@ -718,37 +869,51 @@ put_page_out:
f2fs_put_page(fio.encrypted_page, 1);
recover_block:
if (err)
- __f2fs_replace_block(fio.sbi, &sum, newaddr, fio.old_blkaddr,
+ f2fs_do_replace_block(fio.sbi, &sum, newaddr, fio.old_blkaddr,
true, true);
+up_out:
+ if (lfs_mode)
+ up_write(&fio.sbi->io_order_lock);
put_out:
f2fs_put_dnode(&dn);
out:
f2fs_put_page(page, 1);
+ return err;
}
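
move_data_block() above behaves like a small transaction: allocate a new block address, copy the ciphertext from the source block's META_MAPPING page (mpage) into the page bound to the new address, write it out, and on failure hand the new address back via f2fs_do_replace_block() so the old mapping stays valid. A toy model of that allocate/copy/undo shape (the block store and helpers here are made up for illustration):

	#include <errno.h>
	#include <stdio.h>
	#include <string.h>

	#define BLK_SIZE 4096

	/* Toy block store standing in for the segment layer. */
	static char disk[16][BLK_SIZE];
	static int next_free = 1;

	static int allocate_block(void)		/* f2fs_allocate_data_block() */
	{
		return next_free++;
	}

	static void replace_block(int newaddr, int oldaddr)	/* f2fs_do_replace_block() */
	{
		/* hand the new block back; the old mapping stays authoritative */
		(void)newaddr;
		(void)oldaddr;
		next_free--;
	}

	/* Shape of move_data_block(): copy ciphertext old -> new, undo on failure. */
	static int move_block_sketch(int oldaddr, int write_fails)
	{
		int newaddr = allocate_block();
		int err = 0;

		/* memcpy(page_address(fio.encrypted_page), page_address(mpage), ...) */
		memcpy(disk[newaddr], disk[oldaddr], BLK_SIZE);
		if (write_fails)
			err = -EAGAIN;	/* fio.retry set by f2fs_submit_page_write() */
		if (err)
			replace_block(newaddr, oldaddr);	/* the recover_block: label */
		return err;
	}

	int main(void)
	{
		int ok, fail;

		memset(disk[3], 0xAB, BLK_SIZE);
		ok = move_block_sketch(3, 0);
		fail = move_block_sketch(3, 1);
		printf("ok=%d fail=%d\n", ok, fail);
		return 0;
	}
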
-static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
+static int move_data_page(struct inode *inode, block_t bidx, int gc_type,
unsigned int segno, int off)
{
struct page *page;
+ int err = 0;
- page = get_lock_data_page(inode, bidx, true);
+ page = f2fs_get_lock_data_page(inode, bidx, true);
if (IS_ERR(page))
- return;
+ return PTR_ERR(page);
- if (!check_valid_map(F2FS_I_SB(inode), segno, off))
+ if (!check_valid_map(F2FS_I_SB(inode), segno, off)) {
+ err = -ENOENT;
goto out;
+ }
- if (f2fs_is_atomic_file(inode))
+ if (f2fs_is_atomic_file(inode)) {
+ F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC]++;
+ F2FS_I_SB(inode)->skipped_atomic_files[gc_type]++;
+ err = -EAGAIN;
goto out;
+ }
if (f2fs_is_pinned_file(inode)) {
if (gc_type == FG_GC)
f2fs_pin_file_control(inode, true);
+ err = -EAGAIN;
goto out;
}
if (gc_type == BG_GC) {
- if (PageWriteback(page))
+ if (PageWriteback(page)) {
+ err = -EAGAIN;
goto out;
+ }
set_page_dirty(page);
set_cold_data(page);
} else {
@@ -766,26 +931,32 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
.io_type = FS_GC_DATA_IO,
};
bool is_dirty = PageDirty(page);
- int err;
retry:
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
+
set_page_dirty(page);
- f2fs_wait_on_page_writeback(page, DATA, true);
if (clear_page_dirty_for_io(page)) {
inode_dec_dirty_pages(inode);
- remove_dirty_inode(inode);
+ f2fs_remove_dirty_inode(inode);
}
set_cold_data(page);
- err = do_write_data_page(&fio);
- if (err == -ENOMEM && is_dirty) {
- congestion_wait(BLK_RW_ASYNC, HZ/50);
- goto retry;
+ err = f2fs_do_write_data_page(&fio);
+ if (err) {
+ clear_cold_data(page);
+ if (err == -ENOMEM) {
+ congestion_wait(BLK_RW_ASYNC, HZ/50);
+ goto retry;
+ }
+ if (is_dirty)
+ set_page_dirty(page);
}
}
out:
f2fs_put_page(page, 1);
+ return err;
}
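
The retry loop in move_data_page() above treats -ENOMEM as transient: wait for congestion to ease and resubmit, while any other error restores the dirty bit and gives up. A standalone sketch of that policy (backoff() stands in for congestion_wait(BLK_RW_ASYNC, HZ/50)):

	#include <errno.h>
	#include <stdio.h>
	#include <unistd.h>

	/* Stand-in for congestion_wait(BLK_RW_ASYNC, HZ/50): back off ~20ms. */
	static void backoff(void)
	{
		usleep(20000);
	}

	static int attempts;

	/* A writer that fails with -ENOMEM twice before succeeding. */
	static int flaky_write(void)
	{
		return ++attempts < 3 ? -ENOMEM : 0;
	}

	/* Shape of the retry: label above: only -ENOMEM is retried; any other
	 * error restores the dirty bit and bails out. */
	static int write_with_retry(int (*do_write)(void))
	{
		int err;
	retry:
		err = do_write();	/* f2fs_do_write_data_page() */
		if (err == -ENOMEM) {
			backoff();
			goto retry;
		}
		return err;
	}

	int main(void)
	{
		int err = write_with_retry(flaky_write);

		printf("err=%d after %d attempts\n", err, attempts);
		return 0;
	}
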
/*
@@ -795,7 +966,7 @@ out:
* If the parent node is not valid or the data block address is different,
* the victim data block is ignored.
*/
-static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
+static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
struct gc_inode_list *gc_list, unsigned int segno, int gc_type)
{
struct super_block *sb = sbi->sb;
@@ -803,6 +974,7 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
block_t start_addr;
int off;
int phase = 0;
+ int submitted = 0;
start_addr = START_BLOCK(sbi, segno);
@@ -819,19 +991,19 @@ next_step:
/* stop BG_GC if there is not enough free sections. */
if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0))
- return;
+ return submitted;
if (check_valid_map(sbi, segno, off) == 0)
continue;
if (phase == 0) {
- ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), 1,
+ f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), 1,
META_NAT, true);
continue;
}
if (phase == 1) {
- ra_node_page(sbi, nid);
+ f2fs_ra_node_page(sbi, nid);
continue;
}
@@ -840,7 +1012,7 @@ next_step:
continue;
if (phase == 2) {
- ra_node_page(sbi, dni.ino);
+ f2fs_ra_node_page(sbi, dni.ino);
continue;
}
@@ -851,23 +1023,31 @@ next_step:
if (IS_ERR(inode) || is_bad_inode(inode))
continue;
- /* if inode uses special I/O path, let's go phase 3 */
- if (f2fs_post_read_required(inode)) {
- add_gc_inode(gc_list, inode);
+ if (!down_write_trylock(
+ &F2FS_I(inode)->i_gc_rwsem[WRITE])) {
+ iput(inode);
+ sbi->skipped_gc_rwsem++;
continue;
}
- if (!down_write_trylock(
- &F2FS_I(inode)->dio_rwsem[WRITE])) {
- iput(inode);
+ start_bidx = f2fs_start_bidx_of_node(nofs, inode) +
+ ofs_in_node;
+
+ if (f2fs_post_read_required(inode)) {
+ int err = ra_data_block(inode, start_bidx);
+
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ if (err) {
+ iput(inode);
+ continue;
+ }
+ add_gc_inode(gc_list, inode);
continue;
}
- start_bidx = start_bidx_of_node(nofs, inode);
- data_page = get_read_data_page(inode,
- start_bidx + ofs_in_node, REQ_RAHEAD,
- true);
- up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
+ data_page = f2fs_get_read_data_page(inode,
+ start_bidx, REQ_RAHEAD, true);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
if (IS_ERR(data_page)) {
iput(inode);
continue;
@@ -883,13 +1063,15 @@ next_step:
if (inode) {
struct f2fs_inode_info *fi = F2FS_I(inode);
bool locked = false;
+ int err;
if (S_ISREG(inode->i_mode)) {
- if (!down_write_trylock(&fi->dio_rwsem[READ]))
+ if (!down_write_trylock(&fi->i_gc_rwsem[READ]))
continue;
if (!down_write_trylock(
- &fi->dio_rwsem[WRITE])) {
- up_write(&fi->dio_rwsem[READ]);
+ &fi->i_gc_rwsem[WRITE])) {
+ sbi->skipped_gc_rwsem++;
+ up_write(&fi->i_gc_rwsem[READ]);
continue;
}
locked = true;
@@ -898,17 +1080,22 @@ next_step:
inode_dio_wait(inode);
}
- start_bidx = start_bidx_of_node(nofs, inode)
+ start_bidx = f2fs_start_bidx_of_node(nofs, inode)
+ ofs_in_node;
if (f2fs_post_read_required(inode))
- move_data_block(inode, start_bidx, segno, off);
+ err = move_data_block(inode, start_bidx,
+ gc_type, segno, off);
else
- move_data_page(inode, start_bidx, gc_type,
+ err = move_data_page(inode, start_bidx, gc_type,
segno, off);
+ if (!err && (gc_type == FG_GC ||
+ f2fs_post_read_required(inode)))
+ submitted++;
+
if (locked) {
- up_write(&fi->dio_rwsem[WRITE]);
- up_write(&fi->dio_rwsem[READ]);
+ up_write(&fi->i_gc_rwsem[WRITE]);
+ up_write(&fi->i_gc_rwsem[READ]);
}
stat_inc_data_blk_count(sbi, 1, gc_type);
@@ -917,6 +1104,8 @@ next_step:
if (++phase < 5)
goto next_step;
+
+ return submitted;
}
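
gc_data_segment() above is structured as five passes (phase 0-4) over the same summary entries, so the cheap readahead phases can queue I/O ahead of the expensive final pass that actually moves blocks. A compressed sketch of that goto-driven control flow:

	#include <stdio.h>

	/* gc_data_segment() walks the same summary entries five times; each
	 * pass does one cheap step so readahead runs ahead of the real work:
	 *   0: readahead NAT blocks      1: readahead node pages
	 *   2: readahead inode pages     3: pre-read/lock data pages
	 *   4: move the data blocks
	 */
	int main(void)
	{
		static const char *step[] = {
			"ra NAT", "ra node", "ra inode", "ra/lock data", "move data",
		};
		int phase = 0;

	next_step:
		for (int off = 0; off < 4; off++)	/* one iteration per summary entry */
			printf("phase %d, entry %d: %s\n", phase, off, step[phase]);
		if (++phase < 5)			/* same bound as the hunk above */
			goto next_step;
		return 0;
	}
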
static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
@@ -941,18 +1130,34 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
struct blk_plug plug;
unsigned int segno = start_segno;
unsigned int end_segno = start_segno + sbi->segs_per_sec;
- int seg_freed = 0;
+ int seg_freed = 0, migrated = 0;
unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ?
SUM_TYPE_DATA : SUM_TYPE_NODE;
+ int submitted = 0;
+
+ if (__is_large_section(sbi))
+ end_segno = rounddown(end_segno, sbi->segs_per_sec);
/* readahead multi ssa blocks those have contiguous address */
- if (sbi->segs_per_sec > 1)
- ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno),
- sbi->segs_per_sec, META_SSA, true);
+ if (__is_large_section(sbi))
+ f2fs_ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno),
+ end_segno - segno, META_SSA, true);
/* reference all summary page */
while (segno < end_segno) {
- sum_page = get_sum_page(sbi, segno++);
+ sum_page = f2fs_get_sum_page(sbi, segno++);
+ if (IS_ERR(sum_page)) {
+ int err = PTR_ERR(sum_page);
+
+ end_segno = segno - 1;
+ for (segno = start_segno; segno < end_segno; segno++) {
+ sum_page = find_get_page(META_MAPPING(sbi),
+ GET_SUM_BLOCK(sbi, segno));
+ f2fs_put_page(sum_page, 0);
+ f2fs_put_page(sum_page, 0);
+ }
+ return err;
+ }
unlock_page(sum_page);
}
@@ -965,13 +1170,22 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
GET_SUM_BLOCK(sbi, segno));
f2fs_put_page(sum_page, 0);
- if (get_valid_blocks(sbi, segno, false) == 0 ||
- !PageUptodate(sum_page) ||
- unlikely(f2fs_cp_error(sbi)))
- goto next;
+ if (get_valid_blocks(sbi, segno, false) == 0)
+ goto freed;
+ if (__is_large_section(sbi) &&
+ migrated >= sbi->migration_granularity)
+ goto skip;
+ if (!PageUptodate(sum_page) || unlikely(f2fs_cp_error(sbi)))
+ goto skip;
sum = page_address(sum_page);
- f2fs_bug_on(sbi, type != GET_SUM_TYPE((&sum->footer)));
+ if (type != GET_SUM_TYPE((&sum->footer))) {
+ f2fs_err(sbi, "Inconsistent segment (%u) type [%d, %d] in SSA and SIT",
+ segno, type, GET_SUM_TYPE((&sum->footer)));
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_stop_checkpoint(sbi, false);
+ goto skip;
+ }
/*
* this is to avoid deadlock:
@@ -981,21 +1195,27 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
* - lock_page(sum_page)
*/
if (type == SUM_TYPE_NODE)
- gc_node_segment(sbi, sum->entries, segno, gc_type);
- else
- gc_data_segment(sbi, sum->entries, gc_list, segno,
+ submitted += gc_node_segment(sbi, sum->entries, segno,
gc_type);
+ else
+ submitted += gc_data_segment(sbi, sum->entries, gc_list,
+ segno, gc_type);
stat_inc_seg_count(sbi, type, gc_type);
+freed:
if (gc_type == FG_GC &&
get_valid_blocks(sbi, segno, false) == 0)
seg_freed++;
-next:
+ migrated++;
+
+ if (__is_large_section(sbi) && segno + 1 < end_segno)
+ sbi->next_victim_seg[gc_type] = segno + 1;
+skip:
f2fs_put_page(sum_page, 0);
}
- if (gc_type == FG_GC)
+ if (submitted)
f2fs_submit_merged_write(sbi,
(type == SUM_TYPE_NODE) ? NODE : DATA);
@@ -1018,6 +1238,9 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
.ilist = LIST_HEAD_INIT(gc_list.ilist),
.iroot = RADIX_TREE_INIT(GFP_NOFS),
};
+ unsigned long long last_skipped = sbi->skipped_atomic_files[FG_GC];
+ unsigned long long first_skipped;
+ unsigned int skipped_round = 0, round = 0;
trace_f2fs_gc_begin(sbi->sb, sync, background,
get_pages(sbi, F2FS_DIRTY_NODES),
@@ -1029,6 +1252,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
prefree_segments(sbi));
cpc.reason = __get_cp_reason(sbi);
+ sbi->skipped_gc_rwsem = 0;
+ first_skipped = last_skipped;
gc_more:
if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) {
ret = -EINVAL;
@@ -1045,8 +1270,9 @@ gc_more:
* threshold, we can make them free by checkpoint. Then, we
* secure free segments which doesn't need fggc any more.
*/
- if (prefree_segments(sbi)) {
- ret = write_checkpoint(sbi, &cpc);
+ if (prefree_segments(sbi) &&
+ !is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
+ ret = f2fs_write_checkpoint(sbi, &cpc);
if (ret)
goto stop;
}
@@ -1069,17 +1295,36 @@ gc_more:
sec_freed++;
total_freed += seg_freed;
+ if (gc_type == FG_GC) {
+ if (sbi->skipped_atomic_files[FG_GC] > last_skipped ||
+ sbi->skipped_gc_rwsem)
+ skipped_round++;
+ last_skipped = sbi->skipped_atomic_files[FG_GC];
+ round++;
+ }
+
if (gc_type == FG_GC)
sbi->cur_victim_sec = NULL_SEGNO;
- if (!sync) {
- if (has_not_enough_free_secs(sbi, sec_freed, 0)) {
+ if (sync)
+ goto stop;
+
+ if (has_not_enough_free_secs(sbi, sec_freed, 0)) {
+ if (skipped_round <= MAX_SKIP_GC_COUNT ||
+ skipped_round * 2 < round) {
segno = NULL_SEGNO;
goto gc_more;
}
- if (gc_type == FG_GC)
- ret = write_checkpoint(sbi, &cpc);
+ if (first_skipped < last_skipped &&
+ (last_skipped - first_skipped) >
+ sbi->skipped_gc_rwsem) {
+ f2fs_drop_inmem_pages_all(sbi, true);
+ segno = NULL_SEGNO;
+ goto gc_more;
+ }
+ if (gc_type == FG_GC && !is_sbi_flag_set(sbi, SBI_CP_DISABLED))
+ ret = f2fs_write_checkpoint(sbi, &cpc);
}
stop:
SIT_I(sbi)->last_victim[ALLOC_NEXT] = 0;
@@ -1103,23 +1348,187 @@ stop:
return ret;
}
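
The gc_more loop above keeps retrying foreground GC while skipped rounds stay rare, either in absolute count or relative to the number of rounds run; only once skips dominate does it fall through to the checkpoint. A standalone rendering of that predicate (the value 16 for MAX_SKIP_GC_COUNT is assumed from f2fs.h):

	#include <stdbool.h>
	#include <stdio.h>

	#define MAX_SKIP_GC_COUNT 16	/* assumed value of the f2fs.h constant */

	/* Predicate mirrored from the gc_more loop: keep doing FG_GC rounds
	 * while skipped rounds are rare in absolute count or relative to the
	 * rounds run so far. */
	static bool keep_gc_looping(unsigned int skipped_round, unsigned int round)
	{
		return skipped_round <= MAX_SKIP_GC_COUNT || skipped_round * 2 < round;
	}

	int main(void)
	{
		printf("%d\n", keep_gc_looping(3, 4));	/* 1: few skips, retry */
		printf("%d\n", keep_gc_looping(20, 50));	/* 1: skips < half the rounds */
		printf("%d\n", keep_gc_looping(20, 30));	/* 0: skips dominate, checkpoint */
		return 0;
	}
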
-void build_gc_manager(struct f2fs_sb_info *sbi)
+void f2fs_build_gc_manager(struct f2fs_sb_info *sbi)
{
- u64 main_count, resv_count, ovp_count;
-
DIRTY_I(sbi)->v_ops = &default_v_ops;
- /* threshold of # of valid blocks in a section for victims of FG_GC */
- main_count = SM_I(sbi)->main_segments << sbi->log_blocks_per_seg;
- resv_count = SM_I(sbi)->reserved_segments << sbi->log_blocks_per_seg;
- ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg;
-
- sbi->fggc_threshold = div64_u64((main_count - ovp_count) *
- BLKS_PER_SEC(sbi), (main_count - resv_count));
sbi->gc_pin_file_threshold = DEF_GC_FAILED_PINNED_FILES;
/* give warm/cold data area from slower device */
- if (sbi->s_ndevs && sbi->segs_per_sec == 1)
+ if (f2fs_is_multi_device(sbi) && !__is_large_section(sbi))
SIT_I(sbi)->last_victim[ALLOC_NEXT] =
GET_SEGNO(sbi, FDEV(0).end_blk) + 1;
}
+
+static int free_segment_range(struct f2fs_sb_info *sbi, unsigned int start,
+ unsigned int end)
+{
+ int type;
+ unsigned int segno, next_inuse;
+ int err = 0;
+
+ /* Move out cursegs from the target range */
+ for (type = CURSEG_HOT_DATA; type < NR_CURSEG_TYPE; type++)
+ allocate_segment_for_resize(sbi, type, start, end);
+
+ /* do GC to move out valid blocks in the range */
+ for (segno = start; segno <= end; segno += sbi->segs_per_sec) {
+ struct gc_inode_list gc_list = {
+ .ilist = LIST_HEAD_INIT(gc_list.ilist),
+ .iroot = RADIX_TREE_INIT(GFP_NOFS),
+ };
+
+ mutex_lock(&sbi->gc_mutex);
+ do_garbage_collect(sbi, segno, &gc_list, FG_GC);
+ mutex_unlock(&sbi->gc_mutex);
+ put_gc_inode(&gc_list);
+
+ if (get_valid_blocks(sbi, segno, true))
+ return -EAGAIN;
+ }
+
+ err = f2fs_sync_fs(sbi->sb, 1);
+ if (err)
+ return err;
+
+ next_inuse = find_next_inuse(FREE_I(sbi), end + 1, start);
+ if (next_inuse <= end) {
+ f2fs_err(sbi, "segno %u should be free but still inuse!",
+ next_inuse);
+ f2fs_bug_on(sbi, 1);
+ }
+ return err;
+}
+
+static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs)
+{
+ struct f2fs_super_block *raw_sb = F2FS_RAW_SUPER(sbi);
+ int section_count = le32_to_cpu(raw_sb->section_count);
+ int segment_count = le32_to_cpu(raw_sb->segment_count);
+ int segment_count_main = le32_to_cpu(raw_sb->segment_count_main);
+ long long block_count = le64_to_cpu(raw_sb->block_count);
+ int segs = secs * sbi->segs_per_sec;
+
+ raw_sb->section_count = cpu_to_le32(section_count + secs);
+ raw_sb->segment_count = cpu_to_le32(segment_count + segs);
+ raw_sb->segment_count_main = cpu_to_le32(segment_count_main + segs);
+ raw_sb->block_count = cpu_to_le64(block_count +
+ (long long)segs * sbi->blocks_per_seg);
+}
+
+static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs)
+{
+ int segs = secs * sbi->segs_per_sec;
+ long long user_block_count =
+ le64_to_cpu(F2FS_CKPT(sbi)->user_block_count);
+
+ SM_I(sbi)->segment_count = (int)SM_I(sbi)->segment_count + segs;
+ MAIN_SEGS(sbi) = (int)MAIN_SEGS(sbi) + segs;
+ FREE_I(sbi)->free_sections = (int)FREE_I(sbi)->free_sections + secs;
+ FREE_I(sbi)->free_segments = (int)FREE_I(sbi)->free_segments + segs;
+ F2FS_CKPT(sbi)->user_block_count = cpu_to_le64(user_block_count +
+ (long long)segs * sbi->blocks_per_seg);
+}
+
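
Both updaters above take a signed section delta, which keeps the error handling in f2fs_resize_fs() below compact: the shrink path calls update_sb_metadata(sbi, -secs), and the rollback path undoes it by calling the same helper with +secs. A toy version of the signed-delta idea:

	#include <stdio.h>

	/* Toy superblock mirroring the fields update_sb_metadata() touches. */
	struct sb_sketch {
		long long block_count;
		int section_count;
	};

	/* Signed delta, exactly like the helpers above: -secs shrinks, +secs grows. */
	static void update_sb_sketch(struct sb_sketch *sb, int secs,
				     int segs_per_sec, int blocks_per_seg)
	{
		int segs = secs * segs_per_sec;

		sb->section_count += secs;
		sb->block_count += (long long)segs * blocks_per_seg;
	}

	int main(void)
	{
		struct sb_sketch sb = { .block_count = 1 << 20, .section_count = 512 };

		update_sb_sketch(&sb, -8, 4, 512);	/* the shrink path */
		printf("shrunk: %lld blocks, %d sections\n",
		       sb.block_count, sb.section_count);
		update_sb_sketch(&sb, 8, 4, 512);	/* the rollback path */
		printf("rolled back: %lld blocks, %d sections\n",
		       sb.block_count, sb.section_count);
		return 0;
	}
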
+int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count)
+{
+ __u64 old_block_count, shrunk_blocks;
+ unsigned int secs;
+ int gc_mode, gc_type;
+ int err = 0;
+ __u32 rem;
+
+ old_block_count = le64_to_cpu(F2FS_RAW_SUPER(sbi)->block_count);
+ if (block_count > old_block_count)
+ return -EINVAL;
+
+ /* new fs size should align to section size */
+ div_u64_rem(block_count, BLKS_PER_SEC(sbi), &rem);
+ if (rem)
+ return -EINVAL;
+
+ if (block_count == old_block_count)
+ return 0;
+
+ if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
+ f2fs_err(sbi, "Should run fsck to repair first.");
+ return -EFSCORRUPTED;
+ }
+
+ if (test_opt(sbi, DISABLE_CHECKPOINT)) {
+ f2fs_err(sbi, "Checkpoint should be enabled.");
+ return -EINVAL;
+ }
+
+ freeze_bdev(sbi->sb->s_bdev);
+
+ shrunk_blocks = old_block_count - block_count;
+ secs = div_u64(shrunk_blocks, BLKS_PER_SEC(sbi));
+ spin_lock(&sbi->stat_lock);
+ if (shrunk_blocks + valid_user_blocks(sbi) +
+ sbi->current_reserved_blocks + sbi->unusable_block_count +
+ F2FS_OPTION(sbi).root_reserved_blocks > sbi->user_block_count)
+ err = -ENOSPC;
+ else
+ sbi->user_block_count -= shrunk_blocks;
+ spin_unlock(&sbi->stat_lock);
+ if (err) {
+ thaw_bdev(sbi->sb->s_bdev, sbi->sb);
+ return err;
+ }
+
+ mutex_lock(&sbi->resize_mutex);
+ set_sbi_flag(sbi, SBI_IS_RESIZEFS);
+
+ mutex_lock(&DIRTY_I(sbi)->seglist_lock);
+
+ MAIN_SECS(sbi) -= secs;
+
+ for (gc_mode = 0; gc_mode < MAX_GC_POLICY; gc_mode++)
+ if (SIT_I(sbi)->last_victim[gc_mode] >=
+ MAIN_SECS(sbi) * sbi->segs_per_sec)
+ SIT_I(sbi)->last_victim[gc_mode] = 0;
+
+ for (gc_type = BG_GC; gc_type <= FG_GC; gc_type++)
+ if (sbi->next_victim_seg[gc_type] >=
+ MAIN_SECS(sbi) * sbi->segs_per_sec)
+ sbi->next_victim_seg[gc_type] = NULL_SEGNO;
+
+ mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
+
+ err = free_segment_range(sbi, MAIN_SECS(sbi) * sbi->segs_per_sec,
+ MAIN_SEGS(sbi) - 1);
+ if (err)
+ goto out;
+
+ update_sb_metadata(sbi, -secs);
+
+ err = f2fs_commit_super(sbi, false);
+ if (err) {
+ update_sb_metadata(sbi, secs);
+ goto out;
+ }
+
+ update_fs_metadata(sbi, -secs);
+ clear_sbi_flag(sbi, SBI_IS_RESIZEFS);
+ err = f2fs_sync_fs(sbi->sb, 1);
+ if (err) {
+ update_fs_metadata(sbi, secs);
+ update_sb_metadata(sbi, secs);
+ f2fs_commit_super(sbi, false);
+ }
+out:
+ if (err) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_err(sbi, "resize_fs failed, should run fsck to repair!");
+
+ MAIN_SECS(sbi) += secs;
+ spin_lock(&sbi->stat_lock);
+ sbi->user_block_count += shrunk_blocks;
+ spin_unlock(&sbi->stat_lock);
+ }
+ clear_sbi_flag(sbi, SBI_IS_RESIZEFS);
+ mutex_unlock(&sbi->resize_mutex);
+ thaw_bdev(sbi->sb->s_bdev, sbi->sb);
+ return err;
+}
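
f2fs_resize_fs() above sequences its steps so every change can be undone: reserve the shrunk blocks in-core under stat_lock, GC the tail sections, commit the smaller superblock, then update the in-core metadata and sync, re-expanding the counters and flagging fsck on any failure. Its only shape check on the input, beyond the bounds test, is section alignment; a userspace equivalent of that div_u64_rem() test (the section size is an assumed example value):

	#include <errno.h>
	#include <stdint.h>
	#include <stdio.h>

	#define BLKS_PER_SEC 1024	/* assumed geometry: blocks per section */

	/* Userspace mirror of the alignment check at the top of f2fs_resize_fs(). */
	static int check_resize_alignment(uint64_t block_count)
	{
		uint32_t rem = block_count % BLKS_PER_SEC;	/* div_u64_rem() */

		return rem ? -EINVAL : 0;
	}

	int main(void)
	{
		printf("aligned:   %d\n", check_resize_alignment(1024ULL * 512));
		printf("unaligned: %d\n", check_resize_alignment(1024ULL * 512 + 7));
		return 0;
	}
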
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index b0045d4c8d1e..bbac9d3787bd 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/gc.h
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#define GC_THREAD_MIN_WB_PAGES 1 /*
* a threshold to determine
@@ -36,8 +33,6 @@ struct f2fs_gc_kthread {
unsigned int no_gc_sleep_time;
/* for changing gc mode */
- unsigned int gc_idle;
- unsigned int gc_urgent;
unsigned int gc_wake;
};
diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c
index eb2e031ea887..cc82f142f811 100644
--- a/fs/f2fs/hash.c
+++ b/fs/f2fs/hash.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/hash.c
*
@@ -7,10 +8,6 @@
* Portions of this code from linux/fs/ext3/hash.c
*
* Copyright (C) 2002 by Theodore Ts'o
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/types.h>
#include <linux/fs.h>
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index ac951ee9b20b..aef822b1fa9d 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -1,11 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/inline.c
* Copyright (c) 2013, Intel Corporation
* Authors: Huajun Li <huajun.li@intel.com>
* Haicheng Li <haicheng.li@intel.com>
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/fs.h>
@@ -43,7 +41,7 @@ bool f2fs_may_inline_dentry(struct inode *inode)
return true;
}
-void read_inline_data(struct page *page, struct page *ipage)
+void f2fs_do_read_inline_data(struct page *page, struct page *ipage)
{
struct inode *inode = page->mapping->host;
void *src_addr, *dst_addr;
@@ -65,7 +63,8 @@ void read_inline_data(struct page *page, struct page *ipage)
SetPageUptodate(page);
}
-void truncate_inline_inode(struct inode *inode, struct page *ipage, u64 from)
+void f2fs_truncate_inline_inode(struct inode *inode,
+ struct page *ipage, u64 from)
{
void *addr;
@@ -74,7 +73,7 @@ void truncate_inline_inode(struct inode *inode, struct page *ipage, u64 from)
addr = inline_data_addr(inode, ipage);
- f2fs_wait_on_page_writeback(ipage, NODE, true);
+ f2fs_wait_on_page_writeback(ipage, NODE, true, true);
memset(addr + from, 0, MAX_INLINE_DATA(inode) - from);
set_page_dirty(ipage);
@@ -97,7 +96,7 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page)
path, current->comm);
}
- ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
+ ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino);
if (IS_ERR(ipage)) {
trace_android_fs_dataread_end(inode, page_offset(page),
PAGE_SIZE);
@@ -115,7 +114,7 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page)
if (page->index)
zero_user_segment(page, 0, PAGE_SIZE);
else
- read_inline_data(page, ipage);
+ f2fs_do_read_inline_data(page, ipage);
if (!PageUptodate(page))
SetPageUptodate(page);
@@ -138,6 +137,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
.encrypted_page = NULL,
.io_type = FS_DATA_IO,
};
+ struct node_info ni;
int dirty, err;
if (!f2fs_exist_data(dn->inode))
@@ -147,9 +147,25 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
if (err)
return err;
+ err = f2fs_get_node_info(fio.sbi, dn->nid, &ni);
+ if (err) {
+ f2fs_put_dnode(dn);
+ return err;
+ }
+
+ fio.version = ni.version;
+
+ if (unlikely(dn->data_blkaddr != NEW_ADDR)) {
+ f2fs_put_dnode(dn);
+ set_sbi_flag(fio.sbi, SBI_NEED_FSCK);
+ f2fs_warn(fio.sbi, "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, run fsck to fix.",
+ __func__, dn->inode->i_ino, dn->data_blkaddr);
+ return -EFSCORRUPTED;
+ }
+
f2fs_bug_on(F2FS_P_SB(page), PageWriteback(page));
- read_inline_data(page, dn->inode_page);
+ f2fs_do_read_inline_data(page, dn->inode_page);
set_page_dirty(page);
/* clear dirty state */
@@ -160,18 +176,18 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
ClearPageError(page);
fio.old_blkaddr = dn->data_blkaddr;
set_inode_flag(dn->inode, FI_HOT_DATA);
- write_data_page(dn, &fio);
- f2fs_wait_on_page_writeback(page, DATA, true);
+ f2fs_outplace_write_data(dn, &fio);
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
if (dirty) {
inode_dec_dirty_pages(dn->inode);
- remove_dirty_inode(dn->inode);
+ f2fs_remove_dirty_inode(dn->inode);
}
/* this converted inline_data should be recovered. */
set_inode_flag(dn->inode, FI_APPEND_WRITE);
/* clear inline data and flag after data writeback */
- truncate_inline_inode(dn->inode, dn->inode_page, 0);
+ f2fs_truncate_inline_inode(dn->inode, dn->inode_page, 0);
clear_inline_node(dn->inode_page);
clear_out:
stat_dec_inline_inode(dn->inode);
@@ -196,7 +212,7 @@ int f2fs_convert_inline_inode(struct inode *inode)
f2fs_lock_op(sbi);
- ipage = get_node_page(sbi, inode->i_ino);
+ ipage = f2fs_get_node_page(sbi, inode->i_ino);
if (IS_ERR(ipage)) {
err = PTR_ERR(ipage);
goto out;
@@ -222,12 +238,10 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page)
{
void *src_addr, *dst_addr;
struct dnode_of_data dn;
- struct address_space *mapping = page_mapping(page);
- unsigned long flags;
int err;
set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = get_dnode_of_data(&dn, 0, LOOKUP_NODE);
+ err = f2fs_get_dnode_of_data(&dn, 0, LOOKUP_NODE);
if (err)
return err;
@@ -238,17 +252,14 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page)
f2fs_bug_on(F2FS_I_SB(inode), page->index);
- f2fs_wait_on_page_writeback(dn.inode_page, NODE, true);
+ f2fs_wait_on_page_writeback(dn.inode_page, NODE, true, true);
src_addr = kmap_atomic(page);
dst_addr = inline_data_addr(inode, dn.inode_page);
memcpy(dst_addr, src_addr, MAX_INLINE_DATA(inode));
kunmap_atomic(src_addr);
set_page_dirty(dn.inode_page);
- spin_lock_irqsave(&mapping->tree_lock, flags);
- radix_tree_tag_clear(&mapping->page_tree, page_index(page),
- PAGECACHE_TAG_DIRTY);
- spin_unlock_irqrestore(&mapping->tree_lock, flags);
+ f2fs_clear_radix_tree_dirty_tag(page);
set_inode_flag(inode, FI_APPEND_WRITE);
set_inode_flag(inode, FI_DATA_EXIST);
@@ -258,7 +269,7 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page)
return 0;
}
-bool recover_inline_data(struct inode *inode, struct page *npage)
+bool f2fs_recover_inline_data(struct inode *inode, struct page *npage)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode *ri = NULL;
@@ -279,10 +290,10 @@ bool recover_inline_data(struct inode *inode, struct page *npage)
if (f2fs_has_inline_data(inode) &&
ri && (ri->i_inline & F2FS_INLINE_DATA)) {
process_inline:
- ipage = get_node_page(sbi, inode->i_ino);
+ ipage = f2fs_get_node_page(sbi, inode->i_ino);
f2fs_bug_on(sbi, IS_ERR(ipage));
- f2fs_wait_on_page_writeback(ipage, NODE, true);
+ f2fs_wait_on_page_writeback(ipage, NODE, true, true);
src_addr = inline_data_addr(inode, npage);
dst_addr = inline_data_addr(inode, ipage);
@@ -297,20 +308,20 @@ process_inline:
}
if (f2fs_has_inline_data(inode)) {
- ipage = get_node_page(sbi, inode->i_ino);
+ ipage = f2fs_get_node_page(sbi, inode->i_ino);
f2fs_bug_on(sbi, IS_ERR(ipage));
- truncate_inline_inode(inode, ipage, 0);
+ f2fs_truncate_inline_inode(inode, ipage, 0);
clear_inode_flag(inode, FI_INLINE_DATA);
f2fs_put_page(ipage, 1);
} else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) {
- if (truncate_blocks(inode, 0, false))
+ if (f2fs_truncate_blocks(inode, 0, false))
return false;
goto process_inline;
}
return false;
}
-struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir,
+struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
struct fscrypt_name *fname, struct page **res_page)
{
struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
@@ -321,7 +332,7 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir,
void *inline_dentry;
f2fs_hash_t namehash;
- ipage = get_node_page(sbi, dir->i_ino);
+ ipage = f2fs_get_node_page(sbi, dir->i_ino);
if (IS_ERR(ipage)) {
*res_page = ipage;
return NULL;
@@ -332,7 +343,7 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir,
inline_dentry = inline_data_addr(dir, ipage);
make_dentry_ptr_inline(dir, &d, inline_dentry);
- de = find_target_dentry(fname, namehash, NULL, &d);
+ de = f2fs_find_target_dentry(fname, namehash, NULL, &d);
unlock_page(ipage);
if (de)
*res_page = ipage;
@@ -342,7 +353,7 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir,
return de;
}
-int make_empty_inline_dir(struct inode *inode, struct inode *parent,
+int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent,
struct page *ipage)
{
struct f2fs_dentry_ptr d;
@@ -351,7 +362,7 @@ int make_empty_inline_dir(struct inode *inode, struct inode *parent,
inline_dentry = inline_data_addr(inode, ipage);
make_dentry_ptr_inline(inode, &d, inline_dentry);
- do_make_empty_dir(inode, parent, &d);
+ f2fs_do_make_empty_dir(inode, parent, &d);
set_page_dirty(ipage);
@@ -385,8 +396,16 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage,
if (err)
goto out;
- f2fs_wait_on_page_writeback(page, DATA, true);
- zero_user_segment(page, MAX_INLINE_DATA(dir), PAGE_SIZE);
+ if (unlikely(dn.data_blkaddr != NEW_ADDR)) {
+ f2fs_put_dnode(&dn);
+ set_sbi_flag(F2FS_P_SB(page), SBI_NEED_FSCK);
+ f2fs_warn(F2FS_P_SB(page), "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, run fsck to fix.",
+ __func__, dir->i_ino, dn.data_blkaddr);
+ err = -EFSCORRUPTED;
+ goto out;
+ }
+
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
dentry_blk = page_address(page);
@@ -410,11 +429,19 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage,
set_page_dirty(page);
/* clear inline dir and flag after data writeback */
- truncate_inline_inode(dir, ipage, 0);
+ f2fs_truncate_inline_inode(dir, ipage, 0);
stat_dec_inline_dir(dir);
clear_inode_flag(dir, FI_INLINE_DENTRY);
+ /*
+ * reclaim the reserved space which was used to keep the
+ * inline_dentry structure for backward compatibility.
+ */
+ if (!f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(dir)) &&
+ !f2fs_has_inline_xattr(dir))
+ F2FS_I(dir)->i_inline_xattr_size = 0;
+
f2fs_i_depth_write(dir, 1);
if (i_size_read(dir) < PAGE_SIZE)
f2fs_i_size_write(dir, PAGE_SIZE);
@@ -453,7 +480,7 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry)
new_name.len = le16_to_cpu(de->name_len);
ino = le32_to_cpu(de->ino);
- fake_mode = get_de_type(de) << S_SHIFT;
+ fake_mode = f2fs_get_de_type(de) << S_SHIFT;
err = f2fs_add_regular_entry(dir, &new_name, NULL, NULL,
ino, fake_mode);
@@ -465,8 +492,8 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry)
return 0;
punch_dentry_pages:
truncate_inode_pages(&dir->i_data, 0);
- truncate_blocks(dir, 0, false);
- remove_dirty_inode(dir);
+ f2fs_truncate_blocks(dir, 0, false);
+ f2fs_remove_dirty_inode(dir);
return err;
}
@@ -484,7 +511,7 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage,
}
memcpy(backup_dentry, inline_dentry, MAX_INLINE_DATA(dir));
- truncate_inline_inode(dir, ipage, 0);
+ f2fs_truncate_inline_inode(dir, ipage, 0);
unlock_page(ipage);
@@ -496,17 +523,27 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage,
stat_dec_inline_dir(dir);
clear_inode_flag(dir, FI_INLINE_DENTRY);
- kfree(backup_dentry);
+
+ /*
+ * reclaim the reserved space which was used to keep the
+ * inline_dentry structure for backward compatibility.
+ */
+ if (!f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(dir)) &&
+ !f2fs_has_inline_xattr(dir))
+ F2FS_I(dir)->i_inline_xattr_size = 0;
+
+ kvfree(backup_dentry);
return 0;
recover:
lock_page(ipage);
+ f2fs_wait_on_page_writeback(ipage, NODE, true, true);
memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA(dir));
f2fs_i_depth_write(dir, 0);
f2fs_i_size_write(dir, MAX_INLINE_DATA(dir));
set_page_dirty(ipage);
f2fs_put_page(ipage, 1);
- kfree(backup_dentry);
+ kvfree(backup_dentry);
return err;
}
@@ -533,14 +570,14 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
struct page *page = NULL;
int err = 0;
- ipage = get_node_page(sbi, dir->i_ino);
+ ipage = f2fs_get_node_page(sbi, dir->i_ino);
if (IS_ERR(ipage))
return PTR_ERR(ipage);
inline_dentry = inline_data_addr(dir, ipage);
make_dentry_ptr_inline(dir, &d, inline_dentry);
- bit_pos = room_for_filename(d.bitmap, slots, d.max);
+ bit_pos = f2fs_room_for_filename(d.bitmap, slots, d.max);
if (bit_pos >= d.max) {
err = f2fs_convert_inline_dir(dir, ipage, inline_dentry);
if (err)
@@ -551,7 +588,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
if (inode) {
down_write(&F2FS_I(inode)->i_sem);
- page = init_inode_metadata(inode, dir, new_name,
+ page = f2fs_init_inode_metadata(inode, dir, new_name,
orig_name, ipage);
if (IS_ERR(page)) {
err = PTR_ERR(page);
@@ -559,7 +596,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
}
}
- f2fs_wait_on_page_writeback(ipage, NODE, true);
+ f2fs_wait_on_page_writeback(ipage, NODE, true, true);
name_hash = f2fs_dentry_hash(new_name, NULL);
f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos);
@@ -572,7 +609,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
f2fs_put_page(page, 1);
}
- update_parent_metadata(dir, inode, 0);
+ f2fs_update_parent_metadata(dir, inode, 0);
fail:
if (inode)
up_write(&F2FS_I(inode)->i_sem);
@@ -591,7 +628,7 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page,
int i;
lock_page(page);
- f2fs_wait_on_page_writeback(page, NODE, true);
+ f2fs_wait_on_page_writeback(page, NODE, true, true);
inline_dentry = inline_data_addr(dir, page);
make_dentry_ptr_inline(dir, &d, inline_dentry);
@@ -618,7 +655,7 @@ bool f2fs_empty_inline_dir(struct inode *dir)
void *inline_dentry;
struct f2fs_dentry_ptr d;
- ipage = get_node_page(sbi, dir->i_ino);
+ ipage = f2fs_get_node_page(sbi, dir->i_ino);
if (IS_ERR(ipage))
return false;
@@ -649,10 +686,16 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx,
if (ctx->pos == d.max)
return 0;
- ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
+ ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino);
if (IS_ERR(ipage))
return PTR_ERR(ipage);
+ /*
+ * f2fs_readdir is protected by inode->i_rwsem, so it is safe to
+ * access ipage without holding the page lock.
+ */
+ unlock_page(ipage);
+
inline_dentry = inline_data_addr(inode, ipage);
make_dentry_ptr_inline(inode, &d, inline_dentry);
@@ -661,7 +704,7 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx,
if (!err)
ctx->pos = d.max;
- f2fs_put_page(ipage, 1);
+ f2fs_put_page(ipage, 0);
return err < 0 ? err : 0;
}
@@ -675,7 +718,7 @@ int f2fs_inline_data_fiemap(struct inode *inode,
struct page *ipage;
int err = 0;
- ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
+ ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino);
if (IS_ERR(ipage))
return PTR_ERR(ipage);
@@ -691,7 +734,10 @@ int f2fs_inline_data_fiemap(struct inode *inode,
ilen = start + len;
ilen -= start;
- get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni);
+ err = f2fs_get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni);
+ if (err)
+ goto out;
+
byteaddr = (__u64)ni.blk_addr << inode->i_sb->s_blocksize_bits;
byteaddr += (char *)inline_data_addr(inode, ipage) -
(char *)F2FS_INODE(ipage);
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 51846fc54fbd..2f02199047a9 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/inode.c
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
@@ -17,6 +14,7 @@
#include "f2fs.h"
#include "node.h"
#include "segment.h"
+#include "xattr.h"
#include <trace/events/f2fs.h>
@@ -36,15 +34,15 @@ void f2fs_set_inode_flags(struct inode *inode)
unsigned int flags = F2FS_I(inode)->i_flags;
unsigned int new_fl = 0;
- if (flags & FS_SYNC_FL)
+ if (flags & F2FS_SYNC_FL)
new_fl |= S_SYNC;
- if (flags & FS_APPEND_FL)
+ if (flags & F2FS_APPEND_FL)
new_fl |= S_APPEND;
- if (flags & FS_IMMUTABLE_FL)
+ if (flags & F2FS_IMMUTABLE_FL)
new_fl |= S_IMMUTABLE;
- if (flags & FS_NOATIME_FL)
+ if (flags & F2FS_NOATIME_FL)
new_fl |= S_NOATIME;
- if (flags & FS_DIRSYNC_FL)
+ if (flags & F2FS_DIRSYNC_FL)
new_fl |= S_DIRSYNC;
if (f2fs_encrypted_inode(inode))
new_fl |= S_ENCRYPTED;
@@ -68,13 +66,16 @@ static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
}
}
-static bool __written_first_block(struct f2fs_inode *ri)
+static int __written_first_block(struct f2fs_sb_info *sbi,
+ struct f2fs_inode *ri)
{
block_t addr = le32_to_cpu(ri->i_addr[offset_in_addr(ri)]);
- if (addr != NEW_ADDR && addr != NULL_ADDR)
- return true;
- return false;
+ if (!__is_valid_data_blkaddr(addr))
+ return 1;
+ if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC_ENHANCE))
+ return -EFSCORRUPTED;
+ return 0;
}
static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
@@ -103,7 +104,7 @@ static void __recover_inline_status(struct inode *inode, struct page *ipage)
while (start < end) {
if (*start++) {
- f2fs_wait_on_page_writeback(ipage, NODE, true);
+ f2fs_wait_on_page_writeback(ipage, NODE, true, true);
set_inode_flag(inode, FI_DATA_EXIST);
set_raw_inline(inode, F2FS_INODE(ipage));
@@ -117,15 +118,15 @@ static void __recover_inline_status(struct inode *inode, struct page *ipage)
static bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct page *page)
{
struct f2fs_inode *ri = &F2FS_NODE(page)->i;
- int extra_isize = le32_to_cpu(ri->i_extra_isize);
- if (!f2fs_sb_has_inode_chksum(sbi->sb))
+ if (!f2fs_sb_has_inode_chksum(sbi))
return false;
- if (!RAW_IS_INODE(F2FS_NODE(page)) || !(ri->i_inline & F2FS_EXTRA_ATTR))
+ if (!IS_INODE(page) || !(ri->i_inline & F2FS_EXTRA_ATTR))
return false;
- if (!F2FS_FITS_IN_INODE(ri, extra_isize, i_inode_checksum))
+ if (!F2FS_FITS_IN_INODE(ri, le16_to_cpu(ri->i_extra_isize),
+ i_inode_checksum))
return false;
return true;
@@ -159,8 +160,15 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page)
struct f2fs_inode *ri;
__u32 provided, calculated;
+ if (unlikely(is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN)))
+ return true;
+
+#ifdef CONFIG_F2FS_CHECK_FS
+ if (!f2fs_enable_inode_chksum(sbi, page))
+#else
if (!f2fs_enable_inode_chksum(sbi, page) ||
PageDirty(page) || PageWriteback(page))
+#endif
return true;
ri = &F2FS_NODE(page)->i;
@@ -168,9 +176,8 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page)
calculated = f2fs_inode_chksum(sbi, page);
if (provided != calculated)
- f2fs_msg(sbi->sb, KERN_WARNING,
- "checksum invalid, ino = %x, %x vs. %x",
- ino_of_node(page), provided, calculated);
+ f2fs_warn(sbi, "checksum invalid, nid = %lu, ino_of_node = %x, %x vs. %x",
+ page->index, ino_of_node(page), provided, calculated);
return provided == calculated;
}
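
f2fs_inode_chksum_verify() above follows the usual verify shape: recompute the checksum, warn on mismatch, and return the comparison so the caller decides what to do; dirty or writeback pages are skipped unless CONFIG_F2FS_CHECK_FS forces strict checking. A toy standalone version of that shape (the real checksum is a seeded CRC32 over the raw inode; the rotate-XOR here is only a stand-in):

	#include <inttypes.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Stand-in checksum so the sketch stays self-contained. */
	static uint32_t toy_chksum(const uint8_t *buf, size_t len)
	{
		uint32_t c = 0x12345678;	/* illustrative seed */

		while (len--)
			c = (c << 1 | c >> 31) ^ *buf++;
		return c;
	}

	/* Shape of f2fs_inode_chksum_verify(): recompute, warn on mismatch,
	 * and let the caller decide what to do with the result. */
	static int verify(const uint8_t *inode, size_t len, uint32_t provided)
	{
		uint32_t calculated = toy_chksum(inode, len);

		if (provided != calculated)
			fprintf(stderr, "checksum invalid, %" PRIx32 " vs. %" PRIx32 "\n",
				provided, calculated);
		return provided == calculated;
	}

	int main(void)
	{
		uint8_t raw[16] = { 1, 2, 3 };
		uint32_t good = toy_chksum(raw, sizeof(raw));

		printf("%d %d\n", verify(raw, sizeof(raw), good),
		       verify(raw, sizeof(raw), good ^ 1));
		return 0;
	}
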
@@ -185,6 +192,99 @@ void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page)
ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, page));
}
+static bool sanity_check_inode(struct inode *inode, struct page *node_page)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ unsigned long long iblocks;
+
+ iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks);
+ if (!iblocks) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_warn(sbi, "%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, run fsck to fix.",
+ __func__, inode->i_ino, iblocks);
+ return false;
+ }
+
+ if (ino_of_node(node_page) != nid_of_node(node_page)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_warn(sbi, "%s: corrupted inode footer i_ino=%lx, ino,nid: [%u, %u] run fsck to fix.",
+ __func__, inode->i_ino,
+ ino_of_node(node_page), nid_of_node(node_page));
+ return false;
+ }
+
+ if (f2fs_sb_has_flexible_inline_xattr(sbi)
+ && !f2fs_has_extra_attr(inode)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_warn(sbi, "%s: corrupted inode ino=%lx, run fsck to fix.",
+ __func__, inode->i_ino);
+ return false;
+ }
+
+ if (f2fs_has_extra_attr(inode) &&
+ !f2fs_sb_has_extra_attr(sbi)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_warn(sbi, "%s: inode (ino=%lx) is with extra_attr, but extra_attr feature is off",
+ __func__, inode->i_ino);
+ return false;
+ }
+
+ if (fi->i_extra_isize > F2FS_TOTAL_EXTRA_ATTR_SIZE ||
+ fi->i_extra_isize % sizeof(__le32)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_extra_isize: %d, max: %zu",
+ __func__, inode->i_ino, fi->i_extra_isize,
+ F2FS_TOTAL_EXTRA_ATTR_SIZE);
+ return false;
+ }
+
+ if (f2fs_has_extra_attr(inode) &&
+ f2fs_sb_has_flexible_inline_xattr(sbi) &&
+ f2fs_has_inline_xattr(inode) &&
+ (!fi->i_inline_xattr_size ||
+ fi->i_inline_xattr_size > MAX_INLINE_XATTR_SIZE)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_inline_xattr_size: %d, max: %zu",
+ __func__, inode->i_ino, fi->i_inline_xattr_size,
+ MAX_INLINE_XATTR_SIZE);
+ return false;
+ }
+
+ if (F2FS_I(inode)->extent_tree) {
+ struct extent_info *ei = &F2FS_I(inode)->extent_tree->largest;
+
+ if (ei->len &&
+ (!f2fs_is_valid_blkaddr(sbi, ei->blk,
+ DATA_GENERIC_ENHANCE) ||
+ !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1,
+ DATA_GENERIC_ENHANCE))) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix",
+ __func__, inode->i_ino,
+ ei->blk, ei->fofs, ei->len);
+ return false;
+ }
+ }
+
+ if (f2fs_has_inline_data(inode) &&
+ (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode))) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_data, run fsck to fix",
+ __func__, inode->i_ino, inode->i_mode);
+ return false;
+ }
+
+ if (f2fs_has_inline_dentry(inode) && !S_ISDIR(inode->i_mode)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_dentry, run fsck to fix",
+ __func__, inode->i_ino, inode->i_mode);
+ return false;
+ }
+
+ return true;
+}
+
static int do_read_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -192,16 +292,13 @@ static int do_read_inode(struct inode *inode)
struct page *node_page;
struct f2fs_inode *ri;
projid_t i_projid;
+ int err;
/* Check if ino is within scope */
- if (check_nid_range(sbi, inode->i_ino)) {
- f2fs_msg(inode->i_sb, KERN_ERR, "bad inode number: %lu",
- (unsigned long) inode->i_ino);
- WARN_ON(1);
+ if (f2fs_check_nid_range(sbi, inode->i_ino))
return -EINVAL;
- }
- node_page = get_node_page(sbi, inode->i_ino);
+ node_page = f2fs_get_node_page(sbi, inode->i_ino);
if (IS_ERR(node_page))
return PTR_ERR(node_page);
@@ -221,10 +318,15 @@ static int do_read_inode(struct inode *inode)
inode->i_ctime.tv_nsec = le32_to_cpu(ri->i_ctime_nsec);
inode->i_mtime.tv_nsec = le32_to_cpu(ri->i_mtime_nsec);
inode->i_generation = le32_to_cpu(ri->i_generation);
-
- fi->i_current_depth = le32_to_cpu(ri->i_current_depth);
+ if (S_ISDIR(inode->i_mode))
+ fi->i_current_depth = le32_to_cpu(ri->i_current_depth);
+ else if (S_ISREG(inode->i_mode))
+ fi->i_gc_failures[GC_FAILURE_PIN] =
+ le16_to_cpu(ri->i_gc_failures);
fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid);
fi->i_flags = le32_to_cpu(ri->i_flags);
+ if (S_ISREG(inode->i_mode))
+ fi->i_flags &= ~F2FS_PROJINHERIT_FL;
fi->flags = 0;
fi->i_advise = ri->i_advise;
fi->i_pino = le32_to_cpu(ri->i_pino);
@@ -238,8 +340,7 @@ static int do_read_inode(struct inode *inode)
fi->i_extra_isize = f2fs_has_extra_attr(inode) ?
le16_to_cpu(ri->i_extra_isize) : 0;
- if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) {
- f2fs_bug_on(sbi, !f2fs_has_extra_attr(inode));
+ if (f2fs_sb_has_flexible_inline_xattr(sbi)) {
fi->i_inline_xattr_size = le16_to_cpu(ri->i_inline_xattr_size);
} else if (f2fs_has_inline_xattr(inode) ||
f2fs_has_inline_dentry(inode)) {
@@ -255,30 +356,48 @@ static int do_read_inode(struct inode *inode)
fi->i_inline_xattr_size = 0;
}
+ if (!sanity_check_inode(inode, node_page)) {
+ f2fs_put_page(node_page, 1);
+ return -EFSCORRUPTED;
+ }
+
/* check data exist */
if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode))
__recover_inline_status(inode, node_page);
+ /* try to recover cold bit for non-dir inode */
+ if (!S_ISDIR(inode->i_mode) && !is_cold_node(node_page)) {
+ set_cold_node(node_page, false);
+ set_page_dirty(node_page);
+ }
+
/* get rdev by using inline_info */
__get_inode_rdev(inode, ri);
- if (__written_first_block(ri))
- set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
+ if (S_ISREG(inode->i_mode)) {
+ err = __written_first_block(sbi, ri);
+ if (err < 0) {
+ f2fs_put_page(node_page, 1);
+ return err;
+ }
+ if (!err)
+ set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
+ }
- if (!need_inode_block_update(sbi, inode->i_ino))
+ if (!f2fs_need_inode_block_update(sbi, inode->i_ino))
fi->last_disk_size = inode->i_size;
- if (fi->i_flags & FS_PROJINHERIT_FL)
+ if (fi->i_flags & F2FS_PROJINHERIT_FL)
set_inode_flag(inode, FI_PROJ_INHERIT);
- if (f2fs_has_extra_attr(inode) && f2fs_sb_has_project_quota(sbi->sb) &&
+ if (f2fs_has_extra_attr(inode) && f2fs_sb_has_project_quota(sbi) &&
F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_projid))
i_projid = (projid_t)le32_to_cpu(ri->i_projid);
else
i_projid = F2FS_DEF_PROJID;
fi->i_projid = make_kprojid(&init_user_ns, i_projid);
- if (f2fs_has_extra_attr(inode) && f2fs_sb_has_inode_crtime(sbi->sb) &&
+ if (f2fs_has_extra_attr(inode) && f2fs_sb_has_inode_crtime(sbi) &&
F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) {
fi->i_crtime.tv_sec = le64_to_cpu(ri->i_crtime);
fi->i_crtime.tv_nsec = le32_to_cpu(ri->i_crtime_nsec);
@@ -320,10 +439,10 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino)
make_now:
if (ino == F2FS_NODE_INO(sbi)) {
inode->i_mapping->a_ops = &f2fs_node_aops;
- mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
+ mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
} else if (ino == F2FS_META_INO(sbi)) {
inode->i_mapping->a_ops = &f2fs_meta_aops;
- mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
+ mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
} else if (S_ISREG(inode->i_mode)) {
inode->i_op = &f2fs_file_inode_operations;
inode->i_fop = &f2fs_file_operations;
@@ -354,6 +473,7 @@ make_now:
return inode;
bad_inode:
+ f2fs_inode_synced(inode);
iget_failed(inode);
trace_f2fs_iget_exit(inode, ret);
return ERR_PTR(ret);
@@ -373,12 +493,12 @@ retry:
return inode;
}
-void update_inode(struct inode *inode, struct page *node_page)
+void f2fs_update_inode(struct inode *inode, struct page *node_page)
{
struct f2fs_inode *ri;
struct extent_tree *et = F2FS_I(inode)->extent_tree;
- f2fs_wait_on_page_writeback(node_page, NODE, true);
+ f2fs_wait_on_page_writeback(node_page, NODE, true, true);
set_page_dirty(node_page);
f2fs_inode_synced(inode);
@@ -408,7 +528,12 @@ void update_inode(struct inode *inode, struct page *node_page)
ri->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
ri->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
ri->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
- ri->i_current_depth = cpu_to_le32(F2FS_I(inode)->i_current_depth);
+ if (S_ISDIR(inode->i_mode))
+ ri->i_current_depth =
+ cpu_to_le32(F2FS_I(inode)->i_current_depth);
+ else if (S_ISREG(inode->i_mode))
+ ri->i_gc_failures =
+ cpu_to_le16(F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN]);
ri->i_xattr_nid = cpu_to_le32(F2FS_I(inode)->i_xattr_nid);
ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags);
ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino);
@@ -418,11 +543,11 @@ void update_inode(struct inode *inode, struct page *node_page)
if (f2fs_has_extra_attr(inode)) {
ri->i_extra_isize = cpu_to_le16(F2FS_I(inode)->i_extra_isize);
- if (f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(inode)->sb))
+ if (f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(inode)))
ri->i_inline_xattr_size =
cpu_to_le16(F2FS_I(inode)->i_inline_xattr_size);
- if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)->sb) &&
+ if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)) &&
F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize,
i_projid)) {
projid_t i_projid;
@@ -432,7 +557,7 @@ void update_inode(struct inode *inode, struct page *node_page)
ri->i_projid = cpu_to_le32(i_projid);
}
- if (f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)->sb) &&
+ if (f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) &&
F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize,
i_crtime)) {
ri->i_crtime =
@@ -452,14 +577,18 @@ void update_inode(struct inode *inode, struct page *node_page)
F2FS_I(inode)->i_disk_time[1] = inode->i_ctime;
F2FS_I(inode)->i_disk_time[2] = inode->i_mtime;
F2FS_I(inode)->i_disk_time[3] = F2FS_I(inode)->i_crtime;
+
+#ifdef CONFIG_F2FS_CHECK_FS
+ f2fs_inode_chksum_set(F2FS_I_SB(inode), node_page);
+#endif
}
-void update_inode_page(struct inode *inode)
+void f2fs_update_inode_page(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct page *node_page;
retry:
- node_page = get_node_page(sbi, inode->i_ino);
+ node_page = f2fs_get_node_page(sbi, inode->i_ino);
if (IS_ERR(node_page)) {
int err = PTR_ERR(node_page);
if (err == -ENOMEM) {
@@ -470,7 +599,7 @@ retry:
}
return;
}
- update_inode(inode, node_page);
+ f2fs_update_inode(inode, node_page);
f2fs_put_page(node_page, 1);
}
@@ -485,11 +614,14 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
if (!is_inode_flag_set(inode, FI_DIRTY_INODE))
return 0;
+ if (f2fs_is_checkpoint_ready(sbi))
+ return -ENOSPC;
+
/*
* We need to balance fs here to prevent from producing dirty node pages
* during the urgent cleaning time when runing out of free sections.
*/
- update_inode_page(inode);
+ f2fs_update_inode_page(inode);
if (wbc && wbc->nr_to_write)
f2fs_balance_fs(sbi, true);
return 0;
@@ -506,7 +638,7 @@ void f2fs_evict_inode(struct inode *inode)
/* some remained atomic pages should discarded */
if (f2fs_is_atomic_file(inode))
- drop_inmem_pages(inode);
+ f2fs_drop_inmem_pages(inode);
trace_f2fs_evict_inode(inode);
truncate_inode_pages_final(&inode->i_data);
@@ -516,18 +648,22 @@ void f2fs_evict_inode(struct inode *inode)
goto out_clear;
f2fs_bug_on(sbi, get_dirty_pages(inode));
- remove_dirty_inode(inode);
+ f2fs_remove_dirty_inode(inode);
f2fs_destroy_extent_tree(inode);
if (inode->i_nlink || is_bad_inode(inode))
goto no_delete;
- dquot_initialize(inode);
+ err = dquot_initialize(inode);
+ if (err) {
+ err = 0;
+ set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+ }
- remove_ino_entry(sbi, inode->i_ino, APPEND_INO);
- remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
- remove_ino_entry(sbi, inode->i_ino, FLUSH_INO);
+ f2fs_remove_ino_entry(sbi, inode->i_ino, APPEND_INO);
+ f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
+ f2fs_remove_ino_entry(sbi, inode->i_ino, FLUSH_INO);
sb_start_intwrite(inode->i_sb);
set_inode_flag(inode, FI_NO_ALLOC);
@@ -536,15 +672,14 @@ retry:
if (F2FS_HAS_BLOCKS(inode))
err = f2fs_truncate(inode);
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_EVICT_INODE)) {
f2fs_show_injection_info(FAULT_EVICT_INODE);
err = -EIO;
}
-#endif
+
if (!err) {
f2fs_lock_op(sbi);
- err = remove_inode_page(inode);
+ err = f2fs_remove_inode_page(inode);
f2fs_unlock_op(sbi);
if (err == -ENOENT)
err = 0;
@@ -556,9 +691,10 @@ retry:
goto retry;
}
- if (err)
- update_inode_page(inode);
- dquot_free_inode(inode);
+ if (err) {
+ f2fs_update_inode_page(inode);
+ set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+ }
sb_end_intwrite(inode->i_sb);
no_delete:
dquot_drop(inode);
@@ -567,7 +703,8 @@ no_delete:
stat_dec_inline_dir(inode);
stat_dec_inline_inode(inode);
- if (likely(!is_set_ckpt_flags(sbi, CP_ERROR_FLAG)))
+ if (likely(!is_set_ckpt_flags(sbi, CP_ERROR_FLAG) &&
+ !is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE));
else
f2fs_inode_synced(inode);
@@ -580,16 +717,19 @@ no_delete:
invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid);
if (inode->i_nlink) {
if (is_inode_flag_set(inode, FI_APPEND_WRITE))
- add_ino_entry(sbi, inode->i_ino, APPEND_INO);
+ f2fs_add_ino_entry(sbi, inode->i_ino, APPEND_INO);
if (is_inode_flag_set(inode, FI_UPDATE_WRITE))
- add_ino_entry(sbi, inode->i_ino, UPDATE_INO);
+ f2fs_add_ino_entry(sbi, inode->i_ino, UPDATE_INO);
}
if (is_inode_flag_set(inode, FI_FREE_NID)) {
- alloc_nid_failed(sbi, inode->i_ino);
+ f2fs_alloc_nid_failed(sbi, inode->i_ino);
clear_inode_flag(inode, FI_FREE_NID);
} else {
- f2fs_bug_on(sbi, err &&
- !exist_written_data(sbi, inode->i_ino, ORPHAN_INO));
+ /*
+ * If the xattr nid is corrupted, we can reach an error condition,
+ * err && !f2fs_exist_written_data(sbi, inode->i_ino, ORPHAN_INO).
+ * In that case, f2fs_check_nid_range() is enough to give a clue.
+ */
}
out_clear:
fscrypt_put_encryption_info(inode, NULL);
@@ -597,10 +737,11 @@ out_clear:
}
/* caller should call f2fs_lock_op() */
-void handle_failed_inode(struct inode *inode)
+void f2fs_handle_failed_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct node_info ni;
+ int err;
/*
* clear nlink of inode in order to release resource of inode
@@ -612,7 +753,7 @@ void handle_failed_inode(struct inode *inode)
* we must call this to avoid the inode being left dirty, resulting
* in a panic when flushing dirty inodes in gdirty_list.
*/
- update_inode_page(inode);
+ f2fs_update_inode_page(inode);
f2fs_inode_synced(inode);
/* don't make bad inode, since it becomes a regular file. */
@@ -623,22 +764,27 @@ void handle_failed_inode(struct inode *inode)
* so we can prevent losing this orphan when encountering a checkpoint
* followed by a sudden power-off.
*/
- get_node_info(sbi, inode->i_ino, &ni);
+ err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
+ if (err) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_warn(sbi, "May loss orphan inode, run fsck to fix.");
+ goto out;
+ }
if (ni.blk_addr != NULL_ADDR) {
- int err = acquire_orphan_inode(sbi);
+ err = f2fs_acquire_orphan_inode(sbi);
if (err) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "Too many orphan inodes, run fsck to fix.");
+ f2fs_warn(sbi, "Too many orphan inodes, run fsck to fix.");
} else {
- add_orphan_inode(inode);
+ f2fs_add_orphan_inode(inode);
}
- alloc_nid_done(sbi, inode->i_ino);
+ f2fs_alloc_nid_done(sbi, inode->i_ino);
} else {
set_inode_flag(inode, FI_FREE_NID);
}
+out:
f2fs_unlock_op(sbi);
/* iput will drop the inode object */
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 0355891dbbf8..94902b82d4ec 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -1,24 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/namei.c
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/ctype.h>
+#include <linux/random.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/quotaops.h>
#include "f2fs.h"
#include "node.h"
+#include "segment.h"
#include "xattr.h"
#include "acl.h"
#include <trace/events/f2fs.h>
@@ -37,7 +36,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
return ERR_PTR(-ENOMEM);
f2fs_lock_op(sbi);
- if (!alloc_nid(sbi, &ino)) {
+ if (!f2fs_alloc_nid(sbi, &ino)) {
f2fs_unlock_op(sbi);
err = -ENOSPC;
goto fail;
@@ -52,7 +51,10 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
inode->i_blocks = 0;
inode->i_mtime = inode->i_atime = inode->i_ctime =
F2FS_I(inode)->i_crtime = current_time(inode);
- inode->i_generation = sbi->s_next_generation++;
+ inode->i_generation = prandom_u32();
+
+ if (S_ISDIR(inode->i_mode))
+ F2FS_I(inode)->i_current_depth = 1;
err = insert_inode_locked(inode);
if (err) {
@@ -60,8 +62,8 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
goto fail;
}
- if (f2fs_sb_has_project_quota(sbi->sb) &&
- (F2FS_I(dir)->i_flags & FS_PROJINHERIT_FL))
+ if (f2fs_sb_has_project_quota(sbi) &&
+ (F2FS_I(dir)->i_flags & F2FS_PROJINHERIT_FL))
F2FS_I(inode)->i_projid = F2FS_I(dir)->i_projid;
else
F2FS_I(inode)->i_projid = make_kprojid(&init_user_ns,
@@ -71,10 +73,6 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
if (err)
goto fail_drop;
- err = dquot_alloc_inode(inode);
- if (err)
- goto fail_drop;
-
set_inode_flag(inode, FI_NEW_INODE);
/* If the directory encrypted, then we should encrypt the inode. */
@@ -82,7 +80,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
f2fs_may_encrypt(inode))
f2fs_set_encrypted_inode(inode);
- if (f2fs_sb_has_extra_attr(sbi->sb)) {
+ if (f2fs_sb_has_extra_attr(sbi)) {
set_inode_flag(inode, FI_EXTRA_ATTR);
F2FS_I(inode)->i_extra_isize = F2FS_TOTAL_EXTRA_ATTR_SIZE;
}
@@ -95,7 +93,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
if (f2fs_may_inline_dentry(inode))
set_inode_flag(inode, FI_INLINE_DENTRY);
- if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) {
+ if (f2fs_sb_has_flexible_inline_xattr(sbi)) {
f2fs_bug_on(sbi, !f2fs_has_extra_attr(inode));
if (f2fs_has_inline_xattr(inode))
xattr_size = F2FS_OPTION(sbi).inline_xattr_size;
@@ -116,11 +114,13 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
f2fs_mask_flags(mode, F2FS_I(dir)->i_flags & F2FS_FL_INHERITED);
if (S_ISDIR(inode->i_mode))
- F2FS_I(inode)->i_flags |= FS_INDEX_FL;
+ F2FS_I(inode)->i_flags |= F2FS_INDEX_FL;
- if (F2FS_I(inode)->i_flags & FS_PROJINHERIT_FL)
+ if (F2FS_I(inode)->i_flags & F2FS_PROJINHERIT_FL)
set_inode_flag(inode, FI_PROJ_INHERIT);
+ f2fs_set_inode_flags(inode);
+
trace_f2fs_new_inode(inode, 0);
return inode;
@@ -143,7 +143,7 @@ fail_drop:
return ERR_PTR(err);
}
-static int is_extension_exist(const unsigned char *s, const char *sub)
+static inline int is_extension_exist(const unsigned char *s, const char *sub)
{
size_t slen = strlen(s);
size_t sublen = strlen(sub);
@@ -181,19 +181,22 @@ static inline void set_file_temperature(struct f2fs_sb_info *sbi, struct inode *
hot_count = sbi->raw_super->hot_ext_count;
for (i = 0; i < cold_count + hot_count; i++) {
- if (!is_extension_exist(name, extlist[i]))
- continue;
- if (i < cold_count)
- file_set_cold(inode);
- else
- file_set_hot(inode);
- break;
+ if (is_extension_exist(name, extlist[i]))
+ break;
}
up_read(&sbi->sb_lock);
+
+ if (i == cold_count + hot_count)
+ return;
+
+ if (i < cold_count)
+ file_set_cold(inode);
+ else
+ file_set_hot(inode);
}
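	/*
	 * Layout assumed by the lookup above (reader's note, not in the hunk):
	 * extlist[0 .. cold_count-1] holds the cold extensions and
	 * extlist[cold_count .. cold_count+hot_count-1] the hot ones, so a hit
	 * at i < cold_count marks the file cold and any other hit marks it hot.
	 */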
-int update_extension_list(struct f2fs_sb_info *sbi, const char *name,
+int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name,
bool hot, bool set)
{
__u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list;
@@ -243,7 +246,7 @@ int update_extension_list(struct f2fs_sb_info *sbi, const char *name,
return -EINVAL;
if (hot) {
- strncpy(extlist[count], name, strlen(name));
+ memcpy(extlist[count], name, strlen(name));
sbi->raw_super->hot_ext_count = hot_count + 1;
} else {
char buf[F2FS_MAX_EXTENSION][F2FS_EXTENSION_LEN];
@@ -251,7 +254,7 @@ int update_extension_list(struct f2fs_sb_info *sbi, const char *name,
memcpy(buf, &extlist[cold_count],
F2FS_EXTENSION_LEN * hot_count);
memset(extlist[cold_count], 0, F2FS_EXTENSION_LEN);
- strncpy(extlist[cold_count], name, strlen(name));
+ memcpy(extlist[cold_count], name, strlen(name));
memcpy(&extlist[cold_count + 1], buf,
F2FS_EXTENSION_LEN * hot_count);
sbi->raw_super->extension_count = cpu_to_le32(cold_count + 1);
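The strncpy()-to-memcpy() swaps above should be behaviour-preserving under the
assumption that each extlist[] slot is a fixed-size, zero-filled buffer of
F2FS_EXTENSION_LEN bytes:

	/* Both forms copy exactly strlen(name) bytes and rely on the slot's
	 * pre-zeroed tail for NUL termination; memcpy() merely sidesteps the
	 * compiler's truncation warning for strncpy(dst, src, strlen(src)). */
	memcpy(extlist[count], name, strlen(name));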
@@ -269,6 +272,9 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
+ err = f2fs_is_checkpoint_ready(sbi);
+ if (err)
+ return err;
err = dquot_initialize(dir);
if (err)
@@ -292,9 +298,10 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
goto out;
f2fs_unlock_op(sbi);
- alloc_nid_done(sbi, ino);
+ f2fs_alloc_nid_done(sbi, ino);
- d_instantiate_new(dentry, inode);
+ d_instantiate(dentry, inode);
+ unlock_new_inode(inode);
if (IS_DIRSYNC(dir))
f2fs_sync_fs(sbi->sb, 1);
@@ -302,7 +309,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
f2fs_balance_fs(sbi, true);
return 0;
out:
- handle_failed_inode(inode);
+ f2fs_handle_failed_inode(inode);
return err;
}
@@ -315,6 +322,9 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
+ err = f2fs_is_checkpoint_ready(sbi);
+ if (err)
+ return err;
err = fscrypt_prepare_link(old_dentry, dir, dentry);
if (err)
@@ -376,9 +386,8 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino)
int err = 0;
if (f2fs_readonly(sbi->sb)) {
- f2fs_msg(sbi->sb, KERN_INFO,
- "skip recovering inline_dots inode (ino:%lu, pino:%u) "
- "in readonly mountpoint", dir->i_ino, pino);
+ f2fs_info(sbi, "skip recovering inline_dots inode (ino:%lu, pino:%u) in readonly mountpoint",
+ dir->i_ino, pino);
return 0;
}
@@ -397,7 +406,7 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino)
err = PTR_ERR(page);
goto out;
} else {
- err = __f2fs_add_link(dir, &dot, NULL, dir->i_ino, S_IFDIR);
+ err = f2fs_do_add_link(dir, &dot, NULL, dir->i_ino, S_IFDIR);
if (err)
goto out;
}
@@ -408,7 +417,7 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino)
else if (IS_ERR(page))
err = PTR_ERR(page);
else
- err = __f2fs_add_link(dir, &dotdot, NULL, pino, S_IFDIR);
+ err = f2fs_do_add_link(dir, &dotdot, NULL, pino, S_IFDIR);
out:
if (!err)
clear_inode_flag(dir, FI_INLINE_DOTS);
@@ -471,9 +480,8 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
if (f2fs_encrypted_inode(dir) &&
(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
!fscrypt_has_permitted_context(dir, inode)) {
- f2fs_msg(inode->i_sb, KERN_WARNING,
- "Inconsistent encryption contexts: %lu/%lu",
- dir->i_ino, inode->i_ino);
+ f2fs_warn(F2FS_I_SB(inode), "Inconsistent encryption contexts: %lu/%lu",
+ dir->i_ino, inode->i_ino);
err = -EPERM;
goto out_iput;
}
@@ -520,7 +528,7 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
- err = acquire_orphan_inode(sbi);
+ err = f2fs_acquire_orphan_inode(sbi);
if (err) {
f2fs_unlock_op(sbi);
f2fs_put_page(page, 0);
@@ -558,6 +566,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
+ err = f2fs_is_checkpoint_ready(sbi);
+ if (err)
+ return err;
err = fscrypt_prepare_symlink(dir, symname, len, dir->i_sb->s_blocksize,
&disk_link);
@@ -582,9 +593,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
if (err)
- goto out_handle_failed_inode;
+ goto out_f2fs_handle_failed_inode;
f2fs_unlock_op(sbi);
- alloc_nid_done(sbi, inode->i_ino);
+ f2fs_alloc_nid_done(sbi, inode->i_ino);
err = fscrypt_encrypt_symlink(inode, symname, len, &disk_link);
if (err)
@@ -593,7 +604,8 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
err = page_symlink(inode, disk_link.name, disk_link.len);
err_out:
- d_instantiate_new(dentry, inode);
+ d_instantiate(dentry, inode);
+ unlock_new_inode(inode);
/*
* Let's flush symlink data in order to avoid broken symlink as much as
@@ -617,11 +629,11 @@ err_out:
f2fs_balance_fs(sbi, true);
goto out_free_encrypted_link;
-out_handle_failed_inode:
- handle_failed_inode(inode);
+out_f2fs_handle_failed_inode:
+ f2fs_handle_failed_inode(inode);
out_free_encrypted_link:
if (disk_link.name != (unsigned char *)symname)
- kfree(disk_link.name);
+ kvfree(disk_link.name);
return err;
}
@@ -654,9 +666,10 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
goto out_fail;
f2fs_unlock_op(sbi);
- alloc_nid_done(sbi, inode->i_ino);
+ f2fs_alloc_nid_done(sbi, inode->i_ino);
- d_instantiate_new(dentry, inode);
+ d_instantiate(dentry, inode);
+ unlock_new_inode(inode);
if (IS_DIRSYNC(dir))
f2fs_sync_fs(sbi->sb, 1);
@@ -666,7 +679,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
out_fail:
clear_inode_flag(inode, FI_INC_LINK);
- handle_failed_inode(inode);
+ f2fs_handle_failed_inode(inode);
return err;
}
@@ -687,6 +700,9 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
+ err = f2fs_is_checkpoint_ready(sbi);
+ if (err)
+ return err;
err = dquot_initialize(dir);
if (err)
@@ -705,9 +721,10 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
goto out;
f2fs_unlock_op(sbi);
- alloc_nid_done(sbi, inode->i_ino);
+ f2fs_alloc_nid_done(sbi, inode->i_ino);
- d_instantiate_new(dentry, inode);
+ d_instantiate(dentry, inode);
+ unlock_new_inode(inode);
if (IS_DIRSYNC(dir))
f2fs_sync_fs(sbi->sb, 1);
@@ -715,7 +732,7 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
f2fs_balance_fs(sbi, true);
return 0;
out:
- handle_failed_inode(inode);
+ f2fs_handle_failed_inode(inode);
return err;
}
@@ -744,7 +761,7 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry,
}
f2fs_lock_op(sbi);
- err = acquire_orphan_inode(sbi);
+ err = f2fs_acquire_orphan_inode(sbi);
if (err)
goto out;
@@ -756,8 +773,8 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry,
* add this non-linked tmpfile to the orphan list; in this way we can
* remove all unused data of the tmpfile after an abnormal power-off.
*/
- add_orphan_inode(inode);
- alloc_nid_done(sbi, inode->i_ino);
+ f2fs_add_orphan_inode(inode);
+ f2fs_alloc_nid_done(sbi, inode->i_ino);
if (whiteout) {
f2fs_i_links_write(inode, false);
@@ -773,9 +790,9 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry,
return 0;
release_out:
- release_orphan_inode(sbi);
+ f2fs_release_orphan_inode(sbi);
out:
- handle_failed_inode(inode);
+ f2fs_handle_failed_inode(inode);
return err;
}
@@ -817,10 +834,13 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct f2fs_dir_entry *old_entry;
struct f2fs_dir_entry *new_entry;
bool is_old_inline = f2fs_has_inline_dentry(old_dir);
- int err = -ENOENT;
+ int err;
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
+ err = f2fs_is_checkpoint_ready(sbi);
+ if (err)
+ return err;
if (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
(!projid_eq(F2FS_I(new_dir)->i_projid,
@@ -841,6 +861,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
goto out;
}
+ err = -ENOENT;
old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
if (!old_entry) {
if (IS_ERR(old_page))
@@ -882,7 +903,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
f2fs_lock_op(sbi);
- err = acquire_orphan_inode(sbi);
+ err = f2fs_acquire_orphan_inode(sbi);
if (err)
goto put_out_dir;
@@ -896,9 +917,9 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
up_write(&F2FS_I(new_inode)->i_sem);
if (!new_inode->i_nlink)
- add_orphan_inode(new_inode);
+ f2fs_add_orphan_inode(new_inode);
else
- release_orphan_inode(sbi);
+ f2fs_release_orphan_inode(sbi);
} else {
f2fs_balance_fs(sbi, true);
@@ -966,13 +987,19 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
f2fs_put_page(old_dir_page, 0);
f2fs_i_links_write(old_dir, false);
}
- if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT)
- add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO);
+ if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) {
+ f2fs_add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO);
+ if (S_ISDIR(old_inode->i_mode))
+ f2fs_add_ino_entry(sbi, old_inode->i_ino,
+ TRANS_DIR_INO);
+ }
f2fs_unlock_op(sbi);
if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
f2fs_sync_fs(sbi->sb, 1);
+
+ f2fs_update_time(sbi, REQ_TIME);
return 0;
put_out_dir:
@@ -1002,10 +1029,13 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
struct f2fs_dir_entry *old_dir_entry = NULL, *new_dir_entry = NULL;
struct f2fs_dir_entry *old_entry, *new_entry;
int old_nlink = 0, new_nlink = 0;
- int err = -ENOENT;
+ int err;
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
+ err = f2fs_is_checkpoint_ready(sbi);
+ if (err)
+ return err;
if ((is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
!projid_eq(F2FS_I(new_dir)->i_projid,
@@ -1023,6 +1053,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
if (err)
goto out;
+ err = -ENOENT;
old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
if (!old_entry) {
if (IS_ERR(old_page))
@@ -1118,14 +1149,16 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
f2fs_mark_inode_dirty_sync(new_dir, false);
if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) {
- add_ino_entry(sbi, old_dir->i_ino, TRANS_DIR_INO);
- add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO);
+ f2fs_add_ino_entry(sbi, old_dir->i_ino, TRANS_DIR_INO);
+ f2fs_add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO);
}
f2fs_unlock_op(sbi);
if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
f2fs_sync_fs(sbi->sb, 1);
+
+ f2fs_update_time(sbi, REQ_TIME);
return 0;
out_new_dir:
if (new_dir_entry) {
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 50592991b6cc..508e684c8dbd 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/node.c
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
@@ -23,13 +20,28 @@
#include "trace.h"
#include <trace/events/f2fs.h>
-#define on_build_free_nids(nmi) mutex_is_locked(&(nm_i)->build_lock)
+#define on_f2fs_build_free_nids(nmi) mutex_is_locked(&(nm_i)->build_lock)
static struct kmem_cache *nat_entry_slab;
static struct kmem_cache *free_nid_slab;
static struct kmem_cache *nat_entry_set_slab;
+static struct kmem_cache *fsync_node_entry_slab;
+
+/*
+ * Check whether the given nid is within node id range.
+ */
+int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
+{
+ if (unlikely(nid < F2FS_ROOT_INO(sbi) || nid >= NM_I(sbi)->max_nid)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_warn(sbi, "%s: out-of-range nid=%x, run fsck to fix.",
+ __func__, nid);
+ return -EFSCORRUPTED;
+ }
+ return 0;
+}
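A usage sketch for the new range check, matching the pattern f2fs_ra_node_page()
adopts later in this patch:

	if (f2fs_check_nid_range(sbi, nid))
		return;		/* out-of-range nid; SBI_NEED_FSCK already set */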
-bool available_free_memory(struct f2fs_sb_info *sbi, int type)
+bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct sysinfo val;
@@ -87,44 +99,35 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
static void clear_node_page_dirty(struct page *page)
{
- struct address_space *mapping = page->mapping;
- unsigned int long flags;
-
if (PageDirty(page)) {
- spin_lock_irqsave(&mapping->tree_lock, flags);
- radix_tree_tag_clear(&mapping->page_tree,
- page_index(page),
- PAGECACHE_TAG_DIRTY);
- spin_unlock_irqrestore(&mapping->tree_lock, flags);
-
+ f2fs_clear_radix_tree_dirty_tag(page);
clear_page_dirty_for_io(page);
- dec_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES);
+ dec_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
}
ClearPageUptodate(page);
}
static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
{
- pgoff_t index = current_nat_addr(sbi, nid);
- return get_meta_page(sbi, index);
+ return f2fs_get_meta_page_nofail(sbi, current_nat_addr(sbi, nid));
}
static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
{
struct page *src_page;
struct page *dst_page;
- pgoff_t src_off;
pgoff_t dst_off;
void *src_addr;
void *dst_addr;
struct f2fs_nm_info *nm_i = NM_I(sbi);
- src_off = current_nat_addr(sbi, nid);
- dst_off = next_nat_addr(sbi, src_off);
+ dst_off = next_nat_addr(sbi, current_nat_addr(sbi, nid));
/* get current nat block page with lock */
- src_page = get_meta_page(sbi, src_off);
- dst_page = grab_meta_page(sbi, dst_off);
+ src_page = get_current_nat_page(sbi, nid);
+ if (IS_ERR(src_page))
+ return src_page;
+ dst_page = f2fs_grab_meta_page(sbi, dst_off);
f2fs_bug_on(sbi, PageDirty(src_page));
src_addr = page_address(src_page);
@@ -169,14 +172,30 @@ static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i,
if (raw_ne)
node_info_from_raw_nat(&ne->ni, raw_ne);
+
+ spin_lock(&nm_i->nat_list_lock);
list_add_tail(&ne->list, &nm_i->nat_entries);
+ spin_unlock(&nm_i->nat_list_lock);
+
nm_i->nat_cnt++;
return ne;
}
static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
{
- return radix_tree_lookup(&nm_i->nat_root, n);
+ struct nat_entry *ne;
+
+ ne = radix_tree_lookup(&nm_i->nat_root, n);
+
+ /* for recent accessed nat entry, move it to tail of lru list */
+ if (ne && !get_nat_flag(ne, IS_DIRTY)) {
+ spin_lock(&nm_i->nat_list_lock);
+ if (!list_empty(&ne->list))
+ list_move_tail(&ne->list, &nm_i->nat_entries);
+ spin_unlock(&nm_i->nat_list_lock);
+ }
+
+ return ne;
}
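	/*
	 * Reader's summary of the locking introduced above: nat_entries is now
	 * an LRU list guarded by the new nat_list_lock spinlock. Clean entries
	 * move to the tail on each cache hit, dirty ones are parked on their
	 * set's entry_list, and the shrinker below pops victims from the head
	 * under the same lock.
	 */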
static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i,
@@ -187,7 +206,6 @@ static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i,
static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
{
- list_del(&e->list);
radix_tree_delete(&nm_i->nat_root, nat_get_nid(e));
nm_i->nat_cnt--;
__free_nat_entry(e);
@@ -238,16 +256,21 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
nm_i->dirty_nat_cnt++;
set_nat_flag(ne, IS_DIRTY, true);
refresh_list:
+ spin_lock(&nm_i->nat_list_lock);
if (new_ne)
list_del_init(&ne->list);
else
list_move_tail(&ne->list, &head->entry_list);
+ spin_unlock(&nm_i->nat_list_lock);
}
static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i,
struct nat_entry_set *set, struct nat_entry *ne)
{
+ spin_lock(&nm_i->nat_list_lock);
list_move_tail(&ne->list, &nm_i->nat_entries);
+ spin_unlock(&nm_i->nat_list_lock);
+
set_nat_flag(ne, IS_DIRTY, false);
set->entry_cnt--;
nm_i->dirty_nat_cnt--;
@@ -260,7 +283,73 @@ static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i,
start, nr);
}
-int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid)
+bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct page *page)
+{
+ return NODE_MAPPING(sbi) == page->mapping &&
+ IS_DNODE(page) && is_cold_node(page);
+}
+
+void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi)
+{
+ spin_lock_init(&sbi->fsync_node_lock);
+ INIT_LIST_HEAD(&sbi->fsync_node_list);
+ sbi->fsync_seg_id = 0;
+ sbi->fsync_node_num = 0;
+}
+
+static unsigned int f2fs_add_fsync_node_entry(struct f2fs_sb_info *sbi,
+ struct page *page)
+{
+ struct fsync_node_entry *fn;
+ unsigned long flags;
+ unsigned int seq_id;
+
+ fn = f2fs_kmem_cache_alloc(fsync_node_entry_slab, GFP_NOFS);
+
+ get_page(page);
+ fn->page = page;
+ INIT_LIST_HEAD(&fn->list);
+
+ spin_lock_irqsave(&sbi->fsync_node_lock, flags);
+ list_add_tail(&fn->list, &sbi->fsync_node_list);
+ fn->seq_id = sbi->fsync_seg_id++;
+ seq_id = fn->seq_id;
+ sbi->fsync_node_num++;
+ spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
+
+ return seq_id;
+}
+
+void f2fs_del_fsync_node_entry(struct f2fs_sb_info *sbi, struct page *page)
+{
+ struct fsync_node_entry *fn;
+ unsigned long flags;
+
+ spin_lock_irqsave(&sbi->fsync_node_lock, flags);
+ list_for_each_entry(fn, &sbi->fsync_node_list, list) {
+ if (fn->page == page) {
+ list_del(&fn->list);
+ sbi->fsync_node_num--;
+ spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
+ kmem_cache_free(fsync_node_entry_slab, fn);
+ put_page(page);
+ return;
+ }
+ }
+ spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
+ f2fs_bug_on(sbi, 1);
+}
+
+void f2fs_reset_fsync_node_info(struct f2fs_sb_info *sbi)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&sbi->fsync_node_lock, flags);
+ sbi->fsync_seg_id = 0;
+ spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
+}
+
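Taken together these helpers form a sequence-number handshake; a sketch of the
intended fsync-side pairing (as wired up in f2fs_fsync_node_pages() and
f2fs_wait_on_node_pages_writeback() later in this patch):

	unsigned int seq_id = 0;

	err = f2fs_fsync_node_pages(sbi, inode, &wbc, atomic, &seq_id);
	if (!err)
		err = f2fs_wait_on_node_pages_writeback(sbi, seq_id);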
+int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct nat_entry *e;
@@ -277,7 +366,7 @@ int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid)
return need;
}
-bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
+bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct nat_entry *e;
@@ -291,7 +380,7 @@ bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
return is_cp;
}
-bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
+bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct nat_entry *e;
@@ -364,8 +453,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
new_blkaddr == NULL_ADDR);
f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR &&
new_blkaddr == NEW_ADDR);
- f2fs_bug_on(sbi, nat_get_blkaddr(e) != NEW_ADDR &&
- nat_get_blkaddr(e) != NULL_ADDR &&
+ f2fs_bug_on(sbi, __is_valid_data_blkaddr(nat_get_blkaddr(e)) &&
new_blkaddr == NEW_ADDR);
/* increment version no as node is removed */
@@ -376,7 +464,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
/* change address */
nat_set_blkaddr(e, new_blkaddr);
- if (new_blkaddr == NEW_ADDR || new_blkaddr == NULL_ADDR)
+ if (!__is_valid_data_blkaddr(new_blkaddr))
set_nat_flag(e, IS_CHECKPOINTED, false);
__set_nat_cache_dirty(nm_i, e);
@@ -391,7 +479,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
up_write(&nm_i->nat_tree_lock);
}
-int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
+int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
int nr = nr_shrink;
@@ -399,13 +487,25 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
if (!down_write_trylock(&nm_i->nat_tree_lock))
return 0;
- while (nr_shrink && !list_empty(&nm_i->nat_entries)) {
+ spin_lock(&nm_i->nat_list_lock);
+ while (nr_shrink) {
struct nat_entry *ne;
+
+ if (list_empty(&nm_i->nat_entries))
+ break;
+
ne = list_first_entry(&nm_i->nat_entries,
struct nat_entry, list);
+ list_del(&ne->list);
+ spin_unlock(&nm_i->nat_list_lock);
+
__del_from_nat_cache(nm_i, ne);
nr_shrink--;
+
+ spin_lock(&nm_i->nat_list_lock);
}
+ spin_unlock(&nm_i->nat_list_lock);
+
up_write(&nm_i->nat_tree_lock);
return nr - nr_shrink;
}
@@ -413,7 +513,8 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
/*
- * This function always returns success
+ * This function returns 0 on success, or a negative errno on failure.
*/
-void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
+int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
+ struct node_info *ni)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -424,6 +525,7 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
struct f2fs_nat_entry ne;
struct nat_entry *e;
pgoff_t index;
+ block_t blkaddr;
int i;
ni->nid = nid;
@@ -436,14 +538,14 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
ni->blk_addr = nat_get_blkaddr(e);
ni->version = nat_get_version(e);
up_read(&nm_i->nat_tree_lock);
- return;
+ return 0;
}
memset(&ne, 0, sizeof(struct f2fs_nat_entry));
/* Check current segment summary */
down_read(&curseg->journal_rwsem);
- i = lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
+ i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
if (i >= 0) {
ne = nat_in_journal(journal, i);
node_info_from_raw_nat(ni, &ne);
@@ -458,20 +560,29 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
index = current_nat_addr(sbi, nid);
up_read(&nm_i->nat_tree_lock);
- page = get_meta_page(sbi, index);
+ page = f2fs_get_meta_page(sbi, index);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
+
nat_blk = (struct f2fs_nat_block *)page_address(page);
ne = nat_blk->entries[nid - start_nid];
node_info_from_raw_nat(ni, &ne);
f2fs_put_page(page, 1);
cache:
+ blkaddr = le32_to_cpu(ne.block_addr);
+ if (__is_valid_data_blkaddr(blkaddr) &&
+ !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE))
+ return -EFAULT;
+
/* cache nat entry */
cache_nat_entry(sbi, nid, &ne);
+ return 0;
}
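	/*
	 * Reader's summary of the lookup order above: 1) the in-memory nat
	 * cache, 2) the NAT journal kept in the CURSEG_HOT_DATA summary, and
	 * 3) the on-disk NAT block via f2fs_get_meta_page(); the new
	 * DATA_GENERIC_ENHANCE check then rejects a corrupted block address
	 * with -EFAULT before the entry is cached.
	 */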
/*
* readahead MAX_RA_NODE number of node pages.
*/
-static void ra_node_pages(struct page *parent, int start, int n)
+static void f2fs_ra_node_pages(struct page *parent, int start, int n)
{
struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
struct blk_plug plug;
@@ -485,18 +596,18 @@ static void ra_node_pages(struct page *parent, int start, int n)
end = min(end, NIDS_PER_BLOCK);
for (i = start; i < end; i++) {
nid = get_nid(parent, i, false);
- ra_node_page(sbi, nid);
+ f2fs_ra_node_page(sbi, nid);
}
blk_finish_plug(&plug);
}
-pgoff_t get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs)
+pgoff_t f2fs_get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs)
{
const long direct_index = ADDRS_PER_INODE(dn->inode);
- const long direct_blks = ADDRS_PER_BLOCK;
- const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
- unsigned int skipped_unit = ADDRS_PER_BLOCK;
+ const long direct_blks = ADDRS_PER_BLOCK(dn->inode);
+ const long indirect_blks = ADDRS_PER_BLOCK(dn->inode) * NIDS_PER_BLOCK;
+ unsigned int skipped_unit = ADDRS_PER_BLOCK(dn->inode);
int cur_level = dn->cur_level;
int max_level = dn->max_level;
pgoff_t base = 0;
@@ -530,9 +641,9 @@ static int get_node_path(struct inode *inode, long block,
int offset[4], unsigned int noffset[4])
{
const long direct_index = ADDRS_PER_INODE(inode);
- const long direct_blks = ADDRS_PER_BLOCK;
+ const long direct_blks = ADDRS_PER_BLOCK(inode);
const long dptrs_per_blk = NIDS_PER_BLOCK;
- const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
+ const long indirect_blks = ADDRS_PER_BLOCK(inode) * NIDS_PER_BLOCK;
const long dindirect_blks = indirect_blks * NIDS_PER_BLOCK;
int n = 0;
int level = 0;
@@ -606,7 +717,7 @@ got:
* f2fs_unlock_op() only if mode is not set to RDONLY_NODE.
* In the case of RDONLY_NODE, we don't need to care about the mutex.
*/
-int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
+int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
struct page *npage[4];
@@ -625,7 +736,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
npage[0] = dn->inode_page;
if (!npage[0]) {
- npage[0] = get_node_page(sbi, nids[0]);
+ npage[0] = f2fs_get_node_page(sbi, nids[0]);
if (IS_ERR(npage[0]))
return PTR_ERR(npage[0]);
}
@@ -649,24 +760,24 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
if (!nids[i] && mode == ALLOC_NODE) {
/* alloc new node */
- if (!alloc_nid(sbi, &(nids[i]))) {
+ if (!f2fs_alloc_nid(sbi, &(nids[i]))) {
err = -ENOSPC;
goto release_pages;
}
dn->nid = nids[i];
- npage[i] = new_node_page(dn, noffset[i]);
+ npage[i] = f2fs_new_node_page(dn, noffset[i]);
if (IS_ERR(npage[i])) {
- alloc_nid_failed(sbi, nids[i]);
+ f2fs_alloc_nid_failed(sbi, nids[i]);
err = PTR_ERR(npage[i]);
goto release_pages;
}
set_nid(parent, offset[i - 1], nids[i], i == 1);
- alloc_nid_done(sbi, nids[i]);
+ f2fs_alloc_nid_done(sbi, nids[i]);
done = true;
} else if (mode == LOOKUP_NODE_RA && i == level && level > 1) {
- npage[i] = get_node_page_ra(parent, offset[i - 1]);
+ npage[i] = f2fs_get_node_page_ra(parent, offset[i - 1]);
if (IS_ERR(npage[i])) {
err = PTR_ERR(npage[i]);
goto release_pages;
@@ -681,7 +792,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
}
if (!done) {
- npage[i] = get_node_page(sbi, nids[i]);
+ npage[i] = f2fs_get_node_page(sbi, nids[i]);
if (IS_ERR(npage[i])) {
err = PTR_ERR(npage[i]);
f2fs_put_page(npage[0], 0);
@@ -715,21 +826,24 @@ release_out:
return err;
}
-static void truncate_node(struct dnode_of_data *dn)
+static int truncate_node(struct dnode_of_data *dn)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
struct node_info ni;
+ int err;
pgoff_t index;
- get_node_info(sbi, dn->nid, &ni);
+ err = f2fs_get_node_info(sbi, dn->nid, &ni);
+ if (err)
+ return err;
/* Deallocate node address */
- invalidate_blocks(sbi, ni.blk_addr);
+ f2fs_invalidate_blocks(sbi, ni.blk_addr);
dec_valid_node_count(sbi, dn->inode, dn->nid == dn->inode->i_ino);
set_node_addr(sbi, &ni, NULL_ADDR, false);
if (dn->nid == dn->inode->i_ino) {
- remove_orphan_inode(sbi, dn->nid);
+ f2fs_remove_orphan_inode(sbi, dn->nid);
dec_valid_inode_count(sbi);
f2fs_inode_synced(dn->inode);
}
@@ -745,17 +859,20 @@ static void truncate_node(struct dnode_of_data *dn)
dn->node_page = NULL;
trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr);
+
+ return 0;
}
static int truncate_dnode(struct dnode_of_data *dn)
{
struct page *page;
+ int err;
if (dn->nid == 0)
return 1;
/* get direct node */
- page = get_node_page(F2FS_I_SB(dn->inode), dn->nid);
+ page = f2fs_get_node_page(F2FS_I_SB(dn->inode), dn->nid);
if (IS_ERR(page) && PTR_ERR(page) == -ENOENT)
return 1;
else if (IS_ERR(page))
@@ -764,8 +881,11 @@ static int truncate_dnode(struct dnode_of_data *dn)
/* Make dnode_of_data for parameter */
dn->node_page = page;
dn->ofs_in_node = 0;
- truncate_data_blocks(dn);
- truncate_node(dn);
+ f2fs_truncate_data_blocks(dn);
+ err = truncate_node(dn);
+ if (err)
+ return err;
+
return 1;
}
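	/* Note: truncate_dnode() keeps its "1 == one node freed" return
	 * convention; what changes is that a failure inside truncate_node()
	 * now propagates as a negative errno instead of being ignored. */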
@@ -785,13 +905,13 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr);
- page = get_node_page(F2FS_I_SB(dn->inode), dn->nid);
+ page = f2fs_get_node_page(F2FS_I_SB(dn->inode), dn->nid);
if (IS_ERR(page)) {
trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page));
return PTR_ERR(page);
}
- ra_node_pages(page, ofs, NIDS_PER_BLOCK);
+ f2fs_ra_node_pages(page, ofs, NIDS_PER_BLOCK);
rn = F2FS_NODE(page);
if (depth < 3) {
@@ -830,7 +950,9 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
if (!ofs) {
/* remove current indirect node */
dn->node_page = page;
- truncate_node(dn);
+ ret = truncate_node(dn);
+ if (ret)
+ goto out_err;
freed++;
} else {
f2fs_put_page(page, 1);
@@ -861,7 +983,7 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
/* get indirect nodes in the path */
for (i = 0; i < idx + 1; i++) {
/* reference count'll be increased */
- pages[i] = get_node_page(F2FS_I_SB(dn->inode), nid[i]);
+ pages[i] = f2fs_get_node_page(F2FS_I_SB(dn->inode), nid[i]);
if (IS_ERR(pages[i])) {
err = PTR_ERR(pages[i]);
idx = i - 1;
@@ -870,7 +992,7 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
nid[i + 1] = get_nid(pages[i], offset[i + 1], false);
}
- ra_node_pages(pages[idx], offset[idx + 1], NIDS_PER_BLOCK);
+ f2fs_ra_node_pages(pages[idx], offset[idx + 1], NIDS_PER_BLOCK);
/* free direct nodes linked to a partial indirect node */
for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) {
@@ -888,7 +1010,9 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
if (offset[idx + 1] == 0) {
dn->node_page = pages[idx];
dn->nid = nid[idx];
- truncate_node(dn);
+ err = truncate_node(dn);
+ if (err)
+ goto fail;
} else {
f2fs_put_page(pages[idx], 1);
}
@@ -907,7 +1031,7 @@ fail:
/*
* All the block addresses of data and nodes should be nullified.
*/
-int truncate_inode_blocks(struct inode *inode, pgoff_t from)
+int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
int err = 0, cont = 1;
@@ -923,7 +1047,7 @@ int truncate_inode_blocks(struct inode *inode, pgoff_t from)
if (level < 0)
return level;
- page = get_node_page(sbi, inode->i_ino);
+ page = f2fs_get_node_page(sbi, inode->i_ino);
if (IS_ERR(page)) {
trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(page));
return PTR_ERR(page);
@@ -987,7 +1111,7 @@ skip_partial:
ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) {
lock_page(page);
BUG_ON(page->mapping != NODE_MAPPING(sbi));
- f2fs_wait_on_page_writeback(page, NODE, true);
+ f2fs_wait_on_page_writeback(page, NODE, true, true);
ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
set_page_dirty(page);
unlock_page(page);
@@ -1003,24 +1127,30 @@ fail:
}
/* caller must lock inode page */
-int truncate_xattr_node(struct inode *inode)
+int f2fs_truncate_xattr_node(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
nid_t nid = F2FS_I(inode)->i_xattr_nid;
struct dnode_of_data dn;
struct page *npage;
+ int err;
if (!nid)
return 0;
- npage = get_node_page(sbi, nid);
+ npage = f2fs_get_node_page(sbi, nid);
if (IS_ERR(npage))
return PTR_ERR(npage);
+ set_new_dnode(&dn, inode, NULL, npage, nid);
+ err = truncate_node(&dn);
+ if (err) {
+ f2fs_put_page(npage, 1);
+ return err;
+ }
+
f2fs_i_xnid_write(inode, 0);
- set_new_dnode(&dn, inode, NULL, npage, nid);
- truncate_node(&dn);
return 0;
}
@@ -1028,17 +1158,17 @@ int truncate_xattr_node(struct inode *inode)
* Caller should grab and release a rwsem by calling f2fs_lock_op() and
* f2fs_unlock_op().
*/
-int remove_inode_page(struct inode *inode)
+int f2fs_remove_inode_page(struct inode *inode)
{
struct dnode_of_data dn;
int err;
set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
- err = get_dnode_of_data(&dn, 0, LOOKUP_NODE);
+ err = f2fs_get_dnode_of_data(&dn, 0, LOOKUP_NODE);
if (err)
return err;
- err = truncate_xattr_node(inode);
+ err = f2fs_truncate_xattr_node(inode);
if (err) {
f2fs_put_dnode(&dn);
return err;
@@ -1047,18 +1177,30 @@ int remove_inode_page(struct inode *inode)
/* remove potential inline_data blocks */
if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
S_ISLNK(inode->i_mode))
- truncate_data_blocks_range(&dn, 1);
+ f2fs_truncate_data_blocks_range(&dn, 1);
/* 0 is possible, after f2fs_new_inode() has failed */
- f2fs_bug_on(F2FS_I_SB(inode),
- inode->i_blocks != 0 && inode->i_blocks != 8);
+ if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
+ f2fs_put_dnode(&dn);
+ return -EIO;
+ }
+
+ if (unlikely(inode->i_blocks != 0 && inode->i_blocks != 8)) {
+ f2fs_warn(F2FS_I_SB(inode), "Inconsistent i_blocks, ino:%lu, iblocks:%llu",
+ inode->i_ino, (unsigned long long)inode->i_blocks);
+ set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
+ }
/* will put inode & node pages */
- truncate_node(&dn);
+ err = truncate_node(&dn);
+ if (err) {
+ f2fs_put_dnode(&dn);
+ return err;
+ }
return 0;
}
-struct page *new_inode_page(struct inode *inode)
+struct page *f2fs_new_inode_page(struct inode *inode)
{
struct dnode_of_data dn;
@@ -1066,10 +1208,10 @@ struct page *new_inode_page(struct inode *inode)
set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
/* caller should f2fs_put_page(page, 1); */
- return new_node_page(&dn, 0);
+ return f2fs_new_node_page(&dn, 0);
}
-struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs)
+struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
struct node_info new_ni;
@@ -1087,7 +1229,11 @@ struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs)
goto fail;
#ifdef CONFIG_F2FS_CHECK_FS
- get_node_info(sbi, dn->nid, &new_ni);
+ err = f2fs_get_node_info(sbi, dn->nid, &new_ni);
+ if (err) {
+ dec_valid_node_count(sbi, dn->inode, !ofs);
+ goto fail;
+ }
f2fs_bug_on(sbi, new_ni.blk_addr != NULL_ADDR);
#endif
new_ni.nid = dn->nid;
@@ -1097,7 +1243,7 @@ struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs)
new_ni.version = 0;
set_node_addr(sbi, &new_ni, NEW_ADDR, false);
- f2fs_wait_on_page_writeback(page, NODE, true);
+ f2fs_wait_on_page_writeback(page, NODE, true, true);
fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
set_cold_node(page, S_ISDIR(dn->inode->i_mode));
if (!PageUptodate(page))
@@ -1135,13 +1281,22 @@ static int read_node_page(struct page *page, int op_flags)
.page = page,
.encrypted_page = NULL,
};
+ int err;
- if (PageUptodate(page))
+ if (PageUptodate(page)) {
+ if (!f2fs_inode_chksum_verify(sbi, page)) {
+ ClearPageUptodate(page);
+ return -EFSBADCRC;
+ }
return LOCKED_PAGE;
+ }
- get_node_info(sbi, page->index, &ni);
+ err = f2fs_get_node_info(sbi, page->index, &ni);
+ if (err)
+ return err;
- if (unlikely(ni.blk_addr == NULL_ADDR)) {
+ if (unlikely(ni.blk_addr == NULL_ADDR) ||
+ is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN)) {
ClearPageUptodate(page);
return -ENOENT;
}
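	/* Reader's note: a checksum mismatch on an already-uptodate page now
	 * fails with -EFSBADCRC, while both the truncated-node path and the
	 * new SBI_IS_SHUTDOWN path return -ENOENT. */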
@@ -1153,14 +1308,15 @@ static int read_node_page(struct page *page, int op_flags)
/*
* Readahead a node page
*/
-void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
+void f2fs_ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
{
struct page *apage;
int err;
if (!nid)
return;
- f2fs_bug_on(sbi, check_nid_range(sbi, nid));
+ if (f2fs_check_nid_range(sbi, nid))
+ return;
rcu_read_lock();
apage = radix_tree_lookup(&NODE_MAPPING(sbi)->page_tree, nid);
@@ -1184,7 +1340,8 @@ static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid,
if (!nid)
return ERR_PTR(-ENOENT);
- f2fs_bug_on(sbi, check_nid_range(sbi, nid));
+ if (f2fs_check_nid_range(sbi, nid))
+ return ERR_PTR(-EINVAL);
repeat:
page = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false);
if (!page)
@@ -1200,7 +1357,7 @@ repeat:
}
if (parent)
- ra_node_pages(parent, start + 1, MAX_RA_NODE);
+ f2fs_ra_node_pages(parent, start + 1, MAX_RA_NODE);
lock_page(page);
@@ -1215,16 +1372,15 @@ repeat:
}
if (!f2fs_inode_chksum_verify(sbi, page)) {
- err = -EBADMSG;
+ err = -EFSBADCRC;
goto out_err;
}
page_hit:
if(unlikely(nid != nid_of_node(page))) {
- f2fs_msg(sbi->sb, KERN_WARNING, "inconsistent node block, "
- "nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]",
- nid, nid_of_node(page), ino_of_node(page),
- ofs_of_node(page), cpver_of_node(page),
- next_blkaddr_of_node(page));
+ f2fs_warn(sbi, "inconsistent node block, nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]",
+ nid, nid_of_node(page), ino_of_node(page),
+ ofs_of_node(page), cpver_of_node(page),
+ next_blkaddr_of_node(page));
err = -EINVAL;
out_err:
ClearPageUptodate(page);
@@ -1234,12 +1390,12 @@ out_err:
return page;
}
-struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
+struct page *f2fs_get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
{
return __get_node_page(sbi, nid, NULL, 0);
}
-struct page *get_node_page_ra(struct page *parent, int start)
+struct page *f2fs_get_node_page_ra(struct page *parent, int start)
{
struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
nid_t nid = get_nid(parent, start, false);
@@ -1274,7 +1430,7 @@ static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino)
ret = f2fs_write_inline_data(inode, page);
inode_dec_dirty_pages(inode);
- remove_dirty_inode(inode);
+ f2fs_remove_dirty_inode(inode);
if (ret)
set_page_dirty(page);
page_out:
@@ -1285,21 +1441,17 @@ iput_out:
static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino)
{
- pgoff_t index, end;
+ pgoff_t index;
struct pagevec pvec;
struct page *last_page = NULL;
+ int nr_pages;
pagevec_init(&pvec, 0);
index = 0;
- end = ULONG_MAX;
-
- while (index <= end) {
- int i, nr_pages;
- nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
- PAGECACHE_TAG_DIRTY,
- min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
- if (nr_pages == 0)
- break;
+
+ while ((nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
+ PAGECACHE_TAG_DIRTY))) {
+ int i;
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
@@ -1345,7 +1497,7 @@ continue_unlock:
static int __write_node_page(struct page *page, bool atomic, bool *submitted,
struct writeback_control *wbc, bool do_balance,
- enum iostat_type io_type)
+ enum iostat_type io_type, unsigned int *seq_id)
{
struct f2fs_sb_info *sbi = F2FS_P_SB(page);
nid_t nid;
@@ -1362,22 +1514,27 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
.io_type = io_type,
.io_wbc = wbc,
};
+ unsigned int seq;
trace_f2fs_writepage(page, NODE);
- if (unlikely(f2fs_cp_error(sbi))) {
- dec_page_count(sbi, F2FS_DIRTY_NODES);
- unlock_page(page);
- return 0;
- }
+ if (unlikely(f2fs_cp_error(sbi)))
+ goto redirty_out;
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
goto redirty_out;
+ if (wbc->sync_mode == WB_SYNC_NONE &&
+ IS_DNODE(page) && is_cold_node(page))
+ goto redirty_out;
+
/* get old block addr of this node page */
nid = nid_of_node(page);
f2fs_bug_on(sbi, page->index != nid);
+ if (f2fs_get_node_info(sbi, nid, &ni))
+ goto redirty_out;
+
if (wbc->for_reclaim) {
if (!down_read_trylock(&sbi->node_write))
goto redirty_out;
@@ -1385,8 +1542,6 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
down_read(&sbi->node_write);
}
- get_node_info(sbi, nid, &ni);
-
/* This page is already truncated */
if (unlikely(ni.blk_addr == NULL_ADDR)) {
ClearPageUptodate(page);
@@ -1396,20 +1551,33 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
return 0;
}
+ if (__is_valid_data_blkaddr(ni.blk_addr) &&
+ !f2fs_is_valid_blkaddr(sbi, ni.blk_addr,
+ DATA_GENERIC_ENHANCE)) {
+ up_read(&sbi->node_write);
+ goto redirty_out;
+ }
+
if (atomic && !test_opt(sbi, NOBARRIER))
fio.op_flags |= WRITE_FLUSH_FUA;
set_page_writeback(page);
ClearPageError(page);
+
+ if (f2fs_in_warm_node_list(sbi, page)) {
+ seq = f2fs_add_fsync_node_entry(sbi, page);
+ if (seq_id)
+ *seq_id = seq;
+ }
+
fio.old_blkaddr = ni.blk_addr;
- write_node_page(nid, &fio);
+ f2fs_do_write_node_page(nid, &fio);
set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page));
dec_page_count(sbi, F2FS_DIRTY_NODES);
up_read(&sbi->node_write);
if (wbc->for_reclaim) {
- f2fs_submit_merged_write_cond(sbi, page->mapping->host, 0,
- page->index, NODE);
+ f2fs_submit_merged_write_cond(sbi, NULL, page, 0, NODE);
submitted = NULL;
}
@@ -1431,8 +1599,10 @@ redirty_out:
return AOP_WRITEPAGE_ACTIVATE;
}
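	/* Reader's summary: __write_node_page() now registers each warm dnode
	 * it submits via f2fs_add_fsync_node_entry() and passes the resulting
	 * sequence number back through *seq_id, which is what
	 * f2fs_wait_on_node_pages_writeback() later waits on. */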
-void move_node_page(struct page *node_page, int gc_type)
+int f2fs_move_node_page(struct page *node_page, int gc_type)
{
+ int err = 0;
+
if (gc_type == FG_GC) {
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
@@ -1440,16 +1610,20 @@ void move_node_page(struct page *node_page, int gc_type)
.for_reclaim = 0,
};
+ f2fs_wait_on_page_writeback(node_page, NODE, true, true);
+
set_page_dirty(node_page);
- f2fs_wait_on_page_writeback(node_page, NODE, true);
- f2fs_bug_on(F2FS_P_SB(node_page), PageWriteback(node_page));
- if (!clear_page_dirty_for_io(node_page))
+ if (!clear_page_dirty_for_io(node_page)) {
+ err = -EAGAIN;
goto out_page;
+ }
if (__write_node_page(node_page, false, NULL,
- &wbc, false, FS_GC_NODE_IO))
+ &wbc, false, FS_GC_NODE_IO, NULL)) {
+ err = -EAGAIN;
unlock_page(node_page);
+ }
goto release_page;
} else {
/* set page dirty and write it */
@@ -1460,24 +1634,28 @@ out_page:
unlock_page(node_page);
release_page:
f2fs_put_page(node_page, 0);
+ return err;
}
static int f2fs_write_node_page(struct page *page,
struct writeback_control *wbc)
{
- return __write_node_page(page, false, NULL, wbc, false, FS_NODE_IO);
+ return __write_node_page(page, false, NULL, wbc, false,
+ FS_NODE_IO, NULL);
}
-int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
- struct writeback_control *wbc, bool atomic)
+int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
+ struct writeback_control *wbc, bool atomic,
+ unsigned int *seq_id)
{
- pgoff_t index, end;
- pgoff_t last_idx = ULONG_MAX;
+ pgoff_t index;
struct pagevec pvec;
int ret = 0;
struct page *last_page = NULL;
bool marked = false;
nid_t ino = inode->i_ino;
+ int nr_pages;
+ int nwritten = 0;
if (atomic) {
last_page = last_fsync_dnode(sbi, ino);
@@ -1487,15 +1665,10 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
retry:
pagevec_init(&pvec, 0);
index = 0;
- end = ULONG_MAX;
-
- while (index <= end) {
- int i, nr_pages;
- nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
- PAGECACHE_TAG_DIRTY,
- min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
- if (nr_pages == 0)
- break;
+
+ while ((nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
+ PAGECACHE_TAG_DIRTY))) {
+ int i;
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
@@ -1528,8 +1701,7 @@ continue_unlock:
goto continue_unlock;
}
- f2fs_wait_on_page_writeback(page, NODE, true);
- BUG_ON(PageWriteback(page));
+ f2fs_wait_on_page_writeback(page, NODE, true, true);
set_fsync_mark(page, 0);
set_dentry_mark(page, 0);
@@ -1539,9 +1711,9 @@ continue_unlock:
if (IS_INODE(page)) {
if (is_inode_flag_set(inode,
FI_DIRTY_INODE))
- update_inode(inode, page);
+ f2fs_update_inode(inode, page);
set_dentry_mark(page,
- need_dentry_mark(sbi, ino));
+ f2fs_need_dentry_mark(sbi, ino));
}
/* may be written by other thread */
if (!PageDirty(page))
@@ -1554,13 +1726,13 @@ continue_unlock:
ret = __write_node_page(page, atomic &&
page == last_page,
&submitted, wbc, true,
- FS_NODE_IO);
+ FS_NODE_IO, seq_id);
if (ret) {
unlock_page(page);
f2fs_put_page(last_page, 0);
break;
} else if (submitted) {
- last_idx = page->index;
+ nwritten++;
}
if (page == last_page) {
@@ -1576,48 +1748,51 @@ continue_unlock:
break;
}
if (!ret && atomic && !marked) {
- f2fs_msg(sbi->sb, KERN_DEBUG,
- "Retry to write fsync mark: ino=%u, idx=%lx",
- ino, last_page->index);
+ f2fs_debug(sbi, "Retry to write fsync mark: ino=%u, idx=%lx",
+ ino, last_page->index);
lock_page(last_page);
- f2fs_wait_on_page_writeback(last_page, NODE, true);
+ f2fs_wait_on_page_writeback(last_page, NODE, true, true);
set_page_dirty(last_page);
unlock_page(last_page);
goto retry;
}
out:
- if (last_idx != ULONG_MAX)
- f2fs_submit_merged_write_cond(sbi, NULL, ino, last_idx, NODE);
+ if (nwritten)
+ f2fs_submit_merged_write_cond(sbi, NULL, NULL, ino, NODE);
return ret ? -EIO: 0;
}
-int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc,
+int f2fs_sync_node_pages(struct f2fs_sb_info *sbi,
+ struct writeback_control *wbc,
bool do_balance, enum iostat_type io_type)
{
- pgoff_t index, end;
+ pgoff_t index;
struct pagevec pvec;
int step = 0;
int nwritten = 0;
int ret = 0;
+ int nr_pages, done = 0;
pagevec_init(&pvec, 0);
next_step:
index = 0;
- end = ULONG_MAX;
-
- while (index <= end) {
- int i, nr_pages;
- nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
- PAGECACHE_TAG_DIRTY,
- min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
- if (nr_pages == 0)
- break;
+
+ while (!done && (nr_pages = pagevec_lookup_tag(&pvec,
+ NODE_MAPPING(sbi), &index, PAGECACHE_TAG_DIRTY))) {
+ int i;
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
bool submitted = false;
+ /* give a priority to WB_SYNC threads */
+ if (atomic_read(&sbi->wb_sync_req[NODE]) &&
+ wbc->sync_mode == WB_SYNC_NONE) {
+ done = 1;
+ break;
+ }
+
/*
* flushing sequence with step:
* 0. indirect nodes
@@ -1633,7 +1808,9 @@ next_step:
!is_cold_node(page)))
continue;
lock_node:
- if (!trylock_page(page))
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ lock_page(page);
+ else if (!trylock_page(page))
continue;
if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
@@ -1655,9 +1832,8 @@ continue_unlock:
goto lock_node;
}
- f2fs_wait_on_page_writeback(page, NODE, true);
+ f2fs_wait_on_page_writeback(page, NODE, true, true);
- BUG_ON(PageWriteback(page));
if (!clear_page_dirty_for_io(page))
goto continue_unlock;
@@ -1665,7 +1841,7 @@ continue_unlock:
set_dentry_mark(page, 0);
ret = __write_node_page(page, false, &submitted,
- wbc, do_balance, io_type);
+ wbc, do_balance, io_type, NULL);
if (ret)
unlock_page(page);
else if (submitted)
@@ -1684,10 +1860,12 @@ continue_unlock:
}
if (step < 2) {
+ if (wbc->sync_mode == WB_SYNC_NONE && step == 1)
+ goto out;
step++;
goto next_step;
}
-
+out:
if (nwritten)
f2fs_submit_merged_write(sbi, NODE);
@@ -1696,37 +1874,40 @@ continue_unlock:
return ret;
}
-int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
+int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi,
+ unsigned int seq_id)
{
- pgoff_t index = 0, end = ULONG_MAX;
- struct pagevec pvec;
+ struct fsync_node_entry *fn;
+ struct page *page;
+ struct list_head *head = &sbi->fsync_node_list;
+ unsigned long flags;
+ unsigned int cur_seq_id = 0;
int ret2 = 0, ret = 0;
- pagevec_init(&pvec, 0);
-
- while (index <= end) {
- int i, nr_pages;
- nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
- PAGECACHE_TAG_WRITEBACK,
- min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
- if (nr_pages == 0)
+ while (seq_id && cur_seq_id < seq_id) {
+ spin_lock_irqsave(&sbi->fsync_node_lock, flags);
+ if (list_empty(head)) {
+ spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
+ break;
+ }
+ fn = list_first_entry(head, struct fsync_node_entry, list);
+ if (fn->seq_id > seq_id) {
+ spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
break;
+ }
+ cur_seq_id = fn->seq_id;
+ page = fn->page;
+ get_page(page);
+ spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
+ f2fs_wait_on_page_writeback(page, NODE, true, false);
+ if (TestClearPageError(page))
+ ret = -EIO;
- /* until radix tree lookup accepts end_index */
- if (unlikely(page->index > end))
- continue;
+ put_page(page);
- if (ino && ino_of_node(page) == ino) {
- f2fs_wait_on_page_writeback(page, NODE, true);
- if (TestClearPageError(page))
- ret = -EIO;
- }
- }
- pagevec_release(&pvec);
- cond_resched();
+ if (ret)
+ break;
}
if (unlikely(test_and_clear_bit(AS_ENOSPC, &NODE_MAPPING(sbi)->flags)))
@@ -1735,6 +1916,7 @@ int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
ret2 = -EIO;
if (!ret)
ret = ret2;
+
return ret;
}
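	/* Reader's note: the old radix-tree-wide PAGECACHE_TAG_WRITEBACK scan
	 * is replaced by walking fsync_node_list up to the caller's seq_id;
	 * entries are expected to be removed by f2fs_del_fsync_node_entry()
	 * from the write-back completion path (not shown in this hunk), so the
	 * loop only pins each page long enough to wait on it. */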
@@ -1752,17 +1934,26 @@ static int f2fs_write_node_pages(struct address_space *mapping,
f2fs_balance_fs_bg(sbi);
/* collect a number of dirty node pages and write together */
- if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE))
+ if (wbc->sync_mode != WB_SYNC_ALL &&
+ get_pages(sbi, F2FS_DIRTY_NODES) <
+ nr_pages_to_skip(sbi, NODE))
+ goto skip_write;
+
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ atomic_inc(&sbi->wb_sync_req[NODE]);
+ else if (atomic_read(&sbi->wb_sync_req[NODE]))
goto skip_write;
trace_f2fs_writepages(mapping->host, wbc, NODE);
diff = nr_pages_to_write(sbi, NODE, wbc);
- wbc->sync_mode = WB_SYNC_NONE;
blk_start_plug(&plug);
- sync_node_pages(sbi, wbc, true, FS_NODE_IO);
+ f2fs_sync_node_pages(sbi, wbc, true, FS_NODE_IO);
blk_finish_plug(&plug);
wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
+
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ atomic_dec(&sbi->wb_sync_req[NODE]);
return 0;
skip_write:
@@ -1777,10 +1968,14 @@ static int f2fs_set_node_page_dirty(struct page *page)
if (!PageUptodate(page))
SetPageUptodate(page);
+#ifdef CONFIG_F2FS_CHECK_FS
+ if (IS_INODE(page))
+ f2fs_inode_chksum_set(F2FS_P_SB(page), page);
+#endif
if (!PageDirty(page)) {
__set_page_dirty_nobuffers(page);
inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
- SetPagePrivate(page);
+ f2fs_set_page_private(page, 0);
f2fs_trace_pid(page);
return 1;
}
@@ -1895,6 +2090,9 @@ static bool add_free_nid(struct f2fs_sb_info *sbi,
if (unlikely(nid == 0))
return false;
+ if (unlikely(f2fs_check_nid_range(sbi, nid)))
+ return false;
+
i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
i->nid = nid;
i->state = FREE_NID;
@@ -1908,20 +2106,20 @@ static bool add_free_nid(struct f2fs_sb_info *sbi,
* Thread A Thread B
* - f2fs_create
* - f2fs_new_inode
- * - alloc_nid
+ * - f2fs_alloc_nid
* - __insert_nid_to_list(PREALLOC_NID)
* - f2fs_balance_fs_bg
- * - build_free_nids
- * - __build_free_nids
+ * - f2fs_build_free_nids
+ * - __f2fs_build_free_nids
* - scan_nat_page
* - add_free_nid
* - __lookup_nat_cache
* - f2fs_add_link
- * - init_inode_metadata
- * - new_inode_page
- * - new_node_page
+ * - f2fs_init_inode_metadata
+ * - f2fs_new_inode_page
+ * - f2fs_new_node_page
* - set_node_addr
- * - alloc_nid_done
+ * - f2fs_alloc_nid_done
* - __remove_nid_from_list(PREALLOC_NID)
* - __insert_nid_to_list(FREE_NID)
*/
@@ -1971,7 +2169,7 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
kmem_cache_free(free_nid_slab, i);
}
-static void scan_nat_page(struct f2fs_sb_info *sbi,
+static int scan_nat_page(struct f2fs_sb_info *sbi,
struct page *nat_page, nid_t start_nid)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -1989,7 +2187,10 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
break;
blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
- f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
+
+ if (blk_addr == NEW_ADDR)
+ return -EINVAL;
+
if (blk_addr == NULL_ADDR) {
add_free_nid(sbi, start_nid, true, true);
} else {
@@ -1998,6 +2199,8 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
spin_unlock(&NM_I(sbi)->nid_list_lock);
}
}
+
+ return 0;
}
static void scan_curseg_cache(struct f2fs_sb_info *sbi)
@@ -2053,10 +2256,11 @@ out:
up_read(&nm_i->nat_tree_lock);
}
-static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
+static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi,
+ bool sync, bool mount)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
- int i = 0;
+ int i = 0, ret;
nid_t nid = nm_i->next_scan_nid;
if (unlikely(nid >= nm_i->max_nid))
@@ -2064,21 +2268,21 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
/* Enough entries */
if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK)
- return;
+ return 0;
- if (!sync && !available_free_memory(sbi, FREE_NIDS))
- return;
+ if (!sync && !f2fs_available_free_memory(sbi, FREE_NIDS))
+ return 0;
if (!mount) {
/* try to find free nids in free_nid_bitmap */
scan_free_nid_bits(sbi);
if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK)
- return;
+ return 0;
}
/* readahead nat pages to be scanned */
- ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
+ f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
META_NAT, true);
down_read(&nm_i->nat_tree_lock);
@@ -2088,8 +2292,19 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
nm_i->nat_block_bitmap)) {
struct page *page = get_current_nat_page(sbi, nid);
- scan_nat_page(sbi, page, nid);
- f2fs_put_page(page, 1);
+ if (IS_ERR(page)) {
+ ret = PTR_ERR(page);
+ } else {
+ ret = scan_nat_page(sbi, page, nid);
+ f2fs_put_page(page, 1);
+ }
+
+ if (ret) {
+ up_read(&nm_i->nat_tree_lock);
+ f2fs_bug_on(sbi, !mount);
+ f2fs_err(sbi, "NAT is corrupt, run fsck to fix it");
+ return ret;
+ }
}
nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
@@ -2108,15 +2323,21 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
up_read(&nm_i->nat_tree_lock);
- ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
+ f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
nm_i->ra_nid_pages, META_NAT, false);
+
+ return 0;
}
-void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
+int f2fs_build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
{
+ int ret;
+
mutex_lock(&NM_I(sbi)->build_lock);
- __build_free_nids(sbi, sync, mount);
+ ret = __f2fs_build_free_nids(sbi, sync, mount);
mutex_unlock(&NM_I(sbi)->build_lock);
+
+ return ret;
}
/*
@@ -2124,17 +2345,16 @@ void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
* from second parameter of this function.
* The returned nid could be used ino as well as nid when inode is created.
*/
-bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
+bool f2fs_alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i = NULL;
retry:
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_ALLOC_NID)) {
f2fs_show_injection_info(FAULT_ALLOC_NID);
return false;
}
-#endif
+
spin_lock(&nm_i->nid_list_lock);
if (unlikely(nm_i->available_nids == 0)) {
@@ -2142,8 +2362,8 @@ retry:
return false;
}
- /* We should not use stale free nids created by build_free_nids */
- if (nm_i->nid_cnt[FREE_NID] && !on_build_free_nids(nm_i)) {
+ /* We should not use stale free nids created by f2fs_build_free_nids */
+ if (nm_i->nid_cnt[FREE_NID] && !on_f2fs_build_free_nids(nm_i)) {
f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list));
i = list_first_entry(&nm_i->free_nid_list,
struct free_nid, list);
@@ -2160,14 +2380,15 @@ retry:
spin_unlock(&nm_i->nid_list_lock);
/* Let's scan nat pages and its caches to get free nids */
- build_free_nids(sbi, true, false);
- goto retry;
+ if (!f2fs_build_free_nids(sbi, true, false))
+ goto retry;
+ return false;
}
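For reference, the nid allocation protocol these renames preserve (the same
triple f2fs_get_dnode_of_data() uses earlier in this patch; sketch only):

	if (!f2fs_alloc_nid(sbi, &nid))
		return -ENOSPC;
	page = f2fs_new_node_page(&dn, ofs);
	if (IS_ERR(page)) {
		f2fs_alloc_nid_failed(sbi, nid);	/* return the nid */
		return PTR_ERR(page);
	}
	f2fs_alloc_nid_done(sbi, nid);			/* commit the allocation */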
/*
- * alloc_nid() should be called prior to this function.
+ * f2fs_alloc_nid() should be called prior to this function.
*/
-void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
+void f2fs_alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i;
@@ -2182,9 +2403,9 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
}
/*
- * alloc_nid() should be called prior to this function.
+ * f2fs_alloc_nid() should be called prior to this function.
*/
-void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
+void f2fs_alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i;
@@ -2197,7 +2418,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
i = __lookup_free_nid_list(nm_i, nid);
f2fs_bug_on(sbi, !i);
- if (!available_free_memory(sbi, FREE_NIDS)) {
+ if (!f2fs_available_free_memory(sbi, FREE_NIDS)) {
__remove_free_nid(sbi, i, PREALLOC_NID);
need_free = true;
} else {
@@ -2214,7 +2435,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
kmem_cache_free(free_nid_slab, i);
}
-int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink)
+int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i, *next;
@@ -2242,14 +2463,14 @@ int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink)
return nr - nr_shrink;
}
-void recover_inline_xattr(struct inode *inode, struct page *page)
+void f2fs_recover_inline_xattr(struct inode *inode, struct page *page)
{
void *src_addr, *dst_addr;
size_t inline_size;
struct page *ipage;
struct f2fs_inode *ri;
- ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
+ ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino);
f2fs_bug_on(F2FS_I_SB(inode), IS_ERR(ipage));
ri = F2FS_INODE(page);
@@ -2264,14 +2485,14 @@ void recover_inline_xattr(struct inode *inode, struct page *page)
src_addr = inline_xattr_addr(inode, page);
inline_size = inline_xattr_size(inode);
- f2fs_wait_on_page_writeback(ipage, NODE, true);
+ f2fs_wait_on_page_writeback(ipage, NODE, true, true);
memcpy(dst_addr, src_addr, inline_size);
update_inode:
- update_inode(inode, ipage);
+ f2fs_update_inode(inode, ipage);
f2fs_put_page(ipage, 1);
}
-int recover_xattr_data(struct inode *inode, struct page *page)
+int f2fs_recover_xattr_data(struct inode *inode, struct page *page)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid;
@@ -2279,30 +2500,34 @@ int recover_xattr_data(struct inode *inode, struct page *page)
struct dnode_of_data dn;
struct node_info ni;
struct page *xpage;
+ int err;
if (!prev_xnid)
goto recover_xnid;
/* 1: invalidate the previous xattr nid */
- get_node_info(sbi, prev_xnid, &ni);
- invalidate_blocks(sbi, ni.blk_addr);
+ err = f2fs_get_node_info(sbi, prev_xnid, &ni);
+ if (err)
+ return err;
+
+ f2fs_invalidate_blocks(sbi, ni.blk_addr);
dec_valid_node_count(sbi, inode, false);
set_node_addr(sbi, &ni, NULL_ADDR, false);
recover_xnid:
/* 2: update xattr nid in inode */
- if (!alloc_nid(sbi, &new_xnid))
+ if (!f2fs_alloc_nid(sbi, &new_xnid))
return -ENOSPC;
set_new_dnode(&dn, inode, NULL, NULL, new_xnid);
- xpage = new_node_page(&dn, XATTR_NODE_OFFSET);
+ xpage = f2fs_new_node_page(&dn, XATTR_NODE_OFFSET);
if (IS_ERR(xpage)) {
- alloc_nid_failed(sbi, new_xnid);
+ f2fs_alloc_nid_failed(sbi, new_xnid);
return PTR_ERR(xpage);
}
- alloc_nid_done(sbi, new_xnid);
- update_inode_page(inode);
+ f2fs_alloc_nid_done(sbi, new_xnid);
+ f2fs_update_inode_page(inode);
/* 3: update and set xattr node page dirty */
memcpy(F2FS_NODE(xpage), F2FS_NODE(page), VALID_XATTR_BLOCK_SIZE);
@@ -2313,14 +2538,17 @@ recover_xnid:
return 0;
}
-int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
+int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
{
struct f2fs_inode *src, *dst;
nid_t ino = ino_of_node(page);
struct node_info old_ni, new_ni;
struct page *ipage;
+ int err;
- get_node_info(sbi, ino, &old_ni);
+ err = f2fs_get_node_info(sbi, ino, &old_ni);
+ if (err)
+ return err;
if (unlikely(old_ni.blk_addr != NULL_ADDR))
return -EINVAL;
@@ -2337,7 +2565,7 @@ retry:
if (!PageUptodate(ipage))
SetPageUptodate(ipage);
fill_node_footer(ipage, ino, ino, 0, true);
- set_cold_node(page, false);
+ set_cold_node(ipage, false);
src = F2FS_INODE(page);
dst = F2FS_INODE(ipage);
@@ -2351,15 +2579,22 @@ retry:
if (dst->i_inline & F2FS_EXTRA_ATTR) {
dst->i_extra_isize = src->i_extra_isize;
- if (f2fs_sb_has_flexible_inline_xattr(sbi->sb) &&
+ if (f2fs_sb_has_flexible_inline_xattr(sbi) &&
F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize),
i_inline_xattr_size))
dst->i_inline_xattr_size = src->i_inline_xattr_size;
- if (f2fs_sb_has_project_quota(sbi->sb) &&
+ if (f2fs_sb_has_project_quota(sbi) &&
F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize),
i_projid))
dst->i_projid = src->i_projid;
+
+ if (f2fs_sb_has_inode_crtime(sbi) &&
+ F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize),
+ i_crtime_nsec)) {
+ dst->i_crtime = src->i_crtime;
+ dst->i_crtime_nsec = src->i_crtime_nsec;
+ }
}
new_ni = old_ni;
@@ -2374,7 +2609,7 @@ retry:
return 0;
}
-void restore_node_summary(struct f2fs_sb_info *sbi,
+int f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
unsigned int segno, struct f2fs_summary_block *sum)
{
struct f2fs_node *rn;
@@ -2391,10 +2626,13 @@ void restore_node_summary(struct f2fs_sb_info *sbi,
nrpages = min(last_offset - i, BIO_MAX_PAGES);
/* readahead node pages */
- ra_meta_pages(sbi, addr, nrpages, META_POR, true);
+ f2fs_ra_meta_pages(sbi, addr, nrpages, META_POR, true);
for (idx = addr; idx < addr + nrpages; idx++) {
- struct page *page = get_tmp_page(sbi, idx);
+ struct page *page = f2fs_get_tmp_page(sbi, idx);
+
+ if (IS_ERR(page))
+ return PTR_ERR(page);
rn = F2FS_NODE(page);
sum_entry->nid = rn->footer.nid;
@@ -2407,6 +2645,7 @@ void restore_node_summary(struct f2fs_sb_info *sbi,
invalidate_mapping_pages(META_MAPPING(sbi), addr,
addr + nrpages);
}
+ return 0;
}
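The conversion above is the recurring shape of this whole patch: metadata page getters now return ERR_PTR() instead of hitting f2fs_bug_on() internally, and their callers change from void to int so the error can propagate up to checkpoint and recovery. A distilled before/after sketch (illustrative, not taken verbatim from the tree):

	/* before: getter treated as infallible, caller returns void */
	static void old_shape(struct f2fs_sb_info *sbi, block_t idx)
	{
		struct page *page = get_tmp_page(sbi, idx);

		/* ... use page ... */
		f2fs_put_page(page, 1);
	}

	/* after: getter may fail, caller propagates the error */
	static int new_shape(struct f2fs_sb_info *sbi, block_t idx)
	{
		struct page *page = f2fs_get_tmp_page(sbi, idx);

		if (IS_ERR(page))
			return PTR_ERR(page);
		/* ... use page ... */
		f2fs_put_page(page, 1);
		return 0;
	}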
static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
@@ -2483,7 +2722,7 @@ static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
i = 1;
}
for (; i < NAT_ENTRY_PER_BLOCK; i++) {
- if (nat_blk->entries[i].block_addr != NULL_ADDR)
+ if (le32_to_cpu(nat_blk->entries[i].block_addr) != NULL_ADDR)
valid++;
}
if (valid == 0) {
@@ -2499,7 +2738,7 @@ static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
__clear_bit_le(nat_index, nm_i->full_nat_bits);
}
-static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
+static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
struct nat_entry_set *set, struct cp_control *cpc)
{
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -2523,6 +2762,9 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
down_write(&curseg->journal_rwsem);
} else {
page = get_next_nat_page(sbi, start_nid);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
+
nat_blk = page_address(page);
f2fs_bug_on(sbi, !nat_blk);
}
@@ -2536,7 +2778,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
f2fs_bug_on(sbi, nat_get_blkaddr(ne) == NEW_ADDR);
if (to_journal) {
- offset = lookup_journal_in_cursum(journal,
+ offset = f2fs_lookup_journal_in_cursum(journal,
NAT_JOURNAL, nid, 1);
f2fs_bug_on(sbi, offset < 0);
raw_ne = &nat_in_journal(journal, offset);
@@ -2568,12 +2810,13 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
kmem_cache_free(nat_entry_set_slab, set);
}
+ return 0;
}
/*
* This function is called during the checkpointing process.
*/
-void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -2583,9 +2826,17 @@ void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
unsigned int found;
nid_t set_idx = 0;
LIST_HEAD(sets);
+ int err = 0;
+
+ /* during unmount, let's flush nat_bits before checking dirty_nat_cnt */
+ if (enabled_nat_bits(sbi, cpc)) {
+ down_write(&nm_i->nat_tree_lock);
+ remove_nats_in_journal(sbi);
+ up_write(&nm_i->nat_tree_lock);
+ }
if (!nm_i->dirty_nat_cnt)
- return;
+ return 0;
down_write(&nm_i->nat_tree_lock);
@@ -2608,11 +2859,16 @@ void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
}
/* flush dirty nats in nat entry set */
- list_for_each_entry_safe(set, tmp, &sets, set_list)
- __flush_nat_entry_set(sbi, set, cpc);
+ list_for_each_entry_safe(set, tmp, &sets, set_list) {
+ err = __flush_nat_entry_set(sbi, set, cpc);
+ if (err)
+ break;
+ }
up_write(&nm_i->nat_tree_lock);
/* Allow dirty nats by node block allocation in write_begin */
+
+ return err;
}
static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
@@ -2636,7 +2892,11 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg -
nm_i->nat_bits_blocks;
for (i = 0; i < nm_i->nat_bits_blocks; i++) {
- struct page *page = get_meta_page(sbi, nat_bits_addr++);
+ struct page *page;
+
+ page = f2fs_get_meta_page(sbi, nat_bits_addr++);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
page_address(page), F2FS_BLKSIZE);
@@ -2652,7 +2912,7 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
nm_i->full_nat_bits = nm_i->nat_bits + 8;
nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;
- f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint");
+ f2fs_notice(sbi, "Found nat_bits in checkpoint");
return 0;
}
@@ -2720,6 +2980,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO);
INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO);
INIT_LIST_HEAD(&nm_i->nat_entries);
+ spin_lock_init(&nm_i->nat_list_lock);
mutex_init(&nm_i->build_lock);
spin_lock_init(&nm_i->nid_list_lock);
@@ -2755,15 +3016,17 @@ static int init_free_nid_cache(struct f2fs_sb_info *sbi)
struct f2fs_nm_info *nm_i = NM_I(sbi);
int i;
- nm_i->free_nid_bitmap = f2fs_kzalloc(sbi, nm_i->nat_blocks *
- sizeof(unsigned char *), GFP_KERNEL);
+ nm_i->free_nid_bitmap =
+ f2fs_kzalloc(sbi, array_size(sizeof(unsigned char *),
+ nm_i->nat_blocks),
+ GFP_KERNEL);
if (!nm_i->free_nid_bitmap)
return -ENOMEM;
for (i = 0; i < nm_i->nat_blocks; i++) {
nm_i->free_nid_bitmap[i] = f2fs_kvzalloc(sbi,
- NAT_ENTRY_BITMAP_SIZE_ALIGNED, GFP_KERNEL);
- if (!nm_i->free_nid_bitmap)
+ f2fs_bitmap_size(NAT_ENTRY_PER_BLOCK), GFP_KERNEL);
+ if (!nm_i->free_nid_bitmap[i])
return -ENOMEM;
}
@@ -2772,14 +3035,16 @@ static int init_free_nid_cache(struct f2fs_sb_info *sbi)
if (!nm_i->nat_block_bitmap)
return -ENOMEM;
- nm_i->free_nid_count = f2fs_kvzalloc(sbi, nm_i->nat_blocks *
- sizeof(unsigned short), GFP_KERNEL);
+ nm_i->free_nid_count =
+ f2fs_kvzalloc(sbi, array_size(sizeof(unsigned short),
+ nm_i->nat_blocks),
+ GFP_KERNEL);
if (!nm_i->free_nid_count)
return -ENOMEM;
return 0;
}
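The allocation changes in this hunk replace open-coded `count * size` multiplications with array_size() from <linux/overflow.h>, which saturates to SIZE_MAX on overflow so that the allocator fails cleanly instead of returning a silently undersized buffer. A minimal sketch of the idiom:

	#include <linux/overflow.h>

	/* array_size(a, b) == a * b, except it evaluates to SIZE_MAX when
	 * the multiplication would overflow, guaranteeing kvzalloc()
	 * returns NULL rather than a too-small buffer.
	 */
	nm_i->free_nid_count = f2fs_kvzalloc(sbi,
			array_size(sizeof(unsigned short), nm_i->nat_blocks),
			GFP_KERNEL);
	if (!nm_i->free_nid_count)
		return -ENOMEM;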
-int build_node_manager(struct f2fs_sb_info *sbi)
+int f2fs_build_node_manager(struct f2fs_sb_info *sbi)
{
int err;
@@ -2799,11 +3064,10 @@ int build_node_manager(struct f2fs_sb_info *sbi)
/* load free nid status from nat_bits table */
load_free_nid_bitmap(sbi);
- build_free_nids(sbi, true, true);
- return 0;
+ return f2fs_build_free_nids(sbi, true, true);
}
-void destroy_node_manager(struct f2fs_sb_info *sbi)
+void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i, *next_i;
@@ -2835,8 +3099,13 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
unsigned idx;
nid = nat_get_nid(natvec[found - 1]) + 1;
- for (idx = 0; idx < found; idx++)
+ for (idx = 0; idx < found; idx++) {
+ spin_lock(&nm_i->nat_list_lock);
+ list_del(&natvec[idx]->list);
+ spin_unlock(&nm_i->nat_list_lock);
+
__del_from_nat_cache(nm_i, natvec[idx]);
+ }
}
f2fs_bug_on(sbi, nm_i->nat_cnt);
@@ -2862,20 +3131,20 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
for (i = 0; i < nm_i->nat_blocks; i++)
kvfree(nm_i->free_nid_bitmap[i]);
- kfree(nm_i->free_nid_bitmap);
+ kvfree(nm_i->free_nid_bitmap);
}
kvfree(nm_i->free_nid_count);
- kfree(nm_i->nat_bitmap);
- kfree(nm_i->nat_bits);
+ kvfree(nm_i->nat_bitmap);
+ kvfree(nm_i->nat_bits);
#ifdef CONFIG_F2FS_CHECK_FS
- kfree(nm_i->nat_bitmap_mir);
+ kvfree(nm_i->nat_bitmap_mir);
#endif
sbi->nm_info = NULL;
- kfree(nm_i);
+ kvfree(nm_i);
}
-int __init create_node_manager_caches(void)
+int __init f2fs_create_node_manager_caches(void)
{
nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
sizeof(struct nat_entry));
@@ -2891,8 +3160,15 @@ int __init create_node_manager_caches(void)
sizeof(struct nat_entry_set));
if (!nat_entry_set_slab)
goto destroy_free_nid;
+
+ fsync_node_entry_slab = f2fs_kmem_cache_create("fsync_node_entry",
+ sizeof(struct fsync_node_entry));
+ if (!fsync_node_entry_slab)
+ goto destroy_nat_entry_set;
return 0;
+destroy_nat_entry_set:
+ kmem_cache_destroy(nat_entry_set_slab);
destroy_free_nid:
kmem_cache_destroy(free_nid_slab);
destroy_nat_entry:
@@ -2901,8 +3177,9 @@ fail:
return -ENOMEM;
}
-void destroy_node_manager_caches(void)
+void f2fs_destroy_node_manager_caches(void)
{
+ kmem_cache_destroy(fsync_node_entry_slab);
kmem_cache_destroy(nat_entry_set_slab);
kmem_cache_destroy(free_nid_slab);
kmem_cache_destroy(nat_entry_slab);
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index b95e49e4a928..e05af5df5648 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/node.h
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
/* start node id of a node block dedicated to the given node id */
#define START_NID(nid) (((nid) / NAT_ENTRY_PER_BLOCK) * NAT_ENTRY_PER_BLOCK)
@@ -135,6 +132,11 @@ static inline bool excess_cached_nats(struct f2fs_sb_info *sbi)
return NM_I(sbi)->nat_cnt >= DEF_NAT_CACHE_THRESHOLD;
}
+static inline bool excess_dirty_nodes(struct f2fs_sb_info *sbi)
+{
+ return get_pages(sbi, F2FS_DIRTY_NODES) >= sbi->blocks_per_seg * 8;
+}
+
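Worked example of the new threshold (default geometry assumed): with 2 MiB segments, blocks_per_seg = 512, so excess_dirty_nodes() reports pressure once 512 * 8 = 4096 dirty node pages (16 MiB at 4 KiB per page) are outstanding; this is the trigger wired into f2fs_balance_fs_bg() later in this patch.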
enum mem_type {
FREE_NIDS, /* indicates the free nid list */
NAT_ENTRIES, /* indicates the cached nat entry */
@@ -359,7 +361,7 @@ static inline int set_nid(struct page *p, int off, nid_t nid, bool i)
{
struct f2fs_node *rn = F2FS_NODE(p);
- f2fs_wait_on_page_writeback(p, NODE, true);
+ f2fs_wait_on_page_writeback(p, NODE, true, true);
if (i)
rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid);
@@ -444,6 +446,10 @@ static inline void set_mark(struct page *page, int mark, int type)
else
flag &= ~(0x1 << type);
rn->footer.flag = cpu_to_le32(flag);
+
+#ifdef CONFIG_F2FS_CHECK_FS
+ f2fs_inode_chksum_set(F2FS_P_SB(page), page);
+#endif
}
#define set_dentry_mark(page, mark) set_mark(page, mark, DENT_BIT_SHIFT)
#define set_fsync_mark(page, mark) set_mark(page, mark, FSYNC_BIT_SHIFT)
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index c2355b486669..5d0c77054252 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/recovery.c
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
@@ -47,7 +44,7 @@
static struct kmem_cache *fsync_entry_slab;
-bool space_for_roll_forward(struct f2fs_sb_info *sbi)
+bool f2fs_space_for_roll_forward(struct f2fs_sb_info *sbi)
{
s64 nalloc = percpu_counter_sum_positive(&sbi->alloc_valid_block_count);
@@ -99,8 +96,12 @@ err_out:
return ERR_PTR(err);
}
-static void del_fsync_inode(struct fsync_inode_entry *entry)
+static void del_fsync_inode(struct fsync_inode_entry *entry, int drop)
{
+ if (drop) {
+ /* inode should not be recovered, drop it */
+ f2fs_inode_synced(entry->inode);
+ }
iput(entry->inode);
list_del(&entry->list);
kmem_cache_free(fsync_entry_slab, entry);
@@ -162,7 +163,7 @@ retry:
goto out_put;
}
- err = acquire_orphan_inode(F2FS_I_SB(inode));
+ err = f2fs_acquire_orphan_inode(F2FS_I_SB(inode));
if (err) {
iput(einode);
goto out_put;
@@ -173,7 +174,7 @@ retry:
} else if (IS_ERR(page)) {
err = PTR_ERR(page);
} else {
- err = __f2fs_do_add_link(dir, &fname, inode,
+ err = f2fs_add_dentry(dir, &fname, inode,
inode->i_ino, inode->i_mode);
}
if (err == -ENOMEM)
@@ -187,10 +188,36 @@ out:
name = "<encrypted>";
else
name = raw_inode->i_name;
- f2fs_msg(inode->i_sb, KERN_NOTICE,
- "%s: ino = %x, name = %s, dir = %lx, err = %d",
- __func__, ino_of_node(ipage), name,
- IS_ERR(dir) ? 0 : dir->i_ino, err);
+ f2fs_notice(F2FS_I_SB(inode), "%s: ino = %x, name = %s, dir = %lx, err = %d",
+ __func__, ino_of_node(ipage), name,
+ IS_ERR(dir) ? 0 : dir->i_ino, err);
+ return err;
+}
+
+static int recover_quota_data(struct inode *inode, struct page *page)
+{
+ struct f2fs_inode *raw = F2FS_INODE(page);
+ struct iattr attr;
+ uid_t i_uid = le32_to_cpu(raw->i_uid);
+ gid_t i_gid = le32_to_cpu(raw->i_gid);
+ int err;
+
+ memset(&attr, 0, sizeof(attr));
+
+ attr.ia_uid = make_kuid(&init_user_ns, i_uid);
+ attr.ia_gid = make_kgid(&init_user_ns, i_gid);
+
+ if (!uid_eq(attr.ia_uid, inode->i_uid))
+ attr.ia_valid |= ATTR_UID;
+ if (!gid_eq(attr.ia_gid, inode->i_gid))
+ attr.ia_valid |= ATTR_GID;
+
+ if (!attr.ia_valid)
+ return 0;
+
+ err = dquot_transfer(inode, &attr);
+ if (err)
+ set_sbi_flag(F2FS_I_SB(inode), SBI_QUOTA_NEED_REPAIR);
return err;
}
@@ -204,16 +231,35 @@ static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri)
set_inode_flag(inode, FI_DATA_EXIST);
else
clear_inode_flag(inode, FI_DATA_EXIST);
- if (!(ri->i_inline & F2FS_INLINE_DOTS))
- clear_inode_flag(inode, FI_INLINE_DOTS);
}
-static void recover_inode(struct inode *inode, struct page *page)
+static int recover_inode(struct inode *inode, struct page *page)
{
struct f2fs_inode *raw = F2FS_INODE(page);
char *name;
+ int err;
inode->i_mode = le16_to_cpu(raw->i_mode);
+
+ err = recover_quota_data(inode, page);
+ if (err)
+ return err;
+
+ i_uid_write(inode, le32_to_cpu(raw->i_uid));
+ i_gid_write(inode, le32_to_cpu(raw->i_gid));
+
+ if (raw->i_inline & F2FS_EXTRA_ATTR) {
+ if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)) &&
+ F2FS_FITS_IN_INODE(raw, le16_to_cpu(raw->i_extra_isize),
+ i_projid)) {
+ projid_t i_projid;
+
+ i_projid = (projid_t)le32_to_cpu(raw->i_projid);
+ F2FS_I(inode)->i_projid =
+ make_kprojid(&init_user_ns, i_projid);
+ }
+ }
+
f2fs_i_size_write(inode, le64_to_cpu(raw->i_size));
inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime);
inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
@@ -223,17 +269,23 @@ static void recover_inode(struct inode *inode, struct page *page)
inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
F2FS_I(inode)->i_advise = raw->i_advise;
+ F2FS_I(inode)->i_flags = le32_to_cpu(raw->i_flags);
+ f2fs_set_inode_flags(inode);
+ F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN] =
+ le16_to_cpu(raw->i_gc_failures);
recover_inline_flags(inode, raw);
+ f2fs_mark_inode_dirty_sync(inode, true);
+
if (file_enc_name(inode))
name = "<encrypted>";
else
name = F2FS_INODE(page)->i_name;
- f2fs_msg(inode->i_sb, KERN_NOTICE,
- "recover_inode: ino = %x, name = %s, inline = %x",
- ino_of_node(page), name, raw->i_inline);
+ f2fs_notice(F2FS_I_SB(inode), "recover_inode: ino = %x, name = %s, inline = %x",
+ ino_of_node(page), name, raw->i_inline);
+ return 0;
}
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
@@ -243,8 +295,8 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
struct page *page = NULL;
block_t blkaddr;
unsigned int loop_cnt = 0;
- unsigned int free_blocks = sbi->user_block_count -
- valid_user_blocks(sbi);
+ unsigned int free_blocks = MAIN_SEGS(sbi) * sbi->blocks_per_seg -
+ valid_user_blocks(sbi);
int err = 0;
/* get node pages in the current segment */
@@ -254,13 +306,19 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
while (1) {
struct fsync_inode_entry *entry;
- if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR))
return 0;
- page = get_tmp_page(sbi, blkaddr);
+ page = f2fs_get_tmp_page(sbi, blkaddr);
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ break;
+ }
- if (!is_recoverable_dnode(page))
+ if (!is_recoverable_dnode(page)) {
+ f2fs_put_page(page, 1);
break;
+ }
if (!is_fsync_dnode(page))
goto next;
@@ -271,9 +329,11 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
if (!check_only &&
IS_INODE(page) && is_dent_dnode(page)) {
- err = recover_inode_page(sbi, page);
- if (err)
+ err = f2fs_recover_inode_page(sbi, page);
+ if (err) {
+ f2fs_put_page(page, 1);
break;
+ }
quota_inode = true;
}
@@ -289,6 +349,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
err = 0;
goto next;
}
+ f2fs_put_page(page, 1);
break;
}
}
@@ -300,10 +361,10 @@ next:
/* sanity check in order to detect looped node chain */
if (++loop_cnt >= free_blocks ||
blkaddr == next_blkaddr_of_node(page)) {
- f2fs_msg(sbi->sb, KERN_NOTICE,
- "%s: detect looped node chain, "
- "blkaddr:%u, next:%u",
- __func__, blkaddr, next_blkaddr_of_node(page));
+ f2fs_notice(sbi, "%s: detect looped node chain, blkaddr:%u, next:%u",
+ __func__, blkaddr,
+ next_blkaddr_of_node(page));
+ f2fs_put_page(page, 1);
err = -EINVAL;
break;
}
@@ -312,18 +373,17 @@ next:
blkaddr = next_blkaddr_of_node(page);
f2fs_put_page(page, 1);
- ra_meta_pages_cond(sbi, blkaddr);
+ f2fs_ra_meta_pages_cond(sbi, blkaddr);
}
- f2fs_put_page(page, 1);
return err;
}
-static void destroy_fsync_dnodes(struct list_head *head)
+static void destroy_fsync_dnodes(struct list_head *head, int drop)
{
struct fsync_inode_entry *entry, *tmp;
list_for_each_entry_safe(entry, tmp, head, list)
- del_fsync_inode(entry);
+ del_fsync_inode(entry, drop);
}
static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
@@ -355,7 +415,9 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
}
}
- sum_page = get_sum_page(sbi, segno);
+ sum_page = f2fs_get_sum_page(sbi, segno);
+ if (IS_ERR(sum_page))
+ return PTR_ERR(sum_page);
sum_node = (struct f2fs_summary_block *)page_address(sum_page);
sum = sum_node->entries[blkoff];
f2fs_put_page(sum_page, 1);
@@ -375,7 +437,7 @@ got_it:
}
/* Get the node page */
- node_page = get_node_page(sbi, nid);
+ node_page = f2fs_get_node_page(sbi, nid);
if (IS_ERR(node_page))
return PTR_ERR(node_page);
@@ -400,7 +462,8 @@ got_it:
inode = dn->inode;
}
- bidx = start_bidx_of_node(offset, inode) + le16_to_cpu(sum.ofs_in_node);
+ bidx = f2fs_start_bidx_of_node(offset, inode) +
+ le16_to_cpu(sum.ofs_in_node);
/*
* if inode page is locked, unlock temporarily, but its reference
@@ -410,11 +473,11 @@ got_it:
unlock_page(dn->inode_page);
set_new_dnode(&tdn, inode, NULL, NULL, 0);
- if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE))
+ if (f2fs_get_dnode_of_data(&tdn, bidx, LOOKUP_NODE))
goto out;
if (tdn.data_blkaddr == blkaddr)
- truncate_data_blocks_range(&tdn, 1);
+ f2fs_truncate_data_blocks_range(&tdn, 1);
f2fs_put_dnode(&tdn);
out:
@@ -427,7 +490,7 @@ out:
truncate_out:
if (datablock_addr(tdn.inode, tdn.node_page,
tdn.ofs_in_node) == blkaddr)
- truncate_data_blocks_range(&tdn, 1);
+ f2fs_truncate_data_blocks_range(&tdn, 1);
if (dn->inode->i_ino == nid && !dn->inode_page_locked)
unlock_page(dn->inode_page);
return 0;
@@ -443,25 +506,25 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
/* step 1: recover xattr */
if (IS_INODE(page)) {
- recover_inline_xattr(inode, page);
+ f2fs_recover_inline_xattr(inode, page);
} else if (f2fs_has_xattr_block(ofs_of_node(page))) {
- err = recover_xattr_data(inode, page);
+ err = f2fs_recover_xattr_data(inode, page);
if (!err)
recovered++;
goto out;
}
/* step 2: recover inline data */
- if (recover_inline_data(inode, page))
+ if (f2fs_recover_inline_data(inode, page))
goto out;
/* step 3: recover data indices */
- start = start_bidx_of_node(ofs_of_node(page), inode);
+ start = f2fs_start_bidx_of_node(ofs_of_node(page), inode);
end = start + ADDRS_PER_PAGE(page, inode);
set_new_dnode(&dn, inode, NULL, NULL, 0);
retry_dn:
- err = get_dnode_of_data(&dn, start, ALLOC_NODE);
+ err = f2fs_get_dnode_of_data(&dn, start, ALLOC_NODE);
if (err) {
if (err == -ENOMEM) {
congestion_wait(BLK_RW_ASYNC, HZ/50);
@@ -470,17 +533,19 @@ retry_dn:
goto out;
}
- f2fs_wait_on_page_writeback(dn.node_page, NODE, true);
+ f2fs_wait_on_page_writeback(dn.node_page, NODE, true, true);
+
+ err = f2fs_get_node_info(sbi, dn.nid, &ni);
+ if (err)
+ goto err;
- get_node_info(sbi, dn.nid, &ni);
f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
if (ofs_of_node(dn.node_page) != ofs_of_node(page)) {
- f2fs_msg(sbi->sb, KERN_WARNING,
- "Inconsistent ofs_of_node, ino:%lu, ofs:%u, %u",
- inode->i_ino, ofs_of_node(dn.node_page),
- ofs_of_node(page));
- err = -EFAULT;
+ f2fs_warn(sbi, "Inconsistent ofs_of_node, ino:%lu, ofs:%u, %u",
+ inode->i_ino, ofs_of_node(dn.node_page),
+ ofs_of_node(page));
+ err = -EFSCORRUPTED;
goto err;
}
@@ -490,13 +555,25 @@ retry_dn:
src = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node);
dest = datablock_addr(dn.inode, page, dn.ofs_in_node);
+ if (__is_valid_data_blkaddr(src) &&
+ !f2fs_is_valid_blkaddr(sbi, src, META_POR)) {
+ err = -EFSCORRUPTED;
+ goto err;
+ }
+
+ if (__is_valid_data_blkaddr(dest) &&
+ !f2fs_is_valid_blkaddr(sbi, dest, META_POR)) {
+ err = -EFSCORRUPTED;
+ goto err;
+ }
+
/* skip recovering if dest is the same as src */
if (src == dest)
continue;
/* dest is invalid, just invalidate src block */
if (dest == NULL_ADDR) {
- truncate_data_blocks_range(&dn, 1);
+ f2fs_truncate_data_blocks_range(&dn, 1);
continue;
}
@@ -510,20 +587,19 @@ retry_dn:
* and then reserve one new block in dnode page.
*/
if (dest == NEW_ADDR) {
- truncate_data_blocks_range(&dn, 1);
- reserve_new_block(&dn);
+ f2fs_truncate_data_blocks_range(&dn, 1);
+ f2fs_reserve_new_block(&dn);
continue;
}
/* dest is valid block, try to recover from src to dest */
- if (is_valid_blkaddr(sbi, dest, META_POR)) {
+ if (f2fs_is_valid_blkaddr(sbi, dest, META_POR)) {
if (src == NULL_ADDR) {
- err = reserve_new_block(&dn);
-#ifdef CONFIG_F2FS_FAULT_INJECTION
- while (err)
- err = reserve_new_block(&dn);
-#endif
+ err = f2fs_reserve_new_block(&dn);
+ while (err &&
+ IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION))
+ err = f2fs_reserve_new_block(&dn);
/* We should not get -ENOSPC */
f2fs_bug_on(sbi, err);
if (err)
@@ -554,16 +630,14 @@ retry_prev:
err:
f2fs_put_dnode(&dn);
out:
- f2fs_msg(sbi->sb, KERN_NOTICE,
- "recover_data: ino = %lx (i_size: %s) recovered = %d, err = %d",
- inode->i_ino,
- file_keep_isize(inode) ? "keep" : "recover",
- recovered, err);
+ f2fs_notice(sbi, "recover_data: ino = %lx (i_size: %s) recovered = %d, err = %d",
+ inode->i_ino, file_keep_isize(inode) ? "keep" : "recover",
+ recovered, err);
return err;
}
static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
- struct list_head *dir_list)
+ struct list_head *tmp_inode_list, struct list_head *dir_list)
{
struct curseg_info *curseg;
struct page *page = NULL;
@@ -577,12 +651,16 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
while (1) {
struct fsync_inode_entry *entry;
- if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR))
break;
- ra_meta_pages_cond(sbi, blkaddr);
+ f2fs_ra_meta_pages_cond(sbi, blkaddr);
- page = get_tmp_page(sbi, blkaddr);
+ page = f2fs_get_tmp_page(sbi, blkaddr);
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ break;
+ }
if (!is_recoverable_dnode(page)) {
f2fs_put_page(page, 1);
@@ -597,8 +675,13 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
* In this case, we can lose the latest inode(x).
* So, call recover_inode for the inode update.
*/
- if (IS_INODE(page))
- recover_inode(entry->inode, page);
+ if (IS_INODE(page)) {
+ err = recover_inode(entry->inode, page);
+ if (err) {
+ f2fs_put_page(page, 1);
+ break;
+ }
+ }
if (entry->last_dentry == blkaddr) {
err = recover_dentry(entry->inode, page, dir_list);
if (err) {
@@ -613,20 +696,20 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
}
if (entry->blkaddr == blkaddr)
- del_fsync_inode(entry);
+ list_move_tail(&entry->list, tmp_inode_list);
next:
/* check next segment */
blkaddr = next_blkaddr_of_node(page);
f2fs_put_page(page, 1);
}
if (!err)
- allocate_new_segments(sbi);
+ f2fs_allocate_new_segments(sbi);
return err;
}
-int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
+int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
{
- struct list_head inode_list;
+ struct list_head inode_list, tmp_inode_list;
struct list_head dir_list;
int err;
int ret = 0;
@@ -637,7 +720,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
#endif
if (s_flags & MS_RDONLY) {
- f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs");
+ f2fs_info(sbi, "recover fsync data on readonly fs");
sbi->sb->s_flags &= ~MS_RDONLY;
}
@@ -656,6 +739,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
}
INIT_LIST_HEAD(&inode_list);
+ INIT_LIST_HEAD(&tmp_inode_list);
INIT_LIST_HEAD(&dir_list);
/* prevent checkpoint */
@@ -674,11 +758,16 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
need_writecp = true;
/* step #2: recover data */
- err = recover_data(sbi, &inode_list, &dir_list);
+ err = recover_data(sbi, &inode_list, &tmp_inode_list, &dir_list);
if (!err)
f2fs_bug_on(sbi, !list_empty(&inode_list));
+ else {
+ /* restore s_flags to let iput() trash data */
+ sbi->sb->s_flags = s_flags;
+ }
skip:
- destroy_fsync_dnodes(&inode_list);
+ destroy_fsync_dnodes(&inode_list, err);
+ destroy_fsync_dnodes(&tmp_inode_list, err);
/* truncate meta pages to be used by the recovery */
truncate_inode_pages_range(META_MAPPING(sbi),
@@ -687,19 +776,23 @@ skip:
if (err) {
truncate_inode_pages_final(NODE_MAPPING(sbi));
truncate_inode_pages_final(META_MAPPING(sbi));
+ } else {
+ clear_sbi_flag(sbi, SBI_POR_DOING);
}
-
- clear_sbi_flag(sbi, SBI_POR_DOING);
mutex_unlock(&sbi->cp_mutex);
/* let's drop all the directory inodes for clean checkpoint */
- destroy_fsync_dnodes(&dir_list);
+ destroy_fsync_dnodes(&dir_list, err);
+
+ if (need_writecp) {
+ set_sbi_flag(sbi, SBI_IS_RECOVERED);
- if (!err && need_writecp) {
- struct cp_control cpc = {
- .reason = CP_RECOVERY,
- };
- err = write_checkpoint(sbi, &cpc);
+ if (!err) {
+ struct cp_control cpc = {
+ .reason = CP_RECOVERY,
+ };
+ err = f2fs_write_checkpoint(sbi, &cpc);
+ }
}
kmem_cache_destroy(fsync_entry_slab);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 9abe8284a91d..7ddd8664487b 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/segment.c
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
@@ -169,7 +166,7 @@ found:
return result - size + __reverse_ffz(tmp);
}
-bool need_SSR(struct f2fs_sb_info *sbi)
+bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
{
int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
@@ -177,23 +174,22 @@ bool need_SSR(struct f2fs_sb_info *sbi)
if (test_opt(sbi, LFS))
return false;
- if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
+ if (sbi->gc_mode == GC_URGENT)
+ return true;
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
return true;
return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
}
-void register_inmem_page(struct inode *inode, struct page *page)
+void f2fs_register_inmem_page(struct inode *inode, struct page *page)
{
- struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct f2fs_inode_info *fi = F2FS_I(inode);
struct inmem_pages *new;
f2fs_trace_pid(page);
- set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
- SetPagePrivate(page);
+ f2fs_set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
@@ -202,21 +198,18 @@ void register_inmem_page(struct inode *inode, struct page *page)
INIT_LIST_HEAD(&new->list);
/* increase reference count with clean state */
- mutex_lock(&fi->inmem_lock);
get_page(page);
- list_add_tail(&new->list, &fi->inmem_pages);
- spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
- if (list_empty(&fi->inmem_ilist))
- list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]);
- spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
+ mutex_lock(&F2FS_I(inode)->inmem_lock);
+ list_add_tail(&new->list, &F2FS_I(inode)->inmem_pages);
inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
- mutex_unlock(&fi->inmem_lock);
+ mutex_unlock(&F2FS_I(inode)->inmem_lock);
trace_f2fs_register_inmem_page(page, INMEM);
}
static int __revoke_inmem_pages(struct inode *inode,
- struct list_head *head, bool drop, bool recover)
+ struct list_head *head, bool drop, bool recover,
+ bool trylock)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct inmem_pages *cur, *tmp;
@@ -228,7 +221,18 @@ static int __revoke_inmem_pages(struct inode *inode,
if (drop)
trace_f2fs_commit_inmem_page(page, INMEM_DROP);
- lock_page(page);
+ if (trylock) {
+ /*
+ * to avoid a deadlock between the page lock and
+ * inmem_lock.
+ */
+ if (!trylock_page(page))
+ continue;
+ } else {
+ lock_page(page);
+ }
+
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
if (recover) {
struct dnode_of_data dn;
@@ -237,7 +241,8 @@ static int __revoke_inmem_pages(struct inode *inode,
trace_f2fs_commit_inmem_page(page, INMEM_REVOKE);
retry:
set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
+ err = f2fs_get_dnode_of_data(&dn, page->index,
+ LOOKUP_NODE);
if (err) {
if (err == -ENOMEM) {
congestion_wait(BLK_RW_ASYNC, HZ/50);
@@ -247,9 +252,15 @@ retry:
err = -EAGAIN;
goto next;
}
- get_node_info(sbi, dn.nid, &ni);
+
+ err = f2fs_get_node_info(sbi, dn.nid, &ni);
+ if (err) {
+ f2fs_put_dnode(&dn);
+ return err;
+ }
+
if (cur->old_addr == NEW_ADDR) {
- invalidate_blocks(sbi, dn.data_blkaddr);
+ f2fs_invalidate_blocks(sbi, dn.data_blkaddr);
f2fs_update_data_blkaddr(&dn, NEW_ADDR);
} else
f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
@@ -258,10 +269,11 @@ retry:
}
next:
/* we don't need to invalidate this in the successful status */
- if (drop || recover)
+ if (drop || recover) {
ClearPageUptodate(page);
- set_page_private(page, 0);
- ClearPagePrivate(page);
+ clear_cold_data(page);
+ }
+ f2fs_clear_page_private(page);
f2fs_put_page(page, 1);
list_del(&cur->list);
@@ -271,7 +283,7 @@ next:
return err;
}
-void drop_inmem_pages_all(struct f2fs_sb_info *sbi)
+void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure)
{
struct list_head *head = &sbi->inode_list[ATOMIC_FILE];
struct inode *inode;
@@ -287,33 +299,45 @@ next:
spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
if (inode) {
- drop_inmem_pages(inode);
+ if (gc_failure) {
+ if (fi->i_gc_failures[GC_FAILURE_ATOMIC])
+ goto drop;
+ goto skip;
+ }
+drop:
+ set_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
+ f2fs_drop_inmem_pages(inode);
iput(inode);
}
+skip:
congestion_wait(BLK_RW_ASYNC, HZ/50);
cond_resched();
goto next;
}
-void drop_inmem_pages(struct inode *inode)
+void f2fs_drop_inmem_pages(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
- mutex_lock(&fi->inmem_lock);
- __revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
+ while (!list_empty(&fi->inmem_pages)) {
+ mutex_lock(&fi->inmem_lock);
+ __revoke_inmem_pages(inode, &fi->inmem_pages,
+ true, false, true);
+ mutex_unlock(&fi->inmem_lock);
+ }
+
+ clear_inode_flag(inode, FI_ATOMIC_FILE);
+ fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
+ stat_dec_atomic_write(inode);
+
spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
if (!list_empty(&fi->inmem_ilist))
list_del_init(&fi->inmem_ilist);
spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
- mutex_unlock(&fi->inmem_lock);
-
- clear_inode_flag(inode, FI_ATOMIC_FILE);
- clear_inode_flag(inode, FI_HOT_DATA);
- stat_dec_atomic_write(inode);
}
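This drain loop is why __revoke_inmem_pages() grew the trylock parameter: f2fs_drop_inmem_pages() acquires inmem_lock before page locks, while the write path can hold a page lock while waiting on inmem_lock, so blocking on lock_page() here could deadlock. Skipping contended pages and retrying after releasing inmem_lock breaks the cycle; the shape of the pattern, as a hedged sketch:

	/* Lock-order-safe drain: never block on a page lock while holding
	 * inmem_lock. A page whose lock is contended is skipped on this
	 * pass and retried on the next iteration of the outer loop, after
	 * inmem_lock has been dropped and re-taken.
	 */
	while (!list_empty(&fi->inmem_pages)) {
		mutex_lock(&fi->inmem_lock);
		list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
			if (!trylock_page(cur->page))
				continue;		/* retry later */
			/* ... revoke and unlink cur ... */
			unlock_page(cur->page);
		}
		mutex_unlock(&fi->inmem_lock);
	}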
-void drop_inmem_page(struct inode *inode, struct page *page)
+void f2fs_drop_inmem_page(struct inode *inode, struct page *page)
{
struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -328,7 +352,7 @@ void drop_inmem_page(struct inode *inode, struct page *page)
break;
}
- f2fs_bug_on(sbi, !cur || cur->page != page);
+ f2fs_bug_on(sbi, list_empty(head) || cur->page != page);
list_del(&cur->list);
mutex_unlock(&fi->inmem_lock);
@@ -336,15 +360,13 @@ void drop_inmem_page(struct inode *inode, struct page *page)
kmem_cache_free(inmem_entry_slab, cur);
ClearPageUptodate(page);
- set_page_private(page, 0);
- ClearPagePrivate(page);
+ f2fs_clear_page_private(page);
f2fs_put_page(page, 0);
trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE);
}
-static int __commit_inmem_pages(struct inode *inode,
- struct list_head *revoke_list)
+static int __f2fs_commit_inmem_pages(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
@@ -357,9 +379,12 @@ static int __commit_inmem_pages(struct inode *inode,
.op_flags = REQ_SYNC | REQ_PRIO,
.io_type = FS_DATA_IO,
};
- pgoff_t last_idx = ULONG_MAX;
+ struct list_head revoke_list;
+ bool submit_bio = false;
int err = 0;
+ INIT_LIST_HEAD(&revoke_list);
+
list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
struct page *page = cur->page;
@@ -367,18 +392,19 @@ static int __commit_inmem_pages(struct inode *inode,
if (page->mapping == inode->i_mapping) {
trace_f2fs_commit_inmem_page(page, INMEM);
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
+
set_page_dirty(page);
- f2fs_wait_on_page_writeback(page, DATA, true);
if (clear_page_dirty_for_io(page)) {
inode_dec_dirty_pages(inode);
- remove_dirty_inode(inode);
+ f2fs_remove_dirty_inode(inode);
}
retry:
fio.page = page;
fio.old_blkaddr = NULL_ADDR;
fio.encrypted_page = NULL;
fio.need_lock = LOCK_DONE;
- err = do_write_data_page(&fio);
+ err = f2fs_do_write_data_page(&fio);
if (err) {
if (err == -ENOMEM) {
congestion_wait(BLK_RW_ASYNC, HZ/50);
@@ -390,62 +416,60 @@ retry:
}
/* record old blkaddr for revoking */
cur->old_addr = fio.old_blkaddr;
- last_idx = page->index;
+ submit_bio = true;
}
unlock_page(page);
- list_move_tail(&cur->list, revoke_list);
+ list_move_tail(&cur->list, &revoke_list);
}
- if (last_idx != ULONG_MAX)
- f2fs_submit_merged_write_cond(sbi, inode, 0, last_idx, DATA);
+ if (submit_bio)
+ f2fs_submit_merged_write_cond(sbi, inode, NULL, 0, DATA);
- if (!err)
- __revoke_inmem_pages(inode, revoke_list, false, false);
+ if (err) {
+ /*
+ * Try to revoke all committed pages; this can still fail due to
+ * lack of memory or other reasons. If that happens, EAGAIN is
+ * returned, meaning the transaction is no longer intact and the
+ * caller should use its journal to recover, or rewrite and commit
+ * the last transaction again. For any other error number, the
+ * revoking was done by the filesystem itself.
+ */
+ err = __revoke_inmem_pages(inode, &revoke_list,
+ false, true, false);
+
+ /* drop all uncommitted pages */
+ __revoke_inmem_pages(inode, &fi->inmem_pages,
+ true, false, false);
+ } else {
+ __revoke_inmem_pages(inode, &revoke_list,
+ false, false, false);
+ }
return err;
}
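Restating the error contract from the comment above: on a write failure, the function first revokes the pages it already committed (restoring their old block addresses) and then drops all uncommitted ones; only if that revoke itself fails does -EAGAIN surface, meaning the atomic transaction is torn. A hedged sketch of what a caller must therefore handle:

	err = f2fs_commit_inmem_pages(inode);
	if (err == -EAGAIN) {
		/* partial commit could not be rolled back: the caller must
		 * recover from its own journal, or rewrite and re-commit
		 * the whole transaction */
	} else if (err) {
		/* commit failed, but the filesystem revoked it cleanly;
		 * the transaction simply did not happen */
	}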
-int commit_inmem_pages(struct inode *inode)
+int f2fs_commit_inmem_pages(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
- struct list_head revoke_list;
int err;
- INIT_LIST_HEAD(&revoke_list);
f2fs_balance_fs(sbi, true);
- f2fs_lock_op(sbi);
+ down_write(&fi->i_gc_rwsem[WRITE]);
+
+ f2fs_lock_op(sbi);
set_inode_flag(inode, FI_ATOMIC_COMMIT);
mutex_lock(&fi->inmem_lock);
- err = __commit_inmem_pages(inode, &revoke_list);
- if (err) {
- int ret;
- /*
- * try to revoke all committed pages, but still we could fail
- * due to no memory or other reason, if that happened, EAGAIN
- * will be returned, which means in such case, transaction is
- * already not integrity, caller should use journal to do the
- * recovery or rewrite & commit last transaction. For other
- * error number, revoking was done by filesystem itself.
- */
- ret = __revoke_inmem_pages(inode, &revoke_list, false, true);
- if (ret)
- err = ret;
-
- /* drop all uncommitted pages */
- __revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
- }
- spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
- if (!list_empty(&fi->inmem_ilist))
- list_del_init(&fi->inmem_ilist);
- spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
+ err = __f2fs_commit_inmem_pages(inode);
mutex_unlock(&fi->inmem_lock);
clear_inode_flag(inode, FI_ATOMIC_COMMIT);
f2fs_unlock_op(sbi);
+ up_write(&fi->i_gc_rwsem[WRITE]);
+
return err;
}
@@ -455,17 +479,18 @@ int commit_inmem_pages(struct inode *inode)
*/
void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
{
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
f2fs_show_injection_info(FAULT_CHECKPOINT);
f2fs_stop_checkpoint(sbi, false);
}
-#endif
/* balance_fs_bg is able to be pending */
if (need && excess_cached_nats(sbi))
f2fs_balance_fs_bg(sbi);
+ if (f2fs_is_checkpoint_ready(sbi))
+ return;
+
/*
* We should do GC or end up with a checkpoint if there are too many
* dirty dir/node pages without enough free segments.
@@ -482,33 +507,39 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
return;
/* try to shrink extent cache when there is no enough memory */
- if (!available_free_memory(sbi, EXTENT_CACHE))
+ if (!f2fs_available_free_memory(sbi, EXTENT_CACHE))
f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER);
/* check the # of cached NAT entries */
- if (!available_free_memory(sbi, NAT_ENTRIES))
- try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);
+ if (!f2fs_available_free_memory(sbi, NAT_ENTRIES))
+ f2fs_try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);
- if (!available_free_memory(sbi, FREE_NIDS))
- try_to_free_nids(sbi, MAX_FREE_NIDS);
+ if (!f2fs_available_free_memory(sbi, FREE_NIDS))
+ f2fs_try_to_free_nids(sbi, MAX_FREE_NIDS);
else
- build_free_nids(sbi, false, false);
+ f2fs_build_free_nids(sbi, false, false);
- if (!is_idle(sbi) && !excess_dirty_nats(sbi))
+ if (!is_idle(sbi, REQ_TIME) &&
+ (!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi)))
return;
/* checkpoint is the only way to shrink partial cached entries */
- if (!available_free_memory(sbi, NAT_ENTRIES) ||
- !available_free_memory(sbi, INO_ENTRIES) ||
+ if (!f2fs_available_free_memory(sbi, NAT_ENTRIES) ||
+ !f2fs_available_free_memory(sbi, INO_ENTRIES) ||
excess_prefree_segs(sbi) ||
excess_dirty_nats(sbi) ||
+ excess_dirty_nodes(sbi) ||
f2fs_time_over(sbi, CP_TIME)) {
if (test_opt(sbi, DATA_FLUSH)) {
struct blk_plug plug;
+ mutex_lock(&sbi->flush_lock);
+
blk_start_plug(&plug);
- sync_dirty_inodes(sbi, FILE_INODE);
+ f2fs_sync_dirty_inodes(sbi, FILE_INODE);
blk_finish_plug(&plug);
+
+ mutex_unlock(&sbi->flush_lock);
}
f2fs_sync_fs(sbi->sb, true);
stat_inc_bg_cp_count(sbi->stat_info);
@@ -518,9 +549,13 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
static int __submit_flush_wait(struct f2fs_sb_info *sbi,
struct block_device *bdev)
{
- struct bio *bio = f2fs_bio_alloc(sbi, 0, true);
+ struct bio *bio;
int ret;
+ bio = f2fs_bio_alloc(sbi, 0, false);
+ if (!bio)
+ return -ENOMEM;
+
bio->bi_rw = REQ_OP_WRITE;
bio->bi_bdev = bdev;
ret = submit_bio_wait(WRITE_FLUSH, bio);
@@ -536,11 +571,11 @@ static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
int ret = 0;
int i;
- if (!sbi->s_ndevs)
+ if (!f2fs_is_multi_device(sbi))
return __submit_flush_wait(sbi, sbi->sb->s_bdev);
for (i = 0; i < sbi->s_ndevs; i++) {
- if (!is_dirty_device(sbi, ino, i, FLUSH_INO))
+ if (!f2fs_is_dirty_device(sbi, ino, i, FLUSH_INO))
continue;
ret = __submit_flush_wait(sbi, FDEV(i).bdev);
if (ret)
@@ -597,14 +632,17 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
return 0;
if (!test_opt(sbi, FLUSH_MERGE)) {
+ atomic_inc(&fcc->queued_flush);
ret = submit_flush_wait(sbi, ino);
+ atomic_dec(&fcc->queued_flush);
atomic_inc(&fcc->issued_flush);
return ret;
}
- if (atomic_inc_return(&fcc->issing_flush) == 1 || sbi->s_ndevs > 1) {
+ if (atomic_inc_return(&fcc->queued_flush) == 1 ||
+ f2fs_is_multi_device(sbi)) {
ret = submit_flush_wait(sbi, ino);
- atomic_dec(&fcc->issing_flush);
+ atomic_dec(&fcc->queued_flush);
atomic_inc(&fcc->issued_flush);
return ret;
@@ -623,14 +661,14 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
if (fcc->f2fs_issue_flush) {
wait_for_completion(&cmd.wait);
- atomic_dec(&fcc->issing_flush);
+ atomic_dec(&fcc->queued_flush);
} else {
struct llist_node *list;
list = llist_del_all(&fcc->issue_list);
if (!list) {
wait_for_completion(&cmd.wait);
- atomic_dec(&fcc->issing_flush);
+ atomic_dec(&fcc->queued_flush);
} else {
struct flush_cmd *tmp, *next;
@@ -639,7 +677,7 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
llist_for_each_entry_safe(tmp, next, list, llnode) {
if (tmp == &cmd) {
cmd.ret = ret;
- atomic_dec(&fcc->issing_flush);
+ atomic_dec(&fcc->queued_flush);
continue;
}
tmp->ret = ret;
@@ -651,7 +689,7 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
return cmd.ret;
}
-int create_flush_cmd_control(struct f2fs_sb_info *sbi)
+int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi)
{
dev_t dev = sbi->sb->s_bdev->bd_dev;
struct flush_cmd_control *fcc;
@@ -668,7 +706,7 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
if (!fcc)
return -ENOMEM;
atomic_set(&fcc->issued_flush, 0);
- atomic_set(&fcc->issing_flush, 0);
+ atomic_set(&fcc->queued_flush, 0);
init_waitqueue_head(&fcc->flush_wait_queue);
init_llist_head(&fcc->issue_list);
SM_I(sbi)->fcc_info = fcc;
@@ -680,7 +718,7 @@ init_thread:
"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(fcc->f2fs_issue_flush)) {
err = PTR_ERR(fcc->f2fs_issue_flush);
- kfree(fcc);
+ kvfree(fcc);
SM_I(sbi)->fcc_info = NULL;
return err;
}
@@ -688,7 +726,7 @@ init_thread:
return err;
}
-void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
+void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
{
struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
@@ -699,7 +737,7 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
kthread_stop(flush_thread);
}
if (free) {
- kfree(fcc);
+ kvfree(fcc);
SM_I(sbi)->fcc_info = NULL;
}
}
@@ -708,7 +746,7 @@ int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
{
int ret = 0, i;
- if (!sbi->s_ndevs)
+ if (!f2fs_is_multi_device(sbi))
return 0;
for (i = 1; i < sbi->s_ndevs; i++) {
@@ -780,7 +818,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
- unsigned short valid_blocks;
+ unsigned short valid_blocks, ckpt_valid_blocks;
if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
return;
@@ -788,8 +826,10 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
mutex_lock(&dirty_i->seglist_lock);
valid_blocks = get_valid_blocks(sbi, segno, false);
+ ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno);
- if (valid_blocks == 0) {
+ if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
+ ckpt_valid_blocks == sbi->blocks_per_seg)) {
__locate_dirty_segment(sbi, segno, PRE);
__remove_dirty_segment(sbi, segno, DIRTY);
} else if (valid_blocks < sbi->blocks_per_seg) {
@@ -802,6 +842,82 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
mutex_unlock(&dirty_i->seglist_lock);
}
+/* This moves currently empty dirty segments to prefree; it takes seglist_lock itself. */
+void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
+{
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+ unsigned int segno;
+
+ mutex_lock(&dirty_i->seglist_lock);
+ for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
+ if (get_valid_blocks(sbi, segno, false))
+ continue;
+ if (IS_CURSEG(sbi, segno))
+ continue;
+ __locate_dirty_segment(sbi, segno, PRE);
+ __remove_dirty_segment(sbi, segno, DIRTY);
+ }
+ mutex_unlock(&dirty_i->seglist_lock);
+}
+
+block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi)
+{
+ int ovp_hole_segs =
+ (overprovision_segments(sbi) - reserved_segments(sbi));
+ block_t ovp_holes = ovp_hole_segs << sbi->log_blocks_per_seg;
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+ block_t holes[2] = {0, 0}; /* DATA and NODE */
+ block_t unusable;
+ struct seg_entry *se;
+ unsigned int segno;
+
+ mutex_lock(&dirty_i->seglist_lock);
+ for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
+ se = get_seg_entry(sbi, segno);
+ if (IS_NODESEG(se->type))
+ holes[NODE] += sbi->blocks_per_seg - se->valid_blocks;
+ else
+ holes[DATA] += sbi->blocks_per_seg - se->valid_blocks;
+ }
+ mutex_unlock(&dirty_i->seglist_lock);
+
+ unusable = holes[DATA] > holes[NODE] ? holes[DATA] : holes[NODE];
+ if (unusable > ovp_holes)
+ return unusable - ovp_holes;
+ return 0;
+}
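Worked example (values assumed for illustration): with 512-block segments (log_blocks_per_seg = 9), overprovision_segments() = 80 and reserved_segments() = 30 give ovp_holes = (80 - 30) << 9 = 25600 blocks. If the dirty data segments contain 30000 hole blocks and the dirty node segments 12000, then unusable = max(30000, 12000) - 25600 = 4400 blocks that a checkpoint-disabled mount must treat as unavailable.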
+
+int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable)
+{
+ int ovp_hole_segs =
+ (overprovision_segments(sbi) - reserved_segments(sbi));
+ if (unusable > F2FS_OPTION(sbi).unusable_cap)
+ return -EAGAIN;
+ if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) &&
+ dirty_segments(sbi) > ovp_hole_segs)
+ return -EAGAIN;
+ return 0;
+}
+
+/* This is only used by SBI_CP_DISABLED */
+static unsigned int get_free_segment(struct f2fs_sb_info *sbi)
+{
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+ unsigned int segno = 0;
+
+ mutex_lock(&dirty_i->seglist_lock);
+ for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
+ if (get_valid_blocks(sbi, segno, false))
+ continue;
+ if (get_ckpt_valid_blocks(sbi, segno))
+ continue;
+ mutex_unlock(&dirty_i->seglist_lock);
+ return segno;
+ }
+ mutex_unlock(&dirty_i->seglist_lock);
+ return NULL_SEGNO;
+}
+
static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t lstart,
block_t start, block_t len)
@@ -822,9 +938,12 @@ static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
dc->len = len;
dc->ref = 0;
dc->state = D_PREP;
+ dc->queued = 0;
dc->error = 0;
init_completion(&dc->wait);
list_add_tail(&dc->list, pend_list);
+ spin_lock_init(&dc->lock);
+ dc->bio_ref = 0;
atomic_inc(&dcc->discard_cmd_cnt);
dcc->undiscard_blks += len;
@@ -851,7 +970,7 @@ static void __detach_discard_cmd(struct discard_cmd_control *dcc,
struct discard_cmd *dc)
{
if (dc->state == D_DONE)
- atomic_dec(&dcc->issing_discard);
+ atomic_sub(dc->queued, &dcc->queued_discard);
list_del(&dc->list);
rb_erase(&dc->rb_node, &dcc->root);
@@ -866,28 +985,43 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_cmd *dc)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+ unsigned long flags;
trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len);
+ spin_lock_irqsave(&dc->lock, flags);
+ if (dc->bio_ref) {
+ spin_unlock_irqrestore(&dc->lock, flags);
+ return;
+ }
+ spin_unlock_irqrestore(&dc->lock, flags);
+
f2fs_bug_on(sbi, dc->ref);
if (dc->error == -EOPNOTSUPP)
dc->error = 0;
if (dc->error)
- f2fs_msg(sbi->sb, KERN_INFO,
- "Issue discard(%u, %u, %u) failed, ret: %d",
- dc->lstart, dc->start, dc->len, dc->error);
+ printk_ratelimited(
+ "%sF2FS-fs: Issue discard(%u, %u, %u) failed, ret: %d",
+ KERN_INFO, dc->lstart, dc->start, dc->len, dc->error);
__detach_discard_cmd(dcc, dc);
}
static void f2fs_submit_discard_endio(struct bio *bio)
{
struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
+ unsigned long flags;
dc->error = bio->bi_error;
- dc->state = D_DONE;
- complete_all(&dc->wait);
+
+ spin_lock_irqsave(&dc->lock, flags);
+ dc->bio_ref--;
+ if (!dc->bio_ref && dc->state == D_SUBMIT) {
+ dc->state = D_DONE;
+ complete_all(&dc->wait);
+ }
+ spin_unlock_irqrestore(&dc->lock, flags);
bio_put(bio);
}
@@ -1006,69 +1140,147 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
/* common policy */
dpolicy->type = discard_type;
dpolicy->sync = true;
+ dpolicy->ordered = false;
dpolicy->granularity = granularity;
dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
dpolicy->io_aware_gran = MAX_PLIST_NUM;
+ dpolicy->timeout = 0;
if (discard_type == DPOLICY_BG) {
dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
+ dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
dpolicy->io_aware = true;
dpolicy->sync = false;
+ dpolicy->ordered = true;
if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
dpolicy->granularity = 1;
dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
}
} else if (discard_type == DPOLICY_FORCE) {
dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
+ dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
dpolicy->io_aware = false;
} else if (discard_type == DPOLICY_FSTRIM) {
dpolicy->io_aware = false;
} else if (discard_type == DPOLICY_UMOUNT) {
+ dpolicy->max_requests = UINT_MAX;
dpolicy->io_aware = false;
+ /* we need to issue all to keep CP_TRIMMED_FLAG */
+ dpolicy->granularity = 1;
}
}
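Each discard path is expected to pick one of these canned policies and hand it to an issue function; a minimal sketch of the calling pattern (granularity values assumed):

	struct discard_policy dpolicy;

	/* background issuing: async, io-aware, rate-limited */
	__init_discard_policy(sbi, &dpolicy, DPOLICY_BG, dcc->discard_granularity);
	issued = __issue_discard_cmd(sbi, &dpolicy);

	/* unmount: granularity forced to 1 inside, so everything is
	 * issued and CP_TRIMMED_FLAG can be set */
	__init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT, 1);
	__issue_discard_cmd(sbi, &dpolicy);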
-
+static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
+ struct block_device *bdev, block_t lstart,
+ block_t start, block_t len);
/* this function is copied from blkdev_issue_discard from block/blk-lib.c */
-static void __submit_discard_cmd(struct f2fs_sb_info *sbi,
+static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy,
- struct discard_cmd *dc)
+ struct discard_cmd *dc,
+ unsigned int *issued)
{
+ struct block_device *bdev = dc->bdev;
+ struct request_queue *q = bdev_get_queue(bdev);
+ unsigned int max_discard_blocks =
+ SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
&(dcc->fstrim_list) : &(dcc->wait_list);
- struct bio *bio = NULL;
int flag = dpolicy->sync ? REQ_SYNC : 0;
+ block_t lstart, start, len, total_len;
+ int err = 0;
if (dc->state != D_PREP)
- return;
+ return 0;
- trace_f2fs_issue_discard(dc->bdev, dc->start, dc->len);
+ if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
+ return 0;
- dc->error = __blkdev_issue_discard(dc->bdev,
- SECTOR_FROM_BLOCK(dc->start),
- SECTOR_FROM_BLOCK(dc->len),
- GFP_NOFS, 0, &bio);
- if (!dc->error) {
- /* should keep before submission to avoid D_DONE right away */
- dc->state = D_SUBMIT;
- atomic_inc(&dcc->issued_discard);
- atomic_inc(&dcc->issing_discard);
- if (bio) {
- bio->bi_private = dc;
- bio->bi_end_io = f2fs_submit_discard_endio;
- submit_bio(flag, bio);
- list_move_tail(&dc->list, wait_list);
- __check_sit_bitmap(sbi, dc->start, dc->start + dc->len);
+ trace_f2fs_issue_discard(bdev, dc->start, dc->len);
+
+ lstart = dc->lstart;
+ start = dc->start;
+ len = dc->len;
+ total_len = len;
+
+ dc->len = 0;
- f2fs_update_iostat(sbi, FS_DISCARD, 1);
+ while (total_len && *issued < dpolicy->max_requests && !err) {
+ struct bio *bio = NULL;
+ unsigned long flags;
+ bool last = true;
+
+ if (len > max_discard_blocks) {
+ len = max_discard_blocks;
+ last = false;
}
- } else {
- __remove_discard_cmd(sbi, dc);
+
+ (*issued)++;
+ if (*issued == dpolicy->max_requests)
+ last = true;
+
+ dc->len += len;
+
+ if (time_to_inject(sbi, FAULT_DISCARD)) {
+ f2fs_show_injection_info(FAULT_DISCARD);
+ err = -EIO;
+ goto submit;
+ }
+ err = __blkdev_issue_discard(bdev,
+ SECTOR_FROM_BLOCK(start),
+ SECTOR_FROM_BLOCK(len),
+ GFP_NOFS, 0, &bio);
+submit:
+ if (err) {
+ spin_lock_irqsave(&dc->lock, flags);
+ if (dc->state == D_PARTIAL)
+ dc->state = D_SUBMIT;
+ spin_unlock_irqrestore(&dc->lock, flags);
+
+ break;
+ }
+
+ f2fs_bug_on(sbi, !bio);
+
+ /*
+ * keep this before submission to avoid D_DONE being
+ * set right away
+ */
+ spin_lock_irqsave(&dc->lock, flags);
+ if (last)
+ dc->state = D_SUBMIT;
+ else
+ dc->state = D_PARTIAL;
+ dc->bio_ref++;
+ spin_unlock_irqrestore(&dc->lock, flags);
+
+ atomic_inc(&dcc->queued_discard);
+ dc->queued++;
+ list_move_tail(&dc->list, wait_list);
+
+ /* sanity check on discard range */
+ __check_sit_bitmap(sbi, lstart, lstart + len);
+
+ bio->bi_private = dc;
+ bio->bi_end_io = f2fs_submit_discard_endio;
+ submit_bio(flag, bio);
+
+ atomic_inc(&dcc->issued_discard);
+
+ f2fs_update_iostat(sbi, FS_DISCARD, 1);
+
+ lstart += len;
+ start += len;
+ total_len -= len;
+ len = total_len;
}
+
+ if (!err && len)
+ __update_discard_tree_range(sbi, bdev, lstart, start, len);
+ return err;
}
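Worked example of the splitting (device limits assumed): if the queue reports max_discard_sectors = 65536, then max_discard_blocks = 65536 >> 3 = 8192 blocks (4 KiB blocks, 512-byte sectors), so a 20000-block extent is issued as 8192 + 8192 + 3616. Each chunk takes a dc->bio_ref reference, dc->state only moves to D_SUBMIT on the last chunk (or when *issued reaches max_requests), and any tail left when the request budget runs out is re-inserted into the tree by __update_discard_tree_range() for a later round.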
static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
@@ -1088,7 +1300,7 @@ static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
goto do_insert;
}
- p = __lookup_rb_tree_for_insert(sbi, &dcc->root, &parent, lstart);
+ p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent, lstart);
do_insert:
dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent, p);
if (!dc)
@@ -1149,11 +1361,12 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
struct discard_cmd *dc;
struct discard_info di = {0};
struct rb_node **insert_p = NULL, *insert_parent = NULL;
+ struct request_queue *q = bdev_get_queue(bdev);
+ unsigned int max_discard_blocks =
+ SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
block_t end = lstart + len;
- mutex_lock(&dcc->cmd_lock);
-
- dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root,
+ dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
NULL, lstart,
(struct rb_entry **)&prev_dc,
(struct rb_entry **)&next_dc,
@@ -1192,7 +1405,8 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
if (prev_dc && prev_dc->state == D_PREP &&
prev_dc->bdev == bdev &&
- __is_discard_back_mergeable(&di, &prev_dc->di)) {
+ __is_discard_back_mergeable(&di, &prev_dc->di,
+ max_discard_blocks)) {
prev_dc->di.len += di.len;
dcc->undiscard_blks += di.len;
__relocate_discard_cmd(dcc, prev_dc);
@@ -1203,7 +1417,8 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
if (next_dc && next_dc->state == D_PREP &&
next_dc->bdev == bdev &&
- __is_discard_front_mergeable(&di, &next_dc->di)) {
+ __is_discard_front_mergeable(&di, &next_dc->di,
+ max_discard_blocks)) {
next_dc->di.lstart = di.lstart;
next_dc->di.len += di.len;
next_dc->di.start = di.start;
@@ -1226,8 +1441,6 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
node = rb_next(&prev_dc->rb_node);
next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
}
-
- mutex_unlock(&dcc->cmd_lock);
}
static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
@@ -1235,17 +1448,82 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
{
block_t lblkstart = blkstart;
+ if (!f2fs_bdev_support_discard(bdev))
+ return 0;
+
trace_f2fs_queue_discard(bdev, blkstart, blklen);
- if (sbi->s_ndevs) {
+ if (f2fs_is_multi_device(sbi)) {
int devi = f2fs_target_device_index(sbi, blkstart);
blkstart -= FDEV(devi).start_blk;
}
+ mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
__update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
+ mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
return 0;
}
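+/*
+ * Issue pending discards in logical-address order, resuming from
+ * dcc->next_pos on each pass so a large backlog makes steady progress.
+ */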
+static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
+ struct discard_policy *dpolicy)
+{
+ struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+ struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
+ struct rb_node **insert_p = NULL, *insert_parent = NULL;
+ struct discard_cmd *dc;
+ struct blk_plug plug;
+ unsigned int pos = dcc->next_pos;
+ unsigned int issued = 0;
+ bool io_interrupted = false;
+
+ mutex_lock(&dcc->cmd_lock);
+ dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
+ NULL, pos,
+ (struct rb_entry **)&prev_dc,
+ (struct rb_entry **)&next_dc,
+ &insert_p, &insert_parent, true);
+ if (!dc)
+ dc = next_dc;
+
+ blk_start_plug(&plug);
+
+ while (dc) {
+ struct rb_node *node;
+ int err = 0;
+
+ if (dc->state != D_PREP)
+ goto next;
+
+ if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) {
+ io_interrupted = true;
+ break;
+ }
+
+ dcc->next_pos = dc->lstart + dc->len;
+ err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
+
+ if (issued >= dpolicy->max_requests)
+ break;
+next:
+ node = rb_next(&dc->rb_node);
+ if (err)
+ __remove_discard_cmd(sbi, dc);
+ dc = rb_entry_safe(node, struct discard_cmd, rb_node);
+ }
+
+ blk_finish_plug(&plug);
+
+ if (!dc)
+ dcc->next_pos = 0;
+
+ mutex_unlock(&dcc->cmd_lock);
+
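+ /* -1 tells the caller we bailed out because I/O was busy, not done */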
+ if (!issued && io_interrupted)
+ issued = -1;
+
+ return issued;
+}
+
static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy)
{
@@ -1253,39 +1531,55 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
struct list_head *pend_list;
struct discard_cmd *dc, *tmp;
struct blk_plug plug;
- int i, iter = 0, issued = 0;
+ int i, issued = 0;
bool io_interrupted = false;
+ if (dpolicy->timeout != 0)
+ f2fs_update_time(sbi, dpolicy->timeout);
+
for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
+ if (dpolicy->timeout != 0 &&
+ f2fs_time_over(sbi, dpolicy->timeout))
+ break;
+
if (i + 1 < dpolicy->granularity)
break;
+
+ if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
+ return __issue_discard_cmd_orderly(sbi, dpolicy);
+
pend_list = &dcc->pend_list[i];
mutex_lock(&dcc->cmd_lock);
if (list_empty(pend_list))
goto next;
- f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root));
+ if (unlikely(dcc->rbtree_check))
+ f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
+ &dcc->root));
blk_start_plug(&plug);
list_for_each_entry_safe(dc, tmp, pend_list, list) {
f2fs_bug_on(sbi, dc->state != D_PREP);
+ if (dpolicy->timeout != 0 &&
+ f2fs_time_over(sbi, dpolicy->timeout))
+ break;
+
if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
- !is_idle(sbi)) {
+ !is_idle(sbi, DISCARD_TIME)) {
io_interrupted = true;
- goto skip;
+ break;
}
- __submit_discard_cmd(sbi, dpolicy, dc);
- issued++;
-skip:
- if (++iter >= dpolicy->max_requests)
+ __submit_discard_cmd(sbi, dpolicy, dc, &issued);
+
+ if (issued >= dpolicy->max_requests)
break;
}
blk_finish_plug(&plug);
next:
mutex_unlock(&dcc->cmd_lock);
- if (iter >= dpolicy->max_requests)
+ if (issued >= dpolicy->max_requests || io_interrupted)
break;
}
@@ -1317,7 +1611,7 @@ static bool __drop_discard_cmd(struct f2fs_sb_info *sbi)
return dropped;
}
-void drop_discard_cmd(struct f2fs_sb_info *sbi)
+void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi)
{
__drop_discard_cmd(sbi);
}
@@ -1383,21 +1677,22 @@ next:
return trimmed;
}
-static void __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
+static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy)
{
struct discard_policy dp;
+ unsigned int discard_blks;
- if (dpolicy) {
- __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
- return;
- }
+ if (dpolicy)
+ return __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
/* wait all */
__init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, 1);
- __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
+ discard_blks = __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
__init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, 1);
- __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
+ discard_blks += __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
+
+ return discard_blks;
}
/* This should be covered by global mutex, &sit_i->sentry_lock */
@@ -1408,7 +1703,8 @@ static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
bool need_wait = false;
mutex_lock(&dcc->cmd_lock);
- dc = (struct discard_cmd *)__lookup_rb_tree(&dcc->root, NULL, blkaddr);
+ dc = (struct discard_cmd *)f2fs_lookup_rb_tree(&dcc->root,
+ NULL, blkaddr);
if (dc) {
if (dc->state == D_PREP) {
__punch_discard_cmd(sbi, dc, blkaddr);
@@ -1423,7 +1719,7 @@ static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
__wait_one_discard_bio(sbi, dc);
}
-void stop_discard_thread(struct f2fs_sb_info *sbi)
+void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
@@ -1436,7 +1732,7 @@ void stop_discard_thread(struct f2fs_sb_info *sbi)
}
/* This comes from f2fs_put_super */
-bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
+bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct discard_policy dpolicy;
@@ -1444,11 +1740,14 @@ bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
__init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
dcc->discard_granularity);
+ dpolicy.timeout = UMOUNT_DISCARD_TIMEOUT;
__issue_discard_cmd(sbi, &dpolicy);
dropped = __drop_discard_cmd(sbi);
/* just to make sure there is no pending discard commands */
__wait_all_discard_cmd(sbi, NULL);
+
+ f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt));
return dropped;
}
@@ -1471,25 +1770,38 @@ static int issue_discard_thread(void *data)
kthread_should_stop() || freezing(current) ||
dcc->discard_wake,
msecs_to_jiffies(wait_ms));
+
+ if (dcc->discard_wake)
+ dcc->discard_wake = 0;
+
+ /* clean up pending candidates before going to sleep */
+ if (atomic_read(&dcc->queued_discard))
+ __wait_all_discard_cmd(sbi, NULL);
+
if (try_to_freeze())
continue;
if (f2fs_readonly(sbi->sb))
continue;
if (kthread_should_stop())
return 0;
+ if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
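+ /* hold off issuing discards until fsck clears the flag */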
+ wait_ms = dpolicy.max_interval;
+ continue;
+ }
- if (dcc->discard_wake)
- dcc->discard_wake = 0;
-
- if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
+ if (sbi->gc_mode == GC_URGENT)
__init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1);
sb_start_intwrite(sbi->sb);
issued = __issue_discard_cmd(sbi, &dpolicy);
- if (issued) {
+ if (issued > 0) {
__wait_all_discard_cmd(sbi, &dpolicy);
wait_ms = dpolicy.min_interval;
+ } else if (issued == -1) {
+ wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME);
+ if (!wait_ms)
+ wait_ms = dpolicy.mid_interval;
} else {
wait_ms = dpolicy.max_interval;
}
@@ -1508,42 +1820,34 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
block_t lblkstart = blkstart;
int devi = 0;
- if (sbi->s_ndevs) {
+ if (f2fs_is_multi_device(sbi)) {
devi = f2fs_target_device_index(sbi, blkstart);
+ if (blkstart < FDEV(devi).start_blk ||
+ blkstart > FDEV(devi).end_blk) {
+ f2fs_err(sbi, "Invalid block %x", blkstart);
+ return -EIO;
+ }
blkstart -= FDEV(devi).start_blk;
}
- /*
- * We need to know the type of the zone: for conventional zones,
- * use regular discard if the drive supports it. For sequential
- * zones, reset the zone write pointer.
- */
- switch (get_blkz_type(sbi, bdev, blkstart)) {
-
- case BLK_ZONE_TYPE_CONVENTIONAL:
- if (!blk_queue_discard(bdev_get_queue(bdev)))
- return 0;
- return __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
- case BLK_ZONE_TYPE_SEQWRITE_REQ:
- case BLK_ZONE_TYPE_SEQWRITE_PREF:
+ /* For sequential zones, reset the zone write pointer */
+ if (f2fs_blkz_is_seq(sbi, devi, blkstart)) {
sector = SECTOR_FROM_BLOCK(blkstart);
nr_sects = SECTOR_FROM_BLOCK(blklen);
if (sector & (bdev_zone_sectors(bdev) - 1) ||
nr_sects != bdev_zone_sectors(bdev)) {
- f2fs_msg(sbi->sb, KERN_INFO,
- "(%d) %s: Unaligned discard attempted (block %x + %x)",
- devi, sbi->s_ndevs ? FDEV(devi).path: "",
- blkstart, blklen);
+ f2fs_err(sbi, "(%d) %s: Unaligned zone reset attempted (block %x + %x)",
+ devi, sbi->s_ndevs ? FDEV(devi).path : "",
+ blkstart, blklen);
return -EIO;
}
trace_f2fs_issue_reset_zone(bdev, blkstart);
- return blkdev_reset_zones(bdev, sector,
- nr_sects, GFP_NOFS);
- default:
- /* Unknown zone type: broken device ? */
- return -EIO;
+ return blkdev_reset_zones(bdev, sector, nr_sects, GFP_NOFS);
}
+
+ /* For conventional zones, use regular discard if supported */
+ return __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
}
#endif
@@ -1551,8 +1855,7 @@ static int __issue_discard_async(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t blkstart, block_t blklen)
{
#ifdef CONFIG_BLK_DEV_ZONED
- if (f2fs_sb_has_blkzoned(sbi->sb) &&
- bdev_zoned_model(bdev) != BLK_ZONED_NONE)
+ if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev))
return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
#endif
return __queue_discard_cmd(sbi, bdev, blkstart, blklen);
@@ -1614,11 +1917,11 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
struct list_head *head = &SM_I(sbi)->dcc_info->entry_list;
int i;
- if (se->valid_blocks == max_blocks || !f2fs_discard_en(sbi))
+ if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi))
return false;
if (!force) {
- if (!test_opt(sbi, DISCARD) || !se->valid_blocks ||
+ if (!f2fs_realtime_discard_enable(sbi) || !se->valid_blocks ||
SM_I(sbi)->dcc_info->nr_discards >=
SM_I(sbi)->dcc_info->max_discards)
return false;
@@ -1658,20 +1961,24 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
return false;
}
-void release_discard_addrs(struct f2fs_sb_info *sbi)
+static void release_discard_addr(struct discard_entry *entry)
+{
+ list_del(&entry->list);
+ kmem_cache_free(discard_entry_slab, entry);
+}
+
+void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi)
{
struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list);
struct discard_entry *entry, *this;
/* drop caches */
- list_for_each_entry_safe(entry, this, head, list) {
- list_del(&entry->list);
- kmem_cache_free(discard_entry_slab, entry);
- }
+ list_for_each_entry_safe(entry, this, head, list)
+ release_discard_addr(entry);
}
/*
- * Should call clear_prefree_segments after checkpoint is done.
+ * Should call f2fs_clear_prefree_segments after checkpoint is done.
*/
static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
{
@@ -1684,7 +1991,8 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
mutex_unlock(&dirty_i->seglist_lock);
}
-void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
+ struct cp_control *cpc)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct list_head *head = &dcc->entry_list;
@@ -1694,30 +2002,39 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc)
unsigned int start = 0, end = -1;
unsigned int secno, start_segno;
bool force = (cpc->reason & CP_DISCARD);
+ bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi);
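+ /* with LFS on large sections, trim prefree segments in section-aligned units */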
mutex_lock(&dirty_i->seglist_lock);
while (1) {
int i;
+
+ if (need_align && end != -1)
+ end--;
start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
if (start >= MAIN_SEGS(sbi))
break;
end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
start + 1);
- for (i = start; i < end; i++)
- clear_bit(i, prefree_map);
+ if (need_align) {
+ start = rounddown(start, sbi->segs_per_sec);
+ end = roundup(end, sbi->segs_per_sec);
+ }
- dirty_i->nr_dirty[PRE] -= end - start;
+ for (i = start; i < end; i++) {
+ if (test_and_clear_bit(i, prefree_map))
+ dirty_i->nr_dirty[PRE]--;
+ }
- if (!test_opt(sbi, DISCARD))
+ if (!f2fs_realtime_discard_enable(sbi))
continue;
if (force && start >= cpc->trim_start &&
(end - 1) <= cpc->trim_end)
continue;
- if (!test_opt(sbi, LFS) || sbi->segs_per_sec == 1) {
+ if (!test_opt(sbi, LFS) || !__is_large_section(sbi)) {
f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
(end - start) << sbi->log_blocks_per_seg);
continue;
@@ -1749,7 +2066,7 @@ find_next:
sbi->blocks_per_seg, cur_pos);
len = next_pos - cur_pos;
- if (f2fs_sb_has_blkzoned(sbi->sb) ||
+ if (f2fs_sb_has_blkzoned(sbi) ||
(force && len < cpc->trim_minlen))
goto skip;
@@ -1767,9 +2084,8 @@ skip:
if (cur_pos < sbi->blocks_per_seg)
goto find_next;
- list_del(&entry->list);
+ release_discard_addr(entry);
dcc->nr_discards -= total_len;
- kmem_cache_free(discard_entry_slab, entry);
}
wake_up_discard_thread(sbi, false);
@@ -1798,12 +2114,14 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
INIT_LIST_HEAD(&dcc->fstrim_list);
mutex_init(&dcc->cmd_lock);
atomic_set(&dcc->issued_discard, 0);
- atomic_set(&dcc->issing_discard, 0);
+ atomic_set(&dcc->queued_discard, 0);
atomic_set(&dcc->discard_cmd_cnt, 0);
dcc->nr_discards = 0;
dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
dcc->undiscard_blks = 0;
+ dcc->next_pos = 0;
dcc->root = RB_ROOT;
+ dcc->rbtree_check = false;
init_waitqueue_head(&dcc->discard_wait_queue);
SM_I(sbi)->dcc_info = dcc;
@@ -1812,7 +2130,7 @@ init_thread:
"f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(dcc->f2fs_issue_discard)) {
err = PTR_ERR(dcc->f2fs_issue_discard);
- kfree(dcc);
+ kvfree(dcc);
SM_I(sbi)->dcc_info = NULL;
return err;
}
@@ -1827,9 +2145,9 @@ static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
if (!dcc)
return;
- stop_discard_thread(sbi);
+ f2fs_stop_discard_thread(sbi);
- kfree(dcc);
+ kvfree(dcc);
SM_I(sbi)->dcc_info = NULL;
}
@@ -1874,8 +2192,9 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
(new_vblocks > sbi->blocks_per_seg)));
se->valid_blocks = new_vblocks;
- se->mtime = get_mtime(sbi);
- SIT_I(sbi)->max_mtime = se->mtime;
+ se->mtime = get_mtime(sbi, false);
+ if (se->mtime > SIT_I(sbi)->max_mtime)
+ SIT_I(sbi)->max_mtime = se->mtime;
/* Update valid block bitmap */
if (del > 0) {
@@ -1884,26 +2203,25 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
mir_exist = f2fs_test_and_set_bit(offset,
se->cur_valid_map_mir);
if (unlikely(exist != mir_exist)) {
- f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error "
- "when setting bitmap, blk:%u, old bit:%d",
- blkaddr, exist);
+ f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d",
+ blkaddr, exist);
f2fs_bug_on(sbi, 1);
}
#endif
if (unlikely(exist)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Bitmap was wrongly set, blk:%u", blkaddr);
+ f2fs_err(sbi, "Bitmap was wrongly set, blk:%u",
+ blkaddr);
f2fs_bug_on(sbi, 1);
se->valid_blocks--;
del = 0;
}
- if (f2fs_discard_en(sbi) &&
- !f2fs_test_and_set_bit(offset, se->discard_map))
+ if (!f2fs_test_and_set_bit(offset, se->discard_map))
sbi->discard_blks--;
/* don't let SSR overwrite this block, to keep the node chain intact */
- if (IS_NODESEG(se->type)) {
+ if (IS_NODESEG(se->type) &&
+ !is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
se->ckpt_valid_blocks++;
}
@@ -1913,22 +2231,32 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
mir_exist = f2fs_test_and_clear_bit(offset,
se->cur_valid_map_mir);
if (unlikely(exist != mir_exist)) {
- f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error "
- "when clearing bitmap, blk:%u, old bit:%d",
- blkaddr, exist);
+ f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d",
+ blkaddr, exist);
f2fs_bug_on(sbi, 1);
}
#endif
if (unlikely(!exist)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Bitmap was wrongly cleared, blk:%u", blkaddr);
+ f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u",
+ blkaddr);
f2fs_bug_on(sbi, 1);
se->valid_blocks++;
del = 0;
+ } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+ /*
+ * If checkpoints are off, we must not reuse data that
+ * was used in the previous checkpoint. If it was used
+ * before, we must track that to know how much space we
+ * really have.
+ */
+ if (f2fs_test_bit(offset, se->ckpt_valid_map)) {
+ spin_lock(&sbi->stat_lock);
+ sbi->unusable_block_count++;
+ spin_unlock(&sbi->stat_lock);
+ }
}
- if (f2fs_discard_en(sbi) &&
- f2fs_test_and_clear_bit(offset, se->discard_map))
+ if (f2fs_test_and_clear_bit(offset, se->discard_map))
sbi->discard_blks++;
}
if (!f2fs_test_bit(offset, se->ckpt_valid_map))
@@ -1939,11 +2267,11 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
/* update total number of valid blocks to be written in ckpt area */
SIT_I(sbi)->written_valid_blocks += del;
- if (sbi->segs_per_sec > 1)
+ if (__is_large_section(sbi))
get_sec_entry(sbi, segno)->valid_blocks += del;
}
-void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
+void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
{
unsigned int segno = GET_SEGNO(sbi, addr);
struct sit_info *sit_i = SIT_I(sbi);
@@ -1952,6 +2280,8 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
if (addr == NEW_ADDR)
return;
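+ /* drop any stale copy of this block cached in the meta inode */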
+ invalidate_mapping_pages(META_MAPPING(sbi), addr, addr);
+
/* add it into sit main buffer */
down_write(&sit_i->sentry_lock);
@@ -1963,14 +2293,14 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
up_write(&sit_i->sentry_lock);
}
-bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
+bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
{
struct sit_info *sit_i = SIT_I(sbi);
unsigned int segno, offset;
struct seg_entry *se;
bool is_cp = false;
- if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
+ if (!__is_valid_data_blkaddr(blkaddr))
return true;
down_read(&sit_i->sentry_lock);
@@ -2002,7 +2332,7 @@ static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
/*
* Calculate the number of current summary pages for writing
*/
-int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
+int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
{
int valid_sum_count = 0;
int i, sum_in_page;
@@ -2032,14 +2362,15 @@ int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
/*
* Caller should put this summary page
*/
-struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
+struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
{
- return get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno));
+ return f2fs_get_meta_page_nofail(sbi, GET_SUM_BLOCK(sbi, segno));
}
-void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr)
+void f2fs_update_meta_page(struct f2fs_sb_info *sbi,
+ void *src, block_t blk_addr)
{
- struct page *page = grab_meta_page(sbi, blk_addr);
+ struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
memcpy(page_address(page), src, PAGE_SIZE);
set_page_dirty(page);
@@ -2049,18 +2380,19 @@ void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr)
static void write_sum_page(struct f2fs_sb_info *sbi,
struct f2fs_summary_block *sum_blk, block_t blk_addr)
{
- update_meta_page(sbi, (void *)sum_blk, blk_addr);
+ f2fs_update_meta_page(sbi, (void *)sum_blk, blk_addr);
}
static void write_current_sum_page(struct f2fs_sb_info *sbi,
int type, block_t blk_addr)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
- struct page *page = grab_meta_page(sbi, blk_addr);
+ struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
struct f2fs_summary_block *src = curseg->sum_blk;
struct f2fs_summary_block *dst;
dst = (struct f2fs_summary_block *)page_address(page);
+ memset(dst, 0, PAGE_SIZE);
mutex_lock(&curseg->curseg_mutex);
@@ -2201,9 +2533,12 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
{
/* if segs_per_sec is larger than 1, we need to keep the original policy. */
- if (sbi->segs_per_sec != 1)
+ if (__is_large_section(sbi))
return CURSEG_I(sbi, type)->segno;
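+ /* while checkpointing is disabled, hint allocation from segment 0, as NOHEAP does */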
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+ return 0;
+
if (test_opt(sbi, NOHEAP) &&
(type == CURSEG_HOT_DATA || IS_NODESEG(type)))
return 0;
@@ -2300,7 +2635,8 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type)
curseg->alloc_type = SSR;
__next_free_blkoff(sbi, curseg, 0);
- sum_page = get_sum_page(sbi, new_segno);
+ sum_page = f2fs_get_sum_page(sbi, new_segno);
+ f2fs_bug_on(sbi, IS_ERR(sum_page));
sum_node = (struct f2fs_summary_block *)page_address(sum_page);
memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
f2fs_put_page(sum_page, 1);
@@ -2314,7 +2650,7 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
int i, cnt;
bool reversed = false;
- /* need_SSR() already forces to do this */
+ /* f2fs_need_SSR() already forces to do this */
if (v_ops->get_victim(sbi, &segno, BG_GC, type, SSR)) {
curseg->next_segno = segno;
return 1;
@@ -2347,6 +2683,15 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
return 1;
}
}
+
+ /* find valid_blocks=0 in dirty list */
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+ segno = get_free_segment(sbi);
+ if (segno != NULL_SEGNO) {
+ curseg->next_segno = segno;
+ return 1;
+ }
+ }
return 0;
}
@@ -2364,9 +2709,10 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
type == CURSEG_WARM_NODE)
new_curseg(sbi, type, false);
- else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
+ else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type) &&
+ likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
new_curseg(sbi, type, false);
- else if (need_SSR(sbi) && get_ssr_segment(sbi, type))
+ else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
change_curseg(sbi, type);
else
new_curseg(sbi, type, false);
@@ -2374,7 +2720,40 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
stat_inc_seg_type(sbi, curseg);
}
-void allocate_new_segments(struct f2fs_sb_info *sbi)
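+/*
+ * If the current segment of @type sits inside [start, end], move it
+ * elsewhere (via SSR when possible) so the range can be resized away.
+ */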
+void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
+ unsigned int start, unsigned int end)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+ unsigned int segno;
+
+ down_read(&SM_I(sbi)->curseg_lock);
+ mutex_lock(&curseg->curseg_mutex);
+ down_write(&SIT_I(sbi)->sentry_lock);
+
+ segno = CURSEG_I(sbi, type)->segno;
+ if (segno < start || segno > end)
+ goto unlock;
+
+ if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
+ change_curseg(sbi, type);
+ else
+ new_curseg(sbi, type, true);
+
+ stat_inc_seg_type(sbi, curseg);
+
+ locate_dirty_segment(sbi, segno);
+unlock:
+ up_write(&SIT_I(sbi)->sentry_lock);
+
+ if (segno != curseg->segno)
+ f2fs_notice(sbi, "For resize: curseg of type %d: %u ==> %u",
+ type, segno, curseg->segno);
+
+ mutex_unlock(&curseg->curseg_mutex);
+ up_read(&SM_I(sbi)->curseg_lock);
+}
+
+void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
{
struct curseg_info *curseg;
unsigned int old_segno;
@@ -2396,7 +2775,8 @@ static const struct segment_allocation default_salloc_ops = {
.allocate_segment = allocate_segment_by_default,
};
-bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
+ struct cp_control *cpc)
{
__u64 trim_start = cpc->trim_start;
bool has_candidate = false;
@@ -2414,7 +2794,7 @@ bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc)
return has_candidate;
}
-static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
+static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy,
unsigned int start, unsigned int end)
{
@@ -2424,14 +2804,17 @@ static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
struct discard_cmd *dc;
struct blk_plug plug;
int issued;
+ unsigned int trimmed = 0;
next:
issued = 0;
mutex_lock(&dcc->cmd_lock);
- f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root));
+ if (unlikely(dcc->rbtree_check))
+ f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
+ &dcc->root));
- dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root,
+ dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
NULL, start,
(struct rb_entry **)&prev_dc,
(struct rb_entry **)&next_dc,
@@ -2443,6 +2826,7 @@ next:
while (dc && dc->lstart <= end) {
struct rb_node *node;
+ int err = 0;
if (dc->len < dpolicy->granularity)
goto skip;
@@ -2452,19 +2836,24 @@ next:
goto skip;
}
- __submit_discard_cmd(sbi, dpolicy, dc);
+ err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
- if (++issued >= dpolicy->max_requests) {
+ if (issued >= dpolicy->max_requests) {
start = dc->lstart + dc->len;
+ if (err)
+ __remove_discard_cmd(sbi, dc);
+
blk_finish_plug(&plug);
mutex_unlock(&dcc->cmd_lock);
- __wait_all_discard_cmd(sbi, NULL);
+ trimmed += __wait_all_discard_cmd(sbi, NULL);
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto next;
}
skip:
node = rb_next(&dc->rb_node);
+ if (err)
+ __remove_discard_cmd(sbi, dc);
dc = rb_entry_safe(node, struct discard_cmd, rb_node);
if (fatal_signal_pending(current))
@@ -2473,6 +2862,8 @@ skip:
blk_finish_plug(&plug);
mutex_unlock(&dcc->cmd_lock);
+
+ return trimmed;
}
int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
@@ -2485,23 +2876,27 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
struct discard_policy dpolicy;
unsigned long long trimmed = 0;
int err = 0;
+ bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi);
if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
return -EINVAL;
- if (end <= MAIN_BLKADDR(sbi))
+ if (end < MAIN_BLKADDR(sbi))
goto out;
if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
- f2fs_msg(sbi->sb, KERN_WARNING,
- "Found FS corruption, run fsck to fix.");
- goto out;
+ f2fs_warn(sbi, "Found FS corruption, run fsck to fix.");
+ return -EFSCORRUPTED;
}
/* start/end segment number in main_area */
start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
GET_SEGNO(sbi, end);
+ if (need_align) {
+ start_segno = rounddown(start_segno, sbi->segs_per_sec);
+ end_segno = roundup(end_segno + 1, sbi->segs_per_sec) - 1;
+ }
cpc.reason = CP_DISCARD;
cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
@@ -2512,29 +2907,32 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
goto out;
mutex_lock(&sbi->gc_mutex);
- err = write_checkpoint(sbi, &cpc);
+ err = f2fs_write_checkpoint(sbi, &cpc);
mutex_unlock(&sbi->gc_mutex);
if (err)
goto out;
- start_block = START_BLOCK(sbi, start_segno);
- end_block = START_BLOCK(sbi, end_segno + 1);
-
- __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
- __issue_discard_cmd_range(sbi, &dpolicy, start_block, end_block);
-
/*
* We filed discard candidates, but we don't need to wait for all of
* them, since they'll be issued during idle time along with the
* runtime discard option; users are expected to rely on either
* runtime discard or periodic fstrim, not both.
*/
- if (!test_opt(sbi, DISCARD)) {
- trimmed = __wait_discard_cmd_range(sbi, &dpolicy,
+ if (f2fs_realtime_discard_enable(sbi))
+ goto out;
+
+ start_block = START_BLOCK(sbi, start_segno);
+ end_block = START_BLOCK(sbi, end_segno + 1);
+
+ __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
+ trimmed = __issue_discard_cmd_range(sbi, &dpolicy,
+ start_block, end_block);
+
+ trimmed += __wait_discard_cmd_range(sbi, &dpolicy,
start_block, end_block);
- range->len = F2FS_BLK_TO_BYTES(trimmed);
- }
out:
+ if (!err)
+ range->len = F2FS_BLK_TO_BYTES(trimmed);
return err;
}
@@ -2546,7 +2944,7 @@ static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
return false;
}
-int rw_hint_to_seg_type(enum rw_hint hint)
+int f2fs_rw_hint_to_seg_type(enum rw_hint hint)
{
switch (hint) {
case WRITE_LIFE_SHORT:
@@ -2619,7 +3017,7 @@ int rw_hint_to_seg_type(enum rw_hint hint)
* WRITE_LIFE_LONG " WRITE_LIFE_LONG
*/
-enum rw_hint io_type_to_rw_hint(struct f2fs_sb_info *sbi,
+enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
enum page_type type, enum temp_type temp)
{
if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER) {
@@ -2686,9 +3084,11 @@ static int __get_segment_type_6(struct f2fs_io_info *fio)
if (is_cold_data(fio->page) || file_is_cold(inode))
return CURSEG_COLD_DATA;
if (file_is_hot(inode) ||
- is_inode_flag_set(inode, FI_HOT_DATA))
+ is_inode_flag_set(inode, FI_HOT_DATA) ||
+ f2fs_is_atomic_file(inode) ||
+ f2fs_is_volatile_file(inode))
return CURSEG_HOT_DATA;
- /* rw_hint_to_seg_type(inode->i_write_hint); */
+ /* f2fs_rw_hint_to_seg_type(inode->i_write_hint); */
return CURSEG_WARM_DATA;
} else {
if (IS_DNODE(fio->page))
@@ -2725,7 +3125,7 @@ static int __get_segment_type(struct f2fs_io_info *fio)
return type;
}
-void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
+void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
block_t old_blkaddr, block_t *new_blkaddr,
struct f2fs_summary *sum, int type,
struct f2fs_io_info *fio, bool add_list)
@@ -2785,6 +3185,7 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
INIT_LIST_HEAD(&fio->list);
fio->in_list = true;
+ fio->retry = false;
io = sbi->write_io[fio->type] + fio->temp;
spin_lock(&io->io_lock);
list_add_tail(&fio->list, &io->io_list);
@@ -2801,13 +3202,13 @@ static void update_device_state(struct f2fs_io_info *fio)
struct f2fs_sb_info *sbi = fio->sbi;
unsigned int devidx;
- if (!sbi->s_ndevs)
+ if (!f2fs_is_multi_device(sbi))
return;
devidx = f2fs_target_device_index(sbi, fio->new_blkaddr);
/* update device state for fsync */
- set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO);
+ f2fs_set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO);
/* update device state for checkpoint */
if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
@@ -2820,23 +3221,31 @@ static void update_device_state(struct f2fs_io_info *fio)
static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
{
int type = __get_segment_type(fio);
- int err;
+ bool keep_order = (test_opt(fio->sbi, LFS) && type == CURSEG_COLD_DATA);
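+ /* io_order_lock keeps LFS cold-data allocation and submission in order */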
+ if (keep_order)
+ down_read(&fio->sbi->io_order_lock);
reallocate:
- allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
+ f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
&fio->new_blkaddr, sum, type, fio, true);
+ if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO)
+ invalidate_mapping_pages(META_MAPPING(fio->sbi),
+ fio->old_blkaddr, fio->old_blkaddr);
/* writeout dirty page into bdev */
- err = f2fs_submit_page_write(fio);
- if (err == -EAGAIN) {
+ f2fs_submit_page_write(fio);
+ if (fio->retry) {
fio->old_blkaddr = fio->new_blkaddr;
goto reallocate;
- } else if (!err) {
- update_device_state(fio);
}
+
+ update_device_state(fio);
+
+ if (keep_order)
+ up_read(&fio->sbi->io_order_lock);
}
-void write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
+void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
enum iostat_type io_type)
{
struct f2fs_io_info fio = {
@@ -2859,10 +3268,11 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
ClearPageError(page);
f2fs_submit_page_write(&fio);
+ stat_inc_meta_count(sbi, page->index);
f2fs_update_iostat(sbi, io_type, F2FS_BLKSIZE);
}
-void write_node_page(unsigned int nid, struct f2fs_io_info *fio)
+void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio)
{
struct f2fs_summary sum;
@@ -2872,40 +3282,49 @@ void write_node_page(unsigned int nid, struct f2fs_io_info *fio)
f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
}
-void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio)
+void f2fs_outplace_write_data(struct dnode_of_data *dn,
+ struct f2fs_io_info *fio)
{
struct f2fs_sb_info *sbi = fio->sbi;
struct f2fs_summary sum;
- struct node_info ni;
f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
- get_node_info(sbi, dn->nid, &ni);
- set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
+ set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version);
do_write_page(&sum, fio);
f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
f2fs_update_iostat(sbi, fio->io_type, F2FS_BLKSIZE);
}
-int rewrite_data_page(struct f2fs_io_info *fio)
+int f2fs_inplace_write_data(struct f2fs_io_info *fio)
{
int err;
struct f2fs_sb_info *sbi = fio->sbi;
+ unsigned int segno;
fio->new_blkaddr = fio->old_blkaddr;
/* i/o temperature is needed for passing down write hints */
__get_segment_type(fio);
- f2fs_bug_on(sbi, !IS_DATASEG(get_seg_entry(sbi,
- GET_SEGNO(sbi, fio->new_blkaddr))->type));
+ segno = GET_SEGNO(sbi, fio->new_blkaddr);
+
+ if (!IS_DATASEG(get_seg_entry(sbi, segno)->type)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.",
+ __func__, segno);
+ return -EFSCORRUPTED;
+ }
stat_inc_inplace_blocks(fio->sbi);
- err = f2fs_submit_page_bio(fio);
- if (!err)
+ if (fio->bio)
+ err = f2fs_merge_page_bio(fio);
+ else
+ err = f2fs_submit_page_bio(fio);
+ if (!err) {
update_device_state(fio);
-
- f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
+ f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
+ }
return err;
}
@@ -2922,7 +3341,7 @@ static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi,
return i;
}
-void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
+void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
block_t old_blkaddr, block_t new_blkaddr,
bool recover_curseg, bool recover_newaddr)
{
@@ -2977,8 +3396,11 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
if (!recover_curseg || recover_newaddr)
update_sit_entry(sbi, new_blkaddr, 1);
- if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
+ if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
+ invalidate_mapping_pages(META_MAPPING(sbi),
+ old_blkaddr, old_blkaddr);
update_sit_entry(sbi, old_blkaddr, -1);
+ }
locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr));
@@ -3007,42 +3429,56 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
set_summary(&sum, dn->nid, dn->ofs_in_node, version);
- __f2fs_replace_block(sbi, &sum, old_addr, new_addr,
+ f2fs_do_replace_block(sbi, &sum, old_addr, new_addr,
recover_curseg, recover_newaddr);
f2fs_update_data_blkaddr(dn, new_addr);
}
void f2fs_wait_on_page_writeback(struct page *page,
- enum page_type type, bool ordered)
+ enum page_type type, bool ordered, bool locked)
{
if (PageWriteback(page)) {
struct f2fs_sb_info *sbi = F2FS_P_SB(page);
- f2fs_submit_merged_write_cond(sbi, page->mapping->host,
- 0, page->index, type);
- if (ordered)
+ f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type);
+ if (ordered) {
wait_on_page_writeback(page);
- else
+ f2fs_bug_on(sbi, locked && PageWriteback(page));
+ } else {
wait_for_stable_page(page);
+ }
}
}
-void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr)
+void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr)
{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct page *cpage;
- if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
+ if (!f2fs_post_read_required(inode))
+ return;
+
+ if (!__is_valid_data_blkaddr(blkaddr))
return;
cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
if (cpage) {
- f2fs_wait_on_page_writeback(cpage, DATA, true);
+ f2fs_wait_on_page_writeback(cpage, DATA, true, true);
f2fs_put_page(cpage, 1);
}
}
-static void read_compacted_summaries(struct f2fs_sb_info *sbi)
+void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
+ block_t len)
+{
+ block_t i;
+
+ for (i = 0; i < len; i++)
+ f2fs_wait_on_block_writeback(inode, blkaddr + i);
+}
+
+static int read_compacted_summaries(struct f2fs_sb_info *sbi)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
struct curseg_info *seg_i;
@@ -3053,7 +3489,9 @@ static void read_compacted_summaries(struct f2fs_sb_info *sbi)
start = start_sum_block(sbi);
- page = get_meta_page(sbi, start++);
+ page = f2fs_get_meta_page(sbi, start++);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
kaddr = (unsigned char *)page_address(page);
/* Step 1: restore nat cache */
@@ -3093,12 +3531,15 @@ static void read_compacted_summaries(struct f2fs_sb_info *sbi)
f2fs_put_page(page, 1);
page = NULL;
- page = get_meta_page(sbi, start++);
+ page = f2fs_get_meta_page(sbi, start++);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
kaddr = (unsigned char *)page_address(page);
offset = 0;
}
}
f2fs_put_page(page, 1);
+ return 0;
}
static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
@@ -3110,6 +3551,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
unsigned short blk_off;
unsigned int segno = 0;
block_t blk_addr = 0;
+ int err = 0;
/* get segment number and block addr */
if (IS_DATASEG(type)) {
@@ -3132,7 +3574,9 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
blk_addr = GET_SUM_BLOCK(sbi, segno);
}
- new = get_meta_page(sbi, blk_addr);
+ new = f2fs_get_meta_page(sbi, blk_addr);
+ if (IS_ERR(new))
+ return PTR_ERR(new);
sum = (struct f2fs_summary_block *)page_address(new);
if (IS_NODESEG(type)) {
@@ -3144,7 +3588,9 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
ns->ofs_in_node = 0;
}
} else {
- restore_node_summary(sbi, segno, sum);
+ err = f2fs_restore_node_summary(sbi, segno, sum);
+ if (err)
+ goto out;
}
}
@@ -3164,8 +3610,9 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
curseg->alloc_type = ckpt->alloc_type[type];
curseg->next_blkoff = blk_off;
mutex_unlock(&curseg->curseg_mutex);
+out:
f2fs_put_page(new, 1);
- return 0;
+ return err;
}
static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
@@ -3176,19 +3623,21 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
int err;
if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) {
- int npages = npages_for_summary_flush(sbi, true);
+ int npages = f2fs_npages_for_summary_flush(sbi, true);
if (npages >= 2)
- ra_meta_pages(sbi, start_sum_block(sbi), npages,
+ f2fs_ra_meta_pages(sbi, start_sum_block(sbi), npages,
META_CP, true);
/* restore for compacted data summary */
- read_compacted_summaries(sbi);
+ err = read_compacted_summaries(sbi);
+ if (err)
+ return err;
type = CURSEG_HOT_NODE;
}
if (__exist_node_summaries(sbi))
- ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
+ f2fs_ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
NR_CURSEG_TYPE - type, META_CP, true);
for (; type <= CURSEG_COLD_NODE; type++) {
@@ -3199,8 +3648,11 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
/* sanity check for summary blocks */
if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES ||
- sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES)
+ sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) {
+ f2fs_err(sbi, "invalid journal entries nats %u sits %u\n",
+ nats_in_cursum(nat_j), sits_in_cursum(sit_j));
return -EINVAL;
+ }
return 0;
}
@@ -3214,8 +3666,9 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
int written_size = 0;
int i, j;
- page = grab_meta_page(sbi, blkaddr++);
+ page = f2fs_grab_meta_page(sbi, blkaddr++);
kaddr = (unsigned char *)page_address(page);
+ memset(kaddr, 0, PAGE_SIZE);
/* Step 1: write nat cache */
seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -3238,8 +3691,9 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
for (j = 0; j < blkoff; j++) {
if (!page) {
- page = grab_meta_page(sbi, blkaddr++);
+ page = f2fs_grab_meta_page(sbi, blkaddr++);
kaddr = (unsigned char *)page_address(page);
+ memset(kaddr, 0, PAGE_SIZE);
written_size = 0;
}
summary = (struct f2fs_summary *)(kaddr + written_size);
@@ -3274,7 +3728,7 @@ static void write_normal_summaries(struct f2fs_sb_info *sbi,
write_current_sum_page(sbi, i, blkaddr + (i - type));
}
-void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
+void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
{
if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG))
write_compacted_summaries(sbi, start_blk);
@@ -3282,12 +3736,12 @@ void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
}
-void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
+void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
{
write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
}
-int lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
+int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
unsigned int val, int alloc)
{
int i;
@@ -3312,7 +3766,7 @@ int lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
unsigned int segno)
{
- return get_meta_page(sbi, current_sit_addr(sbi, segno));
+ return f2fs_get_meta_page_nofail(sbi, current_sit_addr(sbi, segno));
}
static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
@@ -3325,7 +3779,7 @@ static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
src_off = current_sit_addr(sbi, start);
dst_off = next_sit_addr(sbi, src_off);
- page = grab_meta_page(sbi, dst_off);
+ page = f2fs_grab_meta_page(sbi, dst_off);
seg_info_to_sit_page(sbi, page, start);
set_page_dirty(page);
@@ -3421,7 +3875,7 @@ static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
* CP calls this function, which flushes SIT entries including sit_journal,
* and moves prefree segs to free segs.
*/
-void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
struct sit_info *sit_i = SIT_I(sbi);
unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
@@ -3429,7 +3883,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
struct f2fs_journal *journal = curseg->journal;
struct sit_entry_set *ses, *tmp;
struct list_head *head = &SM_I(sbi)->sit_entry_set;
- bool to_journal = true;
+ bool to_journal = !is_sbi_flag_set(sbi, SBI_IS_RESIZEFS);
struct seg_entry *se;
down_write(&sit_i->sentry_lock);
@@ -3448,7 +3902,8 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
* entries, remove all entries from journal and add and account
* them in sit entry set.
*/
- if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL))
+ if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL) ||
+ !to_journal)
remove_sits_in_journal(sbi);
/*
@@ -3480,6 +3935,11 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
int offset, sit_offset;
se = get_seg_entry(sbi, segno);
+#ifdef CONFIG_F2FS_CHECK_FS
+ if (memcmp(se->cur_valid_map, se->cur_valid_map_mir,
+ SIT_VBLOCK_MAP_SIZE))
+ f2fs_bug_on(sbi, 1);
+#endif
/* add discard candidates */
if (!(cpc->reason & CP_DISCARD)) {
@@ -3488,17 +3948,21 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
}
if (to_journal) {
- offset = lookup_journal_in_cursum(journal,
+ offset = f2fs_lookup_journal_in_cursum(journal,
SIT_JOURNAL, segno, 1);
f2fs_bug_on(sbi, offset < 0);
segno_in_journal(journal, offset) =
cpu_to_le32(segno);
seg_info_to_raw_sit(se,
&sit_in_journal(journal, offset));
+ check_block_count(sbi, segno,
+ &sit_in_journal(journal, offset));
} else {
sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
seg_info_to_raw_sit(se,
&raw_sit->entries[sit_offset]);
+ check_block_count(sbi, segno,
+ &raw_sit->entries[sit_offset]);
}
__clear_bit(segno, bitmap);
@@ -3546,8 +4010,10 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
SM_I(sbi)->sit_info = sit_i;
- sit_i->sentries = f2fs_kvzalloc(sbi, MAIN_SEGS(sbi) *
- sizeof(struct seg_entry), GFP_KERNEL);
+ sit_i->sentries =
+ f2fs_kvzalloc(sbi, array_size(sizeof(struct seg_entry),
+ MAIN_SEGS(sbi)),
+ GFP_KERNEL);
if (!sit_i->sentries)
return -ENOMEM;
@@ -3573,22 +4039,22 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
return -ENOMEM;
#endif
- if (f2fs_discard_en(sbi)) {
- sit_i->sentries[start].discard_map
- = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE,
- GFP_KERNEL);
- if (!sit_i->sentries[start].discard_map)
- return -ENOMEM;
- }
+ sit_i->sentries[start].discard_map
+ = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE,
+ GFP_KERNEL);
+ if (!sit_i->sentries[start].discard_map)
+ return -ENOMEM;
}
sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
if (!sit_i->tmp_map)
return -ENOMEM;
- if (sbi->segs_per_sec > 1) {
- sit_i->sec_entries = f2fs_kvzalloc(sbi, MAIN_SECS(sbi) *
- sizeof(struct sec_entry), GFP_KERNEL);
+ if (__is_large_section(sbi)) {
+ sit_i->sec_entries =
+ f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry),
+ MAIN_SECS(sbi)),
+ GFP_KERNEL);
if (!sit_i->sec_entries)
return -ENOMEM;
}
@@ -3664,7 +4130,8 @@ static int build_curseg(struct f2fs_sb_info *sbi)
struct curseg_info *array;
int i;
- array = f2fs_kzalloc(sbi, sizeof(*array) * NR_CURSEG_TYPE, GFP_KERNEL);
+ array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE, sizeof(*array)),
+ GFP_KERNEL);
if (!array)
return -ENOMEM;
@@ -3697,9 +4164,10 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
unsigned int i, start, end;
unsigned int readed, start_blk = 0;
int err = 0;
+ block_t total_node_blocks = 0;
do {
- readed = ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES,
+ readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES,
META_SIT, true);
start = start_blk * sit_i->sents_per_block;
@@ -3711,6 +4179,8 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
se = &sit_i->sentries[start];
page = get_current_sit_page(sbi, start);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
sit_blk = (struct f2fs_sit_block *)page_address(page);
sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
f2fs_put_page(page, 1);
@@ -3719,23 +4189,23 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
if (err)
return err;
seg_info_from_raw_sit(se, &sit);
+ if (IS_NODESEG(se->type))
+ total_node_blocks += se->valid_blocks;
/* build discard map only one time */
- if (f2fs_discard_en(sbi)) {
- if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
- memset(se->discard_map, 0xff,
- SIT_VBLOCK_MAP_SIZE);
- } else {
- memcpy(se->discard_map,
- se->cur_valid_map,
- SIT_VBLOCK_MAP_SIZE);
- sbi->discard_blks +=
- sbi->blocks_per_seg -
- se->valid_blocks;
- }
+ if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
+ memset(se->discard_map, 0xff,
+ SIT_VBLOCK_MAP_SIZE);
+ } else {
+ memcpy(se->discard_map,
+ se->cur_valid_map,
+ SIT_VBLOCK_MAP_SIZE);
+ sbi->discard_blks +=
+ sbi->blocks_per_seg -
+ se->valid_blocks;
}
- if (sbi->segs_per_sec > 1)
+ if (__is_large_section(sbi))
get_sec_entry(sbi, start)->valid_blocks +=
se->valid_blocks;
}
@@ -3747,33 +4217,53 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
unsigned int old_valid_blocks;
start = le32_to_cpu(segno_in_journal(journal, i));
+ if (start >= MAIN_SEGS(sbi)) {
+ f2fs_err(sbi, "Wrong journal entry on segno %u",
+ start);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ err = -EFSCORRUPTED;
+ break;
+ }
+
se = &sit_i->sentries[start];
sit = sit_in_journal(journal, i);
old_valid_blocks = se->valid_blocks;
+ if (IS_NODESEG(se->type))
+ total_node_blocks -= old_valid_blocks;
err = check_block_count(sbi, start, &sit);
if (err)
break;
seg_info_from_raw_sit(se, &sit);
+ if (IS_NODESEG(se->type))
+ total_node_blocks += se->valid_blocks;
- if (f2fs_discard_en(sbi)) {
- if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
- memset(se->discard_map, 0xff,
- SIT_VBLOCK_MAP_SIZE);
- } else {
- memcpy(se->discard_map, se->cur_valid_map,
- SIT_VBLOCK_MAP_SIZE);
- sbi->discard_blks += old_valid_blocks -
- se->valid_blocks;
- }
+ if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
+ memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE);
+ } else {
+ memcpy(se->discard_map, se->cur_valid_map,
+ SIT_VBLOCK_MAP_SIZE);
+ sbi->discard_blks += old_valid_blocks;
+ sbi->discard_blks -= se->valid_blocks;
}
- if (sbi->segs_per_sec > 1)
+ if (__is_large_section(sbi)) {
get_sec_entry(sbi, start)->valid_blocks +=
- se->valid_blocks - old_valid_blocks;
+ se->valid_blocks;
+ get_sec_entry(sbi, start)->valid_blocks -=
+ old_valid_blocks;
+ }
}
up_read(&curseg->journal_rwsem);
+
+ if (!err && total_node_blocks != valid_node_count(sbi)) {
+ f2fs_err(sbi, "SIT is corrupted node# %u vs %u",
+ total_node_blocks, valid_node_count(sbi));
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ err = -EFSCORRUPTED;
+ }
+
return err;
}
@@ -3885,13 +4375,13 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi)
if (!f2fs_test_bit(blkofs, se->cur_valid_map))
continue;
out:
- f2fs_msg(sbi->sb, KERN_ERR,
- "Current segment's next free block offset is "
- "inconsistent with bitmap, logtype:%u, "
- "segno:%u, type:%u, next_blkoff:%u, blkofs:%u",
- i, curseg->segno, curseg->alloc_type,
- curseg->next_blkoff, blkofs);
- return -EINVAL;
+ f2fs_err(sbi,
+ "Current segment's next free block offset is inconsistent with bitmap, logtype:%u, segno:%u, type:%u, next_blkoff:%u, blkofs:%u",
+ i, curseg->segno, curseg->alloc_type,
+ curseg->next_blkoff, blkofs);
+ return -EFSCORRUPTED;
}
}
return 0;
@@ -3907,7 +4397,7 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi)
down_write(&sit_i->sentry_lock);
- sit_i->min_mtime = LLONG_MAX;
+ sit_i->min_mtime = ULLONG_MAX;
for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
unsigned int i;
@@ -3921,11 +4411,11 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi)
if (sit_i->min_mtime > mtime)
sit_i->min_mtime = mtime;
}
- sit_i->max_mtime = get_mtime(sbi);
+ sit_i->max_mtime = get_mtime(sbi, false);
up_write(&sit_i->sentry_lock);
}
-int build_segment_manager(struct f2fs_sb_info *sbi)
+int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
@@ -3954,6 +4444,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
+ sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
sm_info->min_ssr_sections = reserved_sections(sbi);
@@ -3962,7 +4453,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
init_rwsem(&sm_info->curseg_lock);
if (!f2fs_readonly(sbi->sb)) {
- err = create_flush_cmd_control(sbi);
+ err = f2fs_create_flush_cmd_control(sbi);
if (err)
return err;
}
@@ -4030,7 +4521,7 @@ static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
destroy_victim_secmap(sbi);
SM_I(sbi)->dirty_info = NULL;
- kfree(dirty_i);
+ kvfree(dirty_i);
}
static void destroy_curseg(struct f2fs_sb_info *sbi)
@@ -4042,10 +4533,10 @@ static void destroy_curseg(struct f2fs_sb_info *sbi)
return;
SM_I(sbi)->curseg_array = NULL;
for (i = 0; i < NR_CURSEG_TYPE; i++) {
- kfree(array[i].sum_blk);
- kfree(array[i].journal);
+ kvfree(array[i].sum_blk);
+ kvfree(array[i].journal);
}
- kfree(array);
+ kvfree(array);
}
static void destroy_free_segmap(struct f2fs_sb_info *sbi)
@@ -4056,7 +4547,7 @@ static void destroy_free_segmap(struct f2fs_sb_info *sbi)
SM_I(sbi)->free_info = NULL;
kvfree(free_i->free_segmap);
kvfree(free_i->free_secmap);
- kfree(free_i);
+ kvfree(free_i);
}
static void destroy_sit_info(struct f2fs_sb_info *sbi)
@@ -4069,45 +4560,45 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
if (sit_i->sentries) {
for (start = 0; start < MAIN_SEGS(sbi); start++) {
- kfree(sit_i->sentries[start].cur_valid_map);
+ kvfree(sit_i->sentries[start].cur_valid_map);
#ifdef CONFIG_F2FS_CHECK_FS
- kfree(sit_i->sentries[start].cur_valid_map_mir);
+ kvfree(sit_i->sentries[start].cur_valid_map_mir);
#endif
- kfree(sit_i->sentries[start].ckpt_valid_map);
- kfree(sit_i->sentries[start].discard_map);
+ kvfree(sit_i->sentries[start].ckpt_valid_map);
+ kvfree(sit_i->sentries[start].discard_map);
}
}
- kfree(sit_i->tmp_map);
+ kvfree(sit_i->tmp_map);
kvfree(sit_i->sentries);
kvfree(sit_i->sec_entries);
kvfree(sit_i->dirty_sentries_bitmap);
SM_I(sbi)->sit_info = NULL;
- kfree(sit_i->sit_bitmap);
+ kvfree(sit_i->sit_bitmap);
#ifdef CONFIG_F2FS_CHECK_FS
- kfree(sit_i->sit_bitmap_mir);
+ kvfree(sit_i->sit_bitmap_mir);
#endif
- kfree(sit_i);
+ kvfree(sit_i);
}
-void destroy_segment_manager(struct f2fs_sb_info *sbi)
+void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
{
struct f2fs_sm_info *sm_info = SM_I(sbi);
if (!sm_info)
return;
- destroy_flush_cmd_control(sbi, true);
+ f2fs_destroy_flush_cmd_control(sbi, true);
destroy_discard_cmd_control(sbi);
destroy_dirty_segmap(sbi);
destroy_curseg(sbi);
destroy_free_segmap(sbi);
destroy_sit_info(sbi);
sbi->sm_info = NULL;
- kfree(sm_info);
+ kvfree(sm_info);
}
-int __init create_segment_manager_caches(void)
+int __init f2fs_create_segment_manager_caches(void)
{
discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
sizeof(struct discard_entry));
@@ -4140,7 +4631,7 @@ fail:
return -ENOMEM;
}
-void destroy_segment_manager_caches(void)
+void f2fs_destroy_segment_manager_caches(void)
{
kmem_cache_destroy(sit_entry_set_slab);
kmem_cache_destroy(discard_cmd_slab);
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index d28073cff8cf..b74602813a05 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/segment.h
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
@@ -85,7 +82,7 @@
(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & ((sbi)->blocks_per_seg - 1))
#define GET_SEGNO(sbi, blk_addr) \
- ((((blk_addr) == NULL_ADDR) || ((blk_addr) == NEW_ADDR)) ? \
+ ((!__is_valid_data_blkaddr(blk_addr)) ? \
NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \
GET_SEGNO_FROM_SEG0(sbi, blk_addr)))
#define BLKS_PER_SEC(sbi) \
@@ -112,7 +109,7 @@
#define START_SEGNO(segno) \
(SIT_BLOCK_OFFSET(segno) * SIT_ENTRY_PER_BLOCK)
#define SIT_BLK_CNT(sbi) \
- ((MAIN_SEGS(sbi) + SIT_ENTRY_PER_BLOCK - 1) / SIT_ENTRY_PER_BLOCK)
+ DIV_ROUND_UP(MAIN_SEGS(sbi), SIT_ENTRY_PER_BLOCK)
#define f2fs_bitmap_size(nr) \
(BITS_TO_LONGS(nr) * sizeof(unsigned long))
@@ -215,6 +212,8 @@ struct segment_allocation {
#define IS_DUMMY_WRITTEN_PAGE(page) \
(page_private(page) == (unsigned long)DUMMY_WRITTEN_PAGE)
+#define MAX_SKIP_GC_COUNT 16
+
struct inmem_pages {
struct list_head list;
struct page *page;
@@ -334,12 +333,18 @@ static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi,
* In order to get # of valid blocks in a section instantly from many
* segments, f2fs manages two counting structures separately.
*/
- if (use_section && sbi->segs_per_sec > 1)
+ if (use_section && __is_large_section(sbi))
return get_sec_entry(sbi, segno)->valid_blocks;
else
return get_seg_entry(sbi, segno)->valid_blocks;
}
+static inline unsigned int get_ckpt_valid_blocks(struct f2fs_sb_info *sbi,
+ unsigned int segno)
+{
+ return get_seg_entry(sbi, segno)->ckpt_valid_blocks;
+}
+
static inline void seg_info_from_raw_sit(struct seg_entry *se,
struct f2fs_sit_entry *rs)
{
@@ -375,6 +380,7 @@ static inline void seg_info_to_sit_page(struct f2fs_sb_info *sbi,
int i;
raw_sit = (struct f2fs_sit_block *)page_address(page);
+ memset(raw_sit, 0, PAGE_SIZE);
for (i = 0; i < end - start; i++) {
rs = &raw_sit->entries[i];
se = get_seg_entry(sbi, start + i);
@@ -576,6 +582,15 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
reserved_sections(sbi) + needed);
}
+static inline int f2fs_is_checkpoint_ready(struct f2fs_sb_info *sbi)
+{
+ if (likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+ return 0;
+ if (likely(!has_not_enough_free_secs(sbi, 0, 0)))
+ return 0;
+ return -ENOSPC;
+}
+
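f2fs_is_checkpoint_ready() gives allocation paths a cheap early-out: while checkpoint=disable is active and free sections are exhausted, new writes must fail with -ENOSPC rather than consume space that cannot be reclaimed until a checkpoint runs. A minimal caller sketch (hypothetical function, not part of this patch):

	static int example_begin_write(struct f2fs_sb_info *sbi)
	{
		int err = f2fs_is_checkpoint_ready(sbi);

		if (err)		/* -ENOSPC while SBI_CP_DISABLED is set */
			return err;
		/* ... safe to reserve blocks ... */
		return 0;
	}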
static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi)
{
return prefree_segments(sbi) > SM_I(sbi)->rec_prefree_segments;
@@ -641,17 +656,15 @@ static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno)
f2fs_bug_on(sbi, segno > TOTAL_SEGS(sbi) - 1);
}
-static inline void verify_block_addr(struct f2fs_io_info *fio, block_t blk_addr)
+static inline void verify_fio_blkaddr(struct f2fs_io_info *fio)
{
struct f2fs_sb_info *sbi = fio->sbi;
- if (PAGE_TYPE_OF_BIO(fio->type) == META &&
- (!is_read_io(fio->op) || fio->is_meta))
- BUG_ON(blk_addr < SEG0_BLKADDR(sbi) ||
- blk_addr >= MAIN_BLKADDR(sbi));
- else
- BUG_ON(blk_addr < MAIN_BLKADDR(sbi) ||
- blk_addr >= MAX_BLKADDR(sbi));
+ if (__is_valid_data_blkaddr(fio->old_blkaddr))
+ verify_blkaddr(sbi, fio->old_blkaddr, __is_meta_io(fio) ?
+ META_GENERIC : DATA_GENERIC);
+ verify_blkaddr(sbi, fio->new_blkaddr, __is_meta_io(fio) ?
+ META_GENERIC : DATA_GENERIC_ENHANCE);
}
/*
@@ -680,21 +693,19 @@ static inline int check_block_count(struct f2fs_sb_info *sbi,
} while (cur_pos < sbi->blocks_per_seg);
if (unlikely(GET_SIT_VBLOCKS(raw_sit) != valid_blocks)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Mismatch valid blocks %d vs. %d",
- GET_SIT_VBLOCKS(raw_sit), valid_blocks);
+ f2fs_err(sbi, "Mismatch valid blocks %d vs. %d",
+ GET_SIT_VBLOCKS(raw_sit), valid_blocks);
set_sbi_flag(sbi, SBI_NEED_FSCK);
- return -EINVAL;
+ return -EFSCORRUPTED;
}
/* check segment usage, and check boundary of a given segment number */
if (unlikely(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg
|| segno > TOTAL_SEGS(sbi) - 1)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Wrong valid blocks %d or segno %u",
- GET_SIT_VBLOCKS(raw_sit), segno);
+ f2fs_err(sbi, "Wrong valid blocks %d or segno %u",
+ GET_SIT_VBLOCKS(raw_sit), segno);
set_sbi_flag(sbi, SBI_NEED_FSCK);
- return -EINVAL;
+ return -EFSCORRUPTED;
}
return 0;
}
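Returning -EFSCORRUPTED instead of -EINVAL lets callers distinguish on-disk corruption from a bad argument; in the kernel headers the alias is simply:

	#define EFSCORRUPTED	EUCLEAN		/* Filesystem is corrupted */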
@@ -744,11 +755,23 @@ static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start)
#endif
}
-static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi)
+static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi,
+ bool base_time)
{
struct sit_info *sit_i = SIT_I(sbi);
- return sit_i->elapsed_time + CURRENT_TIME_SEC.tv_sec -
- sit_i->mounted_time;
+ time64_t diff, now = ktime_get_real_seconds();
+
+ if (now >= sit_i->mounted_time)
+ return sit_i->elapsed_time + now - sit_i->mounted_time;
+
+ /* system time is set to the past */
+ if (!base_time) {
+ diff = sit_i->mounted_time - now;
+ if (sit_i->elapsed_time >= diff)
+ return sit_i->elapsed_time - diff;
+ return 0;
+ }
+ return sit_i->elapsed_time;
}
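The rewrite guards against the wall clock being set backwards, which the removed CURRENT_TIME_SEC arithmetic did not. Worked numbers, illustrative only:

	/*
	 * elapsed_time = 5, mounted_time = 1000, clock set back to now = 990:
	 * old code: 5 + 990 - 1000 wraps as unsigned (a huge bogus mtime);
	 * new code: diff = 10 > 5, so !base_time callers get 0, while
	 * base_time callers get the stable elapsed_time = 5.
	 */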
static inline void set_summary(struct f2fs_summary *sum, nid_t nid,
@@ -772,15 +795,6 @@ static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type)
- (base + 1) + type;
}
-static inline bool no_fggc_candidate(struct f2fs_sb_info *sbi,
- unsigned int secno)
-{
- if (get_valid_blocks(sbi, GET_SEG_FROM_SEC(sbi, secno), true) >
- sbi->fggc_threshold)
- return true;
- return false;
-}
-
static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno)
{
if (IS_CURSEC(sbi, secno) || (sbi->cur_victim_sec == secno))
@@ -849,7 +863,7 @@ static inline void wake_up_discard_thread(struct f2fs_sb_info *sbi, bool force)
}
}
mutex_unlock(&dcc->cmd_lock);
- if (!wakeup)
+ if (!wakeup || !is_idle(sbi, DISCARD_TIME))
return;
wake_up:
dcc->discard_wake = 1;
diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
index 0b5664a1a6cc..a467aca29cfe 100644
--- a/fs/f2fs/shrinker.c
+++ b/fs/f2fs/shrinker.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* f2fs shrinker support
* the basic infra was copied from fs/ubifs/shrinker.c
*
* Copyright (c) 2015 Motorola Mobility
* Copyright (c) 2015 Jaegeuk Kim <jaegeuk@kernel.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
@@ -109,11 +106,11 @@ unsigned long f2fs_shrink_scan(struct shrinker *shrink,
/* shrink clean nat cache entries */
if (freed < nr)
- freed += try_to_free_nats(sbi, nr - freed);
+ freed += f2fs_try_to_free_nats(sbi, nr - freed);
/* shrink free nids cache entries */
if (freed < nr)
- freed += try_to_free_nids(sbi, nr - freed);
+ freed += f2fs_try_to_free_nids(sbi, nr - freed);
spin_lock(&f2fs_list_lock);
p = p->next;
@@ -138,6 +135,6 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi)
f2fs_shrink_extent_tree(sbi, __count_extent_cache(sbi));
spin_lock(&f2fs_list_lock);
- list_del(&sbi->s_list);
+ list_del_init(&sbi->s_list);
spin_unlock(&f2fs_list_lock);
}
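list_del() poisons the node's pointers, so any later list_empty() test or repeat deletion on sbi->s_list would be undefined behaviour; list_del_init() relinks the node to itself so it can be safely inspected again. A minimal illustration:

	LIST_HEAD(head);
	struct list_head node;

	list_add(&node, &head);
	list_del_init(&node);		/* node points back at itself */
	WARN_ON(!list_empty(&node));	/* fine; after plain list_del() the test is meaningless */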
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index ebe4668c4997..281ec7da8128 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/super.c
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/module.h>
#include <linux/init.h>
@@ -41,7 +38,7 @@ static struct kmem_cache *f2fs_inode_cachep;
#ifdef CONFIG_F2FS_FAULT_INJECTION
-char *fault_name[FAULT_MAX] = {
+const char *f2fs_fault_name[FAULT_MAX] = {
[FAULT_KMALLOC] = "kmalloc",
[FAULT_KVMALLOC] = "kvmalloc",
[FAULT_PAGE_ALLOC] = "page alloc",
@@ -53,22 +50,27 @@ char *fault_name[FAULT_MAX] = {
[FAULT_DIR_DEPTH] = "too big dir depth",
[FAULT_EVICT_INODE] = "evict_inode fail",
[FAULT_TRUNCATE] = "truncate fail",
- [FAULT_IO] = "IO error",
+ [FAULT_READ_IO] = "read IO error",
[FAULT_CHECKPOINT] = "checkpoint error",
+ [FAULT_DISCARD] = "discard error",
+ [FAULT_WRITE_IO] = "write IO error",
};
-static void f2fs_build_fault_attr(struct f2fs_sb_info *sbi,
- unsigned int rate)
+void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
+ unsigned int type)
{
struct f2fs_fault_info *ffi = &F2FS_OPTION(sbi).fault_info;
if (rate) {
atomic_set(&ffi->inject_ops, 0);
ffi->inject_rate = rate;
- ffi->inject_type = (1 << FAULT_MAX) - 1;
- } else {
- memset(ffi, 0, sizeof(struct f2fs_fault_info));
}
+
+ if (type)
+ ffi->inject_type = type;
+
+ if (!rate && !type)
+ memset(ffi, 0, sizeof(struct f2fs_fault_info));
}
#endif
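Rate and type can now be tuned independently: type is a bitmask indexed by the FAULT_* enum, and the removed line shows the old behaviour of always enabling every type via (1 << FAULT_MAX) - 1 (presumably what F2FS_ALL_FAULT_TYPE expands to). An illustrative call enabling just two fault classes at a 1/1000 rate:

	f2fs_build_fault_attr(sbi, 1000,
			BIT(FAULT_KMALLOC) | BIT(FAULT_READ_IO));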
@@ -113,6 +115,7 @@ enum {
Opt_mode,
Opt_io_size_bits,
Opt_fault_injection,
+ Opt_fault_type,
Opt_lazytime,
Opt_nolazytime,
Opt_quota,
@@ -133,6 +136,10 @@ enum {
Opt_alloc,
Opt_fsync,
Opt_test_dummy_encryption,
+ Opt_checkpoint_disable,
+ Opt_checkpoint_disable_cap,
+ Opt_checkpoint_disable_cap_perc,
+ Opt_checkpoint_enable,
Opt_err,
};
@@ -170,6 +177,7 @@ static match_table_t f2fs_tokens = {
{Opt_mode, "mode=%s"},
{Opt_io_size_bits, "io_bits=%u"},
{Opt_fault_injection, "fault_injection=%u"},
+ {Opt_fault_type, "fault_type=%u"},
{Opt_lazytime, "lazytime"},
{Opt_nolazytime, "nolazytime"},
{Opt_quota, "quota"},
@@ -190,44 +198,52 @@ static match_table_t f2fs_tokens = {
{Opt_alloc, "alloc_mode=%s"},
{Opt_fsync, "fsync_mode=%s"},
{Opt_test_dummy_encryption, "test_dummy_encryption"},
+ {Opt_checkpoint_disable, "checkpoint=disable"},
+ {Opt_checkpoint_disable_cap, "checkpoint=disable:%u"},
+ {Opt_checkpoint_disable_cap_perc, "checkpoint=disable:%u%%"},
+ {Opt_checkpoint_enable, "checkpoint=enable"},
{Opt_err, NULL},
};
-void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...)
+void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...)
{
struct va_format vaf;
va_list args;
+ int level;
va_start(args, fmt);
- vaf.fmt = fmt;
+
+ level = printk_get_level(fmt);
+ vaf.fmt = printk_skip_level(fmt);
vaf.va = &args;
- printk_ratelimited("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf);
+ printk("%c%cF2FS-fs (%s): %pV\n",
+ KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf);
+
va_end(args);
}
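With the level embedded in the format string via printk_get_level()/printk_skip_level(), the f2fs_err()/f2fs_warn()/f2fs_info() calls used throughout this patch can be thin macros; their definitions live in f2fs.h (not shown in this section), presumably along these lines:

	#define f2fs_err(sbi, fmt, ...)					\
		f2fs_printk(sbi, KERN_ERR fmt, ##__VA_ARGS__)
	#define f2fs_info(sbi, fmt, ...)				\
		f2fs_printk(sbi, KERN_INFO fmt, ##__VA_ARGS__)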
static inline void limit_reserve_root(struct f2fs_sb_info *sbi)
{
- block_t limit = (sbi->user_block_count << 1) / 1000;
+ block_t limit = min((sbi->user_block_count << 1) / 1000,
+ sbi->user_block_count - sbi->reserved_blocks);
/* limit is 0.2% */
if (test_opt(sbi, RESERVE_ROOT) &&
F2FS_OPTION(sbi).root_reserved_blocks > limit) {
F2FS_OPTION(sbi).root_reserved_blocks = limit;
- f2fs_msg(sbi->sb, KERN_INFO,
- "Reduce reserved blocks for root = %u",
- F2FS_OPTION(sbi).root_reserved_blocks);
+ f2fs_info(sbi, "Reduce reserved blocks for root = %u",
+ F2FS_OPTION(sbi).root_reserved_blocks);
}
if (!test_opt(sbi, RESERVE_ROOT) &&
(!uid_eq(F2FS_OPTION(sbi).s_resuid,
make_kuid(&init_user_ns, F2FS_DEF_RESUID)) ||
!gid_eq(F2FS_OPTION(sbi).s_resgid,
make_kgid(&init_user_ns, F2FS_DEF_RESGID))))
- f2fs_msg(sbi->sb, KERN_INFO,
- "Ignore s_resuid=%u, s_resgid=%u w/o reserve_root",
- from_kuid_munged(&init_user_ns,
- F2FS_OPTION(sbi).s_resuid),
- from_kgid_munged(&init_user_ns,
- F2FS_OPTION(sbi).s_resgid));
+ f2fs_info(sbi, "Ignore s_resuid=%u, s_resgid=%u w/o reserve_root",
+ from_kuid_munged(&init_user_ns,
+ F2FS_OPTION(sbi).s_resuid),
+ from_kgid_munged(&init_user_ns,
+ F2FS_OPTION(sbi).s_resgid));
}
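Worked example of the new clamp: with user_block_count = 10,000,000 and reserved_blocks = 5,000, limit = min((10000000 << 1) / 1000, 10000000 - 5000) = min(20000, 9995000) = 20,000 blocks, the documented 0.2%. The added min() also keeps root's reservation from overlapping blocks already reserved for other purposes on small or heavily reserved filesystems.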
static void init_once(void *foo)
@@ -248,42 +264,36 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype,
int ret = -EINVAL;
if (sb_any_quota_loaded(sb) && !F2FS_OPTION(sbi).s_qf_names[qtype]) {
- f2fs_msg(sb, KERN_ERR,
- "Cannot change journaled "
- "quota options when quota turned on");
+ f2fs_err(sbi, "Cannot change journaled quota options when quota turned on");
return -EINVAL;
}
- if (f2fs_sb_has_quota_ino(sb)) {
- f2fs_msg(sb, KERN_INFO,
- "QUOTA feature is enabled, so ignore qf_name");
+ if (f2fs_sb_has_quota_ino(sbi)) {
+ f2fs_info(sbi, "QUOTA feature is enabled, so ignore qf_name");
return 0;
}
qname = match_strdup(args);
if (!qname) {
- f2fs_msg(sb, KERN_ERR,
- "Not enough memory for storing quotafile name");
- return -EINVAL;
+ f2fs_err(sbi, "Not enough memory for storing quotafile name");
+ return -ENOMEM;
}
if (F2FS_OPTION(sbi).s_qf_names[qtype]) {
if (strcmp(F2FS_OPTION(sbi).s_qf_names[qtype], qname) == 0)
ret = 0;
else
- f2fs_msg(sb, KERN_ERR,
- "%s quota file already specified",
+ f2fs_err(sbi, "%s quota file already specified",
QTYPE2NAME(qtype));
goto errout;
}
if (strchr(qname, '/')) {
- f2fs_msg(sb, KERN_ERR,
- "quotafile must be on filesystem root");
+ f2fs_err(sbi, "quotafile must be on filesystem root");
goto errout;
}
F2FS_OPTION(sbi).s_qf_names[qtype] = qname;
set_opt(sbi, QUOTA);
return 0;
errout:
- kfree(qname);
+ kvfree(qname);
return ret;
}
@@ -292,11 +302,10 @@ static int f2fs_clear_qf_name(struct super_block *sb, int qtype)
struct f2fs_sb_info *sbi = F2FS_SB(sb);
if (sb_any_quota_loaded(sb) && F2FS_OPTION(sbi).s_qf_names[qtype]) {
- f2fs_msg(sb, KERN_ERR, "Cannot change journaled quota options"
- " when quota turned on");
+ f2fs_err(sbi, "Cannot change journaled quota options when quota turned on");
return -EINVAL;
}
- kfree(F2FS_OPTION(sbi).s_qf_names[qtype]);
+ kvfree(F2FS_OPTION(sbi).s_qf_names[qtype]);
F2FS_OPTION(sbi).s_qf_names[qtype] = NULL;
return 0;
}
@@ -308,9 +317,8 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi)
* 'grpquota' mount options are allowed even without quota feature
* to support legacy quotas in quota files.
*/
- if (test_opt(sbi, PRJQUOTA) && !f2fs_sb_has_project_quota(sbi->sb)) {
- f2fs_msg(sbi->sb, KERN_ERR, "Project quota feature not enabled. "
- "Cannot enable project quota enforcement.");
+ if (test_opt(sbi, PRJQUOTA) && !f2fs_sb_has_project_quota(sbi)) {
+ f2fs_err(sbi, "Project quota feature not enabled. Cannot enable project quota enforcement.");
return -1;
}
if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] ||
@@ -330,29 +338,20 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi)
if (test_opt(sbi, GRPQUOTA) || test_opt(sbi, USRQUOTA) ||
test_opt(sbi, PRJQUOTA)) {
- f2fs_msg(sbi->sb, KERN_ERR, "old and new quota "
- "format mixing");
+ f2fs_err(sbi, "old and new quota format mixing");
return -1;
}
if (!F2FS_OPTION(sbi).s_jquota_fmt) {
- f2fs_msg(sbi->sb, KERN_ERR, "journaled quota format "
- "not specified");
+ f2fs_err(sbi, "journaled quota format not specified");
return -1;
}
}
- if (f2fs_sb_has_quota_ino(sbi->sb) && F2FS_OPTION(sbi).s_jquota_fmt) {
- f2fs_msg(sbi->sb, KERN_INFO,
- "QUOTA feature is enabled, so ignore jquota_fmt");
+ if (f2fs_sb_has_quota_ino(sbi) && F2FS_OPTION(sbi).s_jquota_fmt) {
+ f2fs_info(sbi, "QUOTA feature is enabled, so ignore jquota_fmt");
F2FS_OPTION(sbi).s_jquota_fmt = 0;
}
- if (f2fs_sb_has_quota_ino(sbi->sb) && f2fs_readonly(sbi->sb)) {
- f2fs_msg(sbi->sb, KERN_INFO,
- "Filesystem with quota feature cannot be mounted RDWR "
- "without CONFIG_QUOTA");
- return -1;
- }
return 0;
}
#endif
@@ -360,7 +359,6 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi)
static int parse_options(struct super_block *sb, char *options)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
- struct request_queue *q;
substring_t args[MAX_OPT_ARGS];
char *p, *name;
int arg = 0;
@@ -400,10 +398,10 @@ static int parse_options(struct super_block *sb, char *options)
set_opt(sbi, BG_GC);
set_opt(sbi, FORCE_FG_GC);
} else {
- kfree(name);
+ kvfree(name);
return -EINVAL;
}
- kfree(name);
+ kvfree(name);
break;
case Opt_disable_roll_forward:
set_opt(sbi, DISABLE_ROLL_FORWARD);
@@ -415,19 +413,11 @@ static int parse_options(struct super_block *sb, char *options)
return -EINVAL;
break;
case Opt_discard:
- q = bdev_get_queue(sb->s_bdev);
- if (blk_queue_discard(q)) {
- set_opt(sbi, DISCARD);
- } else if (!f2fs_sb_has_blkzoned(sb)) {
- f2fs_msg(sb, KERN_WARNING,
- "mounting with \"discard\" option, but "
- "the device does not support discard");
- }
+ set_opt(sbi, DISCARD);
break;
case Opt_nodiscard:
- if (f2fs_sb_has_blkzoned(sb)) {
- f2fs_msg(sb, KERN_WARNING,
- "discard is required for zoned block devices");
+ if (f2fs_sb_has_blkzoned(sbi)) {
+ f2fs_warn(sbi, "discard is required for zoned block devices");
return -EINVAL;
}
clear_opt(sbi, DISCARD);
@@ -459,20 +449,16 @@ static int parse_options(struct super_block *sb, char *options)
break;
#else
case Opt_user_xattr:
- f2fs_msg(sb, KERN_INFO,
- "user_xattr options not supported");
+ f2fs_info(sbi, "user_xattr options not supported");
break;
case Opt_nouser_xattr:
- f2fs_msg(sb, KERN_INFO,
- "nouser_xattr options not supported");
+ f2fs_info(sbi, "nouser_xattr options not supported");
break;
case Opt_inline_xattr:
- f2fs_msg(sb, KERN_INFO,
- "inline_xattr options not supported");
+ f2fs_info(sbi, "inline_xattr options not supported");
break;
case Opt_noinline_xattr:
- f2fs_msg(sb, KERN_INFO,
- "noinline_xattr options not supported");
+ f2fs_info(sbi, "noinline_xattr options not supported");
break;
#endif
#ifdef CONFIG_F2FS_FS_POSIX_ACL
@@ -484,10 +470,10 @@ static int parse_options(struct super_block *sb, char *options)
break;
#else
case Opt_acl:
- f2fs_msg(sb, KERN_INFO, "acl options not supported");
+ f2fs_info(sbi, "acl options not supported");
break;
case Opt_noacl:
- f2fs_msg(sb, KERN_INFO, "noacl options not supported");
+ f2fs_info(sbi, "noacl options not supported");
break;
#endif
case Opt_active_logs:
@@ -537,9 +523,8 @@ static int parse_options(struct super_block *sb, char *options)
if (args->from && match_int(args, &arg))
return -EINVAL;
if (test_opt(sbi, RESERVE_ROOT)) {
- f2fs_msg(sb, KERN_INFO,
- "Preserve previous reserve_root=%u",
- F2FS_OPTION(sbi).root_reserved_blocks);
+ f2fs_info(sbi, "Preserve previous reserve_root=%u",
+ F2FS_OPTION(sbi).root_reserved_blocks);
} else {
F2FS_OPTION(sbi).root_reserved_blocks = arg;
set_opt(sbi, RESERVE_ROOT);
@@ -550,8 +535,7 @@ static int parse_options(struct super_block *sb, char *options)
return -EINVAL;
uid = make_kuid(current_user_ns(), arg);
if (!uid_valid(uid)) {
- f2fs_msg(sb, KERN_ERR,
- "Invalid uid value %d", arg);
+ f2fs_err(sbi, "Invalid uid value %d", arg);
return -EINVAL;
}
F2FS_OPTION(sbi).s_resuid = uid;
@@ -561,8 +545,7 @@ static int parse_options(struct super_block *sb, char *options)
return -EINVAL;
gid = make_kgid(current_user_ns(), arg);
if (!gid_valid(gid)) {
- f2fs_msg(sb, KERN_ERR,
- "Invalid gid value %d", arg);
+ f2fs_err(sbi, "Invalid gid value %d", arg);
return -EINVAL;
}
F2FS_OPTION(sbi).s_resgid = gid;
@@ -574,11 +557,9 @@ static int parse_options(struct super_block *sb, char *options)
return -ENOMEM;
if (strlen(name) == 8 &&
!strncmp(name, "adaptive", 8)) {
- if (f2fs_sb_has_blkzoned(sb)) {
- f2fs_msg(sb, KERN_WARNING,
- "adaptive mode is not allowed with "
- "zoned block device feature");
- kfree(name);
+ if (f2fs_sb_has_blkzoned(sbi)) {
+ f2fs_warn(sbi, "adaptive mode is not allowed with zoned block device feature");
+ kvfree(name);
return -EINVAL;
}
set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE);
@@ -586,33 +567,44 @@ static int parse_options(struct super_block *sb, char *options)
!strncmp(name, "lfs", 3)) {
set_opt_mode(sbi, F2FS_MOUNT_LFS);
} else {
- kfree(name);
+ kvfree(name);
return -EINVAL;
}
- kfree(name);
+ kvfree(name);
break;
case Opt_io_size_bits:
if (args->from && match_int(args, &arg))
return -EINVAL;
- if (arg > __ilog2_u32(BIO_MAX_PAGES)) {
- f2fs_msg(sb, KERN_WARNING,
- "Not support %d, larger than %d",
- 1 << arg, BIO_MAX_PAGES);
+ if (arg <= 0 || arg > __ilog2_u32(BIO_MAX_PAGES)) {
+ f2fs_warn(sbi, "Not support %d, larger than %d",
+ 1 << arg, BIO_MAX_PAGES);
return -EINVAL;
}
F2FS_OPTION(sbi).write_io_size_bits = arg;
break;
+#ifdef CONFIG_F2FS_FAULT_INJECTION
case Opt_fault_injection:
if (args->from && match_int(args, &arg))
return -EINVAL;
-#ifdef CONFIG_F2FS_FAULT_INJECTION
- f2fs_build_fault_attr(sbi, arg);
+ f2fs_build_fault_attr(sbi, arg, F2FS_ALL_FAULT_TYPE);
set_opt(sbi, FAULT_INJECTION);
+ break;
+
+ case Opt_fault_type:
+ if (args->from && match_int(args, &arg))
+ return -EINVAL;
+ f2fs_build_fault_attr(sbi, 0, arg);
+ set_opt(sbi, FAULT_INJECTION);
+ break;
#else
- f2fs_msg(sb, KERN_INFO,
- "FAULT_INJECTION was not selected");
-#endif
+ case Opt_fault_injection:
+ f2fs_info(sbi, "fault_injection options not supported");
break;
+
+ case Opt_fault_type:
+ f2fs_info(sbi, "fault_type options not supported");
+ break;
+#endif
case Opt_lazytime:
sb->s_flags |= MS_LAZYTIME;
break;
@@ -690,8 +682,7 @@ static int parse_options(struct super_block *sb, char *options)
case Opt_jqfmt_vfsv0:
case Opt_jqfmt_vfsv1:
case Opt_noquota:
- f2fs_msg(sb, KERN_INFO,
- "quota operations not supported");
+ f2fs_info(sbi, "quota operations not supported");
break;
#endif
case Opt_whint:
@@ -708,10 +699,10 @@ static int parse_options(struct super_block *sb, char *options)
!strncmp(name, "fs-based", 8)) {
F2FS_OPTION(sbi).whint_mode = WHINT_MODE_FS;
} else {
- kfree(name);
+ kvfree(name);
return -EINVAL;
}
- kfree(name);
+ kvfree(name);
break;
case Opt_alloc:
name = match_strdup(&args[0]);
@@ -725,10 +716,10 @@ static int parse_options(struct super_block *sb, char *options)
!strncmp(name, "reuse", 5)) {
F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE;
} else {
- kfree(name);
+ kvfree(name);
return -EINVAL;
}
- kfree(name);
+ kvfree(name);
break;
case Opt_fsync:
name = match_strdup(&args[0]);
@@ -745,71 +736,104 @@ static int parse_options(struct super_block *sb, char *options)
F2FS_OPTION(sbi).fsync_mode =
FSYNC_MODE_NOBARRIER;
} else {
- kfree(name);
+ kvfree(name);
return -EINVAL;
}
- kfree(name);
+ kvfree(name);
break;
case Opt_test_dummy_encryption:
#ifdef CONFIG_F2FS_FS_ENCRYPTION
- if (!f2fs_sb_has_encrypt(sb)) {
- f2fs_msg(sb, KERN_ERR, "Encrypt feature is off");
+ if (!f2fs_sb_has_encrypt(sbi)) {
+ f2fs_err(sbi, "Encrypt feature is off");
return -EINVAL;
}
F2FS_OPTION(sbi).test_dummy_encryption = true;
- f2fs_msg(sb, KERN_INFO,
- "Test dummy encryption mode enabled");
+ f2fs_info(sbi, "Test dummy encryption mode enabled");
#else
- f2fs_msg(sb, KERN_INFO,
- "Test dummy encryption mount option ignored");
+ f2fs_info(sbi, "Test dummy encryption mount option ignored");
#endif
break;
+ case Opt_checkpoint_disable_cap_perc:
+ if (args->from && match_int(args, &arg))
+ return -EINVAL;
+ if (arg < 0 || arg > 100)
+ return -EINVAL;
+ if (arg == 100)
+ F2FS_OPTION(sbi).unusable_cap =
+ sbi->user_block_count;
+ else
+ F2FS_OPTION(sbi).unusable_cap =
+ (sbi->user_block_count / 100) * arg;
+ set_opt(sbi, DISABLE_CHECKPOINT);
+ break;
+ case Opt_checkpoint_disable_cap:
+ if (args->from && match_int(args, &arg))
+ return -EINVAL;
+ F2FS_OPTION(sbi).unusable_cap = arg;
+ set_opt(sbi, DISABLE_CHECKPOINT);
+ break;
+ case Opt_checkpoint_disable:
+ set_opt(sbi, DISABLE_CHECKPOINT);
+ break;
+ case Opt_checkpoint_enable:
+ clear_opt(sbi, DISABLE_CHECKPOINT);
+ break;
default:
- f2fs_msg(sb, KERN_ERR,
- "Unrecognized mount option \"%s\" or missing value",
- p);
+ f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
+ p);
return -EINVAL;
}
}
#ifdef CONFIG_QUOTA
if (f2fs_check_quota_options(sbi))
return -EINVAL;
+#else
+ if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sbi->sb)) {
+ f2fs_info(sbi, "Filesystem with quota feature cannot be mounted RDWR without CONFIG_QUOTA");
+ return -EINVAL;
+ }
+ if (f2fs_sb_has_project_quota(sbi) && !f2fs_readonly(sbi->sb)) {
+ f2fs_err(sbi, "Filesystem with project quota feature cannot be mounted RDWR without CONFIG_QUOTA");
+ return -EINVAL;
+ }
#endif
if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) {
- f2fs_msg(sb, KERN_ERR,
- "Should set mode=lfs with %uKB-sized IO",
- F2FS_IO_SIZE_KB(sbi));
+ f2fs_err(sbi, "Should set mode=lfs with %uKB-sized IO",
+ F2FS_IO_SIZE_KB(sbi));
return -EINVAL;
}
if (test_opt(sbi, INLINE_XATTR_SIZE)) {
- if (!f2fs_sb_has_extra_attr(sb) ||
- !f2fs_sb_has_flexible_inline_xattr(sb)) {
- f2fs_msg(sb, KERN_ERR,
- "extra_attr or flexible_inline_xattr "
- "feature is off");
+ int min_size, max_size;
+
+ if (!f2fs_sb_has_extra_attr(sbi) ||
+ !f2fs_sb_has_flexible_inline_xattr(sbi)) {
+ f2fs_err(sbi, "extra_attr or flexible_inline_xattr feature is off");
return -EINVAL;
}
if (!test_opt(sbi, INLINE_XATTR)) {
- f2fs_msg(sb, KERN_ERR,
- "inline_xattr_size option should be "
- "set with inline_xattr option");
+ f2fs_err(sbi, "inline_xattr_size option should be set with inline_xattr option");
return -EINVAL;
}
- if (!F2FS_OPTION(sbi).inline_xattr_size ||
- F2FS_OPTION(sbi).inline_xattr_size >=
- DEF_ADDRS_PER_INODE -
- F2FS_TOTAL_EXTRA_ATTR_SIZE -
- DEF_INLINE_RESERVED_SIZE -
- DEF_MIN_INLINE_SIZE) {
- f2fs_msg(sb, KERN_ERR,
- "inline xattr size is out of range");
+
+ min_size = sizeof(struct f2fs_xattr_header) / sizeof(__le32);
+ max_size = MAX_INLINE_XATTR_SIZE;
+
+ if (F2FS_OPTION(sbi).inline_xattr_size < min_size ||
+ F2FS_OPTION(sbi).inline_xattr_size > max_size) {
+ f2fs_err(sbi, "inline xattr size is out of range: %d ~ %d",
+ min_size, max_size);
return -EINVAL;
}
}
+ if (test_opt(sbi, DISABLE_CHECKPOINT) && test_opt(sbi, LFS)) {
+ f2fs_err(sbi, "LFS not compatible with checkpoint=disable\n");
+ return -EINVAL;
+ }
+
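The percentage form converts to an absolute block cap at parse time: checkpoint=disable:10%% on a 1,000,000-block filesystem yields unusable_cap = (1000000 / 100) * 10 = 100,000 blocks. The arg == 100 special case assigns user_block_count directly so integer division cannot round the cap below the full device size.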
/* Not pass down write hints if the number of active logs is lesser
* than NR_CURSEG_TYPE.
*/
@@ -830,15 +854,14 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
/* Initialize f2fs-specific inode info */
atomic_set(&fi->dirty_pages, 0);
- fi->i_current_depth = 1;
init_rwsem(&fi->i_sem);
INIT_LIST_HEAD(&fi->dirty_list);
INIT_LIST_HEAD(&fi->gdirty_list);
INIT_LIST_HEAD(&fi->inmem_ilist);
INIT_LIST_HEAD(&fi->inmem_pages);
mutex_init(&fi->inmem_lock);
- init_rwsem(&fi->dio_rwsem[READ]);
- init_rwsem(&fi->dio_rwsem[WRITE]);
+ init_rwsem(&fi->i_gc_rwsem[READ]);
+ init_rwsem(&fi->i_gc_rwsem[WRITE]);
init_rwsem(&fi->i_mmap_sem);
init_rwsem(&fi->i_xattr_sem);
@@ -866,7 +889,7 @@ static int f2fs_drop_inode(struct inode *inode)
/* some remained atomic pages should discarded */
if (f2fs_is_atomic_file(inode))
- drop_inmem_pages(inode);
+ f2fs_drop_inmem_pages(inode);
/* should remain fi->extent_tree for writepage */
f2fs_destroy_extent_node(inode);
@@ -874,6 +897,10 @@ static int f2fs_drop_inode(struct inode *inode)
sb_start_intwrite(inode->i_sb);
f2fs_i_size_write(inode, 0);
+ f2fs_submit_merged_write_cond(F2FS_I_SB(inode),
+ inode, NULL, 0, DATA);
+ truncate_inode_pages_final(inode->i_mapping);
+
if (F2FS_HAS_BLOCKS(inode))
f2fs_truncate(inode);
@@ -976,10 +1003,10 @@ static void destroy_device_list(struct f2fs_sb_info *sbi)
for (i = 0; i < sbi->s_ndevs; i++) {
blkdev_put(FDEV(i).bdev, FMODE_EXCL);
#ifdef CONFIG_BLK_DEV_ZONED
- kfree(FDEV(i).blkz_type);
+ kvfree(FDEV(i).blkz_seq);
#endif
}
- kfree(sbi->devs);
+ kvfree(sbi->devs);
}
static void f2fs_put_super(struct super_block *sb)
@@ -998,32 +1025,30 @@ static void f2fs_put_super(struct super_block *sb)
* But, the previous checkpoint was not done by umount, it needs to do
* clean checkpoint again.
*/
- if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
- !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
+ if ((is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
+ !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG))) {
struct cp_control cpc = {
.reason = CP_UMOUNT,
};
- write_checkpoint(sbi, &cpc);
+ f2fs_write_checkpoint(sbi, &cpc);
}
/* be sure to wait for any on-going discard commands */
- dropped = f2fs_wait_discard_bios(sbi);
+ dropped = f2fs_issue_discard_timeout(sbi);
- if (f2fs_discard_en(sbi) && !sbi->discard_blks && !dropped) {
+ if ((f2fs_hw_support_discard(sbi) || f2fs_hw_should_discard(sbi)) &&
+ !sbi->discard_blks && !dropped) {
struct cp_control cpc = {
.reason = CP_UMOUNT | CP_TRIMMED,
};
- write_checkpoint(sbi, &cpc);
+ f2fs_write_checkpoint(sbi, &cpc);
}
- /* write_checkpoint can update stat informaion */
- f2fs_destroy_stats(sbi);
-
/*
* normally superblock is clean, so we need to release this.
* In addition, EIO will skip do checkpoint, we need this as well.
*/
- release_ino_entry(sbi, true);
+ f2fs_release_ino_entry(sbi, true);
f2fs_leave_shrinker(sbi);
mutex_unlock(&sbi->umount_mutex);
@@ -1031,33 +1056,46 @@ static void f2fs_put_super(struct super_block *sb)
/* our cp_error case, we can wait for any writeback page */
f2fs_flush_merged_writes(sbi);
+ f2fs_wait_on_all_pages_writeback(sbi);
+
+ f2fs_bug_on(sbi, sbi->fsync_node_num);
+
iput(sbi->node_inode);
+ sbi->node_inode = NULL;
+
iput(sbi->meta_inode);
+ sbi->meta_inode = NULL;
+
+ /*
+ * iput() can update stat information, if f2fs_write_checkpoint()
+ * above failed with error.
+ */
+ f2fs_destroy_stats(sbi);
/* destroy f2fs internal modules */
- destroy_node_manager(sbi);
- destroy_segment_manager(sbi);
+ f2fs_destroy_node_manager(sbi);
+ f2fs_destroy_segment_manager(sbi);
- kfree(sbi->ckpt);
+ kvfree(sbi->ckpt);
f2fs_unregister_sysfs(sbi);
sb->s_fs_info = NULL;
if (sbi->s_chksum_driver)
crypto_free_shash(sbi->s_chksum_driver);
- kfree(sbi->raw_super);
+ kvfree(sbi->raw_super);
destroy_device_list(sbi);
if (sbi->write_io_dummy)
mempool_destroy(sbi->write_io_dummy);
#ifdef CONFIG_QUOTA
for (i = 0; i < MAXQUOTAS; i++)
- kfree(F2FS_OPTION(sbi).s_qf_names[i]);
+ kvfree(F2FS_OPTION(sbi).s_qf_names[i]);
#endif
destroy_percpu_info(sbi);
for (i = 0; i < NR_PAGE_TYPE; i++)
- kfree(sbi->write_io[i]);
- kfree(sbi);
+ kvfree(sbi->write_io[i]);
+ kvfree(sbi);
}
int f2fs_sync_fs(struct super_block *sb, int sync)
@@ -1067,6 +1105,8 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
if (unlikely(f2fs_cp_error(sbi)))
return 0;
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+ return 0;
trace_f2fs_sync_fs(sb, sync);
@@ -1079,7 +1119,7 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
cpc.reason = __get_cp_reason(sbi);
mutex_lock(&sbi->gc_mutex);
- err = write_checkpoint(sbi, &cpc);
+ err = f2fs_write_checkpoint(sbi, &cpc);
mutex_unlock(&sbi->gc_mutex);
}
f2fs_trace_ios(NULL, 1);
@@ -1166,6 +1206,14 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_blocks = total_count - start_count;
buf->f_bfree = user_block_count - valid_user_blocks(sbi) -
sbi->current_reserved_blocks;
+
+ spin_lock(&sbi->stat_lock);
+ if (unlikely(buf->f_bfree <= sbi->unusable_block_count))
+ buf->f_bfree = 0;
+ else
+ buf->f_bfree -= sbi->unusable_block_count;
+ spin_unlock(&sbi->stat_lock);
+
if (buf->f_bfree > F2FS_OPTION(sbi).root_reserved_blocks)
buf->f_bavail = buf->f_bfree -
F2FS_OPTION(sbi).root_reserved_blocks;
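Worked example of the new accounting: with f_bfree = 50,000, unusable_block_count = 8,000 (blocks freed since the last checkpoint while checkpoint=disable is active) and root_reserved_blocks = 2,000, userspace now sees f_bfree = 42,000 and f_bavail = 40,000, instead of over-reporting space that cannot be reclaimed until checkpointing resumes.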
@@ -1250,6 +1298,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",disable_roll_forward");
if (test_opt(sbi, DISCARD))
seq_puts(seq, ",discard");
+ else
+ seq_puts(seq, ",nodiscard");
if (test_opt(sbi, NOHEAP))
seq_puts(seq, ",no_heap");
else
@@ -1310,11 +1360,15 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
from_kgid_munged(&init_user_ns,
F2FS_OPTION(sbi).s_resgid));
if (F2FS_IO_SIZE_BITS(sbi))
- seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi));
+ seq_printf(seq, ",io_bits=%u",
+ F2FS_OPTION(sbi).write_io_size_bits);
#ifdef CONFIG_F2FS_FAULT_INJECTION
- if (test_opt(sbi, FAULT_INJECTION))
+ if (test_opt(sbi, FAULT_INJECTION)) {
seq_printf(seq, ",fault_injection=%u",
F2FS_OPTION(sbi).fault_info.inject_rate);
+ seq_printf(seq, ",fault_type=%u",
+ F2FS_OPTION(sbi).fault_info.inject_type);
+ }
#endif
#ifdef CONFIG_QUOTA
if (test_opt(sbi, QUOTA))
@@ -1341,10 +1395,15 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
else if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
seq_printf(seq, ",alloc_mode=%s", "reuse");
+ if (test_opt(sbi, DISABLE_CHECKPOINT))
+ seq_printf(seq, ",checkpoint=disable:%u",
+ F2FS_OPTION(sbi).unusable_cap);
if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_POSIX)
seq_printf(seq, ",fsync_mode=%s", "posix");
else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT)
seq_printf(seq, ",fsync_mode=%s", "strict");
+ else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_NOBARRIER)
+ seq_printf(seq, ",fsync_mode=%s", "nobarrier");
return 0;
}
@@ -1357,7 +1416,8 @@ static void default_options(struct f2fs_sb_info *sbi)
F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT;
F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX;
F2FS_OPTION(sbi).test_dummy_encryption = false;
- sbi->readdir_ra = 1;
+ F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID);
+ F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID);
set_opt(sbi, BG_GC);
set_opt(sbi, INLINE_XATTR);
@@ -1366,13 +1426,14 @@ static void default_options(struct f2fs_sb_info *sbi)
set_opt(sbi, EXTENT_CACHE);
set_opt(sbi, NOHEAP);
sbi->sb->s_flags |= MS_LAZYTIME;
+ clear_opt(sbi, DISABLE_CHECKPOINT);
+ F2FS_OPTION(sbi).unusable_cap = 0;
set_opt(sbi, FLUSH_MERGE);
- if (f2fs_sb_has_blkzoned(sbi->sb)) {
+ set_opt(sbi, DISCARD);
+ if (f2fs_sb_has_blkzoned(sbi))
set_opt_mode(sbi, F2FS_MOUNT_LFS);
- set_opt(sbi, DISCARD);
- } else {
+ else
set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE);
- }
#ifdef CONFIG_F2FS_FS_XATTR
set_opt(sbi, XATTR_USER);
@@ -1381,14 +1442,82 @@ static void default_options(struct f2fs_sb_info *sbi)
set_opt(sbi, POSIX_ACL);
#endif
-#ifdef CONFIG_F2FS_FAULT_INJECTION
- f2fs_build_fault_attr(sbi, 0);
-#endif
+ f2fs_build_fault_attr(sbi, 0, 0);
}
#ifdef CONFIG_QUOTA
static int f2fs_enable_quotas(struct super_block *sb);
#endif
+
+static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
+{
+ unsigned int s_flags = sbi->sb->s_flags;
+ struct cp_control cpc;
+ int err = 0;
+ int ret;
+ block_t unusable;
+
+ if (s_flags & MS_RDONLY) {
+ f2fs_err(sbi, "checkpoint=disable on readonly fs");
+ return -EINVAL;
+ }
+ sbi->sb->s_flags |= MS_ACTIVE;
+
+ f2fs_update_time(sbi, DISABLE_TIME);
+
+ while (!f2fs_time_over(sbi, DISABLE_TIME)) {
+ mutex_lock(&sbi->gc_mutex);
+ err = f2fs_gc(sbi, true, false, NULL_SEGNO);
+ if (err == -ENODATA) {
+ err = 0;
+ break;
+ }
+ if (err && err != -EAGAIN)
+ break;
+ }
+
+ ret = sync_filesystem(sbi->sb);
+ if (ret || err) {
+ err = ret ? ret : err;
+ goto restore_flag;
+ }
+
+ unusable = f2fs_get_unusable_blocks(sbi);
+ if (f2fs_disable_cp_again(sbi, unusable)) {
+ err = -EAGAIN;
+ goto restore_flag;
+ }
+
+ mutex_lock(&sbi->gc_mutex);
+ cpc.reason = CP_PAUSE;
+ set_sbi_flag(sbi, SBI_CP_DISABLED);
+ err = f2fs_write_checkpoint(sbi, &cpc);
+ if (err)
+ goto out_unlock;
+
+ spin_lock(&sbi->stat_lock);
+ sbi->unusable_block_count = unusable;
+ spin_unlock(&sbi->stat_lock);
+
+out_unlock:
+ mutex_unlock(&sbi->gc_mutex);
+restore_flag:
+ sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */
+ return err;
+}
+
+static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
+{
+ mutex_lock(&sbi->gc_mutex);
+ f2fs_dirty_to_prefree(sbi);
+
+ clear_sbi_flag(sbi, SBI_CP_DISABLED);
+ set_sbi_flag(sbi, SBI_IS_DIRTY);
+ mutex_unlock(&sbi->gc_mutex);
+
+ f2fs_sync_fs(sbi->sb, 1);
+}
+
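Together these two helpers back the checkpoint=disable mount option: garbage-collect until DISABLE_TIME expires or nothing movable remains, sync the filesystem, verify the unusable space fits under unusable_cap (otherwise fail with -EAGAIN), then write a CP_PAUSE checkpoint and set SBI_CP_DISABLED; re-enabling moves dirty segments to prefree and forces a sync. The intended usage is a remount pair around an update sequence that must be atomic, e.g. mount -o remount,checkpoint=disable:10% followed later by mount -o remount,checkpoint=enable.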
static int f2fs_remount(struct super_block *sb, int *flags, char *data)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -1398,6 +1527,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
bool need_restart_gc = false;
bool need_stop_gc = false;
bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
+ bool disable_checkpoint = test_opt(sbi, DISABLE_CHECKPOINT);
+ bool checkpoint_changed;
#ifdef CONFIG_QUOTA
int i, j;
#endif
@@ -1418,7 +1549,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
GFP_KERNEL);
if (!org_mount_opt.s_qf_names[i]) {
for (j = 0; j < i; j++)
- kfree(org_mount_opt.s_qf_names[j]);
+ kvfree(org_mount_opt.s_qf_names[j]);
return -ENOMEM;
}
} else {
@@ -1430,8 +1561,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
/* recover superblocks we couldn't write due to previous RO mount */
if (!(*flags & MS_RDONLY) && is_sbi_flag_set(sbi, SBI_NEED_SB_WRITE)) {
err = f2fs_commit_super(sbi, false);
- f2fs_msg(sb, KERN_INFO,
- "Try to recover all the superblocks, ret: %d", err);
+ f2fs_info(sbi, "Try to recover all the superblocks, ret: %d",
+ err);
if (!err)
clear_sbi_flag(sbi, SBI_NEED_SB_WRITE);
}
@@ -1442,6 +1573,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
err = parse_options(sb, data);
if (err)
goto restore_opts;
+ checkpoint_changed =
+ disable_checkpoint != test_opt(sbi, DISABLE_CHECKPOINT);
/*
* Previous and new state of filesystem is RO,
@@ -1460,7 +1593,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
sb->s_flags &= ~MS_RDONLY;
if (sb_any_quota_suspended(sb)) {
dquot_resume(sb, -1);
- } else if (f2fs_sb_has_quota_ino(sb)) {
+ } else if (f2fs_sb_has_quota_ino(sbi)) {
err = f2fs_enable_quotas(sb);
if (err)
goto restore_opts;
@@ -1470,8 +1603,13 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
/* disallow enable/disable extent_cache dynamically */
if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) {
err = -EINVAL;
- f2fs_msg(sbi->sb, KERN_WARNING,
- "switch extent_cache option is not allowed");
+ f2fs_warn(sbi, "switch extent_cache option is not allowed");
+ goto restore_opts;
+ }
+
+ if ((*flags & MS_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) {
+ err = -EINVAL;
+ f2fs_warn(sbi, "disabling checkpoint not compatible with read-only");
goto restore_opts;
}
@@ -1482,11 +1620,11 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
*/
if ((*flags & MS_RDONLY) || !test_opt(sbi, BG_GC)) {
if (sbi->gc_thread) {
- stop_gc_thread(sbi);
+ f2fs_stop_gc_thread(sbi);
need_restart_gc = true;
}
} else if (!sbi->gc_thread) {
- err = start_gc_thread(sbi);
+ err = f2fs_start_gc_thread(sbi);
if (err)
goto restore_opts;
need_stop_gc = true;
@@ -1503,15 +1641,25 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
clear_sbi_flag(sbi, SBI_IS_CLOSE);
}
+ if (checkpoint_changed) {
+ if (test_opt(sbi, DISABLE_CHECKPOINT)) {
+ err = f2fs_disable_checkpoint(sbi);
+ if (err)
+ goto restore_gc;
+ } else {
+ f2fs_enable_checkpoint(sbi);
+ }
+ }
+
/*
* We stop issue flush thread if FS is mounted as RO
* or if flush_merge is not passed in mount option.
*/
if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {
clear_opt(sbi, FLUSH_MERGE);
- destroy_flush_cmd_control(sbi, false);
+ f2fs_destroy_flush_cmd_control(sbi, false);
} else {
- err = create_flush_cmd_control(sbi);
+ err = f2fs_create_flush_cmd_control(sbi);
if (err)
goto restore_gc;
}
@@ -1519,27 +1667,27 @@ skip:
#ifdef CONFIG_QUOTA
/* Release old quota file names */
for (i = 0; i < MAXQUOTAS; i++)
- kfree(org_mount_opt.s_qf_names[i]);
+ kvfree(org_mount_opt.s_qf_names[i]);
#endif
/* Update the POSIXACL Flag */
sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
(test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0);
limit_reserve_root(sbi);
+ *flags = (*flags & ~MS_LAZYTIME) | (sb->s_flags & MS_LAZYTIME);
return 0;
restore_gc:
if (need_restart_gc) {
- if (start_gc_thread(sbi))
- f2fs_msg(sbi->sb, KERN_WARNING,
- "background gc thread has stopped");
+ if (f2fs_start_gc_thread(sbi))
+ f2fs_warn(sbi, "background gc thread has stopped");
} else if (need_stop_gc) {
- stop_gc_thread(sbi);
+ f2fs_stop_gc_thread(sbi);
}
restore_opts:
#ifdef CONFIG_QUOTA
F2FS_OPTION(sbi).s_jquota_fmt = org_mount_opt.s_jquota_fmt;
for (i = 0; i < MAXQUOTAS; i++) {
- kfree(F2FS_OPTION(sbi).s_qf_names[i]);
+ kvfree(F2FS_OPTION(sbi).s_qf_names[i]);
F2FS_OPTION(sbi).s_qf_names[i] = org_mount_opt.s_qf_names[i];
}
#endif
@@ -1578,6 +1726,7 @@ repeat:
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto repeat;
}
+ set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
return PTR_ERR(page);
}
@@ -1589,6 +1738,7 @@ repeat:
}
if (unlikely(!PageUptodate(page))) {
f2fs_put_page(page, 1);
+ set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
return -EIO;
}
@@ -1630,6 +1780,7 @@ retry:
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto retry;
}
+ set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
break;
}
@@ -1666,6 +1817,11 @@ static qsize_t *f2fs_get_reserved_space(struct inode *inode)
static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type)
{
+ if (is_set_ckpt_flags(sbi, CP_QUOTA_NEED_FSCK_FLAG)) {
+ f2fs_err(sbi, "quota sysfile may be corrupted, skip loading it");
+ return 0;
+ }
+
return dquot_quota_on_mount(sbi->sb, F2FS_OPTION(sbi).s_qf_names[type],
F2FS_OPTION(sbi).s_jquota_fmt, type);
}
@@ -1675,11 +1831,10 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly)
int enabled = 0;
int i, err;
- if (f2fs_sb_has_quota_ino(sbi->sb) && rdonly) {
+ if (f2fs_sb_has_quota_ino(sbi) && rdonly) {
err = f2fs_enable_quotas(sbi->sb);
if (err) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Cannot turn on quota_ino: %d", err);
+ f2fs_err(sbi, "Cannot turn on quota_ino: %d", err);
return 0;
}
return 1;
@@ -1692,8 +1847,8 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly)
enabled = 1;
continue;
}
- f2fs_msg(sbi->sb, KERN_ERR,
- "Cannot turn on quotas: %d on %d", err, i);
+ f2fs_err(sbi, "Cannot turn on quotas: %d on %d",
+ err, i);
}
}
return enabled;
@@ -1706,7 +1861,7 @@ static int f2fs_quota_enable(struct super_block *sb, int type, int format_id,
unsigned long qf_inum;
int err;
- BUG_ON(!f2fs_sb_has_quota_ino(sb));
+ BUG_ON(!f2fs_sb_has_quota_ino(F2FS_SB(sb)));
qf_inum = f2fs_qf_ino(sb, type);
if (!qf_inum)
@@ -1714,8 +1869,7 @@ static int f2fs_quota_enable(struct super_block *sb, int type, int format_id,
qf_inode = f2fs_iget(sb, qf_inum);
if (IS_ERR(qf_inode)) {
- f2fs_msg(sb, KERN_ERR,
- "Bad quota inode %u:%lu", type, qf_inum);
+ f2fs_err(F2FS_SB(sb), "Bad quota inode %u:%lu", type, qf_inum);
return PTR_ERR(qf_inode);
}
@@ -1728,15 +1882,22 @@ static int f2fs_quota_enable(struct super_block *sb, int type, int format_id,
static int f2fs_enable_quotas(struct super_block *sb)
{
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
int type, err = 0;
unsigned long qf_inum;
bool quota_mopt[MAXQUOTAS] = {
- test_opt(F2FS_SB(sb), USRQUOTA),
- test_opt(F2FS_SB(sb), GRPQUOTA),
- test_opt(F2FS_SB(sb), PRJQUOTA),
+ test_opt(sbi, USRQUOTA),
+ test_opt(sbi, GRPQUOTA),
+ test_opt(sbi, PRJQUOTA),
};
+ if (is_set_ckpt_flags(F2FS_SB(sb), CP_QUOTA_NEED_FSCK_FLAG)) {
+ f2fs_err(sbi, "quota file may be corrupted, skip loading it");
+ return 0;
+ }
+
sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
+
for (type = 0; type < MAXQUOTAS; type++) {
qf_inum = f2fs_qf_ino(sb, type);
if (qf_inum) {
@@ -1744,12 +1905,12 @@ static int f2fs_enable_quotas(struct super_block *sb)
DQUOT_USAGE_ENABLED |
(quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0));
if (err) {
- f2fs_msg(sb, KERN_ERR,
- "Failed to enable quota tracking "
- "(type=%d, err=%d). Please run "
- "fsck to fix.", type, err);
+ f2fs_err(sbi, "Failed to enable quota tracking (type=%d, err=%d). Please run fsck to fix.",
+ type, err);
for (type--; type >= 0; type--)
dquot_quota_off(sb, type);
+ set_sbi_flag(F2FS_SB(sb),
+ SBI_QUOTA_NEED_REPAIR);
return err;
}
}
@@ -1757,35 +1918,65 @@ static int f2fs_enable_quotas(struct super_block *sb)
return 0;
}
-static int f2fs_quota_sync(struct super_block *sb, int type)
+int f2fs_quota_sync(struct super_block *sb, int type)
{
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
struct quota_info *dqopt = sb_dqopt(sb);
int cnt;
int ret;
+ /*
+ * do_quotactl
+ * f2fs_quota_sync
+ * down_read(quota_sem)
+ * dquot_writeback_dquots()
+ * f2fs_dquot_commit
+ * block_operation
+ * down_read(quota_sem)
+ */
+ f2fs_lock_op(sbi);
+
+ down_read(&sbi->quota_sem);
ret = dquot_writeback_dquots(sb, type);
if (ret)
- return ret;
+ goto out;
/*
* Now when everything is written we can discard the pagecache so
* that userspace sees the changes.
*/
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+ struct address_space *mapping;
+
if (type != -1 && cnt != type)
continue;
if (!sb_has_quota_active(sb, cnt))
continue;
- ret = filemap_write_and_wait(dqopt->files[cnt]->i_mapping);
+ mapping = dqopt->files[cnt]->i_mapping;
+
+ ret = filemap_fdatawrite(mapping);
if (ret)
- return ret;
+ goto out;
+
+ /* if we are using journalled quota */
+ if (is_journalled_quota(sbi))
+ continue;
+
+ ret = filemap_fdatawait(mapping);
+ if (ret)
+ set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
inode_lock(dqopt->files[cnt]);
truncate_inode_pages(&dqopt->files[cnt]->i_data, 0);
inode_unlock(dqopt->files[cnt]);
}
- return 0;
+out:
+ if (ret)
+ set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
+ up_read(&sbi->quota_sem);
+ f2fs_unlock_op(sbi);
+ return ret;
}
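The call-chain comment above documents a recursive down_read() on quota_sem: taken here and again inside the dquot hooks added below. With the kernel's writer-fair rwsem that is only safe if no writer can queue in between, which is presumably why f2fs_lock_op() is taken first, holding off checkpoint's block_operations. The generic hazard, illustratively:

	/*
	 * A: down_read(&sem)	f2fs_quota_sync
	 * B: down_write(&sem)	blocks, and queues ahead of new readers
	 * A: down_read(&sem)	f2fs_dquot_commit waits behind B -> deadlock
	 */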
static int f2fs_quota_on(struct super_block *sb, int type, int format_id,
@@ -1805,9 +1996,8 @@ static int f2fs_quota_on(struct super_block *sb, int type, int format_id,
inode = d_inode(path->dentry);
inode_lock(inode);
- F2FS_I(inode)->i_flags |= FS_NOATIME_FL | FS_IMMUTABLE_FL;
- inode_set_flags(inode, S_NOATIME | S_IMMUTABLE,
- S_NOATIME | S_IMMUTABLE);
+ F2FS_I(inode)->i_flags |= F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL;
+ f2fs_set_inode_flags(inode);
inode_unlock(inode);
f2fs_mark_inode_dirty_sync(inode, false);
@@ -1822,15 +2012,17 @@ static int f2fs_quota_off(struct super_block *sb, int type)
if (!inode || !igrab(inode))
return dquot_quota_off(sb, type);
- f2fs_quota_sync(sb, type);
+ err = f2fs_quota_sync(sb, type);
+ if (err)
+ goto out_put;
err = dquot_quota_off(sb, type);
- if (err || f2fs_sb_has_quota_ino(sb))
+ if (err || f2fs_sb_has_quota_ino(F2FS_SB(sb)))
goto out_put;
inode_lock(inode);
- F2FS_I(inode)->i_flags &= ~(FS_NOATIME_FL | FS_IMMUTABLE_FL);
- inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE);
+ F2FS_I(inode)->i_flags &= ~(F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL);
+ f2fs_set_inode_flags(inode);
inode_unlock(inode);
f2fs_mark_inode_dirty_sync(inode, false);
out_put:
@@ -1841,9 +2033,105 @@ out_put:
void f2fs_quota_off_umount(struct super_block *sb)
{
int type;
+ int err;
+
- for (type = 0; type < MAXQUOTAS; type++)
- f2fs_quota_off(sb, type);
+ for (type = 0; type < MAXQUOTAS; type++) {
+ err = f2fs_quota_off(sb, type);
+ if (err) {
+ int ret = dquot_quota_off(sb, type);
+
+ f2fs_err(F2FS_SB(sb), "Fail to turn off disk quota (type: %d, err: %d, ret:%d), Please run fsck to fix it.",
+ type, err, ret);
+ set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
+ }
+ }
+ /*
+ * In case of checkpoint=disable, we must flush quota blocks.
+ * This can cause NULL exception for node_inode in end_io, since
+ * put_super already dropped it.
+ */
+ sync_filesystem(sb);
+}
+
+static void f2fs_truncate_quota_inode_pages(struct super_block *sb)
+{
+ struct quota_info *dqopt = sb_dqopt(sb);
+ int type;
+
+ for (type = 0; type < MAXQUOTAS; type++) {
+ if (!dqopt->files[type])
+ continue;
+ f2fs_inode_synced(dqopt->files[type]);
+ }
+}
+
+static int f2fs_dquot_commit(struct dquot *dquot)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb);
+ int ret;
+
+ down_read(&sbi->quota_sem);
+ ret = dquot_commit(dquot);
+ if (ret < 0)
+ set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+ up_read(&sbi->quota_sem);
+ return ret;
+}
+
+static int f2fs_dquot_acquire(struct dquot *dquot)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb);
+ int ret;
+
+ down_read(&sbi->quota_sem);
+ ret = dquot_acquire(dquot);
+ if (ret < 0)
+ set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+ up_read(&sbi->quota_sem);
+ return ret;
+}
+
+static int f2fs_dquot_release(struct dquot *dquot)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb);
+ int ret;
+
+ down_read(&sbi->quota_sem);
+ ret = dquot_release(dquot);
+ if (ret < 0)
+ set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+ up_read(&sbi->quota_sem);
+ return ret;
+}
+
+static int f2fs_dquot_mark_dquot_dirty(struct dquot *dquot)
+{
+ struct super_block *sb = dquot->dq_sb;
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ int ret;
+
+ down_read(&sbi->quota_sem);
+ ret = dquot_mark_dquot_dirty(dquot);
+
+ /* if we are using journalled quota */
+ if (is_journalled_quota(sbi))
+ set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
+
+ up_read(&sbi->quota_sem);
+ return ret;
+}
+
+static int f2fs_dquot_commit_info(struct super_block *sb, int type)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ int ret;
+
+ down_read(&sbi->quota_sem);
+ ret = dquot_commit_info(sb, type);
+ if (ret < 0)
+ set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+ up_read(&sbi->quota_sem);
+ return ret;
}
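Four of the five wrappers above share one shape: take quota_sem shared, run the generic dquot operation, and flag SBI_QUOTA_NEED_REPAIR on failure so a later fsck is asked to rebuild the quota files (mark_dirty differs, flagging SBI_QUOTA_NEED_FLUSH for journalled quota instead). A condensed sketch of the pattern (hypothetical helper, not in the patch):

	static int f2fs_dquot_op(struct f2fs_sb_info *sbi,
				 int (*op)(struct dquot *), struct dquot *dquot)
	{
		int ret;

		down_read(&sbi->quota_sem);
		ret = op(dquot);
		if (ret < 0)
			set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
		up_read(&sbi->quota_sem);
		return ret;
	}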
#if 0
@@ -1856,11 +2144,11 @@ static int f2fs_get_projid(struct inode *inode, kprojid_t *projid)
static const struct dquot_operations f2fs_quota_operations = {
.get_reserved_space = f2fs_get_reserved_space,
- .write_dquot = dquot_commit,
- .acquire_dquot = dquot_acquire,
- .release_dquot = dquot_release,
- .mark_dirty = dquot_mark_dquot_dirty,
- .write_info = dquot_commit_info,
+ .write_dquot = f2fs_dquot_commit,
+ .acquire_dquot = f2fs_dquot_acquire,
+ .release_dquot = f2fs_dquot_release,
+ .mark_dirty = f2fs_dquot_mark_dquot_dirty,
+ .write_info = f2fs_dquot_commit_info,
.alloc_dquot = dquot_alloc,
.destroy_dquot = dquot_destroy,
#if 0
@@ -1879,6 +2167,11 @@ static const struct quotactl_ops f2fs_quotactl_ops = {
.set_dqblk = dquot_set_dqblk,
};
#else
+int f2fs_quota_sync(struct super_block *sb, int type)
+{
+ return 0;
+}
+
void f2fs_quota_off_umount(struct super_block *sb)
{
}
@@ -1924,7 +2217,7 @@ static int f2fs_set_context(struct inode *inode, const void *ctx, size_t len,
* if LOST_FOUND feature is enabled.
*
*/
- if (f2fs_sb_has_lost_found(sbi->sb) &&
+ if (f2fs_sb_has_lost_found(sbi) &&
inode->i_ino == F2FS_ROOT_INO(sbi))
return -EPERM;
@@ -1938,19 +2231,13 @@ static bool f2fs_dummy_context(struct inode *inode)
return DUMMY_ENCRYPTION_ENABLED(F2FS_I_SB(inode));
}
-static unsigned f2fs_max_namelen(struct inode *inode)
-{
- return S_ISLNK(inode->i_mode) ?
- inode->i_sb->s_blocksize : F2FS_NAME_LEN;
-}
-
static const struct fscrypt_operations f2fs_cryptops = {
.key_prefix = "f2fs:",
.get_context = f2fs_get_context,
.set_context = f2fs_set_context,
.dummy_context = f2fs_dummy_context,
.empty_dir = f2fs_empty_dir,
- .max_namelen = f2fs_max_namelen,
+ .max_namelen = F2FS_NAME_LEN,
};
#endif
@@ -1960,7 +2247,7 @@ static struct inode *f2fs_nfs_get_inode(struct super_block *sb,
struct f2fs_sb_info *sbi = F2FS_SB(sb);
struct inode *inode;
- if (check_nid_range(sbi, ino))
+ if (f2fs_check_nid_range(sbi, ino))
return ERR_PTR(-ESTALE);
/*
@@ -2002,7 +2289,7 @@ static const struct export_operations f2fs_export_ops = {
static loff_t max_file_blocks(void)
{
loff_t result = 0;
- loff_t leaf_count = ADDRS_PER_BLOCK;
+ loff_t leaf_count = DEF_ADDRS_PER_BLOCK;
/*
* note: previously, result is equal to (DEF_ADDRS_PER_INODE -
@@ -2063,55 +2350,49 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
(segment_count << log_blocks_per_seg);
if (segment0_blkaddr != cp_blkaddr) {
- f2fs_msg(sb, KERN_INFO,
- "Mismatch start address, segment0(%u) cp_blkaddr(%u)",
- segment0_blkaddr, cp_blkaddr);
+ f2fs_info(sbi, "Mismatch start address, segment0(%u) cp_blkaddr(%u)",
+ segment0_blkaddr, cp_blkaddr);
return true;
}
if (cp_blkaddr + (segment_count_ckpt << log_blocks_per_seg) !=
sit_blkaddr) {
- f2fs_msg(sb, KERN_INFO,
- "Wrong CP boundary, start(%u) end(%u) blocks(%u)",
- cp_blkaddr, sit_blkaddr,
- segment_count_ckpt << log_blocks_per_seg);
+ f2fs_info(sbi, "Wrong CP boundary, start(%u) end(%u) blocks(%u)",
+ cp_blkaddr, sit_blkaddr,
+ segment_count_ckpt << log_blocks_per_seg);
return true;
}
if (sit_blkaddr + (segment_count_sit << log_blocks_per_seg) !=
nat_blkaddr) {
- f2fs_msg(sb, KERN_INFO,
- "Wrong SIT boundary, start(%u) end(%u) blocks(%u)",
- sit_blkaddr, nat_blkaddr,
- segment_count_sit << log_blocks_per_seg);
+ f2fs_info(sbi, "Wrong SIT boundary, start(%u) end(%u) blocks(%u)",
+ sit_blkaddr, nat_blkaddr,
+ segment_count_sit << log_blocks_per_seg);
return true;
}
if (nat_blkaddr + (segment_count_nat << log_blocks_per_seg) !=
ssa_blkaddr) {
- f2fs_msg(sb, KERN_INFO,
- "Wrong NAT boundary, start(%u) end(%u) blocks(%u)",
- nat_blkaddr, ssa_blkaddr,
- segment_count_nat << log_blocks_per_seg);
+ f2fs_info(sbi, "Wrong NAT boundary, start(%u) end(%u) blocks(%u)",
+ nat_blkaddr, ssa_blkaddr,
+ segment_count_nat << log_blocks_per_seg);
return true;
}
if (ssa_blkaddr + (segment_count_ssa << log_blocks_per_seg) !=
main_blkaddr) {
- f2fs_msg(sb, KERN_INFO,
- "Wrong SSA boundary, start(%u) end(%u) blocks(%u)",
- ssa_blkaddr, main_blkaddr,
- segment_count_ssa << log_blocks_per_seg);
+ f2fs_info(sbi, "Wrong SSA boundary, start(%u) end(%u) blocks(%u)",
+ ssa_blkaddr, main_blkaddr,
+ segment_count_ssa << log_blocks_per_seg);
return true;
}
if (main_end_blkaddr > seg_end_blkaddr) {
- f2fs_msg(sb, KERN_INFO,
- "Wrong MAIN_AREA boundary, start(%u) end(%u) block(%u)",
- main_blkaddr,
- segment0_blkaddr +
- (segment_count << log_blocks_per_seg),
- segment_count_main << log_blocks_per_seg);
+ f2fs_info(sbi, "Wrong MAIN_AREA boundary, start(%u) end(%u) block(%u)",
+ main_blkaddr,
+ segment0_blkaddr +
+ (segment_count << log_blocks_per_seg),
+ segment_count_main << log_blocks_per_seg);
return true;
} else if (main_end_blkaddr < seg_end_blkaddr) {
int err = 0;
@@ -2128,12 +2409,11 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
err = __f2fs_commit_super(bh, NULL);
res = err ? "failed" : "done";
}
- f2fs_msg(sb, KERN_INFO,
- "Fix alignment : %s, start(%u) end(%u) block(%u)",
- res, main_blkaddr,
- segment0_blkaddr +
- (segment_count << log_blocks_per_seg),
- segment_count_main << log_blocks_per_seg);
+ f2fs_info(sbi, "Fix alignment : %s, start(%u) end(%u) block(%u)",
+ res, main_blkaddr,
+ segment0_blkaddr +
+ (segment_count << log_blocks_per_seg),
+ segment_count_main << log_blocks_per_seg);
if (err)
return true;
}
@@ -2143,41 +2423,56 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
struct buffer_head *bh)
{
+ block_t segment_count, segs_per_sec, secs_per_zone;
+ block_t total_sections, blocks_per_seg;
struct f2fs_super_block *raw_super = (struct f2fs_super_block *)
(bh->b_data + F2FS_SUPER_OFFSET);
- struct super_block *sb = sbi->sb;
unsigned int blocksize;
+ size_t crc_offset = 0;
+ __u32 crc = 0;
- if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) {
- f2fs_msg(sb, KERN_INFO,
- "Magic Mismatch, valid(0x%x) - read(0x%x)",
- F2FS_SUPER_MAGIC, le32_to_cpu(raw_super->magic));
- return 1;
+ if (le32_to_cpu(raw_super->magic) != F2FS_SUPER_MAGIC) {
+ f2fs_info(sbi, "Magic Mismatch, valid(0x%x) - read(0x%x)",
+ F2FS_SUPER_MAGIC, le32_to_cpu(raw_super->magic));
+ return -EINVAL;
+ }
+
+ /* Check checksum_offset and crc in superblock */
+ if (__F2FS_HAS_FEATURE(raw_super, F2FS_FEATURE_SB_CHKSUM)) {
+ crc_offset = le32_to_cpu(raw_super->checksum_offset);
+ if (crc_offset !=
+ offsetof(struct f2fs_super_block, crc)) {
+ f2fs_info(sbi, "Invalid SB checksum offset: %zu",
+ crc_offset);
+ return -EFSCORRUPTED;
+ }
+ crc = le32_to_cpu(raw_super->crc);
+ if (!f2fs_crc_valid(sbi, crc, raw_super, crc_offset)) {
+ f2fs_info(sbi, "Invalid SB checksum value: %u", crc);
+ return -EFSCORRUPTED;
+ }
}
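The strict offsetof() comparison ensures the crc field is the last thing the checksum covers, so the stored value protects every superblock byte before it; conceptually the validation reduces to (sketch, assuming the f2fs_crc32() helper from f2fs.h):

	valid = le32_to_cpu(raw_super->crc) ==
			f2fs_crc32(sbi, raw_super, crc_offset);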
/* Currently, support only 4KB page cache size */
if (F2FS_BLKSIZE != PAGE_SIZE) {
- f2fs_msg(sb, KERN_INFO,
- "Invalid page_cache_size (%lu), supports only 4KB\n",
- PAGE_SIZE);
- return 1;
+ f2fs_info(sbi, "Invalid page_cache_size (%lu), supports only 4KB",
+ PAGE_SIZE);
+ return -EFSCORRUPTED;
}
/* Currently, support only 4KB block size */
blocksize = 1 << le32_to_cpu(raw_super->log_blocksize);
if (blocksize != F2FS_BLKSIZE) {
- f2fs_msg(sb, KERN_INFO,
- "Invalid blocksize (%u), supports only 4KB\n",
- blocksize);
- return 1;
+ f2fs_info(sbi, "Invalid blocksize (%u), supports only 4KB",
+ blocksize);
+ return -EFSCORRUPTED;
}
/* check log blocks per segment */
if (le32_to_cpu(raw_super->log_blocks_per_seg) != 9) {
- f2fs_msg(sb, KERN_INFO,
- "Invalid log blocks per segment (%u)\n",
- le32_to_cpu(raw_super->log_blocks_per_seg));
- return 1;
+ f2fs_info(sbi, "Invalid log blocks per segment (%u)",
+ le32_to_cpu(raw_super->log_blocks_per_seg));
+ return -EFSCORRUPTED;
}
/* Currently, support 512/1024/2048/4096 bytes sector size */
@@ -2185,47 +2480,102 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
F2FS_MAX_LOG_SECTOR_SIZE ||
le32_to_cpu(raw_super->log_sectorsize) <
F2FS_MIN_LOG_SECTOR_SIZE) {
- f2fs_msg(sb, KERN_INFO, "Invalid log sectorsize (%u)",
- le32_to_cpu(raw_super->log_sectorsize));
- return 1;
+ f2fs_info(sbi, "Invalid log sectorsize (%u)",
+ le32_to_cpu(raw_super->log_sectorsize));
+ return -EFSCORRUPTED;
}
if (le32_to_cpu(raw_super->log_sectors_per_block) +
le32_to_cpu(raw_super->log_sectorsize) !=
F2FS_MAX_LOG_SECTOR_SIZE) {
- f2fs_msg(sb, KERN_INFO,
- "Invalid log sectors per block(%u) log sectorsize(%u)",
- le32_to_cpu(raw_super->log_sectors_per_block),
- le32_to_cpu(raw_super->log_sectorsize));
- return 1;
+ f2fs_info(sbi, "Invalid log sectors per block(%u) log sectorsize(%u)",
+ le32_to_cpu(raw_super->log_sectors_per_block),
+ le32_to_cpu(raw_super->log_sectorsize));
+ return -EFSCORRUPTED;
+ }
+
+ segment_count = le32_to_cpu(raw_super->segment_count);
+ segs_per_sec = le32_to_cpu(raw_super->segs_per_sec);
+ secs_per_zone = le32_to_cpu(raw_super->secs_per_zone);
+ total_sections = le32_to_cpu(raw_super->section_count);
+
+ /* blocks_per_seg should be 512, given the above check */
+ blocks_per_seg = 1 << le32_to_cpu(raw_super->log_blocks_per_seg);
+
+ if (segment_count > F2FS_MAX_SEGMENT ||
+ segment_count < F2FS_MIN_SEGMENTS) {
+ f2fs_info(sbi, "Invalid segment count (%u)", segment_count);
+ return -EFSCORRUPTED;
+ }
+
+ if (total_sections > segment_count ||
+ total_sections < F2FS_MIN_SEGMENTS ||
+ segs_per_sec > segment_count || !segs_per_sec) {
+ f2fs_info(sbi, "Invalid segment/section count (%u, %u x %u)",
+ segment_count, total_sections, segs_per_sec);
+ return -EFSCORRUPTED;
+ }
+
+ if ((segment_count / segs_per_sec) < total_sections) {
+ f2fs_info(sbi, "Small segment_count (%u < %u * %u)",
+ segment_count, segs_per_sec, total_sections);
+ return -EFSCORRUPTED;
+ }
+
+ if (segment_count > (le64_to_cpu(raw_super->block_count) >> 9)) {
+ f2fs_info(sbi, "Wrong segment_count / block_count (%u > %llu)",
+ segment_count, le64_to_cpu(raw_super->block_count));
+ return -EFSCORRUPTED;
+ }
+
+ if (secs_per_zone > total_sections || !secs_per_zone) {
+ f2fs_info(sbi, "Wrong secs_per_zone / total_sections (%u, %u)",
+ secs_per_zone, total_sections);
+ return -EFSCORRUPTED;
+ }
+ if (le32_to_cpu(raw_super->extension_count) > F2FS_MAX_EXTENSION ||
+ raw_super->hot_ext_count > F2FS_MAX_EXTENSION ||
+ (le32_to_cpu(raw_super->extension_count) +
+ raw_super->hot_ext_count) > F2FS_MAX_EXTENSION) {
+ f2fs_info(sbi, "Corrupted extension count (%u + %u > %u)",
+ le32_to_cpu(raw_super->extension_count),
+ raw_super->hot_ext_count,
+ F2FS_MAX_EXTENSION);
+ return -EFSCORRUPTED;
+ }
+
+ if (le32_to_cpu(raw_super->cp_payload) >
+ (blocks_per_seg - F2FS_CP_PACKS)) {
+ f2fs_info(sbi, "Insane cp_payload (%u > %u)",
+ le32_to_cpu(raw_super->cp_payload),
+ blocks_per_seg - F2FS_CP_PACKS);
+ return -EFSCORRUPTED;
}
/* check reserved ino info */
if (le32_to_cpu(raw_super->node_ino) != 1 ||
le32_to_cpu(raw_super->meta_ino) != 2 ||
le32_to_cpu(raw_super->root_ino) != 3) {
- f2fs_msg(sb, KERN_INFO,
- "Invalid Fs Meta Ino: node(%u) meta(%u) root(%u)",
- le32_to_cpu(raw_super->node_ino),
- le32_to_cpu(raw_super->meta_ino),
- le32_to_cpu(raw_super->root_ino));
- return 1;
+ f2fs_info(sbi, "Invalid Fs Meta Ino: node(%u) meta(%u) root(%u)",
+ le32_to_cpu(raw_super->node_ino),
+ le32_to_cpu(raw_super->meta_ino),
+ le32_to_cpu(raw_super->root_ino));
+ return -EFSCORRUPTED;
}
if (le32_to_cpu(raw_super->segment_count) > F2FS_MAX_SEGMENT) {
- f2fs_msg(sb, KERN_INFO,
- "Invalid segment count (%u)",
+ f2fs_info(sbi, "Invalid segment count (%u)",
le32_to_cpu(raw_super->segment_count));
-		return 1;
+		return -EFSCORRUPTED;
}
/* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */
if (sanity_check_area_boundary(sbi, bh))
- return 1;
+ return -EFSCORRUPTED;
return 0;
}
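
The geometry checks added above pin down the superblock's internal consistency: sections are built from whole segments, zones from whole sections, and none of the counts may contradict the others. The same arithmetic in isolation, with hypothetical values standing in for the raw superblock fields (plain C sketch):

	#include <stdio.h>
	#include <stdint.h>

	/* hypothetical geometry, standing in for f2fs_super_block fields */
	struct geometry {
		uint32_t segment_count;   /* total segments on the device */
		uint32_t segs_per_sec;    /* segments per section */
		uint32_t secs_per_zone;   /* sections per zone */
		uint32_t section_count;   /* total sections */
	};

	static int geometry_is_sane(const struct geometry *g)
	{
		if (!g->segs_per_sec || !g->secs_per_zone)
			return 0;
		/* no section or zone may need more units than exist */
		if (g->section_count > g->segment_count ||
		    g->segs_per_sec > g->segment_count ||
		    g->secs_per_zone > g->section_count)
			return 0;
		/* enough segments must remain to populate every section */
		if (g->segment_count / g->segs_per_sec < g->section_count)
			return 0;
		return 1;
	}

	int main(void)
	{
		struct geometry g = { 512, 1, 1, 512 };

		printf("sane: %d\n", geometry_is_sane(&g));
		return 0;
	}
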
-int sanity_check_ckpt(struct f2fs_sb_info *sbi)
+int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
{
unsigned int total, fsmeta;
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
@@ -2235,6 +2585,10 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
unsigned int sit_segs, nat_segs;
unsigned int sit_bitmap_size, nat_bitmap_size;
unsigned int log_blocks_per_seg;
+ unsigned int segment_count_main;
+ unsigned int cp_pack_start_sum, cp_payload;
+ block_t user_block_count, valid_user_blocks;
+ block_t avail_node_count, valid_node_count;
int i, j;
total = le32_to_cpu(raw_super->segment_count);
@@ -2254,8 +2608,33 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
if (unlikely(fsmeta < F2FS_MIN_SEGMENTS ||
ovp_segments == 0 || reserved_segments == 0)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Wrong layout: check mkfs.f2fs version");
+ f2fs_err(sbi, "Wrong layout: check mkfs.f2fs version");
+ return 1;
+ }
+
+ user_block_count = le64_to_cpu(ckpt->user_block_count);
+ segment_count_main = le32_to_cpu(raw_super->segment_count_main);
+ log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
+ if (!user_block_count || user_block_count >=
+ segment_count_main << log_blocks_per_seg) {
+ f2fs_err(sbi, "Wrong user_block_count: %u",
+ user_block_count);
+ return 1;
+ }
+
+ valid_user_blocks = le64_to_cpu(ckpt->valid_block_count);
+ if (valid_user_blocks > user_block_count) {
+ f2fs_err(sbi, "Wrong valid_user_blocks: %u, user_block_count: %u",
+ valid_user_blocks, user_block_count);
+ return 1;
+ }
+
+ valid_node_count = le32_to_cpu(ckpt->valid_node_count);
+ avail_node_count = sbi->total_node_count - sbi->nquota_files -
+ F2FS_RESERVED_NODE_NUM;
+ if (valid_node_count > avail_node_count) {
+ f2fs_err(sbi, "Wrong valid_node_count: %u, avail_node_count: %u",
+ valid_node_count, avail_node_count);
return 1;
}
@@ -2269,10 +2648,9 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
for (j = i + 1; j < NR_CURSEG_NODE_TYPE; j++) {
if (le32_to_cpu(ckpt->cur_node_segno[i]) ==
le32_to_cpu(ckpt->cur_node_segno[j])) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Node segment (%u, %u) has the same "
- "segno: %u", i, j,
- le32_to_cpu(ckpt->cur_node_segno[i]));
+ f2fs_err(sbi, "Node segment (%u, %u) has the same segno: %u",
+ i, j,
+ le32_to_cpu(ckpt->cur_node_segno[i]));
return 1;
}
}
@@ -2284,10 +2662,9 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
for (j = i + 1; j < NR_CURSEG_DATA_TYPE; j++) {
if (le32_to_cpu(ckpt->cur_data_segno[i]) ==
le32_to_cpu(ckpt->cur_data_segno[j])) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Data segment (%u, %u) has the same "
- "segno: %u", i, j,
- le32_to_cpu(ckpt->cur_data_segno[i]));
+ f2fs_err(sbi, "Data segment (%u, %u) has the same segno: %u",
+ i, j,
+ le32_to_cpu(ckpt->cur_data_segno[i]));
return 1;
}
}
@@ -2296,7 +2673,7 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
for (j = 0; j < NR_CURSEG_DATA_TYPE; j++) {
if (le32_to_cpu(ckpt->cur_node_segno[i]) ==
le32_to_cpu(ckpt->cur_data_segno[j])) {
-			f2fs_msg(sbi->sb, KERN_ERR,
-				"Node segment (%u) and Data segment (%u)"
-				" has the same segno: %u", i, j,
-				le32_to_cpu(ckpt->cur_node_segno[i]));
+			f2fs_err(sbi, "Node segment (%u) and Data segment (%u) has the same segno: %u",
+				 i, j,
+				 le32_to_cpu(ckpt->cur_node_segno[i]));
@@ -2311,14 +2688,32 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
if (sit_bitmap_size != ((sit_segs / 2) << log_blocks_per_seg) / 8 ||
nat_bitmap_size != ((nat_segs / 2) << log_blocks_per_seg) / 8) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Wrong bitmap size: sit: %u, nat:%u",
- sit_bitmap_size, nat_bitmap_size);
+ f2fs_err(sbi, "Wrong bitmap size: sit: %u, nat:%u",
+ sit_bitmap_size, nat_bitmap_size);
+ return 1;
+ }
+
+ cp_pack_start_sum = __start_sum_addr(sbi);
+ cp_payload = __cp_payload(sbi);
+ if (cp_pack_start_sum < cp_payload + 1 ||
+ cp_pack_start_sum > blocks_per_seg - 1 -
+ NR_CURSEG_TYPE) {
+ f2fs_err(sbi, "Wrong cp_pack_start_sum: %u",
+ cp_pack_start_sum);
+ return 1;
+ }
+
+ if (__is_set_ckpt_flags(ckpt, CP_LARGE_NAT_BITMAP_FLAG) &&
+ le32_to_cpu(ckpt->checksum_offset) != CP_MIN_CHKSUM_OFFSET) {
+ f2fs_warn(sbi, "using deprecated layout of large_nat_bitmap, "
+ "please run fsck v1.13.0 or higher to repair, chksum_offset: %u, "
+ "fixed with patch: \"f2fs-tools: relocate chksum_offset for large_nat_bitmap feature\"",
+ le32_to_cpu(ckpt->checksum_offset));
return 1;
}
if (unlikely(f2fs_cp_error(sbi))) {
- f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
+ f2fs_err(sbi, "A bug case: need to run fsck");
return 1;
}
return 0;
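
The bitmap-size comparison above follows from the on-disk layout: SIT and NAT each keep two alternating copies, so only half of their segments hold live blocks, each segment spans 2^log_blocks_per_seg blocks, and the checkpoint tracks one bit per block. A sketch of the expected size, assuming that mirrored layout:

	#include <stdio.h>

	/* expected checkpoint bitmap bytes for an area of 'segs' segments,
	 * half of which are live (the other half is the mirror copy) */
	static unsigned int expected_bitmap_bytes(unsigned int segs,
						  unsigned int log_blocks_per_seg)
	{
		unsigned int live_blocks = (segs / 2) << log_blocks_per_seg;

		return live_blocks / 8;	/* one bit per block */
	}

	int main(void)
	{
		/* hypothetical: 6 SIT segments, 512 blocks per segment */
		printf("sit bitmap: %u bytes\n", expected_bitmap_bytes(6, 9));
		return 0;
	}
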
@@ -2327,7 +2722,7 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
static void init_sb_info(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *raw_super = sbi->raw_super;
- int i, j;
+ int i;
sbi->log_sectors_per_block =
le32_to_cpu(raw_super->log_sectors_per_block);
@@ -2345,23 +2740,30 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
sbi->node_ino_num = le32_to_cpu(raw_super->node_ino);
sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino);
sbi->cur_victim_sec = NULL_SECNO;
+ sbi->next_victim_seg[BG_GC] = NULL_SEGNO;
+ sbi->next_victim_seg[FG_GC] = NULL_SEGNO;
sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH;
+ sbi->migration_granularity = sbi->segs_per_sec;
sbi->dir_level = DEF_DIR_LEVEL;
sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL;
sbi->interval_time[REQ_TIME] = DEF_IDLE_INTERVAL;
+ sbi->interval_time[DISCARD_TIME] = DEF_IDLE_INTERVAL;
+ sbi->interval_time[GC_TIME] = DEF_IDLE_INTERVAL;
+ sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_INTERVAL;
+ sbi->interval_time[UMOUNT_DISCARD_TIMEOUT] =
+ DEF_UMOUNT_DISCARD_TIMEOUT;
clear_sbi_flag(sbi, SBI_NEED_FSCK);
for (i = 0; i < NR_COUNT_TYPE; i++)
atomic_set(&sbi->nr_pages[i], 0);
- atomic_set(&sbi->wb_sync_req, 0);
+ for (i = 0; i < META; i++)
+ atomic_set(&sbi->wb_sync_req[i], 0);
INIT_LIST_HEAD(&sbi->s_list);
mutex_init(&sbi->umount_mutex);
- for (i = 0; i < NR_PAGE_TYPE - 1; i++)
- for (j = HOT; j < NR_TEMP_TYPE; j++)
- mutex_init(&sbi->wio_mutex[i][j]);
+ init_rwsem(&sbi->io_order_lock);
spin_lock_init(&sbi->cp_lock);
sbi->dirty_device = 0;
@@ -2378,8 +2780,12 @@ static int init_percpu_info(struct f2fs_sb_info *sbi)
if (err)
return err;
- return percpu_counter_init(&sbi->total_valid_inode_count, 0,
+ err = percpu_counter_init(&sbi->total_valid_inode_count, 0,
GFP_KERNEL);
+ if (err)
+ percpu_counter_destroy(&sbi->alloc_valid_block_count);
+
+ return err;
}
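
The init_percpu_info change closes a small leak: when the second counter fails to initialize, the first must be destroyed before the error propagates. The paired-init unwind pattern on its own (sketch; init_a/init_b stand in for the two percpu_counter_init calls):

	#include <stdio.h>

	static int  init_a(void)    { return 0; }
	static void destroy_a(void) { }
	static int  init_b(void)    { return -1; /* simulated failure */ }

	static int init_pair(void)
	{
		int err = init_a();

		if (err)
			return err;

		err = init_b();
		if (err)
			destroy_a();	/* unwind the earlier init on failure */

		return err;
	}

	int main(void)
	{
		printf("init_pair: %d\n", init_pair());
		return 0;
	}
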
#ifdef CONFIG_BLK_DEV_ZONED
@@ -2393,7 +2799,7 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
unsigned int n = 0;
int err = -EIO;
- if (!f2fs_sb_has_blkzoned(sbi->sb))
+ if (!f2fs_sb_has_blkzoned(sbi))
return 0;
if (sbi->blocks_per_blkz && sbi->blocks_per_blkz !=
@@ -2409,15 +2815,19 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
if (nr_sectors & (bdev_zone_sectors(bdev) - 1))
FDEV(devi).nr_blkz++;
- FDEV(devi).blkz_type = f2fs_kmalloc(sbi, FDEV(devi).nr_blkz,
- GFP_KERNEL);
- if (!FDEV(devi).blkz_type)
+ FDEV(devi).blkz_seq = f2fs_kzalloc(sbi,
+ BITS_TO_LONGS(FDEV(devi).nr_blkz)
+ * sizeof(unsigned long),
+ GFP_KERNEL);
+ if (!FDEV(devi).blkz_seq)
return -ENOMEM;
#define F2FS_REPORT_NR_ZONES 4096
- zones = f2fs_kzalloc(sbi, sizeof(struct blk_zone) *
- F2FS_REPORT_NR_ZONES, GFP_KERNEL);
+ zones = f2fs_kzalloc(sbi,
+ array_size(F2FS_REPORT_NR_ZONES,
+ sizeof(struct blk_zone)),
+ GFP_KERNEL);
if (!zones)
return -ENOMEM;
@@ -2436,13 +2846,14 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
}
for (i = 0; i < nr_zones; i++) {
- FDEV(devi).blkz_type[n] = zones[i].type;
+ if (zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL)
+ set_bit(n, FDEV(devi).blkz_seq);
sector += zones[i].len;
n++;
}
}
- kfree(zones);
+ kvfree(zones);
return err;
}
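
Switching blkz_type to blkz_seq trades a byte-per-zone type array for one bit per zone, set when the zone is not conventional (i.e. must be written sequentially); that single bit is all later callers ask about. A rough userspace equivalent of the conversion loop, with stand-in zone-type constants:

	#include <stdio.h>
	#include <string.h>
	#include <limits.h>

	#define BITS_PER_LONG_UL (sizeof(unsigned long) * CHAR_BIT)
	#define BITS_TO_LONGS_UL(n) (((n) + BITS_PER_LONG_UL - 1) / BITS_PER_LONG_UL)

	enum { ZONE_CONVENTIONAL, ZONE_SEQ_WRITE };	/* stand-in types */

	static void set_bit_ul(unsigned int n, unsigned long *map)
	{
		map[n / BITS_PER_LONG_UL] |= 1UL << (n % BITS_PER_LONG_UL);
	}

	static int test_bit_ul(unsigned int n, const unsigned long *map)
	{
		return !!(map[n / BITS_PER_LONG_UL] & (1UL << (n % BITS_PER_LONG_UL)));
	}

	int main(void)
	{
		int types[] = { ZONE_CONVENTIONAL, ZONE_SEQ_WRITE, ZONE_SEQ_WRITE };
		unsigned long seq_map[BITS_TO_LONGS_UL(3)];
		unsigned int i;

		memset(seq_map, 0, sizeof(seq_map));
		for (i = 0; i < 3; i++)
			if (types[i] != ZONE_CONVENTIONAL)
				set_bit_ul(i, seq_map);

		for (i = 0; i < 3; i++)
			printf("zone %u sequential: %d\n", i,
			       test_bit_ul(i, seq_map));
		return 0;
	}
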
@@ -2471,18 +2882,17 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi,
for (block = 0; block < 2; block++) {
bh = sb_bread(sb, block);
if (!bh) {
- f2fs_msg(sb, KERN_ERR, "Unable to read %dth superblock",
- block + 1);
+ f2fs_err(sbi, "Unable to read %dth superblock",
+ block + 1);
err = -EIO;
continue;
}
/* sanity checking of raw super */
- if (sanity_check_raw_super(sbi, bh)) {
- f2fs_msg(sb, KERN_ERR,
- "Can't find valid F2FS filesystem in %dth superblock",
- block + 1);
- err = -EINVAL;
+ err = sanity_check_raw_super(sbi, bh);
+ if (err) {
+ f2fs_err(sbi, "Can't find valid F2FS filesystem in %dth superblock",
+ block + 1);
brelse(bh);
continue;
}
@@ -2502,7 +2912,7 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi,
/* No valid superblock */
if (!*raw_super)
- kfree(super);
+ kvfree(super);
else
err = 0;
@@ -2512,6 +2922,7 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi,
int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
{
struct buffer_head *bh;
+ __u32 crc = 0;
int err;
if ((recover && f2fs_readonly(sbi->sb)) ||
@@ -2520,6 +2931,13 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
return -EROFS;
}
+ /* we should update superblock crc here */
+ if (!recover && f2fs_sb_has_sb_chksum(sbi)) {
+ crc = f2fs_crc32(sbi, F2FS_RAW_SUPER(sbi),
+ offsetof(struct f2fs_super_block, crc));
+ F2FS_RAW_SUPER(sbi)->crc = cpu_to_le32(crc);
+ }
+
/* write back-up superblock first */
bh = sb_bread(sbi->sb, sbi->valid_super_block ? 0 : 1);
if (!bh)
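
The checksum update a few lines above runs f2fs_crc32 over the superblock only up to offsetof(struct f2fs_super_block, crc), so the stored value never covers itself. A standalone illustration of checksumming a struct prefix that way (toy struct and a plain bit-at-a-time CRC-32; the kernel's seed and shash driver differ, so this shows the shape, not the on-disk value):

	#include <stdio.h>
	#include <stddef.h>
	#include <stdint.h>

	struct sb {			/* toy superblock: crc must come last */
		uint32_t magic;
		uint32_t blocks;
		uint32_t crc;
	};

	static uint32_t crc32_buf(uint32_t crc, const void *buf, size_t len)
	{
		const uint8_t *p = buf;
		size_t i;
		int k;

		crc = ~crc;
		for (i = 0; i < len; i++) {
			crc ^= p[i];
			for (k = 0; k < 8; k++)
				crc = (crc >> 1) ^ ((crc & 1) ? 0xEDB88320u : 0);
		}
		return ~crc;
	}

	int main(void)
	{
		struct sb s = { 0xF2F52010u, 4096, 0 };

		/* checksum everything before the crc field, then store it */
		s.crc = crc32_buf(0, &s, offsetof(struct sb, crc));
		printf("crc: %08x\n", (unsigned int)s.crc);
		return 0;
	}
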
@@ -2561,8 +2979,10 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
* Initialize multiple devices information, or single
* zoned block device information.
*/
- sbi->devs = f2fs_kzalloc(sbi, sizeof(struct f2fs_dev_info) *
- max_devices, GFP_KERNEL);
+ sbi->devs = f2fs_kzalloc(sbi,
+ array_size(max_devices,
+ sizeof(struct f2fs_dev_info)),
+ GFP_KERNEL);
if (!sbi->devs)
return -ENOMEM;
@@ -2604,37 +3024,33 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
#ifdef CONFIG_BLK_DEV_ZONED
if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM &&
- !f2fs_sb_has_blkzoned(sbi->sb)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Zoned block device feature not enabled\n");
+ !f2fs_sb_has_blkzoned(sbi)) {
+ f2fs_err(sbi, "Zoned block device feature not enabled\n");
return -EINVAL;
}
if (bdev_zoned_model(FDEV(i).bdev) != BLK_ZONED_NONE) {
if (init_blkz_info(sbi, i)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Failed to initialize F2FS blkzone information");
+ f2fs_err(sbi, "Failed to initialize F2FS blkzone information");
return -EINVAL;
}
if (max_devices == 1)
break;
- f2fs_msg(sbi->sb, KERN_INFO,
- "Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)",
- i, FDEV(i).path,
- FDEV(i).total_segments,
- FDEV(i).start_blk, FDEV(i).end_blk,
- bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HA ?
- "Host-aware" : "Host-managed");
+ f2fs_info(sbi, "Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)",
+ i, FDEV(i).path,
+ FDEV(i).total_segments,
+ FDEV(i).start_blk, FDEV(i).end_blk,
+ bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HA ?
+ "Host-aware" : "Host-managed");
continue;
}
#endif
- f2fs_msg(sbi->sb, KERN_INFO,
- "Mount Device [%2d]: %20s, %8u, %8x - %8x",
- i, FDEV(i).path,
- FDEV(i).total_segments,
- FDEV(i).start_blk, FDEV(i).end_blk);
- }
- f2fs_msg(sbi->sb, KERN_INFO,
- "IO Block Size: %8d KB", F2FS_IO_SIZE_KB(sbi));
+ f2fs_info(sbi, "Mount Device [%2d]: %20s, %8u, %8x - %8x",
+ i, FDEV(i).path,
+ FDEV(i).total_segments,
+ FDEV(i).start_blk, FDEV(i).end_blk);
+ }
+	f2fs_info(sbi, "IO Block Size: %8d KB", F2FS_IO_SIZE_KB(sbi));
return 0;
}
@@ -2648,6 +3064,8 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi)
sm_i->dcc_info->discard_granularity = 1;
sm_i->ipu_policy = 1 << F2FS_IPU_FORCE;
}
+
+ sbi->readdir_ra = 1;
}
static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
@@ -2656,10 +3074,11 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
struct f2fs_super_block *raw_super;
struct inode *root;
int err;
- bool retry = true, need_fsck = false;
+ bool skip_recovery = false, need_fsck = false;
char *options = NULL;
int recovery, i, valid_super_block;
struct curseg_info *seg_i;
+ int retry_cnt = 1;
try_onemore:
err = -EINVAL;
@@ -2677,7 +3096,7 @@ try_onemore:
/* Load the checksum driver */
sbi->s_chksum_driver = crypto_alloc_shash("crc32", 0, 0);
if (IS_ERR(sbi->s_chksum_driver)) {
- f2fs_msg(sb, KERN_ERR, "Cannot load crc32 driver.");
+ f2fs_err(sbi, "Cannot load crc32 driver.");
err = PTR_ERR(sbi->s_chksum_driver);
sbi->s_chksum_driver = NULL;
goto free_sbi;
@@ -2685,7 +3104,7 @@ try_onemore:
/* set a block size */
if (unlikely(!sb_set_blocksize(sb, F2FS_BLKSIZE))) {
- f2fs_msg(sb, KERN_ERR, "unable to set blocksize");
+ f2fs_err(sbi, "unable to set blocksize");
goto free_sbi;
}
@@ -2697,11 +3116,8 @@ try_onemore:
sb->s_fs_info = sbi;
sbi->raw_super = raw_super;
- F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID);
- F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID);
-
/* precompute checksum seed for metadata */
- if (f2fs_sb_has_inode_chksum(sb))
+ if (f2fs_sb_has_inode_chksum(sbi))
sbi->s_chksum_seed = f2fs_chksum(sbi, ~0, raw_super->uuid,
sizeof(raw_super->uuid));
@@ -2711,9 +3127,8 @@ try_onemore:
* devices, but mandatory for host-managed zoned block devices.
*/
#ifndef CONFIG_BLK_DEV_ZONED
- if (f2fs_sb_has_blkzoned(sb)) {
- f2fs_msg(sb, KERN_ERR,
- "Zoned block device support is not enabled\n");
+ if (f2fs_sb_has_blkzoned(sbi)) {
+ f2fs_err(sbi, "Zoned block device support is not enabled");
err = -EOPNOTSUPP;
goto free_sb_buf;
}
@@ -2734,17 +3149,13 @@ try_onemore:
sb->s_maxbytes = sbi->max_file_blocks <<
le32_to_cpu(raw_super->log_blocksize);
sb->s_max_links = F2FS_LINK_MAX;
- get_random_bytes(&sbi->s_next_generation, sizeof(u32));
#ifdef CONFIG_QUOTA
sb->dq_op = &f2fs_quota_operations;
- if (f2fs_sb_has_quota_ino(sb))
- sb->s_qcop = &dquot_quotactl_sysfile_ops;
- else
- sb->s_qcop = &f2fs_quotactl_ops;
+ sb->s_qcop = &f2fs_quotactl_ops;
sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
- if (f2fs_sb_has_quota_ino(sbi->sb)) {
+ if (f2fs_sb_has_quota_ino(sbi)) {
for (i = 0; i < MAXQUOTAS; i++) {
if (f2fs_qf_ino(sbi->sb, i))
sbi->nquota_files++;
@@ -2768,7 +3179,9 @@ try_onemore:
/* init f2fs-specific super block info */
sbi->valid_super_block = valid_super_block;
mutex_init(&sbi->gc_mutex);
+ mutex_init(&sbi->writepages);
mutex_init(&sbi->cp_mutex);
+ mutex_init(&sbi->resize_mutex);
init_rwsem(&sbi->node_write);
init_rwsem(&sbi->node_change);
@@ -2784,12 +3197,14 @@ try_onemore:
int n = (i == META) ? 1: NR_TEMP_TYPE;
int j;
- sbi->write_io[i] = f2fs_kmalloc(sbi,
- n * sizeof(struct f2fs_bio_info),
- GFP_KERNEL);
+ sbi->write_io[i] =
+ f2fs_kmalloc(sbi,
+ array_size(n,
+ sizeof(struct f2fs_bio_info)),
+ GFP_KERNEL);
if (!sbi->write_io[i]) {
err = -ENOMEM;
- goto free_options;
+ goto free_bio_info;
}
for (j = HOT; j < n; j++) {
@@ -2802,6 +3217,7 @@ try_onemore:
}
init_rwsem(&sbi->cp_rwsem);
+ init_rwsem(&sbi->quota_sem);
init_waitqueue_head(&sbi->cp_wait);
init_sb_info(sbi);
@@ -2821,21 +3237,31 @@ try_onemore:
/* get an inode for meta space */
sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi));
if (IS_ERR(sbi->meta_inode)) {
- f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode");
+ f2fs_err(sbi, "Failed to read F2FS meta data inode");
err = PTR_ERR(sbi->meta_inode);
goto free_io_dummy;
}
- err = get_valid_checkpoint(sbi);
+ err = f2fs_get_valid_checkpoint(sbi);
if (err) {
- f2fs_msg(sb, KERN_ERR, "Failed to get valid F2FS checkpoint");
+ f2fs_err(sbi, "Failed to get valid F2FS checkpoint");
goto free_meta_inode;
}
+ if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_QUOTA_NEED_FSCK_FLAG))
+ set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+ if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_DISABLED_QUICK_FLAG)) {
+ set_sbi_flag(sbi, SBI_CP_DISABLED_QUICK);
+ sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_QUICK_INTERVAL;
+ }
+
+ if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_FSCK_FLAG))
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+
/* Initialize device list */
err = f2fs_scan_devices(sbi);
if (err) {
- f2fs_msg(sb, KERN_ERR, "Failed to find devices");
+ f2fs_err(sbi, "Failed to find devices");
goto free_devices;
}
@@ -2855,22 +3281,25 @@ try_onemore:
INIT_LIST_HEAD(&sbi->inode_list[i]);
spin_lock_init(&sbi->inode_lock[i]);
}
+ mutex_init(&sbi->flush_lock);
+
+ f2fs_init_extent_cache_info(sbi);
- init_extent_cache_info(sbi);
+ f2fs_init_ino_entry_info(sbi);
- init_ino_entry_info(sbi);
+ f2fs_init_fsync_node_info(sbi);
/* setup f2fs internal modules */
- err = build_segment_manager(sbi);
+ err = f2fs_build_segment_manager(sbi);
if (err) {
- f2fs_msg(sb, KERN_ERR,
- "Failed to initialize F2FS segment manager");
+ f2fs_err(sbi, "Failed to initialize F2FS segment manager (%d)",
+ err);
goto free_sm;
}
- err = build_node_manager(sbi);
+ err = f2fs_build_node_manager(sbi);
if (err) {
- f2fs_msg(sb, KERN_ERR,
- "Failed to initialize F2FS node manager");
+ f2fs_err(sbi, "Failed to initialize F2FS node manager (%d)",
+ err);
goto free_nm;
}
@@ -2885,28 +3314,29 @@ try_onemore:
sbi->kbytes_written =
le64_to_cpu(seg_i->journal->info.kbytes_written);
- build_gc_manager(sbi);
+ f2fs_build_gc_manager(sbi);
+
+ err = f2fs_build_stats(sbi);
+ if (err)
+ goto free_nm;
/* get an inode for node space */
sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi));
if (IS_ERR(sbi->node_inode)) {
- f2fs_msg(sb, KERN_ERR, "Failed to read node inode");
+ f2fs_err(sbi, "Failed to read node inode");
err = PTR_ERR(sbi->node_inode);
- goto free_nm;
+ goto free_stats;
}
- err = f2fs_build_stats(sbi);
- if (err)
- goto free_node_inode;
-
/* read root inode and dentry */
root = f2fs_iget(sb, F2FS_ROOT_INO(sbi));
if (IS_ERR(root)) {
- f2fs_msg(sb, KERN_ERR, "Failed to read root inode");
+ f2fs_err(sbi, "Failed to read root inode");
err = PTR_ERR(root);
- goto free_stats;
+ goto free_node_inode;
}
- if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
+ if (!S_ISDIR(root->i_mode) || !root->i_blocks ||
+ !root->i_size || !root->i_nlink) {
iput(root);
err = -EINVAL;
goto free_node_inode;
@@ -2915,7 +3345,7 @@ try_onemore:
sb->s_root = d_make_root(root); /* allocate root dentry */
if (!sb->s_root) {
err = -ENOMEM;
- goto free_root_inode;
+ goto free_node_inode;
}
err = f2fs_register_sysfs(sbi);
@@ -2923,152 +3353,168 @@ try_onemore:
goto free_root_inode;
#ifdef CONFIG_QUOTA
- /*
- * Turn on quotas which were not enabled for read-only mounts if
- * filesystem has quota feature, so that they are updated correctly.
- */
- if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb)) {
+ /* Enable quota usage during mount */
+ if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sb)) {
err = f2fs_enable_quotas(sb);
- if (err) {
- f2fs_msg(sb, KERN_ERR,
- "Cannot turn on quotas: error %d", err);
- goto free_sysfs;
- }
+ if (err)
+ f2fs_err(sbi, "Cannot turn on quotas: error %d", err);
}
#endif
	/* if there are any orphan nodes, free them */
- err = recover_orphan_inodes(sbi);
+ err = f2fs_recover_orphan_inodes(sbi);
if (err)
goto free_meta;
+ if (unlikely(is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)))
+ goto reset_checkpoint;
+
/* recover fsynced data */
if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
/*
* mount should be failed, when device has readonly mode, and
* previous checkpoint was not done by clean system shutdown.
*/
- if (bdev_read_only(sb->s_bdev) &&
- !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
- err = -EROFS;
- goto free_meta;
+ if (f2fs_hw_is_readonly(sbi)) {
+ if (!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
+ err = -EROFS;
+ f2fs_err(sbi, "Need to recover fsync data, but write access unavailable");
+ goto free_meta;
+ }
+ f2fs_info(sbi, "write access unavailable, skipping recovery");
+ goto reset_checkpoint;
}
if (need_fsck)
set_sbi_flag(sbi, SBI_NEED_FSCK);
- if (!retry)
- goto skip_recovery;
+ if (skip_recovery)
+ goto reset_checkpoint;
- err = recover_fsync_data(sbi, false);
+ err = f2fs_recover_fsync_data(sbi, false);
if (err < 0) {
+ if (err != -ENOMEM)
+ skip_recovery = true;
need_fsck = true;
- f2fs_msg(sb, KERN_ERR,
- "Cannot recover all fsync data errno=%d", err);
+ f2fs_err(sbi, "Cannot recover all fsync data errno=%d",
+ err);
goto free_meta;
}
} else {
- err = recover_fsync_data(sbi, true);
+ err = f2fs_recover_fsync_data(sbi, true);
if (!f2fs_readonly(sb) && err > 0) {
err = -EINVAL;
- f2fs_msg(sb, KERN_ERR,
- "Need to recover fsync data");
+ f2fs_err(sbi, "Need to recover fsync data");
goto free_meta;
}
}
-skip_recovery:
- /* recover_fsync_data() cleared this already */
+reset_checkpoint:
+ /* f2fs_recover_fsync_data() cleared this already */
clear_sbi_flag(sbi, SBI_POR_DOING);
+ if (test_opt(sbi, DISABLE_CHECKPOINT)) {
+ err = f2fs_disable_checkpoint(sbi);
+ if (err)
+ goto sync_free_meta;
+ } else if (is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)) {
+ f2fs_enable_checkpoint(sbi);
+ }
+
/*
* If filesystem is not mounted as read-only then
* do start the gc_thread.
*/
if (test_opt(sbi, BG_GC) && !f2fs_readonly(sb)) {
/* After POR, we can run background GC thread.*/
- err = start_gc_thread(sbi);
+ err = f2fs_start_gc_thread(sbi);
if (err)
- goto free_meta;
+ goto sync_free_meta;
}
- kfree(options);
+ kvfree(options);
/* recover broken superblock */
if (recovery) {
err = f2fs_commit_super(sbi, true);
- f2fs_msg(sb, KERN_INFO,
- "Try to recover %dth superblock, ret: %d",
- sbi->valid_super_block ? 1 : 2, err);
+ f2fs_info(sbi, "Try to recover %dth superblock, ret: %d",
+ sbi->valid_super_block ? 1 : 2, err);
}
f2fs_join_shrinker(sbi);
f2fs_tuning_parameters(sbi);
- f2fs_msg(sbi->sb, KERN_NOTICE, "Mounted with checkpoint version = %llx",
- cur_cp_version(F2FS_CKPT(sbi)));
+ f2fs_notice(sbi, "Mounted with checkpoint version = %llx",
+ cur_cp_version(F2FS_CKPT(sbi)));
f2fs_update_time(sbi, CP_TIME);
f2fs_update_time(sbi, REQ_TIME);
+ clear_sbi_flag(sbi, SBI_CP_DISABLED_QUICK);
return 0;
+sync_free_meta:
+ /* safe to flush all the data */
+ sync_filesystem(sbi->sb);
+ retry_cnt = 0;
+
free_meta:
#ifdef CONFIG_QUOTA
- if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb))
+ f2fs_truncate_quota_inode_pages(sb);
+ if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sb))
f2fs_quota_off_umount(sbi->sb);
#endif
- f2fs_sync_inode_meta(sbi);
/*
- * Some dirty meta pages can be produced by recover_orphan_inodes()
+ * Some dirty meta pages can be produced by f2fs_recover_orphan_inodes()
* failed by EIO. Then, iput(node_inode) can trigger balance_fs_bg()
- * followed by write_checkpoint() through f2fs_write_node_pages(), which
- * falls into an infinite loop in sync_meta_pages().
+ * followed by f2fs_write_checkpoint() through f2fs_write_node_pages(), which
+ * falls into an infinite loop in f2fs_sync_meta_pages().
*/
truncate_inode_pages_final(META_MAPPING(sbi));
-#ifdef CONFIG_QUOTA
-free_sysfs:
-#endif
+ /* evict some inodes being cached by GC */
+ evict_inodes(sb);
f2fs_unregister_sysfs(sbi);
free_root_inode:
dput(sb->s_root);
sb->s_root = NULL;
-free_stats:
- f2fs_destroy_stats(sbi);
free_node_inode:
- release_ino_entry(sbi, true);
+ f2fs_release_ino_entry(sbi, true);
truncate_inode_pages_final(NODE_MAPPING(sbi));
iput(sbi->node_inode);
+ sbi->node_inode = NULL;
+free_stats:
+ f2fs_destroy_stats(sbi);
free_nm:
- destroy_node_manager(sbi);
+ f2fs_destroy_node_manager(sbi);
free_sm:
- destroy_segment_manager(sbi);
+ f2fs_destroy_segment_manager(sbi);
free_devices:
destroy_device_list(sbi);
- kfree(sbi->ckpt);
+ kvfree(sbi->ckpt);
free_meta_inode:
make_bad_inode(sbi->meta_inode);
iput(sbi->meta_inode);
+ sbi->meta_inode = NULL;
free_io_dummy:
mempool_destroy(sbi->write_io_dummy);
free_percpu:
destroy_percpu_info(sbi);
free_bio_info:
for (i = 0; i < NR_PAGE_TYPE; i++)
- kfree(sbi->write_io[i]);
+ kvfree(sbi->write_io[i]);
free_options:
#ifdef CONFIG_QUOTA
for (i = 0; i < MAXQUOTAS; i++)
- kfree(F2FS_OPTION(sbi).s_qf_names[i]);
+ kvfree(F2FS_OPTION(sbi).s_qf_names[i]);
#endif
- kfree(options);
+ kvfree(options);
free_sb_buf:
- kfree(raw_super);
+ kvfree(raw_super);
free_sbi:
if (sbi->s_chksum_driver)
crypto_free_shash(sbi->s_chksum_driver);
- kfree(sbi);
+ kvfree(sbi);
/* give only one another chance */
- if (retry) {
- retry = false;
+ if (retry_cnt > 0 && skip_recovery) {
+ retry_cnt--;
shrink_dcache_sb(sb);
goto try_onemore;
}
@@ -3084,9 +3530,22 @@ static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags,
static void kill_f2fs_super(struct super_block *sb)
{
if (sb->s_root) {
- set_sbi_flag(F2FS_SB(sb), SBI_IS_CLOSE);
- stop_gc_thread(F2FS_SB(sb));
- stop_discard_thread(F2FS_SB(sb));
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+
+ set_sbi_flag(sbi, SBI_IS_CLOSE);
+ f2fs_stop_gc_thread(sbi);
+ f2fs_stop_discard_thread(sbi);
+
+ if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
+ !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
+ struct cp_control cpc = {
+ .reason = CP_UMOUNT,
+ };
+ f2fs_write_checkpoint(sbi, &cpc);
+ }
+
+ if (is_sbi_flag_set(sbi, SBI_IS_RECOVERED) && f2fs_readonly(sb))
+ sb->s_flags &= ~MS_RDONLY;
}
kill_block_super(sb);
}
@@ -3135,16 +3594,16 @@ static int __init init_f2fs_fs(void)
err = init_inodecache();
if (err)
goto fail;
- err = create_node_manager_caches();
+ err = f2fs_create_node_manager_caches();
if (err)
goto free_inodecache;
- err = create_segment_manager_caches();
+ err = f2fs_create_segment_manager_caches();
if (err)
goto free_node_manager_caches;
- err = create_checkpoint_caches();
+ err = f2fs_create_checkpoint_caches();
if (err)
goto free_segment_manager_caches;
- err = create_extent_cache();
+ err = f2fs_create_extent_cache();
if (err)
goto free_checkpoint_caches;
err = f2fs_init_sysfs();
@@ -3156,9 +3615,7 @@ static int __init init_f2fs_fs(void)
err = register_filesystem(&f2fs_fs_type);
if (err)
goto free_shrinker;
- err = f2fs_create_root_stats();
- if (err)
- goto free_filesystem;
+ f2fs_create_root_stats();
err = f2fs_init_post_read_processing();
if (err)
goto free_root_stats;
@@ -3166,20 +3623,19 @@ static int __init init_f2fs_fs(void)
free_root_stats:
f2fs_destroy_root_stats();
-free_filesystem:
unregister_filesystem(&f2fs_fs_type);
free_shrinker:
unregister_shrinker(&f2fs_shrinker_info);
free_sysfs:
f2fs_exit_sysfs();
free_extent_cache:
- destroy_extent_cache();
+ f2fs_destroy_extent_cache();
free_checkpoint_caches:
- destroy_checkpoint_caches();
+ f2fs_destroy_checkpoint_caches();
free_segment_manager_caches:
- destroy_segment_manager_caches();
+ f2fs_destroy_segment_manager_caches();
free_node_manager_caches:
- destroy_node_manager_caches();
+ f2fs_destroy_node_manager_caches();
free_inodecache:
destroy_inodecache();
fail:
@@ -3193,10 +3649,10 @@ static void __exit exit_f2fs_fs(void)
unregister_filesystem(&f2fs_fs_type);
unregister_shrinker(&f2fs_shrinker_info);
f2fs_exit_sysfs();
- destroy_extent_cache();
- destroy_checkpoint_caches();
- destroy_segment_manager_caches();
- destroy_node_manager_caches();
+ f2fs_destroy_extent_cache();
+ f2fs_destroy_checkpoint_caches();
+ f2fs_destroy_segment_manager_caches();
+ f2fs_destroy_node_manager_caches();
destroy_inodecache();
f2fs_destroy_trace_ios();
}
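
init_f2fs_fs creates its caches in a fixed order and, on any failure, unwinds only the steps already completed; exit_f2fs_fs then repeats the teardown in exactly the reverse order. The skeleton of that goto-unwind idiom (sketch with dummy steps; the third step fails deliberately):

	#include <stdio.h>

	static int  step_a(void) { puts("a up");   return 0; }
	static void undo_a(void) { puts("a down"); }
	static int  step_b(void) { puts("b up");   return 0; }
	static void undo_b(void) { puts("b down"); }
	static int  step_c(void) { puts("c up");   return -1; /* simulated */ }

	static int init_all(void)
	{
		int err;

		err = step_a();
		if (err)
			goto fail;
		err = step_b();
		if (err)
			goto undo_a;
		err = step_c();
		if (err)
			goto undo_b;
		return 0;

	undo_b:
		undo_b();	/* unwind in reverse creation order */
	undo_a:
		undo_a();
	fail:
		return err;
	}

	int main(void)
	{
		printf("init_all: %d\n", init_all());
		return 0;
	}
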
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 2c53de9251be..9e910b15bb4b 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -1,14 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* f2fs sysfs interface
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
* Copyright (c) 2017 Chao Yu <chao@kernel.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
+#include <linux/compiler.h>
#include <linux/proc_fs.h>
#include <linux/f2fs_fs.h>
#include <linux/seq_file.h>
@@ -70,6 +68,20 @@ static ssize_t dirty_segments_show(struct f2fs_attr *a,
(unsigned long long)(dirty_segments(sbi)));
}
+static ssize_t unusable_show(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi, char *buf)
+{
+ block_t unusable;
+
+ if (test_opt(sbi, DISABLE_CHECKPOINT))
+ unusable = sbi->unusable_block_count;
+ else
+ unusable = f2fs_get_unusable_blocks(sbi);
+ return snprintf(buf, PAGE_SIZE, "%llu\n",
+ (unsigned long long)unusable);
+}
+
static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a,
struct f2fs_sb_info *sbi, char *buf)
{
@@ -92,33 +104,36 @@ static ssize_t features_show(struct f2fs_attr *a,
if (!sb->s_bdev->bd_part)
return snprintf(buf, PAGE_SIZE, "0\n");
- if (f2fs_sb_has_encrypt(sb))
+ if (f2fs_sb_has_encrypt(sbi))
len += snprintf(buf, PAGE_SIZE - len, "%s",
"encryption");
- if (f2fs_sb_has_blkzoned(sb))
+ if (f2fs_sb_has_blkzoned(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "blkzoned");
- if (f2fs_sb_has_extra_attr(sb))
+ if (f2fs_sb_has_extra_attr(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "extra_attr");
- if (f2fs_sb_has_project_quota(sb))
+ if (f2fs_sb_has_project_quota(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "projquota");
- if (f2fs_sb_has_inode_chksum(sb))
+ if (f2fs_sb_has_inode_chksum(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "inode_checksum");
- if (f2fs_sb_has_flexible_inline_xattr(sb))
+ if (f2fs_sb_has_flexible_inline_xattr(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "flexible_inline_xattr");
- if (f2fs_sb_has_quota_ino(sb))
+ if (f2fs_sb_has_quota_ino(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "quota_ino");
- if (f2fs_sb_has_inode_crtime(sb))
+ if (f2fs_sb_has_inode_crtime(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "inode_crtime");
- if (f2fs_sb_has_lost_found(sb))
+ if (f2fs_sb_has_lost_found(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "lost_found");
+ if (f2fs_sb_has_sb_chksum(sbi))
+ len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
+ len ? ", " : "", "sb_checksum");
len += snprintf(buf + len, PAGE_SIZE - len, "\n");
return len;
}
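
features_show accumulates its output by appending into a single page-sized buffer, always passing buf + len and PAGE_SIZE - len so a later write cannot run past the page. The append pattern by itself (sketch with a small stack buffer, sized so nothing truncates):

	#include <stdio.h>

	int main(void)
	{
		char buf[64];
		int len = 0;
		const char *feats[] = { "encryption", "extra_attr", "sb_checksum" };
		int i;

		for (i = 0; i < 3; i++)
			len += snprintf(buf + len, sizeof(buf) - len, "%s%s",
					len ? ", " : "", feats[i]);
		len += snprintf(buf + len, sizeof(buf) - len, "\n");

		fputs(buf, stdout);
		return 0;
	}
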
@@ -147,13 +162,13 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
int len = 0, i;
len += snprintf(buf + len, PAGE_SIZE - len,
- "cold file extenstion:\n");
+ "cold file extension:\n");
for (i = 0; i < cold_count; i++)
len += snprintf(buf + len, PAGE_SIZE - len, "%s\n",
extlist[i]);
len += snprintf(buf + len, PAGE_SIZE - len,
- "hot file extenstion:\n");
+ "hot file extension:\n");
for (i = cold_count; i < cold_count + hot_count; i++)
len += snprintf(buf + len, PAGE_SIZE - len, "%s\n",
extlist[i]);
@@ -165,7 +180,7 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
}
-static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
+static ssize_t __sbi_store(struct f2fs_attr *a,
struct f2fs_sb_info *sbi,
const char *buf, size_t count)
{
@@ -201,13 +216,13 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
down_write(&sbi->sb_lock);
- ret = update_extension_list(sbi, name, hot, set);
+ ret = f2fs_update_extension_list(sbi, name, hot, set);
if (ret)
goto out;
ret = f2fs_commit_super(sbi, false);
if (ret)
- update_extension_list(sbi, name, hot, !set);
+ f2fs_update_extension_list(sbi, name, hot, !set);
out:
up_write(&sbi->sb_lock);
return ret ? ret : count;
@@ -221,6 +236,8 @@ out:
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (a->struct_type == FAULT_INFO_TYPE && t >= (1 << FAULT_MAX))
return -EINVAL;
+ if (a->struct_type == FAULT_INFO_RATE && t >= UINT_MAX)
+ return -EINVAL;
#endif
if (a->struct_type == RESERVED_BLOCKS) {
spin_lock(&sbi->stat_lock);
@@ -245,22 +262,70 @@ out:
return count;
}
+ if (!strcmp(a->attr.name, "migration_granularity")) {
+ if (t == 0 || t > sbi->segs_per_sec)
+ return -EINVAL;
+ }
+
if (!strcmp(a->attr.name, "trim_sections"))
return -EINVAL;
- *ui = t;
+ if (!strcmp(a->attr.name, "gc_urgent")) {
+ if (t >= 1) {
+ sbi->gc_mode = GC_URGENT;
+ if (sbi->gc_thread) {
+ sbi->gc_thread->gc_wake = 1;
+ wake_up_interruptible_all(
+ &sbi->gc_thread->gc_wait_queue_head);
+ wake_up_discard_thread(sbi, true);
+ }
+ } else {
+ sbi->gc_mode = GC_NORMAL;
+ }
+ return count;
+ }
+ if (!strcmp(a->attr.name, "gc_idle")) {
+ if (t == GC_IDLE_CB)
+ sbi->gc_mode = GC_IDLE_CB;
+ else if (t == GC_IDLE_GREEDY)
+ sbi->gc_mode = GC_IDLE_GREEDY;
+ else
+ sbi->gc_mode = GC_NORMAL;
+ return count;
+ }
+
- if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0)
- f2fs_reset_iostat(sbi);
- if (!strcmp(a->attr.name, "gc_urgent") && t == 1 && sbi->gc_thread) {
- sbi->gc_thread->gc_wake = 1;
- wake_up_interruptible_all(&sbi->gc_thread->gc_wait_queue_head);
- wake_up_discard_thread(sbi, true);
+ if (!strcmp(a->attr.name, "iostat_enable")) {
+ sbi->iostat_enable = !!t;
+ if (!sbi->iostat_enable)
+ f2fs_reset_iostat(sbi);
+ return count;
}
+ *ui = (unsigned int)t;
+
return count;
}
+static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi,
+ const char *buf, size_t count)
+{
+ ssize_t ret;
+ bool gc_entry = (!strcmp(a->attr.name, "gc_urgent") ||
+ a->struct_type == GC_THREAD);
+
+ if (gc_entry) {
+ if (!down_read_trylock(&sbi->sb->s_umount))
+ return -EAGAIN;
+ }
+ ret = __sbi_store(a, sbi, buf, count);
+ if (gc_entry)
+ up_read(&sbi->sb->s_umount);
+
+ return ret;
+}
+
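
Splitting the store path this way lets GC-related writes take sb->s_umount with a trylock first: if an unmount already holds it, the write returns -EAGAIN instead of deadlocking against the GC thread shutdown. The guard in isolation (userspace sketch with a pthread rwlock standing in for s_umount):

	#include <stdio.h>
	#include <errno.h>
	#include <pthread.h>

	static pthread_rwlock_t umount_lock = PTHREAD_RWLOCK_INITIALIZER;

	static int do_store(void) { return 0; /* the real work */ }

	/* back off instead of blocking if an unmount holds the lock */
	static int guarded_store(void)
	{
		int ret;

		if (pthread_rwlock_tryrdlock(&umount_lock))
			return -EAGAIN;

		ret = do_store();
		pthread_rwlock_unlock(&umount_lock);
		return ret;
	}

	int main(void)
	{
		printf("guarded_store: %d\n", guarded_store());
		return 0;
	}
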
static ssize_t f2fs_attr_show(struct kobject *kobj,
struct attribute *attr, char *buf)
{
@@ -299,6 +364,7 @@ enum feat_id {
FEAT_QUOTA_INO,
FEAT_INODE_CRTIME,
FEAT_LOST_FOUND,
+ FEAT_SB_CHECKSUM,
};
static ssize_t f2fs_feature_show(struct f2fs_attr *a,
@@ -315,6 +381,7 @@ static ssize_t f2fs_feature_show(struct f2fs_attr *a,
case FEAT_QUOTA_INO:
case FEAT_INODE_CRTIME:
case FEAT_LOST_FOUND:
+ case FEAT_SB_CHECKSUM:
return snprintf(buf, PAGE_SIZE, "supported\n");
}
return 0;
@@ -349,8 +416,8 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_urgent_sleep_time,
F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time);
F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time);
F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
-F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle);
-F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_urgent, gc_urgent);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle, gc_mode);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_urgent, gc_mode);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards);
F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_granularity, discard_granularity);
@@ -359,15 +426,22 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_seq_blocks, min_seq_blocks);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, migration_granularity, migration_granularity);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, discard_idle_interval,
+ interval_time[DISCARD_TIME]);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle_interval, interval_time[GC_TIME]);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info,
+ umount_discard_timeout, interval_time[UMOUNT_DISCARD_TIMEOUT]);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold);
@@ -380,6 +454,7 @@ F2FS_GENERAL_RO_ATTR(dirty_segments);
F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes);
F2FS_GENERAL_RO_ATTR(features);
F2FS_GENERAL_RO_ATTR(current_reserved_blocks);
+F2FS_GENERAL_RO_ATTR(unusable);
#ifdef CONFIG_F2FS_FS_ENCRYPTION
F2FS_FEATURE_RO_ATTR(encryption, FEAT_CRYPTO);
@@ -395,6 +470,7 @@ F2FS_FEATURE_RO_ATTR(flexible_inline_xattr, FEAT_FLEXIBLE_INLINE_XATTR);
F2FS_FEATURE_RO_ATTR(quota_ino, FEAT_QUOTA_INO);
F2FS_FEATURE_RO_ATTR(inode_crtime, FEAT_INODE_CRTIME);
F2FS_FEATURE_RO_ATTR(lost_found, FEAT_LOST_FOUND);
+F2FS_FEATURE_RO_ATTR(sb_checksum, FEAT_SB_CHECKSUM);
#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
static struct attribute *f2fs_attrs[] = {
@@ -411,15 +487,20 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(ipu_policy),
ATTR_LIST(min_ipu_util),
ATTR_LIST(min_fsync_blocks),
+ ATTR_LIST(min_seq_blocks),
ATTR_LIST(min_hot_blocks),
ATTR_LIST(min_ssr_sections),
ATTR_LIST(max_victim_search),
+ ATTR_LIST(migration_granularity),
ATTR_LIST(dir_level),
ATTR_LIST(ram_thresh),
ATTR_LIST(ra_nid_pages),
ATTR_LIST(dirty_nats_ratio),
ATTR_LIST(cp_interval),
ATTR_LIST(idle_interval),
+ ATTR_LIST(discard_idle_interval),
+ ATTR_LIST(gc_idle_interval),
+ ATTR_LIST(umount_discard_timeout),
ATTR_LIST(iostat_enable),
ATTR_LIST(readdir_ra),
ATTR_LIST(gc_pin_file_thresh),
@@ -429,6 +510,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(inject_type),
#endif
ATTR_LIST(dirty_segments),
+ ATTR_LIST(unusable),
ATTR_LIST(lifetime_write_kbytes),
ATTR_LIST(features),
ATTR_LIST(reserved_blocks),
@@ -451,6 +533,7 @@ static struct attribute *f2fs_feat_attrs[] = {
ATTR_LIST(quota_ino),
ATTR_LIST(inode_crtime),
ATTR_LIST(lost_found),
+ ATTR_LIST(sb_checksum),
NULL,
};
@@ -482,7 +565,8 @@ static struct kobject f2fs_feat = {
.kset = &f2fs_kset,
};
-static int segment_info_seq_show(struct seq_file *seq, void *offset)
+static int __maybe_unused segment_info_seq_show(struct seq_file *seq,
+ void *offset)
{
struct super_block *sb = seq->private;
struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -498,8 +582,7 @@ static int segment_info_seq_show(struct seq_file *seq, void *offset)
if ((i % 10) == 0)
seq_printf(seq, "%-10d", i);
- seq_printf(seq, "%d|%-3u", se->type,
- get_valid_blocks(sbi, i, false));
+ seq_printf(seq, "%d|%-3u", se->type, se->valid_blocks);
if ((i % 10) == 9 || i == (total_segs - 1))
seq_putc(seq, '\n');
else
@@ -509,7 +592,8 @@ static int segment_info_seq_show(struct seq_file *seq, void *offset)
return 0;
}
-static int segment_bits_seq_show(struct seq_file *seq, void *offset)
+static int __maybe_unused segment_bits_seq_show(struct seq_file *seq,
+ void *offset)
{
struct super_block *sb = seq->private;
struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -524,8 +608,7 @@ static int segment_bits_seq_show(struct seq_file *seq, void *offset)
struct seg_entry *se = get_seg_entry(sbi, i);
seq_printf(seq, "%-10d", i);
- seq_printf(seq, "%d|%-3u|", se->type,
- get_valid_blocks(sbi, i, false));
+ seq_printf(seq, "%d|%-3u|", se->type, se->valid_blocks);
for (j = 0; j < SIT_VBLOCK_MAP_SIZE; j++)
seq_printf(seq, " %.2x", se->cur_valid_map[j]);
seq_putc(seq, '\n');
@@ -533,7 +616,8 @@ static int segment_bits_seq_show(struct seq_file *seq, void *offset)
return 0;
}
-static int iostat_info_seq_show(struct seq_file *seq, void *offset)
+static int __maybe_unused iostat_info_seq_show(struct seq_file *seq,
+ void *offset)
{
struct super_block *sb = seq->private;
struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -575,6 +659,28 @@ static int iostat_info_seq_show(struct seq_file *seq, void *offset)
return 0;
}
+static int __maybe_unused victim_bits_seq_show(struct seq_file *seq,
+ void *offset)
+{
+ struct super_block *sb = seq->private;
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+ int i;
+
+ seq_puts(seq, "format: victim_secmap bitmaps\n");
+
+ for (i = 0; i < MAIN_SECS(sbi); i++) {
+ if ((i % 10) == 0)
+ seq_printf(seq, "%-10d", i);
+ seq_printf(seq, "%d", test_bit(i, dirty_i->victim_secmap) ? 1 : 0);
+ if ((i % 10) == 9 || i == (MAIN_SECS(sbi) - 1))
+ seq_putc(seq, '\n');
+ else
+ seq_putc(seq, ' ');
+ }
+ return 0;
+}
+
#define F2FS_PROC_FILE_DEF(_name) \
static int _name##_open_fs(struct inode *inode, struct file *file) \
{ \
@@ -591,6 +697,7 @@ static const struct file_operations f2fs_seq_##_name##_fops = { \
F2FS_PROC_FILE_DEF(segment_info);
F2FS_PROC_FILE_DEF(segment_bits);
F2FS_PROC_FILE_DEF(iostat_info);
+F2FS_PROC_FILE_DEF(victim_bits);
int __init f2fs_init_sysfs(void)
{
@@ -641,6 +748,8 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi)
&f2fs_seq_segment_bits_fops, sb);
proc_create_data("iostat_info", S_IRUGO, sbi->s_proc,
&f2fs_seq_iostat_info_fops, sb);
+ proc_create_data("victim_bits", S_IRUGO, sbi->s_proc,
+ &f2fs_seq_victim_bits_fops, sb);
}
return 0;
}
@@ -651,6 +760,7 @@ void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi)
remove_proc_entry("iostat_info", sbi->s_proc);
remove_proc_entry("segment_info", sbi->s_proc);
remove_proc_entry("segment_bits", sbi->s_proc);
+ remove_proc_entry("victim_bits", sbi->s_proc);
remove_proc_entry(sbi->sb->s_id, f2fs_proc_root);
}
kobject_del(&sbi->s_kobj);
diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c
index cb696791044b..d0ab533a9ce8 100644
--- a/fs/f2fs/trace.c
+++ b/fs/f2fs/trace.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* f2fs IO tracer
*
* Copyright (c) 2014 Motorola Mobility
* Copyright (c) 2014 Jaegeuk Kim <jaegeuk@kernel.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
@@ -17,7 +14,7 @@
#include "trace.h"
static RADIX_TREE(pids, GFP_ATOMIC);
-static struct mutex pids_lock;
+static spinlock_t pids_lock;
static struct last_io_info last_io;
static inline void __print_last_io(void)
@@ -65,7 +62,7 @@ retry:
if (radix_tree_preload(GFP_NOFS))
return;
- mutex_lock(&pids_lock);
+ spin_lock(&pids_lock);
p = radix_tree_lookup(&pids, pid);
if (p == current)
goto out;
@@ -83,7 +80,7 @@ retry:
MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev),
pid, current->comm);
out:
- mutex_unlock(&pids_lock);
+ spin_unlock(&pids_lock);
radix_tree_preload_end();
}
@@ -128,7 +125,7 @@ void f2fs_trace_ios(struct f2fs_io_info *fio, int flush)
void f2fs_build_trace_ios(void)
{
- mutex_init(&pids_lock);
+ spin_lock_init(&pids_lock);
}
#define PIDVEC_SIZE 128
@@ -156,7 +153,7 @@ void f2fs_destroy_trace_ios(void)
pid_t next_pid = 0;
unsigned int found;
- mutex_lock(&pids_lock);
+ spin_lock(&pids_lock);
while ((found = gang_lookup_pids(pid, next_pid, PIDVEC_SIZE))) {
unsigned idx;
@@ -164,5 +161,5 @@ void f2fs_destroy_trace_ios(void)
for (idx = 0; idx < found; idx++)
radix_tree_delete(&pids, pid[idx]);
}
- mutex_unlock(&pids_lock);
+ spin_unlock(&pids_lock);
}
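
The mutex-to-spinlock conversion here is forced by the surrounding API: radix_tree_preload() returns with preemption disabled, and nothing may sleep until radix_tree_preload_end(), so a mutex cannot be taken in between. The resulting locking shape (kernel-style sketch; compiles only in-tree, error handling trimmed):

	#include <linux/radix-tree.h>
	#include <linux/spinlock.h>

	static DEFINE_SPINLOCK(table_lock);
	static RADIX_TREE(table, GFP_ATOMIC);

	static void table_insert(unsigned long index, void *item)
	{
		/* preload may sleep and allocate; on success it returns
		 * with preemption disabled */
		if (radix_tree_preload(GFP_NOFS))
			return;

		spin_lock(&table_lock);		/* must not sleep from here on */
		radix_tree_insert(&table, index, item);	/* sketch: result ignored */
		spin_unlock(&table_lock);

		radix_tree_preload_end();	/* re-enables preemption */
	}
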
diff --git a/fs/f2fs/trace.h b/fs/f2fs/trace.h
index 67db24ac1e85..e8075fc5b228 100644
--- a/fs/f2fs/trace.h
+++ b/fs/f2fs/trace.h
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* f2fs IO tracer
*
* Copyright (c) 2014 Motorola Mobility
* Copyright (c) 2014 Jaegeuk Kim <jaegeuk@kernel.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#ifndef __F2FS_TRACE_H__
#define __F2FS_TRACE_H__
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 116be979b897..d48af3b5857b 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/xattr.c
*
@@ -13,10 +14,6 @@
* suggestion of Luka Renko <luka.renko@hermes.si>.
* xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>,
* Red Hat Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/rwsem.h>
#include <linux/f2fs_fs.h>
@@ -38,9 +35,6 @@ static size_t f2fs_xattr_generic_list(const struct xattr_handler *handler,
return -EOPNOTSUPP;
break;
case F2FS_XATTR_INDEX_TRUSTED:
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- break;
case F2FS_XATTR_INDEX_SECURITY:
break;
default:
@@ -69,9 +63,6 @@ static int f2fs_xattr_generic_get(const struct xattr_handler *handler,
return -EOPNOTSUPP;
break;
case F2FS_XATTR_INDEX_TRUSTED:
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- break;
case F2FS_XATTR_INDEX_SECURITY:
break;
default:
@@ -142,6 +133,8 @@ static int f2fs_xattr_advise_set(const struct xattr_handler *handler,
size_t size, int flags)
{
struct inode *inode = d_inode(dentry);
+ unsigned char old_advise = F2FS_I(inode)->i_advise;
+ unsigned char new_advise;
if (strcmp(name, "") != 0)
return -EINVAL;
@@ -150,7 +143,14 @@ static int f2fs_xattr_advise_set(const struct xattr_handler *handler,
if (value == NULL)
return -EINVAL;
- F2FS_I(inode)->i_advise |= *(char *)value;
+ new_advise = *(char *)value;
+ if (new_advise & ~FADVISE_MODIFIABLE_BITS)
+ return -EINVAL;
+
+ new_advise = new_advise & FADVISE_MODIFIABLE_BITS;
+ new_advise |= old_advise & ~FADVISE_MODIFIABLE_BITS;
+
+ F2FS_I(inode)->i_advise = new_advise;
f2fs_mark_inode_dirty_sync(inode, true);
return 0;
}
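
The rewritten advise handler merges instead of OR-ing: modifiable bits come from the caller, every other bit is preserved from the inode's current flags, and a request touching anything outside the mask is rejected up front. The masking identity by itself (sketch; the 0x0f mask is hypothetical, standing in for FADVISE_MODIFIABLE_BITS):

	#include <stdio.h>

	#define MODIFIABLE_BITS 0x0fu	/* hypothetical mask */

	static unsigned int merge_advise(unsigned int old, unsigned int req)
	{
		/* take modifiable bits from the request, keep the rest */
		return (req & MODIFIABLE_BITS) | (old & ~MODIFIABLE_BITS);
	}

	int main(void)
	{
		printf("%#x\n", merge_advise(0xa5, 0x0c));	/* -> 0xac */
		return 0;
	}
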
@@ -248,12 +248,17 @@ static inline const struct xattr_handler *f2fs_xattr_handler(int index)
return handler;
}
-static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index,
- size_t len, const char *name)
+static struct f2fs_xattr_entry *__find_xattr(void *base_addr,
+ void *last_base_addr, int index,
+ size_t len, const char *name)
{
struct f2fs_xattr_entry *entry;
list_for_each_xattr(entry, base_addr) {
+ if ((void *)(entry) + sizeof(__u32) > last_base_addr ||
+ (void *)XATTR_NEXT_ENTRY(entry) > last_base_addr)
+ return NULL;
+
if (entry->e_name_index != index)
continue;
if (entry->e_name_len != len)
@@ -270,11 +275,11 @@ static struct f2fs_xattr_entry *__find_inline_xattr(struct inode *inode,
{
struct f2fs_xattr_entry *entry;
unsigned int inline_size = inline_xattr_size(inode);
+ void *max_addr = base_addr + inline_size;
list_for_each_xattr(entry, base_addr) {
- if ((void *)entry + sizeof(__u32) > base_addr + inline_size ||
- (void *)XATTR_NEXT_ENTRY(entry) + sizeof(__u32) >
- base_addr + inline_size) {
+ if ((void *)entry + sizeof(__u32) > max_addr ||
+ (void *)XATTR_NEXT_ENTRY(entry) > max_addr) {
*last_addr = entry;
return NULL;
}
@@ -285,6 +290,13 @@ static struct f2fs_xattr_entry *__find_inline_xattr(struct inode *inode,
if (!memcmp(entry->e_name, name, len))
break;
}
+
+	/* inline xattr header or entry crosses the max inline xattr size */
+ if (IS_XATTR_LAST_ENTRY(entry) &&
+ (void *)entry + sizeof(__u32) > max_addr) {
+ *last_addr = entry;
+ return NULL;
+ }
return entry;
}
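
Both xattr lookup paths now verify that an entry's header and its full extent stay inside the buffer before touching it, so a corrupted e_name_len can no longer walk the scan past the mapping. The same defensive walk over a generic length-prefixed record buffer (userspace sketch, one length byte per record, zero length as terminator):

	#include <stdio.h>
	#include <stddef.h>
	#include <string.h>

	static const unsigned char *next_rec(const unsigned char *r)
	{
		return r + 1 + r[0];	/* length byte plus name bytes */
	}

	/* return the record named 'name', or NULL on miss or corruption */
	static const unsigned char *find_rec(const unsigned char *base,
					     size_t size, const char *name,
					     size_t len)
	{
		const unsigned char *end = base + size;
		const unsigned char *r = base;

		while (r < end) {
			/* header and the whole record must fit before 'end' */
			if (r + 1 > end || next_rec(r) > end)
				return NULL;	/* corrupted length field */
			if (r[0] == 0)
				return NULL;	/* terminator: not found */
			if (r[0] == len && !memcmp(r + 1, name, len))
				return r;
			r = next_rec(r);
		}
		return NULL;
	}

	int main(void)
	{
		unsigned char buf[] = { 3, 'f', 'o', 'o', 2, 'h', 'i', 0 };

		printf("found: %s\n",
		       find_rec(buf, sizeof(buf), "hi", 2) ? "yes" : "no");
		return 0;
	}
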
@@ -299,7 +311,7 @@ static int read_inline_xattr(struct inode *inode, struct page *ipage,
if (ipage) {
inline_addr = inline_xattr_addr(inode, ipage);
} else {
- page = get_node_page(sbi, inode->i_ino);
+ page = f2fs_get_node_page(sbi, inode->i_ino);
if (IS_ERR(page))
return PTR_ERR(page);
@@ -320,7 +332,7 @@ static int read_xattr_block(struct inode *inode, void *txattr_addr)
void *xattr_addr;
/* The inode already has an extended attribute block. */
- xpage = get_node_page(sbi, xnid);
+ xpage = f2fs_get_node_page(sbi, xnid);
if (IS_ERR(xpage))
return PTR_ERR(xpage);
@@ -334,22 +346,24 @@ static int read_xattr_block(struct inode *inode, void *txattr_addr)
static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
unsigned int index, unsigned int len,
const char *name, struct f2fs_xattr_entry **xe,
- void **base_addr)
+ void **base_addr, int *base_size)
{
- void *cur_addr, *txattr_addr, *last_addr = NULL;
+ void *cur_addr, *txattr_addr, *last_txattr_addr;
+ void *last_addr = NULL;
nid_t xnid = F2FS_I(inode)->i_xattr_nid;
- unsigned int size = xnid ? VALID_XATTR_BLOCK_SIZE : 0;
unsigned int inline_size = inline_xattr_size(inode);
int err = 0;
- if (!size && !inline_size)
+ if (!xnid && !inline_size)
return -ENODATA;
- txattr_addr = f2fs_kzalloc(F2FS_I_SB(inode),
- inline_size + size + XATTR_PADDING_SIZE, GFP_NOFS);
+ *base_size = XATTR_SIZE(xnid, inode) + XATTR_PADDING_SIZE;
+ txattr_addr = f2fs_kzalloc(F2FS_I_SB(inode), *base_size, GFP_NOFS);
if (!txattr_addr)
return -ENOMEM;
+ last_txattr_addr = (void *)txattr_addr + XATTR_SIZE(xnid, inode);
+
/* read from inline xattr */
if (inline_size) {
err = read_inline_xattr(inode, ipage, txattr_addr);
@@ -358,8 +372,10 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
*xe = __find_inline_xattr(inode, txattr_addr, &last_addr,
index, len, name);
- if (*xe)
+ if (*xe) {
+ *base_size = inline_size;
goto check;
+ }
}
/* read from xattr node block */
@@ -374,7 +390,14 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
else
cur_addr = txattr_addr;
- *xe = __find_xattr(cur_addr, index, len, name);
+ *xe = __find_xattr(cur_addr, last_txattr_addr, index, len, name);
+ if (!*xe) {
+ f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr",
+ inode->i_ino);
+ set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
+ err = -EFSCORRUPTED;
+ goto out;
+ }
check:
if (IS_XATTR_LAST_ENTRY(*xe)) {
err = -ENODATA;
@@ -384,7 +407,7 @@ check:
*base_addr = txattr_addr;
return 0;
out:
- kzfree(txattr_addr);
+ kvfree(txattr_addr);
return err;
}
@@ -427,7 +450,7 @@ static int read_all_xattrs(struct inode *inode, struct page *ipage,
*base_addr = txattr_addr;
return 0;
fail:
- kzfree(txattr_addr);
+ kvfree(txattr_addr);
return err;
}
@@ -444,7 +467,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
int err = 0;
if (hsize > inline_size && !F2FS_I(inode)->i_xattr_nid)
- if (!alloc_nid(sbi, &new_nid))
+ if (!f2fs_alloc_nid(sbi, &new_nid))
return -ENOSPC;
/* write to inline xattr */
@@ -452,20 +475,20 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
if (ipage) {
inline_addr = inline_xattr_addr(inode, ipage);
} else {
- in_page = get_node_page(sbi, inode->i_ino);
+ in_page = f2fs_get_node_page(sbi, inode->i_ino);
if (IS_ERR(in_page)) {
- alloc_nid_failed(sbi, new_nid);
+ f2fs_alloc_nid_failed(sbi, new_nid);
return PTR_ERR(in_page);
}
inline_addr = inline_xattr_addr(inode, in_page);
}
f2fs_wait_on_page_writeback(ipage ? ipage : in_page,
- NODE, true);
+ NODE, true, true);
/* no need to use xattr node block */
if (hsize <= inline_size) {
- err = truncate_xattr_node(inode);
- alloc_nid_failed(sbi, new_nid);
+ err = f2fs_truncate_xattr_node(inode);
+ f2fs_alloc_nid_failed(sbi, new_nid);
if (err) {
f2fs_put_page(in_page, 1);
return err;
@@ -478,24 +501,24 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
/* write to xattr node block */
if (F2FS_I(inode)->i_xattr_nid) {
- xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid);
+ xpage = f2fs_get_node_page(sbi, F2FS_I(inode)->i_xattr_nid);
if (IS_ERR(xpage)) {
err = PTR_ERR(xpage);
- alloc_nid_failed(sbi, new_nid);
+ f2fs_alloc_nid_failed(sbi, new_nid);
goto in_page_out;
}
f2fs_bug_on(sbi, new_nid);
- f2fs_wait_on_page_writeback(xpage, NODE, true);
+ f2fs_wait_on_page_writeback(xpage, NODE, true, true);
} else {
struct dnode_of_data dn;
set_new_dnode(&dn, inode, NULL, NULL, new_nid);
- xpage = new_node_page(&dn, XATTR_NODE_OFFSET);
+ xpage = f2fs_new_node_page(&dn, XATTR_NODE_OFFSET);
if (IS_ERR(xpage)) {
err = PTR_ERR(xpage);
- alloc_nid_failed(sbi, new_nid);
+ f2fs_alloc_nid_failed(sbi, new_nid);
goto in_page_out;
}
- alloc_nid_done(sbi, new_nid);
+ f2fs_alloc_nid_done(sbi, new_nid);
}
xattr_addr = page_address(xpage);
@@ -520,6 +543,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
int error = 0;
unsigned int size, len;
void *base_addr = NULL;
+ int base_size;
if (name == NULL)
return -EINVAL;
@@ -530,7 +554,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
down_read(&F2FS_I(inode)->i_xattr_sem);
error = lookup_all_xattrs(inode, ipage, index, len, name,
- &entry, &base_addr);
+ &entry, &base_addr, &base_size);
up_read(&F2FS_I(inode)->i_xattr_sem);
if (error)
return error;
@@ -544,11 +568,16 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
if (buffer) {
char *pval = entry->e_name + entry->e_name_len;
+
+ if (base_size - (pval - (char *)base_addr) < size) {
+ error = -ERANGE;
+ goto out;
+ }
memcpy(buffer, pval, size);
}
error = size;
out:
- kzfree(base_addr);
+ kvfree(base_addr);
return error;
}
@@ -587,7 +616,7 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
}
error = buffer_size - rest;
cleanup:
- kzfree(base_addr);
+ kvfree(base_addr);
return error;
}
@@ -605,7 +634,8 @@ static int __f2fs_setxattr(struct inode *inode, int index,
struct page *ipage, int flags)
{
struct f2fs_xattr_entry *here, *last;
- void *base_addr;
+ void *base_addr, *last_base_addr;
+ nid_t xnid = F2FS_I(inode)->i_xattr_nid;
int found, newsize;
size_t len;
__u32 new_hsize;
@@ -629,8 +659,17 @@ static int __f2fs_setxattr(struct inode *inode, int index,
if (error)
return error;
+ last_base_addr = (void *)base_addr + XATTR_SIZE(xnid, inode);
+
/* find entry with wanted name. */
- here = __find_xattr(base_addr, index, len, name);
+ here = __find_xattr(base_addr, last_base_addr, index, len, name);
+ if (!here) {
+ f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr",
+ inode->i_ino);
+ set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
+ error = -EFSCORRUPTED;
+ goto exit;
+ }
found = IS_XATTR_LAST_ENTRY(here) ? 0 : 1;
@@ -718,7 +757,7 @@ static int __f2fs_setxattr(struct inode *inode, int index,
if (!error && S_ISDIR(inode->i_mode))
set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_CP);
exit:
- kzfree(base_addr);
+ kvfree(base_addr);
return error;
}
@@ -733,7 +772,7 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name,
if (err)
return err;
- /* this case is only from init_inode_metadata */
+ /* this case is only from f2fs_init_inode_metadata */
if (ipage)
return __f2fs_setxattr(inode, index, name, value,
size, ipage, flags);
diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h
index 08a4840d6d7d..2909c9241145 100644
--- a/fs/f2fs/xattr.h
+++ b/fs/f2fs/xattr.h
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/xattr.h
*
@@ -9,10 +10,6 @@
* On-disk format of extended attributes for the ext2 filesystem.
*
* (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#ifndef __F2FS_XATTR_H__
#define __F2FS_XATTR_H__
@@ -74,6 +71,8 @@ struct f2fs_xattr_entry {
entry = XATTR_NEXT_ENTRY(entry))
#define VALID_XATTR_BLOCK_SIZE (PAGE_SIZE - sizeof(struct node_footer))
#define XATTR_PADDING_SIZE (sizeof(__u32))
+#define XATTR_SIZE(x,i) (((x) ? VALID_XATTR_BLOCK_SIZE : 0) + \
+ (inline_xattr_size(i)))
#define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + \
VALID_XATTR_BLOCK_SIZE)
@@ -81,6 +80,12 @@ struct f2fs_xattr_entry {
sizeof(struct f2fs_xattr_header) - \
sizeof(struct f2fs_xattr_entry))
+#define MAX_INLINE_XATTR_SIZE \
+ (DEF_ADDRS_PER_INODE - \
+ F2FS_TOTAL_EXTRA_ATTR_SIZE / sizeof(__le32) - \
+ DEF_INLINE_RESERVED_SIZE - \
+ MIN_INLINE_DENTRY_SIZE / sizeof(__le32))
+
/*
* On-disk structure of f2fs_xattr
* We use inline xattrs space + 1 block for xattr.
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index be8529739d23..94fdb8efd93e 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -92,7 +92,8 @@ static int fat12_ent_bread(struct super_block *sb, struct fat_entry *fatent,
err_brelse:
brelse(bhs[0]);
err:
- fat_msg(sb, KERN_ERR, "FAT read failed (blocknr %llu)", (llu)blocknr);
+ fat_msg_ratelimit(sb, KERN_ERR,
+ "FAT read failed (blocknr %llu)", (llu)blocknr);
return -EIO;
}
@@ -105,8 +106,8 @@ static int fat_ent_bread(struct super_block *sb, struct fat_entry *fatent,
fatent->fat_inode = MSDOS_SB(sb)->fat_inode;
fatent->bhs[0] = sb_bread(sb, blocknr);
if (!fatent->bhs[0]) {
- fat_msg(sb, KERN_ERR, "FAT read failed (blocknr %llu)",
- (llu)blocknr);
+ fat_msg_ratelimit(sb, KERN_ERR,
+ "FAT read failed (blocknr %llu)", (llu)blocknr);
return -EIO;
}
fatent->nr_bhs = 1;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 944fff1ef536..79375a009306 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -775,8 +775,9 @@ retry:
fat_get_blknr_offset(sbi, i_pos, &blocknr, &offset);
bh = sb_bread(sb, blocknr);
if (!bh) {
- fat_msg(sb, KERN_ERR, "unable to read inode block "
- "for updating (i_pos %lld)", i_pos);
+ fat_msg_ratelimit(sb, KERN_ERR,
+ "unable to read inode block for updating (i_pos %lld)",
+ i_pos);
return -EIO;
}
spin_lock(&sbi->inode_hash_lock);
diff --git a/fs/file_table.c b/fs/file_table.c
index ad17e05ebf95..b4baa0de4988 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -41,6 +41,141 @@ static struct kmem_cache *filp_cachep __read_mostly;
static struct percpu_counter nr_files __cacheline_aligned_in_smp;
+#ifdef CONFIG_FILE_TABLE_DEBUG
+#include <linux/hashtable.h>
+#include <mount.h>
+static DEFINE_MUTEX(global_files_lock);
+static DEFINE_HASHTABLE(global_files_hashtable, 10);
+
+struct global_filetable_lookup_key {
+ struct work_struct work;
+ uintptr_t value;
+};
+
+void global_filetable_print_warning_once(void)
+{
+ pr_err_once("\n**********************************************************\n");
+ pr_err_once("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
+ pr_err_once("** **\n");
+ pr_err_once("** VFS FILE TABLE DEBUG is enabled . **\n");
+ pr_err_once("** Allocating extra memory and slowing access to files **\n");
+ pr_err_once("** **\n");
+ pr_err_once("** This means that this is a DEBUG kernel and it is **\n");
+ pr_err_once("** unsafe for production use. **\n");
+ pr_err_once("** **\n");
+ pr_err_once("** If you see this message and you are not debugging **\n");
+ pr_err_once("** the kernel, report this immediately to your vendor! **\n");
+ pr_err_once("** **\n");
+ pr_err_once("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
+ pr_err_once("**********************************************************\n");
+}
+
+void global_filetable_add(struct file *filp)
+{
+ struct mount *mnt;
+
+	/* d_iname is an embedded array and can never be NULL; an empty
+	 * name is the only case worth skipping */
+	if (filp->f_path.dentry->d_iname[0] == '\0')
+		return;
+
+ mnt = real_mount(filp->f_path.mnt);
+
+ mutex_lock(&global_files_lock);
+ hash_add(global_files_hashtable, &filp->f_hash, (uintptr_t)mnt);
+ mutex_unlock(&global_files_lock);
+}
+
+void global_filetable_del(struct file *filp)
+{
+ mutex_lock(&global_files_lock);
+ hash_del(&filp->f_hash);
+ mutex_unlock(&global_files_lock);
+}
+
+static void global_print_file(struct file *filp, char *path_buffer, int *count)
+{
+ char *pathname;
+
+ pathname = d_path(&filp->f_path, path_buffer, PAGE_SIZE);
+ if (IS_ERR(pathname))
+ pr_err("VFS: File %d Address : %pa partial filename: %s ref_count=%ld\n",
+ ++(*count), &filp, filp->f_path.dentry->d_iname,
+ atomic_long_read(&filp->f_count));
+ else
+ pr_err("VFS: File %d Address : %pa full filepath: %s ref_count=%ld\n",
+ ++(*count), &filp, pathname,
+ atomic_long_read(&filp->f_count));
+}
+
+static void global_filetable_print(uintptr_t lookup_mnt)
+{
+ struct hlist_node *tmp;
+ struct file *filp;
+ struct mount *mnt;
+ int index;
+ int count = 0;
+	char *path_buffer = (char *)__get_free_page(GFP_TEMPORARY);
+
+	if (!path_buffer)
+		return;
+
+	mutex_lock(&global_files_lock);
+ pr_err("\n**********************************************************\n");
+ pr_err("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
+
+ pr_err("\n");
+ pr_err("VFS: The following files hold a reference to the mount\n");
+ pr_err("\n");
+ hash_for_each_possible_safe(global_files_hashtable, filp, tmp, f_hash,
+ lookup_mnt) {
+ mnt = real_mount(filp->f_path.mnt);
+ if ((uintptr_t)mnt == lookup_mnt)
+ global_print_file(filp, path_buffer, &count);
+ }
+ pr_err("\n");
+ pr_err("VFS: Found total of %d open files\n", count);
+ pr_err("\n");
+
+ count = 0;
+ pr_err("\n");
+ pr_err("VFS: The following files need to cleaned up\n");
+ pr_err("\n");
+ hash_for_each_safe(global_files_hashtable, index, tmp, filp, f_hash) {
+ if (atomic_long_read(&filp->f_count) == 0)
+ global_print_file(filp, path_buffer, &count);
+ }
+
+ pr_err("\n");
+ pr_err("VFS: Found total of %d files awaiting clean-up\n", count);
+ pr_err("\n");
+ pr_err("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
+ pr_err("\n**********************************************************\n");
+
+ mutex_unlock(&global_files_lock);
+ free_page((unsigned long)path_buffer);
+}
+
+static void global_filetable_print_work_fn(struct work_struct *work)
+{
+ struct global_filetable_lookup_key *key;
+ uintptr_t lookup_mnt;
+
+ key = container_of(work, struct global_filetable_lookup_key, work);
+ lookup_mnt = key->value;
+ kfree(key);
+ global_filetable_print(lookup_mnt);
+}
+
+void global_filetable_delayed_print(struct mount *mnt)
+{
+ struct global_filetable_lookup_key *key;
+
+ key = kzalloc(sizeof(*key), GFP_KERNEL);
+ if (key == NULL)
+ return;
+ key->value = (uintptr_t)mnt;
+ INIT_WORK(&key->work, global_filetable_print_work_fn);
+ schedule_work(&key->work);
+}
+#endif /* CONFIG_FILE_TABLE_DEBUG */
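
The table above keys every open struct file by its struct mount so that do_umount() can later dump exactly which files keep a busy mount pinned. A reduced sketch of the same <linux/hashtable.h> pattern (all demo_* names are illustrative):

#include <linux/hashtable.h>

static DEFINE_HASHTABLE(demo_table, 4);		/* 2^4 buckets */

struct demo_entry {
	struct hlist_node node;
	unsigned long key;		/* here: the mount's address */
};

static void demo_add(struct demo_entry *e, unsigned long key)
{
	e->key = key;
	hash_add(demo_table, &e->node, key);	/* bucket chosen from key */
}

static int demo_count(unsigned long key)
{
	struct demo_entry *e;
	int n = 0;

	/* Visits a single bucket, so colliding entries must be re-checked
	 * against the key, exactly as global_filetable_print() re-checks
	 * real_mount(filp->f_path.mnt). */
	hash_for_each_possible(demo_table, e, node, key)
		if (e->key == key)
			n++;
	return n;
}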
+
static void file_free_rcu(struct rcu_head *head)
{
struct file *f = container_of(head, struct file, f_u.fu_rcuhead);
@@ -219,6 +354,7 @@ static void __fput(struct file *file)
put_write_access(inode);
__mnt_drop_write(mnt);
}
+ global_filetable_del(file);
file->f_path.dentry = NULL;
file->f_path.mnt = NULL;
file->f_inode = NULL;
@@ -314,6 +450,7 @@ void __init files_init(void)
filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
percpu_counter_init(&nr_files, 0, GFP_KERNEL);
+ global_filetable_print_warning_once();
}
/*
diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
index e95eeb445e58..3805040bee46 100644
--- a/fs/fuse/Makefile
+++ b/fs/fuse/Makefile
@@ -5,4 +5,4 @@
obj-$(CONFIG_FUSE_FS) += fuse.o
obj-$(CONFIG_CUSE) += cuse.o
-fuse-objs := dev.o dir.o file.o inode.o control.o
+fuse-objs := dev.o dir.o file.o inode.o control.o passthrough.o
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 755a56f9fb58..7909b72b1f88 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -7,6 +7,7 @@
*/
#include "fuse_i.h"
+#include "fuse_passthrough.h"
#include <linux/init.h>
#include <linux/module.h>
@@ -596,9 +597,14 @@ ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args)
args->out.numargs * sizeof(struct fuse_arg));
fuse_request_send(fc, req);
ret = req->out.h.error;
- if (!ret && args->out.argvar) {
- BUG_ON(args->out.numargs != 1);
- ret = req->out.args[0].size;
+ if (!ret) {
+ if (args->out.argvar) {
+ BUG_ON(args->out.numargs != 1);
+ ret = req->out.args[0].size;
+ }
+
+ if (req->passthrough_filp != NULL)
+ args->out.passthrough_filp = req->passthrough_filp;
}
fuse_put_request(fc, req);
@@ -1986,6 +1992,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
}
fuse_copy_finish(cs);
+ fuse_setup_passthrough(fc, req);
spin_lock(&fpq->lock);
clear_bit(FR_LOCKED, &req->flags);
if (!fpq->connected)
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 3bc6e1990e64..7a5ff8d5afbd 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -480,6 +480,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
args.out.args[0].value = &outentry;
args.out.args[1].size = sizeof(outopen);
args.out.args[1].value = &outopen;
+ args.out.passthrough_filp = NULL;
err = fuse_simple_request(fc, &args);
if (err)
goto out_free_ff;
@@ -492,6 +493,8 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
ff->fh = outopen.fh;
ff->nodeid = outentry.nodeid;
ff->open_flags = outopen.open_flags;
+ if (args.out.passthrough_filp != NULL)
+ ff->passthrough_filp = args.out.passthrough_filp;
inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
&outentry.attr, entry_attr_timeout(&outentry), 0);
if (!inode) {
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index f7d025d1684c..9fc6c2597dcc 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -7,6 +7,7 @@
*/
#include "fuse_i.h"
+#include "fuse_passthrough.h"
#include <linux/pagemap.h>
#include <linux/slab.h>
@@ -22,8 +23,10 @@
static const struct file_operations fuse_direct_io_file_operations;
static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
- int opcode, struct fuse_open_out *outargp)
+ int opcode, struct fuse_open_out *outargp,
+ struct file **passthrough_filpp)
{
+ int ret_val;
struct fuse_open_in inarg;
FUSE_ARGS(args);
@@ -39,8 +42,14 @@ static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
args.out.numargs = 1;
args.out.args[0].size = sizeof(*outargp);
args.out.args[0].value = outargp;
+ args.out.passthrough_filp = NULL;
- return fuse_simple_request(fc, &args);
+ ret_val = fuse_simple_request(fc, &args);
+
+ if (args.out.passthrough_filp != NULL)
+ *passthrough_filpp = args.out.passthrough_filp;
+
+ return ret_val;
}
struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
@@ -51,6 +60,10 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
if (unlikely(!ff))
return NULL;
+ ff->passthrough_filp = NULL;
+ ff->passthrough_enabled = 0;
+ if (fc->passthrough)
+ ff->passthrough_enabled = 1;
ff->fc = fc;
ff->reserved_req = fuse_request_alloc(0);
if (unlikely(!ff->reserved_req)) {
@@ -119,6 +132,7 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
bool isdir)
{
struct fuse_file *ff;
+ struct file *passthrough_filp = NULL;
int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
ff = fuse_file_alloc(fc);
@@ -131,10 +145,12 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
struct fuse_open_out outarg;
int err;
- err = fuse_send_open(fc, nodeid, file, opcode, &outarg);
+ err = fuse_send_open(fc, nodeid, file, opcode, &outarg,
+ &(passthrough_filp));
if (!err) {
ff->fh = outarg.fh;
ff->open_flags = outarg.open_flags;
+ ff->passthrough_filp = passthrough_filp;
} else if (err != -ENOSYS || isdir) {
fuse_file_free(ff);
@@ -260,6 +276,8 @@ void fuse_release_common(struct file *file, int opcode)
if (unlikely(!ff))
return;
+ fuse_passthrough_release(ff);
+
req = ff->reserved_req;
fuse_prepare_release(ff, file->f_flags, opcode);
@@ -891,6 +909,43 @@ static int fuse_readpages_fill(void *_data, struct page *page)
return -EIO;
}
+#ifdef CONFIG_CMA
+ if (is_cma_pageblock(page)) {
+ struct page *oldpage = page, *newpage;
+ int err;
+
+		/* make sure the old page is not freed in between the calls */
+ page_cache_get(oldpage);
+
+ newpage = alloc_page(GFP_HIGHUSER);
+ if (!newpage) {
+ page_cache_release(oldpage);
+ return -ENOMEM;
+ }
+
+ err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL);
+ if (err) {
+ __free_page(newpage);
+ page_cache_release(oldpage);
+ return err;
+ }
+
+ /*
+ * Decrement the count on new page to make page cache the only
+ * owner of it
+ */
+ lock_page(newpage);
+ put_page(newpage);
+
+ lru_cache_add_file(newpage);
+
+ /* finally release the old page and swap pointers */
+ unlock_page(oldpage);
+ page_cache_release(oldpage);
+ page = newpage;
+ }
+#endif
+
page_cache_get(page);
req->pages[req->num_pages] = page;
req->page_descs[req->num_pages].length = PAGE_SIZE;
@@ -936,8 +991,10 @@ out:
static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
+ ssize_t ret_val;
struct inode *inode = iocb->ki_filp->f_mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_file *ff = iocb->ki_filp->private_data;
/*
* In auto invalidate mode, always update attributes on read.
@@ -952,7 +1009,12 @@ static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
return err;
}
- return generic_file_read_iter(iocb, to);
+ if (ff && ff->passthrough_enabled && ff->passthrough_filp)
+ ret_val = fuse_passthrough_read_iter(iocb, to);
+ else
+ ret_val = generic_file_read_iter(iocb, to);
+
+ return ret_val;
}
static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff,
@@ -1184,6 +1246,7 @@ static ssize_t fuse_perform_write(struct file *file,
static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
+ struct fuse_file *ff = file->private_data;
struct address_space *mapping = file->f_mapping;
ssize_t written = 0;
ssize_t written_buffered = 0;
@@ -1217,8 +1280,14 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (err)
goto out;
+ if (ff && ff->passthrough_enabled && ff->passthrough_filp) {
+ written = fuse_passthrough_write_iter(iocb, from);
+ goto out;
+ }
+
if (iocb->ki_flags & IOCB_DIRECT) {
loff_t pos = iocb->ki_pos;
+
written = generic_file_direct_write(iocb, from, pos);
if (written < 0 || !iov_iter_count(from))
goto out;
@@ -2090,6 +2159,9 @@ static const struct vm_operations_struct fuse_file_vm_ops = {
static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
{
+ struct fuse_file *ff = file->private_data;
+
+ ff->passthrough_enabled = 0;
if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
fuse_link_write_file(file);
@@ -2100,6 +2172,9 @@ static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
static int fuse_direct_mmap(struct file *file, struct vm_area_struct *vma)
{
+ struct fuse_file *ff = file->private_data;
+
+ ff->passthrough_enabled = 0;
/* Can't provide the coherency needed for MAP_SHARED */
if (vma->vm_flags & VM_MAYSHARE)
return -ENODEV;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index cc5b053c4c04..411dcdec3db9 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -158,6 +158,10 @@ struct fuse_file {
/** Has flock been performed on this file? */
bool flock:1;
+
+	/* backing file used for passthrough reads and writes */
+ struct file *passthrough_filp;
+ bool passthrough_enabled;
};
/** One input argument of a request */
@@ -237,6 +241,7 @@ struct fuse_args {
unsigned argvar:1;
unsigned numargs;
struct fuse_arg args[2];
+ struct file *passthrough_filp;
} out;
};
@@ -387,6 +392,9 @@ struct fuse_req {
/** Request is stolen from fuse_file->reserved_req */
struct file *stolen_file;
+
+ /** fuse passthrough file */
+ struct file *passthrough_filp;
};
struct fuse_iqueue {
@@ -544,6 +552,9 @@ struct fuse_conn {
/** write-back cache policy (default is write-through) */
unsigned writeback_cache:1;
+	/** passthrough I/O to a backing file */
+ unsigned passthrough:1;
+
/*
* The following bitfields are only for optimization purposes
* and hence races in setting them will not cause malfunction
diff --git a/fs/fuse/fuse_passthrough.h b/fs/fuse/fuse_passthrough.h
new file mode 100644
index 000000000000..62f12c12ffec
--- /dev/null
+++ b/fs/fuse/fuse_passthrough.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _FS_FUSE_PASSTHROUGH_H
+#define _FS_FUSE_PASSTHROUGH_H
+
+#include "fuse_i.h"
+
+#include <linux/fuse.h>
+#include <linux/file.h>
+
+void fuse_setup_passthrough(struct fuse_conn *fc, struct fuse_req *req);
+
+ssize_t fuse_passthrough_read_iter(struct kiocb *iocb, struct iov_iter *to);
+
+ssize_t fuse_passthrough_write_iter(struct kiocb *iocb, struct iov_iter *from);
+
+void fuse_passthrough_release(struct fuse_file *ff);
+
+#endif /* _FS_FUSE_PASSTHROUGH_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 4b2eb65be0d4..ca9c492a1885 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -857,6 +857,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
fc->conn_error = 1;
else {
unsigned long ra_pages;
+ struct super_block *sb = fc->sb;
process_init_limits(fc, arg);
@@ -895,6 +896,13 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
fc->async_dio = 1;
if (arg->flags & FUSE_WRITEBACK_CACHE)
fc->writeback_cache = 1;
+ if (arg->flags & FUSE_PASSTHROUGH) {
+ fc->passthrough = 1;
+ /* Prevent further stacking */
+ sb->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;
+ pr_info("FUSE: Pass through is enabled [%s : %d]!\n",
+ current->comm, current->pid);
+ }
if (arg->time_gran && arg->time_gran <= 1000000000)
fc->sb->s_time_gran = arg->time_gran;
} else {
diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c
new file mode 100644
index 000000000000..785af63acabd
--- /dev/null
+++ b/fs/fuse/passthrough.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "fuse_passthrough.h"
+
+#include <linux/aio.h>
+#include <linux/fs_stack.h>
+
+void fuse_setup_passthrough(struct fuse_conn *fc, struct fuse_req *req)
+{
+ int daemon_fd, fs_stack_depth;
+ unsigned open_out_index;
+ struct file *passthrough_filp;
+ struct inode *passthrough_inode;
+ struct super_block *passthrough_sb;
+ struct fuse_open_out *open_out;
+
+ req->passthrough_filp = NULL;
+
+ if (!(fc->passthrough))
+ return;
+
+ if ((req->in.h.opcode != FUSE_OPEN) &&
+ (req->in.h.opcode != FUSE_CREATE))
+ return;
+
+ open_out_index = req->in.numargs - 1;
+
+ BUG_ON(open_out_index != 0 && open_out_index != 1);
+ BUG_ON(req->out.args[open_out_index].size != sizeof(*open_out));
+
+ open_out = req->out.args[open_out_index].value;
+
+ daemon_fd = (int)open_out->passthrough_fd;
+ if (daemon_fd < 0)
+ return;
+
+ passthrough_filp = fget_raw(daemon_fd);
+ if (!passthrough_filp)
+ return;
+
+ passthrough_inode = file_inode(passthrough_filp);
+ passthrough_sb = passthrough_inode->i_sb;
+ fs_stack_depth = passthrough_sb->s_stack_depth + 1;
+
+	/* If we reached the stacking limit, fall back to regular I/O */
+ if (fs_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
+ /* Release the passthrough file. */
+ fput(passthrough_filp);
+ pr_err("FUSE: maximum fs stacking depth exceeded, cannot use passthrough for this file\n");
+ return;
+ }
+ req->passthrough_filp = passthrough_filp;
+}
+
+static ssize_t fuse_passthrough_read_write_iter(struct kiocb *iocb,
+ struct iov_iter *iter, int do_write)
+{
+ ssize_t ret_val;
+ struct fuse_file *ff;
+ struct file *fuse_file, *passthrough_filp;
+ struct inode *fuse_inode, *passthrough_inode;
+ struct fuse_conn *fc;
+
+ ff = iocb->ki_filp->private_data;
+ fuse_file = iocb->ki_filp;
+ passthrough_filp = ff->passthrough_filp;
+ fc = ff->fc;
+
+	/* pin the passthrough file so it cannot be released while in use */
+ get_file(passthrough_filp);
+ iocb->ki_filp = passthrough_filp;
+ fuse_inode = fuse_file->f_path.dentry->d_inode;
+ passthrough_inode = file_inode(passthrough_filp);
+
+	if (do_write) {
+		if (!passthrough_filp->f_op->write_iter) {
+			ret_val = -EIO;
+			goto out;
+		}
+		ret_val = passthrough_filp->f_op->write_iter(iocb, iter);
+
+		if (ret_val >= 0 || ret_val == -EIOCBQUEUED) {
+			spin_lock(&fc->lock);
+			fsstack_copy_inode_size(fuse_inode, passthrough_inode);
+			spin_unlock(&fc->lock);
+			fsstack_copy_attr_times(fuse_inode, passthrough_inode);
+		}
+	} else {
+		if (!passthrough_filp->f_op->read_iter) {
+			ret_val = -EIO;
+			goto out;
+		}
+		ret_val = passthrough_filp->f_op->read_iter(iocb, iter);
+		if (ret_val >= 0 || ret_val == -EIOCBQUEUED)
+			fsstack_copy_attr_atime(fuse_inode, passthrough_inode);
+	}
+
+out:
+	iocb->ki_filp = fuse_file;
+
+	/* restore done; drop the reference pinning the passthrough file */
+	fput(passthrough_filp);
+
+ return ret_val;
+}
+
+ssize_t fuse_passthrough_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+ return fuse_passthrough_read_write_iter(iocb, to, 0);
+}
+
+ssize_t fuse_passthrough_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+ return fuse_passthrough_read_write_iter(iocb, from, 1);
+}
+
+void fuse_passthrough_release(struct fuse_file *ff)
+{
+ if (!(ff->passthrough_filp))
+ return;
+
+ /* Release the passthrough file. */
+ fput(ff->passthrough_filp);
+ ff->passthrough_filp = NULL;
+}
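
On the userspace side, a daemon opts a file into passthrough by returning an open backing-file descriptor in its FUSE_OPEN/FUSE_CREATE reply. A hedged sketch against the raw protocol, assuming this patch's extended struct fuse_open_out with a passthrough_fd field (libfuse plumbing omitted, names illustrative):

#include <fcntl.h>
#include <linux/fuse.h>

static void demo_fill_open_reply(struct fuse_open_out *out,
				 const char *backing_path)
{
	int fd = open(backing_path, O_RDWR);

	out->fh = 1;			/* daemon-private handle */
	out->open_flags = 0;
	/* A negative value disables passthrough: fuse_setup_passthrough()
	 * bails out when daemon_fd < 0 and regular I/O is used instead. */
	out->passthrough_fd = fd;
}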
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index ff0ac96a8e7b..db4c867369b5 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -78,8 +78,11 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
if (type == ACL_TYPE_ACCESS) {
umode_t mode = inode->i_mode;
-
+ struct posix_acl *old_acl = acl;
error = posix_acl_update_mode(inode, &inode->i_mode, &acl);
+
+ if (!acl)
+ posix_acl_release(old_acl);
if (error)
return error;
if (mode != inode->i_mode)
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 1caee0534587..582ef53f2104 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -249,22 +249,6 @@ static int gfs2_write_jdata_pagevec(struct address_space *mapping,
for(i = 0; i < nr_pages; i++) {
struct page *page = pvec->pages[i];
- /*
- * At this point, the page may be truncated or
- * invalidated (changing page->mapping to NULL), or
- * even swizzled back from swapper_space to tmpfs file
- * mapping. However, page->index will not change
- * because we have a reference on the page.
- */
- if (page->index > end) {
- /*
- * can't be range_cyclic (1st pass) because
- * end == -1 in that case.
- */
- ret = 1;
- break;
- }
-
*done_index = page->index;
lock_page(page);
@@ -382,8 +366,8 @@ retry:
tag_pages_for_writeback(mapping, index, end);
done_index = index;
while (!done && (index <= end)) {
- nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
- min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
+ nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
+ tag);
if (nr_pages == 0)
break;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 8ed2b1a71637..d66790fd702f 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1226,7 +1226,7 @@ static int set_gfs2_super(struct super_block *s, void *data)
* We set the bdi here to the queue backing, file systems can
* overwrite this in ->fill_super()
*/
- s->s_bdi = &bdev_get_queue(s->s_bdev)->backing_dev_info;
+ s->s_bdi = bdev_get_queue(s->s_bdev)->backing_dev_info;
return 0;
}
diff --git a/fs/inode.c b/fs/inode.c
index 3da76119d937..28c8c265f2f6 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -633,6 +633,7 @@ again:
dispose_list(&dispose);
}
+EXPORT_SYMBOL_GPL(evict_inodes);
/**
* invalidate_inodes - attempt to free all inodes on a superblock
@@ -2052,3 +2053,27 @@ void inode_nohighmem(struct inode *inode)
mapping_set_gfp_mask(inode->i_mapping, GFP_USER);
}
EXPORT_SYMBOL(inode_nohighmem);
+
+/*
+ * Generic function to check FS_IOC_SETFLAGS values and reject any invalid
+ * configurations.
+ *
+ * Note: the caller should be holding i_mutex, or else be sure that they have
+ * exclusive access to the inode structure.
+ */
+int vfs_ioc_setflags_prepare(struct inode *inode, unsigned int oldflags,
+ unsigned int flags)
+{
+ /*
+ * The IMMUTABLE and APPEND_ONLY flags can only be changed by
+ * the relevant capability.
+ *
+ * This test looks nicer. Thanks to Pauline Middelink
+ */
+ if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL) &&
+ !capable(CAP_LINUX_IMMUTABLE))
+ return -EPERM;
+
+ return 0;
+}
+EXPORT_SYMBOL(vfs_ioc_setflags_prepare);
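
A sketch of the intended call site, following the locking note above; demo_get_flags() and demo_apply_flags() stand in for the filesystem-specific parts:

extern unsigned int demo_get_flags(struct inode *inode);	/* fs-specific */
extern void demo_apply_flags(struct inode *inode, unsigned int flags);

static int demo_ioc_setflags(struct inode *inode, unsigned int flags)
{
	unsigned int oldflags;
	int err;

	mutex_lock(&inode->i_mutex);	/* exclusive access, as required */
	oldflags = demo_get_flags(inode);
	err = vfs_ioc_setflags_prepare(inode, oldflags, flags);
	if (!err)
		demo_apply_flags(inode, flags);
	mutex_unlock(&inode->i_mutex);
	return err;
}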
diff --git a/fs/internal.h b/fs/internal.h
index 6387b35a1c0d..0ec681176ae9 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -123,7 +123,6 @@ extern void inode_add_lru(struct inode *inode);
extern void inode_io_list_del(struct inode *inode);
extern long get_nr_dirty_inodes(void);
-extern void evict_inodes(struct super_block *);
extern int invalidate_inodes(struct super_block *, bool);
/*
@@ -153,3 +152,29 @@ extern void mnt_pin_kill(struct mount *m);
* fs/nsfs.c
*/
extern struct dentry_operations ns_dentry_operations;
+
+#ifdef CONFIG_FILE_TABLE_DEBUG
+void global_filetable_print_warning_once(void);
+void global_filetable_add(struct file *filp);
+void global_filetable_del(struct file *filp);
+void global_filetable_delayed_print(struct mount *mnt);
+
+#else /* !CONFIG_FILE_TABLE_DEBUG */
+
+static inline void global_filetable_print_warning_once(void)
+{
+}
+
+static inline void global_filetable_add(struct file *filp)
+{
+}
+
+static inline void global_filetable_del(struct file *filp)
+{
+}
+
+static inline void global_filetable_delayed_print(struct mount *mnt)
+{
+}
+
+#endif /* CONFIG_FILE_TABLE_DEBUG */
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 7a3368929245..6f467642d530 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -207,7 +207,8 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry,
__func__, inode->i_ino, inode->i_mode, inode->i_nlink,
f->inocache->pino_nlink, inode->i_mapping->nrpages);
- d_instantiate_new(dentry, inode);
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
return 0;
fail:
@@ -427,7 +428,8 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
mutex_unlock(&dir_f->sem);
jffs2_complete_reservation(c);
- d_instantiate_new(dentry, inode);
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
return 0;
fail:
@@ -571,7 +573,8 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, umode_t mode
mutex_unlock(&dir_f->sem);
jffs2_complete_reservation(c);
- d_instantiate_new(dentry, inode);
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
return 0;
fail:
@@ -746,7 +749,8 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, umode_t mode
mutex_unlock(&dir_f->sem);
jffs2_complete_reservation(c);
- d_instantiate_new(dentry, inode);
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
return 0;
fail:
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index f217ae750adb..9d7551f5c32a 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -178,7 +178,8 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode,
unlock_new_inode(ip);
iput(ip);
} else {
- d_instantiate_new(dentry, ip);
+ unlock_new_inode(ip);
+ d_instantiate(dentry, ip);
}
out2:
@@ -312,7 +313,8 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
unlock_new_inode(ip);
iput(ip);
} else {
- d_instantiate_new(dentry, ip);
+ unlock_new_inode(ip);
+ d_instantiate(dentry, ip);
}
out2:
@@ -1056,7 +1058,8 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
unlock_new_inode(ip);
iput(ip);
} else {
- d_instantiate_new(dentry, ip);
+ unlock_new_inode(ip);
+ d_instantiate(dentry, ip);
}
out2:
@@ -1440,7 +1443,8 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
unlock_new_inode(ip);
iput(ip);
} else {
- d_instantiate_new(dentry, ip);
+ unlock_new_inode(ip);
+ d_instantiate(dentry, ip);
}
out1:
diff --git a/fs/mbcache.c b/fs/mbcache.c
index 187477ded6b3..de509271d031 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -222,8 +222,19 @@ __mb_cache_entry_release(struct mb_cache_entry *ce)
* then reacquire the lock in the proper order.
*/
spin_lock(&mb_cache_spinlock);
- if (list_empty(&ce->e_lru_list))
- list_add_tail(&ce->e_lru_list, &mb_cache_lru_list);
+ /*
+ * Evaluate the conditions under global lock mb_cache_spinlock,
+ * to check if mb_cache_entry_get() is running now
+ * and has already deleted the entry from mb_cache_lru_list
+ * and incremented ce->e_refcnt to prevent further additions
+ * to mb_cache_lru_list.
+ */
+ if (!(ce->e_used || ce->e_queued ||
+ atomic_read(&ce->e_refcnt))) {
+ if (list_empty(&ce->e_lru_list))
+ list_add_tail(&ce->e_lru_list,
+ &mb_cache_lru_list);
+ }
spin_unlock(&mb_cache_spinlock);
}
__spin_unlock_mb_cache_entry(ce);
@@ -262,7 +273,6 @@ mb_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
list_del_init(&ce->e_lru_list);
if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))
continue;
- spin_unlock(&mb_cache_spinlock);
/* Prevent any find or get operation on the entry */
hlist_bl_lock(ce->e_block_hash_p);
hlist_bl_lock(ce->e_index_hash_p);
@@ -271,10 +281,10 @@ mb_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
!list_empty(&ce->e_lru_list)) {
hlist_bl_unlock(ce->e_index_hash_p);
hlist_bl_unlock(ce->e_block_hash_p);
- spin_lock(&mb_cache_spinlock);
continue;
}
__mb_cache_entry_unhash_unlock(ce);
+ spin_unlock(&mb_cache_spinlock);
list_add_tail(&ce->e_lru_list, &free_list);
spin_lock(&mb_cache_spinlock);
}
@@ -516,7 +526,6 @@ mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags)
if (ce->e_used || ce->e_queued ||
atomic_read(&ce->e_refcnt))
continue;
- spin_unlock(&mb_cache_spinlock);
/*
* Prevent any find or get operation on the
* entry.
@@ -530,13 +539,13 @@ mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags)
hlist_bl_unlock(ce->e_index_hash_p);
hlist_bl_unlock(ce->e_block_hash_p);
l = &mb_cache_lru_list;
- spin_lock(&mb_cache_spinlock);
continue;
}
mb_assert(list_empty(&ce->e_lru_list));
mb_assert(!(ce->e_used || ce->e_queued ||
atomic_read(&ce->e_refcnt)));
__mb_cache_entry_unhash_unlock(ce);
+ spin_unlock(&mb_cache_spinlock);
goto found;
}
}
@@ -670,6 +679,7 @@ mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev,
cache->c_bucket_bits);
block_hash_p = &cache->c_block_hash[bucket];
/* First serialize access to the block corresponding hash chain. */
+ spin_lock(&mb_cache_spinlock);
hlist_bl_lock(block_hash_p);
hlist_bl_for_each_entry(ce, l, block_hash_p, e_block_list) {
mb_assert(ce->e_block_hash_p == block_hash_p);
@@ -678,9 +688,11 @@ mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev,
* Prevent a free from removing the entry.
*/
atomic_inc(&ce->e_refcnt);
+ if (!list_empty(&ce->e_lru_list))
+ list_del_init(&ce->e_lru_list);
hlist_bl_unlock(block_hash_p);
+ spin_unlock(&mb_cache_spinlock);
__spin_lock_mb_cache_entry(ce);
- atomic_dec(&ce->e_refcnt);
if (ce->e_used > 0) {
DEFINE_WAIT(wait);
while (ce->e_used > 0) {
@@ -695,13 +707,9 @@ mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev,
finish_wait(&mb_cache_queue, &wait);
}
ce->e_used += 1 + MB_CACHE_WRITER;
+ atomic_dec(&ce->e_refcnt);
__spin_unlock_mb_cache_entry(ce);
- if (!list_empty(&ce->e_lru_list)) {
- spin_lock(&mb_cache_spinlock);
- list_del_init(&ce->e_lru_list);
- spin_unlock(&mb_cache_spinlock);
- }
if (!__mb_cache_entry_is_block_hashed(ce)) {
__mb_cache_entry_release(ce);
return NULL;
@@ -710,6 +718,7 @@ mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev,
}
}
hlist_bl_unlock(block_hash_p);
+ spin_unlock(&mb_cache_spinlock);
return NULL;
}
diff --git a/fs/namei.c b/fs/namei.c
index 3a8f1327149f..fe1612ac009d 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -40,6 +40,9 @@
#include "internal.h"
#include "mount.h"
+#define CREATE_TRACE_POINTS
+#include <trace/events/namei.h>
+
/* [Feb-1997 T. Schoebel-Theuer]
* Fundamental changes in the pathname lookup mechanisms (namei)
* were necessary because of omirr. The reason is that omirr needs
@@ -784,6 +787,81 @@ static inline int d_revalidate(struct dentry *dentry, unsigned int flags)
return dentry->d_op->d_revalidate(dentry, flags);
}
+#define INIT_PATH_SIZE 64
+
+static void success_walk_trace(struct nameidata *nd)
+{
+ struct path *pt = &nd->path;
+ struct inode *i = nd->inode;
+ char buf[INIT_PATH_SIZE], *try_buf;
+ int cur_path_size;
+ char *p;
+
+	/* When the eBPF/tracepoint hook is disabled, keep overhead low. */
+ if (!trace_inodepath_enabled())
+ return;
+
+ /* First try stack allocated buffer. */
+ try_buf = buf;
+ cur_path_size = INIT_PATH_SIZE;
+
+ while (cur_path_size <= PATH_MAX) {
+ /* Free previous heap allocation if we are now trying
+ * a second or later heap allocation.
+ */
+ if (try_buf != buf)
+ kfree(try_buf);
+
+ /* All but the first alloc are on the heap. */
+ if (cur_path_size != INIT_PATH_SIZE) {
+ try_buf = kmalloc(cur_path_size, GFP_KERNEL);
+ if (!try_buf) {
+ try_buf = buf;
+ sprintf(try_buf, "error:buf_alloc_failed");
+ break;
+ }
+ }
+
+ p = d_path(pt, try_buf, cur_path_size);
+
+ if (!IS_ERR(p)) {
+ char *end = mangle_path(try_buf, p, "\n");
+
+ if (end) {
+ try_buf[end - try_buf] = 0;
+ break;
+ } else {
+ /* On mangle errors, double path size
+ * till PATH_MAX.
+ */
+ cur_path_size = cur_path_size << 1;
+ continue;
+ }
+ }
+
+ if (PTR_ERR(p) == -ENAMETOOLONG) {
+ /* If d_path complains that name is too long,
+ * then double path size till PATH_MAX.
+ */
+ cur_path_size = cur_path_size << 1;
+ continue;
+ }
+
+ sprintf(try_buf, "error:d_path_failed_%lu",
+ -1 * PTR_ERR(p));
+ break;
+ }
+
+ if (cur_path_size > PATH_MAX)
+ sprintf(try_buf, "error:d_path_name_too_long");
+
+ trace_inodepath(i, try_buf);
+
+ if (try_buf != buf)
+ kfree(try_buf);
+ return;
+}
+
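
trace_inodepath() and trace_inodepath_enabled() come from the tracepoint declared in trace/events/namei.h, instantiated by the CREATE_TRACE_POINTS block at the top of this file. Its definition is not part of this hunk; an illustrative TRACE_EVENT with a minimal field layout could look like:

TRACE_EVENT(inodepath,
	TP_PROTO(struct inode *inode, char *path),
	TP_ARGS(inode, path),

	TP_STRUCT__entry(
		__field(unsigned long, ino)
		__string(path, path)
	),

	TP_fast_assign(
		__entry->ino = inode->i_ino;
		__assign_str(path, path);
	),

	TP_printk("ino=%lu path=%s", __entry->ino, __get_str(path))
);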
/**
* complete_walk - successful completion of path walk
* @nd: pointer to nameidata
@@ -806,15 +884,21 @@ static int complete_walk(struct nameidata *nd)
return -ECHILD;
}
- if (likely(!(nd->flags & LOOKUP_JUMPED)))
+ if (likely(!(nd->flags & LOOKUP_JUMPED))) {
+ success_walk_trace(nd);
return 0;
+ }
- if (likely(!(dentry->d_flags & DCACHE_OP_WEAK_REVALIDATE)))
+ if (likely(!(dentry->d_flags & DCACHE_OP_WEAK_REVALIDATE))) {
+ success_walk_trace(nd);
return 0;
+ }
status = dentry->d_op->d_weak_revalidate(dentry, nd->flags);
- if (status > 0)
+ if (status > 0) {
+ success_walk_trace(nd);
return 0;
+ }
if (!status)
status = -ESTALE;
@@ -2734,8 +2818,14 @@ int vfs_create2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry,
if (error)
return error;
error = dir->i_op->create(dir, dentry, mode, want_excl);
+ if (error)
+ return error;
+ error = security_inode_post_create(dir, dentry, mode);
+ if (error)
+ return error;
if (!error)
fsnotify_create(dir, dentry);
+
return error;
}
EXPORT_SYMBOL(vfs_create2);
@@ -3428,6 +3518,8 @@ out2:
error = -ESTALE;
}
file = ERR_PTR(error);
+ } else {
+ global_filetable_add(file);
}
return file;
}
@@ -3595,8 +3687,16 @@ int vfs_mknod2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry, u
return error;
error = dir->i_op->mknod(dir, dentry, mode, dev);
+ if (error)
+ return error;
+
+ error = security_inode_post_create(dir, dentry, mode);
+ if (error)
+ return error;
+
if (!error)
fsnotify_create(dir, dentry);
+
return error;
}
EXPORT_SYMBOL(vfs_mknod2);
diff --git a/fs/namespace.c b/fs/namespace.c
index e7f6b482bf7e..58c6f27b141d 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1625,6 +1625,8 @@ static int do_umount(struct mount *mnt, int flags)
out:
unlock_mount_hash();
namespace_unlock();
+ if (retval == -EBUSY)
+ global_filetable_delayed_print(mnt);
return retval;
}
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index 3a3821b00486..9deca59be7e5 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -2147,8 +2147,8 @@ static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
pagevec_init(&pvec, 0);
- while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
- PAGEVEC_SIZE)) {
+ while (pagevec_lookup_tag(&pvec, btcache, &index,
+ PAGECACHE_TAG_DIRTY)) {
for (i = 0; i < pagevec_count(&pvec); i++) {
bh = head = page_buffers(pvec.pages[i]);
do {
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index cd7f5b0abe84..c9a1a491aa91 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -50,7 +50,8 @@ static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode)
{
int err = nilfs_add_link(dentry, inode);
if (!err) {
- d_instantiate_new(dentry, inode);
+ d_instantiate(dentry, inode);
+ unlock_new_inode(inode);
return 0;
}
inode_dec_link_count(inode);
@@ -245,7 +246,8 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
goto out_fail;
nilfs_mark_inode_dirty(inode);
- d_instantiate_new(dentry, inode);
+ d_instantiate(dentry, inode);
+ unlock_new_inode(inode);
out:
if (!err)
err = nilfs_transaction_commit(dir->i_sb);
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 45d650addd56..447999563737 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -262,8 +262,7 @@ int nilfs_copy_dirty_pages(struct address_space *dmap,
pagevec_init(&pvec, 0);
repeat:
- if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY,
- PAGEVEC_SIZE))
+ if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY))
return 0;
for (i = 0; i < pagevec_count(&pvec); i++) {
@@ -382,8 +381,8 @@ void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent)
pagevec_init(&pvec, 0);
- while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
- PAGEVEC_SIZE)) {
+ while (pagevec_lookup_tag(&pvec, mapping, &index,
+ PAGECACHE_TAG_DIRTY)) {
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index d58c0c62b2ae..b174508d8766 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -705,18 +705,14 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
pagevec_init(&pvec, 0);
repeat:
if (unlikely(index > last) ||
- !pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
- min_t(pgoff_t, last - index,
- PAGEVEC_SIZE - 1) + 1))
+ !pagevec_lookup_range_tag(&pvec, mapping, &index, last,
+ PAGECACHE_TAG_DIRTY))
return ndirties;
for (i = 0; i < pagevec_count(&pvec); i++) {
struct buffer_head *bh, *head;
struct page *page = pvec.pages[i];
- if (unlikely(page->index > last))
- break;
-
lock_page(page);
if (!page_has_buffers(page))
create_empty_buffers(page, i_blocksize(inode), 0);
@@ -753,8 +749,8 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode,
pagevec_init(&pvec, 0);
- while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
- PAGEVEC_SIZE)) {
+ while (pagevec_lookup_tag(&pvec, mapping, &index,
+ PAGECACHE_TAG_DIRTY)) {
for (i = 0; i < pagevec_count(&pvec); i++) {
bh = head = page_buffers(pvec.pages[i]);
do {
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 354013ea22ec..67c6c650b21e 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1079,7 +1079,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_time_gran = 1;
sb->s_max_links = NILFS_LINK_MAX;
- sb->s_bdi = &bdev_get_queue(sb->s_bdev)->backing_dev_info;
+ sb->s_bdi = bdev_get_queue(sb->s_bdev)->backing_dev_info;
err = load_nilfs(nilfs, sb);
if (err)
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 3972ac87a8cb..3189a32d8fa3 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -357,7 +357,7 @@ int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
}
out_unlock:
unlock_rename(workdir, upperdir);
- revert_creds(old_cred);
+ ovl_revert_creds(old_cred);
if (link)
free_page((unsigned long) link);
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index eedacae889b9..953c88dd6519 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -407,7 +407,7 @@ static int ovl_create_or_link(struct dentry *dentry, int mode, dev_t rdev,
if (!ovl_dentry_is_opaque(dentry)) {
err = ovl_create_upper(dentry, inode, &stat, link, hardlink);
} else {
- const struct cred *old_cred;
+ const struct cred *old_cred, *hold_cred = NULL;
struct cred *override_cred;
old_cred = ovl_override_creds(dentry->d_sb);
@@ -415,15 +415,22 @@ static int ovl_create_or_link(struct dentry *dentry, int mode, dev_t rdev,
err = -ENOMEM;
override_cred = prepare_creds();
if (override_cred) {
- override_cred->fsuid = old_cred->fsuid;
- override_cred->fsgid = old_cred->fsgid;
- put_cred(override_creds(override_cred));
+ const struct cred *our_cred;
+
+ our_cred = old_cred;
+ if (!our_cred)
+ our_cred = current_cred();
+ override_cred->fsuid = our_cred->fsuid;
+ override_cred->fsgid = our_cred->fsgid;
+ hold_cred = override_creds(override_cred);
put_cred(override_cred);
err = ovl_create_over_whiteout(dentry, inode, &stat,
link, hardlink);
}
- revert_creds(old_cred);
+ ovl_revert_creds(old_cred ?: hold_cred);
+ if (old_cred && hold_cred)
+ put_cred(hold_cred);
}
if (!err)
@@ -657,7 +664,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
err = ovl_remove_and_whiteout(dentry, is_dir);
- revert_creds(old_cred);
+ ovl_revert_creds(old_cred);
}
out_drop_write:
ovl_drop_write(dentry);
@@ -896,8 +903,7 @@ out_dput_old:
out_unlock:
unlock_rename(new_upperdir, old_upperdir);
out_revert_creds:
- if (old_opaque || new_opaque)
- revert_creds(old_cred);
+ ovl_revert_creds(old_cred);
out_drop_write:
ovl_drop_write(old);
out:
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 27a42975d7cd..0723b8f97651 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -151,6 +151,7 @@ bool ovl_dentry_is_opaque(struct dentry *dentry);
void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque);
bool ovl_is_whiteout(struct dentry *dentry);
const struct cred *ovl_override_creds(struct super_block *sb);
+void ovl_revert_creds(const struct cred *oldcred);
void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry);
struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags);
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index da999e73c97a..9ff8f8192bf1 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -223,7 +223,7 @@ static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd)
}
mutex_unlock(&dir->d_inode->i_mutex);
}
- revert_creds(old_cred);
+ ovl_revert_creds(old_cred);
return err;
}
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index fa20c95bd456..48f7c1ab94cd 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -31,6 +31,7 @@ struct ovl_config {
char *lowerdir;
char *upperdir;
char *workdir;
+ bool override_creds;
};
/* private information held for overlayfs's superblock */
@@ -252,9 +253,17 @@ const struct cred *ovl_override_creds(struct super_block *sb)
{
struct ovl_fs *ofs = sb->s_fs_info;
+ if (!ofs->config.override_creds)
+ return NULL;
return override_creds(ofs->creator_cred);
}
+void ovl_revert_creds(const struct cred *old_cred)
+{
+ if (old_cred)
+ revert_creds(old_cred);
+}
+
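Together with the dir.c hunk above, this makes the credential override switchable: with override_creds=off, ovl_override_creds() returns NULL, every ovl_revert_creds(old_cred) call degrades to a no-op, and only the temporary fsuid/fsgid override (hold_cred) taken in ovl_create_or_link() is put back.
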
static bool ovl_is_opaquedir(struct dentry *dentry)
{
int res;
@@ -626,6 +635,11 @@ static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
return err;
}
+static bool __read_mostly ovl_override_creds_def = true;
+module_param_named(override_creds, ovl_override_creds_def, bool, 0644);
+MODULE_PARM_DESC(ovl_override_creds_def,
+ "Use mounter's credentials for accesses");
+
/**
* ovl_show_options
*
@@ -642,6 +656,9 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
seq_show_option(m, "upperdir", ufs->config.upperdir);
seq_show_option(m, "workdir", ufs->config.workdir);
}
+ if (ufs->config.override_creds != ovl_override_creds_def)
+ seq_show_option(m, "override_creds",
+ ufs->config.override_creds ? "on" : "off");
return 0;
}
@@ -666,6 +683,8 @@ enum {
OPT_LOWERDIR,
OPT_UPPERDIR,
OPT_WORKDIR,
+ OPT_OVERRIDE_CREDS_ON,
+ OPT_OVERRIDE_CREDS_OFF,
OPT_ERR,
};
@@ -673,6 +692,8 @@ static const match_table_t ovl_tokens = {
{OPT_LOWERDIR, "lowerdir=%s"},
{OPT_UPPERDIR, "upperdir=%s"},
{OPT_WORKDIR, "workdir=%s"},
+ {OPT_OVERRIDE_CREDS_ON, "override_creds=on"},
+ {OPT_OVERRIDE_CREDS_OFF, "override_creds=off"},
{OPT_ERR, NULL}
};
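
With these tokens wired up the option can be passed at mount time, e.g. mount -t overlay overlay -o lowerdir=/l,upperdir=/u,workdir=/w,override_creds=off /mnt, while the default for mounts that omit it comes from the override_creds module parameter declared above.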
@@ -703,6 +724,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
{
char *p;
+ config->override_creds = ovl_override_creds_def;
while ((p = ovl_next_opt(&opt)) != NULL) {
int token;
substring_t args[MAX_OPT_ARGS];
@@ -733,6 +755,14 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
return -ENOMEM;
break;
+ case OPT_OVERRIDE_CREDS_ON:
+ config->override_creds = true;
+ break;
+
+ case OPT_OVERRIDE_CREDS_OFF:
+ config->override_creds = false;
+ break;
+
default:
pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p);
return -EINVAL;
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 6238f45eed02..c4478abd1bef 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -169,51 +169,45 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
task_unlock(p);
rcu_read_unlock();
- seq_printf(m,
- "State:\t%s\n"
- "Tgid:\t%d\n"
- "Ngid:\t%d\n"
- "Pid:\t%d\n"
- "PPid:\t%d\n"
- "TracerPid:\t%d\n"
- "Uid:\t%d\t%d\t%d\t%d\n"
- "Gid:\t%d\t%d\t%d\t%d\n"
- "FDSize:\t%d\nGroups:\t",
- get_task_state(p),
- tgid, ngid, pid_nr_ns(pid, ns), ppid, tpid,
- from_kuid_munged(user_ns, cred->uid),
- from_kuid_munged(user_ns, cred->euid),
- from_kuid_munged(user_ns, cred->suid),
- from_kuid_munged(user_ns, cred->fsuid),
- from_kgid_munged(user_ns, cred->gid),
- from_kgid_munged(user_ns, cred->egid),
- from_kgid_munged(user_ns, cred->sgid),
- from_kgid_munged(user_ns, cred->fsgid),
- max_fds);
-
+ seq_printf(m, "State:\t%s", get_task_state(p));
+
+ seq_put_decimal_ull(m, "\nTgid:\t", tgid);
+ seq_put_decimal_ull(m, "\nPid:\t", pid_nr_ns(pid, ns));
+ seq_put_decimal_ull(m, "\nPPid:\t", ppid);
+ seq_put_decimal_ull(m, "\nTracerPid:\t", tpid);
+ seq_put_decimal_ull(m, "\nUid:\t", from_kuid_munged(user_ns, cred->uid));
+ seq_put_decimal_ull(m, "\t", from_kuid_munged(user_ns, cred->euid));
+ seq_put_decimal_ull(m, "\t", from_kuid_munged(user_ns, cred->suid));
+ seq_put_decimal_ull(m, "\t", from_kuid_munged(user_ns, cred->fsuid));
+ seq_put_decimal_ull(m, "\nGid:\t", from_kgid_munged(user_ns, cred->gid));
+ seq_put_decimal_ull(m, "\t", from_kgid_munged(user_ns, cred->egid));
+ seq_put_decimal_ull(m, "\t", from_kgid_munged(user_ns, cred->sgid));
+ seq_put_decimal_ull(m, "\t", from_kgid_munged(user_ns, cred->fsgid));
+ seq_put_decimal_ull(m, "\nNgid:\t", ngid);
+ seq_put_decimal_ull(m, "\nFDSize:\t", max_fds);
+
+ seq_puts(m, "\nGroups:\t");
group_info = cred->group_info;
for (g = 0; g < group_info->ngroups; g++)
- seq_printf(m, "%d ",
- from_kgid_munged(user_ns, GROUP_AT(group_info, g)));
+ seq_put_decimal_ull(m, g ? " " : "",
+ from_kgid_munged(user_ns, GROUP_AT(group_info, g)));
put_cred(cred);
+	/* Historical trailing space, preserved for backward compatibility. */
+ seq_putc(m, ' ');
#ifdef CONFIG_PID_NS
seq_puts(m, "\nNStgid:");
for (g = ns->level; g <= pid->level; g++)
- seq_printf(m, "\t%d",
- task_tgid_nr_ns(p, pid->numbers[g].ns));
+ seq_put_decimal_ull(m, "\t", task_tgid_nr_ns(p, pid->numbers[g].ns));
seq_puts(m, "\nNSpid:");
for (g = ns->level; g <= pid->level; g++)
- seq_printf(m, "\t%d",
- task_pid_nr_ns(p, pid->numbers[g].ns));
+ seq_put_decimal_ull(m, "\t", task_pid_nr_ns(p, pid->numbers[g].ns));
seq_puts(m, "\nNSpgid:");
for (g = ns->level; g <= pid->level; g++)
- seq_printf(m, "\t%d",
- task_pgrp_nr_ns(p, pid->numbers[g].ns));
+ seq_put_decimal_ull(m, "\t", task_pgrp_nr_ns(p, pid->numbers[g].ns));
seq_puts(m, "\nNSsid:");
for (g = ns->level; g <= pid->level; g++)
- seq_printf(m, "\t%d",
- task_session_nr_ns(p, pid->numbers[g].ns));
+ seq_put_decimal_ull(m, "\t", task_session_nr_ns(p, pid->numbers[g].ns));
#endif
seq_putc(m, '\n');
}
@@ -282,11 +276,12 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p)
unlock_task_sighand(p, &flags);
}
- seq_printf(m, "Threads:\t%d\n", num_threads);
- seq_printf(m, "SigQ:\t%lu/%lu\n", qsize, qlim);
+ seq_put_decimal_ull(m, "Threads:\t", num_threads);
+ seq_put_decimal_ull(m, "\nSigQ:\t", qsize);
+ seq_put_decimal_ull(m, "/", qlim);
/* render them all */
- render_sigset_t(m, "SigPnd:\t", &pending);
+ render_sigset_t(m, "\nSigPnd:\t", &pending);
render_sigset_t(m, "ShdPnd:\t", &shpending);
render_sigset_t(m, "SigBlk:\t", &blocked);
render_sigset_t(m, "SigIgn:\t", &ignored);
@@ -331,7 +326,8 @@ static inline void task_cap(struct seq_file *m, struct task_struct *p)
static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
{
#ifdef CONFIG_SECCOMP
- seq_printf(m, "Seccomp:\t%d\n", p->seccomp.mode);
+ seq_put_decimal_ull(m, "Seccomp:\t", p->seccomp.mode);
+ seq_putc(m, '\n');
#endif
seq_printf(m, "Speculation_Store_Bypass:\t");
switch (arch_prctl_spec_ctrl_get(p, PR_SPEC_STORE_BYPASS)) {
@@ -363,10 +359,9 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
static inline void task_context_switch_counts(struct seq_file *m,
struct task_struct *p)
{
- seq_printf(m, "voluntary_ctxt_switches:\t%lu\n"
- "nonvoluntary_ctxt_switches:\t%lu\n",
- p->nvcsw,
- p->nivcsw);
+ seq_put_decimal_ull(m, "voluntary_ctxt_switches:\t", p->nvcsw);
+ seq_put_decimal_ull(m, "\nnonvoluntary_ctxt_switches:\t", p->nivcsw);
+ seq_putc(m, '\n');
}
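
These conversions rely on seq_put_decimal_ull()/_ll() writing the delimiter string verbatim and then the number, so consecutive calls chain fields without printf format parsing. For illustration, with arbitrary values:

/* Emits "Threads:\t4\nSigQ:\t0/3093\n" into the seq_file. */
seq_put_decimal_ull(m, "Threads:\t", 4);
seq_put_decimal_ull(m, "\nSigQ:\t", 0);
seq_put_decimal_ull(m, "/", 3093);
seq_putc(m, '\n');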
static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
@@ -510,41 +505,41 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
start_time = nsec_to_clock_t(task->real_start_time);
seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state);
- seq_put_decimal_ll(m, ' ', ppid);
- seq_put_decimal_ll(m, ' ', pgid);
- seq_put_decimal_ll(m, ' ', sid);
- seq_put_decimal_ll(m, ' ', tty_nr);
- seq_put_decimal_ll(m, ' ', tty_pgrp);
- seq_put_decimal_ull(m, ' ', task->flags);
- seq_put_decimal_ull(m, ' ', min_flt);
- seq_put_decimal_ull(m, ' ', cmin_flt);
- seq_put_decimal_ull(m, ' ', maj_flt);
- seq_put_decimal_ull(m, ' ', cmaj_flt);
- seq_put_decimal_ull(m, ' ', cputime_to_clock_t(utime));
- seq_put_decimal_ull(m, ' ', cputime_to_clock_t(stime));
- seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cutime));
- seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cstime));
- seq_put_decimal_ll(m, ' ', priority);
- seq_put_decimal_ll(m, ' ', nice);
- seq_put_decimal_ll(m, ' ', num_threads);
- seq_put_decimal_ull(m, ' ', 0);
- seq_put_decimal_ull(m, ' ', start_time);
- seq_put_decimal_ull(m, ' ', vsize);
- seq_put_decimal_ull(m, ' ', mm ? get_mm_rss(mm) : 0);
- seq_put_decimal_ull(m, ' ', rsslim);
- seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->start_code : 1) : 0);
- seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->end_code : 1) : 0);
- seq_put_decimal_ull(m, ' ', (permitted && mm) ? mm->start_stack : 0);
- seq_put_decimal_ull(m, ' ', esp);
- seq_put_decimal_ull(m, ' ', eip);
+ seq_put_decimal_ll(m, " ", ppid);
+ seq_put_decimal_ll(m, " ", pgid);
+ seq_put_decimal_ll(m, " ", sid);
+ seq_put_decimal_ll(m, " ", tty_nr);
+ seq_put_decimal_ll(m, " ", tty_pgrp);
+ seq_put_decimal_ull(m, " ", task->flags);
+ seq_put_decimal_ull(m, " ", min_flt);
+ seq_put_decimal_ull(m, " ", cmin_flt);
+ seq_put_decimal_ull(m, " ", maj_flt);
+ seq_put_decimal_ull(m, " ", cmaj_flt);
+ seq_put_decimal_ull(m, " ", cputime_to_clock_t(utime));
+ seq_put_decimal_ull(m, " ", cputime_to_clock_t(stime));
+ seq_put_decimal_ll(m, " ", cputime_to_clock_t(cutime));
+ seq_put_decimal_ll(m, " ", cputime_to_clock_t(cstime));
+ seq_put_decimal_ll(m, " ", priority);
+ seq_put_decimal_ll(m, " ", nice);
+ seq_put_decimal_ll(m, " ", num_threads);
+ seq_put_decimal_ull(m, " ", 0);
+ seq_put_decimal_ull(m, " ", start_time);
+ seq_put_decimal_ull(m, " ", vsize);
+ seq_put_decimal_ull(m, " ", mm ? get_mm_rss(mm) : 0);
+ seq_put_decimal_ull(m, " ", rsslim);
+ seq_put_decimal_ull(m, " ", mm ? (permitted ? mm->start_code : 1) : 0);
+ seq_put_decimal_ull(m, " ", mm ? (permitted ? mm->end_code : 1) : 0);
+ seq_put_decimal_ull(m, " ", (permitted && mm) ? mm->start_stack : 0);
+ seq_put_decimal_ull(m, " ", esp);
+ seq_put_decimal_ull(m, " ", eip);
/* The signal information here is obsolete.
* It must be decimal for Linux 2.0 compatibility.
* Use /proc/#/status for real-time signals.
*/
- seq_put_decimal_ull(m, ' ', task->pending.signal.sig[0] & 0x7fffffffUL);
- seq_put_decimal_ull(m, ' ', task->blocked.sig[0] & 0x7fffffffUL);
- seq_put_decimal_ull(m, ' ', sigign.sig[0] & 0x7fffffffUL);
- seq_put_decimal_ull(m, ' ', sigcatch.sig[0] & 0x7fffffffUL);
+ seq_put_decimal_ull(m, " ", task->pending.signal.sig[0] & 0x7fffffffUL);
+ seq_put_decimal_ull(m, " ", task->blocked.sig[0] & 0x7fffffffUL);
+ seq_put_decimal_ull(m, " ", sigign.sig[0] & 0x7fffffffUL);
+ seq_put_decimal_ull(m, " ", sigcatch.sig[0] & 0x7fffffffUL);
/*
* We used to output the absolute kernel address, but that's an
@@ -558,31 +553,31 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
else
seq_puts(m, " 0");
- seq_put_decimal_ull(m, ' ', 0);
- seq_put_decimal_ull(m, ' ', 0);
- seq_put_decimal_ll(m, ' ', task->exit_signal);
- seq_put_decimal_ll(m, ' ', task_cpu(task));
- seq_put_decimal_ull(m, ' ', task->rt_priority);
- seq_put_decimal_ull(m, ' ', task->policy);
- seq_put_decimal_ull(m, ' ', delayacct_blkio_ticks(task));
- seq_put_decimal_ull(m, ' ', cputime_to_clock_t(gtime));
- seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cgtime));
+ seq_put_decimal_ull(m, " ", 0);
+ seq_put_decimal_ull(m, " ", 0);
+ seq_put_decimal_ll(m, " ", task->exit_signal);
+ seq_put_decimal_ll(m, " ", task_cpu(task));
+ seq_put_decimal_ull(m, " ", task->rt_priority);
+ seq_put_decimal_ull(m, " ", task->policy);
+ seq_put_decimal_ull(m, " ", delayacct_blkio_ticks(task));
+ seq_put_decimal_ull(m, " ", cputime_to_clock_t(gtime));
+ seq_put_decimal_ll(m, " ", cputime_to_clock_t(cgtime));
if (mm && permitted) {
- seq_put_decimal_ull(m, ' ', mm->start_data);
- seq_put_decimal_ull(m, ' ', mm->end_data);
- seq_put_decimal_ull(m, ' ', mm->start_brk);
- seq_put_decimal_ull(m, ' ', mm->arg_start);
- seq_put_decimal_ull(m, ' ', mm->arg_end);
- seq_put_decimal_ull(m, ' ', mm->env_start);
- seq_put_decimal_ull(m, ' ', mm->env_end);
+ seq_put_decimal_ull(m, " ", mm->start_data);
+ seq_put_decimal_ull(m, " ", mm->end_data);
+ seq_put_decimal_ull(m, " ", mm->start_brk);
+ seq_put_decimal_ull(m, " ", mm->arg_start);
+ seq_put_decimal_ull(m, " ", mm->arg_end);
+ seq_put_decimal_ull(m, " ", mm->env_start);
+ seq_put_decimal_ull(m, " ", mm->env_end);
} else
- seq_printf(m, " 0 0 0 0 0 0 0");
+ seq_puts(m, " 0 0 0 0 0 0 0");
if (permitted)
- seq_put_decimal_ll(m, ' ', task->exit_code);
+ seq_put_decimal_ll(m, " ", task->exit_code);
else
- seq_put_decimal_ll(m, ' ', 0);
+ seq_puts(m, " 0");
seq_putc(m, '\n');
if (mm)
@@ -618,13 +613,13 @@ int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
* seq_printf(m, "%lu %lu %lu %lu 0 %lu 0\n",
* size, resident, shared, text, data);
*/
- seq_put_decimal_ull(m, 0, size);
- seq_put_decimal_ull(m, ' ', resident);
- seq_put_decimal_ull(m, ' ', shared);
- seq_put_decimal_ull(m, ' ', text);
- seq_put_decimal_ull(m, ' ', 0);
- seq_put_decimal_ull(m, ' ', data);
- seq_put_decimal_ull(m, ' ', 0);
+ seq_put_decimal_ull(m, "", size);
+ seq_put_decimal_ull(m, " ", resident);
+ seq_put_decimal_ull(m, " ", shared);
+ seq_put_decimal_ull(m, " ", text);
+ seq_put_decimal_ull(m, " ", 0);
+ seq_put_decimal_ull(m, " ", data);
+ seq_put_decimal_ull(m, " ", 0);
seq_putc(m, '\n');
return 0;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 91d58005d9b9..8da1fc940a86 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1047,15 +1047,20 @@ static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
int oom_adj = OOM_ADJUST_MIN;
size_t len;
unsigned long flags;
+ int mult = 1;
if (!task)
return -ESRCH;
if (lock_task_sighand(task, &flags)) {
- if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX)
+ if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX) {
oom_adj = OOM_ADJUST_MAX;
- else
- oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) /
- OOM_SCORE_ADJ_MAX;
+ } else {
+ if (task->signal->oom_score_adj < 0)
+ mult = -1;
+ oom_adj = roundup(mult * task->signal->oom_score_adj *
+ -OOM_DISABLE, OOM_SCORE_ADJ_MAX) /
+ OOM_SCORE_ADJ_MAX * mult;
+ }
unlock_task_sighand(task, &flags);
}
put_task_struct(task);
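
Worked example of the rounding change: for oom_score_adj = 500 the old expression truncated to (500 * 17) / 1000 = 8, whereas roundup(8500, 1000) / 1000 = 9; for oom_score_adj = -500 the mult = -1 factor mirrors that to -9 instead of the old -8. The read-back value now rounds away from zero rather than toward it.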
@@ -1439,6 +1444,204 @@ static const struct file_operations proc_pid_sched_operations = {
#endif
+/*
+ * Print out various scheduling related per-task fields:
+ */
+
+#ifdef CONFIG_SMP
+
+static int sched_wake_up_idle_show(struct seq_file *m, void *v)
+{
+ struct inode *inode = m->private;
+ struct task_struct *p;
+
+ p = get_proc_task(inode);
+ if (!p)
+ return -ESRCH;
+
+ seq_printf(m, "%d\n", sched_get_wake_up_idle(p));
+
+ put_task_struct(p);
+
+ return 0;
+}
+
+static ssize_t
+sched_wake_up_idle_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *offset)
+{
+ struct inode *inode = file_inode(file);
+ struct task_struct *p;
+ char buffer[PROC_NUMBUF];
+ int wake_up_idle, err;
+
+ memset(buffer, 0, sizeof(buffer));
+ if (count > sizeof(buffer) - 1)
+ count = sizeof(buffer) - 1;
+ if (copy_from_user(buffer, buf, count)) {
+ err = -EFAULT;
+ goto out;
+ }
+
+ err = kstrtoint(strstrip(buffer), 0, &wake_up_idle);
+ if (err)
+ goto out;
+
+ p = get_proc_task(inode);
+ if (!p)
+ return -ESRCH;
+
+ err = sched_set_wake_up_idle(p, wake_up_idle);
+
+ put_task_struct(p);
+
+out:
+ return err < 0 ? err : count;
+}
+
+static int sched_wake_up_idle_open(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, sched_wake_up_idle_show, inode);
+}
+
+static const struct file_operations proc_pid_sched_wake_up_idle_operations = {
+ .open = sched_wake_up_idle_open,
+ .read = seq_read,
+ .write = sched_wake_up_idle_write,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+#endif /* CONFIG_SMP */
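
These files appear under /proc/<pid>/ through the tgid_base_stuff additions below: writing, say, 1 to /proc/<pid>/sched_wake_up_idle invokes sched_set_wake_up_idle() on the task and a read reports sched_get_wake_up_idle(); both hooks belong to the vendor (HMP) scheduler patches and are not part of this hunk.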
+
+#ifdef CONFIG_SCHED_HMP
+
+static int sched_init_task_load_show(struct seq_file *m, void *v)
+{
+ struct inode *inode = m->private;
+ struct task_struct *p;
+
+ p = get_proc_task(inode);
+ if (!p)
+ return -ESRCH;
+
+ seq_printf(m, "%d\n", sched_get_init_task_load(p));
+
+ put_task_struct(p);
+
+ return 0;
+}
+
+static ssize_t
+sched_init_task_load_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *offset)
+{
+ struct inode *inode = file_inode(file);
+ struct task_struct *p;
+ char buffer[PROC_NUMBUF];
+ int init_task_load, err;
+
+ memset(buffer, 0, sizeof(buffer));
+ if (count > sizeof(buffer) - 1)
+ count = sizeof(buffer) - 1;
+ if (copy_from_user(buffer, buf, count)) {
+ err = -EFAULT;
+ goto out;
+ }
+
+ err = kstrtoint(strstrip(buffer), 0, &init_task_load);
+ if (err)
+ goto out;
+
+ p = get_proc_task(inode);
+ if (!p)
+ return -ESRCH;
+
+ err = sched_set_init_task_load(p, init_task_load);
+
+ put_task_struct(p);
+
+out:
+ return err < 0 ? err : count;
+}
+
+static int sched_init_task_load_open(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, sched_init_task_load_show, inode);
+}
+
+static const struct file_operations proc_pid_sched_init_task_load_operations = {
+ .open = sched_init_task_load_open,
+ .read = seq_read,
+ .write = sched_init_task_load_write,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int sched_group_id_show(struct seq_file *m, void *v)
+{
+ struct inode *inode = m->private;
+ struct task_struct *p;
+
+ p = get_proc_task(inode);
+ if (!p)
+ return -ESRCH;
+
+ seq_printf(m, "%d\n", sched_get_group_id(p));
+
+ put_task_struct(p);
+
+ return 0;
+}
+
+static ssize_t
+sched_group_id_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *offset)
+{
+ struct inode *inode = file_inode(file);
+ struct task_struct *p;
+ char buffer[PROC_NUMBUF];
+ int group_id, err;
+
+ memset(buffer, 0, sizeof(buffer));
+ if (count > sizeof(buffer) - 1)
+ count = sizeof(buffer) - 1;
+ if (copy_from_user(buffer, buf, count)) {
+ err = -EFAULT;
+ goto out;
+ }
+
+ err = kstrtoint(strstrip(buffer), 0, &group_id);
+ if (err)
+ goto out;
+
+ p = get_proc_task(inode);
+ if (!p)
+ return -ESRCH;
+
+ err = sched_set_group_id(p, group_id);
+
+ put_task_struct(p);
+
+out:
+ return err < 0 ? err : count;
+}
+
+static int sched_group_id_open(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, sched_group_id_show, inode);
+}
+
+static const struct file_operations proc_pid_sched_group_id_operations = {
+ .open = sched_group_id_open,
+ .read = seq_read,
+ .write = sched_group_id_write,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+#endif /* CONFIG_SCHED_HMP */
+
#ifdef CONFIG_SCHED_AUTOGROUP
/*
* Print out autogroup related information:
@@ -2892,6 +3095,13 @@ static const struct pid_entry tgid_base_stuff[] = {
ONE("status", S_IRUGO, proc_pid_status),
ONE("personality", S_IRUSR, proc_pid_personality),
ONE("limits", S_IRUGO, proc_pid_limits),
+#ifdef CONFIG_SMP
+ REG("sched_wake_up_idle", S_IRUGO|S_IWUSR, proc_pid_sched_wake_up_idle_operations),
+#endif
+#ifdef CONFIG_SCHED_HMP
+ REG("sched_init_task_load", S_IRUGO|S_IWUSR, proc_pid_sched_init_task_load_operations),
+ REG("sched_group_id", S_IRUGO|S_IWUGO, proc_pid_sched_group_id_operations),
+#endif
#ifdef CONFIG_SCHED_DEBUG
REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
#endif
@@ -2916,9 +3126,13 @@ static const struct pid_entry tgid_base_stuff[] = {
REG("mounts", S_IRUGO, proc_mounts_operations),
REG("mountinfo", S_IRUGO, proc_mountinfo_operations),
REG("mountstats", S_IRUSR, proc_mountstats_operations),
+#ifdef CONFIG_PROCESS_RECLAIM
+ REG("reclaim", S_IWUSR, proc_reclaim_operations),
+#endif
#ifdef CONFIG_PROC_PAGE_MONITOR
REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
REG("smaps", S_IRUGO, proc_pid_smaps_operations),
+ REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
REG("pagemap", S_IRUSR, proc_pagemap_operations),
#endif
#ifdef CONFIG_SECURITY
@@ -3313,6 +3527,7 @@ static const struct pid_entry tid_base_stuff[] = {
#ifdef CONFIG_PROC_PAGE_MONITOR
REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
REG("smaps", S_IRUGO, proc_tid_smaps_operations),
+ REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
REG("pagemap", S_IRUSR, proc_pagemap_operations),
#endif
#ifdef CONFIG_SECURITY
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index fa4dd182bfaa..2bfff313bf94 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -209,6 +209,7 @@ struct pde_opener {
extern const struct inode_operations proc_link_inode_operations;
extern const struct inode_operations proc_pid_link_inode_operations;
+extern const struct file_operations proc_reclaim_operations;
extern void proc_init_inodecache(void);
extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
@@ -284,10 +285,12 @@ extern int proc_remount(struct super_block *, int *, char *);
/*
* task_[no]mmu.c
*/
+struct mem_size_stats;
struct proc_maps_private {
struct inode *inode;
struct task_struct *task;
struct mm_struct *mm;
+ struct mem_size_stats *rollup;
#ifdef CONFIG_MMU
struct vm_area_struct *tail_vma;
#endif
@@ -303,6 +306,7 @@ extern const struct file_operations proc_tid_maps_operations;
extern const struct file_operations proc_pid_numa_maps_operations;
extern const struct file_operations proc_tid_numa_maps_operations;
extern const struct file_operations proc_pid_smaps_operations;
+extern const struct file_operations proc_pid_smaps_rollup_operations;
extern const struct file_operations proc_tid_smaps_operations;
extern const struct file_operations proc_clear_refs_operations;
extern const struct file_operations proc_pagemap_operations;
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 510413eb25b8..0fd3f81fe51f 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -117,17 +117,16 @@ static int show_stat(struct seq_file *p, void *v)
}
sum += arch_irq_stat();
- seq_puts(p, "cpu ");
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(user));
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(nice));
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(system));
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(idle));
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(iowait));
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(irq));
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(softirq));
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(steal));
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest));
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest_nice));
+ seq_put_decimal_ull(p, "cpu ", cputime64_to_clock_t(user));
+ seq_put_decimal_ull(p, " ", cputime64_to_clock_t(nice));
+ seq_put_decimal_ull(p, " ", cputime64_to_clock_t(system));
+ seq_put_decimal_ull(p, " ", cputime64_to_clock_t(idle));
+ seq_put_decimal_ull(p, " ", cputime64_to_clock_t(iowait));
+ seq_put_decimal_ull(p, " ", cputime64_to_clock_t(irq));
+ seq_put_decimal_ull(p, " ", cputime64_to_clock_t(softirq));
+ seq_put_decimal_ull(p, " ", cputime64_to_clock_t(steal));
+ seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest));
+ seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest_nice));
seq_putc(p, '\n');
for_each_online_cpu(i) {
@@ -143,23 +142,23 @@ static int show_stat(struct seq_file *p, void *v)
guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
seq_printf(p, "cpu%d", i);
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(user));
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(nice));
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(system));
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(idle));
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(iowait));
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(irq));
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(softirq));
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(steal));
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest));
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest_nice));
+ seq_put_decimal_ull(p, " ", cputime64_to_clock_t(user));
+ seq_put_decimal_ull(p, " ", cputime64_to_clock_t(nice));
+ seq_put_decimal_ull(p, " ", cputime64_to_clock_t(system));
+ seq_put_decimal_ull(p, " ", cputime64_to_clock_t(idle));
+ seq_put_decimal_ull(p, " ", cputime64_to_clock_t(iowait));
+ seq_put_decimal_ull(p, " ", cputime64_to_clock_t(irq));
+ seq_put_decimal_ull(p, " ", cputime64_to_clock_t(softirq));
+ seq_put_decimal_ull(p, " ", cputime64_to_clock_t(steal));
+ seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest));
+ seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest_nice));
seq_putc(p, '\n');
}
- seq_printf(p, "intr %llu", (unsigned long long)sum);
+ seq_put_decimal_ull(p, "intr ", (unsigned long long)sum);
/* sum again ? it could be updated? */
for_each_irq_nr(j)
- seq_put_decimal_ull(p, ' ', kstat_irqs_usr(j));
+ seq_put_decimal_ull(p, " ", kstat_irqs_usr(j));
seq_printf(p,
"\nctxt %llu\n"
@@ -173,10 +172,10 @@ static int show_stat(struct seq_file *p, void *v)
nr_running(),
nr_iowait());
- seq_printf(p, "softirq %llu", (unsigned long long)sum_softirq);
+ seq_put_decimal_ull(p, "softirq ", (unsigned long long)sum_softirq);
for (i = 0; i < NR_SOFTIRQS; i++)
- seq_put_decimal_ull(p, ' ', per_softirq_sums[i]);
+ seq_put_decimal_ull(p, " ", per_softirq_sums[i]);
seq_putc(p, '\n');
return 0;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index fc8dca4a7b19..c3faa39c0ce1 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -14,6 +14,8 @@
#include <linux/swapops.h>
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
+#include <linux/mm_inline.h>
+#include <linux/ctype.h>
#include <asm/elf.h>
#include <asm/uaccess.h>
@@ -288,6 +290,7 @@ static int proc_map_release(struct inode *inode, struct file *file)
if (priv->mm)
mmdrop(priv->mm);
+ kfree(priv->rollup);
return seq_release_private(inode, file);
}
@@ -314,6 +317,23 @@ static int is_stack(struct proc_maps_private *priv,
vma->vm_end >= vma->vm_mm->start_stack;
}
+static void show_vma_header_prefix(struct seq_file *m,
+ unsigned long start, unsigned long end,
+ vm_flags_t flags, unsigned long long pgoff,
+ dev_t dev, unsigned long ino)
+{
+ seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
+ seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
+ start,
+ end,
+ flags & VM_READ ? 'r' : '-',
+ flags & VM_WRITE ? 'w' : '-',
+ flags & VM_EXEC ? 'x' : '-',
+ flags & VM_MAYSHARE ? 's' : 'p',
+ pgoff,
+ MAJOR(dev), MINOR(dev), ino);
+}
+
static void
show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
{
@@ -337,17 +357,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
/* We don't show the stack guard page in /proc/maps */
start = vma->vm_start;
end = vma->vm_end;
-
- seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
- seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
- start,
- end,
- flags & VM_READ ? 'r' : '-',
- flags & VM_WRITE ? 'w' : '-',
- flags & VM_EXEC ? 'x' : '-',
- flags & VM_MAYSHARE ? 's' : 'p',
- pgoff,
- MAJOR(dev), MINOR(dev), ino);
+ show_vma_header_prefix(m, start, end, flags, pgoff, dev, ino);
/*
* Print the dentry name for named mappings, and a
@@ -382,7 +392,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
name = "[stack]";
goto done;
}
-
if (vma_get_anon_name(vma)) {
seq_pad(m, ' ');
seq_print_vma_name(m, vma);
@@ -473,6 +482,7 @@ const struct file_operations proc_tid_maps_operations = {
#ifdef CONFIG_PROC_PAGE_MONITOR
struct mem_size_stats {
+ bool first;
unsigned long resident;
unsigned long shared_clean;
unsigned long shared_dirty;
@@ -484,7 +494,9 @@ struct mem_size_stats {
unsigned long swap;
unsigned long shared_hugetlb;
unsigned long private_hugetlb;
+ unsigned long first_vma_start;
u64 pss;
+ u64 pss_locked;
u64 swap_pss;
};
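pss and swap_pss are fixed-point accumulators: each mapped page contributes PAGE_SIZE << PSS_SHIFT divided by its map count, and the report shifts by (10 + PSS_SHIFT) to land on whole kilobytes. A small user-space sketch of that convention (PSS_SHIFT is 12 in task_mmu.c; the page counts here are made up):

#include <stdio.h>

#define PSS_SHIFT	12
#define PAGE_SIZE	4096ULL

int main(void)
{
	unsigned long long pss = 0;

	pss += (PAGE_SIZE << PSS_SHIFT) / 3;	/* page shared by 3 mappings */
	pss += (PAGE_SIZE << PSS_SHIFT);	/* private page, mapcount 1 */

	/* 10 for bytes->kB, PSS_SHIFT to drop the fractional bits */
	printf("Pss: %llu kB\n", pss >> (10 + PSS_SHIFT));
	return 0;
}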
@@ -694,69 +706,105 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
static int show_smap(struct seq_file *m, void *v, int is_pid)
{
+ struct proc_maps_private *priv = m->private;
struct vm_area_struct *vma = v;
- struct mem_size_stats mss;
+ struct mem_size_stats mss_stack;
+ struct mem_size_stats *mss;
struct mm_walk smaps_walk = {
.pmd_entry = smaps_pte_range,
#ifdef CONFIG_HUGETLB_PAGE
.hugetlb_entry = smaps_hugetlb_range,
#endif
.mm = vma->vm_mm,
- .private = &mss,
};
+ int ret = 0;
+ bool rollup_mode;
+ bool last_vma;
+
+ if (priv->rollup) {
+ rollup_mode = true;
+ mss = priv->rollup;
+ if (mss->first) {
+ mss->first_vma_start = vma->vm_start;
+ mss->first = false;
+ }
+ last_vma = !m_next_vma(priv, vma);
+ } else {
+ rollup_mode = false;
+ memset(&mss_stack, 0, sizeof(mss_stack));
+ mss = &mss_stack;
+ }
+
+ smaps_walk.private = mss;
- memset(&mss, 0, sizeof mss);
/* mmap_sem is held in m_start */
walk_page_vma(vma, &smaps_walk);
+ if (vma->vm_flags & VM_LOCKED)
+ mss->pss_locked += mss->pss;
+
+ if (!rollup_mode) {
+ show_map_vma(m, vma, is_pid);
+ } else if (last_vma) {
+ show_vma_header_prefix(
+ m, mss->first_vma_start, vma->vm_end, 0, 0, 0, 0);
+ seq_pad(m, ' ');
+ seq_puts(m, "[rollup]\n");
+ } else {
+ ret = SEQ_SKIP;
+ }
- show_map_vma(m, vma, is_pid);
-
- if (vma_get_anon_name(vma)) {
+ if (!rollup_mode && vma_get_anon_name(vma)) {
seq_puts(m, "Name: ");
seq_print_vma_name(m, vma);
seq_putc(m, '\n');
}
- seq_printf(m,
- "Size: %8lu kB\n"
- "Rss: %8lu kB\n"
- "Pss: %8lu kB\n"
- "Shared_Clean: %8lu kB\n"
- "Shared_Dirty: %8lu kB\n"
- "Private_Clean: %8lu kB\n"
- "Private_Dirty: %8lu kB\n"
- "Referenced: %8lu kB\n"
- "Anonymous: %8lu kB\n"
- "AnonHugePages: %8lu kB\n"
- "Shared_Hugetlb: %8lu kB\n"
- "Private_Hugetlb: %7lu kB\n"
- "Swap: %8lu kB\n"
- "SwapPss: %8lu kB\n"
- "KernelPageSize: %8lu kB\n"
- "MMUPageSize: %8lu kB\n"
- "Locked: %8lu kB\n",
- (vma->vm_end - vma->vm_start) >> 10,
- mss.resident >> 10,
- (unsigned long)(mss.pss >> (10 + PSS_SHIFT)),
- mss.shared_clean >> 10,
- mss.shared_dirty >> 10,
- mss.private_clean >> 10,
- mss.private_dirty >> 10,
- mss.referenced >> 10,
- mss.anonymous >> 10,
- mss.anonymous_thp >> 10,
- mss.shared_hugetlb >> 10,
- mss.private_hugetlb >> 10,
- mss.swap >> 10,
- (unsigned long)(mss.swap_pss >> (10 + PSS_SHIFT)),
- vma_kernel_pagesize(vma) >> 10,
- vma_mmu_pagesize(vma) >> 10,
- (vma->vm_flags & VM_LOCKED) ?
- (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0);
-
- show_smap_vma_flags(m, vma);
+ if (!rollup_mode)
+ seq_printf(m,
+ "Size: %8lu kB\n"
+ "KernelPageSize: %8lu kB\n"
+ "MMUPageSize: %8lu kB\n",
+ (vma->vm_end - vma->vm_start) >> 10,
+ vma_kernel_pagesize(vma) >> 10,
+ vma_mmu_pagesize(vma) >> 10);
+
+ if (!rollup_mode || last_vma)
+ seq_printf(m,
+ "Rss: %8lu kB\n"
+ "Pss: %8lu kB\n"
+ "Shared_Clean: %8lu kB\n"
+ "Shared_Dirty: %8lu kB\n"
+ "Private_Clean: %8lu kB\n"
+ "Private_Dirty: %8lu kB\n"
+ "Referenced: %8lu kB\n"
+ "Anonymous: %8lu kB\n"
+ "AnonHugePages: %8lu kB\n"
+ "Shared_Hugetlb: %8lu kB\n"
+ "Private_Hugetlb: %7lu kB\n"
+ "Swap: %8lu kB\n"
+ "SwapPss: %8lu kB\n"
+ "Locked: %8lu kB\n",
+ mss->resident >> 10,
+ (unsigned long)(mss->pss >> (10 + PSS_SHIFT)),
+ mss->shared_clean >> 10,
+ mss->shared_dirty >> 10,
+ mss->private_clean >> 10,
+ mss->private_dirty >> 10,
+ mss->referenced >> 10,
+ mss->anonymous >> 10,
+ mss->anonymous_thp >> 10,
+ mss->shared_hugetlb >> 10,
+ mss->private_hugetlb >> 10,
+ mss->swap >> 10,
+ (unsigned long)(mss->swap_pss >> (10 + PSS_SHIFT)),
+ (unsigned long)(mss->pss >> (10 + PSS_SHIFT)));
+
+ if (!rollup_mode) {
+ show_smap_vma_flags(m, vma);
+ }
m_cache_vma(m, vma);
- return 0;
+ return ret;
}
static int show_pid_smap(struct seq_file *m, void *v)
@@ -788,6 +836,25 @@ static int pid_smaps_open(struct inode *inode, struct file *file)
return do_maps_open(inode, file, &proc_pid_smaps_op);
}
+static int pid_smaps_rollup_open(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq;
+ struct proc_maps_private *priv;
+ int ret = do_maps_open(inode, file, &proc_pid_smaps_op);
+
+ if (ret < 0)
+ return ret;
+ seq = file->private_data;
+ priv = seq->private;
+ priv->rollup = kzalloc(sizeof(*priv->rollup), GFP_KERNEL);
+ if (!priv->rollup) {
+ proc_map_release(inode, file);
+ return -ENOMEM;
+ }
+ priv->rollup->first = true;
+ return 0;
+}
+
static int tid_smaps_open(struct inode *inode, struct file *file)
{
return do_maps_open(inode, file, &proc_tid_smaps_op);
@@ -800,6 +867,13 @@ const struct file_operations proc_pid_smaps_operations = {
.release = proc_map_release,
};
+const struct file_operations proc_pid_smaps_rollup_operations = {
+ .open = pid_smaps_rollup_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = proc_map_release,
+};
+
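Unlike plain smaps, the rollup file still walks every VMA but prints one synthetic header ("[rollup]") followed by process-wide totals, so consumers issue a single read instead of parsing thousands of per-VMA stanzas. A minimal consumer sketch (field names match the seq_printf() format above):

#include <stdio.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/self/smaps_rollup", "r");

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* header, then Rss:/Pss:/... totals */
	fclose(f);
	return 0;
}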
const struct file_operations proc_tid_smaps_operations = {
.open = tid_smaps_open,
.read = seq_read,
@@ -1433,6 +1507,241 @@ const struct file_operations proc_pagemap_operations = {
};
#endif /* CONFIG_PROC_PAGE_MONITOR */
+#ifdef CONFIG_PROCESS_RECLAIM
+static int reclaim_pte_range(pmd_t *pmd, unsigned long addr,
+ unsigned long end, struct mm_walk *walk)
+{
+ struct reclaim_param *rp = walk->private;
+ struct vm_area_struct *vma = rp->vma;
+ pte_t *pte, ptent;
+ spinlock_t *ptl;
+ struct page *page;
+ LIST_HEAD(page_list);
+ int isolated;
+ int reclaimed;
+
+ split_huge_page_pmd(vma, addr, pmd);
+ if (pmd_trans_unstable(pmd) || !rp->nr_to_reclaim)
+ return 0;
+cont:
+ isolated = 0;
+ pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+ for (; addr != end; pte++, addr += PAGE_SIZE) {
+ ptent = *pte;
+ if (!pte_present(ptent))
+ continue;
+
+ page = vm_normal_page(vma, addr, ptent);
+ if (!page)
+ continue;
+
+ if (isolate_lru_page(page))
+ continue;
+
+ list_add(&page->lru, &page_list);
+ inc_zone_page_state(page, NR_ISOLATED_ANON +
+ page_is_file_cache(page));
+ isolated++;
+ rp->nr_scanned++;
+ if ((isolated >= SWAP_CLUSTER_MAX) || !rp->nr_to_reclaim)
+ break;
+ }
+ pte_unmap_unlock(pte - 1, ptl);
+ reclaimed = reclaim_pages_from_list(&page_list, vma);
+ rp->nr_reclaimed += reclaimed;
+ rp->nr_to_reclaim -= reclaimed;
+ if (rp->nr_to_reclaim < 0)
+ rp->nr_to_reclaim = 0;
+
+ if (rp->nr_to_reclaim && (addr != end))
+ goto cont;
+
+ cond_resched();
+ return 0;
+}
+
+enum reclaim_type {
+ RECLAIM_FILE,
+ RECLAIM_ANON,
+ RECLAIM_ALL,
+ RECLAIM_RANGE,
+};
+
+struct reclaim_param reclaim_task_anon(struct task_struct *task,
+ int nr_to_reclaim)
+{
+ struct mm_struct *mm;
+ struct vm_area_struct *vma;
+ struct mm_walk reclaim_walk = {};
+ struct reclaim_param rp;
+
+ rp.nr_reclaimed = 0;
+ rp.nr_scanned = 0;
+ get_task_struct(task);
+ mm = get_task_mm(task);
+ if (!mm)
+ goto out;
+
+ reclaim_walk.mm = mm;
+ reclaim_walk.pmd_entry = reclaim_pte_range;
+
+ rp.nr_to_reclaim = nr_to_reclaim;
+ reclaim_walk.private = &rp;
+
+ down_read(&mm->mmap_sem);
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ if (is_vm_hugetlb_page(vma))
+ continue;
+
+ if (vma->vm_file)
+ continue;
+
+ if (vma->vm_flags & VM_LOCKED)
+ continue;
+
+ if (!rp.nr_to_reclaim)
+ break;
+
+ rp.vma = vma;
+ walk_page_range(vma->vm_start, vma->vm_end,
+ &reclaim_walk);
+ }
+
+ flush_tlb_mm(mm);
+ up_read(&mm->mmap_sem);
+ mmput(mm);
+out:
+ put_task_struct(task);
+ return rp;
+}
+
+static ssize_t reclaim_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct task_struct *task;
+ char buffer[200];
+ struct mm_struct *mm;
+ struct vm_area_struct *vma;
+ enum reclaim_type type;
+ char *type_buf;
+ struct mm_walk reclaim_walk = {};
+ unsigned long start = 0;
+ unsigned long end = 0;
+ struct reclaim_param rp;
+
+ memset(buffer, 0, sizeof(buffer));
+ if (count > sizeof(buffer) - 1)
+ count = sizeof(buffer) - 1;
+
+ if (copy_from_user(buffer, buf, count))
+ return -EFAULT;
+
+ type_buf = strstrip(buffer);
+ if (!strcmp(type_buf, "file"))
+ type = RECLAIM_FILE;
+ else if (!strcmp(type_buf, "anon"))
+ type = RECLAIM_ANON;
+ else if (!strcmp(type_buf, "all"))
+ type = RECLAIM_ALL;
+ else if (isdigit(*type_buf))
+ type = RECLAIM_RANGE;
+ else
+ goto out_err;
+
+ if (type == RECLAIM_RANGE) {
+ char *token;
+ unsigned long long len, len_in, tmp;
+ token = strsep(&type_buf, " ");
+ if (!token)
+ goto out_err;
+ tmp = memparse(token, &token);
+ if (tmp & ~PAGE_MASK || tmp > ULONG_MAX)
+ goto out_err;
+ start = tmp;
+
+ token = strsep(&type_buf, " ");
+ if (!token)
+ goto out_err;
+ len_in = memparse(token, &token);
+ len = (len_in + ~PAGE_MASK) & PAGE_MASK;
+ if (len > ULONG_MAX)
+ goto out_err;
+ /*
+ * Check to see whether len was rounded up from a small
+ * negative value to zero.
+ */
+ if (len_in && !len)
+ goto out_err;
+
+ end = start + len;
+ if (end < start)
+ goto out_err;
+ }
+
+ task = get_proc_task(file->f_path.dentry->d_inode);
+ if (!task)
+ return -ESRCH;
+
+ mm = get_task_mm(task);
+ if (!mm)
+ goto out;
+
+ reclaim_walk.mm = mm;
+ reclaim_walk.pmd_entry = reclaim_pte_range;
+
+ rp.nr_to_reclaim = ~0;
+ rp.nr_reclaimed = 0;
+ reclaim_walk.private = &rp;
+
+ down_read(&mm->mmap_sem);
+ if (type == RECLAIM_RANGE) {
+ vma = find_vma(mm, start);
+ while (vma) {
+ if (vma->vm_start > end)
+ break;
+ if (is_vm_hugetlb_page(vma))
+ continue;
+
+ rp.vma = vma;
+ walk_page_range(max(vma->vm_start, start),
+ min(vma->vm_end, end),
+ &reclaim_walk);
+ vma = vma->vm_next;
+ }
+ } else {
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ if (is_vm_hugetlb_page(vma))
+ continue;
+
+ if (type == RECLAIM_ANON && vma->vm_file)
+ continue;
+
+ if (type == RECLAIM_FILE && !vma->vm_file)
+ continue;
+
+ rp.vma = vma;
+ walk_page_range(vma->vm_start, vma->vm_end,
+ &reclaim_walk);
+ }
+ }
+
+ flush_tlb_mm(mm);
+ up_read(&mm->mmap_sem);
+ mmput(mm);
+out:
+ put_task_struct(task);
+ return count;
+
+out_err:
+ return -EINVAL;
+}
+
+const struct file_operations proc_reclaim_operations = {
+ .write = reclaim_write,
+ .llseek = noop_llseek,
+};
+#endif
+
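The write-only reclaim file accepts "file", "anon", "all", or a "<start> <length>" pair parsed with memparse() (so K/M/G suffixes work). A hypothetical caller; the helper name is ours, while the path and keywords come from the handler above:

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* Ask the kernel to reclaim pages of one task; "what" is "file",
 * "anon", "all", or e.g. "0x70000000 4M" for a range. */
static int proc_reclaim(int pid, const char *what)
{
	char path[64];
	FILE *f;

	snprintf(path, sizeof(path), "/proc/%d/reclaim", pid);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fputs(what, f);
	return fclose(f);
}

int main(int argc, char **argv)
{
	int pid = argc > 1 ? atoi(argv[1]) : (int)getpid();

	return proc_reclaim(pid, "anon") ? 1 : 0;
}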
#ifdef CONFIG_NUMA
struct numa_maps {
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index ac6c78fe19cf..7a380208b006 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -36,7 +36,7 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/uaccess.h>
-#include <linux/syslog.h>
+#include <linux/vmalloc.h>
#include "internal.h"
@@ -121,18 +121,6 @@ static const struct seq_operations pstore_ftrace_seq_ops = {
.show = pstore_ftrace_seq_show,
};
-static int pstore_check_syslog_permissions(struct pstore_private *ps)
-{
- switch (ps->type) {
- case PSTORE_TYPE_DMESG:
- case PSTORE_TYPE_CONSOLE:
- return check_syslog_permissions(SYSLOG_ACTION_READ_ALL,
- SYSLOG_FROM_READER);
- default:
- return 0;
- }
-}
-
static ssize_t pstore_file_read(struct file *file, char __user *userbuf,
size_t count, loff_t *ppos)
{
@@ -151,10 +139,6 @@ static int pstore_file_open(struct inode *inode, struct file *file)
int err;
const struct seq_operations *sops = NULL;
- err = pstore_check_syslog_permissions(ps);
- if (err)
- return err;
-
if (ps->type == PSTORE_TYPE_FTRACE)
sops = &pstore_ftrace_seq_ops;
@@ -191,11 +175,6 @@ static const struct file_operations pstore_file_operations = {
static int pstore_unlink(struct inode *dir, struct dentry *dentry)
{
struct pstore_private *p = d_inode(dentry)->i_private;
- int err;
-
- err = pstore_check_syslog_permissions(p);
- if (err)
- return err;
if (p->psi->erase)
p->psi->erase(p->type, p->id, p->count,
@@ -216,7 +195,7 @@ static void pstore_evict_inode(struct inode *inode)
spin_lock_irqsave(&allpstore_lock, flags);
list_del(&p->list);
spin_unlock_irqrestore(&allpstore_lock, flags);
- kfree(p);
+ vfree(p);
}
}
@@ -328,7 +307,7 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count,
goto fail;
inode->i_mode = S_IFREG | 0444;
inode->i_fop = &pstore_file_operations;
- private = kmalloc(sizeof *private + size, GFP_KERNEL);
+ private = vmalloc(sizeof *private + size);
if (!private)
goto fail_alloc;
private->type = type;
@@ -402,7 +381,7 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count,
fail_lockedalloc:
mutex_unlock(&d_inode(root)->i_mutex);
- kfree(private);
+ vfree(private);
fail_alloc:
iput(inode);
@@ -429,7 +408,7 @@ static int pstore_fill_super(struct super_block *sb, void *data, int silent)
inode = pstore_get_inode(sb);
if (inode) {
- inode->i_mode = S_IFDIR | 0755;
+ inode->i_mode = S_IFDIR | 0750;
inode->i_op = &pstore_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
inc_nlink(inode);
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 6fbfa8189451..21bf055bdebf 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -36,6 +36,7 @@
#include <linux/hardirq.h>
#include <linux/jiffies.h>
#include <linux/workqueue.h>
+#include <linux/vmalloc.h>
#include "internal.h"
@@ -580,7 +581,7 @@ void pstore_get_records(int quiet)
big_oops_buf_sz);
if (unzipped_len > 0) {
- kfree(buf);
+ vfree(buf);
buf = big_oops_buf;
size = unzipped_len;
compressed = false;
@@ -594,7 +595,7 @@ void pstore_get_records(int quiet)
compressed, (size_t)size, time, psi);
if (unzipped_len < 0) {
/* Free buffer other than big oops */
- kfree(buf);
+ vfree(buf);
buf = NULL;
} else
unzipped_len = -1;
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 283b04218997..2a004480fc4f 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -233,7 +233,7 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
/* ECC correction notice */
ecc_notice_size = persistent_ram_ecc_string(prz, NULL, 0);
- *buf = kmalloc(size + ecc_notice_size + 1, GFP_KERNEL);
+ *buf = vmalloc(size + ecc_notice_size + 1);
if (*buf == NULL)
return -ENOMEM;
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index 11e558efd61e..cdf82f4b5698 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -291,7 +291,7 @@ void persistent_ram_save_old(struct persistent_ram_zone *prz)
if (!prz->old_log) {
persistent_ram_ecc_old(prz);
- prz->old_log = kmalloc(size, GFP_KERNEL);
+ prz->old_log = vmalloc(size);
}
if (!prz->old_log) {
pr_err("failed to allocate buffer\n");
@@ -377,7 +377,7 @@ void *persistent_ram_old(struct persistent_ram_zone *prz)
void persistent_ram_free_old(struct persistent_ram_zone *prz)
{
- kfree(prz->old_log);
+ vfree(prz->old_log);
prz->old_log = NULL;
prz->old_log_size = 0;
}
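A buffer must be released by the allocator family that created it, which is why these hunks convert kmalloc()/kfree() pairs wholesale: pstore records can be large, and physically contiguous kmalloc may fail where vmalloc, which only needs virtually contiguous pages, succeeds. A minimal module-context sketch of the pairing the changes preserve:

#include <linux/vmalloc.h>

/* Large, possibly multi-page record buffer: vmalloc on the way in,
 * vfree on the way out; calling kfree() on vmalloc'd memory is a bug. */
static void *record_buf_alloc(size_t size)
{
	return vmalloc(size);
}

static void record_buf_free(void *buf)
{
	vfree(buf);	/* vfree(NULL), like kfree(NULL), is a no-op */
}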
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index eb611bdd4725..3ebc70167e41 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -687,7 +687,8 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mod
reiserfs_update_inode_transaction(inode);
reiserfs_update_inode_transaction(dir);
- d_instantiate_new(dentry, inode);
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
retval = journal_end(&th);
out_failed:
@@ -770,7 +771,8 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode
goto out_failed;
}
- d_instantiate_new(dentry, inode);
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
retval = journal_end(&th);
out_failed:
@@ -869,7 +871,8 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
/* the above add_entry did not update dir's stat data */
reiserfs_update_sd(&th, dir);
- d_instantiate_new(dentry, inode);
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
retval = journal_end(&th);
out_failed:
reiserfs_write_unlock(dir->i_sb);
@@ -1183,7 +1186,8 @@ static int reiserfs_symlink(struct inode *parent_dir,
goto out_failed;
}
- d_instantiate_new(dentry, inode);
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
retval = journal_end(&th);
out_failed:
reiserfs_write_unlock(parent_dir->i_sb);
diff --git a/fs/sdcardfs/dentry.c b/fs/sdcardfs/dentry.c
index 3795d2c26915..eb279de02ffb 100644
--- a/fs/sdcardfs/dentry.c
+++ b/fs/sdcardfs/dentry.c
@@ -123,6 +123,12 @@ out:
return err;
}
+/* 1 = delete, 0 = cache */
+static int sdcardfs_d_delete(const struct dentry *d)
+{
+ return SDCARDFS_SB(d->d_sb)->options.nocache ? 1 : 0;
+}
+
static void sdcardfs_d_release(struct dentry *dentry)
{
if (!dentry || !dentry->d_fsdata)
@@ -182,6 +188,7 @@ static void sdcardfs_canonical_path(const struct path *path,
const struct dentry_operations sdcardfs_ci_dops = {
.d_revalidate = sdcardfs_d_revalidate,
+ .d_delete = sdcardfs_d_delete,
.d_release = sdcardfs_d_release,
.d_hash = sdcardfs_hash_ci,
.d_compare = sdcardfs_cmp_ci,
diff --git a/fs/sdcardfs/derived_perm.c b/fs/sdcardfs/derived_perm.c
index 85126ec6533c..1f142fed5a5e 100644
--- a/fs/sdcardfs/derived_perm.c
+++ b/fs/sdcardfs/derived_perm.c
@@ -62,6 +62,7 @@ void get_derived_permission_new(struct dentry *parent, struct dentry *dentry,
int err;
struct qstr q_Android = QSTR_LITERAL("Android");
struct qstr q_data = QSTR_LITERAL("data");
+ struct qstr q_sandbox = QSTR_LITERAL("sandbox");
struct qstr q_obb = QSTR_LITERAL("obb");
struct qstr q_media = QSTR_LITERAL("media");
struct qstr q_cache = QSTR_LITERAL("cache");
@@ -110,6 +111,9 @@ void get_derived_permission_new(struct dentry *parent, struct dentry *dentry,
if (qstr_case_eq(name, &q_data)) {
/* App-specific directories inside; let anyone traverse */
info->data->perm = PERM_ANDROID_DATA;
+ } else if (qstr_case_eq(name, &q_sandbox)) {
+ /* App-specific directories inside; let anyone traverse */
+ info->data->perm = PERM_ANDROID_DATA;
} else if (qstr_case_eq(name, &q_obb)) {
/* App-specific directories inside; let anyone traverse */
info->data->perm = PERM_ANDROID_OBB;
@@ -356,7 +360,8 @@ int need_graft_path(struct dentry *dentry)
struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
struct qstr obb = QSTR_LITERAL("obb");
- if (parent_info->data->perm == PERM_ANDROID &&
+ if (!sbi->options.unshared_obb &&
+ parent_info->data->perm == PERM_ANDROID &&
qstr_case_eq(&dentry->d_name, &obb)) {
/* /Android/obb is the base obbpath of DERIVED_UNIFIED */
diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c
index 6c0039284ae0..ab0952f13510 100644
--- a/fs/sdcardfs/inode.c
+++ b/fs/sdcardfs/inode.c
@@ -525,7 +525,7 @@ static const char *sdcardfs_follow_link(struct dentry *dentry, void **cookie)
static int sdcardfs_permission_wrn(struct inode *inode, int mask)
{
- WARN_RATELIMIT(1, "sdcardfs does not support permission. Use permission2.\n");
+ pr_debug("sdcardfs does not support permission. Use permission2.\n");
return -EINVAL;
}
diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c
index 30e0c431a1ea..6e44903b3d2f 100644
--- a/fs/sdcardfs/main.c
+++ b/fs/sdcardfs/main.c
@@ -34,6 +34,8 @@ enum {
Opt_reserved_mb,
Opt_gid_derivation,
Opt_default_normal,
+ Opt_nocache,
+ Opt_unshared_obb,
Opt_err,
};
@@ -47,7 +49,9 @@ static const match_table_t sdcardfs_tokens = {
{Opt_multiuser, "multiuser"},
{Opt_gid_derivation, "derive_gid"},
{Opt_default_normal, "default_normal"},
+ {Opt_unshared_obb, "unshared_obb"},
{Opt_reserved_mb, "reserved_mb=%u"},
+ {Opt_nocache, "nocache"},
{Opt_err, NULL}
};
@@ -71,6 +75,7 @@ static int parse_options(struct super_block *sb, char *options, int silent,
/* by default, gid derivation is off */
opts->gid_derivation = false;
opts->default_normal = false;
+ opts->nocache = false;
*debug = 0;
@@ -128,6 +133,12 @@ static int parse_options(struct super_block *sb, char *options, int silent,
case Opt_default_normal:
opts->default_normal = true;
break;
+ case Opt_nocache:
+ opts->nocache = true;
+ break;
+ case Opt_unshared_obb:
+ opts->unshared_obb = true;
+ break;
/* unknown option */
default:
if (!silent)
@@ -181,13 +192,16 @@ int parse_options_remount(struct super_block *sb, char *options, int silent,
return 0;
vfsopts->mask = option;
break;
+ case Opt_unshared_obb:
case Opt_default_normal:
case Opt_multiuser:
case Opt_userid:
case Opt_fsuid:
case Opt_fsgid:
case Opt_reserved_mb:
- pr_warn("Option \"%s\" can't be changed during remount\n", p);
+ case Opt_gid_derivation:
+ if (!silent)
+ pr_warn("Option \"%s\" can't be changed during remount\n", p);
break;
/* unknown option */
default:
@@ -264,7 +278,7 @@ static int sdcardfs_read_super(struct vfsmount *mnt, struct super_block *sb,
pr_info("sdcardfs: dev_name -> %s\n", dev_name);
pr_info("sdcardfs: options -> %s\n", (char *)raw_data);
- pr_info("sdcardfs: mnt -> %p\n", mnt);
+ pr_info("sdcardfs: mnt -> %pK\n", mnt);
/* parse lower path */
err = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
@@ -295,6 +309,13 @@ static int sdcardfs_read_super(struct vfsmount *mnt, struct super_block *sb,
atomic_inc(&lower_sb->s_active);
sdcardfs_set_lower_super(sb, lower_sb);
+ sb->s_stack_depth = lower_sb->s_stack_depth + 1;
+ if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
+ pr_err("sdcardfs: maximum fs stacking depth exceeded\n");
+ err = -EINVAL;
+ goto out_sput;
+ }
+
/* inherit maxbytes from lower file system */
sb->s_maxbytes = lower_sb->s_maxbytes;
diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h
index 99227a07a8d6..2bee162921ce 100644
--- a/fs/sdcardfs/sdcardfs.h
+++ b/fs/sdcardfs/sdcardfs.h
@@ -197,7 +197,9 @@ struct sdcardfs_mount_options {
bool multiuser;
bool gid_derivation;
bool default_normal;
+ bool unshared_obb;
unsigned int reserved_mb;
+ bool nocache;
};
struct sdcardfs_vfsmount_options {
diff --git a/fs/sdcardfs/super.c b/fs/sdcardfs/super.c
index cffcdb11cb8a..45cfa73b285e 100644
--- a/fs/sdcardfs/super.c
+++ b/fs/sdcardfs/super.c
@@ -144,7 +144,7 @@ static int sdcardfs_remount_fs2(struct vfsmount *mnt, struct super_block *sb,
pr_err("sdcardfs: remount flags 0x%x unsupported\n", *flags);
err = -EINVAL;
}
- pr_info("Remount options were %s for vfsmnt %p.\n", options, mnt);
+ pr_info("Remount options were %s for vfsmnt %pK.\n", options, mnt);
err = parse_options_remount(sb, options, *flags & ~MS_SILENT, mnt->data);
@@ -311,6 +311,10 @@ static int sdcardfs_show_options(struct vfsmount *mnt, struct seq_file *m,
seq_puts(m, ",default_normal");
if (opts->reserved_mb != 0)
seq_printf(m, ",reserved=%uMB", opts->reserved_mb);
+ if (opts->nocache)
+ seq_puts(m, ",nocache");
+ if (opts->unshared_obb)
+ seq_puts(m, ",unshared_obb");
return 0;
};
diff --git a/fs/sdfat/Kconfig b/fs/sdfat/Kconfig
new file mode 100644
index 000000000000..08b12f7f768b
--- /dev/null
+++ b/fs/sdfat/Kconfig
@@ -0,0 +1,126 @@
+config SDFAT_FS
+ tristate "sdFAT filesystem support"
+ select NLS
+ select NLS_UTF8
+ select NLS_CODEPAGE_437
+ select NLS_ISO8859_1
+ help
+ If you want to use the sdFAT file system, then you must say Y or M
+ here to include sdFAT support.
+ sdFAT is a unified FAT-based file system which supports not only
+ fat12/16/32 with vfat but also exfat. sdFAT supports the winnt short-name rule.
+ (winnt: emulate the Windows NT rule for display/create.)
+
+ To compile this as a module, choose M here: the module will be called
+ sdfat_core and sdfat_fs.
+
+config SDFAT_USE_FOR_EXFAT
+ bool "Register sdFAT as exFAT"
+ default y
+ depends on SDFAT_FS && !EXFAT_FS
+ help
+ If you want to register sdFAT as available for exFAT, say Y.
+
+config SDFAT_USE_FOR_VFAT
+ bool "Register sdFAT as VFAT"
+ default y
+ depends on SDFAT_FS && !VFAT_FS
+ help
+ If you want to register sdFAT as available for VFAT, say Y.
+
+config SDFAT_DELAYED_META_DIRTY
+ bool "Enable delayed metadata dirty"
+ default y
+ depends on SDFAT_FS
+ help
+ If you enable this feature, metadata (FAT/directory entries) is
+ updated by a flush thread.
+
+config SDFAT_SUPPORT_DIR_SYNC
+ bool "Enable supporting dir sync"
+ default n
+ depends on SDFAT_FS
+ help
+ If you enable this feature, modifications made by directory
+ operations are written to storage synchronously.
+
+config SDFAT_DEFAULT_CODEPAGE
+ int "Default codepage for sdFAT"
+ default 437
+ depends on SDFAT_FS
+ help
+ This option should be set to the codepage of your sdFAT filesystems.
+
+config SDFAT_DEFAULT_IOCHARSET
+ string "Default iocharset for sdFAT"
+ default "utf8"
+ depends on SDFAT_FS
+ help
+ Set this to the default input/output character set you'd
+ like sdFAT to use. It should probably match the character set
+ that most of your sdFAT filesystems use, and can be overridden
+ with the "iocharset" mount option for sdFAT filesystems.
+
+config SDFAT_CHECK_RO_ATTR
+ bool "Check read-only attribute"
+ default n
+ depends on SDFAT_FS
+
+config SDFAT_ALIGNED_MPAGE_WRITE
+ bool "Enable supporting aligned mpage_write"
+ default y
+ depends on SDFAT_FS
+
+config SDFAT_VIRTUAL_XATTR
+ bool "Virtual xattr support for sdFAT"
+ default y
+ depends on SDFAT_FS
+ help
+ To support virtual xattr.
+
+config SDFAT_VIRTUAL_XATTR_SELINUX_LABEL
+ string "Default string for SELinux label"
+ default "u:object_r:sdcard_external:s0"
+ depends on SDFAT_FS && SDFAT_VIRTUAL_XATTR
+ help
+ Set this to the default string for SELinux label.
+
+config SDFAT_SUPPORT_STLOG
+ bool "Enable storage log"
+ default y
+ depends on SDFAT_FS && PROC_STLOG
+
+config SDFAT_DEBUG
+ bool "enable debug features"
+ depends on SDFAT_FS
+ default y
+
+config SDFAT_DBG_IOCTL
+ bool "enable debug-ioctl features"
+ depends on SDFAT_FS && SDFAT_DEBUG
+ default n
+
+config SDFAT_DBG_MSG
+ bool "enable debug messages"
+ depends on SDFAT_FS && SDFAT_DEBUG
+ default y
+
+config SDFAT_DBG_BUGON
+ bool "enable strict BUG_ON() for debugging"
+ depends on SDFAT_FS && SDFAT_DEBUG
+ default n
+
+config SDFAT_DBG_WARNON
+ bool "enable strict WARN_ON() for debugging"
+ depends on SDFAT_FS && SDFAT_DEBUG
+ default n
+
+config SDFAT_STATISTICS
+ bool "enable statistics for bigdata"
+ depends on SDFAT_FS
+ default y
+
+config SDFAT_UEVENT
+ bool "enable uevent"
+ depends on SDFAT_FS
+ default y
diff --git a/fs/sdfat/LICENSE b/fs/sdfat/LICENSE
new file mode 100644
index 000000000000..d159169d1050
--- /dev/null
+++ b/fs/sdfat/LICENSE
@@ -0,0 +1,339 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
diff --git a/fs/sdfat/Makefile b/fs/sdfat/Makefile
new file mode 100644
index 000000000000..a5cd0858c6ca
--- /dev/null
+++ b/fs/sdfat/Makefile
@@ -0,0 +1,24 @@
+#
+# Makefile for the Linux FAT12/16/32(VFAT)/64(exFAT) filesystem driver.
+#
+
+obj-$(CONFIG_SDFAT_FS) += sdfat_fs.o
+
+sdfat_fs-objs := sdfat.o core.o core_fat.o core_exfat.o api.o blkdev.o \
+ fatent.o amap_smart.o cache.o dfr.o nls.o misc.o \
+ mpage.o extent.o
+
+sdfat_fs-$(CONFIG_SDFAT_VIRTUAL_XATTR) += xattr.o
+sdfat_fs-$(CONFIG_SDFAT_STATISTICS) += statistics.o
+
+
+all:
+ make -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules
+
+clean:
+ make -C /lib/modules/$(shell uname -r)/build M=$(PWD) clean
+
+cscope:
+ rm -rf cscope.files
+ find $(PWD) \( -name '*.c' -o -name '*.cpp' -o -name '*.cc' -o -name '*.h' -o -name '*.s' -o -name '*.S' \) -print > cscope.files
+ cscope
diff --git a/fs/sdfat/README.md b/fs/sdfat/README.md
new file mode 100644
index 000000000000..f291e8f1ae00
--- /dev/null
+++ b/fs/sdfat/README.md
@@ -0,0 +1,19 @@
+sdFAT FS support for Linux Kernel 4.4
+=====================================
+
+sdFAT is a unified FAT-based file system which supports not only fat12/16/32
+with vfat but also exfat. sdFAT supports the winnt short-name rule.
+
+Suggested Kernel config:
+
+ CONFIG_SDFAT_FS=y
+ CONFIG_SDFAT_DELAYED_META_DIRTY=y
+ CONFIG_SDFAT_SUPPORT_DIR_SYNC=y
+ CONFIG_SDFAT_DEFAULT_CODEPAGE=437
+ CONFIG_SDFAT_DEFAULT_IOCHARSET="utf8"
+ CONFIG_SDFAT_ALIGNED_MPAGE_WRITE=y
+ CONFIG_SDFAT_VIRTUAL_XATTR=y
+ CONFIG_SDFAT_VIRTUAL_XATTR_SELINUX_LABEL="u:object_r:vfat:s0"
+ CONFIG_SDFAT_DEBUG=y
+ CONFIG_SDFAT_DBG_MSG=y
+ CONFIG_SDFAT_STATISTICS=y
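For completeness, a hypothetical mount of an sdFAT volume once the driver is built in; the device node, mount point, and option values are illustrative and not taken from the patch:

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	/* mount(2): source, target, fstype, flags, fs-specific options */
	if (mount("/dev/mmcblk1p1", "/mnt/sdcard", "sdfat", 0,
		  "codepage=437,iocharset=utf8") != 0) {
		perror("mount sdfat");
		return 1;
	}
	return 0;
}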
diff --git a/fs/sdfat/amap_smart.c b/fs/sdfat/amap_smart.c
new file mode 100644
index 000000000000..b556f868d76e
--- /dev/null
+++ b/fs/sdfat/amap_smart.c
@@ -0,0 +1,1314 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/************************************************************************/
+/* */
+/* PROJECT : exFAT & FAT12/16/32 File System */
+/* FILE : amap_smart.c */
+/* PURPOSE : FAT32 Smart allocation code for sdFAT */
+/* */
+/*----------------------------------------------------------------------*/
+/* NOTES */
+/* */
+/* */
+/************************************************************************/
+
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+#include "sdfat.h"
+#include "core.h"
+#include "amap_smart.h"
+
+/* AU list related functions */
+static inline void amap_list_del(struct list_head *entry)
+{
+ __list_del(entry->prev, entry->next);
+
+ /* Will be used to check whether the entry is a single (selected) entry */
+ entry->prev = NULL;
+ entry->next = NULL;
+}
+
+static inline int amap_insert_to_list(AU_INFO_T *au, struct slist_head *shead)
+{
+ struct slist_head *entry = &au->shead;
+
+ ASSERT(!entry->head);
+
+ entry->next = shead->next;
+ entry->head = shead;
+
+ shead->next = entry;
+
+ return 0;
+}
+
+static inline int amap_remove_from_list(AU_INFO_T *au, struct slist_head *shead)
+{
+ struct slist_head *entry = &au->shead;
+ struct slist_head *iter;
+
+ BUG_ON(entry->head != shead);
+
+ iter = shead;
+
+ while (iter->next) {
+ if (iter->next == entry) {
+ // iter->next = iter->next->next
+ iter->next = entry->next;
+
+ entry->next = NULL;
+ entry->head = NULL;
+ return 0;
+ }
+ iter = iter->next;
+ }
+
+ BUG_ON("Not reachable");
+}
+
+/* Full linear search => find the AU with the max. number of free clusters */
+static inline AU_INFO_T *amap_find_hot_au_largest(struct slist_head *shead)
+{
+ struct slist_head *iter;
+ uint16_t max_fclu = 0;
+ AU_INFO_T *entry, *ret = NULL;
+
+ ASSERT(shead->head == shead); /* singly-linked list head condition */
+ ASSERT(shead->next != shead);
+
+ iter = shead->next;
+
+ while (iter) {
+ entry = list_entry(iter, AU_INFO_T, shead);
+
+ if (entry->free_clusters > max_fclu) {
+ max_fclu = entry->free_clusters;
+ ret = entry;
+ }
+
+ iter = iter->next;
+ }
+
+ return ret;
+}
+
+/* Find partially used AU with max. number of fclu.
+ * If there is no partial AU available, pick a clean one
+ */
+static inline AU_INFO_T *amap_find_hot_au_partial(AMAP_T *amap)
+{
+ struct slist_head *iter;
+ uint16_t max_fclu = 0;
+ AU_INFO_T *entry, *ret = NULL;
+
+ iter = &amap->slist_hot;
+ ASSERT(iter->head == iter); /* singly-linked list head condition */
+ ASSERT(iter->next != iter);
+
+ iter = iter->next;
+
+ while (iter) {
+ entry = list_entry(iter, AU_INFO_T, shead);
+
+ if (entry->free_clusters > max_fclu) {
+ if (entry->free_clusters < amap->clusters_per_au) {
+ max_fclu = entry->free_clusters;
+ ret = entry;
+ } else {
+ if (!ret)
+ ret = entry;
+ }
+ }
+
+ iter = iter->next;
+ }
+
+ return ret;
+}
+
+
+
+
+/*
+ * Size-base AU management functions
+ */
+
+/*
+ * Add au into cold AU MAP
+ * au: an isolated (not in a list) AU data structure
+ */
+int amap_add_cold_au(AMAP_T *amap, AU_INFO_T *au)
+{
+ FCLU_NODE_T *fclu_node = NULL;
+
+ /* Check if a single entry */
+ BUG_ON(au->head.prev);
+
+ /* Ignore if the au is full */
+ if (!au->free_clusters)
+ return 0;
+
+ /* Find entry */
+ fclu_node = NODE(au->free_clusters, amap);
+
+ /* Insert to the list */
+ list_add_tail(&(au->head), &(fclu_node->head));
+
+ /* Update fclu_hint (Increase) */
+ if (au->free_clusters > amap->fclu_hint)
+ amap->fclu_hint = au->free_clusters;
+
+ return 0;
+}
+
+/*
+ * Remove an AU from AU MAP
+ */
+int amap_remove_cold_au(AMAP_T *amap, AU_INFO_T *au)
+{
+ struct list_head *prev = au->head.prev;
+
+ /* Single entries are not managed in lists */
+ if (!prev) {
+ BUG_ON(au->free_clusters > 0);
+ return 0;
+ }
+
+ /* remove from list */
+ amap_list_del(&(au->head));
+
+ return 0;
+}
+
+
+/* "Find" best fit AU
+ * returns NULL if there is no AU w/ enough free space.
+ *
+ * This function doesn't change AU status.
+ * The caller should call amap_remove_cold_au() if needed.
+ */
+AU_INFO_T *amap_find_cold_au_bestfit(AMAP_T *amap, uint16_t free_clusters)
+{
+ AU_INFO_T *au = NULL;
+ FCLU_NODE_T *fclu_iter;
+
+ if (free_clusters <= 0 || free_clusters > amap->clusters_per_au) {
+ EMSG("AMAP: amap_find_cold_au_bestfit / unexpected arg. (%d)\n",
+ free_clusters);
+ return NULL;
+ }
+
+ fclu_iter = NODE(free_clusters, amap);
+
+ if (amap->fclu_hint < free_clusters) {
+ /* There are no AUs with enough free_clusters */
+ return NULL;
+ }
+
+ /* Naive Hash management (++) */
+ do {
+ if (!list_empty(&fclu_iter->head)) {
+ struct list_head *first = fclu_iter->head.next;
+
+ au = list_entry(first, AU_INFO_T, head);
+
+ break;
+ }
+
+ fclu_iter++;
+ } while (fclu_iter < (amap->fclu_nodes + amap->clusters_per_au));
+
+
+ // BUG_ON(au->free_clusters < 0);
+ BUG_ON(au && (au->free_clusters > amap->clusters_per_au));
+
+ return au;
+}
+
+
+/* "Pop" best fit AU
+ *
+ * returns NULL if there is no AU w/ enough free space.
+ * The returned AU will not be in the list anymore.
+ */
+AU_INFO_T *amap_pop_cold_au_bestfit(AMAP_T *amap, uint16_t free_clusters)
+{
+ /* Naive implementation */
+ AU_INFO_T *au;
+
+ au = amap_find_cold_au_bestfit(amap, free_clusters);
+ if (au)
+ amap_remove_cold_au(amap, au);
+
+ return au;
+}
+
+
+
+/* Pop the AU with the largest free space
+ *
+ * search from 'start_fclu' to 0
+ * (target freecluster : -1 for each step)
+ * start_fclu = 0 means to search from the max. value
+ */
+AU_INFO_T *amap_pop_cold_au_largest(AMAP_T *amap, uint16_t start_fclu)
+{
+ AU_INFO_T *au = NULL;
+ FCLU_NODE_T *fclu_iter;
+
+ if (!start_fclu)
+ start_fclu = amap->clusters_per_au;
+ if (start_fclu > amap->clusters_per_au)
+ start_fclu = amap->clusters_per_au;
+
+ /* Use hint (search start point) */
+ if (amap->fclu_hint < start_fclu)
+ fclu_iter = NODE(amap->fclu_hint, amap);
+ else
+ fclu_iter = NODE(start_fclu, amap);
+
+ /* Naive Hash management */
+ do {
+ if (!list_empty(&fclu_iter->head)) {
+ struct list_head *first = fclu_iter->head.next;
+
+ au = list_entry(first, AU_INFO_T, head);
+ // BUG_ON((au < amap->entries) || ((amap->entries + amap->n_au) <= au));
+
+ amap_list_del(first);
+
+ // (Hint) Possible maximum value of free clusters (among cold)
+ /* if it wasn't the whole search, don't update fclu_hint */
+ if (start_fclu == amap->clusters_per_au)
+ amap->fclu_hint = au->free_clusters;
+
+ break;
+ }
+
+ fclu_iter--;
+ } while (amap->fclu_nodes <= fclu_iter);
+
+ return au;
+}
+
+
+
+/*
+ * ===============================================
+ * Allocation Map related functions
+ * ===============================================
+ */
+
+/* Create AMAP related data structure (mount time) */
+int amap_create(struct super_block *sb, u32 pack_ratio, u32 sect_per_au, u32 hidden_sect)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ AMAP_T *amap;
+ int total_used_clusters;
+ int n_au_table = 0;
+ int i, i_clu, i_au;
+ int i_au_root = -1, i_au_hot_from = INT_MAX;
+ u32 misaligned_sect = hidden_sect;
+ u64 tmp;
+
+ BUG_ON(!fsi->bd_opened);
+
+ if (fsi->amap)
+ return -EEXIST;
+
+ /* Check conditions */
+ if (fsi->vol_type != FAT32) {
+ sdfat_msg(sb, KERN_ERR, "smart allocation is only available "
+ "with fat32-fs");
+ return -ENOTSUPP;
+ }
+
+ if (fsi->num_sectors < AMAP_MIN_SUPPORT_SECTORS) {
+ sdfat_msg(sb, KERN_ERR, "smart allocation is only available "
+ "with sectors above %d", AMAP_MIN_SUPPORT_SECTORS);
+ return -ENOTSUPP;
+ }
+
+ /* AU size must be a multiple of clu_size */
+ if ((sect_per_au <= 0) || (sect_per_au & (fsi->sect_per_clus - 1))) {
+ sdfat_msg(sb, KERN_ERR,
+ "invalid AU size (sect_per_au : %u, "
+ "sect_per_clus : %u) "
+ "please re-format for performance.",
+ sect_per_au, fsi->sect_per_clus);
+ return -EINVAL;
+ }
+
+ /* the start sector of this partition must be a multiple of clu_size */
+ if (misaligned_sect & (fsi->sect_per_clus - 1)) {
+ sdfat_msg(sb, KERN_ERR,
+ "misaligned part (start sect : %u, "
+ "sect_per_clus : %u) "
+ "please re-format for performance.",
+ misaligned_sect, fsi->sect_per_clus);
+ return -EINVAL;
+ }
+
+ /* data start sector must be a multiple of clu_size */
+ if (fsi->data_start_sector & (fsi->sect_per_clus - 1)) {
+ sdfat_msg(sb, KERN_ERR,
+ "misaligned data area (start sect : %llu, "
+ "sect_per_clus : %u) "
+ "please re-format for performance.",
+ fsi->data_start_sector, fsi->sect_per_clus);
+ return -EINVAL;
+ }
+
+ misaligned_sect &= (sect_per_au - 1);
+
+ /* Allocate data structures */
+ amap = kzalloc(sizeof(AMAP_T), GFP_NOIO);
+ if (!amap)
+ return -ENOMEM;
+
+ amap->sb = sb;
+
+ tmp = fsi->num_sectors + misaligned_sect + sect_per_au - 1;
+ do_div(tmp, sect_per_au);
+ amap->n_au = tmp;
+ amap->n_clean_au = 0;
+ amap->n_full_au = 0;
+
+ /* Reflect block-partition align first,
+ * then partition-data_start align
+ */
+ amap->clu_align_bias = (misaligned_sect / fsi->sect_per_clus);
+ amap->clu_align_bias += (fsi->data_start_sector >> fsi->sect_per_clus_bits) - CLUS_BASE;
+ amap->clusters_per_au = sect_per_au / fsi->sect_per_clus;
+
+ /* That is, the cluster size must be at least 4 KB
+ * when the AU size is 4 MB.
+ */
+ if (amap->clusters_per_au > MAX_CLU_PER_AU) {
+ sdfat_log_msg(sb, KERN_INFO,
+ "too many clusters per AU (clus/au:%d > %d).",
+ amap->clusters_per_au,
+ MAX_CLU_PER_AU);
+ }
+
+ /* is it needed? why here? */
+ // set_sb_dirty(sb);
+
+ spin_lock_init(&amap->amap_lock);
+
+ amap->option.packing_ratio = pack_ratio;
+ amap->option.au_size = sect_per_au;
+ amap->option.au_align_factor = hidden_sect;
+
+
+ /* Allocate AU info table */
+ n_au_table = (amap->n_au + N_AU_PER_TABLE - 1) / N_AU_PER_TABLE;
+ amap->au_table = kmalloc(sizeof(AU_INFO_T *) * n_au_table, GFP_NOIO);
+ if (!amap->au_table) {
+ sdfat_msg(sb, KERN_ERR,
+ "failed to alloc amap->au_table\n");
+ kfree(amap);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < n_au_table; i++)
+ amap->au_table[i] = (AU_INFO_T *)get_zeroed_page(GFP_NOIO);
+
+ /* Allocate buckets indexed by # of free clusters */
+ amap->fclu_order = get_order(sizeof(FCLU_NODE_T) * amap->clusters_per_au);
+
+ // XXX: amap->clusters_per_au limitation is 512 (w/ 8 byte list_head)
+ sdfat_log_msg(sb, KERN_INFO, "page orders for AU nodes : %d "
+ "(clus_per_au : %d, node_size : %lu)",
+ amap->fclu_order,
+ amap->clusters_per_au,
+ (unsigned long)sizeof(FCLU_NODE_T));
+
+ if (!amap->fclu_order)
+ amap->fclu_nodes = (FCLU_NODE_T *)get_zeroed_page(GFP_NOIO);
+ else
+ amap->fclu_nodes = vzalloc(PAGE_SIZE << amap->fclu_order);
+
+ amap->fclu_hint = amap->clusters_per_au;
+
+ /* Hot AU list, ignored AU list */
+ amap->slist_hot.next = NULL;
+ amap->slist_hot.head = &amap->slist_hot;
+ amap->total_fclu_hot = 0;
+
+ amap->slist_ignored.next = NULL;
+ amap->slist_ignored.head = &amap->slist_ignored;
+
+ /* Strategy related vars. */
+ amap->cur_cold.au = NULL;
+ amap->cur_hot.au = NULL;
+ amap->n_need_packing = 0;
+
+
+ /* Build AMAP info */
+ total_used_clusters = 0; // Count # of used clusters
+
+ i_au_root = i_AU_of_CLU(amap, fsi->root_dir);
+ i_au_hot_from = amap->n_au - (SMART_ALLOC_N_HOT_AU - 1);
+
+ for (i = 0; i < amap->clusters_per_au; i++)
+ INIT_LIST_HEAD(&amap->fclu_nodes[i].head);
+
+ /*
+ * Thanks to kzalloc()
+ * amap->entries[i_au].free_clusters = 0;
+ * amap->entries[i_au].head.prev = NULL;
+ * amap->entries[i_au].head.next = NULL;
+ */
+
+ /* Parse FAT table */
+ for (i_clu = CLUS_BASE; i_clu < fsi->num_clusters; i_clu++) {
+ u32 clu_data;
+ AU_INFO_T *au;
+
+ if (fat_ent_get(sb, i_clu, &clu_data)) {
+ sdfat_msg(sb, KERN_ERR,
+ "failed to read fat entry(%u)\n", i_clu);
+ goto free_and_eio;
+ }
+
+ if (IS_CLUS_FREE(clu_data)) {
+ au = GET_AU(amap, i_AU_of_CLU(amap, i_clu));
+ au->free_clusters++;
+ } else
+ total_used_clusters++;
+ }
+
+ /* Build AU list */
+ for (i_au = 0; i_au < amap->n_au; i_au++) {
+ AU_INFO_T *au = GET_AU(amap, i_au);
+
+ au->idx = i_au;
+ BUG_ON(au->free_clusters > amap->clusters_per_au);
+
+ if (au->free_clusters == amap->clusters_per_au)
+ amap->n_clean_au++;
+ else if (au->free_clusters == 0)
+ amap->n_full_au++;
+
+ /* If hot, insert to the hot list */
+ if (i_au >= i_au_hot_from) {
+ amap_add_hot_au(amap, au);
+ amap->total_fclu_hot += au->free_clusters;
+ } else if (i_au != i_au_root || SMART_ALLOC_N_HOT_AU == 0) {
+ /* Otherwise, insert to the free cluster hash */
+ amap_add_cold_au(amap, au);
+ }
+ }
+
+ /* Hot list -> (root) -> (last) -> (last - 1) -> ... */
+ if (i_au_root >= 0 && SMART_ALLOC_N_HOT_AU > 0) {
+ amap_add_hot_au(amap, GET_AU(amap, i_au_root));
+ amap->total_fclu_hot += GET_AU(amap, i_au_root)->free_clusters;
+ }
+
+ fsi->amap = amap;
+ fsi->used_clusters = total_used_clusters;
+
+ sdfat_msg(sb, KERN_INFO,
+ "AMAP: Smart allocation enabled (opt : %u / %u / %u)",
+ amap->option.au_size, amap->option.au_align_factor,
+ amap->option.packing_ratio);
+
+ /* Debug purpose - check */
+ //{
+ //u32 used_clusters;
+ //fat_count_used_clusters(sb, &used_clusters)
+ //ASSERT(used_clusters == total_used_clusters);
+ //}
+
+ return 0;
+
+
+free_and_eio:
+ if (amap) {
+ if (amap->au_table) {
+ for (i = 0; i < n_au_table; i++)
+ free_page((unsigned long)amap->au_table[i]);
+ kfree(amap->au_table);
+ }
+ if (amap->fclu_nodes) {
+ if (!amap->fclu_order)
+ free_page((unsigned long)amap->fclu_nodes);
+ else
+ vfree(amap->fclu_nodes);
+ }
+ kfree(amap);
+ }
+ return -EIO;
+}
+
+
+/* Free AMAP related structure */
+void amap_destroy(struct super_block *sb)
+{
+ AMAP_T *amap = SDFAT_SB(sb)->fsi.amap;
+ int n_au_table;
+
+ if (!amap)
+ return;
+
+ DMSG("%s\n", __func__);
+
+ n_au_table = (amap->n_au + N_AU_PER_TABLE - 1) / N_AU_PER_TABLE;
+
+ if (amap->au_table) {
+ int i;
+
+ for (i = 0; i < n_au_table; i++)
+ free_page((unsigned long)amap->au_table[i]);
+
+ kfree(amap->au_table);
+ }
+ if (!amap->fclu_order)
+ free_page((unsigned long)amap->fclu_nodes);
+ else
+ vfree(amap->fclu_nodes);
+ kfree(amap);
+ SDFAT_SB(sb)->fsi.amap = NULL;
+}
+
+
+/*
+ * Check status of FS
+ * and change destination if needed to disable AU-aligned alloc.
+ * (from ALLOC_COLD_ALIGNED to ALLOC_COLD_SEQ)
+ */
+static inline int amap_update_dest(AMAP_T *amap, int ori_dest)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(amap->sb)->fsi);
+ int n_partial_au, n_partial_freeclus;
+
+ if (ori_dest != ALLOC_COLD_ALIGNED)
+ return ori_dest;
+
+ /* # of partial AUs and # of clusters in those AUs */
+ n_partial_au = amap->n_au - amap->n_clean_au - amap->n_full_au;
+ n_partial_freeclus = fsi->num_clusters - fsi->used_clusters -
+ amap->clusters_per_au * amap->n_clean_au;
+
+ /* Status of AUs : Full / Partial / Clean
+ * If there are many partial (and badly fragmented) AUs,
+ * the throughput will decrease extremely.
+ *
+ * The following code handles those worst cases.
+ */
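+ /* Worked example (illustrative numbers): a 16 GB volume with 4 MB AUs
+ * has an n_au of about 4096, so the 2% clean-AU threshold below comes to
+ * roughly 80 AUs, matching the "80 AUs per 16GB" note.
+ */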
+
+ /* XXX: AMAP heuristics */
+ if ((amap->n_clean_au * 50 <= amap->n_au) &&
+ (n_partial_freeclus*2) < (n_partial_au*amap->clusters_per_au)) {
+ /* If clean AUs are fewer than 2% of n_au (80 AUs per 16GB)
+ * and fragment ratio is more than 2 (AVG free_clusters=half AU)
+ *
+ * disable clean-first allocation
+ * enable VFAT-like sequential allocation
+ */
+ return ALLOC_COLD_SEQ;
+ }
+
+ return ori_dest;
+}
+
+
+#define PACKING_SOFTLIMIT (amap->option.packing_ratio)
+#define PACKING_HARDLIMIT (amap->option.packing_ratio * 4)
+/*
+ * Pick a packing AU if needed.
+ * Otherwise just return NULL
+ *
+ * This function includes some heuristics.
+ */
+static inline AU_INFO_T *amap_get_packing_au(AMAP_T *amap, int dest, int num_to_wb, int *clu_to_skip)
+{
+ AU_INFO_T *au = NULL;
+
+ if (dest == ALLOC_COLD_PACKING) {
+ /* ALLOC_COLD_PACKING:
+ * Packing-first mode for defrag.
+ * Optimized to save clean AU
+ *
+ * 1) best-fit AU
+ * 2) Smallest AU (w/ minimum free clusters)
+ */
+ if (num_to_wb >= amap->clusters_per_au)
+ num_to_wb = num_to_wb % amap->clusters_per_au;
+
+ /* If this is commented out: a clean AU is used when the request exactly fills an AU; otherwise the smallest AUs are used first */
+ if (num_to_wb == 0)
+ num_to_wb = 1; // Don't use clean AUs
+
+ au = amap_find_cold_au_bestfit(amap, num_to_wb);
+ if (au && au->free_clusters == amap->clusters_per_au && num_to_wb > 1) {
+ /* if au is clean then get a new partial one */
+ au = amap_find_cold_au_bestfit(amap, 1);
+ }
+
+ if (au) {
+ amap->n_need_packing = 0;
+ amap_remove_cold_au(amap, au);
+ return au;
+ }
+ }
+
+
+ /* Heuristic packing:
+ * This will improve QoS greatly.
+ *
+ * Count # of AU_ALIGNED allocation.
+ * If the number exceeds the specific threshold,
+ * allocate on a partial AU or generate random I/O.
+ */
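+ /* e.g. with packing_ratio = 10 (hypothetical value), a best-fit packing
+ * attempt is made after 10 consecutive clean-AU allocations, and after
+ * 40 the largest partial AU is used by force (see PACKING_SOFTLIMIT /
+ * PACKING_HARDLIMIT above).
+ */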
+ if ((PACKING_SOFTLIMIT > 0) &&
+ (amap->n_need_packing >= PACKING_SOFTLIMIT) &&
+ (num_to_wb < (int)amap->clusters_per_au)) {
+ /* Best-fit packing:
+ * If num_to_wb (expected number to be allocated) is smaller
+ * than AU_SIZE, find a best-fit AU.
+ */
+
+ /* Back margin (heuristics) */
+ if (num_to_wb < amap->clusters_per_au / 4)
+ num_to_wb = amap->clusters_per_au / 4;
+
+ au = amap_find_cold_au_bestfit(amap, num_to_wb);
+ if (au != NULL) {
+ amap_remove_cold_au(amap, au);
+
+ MMSG("AMAP: packing (cnt: %d) / softlimit, "
+ "best-fit (num_to_wb: %d))\n",
+ amap->n_need_packing, num_to_wb);
+
+ if (au->free_clusters > num_to_wb) { // Best-fit search: this branch always hits
+ *clu_to_skip = au->free_clusters - num_to_wb;
+ /* otherwise don't skip */
+ }
+ amap->n_need_packing = 0;
+ return au;
+ }
+ }
+
+ if ((PACKING_HARDLIMIT) && amap->n_need_packing >= PACKING_HARDLIMIT) {
+ /* Compulsory SLC flushing:
+ * If there was no chance to do best-fit packing
+ * and the # of AU-aligned allocation exceeds HARD threshold,
+ * then pick a clean AU and generate a compulsory random I/O.
+ */
+ au = amap_pop_cold_au_largest(amap, amap->clusters_per_au);
+ if (au) {
+ MMSG("AMAP: packing (cnt: %d) / hard-limit, largest)\n",
+ amap->n_need_packing);
+
+ if (au->free_clusters >= 96) {
+ *clu_to_skip = au->free_clusters / 2;
+ MMSG("AMAP: cluster idx re-position\n");
+ }
+ amap->n_need_packing = 0;
+ return au;
+ }
+ }
+
+ /* Update # of clean AU allocation */
+ amap->n_need_packing++;
+ return NULL;
+}
+
+
+/* Pick a target AU:
+ * This function should be called
+ * only if there are one or more free clusters in the bdev.
+ */
+TARGET_AU_T *amap_get_target_au(AMAP_T *amap, int dest, int num_to_wb)
+{
+ int loop_count = 0;
+
+retry:
+ if (++loop_count >= 3) {
+ /* No space available (or AMAP consistency error)
+ * This could happen because of ignored AUs, but it is unlikely
+ * (the defrag daemon does not run if there is not enough space)
+ */
+ BUG_ON(amap->slist_ignored.next == NULL);
+ return NULL;
+ }
+
+ /* Hot clusters (DIR) */
+ if (dest == ALLOC_HOT) {
+
+ /* Working hot AU exist? */
+ if (amap->cur_hot.au == NULL || amap->cur_hot.au->free_clusters == 0) {
+ AU_INFO_T *au;
+
+ if (amap->total_fclu_hot == 0) {
+ /* No more hot AUs available */
+ dest = ALLOC_COLD;
+
+ goto retry;
+ }
+
+ au = amap_find_hot_au_partial(amap);
+
+ BUG_ON(au == NULL);
+ BUG_ON(au->free_clusters <= 0);
+
+ amap->cur_hot.au = au;
+ amap->cur_hot.idx = 0;
+ amap->cur_hot.clu_to_skip = 0;
+ }
+
+ /* Now allocate on a hot AU */
+ return &amap->cur_hot;
+ }
+
+ /* Cold allocation:
+ * If amap->cur_cold.au has one or more free cluster(s),
+ * then just return amap->cur_cold
+ */
+ if ((!amap->cur_cold.au)
+ || (amap->cur_cold.idx == amap->clusters_per_au)
+ || (amap->cur_cold.au->free_clusters == 0)) {
+
+ AU_INFO_T *au = NULL;
+ const AU_INFO_T *old_au = amap->cur_cold.au;
+ int n_clu_to_skip = 0;
+
+ if (old_au) {
+ ASSERT(!IS_AU_WORKING(old_au, amap));
+ /* must be NOT WORKING AU.
+ * (only for information gathering)
+ */
+ }
+
+ /* Next target AU is needed:
+ * There are 3 possible ALLOC options for cold AU
+ *
+ * ALLOC_COLD_ALIGNED: Clean AU first, but heuristic packing is ON
+ * ALLOC_COLD_PACKING: Packing AU first (usually for defrag)
+ * ALLOC_COLD_SEQ : Sequential AU allocation (VFAT-like)
+ */
+
+ /* Experimental: Modify allocation destination if needed (ALIGNED => SEQ) */
+ // dest = amap_update_dest(amap, dest);
+
+ if ((dest == ALLOC_COLD_SEQ) && old_au) {
+ int i_au = old_au->idx + 1;
+
+ while (i_au != old_au->idx) {
+ au = GET_AU(amap, i_au);
+
+ if ((au->free_clusters > 0) &&
+ !IS_AU_HOT(au, amap) &&
+ !IS_AU_IGNORED(au, amap)) {
+ MMSG("AMAP: new cold AU(%d) with %d "
+ "clusters (seq)\n",
+ au->idx, au->free_clusters);
+
+ amap_remove_cold_au(amap, au);
+ goto ret_new_cold;
+ }
+ i_au++;
+ if (i_au >= amap->n_au)
+ i_au = 0;
+ }
+
+ // no cold AUs are available => Hot allocation
+ dest = ALLOC_HOT;
+ goto retry;
+ }
+
+
+ /*
+ * Check if packing is needed
+ * (ALLOC_COLD_PACKING is treated by this function)
+ */
+ au = amap_get_packing_au(amap, dest, num_to_wb, &n_clu_to_skip);
+ if (au) {
+ MMSG("AMAP: new cold AU(%d) with %d clusters "
+ "(packing)\n", au->idx, au->free_clusters);
+ goto ret_new_cold;
+ }
+
+ /* ALLOC_COLD_ALIGNED */
+ /* Check if the adjacent AU is clean */
+ if (old_au && ((old_au->idx + 1) < amap->n_au)) {
+ au = GET_AU(amap, old_au->idx + 1);
+ if ((au->free_clusters == amap->clusters_per_au) &&
+ !IS_AU_HOT(au, amap) &&
+ !IS_AU_IGNORED(au, amap)) {
+ MMSG("AMAP: new cold AU(%d) with %d clusters "
+ "(adjacent)\n", au->idx, au->free_clusters);
+ amap_remove_cold_au(amap, au);
+ goto ret_new_cold;
+ }
+ }
+
+ /* Clean or largest AU */
+ au = amap_pop_cold_au_largest(amap, 0);
+ if (!au) {
+ //ASSERT(amap->total_fclu_hot == (fsi->num_clusters - fsi->used_clusters - 2));
+ dest = ALLOC_HOT;
+ goto retry;
+ }
+
+ MMSG("AMAP: New cold AU (%d) with %d clusters\n",
+ au->idx, au->free_clusters);
+
+ret_new_cold:
+ SET_AU_WORKING(au);
+
+ amap->cur_cold.au = au;
+ amap->cur_cold.idx = 0;
+ amap->cur_cold.clu_to_skip = n_clu_to_skip;
+ }
+
+ return &amap->cur_cold;
+}
+
+/* Put and update target AU */
+void amap_put_target_au(AMAP_T *amap, TARGET_AU_T *cur, unsigned int num_allocated)
+{
+ /* Update AMAP info vars. */
+ if (num_allocated > 0 &&
+ (cur->au->free_clusters + num_allocated) == amap->clusters_per_au) {
+ /* if the target AU was a clean AU before this allocation ... */
+ amap->n_clean_au--;
+ }
+ if (num_allocated > 0 &&
+ cur->au->free_clusters == 0)
+ amap->n_full_au++;
+
+ if (IS_AU_HOT(cur->au, amap)) {
+ /* Hot AU */
+ MMSG("AMAP: hot allocation at AU %d\n", cur->au->idx);
+ amap->total_fclu_hot -= num_allocated;
+
+ /* Intra-AU round-robin */
+ if (cur->idx >= amap->clusters_per_au)
+ cur->idx = 0;
+
+ /* No more space available */
+ if (cur->au->free_clusters == 0)
+ cur->au = NULL;
+
+ } else {
+ /* non-hot AU */
+ ASSERT(IS_AU_WORKING(cur->au, amap));
+
+ if (cur->idx >= amap->clusters_per_au || cur->au->free_clusters == 0) {
+ /* It should be inserted back to AU MAP */
+ cur->au->shead.head = NULL; // SET_AU_NOT_WORKING
+ amap_add_cold_au(amap, cur->au);
+
+ // cur->au = NULL; // This value will be used for the next AU selection
+ cur->idx = amap->clusters_per_au; // AU closing
+ }
+ }
+
+}
+
+
+/* Reposition target->idx for packing (Heuristics):
+ * Skip (num_to_skip) free clusters in (cur->au)
+ */
+static inline int amap_skip_cluster(struct super_block *sb, TARGET_AU_T *cur, int num_to_skip)
+{
+ AMAP_T *amap = SDFAT_SB(sb)->fsi.amap;
+ u32 clu, read_clu;
+ MMSG_VAR(int num_to_skip_orig = num_to_skip);
+
+ if (num_to_skip >= cur->au->free_clusters) {
+ EMSG("AMAP(%s): skip mis-use. amap_566\n", __func__);
+ return -EIO;
+ }
+
+ clu = CLU_of_i_AU(amap, cur->au->idx, cur->idx);
+ while (num_to_skip > 0) {
+ if (clu >= CLUS_BASE) {
+ /* Cf.
+ * If AMAP's integrity is okay,
+ * we don't need to check if (clu < fsi->num_clusters)
+ */
+
+ if (fat_ent_get(sb, clu, &read_clu))
+ return -EIO;
+
+ if (IS_CLUS_FREE(read_clu))
+ num_to_skip--;
+ }
+
+ // Move clu->idx
+ clu++;
+ (cur->idx)++;
+
+ if (cur->idx >= amap->clusters_per_au) {
+ /* End of AU (Not supposed) */
+ EMSG("AMAP: Skip - End of AU?! (amap_596)\n");
+ cur->idx = 0;
+ return -EIO;
+ }
+ }
+
+ MMSG("AMAP: Skip_clusters (%d skipped => %d, among %d free clus)\n",
+ num_to_skip_orig, cur->idx, cur->au->free_clusters);
+
+ return 0;
+}
+
+
+/* AMAP-based allocation function for FAT32 */
+s32 amap_fat_alloc_cluster(struct super_block *sb, u32 num_alloc, CHAIN_T *p_chain, s32 dest)
+{
+ AMAP_T *amap = SDFAT_SB(sb)->fsi.amap;
+ TARGET_AU_T *cur = NULL;
+ AU_INFO_T *target_au = NULL; /* Allocation target AU */
+ s32 ret = -ENOSPC;
+ u32 last_clu = CLUS_EOF, read_clu;
+ u32 new_clu, total_cnt;
+ u32 num_allocated = 0, num_allocated_each = 0;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ BUG_ON(!amap);
+ BUG_ON(IS_CLUS_EOF(fsi->used_clusters));
+
+ total_cnt = fsi->num_clusters - CLUS_BASE;
+
+ if (unlikely(total_cnt < fsi->used_clusters)) {
+ sdfat_fs_error_ratelimit(sb,
+ "AMAP(%s): invalid used clusters(t:%u,u:%u)\n",
+ __func__, total_cnt, fsi->used_clusters);
+ return -EIO;
+ }
+
+ if (num_alloc > total_cnt - fsi->used_clusters)
+ return -ENOSPC;
+
+ p_chain->dir = CLUS_EOF;
+
+ set_sb_dirty(sb);
+
+ // spin_lock(&amap->amap_lock);
+
+retry_alloc:
+ /* Allocation strategy implemented */
+ cur = amap_get_target_au(amap, dest, fsi->reserved_clusters);
+ if (unlikely(!cur)) {
+ // There is no available AU (only ignored-AU are left)
+ sdfat_msg(sb, KERN_ERR, "AMAP Allocator: no available AU.");
+ goto error;
+ }
+
+ /* If there are clusters to skip */
+ if (cur->clu_to_skip > 0) {
+ if (amap_skip_cluster(sb, &amap->cur_cold, cur->clu_to_skip)) {
+ ret = -EIO;
+ goto error;
+ }
+ cur->clu_to_skip = 0;
+ }
+
+ target_au = cur->au;
+
+ /*
+ * cur->au : target AU info pointer
+ * cur->idx : the intra-cluster idx in the AU to start from
+ */
+ BUG_ON(!cur->au);
+ BUG_ON(!cur->au->free_clusters);
+ BUG_ON(cur->idx >= amap->clusters_per_au);
+
+ num_allocated_each = 0;
+ new_clu = CLU_of_i_AU(amap, target_au->idx, cur->idx);
+
+ do {
+ /* Allocate at the target AU */
+ if ((new_clu >= CLUS_BASE) && (new_clu < fsi->num_clusters)) {
+ if (fat_ent_get(sb, new_clu, &read_clu)) {
+ // spin_unlock(&amap->amap_lock);
+ ret = -EIO;
+ goto error;
+ }
+
+ if (IS_CLUS_FREE(read_clu)) {
+ BUG_ON(GET_AU(amap, i_AU_of_CLU(amap, new_clu)) != target_au);
+
+ /* Free cluster found */
+ if (fat_ent_set(sb, new_clu, CLUS_EOF)) {
+ ret = -EIO;
+ goto error;
+ }
+
+ num_allocated_each++;
+
+ if (IS_CLUS_EOF(p_chain->dir)) {
+ p_chain->dir = new_clu;
+ } else {
+ if (fat_ent_set(sb, last_clu, new_clu)) {
+ ret = -EIO;
+ goto error;
+ }
+ }
+ last_clu = new_clu;
+
+ /* Update au info */
+ target_au->free_clusters--;
+ }
+
+ }
+
+ new_clu++;
+ (cur->idx)++;
+
+ /* End of the AU */
+ if ((cur->idx >= amap->clusters_per_au) || !(target_au->free_clusters))
+ break;
+ } while (num_allocated_each < num_alloc);
+
+ /* Update strategy info */
+ amap_put_target_au(amap, cur, num_allocated_each);
+
+
+ num_allocated += num_allocated_each;
+ fsi->used_clusters += num_allocated_each;
+ num_alloc -= num_allocated_each;
+
+
+ if (num_alloc > 0)
+ goto retry_alloc;
+
+ // spin_unlock(&amap->amap_lock);
+ return 0;
+error:
+ if (num_allocated)
+ fsi->fs_func->free_cluster(sb, p_chain, 0);
+ return ret;
+}
+
+
+/* Free cluster for FAT32 (not implemented yet) */
+s32 amap_free_cluster(struct super_block *sb, CHAIN_T *p_chain, s32 do_relse)
+{
+ return -ENOTSUPP;
+}
+
+
+/*
+ * This is called by fat_free_cluster()
+ * to update AMAP info.
+ */
+s32 amap_release_cluster(struct super_block *sb, u32 clu)
+{
+ AMAP_T *amap = SDFAT_SB(sb)->fsi.amap;
+ AU_INFO_T *au;
+ int i_au;
+
+ // spin_lock(&amap->amap_lock);
+
+ /* Update AU info */
+ i_au = i_AU_of_CLU(amap, clu);
+ BUG_ON(i_au >= amap->n_au);
+ au = GET_AU(amap, i_au);
+ if (au->free_clusters >= amap->clusters_per_au) {
+ sdfat_fs_error(sb, "%s, au->free_clusters(%hd) is "
+ "greater than or equal to amap->clusters_per_au(%hd)",
+ __func__, au->free_clusters, amap->clusters_per_au);
+ return -EIO;
+ }
+
+ if (IS_AU_HOT(au, amap)) {
+ MMSG("AMAP: Hot cluster freed\n");
+ au->free_clusters++;
+ amap->total_fclu_hot++;
+ } else if (!IS_AU_WORKING(au, amap) && !IS_AU_IGNORED(au, amap)) {
+ /* Ordinary AU - update AU tree */
+ // Can be optimized by implementing amap_update_au
+ amap_remove_cold_au(amap, au);
+ au->free_clusters++;
+ amap_add_cold_au(amap, au);
+ } else
+ au->free_clusters++;
+
+
+ /* Update AMAP info */
+ if (au->free_clusters == amap->clusters_per_au)
+ amap->n_clean_au++;
+ if (au->free_clusters == 1)
+ amap->n_full_au--;
+
+ // spin_unlock(&amap->amap_lock);
+ return 0;
+}
+
+
+/*
+ * Check if the cluster is in a working AU
+ * The caller should hold sb lock.
+ * This func. should be used only if smart allocation is on
+ */
+s32 amap_check_working(struct super_block *sb, u32 clu)
+{
+ AMAP_T *amap = SDFAT_SB(sb)->fsi.amap;
+ AU_INFO_T *au;
+
+ BUG_ON(!amap);
+ au = GET_AU(amap, i_AU_of_CLU(amap, clu));
+ return IS_AU_WORKING(au, amap);
+}
+
+
+/*
+ * Return the # of free clusters in that AU
+ */
+s32 amap_get_freeclus(struct super_block *sb, u32 clu)
+{
+ AMAP_T *amap = SDFAT_SB(sb)->fsi.amap;
+ AU_INFO_T *au;
+
+ BUG_ON(!amap);
+ au = GET_AU(amap, i_AU_of_CLU(amap, clu));
+ return (s32)au->free_clusters;
+}
+
+
+/*
+ * Add the AU containing 'clu' to the ignored AU list.
+ * The AU will not be used by the allocator.
+ *
+ * XXX: Ignored counter needed
+ */
+s32 amap_mark_ignore(struct super_block *sb, u32 clu)
+{
+ AMAP_T *amap = SDFAT_SB(sb)->fsi.amap;
+ AU_INFO_T *au;
+
+ BUG_ON(!amap);
+ au = GET_AU(amap, i_AU_of_CLU(amap, clu));
+
+ if (IS_AU_HOT(au, amap)) {
+ /* Doesn't work with hot AUs */
+ return -EPERM;
+ } else if (IS_AU_WORKING(au, amap)) {
+ return -EBUSY;
+ }
+
+ //BUG_ON(IS_AU_IGNORED(au, amap) && (GET_IGN_CNT(au) == 0));
+ if (IS_AU_IGNORED(au, amap))
+ return 0;
+
+ amap_remove_cold_au(amap, au);
+ amap_insert_to_list(au, &amap->slist_ignored);
+
+ BUG_ON(!IS_AU_IGNORED(au, amap));
+
+ //INC_IGN_CNT(au);
+ MMSG("AMAP: Mark ignored AU (%d)\n", au->idx);
+ return 0;
+}
+
+
+/*
+ * This function could be used only on IGNORED AUs.
+ * The caller should care whether it's ignored or not before using this func.
+ */
+s32 amap_unmark_ignore(struct super_block *sb, u32 clu)
+{
+ AMAP_T *amap = SDFAT_SB(sb)->fsi.amap;
+ AU_INFO_T *au;
+
+ BUG_ON(!amap);
+
+ au = GET_AU(amap, i_AU_of_CLU(amap, clu));
+
+ BUG_ON(!IS_AU_IGNORED(au, amap));
+ // BUG_ON(GET_IGN_CNT(au) == 0);
+
+ amap_remove_from_list(au, &amap->slist_ignored);
+ amap_add_cold_au(amap, au);
+
+ BUG_ON(IS_AU_IGNORED(au, amap));
+
+ //DEC_IGN_CNT(au);
+
+ MMSG("AMAP: Unmark ignored AU (%d)\n", au->idx);
+
+ return 0;
+}
+
+/*
+ * Unmark all ignored AUs.
+ * Returns the # of unmarked AUs.
+ */
+s32 amap_unmark_ignore_all(struct super_block *sb)
+{
+ AMAP_T *amap = SDFAT_SB(sb)->fsi.amap;
+ struct slist_head *entry;
+ AU_INFO_T *au;
+ int n = 0;
+
+ BUG_ON(!amap);
+ entry = amap->slist_ignored.next;
+ while (entry) {
+ au = list_entry(entry, AU_INFO_T, shead);
+
+ BUG_ON(au != GET_AU(amap, au->idx));
+ BUG_ON(!IS_AU_IGNORED(au, amap));
+
+ //CLEAR_IGN_CNT(au);
+ amap_remove_from_list(au, &amap->slist_ignored);
+ amap_add_cold_au(amap, au);
+
+ MMSG("AMAP: Unmark ignored AU (%d)\n", au->idx);
+ n++;
+
+ entry = amap->slist_ignored.next;
+ }
+
+ BUG_ON(amap->slist_ignored.next != NULL);
+ MMSG("AMAP: unmark_ignore_all, total %d AUs\n", n);
+
+ return n;
+}
+
+/**
+ * @fn amap_get_au_stat
+ * @brief report AU status depending on mode
+ * @return positive on success, 0 otherwise
+ * @param sb super block
+ * @param mode VOL_AU_STAT_TOTAL, VOL_AU_STAT_CLEAN or VOL_AU_STAT_FULL
+ */
+u32 amap_get_au_stat(struct super_block *sb, s32 mode)
+{
+ AMAP_T *amap = SDFAT_SB(sb)->fsi.amap;
+
+ if (!amap)
+ return 0;
+
+ if (mode == VOL_AU_STAT_TOTAL)
+ return amap->n_au;
+ else if (mode == VOL_AU_STAT_CLEAN)
+ return amap->n_clean_au;
+ else if (mode == VOL_AU_STAT_FULL)
+ return amap->n_full_au;
+
+ return 0;
+}
+
diff --git a/fs/sdfat/amap_smart.h b/fs/sdfat/amap_smart.h
new file mode 100644
index 000000000000..caee6f6b3681
--- /dev/null
+++ b/fs/sdfat/amap_smart.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SDFAT_AMAP_H
+#define _SDFAT_AMAP_H
+
+#include <linux/fs.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+
+/* AMAP Configuration Variable */
+#define SMART_ALLOC_N_HOT_AU (5)
+
+/* Allocating Destination (for smart allocator):
+ * moved to sdfat.h
+ */
+/*
+ * #define ALLOC_COLD_ALIGNED (1)
+ * #define ALLOC_COLD_PACKING (2)
+ * #define ALLOC_COLD_SEQ (4)
+ */
+
+/* Minimum sectors for support AMAP create */
+#define AMAP_MIN_SUPPORT_SECTORS (1048576)
+
+#define amap_add_hot_au(amap, au) amap_insert_to_list(au, &amap->slist_hot)
+
+/* singly linked list */
+struct slist_head {
+ struct slist_head *next;
+ struct slist_head *head;
+};
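+
+/* Note: 'head' doubles as a membership tag: it points at the list the
+ * entry is currently on (slist_hot / slist_ignored), holds MAGIC_WORKING
+ * while the allocator is using the AU, or is NULL when detached; the
+ * IS_AU_HOT / IS_AU_IGNORED / IS_AU_WORKING macros below test exactly this.
+ */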
+
+/* AU entry type */
+typedef struct __AU_INFO_T {
+ uint16_t idx; /* the index of the AU (0, 1, 2, ... ) */
+ uint16_t free_clusters; /* # of available cluster */
+ union {
+ struct list_head head;
+ struct slist_head shead;/* singly linked list head for hot list */
+ };
+} AU_INFO_T;
+
+
+/* Allocation Target AU */
+typedef struct __TARGET_AU_T {
+ AU_INFO_T *au; /* Working AU */
+ uint16_t idx; /* Intra-AU cluster index */
+ uint16_t clu_to_skip; /* Clusters to skip */
+} TARGET_AU_T;
+
+
+/* AMAP free-clusters-based node */
+typedef struct {
+ struct list_head head; /* the list of AUs */
+} FCLU_NODE_T;
+
+
+/* AMAP options */
+typedef struct {
+ unsigned int packing_ratio; /* Tunable packing ratio */
+ unsigned int au_size; /* AU size in sectors */
+ unsigned int au_align_factor; /* Hidden sectors % au_size */
+} AMAP_OPT_T;
+
+typedef struct __AMAP_T {
+ spinlock_t amap_lock; /* obsolete */
+ struct super_block *sb;
+
+ int n_au;
+ int n_clean_au, n_full_au;
+ int clu_align_bias;
+ uint16_t clusters_per_au;
+ AU_INFO_T **au_table; /* An array of AU_INFO entries */
+ AMAP_OPT_T option;
+
+ /* Size-based AU management pool (cold) */
+ FCLU_NODE_T *fclu_nodes; /* An array of listheads */
+ int fclu_order; /* Page order that fclu_nodes needs */
+ int fclu_hint; /* maximum # of free clusters in an AU */
+
+ /* Hot AU list */
+ unsigned int total_fclu_hot; /* Free clusters in hot list */
+ struct slist_head slist_hot; /* Hot AU list */
+
+ /* Ignored AU list */
+ struct slist_head slist_ignored;
+
+ /* Allocator variables (keep 2 AUs at maximum) */
+ TARGET_AU_T cur_cold;
+ TARGET_AU_T cur_hot;
+ int n_need_packing;
+} AMAP_T;
+
+
+/* AU table */
+#define N_AU_PER_TABLE (int)(PAGE_SIZE / sizeof(AU_INFO_T))
+#define GET_AU(amap, i_AU) (amap->au_table[(i_AU) / N_AU_PER_TABLE] + ((i_AU) % N_AU_PER_TABLE))
+//#define MAX_CLU_PER_AU (int)(PAGE_SIZE / sizeof(FCLU_NODE_T))
+#define MAX_CLU_PER_AU (1024)
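+/* Example (illustrative, assuming 4 KB pages and a 24-byte AU_INFO_T on a
+ * 64-bit build): N_AU_PER_TABLE is 170, so GET_AU(amap, 1000) resolves to
+ * amap->au_table[5] + 150.
+ */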
+
+/* Cold AU bucket <-> # of freeclusters */
+#define NODE_CLEAN(amap) (&amap->fclu_nodes[amap->clusters_per_au - 1])
+#define NODE(fclu, amap) (&amap->fclu_nodes[fclu - 1])
+#define FREE_CLUSTERS(node, amap) ((int)(node - amap->fclu_nodes) + 1)
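+/* Example: NODE(1, amap) is &fclu_nodes[0] (AUs with exactly one free
+ * cluster) and NODE(clusters_per_au, amap) equals NODE_CLEAN(amap);
+ * FREE_CLUSTERS() is the exact inverse of NODE().
+ */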
+
+/* AU status */
+#define MAGIC_WORKING ((struct slist_head *)0xFFFF5091)
+#define IS_AU_HOT(au, amap) (au->shead.head == &amap->slist_hot)
+#define IS_AU_IGNORED(au, amap) (au->shead.head == &amap->slist_ignored)
+#define IS_AU_WORKING(au, amap) (au->shead.head == MAGIC_WORKING)
+#define SET_AU_WORKING(au) (au->shead.head = MAGIC_WORKING)
+
+/* AU <-> cluster */
+#define i_AU_of_CLU(amap, clu) ((amap->clu_align_bias + clu) / amap->clusters_per_au)
+#define CLU_of_i_AU(amap, i_au, idx) \
+ ((uint32_t)(i_au) * (uint32_t)amap->clusters_per_au + (idx) - amap->clu_align_bias)
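+
+/* Round-trip example (hypothetical values: clusters_per_au = 1024,
+ * clu_align_bias = 30): cluster 5000 lives in AU (30 + 5000) / 1024 = 4
+ * at intra-AU index 934, and CLU_of_i_AU(amap, 4, 934) = 4096 + 934 - 30
+ * recovers cluster 5000.
+ */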
+
+/*
+ * NOTE : AMAP internal functions are moved to core.h
+ */
+
+#endif /* _SDFAT_AMAP_H */
diff --git a/fs/sdfat/api.c b/fs/sdfat/api.c
new file mode 100644
index 000000000000..45b0c4106bda
--- /dev/null
+++ b/fs/sdfat/api.c
@@ -0,0 +1,636 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/************************************************************************/
+/* */
+/* PROJECT : exFAT & FAT12/16/32 File System */
+/* FILE : sdfat_api.c */
+/* PURPOSE : sdFAT volume lock layer */
+/* */
+/************************************************************************/
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/mutex.h>
+
+#include "version.h"
+#include "config.h"
+
+#include "sdfat.h"
+#include "core.h"
+
+/*----------------------------------------------------------------------*/
+/* Internal structures */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/* Constant & Macro Definitions */
+/*----------------------------------------------------------------------*/
+static DEFINE_MUTEX(_lock_core);
+
+/*----------------------------------------------------------------------*/
+/* Global Variable Definitions */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/* Local Variable Definitions */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/* Local Function Declarations */
+/*----------------------------------------------------------------------*/
+
+/*======================================================================*/
+/* Global Function Definitions */
+/* - All functions for global use have the same return value format, */
+/* that is, 0 on success and a negative error number on */
+/* various error conditions. */
+/*======================================================================*/
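+/* For instance, a caller is expected to check results like this:
+ *
+ * err = fsapi_mount(sb);
+ * if (err)
+ * return err; // a negative errno, e.g. -ENOMEM or -EIO
+ */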
+
+/*----------------------------------------------------------------------*/
+/* sdFAT Filesystem Init & Exit Functions */
+/*----------------------------------------------------------------------*/
+
+s32 fsapi_init(void)
+{
+ return fscore_init();
+}
+
+s32 fsapi_shutdown(void)
+{
+ return fscore_shutdown();
+}
+
+/*----------------------------------------------------------------------*/
+/* Volume Management Functions */
+/*----------------------------------------------------------------------*/
+
+/* mount the file system volume */
+s32 fsapi_mount(struct super_block *sb)
+{
+ s32 err;
+
+ /* acquire the core lock for the file system critical section */
+ mutex_lock(&_lock_core);
+
+ err = meta_cache_init(sb);
+ if (err)
+ goto out;
+
+ err = fscore_mount(sb);
+out:
+ if (err)
+ meta_cache_shutdown(sb);
+
+ /* release the core lock for file system critical section */
+ mutex_unlock(&_lock_core);
+
+ return err;
+}
+EXPORT_SYMBOL(fsapi_mount);
+
+/* unmount the file system volume */
+s32 fsapi_umount(struct super_block *sb)
+{
+ s32 err;
+
+ /* acquire the core lock for the file system critical section */
+ mutex_lock(&_lock_core);
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ err = fscore_umount(sb);
+ meta_cache_shutdown(sb);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+
+ /* release the core lock for file system critical section */
+ mutex_unlock(&_lock_core);
+
+ return err;
+}
+EXPORT_SYMBOL(fsapi_umount);
+
+/* get the information of a file system volume */
+s32 fsapi_statfs(struct super_block *sb, VOL_INFO_T *info)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ /* check the validity of pointer parameters */
+ ASSERT(info);
+
+ if (fsi->used_clusters == (u32) ~0) {
+ s32 err;
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ err = fscore_statfs(sb, info);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+ }
+
+ info->FatType = fsi->vol_type;
+ info->ClusterSize = fsi->cluster_size;
+ info->NumClusters = fsi->num_clusters - 2; /* clu 0 & 1 */
+ info->UsedClusters = fsi->used_clusters + fsi->reserved_clusters;
+ info->FreeClusters = info->NumClusters - info->UsedClusters;
+
+ return 0;
+}
+EXPORT_SYMBOL(fsapi_statfs);
+
+/* synchronize a file system volume */
+s32 fsapi_sync_fs(struct super_block *sb, s32 do_sync)
+{
+ s32 err;
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ err = fscore_sync_fs(sb, do_sync);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_sync_fs);
+
+s32 fsapi_set_vol_flags(struct super_block *sb, u16 new_flag, s32 always_sync)
+{
+ s32 err;
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ err = fscore_set_vol_flags(sb, new_flag, always_sync);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_set_vol_flags);
+
+/*----------------------------------------------------------------------*/
+/* File Operation Functions */
+/*----------------------------------------------------------------------*/
+
+/* lookup */
+s32 fsapi_lookup(struct inode *inode, u8 *path, FILE_ID_T *fid)
+{
+ s32 err;
+ struct super_block *sb = inode->i_sb;
+
+ /* check the validity of pointer parameters */
+ ASSERT(fid && path);
+
+ if (unlikely(!strlen(path)))
+ return -EINVAL;
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ err = fscore_lookup(inode, path, fid);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_lookup);
+
+/* create a file */
+s32 fsapi_create(struct inode *inode, u8 *path, u8 mode, FILE_ID_T *fid)
+{
+ s32 err;
+ struct super_block *sb = inode->i_sb;
+
+ /* check the validity of pointer parameters */
+ ASSERT(fid && path);
+
+ if (unlikely(!strlen(path)))
+ return -EINVAL;
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ err = fscore_create(inode, path, mode, fid);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_create);
+
+/* read the target string of symlink */
+s32 fsapi_read_link(struct inode *inode, FILE_ID_T *fid, void *buffer, u64 count, u64 *rcount)
+{
+ s32 err;
+ struct super_block *sb = inode->i_sb;
+
+ /* check the validity of pointer parameters */
+ ASSERT(fid && buffer);
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ err = fscore_read_link(inode, fid, buffer, count, rcount);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_read_link);
+
+/* write the target string of symlink */
+s32 fsapi_write_link(struct inode *inode, FILE_ID_T *fid, void *buffer, u64 count, u64 *wcount)
+{
+ s32 err;
+ struct super_block *sb = inode->i_sb;
+
+ /* check the validity of pointer parameters */
+ ASSERT(fid && buffer);
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ err = fscore_write_link(inode, fid, buffer, count, wcount);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_write_link);
+
+/* resize the file length */
+s32 fsapi_truncate(struct inode *inode, u64 old_size, u64 new_size)
+{
+ s32 err;
+ struct super_block *sb = inode->i_sb;
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ TMSG("%s entered (inode %p size %llu)\n", __func__, inode, new_size);
+ err = fscore_truncate(inode, old_size, new_size);
+ TMSG("%s exitted (%d)\n", __func__, err);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_truncate);
+
+/* rename or move an old file to a new file */
+s32 fsapi_rename(struct inode *old_parent_inode, FILE_ID_T *fid,
+ struct inode *new_parent_inode, struct dentry *new_dentry)
+{
+ s32 err;
+ struct super_block *sb = old_parent_inode->i_sb;
+
+ /* check the validity of pointer parameters */
+ ASSERT(fid);
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ err = fscore_rename(old_parent_inode, fid, new_parent_inode, new_dentry);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_rename);
+
+/* remove a file */
+s32 fsapi_remove(struct inode *inode, FILE_ID_T *fid)
+{
+ s32 err;
+ struct super_block *sb = inode->i_sb;
+
+ /* check the validity of pointer parameters */
+ ASSERT(fid);
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ err = fscore_remove(inode, fid);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_remove);
+
+/* get the information of a given file */
+s32 fsapi_read_inode(struct inode *inode, DIR_ENTRY_T *info)
+{
+ s32 err;
+ struct super_block *sb = inode->i_sb;
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ TMSG("%s entered (inode %p info %p\n", __func__, inode, info);
+ err = fscore_read_inode(inode, info);
+ TMSG("%s exited (err:%d)\n", __func__, err);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_read_inode);
+
+/* set the information of a given file */
+s32 fsapi_write_inode(struct inode *inode, DIR_ENTRY_T *info, int sync)
+{
+ s32 err;
+ struct super_block *sb = inode->i_sb;
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ TMSG("%s entered (inode %p info %p sync:%d\n",
+ __func__, inode, info, sync);
+ err = fscore_write_inode(inode, info, sync);
+ TMSG("%s exited (err:%d)\n", __func__, err);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_write_inode);
+
+/* return the cluster number in the given cluster offset */
+s32 fsapi_map_clus(struct inode *inode, u32 clu_offset, u32 *clu, int dest)
+{
+ s32 err;
+ struct super_block *sb = inode->i_sb;
+
+ /* check the validity of pointer parameters */
+ ASSERT(clu);
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ TMSG("%s entered (inode:%p clus:%08x dest:%d\n",
+ __func__, inode, *clu, dest);
+ err = fscore_map_clus(inode, clu_offset, clu, dest);
+ TMSG("%s exited (clu:%08x err:%d)\n", __func__, *clu, err);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_map_clus);
+
+/* reserve a cluster */
+s32 fsapi_reserve_clus(struct inode *inode)
+{
+ s32 err;
+ struct super_block *sb = inode->i_sb;
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ TMSG("%s entered (inode:%p)\n", __func__, inode);
+ err = fscore_reserve_clus(inode);
+ TMSG("%s exited (err:%d)\n", __func__, err);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_reserve_clus);
+
+/*----------------------------------------------------------------------*/
+/* Directory Operation Functions */
+/*----------------------------------------------------------------------*/
+
+/* create(make) a directory */
+s32 fsapi_mkdir(struct inode *inode, u8 *path, FILE_ID_T *fid)
+{
+ s32 err;
+ struct super_block *sb = inode->i_sb;
+
+ /* check the validity of pointer parameters */
+ ASSERT(fid && path);
+
+ if (unlikely(!strlen(path)))
+ return -EINVAL;
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ err = fscore_mkdir(inode, path, fid);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_mkdir);
+
+/* read a directory entry from the opened directory */
+s32 fsapi_readdir(struct inode *inode, DIR_ENTRY_T *dir_entry)
+{
+ s32 err;
+ struct super_block *sb = inode->i_sb;
+
+ /* check the validity of pointer parameters */
+ ASSERT(dir_entry);
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ err = fscore_readdir(inode, dir_entry);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_readdir);
+
+/* remove a directory */
+s32 fsapi_rmdir(struct inode *inode, FILE_ID_T *fid)
+{
+ s32 err;
+ struct super_block *sb = inode->i_sb;
+
+ /* check the validity of pointer parameters */
+ ASSERT(fid);
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ err = fscore_rmdir(inode, fid);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_rmdir);
+
+/* unlink a file;
+ * that is, remove its entry from a directory, but do NOT truncate the data
+ */
+s32 fsapi_unlink(struct inode *inode, FILE_ID_T *fid)
+{
+ s32 err;
+ struct super_block *sb = inode->i_sb;
+
+ /* check the validity of pointer parameters */
+ ASSERT(fid);
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ err = fscore_unlink(inode, fid);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_unlink);
+
+/* reflect the internal dirty flags to VFS bh dirty flags */
+s32 fsapi_cache_flush(struct super_block *sb, int do_sync)
+{
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ fcache_flush(sb, do_sync);
+ dcache_flush(sb, do_sync);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return 0;
+}
+EXPORT_SYMBOL(fsapi_cache_flush);
+
+/* release FAT & buf cache */
+s32 fsapi_cache_release(struct super_block *sb)
+{
+#ifdef CONFIG_SDFAT_DEBUG
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+
+ fcache_release_all(sb);
+ dcache_release_all(sb);
+
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+#endif /* CONFIG_SDFAT_DEBUG */
+ return 0;
+}
+EXPORT_SYMBOL(fsapi_cache_release);
+
+u32 fsapi_get_au_stat(struct super_block *sb, s32 mode)
+{
+ /* volume lock is not required */
+ return fscore_get_au_stat(sb, mode);
+}
+EXPORT_SYMBOL(fsapi_get_au_stat);
+
+/* clear extent cache */
+void fsapi_invalidate_extent(struct inode *inode)
+{
+ /* Volume lock is not required,
+ * because it is only called by evict_inode.
+ * If any other function can call it,
+ * you should check whether volume lock is needed or not.
+ */
+ extent_cache_inval_inode(inode);
+}
+EXPORT_SYMBOL(fsapi_invalidate_extent);
+
+/* check device is ejected */
+s32 fsapi_check_bdi_valid(struct super_block *sb)
+{
+ return fscore_check_bdi_valid(sb);
+}
+EXPORT_SYMBOL(fsapi_check_bdi_valid);
+
+
+
+#ifdef CONFIG_SDFAT_DFR
+/*----------------------------------------------------------------------*/
+/* Defragmentation related */
+/*----------------------------------------------------------------------*/
+s32 fsapi_dfr_get_info(struct super_block *sb, void *arg)
+{
+ /* volume lock is not required */
+ return defrag_get_info(sb, (struct defrag_info_arg *)arg);
+}
+EXPORT_SYMBOL(fsapi_dfr_get_info);
+
+s32 fsapi_dfr_scan_dir(struct super_block *sb, void *args)
+{
+ s32 err;
+
+ /* check the validity of pointer parameters */
+ ASSERT(args);
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ err = defrag_scan_dir(sb, (struct defrag_trav_arg *)args);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_dfr_scan_dir);
+
+s32 fsapi_dfr_validate_clus(struct inode *inode, void *chunk, int skip_prev)
+{
+ s32 err;
+ struct super_block *sb = inode->i_sb;
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ err = defrag_validate_cluster(inode,
+ (struct defrag_chunk_info *)chunk, skip_prev);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_dfr_validate_clus);
+
+s32 fsapi_dfr_reserve_clus(struct super_block *sb, s32 nr_clus)
+{
+ s32 err;
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ err = defrag_reserve_clusters(sb, nr_clus);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_dfr_reserve_clus);
+
+s32 fsapi_dfr_mark_ignore(struct super_block *sb, unsigned int clus)
+{
+ /* volume lock is not required */
+ return defrag_mark_ignore(sb, clus);
+}
+EXPORT_SYMBOL(fsapi_dfr_mark_ignore);
+
+void fsapi_dfr_unmark_ignore_all(struct super_block *sb)
+{
+ /* volume lock is not required */
+ defrag_unmark_ignore_all(sb);
+}
+EXPORT_SYMBOL(fsapi_dfr_unmark_ignore_all);
+
+s32 fsapi_dfr_map_clus(struct inode *inode, u32 clu_offset, u32 *clu)
+{
+ s32 err;
+ struct super_block *sb = inode->i_sb;
+
+ /* check the validity of pointer parameters */
+ ASSERT(clu);
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ err = defrag_map_cluster(inode, clu_offset, clu);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+
+ return err;
+}
+EXPORT_SYMBOL(fsapi_dfr_map_clus);
+
+void fsapi_dfr_writepage_endio(struct page *page)
+{
+ /* volume lock is not required */
+ defrag_writepage_end_io(page);
+}
+EXPORT_SYMBOL(fsapi_dfr_writepage_endio);
+
+void fsapi_dfr_update_fat_prev(struct super_block *sb, int force)
+{
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ defrag_update_fat_prev(sb, force);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+}
+EXPORT_SYMBOL(fsapi_dfr_update_fat_prev);
+
+void fsapi_dfr_update_fat_next(struct super_block *sb)
+{
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ defrag_update_fat_next(sb);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+}
+EXPORT_SYMBOL(fsapi_dfr_update_fat_next);
+
+void fsapi_dfr_check_discard(struct super_block *sb)
+{
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ defrag_check_discard(sb);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+}
+EXPORT_SYMBOL(fsapi_dfr_check_discard);
+
+void fsapi_dfr_free_clus(struct super_block *sb, u32 clus)
+{
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ defrag_free_cluster(sb, clus);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+}
+EXPORT_SYMBOL(fsapi_dfr_free_clus);
+
+s32 fsapi_dfr_check_dfr_required(struct super_block *sb, int *totalau, int *cleanau, int *fullau)
+{
+ /* volume lock is not required */
+ return defrag_check_defrag_required(sb, totalau, cleanau, fullau);
+}
+EXPORT_SYMBOL(fsapi_dfr_check_dfr_required);
+
+s32 fsapi_dfr_check_dfr_on(struct inode *inode, loff_t start, loff_t end, s32 cancel, const char *caller)
+{
+ /* volume lock is not required */
+ return defrag_check_defrag_on(inode, start, end, cancel, caller);
+}
+EXPORT_SYMBOL(fsapi_dfr_check_dfr_on);
+
+
+
+#ifdef CONFIG_SDFAT_DFR_DEBUG
+void fsapi_dfr_spo_test(struct super_block *sb, int flag, const char *caller)
+{
+ /* volume lock is not required */
+ defrag_spo_test(sb, flag, caller);
+}
+EXPORT_SYMBOL(fsapi_dfr_spo_test);
+#endif
+
+
+#endif /* CONFIG_SDFAT_DFR */
+
+/* end of sdfat_api.c */
diff --git a/fs/sdfat/api.h b/fs/sdfat/api.h
new file mode 100644
index 000000000000..344297ab58ae
--- /dev/null
+++ b/fs/sdfat/api.h
@@ -0,0 +1,409 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SDFAT_API_H
+#define _SDFAT_API_H
+
+#include "config.h"
+#include "sdfat_fs.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+
+/*----------------------------------------------------------------------*/
+/* Configure Constant & Macro Definitions */
+/*----------------------------------------------------------------------*/
+/* cache size (in number of sectors) */
+/* (should be an exponential value of 2) */
+#define FAT_CACHE_SIZE 128
+#define FAT_CACHE_HASH_SIZE 64
+#define BUF_CACHE_SIZE 256
+#define BUF_CACHE_HASH_SIZE 64
+
+/* Read-ahead related */
+/* First config vars. should be pow of 2 */
+#define FCACHE_MAX_RA_SIZE (PAGE_SIZE)
+#define DCACHE_MAX_RA_SIZE (128*1024)
+
+/*----------------------------------------------------------------------*/
+/* Constant & Macro Definitions */
+/*----------------------------------------------------------------------*/
+/* type values */
+#define TYPE_UNUSED 0x0000
+#define TYPE_DELETED 0x0001
+#define TYPE_INVALID 0x0002
+#define TYPE_CRITICAL_PRI 0x0100
+#define TYPE_BITMAP 0x0101
+#define TYPE_UPCASE 0x0102
+#define TYPE_VOLUME 0x0103
+#define TYPE_DIR 0x0104
+#define TYPE_FILE 0x011F
+#define TYPE_SYMLINK 0x015F
+#define TYPE_CRITICAL_SEC 0x0200
+#define TYPE_STREAM 0x0201
+#define TYPE_EXTEND 0x0202
+#define TYPE_ACL 0x0203
+#define TYPE_BENIGN_PRI 0x0400
+#define TYPE_GUID 0x0401
+#define TYPE_PADDING 0x0402
+#define TYPE_ACLTAB 0x0403
+#define TYPE_BENIGN_SEC 0x0800
+#define TYPE_ALL 0x0FFF
+
+/* eio values */
+#define SDFAT_EIO_NONE (0x00000000)
+#define SDFAT_EIO_READ (0x00000001)
+#define SDFAT_EIO_WRITE (0x00000002)
+#define SDFAT_EIO_BDI (0x00000004)
+
+/* modes for volume allocation unit status */
+#define VOL_AU_STAT_TOTAL (0)
+#define VOL_AU_STAT_CLEAN (1)
+#define VOL_AU_STAT_FULL (2)
+
+/*----------------------------------------------------------------------*/
+/* NLS Type Definitions */
+/*----------------------------------------------------------------------*/
+
+/* DOS name structure */
+typedef struct {
+ u8 name[DOS_NAME_LENGTH];
+ u8 name_case;
+} DOS_NAME_T;
+
+/* unicode name structure */
+typedef struct {
+ u16 name[MAX_NAME_LENGTH+3]; /* +3 for the terminating null and for name conversion */
+ u16 name_hash;
+ u8 name_len;
+} UNI_NAME_T;
+
+/*----------------------------------------------------------------------*/
+/* Type Definitions */
+/*----------------------------------------------------------------------*/
+/* should be merged into DATE_TIME_T */
+typedef union {
+ struct {
+ u8 off : 7;
+ u8 valid : 1;
+ };
+ u8 value;
+} TIMEZONE_T;
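+
+/*
+ * Illustrative sketch (assumes the exFAT on-disk convention of 15-minute
+ * UTC-offset units with a validity bit): encoding UTC+09:00 would be:
+ *
+ *	TIMEZONE_T tz;
+ *
+ *	tz.off   = (9 * 60) / 15;	// 36 quarter-hours east of UTC
+ *	tz.valid = 1;			// the offset field is meaningful
+ */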
+
+typedef struct {
+ u16 sec; /* 0 ~ 59 */
+ u16 min; /* 0 ~ 59 */
+ u16 hour; /* 0 ~ 23 */
+ u16 day; /* 1 ~ 31 */
+ u16 mon; /* 1 ~ 12 */
+ u16 year; /* 0 ~ 127 (since 1980) */
+ TIMEZONE_T tz;
+} TIMESTAMP_T;
+
+typedef struct {
+ u16 Year;
+ u16 Month;
+ u16 Day;
+ u16 Hour;
+ u16 Minute;
+ u16 Second;
+ u16 MilliSecond;
+ TIMEZONE_T Timezone;
+} DATE_TIME_T;
+
+typedef struct {
+ u64 Offset; // start sector number of the partition
+ u64 Size; // in sectors
+} PART_INFO_T;
+
+typedef struct {
+ u32 SecSize; // sector size in bytes
+ u64 DevSize; // block device size in sectors
+} DEV_INFO_T;
+
+typedef struct {
+ u32 FatType;
+ u32 ClusterSize;
+ u32 NumClusters;
+ u32 FreeClusters;
+ u32 UsedClusters;
+} VOL_INFO_T;
+
+/* directory structure */
+typedef struct {
+ u32 dir;
+ u32 size;
+ u8 flags;
+} CHAIN_T;
+
+/* hint structure */
+typedef struct {
+ u32 clu;
+ union {
+ u32 off; // cluster offset
+ s32 eidx; // entry index
+ };
+} HINT_T;
+
+typedef struct {
+ spinlock_t cache_lru_lock;
+ struct list_head cache_lru;
+ s32 nr_caches;
+ u32 cache_valid_id; // for avoiding the race between alloc and free
+} EXTENT_T;
+
+/* first empty entry hint information */
+typedef struct {
+ s32 eidx; // entry index of a directory
+ s32 count; // count of contiguous empty entries
+ CHAIN_T cur; // the cluster containing the first empty slot
+} HINT_FEMP_T;
+
+/* file id structure */
+typedef struct {
+ CHAIN_T dir;
+ s32 entry;
+ u32 type;
+ u32 attr;
+ u32 start_clu;
+ u64 size;
+ u8 flags;
+ u8 reserved[3]; // padding
+ u32 version; // copy of the low 32 bits of i_version, used to validate hint_stat
+ s64 rwoffset; // file offset or dentry index for readdir
+ EXTENT_T extent; // extent cache for a file
+ HINT_T hint_bmap; // hint for cluster last accessed
+ HINT_T hint_stat; // hint for entry index we try to lookup next time
+ HINT_FEMP_T hint_femp; // hint for first empty entry
+} FILE_ID_T;
+
+typedef struct {
+ s8 *lfn;
+ s8 *sfn;
+ s32 lfnbuf_len; // usually MAX_UNINAME_BUF_SIZE
+ s32 sfnbuf_len; // usually MAX_DOSNAME_BUF_SIZE; used only for vfat, not for exfat
+} DENTRY_NAMEBUF_T;
+
+typedef struct {
+ u32 Attr;
+ u64 Size;
+ u32 NumSubdirs;
+ DATE_TIME_T CreateTimestamp;
+ DATE_TIME_T ModifyTimestamp;
+ DATE_TIME_T AccessTimestamp;
+ DENTRY_NAMEBUF_T NameBuf;
+} DIR_ENTRY_T;
+
+/* cache information */
+typedef struct __cache_entry {
+ struct __cache_entry *next;
+ struct __cache_entry *prev;
+ struct {
+ struct __cache_entry *next;
+ struct __cache_entry *prev;
+ } hash;
+ u64 sec;
+ u32 flag;
+ struct buffer_head *bh;
+} cache_ent_t;
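+
+/*
+ * Illustrative note (based on cache.c in this patch): each cache_ent_t is
+ * linked twice -- into a circular LRU list through next/prev, and into a
+ * hash bucket through hash.next/hash.prev -- so aging and lookup stay
+ * independent. A lookup therefore walks only one bucket:
+ *
+ *	cache_ent_t *bp;
+ *
+ *	for (bp = hp->hash.next; bp != hp; bp = bp->hash.next)
+ *		if (bp->sec == sec)
+ *			return bp;	// found without touching LRU order
+ */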
+
+/*----------------------------------------------------------------------*/
+/* Type Definitions : Wrapper & In-Core */
+/*----------------------------------------------------------------------*/
+typedef struct __FATENT_OPS_T {
+ s32 (*ent_get)(struct super_block *sb, u32 loc, u32 *content);
+ s32 (*ent_set)(struct super_block *sb, u32 loc, u32 content);
+} FATENT_OPS_T;
+
+typedef struct {
+ s32 (*alloc_cluster)(struct super_block *, u32, CHAIN_T *, s32);
+ s32 (*free_cluster)(struct super_block *, CHAIN_T *, s32);
+ s32 (*count_used_clusters)(struct super_block *, u32 *);
+ s32 (*init_dir_entry)(struct super_block *, CHAIN_T *, s32, u32, u32, u64);
+ s32 (*init_ext_entry)(struct super_block *, CHAIN_T *, s32, s32, UNI_NAME_T *, DOS_NAME_T *);
+ s32 (*find_dir_entry)(struct super_block *, FILE_ID_T *, CHAIN_T *, UNI_NAME_T *, s32, DOS_NAME_T *, u32);
+ s32 (*delete_dir_entry)(struct super_block *, CHAIN_T *, s32, s32, s32);
+ void (*get_uniname_from_ext_entry)(struct super_block *, CHAIN_T *, s32, u16 *);
+ s32 (*count_ext_entries)(struct super_block *, CHAIN_T *, s32, DENTRY_T *);
+ s32 (*calc_num_entries)(UNI_NAME_T *);
+ s32 (*check_max_dentries)(FILE_ID_T *);
+ u32 (*get_entry_type)(DENTRY_T *);
+ void (*set_entry_type)(DENTRY_T *, u32);
+ u32 (*get_entry_attr)(DENTRY_T *);
+ void (*set_entry_attr)(DENTRY_T *, u32);
+ u8 (*get_entry_flag)(DENTRY_T *);
+ void (*set_entry_flag)(DENTRY_T *, u8);
+ u32 (*get_entry_clu0)(DENTRY_T *);
+ void (*set_entry_clu0)(DENTRY_T *, u32);
+ u64 (*get_entry_size)(DENTRY_T *);
+ void (*set_entry_size)(DENTRY_T *, u64);
+ void (*get_entry_time)(DENTRY_T *, TIMESTAMP_T *, u8);
+ void (*set_entry_time)(DENTRY_T *, TIMESTAMP_T *, u8);
+ u32 (*get_au_stat)(struct super_block *, s32);
+} FS_FUNC_T;
+
+typedef struct __FS_INFO_T {
+ s32 bd_opened; // opened or not
+ u32 vol_type; // volume FAT type
+ u32 vol_id; // volume serial number
+ u64 num_sectors; // num of sectors in volume
+ u32 num_clusters; // num of clusters in volume
+ u32 cluster_size; // cluster size in bytes
+ u32 cluster_size_bits;
+ u32 sect_per_clus; // cluster size in sectors
+ u32 sect_per_clus_bits;
+ u64 FAT1_start_sector; // FAT1 start sector
+ u64 FAT2_start_sector; // FAT2 start sector
+ u64 root_start_sector; // root dir start sector
+ u64 data_start_sector; // data area start sector
+ u32 num_FAT_sectors; // num of FAT sectors
+ u32 root_dir; // root dir cluster
+ u32 dentries_in_root; // num of dentries in root dir
+ u32 dentries_per_clu; // num of dentries per cluster
+ u32 vol_flag; // volume dirty flag
+ struct buffer_head *pbr_bh; // buffer_head of PBR sector
+
+ u32 map_clu; // allocation bitmap start cluster
+ u32 map_sectors; // num of allocation bitmap sectors
+ struct buffer_head **vol_amap; // allocation bitmap
+
+ u16 **vol_utbl; // upcase table
+
+ u32 clu_srch_ptr; // cluster search pointer
+ u32 used_clusters; // number of used clusters
+
+ u32 prev_eio; // block device operation error flag
+
+ FS_FUNC_T *fs_func;
+ FATENT_OPS_T *fatent_ops;
+
+ s32 reserved_clusters; // # of reserved clusters (DA)
+ void *amap; // AU Allocation Map
+
+ /* fat cache */
+ struct {
+ cache_ent_t pool[FAT_CACHE_SIZE];
+ cache_ent_t lru_list;
+ cache_ent_t hash_list[FAT_CACHE_HASH_SIZE];
+ } fcache;
+
+ /* meta cache */
+ struct {
+ cache_ent_t pool[BUF_CACHE_SIZE];
+ cache_ent_t lru_list;
+ cache_ent_t keep_list; // CACHEs in this list will not be kicked by normal lru operations
+ cache_ent_t hash_list[BUF_CACHE_HASH_SIZE];
+ } dcache;
+} FS_INFO_T;
+
+/*======================================================================*/
+/* */
+/* API FUNCTION DECLARATIONS */
+/* (CHANGE THIS PART IF REQUIRED) */
+/* */
+/*======================================================================*/
+
+/*----------------------------------------------------------------------*/
+/* External Function Declarations */
+/*----------------------------------------------------------------------*/
+
+/* file system initialization & shutdown functions */
+s32 fsapi_init(void);
+s32 fsapi_shutdown(void);
+
+/* volume management functions */
+s32 fsapi_mount(struct super_block *sb);
+s32 fsapi_umount(struct super_block *sb);
+s32 fsapi_statfs(struct super_block *sb, VOL_INFO_T *info);
+s32 fsapi_sync_fs(struct super_block *sb, s32 do_sync);
+s32 fsapi_set_vol_flags(struct super_block *sb, u16 new_flag, s32 always_sync);
+
+/* file management functions */
+s32 fsapi_lookup(struct inode *inode, u8 *path, FILE_ID_T *fid);
+s32 fsapi_create(struct inode *inode, u8 *path, u8 mode, FILE_ID_T *fid);
+s32 fsapi_read_link(struct inode *inode, FILE_ID_T *fid, void *buffer, u64 count, u64 *rcount);
+s32 fsapi_write_link(struct inode *inode, FILE_ID_T *fid, void *buffer, u64 count, u64 *wcount);
+s32 fsapi_remove(struct inode *inode, FILE_ID_T *fid); /* unlink and truncate */
+s32 fsapi_truncate(struct inode *inode, u64 old_size, u64 new_size);
+s32 fsapi_rename(struct inode *old_parent_inode, FILE_ID_T *fid,
+ struct inode *new_parent_inode, struct dentry *new_dentry);
+s32 fsapi_unlink(struct inode *inode, FILE_ID_T *fid);
+s32 fsapi_read_inode(struct inode *inode, DIR_ENTRY_T *info);
+s32 fsapi_write_inode(struct inode *inode, DIR_ENTRY_T *info, int sync);
+s32 fsapi_map_clus(struct inode *inode, u32 clu_offset, u32 *clu, int dest);
+s32 fsapi_reserve_clus(struct inode *inode);
+
+/* directory management functions */
+s32 fsapi_mkdir(struct inode *inode, u8 *path, FILE_ID_T *fid);
+s32 fsapi_readdir(struct inode *inode, DIR_ENTRY_T *dir_entry);
+s32 fsapi_rmdir(struct inode *inode, FILE_ID_T *fid);
+
+/* FAT & buf cache functions */
+s32 fsapi_cache_flush(struct super_block *sb, int do_sync);
+s32 fsapi_cache_release(struct super_block *sb);
+
+/* extra info functions */
+u32 fsapi_get_au_stat(struct super_block *sb, s32 mode);
+
+/* extent cache functions */
+void fsapi_invalidate_extent(struct inode *inode);
+
+/* bdev management */
+s32 fsapi_check_bdi_valid(struct super_block *sb);
+
+#ifdef CONFIG_SDFAT_DFR
+/*----------------------------------------------------------------------*/
+/* Defragmentation related */
+/*----------------------------------------------------------------------*/
+
+s32 fsapi_dfr_get_info(struct super_block *sb, void *arg);
+
+s32 fsapi_dfr_scan_dir(struct super_block *sb, void *args);
+
+s32 fsapi_dfr_validate_clus(struct inode *inode, void *chunk, int skip_prev);
+s32 fsapi_dfr_reserve_clus(struct super_block *sb, s32 nr_clus);
+s32 fsapi_dfr_mark_ignore(struct super_block *sb, unsigned int clus);
+void fsapi_dfr_unmark_ignore_all(struct super_block *sb);
+
+s32 fsapi_dfr_map_clus(struct inode *inode, u32 clu_offset, u32 *clu);
+void fsapi_dfr_writepage_endio(struct page *page);
+
+void fsapi_dfr_update_fat_prev(struct super_block *sb, int force);
+void fsapi_dfr_update_fat_next(struct super_block *sb);
+void fsapi_dfr_check_discard(struct super_block *sb);
+void fsapi_dfr_free_clus(struct super_block *sb, u32 clus);
+
+s32 fsapi_dfr_check_dfr_required(struct super_block *sb, int *totalau, int *cleanau, int *fullau);
+s32 fsapi_dfr_check_dfr_on(struct inode *inode, loff_t start, loff_t end, s32 cancel, const char *caller);
+
+
+#ifdef CONFIG_SDFAT_DFR_DEBUG
+void fsapi_dfr_spo_test(struct super_block *sb, int flag, const char *caller);
+#endif /* CONFIG_SDFAT_DFR_DEBUG */
+
+#endif /* CONFIG_SDFAT_DFR */
+
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* _SDFAT_API_H */
+
+/* end of api.h */
diff --git a/fs/sdfat/blkdev.c b/fs/sdfat/blkdev.c
new file mode 100644
index 000000000000..264c670df0f0
--- /dev/null
+++ b/fs/sdfat/blkdev.c
@@ -0,0 +1,416 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/************************************************************************/
+/* */
+/* PROJECT : exFAT & FAT12/16/32 File System */
+/* FILE : blkdev.c */
+/* PURPOSE : sdFAT Block Device Driver Glue Layer */
+/* */
+/*----------------------------------------------------------------------*/
+/* NOTES */
+/* */
+/************************************************************************/
+
+#include <linux/blkdev.h>
+#include <linux/log2.h>
+#include <linux/backing-dev.h>
+
+#include "sdfat.h"
+
+/*----------------------------------------------------------------------*/
+/* Constant & Macro Definitions */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/* Global Variable Definitions */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/* Local Variable Definitions */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/* FUNCTIONS WHICH HAVE KERNEL VERSION DEPENDENCY */
+/*----------------------------------------------------------------------*/
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0)
+ /* EMPTY */
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0) */
+static struct backing_dev_info *inode_to_bdi(struct inode *bd_inode)
+{
+ return bd_inode->i_mapping->backing_dev_info;
+}
+#endif
+
+/*======================================================================*/
+/* Function Definitions */
+/*======================================================================*/
+s32 bdev_open_dev(struct super_block *sb)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ if (fsi->bd_opened)
+ return 0;
+
+ fsi->bd_opened = true;
+ return 0;
+}
+
+s32 bdev_close_dev(struct super_block *sb)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ fsi->bd_opened = false;
+ return 0;
+}
+
+static inline s32 block_device_ejected(struct super_block *sb)
+{
+ struct inode *bd_inode = sb->s_bdev->bd_inode;
+ struct backing_dev_info *bdi = inode_to_bdi(bd_inode);
+
+ return (bdi->dev == NULL);
+}
+
+s32 bdev_check_bdi_valid(struct super_block *sb)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ if (block_device_ejected(sb)) {
+ if (!(fsi->prev_eio & SDFAT_EIO_BDI)) {
+ fsi->prev_eio |= SDFAT_EIO_BDI;
+ sdfat_log_msg(sb, KERN_ERR, "%s: block device is "
+ "eliminated.(bdi:%p)", __func__, sb->s_bdi);
+ sdfat_debug_warn_on(1);
+ }
+ return -ENXIO;
+ }
+
+ return 0;
+}
+
+
+/* Make a readahead request */
+s32 bdev_readahead(struct super_block *sb, u64 secno, u64 num_secs)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ u32 sects_per_page = (PAGE_SIZE >> sb->s_blocksize_bits);
+ struct blk_plug plug;
+ u64 i;
+
+ if (!fsi->bd_opened)
+ return -EIO;
+
+ blk_start_plug(&plug);
+ for (i = 0; i < num_secs; i++) {
+ if (i && !(i & (sects_per_page - 1)))
+ blk_flush_plug(current);
+ sb_breadahead(sb, (sector_t)(secno + i));
+ }
+ blk_finish_plug(&plug);
+
+ return 0;
+}
+
+s32 bdev_mread(struct super_block *sb, u64 secno, struct buffer_head **bh, u64 num_secs, s32 read)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ u8 blksize_bits = sb->s_blocksize_bits;
+#ifdef CONFIG_SDFAT_DBG_IOCTL
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ long flags = sbi->debug_flags;
+
+ if (flags & SDFAT_DEBUGFLAGS_ERROR_RW)
+ return -EIO;
+#endif /* CONFIG_SDFAT_DBG_IOCTL */
+
+ if (!fsi->bd_opened)
+ return -EIO;
+
+ brelse(*bh);
+
+ if (read)
+ *bh = __bread(sb->s_bdev, (sector_t)secno, num_secs << blksize_bits);
+ else
+ *bh = __getblk(sb->s_bdev, (sector_t)secno, num_secs << blksize_bits);
+
+ /* read successfully */
+ if (*bh)
+ return 0;
+
+ /*
+ * patch 1.2.4 : emit this warning message only once per volume.
+ */
+ if (!(fsi->prev_eio & SDFAT_EIO_READ)) {
+ fsi->prev_eio |= SDFAT_EIO_READ;
+ sdfat_log_msg(sb, KERN_ERR, "%s: No bh. I/O error.", __func__);
+ sdfat_debug_warn_on(1);
+ }
+
+ return -EIO;
+}
+
+s32 bdev_mwrite(struct super_block *sb, u64 secno, struct buffer_head *bh, u64 num_secs, s32 sync)
+{
+ u64 count;
+ struct buffer_head *bh2;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+#ifdef CONFIG_SDFAT_DBG_IOCTL
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ long flags = sbi->debug_flags;
+
+ if (flags & SDFAT_DEBUGFLAGS_ERROR_RW)
+ return -EIO;
+#endif /* CONFIG_SDFAT_DBG_IOCTL */
+
+ if (!fsi->bd_opened)
+ return -EIO;
+
+ if (secno == bh->b_blocknr) {
+ set_buffer_uptodate(bh);
+ mark_buffer_dirty(bh);
+ if (sync && (sync_dirty_buffer(bh) != 0))
+ return -EIO;
+ } else {
+ count = num_secs << sb->s_blocksize_bits;
+
+ bh2 = __getblk(sb->s_bdev, (sector_t)secno, count);
+
+ if (!bh2)
+ goto no_bh;
+
+ lock_buffer(bh2);
+ memcpy(bh2->b_data, bh->b_data, count);
+ set_buffer_uptodate(bh2);
+ mark_buffer_dirty(bh2);
+ unlock_buffer(bh2);
+ if (sync && (sync_dirty_buffer(bh2) != 0)) {
+ __brelse(bh2);
+ goto no_bh;
+ }
+ __brelse(bh2);
+ }
+ return 0;
+no_bh:
+ /*
+ * patch 1.2.4 : emit this warning message only once per volume.
+ */
+ if (!(fsi->prev_eio & SDFAT_EIO_WRITE)) {
+ fsi->prev_eio |= SDFAT_EIO_WRITE;
+ sdfat_log_msg(sb, KERN_ERR, "%s: No bh. I/O error.", __func__);
+ sdfat_debug_warn_on(1);
+ }
+
+ return -EIO;
+}
+
+s32 bdev_sync_all(struct super_block *sb)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+#ifdef CONFIG_SDFAT_DBG_IOCTL
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ long flags = sbi->debug_flags;
+
+ if (flags & SDFAT_DEBUGFLAGS_ERROR_RW)
+ return -EIO;
+#endif /* CONFIG_SDFAT_DBG_IOCTL */
+
+ if (!fsi->bd_opened)
+ return -EIO;
+
+ return sync_blockdev(sb->s_bdev);
+}
+
+/*
+ * Sector Read/Write Functions
+ */
+s32 read_sect(struct super_block *sb, u64 sec, struct buffer_head **bh, s32 read)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ BUG_ON(!bh);
+ if ((sec >= fsi->num_sectors) && (fsi->num_sectors > 0)) {
+ sdfat_fs_error_ratelimit(sb,
+ "%s: out of range (sect:%llu)", __func__, sec);
+ return -EIO;
+ }
+
+ if (bdev_mread(sb, sec, bh, 1, read)) {
+ sdfat_fs_error_ratelimit(sb,
+ "%s: I/O error (sect:%llu)", __func__, sec);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+s32 write_sect(struct super_block *sb, u64 sec, struct buffer_head *bh, s32 sync)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ BUG_ON(!bh);
+ if ((sec >= fsi->num_sectors) && (fsi->num_sectors > 0)) {
+ sdfat_fs_error_ratelimit(sb,
+ "%s: out of range (sect:%llu)", __func__, sec);
+ return -EIO;
+ }
+
+ if (bdev_mwrite(sb, sec, bh, 1, sync)) {
+ sdfat_fs_error_ratelimit(sb, "%s: I/O error (sect:%llu)",
+ __func__, sec);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+s32 read_msect(struct super_block *sb, u64 sec, struct buffer_head **bh, u64 num_secs, s32 read)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ BUG_ON(!bh);
+ if (((sec+num_secs) > fsi->num_sectors) && (fsi->num_sectors > 0)) {
+ sdfat_fs_error_ratelimit(sb, "%s: out of range(sect:%llu len:%llu)",
+ __func__, sec, num_secs);
+ return -EIO;
+ }
+
+ if (bdev_mread(sb, sec, bh, num_secs, read)) {
+ sdfat_fs_error_ratelimit(sb, "%s: I/O error (sect:%llu len:%llu)",
+ __func__, sec, num_secs);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+s32 write_msect(struct super_block *sb, u64 sec, struct buffer_head *bh, u64 num_secs, s32 sync)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ BUG_ON(!bh);
+ if (((sec+num_secs) > fsi->num_sectors) && (fsi->num_sectors > 0)) {
+ sdfat_fs_error_ratelimit(sb, "%s: out of range(sect:%llu len:%llu)",
+ __func__, sec, num_secs);
+ return -EIO;
+ }
+
+
+ if (bdev_mwrite(sb, sec, bh, num_secs, sync)) {
+ sdfat_fs_error_ratelimit(sb, "%s: I/O error (sect:%llu len:%llu)",
+ __func__, sec, num_secs);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static inline void __blkdev_write_bhs(struct buffer_head **bhs, s32 nr_bhs)
+{
+ s32 i;
+
+ for (i = 0; i < nr_bhs; i++)
+ write_dirty_buffer(bhs[i], WRITE);
+}
+
+static inline s32 __blkdev_sync_bhs(struct buffer_head **bhs, s32 nr_bhs)
+{
+ s32 i, err = 0;
+
+ for (i = 0; i < nr_bhs; i++) {
+ wait_on_buffer(bhs[i]);
+ if (!err && !buffer_uptodate(bhs[i]))
+ err = -EIO;
+ }
+ return err;
+}
+
+static inline s32 __buffer_zeroed(struct super_block *sb, u64 blknr, u64 num_secs)
+{
+ struct buffer_head *bhs[MAX_BUF_PER_PAGE];
+ s32 nr_bhs = MAX_BUF_PER_PAGE;
+ u64 last_blknr = blknr + num_secs;
+ s32 err, i, n;
+ struct blk_plug plug;
+
+ /* Zeroing the unused blocks on this cluster */
+ n = 0;
+ blk_start_plug(&plug);
+ while (blknr < last_blknr) {
+ bhs[n] = sb_getblk(sb, (sector_t)blknr);
+ if (!bhs[n]) {
+ err = -ENOMEM;
+ blk_finish_plug(&plug);
+ goto error;
+ }
+ memset(bhs[n]->b_data, 0, sb->s_blocksize);
+ set_buffer_uptodate(bhs[n]);
+ mark_buffer_dirty(bhs[n]);
+
+ n++;
+ blknr++;
+
+ if (blknr == last_blknr)
+ break;
+
+ if (n == nr_bhs) {
+ __blkdev_write_bhs(bhs, n);
+
+ for (i = 0; i < n; i++)
+ brelse(bhs[i]);
+ n = 0;
+ }
+ }
+ __blkdev_write_bhs(bhs, n);
+ blk_finish_plug(&plug);
+
+ err = __blkdev_sync_bhs(bhs, n);
+ if (err)
+ goto error;
+
+ for (i = 0; i < n; i++)
+ brelse(bhs[i]);
+
+ return 0;
+
+error:
+ EMSG("%s: failed zeroed sect %llu\n", __func__, blknr);
+ for (i = 0; i < n; i++)
+ bforget(bhs[i]);
+
+ return err;
+}
+
+s32 write_msect_zero(struct super_block *sb, u64 sec, u64 num_secs)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ if (((sec+num_secs) > fsi->num_sectors) && (fsi->num_sectors > 0)) {
+ sdfat_fs_error_ratelimit(sb, "%s: out of range(sect:%llu len:%llu)",
+ __func__, sec, num_secs);
+ return -EIO;
+ }
+
+ /* Just return -EAGAIN if it fails */
+ if (__buffer_zeroed(sb, sec, num_secs))
+ return -EAGAIN;
+
+ return 0;
+} /* end of write_msect_zero */
+
+/* end of blkdev.c */
diff --git a/fs/sdfat/cache.c b/fs/sdfat/cache.c
new file mode 100644
index 000000000000..8318898646be
--- /dev/null
+++ b/fs/sdfat/cache.c
@@ -0,0 +1,846 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/************************************************************************/
+/* */
+/* PROJECT : exFAT & FAT12/16/32 File System */
+/* FILE : cache.c */
+/* PURPOSE : sdFAT Cache Manager */
+/* (FAT Cache & Buffer Cache) */
+/* */
+/*----------------------------------------------------------------------*/
+/* NOTES */
+/* */
+/* */
+/************************************************************************/
+
+#include <linux/swap.h> /* for mark_page_accessed() */
+#include <asm/unaligned.h>
+
+#include "sdfat.h"
+#include "core.h"
+
+#define DEBUG_HASH_LIST
+#define DEBUG_HASH_PREV (0xAAAA5555)
+#define DEBUG_HASH_NEXT (0x5555AAAA)
+
+/*----------------------------------------------------------------------*/
+/* Global Variable Definitions */
+/*----------------------------------------------------------------------*/
+/* All buffer structures are protected w/ fsi->v_sem */
+
+/*----------------------------------------------------------------------*/
+/* Local Variable Definitions */
+/*----------------------------------------------------------------------*/
+#define LOCKBIT (0x01)
+#define DIRTYBIT (0x02)
+#define KEEPBIT (0x04)
+
+/*----------------------------------------------------------------------*/
+/* Cache handling function declarations */
+/*----------------------------------------------------------------------*/
+static cache_ent_t *__fcache_find(struct super_block *sb, u64 sec);
+static cache_ent_t *__fcache_get(struct super_block *sb);
+static void __fcache_insert_hash(struct super_block *sb, cache_ent_t *bp);
+static void __fcache_remove_hash(cache_ent_t *bp);
+
+static cache_ent_t *__dcache_find(struct super_block *sb, u64 sec);
+static cache_ent_t *__dcache_get(struct super_block *sb);
+static void __dcache_insert_hash(struct super_block *sb, cache_ent_t *bp);
+static void __dcache_remove_hash(cache_ent_t *bp);
+
+/*----------------------------------------------------------------------*/
+/* Static functions */
+/*----------------------------------------------------------------------*/
+static void push_to_mru(cache_ent_t *bp, cache_ent_t *list)
+{
+ bp->next = list->next;
+ bp->prev = list;
+ list->next->prev = bp;
+ list->next = bp;
+}
+
+static void push_to_lru(cache_ent_t *bp, cache_ent_t *list)
+{
+ bp->prev = list->prev;
+ bp->next = list;
+ list->prev->next = bp;
+ list->prev = bp;
+}
+
+static void move_to_mru(cache_ent_t *bp, cache_ent_t *list)
+{
+ bp->prev->next = bp->next;
+ bp->next->prev = bp->prev;
+ push_to_mru(bp, list);
+}
+
+static void move_to_lru(cache_ent_t *bp, cache_ent_t *list)
+{
+ bp->prev->next = bp->next;
+ bp->next->prev = bp->prev;
+ push_to_lru(bp, list);
+}
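+
+/*
+ * Illustrative sketch: the lists above are circular with a sentinel, so
+ * list->next is the most-recently-used entry and list->prev the eviction
+ * candidate. A minimal MRU-to-LRU walk (visit() is hypothetical):
+ *
+ *	cache_ent_t *bp;
+ *
+ *	for (bp = list->next; bp != list; bp = bp->next)
+ *		visit(bp);
+ */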
+
+static inline s32 __check_hash_valid(cache_ent_t *bp)
+{
+#ifdef DEBUG_HASH_LIST
+ if ((bp->hash.next == (cache_ent_t *)DEBUG_HASH_NEXT) ||
+ (bp->hash.prev == (cache_ent_t *)DEBUG_HASH_PREV)) {
+ return -EINVAL;
+ }
+#endif
+ if ((bp->hash.next == bp) || (bp->hash.prev == bp))
+ return -EINVAL;
+
+ return 0;
+}
+
+static inline void __remove_from_hash(cache_ent_t *bp)
+{
+ (bp->hash.prev)->hash.next = bp->hash.next;
+ (bp->hash.next)->hash.prev = bp->hash.prev;
+ bp->hash.next = bp;
+ bp->hash.prev = bp;
+#ifdef DEBUG_HASH_LIST
+ bp->hash.next = (cache_ent_t *)DEBUG_HASH_NEXT;
+ bp->hash.prev = (cache_ent_t *)DEBUG_HASH_PREV;
+#endif
+}
+
+/* Mirror a FAT1 sector to FAT2 (write-through only when sync is set)
+ * sec: sector number in FAT1
+ * bh: buffer_head of sec
+ */
+static inline s32 __fat_copy(struct super_block *sb, u64 sec, struct buffer_head *bh, int sync)
+{
+#ifdef CONFIG_SDFAT_FAT_MIRRORING
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ u64 sec2;
+
+ if (fsi->FAT2_start_sector != fsi->FAT1_start_sector) {
+ sec2 = sec - fsi->FAT1_start_sector + fsi->FAT2_start_sector;
+ BUG_ON(sec2 != (sec + (u64)fsi->num_FAT_sectors));
+
+ MMSG("BD: fat mirroring (%llu in FAT1, %llu in FAT2)\n", sec, sec2);
+ if (write_sect(sb, sec2, bh, sync))
+ return -EIO;
+ }
+#else
+ /* DO NOTHING */
+#endif
+ return 0;
+} /* end of __fat_copy */
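+
+/*
+ * Worked example with a hypothetical layout: if FAT1 starts at sector 32,
+ * FAT2 at sector 132 and num_FAT_sectors == 100, a write to FAT1 sector
+ * 40 is mirrored to:
+ *
+ *	sec2 = 40 - 32 + 132;	// == 140 == 40 + num_FAT_sectors
+ */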
+
+/*
+ * returns 1 if bp was flushed
+ * returns 0 if bp is not dirty
+ * returns a negative error code (-EIO) if an error occurs
+ */
+static s32 __fcache_ent_flush(struct super_block *sb, cache_ent_t *bp, u32 sync)
+{
+ if (!(bp->flag & DIRTYBIT))
+ return 0;
+#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY
+ // Make buffer dirty (XXX: Naive impl.)
+ if (write_sect(sb, bp->sec, bp->bh, 0))
+ return -EIO;
+
+ if (__fat_copy(sb, bp->sec, bp->bh, 0))
+ return -EIO;
+#endif
+ bp->flag &= ~(DIRTYBIT);
+
+ if (sync)
+ sync_dirty_buffer(bp->bh);
+
+ return 1;
+}
+
+static s32 __fcache_ent_discard(struct super_block *sb, cache_ent_t *bp)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ __fcache_remove_hash(bp);
+ bp->sec = ~0;
+ bp->flag = 0;
+
+ if (bp->bh) {
+ __brelse(bp->bh);
+ bp->bh = NULL;
+ }
+ move_to_lru(bp, &fsi->fcache.lru_list);
+ return 0;
+}
+
+u8 *fcache_getblk(struct super_block *sb, u64 sec)
+{
+ cache_ent_t *bp;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ u32 page_ra_count = FCACHE_MAX_RA_SIZE >> sb->s_blocksize_bits;
+
+ bp = __fcache_find(sb, sec);
+ if (bp) {
+ if (bdev_check_bdi_valid(sb)) {
+ __fcache_ent_flush(sb, bp, 0);
+ __fcache_ent_discard(sb, bp);
+ return NULL;
+ }
+ move_to_mru(bp, &fsi->fcache.lru_list);
+ return bp->bh->b_data;
+ }
+
+ bp = __fcache_get(sb);
+ if (!__check_hash_valid(bp))
+ __fcache_remove_hash(bp);
+
+ bp->sec = sec;
+ bp->flag = 0;
+ __fcache_insert_hash(sb, bp);
+
+ /* Naive FAT read-ahead (increase I/O unit to page_ra_count) */
+ if ((sec & (page_ra_count - 1)) == 0)
+ bdev_readahead(sb, sec, (u64)page_ra_count);
+
+ /*
+ * patch 1.2.4 : fix for a buffer_head null pointer exception.
+ *
+ * If read_sect() fails, the fcache entry must be moved to the
+ * EMPTY hash list and to the front of the lru_list.
+ */
+ if (read_sect(sb, sec, &(bp->bh), 1)) {
+ __fcache_ent_discard(sb, bp);
+ return NULL;
+ }
+
+ return bp->bh->b_data;
+}
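+
+/*
+ * Usage sketch (hypothetical caller, not from this patch): a FAT-entry
+ * reader fetches the sector through the cache and treats NULL as I/O
+ * failure, since the entry has already been discarded:
+ *
+ *	u8 *fat_data = fcache_getblk(sb, sec);
+ *
+ *	if (!fat_data)
+ *		return -EIO;
+ */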
+
+static inline int __mark_delayed_dirty(struct super_block *sb, cache_ent_t *bp)
+{
+#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ if (fsi->vol_type == EXFAT)
+ return -ENOTSUPP;
+
+ bp->flag |= DIRTYBIT;
+ return 0;
+#else
+ return -ENOTSUPP;
+#endif
+}
+
+
+
+s32 fcache_modify(struct super_block *sb, u64 sec)
+{
+ cache_ent_t *bp;
+
+ bp = __fcache_find(sb, sec);
+ if (!bp) {
+ sdfat_fs_error(sb, "Can`t find fcache (sec 0x%016llx)", sec);
+ return -EIO;
+ }
+
+ if (!__mark_delayed_dirty(sb, bp))
+ return 0;
+
+ if (write_sect(sb, sec, bp->bh, 0))
+ return -EIO;
+
+ if (__fat_copy(sb, sec, bp->bh, 0))
+ return -EIO;
+
+ return 0;
+}
+
+/*======================================================================*/
+/* Cache Initialization Functions */
+/*======================================================================*/
+s32 meta_cache_init(struct super_block *sb)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ s32 i;
+
+ /* LRU list */
+ fsi->fcache.lru_list.next = &fsi->fcache.lru_list;
+ fsi->fcache.lru_list.prev = fsi->fcache.lru_list.next;
+
+ for (i = 0; i < FAT_CACHE_SIZE; i++) {
+ fsi->fcache.pool[i].sec = ~0;
+ fsi->fcache.pool[i].flag = 0;
+ fsi->fcache.pool[i].bh = NULL;
+ fsi->fcache.pool[i].prev = NULL;
+ fsi->fcache.pool[i].next = NULL;
+ push_to_mru(&(fsi->fcache.pool[i]), &fsi->fcache.lru_list);
+ }
+
+ fsi->dcache.lru_list.next = &fsi->dcache.lru_list;
+ fsi->dcache.lru_list.prev = fsi->dcache.lru_list.next;
+ fsi->dcache.keep_list.next = &fsi->dcache.keep_list;
+ fsi->dcache.keep_list.prev = fsi->dcache.keep_list.next;
+
+ // Initially, all the BUF_CACHEs are in the LRU list
+ for (i = 0; i < BUF_CACHE_SIZE; i++) {
+ fsi->dcache.pool[i].sec = ~0;
+ fsi->dcache.pool[i].flag = 0;
+ fsi->dcache.pool[i].bh = NULL;
+ fsi->dcache.pool[i].prev = NULL;
+ fsi->dcache.pool[i].next = NULL;
+ push_to_mru(&(fsi->dcache.pool[i]), &fsi->dcache.lru_list);
+ }
+
+ /* HASH list */
+ for (i = 0; i < FAT_CACHE_HASH_SIZE; i++) {
+ fsi->fcache.hash_list[i].sec = ~0;
+ fsi->fcache.hash_list[i].hash.next = &(fsi->fcache.hash_list[i]);
+;
+ fsi->fcache.hash_list[i].hash.prev = fsi->fcache.hash_list[i].hash.next;
+ }
+
+ for (i = 0; i < FAT_CACHE_SIZE; i++)
+ __fcache_insert_hash(sb, &(fsi->fcache.pool[i]));
+
+ for (i = 0; i < BUF_CACHE_HASH_SIZE; i++) {
+ fsi->dcache.hash_list[i].sec = ~0;
+ fsi->dcache.hash_list[i].hash.next = &(fsi->dcache.hash_list[i]);
+
+ fsi->dcache.hash_list[i].hash.prev = fsi->dcache.hash_list[i].hash.next;
+ }
+
+ for (i = 0; i < BUF_CACHE_SIZE; i++)
+ __dcache_insert_hash(sb, &(fsi->dcache.pool[i]));
+
+ return 0;
+}
+
+s32 meta_cache_shutdown(struct super_block *sb)
+{
+ return 0;
+}
+
+/*======================================================================*/
+/* FAT Read/Write Functions */
+/*======================================================================*/
+s32 fcache_release_all(struct super_block *sb)
+{
+ s32 ret = 0;
+ cache_ent_t *bp;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ s32 dirtycnt = 0;
+
+ bp = fsi->fcache.lru_list.next;
+ while (bp != &fsi->fcache.lru_list) {
+ s32 ret_tmp = __fcache_ent_flush(sb, bp, 0);
+
+ if (ret_tmp < 0)
+ ret = ret_tmp;
+ else
+ dirtycnt += ret_tmp;
+
+ bp->sec = ~0;
+ bp->flag = 0;
+
+ if (bp->bh) {
+ __brelse(bp->bh);
+ bp->bh = NULL;
+ }
+ bp = bp->next;
+ }
+
+ DMSG("BD:Release / dirty fat cache: %d (err:%d)\n", dirtycnt, ret);
+ return ret;
+}
+
+
+/* flush: propagate the internal DIRTYBIT to the buffer_head */
+s32 fcache_flush(struct super_block *sb, u32 sync)
+{
+ s32 ret = 0;
+ cache_ent_t *bp;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ s32 dirtycnt = 0;
+
+ bp = fsi->fcache.lru_list.next;
+ while (bp != &fsi->fcache.lru_list) {
+ ret = __fcache_ent_flush(sb, bp, sync);
+ if (ret < 0)
+ break;
+
+ dirtycnt += ret;
+ bp = bp->next;
+ }
+
+ MMSG("BD: flush / dirty fat cache: %d (err:%d)\n", dirtycnt, ret);
+ return ret;
+}
+
+static cache_ent_t *__fcache_find(struct super_block *sb, u64 sec)
+{
+ s32 off;
+ cache_ent_t *bp, *hp;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ off = (sec + (sec >> fsi->sect_per_clus_bits)) & (FAT_CACHE_HASH_SIZE - 1);
+ hp = &(fsi->fcache.hash_list[off]);
+ for (bp = hp->hash.next; bp != hp; bp = bp->hash.next) {
+ if (bp->sec == sec) {
+ /*
+ * patch 1.2.4 : for debugging
+ */
+ WARN(!bp->bh, "[SDFAT] fcache has no bh. "
+ "It will make system panic.\n");
+
+ touch_buffer(bp->bh);
+ return bp;
+ }
+ }
+ return NULL;
+}
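+
+/*
+ * Note on the bucket index above: folding (sec >> sect_per_clus_bits)
+ * into the sector number mixes the cluster number into the hash, so
+ * sectors exactly FAT_CACHE_HASH_SIZE apart do not always collide.
+ * Worked example (hypothetical volume with 8 sectors per cluster, i.e.
+ * sect_per_clus_bits == 3, and a 64-entry hash):
+ *
+ *	sec = 100:  (100 + (100 >> 3)) & 63  ==  112 & 63  ==  48
+ *	sec = 101:  (101 + (101 >> 3)) & 63  ==  113 & 63  ==  49
+ */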
+
+static cache_ent_t *__fcache_get(struct super_block *sb)
+{
+ cache_ent_t *bp;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ bp = fsi->fcache.lru_list.prev;
+#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY
+ while (bp->flag & DIRTYBIT) {
+ cache_ent_t *bp_prev = bp->prev;
+
+ bp = bp_prev;
+ if (bp == &fsi->fcache.lru_list) {
+ DMSG("BD: fat cache flooding\n");
+ fcache_flush(sb, 0); // flush all dirty FAT caches
+ bp = fsi->fcache.lru_list.prev;
+ }
+ }
+#endif
+// if (bp->flag & DIRTYBIT)
+// sync_dirty_buffer(bp->bh);
+
+ move_to_mru(bp, &fsi->fcache.lru_list);
+ return bp;
+}
+
+static void __fcache_insert_hash(struct super_block *sb, cache_ent_t *bp)
+{
+ s32 off;
+ cache_ent_t *hp;
+ FS_INFO_T *fsi;
+
+ fsi = &(SDFAT_SB(sb)->fsi);
+ off = (bp->sec + (bp->sec >> fsi->sect_per_clus_bits)) & (FAT_CACHE_HASH_SIZE-1);
+
+ hp = &(fsi->fcache.hash_list[off]);
+ bp->hash.next = hp->hash.next;
+ bp->hash.prev = hp;
+ hp->hash.next->hash.prev = bp;
+ hp->hash.next = bp;
+}
+
+
+static void __fcache_remove_hash(cache_ent_t *bp)
+{
+#ifdef DEBUG_HASH_LIST
+ if ((bp->hash.next == (cache_ent_t *)DEBUG_HASH_NEXT) ||
+ (bp->hash.prev == (cache_ent_t *)DEBUG_HASH_PREV)) {
+ EMSG("%s: FATAL: tried to remove already-removed-cache-entry"
+ "(bp:%p)\n", __func__, bp);
+ return;
+ }
+#endif
+ WARN_ON(bp->flag & DIRTYBIT);
+ __remove_from_hash(bp);
+}
+
+/*======================================================================*/
+/* Buffer Read/Write Functions */
+/*======================================================================*/
+/* Read-ahead a cluster */
+s32 dcache_readahead(struct super_block *sb, u64 sec)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ struct buffer_head *bh;
+ u32 max_ra_count = DCACHE_MAX_RA_SIZE >> sb->s_blocksize_bits;
+ u32 page_ra_count = PAGE_SIZE >> sb->s_blocksize_bits;
+ u32 adj_ra_count = max(fsi->sect_per_clus, page_ra_count);
+ u32 ra_count = min(adj_ra_count, max_ra_count);
+
+ /* Read-ahead is not required */
+ if (fsi->sect_per_clus == 1)
+ return 0;
+
+ if (sec < fsi->data_start_sector) {
+ EMSG("BD: %s: requested sector is invalid(sect:%llu, root:%llu)\n",
+ __func__, sec, fsi->data_start_sector);
+ return -EIO;
+ }
+
+ /* If not aligned to ra_count, fall back to page-sized read-ahead */
+ if ((sec - fsi->data_start_sector) & (ra_count - 1))
+ ra_count = page_ra_count;
+
+ bh = sb_find_get_block(sb, sec);
+ if (!bh || !buffer_uptodate(bh))
+ bdev_readahead(sb, sec, (u64)ra_count);
+
+ brelse(bh);
+
+ return 0;
+}
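+
+/*
+ * Worked example with a hypothetical geometry (512-byte blocks, 4 KiB
+ * pages, 64 sectors per cluster):
+ *
+ *	max_ra_count  = (128 * 1024) >> 9  ==  256
+ *	page_ra_count = 4096 >> 9          ==  8
+ *	adj_ra_count  = max(64, 8)         ==  64
+ *	ra_count      = min(64, 256)       ==  64	// one cluster per read-ahead
+ */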
+
+/*
+ * returns 1 if bp was flushed
+ * returns 0 if bp is not dirty
+ * returns a negative error code (-EIO) if an error occurs
+ */
+static s32 __dcache_ent_flush(struct super_block *sb, cache_ent_t *bp, u32 sync)
+{
+ if (!(bp->flag & DIRTYBIT))
+ return 0;
+#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY
+ // Make buffer dirty (XXX: Naive impl.)
+ if (write_sect(sb, bp->sec, bp->bh, 0))
+ return -EIO;
+#endif
+ bp->flag &= ~(DIRTYBIT);
+
+ if (sync)
+ sync_dirty_buffer(bp->bh);
+
+ return 1;
+}
+
+static s32 __dcache_ent_discard(struct super_block *sb, cache_ent_t *bp)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ MMSG("%s : bp[%p] (sec:%016llx flag:%08x bh:%p) list(prev:%p next:%p) "
+ "hash(prev:%p next:%p)\n", __func__,
+ bp, bp->sec, bp->flag, bp->bh, bp->prev, bp->next,
+ bp->hash.prev, bp->hash.next);
+
+ __dcache_remove_hash(bp);
+ bp->sec = ~0;
+ bp->flag = 0;
+
+ if (bp->bh) {
+ __brelse(bp->bh);
+ bp->bh = NULL;
+ }
+
+ move_to_lru(bp, &fsi->dcache.lru_list);
+ return 0;
+}
+
+u8 *dcache_getblk(struct super_block *sb, u64 sec)
+{
+ cache_ent_t *bp;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ bp = __dcache_find(sb, sec);
+ if (bp) {
+ if (bdev_check_bdi_valid(sb)) {
+ MMSG("%s: found cache(%p, sect:%llu). But invalid BDI\n"
+ , __func__, bp, sec);
+ __dcache_ent_flush(sb, bp, 0);
+ __dcache_ent_discard(sb, bp);
+ return NULL;
+ }
+
+ if (!(bp->flag & KEEPBIT)) // already in keep list
+ move_to_mru(bp, &fsi->dcache.lru_list);
+
+ return bp->bh->b_data;
+ }
+
+ bp = __dcache_get(sb);
+
+ if (!__check_hash_valid(bp))
+ __dcache_remove_hash(bp);
+
+ bp->sec = sec;
+ bp->flag = 0;
+ __dcache_insert_hash(sb, bp);
+
+ if (read_sect(sb, sec, &(bp->bh), 1)) {
+ __dcache_ent_discard(sb, bp);
+ return NULL;
+ }
+
+ return bp->bh->b_data;
+
+}
+
+s32 dcache_modify(struct super_block *sb, u64 sec)
+{
+ s32 ret = -EIO;
+ cache_ent_t *bp;
+
+ set_sb_dirty(sb);
+
+ bp = __dcache_find(sb, sec);
+ if (unlikely(!bp)) {
+ sdfat_fs_error(sb, "Can`t find dcache (sec 0x%016llx)", sec);
+ return -EIO;
+ }
+#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY
+ if (SDFAT_SB(sb)->fsi.vol_type != EXFAT) {
+ bp->flag |= DIRTYBIT;
+ return 0;
+ }
+#endif
+ ret = write_sect(sb, sec, bp->bh, 0);
+
+ if (ret) {
+ DMSG("%s : failed to modify buffer(err:%d, sec:%llu, bp:0x%p)\n",
+ __func__, ret, sec, bp);
+ }
+
+ return ret;
+}
+
+s32 dcache_lock(struct super_block *sb, u64 sec)
+{
+ cache_ent_t *bp;
+
+ bp = __dcache_find(sb, sec);
+ if (likely(bp)) {
+ bp->flag |= LOCKBIT;
+ return 0;
+ }
+
+ EMSG("%s : failed to lock buffer(sec:%llu, bp:0x%p)\n", __func__, sec, bp);
+ return -EIO;
+}
+
+s32 dcache_unlock(struct super_block *sb, u64 sec)
+{
+ cache_ent_t *bp;
+
+ bp = __dcache_find(sb, sec);
+ if (likely(bp)) {
+ bp->flag &= ~(LOCKBIT);
+ return 0;
+ }
+
+ EMSG("%s : failed to unlock buffer (sec:%llu, bp:0x%p)\n", __func__, sec, bp);
+ return -EIO;
+}
+
+s32 dcache_release(struct super_block *sb, u64 sec)
+{
+ cache_ent_t *bp;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ bp = __dcache_find(sb, sec);
+ if (unlikely(!bp))
+ return -ENOENT;
+
+#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY
+ if (bp->flag & DIRTYBIT) {
+ if (write_sect(sb, bp->sec, bp->bh, 0))
+ return -EIO;
+ }
+#endif
+ bp->sec = ~0;
+ bp->flag = 0;
+
+ if (bp->bh) {
+ __brelse(bp->bh);
+ bp->bh = NULL;
+ }
+
+ move_to_lru(bp, &fsi->dcache.lru_list);
+ return 0;
+}
+
+s32 dcache_release_all(struct super_block *sb)
+{
+ s32 ret = 0;
+ cache_ent_t *bp;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY
+ s32 dirtycnt = 0;
+#endif
+
+ /* Splice the keep list back into the LRU list:
+ * LRU list : (A - B - ... - bp_front) + (bp_first - ... - bp_last)
+ */
+ while (fsi->dcache.keep_list.prev != &fsi->dcache.keep_list) {
+ cache_ent_t *bp_keep = fsi->dcache.keep_list.prev;
+ // bp_keep->flag &= ~(KEEPBIT); // Will be 0-ed later
+ move_to_mru(bp_keep, &fsi->dcache.lru_list);
+ }
+
+ bp = fsi->dcache.lru_list.next;
+ while (bp != &fsi->dcache.lru_list) {
+#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY
+ if (bp->flag & DIRTYBIT) {
+ dirtycnt++;
+ if (write_sect(sb, bp->sec, bp->bh, 0))
+ ret = -EIO;
+ }
+#endif
+ bp->sec = ~0;
+ bp->flag = 0;
+
+ if (bp->bh) {
+ __brelse(bp->bh);
+ bp->bh = NULL;
+ }
+ bp = bp->next;
+ }
+
+#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY
+ DMSG("BD:Release / dirty buf cache: %d (err:%d)", dirtycnt, ret);
+#endif
+ return ret;
+}
+
+
+s32 dcache_flush(struct super_block *sb, u32 sync)
+{
+ s32 ret = 0;
+ cache_ent_t *bp;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ s32 dirtycnt = 0;
+ s32 keepcnt = 0;
+
+ /* Splice the keep list back into the LRU list:
+ * LRU list : (A - B - ... - bp_front) + (bp_first - ... - bp_last)
+ */
+ while (fsi->dcache.keep_list.prev != &fsi->dcache.keep_list) {
+ cache_ent_t *bp_keep = fsi->dcache.keep_list.prev;
+
+ bp_keep->flag &= ~(KEEPBIT); // Will be 0-ed later
+ move_to_mru(bp_keep, &fsi->dcache.lru_list);
+ keepcnt++;
+ }
+
+ bp = fsi->dcache.lru_list.next;
+ while (bp != &fsi->dcache.lru_list) {
+ if (bp->flag & DIRTYBIT) {
+#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY
+ // Make buffer dirty (XXX: Naive impl.)
+ if (write_sect(sb, bp->sec, bp->bh, 0)) {
+ ret = -EIO;
+ break;
+ }
+
+#endif
+ bp->flag &= ~(DIRTYBIT);
+ dirtycnt++;
+
+ if (sync != 0)
+ sync_dirty_buffer(bp->bh);
+ }
+ bp = bp->next;
+ }
+
+ MMSG("BD: flush / dirty dentry cache: %d (%d from keeplist, err:%d)\n",
+ dirtycnt, keepcnt, ret);
+ return ret;
+}
+
+static cache_ent_t *__dcache_find(struct super_block *sb, u64 sec)
+{
+ s32 off;
+ cache_ent_t *bp, *hp;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ off = (sec + (sec >> fsi->sect_per_clus_bits)) & (BUF_CACHE_HASH_SIZE - 1);
+
+ hp = &(fsi->dcache.hash_list[off]);
+ for (bp = hp->hash.next; bp != hp; bp = bp->hash.next) {
+ if (bp->sec == sec) {
+ touch_buffer(bp->bh);
+ return bp;
+ }
+ }
+ return NULL;
+}
+
+static cache_ent_t *__dcache_get(struct super_block *sb)
+{
+ cache_ent_t *bp;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ bp = fsi->dcache.lru_list.prev;
+#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY
+ while (bp->flag & (DIRTYBIT | LOCKBIT)) {
+ cache_ent_t *bp_prev = bp->prev; // hold prev
+
+ if (bp->flag & DIRTYBIT) {
+ MMSG("BD: Buf cache => Keep list\n");
+ bp->flag |= KEEPBIT;
+ move_to_mru(bp, &fsi->dcache.keep_list);
+ }
+ bp = bp_prev;
+
+ /* If all dcaches are dirty */
+ if (bp == &fsi->dcache.lru_list) {
+ DMSG("BD: buf cache flooding\n");
+ dcache_flush(sb, 0);
+ bp = fsi->dcache.lru_list.prev;
+ }
+ }
+#else
+ while (bp->flag & LOCKBIT)
+ bp = bp->prev;
+#endif
+// if (bp->flag & DIRTYBIT)
+// sync_dirty_buffer(bp->bh);
+
+ move_to_mru(bp, &fsi->dcache.lru_list);
+ return bp;
+}
+
+static void __dcache_insert_hash(struct super_block *sb, cache_ent_t *bp)
+{
+ s32 off;
+ cache_ent_t *hp;
+ FS_INFO_T *fsi;
+
+ fsi = &(SDFAT_SB(sb)->fsi);
+ off = (bp->sec + (bp->sec >> fsi->sect_per_clus_bits)) & (BUF_CACHE_HASH_SIZE-1);
+
+ hp = &(fsi->dcache.hash_list[off]);
+ bp->hash.next = hp->hash.next;
+ bp->hash.prev = hp;
+ hp->hash.next->hash.prev = bp;
+ hp->hash.next = bp;
+}
+
+static void __dcache_remove_hash(cache_ent_t *bp)
+{
+#ifdef DEBUG_HASH_LIST
+ if ((bp->hash.next == (cache_ent_t *)DEBUG_HASH_NEXT) ||
+ (bp->hash.prev == (cache_ent_t *)DEBUG_HASH_PREV)) {
+ EMSG("%s: FATAL: tried to remove already-removed-cache-entry"
+ "(bp:%p)\n", __func__, bp);
+ return;
+ }
+#endif
+ WARN_ON(bp->flag & DIRTYBIT);
+ __remove_from_hash(bp);
+}
+
+
+/* end of cache.c */
diff --git a/fs/sdfat/config.h b/fs/sdfat/config.h
new file mode 100644
index 000000000000..6e2a4e80932c
--- /dev/null
+++ b/fs/sdfat/config.h
@@ -0,0 +1,146 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SDFAT_CONFIG_H
+#define _SDFAT_CONFIG_H
+/*======================================================================*/
+/* */
+/* FFS CONFIGURATIONS */
+/* (CHANGE THIS PART IF REQUIRED) */
+/* */
+/*======================================================================*/
+
+/*----------------------------------------------------------------------*/
+/* Feature Config */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/* Debug/Experimental Config */
+/*----------------------------------------------------------------------*/
+//#define CONFIG_SDFAT_TRACE_IO
+//#define CONFIG_SDFAT_TRACE_LOCK /* Trace elapsed time in lock_super(sb) */
+
+/*----------------------------------------------------------------------*/
+/* Defragmentation Config */
+/*----------------------------------------------------------------------*/
+//#define CONFIG_SDFAT_DFR
+//#define CONFIG_SDFAT_DFR_PACKING
+//#define CONFIG_SDFAT_DFR_DEBUG
+
+/*----------------------------------------------------------------------*/
+/* Config for Kernel equal or newer than 3.7 */
+/*----------------------------------------------------------------------*/
+#ifndef CONFIG_SDFAT_WRITE_SB_INTERVAL_CSECS
+#define CONFIG_SDFAT_WRITE_SB_INTERVAL_CSECS (dirty_writeback_interval)
+#endif
+
+/*----------------------------------------------------------------------*/
+/* Default Kconfig */
+/*----------------------------------------------------------------------*/
+/* default mount options */
+#ifndef CONFIG_SDFAT_DEFAULT_CODEPAGE /* if Kconfig lacked codepage */
+#define CONFIG_SDFAT_DEFAULT_CODEPAGE 437
+#endif
+
+#ifndef CONFIG_SDFAT_DEFAULT_IOCHARSET /* if Kconfig lacked iocharset */
+#define CONFIG_SDFAT_DEFAULT_IOCHARSET "utf8"
+#endif
+
+#ifndef CONFIG_SDFAT_FAT32_SHORTNAME_SEQ /* Shortname ~1, ... ~9 have higher
+ * priority (WIN32/VFAT-like)
+ */
+//#define CONFIG_SDFAT_FAT32_SHORTNAME_SEQ
+#endif
+
+#ifndef CONFIG_SDFAT_ALIGNED_MPAGE_WRITE
+//#define CONFIG_SDFAT_ALIGNED_MPAGE_WRITE
+#endif
+
+#ifndef CONFIG_SDFAT_FAT_MIRRORING /* if Kconfig lacked fat-mirroring option */
+#define CONFIG_SDFAT_FAT_MIRRORING /* Write FAT 1, FAT 2 simultaneously */
+#endif
+
+#ifndef CONFIG_SDFAT_DELAYED_META_DIRTY
+//#define CONFIG_SDFAT_DELAYED_META_DIRTY /* delayed DIR/FAT dirty support */
+#endif
+
+#ifndef CONFIG_SDFAT_SUPPORT_DIR_SYNC
+//#define CONFIG_SDFAT_SUPPORT_DIR_SYNC /* support DIR_SYNC */
+#endif
+
+#ifndef CONFIG_SDFAT_CHECK_RO_ATTR
+//#define CONFIG_SDFAT_CHECK_RO_ATTR
+#endif
+
+#ifndef CONFIG_SDFAT_RESTRICT_EXT_ONLY_SFN
+#define CONFIG_SDFAT_RESTRICT_EXT_ONLY_SFN
+#endif
+
+#ifndef CONFIG_SDFAT_ALLOW_LOOKUP_LOSSY_SFN
+//#define CONFIG_SDFAT_ALLOW_LOOKUP_LOSSY_SFN
+#endif
+
+#ifndef CONFIG_SDFAT_DBG_SHOW_PID
+//#define CONFIG_SDFAT_DBG_SHOW_PID
+#endif
+
+#ifndef CONFIG_SDFAT_VIRTUAL_XATTR
+//#define CONFIG_SDFAT_VIRTUAL_XATTR
+#endif
+
+#ifndef CONFIG_SDFAT_SUPPORT_STLOG
+//#define CONFIG_SDFAT_SUPPORT_STLOG
+#endif
+
+#ifndef CONFIG_SDFAT_DEBUG
+//{
+//#define CONFIG_SDFAT_DEBUG
+
+#ifndef CONFIG_SDFAT_DBG_IOCTL
+//#define CONFIG_SDFAT_DBG_IOCTL
+#endif
+
+#ifndef CONFIG_SDFAT_DBG_MSG
+//#define CONFIG_SDFAT_DBG_MSG
+#endif
+
+#ifndef CONFIG_SDFAT_DBG_CAREFUL
+//#define CONFIG_SDFAT_DBG_CAREFUL
+#endif
+
+#ifndef CONFIG_SDFAT_DBG_BUGON
+//#define CONFIG_SDFAT_DBG_BUGON
+#endif
+
+#ifndef CONFIG_SDFAT_DBG_WARNON
+//#define CONFIG_SDFAT_DBG_WARNON
+#endif
+//}
+#endif /* CONFIG_SDFAT_DEBUG */
+
+
+#ifndef CONFIG_SDFAT_TRACE_SB_LOCK
+//#define CONFIG_SDFAT_TRACE_SB_LOCK
+#endif
+
+#ifndef CONFIG_SDFAT_TRACE_ELAPSED_TIME
+//#define CONFIG_SDFAT_TRACE_ELAPSED_TIME
+#endif
+
+#endif /* _SDFAT_CONFIG_H */
+
+/* end of config.h */
diff --git a/fs/sdfat/core.c b/fs/sdfat/core.c
new file mode 100644
index 000000000000..3a5af0b83d59
--- /dev/null
+++ b/fs/sdfat/core.c
@@ -0,0 +1,3694 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/************************************************************************/
+/* */
+/* PROJECT : exFAT & FAT12/16/32 File System */
+/* FILE : core.c */
+/* PURPOSE : FAT & exFAT common core code for sdFAT */
+/* */
+/*----------------------------------------------------------------------*/
+/* NOTES */
+/* */
+/* */
+/************************************************************************/
+
+#include <linux/version.h>
+#include <linux/blkdev.h>
+#include <linux/workqueue.h>
+#include <linux/writeback.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 16, 0)
+#include <linux/iversion.h>
+#endif
+
+#include "sdfat.h"
+#include "core.h"
+#include <asm/byteorder.h>
+#include <asm/unaligned.h>
+
+
+/*************************************************************************
+ * FUNCTIONS WHICH HAVE KERNEL VERSION DEPENDENCY
+ *************************************************************************/
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 16, 0)
+static inline u64 inode_peek_iversion(struct inode *inode)
+{
+ return inode->i_version;
+}
+#endif
+
+
+/*----------------------------------------------------------------------*/
+/* Constant & Macro Definitions */
+/*----------------------------------------------------------------------*/
+static inline void __set_sb_dirty(struct super_block *sb)
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0)
+ sb->s_dirt = 1;
+#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) */
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+
+ sbi->s_dirt = 1;
+ /* Insert work */
+ spin_lock(&sbi->work_lock);
+ if (!sbi->write_super_queued) {
+ unsigned long delay;
+
+ delay = msecs_to_jiffies(CONFIG_SDFAT_WRITE_SB_INTERVAL_CSECS * 10);
+ queue_delayed_work(system_long_wq, &sbi->write_super_work, delay);
+ sbi->write_super_queued = 1;
+ }
+ spin_unlock(&sbi->work_lock);
+#endif
+}
+
+void set_sb_dirty(struct super_block *sb)
+{
+ __set_sb_dirty(sb);
+ // XXX: to be removed later, prints too much output
+ //TMSG("%s finished.\n", __func__);
+}
+
+/*----------------------------------------------------------------------*/
+/* Global Variable Definitions */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/* Local Variable Definitions */
+/*----------------------------------------------------------------------*/
+
+static s8 *reserved_names[] = {
+ "AUX ", "CON ", "NUL ", "PRN ",
+ "COM1 ", "COM2 ", "COM3 ", "COM4 ",
+ "COM5 ", "COM6 ", "COM7 ", "COM8 ", "COM9 ",
+ "LPT1 ", "LPT2 ", "LPT3 ", "LPT4 ",
+ "LPT5 ", "LPT6 ", "LPT7 ", "LPT8 ", "LPT9 ",
+ NULL
+};
+
+/*======================================================================*/
+/* Local Function Definitions */
+/*======================================================================*/
+
+/*
+ * File System Management Functions
+ */
+
+static s32 check_type_size(void)
+{
+ /* critical sanity check: each on-disk dentry structure must be exactly DENTRY_SIZE bytes */
+ if (sizeof(DENTRY_T) != DENTRY_SIZE)
+ return -EINVAL;
+
+ if (sizeof(DOS_DENTRY_T) != DENTRY_SIZE)
+ return -EINVAL;
+
+ if (sizeof(EXT_DENTRY_T) != DENTRY_SIZE)
+ return -EINVAL;
+
+ if (sizeof(FILE_DENTRY_T) != DENTRY_SIZE)
+ return -EINVAL;
+
+ if (sizeof(STRM_DENTRY_T) != DENTRY_SIZE)
+ return -EINVAL;
+
+ if (sizeof(NAME_DENTRY_T) != DENTRY_SIZE)
+ return -EINVAL;
+
+ if (sizeof(BMAP_DENTRY_T) != DENTRY_SIZE)
+ return -EINVAL;
+
+ if (sizeof(CASE_DENTRY_T) != DENTRY_SIZE)
+ return -EINVAL;
+
+ if (sizeof(VOLM_DENTRY_T) != DENTRY_SIZE)
+ return -EINVAL;
+
+ return 0;
+}
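+
+/*
+ * A sketch of an alternative (assumes <linux/bug.h> is reachable through
+ * the headers above): the same invariants can be enforced at compile
+ * time, e.g.:
+ *
+ *	BUILD_BUG_ON(sizeof(DENTRY_T)     != DENTRY_SIZE);
+ *	BUILD_BUG_ON(sizeof(DOS_DENTRY_T) != DENTRY_SIZE);
+ */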
+
+static s32 __fs_set_vol_flags(struct super_block *sb, u16 new_flag, s32 always_sync)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ s32 err;
+ s32 sync = 0;
+
+ /* flags are not changed */
+ if (fsi->vol_flag == new_flag)
+ return 0;
+
+ fsi->vol_flag = new_flag;
+
+ /* skip updating the volume dirty flag
+ * if this volume has been mounted read-only
+ */
+ if (sb->s_flags & MS_RDONLY)
+ return 0;
+
+ if (!fsi->pbr_bh) {
+ err = read_sect(sb, 0, &(fsi->pbr_bh), 1);
+ if (err) {
+ EMSG("%s : failed to read boot sector\n", __func__);
+ return err;
+ }
+ }
+
+ if (fsi->vol_type == EXFAT) {
+ pbr64_t *bpb = (pbr64_t *)fsi->pbr_bh->b_data;
+ bpb->bsx.vol_flags = cpu_to_le16(new_flag);
+ } else if (fsi->vol_type == FAT32) {
+ pbr32_t *bpb = (pbr32_t *)fsi->pbr_bh->b_data;
+ bpb->bsx.state = new_flag & VOL_DIRTY ? FAT_VOL_DIRTY : 0x00;
+ } else { /* FAT16/12 */
+ pbr16_t *bpb = (pbr16_t *) fsi->pbr_bh->b_data;
+ bpb->bpb.state = new_flag & VOL_DIRTY ? FAT_VOL_DIRTY : 0x00;
+ }
+
+ if (always_sync)
+ sync = 1;
+ else if ((new_flag == VOL_DIRTY) && (!buffer_dirty(fsi->pbr_bh)))
+ sync = 1;
+ else
+ sync = 0;
+
+ err = write_sect(sb, 0, fsi->pbr_bh, sync);
+ if (err)
+ EMSG("%s : failed to modify volume flag\n", __func__);
+
+ return err;
+}
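+
+/*
+ * Usage sketch (pattern inferred from this file; VOL_DIRTY is used below,
+ * and a matching VOL_CLEAN flag is assumed): the dirty flag brackets
+ * metadata updates so an unclean unmount is detectable:
+ *
+ *	fs_set_vol_flags(sb, VOL_DIRTY);	// before touching metadata
+ *	// ... update FAT / directory entries ...
+ *	fs_set_vol_flags(sb, VOL_CLEAN);	// after a successful sync
+ */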
+
+static s32 fs_set_vol_flags(struct super_block *sb, u16 new_flag)
+{
+ return __fs_set_vol_flags(sb, new_flag, 0);
+}
+
+s32 fscore_set_vol_flags(struct super_block *sb, u16 new_flag, s32 always_sync)
+{
+ return __fs_set_vol_flags(sb, new_flag, always_sync);
+}
+
+static inline s32 __fs_meta_sync(struct super_block *sb, s32 do_sync)
+{
+#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ if (fsi->vol_type != EXFAT) {
+ MMSG("meta flush in fs_sync(sync=%d)\n", do_sync);
+ fcache_flush(sb, 0);
+ dcache_flush(sb, 0);
+ }
+#else
+ /* DO NOTHING */
+#endif
+ return 0;
+}
+
+static s32 fs_sync(struct super_block *sb, s32 do_sync)
+{
+ s32 err;
+
+ if (!do_sync)
+ return 0;
+
+ err = __fs_meta_sync(sb, do_sync);
+
+ if (!err)
+ err = bdev_sync_all(sb);
+
+ if (err)
+ EMSG("%s : failed to sync. (err:%d)\n", __func__, err);
+
+ return err;
+}
+
+/*
+ * Cluster Management Functions
+ */
+
+static s32 __clear_cluster(struct inode *inode, u32 clu)
+{
+ u64 s, n;
+ struct super_block *sb = inode->i_sb;
+ u32 sect_size = (u32)sb->s_blocksize;
+ s32 ret = 0;
+ struct buffer_head *tmp_bh = NULL;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ if (IS_CLUS_FREE(clu)) { /* FAT16 root_dir */
+ s = fsi->root_start_sector;
+ n = fsi->data_start_sector;
+ } else {
+ s = CLUS_TO_SECT(fsi, clu);
+ n = s + fsi->sect_per_clus;
+ }
+
+ if (IS_DIRSYNC(inode)) {
+ ret = write_msect_zero(sb, s, (u64)fsi->sect_per_clus);
+ if (ret != -EAGAIN)
+ return ret;
+ }
+
+ /* Try buffered zero writes
+ * if the inode lacks DIRSYNC or write_msect_zero() returned -EAGAIN
+ */
+ for ( ; s < n; s++) {
+#if 0
+ dcache_release(sb, s);
+#endif
+ ret = read_sect(sb, s, &tmp_bh, 0);
+ if (ret)
+ goto out;
+
+ memset((u8 *)tmp_bh->b_data, 0x0, sect_size);
+ ret = write_sect(sb, s, tmp_bh, 0);
+ if (ret)
+ goto out;
+ }
+out:
+ brelse(tmp_bh);
+ return ret;
+} /* end of __clear_cluster */
+
+static s32 __find_last_cluster(struct super_block *sb, CHAIN_T *p_chain, u32 *ret_clu)
+{
+ u32 clu, next;
+ u32 count = 0;
+
+ next = p_chain->dir;
+ if (p_chain->flags == 0x03) {
+ *ret_clu = next + p_chain->size - 1;
+ return 0;
+ }
+
+ do {
+ count++;
+ clu = next;
+ if (fat_ent_get_safe(sb, clu, &next))
+ return -EIO;
+ } while (!IS_CLUS_EOF(next));
+
+ if (p_chain->size != count) {
+ sdfat_fs_error(sb, "bogus directory size "
+ "(clus : ondisk(%d) != counted(%d))",
+ p_chain->size, count);
+ sdfat_debug_bug_on(1);
+ return -EIO;
+ }
+
+ *ret_clu = clu;
+ return 0;
+}
+
+
+static s32 __count_num_clusters(struct super_block *sb, CHAIN_T *p_chain, u32 *ret_count)
+{
+ u32 i, count;
+ u32 clu;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ if (!p_chain->dir || IS_CLUS_EOF(p_chain->dir)) {
+ *ret_count = 0;
+ return 0;
+ }
+
+ if (p_chain->flags == 0x03) {
+ *ret_count = p_chain->size;
+ return 0;
+ }
+
+ clu = p_chain->dir;
+ count = 0;
+ for (i = CLUS_BASE; i < fsi->num_clusters; i++) {
+ count++;
+ if (fat_ent_get_safe(sb, clu, &clu))
+ return -EIO;
+ if (IS_CLUS_EOF(clu))
+ break;
+ }
+
+ *ret_count = count;
+ return 0;
+}
+
+/*
+ * Upcase table Management Functions
+ */
+static void free_upcase_table(struct super_block *sb)
+{
+ u32 i;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ u16 **upcase_table;
+
+ upcase_table = fsi->vol_utbl;
+ for (i = 0 ; i < UTBL_COL_COUNT ; i++) {
+ /* kfree(NULL) is safe */
+ kfree(upcase_table[i]);
+ upcase_table[i] = NULL;
+ }
+
+ /* kfree(NULL) is safe */
+ kfree(fsi->vol_utbl);
+ fsi->vol_utbl = NULL;
+}
+
+static s32 __load_upcase_table(struct super_block *sb, u64 sector, u64 num_sectors, u32 utbl_checksum)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ struct buffer_head *tmp_bh = NULL;
+ u32 sect_size = (u32)sb->s_blocksize;
+ s32 ret = -EIO;
+ u32 i, j;
+
+ u8 skip = false;
+ u32 index = 0;
+ u32 checksum = 0;
+ u16 **upcase_table = kzalloc((UTBL_COL_COUNT * sizeof(u16 *)), GFP_KERNEL);
+
+ if (!upcase_table)
+ return -ENOMEM;
+ /* no memset() needed: kzalloc() already zeroed the table
+ * memset(upcase_table, 0, UTBL_COL_COUNT * sizeof(u16 *));
+ */
+
+ fsi->vol_utbl = upcase_table;
+ num_sectors += sector;
+
+ while (sector < num_sectors) {
+ ret = read_sect(sb, sector, &tmp_bh, 1);
+ if (ret) {
+ EMSG("%s: failed to read sector(0x%llx)\n",
+ __func__, sector);
+ goto error;
+ }
+ sector++;
+
+ for (i = 0; i < sect_size && index <= 0xFFFF; i += 2) {
+ /* FIXME : is __le16 ok? */
+ //u16 uni = le16_to_cpu(((__le16*)(tmp_bh->b_data))[i]);
+ u16 uni = get_unaligned_le16((u8 *)tmp_bh->b_data+i);
+
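+ /* table checksum : rotate the 32-bit sum right by one bit, then add each byte */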
+ checksum = ((checksum & 1) ? 0x80000000 : 0) +
+ (checksum >> 1) + *(((u8 *)tmp_bh->b_data)+i);
+ checksum = ((checksum & 1) ? 0x80000000 : 0) +
+ (checksum >> 1) + *(((u8 *)tmp_bh->b_data)+(i+1));
+
+ if (skip) {
+ MMSG("skip from 0x%X to 0x%X(amount of 0x%X)\n",
+ index, index+uni, uni);
+ index += uni;
+ skip = false;
+ } else if (uni == index) {
+ index++;
+ } else if (uni == 0xFFFF) {
+ skip = true;
+ } else { /* uni != index , uni != 0xFFFF */
+ u16 col_index = get_col_index(index);
+
+ if (!upcase_table[col_index]) {
+ upcase_table[col_index] =
+ kmalloc((UTBL_ROW_COUNT * sizeof(u16)), GFP_KERNEL);
+ if (!upcase_table[col_index]) {
+ EMSG("failed to allocate memory"
+ " for column 0x%X\n",
+ col_index);
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ for (j = 0; j < UTBL_ROW_COUNT; j++)
+ upcase_table[col_index][j] = (col_index << LOW_INDEX_BIT) | j;
+ }
+
+ upcase_table[col_index][get_row_index(index)] = uni;
+ index++;
+ }
+ }
+ }
+ if (index >= 0xFFFF && utbl_checksum == checksum) {
+ DMSG("%s: load upcase table successfully"
+ "(idx:0x%08x, utbl_chksum:0x%08x)\n",
+ __func__, index, utbl_checksum);
+ if (tmp_bh)
+ brelse(tmp_bh);
+ return 0;
+ }
+
+ EMSG("%s: failed to load upcase table"
+ "(idx:0x%08x, chksum:0x%08x, utbl_chksum:0x%08x)\n",
+ __func__, index, checksum, utbl_checksum);
+
+ ret = -EINVAL;
+error:
+ if (tmp_bh)
+ brelse(tmp_bh);
+ free_upcase_table(sb);
+ return ret;
+}
+
+static s32 __load_default_upcase_table(struct super_block *sb)
+{
+ s32 i, ret = -EIO;
+ u32 j;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ u8 skip = false;
+ u32 index = 0;
+ u16 uni = 0;
+ u16 **upcase_table;
+
+ upcase_table = kmalloc((UTBL_COL_COUNT * sizeof(u16 *)), GFP_KERNEL);
+ if (!upcase_table)
+ return -ENOMEM;
+
+ fsi->vol_utbl = upcase_table;
+ memset(upcase_table, 0, UTBL_COL_COUNT * sizeof(u16 *));
+
+ for (i = 0; index <= 0xFFFF && i < SDFAT_NUM_UPCASE*2; i += 2) {
+ /* FIXME : is __le16 ok? */
+ //uni = le16_to_cpu(((__le16*)uni_def_upcase)[i>>1]);
+ uni = get_unaligned_le16((u8 *)uni_def_upcase+i);
+ if (skip) {
+ MMSG("skip from 0x%x ", index);
+ index += uni;
+ MMSG("to 0x%x (amount of 0x%x)\n", index, uni);
+ skip = false;
+ } else if (uni == index) {
+ index++;
+ } else if (uni == 0xFFFF) {
+ skip = true;
+ } else { /* uni != index , uni != 0xFFFF */
+ u16 col_index = get_col_index(index);
+
+ if (!upcase_table[col_index]) {
+ upcase_table[col_index] = kmalloc((UTBL_ROW_COUNT * sizeof(u16)), GFP_KERNEL);
+ if (!upcase_table[col_index]) {
+ EMSG("failed to allocate memory for "
+ "new column 0x%x\n", col_index);
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ for (j = 0; j < UTBL_ROW_COUNT; j++)
+ upcase_table[col_index][j] = (col_index << LOW_INDEX_BIT) | j;
+ }
+
+ upcase_table[col_index][get_row_index(index)] = uni;
+ index++;
+ }
+ }
+
+ if (index >= 0xFFFF)
+ return 0;
+
+error:
+ /* FATAL error: default upcase table has error */
+ free_upcase_table(sb);
+ return ret;
+}
+
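+/* Scan the exFAT root directory for the up-case table dentry (TYPE_UPCASE)
+ * and load it; on FAT volumes, or when loading fails for any reason other
+ * than -EIO, fall back to the built-in default table.
+ */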
+static s32 load_upcase_table(struct super_block *sb)
+{
+ s32 i, ret;
+ u32 tbl_clu, type;
+ u64 sector, tbl_size, num_sectors;
+ u8 blksize_bits = sb->s_blocksize_bits;
+ CHAIN_T clu;
+ CASE_DENTRY_T *ep;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ clu.dir = fsi->root_dir;
+ clu.flags = 0x01;
+
+ if (fsi->vol_type != EXFAT)
+ goto load_default;
+
+ while (!IS_CLUS_EOF(clu.dir)) {
+ for (i = 0; i < fsi->dentries_per_clu; i++) {
+ ep = (CASE_DENTRY_T *) get_dentry_in_dir(sb, &clu, i, NULL);
+ if (!ep)
+ return -EIO;
+
+ type = fsi->fs_func->get_entry_type((DENTRY_T *) ep);
+
+ if (type == TYPE_UNUSED)
+ break;
+ if (type != TYPE_UPCASE)
+ continue;
+
+ tbl_clu = le32_to_cpu(ep->start_clu);
+ tbl_size = le64_to_cpu(ep->size);
+
+ sector = CLUS_TO_SECT(fsi, tbl_clu);
+ num_sectors = ((tbl_size - 1) >> blksize_bits) + 1;
+ ret = __load_upcase_table(sb, sector, num_sectors,
+ le32_to_cpu(ep->checksum));
+
+ if (ret && (ret != -EIO))
+ goto load_default;
+
+ /* load successfully */
+ return ret;
+ }
+
+ if (get_next_clus_safe(sb, &(clu.dir)))
+ return -EIO;
+ }
+
+load_default:
+ sdfat_log_msg(sb, KERN_INFO, "trying to load default upcase table");
+ /* load default upcase table */
+ return __load_default_upcase_table(sb);
+} /* end of load_upcase_table */
+
+
+/*
+ * Directory Entry Management Functions
+ */
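+/* Translate a byte offset within a directory into the cluster that holds it.
+ * With the no-fat-chain flag (0x03) the clusters are contiguous, so this is
+ * plain addition; otherwise the FAT chain is walked one hop per cluster.
+ */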
+s32 walk_fat_chain(struct super_block *sb, CHAIN_T *p_dir, u32 byte_offset, u32 *clu)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ u32 clu_offset;
+ u32 cur_clu;
+
+ clu_offset = byte_offset >> fsi->cluster_size_bits;
+ cur_clu = p_dir->dir;
+
+ if (p_dir->flags == 0x03) {
+ cur_clu += clu_offset;
+ } else {
+ while (clu_offset > 0) {
+ if (get_next_clus_safe(sb, &cur_clu))
+ return -EIO;
+ if (IS_CLUS_EOF(cur_clu)) {
+ sdfat_fs_error(sb, "invalid dentry access "
+ "beyond EOF (clu : %u, eidx : %d)",
+ p_dir->dir,
+ byte_offset >> DENTRY_SIZE_BITS);
+ return -EIO;
+ }
+ clu_offset--;
+ }
+ }
+
+ if (clu)
+ *clu = cur_clu;
+ return 0;
+}
+
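+/* Map a dentry index to its on-disk sector and in-sector byte offset.
+ * For example (assuming 32-byte dentries, i.e. DENTRY_SIZE_BITS == 5, and
+ * 512-byte sectors) entry 20 is byte offset 640 : sector offset 1 within
+ * the cluster, byte offset 128 within that sector.
+ */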
+static s32 find_location(struct super_block *sb, CHAIN_T *p_dir, s32 entry, u64 *sector, s32 *offset)
+{
+ s32 ret;
+ u32 off, clu = 0;
+ u32 blksize_mask = (u32)(sb->s_blocksize-1);
+ u8 blksize_bits = sb->s_blocksize_bits;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ off = entry << DENTRY_SIZE_BITS;
+
+ /* FAT16 root_dir */
+ if (IS_CLUS_FREE(p_dir->dir)) {
+ *offset = off & blksize_mask;
+ *sector = off >> blksize_bits;
+ *sector += fsi->root_start_sector;
+ return 0;
+ }
+
+ ret = walk_fat_chain(sb, p_dir, off, &clu);
+ if (ret)
+ return ret;
+
+ /* byte offset in cluster */
+ off &= (fsi->cluster_size - 1);
+
+ /* byte offset in sector */
+ *offset = off & blksize_mask;
+
+ /* sector offset in cluster */
+ *sector = off >> blksize_bits;
+ *sector += CLUS_TO_SECT(fsi, clu);
+ return 0;
+} /* end of find_location */
+
+DENTRY_T *get_dentry_in_dir(struct super_block *sb, CHAIN_T *p_dir, s32 entry, u64 *sector)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ u32 dentries_per_page = PAGE_SIZE >> DENTRY_SIZE_BITS;
+ s32 off;
+ u64 sec;
+ u8 *buf;
+
+ if (p_dir->dir == DIR_DELETED) {
+ EMSG("%s : abnormal access to deleted dentry\n", __func__);
+ BUG_ON(!fsi->prev_eio);
+ return NULL;
+ }
+
+ if (find_location(sb, p_dir, entry, &sec, &off))
+ return NULL;
+
+ /* DIRECTORY READAHEAD :
+ * Try to read ahead one page at a time, except for the FAT12/16 root directory
+ */
+ if ((!IS_CLUS_FREE(p_dir->dir)) &&
+ !(entry & (dentries_per_page - 1)))
+ dcache_readahead(sb, sec);
+
+ buf = dcache_getblk(sb, sec);
+ if (!buf)
+ return NULL;
+
+ if (sector)
+ *sector = sec;
+ return (DENTRY_T *)(buf + off);
+} /* end of get_dentry_in_dir */
+
+/* used only in search_empty_slot() */
+#define CNT_UNUSED_NOHIT (-1)
+#define CNT_UNUSED_HIT (-2)
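+/* hint_femp->count semantics : CNT_UNUSED_NOHIT marks a candidate empty run
+ * made of deleted entries; CNT_UNUSED_HIT marks a run containing a
+ * never-used (TYPE_UNUSED) entry, which must extend to the end of the
+ * directory - a valid dentry after it indicates corruption.
+ */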
+/* search EMPTY CONTINUOUS "num_entries" entries */
+static s32 search_empty_slot(struct super_block *sb, HINT_FEMP_T *hint_femp, CHAIN_T *p_dir, s32 num_entries)
+{
+ s32 i, dentry, num_empty = 0;
+ s32 dentries_per_clu;
+ u32 type;
+ CHAIN_T clu;
+ DENTRY_T *ep;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ if (IS_CLUS_FREE(p_dir->dir)) /* FAT16 root_dir */
+ dentries_per_clu = fsi->dentries_in_root;
+ else
+ dentries_per_clu = fsi->dentries_per_clu;
+
+ ASSERT(-1 <= hint_femp->eidx);
+
+ if (hint_femp->eidx != -1) {
+ clu.dir = hint_femp->cur.dir;
+ clu.size = hint_femp->cur.size;
+ clu.flags = hint_femp->cur.flags;
+
+ dentry = hint_femp->eidx;
+
+ if (num_entries <= hint_femp->count) {
+ MMSG("%s: empty slot(HIT) - found "
+ "(clu : 0x%08x eidx : %d)\n",
+ __func__, hint_femp->cur.dir, hint_femp->eidx);
+ hint_femp->eidx = -1;
+
+ if (fsi->vol_type == EXFAT)
+ return dentry;
+
+ return dentry + (num_entries - 1);
+ }
+ MMSG("%s: empty slot(HIT) - search from "
+ "(clu : 0x%08x eidx : %d)\n",
+ __func__, hint_femp->cur.dir, hint_femp->eidx);
+ } else {
+ MMSG("%s: empty slot(MISS) - search from "
+ "(clu:0x%08x eidx : 0)\n",
+ __func__, p_dir->dir);
+
+ clu.dir = p_dir->dir;
+ clu.size = p_dir->size;
+ clu.flags = p_dir->flags;
+
+ dentry = 0;
+ }
+
+ while (!IS_CLUS_EOF(clu.dir)) {
+ /* FAT16 root_dir */
+ if (IS_CLUS_FREE(p_dir->dir))
+ i = dentry % dentries_per_clu;
+ else
+ i = dentry & (dentries_per_clu-1);
+
+ for ( ; i < dentries_per_clu; i++, dentry++) {
+ ep = get_dentry_in_dir(sb, &clu, i, NULL);
+ if (!ep)
+ return -EIO;
+
+ type = fsi->fs_func->get_entry_type(ep);
+
+ if ((type == TYPE_UNUSED) || (type == TYPE_DELETED)) {
+ num_empty++;
+ if (hint_femp->eidx == -1) {
+ hint_femp->eidx = dentry;
+ hint_femp->count = CNT_UNUSED_NOHIT;
+
+ hint_femp->cur.dir = clu.dir;
+ hint_femp->cur.size = clu.size;
+ hint_femp->cur.flags = clu.flags;
+ }
+
+ if ((type == TYPE_UNUSED) &&
+ (hint_femp->count != CNT_UNUSED_HIT)) {
+ hint_femp->count = CNT_UNUSED_HIT;
+ }
+ } else {
+ if ((hint_femp->eidx != -1) &&
+ (hint_femp->count == CNT_UNUSED_HIT)) {
+ /* unused empty group means
+ * an empty group which includes
+ * unused dentry
+ */
+ sdfat_fs_error(sb,
+ "found bogus dentry(%d) "
+ "beyond unused empty group(%d) "
+ "(start_clu : %u, cur_clu : %u)",
+ dentry, hint_femp->eidx, p_dir->dir,
+ clu.dir);
+ return -EIO;
+ }
+
+ num_empty = 0;
+ hint_femp->eidx = -1;
+ }
+
+ if (num_empty >= num_entries) {
+ /* found and invalidate hint_femp */
+ hint_femp->eidx = -1;
+
+ if (fsi->vol_type == EXFAT)
+ return (dentry - (num_entries-1));
+
+ return dentry;
+ }
+ }
+
+ if (IS_CLUS_FREE(p_dir->dir))
+ break; /* FAT16 root_dir */
+
+ if (clu.flags == 0x03) {
+ if ((--clu.size) > 0)
+ clu.dir++;
+ else
+ clu.dir = CLUS_EOF;
+ } else {
+ if (get_next_clus_safe(sb, &(clu.dir)))
+ return -EIO;
+ }
+ }
+
+ return -ENOSPC;
+} /* end of search_empty_slot */
+
+/* find an empty directory entry.
+ * if there isn't any empty slot, expand the cluster chain.
+ */
+static s32 find_empty_entry(struct inode *inode, CHAIN_T *p_dir, s32 num_entries)
+{
+ s32 dentry;
+ u32 ret, last_clu;
+ u64 sector;
+ u64 size = 0;
+ CHAIN_T clu;
+ DENTRY_T *ep = NULL;
+ struct super_block *sb = inode->i_sb;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ FILE_ID_T *fid = &(SDFAT_I(inode)->fid);
+ HINT_FEMP_T hint_femp;
+
+ hint_femp.eidx = -1;
+
+ ASSERT(-1 <= fid->hint_femp.eidx);
+
+ if (fid->hint_femp.eidx != -1) {
+ memcpy(&hint_femp, &fid->hint_femp, sizeof(HINT_FEMP_T));
+ fid->hint_femp.eidx = -1;
+ }
+
+ /* FAT16 root_dir */
+ if (IS_CLUS_FREE(p_dir->dir))
+ return search_empty_slot(sb, &hint_femp, p_dir, num_entries);
+
+ while ((dentry = search_empty_slot(sb, &hint_femp, p_dir, num_entries)) < 0) {
+ if (dentry == -EIO)
+ break;
+
+ if (fsi->fs_func->check_max_dentries(fid))
+ return -ENOSPC;
+
+ /* we trust p_dir->size regardless of FAT type */
+ if (__find_last_cluster(sb, p_dir, &last_clu))
+ return -EIO;
+
+ /*
+ * Allocate new cluster to this directory
+ */
+ clu.dir = last_clu + 1;
+ clu.size = 0; /* UNUSED */
+ clu.flags = p_dir->flags;
+
+ /* (0) check if there are reserved clusters
+ * (refer to the comment in create_dir)
+ */
+ if (!IS_CLUS_EOF(fsi->used_clusters) &&
+ ((fsi->used_clusters + fsi->reserved_clusters) >= (fsi->num_clusters - 2)))
+ return -ENOSPC;
+
+ /* (1) allocate a cluster */
+ ret = fsi->fs_func->alloc_cluster(sb, 1, &clu, ALLOC_HOT);
+ if (ret)
+ return ret;
+
+ if (__clear_cluster(inode, clu.dir))
+ return -EIO;
+
+ /* (2) append to the FAT chain */
+ if (clu.flags != p_dir->flags) {
+ /* no-fat-chain bit is disabled,
+ * so fat-chain should be synced with alloc-bmp
+ */
+ chain_cont_cluster(sb, p_dir->dir, p_dir->size);
+ p_dir->flags = 0x01;
+ hint_femp.cur.flags = 0x01;
+ }
+
+ if (clu.flags == 0x01)
+ if (fat_ent_set(sb, last_clu, clu.dir))
+ return -EIO;
+
+ if (hint_femp.eidx == -1) {
+ /* the special case that new dentry
+ * should be allocated from the start of new cluster
+ */
+ hint_femp.eidx = (s32)(p_dir->size <<
+ (fsi->cluster_size_bits - DENTRY_SIZE_BITS));
+ hint_femp.count = fsi->dentries_per_clu;
+
+ hint_femp.cur.dir = clu.dir;
+ hint_femp.cur.size = 0;
+ hint_femp.cur.flags = clu.flags;
+ }
+ hint_femp.cur.size++;
+ p_dir->size++;
+ size = (p_dir->size << fsi->cluster_size_bits);
+
+ /* (3) update the directory entry */
+ if ((fsi->vol_type == EXFAT) && (p_dir->dir != fsi->root_dir)) {
+ ep = get_dentry_in_dir(sb,
+ &(fid->dir), fid->entry+1, &sector);
+ if (!ep)
+ return -EIO;
+ fsi->fs_func->set_entry_size(ep, size);
+ fsi->fs_func->set_entry_flag(ep, p_dir->flags);
+ if (dcache_modify(sb, sector))
+ return -EIO;
+
+ if (update_dir_chksum(sb, &(fid->dir), fid->entry))
+ return -EIO;
+ }
+
+ /* directory inode should be updated in here */
+ i_size_write(inode, (loff_t)size);
+ SDFAT_I(inode)->i_size_ondisk += fsi->cluster_size;
+ SDFAT_I(inode)->i_size_aligned += fsi->cluster_size;
+ SDFAT_I(inode)->fid.size = size;
+ SDFAT_I(inode)->fid.flags = p_dir->flags;
+ inode->i_blocks += 1 << (fsi->cluster_size_bits - sb->s_blocksize_bits);
+ }
+
+ return dentry;
+} /* end of find_empty_entry */
+
+#define SDFAT_MIN_SUBDIR (2)
+static const char *dot_name[SDFAT_MIN_SUBDIR] = { DOS_CUR_DIR_NAME, DOS_PAR_DIR_NAME };
+
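+/* Count the dentries in p_dir that match 'type' (or all of them for
+ * TYPE_ALL). On FAT volumes, 'dotcnt' additionally reports how many of the
+ * "." and ".." entries were found in the first two slots.
+ */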
+static s32 __count_dos_name_entries(struct super_block *sb, CHAIN_T *p_dir, u32 type, u32 *dotcnt)
+{
+ s32 i, count = 0, check_dot = 0;
+ s32 dentries_per_clu;
+ u32 entry_type;
+ CHAIN_T clu;
+ DENTRY_T *ep;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ if (IS_CLUS_FREE(p_dir->dir)) /* FAT16 root_dir */
+ dentries_per_clu = fsi->dentries_in_root;
+ else
+ dentries_per_clu = fsi->dentries_per_clu;
+
+ clu.dir = p_dir->dir;
+ clu.size = p_dir->size;
+ clu.flags = p_dir->flags;
+
+ if (dotcnt) {
+ *dotcnt = 0;
+ if (fsi->vol_type != EXFAT)
+ check_dot = 1;
+ }
+
+ while (!IS_CLUS_EOF(clu.dir)) {
+ for (i = 0; i < dentries_per_clu; i++) {
+ ep = get_dentry_in_dir(sb, &clu, i, NULL);
+ if (!ep)
+ return -EIO;
+
+ entry_type = fsi->fs_func->get_entry_type(ep);
+
+ if (entry_type == TYPE_UNUSED)
+ return count;
+ if (!(type & TYPE_CRITICAL_PRI) && !(type & TYPE_BENIGN_PRI))
+ continue;
+
+ if ((type != TYPE_ALL) && (type != entry_type))
+ continue;
+
+ count++;
+ if (check_dot && (i < SDFAT_MIN_SUBDIR)) {
+ BUG_ON(fsi->vol_type == EXFAT);
+ /* 11 is DOS_NAME_LENGTH */
+ if (!strncmp(ep->dummy, dot_name[i], 11))
+ (*dotcnt)++;
+ }
+ }
+
+ /* FAT16 root_dir */
+ if (IS_CLUS_FREE(p_dir->dir))
+ break;
+
+ if (clu.flags == 0x03) {
+ if ((--clu.size) > 0)
+ clu.dir++;
+ else
+ clu.dir = CLUS_EOF;
+ } else {
+ if (get_next_clus_safe(sb, &(clu.dir)))
+ return -EIO;
+ }
+
+ check_dot = 0;
+ }
+
+ return count;
+}
+
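+/* A directory is empty when it contains no FILE or DIR entries. On FAT
+ * volumes "." and ".." occupy the first two slots of any non-root directory,
+ * so up to two such entries are tolerated there.
+ */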
+s32 check_dir_empty(struct super_block *sb, CHAIN_T *p_dir)
+{
+ s32 i, count = 0;
+ s32 dentries_per_clu;
+ u32 type;
+ CHAIN_T clu;
+ DENTRY_T *ep;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ if (IS_CLUS_FREE(p_dir->dir)) /* FAT16 root_dir */
+ dentries_per_clu = fsi->dentries_in_root;
+ else
+ dentries_per_clu = fsi->dentries_per_clu;
+
+ clu.dir = p_dir->dir;
+ clu.size = p_dir->size;
+ clu.flags = p_dir->flags;
+
+ while (!IS_CLUS_EOF(clu.dir)) {
+ for (i = 0; i < dentries_per_clu; i++) {
+ ep = get_dentry_in_dir(sb, &clu, i, NULL);
+ if (!ep)
+ return -EIO;
+
+ type = fsi->fs_func->get_entry_type(ep);
+
+ if (type == TYPE_UNUSED)
+ return 0;
+
+ if ((type != TYPE_FILE) && (type != TYPE_DIR))
+ continue;
+
+ /* FAT16 root_dir */
+ if (IS_CLUS_FREE(p_dir->dir))
+ return -ENOTEMPTY;
+
+ if (fsi->vol_type == EXFAT)
+ return -ENOTEMPTY;
+
+ if ((p_dir->dir == fsi->root_dir) || (++count > 2))
+ return -ENOTEMPTY;
+ }
+
+ /* FAT16 root_dir */
+ if (IS_CLUS_FREE(p_dir->dir))
+ return -ENOTEMPTY;
+
+ if (clu.flags == 0x03) {
+ if ((--clu.size) > 0)
+ clu.dir++;
+ else
+ clu.dir = CLUS_EOF;
+ } else {
+ if (get_next_clus_safe(sb, &(clu.dir)))
+ return -EIO;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Name Conversion Functions
+ */
+#ifdef CONFIG_SDFAT_ALLOW_LOOKUP_LOSSY_SFN
+ /* over name length only */
+#define NEED_INVALIDATE_SFN(x) ((x) & NLS_NAME_OVERLEN)
+#else
+ /* all lossy case */
+#define NEED_INVALIDATE_SFN(x) (x)
+#endif
+
+/* NOTE :
+ * We should keep the shortname code compatible with v1.0.15 or lower.
+ * So, we only check for ext-only names at create time.
+ *
+ * i.e. '.mtp' ->
+ * v1.0.15 : ' MTP' with name_case, 0x10
+ * v1.1.0 : 'MT????~?' with name_case, 0x00 and longname.
+ */
+static inline void preprocess_ext_only_sfn(s32 lookup, u16 first_char, DOS_NAME_T *p_dosname, s32 *lossy)
+{
+#ifdef CONFIG_SDFAT_RESTRICT_EXT_ONLY_SFN
+ int i;
+ /* check ext-only-name at create-mode */
+ if (*lossy || lookup || (first_char != (u16)'.'))
+ return;
+
+ p_dosname->name_case = 0xFF;
+
+ /* move ext-name to base-name */
+ for (i = 0; i < 3; i++) {
+ p_dosname->name[i] = p_dosname->name[8+i];
+ if (p_dosname->name[i] == ' ')
+ p_dosname->name[i] = '_';
+ }
+
+ /* fill the remaining space with '_' */
+ for (i = 3; i < 8; i++)
+ p_dosname->name[i] = '_';
+
+ /* eliminate ext-name */
+ for (i = 8; i < 11; i++)
+ p_dosname->name[i] = ' ';
+
+ *lossy = NLS_NAME_LOSSY;
+#endif /* CONFIG_SDFAT_RESTRICT_EXT_ONLY_SFN */
+}
+
+/* input : dir, uni_name
+ * output : num_of_entry, dos_name(format : aaaaaa~1.bbb)
+ */
+static s32 get_num_entries_and_dos_name(struct super_block *sb, CHAIN_T *p_dir,
+ UNI_NAME_T *p_uniname, s32 *entries,
+ DOS_NAME_T *p_dosname, s32 lookup)
+{
+ s32 ret, num_entries, lossy = NLS_NAME_NO_LOSSY;
+ s8 **r;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ /* Init null char. */
+ p_dosname->name[0] = '\0';
+
+ num_entries = fsi->fs_func->calc_num_entries(p_uniname);
+ if (num_entries == 0)
+ return -EINVAL;
+
+ if (fsi->vol_type == EXFAT)
+ goto out;
+
+ nls_uni16s_to_sfn(sb, p_uniname, p_dosname, &lossy);
+
+ preprocess_ext_only_sfn(lookup, p_uniname->name[0], p_dosname, &lossy);
+
+ if (!lossy) {
+ for (r = reserved_names; *r; r++) {
+ if (!strncmp((void *) p_dosname->name, *r, 8))
+ return -EINVAL;
+ }
+
+ if (p_dosname->name_case != 0xFF)
+ num_entries = 1;
+ } else if (!lookup) {
+ /* create new dos name */
+ ret = fat_generate_dos_name_new(sb, p_dir, p_dosname,
+ num_entries);
+ if (ret)
+ return ret;
+
+ } else if (NEED_INVALIDATE_SFN(lossy)) {
+ /* FIXME : We should check num_entries */
+ p_dosname->name[0] = '\0';
+ }
+
+ if (num_entries > 1)
+ p_dosname->name_case = 0x0;
+out:
+ *entries = num_entries;
+ return 0;
+} /* end of get_num_entries_and_dos_name */
+
+void get_uniname_from_dos_entry(struct super_block *sb, DOS_DENTRY_T *ep, UNI_NAME_T *p_uniname, u8 mode)
+{
+ DOS_NAME_T dos_name;
+
+ if (mode == 0x0)
+ dos_name.name_case = 0x0;
+ else
+ dos_name.name_case = ep->lcase;
+
+ memcpy(dos_name.name, ep->name, DOS_NAME_LENGTH);
+ nls_sfn_to_uni16s(sb, &dos_name, p_uniname);
+} /* end of get_uniname_from_dos_entry */
+
+/* returns the length of a struct qstr, ignoring trailing dots */
+static inline unsigned int __striptail_len(unsigned int len, const char *name)
+{
+ while (len && name[len - 1] == '.')
+ len--;
+ return len;
+}
+
+/*
+ * Name Resolution Functions :
+ * Return zero on success; nonzero otherwise.
+ */
+static s32 __resolve_path(struct inode *inode, const u8 *path, CHAIN_T *p_dir, UNI_NAME_T *p_uniname, int lookup)
+{
+ s32 namelen;
+ s32 lossy = NLS_NAME_NO_LOSSY;
+ struct super_block *sb = inode->i_sb;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ FILE_ID_T *fid = &(SDFAT_I(inode)->fid);
+
+ /* DOT and DOTDOT are handled by VFS layer */
+
+ /* strip all trailing spaces */
+ /* DO NOTHING : Is needed? */
+
+ /* strip all trailing periods */
+ namelen = __striptail_len(strlen(path), path);
+ if (!namelen)
+ return -ENOENT;
+
+ /* the limitation of linux? */
+ if (strlen(path) > (MAX_NAME_LENGTH * MAX_CHARSET_SIZE))
+ return -ENAMETOOLONG;
+
+ /*
+ * strip all leading spaces :
+ * "MS windows 7" supports leading spaces.
+ * So we should skip this preprocessing for compatibility.
+ */
+
+ /* file name conversion :
+ * If lookup case, we allow bad-name for compatibility.
+ */
+ namelen = nls_vfsname_to_uni16s(sb, path, namelen, p_uniname, &lossy);
+ if (namelen < 0)
+ return namelen; /* return error value */
+
+ if ((lossy && !lookup) || !namelen)
+ return -EINVAL;
+
+ sdfat_debug_bug_on(fid->size != i_size_read(inode));
+// fid->size = i_size_read(inode);
+
+ p_dir->dir = fid->start_clu;
+ p_dir->size = (u32)(fid->size >> fsi->cluster_size_bits);
+ p_dir->flags = fid->flags;
+
+ return 0;
+}
+
+static inline s32 resolve_path(struct inode *inode, const u8 *path, CHAIN_T *dir, UNI_NAME_T *uni)
+{
+ return __resolve_path(inode, path, dir, uni, 0);
+}
+
+static inline s32 resolve_path_for_lookup(struct inode *inode, const u8 *path, CHAIN_T *dir, UNI_NAME_T *uni)
+{
+ return __resolve_path(inode, path, dir, uni, 1);
+}
+
+static s32 create_dir(struct inode *inode, CHAIN_T *p_dir, UNI_NAME_T *p_uniname, FILE_ID_T *fid)
+{
+ s32 dentry, num_entries;
+ u64 ret;
+ u64 size;
+ CHAIN_T clu;
+ DOS_NAME_T dos_name, dot_name;
+ struct super_block *sb = inode->i_sb;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ ret = get_num_entries_and_dos_name(sb, p_dir, p_uniname, &num_entries, &dos_name, 0);
+ if (ret)
+ return ret;
+
+ /* find_empty_entry must be called before alloc_cluster */
+ dentry = find_empty_entry(inode, p_dir, num_entries);
+ if (dentry < 0)
+ return dentry; /* -EIO or -ENOSPC */
+
+ clu.dir = CLUS_EOF;
+ clu.size = 0;
+ clu.flags = (fsi->vol_type == EXFAT) ? 0x03 : 0x01;
+
+ /* (0) Check if there are reserved clusters up to max. */
+ if ((fsi->used_clusters + fsi->reserved_clusters) >= (fsi->num_clusters - CLUS_BASE))
+ return -ENOSPC;
+
+ /* (1) allocate a cluster */
+ ret = fsi->fs_func->alloc_cluster(sb, 1, &clu, ALLOC_HOT);
+ if (ret)
+ return ret;
+
+ ret = __clear_cluster(inode, clu.dir);
+ if (ret)
+ return ret;
+
+ size = fsi->cluster_size;
+ if (fsi->vol_type != EXFAT) {
+ /* initialize the . and .. entry
+ * Information for . points to itself
+ * Information for .. points to parent dir
+ */
+
+ dot_name.name_case = 0x0;
+ memcpy(dot_name.name, DOS_CUR_DIR_NAME, DOS_NAME_LENGTH);
+
+ ret = fsi->fs_func->init_dir_entry(sb, &clu, 0, TYPE_DIR, clu.dir, 0);
+ if (ret)
+ return ret;
+
+ ret = fsi->fs_func->init_ext_entry(sb, &clu, 0, 1, NULL, &dot_name);
+ if (ret)
+ return ret;
+
+ memcpy(dot_name.name, DOS_PAR_DIR_NAME, DOS_NAME_LENGTH);
+
+ if (p_dir->dir == fsi->root_dir)
+ ret = fsi->fs_func->init_dir_entry(sb, &clu, 1, TYPE_DIR, CLUS_FREE, 0);
+ else
+ ret = fsi->fs_func->init_dir_entry(sb, &clu, 1, TYPE_DIR, p_dir->dir, 0);
+
+ if (ret)
+ return ret;
+
+ ret = fsi->fs_func->init_ext_entry(sb, &clu, 1, 1, NULL, &dot_name);
+ if (ret)
+ return ret;
+ }
+
+ /* (2) update the directory entry */
+ /* make sub-dir entry in parent directory */
+ ret = fsi->fs_func->init_dir_entry(sb, p_dir, dentry, TYPE_DIR, clu.dir, size);
+ if (ret)
+ return ret;
+
+ ret = fsi->fs_func->init_ext_entry(sb, p_dir, dentry, num_entries, p_uniname, &dos_name);
+ if (ret)
+ return ret;
+
+ fid->dir.dir = p_dir->dir;
+ fid->dir.size = p_dir->size;
+ fid->dir.flags = p_dir->flags;
+ fid->entry = dentry;
+
+ fid->attr = ATTR_SUBDIR;
+ fid->flags = (fsi->vol_type == EXFAT) ? 0x03 : 0x01;
+ fid->size = size;
+ fid->start_clu = clu.dir;
+
+ fid->type = TYPE_DIR;
+ fid->rwoffset = 0;
+ fid->hint_bmap.off = CLUS_EOF;
+
+ /* hint_stat will be used if this is a directory. */
+ fid->version = 0;
+ fid->hint_stat.eidx = 0;
+ fid->hint_stat.clu = fid->start_clu;
+ fid->hint_femp.eidx = -1;
+
+ return 0;
+} /* end of create_dir */
+
+static s32 create_file(struct inode *inode, CHAIN_T *p_dir, UNI_NAME_T *p_uniname, u8 mode, FILE_ID_T *fid)
+{
+ s32 ret, dentry, num_entries;
+ DOS_NAME_T dos_name;
+ struct super_block *sb = inode->i_sb;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ ret = get_num_entries_and_dos_name(sb, p_dir, p_uniname, &num_entries, &dos_name, 0);
+ if (ret)
+ return ret;
+
+ /* find_empty_entry must be called before alloc_cluster() */
+ dentry = find_empty_entry(inode, p_dir, num_entries);
+ if (dentry < 0)
+ return dentry; /* -EIO or -ENOSPC */
+
+ /* (1) update the directory entry */
+ /* fill in the DOS-name directory entry information of the created file.
+ * the first cluster is not determined yet. (0)
+ */
+ ret = fsi->fs_func->init_dir_entry(sb, p_dir, dentry, TYPE_FILE | mode, CLUS_FREE, 0);
+ if (ret)
+ return ret;
+
+ ret = fsi->fs_func->init_ext_entry(sb, p_dir, dentry, num_entries, p_uniname, &dos_name);
+ if (ret)
+ return ret;
+
+ fid->dir.dir = p_dir->dir;
+ fid->dir.size = p_dir->size;
+ fid->dir.flags = p_dir->flags;
+ fid->entry = dentry;
+
+ fid->attr = ATTR_ARCHIVE | mode;
+ fid->flags = (fsi->vol_type == EXFAT) ? 0x03 : 0x01;
+ fid->size = 0;
+ fid->start_clu = CLUS_EOF;
+
+ fid->type = TYPE_FILE;
+ fid->rwoffset = 0;
+ fid->hint_bmap.off = CLUS_EOF;
+
+ /* hint_stat will be used if this is a directory. */
+ fid->version = 0;
+ fid->hint_stat.eidx = 0;
+ fid->hint_stat.clu = fid->start_clu;
+ fid->hint_femp.eidx = -1;
+
+ return 0;
+} /* end of create_file */
+
+static s32 remove_file(struct inode *inode, CHAIN_T *p_dir, s32 entry)
+{
+ s32 num_entries;
+ u64 sector;
+ DENTRY_T *ep;
+ struct super_block *sb = inode->i_sb;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ ep = get_dentry_in_dir(sb, p_dir, entry, &sector);
+ if (!ep)
+ return -EIO;
+
+ dcache_lock(sb, sector);
+
+ /* dcache_lock() before call count_ext_entries() */
+ num_entries = fsi->fs_func->count_ext_entries(sb, p_dir, entry, ep);
+ if (num_entries < 0) {
+ dcache_unlock(sb, sector);
+ return -EIO;
+ }
+ num_entries++;
+
+ dcache_unlock(sb, sector);
+
+ /* (1) update the directory entry */
+ return fsi->fs_func->delete_dir_entry(sb, p_dir, entry, 0, num_entries);
+} /* end of remove_file */
+
+static s32 rename_file(struct inode *inode, CHAIN_T *p_dir, s32 oldentry, UNI_NAME_T *p_uniname, FILE_ID_T *fid)
+{
+ s32 ret, newentry = -1, num_old_entries, num_new_entries;
+ u64 sector_old, sector_new;
+ DOS_NAME_T dos_name;
+ DENTRY_T *epold, *epnew;
+ struct super_block *sb = inode->i_sb;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ epold = get_dentry_in_dir(sb, p_dir, oldentry, &sector_old);
+ if (!epold)
+ return -EIO;
+
+ dcache_lock(sb, sector_old);
+
+ /* dcache_lock() before call count_ext_entries() */
+ num_old_entries = fsi->fs_func->count_ext_entries(sb, p_dir, oldentry, epold);
+ if (num_old_entries < 0) {
+ dcache_unlock(sb, sector_old);
+ return -EIO;
+ }
+ num_old_entries++;
+
+ ret = get_num_entries_and_dos_name(sb, p_dir, p_uniname, &num_new_entries, &dos_name, 0);
+ if (ret) {
+ dcache_unlock(sb, sector_old);
+ return ret;
+ }
+
+ if (num_old_entries < num_new_entries) {
+ newentry = find_empty_entry(inode, p_dir, num_new_entries);
+ if (newentry < 0) {
+ dcache_unlock(sb, sector_old);
+ return newentry; /* -EIO or -ENOSPC */
+ }
+
+ epnew = get_dentry_in_dir(sb, p_dir, newentry, &sector_new);
+ if (!epnew) {
+ dcache_unlock(sb, sector_old);
+ return -EIO;
+ }
+
+ memcpy((void *) epnew, (void *) epold, DENTRY_SIZE);
+ if (fsi->fs_func->get_entry_type(epnew) == TYPE_FILE) {
+ fsi->fs_func->set_entry_attr(epnew, fsi->fs_func->get_entry_attr(epnew) | ATTR_ARCHIVE);
+ fid->attr |= ATTR_ARCHIVE;
+ }
+ dcache_modify(sb, sector_new);
+ dcache_unlock(sb, sector_old);
+
+ if (fsi->vol_type == EXFAT) {
+ epold = get_dentry_in_dir(sb, p_dir, oldentry+1, &sector_old);
+ dcache_lock(sb, sector_old);
+ epnew = get_dentry_in_dir(sb, p_dir, newentry+1, &sector_new);
+
+ if (!epold || !epnew) {
+ dcache_unlock(sb, sector_old);
+ return -EIO;
+ }
+
+ memcpy((void *) epnew, (void *) epold, DENTRY_SIZE);
+ dcache_modify(sb, sector_new);
+ dcache_unlock(sb, sector_old);
+ }
+
+ ret = fsi->fs_func->init_ext_entry(sb, p_dir, newentry, num_new_entries, p_uniname, &dos_name);
+ if (ret)
+ return ret;
+
+ fsi->fs_func->delete_dir_entry(sb, p_dir, oldentry, 0, num_old_entries);
+ fid->entry = newentry;
+ } else {
+ if (fsi->fs_func->get_entry_type(epold) == TYPE_FILE) {
+ fsi->fs_func->set_entry_attr(epold, fsi->fs_func->get_entry_attr(epold) | ATTR_ARCHIVE);
+ fid->attr |= ATTR_ARCHIVE;
+ }
+ dcache_modify(sb, sector_old);
+ dcache_unlock(sb, sector_old);
+
+ ret = fsi->fs_func->init_ext_entry(sb, p_dir, oldentry, num_new_entries, p_uniname, &dos_name);
+ if (ret)
+ return ret;
+
+ fsi->fs_func->delete_dir_entry(sb, p_dir, oldentry, num_new_entries, num_old_entries);
+ }
+
+ return 0;
+} /* end of rename_file */
+
+static s32 move_file(struct inode *inode, CHAIN_T *p_olddir, s32 oldentry,
+ CHAIN_T *p_newdir, UNI_NAME_T *p_uniname, FILE_ID_T *fid)
+{
+ s32 ret, newentry, num_new_entries, num_old_entries;
+ u64 sector_mov, sector_new;
+ CHAIN_T clu;
+ DOS_NAME_T dos_name;
+ DENTRY_T *epmov, *epnew;
+ struct super_block *sb = inode->i_sb;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ epmov = get_dentry_in_dir(sb, p_olddir, oldentry, &sector_mov);
+ if (!epmov)
+ return -EIO;
+
+ /* check if the source and target directories are the same */
+ if (fsi->fs_func->get_entry_type(epmov) == TYPE_DIR &&
+ fsi->fs_func->get_entry_clu0(epmov) == p_newdir->dir)
+ return -EINVAL;
+
+ dcache_lock(sb, sector_mov);
+
+ /* dcache_lock() before call count_ext_entries() */
+ num_old_entries = fsi->fs_func->count_ext_entries(sb, p_olddir, oldentry, epmov);
+ if (num_old_entries < 0) {
+ dcache_unlock(sb, sector_mov);
+ return -EIO;
+ }
+ num_old_entries++;
+
+ ret = get_num_entries_and_dos_name(sb, p_newdir, p_uniname, &num_new_entries, &dos_name, 0);
+ if (ret) {
+ dcache_unlock(sb, sector_mov);
+ return ret;
+ }
+
+ newentry = find_empty_entry(inode, p_newdir, num_new_entries);
+ if (newentry < 0) {
+ dcache_unlock(sb, sector_mov);
+ return newentry; /* -EIO or -ENOSPC */
+ }
+
+ epnew = get_dentry_in_dir(sb, p_newdir, newentry, &sector_new);
+ if (!epnew) {
+ dcache_unlock(sb, sector_mov);
+ return -EIO;
+ }
+
+ memcpy((void *) epnew, (void *) epmov, DENTRY_SIZE);
+ if (fsi->fs_func->get_entry_type(epnew) == TYPE_FILE) {
+ fsi->fs_func->set_entry_attr(epnew, fsi->fs_func->get_entry_attr(epnew) | ATTR_ARCHIVE);
+ fid->attr |= ATTR_ARCHIVE;
+ }
+ dcache_modify(sb, sector_new);
+ dcache_unlock(sb, sector_mov);
+
+ if (fsi->vol_type == EXFAT) {
+ epmov = get_dentry_in_dir(sb, p_olddir, oldentry+1, &sector_mov);
+ dcache_lock(sb, sector_mov);
+ epnew = get_dentry_in_dir(sb, p_newdir, newentry+1, &sector_new);
+ if (!epmov || !epnew) {
+ dcache_unlock(sb, sector_mov);
+ return -EIO;
+ }
+
+ memcpy((void *) epnew, (void *) epmov, DENTRY_SIZE);
+ dcache_modify(sb, sector_new);
+ dcache_unlock(sb, sector_mov);
+ } else if (fsi->fs_func->get_entry_type(epnew) == TYPE_DIR) {
+ /* change ".." pointer to new parent dir */
+ clu.dir = fsi->fs_func->get_entry_clu0(epnew);
+ clu.flags = 0x01;
+
+ epnew = get_dentry_in_dir(sb, &clu, 1, &sector_new);
+ if (!epnew)
+ return -EIO;
+
+ if (p_newdir->dir == fsi->root_dir)
+ fsi->fs_func->set_entry_clu0(epnew, CLUS_FREE);
+ else
+ fsi->fs_func->set_entry_clu0(epnew, p_newdir->dir);
+ dcache_modify(sb, sector_new);
+ }
+
+ ret = fsi->fs_func->init_ext_entry(sb, p_newdir, newentry, num_new_entries, p_uniname, &dos_name);
+ if (ret)
+ return ret;
+
+ fsi->fs_func->delete_dir_entry(sb, p_olddir, oldentry, 0, num_old_entries);
+
+ fid->dir.dir = p_newdir->dir;
+ fid->dir.size = p_newdir->size;
+ fid->dir.flags = p_newdir->flags;
+
+ fid->entry = newentry;
+
+ return 0;
+} /* end of move_file */
+
+
+/*======================================================================*/
+/* Global Function Definitions */
+/*======================================================================*/
+/* roll back to the initial state of the file system */
+s32 fscore_init(void)
+{
+ s32 ret;
+
+ ret = check_type_size();
+ if (ret)
+ return ret;
+
+ return extent_cache_init();
+}
+
+/* make free all memory-alloced global buffers */
+s32 fscore_shutdown(void)
+{
+ extent_cache_shutdown();
+ return 0;
+}
+
+/* check device is ejected */
+s32 fscore_check_bdi_valid(struct super_block *sb)
+{
+ return bdev_check_bdi_valid(sb);
+}
+
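+/* Boot-sector probing : the exFAT PBR keeps the legacy BPB area zeroed
+ * ("must be zero" field), so 53 zero bytes identify exFAT; a FAT32 PBR is
+ * identified by a zero 16-bit FAT-size field in the FAT16 BPB layout.
+ */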
+static bool is_exfat(pbr_t *pbr)
+{
+ int i = 53;
+
+ do {
+ if (pbr->bpb.f64.res_zero[i-1])
+ break;
+ } while (--i);
+ return i ? false : true;
+}
+
+static bool is_fat32(pbr_t *pbr)
+{
+ if (le16_to_cpu(pbr->bpb.f16.num_fat_sectors))
+ return false;
+ return true;
+}
+
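+/* If the volume's logical sector size recorded in the PBR is larger than the
+ * block size currently set on the device, reset the block size accordingly
+ * and re-read sector 0 so the caller sees a PBR that spans a full block.
+ */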
+inline pbr_t *read_pbr_with_logical_sector(struct super_block *sb, struct buffer_head **prev_bh)
+{
+ pbr_t *p_pbr = (pbr_t *) (*prev_bh)->b_data;
+ u16 logical_sect = 0;
+
+ if (is_exfat(p_pbr))
+ logical_sect = 1 << p_pbr->bsx.f64.sect_size_bits;
+ else
+ logical_sect = get_unaligned_le16(&p_pbr->bpb.f16.sect_size);
+
+ /* is x a power of 2?
+ * (x) != 0 && (((x) & ((x) - 1)) == 0)
+ */
+ if (!is_power_of_2(logical_sect)
+ || (logical_sect < 512)
+ || (logical_sect > 4096)) {
+ sdfat_log_msg(sb, KERN_ERR, "bogus logical sector size %u",
+ logical_sect);
+ return NULL;
+ }
+
+ if (logical_sect < sb->s_blocksize) {
+ sdfat_log_msg(sb, KERN_ERR,
+ "logical sector size too small for device"
+ " (logical sector size = %u)", logical_sect);
+ return NULL;
+ }
+
+ if (logical_sect > sb->s_blocksize) {
+ struct buffer_head *bh = NULL;
+
+ __brelse(*prev_bh);
+ *prev_bh = NULL;
+
+ if (!sb_set_blocksize(sb, logical_sect)) {
+ sdfat_log_msg(sb, KERN_ERR,
+ "unable to set blocksize %u", logical_sect);
+ return NULL;
+ }
+ bh = sb_bread(sb, 0);
+ if (!bh) {
+ sdfat_log_msg(sb, KERN_ERR,
+ "unable to read boot sector "
+ "(logical sector size = %lu)", sb->s_blocksize);
+ return NULL;
+ }
+
+ *prev_bh = bh;
+ p_pbr = (pbr_t *) bh->b_data;
+ }
+
+ sdfat_log_msg(sb, KERN_INFO,
+ "set logical sector size : %lu", sb->s_blocksize);
+
+ return p_pbr;
+}
+
+/* mount the file system volume */
+s32 fscore_mount(struct super_block *sb)
+{
+ s32 ret;
+ pbr_t *p_pbr;
+ struct buffer_head *tmp_bh = NULL;
+ struct gendisk *disk = sb->s_bdev->bd_disk;
+ struct hd_struct *part = sb->s_bdev->bd_part;
+ struct sdfat_mount_options *opts = &(SDFAT_SB(sb)->options);
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ /* initialize previous I/O error */
+ fsi->prev_eio = 0;
+
+ /* open the block device */
+ if (bdev_open_dev(sb))
+ return -EIO;
+
+ /* set block size to read super block */
+ sb_min_blocksize(sb, 512);
+
+ /* read boot sector */
+ ret = read_sect(sb, 0, &tmp_bh, 1);
+ if (ret) {
+ sdfat_log_msg(sb, KERN_ERR, "unable to read boot sector");
+ ret = -EIO;
+ goto bd_close;
+ }
+
+ /* PBR has been read */
+ p_pbr = (pbr_t *) tmp_bh->b_data;
+
+ /* check the validity of PBR */
+ if (le16_to_cpu((p_pbr->signature)) != PBR_SIGNATURE) {
+ sdfat_log_msg(sb, KERN_ERR, "invalid boot record signature");
+ brelse(tmp_bh);
+ ret = -EINVAL;
+ goto bd_close;
+ }
+
+ /* check logical sector size */
+ p_pbr = read_pbr_with_logical_sector(sb, &tmp_bh);
+ if (!p_pbr) {
+ brelse(tmp_bh);
+ ret = -EIO;
+ goto bd_close;
+ }
+
+ /* fill fs_struct */
+ if (is_exfat(p_pbr)) {
+ if (opts->fs_type && opts->fs_type != FS_TYPE_EXFAT) {
+ sdfat_log_msg(sb, KERN_ERR,
+ "not specified filesystem type "
+ "(media:exfat, opts:%s)",
+ FS_TYPE_STR[opts->fs_type]);
+ ret = -EINVAL;
+ goto free_bh;
+ }
+ /* set maximum file size for exFAT */
+ sb->s_maxbytes = 0x7fffffffffffffffLL;
+ opts->improved_allocation = 0;
+ opts->defrag = 0;
+ ret = mount_exfat(sb, p_pbr);
+ } else if (is_fat32(p_pbr)) {
+ if (opts->fs_type && opts->fs_type != FS_TYPE_VFAT) {
+ sdfat_log_msg(sb, KERN_ERR,
+ "not specified filesystem type "
+ "(media:vfat, opts:%s)",
+ FS_TYPE_STR[opts->fs_type]);
+ ret = -EINVAL;
+ goto free_bh;
+ }
+ /* set maximum file size for FAT */
+ sb->s_maxbytes = 0xffffffff;
+ ret = mount_fat32(sb, p_pbr);
+ } else {
+ if (opts->fs_type && opts->fs_type != FS_TYPE_VFAT) {
+ sdfat_log_msg(sb, KERN_ERR,
+ "not specified filesystem type "
+ "(media:vfat, opts:%s)",
+ FS_TYPE_STR[opts->fs_type]);
+ ret = -EINVAL;
+ goto free_bh;
+ }
+ /* set maximum file size for FAT */
+ sb->s_maxbytes = 0xffffffff;
+ opts->improved_allocation = 0;
+ opts->defrag = 0;
+ ret = mount_fat16(sb, p_pbr);
+ }
+free_bh:
+ brelse(tmp_bh);
+ if (ret) {
+ sdfat_log_msg(sb, KERN_ERR, "failed to mount fs-core");
+ goto bd_close;
+ }
+
+ /* warn if misaligned : the data start sector should be a multiple of the cluster size (in sectors) */
+ sdfat_log_msg(sb, KERN_INFO,
+ "detected volume info : %s "
+ "(bps : %lu, spc : %u, data start : %llu, %s)",
+ sdfat_get_vol_type_str(fsi->vol_type),
+ sb->s_blocksize, fsi->sect_per_clus, fsi->data_start_sector,
+ (fsi->data_start_sector & (fsi->sect_per_clus - 1)) ?
+ "misaligned" : "aligned");
+
+ sdfat_log_msg(sb, KERN_INFO,
+ "detected volume size : %llu KB (disk : %llu KB, "
+ "part : %llu KB)",
+ (fsi->num_sectors * (sb->s_blocksize >> SECTOR_SIZE_BITS)) >> 1,
+ disk ? (u64)((disk->part0.nr_sects) >> 1) : 0,
+ part ? (u64)((part->nr_sects) >> 1) : 0);
+
+ ret = load_upcase_table(sb);
+ if (ret) {
+ sdfat_log_msg(sb, KERN_ERR, "failed to load upcase table");
+ goto bd_close;
+ }
+
+ if (fsi->vol_type != EXFAT)
+ goto update_used_clus;
+
+ /* allocate-bitmap is only for exFAT */
+ ret = load_alloc_bmp(sb);
+ if (ret) {
+ sdfat_log_msg(sb, KERN_ERR, "failed to load alloc-bitmap");
+ goto free_upcase;
+ }
+
+update_used_clus:
+ if (fsi->used_clusters == (u32) ~0) {
+ ret = fsi->fs_func->count_used_clusters(sb, &fsi->used_clusters);
+ if (ret) {
+ sdfat_log_msg(sb, KERN_ERR, "failed to scan clusters");
+ goto free_alloc_bmp;
+ }
+ }
+
+ return 0;
+free_alloc_bmp:
+ if (fsi->vol_type == EXFAT)
+ free_alloc_bmp(sb);
+free_upcase:
+ free_upcase_table(sb);
+bd_close:
+ bdev_close_dev(sb);
+ return ret;
+} /* end of fscore_mount */
+
+/* umount the file system volume */
+s32 fscore_umount(struct super_block *sb)
+{
+ s32 ret = 0;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ if (fs_sync(sb, 0))
+ ret = -EIO;
+
+ if (fs_set_vol_flags(sb, VOL_CLEAN))
+ ret = -EIO;
+
+ free_upcase_table(sb);
+
+ if (fsi->vol_type == EXFAT)
+ free_alloc_bmp(sb);
+
+ if (fcache_release_all(sb))
+ ret = -EIO;
+
+ if (dcache_release_all(sb))
+ ret = -EIO;
+
+ amap_destroy(sb);
+
+ if (fsi->prev_eio)
+ ret = -EIO;
+ /* close the block device */
+ bdev_close_dev(sb);
+ return ret;
+}
+
+/* get the information of a file system volume */
+s32 fscore_statfs(struct super_block *sb, VOL_INFO_T *info)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ if (fsi->used_clusters == (u32) ~0) {
+ if (fsi->fs_func->count_used_clusters(sb, &fsi->used_clusters))
+ return -EIO;
+ }
+
+ info->FatType = fsi->vol_type;
+ info->ClusterSize = fsi->cluster_size;
+ info->NumClusters = fsi->num_clusters - 2; /* clu 0 & 1 */
+ info->UsedClusters = fsi->used_clusters + fsi->reserved_clusters;
+ info->FreeClusters = info->NumClusters - info->UsedClusters;
+
+ return 0;
+}
+
+/* synchronize the file system volume */
+s32 fscore_sync_fs(struct super_block *sb, s32 do_sync)
+{
+ /* synchronize the file system */
+ if (fs_sync(sb, do_sync))
+ return -EIO;
+
+ if (fs_set_vol_flags(sb, VOL_CLEAN))
+ return -EIO;
+
+ return 0;
+}
+
+/* stat allocation unit of a file system volume */
+u32 fscore_get_au_stat(struct super_block *sb, s32 mode)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ if (fsi->fs_func->get_au_stat)
+ return fsi->fs_func->get_au_stat(sb, mode);
+
+ /* No error, just returns 0 */
+ return 0;
+}
+
+
+/*----------------------------------------------------------------------*/
+/* File Operation Functions */
+/*----------------------------------------------------------------------*/
+/* lookup a file */
+s32 fscore_lookup(struct inode *inode, u8 *path, FILE_ID_T *fid)
+{
+ s32 ret, dentry, num_entries;
+ CHAIN_T dir;
+ UNI_NAME_T uni_name;
+ DOS_NAME_T dos_name;
+ DENTRY_T *ep, *ep2;
+ ENTRY_SET_CACHE_T *es = NULL;
+ struct super_block *sb = inode->i_sb;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ FILE_ID_T *dir_fid = &(SDFAT_I(inode)->fid);
+
+ TMSG("%s entered\n", __func__);
+
+ /* check the validity of directory name in the given pathname */
+ ret = resolve_path_for_lookup(inode, path, &dir, &uni_name);
+ if (ret)
+ return ret;
+
+ ret = get_num_entries_and_dos_name(sb, &dir, &uni_name, &num_entries, &dos_name, 1);
+ if (ret)
+ return ret;
+
+ /* check the validity of hint_stat and initialize it if required */
+ if (dir_fid->version != (u32)inode_peek_iversion(inode)) {
+ dir_fid->hint_stat.clu = dir.dir;
+ dir_fid->hint_stat.eidx = 0;
+ dir_fid->version = (u32)inode_peek_iversion(inode);
+ dir_fid->hint_femp.eidx = -1;
+ }
+
+ /* search the directory for the file name */
+ dentry = fsi->fs_func->find_dir_entry(sb, dir_fid, &dir, &uni_name,
+ num_entries, &dos_name, TYPE_ALL);
+
+ if ((dentry < 0) && (dentry != -EEXIST))
+ return dentry; /* -error value */
+
+ fid->dir.dir = dir.dir;
+ fid->dir.size = dir.size;
+ fid->dir.flags = dir.flags;
+ fid->entry = dentry;
+
+ /* root directory itself */
+ if (unlikely(dentry == -EEXIST)) {
+ fid->type = TYPE_DIR;
+ fid->rwoffset = 0;
+ fid->hint_bmap.off = CLUS_EOF;
+
+ fid->attr = ATTR_SUBDIR;
+ fid->flags = 0x01;
+ fid->size = 0;
+ fid->start_clu = fsi->root_dir;
+ } else {
+ if (fsi->vol_type == EXFAT) {
+ es = get_dentry_set_in_dir(sb, &dir, dentry, ES_2_ENTRIES, &ep);
+ if (!es)
+ return -EIO;
+ ep2 = ep+1;
+ } else {
+ ep = get_dentry_in_dir(sb, &dir, dentry, NULL);
+ if (!ep)
+ return -EIO;
+ ep2 = ep;
+ }
+
+ fid->type = fsi->fs_func->get_entry_type(ep);
+ fid->rwoffset = 0;
+ fid->hint_bmap.off = CLUS_EOF;
+ fid->attr = fsi->fs_func->get_entry_attr(ep);
+
+ fid->size = fsi->fs_func->get_entry_size(ep2);
+ if ((fid->type == TYPE_FILE) && (fid->size == 0)) {
+ fid->flags = (fsi->vol_type == EXFAT) ? 0x03 : 0x01;
+ fid->start_clu = CLUS_EOF;
+ } else {
+ fid->flags = fsi->fs_func->get_entry_flag(ep2);
+ fid->start_clu = fsi->fs_func->get_entry_clu0(ep2);
+ }
+
+ if ((fid->type == TYPE_DIR) && (fsi->vol_type != EXFAT)) {
+ u32 num_clu = 0;
+ CHAIN_T tmp_dir;
+
+ tmp_dir.dir = fid->start_clu;
+ tmp_dir.flags = fid->flags;
+ tmp_dir.size = 0; /* UNUSED */
+
+ if (__count_num_clusters(sb, &tmp_dir, &num_clu))
+ return -EIO;
+ fid->size = (u64)num_clu << fsi->cluster_size_bits;
+ }
+
+ /* FOR GRACEFUL ERROR HANDLING */
+ if (IS_CLUS_FREE(fid->start_clu)) {
+ sdfat_fs_error(sb,
+ "non-zero size file starts with zero cluster "
+ "(size : %llu, p_dir : %u, entry : 0x%08x)",
+ fid->size, fid->dir.dir, fid->entry);
+ sdfat_debug_bug_on(1);
+ return -EIO;
+ }
+
+ if (fsi->vol_type == EXFAT)
+ release_dentry_set(es);
+ }
+
+ /* hint_stat will be used if this is a directory. */
+ fid->version = 0;
+ fid->hint_stat.eidx = 0;
+ fid->hint_stat.clu = fid->start_clu;
+ fid->hint_femp.eidx = -1;
+
+ TMSG("%s exited successfully\n", __func__);
+ return 0;
+} /* end of fscore_lookup */
+
+/* create a file */
+s32 fscore_create(struct inode *inode, u8 *path, u8 mode, FILE_ID_T *fid)
+{
+ s32 ret/*, dentry*/;
+ CHAIN_T dir;
+ UNI_NAME_T uni_name;
+ struct super_block *sb = inode->i_sb;
+
+ /* check the validity of directory name in the given pathname */
+ ret = resolve_path(inode, path, &dir, &uni_name);
+ if (ret)
+ return ret;
+
+ fs_set_vol_flags(sb, VOL_DIRTY);
+
+ /* create a new file */
+ ret = create_file(inode, &dir, &uni_name, mode, fid);
+
+ fs_sync(sb, 0);
+ fs_set_vol_flags(sb, VOL_CLEAN);
+
+ return ret;
+}
+
+/* read data from an open file */
+s32 fscore_read_link(struct inode *inode, FILE_ID_T *fid, void *buffer, u64 count, u64 *rcount)
+{
+ s32 ret = 0;
+ s32 offset, sec_offset;
+ u32 clu_offset;
+ u32 clu;
+ u64 logsector, oneblkread, read_bytes;
+ struct buffer_head *tmp_bh = NULL;
+ struct super_block *sb = inode->i_sb;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ /* check if the given file ID is opened */
+ if (fid->type != TYPE_FILE)
+ return -EPERM;
+
+ if (fid->rwoffset > fid->size)
+ fid->rwoffset = fid->size;
+
+ if (count > (fid->size - fid->rwoffset))
+ count = fid->size - fid->rwoffset;
+
+ if (count == 0) {
+ if (rcount)
+ *rcount = 0;
+ return 0;
+ }
+
+ read_bytes = 0;
+
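+ /* Copy the data one block at a time through buffer heads. hint_bmap caches
+ * the last (cluster offset -> cluster) lookup so that sequential reads do
+ * not re-walk the FAT chain from the start of the file on every iteration.
+ */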
+ while (count > 0) {
+ clu_offset = fid->rwoffset >> fsi->cluster_size_bits;
+ clu = fid->start_clu;
+
+ if (fid->flags == 0x03) {
+ clu += clu_offset;
+ } else {
+ /* hint information */
+ if ((clu_offset > 0) &&
+ ((fid->hint_bmap.off != CLUS_EOF) && (fid->hint_bmap.off > 0)) &&
+ (clu_offset >= fid->hint_bmap.off)) {
+ clu_offset -= fid->hint_bmap.off;
+ clu = fid->hint_bmap.clu;
+ }
+
+ while (clu_offset > 0) {
+ ret = get_next_clus_safe(sb, &clu);
+ if (ret)
+ goto err_out;
+
+ clu_offset--;
+ }
+ }
+
+ /* hint information */
+ fid->hint_bmap.off = fid->rwoffset >> fsi->cluster_size_bits;
+ fid->hint_bmap.clu = clu;
+
+ offset = (s32)(fid->rwoffset & (fsi->cluster_size - 1)); /* byte offset in cluster */
+ sec_offset = offset >> sb->s_blocksize_bits; /* sector offset in cluster */
+ offset &= (sb->s_blocksize - 1); /* byte offset in sector */
+
+ logsector = CLUS_TO_SECT(fsi, clu) + sec_offset;
+
+ oneblkread = (u64)(sb->s_blocksize - offset);
+ if (oneblkread > count)
+ oneblkread = count;
+
+ if ((offset == 0) && (oneblkread == sb->s_blocksize)) {
+ ret = read_sect(sb, logsector, &tmp_bh, 1);
+ if (ret)
+ goto err_out;
+ memcpy(((s8 *) buffer)+read_bytes, ((s8 *) tmp_bh->b_data), (s32) oneblkread);
+ } else {
+ ret = read_sect(sb, logsector, &tmp_bh, 1);
+ if (ret)
+ goto err_out;
+ memcpy(((s8 *) buffer)+read_bytes, ((s8 *) tmp_bh->b_data)+offset, (s32) oneblkread);
+ }
+ count -= oneblkread;
+ read_bytes += oneblkread;
+ fid->rwoffset += oneblkread;
+ }
+
+err_out:
+ brelse(tmp_bh);
+
+ /* set the number of bytes read */
+ if (rcount != NULL)
+ *rcount = read_bytes;
+
+ return ret;
+} /* end of fscore_read_link */
+
+/* write data into an open file */
+s32 fscore_write_link(struct inode *inode, FILE_ID_T *fid, void *buffer, u64 count, u64 *wcount)
+{
+ s32 ret = 0;
+ s32 modified = false, offset, sec_offset;
+ u32 clu_offset, num_clusters, num_alloc;
+ u32 clu, last_clu;
+ u64 logsector, sector, oneblkwrite, write_bytes;
+ CHAIN_T new_clu;
+ TIMESTAMP_T tm;
+ DENTRY_T *ep, *ep2;
+ ENTRY_SET_CACHE_T *es = NULL;
+ struct buffer_head *tmp_bh = NULL;
+ struct super_block *sb = inode->i_sb;
+ u32 blksize = (u32)sb->s_blocksize;
+ u32 blksize_mask = (u32)(sb->s_blocksize-1);
+ u8 blksize_bits = sb->s_blocksize_bits;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ /* check if the given file ID is opened */
+ if (fid->type != TYPE_FILE)
+ return -EPERM;
+
+ if (fid->rwoffset > fid->size)
+ fid->rwoffset = fid->size;
+
+ if (count == 0) {
+ if (wcount)
+ *wcount = 0;
+ return 0;
+ }
+
+ fs_set_vol_flags(sb, VOL_DIRTY);
+
+ if (fid->size == 0)
+ num_clusters = 0;
+ else
+ num_clusters = ((fid->size-1) >> fsi->cluster_size_bits) + 1;
+
+ write_bytes = 0;
+
+ while (count > 0) {
+ clu_offset = (fid->rwoffset >> fsi->cluster_size_bits);
+ clu = last_clu = fid->start_clu;
+
+ if (fid->flags == 0x03) {
+ if ((clu_offset > 0) && (!IS_CLUS_EOF(clu))) {
+ last_clu += clu_offset - 1;
+
+ if (clu_offset == num_clusters)
+ clu = CLUS_EOF;
+ else
+ clu += clu_offset;
+ }
+ } else {
+ /* hint information */
+ if ((clu_offset > 0) &&
+ ((fid->hint_bmap.off != CLUS_EOF) && (fid->hint_bmap.off > 0)) &&
+ (clu_offset >= fid->hint_bmap.off)) {
+ clu_offset -= fid->hint_bmap.off;
+ clu = fid->hint_bmap.clu;
+ }
+
+ while ((clu_offset > 0) && (!IS_CLUS_EOF(clu))) {
+ last_clu = clu;
+ ret = get_next_clus_safe(sb, &clu);
+ if (ret)
+ goto err_out;
+
+ clu_offset--;
+ }
+ }
+
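+ /* rwoffset lies beyond the last allocated cluster :
+ * allocate new clusters and append them to the chain
+ */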
+ if (IS_CLUS_EOF(clu)) {
+ num_alloc = ((count-1) >> fsi->cluster_size_bits) + 1;
+ new_clu.dir = IS_CLUS_EOF(last_clu) ? CLUS_EOF : last_clu+1;
+ new_clu.size = 0;
+ new_clu.flags = fid->flags;
+
+ /* (1) allocate a chain of clusters */
+ ret = fsi->fs_func->alloc_cluster(sb, num_alloc, &new_clu, ALLOC_COLD);
+ if (ret)
+ goto err_out;
+
+ /* (2) append to the FAT chain */
+ if (IS_CLUS_EOF(last_clu)) {
+ if (new_clu.flags == 0x01)
+ fid->flags = 0x01;
+ fid->start_clu = new_clu.dir;
+ modified = true;
+ } else {
+ if (new_clu.flags != fid->flags) {
+ /* no-fat-chain bit is disabled,
+ * so fat-chain should be synced with
+ * alloc-bmp
+ */
+ chain_cont_cluster(sb, fid->start_clu, num_clusters);
+ fid->flags = 0x01;
+ modified = true;
+ }
+ if (new_clu.flags == 0x01) {
+ ret = fat_ent_set(sb, last_clu, new_clu.dir);
+ if (ret)
+ goto err_out;
+ }
+ }
+
+ num_clusters += num_alloc;
+ clu = new_clu.dir;
+ }
+
+ /* hint information */
+ fid->hint_bmap.off = fid->rwoffset >> fsi->cluster_size_bits;
+ fid->hint_bmap.clu = clu;
+
+ /* byte offset in cluster */
+ offset = (s32)(fid->rwoffset & (fsi->cluster_size-1));
+ /* sector offset in cluster */
+ sec_offset = offset >> blksize_bits;
+ /* byte offset in sector */
+ offset &= blksize_mask;
+ logsector = CLUS_TO_SECT(fsi, clu) + sec_offset;
+
+ oneblkwrite = (u64)(blksize - offset);
+ if (oneblkwrite > count)
+ oneblkwrite = count;
+
+ if ((offset == 0) && (oneblkwrite == blksize)) {
+ ret = read_sect(sb, logsector, &tmp_bh, 0);
+ if (ret)
+ goto err_out;
+
+ memcpy(((s8 *)tmp_bh->b_data),
+ ((s8 *)buffer)+write_bytes,
+ (s32)oneblkwrite);
+
+ ret = write_sect(sb, logsector, tmp_bh, 0);
+ if (ret) {
+ brelse(tmp_bh);
+ goto err_out;
+ }
+ } else {
+ if ((offset > 0) || ((fid->rwoffset+oneblkwrite) < fid->size)) {
+ ret = read_sect(sb, logsector, &tmp_bh, 1);
+ if (ret)
+ goto err_out;
+ } else {
+ ret = read_sect(sb, logsector, &tmp_bh, 0);
+ if (ret)
+ goto err_out;
+ }
+
+ memcpy(((s8 *) tmp_bh->b_data)+offset, ((s8 *) buffer)+write_bytes, (s32) oneblkwrite);
+ ret = write_sect(sb, logsector, tmp_bh, 0);
+ if (ret) {
+ brelse(tmp_bh);
+ goto err_out;
+ }
+ }
+
+ count -= oneblkwrite;
+ write_bytes += oneblkwrite;
+ fid->rwoffset += oneblkwrite;
+
+ fid->attr |= ATTR_ARCHIVE;
+
+ if (fid->size < fid->rwoffset) {
+ fid->size = fid->rwoffset;
+ modified = true;
+ }
+ }
+
+ brelse(tmp_bh);
+
+ /* (3) update the directory entry */
+ /* get_dentry_(set_)in_dir should check the DIR_DELETED flag. */
+ if (fsi->vol_type == EXFAT) {
+ es = get_dentry_set_in_dir(sb, &(fid->dir), fid->entry, ES_ALL_ENTRIES, &ep);
+ if (!es) {
+ ret = -EIO;
+ goto err_out;
+ }
+ ep2 = ep+1;
+ } else {
+ ep = get_dentry_in_dir(sb, &(fid->dir), fid->entry, &sector);
+ if (!ep) {
+ ret = -EIO;
+ goto err_out;
+ }
+ ep2 = ep;
+ }
+
+ fsi->fs_func->set_entry_time(ep, tm_now(SDFAT_SB(sb), &tm), TM_MODIFY);
+ fsi->fs_func->set_entry_attr(ep, fid->attr);
+
+ if (modified) {
+ if (fsi->fs_func->get_entry_flag(ep2) != fid->flags)
+ fsi->fs_func->set_entry_flag(ep2, fid->flags);
+
+ if (fsi->fs_func->get_entry_size(ep2) != fid->size)
+ fsi->fs_func->set_entry_size(ep2, fid->size);
+
+ if (fsi->fs_func->get_entry_clu0(ep2) != fid->start_clu)
+ fsi->fs_func->set_entry_clu0(ep2, fid->start_clu);
+ }
+
+ if (fsi->vol_type == EXFAT) {
+ if (update_dir_chksum_with_entry_set(sb, es)) {
+ ret = -EIO;
+ goto err_out;
+ }
+ release_dentry_set(es);
+ } else {
+ if (dcache_modify(sb, sector)) {
+ ret = -EIO;
+ goto err_out;
+ }
+ }
+
+ fs_sync(sb, 0);
+ fs_set_vol_flags(sb, VOL_CLEAN);
+
+err_out:
+ /* set the number of bytes written */
+ if (wcount)
+ *wcount = write_bytes;
+
+ return ret;
+} /* end of fscore_write_link */
+
+/* resize the file length */
+s32 fscore_truncate(struct inode *inode, u64 old_size, u64 new_size)
+{
+ u32 num_clusters_new, num_clusters_da, num_clusters_phys;
+ u32 last_clu = CLUS_FREE;
+ u64 sector;
+ CHAIN_T clu;
+ TIMESTAMP_T tm;
+ DENTRY_T *ep, *ep2;
+ struct super_block *sb = inode->i_sb;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ FILE_ID_T *fid = &(SDFAT_I(inode)->fid);
+ ENTRY_SET_CACHE_T *es = NULL;
+ s32 evict = (fid->dir.dir == DIR_DELETED) ? 1 : 0;
+
+ /* check if the given file ID is opened */
+ if ((fid->type != TYPE_FILE) && (fid->type != TYPE_DIR))
+ return -EPERM;
+
+ /* TO CHECK inode type and size */
+ MMSG("%s: inode(%p) type(%s) size:%lld->%lld\n", __func__, inode,
+ (fid->type == TYPE_FILE) ? "file" : "dir", old_size, new_size);
+
+ /* XXX : This is for debugging. */
+
+ /* This can happen when a write has failed */
+#if 0
+ if (fid->size != old_size) {
+ DMSG("%s: inode(%p) size-mismatch(old:%lld != fid:%lld)\n",
+ __func__, inode, old_size, fid->size);
+ WARN_ON(1);
+ }
+#endif
+ /*
+ * There is no lock to protect fid->size.
+ * So, we should get old_size and use it.
+ */
+ if (old_size <= new_size)
+ return 0;
+
+ fs_set_vol_flags(sb, VOL_DIRTY);
+
+ /* Reserved count update */
+ #define num_clusters(v) ((v) ? (u32)(((v) - 1) >> fsi->cluster_size_bits) + 1 : 0)
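+ /* a minimal worked example, assuming 4 KiB clusters (cluster_size_bits == 12) :
+ * num_clusters(0) == 0, num_clusters(4096) == 1, num_clusters(4097) == 2
+ */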
+ num_clusters_da = num_clusters(SDFAT_I(inode)->i_size_aligned);
+ num_clusters_new = num_clusters(i_size_read(inode));
+ num_clusters_phys = num_clusters(SDFAT_I(inode)->i_size_ondisk);
+
+ /* num_clusters(i_size_old) should be equal to num_clusters_da */
+ BUG_ON((num_clusters(old_size)) != (num_clusters(SDFAT_I(inode)->i_size_aligned)));
+
+ /* for debugging (FIXME: is okay on no-da case?) */
+ BUG_ON(num_clusters_da < num_clusters_phys);
+
+ if ((num_clusters_da != num_clusters_phys) &&
+ (num_clusters_new < num_clusters_da)) {
+ /* Decrement reserved clusters
+ * n_reserved = num_clusters_da - max(new,phys)
+ */
+ int n_reserved = (num_clusters_new > num_clusters_phys) ?
+ (num_clusters_da - num_clusters_new) :
+ (num_clusters_da - num_clusters_phys);
+
+ fsi->reserved_clusters -= n_reserved;
+ BUG_ON(fsi->reserved_clusters < 0);
+ }
+
+ clu.dir = fid->start_clu;
+ /* In no-da case, num_clusters_phys is equal to below value
+ * clu.size = (u32)((old_size-1) >> fsi->cluster_size_bits) + 1;
+ */
+ clu.size = num_clusters_phys;
+ clu.flags = fid->flags;
+
+ /* For bigdata */
+ sdfat_statistics_set_trunc(clu.flags, &clu);
+
+ if (new_size > 0) {
+ /* Truncate FAT chain num_clusters after the first cluster
+ * num_clusters = min(new, phys);
+ */
+ u32 num_clusters = (num_clusters_new < num_clusters_phys) ?
+ num_clusters_new : num_clusters_phys;
+
+ /* Follow FAT chain
+ * (defensive coding - works fine even with a corrupted FAT table)
+ */
+ if (clu.flags == 0x03) {
+ clu.dir += num_clusters;
+ clu.size -= num_clusters;
+#if 0
+ /* extent_get_clus() can't know the last cluster
+ * when the target cluster is found in the cache.
+ */
+ } else if (fid->type == TYPE_FILE) {
+ u32 fclus = 0;
+ s32 err = extent_get_clus(inode, num_clusters,
+ &fclus, &(clu.dir), &last_clu, 0);
+ if (err)
+ return -EIO;
+ ASSERT(fclus == num_clusters);
+
+ if ((num_clusters > 1) && (last_clu == fid->start_clu)) {
+ u32 fclus_tmp = 0;
+ u32 temp = 0;
+
+ err = extent_get_clus(inode, num_clusters - 1,
+ &fclus_tmp, &last_clu, &temp, 0);
+ if (err)
+ return -EIO;
+ ASSERT(fclus_tmp == (num_clusters - 1));
+ }
+
+ num_clusters -= fclus;
+ clu.size -= fclus;
+#endif
+ } else {
+ while (num_clusters > 0) {
+ last_clu = clu.dir;
+ if (get_next_clus_safe(sb, &(clu.dir)))
+ return -EIO;
+
+ num_clusters--;
+ clu.size--;
+ }
+ }
+
+ /* Optimization available: */
+#if 0
+ if (num_clusters_new < num_clusters) {
+ < loop >
+ } else {
+ // num_clusters_new >= num_clusters_phys
+ // FAT truncation is not necessary
+
+ clu.dir = CLUS_EOF;
+ clu.size = 0;
+ }
+#endif
+ } else if (new_size == 0) {
+ fid->flags = (fsi->vol_type == EXFAT) ? 0x03 : 0x01;
+ fid->start_clu = CLUS_EOF;
+ }
+ fid->size = new_size;
+
+ if (fid->type == TYPE_FILE)
+ fid->attr |= ATTR_ARCHIVE;
+
+ /*
+ * clu.dir: the cluster to start freeing from
+ * clu.size: # of clusters to free (exFAT, 0x03 only), no fat_free if 0
+ * clu.flags: fid->flags (exFAT only)
+ */
+
+ /* (1) update the directory entry */
+ if (!evict) {
+
+ if (fsi->vol_type == EXFAT) {
+ es = get_dentry_set_in_dir(sb, &(fid->dir), fid->entry, ES_ALL_ENTRIES, &ep);
+ if (!es)
+ return -EIO;
+ ep2 = ep+1;
+ } else {
+ ep = get_dentry_in_dir(sb, &(fid->dir), fid->entry, &sector);
+ if (!ep)
+ return -EIO;
+ ep2 = ep;
+ }
+
+ fsi->fs_func->set_entry_time(ep, tm_now(SDFAT_SB(sb), &tm), TM_MODIFY);
+ fsi->fs_func->set_entry_attr(ep, fid->attr);
+
+ /*
+ * if (fsi->vol_type != EXFAT)
+ * dcache_modify(sb, sector);
+ */
+
+ /* File size should be zero if there is no cluster allocated */
+ if (IS_CLUS_EOF(fid->start_clu))
+ fsi->fs_func->set_entry_size(ep2, 0);
+ else
+ fsi->fs_func->set_entry_size(ep2, new_size);
+
+ if (new_size == 0) {
+ /* A directory can never be truncated to zero size */
+ BUG_ON(fid->type != TYPE_FILE);
+
+ fsi->fs_func->set_entry_flag(ep2, 0x01);
+ fsi->fs_func->set_entry_clu0(ep2, CLUS_FREE);
+ }
+
+ if (fsi->vol_type == EXFAT) {
+ if (update_dir_chksum_with_entry_set(sb, es))
+ return -EIO;
+ release_dentry_set(es);
+ } else {
+ if (dcache_modify(sb, sector))
+ return -EIO;
+ }
+
+ } /* end of if(fid->dir.dir != DIR_DELETED) */
+
+ /* (2) cut off from the FAT chain */
+ if ((fid->flags == 0x01) &&
+ (!IS_CLUS_FREE(last_clu)) && (!IS_CLUS_EOF(last_clu))) {
+ if (fat_ent_set(sb, last_clu, CLUS_EOF))
+ return -EIO;
+ }
+
+ /* (3) invalidate cache and free the clusters */
+ /* clear extent cache */
+ extent_cache_inval_inode(inode);
+
+ /* hint information */
+ fid->hint_bmap.off = CLUS_EOF;
+ fid->hint_bmap.clu = CLUS_EOF;
+ if (fid->rwoffset > fid->size)
+ fid->rwoffset = fid->size;
+
+ /* hint_stat will be used if this is a directory. */
+ fid->hint_stat.eidx = 0;
+ fid->hint_stat.clu = fid->start_clu;
+ fid->hint_femp.eidx = -1;
+
+ /* free the clusters */
+ if (fsi->fs_func->free_cluster(sb, &clu, evict))
+ return -EIO;
+
+ fs_sync(sb, 0);
+ fs_set_vol_flags(sb, VOL_CLEAN);
+
+ return 0;
+} /* end of fscore_truncate */
+
+static void update_parent_info(FILE_ID_T *fid, struct inode *parent_inode)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(parent_inode->i_sb)->fsi);
+ FILE_ID_T *parent_fid = &(SDFAT_I(parent_inode)->fid);
+
+	/*
+	 * Fix up the case where FILE_ID_T caches stale parent info:
+	 * a flag mismatch in fid->dir leads to abnormal traversal of
+	 * the cluster chain, so resync fid->dir from the parent fid.
+	 */
+ if (unlikely((parent_fid->flags != fid->dir.flags)
+ || (parent_fid->size != (fid->dir.size<<fsi->cluster_size_bits))
+ || (parent_fid->start_clu != fid->dir.dir))) {
+
+ fid->dir.dir = parent_fid->start_clu;
+ fid->dir.flags = parent_fid->flags;
+ fid->dir.size = ((parent_fid->size + (fsi->cluster_size-1))
+ >> fsi->cluster_size_bits);
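+		/* i.e. DIV_ROUND_UP(parent_fid->size, cluster size), in clusters */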
+ }
+}
+
+/* rename or move an old file into a new file */
+s32 fscore_rename(struct inode *old_parent_inode, FILE_ID_T *fid,
+ struct inode *new_parent_inode, struct dentry *new_dentry)
+{
+ s32 ret;
+ s32 dentry;
+ CHAIN_T olddir, newdir;
+ CHAIN_T *p_dir = NULL;
+ UNI_NAME_T uni_name;
+ DENTRY_T *ep;
+ struct super_block *sb = old_parent_inode->i_sb;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ u8 *new_path = (u8 *) new_dentry->d_name.name;
+ struct inode *new_inode = new_dentry->d_inode;
+ int num_entries;
+ FILE_ID_T *new_fid = NULL;
+ u32 new_entry_type = TYPE_UNUSED;
+ s32 new_entry = 0;
+
+ /* check the validity of pointer parameters */
+ if ((new_path == NULL) || (strlen(new_path) == 0))
+ return -EINVAL;
+
+ if (fid->dir.dir == DIR_DELETED) {
+ EMSG("%s : abnormal access to deleted source dentry\n", __func__);
+ return -ENOENT;
+ }
+
+ /* patch 1.2.4 : the problem that FILE_ID_T caches wrong parent info. */
+ update_parent_info(fid, old_parent_inode);
+
+ olddir.dir = fid->dir.dir;
+ olddir.size = fid->dir.size;
+ olddir.flags = fid->dir.flags;
+
+ dentry = fid->entry;
+
+ /* check if the old file is "." or ".." */
+ if (fsi->vol_type != EXFAT) {
+ if ((olddir.dir != fsi->root_dir) && (dentry < 2))
+ return -EPERM;
+ }
+
+ ep = get_dentry_in_dir(sb, &olddir, dentry, NULL);
+ if (!ep)
+ return -EIO;
+
+#ifdef CONFIG_SDFAT_CHECK_RO_ATTR
+ if (fsi->fs_func->get_entry_attr(ep) & ATTR_READONLY)
+ return -EPERM;
+#endif
+
+ /* check whether new dir is existing directory and empty */
+ if (new_inode) {
+ ret = -EIO;
+ new_fid = &SDFAT_I(new_inode)->fid;
+
+ if (new_fid->dir.dir == DIR_DELETED) {
+ EMSG("%s : abnormal access to deleted target dentry\n", __func__);
+ goto out;
+ }
+
+		/* patch 1.2.4 :
+		 * FILE_ID_T may cache stale parent info.
+		 *
+		 * FIXME : is this still needed?
+		 */
+ update_parent_info(new_fid, new_parent_inode);
+
+ p_dir = &(new_fid->dir);
+ new_entry = new_fid->entry;
+ ep = get_dentry_in_dir(sb, p_dir, new_entry, NULL);
+ if (!ep)
+ goto out;
+
+ new_entry_type = fsi->fs_func->get_entry_type(ep);
+
+ /* if new_inode exists, update fid */
+ new_fid->size = i_size_read(new_inode);
+
+ if (new_entry_type == TYPE_DIR) {
+ CHAIN_T new_clu;
+
+ new_clu.dir = new_fid->start_clu;
+ new_clu.size = ((new_fid->size-1) >> fsi->cluster_size_bits) + 1;
+ new_clu.flags = new_fid->flags;
+
+ ret = check_dir_empty(sb, &new_clu);
+ if (ret)
+ return ret;
+ }
+ }
+
+ /* check the validity of directory name in the given new pathname */
+ ret = resolve_path(new_parent_inode, new_path, &newdir, &uni_name);
+ if (ret)
+ return ret;
+
+ fs_set_vol_flags(sb, VOL_DIRTY);
+
+ if (olddir.dir == newdir.dir)
+ ret = rename_file(new_parent_inode, &olddir, dentry, &uni_name, fid);
+ else
+ ret = move_file(new_parent_inode, &olddir, dentry, &newdir, &uni_name, fid);
+
+ if ((!ret) && new_inode) {
+ /* delete entries of new_dir */
+ ep = get_dentry_in_dir(sb, p_dir, new_entry, NULL);
+ if (!ep) {
+ ret = -EIO;
+ goto del_out;
+ }
+
+ num_entries = fsi->fs_func->count_ext_entries(sb, p_dir, new_entry, ep);
+ if (num_entries < 0) {
+ ret = -EIO;
+ goto del_out;
+ }
+
+ if (fsi->fs_func->delete_dir_entry(sb, p_dir, new_entry, 0, num_entries+1)) {
+ ret = -EIO;
+ goto del_out;
+ }
+
+ /* Free the clusters if new_inode is a dir(as if fscore_rmdir) */
+ if (new_entry_type == TYPE_DIR) {
+ /* new_fid, new_clu_to_free */
+ CHAIN_T new_clu_to_free;
+
+ new_clu_to_free.dir = new_fid->start_clu;
+ new_clu_to_free.size = ((new_fid->size-1) >> fsi->cluster_size_bits) + 1;
+ new_clu_to_free.flags = new_fid->flags;
+
+ if (fsi->fs_func->free_cluster(sb, &new_clu_to_free, 1)) {
+ /* just set I/O error only */
+ ret = -EIO;
+ }
+
+ new_fid->size = 0;
+ new_fid->start_clu = CLUS_EOF;
+ new_fid->flags = (fsi->vol_type == EXFAT) ? 0x03 : 0x01;
+ }
+del_out:
+	/* Update the fid of new_inode to prevent syncing the removed
+	 * new_inode (new_fid was already initialized in the
+	 * "if (new_inode)" block above).
+	 */
+ new_fid->dir.dir = DIR_DELETED;
+ }
+out:
+ fs_sync(sb, 0);
+ fs_set_vol_flags(sb, VOL_CLEAN);
+
+ return ret;
+} /* end of fscore_rename */
+
+/* remove a file */
+s32 fscore_remove(struct inode *inode, FILE_ID_T *fid)
+{
+ s32 ret;
+ s32 dentry;
+ CHAIN_T dir, clu_to_free;
+ DENTRY_T *ep;
+ struct super_block *sb = inode->i_sb;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ dir.dir = fid->dir.dir;
+ dir.size = fid->dir.size;
+ dir.flags = fid->dir.flags;
+
+ dentry = fid->entry;
+
+ if (fid->dir.dir == DIR_DELETED) {
+ EMSG("%s : abnormal access to deleted dentry\n", __func__);
+ return -ENOENT;
+ }
+
+ ep = get_dentry_in_dir(sb, &dir, dentry, NULL);
+ if (!ep)
+ return -EIO;
+
+#ifdef CONFIG_SDFAT_CHECK_RO_ATTR
+ if (fsi->fs_func->get_entry_attr(ep) & ATTR_READONLY)
+ return -EPERM;
+#endif
+
+ fs_set_vol_flags(sb, VOL_DIRTY);
+
+ /* (1) update the directory entry */
+ ret = remove_file(inode, &dir, dentry);
+ if (ret)
+ goto out;
+
+ clu_to_free.dir = fid->start_clu;
+ clu_to_free.size = ((fid->size-1) >> fsi->cluster_size_bits) + 1;
+ clu_to_free.flags = fid->flags;
+
+	/* (2) invalidate extent cache and free the clusters */
+ /* clear extent cache */
+ extent_cache_inval_inode(inode);
+ ret = fsi->fs_func->free_cluster(sb, &clu_to_free, 0);
+ /* WARN : DO NOT RETURN ERROR IN HERE */
+
+ /* (3) update FILE_ID_T */
+ fid->size = 0;
+ fid->start_clu = CLUS_EOF;
+ fid->flags = (fsi->vol_type == EXFAT) ? 0x03 : 0x01;
+ fid->dir.dir = DIR_DELETED;
+
+ fs_sync(sb, 0);
+ fs_set_vol_flags(sb, VOL_CLEAN);
+out:
+ return ret;
+} /* end of fscore_remove */
+
+
+/*
+ * Get the information of a given file
+ * REMARK : This function does not need any file name on linux
+ *
+ * info.Size means the value saved on disk.
+ * But the root directory doesn't have a real dentry,
+ * so its size is returned as a calculated value, as an exception.
+ */
+s32 fscore_read_inode(struct inode *inode, DIR_ENTRY_T *info)
+{
+ u64 sector;
+ s32 count;
+ CHAIN_T dir;
+ TIMESTAMP_T tm;
+ DENTRY_T *ep, *ep2;
+ struct super_block *sb = inode->i_sb;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ FILE_ID_T *fid = &(SDFAT_I(inode)->fid);
+ ENTRY_SET_CACHE_T *es = NULL;
+ u8 is_dir = (fid->type == TYPE_DIR) ? 1 : 0;
+
+ TMSG("%s entered\n", __func__);
+
+ extent_cache_init_inode(inode);
+
+ /* if root directory */
+ if (is_dir && (fid->dir.dir == fsi->root_dir) && (fid->entry == -1)) {
+ info->Attr = ATTR_SUBDIR;
+ memset((s8 *) &info->CreateTimestamp, 0, sizeof(DATE_TIME_T));
+ memset((s8 *) &info->ModifyTimestamp, 0, sizeof(DATE_TIME_T));
+ memset((s8 *) &info->AccessTimestamp, 0, sizeof(DATE_TIME_T));
+ //strcpy(info->NameBuf.sfn, ".");
+ //strcpy(info->NameBuf.lfn, ".");
+
+ dir.dir = fsi->root_dir;
+ dir.flags = 0x01;
+ dir.size = 0; /* UNUSED */
+
+ /* FAT16 root_dir */
+ if (IS_CLUS_FREE(fsi->root_dir)) {
+ info->Size = fsi->dentries_in_root << DENTRY_SIZE_BITS;
+ } else {
+ u32 num_clu;
+
+ if (__count_num_clusters(sb, &dir, &num_clu))
+ return -EIO;
+ info->Size = (u64)num_clu << fsi->cluster_size_bits;
+ }
+
+ count = __count_dos_name_entries(sb, &dir, TYPE_DIR, NULL);
+ if (count < 0)
+ return -EIO;
+ info->NumSubdirs = count;
+
+ return 0;
+ }
+
+ /* get the directory entry of given file or directory */
+ if (fsi->vol_type == EXFAT) {
+ /* es should be released */
+ es = get_dentry_set_in_dir(sb, &(fid->dir), fid->entry, ES_2_ENTRIES, &ep);
+ if (!es)
+ return -EIO;
+ ep2 = ep+1;
+ } else {
+ ep = get_dentry_in_dir(sb, &(fid->dir), fid->entry, &sector);
+ if (!ep)
+ return -EIO;
+ ep2 = ep;
+ /* dcache should be unlocked */
+ dcache_lock(sb, sector);
+ }
+
+ /* set FILE_INFO structure using the acquired DENTRY_T */
+ info->Attr = fsi->fs_func->get_entry_attr(ep);
+
+ fsi->fs_func->get_entry_time(ep, &tm, TM_CREATE);
+ info->CreateTimestamp.Year = tm.year;
+ info->CreateTimestamp.Month = tm.mon;
+ info->CreateTimestamp.Day = tm.day;
+ info->CreateTimestamp.Hour = tm.hour;
+ info->CreateTimestamp.Minute = tm.min;
+ info->CreateTimestamp.Second = tm.sec;
+ info->CreateTimestamp.MilliSecond = 0;
+ info->CreateTimestamp.Timezone.value = tm.tz.value;
+
+ fsi->fs_func->get_entry_time(ep, &tm, TM_MODIFY);
+ info->ModifyTimestamp.Year = tm.year;
+ info->ModifyTimestamp.Month = tm.mon;
+ info->ModifyTimestamp.Day = tm.day;
+ info->ModifyTimestamp.Hour = tm.hour;
+ info->ModifyTimestamp.Minute = tm.min;
+ info->ModifyTimestamp.Second = tm.sec;
+ info->ModifyTimestamp.MilliSecond = 0;
+ info->ModifyTimestamp.Timezone.value = tm.tz.value;
+
+ memset((s8 *) &info->AccessTimestamp, 0, sizeof(DATE_TIME_T));
+
+ info->NumSubdirs = 0;
+ info->Size = fsi->fs_func->get_entry_size(ep2);
+
+ if (fsi->vol_type == EXFAT)
+ release_dentry_set(es);
+ else
+ dcache_unlock(sb, sector);
+
+ if (is_dir) {
+ u32 dotcnt = 0;
+
+ dir.dir = fid->start_clu;
+ dir.flags = fid->flags;
+ dir.size = fid->size >> fsi->cluster_size_bits;
+ /*
+ * NOTE :
+		 * If "dir.flags" is 0x01, "dir.size" is meaningless.
+ */
+#if 0
+ if (info->Size == 0) {
+ s32 num_clu;
+
+ if (__count_num_clusters(sb, &dir, &num_clu))
+ return -EIO;
+ info->Size = (u64)num_clu << fsi->cluster_size_bits;
+ }
+#endif
+ count = __count_dos_name_entries(sb, &dir, TYPE_DIR, &dotcnt);
+ if (count < 0)
+ return -EIO;
+
+ if (fsi->vol_type == EXFAT) {
+ count += SDFAT_MIN_SUBDIR;
+ } else {
+ /*
+			 * if the directory has been corrupted,
+			 * we have to adjust the subdir count.
+ */
+ BUG_ON(dotcnt > SDFAT_MIN_SUBDIR);
+ if (dotcnt < SDFAT_MIN_SUBDIR) {
+				EMSG("%s: contents of the directory have been "
+ "corrupted (parent clus : %08x, idx : %d)",
+ __func__, fid->dir.dir, fid->entry);
+ }
+ count += (SDFAT_MIN_SUBDIR - dotcnt);
+ }
+ info->NumSubdirs = count;
+ }
+
+ TMSG("%s exited successfully\n", __func__);
+ return 0;
+} /* end of fscore_read_inode */
+
+/* set the information of a given file
+ * REMARK : This function does not need any file name on linux
+ */
+s32 fscore_write_inode(struct inode *inode, DIR_ENTRY_T *info, s32 sync)
+{
+ s32 ret = -EIO;
+ u64 sector;
+ TIMESTAMP_T tm;
+ DENTRY_T *ep, *ep2;
+ ENTRY_SET_CACHE_T *es = NULL;
+ struct super_block *sb = inode->i_sb;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ FILE_ID_T *fid = &(SDFAT_I(inode)->fid);
+ u8 is_dir = (fid->type == TYPE_DIR) ? 1 : 0;
+
+ /* SKIP WRITING INODE :
+	 * if the inode is already unlinked,
+	 * there is no need to update it
+ */
+ if (fid->dir.dir == DIR_DELETED)
+ return 0;
+
+ if (is_dir && (fid->dir.dir == fsi->root_dir) && (fid->entry == -1))
+ return 0;
+
+ fs_set_vol_flags(sb, VOL_DIRTY);
+
+ /* get the directory entry of given file or directory */
+ if (fsi->vol_type == EXFAT) {
+ es = get_dentry_set_in_dir(sb, &(fid->dir), fid->entry, ES_ALL_ENTRIES, &ep);
+ if (!es)
+ return -EIO;
+ ep2 = ep+1;
+ } else {
+ /* for other than exfat */
+ ep = get_dentry_in_dir(sb, &(fid->dir), fid->entry, &sector);
+ if (!ep)
+ return -EIO;
+ ep2 = ep;
+ }
+
+ fsi->fs_func->set_entry_attr(ep, info->Attr);
+
+ /* set FILE_INFO structure using the acquired DENTRY_T */
+ tm.tz = info->CreateTimestamp.Timezone;
+ tm.sec = info->CreateTimestamp.Second;
+ tm.min = info->CreateTimestamp.Minute;
+ tm.hour = info->CreateTimestamp.Hour;
+ tm.day = info->CreateTimestamp.Day;
+ tm.mon = info->CreateTimestamp.Month;
+ tm.year = info->CreateTimestamp.Year;
+ fsi->fs_func->set_entry_time(ep, &tm, TM_CREATE);
+
+ tm.tz = info->ModifyTimestamp.Timezone;
+ tm.sec = info->ModifyTimestamp.Second;
+ tm.min = info->ModifyTimestamp.Minute;
+ tm.hour = info->ModifyTimestamp.Hour;
+ tm.day = info->ModifyTimestamp.Day;
+ tm.mon = info->ModifyTimestamp.Month;
+ tm.year = info->ModifyTimestamp.Year;
+ fsi->fs_func->set_entry_time(ep, &tm, TM_MODIFY);
+
+ if (is_dir && fsi->vol_type != EXFAT) {
+		/* overwrite dir size with 0 if non-exFAT and dir size != 0 */
+ if (fsi->fs_func->get_entry_size(ep2))
+ fsi->fs_func->set_entry_size(ep2, 0);
+ } else {
+ /* File size should be zero if there is no cluster allocated */
+ u64 on_disk_size = info->Size;
+
+ if (IS_CLUS_EOF(fid->start_clu))
+ on_disk_size = 0;
+
+ fsi->fs_func->set_entry_size(ep2, on_disk_size);
+ }
+
+ if (fsi->vol_type == EXFAT) {
+ ret = update_dir_chksum_with_entry_set(sb, es);
+ release_dentry_set(es);
+ } else {
+ ret = dcache_modify(sb, sector);
+ }
+
+ fs_sync(sb, sync);
+	/* The call below is commented out to prevent frequent superblock updates */
+ //fs_set_vol_flags(sb, VOL_CLEAN);
+
+ return ret;
+} /* end of fscore_write_inode */
+
+
+/*
+ * Input: inode, (logical) clu_offset, target allocation area
+ * Output: errcode, cluster number
+ * *clu = (~0), if it's unable to allocate a new cluster
+ */
+s32 fscore_map_clus(struct inode *inode, u32 clu_offset, u32 *clu, int dest)
+{
+ s32 ret, modified = false;
+ u32 last_clu;
+ u64 sector;
+ CHAIN_T new_clu;
+ DENTRY_T *ep;
+ ENTRY_SET_CACHE_T *es = NULL;
+ struct super_block *sb = inode->i_sb;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ FILE_ID_T *fid = &(SDFAT_I(inode)->fid);
+ u32 local_clu_offset = clu_offset;
+ s32 reserved_clusters = fsi->reserved_clusters;
+ u32 num_to_be_allocated = 0, num_clusters = 0;
+
+ fid->rwoffset = (s64)(clu_offset) << fsi->cluster_size_bits;
+
+ if (SDFAT_I(inode)->i_size_ondisk > 0)
+ num_clusters = (u32)((SDFAT_I(inode)->i_size_ondisk-1) >> fsi->cluster_size_bits) + 1;
+
+ if (clu_offset >= num_clusters)
+ num_to_be_allocated = clu_offset - num_clusters + 1;
+
+ if ((dest == ALLOC_NOWHERE) && (num_to_be_allocated > 0)) {
+ *clu = CLUS_EOF;
+ return 0;
+ }
+
+	/* sanity check : the requested cluster count should always be 1 */
+ //ASSERT(num_to_be_allocated == 1);
+
+ sdfat_debug_check_clusters(inode);
+
+ *clu = last_clu = fid->start_clu;
+
+ /* XXX: Defensive code needed.
+ * what if i_size_ondisk != # of allocated clusters
+ */
+ if (fid->flags == 0x03) {
+ if ((clu_offset > 0) && (!IS_CLUS_EOF(*clu))) {
+ last_clu += clu_offset - 1;
+
+ if (clu_offset == num_clusters)
+ *clu = CLUS_EOF;
+ else
+ *clu += clu_offset;
+ }
+ } else if (fid->type == TYPE_FILE) {
+ u32 fclus = 0;
+ s32 err = extent_get_clus(inode, clu_offset,
+ &fclus, clu, &last_clu, 1);
+ if (err)
+ return -EIO;
+
+ clu_offset -= fclus;
+ } else {
+ /* hint information */
+ if ((clu_offset > 0) &&
+ ((fid->hint_bmap.off != CLUS_EOF) && (fid->hint_bmap.off > 0)) &&
+ (clu_offset >= fid->hint_bmap.off)) {
+ clu_offset -= fid->hint_bmap.off;
+ /* hint_bmap.clu should be valid */
+ ASSERT(fid->hint_bmap.clu >= 2);
+ *clu = fid->hint_bmap.clu;
+ }
+
+ while ((clu_offset > 0) && (!IS_CLUS_EOF(*clu))) {
+ last_clu = *clu;
+ if (get_next_clus_safe(sb, clu))
+ return -EIO;
+ clu_offset--;
+ }
+ }
+
+ if (IS_CLUS_EOF(*clu)) {
+ fs_set_vol_flags(sb, VOL_DIRTY);
+
+ new_clu.dir = (IS_CLUS_EOF(last_clu)) ? CLUS_EOF : last_clu + 1;
+ new_clu.size = 0;
+ new_clu.flags = fid->flags;
+
+ /* (1) allocate a cluster */
+ if (num_to_be_allocated < 1) {
+			/* Broken FAT (i_size > allocated FAT chain) */
+ EMSG("%s: invalid fat chain : inode(%p) "
+ "num_to_be_allocated(%d) "
+ "i_size_ondisk(%lld) fid->flags(%02x) "
+ "fid->start(%08x) fid->hint_off(%u) "
+ "fid->hint_clu(%u) fid->rwoffset(%llu) "
+ "modified_clu_off(%d) last_clu(%08x) "
+ "new_clu(%08x)", __func__, inode,
+ num_to_be_allocated,
+ (SDFAT_I(inode)->i_size_ondisk),
+ fid->flags, fid->start_clu,
+ fid->hint_bmap.off, fid->hint_bmap.clu,
+ fid->rwoffset, clu_offset,
+ last_clu, new_clu.dir);
+ sdfat_fs_error(sb, "broken FAT chain.");
+ return -EIO;
+ }
+
+ ret = fsi->fs_func->alloc_cluster(sb, num_to_be_allocated, &new_clu, ALLOC_COLD);
+ if (ret)
+ return ret;
+
+ if (IS_CLUS_EOF(new_clu.dir) || IS_CLUS_FREE(new_clu.dir)) {
+			sdfat_fs_error(sb, "bogus cluster newly allocated "
+				"(last_clu : %u, new_clu : %u)",
+ last_clu, new_clu.dir);
+ ASSERT(0);
+ return -EIO;
+ }
+
+ /* Reserved cluster dec. */
+ // XXX: Inode DA flag needed
+ if (SDFAT_SB(sb)->options.improved_allocation & SDFAT_ALLOC_DELAY) {
+ BUG_ON(reserved_clusters < num_to_be_allocated);
+ reserved_clusters -= num_to_be_allocated;
+
+ }
+
+ /* (2) append to the FAT chain */
+ if (IS_CLUS_EOF(last_clu)) {
+ if (new_clu.flags == 0x01)
+ fid->flags = 0x01;
+ fid->start_clu = new_clu.dir;
+ modified = true;
+ } else {
+ if (new_clu.flags != fid->flags) {
+ /* no-fat-chain bit is disabled,
+ * so fat-chain should be synced with alloc-bmp
+ */
+ chain_cont_cluster(sb, fid->start_clu, num_clusters);
+ fid->flags = 0x01;
+ modified = true;
+ }
+ if (new_clu.flags == 0x01)
+ if (fat_ent_set(sb, last_clu, new_clu.dir))
+ return -EIO;
+ }
+
+ num_clusters += num_to_be_allocated;
+ *clu = new_clu.dir;
+
+ if (fid->dir.dir != DIR_DELETED) {
+
+ if (fsi->vol_type == EXFAT) {
+ es = get_dentry_set_in_dir(sb, &(fid->dir), fid->entry, ES_ALL_ENTRIES, &ep);
+ if (!es)
+ return -EIO;
+ /* get stream entry */
+ ep++;
+ }
+
+ /* (3) update directory entry */
+ if (modified) {
+ if (fsi->vol_type != EXFAT) {
+ ep = get_dentry_in_dir(sb, &(fid->dir), fid->entry, &sector);
+ if (!ep)
+ return -EIO;
+ }
+
+ if (fsi->fs_func->get_entry_flag(ep) != fid->flags)
+ fsi->fs_func->set_entry_flag(ep, fid->flags);
+
+ if (fsi->fs_func->get_entry_clu0(ep) != fid->start_clu)
+ fsi->fs_func->set_entry_clu0(ep, fid->start_clu);
+
+ fsi->fs_func->set_entry_size(ep, fid->size);
+
+ if (fsi->vol_type != EXFAT) {
+ if (dcache_modify(sb, sector))
+ return -EIO;
+ }
+ }
+
+ if (fsi->vol_type == EXFAT) {
+ if (update_dir_chksum_with_entry_set(sb, es))
+ return -EIO;
+ release_dentry_set(es);
+ }
+
+ } /* end of if != DIR_DELETED */
+
+ /* add number of new blocks to inode (non-DA only) */
+ if (!(SDFAT_SB(sb)->options.improved_allocation & SDFAT_ALLOC_DELAY)) {
+ inode->i_blocks += num_to_be_allocated << (fsi->cluster_size_bits - sb->s_blocksize_bits);
+ } else {
+		// In the DA case, i_blocks must already have been increased.
+ BUG_ON(clu_offset >= (inode->i_blocks >> (fsi->cluster_size_bits - sb->s_blocksize_bits)));
+ }
+#if 0
+ fs_sync(sb, 0);
+ fs_set_vol_flags(sb, VOL_CLEAN);
+#endif
+		/* (4) Move the *clu pointer along the FAT chain (hole-aware),
+		 * because the caller of this function expects *clu to be the
+		 * last cluster. This only matters when num_to_be_allocated >= 2:
+		 * *clu moves from the first cluster of the allocated chain to
+		 * the last one.
+		 */
+ if (fid->flags == 0x03) {
+ *clu += num_to_be_allocated - 1;
+ } else {
+ while (num_to_be_allocated > 1) {
+ if (get_next_clus_safe(sb, clu))
+ return -EIO;
+ num_to_be_allocated--;
+ }
+ }
+
+ }
+
+ /* update reserved_clusters */
+ fsi->reserved_clusters = reserved_clusters;
+
+ /* hint information */
+ fid->hint_bmap.off = local_clu_offset;
+ fid->hint_bmap.clu = *clu;
+
+ return 0;
+} /* end of fscore_map_clus */
+
+/* allocate reserved cluster */
+s32 fscore_reserve_clus(struct inode *inode)
+{
+ struct super_block *sb = inode->i_sb;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ if ((fsi->used_clusters + fsi->reserved_clusters) >= (fsi->num_clusters - 2))
+ return -ENOSPC;
+
+ if (bdev_check_bdi_valid(sb))
+ return -EIO;
+
+ fsi->reserved_clusters++;
+
+ /* inode->i_blocks update */
+ inode->i_blocks += 1 << (fsi->cluster_size_bits - sb->s_blocksize_bits);
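+	/* e.g. with 32 KiB clusters on a 512 B block device, this adds 64 blocks */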
+
+ sdfat_debug_check_clusters(inode);
+
+ return 0;
+}
+
+/* remove an entry, BUT don't truncate */
+s32 fscore_unlink(struct inode *inode, FILE_ID_T *fid)
+{
+ s32 dentry;
+ CHAIN_T dir;
+ DENTRY_T *ep;
+ struct super_block *sb = inode->i_sb;
+
+ dir.dir = fid->dir.dir;
+ dir.size = fid->dir.size;
+ dir.flags = fid->dir.flags;
+
+ dentry = fid->entry;
+
+ if (fid->dir.dir == DIR_DELETED) {
+ EMSG("%s : abnormal access to deleted dentry\n", __func__);
+ return -ENOENT;
+ }
+
+ ep = get_dentry_in_dir(sb, &dir, dentry, NULL);
+ if (!ep)
+ return -EIO;
+
+#ifdef CONFIG_SDFAT_CHECK_RO_ATTR
+ if (SDFAT_SB(sb)->fsi.fs_func->get_entry_attr(ep) & ATTR_READONLY)
+ return -EPERM;
+#endif
+
+ fs_set_vol_flags(sb, VOL_DIRTY);
+
+ /* (1) update the directory entry */
+ if (remove_file(inode, &dir, dentry))
+ return -EIO;
+
+	/* mark cached dir info as deleted; clusters are not freed here */
+ fid->dir.dir = DIR_DELETED;
+
+ fs_sync(sb, 0);
+ fs_set_vol_flags(sb, VOL_CLEAN);
+
+ return 0;
+}
+
+/*----------------------------------------------------------------------*/
+/* Directory Operation Functions */
+/*----------------------------------------------------------------------*/
+
+/* create a directory */
+s32 fscore_mkdir(struct inode *inode, u8 *path, FILE_ID_T *fid)
+{
+ s32 ret/*, dentry*/;
+ CHAIN_T dir;
+ UNI_NAME_T uni_name;
+ struct super_block *sb = inode->i_sb;
+
+ TMSG("%s entered\n", __func__);
+
+ /* check the validity of directory name in the given old pathname */
+ ret = resolve_path(inode, path, &dir, &uni_name);
+ if (ret)
+ goto out;
+
+ fs_set_vol_flags(sb, VOL_DIRTY);
+
+ ret = create_dir(inode, &dir, &uni_name, fid);
+
+ fs_sync(sb, 0);
+ fs_set_vol_flags(sb, VOL_CLEAN);
+out:
+ TMSG("%s exited with err(%d)\n", __func__, ret);
+ return ret;
+}
+
+/* read a directory entry from the opened directory */
+s32 fscore_readdir(struct inode *inode, DIR_ENTRY_T *dir_entry)
+{
+ s32 i;
+ s32 dentries_per_clu, dentries_per_clu_bits = 0;
+ u32 type, clu_offset;
+ u64 sector;
+ CHAIN_T dir, clu;
+ UNI_NAME_T uni_name;
+ TIMESTAMP_T tm;
+ DENTRY_T *ep;
+ struct super_block *sb = inode->i_sb;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ FILE_ID_T *fid = &(SDFAT_I(inode)->fid);
+ u32 dentry = (u32)(fid->rwoffset & 0xFFFFFFFF); /* u32 is enough for directory */
+
+ /* check if the given file ID is opened */
+ if (fid->type != TYPE_DIR)
+ return -EPERM;
+
+ if (fid->entry == -1) {
+ dir.dir = fsi->root_dir;
+		dir.size = 0; /* just initialized; will not be used */
+ dir.flags = 0x01;
+ } else {
+ dir.dir = fid->start_clu;
+ dir.size = fid->size >> fsi->cluster_size_bits;
+ dir.flags = fid->flags;
+ sdfat_debug_bug_on(dentry >= (dir.size * fsi->dentries_per_clu));
+ }
+
+ if (IS_CLUS_FREE(dir.dir)) { /* FAT16 root_dir */
+ dentries_per_clu = fsi->dentries_in_root;
+
+ /* Prevent readdir over directory size */
+ if (dentry >= dentries_per_clu) {
+ clu.dir = CLUS_EOF;
+ } else {
+ clu.dir = dir.dir;
+ clu.size = dir.size;
+ clu.flags = dir.flags;
+ }
+ } else {
+ dentries_per_clu = fsi->dentries_per_clu;
+ dentries_per_clu_bits = ilog2(dentries_per_clu);
+
+ clu_offset = dentry >> dentries_per_clu_bits;
+ clu.dir = dir.dir;
+ clu.size = dir.size;
+ clu.flags = dir.flags;
+
+ if (clu.flags == 0x03) {
+ clu.dir += clu_offset;
+ clu.size -= clu_offset;
+ } else {
+			/* hint information */
+ if ((clu_offset > 0) &&
+ ((fid->hint_bmap.off != CLUS_EOF) && (fid->hint_bmap.off > 0)) &&
+ (clu_offset >= fid->hint_bmap.off)) {
+ clu_offset -= fid->hint_bmap.off;
+ clu.dir = fid->hint_bmap.clu;
+ }
+
+ while (clu_offset > 0) {
+ if (get_next_clus_safe(sb, &(clu.dir)))
+ return -EIO;
+
+ clu_offset--;
+ }
+ }
+ }
+
+ while (!IS_CLUS_EOF(clu.dir)) {
+ if (IS_CLUS_FREE(dir.dir)) /* FAT16 root_dir */
+ i = dentry % dentries_per_clu;
+ else
+ i = dentry & (dentries_per_clu-1);
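+		/* dentries_per_clu is a power of two here (dentries_per_clu_bits
+		 * = ilog2(dentries_per_clu) above), so the mask is equivalent to
+		 * the modulo used for the FAT16 root dir */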
+
+ for ( ; i < dentries_per_clu; i++, dentry++) {
+ ep = get_dentry_in_dir(sb, &clu, i, &sector);
+ if (!ep)
+ return -EIO;
+
+ type = fsi->fs_func->get_entry_type(ep);
+
+ if (type == TYPE_UNUSED)
+ break;
+
+ if ((type != TYPE_FILE) && (type != TYPE_DIR))
+ continue;
+
+ dcache_lock(sb, sector);
+ dir_entry->Attr = fsi->fs_func->get_entry_attr(ep);
+
+ fsi->fs_func->get_entry_time(ep, &tm, TM_CREATE);
+ dir_entry->CreateTimestamp.Year = tm.year;
+ dir_entry->CreateTimestamp.Month = tm.mon;
+ dir_entry->CreateTimestamp.Day = tm.day;
+ dir_entry->CreateTimestamp.Hour = tm.hour;
+ dir_entry->CreateTimestamp.Minute = tm.min;
+ dir_entry->CreateTimestamp.Second = tm.sec;
+ dir_entry->CreateTimestamp.MilliSecond = 0;
+
+ fsi->fs_func->get_entry_time(ep, &tm, TM_MODIFY);
+ dir_entry->ModifyTimestamp.Year = tm.year;
+ dir_entry->ModifyTimestamp.Month = tm.mon;
+ dir_entry->ModifyTimestamp.Day = tm.day;
+ dir_entry->ModifyTimestamp.Hour = tm.hour;
+ dir_entry->ModifyTimestamp.Minute = tm.min;
+ dir_entry->ModifyTimestamp.Second = tm.sec;
+ dir_entry->ModifyTimestamp.MilliSecond = 0;
+
+ memset((s8 *) &dir_entry->AccessTimestamp, 0, sizeof(DATE_TIME_T));
+
+ *(uni_name.name) = 0x0;
+ fsi->fs_func->get_uniname_from_ext_entry(sb, &dir, dentry, uni_name.name);
+ if (*(uni_name.name) == 0x0)
+ get_uniname_from_dos_entry(sb, (DOS_DENTRY_T *) ep, &uni_name, 0x1);
+ nls_uni16s_to_vfsname(sb, &uni_name,
+ dir_entry->NameBuf.lfn,
+ dir_entry->NameBuf.lfnbuf_len);
+ dcache_unlock(sb, sector);
+
+ if (fsi->vol_type == EXFAT) {
+ ep = get_dentry_in_dir(sb, &clu, i+1, NULL);
+ if (!ep)
+ return -EIO;
+ } else {
+ get_uniname_from_dos_entry(sb, (DOS_DENTRY_T *) ep, &uni_name, 0x0);
+ nls_uni16s_to_vfsname(sb, &uni_name,
+ dir_entry->NameBuf.sfn,
+ dir_entry->NameBuf.sfnbuf_len);
+ }
+
+ dir_entry->Size = fsi->fs_func->get_entry_size(ep);
+
+ /*
+ * Update hint information :
+ * fat16 root directory does not need it.
+ */
+ if (!IS_CLUS_FREE(dir.dir)) {
+ fid->hint_bmap.off = dentry >> dentries_per_clu_bits;
+ fid->hint_bmap.clu = clu.dir;
+ }
+
+ fid->rwoffset = (s64) ++dentry;
+
+ return 0;
+ }
+
+ /* fat16 root directory */
+ if (IS_CLUS_FREE(dir.dir))
+ break;
+
+ if (clu.flags == 0x03) {
+ if ((--clu.size) > 0)
+ clu.dir++;
+ else
+ clu.dir = CLUS_EOF;
+ } else {
+ if (get_next_clus_safe(sb, &(clu.dir)))
+ return -EIO;
+ }
+ }
+
+ dir_entry->NameBuf.lfn[0] = '\0';
+
+ fid->rwoffset = (s64)dentry;
+
+ return 0;
+} /* end of fscore_readdir */
+
+/* remove a directory */
+s32 fscore_rmdir(struct inode *inode, FILE_ID_T *fid)
+{
+ s32 ret;
+ s32 dentry;
+ DENTRY_T *ep;
+ CHAIN_T dir, clu_to_free;
+ struct super_block *sb = inode->i_sb;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ dir.dir = fid->dir.dir;
+ dir.size = fid->dir.size;
+ dir.flags = fid->dir.flags;
+
+ dentry = fid->entry;
+
+ if (fid->dir.dir == DIR_DELETED) {
+ EMSG("%s : abnormal access to deleted dentry\n", __func__);
+ return -ENOENT;
+ }
+
+ /* check if the file is "." or ".." */
+ if (fsi->vol_type != EXFAT) {
+ if ((dir.dir != fsi->root_dir) && (dentry < 2))
+ return -EPERM;
+ }
+
+ ep = get_dentry_in_dir(sb, &dir, dentry, NULL);
+ if (!ep)
+ return -EIO;
+
+#ifdef CONFIG_SDFAT_CHECK_RO_ATTR
+ if (SDFAT_SB(sb)->fsi.fs_func->get_entry_attr(ep) & ATTR_READONLY)
+ return -EPERM;
+#endif
+
+ clu_to_free.dir = fid->start_clu;
+ clu_to_free.size = ((fid->size-1) >> fsi->cluster_size_bits) + 1;
+ clu_to_free.flags = fid->flags;
+
+ ret = check_dir_empty(sb, &clu_to_free);
+ if (ret) {
+ if (ret == -EIO)
+ EMSG("%s : failed to check_dir_empty : err(%d)\n",
+ __func__, ret);
+ return ret;
+ }
+
+ fs_set_vol_flags(sb, VOL_DIRTY);
+
+ /* (1) update the directory entry */
+ ret = remove_file(inode, &dir, dentry);
+ if (ret) {
+ EMSG("%s : failed to remove_file : err(%d)\n", __func__, ret);
+ return ret;
+ }
+
+ fid->dir.dir = DIR_DELETED;
+
+ fs_sync(sb, 0);
+ fs_set_vol_flags(sb, VOL_CLEAN);
+
+ return ret;
+} /* end of fscore_rmdir */
+
+/* end of core.c */
diff --git a/fs/sdfat/core.h b/fs/sdfat/core.h
new file mode 100644
index 000000000000..1f8ed5a28ef3
--- /dev/null
+++ b/fs/sdfat/core.h
@@ -0,0 +1,221 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SDFAT_CORE_H
+#define _SDFAT_CORE_H
+
+#include <asm/byteorder.h>
+
+#include "config.h"
+#include "api.h"
+#include "upcase.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*----------------------------------------------------------------------*/
+/* Constant & Macro Definitions */
+/*----------------------------------------------------------------------*/
+#define get_next_clus(sb, pclu) fat_ent_get(sb, *(pclu), pclu)
+#define get_next_clus_safe(sb, pclu) fat_ent_get_safe(sb, *(pclu), pclu)
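+/*
+ * Illustrative usage sketch (not from the original source): advance one
+ * link along the FAT chain in place.
+ *
+ *	u32 clu = fid->start_clu;
+ *	if (get_next_clus_safe(sb, &clu))
+ *		return -EIO;	// on success, clu holds the next cluster or CLUS_EOF
+ */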
+
+/* file status */
+/* this prevents
+ * fscore_write_inode, fscore_map_clus, ... with the unlinked inodes
+ * from corrupting on-disk dentry data.
+ *
+ * The fid->dir value of unlinked inode will be DIR_DELETED
+ * and those functions must check if fid->dir is valid prior to
+ * the calling of get_dentry_in_dir()
+ */
+#define DIR_DELETED 0xFFFF0321
+
+/*----------------------------------------------------------------------*/
+/* Type Definitions */
+/*----------------------------------------------------------------------*/
+#define ES_2_ENTRIES 2
+#define ES_3_ENTRIES 3
+#define ES_ALL_ENTRIES 0
+
+typedef struct {
+ u64 sector; // sector number that contains file_entry
+ u32 offset; // byte offset in the sector
+ s32 alloc_flag; // flag in stream entry. 01 for cluster chain, 03 for contig. clusters.
+ u32 num_entries;
+ void *__buf; // __buf should be the last member
+} ENTRY_SET_CACHE_T;
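+
+/*
+ * NOTE: an entry set is a single variable-length allocation;
+ * get_dentry_set_in_dir() sizes it as
+ *	offsetof(ENTRY_SET_CACHE_T, __buf) + num_entries * sizeof(DENTRY_T)
+ * which is why __buf must remain the last member.
+ */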
+
+/*----------------------------------------------------------------------*/
+/* External Function Declarations */
+/*----------------------------------------------------------------------*/
+
+/* file system initialization & shutdown functions */
+s32 fscore_init(void);
+s32 fscore_shutdown(void);
+
+/* bdev management */
+s32 fscore_check_bdi_valid(struct super_block *sb);
+
+/* chain management */
+s32 chain_cont_cluster(struct super_block *sb, u32 chain, u32 len);
+
+/* volume management functions */
+s32 fscore_mount(struct super_block *sb);
+s32 fscore_umount(struct super_block *sb);
+s32 fscore_statfs(struct super_block *sb, VOL_INFO_T *info);
+s32 fscore_sync_fs(struct super_block *sb, s32 do_sync);
+s32 fscore_set_vol_flags(struct super_block *sb, u16 new_flag, s32 always_sync);
+u32 fscore_get_au_stat(struct super_block *sb, s32 mode);
+
+/* file management functions */
+s32 fscore_lookup(struct inode *inode, u8 *path, FILE_ID_T *fid);
+s32 fscore_create(struct inode *inode, u8 *path, u8 mode, FILE_ID_T *fid);
+s32 fscore_read_link(struct inode *inode, FILE_ID_T *fid, void *buffer, u64 count, u64 *rcount);
+s32 fscore_write_link(struct inode *inode, FILE_ID_T *fid, void *buffer, u64 count, u64 *wcount);
+s32 fscore_truncate(struct inode *inode, u64 old_size, u64 new_size);
+s32 fscore_rename(struct inode *old_parent_inode, FILE_ID_T *fid,
+ struct inode *new_parent_inode, struct dentry *new_dentry);
+s32 fscore_remove(struct inode *inode, FILE_ID_T *fid);
+s32 fscore_read_inode(struct inode *inode, DIR_ENTRY_T *info);
+s32 fscore_write_inode(struct inode *inode, DIR_ENTRY_T *info, int sync);
+s32 fscore_map_clus(struct inode *inode, u32 clu_offset, u32 *clu, int dest);
+s32 fscore_reserve_clus(struct inode *inode);
+s32 fscore_unlink(struct inode *inode, FILE_ID_T *fid);
+
+/* directory management functions */
+s32 fscore_mkdir(struct inode *inode, u8 *path, FILE_ID_T *fid);
+s32 fscore_readdir(struct inode *inode, DIR_ENTRY_T *dir_ent);
+s32 fscore_rmdir(struct inode *inode, FILE_ID_T *fid);
+
+
+/*----------------------------------------------------------------------*/
+/* External Function Declarations (NOT TO UPPER LAYER) */
+/*----------------------------------------------------------------------*/
+
+/* core.c : core code for common */
+/* dir entry management functions */
+DENTRY_T *get_dentry_in_dir(struct super_block *sb, CHAIN_T *p_dir, s32 entry, u64 *sector);
+
+/* name conversion functions */
+void get_uniname_from_dos_entry(struct super_block *sb, DOS_DENTRY_T *ep, UNI_NAME_T *p_uniname, u8 mode);
+
+/* file operation functions */
+s32 walk_fat_chain(struct super_block *sb, CHAIN_T *p_dir, u32 byte_offset, u32 *clu);
+
+/* sdfat/cache.c */
+s32 meta_cache_init(struct super_block *sb);
+s32 meta_cache_shutdown(struct super_block *sb);
+u8 *fcache_getblk(struct super_block *sb, u64 sec);
+s32 fcache_modify(struct super_block *sb, u64 sec);
+s32 fcache_release_all(struct super_block *sb);
+s32 fcache_flush(struct super_block *sb, u32 sync);
+
+u8 *dcache_getblk(struct super_block *sb, u64 sec);
+s32 dcache_modify(struct super_block *sb, u64 sec);
+s32 dcache_lock(struct super_block *sb, u64 sec);
+s32 dcache_unlock(struct super_block *sb, u64 sec);
+s32 dcache_release(struct super_block *sb, u64 sec);
+s32 dcache_release_all(struct super_block *sb);
+s32 dcache_flush(struct super_block *sb, u32 sync);
+s32 dcache_readahead(struct super_block *sb, u64 sec);
+
+
+/* fatent.c */
+s32 fat_ent_ops_init(struct super_block *sb);
+s32 fat_ent_get(struct super_block *sb, u32 loc, u32 *content);
+s32 fat_ent_set(struct super_block *sb, u32 loc, u32 content);
+s32 fat_ent_get_safe(struct super_block *sb, u32 loc, u32 *content);
+
+/* core_fat.c : core code for fat */
+s32 fat_generate_dos_name_new(struct super_block *sb, CHAIN_T *p_dir, DOS_NAME_T *p_dosname, s32 n_entries);
+s32 mount_fat16(struct super_block *sb, pbr_t *p_pbr);
+s32 mount_fat32(struct super_block *sb, pbr_t *p_pbr);
+
+/* core_exfat.c : core code for exfat */
+
+s32 load_alloc_bmp(struct super_block *sb);
+void free_alloc_bmp(struct super_block *sb);
+ENTRY_SET_CACHE_T *get_dentry_set_in_dir(struct super_block *sb,
+ CHAIN_T *p_dir, s32 entry, u32 type, DENTRY_T **file_ep);
+void release_dentry_set(ENTRY_SET_CACHE_T *es);
+s32 update_dir_chksum(struct super_block *sb, CHAIN_T *p_dir, s32 entry);
+s32 update_dir_chksum_with_entry_set(struct super_block *sb, ENTRY_SET_CACHE_T *es);
+bool is_dir_empty(struct super_block *sb, CHAIN_T *p_dir);
+s32 mount_exfat(struct super_block *sb, pbr_t *p_pbr);
+
+/* amap_smart.c : creation on mount / destroy on umount */
+int amap_create(struct super_block *sb, u32 pack_ratio, u32 sect_per_au, u32 hidden_sect);
+void amap_destroy(struct super_block *sb);
+
+/* amap_smart.c : (de)allocation functions */
+s32 amap_fat_alloc_cluster(struct super_block *sb, u32 num_alloc, CHAIN_T *p_chain, s32 dest);
+s32 amap_free_cluster(struct super_block *sb, CHAIN_T *p_chain, s32 do_relse);/* Not implemented */
+s32 amap_release_cluster(struct super_block *sb, u32 clu); /* Only update AMAP */
+
+/* amap_smart.c : misc (for defrag) */
+s32 amap_mark_ignore(struct super_block *sb, u32 clu);
+s32 amap_unmark_ignore(struct super_block *sb, u32 clu);
+s32 amap_unmark_ignore_all(struct super_block *sb);
+s32 amap_check_working(struct super_block *sb, u32 clu);
+s32 amap_get_freeclus(struct super_block *sb, u32 clu);
+
+/* amap_smart.c : stat AU */
+u32 amap_get_au_stat(struct super_block *sb, s32 mode);
+
+
+/* blkdev.c */
+s32 bdev_open_dev(struct super_block *sb);
+s32 bdev_close_dev(struct super_block *sb);
+s32 bdev_check_bdi_valid(struct super_block *sb);
+s32 bdev_readahead(struct super_block *sb, u64 secno, u64 num_secs);
+s32 bdev_mread(struct super_block *sb, u64 secno, struct buffer_head **bh, u64 num_secs, s32 read);
+s32 bdev_mwrite(struct super_block *sb, u64 secno, struct buffer_head *bh, u64 num_secs, s32 sync);
+s32 bdev_sync_all(struct super_block *sb);
+
+/* blkdev.c : sector read/write functions */
+s32 read_sect(struct super_block *sb, u64 sec, struct buffer_head **bh, s32 read);
+s32 write_sect(struct super_block *sb, u64 sec, struct buffer_head *bh, s32 sync);
+s32 read_msect(struct super_block *sb, u64 sec, struct buffer_head **bh, s64 num_secs, s32 read);
+s32 write_msect(struct super_block *sb, u64 sec, struct buffer_head *bh, s64 num_secs, s32 sync);
+s32 write_msect_zero(struct super_block *sb, u64 sec, u64 num_secs);
+
+/* misc.c */
+u8 calc_chksum_1byte(void *data, s32 len, u8 chksum);
+u16 calc_chksum_2byte(void *data, s32 len, u16 chksum, s32 type);
+
+/* extent.c */
+s32 extent_cache_init(void);
+void extent_cache_shutdown(void);
+void extent_cache_init_inode(struct inode *inode);
+void extent_cache_inval_inode(struct inode *inode);
+s32 extent_get_clus(struct inode *inode, u32 cluster, u32 *fclus,
+ u32 *dclus, u32 *last_dclus, s32 allow_eof);
+/*----------------------------------------------------------------------*/
+/* Wrapper Function */
+/*----------------------------------------------------------------------*/
+void set_sb_dirty(struct super_block *sb);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* _SDFAT_CORE_H */
+
+/* end of core.h */
diff --git a/fs/sdfat/core_exfat.c b/fs/sdfat/core_exfat.c
new file mode 100644
index 000000000000..9e4b994d0e90
--- /dev/null
+++ b/fs/sdfat/core_exfat.c
@@ -0,0 +1,1560 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/************************************************************************/
+/* */
+/* PROJECT : exFAT & FAT12/16/32 File System */
+/* FILE : core_exfat.c */
+/* PURPOSE : exFAT-fs core code for sdFAT */
+/* */
+/*----------------------------------------------------------------------*/
+/* NOTES */
+/* */
+/* */
+/************************************************************************/
+
+#include <linux/version.h>
+#include <linux/blkdev.h>
+#include <linux/workqueue.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+
+#include "sdfat.h"
+#include "core.h"
+#include <asm/byteorder.h>
+#include <asm/unaligned.h>
+
+/*----------------------------------------------------------------------*/
+/* Constant & Macro Definitions */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/* Global Variable Definitions */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/* Local Variable Definitions */
+/*----------------------------------------------------------------------*/
+static u8 free_bit[] = {
+ 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2,/* 0 ~ 19*/
+ 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3,/* 20 ~ 39*/
+ 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2,/* 40 ~ 59*/
+ 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4,/* 60 ~ 79*/
+ 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2,/* 80 ~ 99*/
+ 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3,/*100 ~ 119*/
+ 0, 1, 0, 2, 0, 1, 0, 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2,/*120 ~ 139*/
+ 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5,/*140 ~ 159*/
+ 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2,/*160 ~ 179*/
+ 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3,/*180 ~ 199*/
+ 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2,/*200 ~ 219*/
+ 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4,/*220 ~ 239*/
+ 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 /*240 ~ 254*/
+};
+
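+/* used_bit[b] is the number of set bits (popcount) in byte b, e.g. used_bit[0xFF] == 8 */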
+static u8 used_bit[] = {
+ 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3,/* 0 ~ 19*/
+ 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4,/* 20 ~ 39*/
+ 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5,/* 40 ~ 59*/
+ 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,/* 60 ~ 79*/
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4,/* 80 ~ 99*/
+ 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,/*100 ~ 119*/
+ 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4,/*120 ~ 139*/
+ 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,/*140 ~ 159*/
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5,/*160 ~ 179*/
+ 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5,/*180 ~ 199*/
+ 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6,/*200 ~ 219*/
+ 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,/*220 ~ 239*/
+ 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 /*240 ~ 255*/
+};
+
+
+/*======================================================================*/
+/* Local Function Definitions */
+/*======================================================================*/
+/*
+ * Directory Entry Management Functions
+ */
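+
+/*
+ * exFAT dentry type byte, as decoded by exfat_get_entry_type() below:
+ *	0x00			unused
+ *	0x01 - 0x7F		deleted (in-use bit 0x80 cleared)
+ *	0x80			invalid
+ *	0x81 / 0x82 / 0x83	allocation bitmap / up-case table / volume label
+ *	0x85			file or directory (ATTR_SUBDIR decides)
+ *	0xC0 / 0xC1		stream extension / file-name extension
+ */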
+static u32 exfat_get_entry_type(DENTRY_T *p_entry)
+{
+ FILE_DENTRY_T *ep = (FILE_DENTRY_T *) p_entry;
+
+ if (ep->type == EXFAT_UNUSED)
+ return TYPE_UNUSED;
+ if (ep->type < 0x80)
+ return TYPE_DELETED;
+ if (ep->type == 0x80)
+ return TYPE_INVALID;
+ if (ep->type < 0xA0) {
+ if (ep->type == 0x81)
+ return TYPE_BITMAP;
+ if (ep->type == 0x82)
+ return TYPE_UPCASE;
+ if (ep->type == 0x83)
+ return TYPE_VOLUME;
+ if (ep->type == 0x85) {
+ if (le16_to_cpu(ep->attr) & ATTR_SUBDIR)
+ return TYPE_DIR;
+ return TYPE_FILE;
+ }
+ return TYPE_CRITICAL_PRI;
+ }
+ if (ep->type < 0xC0) {
+ if (ep->type == 0xA0)
+ return TYPE_GUID;
+ if (ep->type == 0xA1)
+ return TYPE_PADDING;
+ if (ep->type == 0xA2)
+ return TYPE_ACLTAB;
+ return TYPE_BENIGN_PRI;
+ }
+ if (ep->type < 0xE0) {
+ if (ep->type == 0xC0)
+ return TYPE_STREAM;
+ if (ep->type == 0xC1)
+ return TYPE_EXTEND;
+ if (ep->type == 0xC2)
+ return TYPE_ACL;
+ return TYPE_CRITICAL_SEC;
+ }
+ return TYPE_BENIGN_SEC;
+} /* end of exfat_get_entry_type */
+
+static void exfat_set_entry_type(DENTRY_T *p_entry, u32 type)
+{
+ FILE_DENTRY_T *ep = (FILE_DENTRY_T *) p_entry;
+
+ if (type == TYPE_UNUSED) {
+ ep->type = 0x0;
+ } else if (type == TYPE_DELETED) {
+ ep->type &= ~0x80;
+ } else if (type == TYPE_STREAM) {
+ ep->type = 0xC0;
+ } else if (type == TYPE_EXTEND) {
+ ep->type = 0xC1;
+ } else if (type == TYPE_BITMAP) {
+ ep->type = 0x81;
+ } else if (type == TYPE_UPCASE) {
+ ep->type = 0x82;
+ } else if (type == TYPE_VOLUME) {
+ ep->type = 0x83;
+ } else if (type == TYPE_DIR) {
+ ep->type = 0x85;
+ ep->attr = cpu_to_le16(ATTR_SUBDIR);
+ } else if (type == TYPE_FILE) {
+ ep->type = 0x85;
+ ep->attr = cpu_to_le16(ATTR_ARCHIVE);
+ } else if (type == TYPE_SYMLINK) {
+ ep->type = 0x85;
+ ep->attr = cpu_to_le16(ATTR_ARCHIVE | ATTR_SYMLINK);
+ }
+} /* end of exfat_set_entry_type */
+
+static u32 exfat_get_entry_attr(DENTRY_T *p_entry)
+{
+ FILE_DENTRY_T *ep = (FILE_DENTRY_T *)p_entry;
+
+ return (u32)le16_to_cpu(ep->attr);
+} /* end of exfat_get_entry_attr */
+
+static void exfat_set_entry_attr(DENTRY_T *p_entry, u32 attr)
+{
+ FILE_DENTRY_T *ep = (FILE_DENTRY_T *)p_entry;
+
+ ep->attr = cpu_to_le16((u16) attr);
+} /* end of exfat_set_entry_attr */
+
+static u8 exfat_get_entry_flag(DENTRY_T *p_entry)
+{
+ STRM_DENTRY_T *ep = (STRM_DENTRY_T *)p_entry;
+
+ return ep->flags;
+} /* end of exfat_get_entry_flag */
+
+static void exfat_set_entry_flag(DENTRY_T *p_entry, u8 flags)
+{
+ STRM_DENTRY_T *ep = (STRM_DENTRY_T *)p_entry;
+
+ ep->flags = flags;
+} /* end of exfat_set_entry_flag */
+
+static u32 exfat_get_entry_clu0(DENTRY_T *p_entry)
+{
+ STRM_DENTRY_T *ep = (STRM_DENTRY_T *)p_entry;
+
+ return (u32)le32_to_cpu(ep->start_clu);
+} /* end of exfat_get_entry_clu0 */
+
+static void exfat_set_entry_clu0(DENTRY_T *p_entry, u32 start_clu)
+{
+ STRM_DENTRY_T *ep = (STRM_DENTRY_T *)p_entry;
+
+ ep->start_clu = cpu_to_le32(start_clu);
+} /* end of exfat_set_entry_clu0 */
+
+static u64 exfat_get_entry_size(DENTRY_T *p_entry)
+{
+ STRM_DENTRY_T *ep = (STRM_DENTRY_T *)p_entry;
+
+ return le64_to_cpu(ep->valid_size);
+} /* end of exfat_get_entry_size */
+
+static void exfat_set_entry_size(DENTRY_T *p_entry, u64 size)
+{
+ STRM_DENTRY_T *ep = (STRM_DENTRY_T *)p_entry;
+
+ ep->valid_size = cpu_to_le64(size);
+ ep->size = cpu_to_le64(size);
+} /* end of exfat_set_entry_size */
+
+static void exfat_get_entry_time(DENTRY_T *p_entry, TIMESTAMP_T *tp, u8 mode)
+{
+ u16 t = 0x00, d = 0x21, tz = 0x00;
+ FILE_DENTRY_T *ep = (FILE_DENTRY_T *)p_entry;
+
+ switch (mode) {
+ case TM_CREATE:
+ t = le16_to_cpu(ep->create_time);
+ d = le16_to_cpu(ep->create_date);
+ tz = ep->create_tz;
+ break;
+ case TM_MODIFY:
+ t = le16_to_cpu(ep->modify_time);
+ d = le16_to_cpu(ep->modify_date);
+ tz = ep->modify_tz;
+ break;
+ case TM_ACCESS:
+ t = le16_to_cpu(ep->access_time);
+ d = le16_to_cpu(ep->access_date);
+ tz = ep->access_tz;
+ break;
+ }
+
+ tp->tz.value = tz;
+ tp->sec = (t & 0x001F) << 1;
+ tp->min = (t >> 5) & 0x003F;
+ tp->hour = (t >> 11);
+ tp->day = (d & 0x001F);
+ tp->mon = (d >> 5) & 0x000F;
+ tp->year = (d >> 9);
+} /* end of exfat_get_entry_time */
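+
+/*
+ * The timestamps above are DOS-packed 16-bit values:
+ *	time : hhhhh mmmmmm sssss	(seconds stored in 2-second units)
+ *	date : yyyyyyy mmmm ddddd
+ * e.g. the default d = 0x21 decodes to day 1, month 1, year offset 0.
+ */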
+
+static void exfat_set_entry_time(DENTRY_T *p_entry, TIMESTAMP_T *tp, u8 mode)
+{
+ u16 t, d;
+ FILE_DENTRY_T *ep = (FILE_DENTRY_T *)p_entry;
+
+ t = (tp->hour << 11) | (tp->min << 5) | (tp->sec >> 1);
+ d = (tp->year << 9) | (tp->mon << 5) | tp->day;
+
+ switch (mode) {
+ case TM_CREATE:
+ ep->create_time = cpu_to_le16(t);
+ ep->create_date = cpu_to_le16(d);
+ ep->create_tz = tp->tz.value;
+ break;
+ case TM_MODIFY:
+ ep->modify_time = cpu_to_le16(t);
+ ep->modify_date = cpu_to_le16(d);
+ ep->modify_tz = tp->tz.value;
+ break;
+ case TM_ACCESS:
+ ep->access_time = cpu_to_le16(t);
+ ep->access_date = cpu_to_le16(d);
+ ep->access_tz = tp->tz.value;
+ break;
+ }
+} /* end of exfat_set_entry_time */
+
+
+static void __init_file_entry(struct super_block *sb, FILE_DENTRY_T *ep, u32 type)
+{
+ TIMESTAMP_T tm, *tp;
+
+ exfat_set_entry_type((DENTRY_T *) ep, type);
+
+ tp = tm_now(SDFAT_SB(sb), &tm);
+ exfat_set_entry_time((DENTRY_T *) ep, tp, TM_CREATE);
+ exfat_set_entry_time((DENTRY_T *) ep, tp, TM_MODIFY);
+ exfat_set_entry_time((DENTRY_T *) ep, tp, TM_ACCESS);
+ ep->create_time_ms = 0;
+ ep->modify_time_ms = 0;
+} /* end of __init_file_entry */
+
+static void __init_strm_entry(STRM_DENTRY_T *ep, u8 flags, u32 start_clu, u64 size)
+{
+ exfat_set_entry_type((DENTRY_T *) ep, TYPE_STREAM);
+ ep->flags = flags;
+ ep->start_clu = cpu_to_le32(start_clu);
+ ep->valid_size = cpu_to_le64(size);
+ ep->size = cpu_to_le64(size);
+} /* end of __init_strm_entry */
+
+static void __init_name_entry(NAME_DENTRY_T *ep, u16 *uniname)
+{
+ s32 i;
+
+ exfat_set_entry_type((DENTRY_T *) ep, TYPE_EXTEND);
+ ep->flags = 0x0;
+
+ for (i = 0; i < 15; i++) {
+ ep->unicode_0_14[i] = cpu_to_le16(*uniname);
+ if (*uniname == 0x0)
+ break;
+ uniname++;
+ }
+} /* end of __init_name_entry */
+
+static s32 exfat_init_dir_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, u32 type, u32 start_clu, u64 size)
+{
+ u64 sector;
+ u8 flags;
+ FILE_DENTRY_T *file_ep;
+ STRM_DENTRY_T *strm_ep;
+
+ flags = (type == TYPE_FILE) ? 0x01 : 0x03;
+
+ /* we cannot use get_dentry_set_in_dir here because file ep is not initialized yet */
+ file_ep = (FILE_DENTRY_T *)get_dentry_in_dir(sb, p_dir, entry, &sector);
+ if (!file_ep)
+ return -EIO;
+
+ strm_ep = (STRM_DENTRY_T *)get_dentry_in_dir(sb, p_dir, entry+1, &sector);
+ if (!strm_ep)
+ return -EIO;
+
+ __init_file_entry(sb, file_ep, type);
+ if (dcache_modify(sb, sector))
+ return -EIO;
+
+ __init_strm_entry(strm_ep, flags, start_clu, size);
+ if (dcache_modify(sb, sector))
+ return -EIO;
+
+ return 0;
+} /* end of exfat_init_dir_entry */
+
+s32 update_dir_chksum(struct super_block *sb, CHAIN_T *p_dir, s32 entry)
+{
+ s32 ret = -EIO;
+ s32 i, num_entries;
+ u64 sector;
+ u16 chksum;
+ FILE_DENTRY_T *file_ep;
+ DENTRY_T *ep;
+
+ file_ep = (FILE_DENTRY_T *)get_dentry_in_dir(sb, p_dir, entry, &sector);
+ if (!file_ep)
+ return -EIO;
+
+ dcache_lock(sb, sector);
+
+ num_entries = (s32) file_ep->num_ext + 1;
+ chksum = calc_chksum_2byte((void *) file_ep, DENTRY_SIZE, 0, CS_DIR_ENTRY);
+
+ for (i = 1; i < num_entries; i++) {
+ ep = get_dentry_in_dir(sb, p_dir, entry+i, NULL);
+ if (!ep)
+ goto out_unlock;
+
+ chksum = calc_chksum_2byte((void *) ep, DENTRY_SIZE, chksum, CS_DEFAULT);
+ }
+
+ file_ep->checksum = cpu_to_le16(chksum);
+ ret = dcache_modify(sb, sector);
+out_unlock:
+ dcache_unlock(sb, sector);
+ return ret;
+
+} /* end of update_dir_chksum */
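+
+/*
+ * Checksum note: the first (file) dentry is hashed with CS_DIR_ENTRY and the
+ * extension dentries with CS_DEFAULT; CS_DIR_ENTRY presumably makes
+ * calc_chksum_2byte() skip the checksum field of the file dentry itself,
+ * since that field is written only after the sum is complete.
+ */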
+
+
+static s32 exfat_init_ext_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, s32 num_entries,
+ UNI_NAME_T *p_uniname, DOS_NAME_T *p_dosname)
+{
+ s32 i;
+ u64 sector;
+ u16 *uniname = p_uniname->name;
+ FILE_DENTRY_T *file_ep;
+ STRM_DENTRY_T *strm_ep;
+ NAME_DENTRY_T *name_ep;
+
+ file_ep = (FILE_DENTRY_T *)get_dentry_in_dir(sb, p_dir, entry, &sector);
+ if (!file_ep)
+ return -EIO;
+
+ file_ep->num_ext = (u8)(num_entries - 1);
+ dcache_modify(sb, sector);
+
+ strm_ep = (STRM_DENTRY_T *)get_dentry_in_dir(sb, p_dir, entry+1, &sector);
+ if (!strm_ep)
+ return -EIO;
+
+ strm_ep->name_len = p_uniname->name_len;
+ strm_ep->name_hash = cpu_to_le16(p_uniname->name_hash);
+ dcache_modify(sb, sector);
+
+ for (i = 2; i < num_entries; i++) {
+ name_ep = (NAME_DENTRY_T *)get_dentry_in_dir(sb, p_dir, entry+i, &sector);
+ if (!name_ep)
+ return -EIO;
+
+ __init_name_entry(name_ep, uniname);
+ dcache_modify(sb, sector);
+ uniname += 15;
+ }
+
+ update_dir_chksum(sb, p_dir, entry);
+
+ return 0;
+} /* end of exfat_init_ext_entry */
+
+
+static s32 exfat_delete_dir_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, s32 order, s32 num_entries)
+{
+ s32 i;
+ u64 sector;
+ DENTRY_T *ep;
+
+ for (i = order; i < num_entries; i++) {
+ ep = get_dentry_in_dir(sb, p_dir, entry+i, &sector);
+ if (!ep)
+ return -EIO;
+
+ exfat_set_entry_type(ep, TYPE_DELETED);
+ if (dcache_modify(sb, sector))
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static s32 __write_partial_entries_in_entry_set(struct super_block *sb,
+ ENTRY_SET_CACHE_T *es, u64 sec, u32 off, u32 count)
+{
+ s32 num_entries;
+ u32 buf_off = (off - es->offset);
+ u32 remaining_byte_in_sector, copy_entries;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ u32 clu;
+ u8 *buf, *esbuf = (u8 *)&(es->__buf);
+
+ TMSG("%s entered\n", __func__);
+ MMSG("%s: es %p sec %llu off %u cnt %d\n", __func__, es, sec, off, count);
+ num_entries = count;
+
+ while (num_entries) {
+ /* write per sector base */
+ remaining_byte_in_sector = (1 << sb->s_blocksize_bits) - off;
+ copy_entries = min((s32)(remaining_byte_in_sector >> DENTRY_SIZE_BITS), num_entries);
+ buf = dcache_getblk(sb, sec);
+ if (!buf)
+ goto err_out;
+ MMSG("es->buf %p buf_off %u\n", esbuf, buf_off);
+ MMSG("copying %d entries from %p to sector %llu\n", copy_entries, (esbuf + buf_off), sec);
+ memcpy(buf + off, esbuf + buf_off, copy_entries << DENTRY_SIZE_BITS);
+ dcache_modify(sb, sec);
+ num_entries -= copy_entries;
+
+ if (num_entries) {
+ // get next sector
+ if (IS_LAST_SECT_IN_CLUS(fsi, sec)) {
+ clu = SECT_TO_CLUS(fsi, sec);
+ if (es->alloc_flag == 0x03)
+ clu++;
+ else if (get_next_clus_safe(sb, &clu))
+ goto err_out;
+ sec = CLUS_TO_SECT(fsi, clu);
+ } else {
+ sec++;
+ }
+ off = 0;
+ buf_off += copy_entries << DENTRY_SIZE_BITS;
+ }
+ }
+
+ TMSG("%s exited successfully\n", __func__);
+ return 0;
+err_out:
+ TMSG("%s failed\n", __func__);
+ return -EIO;
+}
+
+/* write back all entries in entry set */
+static s32 __write_whole_entry_set(struct super_block *sb, ENTRY_SET_CACHE_T *es)
+{
+ return __write_partial_entries_in_entry_set(sb, es, es->sector, es->offset, es->num_entries);
+}
+
+s32 update_dir_chksum_with_entry_set(struct super_block *sb, ENTRY_SET_CACHE_T *es)
+{
+ DENTRY_T *ep;
+ u16 chksum = 0;
+ s32 chksum_type = CS_DIR_ENTRY, i;
+
+ ep = (DENTRY_T *)&(es->__buf);
+ for (i = 0; i < es->num_entries; i++) {
+ MMSG("%s %p\n", __func__, ep);
+ chksum = calc_chksum_2byte((void *) ep, DENTRY_SIZE, chksum, chksum_type);
+ ep++;
+ chksum_type = CS_DEFAULT;
+ }
+
+ ep = (DENTRY_T *)&(es->__buf);
+ ((FILE_DENTRY_T *)ep)->checksum = cpu_to_le16(chksum);
+ return __write_whole_entry_set(sb, es);
+}
+
+/* returns a set of dentries for a file or dir.
+ * Note that this is a copy (dump) of dentries, so the caller should call
+ * write_entry_set() to apply changes made in this entry set to the real device.
+ * in:
+ * sb+p_dir+entry: indicates a file/dir
+ * type: specifies how many dentries should be included.
+ * out:
+ * file_ep: will point the first dentry(= file dentry) on success
+ * return:
+ * pointer of entry set on success,
+ * NULL on failure.
+ */
+
+#define ES_MODE_STARTED 0
+#define ES_MODE_GET_FILE_ENTRY 1
+#define ES_MODE_GET_STRM_ENTRY 2
+#define ES_MODE_GET_NAME_ENTRY 3
+#define ES_MODE_GET_CRITICAL_SEC_ENTRY 4
+ENTRY_SET_CACHE_T *get_dentry_set_in_dir(struct super_block *sb,
+ CHAIN_T *p_dir, s32 entry, u32 type, DENTRY_T **file_ep)
+{
+ s32 ret;
+ u32 off, byte_offset, clu = 0;
+ u32 entry_type;
+ u64 sec;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ ENTRY_SET_CACHE_T *es = NULL;
+ DENTRY_T *ep, *pos;
+ u8 *buf;
+ u8 num_entries;
+ s32 mode = ES_MODE_STARTED;
+
+	/* FIXME : is this available in the error case? */
+ if (p_dir->dir == DIR_DELETED) {
+ EMSG("%s : access to deleted dentry\n", __func__);
+ BUG_ON(!fsi->prev_eio);
+ return NULL;
+ }
+
+ TMSG("%s entered\n", __func__);
+ MMSG("p_dir dir %u flags %x size %d\n", p_dir->dir, p_dir->flags, p_dir->size);
+ MMSG("entry %d type %d\n", entry, type);
+
+ byte_offset = entry << DENTRY_SIZE_BITS;
+ ret = walk_fat_chain(sb, p_dir, byte_offset, &clu);
+ if (ret)
+ return NULL;
+
+ /* byte offset in cluster */
+ byte_offset &= fsi->cluster_size - 1;
+
+ /* byte offset in sector */
+ off = byte_offset & (u32)(sb->s_blocksize - 1);
+
+ /* sector offset in cluster */
+ sec = byte_offset >> (sb->s_blocksize_bits);
+ sec += CLUS_TO_SECT(fsi, clu);
+
+ buf = dcache_getblk(sb, sec);
+ if (!buf)
+ goto err_out;
+
+ ep = (DENTRY_T *)(buf + off);
+ entry_type = exfat_get_entry_type(ep);
+
+ if ((entry_type != TYPE_FILE)
+ && (entry_type != TYPE_DIR))
+ goto err_out;
+
+ if (type == ES_ALL_ENTRIES)
+ num_entries = ((FILE_DENTRY_T *)ep)->num_ext+1;
+ else
+ num_entries = type;
+
+ MMSG("trying to malloc %lx bytes for %d entries\n",
+ (unsigned long)(offsetof(ENTRY_SET_CACHE_T, __buf) + (num_entries) * sizeof(DENTRY_T)), num_entries);
+ es = kmalloc((offsetof(ENTRY_SET_CACHE_T, __buf) + (num_entries) * sizeof(DENTRY_T)), GFP_KERNEL);
+ if (!es) {
+ EMSG("%s: failed to alloc entryset\n", __func__);
+ goto err_out;
+ }
+
+ es->num_entries = num_entries;
+ es->sector = sec;
+ es->offset = off;
+ es->alloc_flag = p_dir->flags;
+
+ pos = (DENTRY_T *) &(es->__buf);
+
+ while (num_entries) {
+		// instead of copying the whole sector, we check every entry.
+		// this provides a minimum of stability and consistency.
+ entry_type = exfat_get_entry_type(ep);
+
+ if ((entry_type == TYPE_UNUSED) || (entry_type == TYPE_DELETED))
+ goto err_out;
+
+ switch (mode) {
+ case ES_MODE_STARTED:
+ if ((entry_type == TYPE_FILE) || (entry_type == TYPE_DIR))
+ mode = ES_MODE_GET_FILE_ENTRY;
+ else
+ goto err_out;
+ break;
+ case ES_MODE_GET_FILE_ENTRY:
+ if (entry_type == TYPE_STREAM)
+ mode = ES_MODE_GET_STRM_ENTRY;
+ else
+ goto err_out;
+ break;
+ case ES_MODE_GET_STRM_ENTRY:
+ if (entry_type == TYPE_EXTEND)
+ mode = ES_MODE_GET_NAME_ENTRY;
+ else
+ goto err_out;
+ break;
+ case ES_MODE_GET_NAME_ENTRY:
+ if (entry_type == TYPE_EXTEND)
+ break;
+ else if (entry_type == TYPE_STREAM)
+ goto err_out;
+ else if (entry_type & TYPE_CRITICAL_SEC)
+ mode = ES_MODE_GET_CRITICAL_SEC_ENTRY;
+ else
+ goto err_out;
+ break;
+ case ES_MODE_GET_CRITICAL_SEC_ENTRY:
+ if ((entry_type == TYPE_EXTEND) || (entry_type == TYPE_STREAM))
+ goto err_out;
+ else if ((entry_type & TYPE_CRITICAL_SEC) != TYPE_CRITICAL_SEC)
+ goto err_out;
+ break;
+ }
+
+ /* copy dentry */
+ memcpy(pos, ep, sizeof(DENTRY_T));
+
+ if (--num_entries == 0)
+ break;
+
+ if (((off + DENTRY_SIZE) & (u32)(sb->s_blocksize - 1)) <
+ (off & (u32)(sb->s_blocksize - 1))) {
+ // get the next sector
+ if (IS_LAST_SECT_IN_CLUS(fsi, sec)) {
+ if (es->alloc_flag == 0x03)
+ clu++;
+ else if (get_next_clus_safe(sb, &clu))
+ goto err_out;
+ sec = CLUS_TO_SECT(fsi, clu);
+ } else {
+ sec++;
+ }
+ buf = dcache_getblk(sb, sec);
+ if (!buf)
+ goto err_out;
+ off = 0;
+ ep = (DENTRY_T *)(buf);
+ } else {
+ ep++;
+ off += DENTRY_SIZE;
+ }
+ pos++;
+ }
+
+ if (file_ep)
+ *file_ep = (DENTRY_T *)&(es->__buf);
+
+ MMSG("es sec %llu offset %u flags %d, num_entries %u buf ptr %p\n",
+ es->sector, es->offset, es->alloc_flag, es->num_entries, &(es->__buf));
+ TMSG("%s exited %p\n", __func__, es);
+ return es;
+err_out:
+ TMSG("%s exited (return NULL) (es %p)\n", __func__, es);
+
+ /* kfree(NULL) is safe */
+ kfree(es);
+ es = NULL;
+ return NULL;
+}
+
+void release_dentry_set(ENTRY_SET_CACHE_T *es)
+{
+ TMSG("%s %p\n", __func__, es);
+
+ /* kfree(NULL) is safe */
+ kfree(es);
+ es = NULL;
+}
+
+static s32 __extract_uni_name_from_name_entry(NAME_DENTRY_T *ep, u16 *uniname, s32 order)
+{
+ s32 i, len = 0;
+
+ for (i = 0; i < 15; i++) {
+ /* FIXME : unaligned? */
+ *uniname = le16_to_cpu(ep->unicode_0_14[i]);
+ if (*uniname == 0x0)
+ return len;
+ uniname++;
+ len++;
+ }
+
+ *uniname = 0x0;
+ return len;
+
+} /* end of __extract_uni_name_from_name_entry */
+
+#define DIRENT_STEP_FILE (0)
+#define DIRENT_STEP_STRM (1)
+#define DIRENT_STEP_NAME (2)
+#define DIRENT_STEP_SECD (3)
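+
+/*
+ * matching states of exfat_find_dir_entry():
+ * FILE -> STRM -> NAME -> SECD, mirroring the on-disk dentry-set order.
+ * Any out-of-order entry resets the state machine back to DIRENT_STEP_FILE.
+ */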
+
+/* return values of exfat_find_dir_entry()
+ * >= 0 : return dir entry position with the name in dir
+ * -EEXIST : (root dir, ".") it is the root dir itself
+ * -ENOENT : entry with the name does not exist
+ * -EIO : I/O error
+ */
+static s32 exfat_find_dir_entry(struct super_block *sb, FILE_ID_T *fid,
+ CHAIN_T *p_dir, UNI_NAME_T *p_uniname, s32 num_entries, DOS_NAME_T *unused, u32 type)
+{
+ s32 i, rewind = 0, dentry = 0, end_eidx = 0, num_ext = 0, len;
+ s32 order, step, name_len;
+ s32 dentries_per_clu, num_empty = 0;
+ u32 entry_type;
+ u16 entry_uniname[16], *uniname = NULL, unichar;
+ CHAIN_T clu;
+ DENTRY_T *ep;
+ HINT_T *hint_stat = &fid->hint_stat;
+ HINT_FEMP_T candi_empty;
+ FILE_DENTRY_T *file_ep;
+ STRM_DENTRY_T *strm_ep;
+ NAME_DENTRY_T *name_ep;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ /*
+ * REMARK:
+ * DOT and DOTDOT are handled by VFS layer
+ */
+
+ if (IS_CLUS_FREE(p_dir->dir))
+ return -EIO;
+
+ dentries_per_clu = fsi->dentries_per_clu;
+
+ clu.dir = p_dir->dir;
+ clu.size = p_dir->size;
+ clu.flags = p_dir->flags;
+
+ if (hint_stat->eidx) {
+ clu.dir = hint_stat->clu;
+ dentry = hint_stat->eidx;
+ end_eidx = dentry;
+ }
+
+ candi_empty.eidx = -1;
+rewind:
+ order = 0;
+ step = DIRENT_STEP_FILE;
+ while (!IS_CLUS_EOF(clu.dir)) {
+ i = dentry & (dentries_per_clu - 1);
+ for (; i < dentries_per_clu; i++, dentry++) {
+ if (rewind && (dentry == end_eidx))
+ goto not_found;
+
+ ep = get_dentry_in_dir(sb, &clu, i, NULL);
+ if (!ep)
+ return -EIO;
+
+ entry_type = exfat_get_entry_type(ep);
+
+ if ((entry_type == TYPE_UNUSED) || (entry_type == TYPE_DELETED)) {
+ step = DIRENT_STEP_FILE;
+
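+				/*
+				 * remember the first run of free dentries large
+				 * enough for this name, so a later create can
+				 * reuse it without rescanning the directory.
+				 */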
+ num_empty++;
+ if (candi_empty.eidx == -1) {
+ if (num_empty == 1) {
+ candi_empty.cur.dir = clu.dir;
+ candi_empty.cur.size = clu.size;
+ candi_empty.cur.flags = clu.flags;
+ }
+
+ if (num_empty >= num_entries) {
+ candi_empty.eidx = dentry - (num_empty - 1);
+ ASSERT(0 <= candi_empty.eidx);
+ candi_empty.count = num_empty;
+
+ if ((fid->hint_femp.eidx == -1) ||
+ (candi_empty.eidx <= fid->hint_femp.eidx)) {
+ memcpy(&fid->hint_femp,
+ &candi_empty,
+ sizeof(HINT_FEMP_T));
+ }
+ }
+ }
+
+ if (entry_type == TYPE_UNUSED)
+ goto not_found;
+ continue;
+ }
+
+ num_empty = 0;
+ candi_empty.eidx = -1;
+
+ if ((entry_type == TYPE_FILE) || (entry_type == TYPE_DIR)) {
+ step = DIRENT_STEP_FILE;
+ if ((type == TYPE_ALL) || (type == entry_type)) {
+ file_ep = (FILE_DENTRY_T *) ep;
+ num_ext = file_ep->num_ext;
+ step = DIRENT_STEP_STRM;
+ }
+ continue;
+ }
+
+ if (entry_type == TYPE_STREAM) {
+ if (step != DIRENT_STEP_STRM) {
+ step = DIRENT_STEP_FILE;
+ continue;
+ }
+ step = DIRENT_STEP_FILE;
+ strm_ep = (STRM_DENTRY_T *) ep;
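+				/* cheap reject: compare the 16-bit name hash and the length before the full name */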
+ if ((p_uniname->name_hash == le16_to_cpu(strm_ep->name_hash)) &&
+ (p_uniname->name_len == strm_ep->name_len)) {
+ step = DIRENT_STEP_NAME;
+ order = 1;
+ name_len = 0;
+ }
+ continue;
+ }
+
+ if (entry_type == TYPE_EXTEND) {
+ if (step != DIRENT_STEP_NAME) {
+ step = DIRENT_STEP_FILE;
+ continue;
+ }
+ name_ep = (NAME_DENTRY_T *) ep;
+
+ if ((++order) == 2)
+ uniname = p_uniname->name;
+ else
+ uniname += 15;
+
+ len = __extract_uni_name_from_name_entry(name_ep, entry_uniname, order);
+ name_len += len;
+
+ unichar = *(uniname+len);
+ *(uniname+len) = 0x0;
+
+ if (nls_cmp_uniname(sb, uniname, entry_uniname)) {
+ step = DIRENT_STEP_FILE;
+ } else if (name_len == p_uniname->name_len) {
+ if (order == num_ext) {
+ //fid->hint_femp.eidx = -1;
+ goto found;
+ }
+ step = DIRENT_STEP_SECD;
+ }
+
+ *(uniname+len) = unichar;
+ continue;
+ }
+
+ if (entry_type & (TYPE_CRITICAL_SEC | TYPE_BENIGN_SEC)) {
+ if (step == DIRENT_STEP_SECD) {
+ if (++order == num_ext)
+ goto found;
+ continue;
+ }
+ }
+ step = DIRENT_STEP_FILE;
+ }
+
+ if (clu.flags == 0x03) {
+ if ((--clu.size) > 0)
+ clu.dir++;
+ else
+ clu.dir = CLUS_EOF;
+ } else {
+ if (get_next_clus_safe(sb, &clu.dir))
+ return -EIO;
+ }
+ }
+
+not_found:
+	/* we started at a non-zero index, so we should try to find the
+	 * target from index 0 up to the index we started at.
+	 */
+ if (!rewind && end_eidx) {
+ rewind = 1;
+ dentry = 0;
+ clu.dir = p_dir->dir;
+ /* reset empty hint */
+ num_empty = 0;
+ candi_empty.eidx = -1;
+ goto rewind;
+ }
+
+	/* reset hint_stat */
+ hint_stat->clu = p_dir->dir;
+ hint_stat->eidx = 0;
+ return -ENOENT;
+
+found:
+ /* next dentry we'll find is out of this cluster */
+ if (!((dentry + 1) & (dentries_per_clu-1))) {
+ int ret = 0;
+
+ if (clu.flags == 0x03) {
+ if ((--clu.size) > 0)
+ clu.dir++;
+ else
+ clu.dir = CLUS_EOF;
+ } else {
+ ret = get_next_clus_safe(sb, &clu.dir);
+ }
+
+ if (ret || IS_CLUS_EOF(clu.dir)) {
+			/* just reset hint_stat */
+ hint_stat->clu = p_dir->dir;
+ hint_stat->eidx = 0;
+ return (dentry - num_ext);
+ }
+ }
+
+ hint_stat->clu = clu.dir;
+ hint_stat->eidx = dentry + 1;
+ return (dentry - num_ext);
+} /* end of exfat_find_dir_entry */
+
+/* returns -EIO on error */
+static s32 exfat_count_ext_entries(struct super_block *sb, CHAIN_T *p_dir, s32 entry, DENTRY_T *p_entry)
+{
+ s32 i, count = 0;
+ u32 type;
+ FILE_DENTRY_T *file_ep = (FILE_DENTRY_T *) p_entry;
+ DENTRY_T *ext_ep;
+
+ for (i = 0, entry++; i < file_ep->num_ext; i++, entry++) {
+ ext_ep = get_dentry_in_dir(sb, p_dir, entry, NULL);
+ if (!ext_ep)
+ return -EIO;
+
+ type = exfat_get_entry_type(ext_ep);
+ if ((type == TYPE_EXTEND) || (type == TYPE_STREAM))
+ count++;
+ else
+ return count;
+ }
+
+ return count;
+} /* end of exfat_count_ext_entries */
+
+
+/*
+ * Name Conversion Functions
+ */
+static void exfat_get_uniname_from_ext_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, u16 *uniname)
+{
+ s32 i;
+ DENTRY_T *ep;
+ ENTRY_SET_CACHE_T *es;
+
+ es = get_dentry_set_in_dir(sb, p_dir, entry, ES_ALL_ENTRIES, &ep);
+ if (!es)
+ return;
+
+ if (es->num_entries < 3)
+ goto out;
+
+ ep += 2;
+
+ /*
+ * First entry : file entry
+ * Second entry : stream-extension entry
+ * Third entry : first file-name entry
+ * So, the index of first file-name dentry should start from 2.
+ */
+ for (i = 2; i < es->num_entries; i++, ep++) {
+ /* end of name entry */
+ if (exfat_get_entry_type(ep) != TYPE_EXTEND)
+ goto out;
+
+ __extract_uni_name_from_name_entry((NAME_DENTRY_T *)ep, uniname, i);
+ uniname += 15;
+ }
+
+out:
+ release_dentry_set(es);
+} /* end of exfat_get_uniname_from_ext_entry */
+
+static s32 exfat_calc_num_entries(UNI_NAME_T *p_uniname)
+{
+ s32 len;
+
+ len = p_uniname->name_len;
+ if (len == 0)
+ return 0;
+
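+	/* each file-name entry stores up to 15 UTF-16 units */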
+ /* 1 file entry + 1 stream entry + name entries */
+	return ((len - 1) / 15 + 3);
+
+} /* end of exfat_calc_num_entries */
+
+static s32 exfat_check_max_dentries(FILE_ID_T *fid)
+{
+ if ((fid->size >> DENTRY_SIZE_BITS) >= MAX_EXFAT_DENTRIES) {
+		/* the exFAT spec allows a dir to grow up to 8388608 dentries (256 MB) */
+ return -ENOSPC;
+ }
+ return 0;
+} /* end of exfat_check_max_dentries */
+
+/*
+ * Allocation Bitmap Management Functions
+ */
+s32 load_alloc_bmp(struct super_block *sb)
+{
+ s32 ret;
+ u32 i, j, map_size, type, need_map_size;
+ u64 sector;
+ CHAIN_T clu;
+ BMAP_DENTRY_T *ep;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ clu.dir = fsi->root_dir;
+ clu.flags = 0x01;
+
+ while (!IS_CLUS_EOF(clu.dir)) {
+ for (i = 0; i < fsi->dentries_per_clu; i++) {
+ ep = (BMAP_DENTRY_T *) get_dentry_in_dir(sb, &clu, i, NULL);
+ if (!ep)
+ return -EIO;
+
+ type = exfat_get_entry_type((DENTRY_T *) ep);
+
+ if (type == TYPE_UNUSED)
+ break;
+ if (type != TYPE_BITMAP)
+ continue;
+
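+			/*
+			 * BitmapFlags bit 0 selects the bitmap: 0 is the first
+			 * allocation bitmap (the second is used only by TexFAT).
+			 */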
+ if (ep->flags == 0x0) {
+ fsi->map_clu = le32_to_cpu(ep->start_clu);
+ map_size = (u32) le64_to_cpu(ep->size);
+
+ need_map_size = (((fsi->num_clusters - CLUS_BASE) - 1) >> 3) + 1;
+ if (need_map_size != map_size) {
+ sdfat_log_msg(sb, KERN_ERR,
+ "bogus allocation bitmap size(need : %u, cur : %u)",
+ need_map_size, map_size);
+					/* only tolerated when the on-disk bitmap is larger than required */
+ if (need_map_size > map_size)
+ return -EIO;
+ }
+ fsi->map_sectors = ((need_map_size - 1) >> (sb->s_blocksize_bits)) + 1;
+ fsi->vol_amap =
+ kmalloc((sizeof(struct buffer_head *) * fsi->map_sectors), GFP_KERNEL);
+ if (!fsi->vol_amap)
+ return -ENOMEM;
+
+ sector = CLUS_TO_SECT(fsi, fsi->map_clu);
+
+ for (j = 0; j < fsi->map_sectors; j++) {
+ fsi->vol_amap[j] = NULL;
+ ret = read_sect(sb, sector+j, &(fsi->vol_amap[j]), 1);
+ if (ret) {
+ /* release all buffers and free vol_amap */
+ i = 0;
+ while (i < j)
+ brelse(fsi->vol_amap[i++]);
+
+ /* kfree(NULL) is safe */
+ kfree(fsi->vol_amap);
+ fsi->vol_amap = NULL;
+ return ret;
+ }
+ }
+
+ fsi->pbr_bh = NULL;
+ return 0;
+ }
+ }
+
+ if (get_next_clus_safe(sb, &clu.dir))
+ return -EIO;
+ }
+
+ return -EINVAL;
+} /* end of load_alloc_bmp */
+
+void free_alloc_bmp(struct super_block *sb)
+{
+ s32 i;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ brelse(fsi->pbr_bh);
+
+ for (i = 0; i < fsi->map_sectors; i++)
+ __brelse(fsi->vol_amap[i]);
+
+ /* kfree(NULL) is safe */
+ kfree(fsi->vol_amap);
+ fsi->vol_amap = NULL;
+}
+
+/* WARN :
+ * If the value of "clu" is 0, it means cluster 2 which is
+ * the first cluster of cluster heap.
+ */
+static s32 set_alloc_bitmap(struct super_block *sb, u32 clu)
+{
+ s32 i, b;
+ u64 sector;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
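+	/* i: bitmap sector index, b: bit offset within that sector */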
+ i = clu >> (sb->s_blocksize_bits + 3);
+ b = clu & (u32)((sb->s_blocksize << 3) - 1);
+
+ sector = CLUS_TO_SECT(fsi, fsi->map_clu) + i;
+ bitmap_set((unsigned long *)(fsi->vol_amap[i]->b_data), b, 1);
+
+ return write_sect(sb, sector, fsi->vol_amap[i], 0);
+} /* end of set_alloc_bitmap */
+
+/* WARN :
+ * If the value of "clu" is 0, it means cluster 2 which is
+ * the first cluster of cluster heap.
+ */
+static s32 clr_alloc_bitmap(struct super_block *sb, u32 clu)
+{
+ s32 ret;
+ s32 i, b;
+ u64 sector;
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ struct sdfat_mount_options *opts = &sbi->options;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ i = clu >> (sb->s_blocksize_bits + 3);
+ b = clu & (u32)((sb->s_blocksize << 3) - 1);
+
+ sector = CLUS_TO_SECT(fsi, fsi->map_clu) + i;
+
+ bitmap_clear((unsigned long *)(fsi->vol_amap[i]->b_data), b, 1);
+
+ ret = write_sect(sb, sector, fsi->vol_amap[i], 0);
+
+ if (opts->discard) {
+ s32 ret_discard;
+
+ TMSG("discard cluster(%08x)\n", clu+2);
+ ret_discard = sb_issue_discard(sb, CLUS_TO_SECT(fsi, clu+2),
+ (1 << fsi->sect_per_clus_bits), GFP_NOFS, 0);
+
+ if (ret_discard == -EOPNOTSUPP) {
+ sdfat_msg(sb, KERN_ERR,
+ "discard not supported by device, disabling");
+ opts->discard = 0;
+ }
+ }
+
+ return ret;
+} /* end of clr_alloc_bitmap */
+
+/* WARN :
+ * If the value of "clu" is 0, it means cluster 2 which is
+ * the first cluster of cluster heap.
+ */
+static u32 test_alloc_bitmap(struct super_block *sb, u32 clu)
+{
+ u32 i, map_i, map_b;
+ u32 clu_base, clu_free;
+ u8 k, clu_mask;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
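+	/*
+	 * start searching at "clu"; clu_mask marks the bits of the first
+	 * bitmap byte that lie below the hint so they are skipped as in-use.
+	 */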
+ clu_base = (clu & ~(0x7)) + 2;
+ clu_mask = (1 << (clu - clu_base + 2)) - 1;
+
+ map_i = clu >> (sb->s_blocksize_bits + 3);
+ map_b = (clu >> 3) & (u32)(sb->s_blocksize - 1);
+
+ for (i = 2; i < fsi->num_clusters; i += 8) {
+ k = *(((u8 *) fsi->vol_amap[map_i]->b_data) + map_b);
+ if (clu_mask > 0) {
+ k |= clu_mask;
+ clu_mask = 0;
+ }
+ if (k < 0xFF) {
+ clu_free = clu_base + free_bit[k];
+ if (clu_free < fsi->num_clusters)
+ return clu_free;
+ }
+ clu_base += 8;
+
+ if (((++map_b) >= (u32)sb->s_blocksize) ||
+ (clu_base >= fsi->num_clusters)) {
+ if ((++map_i) >= fsi->map_sectors) {
+ clu_base = 2;
+ map_i = 0;
+ }
+ map_b = 0;
+ }
+ }
+
+ return CLUS_EOF;
+} /* end of test_alloc_bitmap */
+
+void sync_alloc_bmp(struct super_block *sb)
+{
+ s32 i;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ if (fsi->vol_amap == NULL)
+ return;
+
+ for (i = 0; i < fsi->map_sectors; i++)
+ sync_dirty_buffer(fsi->vol_amap[i]);
+}
+
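+/*
+ * write FAT links for "len" clusters starting at "chain", terminating the
+ * run with EOF; used when a contiguous (no-FAT-chain) run has to be turned
+ * into a regular FAT-linked chain.
+ */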
+static s32 exfat_chain_cont_cluster(struct super_block *sb, u32 chain, u32 len)
+{
+ if (!len)
+ return 0;
+
+ while (len > 1) {
+ if (fat_ent_set(sb, chain, chain+1))
+ return -EIO;
+ chain++;
+ len--;
+ }
+
+ if (fat_ent_set(sb, chain, CLUS_EOF))
+ return -EIO;
+ return 0;
+}
+
+s32 chain_cont_cluster(struct super_block *sb, u32 chain, u32 len)
+{
+ return exfat_chain_cont_cluster(sb, chain, len);
+}
+
+
+static s32 exfat_free_cluster(struct super_block *sb, CHAIN_T *p_chain, s32 do_relse)
+{
+ s32 ret = -EIO;
+ u32 num_clusters = 0;
+ u32 clu;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ s32 i;
+ u64 sector;
+
+ /* invalid cluster number */
+ if (IS_CLUS_FREE(p_chain->dir) || IS_CLUS_EOF(p_chain->dir))
+ return 0;
+
+ /* no cluster to truncate */
+ if (p_chain->size == 0) {
+ DMSG("%s: cluster(%u) truncation is not required.",
+ __func__, p_chain->dir);
+ return 0;
+ }
+
+ /* check cluster validation */
+	if ((p_chain->dir < 2) || (p_chain->dir >= fsi->num_clusters)) {
+ EMSG("%s: invalid start cluster (%u)\n", __func__, p_chain->dir);
+ sdfat_debug_bug_on(1);
+ return -EIO;
+ }
+
+ set_sb_dirty(sb);
+ clu = p_chain->dir;
+
+ if (p_chain->flags == 0x03) {
+ do {
+ if (do_relse) {
+ sector = CLUS_TO_SECT(fsi, clu);
+ for (i = 0; i < fsi->sect_per_clus; i++) {
+ if (dcache_release(sb, sector+i) == -EIO)
+ goto out;
+ }
+ }
+
+ if (clr_alloc_bitmap(sb, clu-2))
+ goto out;
+ clu++;
+
+ num_clusters++;
+ } while (num_clusters < p_chain->size);
+ } else {
+ do {
+ if (do_relse) {
+ sector = CLUS_TO_SECT(fsi, clu);
+ for (i = 0; i < fsi->sect_per_clus; i++) {
+ if (dcache_release(sb, sector+i) == -EIO)
+ goto out;
+ }
+ }
+
+ if (clr_alloc_bitmap(sb, (clu - CLUS_BASE)))
+ goto out;
+
+ if (get_next_clus_safe(sb, &clu))
+ goto out;
+
+ num_clusters++;
+ } while (!IS_CLUS_EOF(clu));
+ }
+
+ /* success */
+ ret = 0;
+out:
+
+ fsi->used_clusters -= num_clusters;
+ return ret;
+} /* end of exfat_free_cluster */
+
+static s32 exfat_alloc_cluster(struct super_block *sb, u32 num_alloc, CHAIN_T *p_chain, s32 dest)
+{
+ s32 ret = -ENOSPC;
+ u32 num_clusters = 0, total_cnt;
+ u32 hint_clu, new_clu, last_clu = CLUS_EOF;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ total_cnt = fsi->num_clusters - CLUS_BASE;
+
+ if (unlikely(total_cnt < fsi->used_clusters)) {
+ sdfat_fs_error_ratelimit(sb,
+ "%s: invalid used clusters(t:%u,u:%u)\n",
+ __func__, total_cnt, fsi->used_clusters);
+ return -EIO;
+ }
+
+ if (num_alloc > total_cnt - fsi->used_clusters)
+ return -ENOSPC;
+
+ hint_clu = p_chain->dir;
+ /* find new cluster */
+ if (IS_CLUS_EOF(hint_clu)) {
+ if (fsi->clu_srch_ptr < CLUS_BASE) {
+ EMSG("%s: fsi->clu_srch_ptr is invalid (%u)\n",
+ __func__, fsi->clu_srch_ptr);
+ ASSERT(0);
+ fsi->clu_srch_ptr = CLUS_BASE;
+ }
+
+ hint_clu = test_alloc_bitmap(sb, fsi->clu_srch_ptr - CLUS_BASE);
+ if (IS_CLUS_EOF(hint_clu))
+ return -ENOSPC;
+ }
+
+ /* check cluster validation */
+	if ((hint_clu < CLUS_BASE) || (hint_clu >= fsi->num_clusters)) {
+ EMSG("%s: hint_cluster is invalid (%u)\n", __func__, hint_clu);
+ ASSERT(0);
+ hint_clu = CLUS_BASE;
+ if (p_chain->flags == 0x03) {
+ if (exfat_chain_cont_cluster(sb, p_chain->dir, num_clusters))
+ return -EIO;
+ p_chain->flags = 0x01;
+ }
+ }
+
+ set_sb_dirty(sb);
+
+ p_chain->dir = CLUS_EOF;
+
+ while ((new_clu = test_alloc_bitmap(sb, hint_clu - CLUS_BASE)) != CLUS_EOF) {
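+		/*
+		 * a 0x03 chain must stay contiguous; once a gap appears,
+		 * materialize the chain in the FAT and fall back to 0x01.
+		 */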
+ if ((new_clu != hint_clu) && (p_chain->flags == 0x03)) {
+ if (exfat_chain_cont_cluster(sb, p_chain->dir, num_clusters)) {
+ ret = -EIO;
+ goto error;
+ }
+ p_chain->flags = 0x01;
+ }
+
+ /* update allocation bitmap */
+ if (set_alloc_bitmap(sb, new_clu - CLUS_BASE)) {
+ ret = -EIO;
+ goto error;
+ }
+
+ num_clusters++;
+
+ /* update FAT table */
+ if (p_chain->flags == 0x01) {
+ if (fat_ent_set(sb, new_clu, CLUS_EOF)) {
+ ret = -EIO;
+ goto error;
+ }
+ }
+
+ if (IS_CLUS_EOF(p_chain->dir)) {
+ p_chain->dir = new_clu;
+ } else if (p_chain->flags == 0x01) {
+ if (fat_ent_set(sb, last_clu, new_clu)) {
+ ret = -EIO;
+ goto error;
+ }
+ }
+ last_clu = new_clu;
+
+ if ((--num_alloc) == 0) {
+ fsi->clu_srch_ptr = hint_clu;
+ fsi->used_clusters += num_clusters;
+
+ p_chain->size += num_clusters;
+ return 0;
+ }
+
+ hint_clu = new_clu + 1;
+ if (hint_clu >= fsi->num_clusters) {
+ hint_clu = CLUS_BASE;
+
+ if (p_chain->flags == 0x03) {
+ if (exfat_chain_cont_cluster(sb, p_chain->dir, num_clusters)) {
+ ret = -EIO;
+ goto error;
+ }
+ p_chain->flags = 0x01;
+ }
+ }
+ }
+error:
+ if (num_clusters)
+ exfat_free_cluster(sb, p_chain, 0);
+ return ret;
+} /* end of exfat_alloc_cluster */
+
+static s32 exfat_count_used_clusters(struct super_block *sb, u32 *ret_count)
+{
+ u32 count = 0;
+ u32 i, map_i, map_b;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ u32 total_clus = fsi->num_clusters - 2;
+
+ map_i = map_b = 0;
+
+ for (i = 0; i < total_clus; i += 8) {
+ u8 k = *(((u8 *) fsi->vol_amap[map_i]->b_data) + map_b);
+
+ count += used_bit[k];
+ if ((++map_b) >= (u32)sb->s_blocksize) {
+ map_i++;
+ map_b = 0;
+ }
+ }
+
+	/* FIXME : abnormal bitmap counts should be handled more gracefully */
+ if (total_clus < count)
+ count = total_clus;
+
+ *ret_count = count;
+ return 0;
+} /* end of exfat_count_used_clusters */
+
+
+/*
+ * File Operation Functions
+ */
+static FS_FUNC_T exfat_fs_func = {
+ .alloc_cluster = exfat_alloc_cluster,
+ .free_cluster = exfat_free_cluster,
+ .count_used_clusters = exfat_count_used_clusters,
+
+ .init_dir_entry = exfat_init_dir_entry,
+ .init_ext_entry = exfat_init_ext_entry,
+ .find_dir_entry = exfat_find_dir_entry,
+ .delete_dir_entry = exfat_delete_dir_entry,
+ .get_uniname_from_ext_entry = exfat_get_uniname_from_ext_entry,
+ .count_ext_entries = exfat_count_ext_entries,
+ .calc_num_entries = exfat_calc_num_entries,
+ .check_max_dentries = exfat_check_max_dentries,
+
+ .get_entry_type = exfat_get_entry_type,
+ .set_entry_type = exfat_set_entry_type,
+ .get_entry_attr = exfat_get_entry_attr,
+ .set_entry_attr = exfat_set_entry_attr,
+ .get_entry_flag = exfat_get_entry_flag,
+ .set_entry_flag = exfat_set_entry_flag,
+ .get_entry_clu0 = exfat_get_entry_clu0,
+ .set_entry_clu0 = exfat_set_entry_clu0,
+ .get_entry_size = exfat_get_entry_size,
+ .set_entry_size = exfat_set_entry_size,
+ .get_entry_time = exfat_get_entry_time,
+ .set_entry_time = exfat_set_entry_time,
+};
+
+s32 mount_exfat(struct super_block *sb, pbr_t *p_pbr)
+{
+ pbr64_t *p_bpb = (pbr64_t *)p_pbr;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ if (!p_bpb->bsx.num_fats) {
+ sdfat_msg(sb, KERN_ERR, "bogus number of FAT structure");
+ return -EINVAL;
+ }
+
+ fsi->sect_per_clus = 1 << p_bpb->bsx.sect_per_clus_bits;
+ fsi->sect_per_clus_bits = p_bpb->bsx.sect_per_clus_bits;
+ fsi->cluster_size_bits = fsi->sect_per_clus_bits + sb->s_blocksize_bits;
+ fsi->cluster_size = 1 << fsi->cluster_size_bits;
+
+ fsi->num_FAT_sectors = le32_to_cpu(p_bpb->bsx.fat_length);
+
+ fsi->FAT1_start_sector = le32_to_cpu(p_bpb->bsx.fat_offset);
+ if (p_bpb->bsx.num_fats == 1)
+ fsi->FAT2_start_sector = fsi->FAT1_start_sector;
+ else
+ fsi->FAT2_start_sector = fsi->FAT1_start_sector + fsi->num_FAT_sectors;
+
+ fsi->root_start_sector = le32_to_cpu(p_bpb->bsx.clu_offset);
+ fsi->data_start_sector = fsi->root_start_sector;
+
+ fsi->num_sectors = le64_to_cpu(p_bpb->bsx.vol_length);
+ fsi->num_clusters = le32_to_cpu(p_bpb->bsx.clu_count) + 2;
+	/* because the cluster index starts at 2 */
+
+ fsi->vol_type = EXFAT;
+ fsi->vol_id = le32_to_cpu(p_bpb->bsx.vol_serial);
+
+ fsi->root_dir = le32_to_cpu(p_bpb->bsx.root_cluster);
+ fsi->dentries_in_root = 0;
+ fsi->dentries_per_clu = 1 << (fsi->cluster_size_bits - DENTRY_SIZE_BITS);
+
+ fsi->vol_flag = (u32) le16_to_cpu(p_bpb->bsx.vol_flags);
+ fsi->clu_srch_ptr = CLUS_BASE;
+ fsi->used_clusters = (u32) ~0;
+
+ fsi->fs_func = &exfat_fs_func;
+ fat_ent_ops_init(sb);
+
+ if (p_bpb->bsx.vol_flags & VOL_DIRTY) {
+ fsi->vol_flag |= VOL_DIRTY;
+ sdfat_log_msg(sb, KERN_WARNING, "Volume was not properly "
+ "unmounted. Some data may be corrupt. "
+ "Please run fsck.");
+ }
+
+ return 0;
+} /* end of mount_exfat */
+
+/* end of core_exfat.c */
diff --git a/fs/sdfat/core_fat.c b/fs/sdfat/core_fat.c
new file mode 100644
index 000000000000..5e0a196ae42b
--- /dev/null
+++ b/fs/sdfat/core_fat.c
@@ -0,0 +1,1465 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/************************************************************************/
+/* */
+/* PROJECT : exFAT & FAT12/16/32 File System */
+/* FILE : core_fat.c */
+/* PURPOSE : FAT-fs core code for sdFAT */
+/* */
+/*----------------------------------------------------------------------*/
+/* NOTES */
+/* */
+/* */
+/************************************************************************/
+
+#include <linux/version.h>
+#include <linux/blkdev.h>
+#include <linux/workqueue.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+
+#include "sdfat.h"
+#include "core.h"
+#include <asm/byteorder.h>
+#include <asm/unaligned.h>
+
+/*----------------------------------------------------------------------*/
+/* Constant & Macro Definitions */
+/*----------------------------------------------------------------------*/
+#define MAX_LFN_ORDER (20)
+
+/*
+ * MAX_EST_AU_SECT should be changed according to 32/64bits.
+ * On 32bit, 4KB page supports 512 clusters per AU.
+ * But, on 64bit, 4KB page can handle a half of total list_head of 32bit's.
+ * Bcause the size of list_head structure on 64bit increases twofold over 32bit.
+ */
+#if (BITS_PER_LONG == 64)
+//#define MAX_EST_AU_SECT	(16384) /* up to 8MB */
+#define MAX_EST_AU_SECT	(32768) /* up to 16MB, uses more pages for list_head */
+#else
+#define MAX_EST_AU_SECT	(32768) /* up to 16MB */
+#endif
+
+/*======================================================================*/
+/* Local Function Declarations */
+/*======================================================================*/
+static s32 __extract_uni_name_from_ext_entry(EXT_DENTRY_T *, u16 *, s32);
+
+/*----------------------------------------------------------------------*/
+/* Global Variable Definitions */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/* Local Variable Definitions */
+/*----------------------------------------------------------------------*/
+
+/*======================================================================*/
+/* Local Function Definitions */
+/*======================================================================*/
+static u32 __calc_default_au_size(struct super_block *sb)
+{
+ struct block_device *bdev = sb->s_bdev;
+ struct gendisk *disk;
+ struct request_queue *queue;
+ struct queue_limits *limit;
+ unsigned int est_au_sect = MAX_EST_AU_SECT;
+ unsigned int est_au_size = 0;
+ unsigned int queue_au_size = 0;
+ sector_t total_sect = 0;
+
+	/* we assume that the sector size is 512 bytes */
+
+ disk = bdev->bd_disk;
+ if (!disk)
+ goto out;
+
+ queue = disk->queue;
+ if (!queue)
+ goto out;
+
+ limit = &queue->limits;
+ queue_au_size = limit->discard_granularity;
+
+ /* estimate function(x) =
+ * (total_sect / 2) * 512 / 1024
+	 *	 => ((total_sect >> 1) >> 1)
+ * => (total_sect >> 2)
+ * => estimated bytes size
+ *
+ * ex1) <= 8GB -> 4MB
+ * ex2) 16GB -> 8MB
+ * ex3) >= 32GB -> 16MB
+ */
+ total_sect = disk->part0.nr_sects;
+ est_au_size = total_sect >> 2;
+
+ /* au_size assumed that bytes per sector is 512 */
+ est_au_sect = est_au_size >> 9;
+
+ MMSG("DBG1: total_sect(%llu) est_au_size(%u) est_au_sect(%u)\n",
+ (u64)total_sect, est_au_size, est_au_sect);
+
+ if (est_au_sect <= 8192) {
+ /* 4MB */
+ est_au_sect = 8192;
+ } else if (est_au_sect <= 16384) {
+ /* 8MB */
+ est_au_sect = 16384;
+ } else {
+ /* 8MB or 16MB */
+ est_au_sect = MAX_EST_AU_SECT;
+ }
+
+ MMSG("DBG2: total_sect(%llu) est_au_size(%u) est_au_sect(%u)\n",
+ (u64)total_sect, est_au_size, est_au_sect);
+
+ if (est_au_size < queue_au_size &&
+ queue_au_size <= (MAX_EST_AU_SECT << 9)) {
+ DMSG("use queue_au_size(%u) instead of est_au_size(%u)\n",
+ queue_au_size, est_au_size);
+ est_au_sect = queue_au_size >> 9;
+ }
+
+out:
+ if (sb->s_blocksize != 512) {
+ ASSERT(sb->s_blocksize_bits > 9);
+ sdfat_log_msg(sb, KERN_INFO,
+			"adjusting est_au_size for logical block size (%lu)",
+ sb->s_blocksize);
+ est_au_sect >>= (sb->s_blocksize_bits - 9);
+ }
+
+ sdfat_log_msg(sb, KERN_INFO, "set default AU sectors : %u "
+ "(queue_au_size : %u KB, disk_size : %llu MB)",
+ est_au_sect, queue_au_size >> 10, (u64)(total_sect >> 11));
+ return est_au_sect;
+}
+
+
+/*
+ * Cluster Management Functions
+ */
+static s32 fat_free_cluster(struct super_block *sb, CHAIN_T *p_chain, s32 do_relse)
+{
+ s32 ret = -EIO;
+ s32 num_clusters = 0;
+ u32 clu, prev;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ s32 i;
+ u64 sector;
+
+ /* invalid cluster number */
+ if (IS_CLUS_FREE(p_chain->dir) || IS_CLUS_EOF(p_chain->dir))
+ return 0;
+
+ /* no cluster to truncate */
+ if (!p_chain->size) {
+ DMSG("%s: cluster(%u) truncation is not required.",
+ __func__, p_chain->dir);
+ return 0;
+ }
+
+ /* check cluster validation */
+	if ((p_chain->dir < 2) || (p_chain->dir >= fsi->num_clusters)) {
+ EMSG("%s: invalid start cluster (%u)\n", __func__, p_chain->dir);
+ sdfat_debug_bug_on(1);
+ return -EIO;
+ }
+
+
+ set_sb_dirty(sb);
+ clu = p_chain->dir;
+
+ do {
+ if (do_relse) {
+ sector = CLUS_TO_SECT(fsi, clu);
+ for (i = 0; i < fsi->sect_per_clus; i++) {
+ if (dcache_release(sb, sector+i) == -EIO)
+ goto out;
+ }
+ }
+
+ prev = clu;
+ if (get_next_clus_safe(sb, &clu)) {
+ /* print more helpful log */
+ if (IS_CLUS_BAD(clu)) {
+ sdfat_log_msg(sb, KERN_ERR, "%s : "
+ "deleting bad cluster (clu[%u]->BAD)",
+ __func__, prev);
+ } else if (IS_CLUS_FREE(clu)) {
+ sdfat_log_msg(sb, KERN_ERR, "%s : "
+ "deleting free cluster (clu[%u]->FREE)",
+ __func__, prev);
+ }
+ goto out;
+ }
+
+ /* Free FAT chain */
+ if (fat_ent_set(sb, prev, CLUS_FREE))
+ goto out;
+
+ /* Update AMAP if needed */
+ if (fsi->amap) {
+ if (amap_release_cluster(sb, prev))
+ return -EIO;
+ }
+
+ num_clusters++;
+
+ } while (!IS_CLUS_EOF(clu));
+
+ /* success */
+ ret = 0;
+out:
+ fsi->used_clusters -= num_clusters;
+ return ret;
+} /* end of fat_free_cluster */
+
+static s32 fat_alloc_cluster(struct super_block *sb, u32 num_alloc, CHAIN_T *p_chain, s32 dest)
+{
+ s32 ret = -ENOSPC;
+ u32 i, num_clusters = 0, total_cnt;
+ u32 new_clu, last_clu = CLUS_EOF, read_clu;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ total_cnt = fsi->num_clusters - CLUS_BASE;
+
+ if (unlikely(total_cnt < fsi->used_clusters)) {
+ sdfat_fs_error_ratelimit(sb,
+ "%s : invalid used clusters(t:%u,u:%u)\n",
+ __func__, total_cnt, fsi->used_clusters);
+ return -EIO;
+ }
+
+ if (num_alloc > total_cnt - fsi->used_clusters)
+ return -ENOSPC;
+
+ new_clu = p_chain->dir;
+ if (IS_CLUS_EOF(new_clu))
+ new_clu = fsi->clu_srch_ptr;
+ else if (new_clu >= fsi->num_clusters)
+ new_clu = CLUS_BASE;
+
+ set_sb_dirty(sb);
+
+ p_chain->dir = CLUS_EOF;
+
+ for (i = CLUS_BASE; i < fsi->num_clusters; i++) {
+ if (fat_ent_get(sb, new_clu, &read_clu)) {
+ ret = -EIO;
+ goto error;
+ }
+
+ if (IS_CLUS_FREE(read_clu)) {
+ if (fat_ent_set(sb, new_clu, CLUS_EOF)) {
+ ret = -EIO;
+ goto error;
+ }
+ num_clusters++;
+
+ if (IS_CLUS_EOF(p_chain->dir)) {
+ p_chain->dir = new_clu;
+ } else {
+ if (fat_ent_set(sb, last_clu, new_clu)) {
+ ret = -EIO;
+ goto error;
+ }
+ }
+
+ last_clu = new_clu;
+
+ if ((--num_alloc) == 0) {
+ fsi->clu_srch_ptr = new_clu;
+ fsi->used_clusters += num_clusters;
+
+ return 0;
+ }
+ }
+ if ((++new_clu) >= fsi->num_clusters)
+ new_clu = CLUS_BASE;
+ }
+error:
+ if (num_clusters)
+ fat_free_cluster(sb, p_chain, 0);
+ return ret;
+} /* end of fat_alloc_cluster */
+
+static s32 fat_count_used_clusters(struct super_block *sb, u32 *ret_count)
+{
+ s32 i;
+ u32 clu, count = 0;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ for (i = CLUS_BASE; i < fsi->num_clusters; i++) {
+ if (fat_ent_get(sb, i, &clu))
+ return -EIO;
+
+ if (!IS_CLUS_FREE(clu))
+ count++;
+ }
+
+ *ret_count = count;
+ return 0;
+} /* end of fat_count_used_clusters */
+
+
+/*
+ * Directory Entry Management Functions
+ */
+static u32 fat_get_entry_type(DENTRY_T *p_entry)
+{
+ DOS_DENTRY_T *ep = (DOS_DENTRY_T *)p_entry;
+
+ /* first byte of 32bytes dummy */
+ if (*(ep->name) == MSDOS_UNUSED)
+ return TYPE_UNUSED;
+
+ /* 0xE5 of Kanji Japanese is replaced to 0x05 */
+ else if (*(ep->name) == MSDOS_DELETED)
+ return TYPE_DELETED;
+
+ /* 11th byte of 32bytes dummy */
+ else if ((ep->attr & ATTR_EXTEND_MASK) == ATTR_EXTEND)
+ return TYPE_EXTEND;
+
+ else if (!(ep->attr & (ATTR_SUBDIR | ATTR_VOLUME)))
+ return TYPE_FILE;
+
+ else if ((ep->attr & (ATTR_SUBDIR | ATTR_VOLUME)) == ATTR_SUBDIR)
+ return TYPE_DIR;
+
+ else if ((ep->attr & (ATTR_SUBDIR | ATTR_VOLUME)) == ATTR_VOLUME)
+ return TYPE_VOLUME;
+
+ return TYPE_INVALID;
+} /* end of fat_get_entry_type */
+
+static void fat_set_entry_type(DENTRY_T *p_entry, u32 type)
+{
+ DOS_DENTRY_T *ep = (DOS_DENTRY_T *)p_entry;
+
+ if (type == TYPE_UNUSED)
+ *(ep->name) = MSDOS_UNUSED; /* 0x0 */
+
+ else if (type == TYPE_DELETED)
+ *(ep->name) = MSDOS_DELETED; /* 0xE5 */
+
+ else if (type == TYPE_EXTEND)
+ ep->attr = ATTR_EXTEND;
+
+ else if (type == TYPE_DIR)
+ ep->attr = ATTR_SUBDIR;
+
+ else if (type == TYPE_FILE)
+ ep->attr = ATTR_ARCHIVE;
+
+ else if (type == TYPE_SYMLINK)
+ ep->attr = ATTR_ARCHIVE | ATTR_SYMLINK;
+} /* end of fat_set_entry_type */
+
+static u32 fat_get_entry_attr(DENTRY_T *p_entry)
+{
+ DOS_DENTRY_T *ep = (DOS_DENTRY_T *)p_entry;
+
+ return (u32)ep->attr;
+} /* end of fat_get_entry_attr */
+
+static void fat_set_entry_attr(DENTRY_T *p_entry, u32 attr)
+{
+ DOS_DENTRY_T *ep = (DOS_DENTRY_T *)p_entry;
+
+ ep->attr = (u8)attr;
+} /* end of fat_set_entry_attr */
+
+static u8 fat_get_entry_flag(DENTRY_T *p_entry)
+{
+ return 0x01;
+} /* end of fat_get_entry_flag */
+
+static void fat_set_entry_flag(DENTRY_T *p_entry, u8 flags)
+{
+} /* end of fat_set_entry_flag */
+
+static u32 fat_get_entry_clu0(DENTRY_T *p_entry)
+{
+ DOS_DENTRY_T *ep = (DOS_DENTRY_T *)p_entry;
+ /* FIXME : is ok? */
+ return(((u32)(le16_to_cpu(ep->start_clu_hi)) << 16) | le16_to_cpu(ep->start_clu_lo));
+} /* end of fat_get_entry_clu0 */
+
+static void fat_set_entry_clu0(DENTRY_T *p_entry, u32 start_clu)
+{
+ DOS_DENTRY_T *ep = (DOS_DENTRY_T *)p_entry;
+
+ ep->start_clu_lo = cpu_to_le16(CLUSTER_16(start_clu));
+ ep->start_clu_hi = cpu_to_le16(CLUSTER_16(start_clu >> 16));
+} /* end of fat_set_entry_clu0 */
+
+static u64 fat_get_entry_size(DENTRY_T *p_entry)
+{
+ DOS_DENTRY_T *ep = (DOS_DENTRY_T *)p_entry;
+
+ return (u64)le32_to_cpu(ep->size);
+} /* end of fat_get_entry_size */
+
+static void fat_set_entry_size(DENTRY_T *p_entry, u64 size)
+{
+ DOS_DENTRY_T *ep = (DOS_DENTRY_T *)p_entry;
+
+ ep->size = cpu_to_le32((u32)size);
+} /* end of fat_set_entry_size */
+
+static void fat_get_entry_time(DENTRY_T *p_entry, TIMESTAMP_T *tp, u8 mode)
+{
+ u16 t = 0x00, d = 0x21;
+ DOS_DENTRY_T *ep = (DOS_DENTRY_T *) p_entry;
+
+ switch (mode) {
+ case TM_CREATE:
+ t = le16_to_cpu(ep->create_time);
+ d = le16_to_cpu(ep->create_date);
+ break;
+ case TM_MODIFY:
+ t = le16_to_cpu(ep->modify_time);
+ d = le16_to_cpu(ep->modify_date);
+ break;
+ }
+
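+	/*
+	 * DOS packed format: time = hhhhh mmmmmm sssss (2-second units),
+	 * date = yyyyyyy mmmm ddddd (year relative to 1980).
+	 */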
+ tp->tz.value = 0x00;
+ tp->sec = (t & 0x001F) << 1;
+ tp->min = (t >> 5) & 0x003F;
+ tp->hour = (t >> 11);
+ tp->day = (d & 0x001F);
+ tp->mon = (d >> 5) & 0x000F;
+ tp->year = (d >> 9);
+} /* end of fat_get_entry_time */
+
+static void fat_set_entry_time(DENTRY_T *p_entry, TIMESTAMP_T *tp, u8 mode)
+{
+ u16 t, d;
+ DOS_DENTRY_T *ep = (DOS_DENTRY_T *) p_entry;
+
+ t = (tp->hour << 11) | (tp->min << 5) | (tp->sec >> 1);
+ d = (tp->year << 9) | (tp->mon << 5) | tp->day;
+
+ switch (mode) {
+ case TM_CREATE:
+ ep->create_time = cpu_to_le16(t);
+ ep->create_date = cpu_to_le16(d);
+ break;
+ case TM_MODIFY:
+ ep->modify_time = cpu_to_le16(t);
+ ep->modify_date = cpu_to_le16(d);
+ break;
+ }
+} /* end of fat_set_entry_time */
+
+static void __init_dos_entry(struct super_block *sb, DOS_DENTRY_T *ep, u32 type, u32 start_clu)
+{
+ TIMESTAMP_T tm, *tp;
+
+ fat_set_entry_type((DENTRY_T *) ep, type);
+ ep->start_clu_lo = cpu_to_le16(CLUSTER_16(start_clu));
+ ep->start_clu_hi = cpu_to_le16(CLUSTER_16(start_clu >> 16));
+ ep->size = 0;
+
+ tp = tm_now(SDFAT_SB(sb), &tm);
+ fat_set_entry_time((DENTRY_T *) ep, tp, TM_CREATE);
+ fat_set_entry_time((DENTRY_T *) ep, tp, TM_MODIFY);
+ ep->access_date = 0;
+ ep->create_time_ms = 0;
+} /* end of __init_dos_entry */
+
+static void __init_ext_entry(EXT_DENTRY_T *ep, s32 order, u8 chksum, u16 *uniname)
+{
+ s32 i;
+ u8 end = false;
+
+ fat_set_entry_type((DENTRY_T *) ep, TYPE_EXTEND);
+ ep->order = (u8) order;
+ ep->sysid = 0;
+ ep->checksum = chksum;
+ ep->start_clu = 0;
+
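+	/*
+	 * each LFN entry carries 13 UTF-16 units split across 5/6/2 fields;
+	 * slots after the NUL terminator are padded with 0xFFFF per spec.
+	 */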
+ /* unaligned name */
+ for (i = 0; i < 5; i++) {
+ if (!end) {
+ put_unaligned_le16(*uniname, &(ep->unicode_0_4[i<<1]));
+ if (*uniname == 0x0)
+ end = true;
+ else
+ uniname++;
+ } else {
+ put_unaligned_le16(0xFFFF, &(ep->unicode_0_4[i<<1]));
+ }
+ }
+
+ /* aligned name */
+ for (i = 0; i < 6; i++) {
+ if (!end) {
+ ep->unicode_5_10[i] = cpu_to_le16(*uniname);
+ if (*uniname == 0x0)
+ end = true;
+ else
+ uniname++;
+ } else {
+ ep->unicode_5_10[i] = cpu_to_le16(0xFFFF);
+ }
+ }
+
+ /* aligned name */
+ for (i = 0; i < 2; i++) {
+ if (!end) {
+ ep->unicode_11_12[i] = cpu_to_le16(*uniname);
+ if (*uniname == 0x0)
+ end = true;
+ else
+ uniname++;
+ } else {
+ ep->unicode_11_12[i] = cpu_to_le16(0xFFFF);
+ }
+ }
+} /* end of __init_ext_entry */
+
+static s32 fat_init_dir_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, u32 type,
+ u32 start_clu, u64 size)
+{
+ u64 sector;
+ DOS_DENTRY_T *dos_ep;
+
+ dos_ep = (DOS_DENTRY_T *) get_dentry_in_dir(sb, p_dir, entry, &sector);
+ if (!dos_ep)
+ return -EIO;
+
+ __init_dos_entry(sb, dos_ep, type, start_clu);
+ dcache_modify(sb, sector);
+
+ return 0;
+} /* end of fat_init_dir_entry */
+
+static s32 fat_init_ext_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, s32 num_entries,
+ UNI_NAME_T *p_uniname, DOS_NAME_T *p_dosname)
+{
+ s32 i;
+ u64 sector;
+ u8 chksum;
+ u16 *uniname = p_uniname->name;
+ DOS_DENTRY_T *dos_ep;
+ EXT_DENTRY_T *ext_ep;
+
+ dos_ep = (DOS_DENTRY_T *) get_dentry_in_dir(sb, p_dir, entry, &sector);
+ if (!dos_ep)
+ return -EIO;
+
+ dos_ep->lcase = p_dosname->name_case;
+ memcpy(dos_ep->name, p_dosname->name, DOS_NAME_LENGTH);
+ if (dcache_modify(sb, sector))
+ return -EIO;
+
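+	/*
+	 * LFN entries precede the short entry on disk and are written here in
+	 * reverse order; each carries a checksum of the 8.3 name that ties the
+	 * set together, and the last one is flagged with MSDOS_LAST_LFN (0x40).
+	 */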
+ if ((--num_entries) > 0) {
+ chksum = calc_chksum_1byte((void *) dos_ep->name, DOS_NAME_LENGTH, 0);
+
+ for (i = 1; i < num_entries; i++) {
+ ext_ep = (EXT_DENTRY_T *) get_dentry_in_dir(sb, p_dir, entry-i, &sector);
+ if (!ext_ep)
+ return -EIO;
+
+ __init_ext_entry(ext_ep, i, chksum, uniname);
+ if (dcache_modify(sb, sector))
+ return -EIO;
+ uniname += 13;
+ }
+
+ ext_ep = (EXT_DENTRY_T *) get_dentry_in_dir(sb, p_dir, entry-i, &sector);
+ if (!ext_ep)
+ return -EIO;
+
+ __init_ext_entry(ext_ep, i+MSDOS_LAST_LFN, chksum, uniname);
+ if (dcache_modify(sb, sector))
+ return -EIO;
+ }
+
+ return 0;
+} /* end of fat_init_ext_entry */
+
+static s32 fat_delete_dir_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, s32 order, s32 num_entries)
+{
+ s32 i;
+ u64 sector;
+ DENTRY_T *ep;
+
+ for (i = num_entries-1; i >= order; i--) {
+ ep = get_dentry_in_dir(sb, p_dir, entry-i, &sector);
+ if (!ep)
+ return -EIO;
+
+ fat_set_entry_type(ep, TYPE_DELETED);
+ if (dcache_modify(sb, sector))
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/* return values of fat_find_dir_entry()
+ * >= 0 : return dir entry position with the name in dir
+ * -EEXIST : (root dir, ".") it is the root dir itself
+ * -ENOENT : entry with the name does not exist
+ * -EIO : I/O error
+ */
+static inline s32 __get_dentries_per_clu(FS_INFO_T *fsi, s32 clu)
+{
+ if (IS_CLUS_FREE(clu)) /* FAT16 root_dir */
+ return fsi->dentries_in_root;
+
+ return fsi->dentries_per_clu;
+}
+
+static s32 fat_find_dir_entry(struct super_block *sb, FILE_ID_T *fid,
+ CHAIN_T *p_dir, UNI_NAME_T *p_uniname, s32 num_entries, DOS_NAME_T *p_dosname, u32 type)
+{
+ s32 i, rewind = 0, dentry = 0, end_eidx = 0;
+ s32 chksum = 0, lfn_ord = 0, lfn_len = 0;
+ s32 dentries_per_clu, num_empty = 0;
+ u32 entry_type;
+ u16 entry_uniname[14], *uniname = NULL;
+ CHAIN_T clu;
+ DENTRY_T *ep;
+ HINT_T *hint_stat = &fid->hint_stat;
+ HINT_FEMP_T candi_empty;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ /*
+ * REMARK:
+ * DOT and DOTDOT are handled by VFS layer
+ */
+
+ dentries_per_clu = __get_dentries_per_clu(fsi, p_dir->dir);
+ clu.dir = p_dir->dir;
+ clu.flags = p_dir->flags;
+
+ if (hint_stat->eidx) {
+ clu.dir = hint_stat->clu;
+ dentry = hint_stat->eidx;
+ end_eidx = dentry;
+ }
+
+ candi_empty.eidx = -1;
+
+ MMSG("lookup dir= %s\n", p_dosname->name);
+rewind:
+ while (!IS_CLUS_EOF(clu.dir)) {
+ i = dentry % dentries_per_clu;
+ for (; i < dentries_per_clu; i++, dentry++) {
+ if (rewind && (dentry == end_eidx))
+ goto not_found;
+
+ ep = get_dentry_in_dir(sb, &clu, i, NULL);
+ if (!ep)
+ return -EIO;
+
+ entry_type = fat_get_entry_type(ep);
+
+			/*
+			 * Most directory entries have long names,
+			 * so we check extended directory entries first.
+			 */
+ if (entry_type == TYPE_EXTEND) {
+ EXT_DENTRY_T *ext_ep = (EXT_DENTRY_T *)ep;
+ u32 cur_ord = (u32)ext_ep->order;
+ u32 cur_chksum = (s32)ext_ep->checksum;
+ s32 len = 13;
+ u16 unichar;
+
+ num_empty = 0;
+ candi_empty.eidx = -1;
+
+ /* check whether new lfn or not */
+ if (cur_ord & MSDOS_LAST_LFN) {
+ cur_ord &= ~(MSDOS_LAST_LFN);
+ chksum = cur_chksum;
+ len = (13 * (cur_ord-1));
+ uniname = (p_uniname->name + len);
+ lfn_ord = cur_ord + 1;
+ lfn_len = 0;
+
+ /* check minimum name length */
+ if (cur_ord &&
+ (len > p_uniname->name_len)) {
+ /* MISMATCHED NAME LENGTH */
+ lfn_len = -1;
+ }
+ len = 0;
+ }
+
+ /* invalid lfn order */
+ if (!cur_ord || (cur_ord > MAX_LFN_ORDER) ||
+ ((cur_ord + 1) != lfn_ord))
+ goto reset_dentry_set;
+
+ /* check checksum of directory entry set */
+ if (cur_chksum != chksum)
+ goto reset_dentry_set;
+
+ /* update order for next dentry */
+ lfn_ord = cur_ord;
+
+ /* check whether mismatched lfn or not */
+ if (lfn_len == -1) {
+ /* MISMATCHED LFN DENTRY SET */
+ continue;
+ }
+
+ if (!uniname) {
+ sdfat_fs_error(sb,
+ "%s : abnormal dentry "
+ "(start_clu[%u], "
+ "idx[%u])", __func__,
+ p_dir->dir, dentry);
+ sdfat_debug_bug_on(1);
+ return -EIO;
+ }
+
+ /* update position of name buffer */
+ uniname -= len;
+
+ /* get utf16 characters saved on this entry */
+ len = __extract_uni_name_from_ext_entry(ext_ep, entry_uniname, lfn_ord);
+
+ /* replace last char to null */
+ unichar = *(uniname+len);
+ *(uniname+len) = (u16)0x0;
+
+				/* compare uniname with this ext_dentry chunk */
+ if (nls_cmp_uniname(sb, uniname, entry_uniname)) {
+ /* DO HANDLE WRONG NAME */
+ lfn_len = -1;
+ } else {
+ /* add matched chars length */
+ lfn_len += len;
+ }
+
+ /* restore previous character */
+ *(uniname+len) = unichar;
+
+ /* jump to check next dentry */
+ continue;
+
+ } else if ((entry_type == TYPE_FILE) || (entry_type == TYPE_DIR)) {
+ DOS_DENTRY_T *dos_ep = (DOS_DENTRY_T *)ep;
+ u32 cur_chksum = (s32)calc_chksum_1byte(
+ (void *) dos_ep->name,
+ DOS_NAME_LENGTH, 0);
+
+ num_empty = 0;
+ candi_empty.eidx = -1;
+
+ MMSG("checking dir= %c%c%c%c%c%c%c%c%c%c%c\n",
+ dos_ep->name[0], dos_ep->name[1],
+ dos_ep->name[2], dos_ep->name[3],
+ dos_ep->name[4], dos_ep->name[5],
+ dos_ep->name[6], dos_ep->name[7],
+ dos_ep->name[8], dos_ep->name[9],
+ dos_ep->name[10]);
+
+ /*
+ * if there is no valid long filename,
+ * we should check short filename.
+ */
+ if (!lfn_len || (cur_chksum != chksum)) {
+ /* check shortname */
+ if ((p_dosname->name[0] != '\0') &&
+ !nls_cmp_sfn(sb,
+ p_dosname->name,
+ dos_ep->name)) {
+ goto found;
+ }
+ /* check name length */
+ } else if ((lfn_len > 0) &&
+ ((s32)p_uniname->name_len ==
+ lfn_len)) {
+ goto found;
+ }
+
+ /* DO HANDLE MISMATCHED SFN, FALL THROUGH */
+ } else if ((entry_type == TYPE_UNUSED) || (entry_type == TYPE_DELETED)) {
+ num_empty++;
+ if (candi_empty.eidx == -1) {
+ if (num_empty == 1) {
+ candi_empty.cur.dir = clu.dir;
+ candi_empty.cur.size = clu.size;
+ candi_empty.cur.flags = clu.flags;
+ }
+
+ if (num_empty >= num_entries) {
+ candi_empty.eidx = dentry - (num_empty - 1);
+ ASSERT(0 <= candi_empty.eidx);
+ candi_empty.count = num_empty;
+
+ if ((fid->hint_femp.eidx == -1) ||
+ (candi_empty.eidx <= fid->hint_femp.eidx)) {
+ memcpy(&fid->hint_femp,
+ &candi_empty,
+ sizeof(HINT_FEMP_T));
+ }
+ }
+ }
+
+ if (entry_type == TYPE_UNUSED)
+ goto not_found;
+ /* FALL THROUGH */
+ }
+reset_dentry_set:
+ /* TYPE_DELETED, TYPE_VOLUME OR MISMATCHED SFN */
+ lfn_ord = 0;
+ lfn_len = 0;
+ chksum = 0;
+ }
+
+ if (IS_CLUS_FREE(p_dir->dir))
+ break; /* FAT16 root_dir */
+
+ if (get_next_clus_safe(sb, &clu.dir))
+ return -EIO;
+ }
+
+not_found:
+	/* we started at a non-zero index, so we should try to find the
+	 * target from index 0 up to the index we started at.
+	 */
+ if (!rewind && end_eidx) {
+ rewind = 1;
+ dentry = 0;
+ clu.dir = p_dir->dir;
+ /* reset dentry set */
+ lfn_ord = 0;
+ lfn_len = 0;
+ chksum = 0;
+		/* reset empty hint */
+ num_empty = 0;
+ candi_empty.eidx = -1;
+ goto rewind;
+ }
+
+	/* reset hint_stat */
+ hint_stat->clu = p_dir->dir;
+ hint_stat->eidx = 0;
+ return -ENOENT;
+
+found:
+ /* next dentry we'll find is out of this cluster */
+ if (!((dentry + 1) % dentries_per_clu)) {
+ int ret = 0;
+ /* FAT16 root_dir */
+ if (IS_CLUS_FREE(p_dir->dir))
+ clu.dir = CLUS_EOF;
+ else
+ ret = get_next_clus_safe(sb, &clu.dir);
+
+ if (ret || IS_CLUS_EOF(clu.dir)) {
+			/* just reset hint_stat */
+ hint_stat->clu = p_dir->dir;
+ hint_stat->eidx = 0;
+ return dentry;
+ }
+ }
+
+ hint_stat->clu = clu.dir;
+ hint_stat->eidx = dentry + 1;
+ return dentry;
+} /* end of fat_find_dir_entry */
+
+/* returns -EIO on error */
+static s32 fat_count_ext_entries(struct super_block *sb, CHAIN_T *p_dir, s32 entry, DENTRY_T *p_entry)
+{
+ s32 count = 0;
+ u8 chksum;
+ DOS_DENTRY_T *dos_ep = (DOS_DENTRY_T *) p_entry;
+ EXT_DENTRY_T *ext_ep;
+
+ chksum = calc_chksum_1byte((void *) dos_ep->name, DOS_NAME_LENGTH, 0);
+
+ for (entry--; entry >= 0; entry--) {
+ ext_ep = (EXT_DENTRY_T *)get_dentry_in_dir(sb, p_dir, entry, NULL);
+ if (!ext_ep)
+ return -EIO;
+
+ if ((fat_get_entry_type((DENTRY_T *)ext_ep) == TYPE_EXTEND) &&
+ (ext_ep->checksum == chksum)) {
+ count++;
+ if (ext_ep->order > MSDOS_LAST_LFN)
+ return count;
+ } else {
+ return count;
+ }
+ }
+
+ return count;
+}
+
+
+/*
+ * Name Conversion Functions
+ */
+static s32 __extract_uni_name_from_ext_entry(EXT_DENTRY_T *ep, u16 *uniname, s32 order)
+{
+ s32 i, len = 0;
+
+ for (i = 0; i < 5; i++) {
+ *uniname = get_unaligned_le16(&(ep->unicode_0_4[i<<1]));
+ if (*uniname == 0x0)
+ return len;
+ uniname++;
+ len++;
+ }
+
+ if (order < 20) {
+ for (i = 0; i < 6; i++) {
+ /* FIXME : unaligned? */
+ *uniname = le16_to_cpu(ep->unicode_5_10[i]);
+ if (*uniname == 0x0)
+ return len;
+ uniname++;
+ len++;
+ }
+ } else {
+ for (i = 0; i < 4; i++) {
+ /* FIXME : unaligned? */
+ *uniname = le16_to_cpu(ep->unicode_5_10[i]);
+ if (*uniname == 0x0)
+ return len;
+ uniname++;
+ len++;
+ }
+ *uniname = 0x0; /* uniname[MAX_NAME_LENGTH] */
+ return len;
+ }
+
+ for (i = 0; i < 2; i++) {
+ /* FIXME : unaligned? */
+ *uniname = le16_to_cpu(ep->unicode_11_12[i]);
+ if (*uniname == 0x0)
+ return len;
+ uniname++;
+ len++;
+ }
+
+ *uniname = 0x0;
+ return len;
+
+} /* end of __extract_uni_name_from_ext_entry */
+
+static void fat_get_uniname_from_ext_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, u16 *uniname)
+{
+ u32 i;
+ u16 *name = uniname;
+ u32 chksum;
+
+ DOS_DENTRY_T *dos_ep =
+ (DOS_DENTRY_T *)get_dentry_in_dir(sb, p_dir, entry, NULL);
+
+ if (unlikely(!dos_ep))
+ goto invalid_lfn;
+
+ chksum = (u32)calc_chksum_1byte(
+ (void *) dos_ep->name,
+ DOS_NAME_LENGTH, 0);
+
+ for (entry--, i = 1; entry >= 0; entry--, i++) {
+ EXT_DENTRY_T *ep;
+
+ ep = (EXT_DENTRY_T *)get_dentry_in_dir(sb, p_dir, entry, NULL);
+ if (!ep)
+ goto invalid_lfn;
+
+ if (fat_get_entry_type((DENTRY_T *) ep) != TYPE_EXTEND)
+ goto invalid_lfn;
+
+ if (chksum != (u32)ep->checksum)
+ goto invalid_lfn;
+
+ if (i != (u32)(ep->order & ~(MSDOS_LAST_LFN)))
+ goto invalid_lfn;
+
+ __extract_uni_name_from_ext_entry(ep, name, (s32)i);
+ if (ep->order & MSDOS_LAST_LFN)
+ return;
+
+ name += 13;
+ }
+invalid_lfn:
+ *uniname = (u16)0x0;
+} /* end of fat_get_uniname_from_ext_entry */
+
+/* Find if the shortname exists
+ * and check if there are free entries
+ */
+static s32 __fat_find_shortname_entry(struct super_block *sb, CHAIN_T *p_dir,
+	u8 *p_dosname, s32 *offset, __attribute__((unused)) int n_entry_needed)
+{
+ u32 type;
+ s32 i, dentry = 0;
+ s32 dentries_per_clu;
+ DENTRY_T *ep = NULL;
+ DOS_DENTRY_T *dos_ep = NULL;
+ CHAIN_T clu = *p_dir;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ if (offset)
+ *offset = -1;
+
+ if (IS_CLUS_FREE(clu.dir)) /* FAT16 root_dir */
+ dentries_per_clu = fsi->dentries_in_root;
+ else
+ dentries_per_clu = fsi->dentries_per_clu;
+
+ while (!IS_CLUS_EOF(clu.dir)) {
+ for (i = 0; i < dentries_per_clu; i++, dentry++) {
+ ep = get_dentry_in_dir(sb, &clu, i, NULL);
+ if (!ep)
+ return -EIO;
+
+ type = fat_get_entry_type(ep);
+
+ if ((type == TYPE_FILE) || (type == TYPE_DIR)) {
+ dos_ep = (DOS_DENTRY_T *)ep;
+ if (!nls_cmp_sfn(sb, p_dosname, dos_ep->name)) {
+ if (offset)
+ *offset = dentry;
+ return 0;
+ }
+ }
+ }
+
+ /* fat12/16 root dir */
+ if (IS_CLUS_FREE(clu.dir))
+ break;
+
+ if (get_next_clus_safe(sb, &clu.dir))
+ return -EIO;
+ }
+ return -ENOENT;
+}
+
+#ifdef CONFIG_SDFAT_FAT32_SHORTNAME_SEQ
+static void __fat_attach_count_to_dos_name(u8 *dosname, s32 count)
+{
+ s32 i, j, length;
+ s8 str_count[6];
+
+ snprintf(str_count, sizeof(str_count), "~%d", count);
+ length = strlen(str_count);
+
+ i = j = 0;
+ while (j <= (8 - length)) {
+ i = j;
+ if (dosname[j] == ' ')
+ break;
+ if (dosname[j] & 0x80)
+ j += 2;
+ else
+ j++;
+ }
+
+ for (j = 0; j < length; i++, j++)
+ dosname[i] = (u8) str_count[j];
+
+ if (i == 7)
+ dosname[7] = ' ';
+
+} /* end of __fat_attach_count_to_dos_name */
+#endif
+
+s32 fat_generate_dos_name_new(struct super_block *sb, CHAIN_T *p_dir, DOS_NAME_T *p_dosname, s32 n_entry_needed)
+{
+ s32 i;
+ s32 baselen, err;
+ u8 work[DOS_NAME_LENGTH], buf[5];
+ u8 tail;
+
+ baselen = 8;
+ memset(work, ' ', DOS_NAME_LENGTH);
+ memcpy(work, p_dosname->name, DOS_NAME_LENGTH);
+
+ while (baselen && (work[--baselen] == ' ')) {
+ /* DO NOTHING, JUST FOR CHECK_PATCH */
+ }
+
+ if (baselen > 6)
+ baselen = 6;
+
+ BUG_ON(baselen < 0);
+
+#ifdef CONFIG_SDFAT_FAT32_SHORTNAME_SEQ
+ /* example) namei_exfat.c -> NAMEI_~1 - NAMEI_~9 */
+ work[baselen] = '~';
+ for (i = 1; i < 10; i++) {
+ // '0' + i = 1 ~ 9 ASCII
+ work[baselen + 1] = '0' + i;
+ err = __fat_find_shortname_entry(sb, p_dir, work, NULL, n_entry_needed);
+ if (err == -ENOENT) {
+ /* void return */
+ __fat_attach_count_to_dos_name(p_dosname->name, i);
+ return 0;
+ }
+
+ /* any other error */
+ if (err)
+ return err;
+ }
+#endif
+
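+	/*
+	 * fall back to a vfat-style numeric tail: 4 hex digits derived from
+	 * jiffies followed by "~N", retried until no short-name collision
+	 * remains.
+	 */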
+ i = jiffies;
+ tail = (jiffies >> 16) & 0x7;
+
+ if (baselen > 2)
+ baselen = 2;
+
+ BUG_ON(baselen < 0);
+
+ work[baselen + 4] = '~';
+ // 1 ~ 8 ASCII
+ work[baselen + 5] = '1' + tail;
+ while (1) {
+ snprintf(buf, sizeof(buf), "%04X", i & 0xffff);
+ memcpy(&work[baselen], buf, 4);
+ err = __fat_find_shortname_entry(sb, p_dir, work, NULL, n_entry_needed);
+ if (err == -ENOENT) {
+ memcpy(p_dosname->name, work, DOS_NAME_LENGTH);
+ break;
+ }
+
+ /* any other error */
+ if (err)
+ return err;
+
+ i -= 11;
+ }
+ return 0;
+} /* end of fat_generate_dos_name_new */
+
+static s32 fat_calc_num_entries(UNI_NAME_T *p_uniname)
+{
+ s32 len;
+
+ len = p_uniname->name_len;
+ if (len == 0)
+ return 0;
+
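+	/* each extended (LFN) entry stores up to 13 UTF-16 units */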
+ /* 1 dos name entry + extended entries */
+	return ((len - 1) / 13 + 2);
+
+} /* end of fat_calc_num_entries */
+
+static s32 fat_check_max_dentries(FILE_ID_T *fid)
+{
+ if ((fid->size >> DENTRY_SIZE_BITS) >= MAX_FAT_DENTRIES) {
+		/* the FAT spec allows a dir to grow up to 65536 dentries */
+ return -ENOSPC;
+ }
+ return 0;
+} /* end of fat_check_max_dentries */
+
+
+/*
+ * File Operation Functions
+ */
+static FS_FUNC_T fat_fs_func = {
+ .alloc_cluster = fat_alloc_cluster,
+ .free_cluster = fat_free_cluster,
+ .count_used_clusters = fat_count_used_clusters,
+
+ .init_dir_entry = fat_init_dir_entry,
+ .init_ext_entry = fat_init_ext_entry,
+ .find_dir_entry = fat_find_dir_entry,
+ .delete_dir_entry = fat_delete_dir_entry,
+ .get_uniname_from_ext_entry = fat_get_uniname_from_ext_entry,
+ .count_ext_entries = fat_count_ext_entries,
+ .calc_num_entries = fat_calc_num_entries,
+ .check_max_dentries = fat_check_max_dentries,
+
+ .get_entry_type = fat_get_entry_type,
+ .set_entry_type = fat_set_entry_type,
+ .get_entry_attr = fat_get_entry_attr,
+ .set_entry_attr = fat_set_entry_attr,
+ .get_entry_flag = fat_get_entry_flag,
+ .set_entry_flag = fat_set_entry_flag,
+ .get_entry_clu0 = fat_get_entry_clu0,
+ .set_entry_clu0 = fat_set_entry_clu0,
+ .get_entry_size = fat_get_entry_size,
+ .set_entry_size = fat_set_entry_size,
+ .get_entry_time = fat_get_entry_time,
+ .set_entry_time = fat_set_entry_time,
+};
+
+static FS_FUNC_T amap_fat_fs_func = {
+ .alloc_cluster = amap_fat_alloc_cluster,
+ .free_cluster = fat_free_cluster,
+ .count_used_clusters = fat_count_used_clusters,
+
+ .init_dir_entry = fat_init_dir_entry,
+ .init_ext_entry = fat_init_ext_entry,
+ .find_dir_entry = fat_find_dir_entry,
+ .delete_dir_entry = fat_delete_dir_entry,
+ .get_uniname_from_ext_entry = fat_get_uniname_from_ext_entry,
+ .count_ext_entries = fat_count_ext_entries,
+ .calc_num_entries = fat_calc_num_entries,
+ .check_max_dentries = fat_check_max_dentries,
+
+ .get_entry_type = fat_get_entry_type,
+ .set_entry_type = fat_set_entry_type,
+ .get_entry_attr = fat_get_entry_attr,
+ .set_entry_attr = fat_set_entry_attr,
+ .get_entry_flag = fat_get_entry_flag,
+ .set_entry_flag = fat_set_entry_flag,
+ .get_entry_clu0 = fat_get_entry_clu0,
+ .set_entry_clu0 = fat_set_entry_clu0,
+ .get_entry_size = fat_get_entry_size,
+ .set_entry_size = fat_set_entry_size,
+ .get_entry_time = fat_get_entry_time,
+ .set_entry_time = fat_set_entry_time,
+
+ .get_au_stat = amap_get_au_stat,
+};
+
+s32 mount_fat16(struct super_block *sb, pbr_t *p_pbr)
+{
+ s32 num_root_sectors;
+ bpb16_t *p_bpb = &(p_pbr->bpb.f16);
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ if (!p_bpb->num_fats) {
+ sdfat_msg(sb, KERN_ERR, "bogus number of FAT structure");
+ return -EINVAL;
+ }
+
+ num_root_sectors = get_unaligned_le16(p_bpb->num_root_entries) << DENTRY_SIZE_BITS;
+ num_root_sectors = ((num_root_sectors-1) >> sb->s_blocksize_bits) + 1;
+
+ fsi->sect_per_clus = p_bpb->sect_per_clus;
+ fsi->sect_per_clus_bits = ilog2(p_bpb->sect_per_clus);
+ fsi->cluster_size_bits = fsi->sect_per_clus_bits + sb->s_blocksize_bits;
+ fsi->cluster_size = 1 << fsi->cluster_size_bits;
+
+ fsi->num_FAT_sectors = le16_to_cpu(p_bpb->num_fat_sectors);
+
+ fsi->FAT1_start_sector = le16_to_cpu(p_bpb->num_reserved);
+ if (p_bpb->num_fats == 1)
+ fsi->FAT2_start_sector = fsi->FAT1_start_sector;
+ else
+ fsi->FAT2_start_sector = fsi->FAT1_start_sector + fsi->num_FAT_sectors;
+
+ fsi->root_start_sector = fsi->FAT2_start_sector + fsi->num_FAT_sectors;
+ fsi->data_start_sector = fsi->root_start_sector + num_root_sectors;
+
+ fsi->num_sectors = get_unaligned_le16(p_bpb->num_sectors);
+ if (!fsi->num_sectors)
+ fsi->num_sectors = le32_to_cpu(p_bpb->num_huge_sectors);
+
+ if (!fsi->num_sectors) {
+ sdfat_msg(sb, KERN_ERR, "bogus number of total sector count");
+ return -EINVAL;
+ }
+
+ fsi->num_clusters = (u32)((fsi->num_sectors - fsi->data_start_sector) >> fsi->sect_per_clus_bits) + CLUS_BASE;
+	/* because the cluster index starts at 2 */
+
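+	/* the FAT variant is determined solely by the cluster count, per spec */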
+ fsi->vol_type = FAT16;
+ if (fsi->num_clusters < FAT12_THRESHOLD)
+ fsi->vol_type = FAT12;
+
+ fsi->vol_id = get_unaligned_le32(p_bpb->vol_serial);
+
+ fsi->root_dir = 0;
+ fsi->dentries_in_root = get_unaligned_le16(p_bpb->num_root_entries);
+ if (!fsi->dentries_in_root) {
+ sdfat_msg(sb, KERN_ERR, "bogus number of max dentry count "
+ "of the root directory");
+ return -EINVAL;
+ }
+
+ fsi->dentries_per_clu = 1 << (fsi->cluster_size_bits - DENTRY_SIZE_BITS);
+
+ fsi->vol_flag = VOL_CLEAN;
+ fsi->clu_srch_ptr = 2;
+ fsi->used_clusters = (u32) ~0;
+
+ fsi->fs_func = &fat_fs_func;
+ fat_ent_ops_init(sb);
+
+ if (p_bpb->state & FAT_VOL_DIRTY) {
+ fsi->vol_flag |= VOL_DIRTY;
+ sdfat_log_msg(sb, KERN_WARNING, "Volume was not properly "
+ "unmounted. Some data may be corrupt. "
+ "Please run fsck.");
+ }
+
+ return 0;
+} /* end of mount_fat16 */
+
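+/*
+ * hidden sectors = sectors between the start of the disk and the start of
+ * this partition; compared against the BPB value to detect AU misalignment.
+ */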
+static sector_t __calc_hidden_sect(struct super_block *sb)
+{
+ struct block_device *bdev = sb->s_bdev;
+ sector_t hidden = 0;
+
+ if (!bdev)
+ goto out;
+
+ hidden = bdev->bd_part->start_sect;
+ /* a disk device, not a partition */
+ if (!hidden) {
+ if (bdev != bdev->bd_contains)
+ sdfat_log_msg(sb, KERN_WARNING,
+ "hidden(0), but disk has a partition table");
+ goto out;
+ }
+
+ if (sb->s_blocksize_bits != 9) {
+ ASSERT(sb->s_blocksize_bits > 9);
+ hidden >>= (sb->s_blocksize_bits - 9);
+ }
+
+out:
+ sdfat_log_msg(sb, KERN_INFO, "start_sect of part(%d) : %lld",
+ bdev ? bdev->bd_part->partno : -1, (s64)hidden);
+ return hidden;
+
+}
+
+s32 mount_fat32(struct super_block *sb, pbr_t *p_pbr)
+{
+ pbr32_t *p_bpb = (pbr32_t *)p_pbr;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ if (!p_bpb->bpb.num_fats) {
+ sdfat_msg(sb, KERN_ERR, "bogus number of FAT structure");
+ return -EINVAL;
+ }
+
+ fsi->sect_per_clus = p_bpb->bpb.sect_per_clus;
+ fsi->sect_per_clus_bits = ilog2(p_bpb->bpb.sect_per_clus);
+ fsi->cluster_size_bits = fsi->sect_per_clus_bits + sb->s_blocksize_bits;
+ fsi->cluster_size = 1 << fsi->cluster_size_bits;
+
+ fsi->num_FAT_sectors = le32_to_cpu(p_bpb->bpb.num_fat32_sectors);
+
+ fsi->FAT1_start_sector = le16_to_cpu(p_bpb->bpb.num_reserved);
+ if (p_bpb->bpb.num_fats == 1)
+ fsi->FAT2_start_sector = fsi->FAT1_start_sector;
+ else
+ fsi->FAT2_start_sector = fsi->FAT1_start_sector + fsi->num_FAT_sectors;
+
+ fsi->root_start_sector = fsi->FAT2_start_sector + fsi->num_FAT_sectors;
+ fsi->data_start_sector = fsi->root_start_sector;
+
+	/* spec violation: num_sectors should be 0 on FAT32; accept it for compatibility */
+ fsi->num_sectors = get_unaligned_le16(p_bpb->bpb.num_sectors);
+ if (!fsi->num_sectors)
+ fsi->num_sectors = le32_to_cpu(p_bpb->bpb.num_huge_sectors);
+
+ /* 2nd check */
+ if (!fsi->num_sectors) {
+ sdfat_msg(sb, KERN_ERR, "bogus number of total sector count");
+ return -EINVAL;
+ }
+
+ fsi->num_clusters = (u32)((fsi->num_sectors - fsi->data_start_sector) >> fsi->sect_per_clus_bits) + CLUS_BASE;
+	/* because the cluster index starts at 2 */
+
+ fsi->vol_type = FAT32;
+ fsi->vol_id = get_unaligned_le32(p_bpb->bsx.vol_serial);
+
+ fsi->root_dir = le32_to_cpu(p_bpb->bpb.root_cluster);
+ fsi->dentries_in_root = 0;
+ fsi->dentries_per_clu = 1 << (fsi->cluster_size_bits - DENTRY_SIZE_BITS);
+
+ fsi->vol_flag = VOL_CLEAN;
+ fsi->clu_srch_ptr = 2;
+ fsi->used_clusters = (u32) ~0;
+
+ fsi->fs_func = &fat_fs_func;
+
+ /* Delayed / smart allocation related init */
+ fsi->reserved_clusters = 0;
+
+ /* Should be initialized before calling amap_create() */
+ fat_ent_ops_init(sb);
+
+ /* AU Map Creation */
+ if (SDFAT_SB(sb)->options.improved_allocation & SDFAT_ALLOC_SMART) {
+ u32 hidden_sectors = le32_to_cpu(p_bpb->bpb.num_hid_sectors);
+ u32 calc_hid_sect = 0;
+ int ret;
+
+
+ /* calculate hidden sector size */
+ calc_hid_sect = __calc_hidden_sect(sb);
+ if (calc_hid_sect != hidden_sectors) {
+ sdfat_log_msg(sb, KERN_WARNING, "abnormal hidden "
+ "sector : bpb(%u) != ondisk(%u)",
+ hidden_sectors, calc_hid_sect);
+ if (SDFAT_SB(sb)->options.adj_hidsect) {
+ sdfat_log_msg(sb, KERN_INFO,
+ "adjustment hidden sector : "
+ "bpb(%u) -> ondisk(%u)",
+ hidden_sectors, calc_hid_sect);
+ hidden_sectors = calc_hid_sect;
+ }
+ }
+
+ SDFAT_SB(sb)->options.amap_opt.misaligned_sect = hidden_sectors;
+
+ /* calculate AU size if it's not set */
+ if (!SDFAT_SB(sb)->options.amap_opt.sect_per_au) {
+ SDFAT_SB(sb)->options.amap_opt.sect_per_au =
+ __calc_default_au_size(sb);
+ }
+
+ ret = amap_create(sb,
+ SDFAT_SB(sb)->options.amap_opt.pack_ratio,
+ SDFAT_SB(sb)->options.amap_opt.sect_per_au,
+ SDFAT_SB(sb)->options.amap_opt.misaligned_sect);
+ if (ret) {
+ sdfat_log_msg(sb, KERN_WARNING, "failed to create AMAP."
+ " disabling smart allocation. (err:%d)", ret);
+ SDFAT_SB(sb)->options.improved_allocation &= ~(SDFAT_ALLOC_SMART);
+ } else {
+ fsi->fs_func = &amap_fat_fs_func;
+ }
+ }
+
+ /* Check dependency of mount options */
+ if (SDFAT_SB(sb)->options.improved_allocation !=
+ (SDFAT_ALLOC_DELAY | SDFAT_ALLOC_SMART)) {
+ sdfat_log_msg(sb, KERN_INFO, "disabling defragmentation because"
+ " the smart and delay allocation options are not both enabled");
+ SDFAT_SB(sb)->options.defrag = 0;
+ }
+
+ if (p_bpb->bsx.state & FAT_VOL_DIRTY) {
+ fsi->vol_flag |= VOL_DIRTY;
+ sdfat_log_msg(sb, KERN_WARNING, "Volume was not properly "
+ "unmounted. Some data may be corrupt. "
+ "Please run fsck.");
+ }
+
+ return 0;
+} /* end of mount_fat32 */
+
+/* end of core_fat.c */
diff --git a/fs/sdfat/dfr.c b/fs/sdfat/dfr.c
new file mode 100644
index 000000000000..abb4710b4340
--- /dev/null
+++ b/fs/sdfat/dfr.c
@@ -0,0 +1,1372 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/************************************************************************/
+/* */
+/* @PROJECT : exFAT & FAT12/16/32 File System */
+/* @FILE : dfr.c */
+/* @PURPOSE : Defragmentation support for SDFAT32 */
+/* */
+/*----------------------------------------------------------------------*/
+/* NOTES */
+/* */
+/* */
+/************************************************************************/
+
+#include <linux/version.h>
+#include <linux/list.h>
+#include <linux/blkdev.h>
+
+#include "sdfat.h"
+#include "core.h"
+#include "amap_smart.h"
+
+#ifdef CONFIG_SDFAT_DFR
+/**
+ * @fn defrag_get_info
+ * @brief get HW params for defrag daemon
+ * @return 0 on success, -errno otherwise
+ * @param sb super block
+ * @param arg defrag info arguments
+ * @remark protected by super_block
+ */
+int
+defrag_get_info(
+ IN struct super_block *sb,
+ OUT struct defrag_info_arg *arg)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ AMAP_T *amap = SDFAT_SB(sb)->fsi.amap;
+
+ if (!arg)
+ return -EINVAL;
+
+ arg->sec_sz = sb->s_blocksize;
+ arg->clus_sz = fsi->cluster_size;
+ arg->total_sec = fsi->num_sectors;
+ arg->fat_offset_sec = fsi->FAT1_start_sector;
+ arg->fat_sz_sec = fsi->num_FAT_sectors;
+ arg->n_fat = (fsi->FAT1_start_sector == fsi->FAT2_start_sector) ? 1 : 2;
+
+ arg->sec_per_au = amap->option.au_size;
+ arg->hidden_sectors = amap->option.au_align_factor % amap->option.au_size;
+
+ return 0;
+}
+
+
+static int
+__defrag_scan_dir(
+ IN struct super_block *sb,
+ IN DOS_DENTRY_T *dos_ep,
+ IN loff_t i_pos,
+ OUT struct defrag_trav_arg *arg)
+{
+ FS_INFO_T *fsi = NULL;
+ UNI_NAME_T uniname;
+ unsigned int type = 0, start_clus = 0;
+ int err = -EPERM;
+
+ /* Check params */
+ ERR_HANDLE2((!sb || !dos_ep || !i_pos || !arg), err, -EINVAL);
+ fsi = &(SDFAT_SB(sb)->fsi);
+
+ /* Get given entry's type */
+ type = fsi->fs_func->get_entry_type((DENTRY_T *) dos_ep);
+
+ /* Check dos_ep */
+ if (!strncmp(dos_ep->name, DOS_CUR_DIR_NAME, DOS_NAME_LENGTH)) {
+ ;
+ } else if (!strncmp(dos_ep->name, DOS_PAR_DIR_NAME, DOS_NAME_LENGTH)) {
+ ;
+ } else if ((type == TYPE_DIR) || (type == TYPE_FILE)) {
+
+ /* Set start_clus */
+ SET32_HI(start_clus, le16_to_cpu(dos_ep->start_clu_hi));
+ SET32_LO(start_clus, le16_to_cpu(dos_ep->start_clu_lo));
+ arg->start_clus = start_clus;
+
+ /* Set type & i_pos */
+ if (type == TYPE_DIR)
+ arg->type = DFR_TRAV_TYPE_DIR;
+ else
+ arg->type = DFR_TRAV_TYPE_FILE;
+
+ arg->i_pos = i_pos;
+
+ /* Set name */
+ memset(&uniname, 0, sizeof(UNI_NAME_T));
+ get_uniname_from_dos_entry(sb, dos_ep, &uniname, 0x1);
+ /*
+ * FIXME: we should check whether the size of
+ * arg->name is large enough.
+ */
+ nls_uni16s_to_vfsname(sb, &uniname,
+ arg->name, sizeof(arg->name));
+
+ err = 0;
+ /* End case */
+ } else if (type == TYPE_UNUSED) {
+ err = -ENOENT;
+ } else {
+ ;
+ }
+
+error:
+ return err;
+}
+
+
+/**
+ * @fn defrag_scan_dir
+ * @brief scan given directory
+ * @return 0 on success, -errno otherwise
+ * @param sb super block
+ * @param args traverse args
+ * @remark protected by inode_lock, super_block and volume lock
+ */
+int
+defrag_scan_dir(
+ IN struct super_block *sb,
+ INOUT struct defrag_trav_arg *args)
+{
+ struct sdfat_sb_info *sbi = NULL;
+ FS_INFO_T *fsi = NULL;
+ struct defrag_trav_header *header = NULL;
+ DOS_DENTRY_T *dos_ep;
+ CHAIN_T chain;
+ int dot_found = 0, args_idx = DFR_TRAV_HEADER_IDX + 1, clus = 0, index = 0;
+ int err = 0, j = 0;
+
+ /* Check params */
+ ERR_HANDLE2((!sb || !args), err, -EINVAL);
+ sbi = SDFAT_SB(sb);
+ fsi = &(sbi->fsi);
+ header = (struct defrag_trav_header *) args;
+
+ /* Exceptional case for ROOT */
+ if (header->i_pos == DFR_TRAV_ROOT_IPOS) {
+ header->start_clus = fsi->root_dir;
+ dfr_debug("IOC_DFR_TRAV for ROOT: start_clus %08x", header->start_clus);
+ dot_found = 1;
+ }
+
+ chain.dir = header->start_clus;
+ chain.size = 0;
+ chain.flags = 0;
+
+ /* Check if this is a directory */
+ if (!dot_found) {
+ FAT32_CHECK_CLUSTER(fsi, chain.dir, err);
+ ERR_HANDLE(err);
+ dos_ep = (DOS_DENTRY_T *) get_dentry_in_dir(sb, &chain, 0, NULL);
+ ERR_HANDLE2(!dos_ep, err, -EIO);
+
+ if (strncmp(dos_ep->name, DOS_CUR_DIR_NAME, DOS_NAME_LENGTH)) {
+ err = -EINVAL;
+ dfr_err("Scan: Not a directory, err %d", err);
+ goto error;
+ }
+ }
+
+ /* For more-scan case */
+ if ((header->stat == DFR_TRAV_STAT_MORE) &&
+ (header->start_clus == sbi->dfr_hint_clus) &&
+ (sbi->dfr_hint_idx > 0)) {
+
+ index = sbi->dfr_hint_idx;
+ for (j = 0; j < (sbi->dfr_hint_idx / fsi->dentries_per_clu); j++) {
+ /* Follow FAT-chain */
+ FAT32_CHECK_CLUSTER(fsi, chain.dir, err);
+ ERR_HANDLE(err);
+ err = fat_ent_get(sb, chain.dir, &(chain.dir));
+ ERR_HANDLE(err);
+
+ if (!IS_CLUS_EOF(chain.dir)) {
+ clus++;
+ index -= fsi->dentries_per_clu;
+ } else {
+ /*
+ * This directory was modified. Stop scanning.
+ */
+ err = -EINVAL;
+ dfr_err("Scan: SCAN_MORE failed, err %d", err);
+ goto error;
+ }
+ }
+
+ /* For first-scan case */
+ } else {
+ clus = 0;
+ index = 0;
+ }
+
+scan_fat_chain:
+ /* Scan given directory and get info of children */
+ for ( ; index < fsi->dentries_per_clu; index++) {
+ DOS_DENTRY_T *dos_ep = NULL;
+ loff_t i_pos = 0;
+
+ /* Get dos_ep */
+ FAT32_CHECK_CLUSTER(fsi, chain.dir, err);
+ ERR_HANDLE(err);
+ dos_ep = (DOS_DENTRY_T *) get_dentry_in_dir(sb, &chain, index, NULL);
+ ERR_HANDLE2(!dos_ep, err, -EIO);
+
+ /* Make i_pos for this entry */
+ SET64_HI(i_pos, header->start_clus);
+ SET64_LO(i_pos, clus * fsi->dentries_per_clu + index);
+
+ err = __defrag_scan_dir(sb, dos_ep, i_pos, &args[args_idx]);
+ if (!err) {
+ /* More-scan case */
+ if (++args_idx >= (PAGE_SIZE / sizeof(struct defrag_trav_arg))) {
+ sbi->dfr_hint_clus = header->start_clus;
+ sbi->dfr_hint_idx = clus * fsi->dentries_per_clu + index + 1;
+
+ header->stat = DFR_TRAV_STAT_MORE;
+ header->nr_entries = args_idx;
+ goto error;
+ }
+ /* Error case */
+ } else if (err == -EINVAL) {
+ sbi->dfr_hint_clus = sbi->dfr_hint_idx = 0;
+ dfr_err("Scan: err %d", err);
+ goto error;
+ /* End case */
+ } else if (err == -ENOENT) {
+ sbi->dfr_hint_clus = sbi->dfr_hint_idx = 0;
+ err = 0;
+ goto done;
+ } else {
+ /* DO NOTHING */
+ }
+ err = 0;
+ }
+
+ /* Follow FAT-chain */
+ FAT32_CHECK_CLUSTER(fsi, chain.dir, err);
+ ERR_HANDLE(err);
+ err = fat_ent_get(sb, chain.dir, &(chain.dir));
+ ERR_HANDLE(err);
+
+ if (!IS_CLUS_EOF(chain.dir)) {
+ index = 0;
+ clus++;
+ goto scan_fat_chain;
+ }
+
+done:
+ /* Update header */
+ header->stat = DFR_TRAV_STAT_DONE;
+ header->nr_entries = args_idx;
+
+error:
+ return err;
+}
+
+
+static int
+__defrag_validate_cluster_prev(
+ IN struct super_block *sb,
+ IN struct defrag_chunk_info *chunk)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ CHAIN_T dir;
+ DENTRY_T *ep = NULL;
+ unsigned int entry = 0, clus = 0;
+ int err = 0;
+
+ if (chunk->prev_clus == 0) {
+ /* For the first cluster of a file */
+ dir.dir = GET64_HI(chunk->i_pos);
+ dir.flags = 0x1; // Assume non-continuous
+
+ entry = GET64_LO(chunk->i_pos);
+
+ FAT32_CHECK_CLUSTER(fsi, dir.dir, err);
+ ERR_HANDLE(err);
+ ep = get_dentry_in_dir(sb, &dir, entry, NULL);
+ if (!ep) {
+ err = -EPERM;
+ goto error;
+ }
+
+ /* should call fat_get_entry_clu0(ep) */
+ clus = fsi->fs_func->get_entry_clu0(ep);
+ if (clus != chunk->d_clus) {
+ err = -ENXIO;
+ goto error;
+ }
+ } else {
+ /* Normal case */
+ FAT32_CHECK_CLUSTER(fsi, chunk->prev_clus, err);
+ ERR_HANDLE(err);
+ err = fat_ent_get(sb, chunk->prev_clus, &clus);
+ if (err)
+ goto error;
+ if (chunk->d_clus != clus)
+ err = -ENXIO;
+ }
+
+error:
+ return err;
+}
+
+
+static int
+__defrag_validate_cluster_next(
+ IN struct super_block *sb,
+ IN struct defrag_chunk_info *chunk)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ unsigned int clus = 0;
+ int err = 0;
+
+ /* Check next_clus */
+ FAT32_CHECK_CLUSTER(fsi, (chunk->d_clus + chunk->nr_clus - 1), err);
+ ERR_HANDLE(err);
+ err = fat_ent_get(sb, (chunk->d_clus + chunk->nr_clus - 1), &clus);
+ if (err)
+ goto error;
+ if (chunk->next_clus != (clus & FAT32_EOF))
+ err = -ENXIO;
+
+error:
+ return err;
+}
+
+
+/**
+ * @fn __defrag_check_au
+ * @brief check if this AU is in use
+ * @return 0 if idle, 1 if busy
+ * @param sb super block
+ * @param clus physical cluster num
+ * @param limit free-cluster threshold (AU is treated as busy if fewer are free)
+ */
+static int
+__defrag_check_au(
+ struct super_block *sb,
+ u32 clus,
+ u32 limit)
+{
+ unsigned int nr_free = amap_get_freeclus(sb, clus);
+
+#if defined(CONFIG_SDFAT_DFR_DEBUG) && defined(CONFIG_SDFAT_DBG_MSG)
+ if (nr_free < limit) {
+ AMAP_T *amap = SDFAT_SB(sb)->fsi.amap;
+ AU_INFO_T *au = GET_AU(amap, i_AU_of_CLU(amap, clus));
+
+ dfr_debug("AU[%d] nr_free %d, limit %d", au->idx, nr_free, limit);
+ }
+#endif
+ return ((nr_free < limit) ? 1 : 0);
+}
+
+
+/**
+ * @fn defrag_validate_cluster
+ * @brief validate cluster info of given chunk
+ * @return 0 on success, -errno otherwise
+ * @param inode inode of given chunk
+ * @param chunk given chunk
+ * @param skip_prev flag to skip checking previous cluster info
+ * @remark protected by super_block and volume lock
+ */
+int
+defrag_validate_cluster(
+ IN struct inode *inode,
+ IN struct defrag_chunk_info *chunk,
+ IN int skip_prev)
+{
+ struct super_block *sb = inode->i_sb;
+ FILE_ID_T *fid = &(SDFAT_I(inode)->fid);
+ unsigned int clus = 0;
+ int err = 0, i = 0;
+
+ /* If this inode is unlink-ed, skip it */
+ if (fid->dir.dir == DIR_DELETED)
+ return -ENOENT;
+
+ /* Skip working-AU */
+ err = amap_check_working(sb, chunk->d_clus);
+ if (err)
+ return -EBUSY;
+
+ /* Check # of free_clus of belonged AU */
+ err = __defrag_check_au(inode->i_sb, chunk->d_clus, CLUS_PER_AU(sb) - chunk->au_clus);
+ if (err)
+ return -EINVAL;
+
+ /* Check chunk's clusters */
+ for (i = 0; i < chunk->nr_clus; i++) {
+ err = fsapi_map_clus(inode, chunk->f_clus + i, &clus, ALLOC_NOWHERE);
+ if (err || (chunk->d_clus + i != clus)) {
+ if (!err)
+ err = -ENXIO;
+ goto error;
+ }
+ }
+
+ /* Check next_clus */
+ err = __defrag_validate_cluster_next(sb, chunk);
+ ERR_HANDLE(err);
+
+ if (!skip_prev) {
+ /* Check prev_clus */
+ err = __defrag_validate_cluster_prev(sb, chunk);
+ ERR_HANDLE(err);
+ }
+
+error:
+ return err;
+}
+
+
+/**
+ * @fn defrag_reserve_clusters
+ * @brief reserve clusters for defrag
+ * @return 0 on success, -errno otherwise
+ * @param sb super block
+ * @param nr_clus # of clusters to reserve
+ * @remark protected by super_block and volume lock
+ */
+int
+defrag_reserve_clusters(
+ INOUT struct super_block *sb,
+ IN int nr_clus)
+{
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ FS_INFO_T *fsi = &(sbi->fsi);
+
+ if (!(sbi->options.improved_allocation & SDFAT_ALLOC_DELAY))
+ /* Nothing to do */
+ return 0;
+
+ /* Check error case */
+ if (fsi->used_clusters + fsi->reserved_clusters + nr_clus >= fsi->num_clusters - 2) {
+ return -ENOSPC;
+ } else if (fsi->reserved_clusters + nr_clus < 0) {
+ dfr_err("Reserve count: reserved_clusters %d, nr_clus %d",
+ fsi->reserved_clusters, nr_clus);
+ BUG_ON(fsi->reserved_clusters + nr_clus < 0);
+ }
+
+ sbi->dfr_reserved_clus += nr_clus;
+ fsi->reserved_clusters += nr_clus;
+
+ return 0;
+}
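+
+/*
+ * Illustrative note (not part of the original driver): the reservation
+ * check above keeps used + reserved + nr_clus below num_clusters - 2,
+ * the "- 2" accounting for the two reserved FAT entries. E.g. on a
+ * volume with num_clusters = 1000, used_clusters = 900 and
+ * reserved_clusters = 50, a request to reserve 60 more clusters fails
+ * with -ENOSPC, while defrag_reserve_clusters(sb, -10) releases 10 of
+ * the previously reserved clusters.
+ */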
+
+
+/**
+ * @fn defrag_mark_ignore
+ * @brief mark corresponding AU to be ignored
+ * @return 0 on success, -errno otherwise
+ * @param sb super block
+ * @param clus given cluster num
+ * @remark protected by super_block
+ */
+int
+defrag_mark_ignore(
+ INOUT struct super_block *sb,
+ IN unsigned int clus)
+{
+ int err = 0;
+
+ if (SDFAT_SB(sb)->options.improved_allocation & SDFAT_ALLOC_SMART)
+ err = amap_mark_ignore(sb, clus);
+
+ if (err)
+ dfr_debug("err %d", err);
+ return err;
+}
+
+
+/**
+ * @fn defrag_unmark_ignore_all
+ * @brief unmark all ignored AUs
+ * @return void
+ * @param sb super block
+ * @remark protected by super_block
+ */
+void
+defrag_unmark_ignore_all(struct super_block *sb)
+{
+ if (SDFAT_SB(sb)->options.improved_allocation & SDFAT_ALLOC_SMART)
+ amap_unmark_ignore_all(sb);
+}
+
+
+/**
+ * @fn defrag_map_cluster
+ * @brief get_block function for defrag dests
+ * @return 0 on success, -errno otherwise
+ * @param inode inode
+ * @param clu_offset logical cluster offset
+ * @param clu mapped cluster (physical)
+ * @remark protected by super_block and volume lock
+ */
+int
+defrag_map_cluster(
+ struct inode *inode,
+ unsigned int clu_offset,
+ unsigned int *clu)
+{
+ struct super_block *sb = inode->i_sb;
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+#ifdef CONFIG_SDFAT_DFR_PACKING
+ AMAP_T *amap = SDFAT_SB(sb)->fsi.amap;
+#endif
+ FILE_ID_T *fid = &(SDFAT_I(inode)->fid);
+ struct defrag_info *ino_dfr = &(SDFAT_I(inode)->dfr_info);
+ struct defrag_chunk_info *chunk = NULL;
+ CHAIN_T new_clu;
+ int i = 0, nr_new = 0, err = 0;
+
+ /* Get corresponding chunk */
+ for (i = 0; i < ino_dfr->nr_chunks; i++) {
+ chunk = &(ino_dfr->chunks[i]);
+
+ if ((chunk->f_clus <= clu_offset) && (clu_offset < chunk->f_clus + chunk->nr_clus)) {
+ /* For already allocated new_clus */
+ if (sbi->dfr_new_clus[chunk->new_idx + clu_offset - chunk->f_clus]) {
+ *clu = sbi->dfr_new_clus[chunk->new_idx + clu_offset - chunk->f_clus];
+ return 0;
+ }
+ break;
+ }
+ }
+ BUG_ON(!chunk);
+
+ fscore_set_vol_flags(sb, VOL_DIRTY, 0);
+
+ new_clu.dir = CLUS_EOF;
+ new_clu.size = 0;
+ new_clu.flags = fid->flags;
+
+ /* Allocate new cluster */
+#ifdef CONFIG_SDFAT_DFR_PACKING
+ if (amap->n_clean_au * DFR_FULL_RATIO <= amap->n_au * DFR_DEFAULT_PACKING_RATIO)
+ err = fsi->fs_func->alloc_cluster(sb, 1, &new_clu, ALLOC_COLD_PACKING);
+ else
+ err = fsi->fs_func->alloc_cluster(sb, 1, &new_clu, ALLOC_COLD_ALIGNED);
+#else
+ err = fsi->fs_func->alloc_cluster(sb, 1, &new_clu, ALLOC_COLD_ALIGNED);
+#endif
+
+ if (err) {
+ dfr_err("Map: 1 %d", 0);
+ return err;
+ }
+
+ /* Decrease reserved cluster count */
+ defrag_reserve_clusters(sb, -1);
+
+ /* Add new_clus info in ino_dfr */
+ sbi->dfr_new_clus[chunk->new_idx + clu_offset - chunk->f_clus] = new_clu.dir;
+
+ /* Make FAT-chain for new_clus */
+ for (i = 0; i < chunk->nr_clus; i++) {
+ if (!sbi->dfr_new_clus[chunk->new_idx + i])
+ break;
+ nr_new++;
+ }
+ if (nr_new == chunk->nr_clus) {
+ for (i = 0; i < chunk->nr_clus - 1; i++) {
+ FAT32_CHECK_CLUSTER(fsi, sbi->dfr_new_clus[chunk->new_idx + i], err);
+ BUG_ON(err);
+ if (fat_ent_set(sb,
+ sbi->dfr_new_clus[chunk->new_idx + i],
+ sbi->dfr_new_clus[chunk->new_idx + i + 1]))
+ return -EIO;
+ }
+ }
+
+ *clu = new_clu.dir;
+ return 0;
+}
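+
+/*
+ * Illustrative note (not part of the original driver): defrag_map_cluster()
+ * fills sbi->dfr_new_clus[] one cluster per call; the FAT links between
+ * the new clusters are only written once every slot of the chunk is
+ * populated (nr_new == chunk->nr_clus), so a partially mapped chunk
+ * leaves no dangling FAT chain. The final link to chunk->next_clus is
+ * written later by defrag_update_fat_next().
+ */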
+
+
+/**
+ * @fn defrag_writepage_end_io
+ * @brief check WB status of requested page
+ * @return void
+ * @param page page
+ */
+void
+defrag_writepage_end_io(
+ INOUT struct page *page)
+{
+ struct super_block *sb = page->mapping->host->i_sb;
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ struct defrag_info *ino_dfr = &(SDFAT_I(page->mapping->host)->dfr_info);
+ unsigned int clus_start = 0, clus_end = 0;
+ int i = 0;
+
+ /* Check if this inode is on defrag */
+ if (atomic_read(&ino_dfr->stat) != DFR_INO_STAT_REQ)
+ return;
+
+ clus_start = page->index / PAGES_PER_CLUS(sb);
+ clus_end = clus_start + 1;
+
+ /* Check each chunk in given inode */
+ for (i = 0; i < ino_dfr->nr_chunks; i++) {
+ struct defrag_chunk_info *chunk = &(ino_dfr->chunks[i]);
+ unsigned int chunk_start = 0, chunk_end = 0;
+
+ chunk_start = chunk->f_clus;
+ chunk_end = chunk->f_clus + chunk->nr_clus;
+
+ if ((clus_start >= chunk_start) && (clus_end <= chunk_end)) {
+ int off = clus_start - chunk_start;
+
+ clear_bit((page->index & (PAGES_PER_CLUS(sb) - 1)),
+ (volatile unsigned long *)&(sbi->dfr_page_wb[chunk->new_idx + off]));
+ }
+ }
+}
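+
+/*
+ * Illustrative note (assuming 4KiB pages and a 32KiB cluster, i.e.
+ * PAGES_PER_CLUS == 8): page->index 19 belongs to file cluster
+ * 19 / 8 = 2, and the clear_bit() above clears bit 19 & 7 = 3 in the
+ * write-back bitmap slot of the chunk covering that cluster, so
+ * __defrag_check_wb() can later tell when all pages of a cluster have
+ * finished write-back.
+ */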
+
+
+/**
+ * @fn __defrag_check_wb
+ * @brief check if WB for given chunk completed
+ * @return 0 on success, -errno otherwise
+ * @param sbi super block info
+ * @param chunk given chunk
+ */
+static int
+__defrag_check_wb(
+ IN struct sdfat_sb_info *sbi,
+ IN struct defrag_chunk_info *chunk)
+{
+ int err = 0, wb_i = 0, i = 0, nr_new = 0;
+
+ if (!sbi || !chunk)
+ return -EINVAL;
+
+ /* Check WB complete status first */
+ for (wb_i = 0; wb_i < chunk->nr_clus; wb_i++) {
+ if (atomic_read((atomic_t *)&(sbi->dfr_page_wb[chunk->new_idx + wb_i]))) {
+ err = -EBUSY;
+ break;
+ }
+ }
+
+ /**
+ * Check NEW_CLUS status.
+ * writepage_end_io cannot check the whole WB completion status,
+ * so we also need to check the NEW_CLUS status.
+ */
+ for (i = 0; i < chunk->nr_clus; i++)
+ if (sbi->dfr_new_clus[chunk->new_idx + i])
+ nr_new++;
+
+ if (nr_new == chunk->nr_clus) {
+ err = 0;
+ if ((wb_i != chunk->nr_clus) && (wb_i != chunk->nr_clus - 1))
+ dfr_debug("submit_fullpage_bio() called on a page (nr_clus %d, wb_i %d)",
+ chunk->nr_clus, wb_i);
+
+ BUG_ON(nr_new > chunk->nr_clus);
+ } else {
+ dfr_debug("nr_new %d, nr_clus %d", nr_new, chunk->nr_clus);
+ err = -EBUSY;
+ }
+
+ /* Update chunk's state */
+ if (!err)
+ chunk->stat |= DFR_CHUNK_STAT_WB;
+
+ return err;
+}
+
+
+static void
+__defrag_check_fat_old(
+ IN struct super_block *sb,
+ IN struct inode *inode,
+ IN struct defrag_chunk_info *chunk)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ unsigned int clus = 0;
+ int err = 0, idx = 0, max_idx = 0;
+
+ /* Get start_clus */
+ clus = SDFAT_I(inode)->fid.start_clu;
+
+ /* Follow FAT-chain */
+ #define num_clusters(val) ((val) ? (s32)(((val) - 1) >> fsi->cluster_size_bits) + 1 : 0)
+ max_idx = num_clusters(SDFAT_I(inode)->i_size_ondisk);
+ for (idx = 0; idx < max_idx; idx++) {
+
+ FAT32_CHECK_CLUSTER(fsi, clus, err);
+ ERR_HANDLE(err);
+ err = fat_ent_get(sb, clus, &clus);
+ ERR_HANDLE(err);
+
+ if ((idx < max_idx - 1) && (IS_CLUS_EOF(clus) || IS_CLUS_FREE(clus))) {
+ dfr_err("FAT: inode %p, max_idx %d, idx %d, clus %08x, "
+ "f_clus %d, nr_clus %d", inode, max_idx,
+ idx, clus, chunk->f_clus, chunk->nr_clus);
+ BUG_ON(idx < max_idx - 1);
+ goto error;
+ }
+ }
+
+error:
+ return;
+}
+
+
+static void
+__defrag_check_fat_new(
+ IN struct super_block *sb,
+ IN struct inode *inode,
+ IN struct defrag_chunk_info *chunk)
+{
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ unsigned int clus = 0;
+ int i = 0, err = 0;
+
+ /* Check start of FAT-chain */
+ if (chunk->prev_clus) {
+ FAT32_CHECK_CLUSTER(fsi, chunk->prev_clus, err);
+ BUG_ON(err);
+ err = fat_ent_get(sb, chunk->prev_clus, &clus);
+ BUG_ON(err);
+ } else {
+ clus = SDFAT_I(inode)->fid.start_clu;
+ }
+ if (sbi->dfr_new_clus[chunk->new_idx] != clus) {
+ dfr_err("FAT: inode %p, start_clus %08x, read_clus %08x",
+ inode, sbi->dfr_new_clus[chunk->new_idx], clus);
+ err = -EIO;
+ goto error;
+ }
+
+ /* Check inside of FAT-chain */
+ if (chunk->nr_clus > 1) {
+ for (i = 0; i < chunk->nr_clus - 1; i++) {
+ FAT32_CHECK_CLUSTER(fsi, sbi->dfr_new_clus[chunk->new_idx + i], err);
+ BUG_ON(err);
+ err = fat_ent_get(sb, sbi->dfr_new_clus[chunk->new_idx + i], &clus);
+ BUG_ON(err);
+ if (sbi->dfr_new_clus[chunk->new_idx + i + 1] != clus) {
+ dfr_err("FAT: inode %p, new_clus %08x, read_clus %08x",
+ inode, sbi->dfr_new_clus[chunk->new_idx], clus);
+ err = -EIO;
+ goto error;
+ }
+ }
+ clus = 0;
+ }
+
+ /* Check end of FAT-chain */
+ FAT32_CHECK_CLUSTER(fsi, sbi->dfr_new_clus[chunk->new_idx + chunk->nr_clus - 1], err);
+ BUG_ON(err);
+ err = fat_ent_get(sb, sbi->dfr_new_clus[chunk->new_idx + chunk->nr_clus - 1], &clus);
+ BUG_ON(err);
+ if ((chunk->next_clus & 0x0FFFFFFF) != (clus & 0x0FFFFFFF)) {
+ dfr_err("FAT: inode %p, next_clus %08x, read_clus %08x", inode, chunk->next_clus, clus);
+ err = -EIO;
+ }
+
+error:
+ BUG_ON(err);
+}
+
+
+/**
+ * @fn __defrag_update_dirent
+ * @brief update DIR entry for defrag req
+ * @return void
+ * @param sb super block
+ * @param chunk given chunk
+ */
+static void
+__defrag_update_dirent(
+ struct super_block *sb,
+ struct defrag_chunk_info *chunk)
+{
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ FS_INFO_T *fsi = &SDFAT_SB(sb)->fsi;
+ CHAIN_T dir;
+ DOS_DENTRY_T *dos_ep;
+ unsigned int entry = 0;
+ unsigned long long sector = 0;
+ unsigned short hi = 0, lo = 0;
+ int err = 0;
+
+ dir.dir = GET64_HI(chunk->i_pos);
+ dir.flags = 0x1; // Assume non-continuous
+
+ entry = GET64_LO(chunk->i_pos);
+
+ FAT32_CHECK_CLUSTER(fsi, dir.dir, err);
+ BUG_ON(err);
+ dos_ep = (DOS_DENTRY_T *) get_dentry_in_dir(sb, &dir, entry, &sector);
+
+ hi = GET32_HI(sbi->dfr_new_clus[chunk->new_idx]);
+ lo = GET32_LO(sbi->dfr_new_clus[chunk->new_idx]);
+
+ dos_ep->start_clu_hi = cpu_to_le16(hi);
+ dos_ep->start_clu_lo = cpu_to_le16(lo);
+
+ dcache_modify(sb, sector);
+}
+
+
+/**
+ * @fn defrag_update_fat_prev
+ * @brief update FAT chain for defrag requests
+ * @return void
+ * @param sb super block
+ * @param force flag to force FAT update
+ * @remark protected by super_block and volume lock
+ */
+void
+defrag_update_fat_prev(
+ struct super_block *sb,
+ int force)
+{
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ FS_INFO_T *fsi = &(sbi->fsi);
+ struct defrag_info *sb_dfr = &sbi->dfr_info, *ino_dfr = NULL;
+ int skip = 0, done = 0;
+
+ /* Check if FS_ERROR occurred */
+ if (sb->s_flags & MS_RDONLY) {
+ dfr_err("RDONLY partition (err %d)", -EPERM);
+ goto out;
+ }
+
+ list_for_each_entry(ino_dfr, &sb_dfr->entry, entry) {
+ struct inode *inode = &(container_of(ino_dfr, struct sdfat_inode_info, dfr_info)->vfs_inode);
+ struct sdfat_inode_info *ino_info = SDFAT_I(inode);
+ struct defrag_chunk_info *chunk_prev = NULL;
+ int i = 0, j = 0;
+
+ mutex_lock(&ino_dfr->lock);
+ BUG_ON(atomic_read(&ino_dfr->stat) != DFR_INO_STAT_REQ);
+ for (i = 0; i < ino_dfr->nr_chunks; i++) {
+ struct defrag_chunk_info *chunk = NULL;
+ int err = 0;
+
+ chunk = &(ino_dfr->chunks[i]);
+ BUG_ON(!chunk);
+
+ /* Do nothing for already passed chunk */
+ if (chunk->stat == DFR_CHUNK_STAT_PASS) {
+ done++;
+ continue;
+ }
+
+ /* Handle error case */
+ if (chunk->stat == DFR_CHUNK_STAT_ERR) {
+ err = -EINVAL;
+ goto error;
+ }
+
+ /* Double-check clusters */
+ if (chunk_prev &&
+ (chunk->f_clus == chunk_prev->f_clus + chunk_prev->nr_clus) &&
+ (chunk_prev->stat == DFR_CHUNK_STAT_PASS)) {
+
+ err = defrag_validate_cluster(inode, chunk, 1);
+
+ /* Handle continuous chunks in a file */
+ if (!err) {
+ chunk->prev_clus =
+ sbi->dfr_new_clus[chunk_prev->new_idx + chunk_prev->nr_clus - 1];
+ dfr_debug("prev->f_clus %d, prev->nr_clus %d, chunk->f_clus %d",
+ chunk_prev->f_clus, chunk_prev->nr_clus, chunk->f_clus);
+ }
+ } else {
+ err = defrag_validate_cluster(inode, chunk, 0);
+ }
+
+ if (err) {
+ dfr_err("Cluster validation: inode %p, chunk->f_clus %d, err %d",
+ inode, chunk->f_clus, err);
+ goto error;
+ }
+
+ /**
+ * Skip update_fat_prev if WB or update_fat_next not completed.
+ * Go to error case if FORCE set.
+ */
+ if (__defrag_check_wb(sbi, chunk) || (chunk->stat != DFR_CHUNK_STAT_PREP)) {
+ if (force) {
+ err = -EPERM;
+ dfr_err("Skip case: inode %p, stat %x, f_clus %d, err %d",
+ inode, chunk->stat, chunk->f_clus, err);
+ goto error;
+ }
+ skip++;
+ continue;
+ }
+
+#ifdef CONFIG_SDFAT_DFR_DEBUG
+ /* SPO test */
+ defrag_spo_test(sb, DFR_SPO_RANDOM, __func__);
+#endif
+
+ /* Update chunk's previous cluster */
+ if (chunk->prev_clus == 0) {
+ /* For the first cluster of a file */
+ /* Update ino_info->fid.start_clu */
+ ino_info->fid.start_clu = sbi->dfr_new_clus[chunk->new_idx];
+ __defrag_update_dirent(sb, chunk);
+ } else {
+ FAT32_CHECK_CLUSTER(fsi, chunk->prev_clus, err);
+ BUG_ON(err);
+ if (fat_ent_set(sb,
+ chunk->prev_clus,
+ sbi->dfr_new_clus[chunk->new_idx])) {
+ err = -EIO;
+ goto error;
+ }
+ }
+
+ /* Clear extent cache */
+ extent_cache_inval_inode(inode);
+
+ /* Update FID info */
+ ino_info->fid.hint_bmap.off = CLUS_EOF;
+ ino_info->fid.hint_bmap.clu = 0;
+
+ /* Clear old FAT-chain */
+ for (j = 0; j < chunk->nr_clus; j++)
+ defrag_free_cluster(sb, chunk->d_clus + j);
+
+ /* Mark this chunk PASS */
+ chunk->stat = DFR_CHUNK_STAT_PASS;
+ __defrag_check_fat_new(sb, inode, chunk);
+
+ done++;
+
+error:
+ if (err) {
+ /**
+ * chunk->new_idx != 0 means this chunk needs to be cleaned up
+ */
+ if (chunk->new_idx) {
+ /* Free already allocated clusters */
+ for (j = 0; j < chunk->nr_clus; j++) {
+ if (sbi->dfr_new_clus[chunk->new_idx + j]) {
+ defrag_free_cluster(sb, sbi->dfr_new_clus[chunk->new_idx + j]);
+ sbi->dfr_new_clus[chunk->new_idx + j] = 0;
+ }
+ }
+
+ __defrag_check_fat_old(sb, inode, chunk);
+ }
+
+ /**
+ * chunk->new_idx == 0 means this chunk already cleaned up
+ */
+ chunk->new_idx = 0;
+ chunk->stat = DFR_CHUNK_STAT_ERR;
+ }
+
+ chunk_prev = chunk;
+ }
+ BUG_ON(!mutex_is_locked(&ino_dfr->lock));
+ mutex_unlock(&ino_dfr->lock);
+ }
+
+out:
+ if (skip) {
+ dfr_debug("%s skipped (nr_reqs %d, done %d, skip %d)",
+ __func__, sb_dfr->nr_chunks - 1, done, skip);
+ } else {
+ /* Make dfr_reserved_clus zero */
+ if (sbi->dfr_reserved_clus > 0) {
+ if (fsi->reserved_clusters < sbi->dfr_reserved_clus) {
+ dfr_err("Reserved count: reserved_clus %d, dfr_reserved_clus %d",
+ fsi->reserved_clusters, sbi->dfr_reserved_clus);
+ BUG_ON(fsi->reserved_clusters < sbi->dfr_reserved_clus);
+ }
+
+ defrag_reserve_clusters(sb, 0 - sbi->dfr_reserved_clus);
+ }
+
+ dfr_debug("%s done (nr_reqs %d, done %d)", __func__, sb_dfr->nr_chunks - 1, done);
+ }
+}
+
+
+/**
+ * @fn defrag_update_fat_next
+ * @brief update FAT chain for defrag requests
+ * @return void
+ * @param sb super block
+ * @remark protected by super_block and volume lock
+ */
+void
+defrag_update_fat_next(
+ struct super_block *sb)
+{
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ struct defrag_info *sb_dfr = &sbi->dfr_info, *ino_dfr = NULL;
+ struct defrag_chunk_info *chunk = NULL;
+ int done = 0, i = 0, j = 0, err = 0;
+
+ /* Check if FS_ERROR occurred */
+ if (sb->s_flags & MS_RDONLY) {
+ dfr_err("RDONLY partition (err %d)", -EROFS);
+ goto out;
+ }
+
+ list_for_each_entry(ino_dfr, &sb_dfr->entry, entry) {
+
+ for (i = 0; i < ino_dfr->nr_chunks; i++) {
+ int skip = 0;
+
+ chunk = &(ino_dfr->chunks[i]);
+
+ /* Do nothing if error occurred or update_fat_next already passed */
+ if (chunk->stat == DFR_CHUNK_STAT_ERR)
+ continue;
+ if (chunk->stat & DFR_CHUNK_STAT_FAT) {
+ done++;
+ continue;
+ }
+
+ /* Skip this chunk if get_block has not been done for this chunk */
+ for (j = 0; j < chunk->nr_clus; j++) {
+ if (sbi->dfr_new_clus[chunk->new_idx + j] == 0) {
+ skip = 1;
+ break;
+ }
+ }
+ if (skip)
+ continue;
+
+ /* Update chunk's next cluster */
+ FAT32_CHECK_CLUSTER(fsi,
+ sbi->dfr_new_clus[chunk->new_idx + chunk->nr_clus - 1], err);
+ BUG_ON(err);
+ if (fat_ent_set(sb,
+ sbi->dfr_new_clus[chunk->new_idx + chunk->nr_clus - 1],
+ chunk->next_clus))
+ goto out;
+
+#ifdef CONFIG_SDFAT_DFR_DEBUG
+ /* SPO test */
+ defrag_spo_test(sb, DFR_SPO_RANDOM, __func__);
+#endif
+
+ /* Update chunk's state */
+ chunk->stat |= DFR_CHUNK_STAT_FAT;
+ done++;
+ }
+ }
+
+out:
+ dfr_debug("%s done (nr_reqs %d, done %d)", __func__, sb_dfr->nr_chunks - 1, done);
+}
+
+
+/**
+ * @fn defrag_check_discard
+ * @brief check if we can send discard for this AU, if so, send discard
+ * @return void
+ * @param sb super block
+ * @remark protected by super_block and volume lock
+ */
+void
+defrag_check_discard(
+ IN struct super_block *sb)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ AMAP_T *amap = SDFAT_SB(sb)->fsi.amap;
+ AU_INFO_T *au = NULL;
+ struct defrag_info *sb_dfr = &(SDFAT_SB(sb)->dfr_info);
+ unsigned int tmp[DFR_MAX_AU_MOVED];
+ int i = 0, j = 0;
+
+ BUG_ON(!amap);
+
+ if (!(SDFAT_SB(sb)->options.discard) ||
+ !(SDFAT_SB(sb)->options.improved_allocation & SDFAT_ALLOC_SMART))
+ return;
+
+ memset(tmp, 0, sizeof(tmp));
+
+ for (i = REQ_HEADER_IDX + 1; i < sb_dfr->nr_chunks; i++) {
+ struct defrag_chunk_info *chunk = &(sb_dfr->chunks[i]);
+ int skip = 0;
+
+ au = GET_AU(amap, i_AU_of_CLU(amap, chunk->d_clus));
+
+ /* Send DISCARD for free AU */
+ if ((IS_AU_IGNORED(au, amap)) &&
+ (amap_get_freeclus(sb, chunk->d_clus) == CLUS_PER_AU(sb))) {
+ sector_t blk = 0, nr_blks = 0;
+ unsigned int au_align_factor = amap->option.au_align_factor % amap->option.au_size;
+
+ BUG_ON(au->idx == 0);
+
+ /* Avoid multiple DISCARD */
+ for (j = 0; j < DFR_MAX_AU_MOVED; j++) {
+ if (tmp[j] == au->idx) {
+ skip = 1;
+ break;
+ }
+ }
+ if (skip == 1)
+ continue;
+
+ /* Send DISCARD cmd */
+ blk = (sector_t) (((au->idx * CLUS_PER_AU(sb)) << fsi->sect_per_clus_bits)
+ - au_align_factor);
+ nr_blks = ((sector_t)CLUS_PER_AU(sb)) << fsi->sect_per_clus_bits;
+
+ dfr_debug("Send DISCARD for AU[%d] (blk %08zx)", au->idx, blk);
+ sb_issue_discard(sb, blk, nr_blks, GFP_NOFS, 0);
+
+ /* Save previous AU's index */
+ for (j = 0; j < DFR_MAX_AU_MOVED; j++) {
+ if (!tmp[j]) {
+ tmp[j] = au->idx;
+ break;
+ }
+ }
+ }
+ }
+}
+
+
+/**
+ * @fn defrag_free_cluster
+ * @brief free an unnecessary cluster
+ * @return 0 on success, -errno otherwise
+ * @param sb super block
+ * @param clus physical cluster num
+ * @remark protected by super_block and volume lock
+ */
+int
+defrag_free_cluster(
+ struct super_block *sb,
+ unsigned int clus)
+{
+ FS_INFO_T *fsi = &SDFAT_SB(sb)->fsi;
+ unsigned int val = 0;
+ s32 err = 0;
+
+ FAT32_CHECK_CLUSTER(fsi, clus, err);
+ BUG_ON(err);
+ if (fat_ent_get(sb, clus, &val))
+ return -EIO;
+ if (val) {
+ if (fat_ent_set(sb, clus, 0))
+ return -EIO;
+ } else {
+ dfr_err("Free: Already freed, clus %08x, val %08x", clus, val);
+ BUG_ON(!val);
+ }
+
+ set_sb_dirty(sb);
+ fsi->used_clusters--;
+ if (fsi->amap)
+ amap_release_cluster(sb, clus);
+
+ return 0;
+}
+
+
+/**
+ * @fn defrag_check_defrag_required
+ * @brief check if defrag required
+ * @return 1 if required, 0 otherwise (-errno on error)
+ * @param sb super block
+ * @param totalau # of total AUs
+ * @param cleanau # of clean AUs
+ * @param fullau # of full AUs
+ * @remark protected by super_block
+ */
+int
+defrag_check_defrag_required(
+ IN struct super_block *sb,
+ OUT int *totalau,
+ OUT int *cleanau,
+ OUT int *fullau)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ AMAP_T *amap = NULL;
+ int clean_ratio = 0, frag_ratio = 0;
+ int ret = 0;
+
+ if (!sb || !(SDFAT_SB(sb)->options.defrag))
+ return 0;
+
+ /* Check DFR_DEFAULT_STOP_RATIO first */
+ fsi = &(SDFAT_SB(sb)->fsi);
+ if (fsi->used_clusters == (unsigned int)(~0)) {
+ if (fsi->fs_func->count_used_clusters(sb, &fsi->used_clusters))
+ return -EIO;
+ }
+ if (fsi->used_clusters * DFR_FULL_RATIO >= fsi->num_clusters * DFR_DEFAULT_STOP_RATIO) {
+ dfr_debug("used_clusters %d, num_clusters %d", fsi->used_clusters, fsi->num_clusters);
+ return 0;
+ }
+
+ /* Check clean/frag ratio */
+ amap = SDFAT_SB(sb)->fsi.amap;
+ BUG_ON(!amap);
+
+ clean_ratio = (amap->n_clean_au * 100) / amap->n_au;
+ if (amap->n_full_au)
+ frag_ratio = ((amap->n_au - amap->n_clean_au) * 100) / amap->n_full_au;
+ else
+ frag_ratio = ((amap->n_au - amap->n_clean_au) * 100) /
+ (fsi->used_clusters / CLUS_PER_AU(sb) + 1);
+
+ /*
+ * Wake-up defrag_daemon:
+ * when # of clean AUs too small, or frag_ratio exceeds the limit
+ */
+ if ((clean_ratio < DFR_DEFAULT_WAKEUP_RATIO) ||
+ ((clean_ratio < DFR_DEFAULT_CLEAN_RATIO) && (frag_ratio >= DFR_DEFAULT_FRAG_RATIO))) {
+
+ if (totalau)
+ *totalau = amap->n_au;
+ if (cleanau)
+ *cleanau = amap->n_clean_au;
+ if (fullau)
+ *fullau = amap->n_full_au;
+ ret = 1;
+ }
+
+ return ret;
+}
+
+
+/**
+ * @fn defrag_check_defrag_on
+ * @brief check defrag status on inode
+ * @return 1 if defrag is on, 0 otherwise
+ * @param inode inode
+ * @param start logical start addr
+ * @param end logical end addr
+ * @param cancel flag to cancel defrag
+ * @param caller caller info
+ */
+int
+defrag_check_defrag_on(
+ INOUT struct inode *inode,
+ IN loff_t start,
+ IN loff_t end,
+ IN int cancel,
+ IN const char *caller)
+{
+ struct super_block *sb = inode->i_sb;
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ FS_INFO_T *fsi = &(sbi->fsi);
+ struct defrag_info *ino_dfr = &(SDFAT_I(inode)->dfr_info);
+ unsigned int clus_start = 0, clus_end = 0;
+ int ret = 0, i = 0;
+
+ if (!inode || (start == end))
+ return 0;
+
+ mutex_lock(&ino_dfr->lock);
+ /* Check if this inode is on defrag */
+ if (atomic_read(&ino_dfr->stat) == DFR_INO_STAT_REQ) {
+
+ clus_start = start >> (fsi->cluster_size_bits);
+ clus_end = (end >> (fsi->cluster_size_bits)) +
+ ((end & (fsi->cluster_size - 1)) ? 1 : 0);
+
+ if (!ino_dfr->chunks)
+ goto error;
+
+ /* Check each chunk in given inode */
+ for (i = 0; i < ino_dfr->nr_chunks; i++) {
+ struct defrag_chunk_info *chunk = &(ino_dfr->chunks[i]);
+ unsigned int chunk_start = 0, chunk_end = 0;
+
+ /* Skip this chunk when error occurred or it already passed defrag process */
+ if ((chunk->stat == DFR_CHUNK_STAT_ERR) || (chunk->stat == DFR_CHUNK_STAT_PASS))
+ continue;
+
+ chunk_start = chunk->f_clus;
+ chunk_end = chunk->f_clus + chunk->nr_clus;
+
+ if (((clus_start >= chunk_start) && (clus_start < chunk_end)) ||
+ ((clus_end > chunk_start) && (clus_end <= chunk_end)) ||
+ ((clus_start < chunk_start) && (clus_end > chunk_end))) {
+ ret = 1;
+ if (cancel) {
+ chunk->stat = DFR_CHUNK_STAT_ERR;
+ dfr_debug("Defrag canceled: inode %p, start %08x, end %08x, caller %s",
+ inode, clus_start, clus_end, caller);
+ }
+ }
+ }
+ }
+
+error:
+ BUG_ON(!mutex_is_locked(&ino_dfr->lock));
+ mutex_unlock(&ino_dfr->lock);
+ return ret;
+}
+
+
+#ifdef CONFIG_SDFAT_DFR_DEBUG
+/**
+ * @fn defrag_spo_test
+ * @brief test SPO while defrag running
+ * @return void
+ * @param sb super block
+ * @param flag SPO debug flag
+ * @param caller caller info
+ */
+void
+defrag_spo_test(
+ struct super_block *sb,
+ int flag,
+ const char *caller)
+{
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+
+ if (!sb || !(SDFAT_SB(sb)->options.defrag))
+ return;
+
+ if (flag == sbi->dfr_spo_flag) {
+ dfr_err("Defrag SPO test (flag %d, caller %s)", flag, caller);
+ panic("Defrag SPO test");
+ }
+}
+#endif /* CONFIG_SDFAT_DFR_DEBUG */
+
+
+#endif /* CONFIG_SDFAT_DFR */
diff --git a/fs/sdfat/dfr.h b/fs/sdfat/dfr.h
new file mode 100644
index 000000000000..da98605020d3
--- /dev/null
+++ b/fs/sdfat/dfr.h
@@ -0,0 +1,261 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SDFAT_DEFRAG_H
+#define _SDFAT_DEFRAG_H
+
+#ifdef CONFIG_SDFAT_DFR
+
+/* Tuning parameters */
+#define DFR_MIN_TIMEOUT (1 * HZ) // Minimum timeout for forced-sync
+#define DFR_DEFAULT_TIMEOUT (10 * HZ) // Default timeout for forced-sync
+
+#define DFR_DEFAULT_CLEAN_RATIO (50) // Wake-up daemon when clean AU ratio under 50%
+#define DFR_DEFAULT_WAKEUP_RATIO (10) // Wake-up daemon when clean AU ratio under 10%, regardless of frag_ratio
+
+#define DFR_DEFAULT_FRAG_RATIO (130) // Wake-up daemon when frag_ratio over 130%
+
+#define DFR_DEFAULT_PACKING_RATIO (10) // Call allocator with PACKING flag, when clean AU ratio under 10%
+
+#define DFR_DEFAULT_STOP_RATIO (98) // Stop defrag_daemon when disk used ratio over 98%
+#define DFR_FULL_RATIO (100)
+
+#define DFR_MAX_AU_MOVED (16) // Maximum # of AUs for a request
+
+
+/* Debugging support */
+#define dfr_err(fmt, args...) pr_err("DFR: " fmt "\n", args)
+
+#ifdef CONFIG_SDFAT_DFR_DEBUG
+#define dfr_debug(fmt, args...) pr_debug("DFR: " fmt "\n", args)
+#else
+#define dfr_debug(fmt, args...)
+#endif
+
+
+/* Error handling */
+#define ERR_HANDLE(err) { \
+ if (err) { \
+ dfr_debug("err %d", err); \
+ goto error; \
+ } \
+}
+
+#define ERR_HANDLE2(cond, err, val) { \
+ if (cond) { \
+ err = val; \
+ dfr_debug("err %d", err); \
+ goto error; \
+ } \
+}
+
+
+/* Arguments IN-OUT */
+#define IN
+#define OUT
+#define INOUT
+
+
+/* Macros */
+#define GET64_HI(var64) ((unsigned int)((var64) >> 32))
+#define GET64_LO(var64) ((unsigned int)(((var64) << 32) >> 32))
+#define SET64_HI(dst64, var32) { (dst64) = ((loff_t)(var32) << 32) | ((dst64) & 0x00000000ffffffffLL); }
+#define SET64_LO(dst64, var32) { (dst64) = ((dst64) & 0xffffffff00000000LL) | ((var32) & 0x00000000ffffffffLL); }
+
+#define GET32_HI(var32) ((unsigned short)((var32) >> 16))
+#define GET32_LO(var32) ((unsigned short)(((var32) << 16) >> 16))
+#define SET32_HI(dst32, var16) { (dst32) = ((unsigned int)(var16) << 16) | ((dst32) & 0x0000ffff); }
+#define SET32_LO(dst32, var16) { (dst32) = ((dst32) & 0xffff0000) | ((unsigned int)(var16) & 0x0000ffff); }
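+
+/*
+ * Illustrative note (not part of the original header): i_pos packs the
+ * directory's start cluster into the high 32 bits and the dentry index
+ * into the low 32 bits. E.g. for an entry at index 5 of a directory
+ * starting at cluster 0x1234:
+ *
+ *	loff_t i_pos = 0;
+ *	SET64_HI(i_pos, 0x1234);
+ *	SET64_LO(i_pos, 5);
+ *	// i_pos == 0x0000123400000005LL
+ *	// GET64_HI(i_pos) == 0x1234, GET64_LO(i_pos) == 5
+ */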
+
+
+/* FAT32 related */
+#define FAT32_EOF (0x0fffffff)
+#define FAT32_RESERVED (0x0ffffff7)
+#define FAT32_UNUSED_CLUS (2)
+
+#define CLUS_PER_AU(sb) ( \
+ (SDFAT_SB(sb)->options.amap_opt.sect_per_au) >> (SDFAT_SB(sb)->fsi.sect_per_clus_bits) \
+)
+#define PAGES_PER_AU(sb) ( \
+ ((SDFAT_SB(sb)->options.amap_opt.sect_per_au) << ((sb)->s_blocksize_bits)) \
+ >> PAGE_SHIFT \
+)
+#define PAGES_PER_CLUS(sb) ((SDFAT_SB(sb)->fsi.cluster_size) >> PAGE_SHIFT)
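+
+/*
+ * Illustrative note (values are hypothetical): for an 8MiB allocation
+ * unit described as sect_per_au = 16384 (512-byte blocks) on a volume
+ * with sect_per_clus_bits = 3 (4KiB clusters) and 4KiB pages,
+ * CLUS_PER_AU(sb) = 16384 >> 3 = 2048,
+ * PAGES_PER_AU(sb) = (16384 << 9) >> 12 = 2048 and
+ * PAGES_PER_CLUS(sb) = 4096 >> 12 = 1.
+ */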
+
+#define FAT32_CHECK_CLUSTER(fsi, clus, err) \
+ { \
+ if (((clus) < FAT32_UNUSED_CLUS) || \
+ ((clus) > (fsi)->num_clusters) || \
+ ((clus) >= FAT32_RESERVED)) { \
+ dfr_err("clus %08x, fsi->num_clusters %08x", (clus), (fsi)->num_clusters); \
+ err = -EINVAL; \
+ } else { \
+ err = 0; \
+ } \
+ }
+
+
+/* IOCTL_DFR_INFO */
+struct defrag_info_arg {
+ /* PBS info */
+ unsigned int sec_sz;
+ unsigned int clus_sz;
+ unsigned long long total_sec;
+ unsigned long long fat_offset_sec;
+ unsigned int fat_sz_sec;
+ unsigned int n_fat;
+ unsigned int hidden_sectors;
+
+ /* AU info */
+ unsigned int sec_per_au;
+};
+
+
+/* IOC_DFR_TRAV */
+#define DFR_TRAV_HEADER_IDX (0)
+
+#define DFR_TRAV_TYPE_HEADER (0x0000000F)
+#define DFR_TRAV_TYPE_DIR (1)
+#define DFR_TRAV_TYPE_FILE (2)
+#define DFR_TRAV_TYPE_TEST (DFR_TRAV_TYPE_HEADER | 0x10000000)
+
+#define DFR_TRAV_ROOT_IPOS (0xFFFFFFFFFFFFFFFFLL)
+
+struct defrag_trav_arg {
+ int type;
+ unsigned int start_clus;
+ loff_t i_pos;
+ char name[MAX_DOSNAME_BUF_SIZE];
+ char dummy1;
+ int dummy2;
+};
+
+#define DFR_TRAV_STAT_DONE (0x1)
+#define DFR_TRAV_STAT_MORE (0x2)
+#define DFR_TRAV_STAT_ERR (0xFF)
+
+struct defrag_trav_header {
+ int type;
+ unsigned int start_clus;
+ loff_t i_pos;
+ char name[MAX_DOSNAME_BUF_SIZE];
+ char stat;
+ unsigned int nr_entries;
+};
+
+
+/* IOC_DFR_REQ */
+#define REQ_HEADER_IDX (0)
+
+#define DFR_CHUNK_STAT_ERR (0xFFFFFFFF)
+#define DFR_CHUNK_STAT_REQ (0x1)
+#define DFR_CHUNK_STAT_WB (0x2)
+#define DFR_CHUNK_STAT_FAT (0x4)
+#define DFR_CHUNK_STAT_PREP (DFR_CHUNK_STAT_REQ | DFR_CHUNK_STAT_WB | DFR_CHUNK_STAT_FAT)
+#define DFR_CHUNK_STAT_PASS (0x0000000F)
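+
+/*
+ * Illustrative note (not part of the original header): a chunk's stat
+ * advances REQ -> REQ|WB (write-back of the copied data done) ->
+ * REQ|WB|FAT (== PREP, next-pointer of the new chain written) and is
+ * finally set to PASS by defrag_update_fat_prev() once the old chain
+ * has been released; ERR marks an aborted chunk.
+ */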
+
+struct defrag_chunk_header {
+ int mode;
+ unsigned int nr_chunks;
+ loff_t dummy1;
+ int dummy2[4];
+ union {
+ int *dummy3;
+ int dummy4;
+ };
+ int dummy5;
+};
+
+struct defrag_chunk_info {
+ int stat;
+ /* File related */
+ unsigned int f_clus;
+ loff_t i_pos;
+ /* Cluster related */
+ unsigned int d_clus;
+ unsigned int nr_clus;
+ unsigned int prev_clus;
+ unsigned int next_clus;
+ union {
+ void *dummy;
+ /* req status */
+ unsigned int new_idx;
+ };
+ /* AU related */
+ unsigned int au_clus;
+};
+
+
+/* Global info */
+#define DFR_MODE_BACKGROUND (0x1)
+#define DFR_MODE_FOREGROUND (0x2)
+#define DFR_MODE_ONESHOT (0x4)
+#define DFR_MODE_BATCHED (0x8)
+#define DFR_MODE_TEST (DFR_MODE_BACKGROUND | 0x10000000)
+
+#define DFR_SB_STAT_IDLE (0)
+#define DFR_SB_STAT_REQ (1)
+#define DFR_SB_STAT_VALID (2)
+
+#define DFR_INO_STAT_IDLE (0)
+#define DFR_INO_STAT_REQ (1)
+struct defrag_info {
+ struct mutex lock;
+ atomic_t stat;
+ struct defrag_chunk_info *chunks;
+ unsigned int nr_chunks;
+ struct list_head entry;
+};
+
+
+/* SPO test flags */
+#define DFR_SPO_NONE (0)
+#define DFR_SPO_NORMAL (1)
+#define DFR_SPO_DISCARD (2)
+#define DFR_SPO_FAT_NEXT (3)
+#define DFR_SPO_RANDOM (4)
+
+
+/* Extern functions */
+int defrag_get_info(struct super_block *sb, struct defrag_info_arg *arg);
+
+int defrag_scan_dir(struct super_block *sb, struct defrag_trav_arg *arg);
+
+int defrag_validate_cluster(struct inode *inode, struct defrag_chunk_info *chunk, int skip_prev);
+int defrag_reserve_clusters(struct super_block *sb, int nr_clus);
+int defrag_mark_ignore(struct super_block *sb, unsigned int clus);
+void defrag_unmark_ignore_all(struct super_block *sb);
+
+int defrag_map_cluster(struct inode *inode, unsigned int clu_offset, unsigned int *clu);
+void defrag_writepage_end_io(struct page *page);
+
+void defrag_update_fat_prev(struct super_block *sb, int force);
+void defrag_update_fat_next(struct super_block *sb);
+void defrag_check_discard(struct super_block *sb);
+int defrag_free_cluster(struct super_block *sb, unsigned int clus);
+
+int defrag_check_defrag_required(struct super_block *sb, int *totalau, int *cleanau, int *fullau);
+int defrag_check_defrag_on(struct inode *inode, loff_t start, loff_t end, int cancel, const char *caller);
+
+#ifdef CONFIG_SDFAT_DFR_DEBUG
+void defrag_spo_test(struct super_block *sb, int flag, const char *caller);
+#endif
+
+#endif /* CONFIG_SDFAT_DFR */
+
+#endif /* _SDFAT_DEFRAG_H */
+
diff --git a/fs/sdfat/extent.c b/fs/sdfat/extent.c
new file mode 100644
index 000000000000..59e096530dda
--- /dev/null
+++ b/fs/sdfat/extent.c
@@ -0,0 +1,351 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * linux/fs/fat/cache.c
+ *
+ * Written 1992,1993 by Werner Almesberger
+ *
+ * Mar 1999. AV. Changed cache, so that it uses the starting cluster instead
+ * of inode number.
+ * May 1999. AV. Fixed the bogosity with FAT32 (read "FAT28"). Fscking lusers.
+ */
+
+/************************************************************************/
+/* */
+/* PROJECT : exFAT & FAT12/16/32 File System */
+/* FILE : extent.c */
+/* PURPOSE : Improve the performance of traversing fat chain. */
+/* */
+/*----------------------------------------------------------------------*/
+/* NOTES */
+/* */
+/* */
+/************************************************************************/
+
+#include <linux/slab.h>
+#include "sdfat.h"
+#include "core.h"
+
+#define EXTENT_CACHE_VALID 0
+/* this must be > 0. */
+#define EXTENT_MAX_CACHE 16
+
+struct extent_cache {
+ struct list_head cache_list;
+ u32 nr_contig; /* number of contiguous clusters */
+ u32 fcluster; /* cluster number in the file. */
+ u32 dcluster; /* cluster number on disk. */
+};
+
+struct extent_cache_id {
+ u32 id;
+ u32 nr_contig;
+ u32 fcluster;
+ u32 dcluster;
+};
+
+static struct kmem_cache *extent_cache_cachep;
+
+static void init_once(void *c)
+{
+ struct extent_cache *cache = (struct extent_cache *)c;
+
+ INIT_LIST_HEAD(&cache->cache_list);
+}
+
+s32 extent_cache_init(void)
+{
+ extent_cache_cachep = kmem_cache_create("sdfat_extent_cache",
+ sizeof(struct extent_cache),
+ 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
+ init_once);
+ if (!extent_cache_cachep)
+ return -ENOMEM;
+ return 0;
+}
+
+void extent_cache_shutdown(void)
+{
+ if (!extent_cache_cachep)
+ return;
+ kmem_cache_destroy(extent_cache_cachep);
+}
+
+void extent_cache_init_inode(struct inode *inode)
+{
+ EXTENT_T *extent = &(SDFAT_I(inode)->fid.extent);
+
+ spin_lock_init(&extent->cache_lru_lock);
+ extent->nr_caches = 0;
+ extent->cache_valid_id = EXTENT_CACHE_VALID + 1;
+ INIT_LIST_HEAD(&extent->cache_lru);
+}
+
+static inline struct extent_cache *extent_cache_alloc(void)
+{
+ return kmem_cache_alloc(extent_cache_cachep, GFP_NOFS);
+}
+
+static inline void extent_cache_free(struct extent_cache *cache)
+{
+ BUG_ON(!list_empty(&cache->cache_list));
+ kmem_cache_free(extent_cache_cachep, cache);
+}
+
+static inline void extent_cache_update_lru(struct inode *inode,
+ struct extent_cache *cache)
+{
+ EXTENT_T *extent = &(SDFAT_I(inode)->fid.extent);
+
+ if (extent->cache_lru.next != &cache->cache_list)
+ list_move(&cache->cache_list, &extent->cache_lru);
+}
+
+static u32 extent_cache_lookup(struct inode *inode, u32 fclus,
+ struct extent_cache_id *cid,
+ u32 *cached_fclus, u32 *cached_dclus)
+{
+ EXTENT_T *extent = &(SDFAT_I(inode)->fid.extent);
+
+ static struct extent_cache nohit = { .fcluster = 0, };
+
+ struct extent_cache *hit = &nohit, *p;
+ u32 offset = CLUS_EOF;
+
+ spin_lock(&extent->cache_lru_lock);
+ list_for_each_entry(p, &extent->cache_lru, cache_list) {
+ /* Find the cache of "fclus" or nearest cache. */
+ if (p->fcluster <= fclus && hit->fcluster < p->fcluster) {
+ hit = p;
+ if ((hit->fcluster + hit->nr_contig) < fclus) {
+ offset = hit->nr_contig;
+ } else {
+ offset = fclus - hit->fcluster;
+ break;
+ }
+ }
+ }
+ if (hit != &nohit) {
+ extent_cache_update_lru(inode, hit);
+
+ cid->id = extent->cache_valid_id;
+ cid->nr_contig = hit->nr_contig;
+ cid->fcluster = hit->fcluster;
+ cid->dcluster = hit->dcluster;
+ *cached_fclus = cid->fcluster + offset;
+ *cached_dclus = cid->dcluster + offset;
+ }
+ spin_unlock(&extent->cache_lru_lock);
+
+ return offset;
+}
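+
+/*
+ * Illustrative note (not part of the original driver): the lookup scans
+ * the LRU list for the cached run whose fcluster is closest below the
+ * requested fclus. E.g. with a cached run {fcluster 4, dcluster 100,
+ * nr_contig 3} (file clusters 4..7 map to disk clusters 100..103), a
+ * lookup of fclus 6 returns offset 2 and yields cached_fclus 6,
+ * cached_dclus 102; a lookup of fclus 9 falls past the run and only
+ * gets the hint fclus 7 / dclus 103 to resume walking the FAT from.
+ */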
+
+static struct extent_cache *extent_cache_merge(struct inode *inode,
+ struct extent_cache_id *new)
+{
+ EXTENT_T *extent = &(SDFAT_I(inode)->fid.extent);
+
+ struct extent_cache *p;
+
+ list_for_each_entry(p, &extent->cache_lru, cache_list) {
+ /* Find the same part as "new" in cluster-chain. */
+ if (p->fcluster == new->fcluster) {
+ ASSERT(p->dcluster == new->dcluster);
+ if (new->nr_contig > p->nr_contig)
+ p->nr_contig = new->nr_contig;
+ return p;
+ }
+ }
+ return NULL;
+}
+
+static void extent_cache_add(struct inode *inode, struct extent_cache_id *new)
+{
+ EXTENT_T *extent = &(SDFAT_I(inode)->fid.extent);
+
+ struct extent_cache *cache, *tmp;
+
+ if (new->fcluster == -1) /* dummy cache */
+ return;
+
+ spin_lock(&extent->cache_lru_lock);
+ if (new->id != EXTENT_CACHE_VALID &&
+ new->id != extent->cache_valid_id)
+ goto out; /* this cache was invalidated */
+
+ cache = extent_cache_merge(inode, new);
+ if (cache == NULL) {
+ if (extent->nr_caches < EXTENT_MAX_CACHE) {
+ extent->nr_caches++;
+ spin_unlock(&extent->cache_lru_lock);
+
+ tmp = extent_cache_alloc();
+ if (!tmp) {
+ spin_lock(&extent->cache_lru_lock);
+ extent->nr_caches--;
+ spin_unlock(&extent->cache_lru_lock);
+ return;
+ }
+
+ spin_lock(&extent->cache_lru_lock);
+ cache = extent_cache_merge(inode, new);
+ if (cache != NULL) {
+ extent->nr_caches--;
+ extent_cache_free(tmp);
+ goto out_update_lru;
+ }
+ cache = tmp;
+ } else {
+ struct list_head *p = extent->cache_lru.prev;
+
+ cache = list_entry(p, struct extent_cache, cache_list);
+ }
+ cache->fcluster = new->fcluster;
+ cache->dcluster = new->dcluster;
+ cache->nr_contig = new->nr_contig;
+ }
+out_update_lru:
+ extent_cache_update_lru(inode, cache);
+out:
+ spin_unlock(&extent->cache_lru_lock);
+}
+
+/*
+ * Cache invalidation occurs rarely, thus the LRU chain is not updated. It
+ * fixes itself after a while.
+ */
+static void __extent_cache_inval_inode(struct inode *inode)
+{
+ EXTENT_T *extent = &(SDFAT_I(inode)->fid.extent);
+ struct extent_cache *cache;
+
+ while (!list_empty(&extent->cache_lru)) {
+ cache = list_entry(extent->cache_lru.next,
+ struct extent_cache, cache_list);
+ list_del_init(&cache->cache_list);
+ extent->nr_caches--;
+ extent_cache_free(cache);
+ }
+ /* Update. The copy of caches before this id is discarded. */
+ extent->cache_valid_id++;
+ if (extent->cache_valid_id == EXTENT_CACHE_VALID)
+ extent->cache_valid_id++;
+}
+
+void extent_cache_inval_inode(struct inode *inode)
+{
+ EXTENT_T *extent = &(SDFAT_I(inode)->fid.extent);
+
+ spin_lock(&extent->cache_lru_lock);
+ __extent_cache_inval_inode(inode);
+ spin_unlock(&extent->cache_lru_lock);
+}
+
+static inline s32 cache_contiguous(struct extent_cache_id *cid, u32 dclus)
+{
+ cid->nr_contig++;
+ return ((cid->dcluster + cid->nr_contig) == dclus);
+}
+
+static inline void cache_init(struct extent_cache_id *cid, u32 fclus, u32 dclus)
+{
+ cid->id = EXTENT_CACHE_VALID;
+ cid->fcluster = fclus;
+ cid->dcluster = dclus;
+ cid->nr_contig = 0;
+}
+
+s32 extent_get_clus(struct inode *inode, u32 cluster, u32 *fclus,
+ u32 *dclus, u32 *last_dclus, s32 allow_eof)
+{
+ struct super_block *sb = inode->i_sb;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ u32 limit = fsi->num_clusters;
+ FILE_ID_T *fid = &(SDFAT_I(inode)->fid);
+ struct extent_cache_id cid;
+ u32 content;
+
+ /* FOR GRACEFUL ERROR HANDLING */
+ if (IS_CLUS_FREE(fid->start_clu)) {
+ sdfat_fs_error(sb, "invalid access to "
+ "extent cache (entry 0x%08x)", fid->start_clu);
+ ASSERT(0);
+ return -EIO;
+ }
+
+ *fclus = 0;
+ *dclus = fid->start_clu;
+ *last_dclus = *dclus;
+
+ /*
+ * Don't use extent_cache if zero offset or non-cluster allocation
+ */
+ if ((cluster == 0) || IS_CLUS_EOF(*dclus))
+ return 0;
+
+ cache_init(&cid, CLUS_EOF, CLUS_EOF);
+
+ if (extent_cache_lookup(inode, cluster, &cid, fclus, dclus) == CLUS_EOF) {
+ /*
+ * dummy, always not contiguous
+ * This is reinitialized by cache_init(), later.
+ */
+ ASSERT((cid.id == EXTENT_CACHE_VALID)
+ && (cid.fcluster == CLUS_EOF)
+ && (cid.dcluster == CLUS_EOF)
+ && (cid.nr_contig == 0));
+ }
+
+ if (*fclus == cluster)
+ return 0;
+
+ while (*fclus < cluster) {
+ /* prevent the infinite loop of cluster chain */
+ if (*fclus > limit) {
+ sdfat_fs_error(sb,
+ "%s: detected a cluster chain loop"
+ " (fclus %u)", __func__,
+ (*fclus));
+ return -EIO;
+ }
+
+ if (fat_ent_get_safe(sb, *dclus, &content))
+ return -EIO;
+
+ *last_dclus = *dclus;
+ *dclus = content;
+ (*fclus)++;
+
+ if (IS_CLUS_EOF(content)) {
+ if (!allow_eof) {
+ sdfat_fs_error(sb,
+ "%s: invalid cluster chain (fclus %u, "
+ "last_clus 0x%08x is EOF)",
+ __func__, *fclus, (*last_dclus));
+ return -EIO;
+ }
+
+ break;
+ }
+
+ if (!cache_contiguous(&cid, *dclus))
+ cache_init(&cid, *fclus, *dclus);
+ }
+
+ extent_cache_add(inode, &cid);
+ return 0;
+}
diff --git a/fs/sdfat/fatent.c b/fs/sdfat/fatent.c
new file mode 100644
index 000000000000..fca32a50d336
--- /dev/null
+++ b/fs/sdfat/fatent.c
@@ -0,0 +1,420 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/************************************************************************/
+/* */
+/* PROJECT : exFAT & FAT12/16/32 File System */
+/* FILE : fatent.c */
+/* PURPOSE : sdFAT FAT entry manager */
+/* */
+/*----------------------------------------------------------------------*/
+/* NOTES */
+/* */
+/* */
+/************************************************************************/
+
+#include <asm/unaligned.h>
+
+#include "sdfat.h"
+#include "core.h"
+
+/*----------------------------------------------------------------------*/
+/* Global Variable Definitions */
+/*----------------------------------------------------------------------*/
+/* All buffer structures are protected w/ fsi->v_sem */
+
+/*----------------------------------------------------------------------*/
+/* Static functions */
+/*----------------------------------------------------------------------*/
+
+/*======================================================================*/
+/* FAT Read/Write Functions */
+/*======================================================================*/
+/* in : sb, loc
+ * out: content
+ * returns 0 on success, -errno on error
+ */
+static s32 exfat_ent_get(struct super_block *sb, u32 loc, u32 *content)
+{
+ u32 off, _content;
+ u64 sec;
+ u8 *fat_sector;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ /* fsi->vol_type == EXFAT */
+ sec = fsi->FAT1_start_sector + (loc >> (sb->s_blocksize_bits-2));
+ off = (loc << 2) & (u32)(sb->s_blocksize - 1);
+
+ fat_sector = fcache_getblk(sb, sec);
+ if (!fat_sector)
+ return -EIO;
+
+ _content = le32_to_cpu(*(__le32 *)(&fat_sector[off]));
+
+ /* remap reserved clusters to simplify code */
+ if (_content >= CLUSTER_32(0xFFFFFFF8))
+ _content = CLUS_EOF;
+
+ *content = CLUSTER_32(_content);
+ return 0;
+}
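+
+/*
+ * Illustrative note (not part of the original driver): each exFAT/FAT32
+ * entry is 4 bytes, so entry 'loc' lives at byte offset loc * 4 inside
+ * the FAT. With a 4KiB block size (s_blocksize_bits == 12) entry 2000
+ * is found at sector FAT1_start_sector + (2000 >> 10) and byte offset
+ * (2000 << 2) & 4095 = 8000 - 4096 = 3904.
+ */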
+
+static s32 exfat_ent_set(struct super_block *sb, u32 loc, u32 content)
+{
+ u32 off;
+ u64 sec;
+ u8 *fat_sector;
+ __le32 *fat_entry;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ sec = fsi->FAT1_start_sector + (loc >> (sb->s_blocksize_bits-2));
+ off = (loc << 2) & (u32)(sb->s_blocksize - 1);
+
+ fat_sector = fcache_getblk(sb, sec);
+ if (!fat_sector)
+ return -EIO;
+
+ fat_entry = (__le32 *)&(fat_sector[off]);
+ *fat_entry = cpu_to_le32(content);
+
+ return fcache_modify(sb, sec);
+}
+
+#define FATENT_FAT32_VALID_MASK (0x0FFFFFFFU)
+#define FATENT_FAT32_IGNORE_MASK (0xF0000000U)
+static s32 fat32_ent_get(struct super_block *sb, u32 loc, u32 *content)
+{
+ u32 off, _content;
+ u64 sec;
+ u8 *fat_sector;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ sec = fsi->FAT1_start_sector + (loc >> (sb->s_blocksize_bits-2));
+ off = (loc << 2) & (u32)(sb->s_blocksize - 1);
+
+ fat_sector = fcache_getblk(sb, sec);
+ if (!fat_sector)
+ return -EIO;
+
+ _content = le32_to_cpu(*(__le32 *)(&fat_sector[off]));
+ _content &= FATENT_FAT32_VALID_MASK;
+
+ /* remap reserved clusters to simplify code */
+ if (_content == CLUSTER_32(0x0FFFFFF7U))
+ _content = CLUS_BAD;
+ else if (_content >= CLUSTER_32(0x0FFFFFF8U))
+ _content = CLUS_EOF;
+
+ *content = CLUSTER_32(_content);
+ return 0;
+}
+
+static s32 fat32_ent_set(struct super_block *sb, u32 loc, u32 content)
+{
+ u32 off;
+ u64 sec;
+ u8 *fat_sector;
+ __le32 *fat_entry;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ content &= FATENT_FAT32_VALID_MASK;
+
+ sec = fsi->FAT1_start_sector + (loc >> (sb->s_blocksize_bits-2));
+ off = (loc << 2) & (u32)(sb->s_blocksize - 1);
+
+ fat_sector = fcache_getblk(sb, sec);
+ if (!fat_sector)
+ return -EIO;
+
+ fat_entry = (__le32 *)&(fat_sector[off]);
+ content |= (le32_to_cpu(*fat_entry) & FATENT_FAT32_IGNORE_MASK);
+ *fat_entry = cpu_to_le32(content);
+
+ return fcache_modify(sb, sec);
+}
+
+#define FATENT_FAT16_VALID_MASK (0x0000FFFFU)
+static s32 fat16_ent_get(struct super_block *sb, u32 loc, u32 *content)
+{
+ u32 off, _content;
+ u64 sec;
+ u8 *fat_sector;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ sec = fsi->FAT1_start_sector + (loc >> (sb->s_blocksize_bits-1));
+ off = (loc << 1) & (u32)(sb->s_blocksize - 1);
+
+ fat_sector = fcache_getblk(sb, sec);
+ if (!fat_sector)
+ return -EIO;
+
+ _content = (u32)le16_to_cpu(*(__le16 *)(&fat_sector[off]));
+ _content &= FATENT_FAT16_VALID_MASK;
+
+ /* remap reserved clusters to simplify code */
+ if (_content == CLUSTER_16(0xFFF7U))
+ _content = CLUS_BAD;
+ else if (_content >= CLUSTER_16(0xFFF8U))
+ _content = CLUS_EOF;
+
+ *content = CLUSTER_32(_content);
+ return 0;
+}
+
+static s32 fat16_ent_set(struct super_block *sb, u32 loc, u32 content)
+{
+ u32 off;
+ u64 sec;
+ u8 *fat_sector;
+ __le16 *fat_entry;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ content &= FATENT_FAT16_VALID_MASK;
+
+ sec = fsi->FAT1_start_sector + (loc >> (sb->s_blocksize_bits-1));
+ off = (loc << 1) & (u32)(sb->s_blocksize - 1);
+
+ fat_sector = fcache_getblk(sb, sec);
+ if (!fat_sector)
+ return -EIO;
+
+ fat_entry = (__le16 *)&(fat_sector[off]);
+ *fat_entry = cpu_to_le16(content);
+
+ return fcache_modify(sb, sec);
+}
+
+#define FATENT_FAT12_VALID_MASK (0x00000FFFU)
+static s32 fat12_ent_get(struct super_block *sb, u32 loc, u32 *content)
+{
+ u32 off, _content;
+ u64 sec;
+ u8 *fat_sector;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ sec = fsi->FAT1_start_sector + ((loc + (loc >> 1)) >> sb->s_blocksize_bits);
+ off = (loc + (loc >> 1)) & (u32)(sb->s_blocksize - 1);
+
+ fat_sector = fcache_getblk(sb, sec);
+ if (!fat_sector)
+ return -EIO;
+
+ if (off == (u32)(sb->s_blocksize - 1)) {
+ _content = (u32) fat_sector[off];
+
+ fat_sector = fcache_getblk(sb, ++sec);
+ if (!fat_sector)
+ return -EIO;
+
+ _content |= (u32) fat_sector[0] << 8;
+ } else {
+ _content = get_unaligned_le16(&fat_sector[off]);
+ }
+
+ if (loc & 1)
+ _content >>= 4;
+
+ _content &= FATENT_FAT12_VALID_MASK;
+
+ /* remap reserved clusters to simplify code */
+ if (_content == CLUSTER_16(0x0FF7U))
+ _content = CLUS_BAD;
+ else if (_content >= CLUSTER_16(0x0FF8U))
+ _content = CLUS_EOF;
+
+ *content = CLUSTER_32(_content);
+ return 0;
+}
+
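+/*
+ * FAT12 packing illustration: two 12-bit entries share three bytes,
+ * and entry N starts at byte offset N + N/2.  With (hypothetical)
+ * on-disk bytes {0x12, 0x34, 0x56}:
+ *   entry 0 (even): get_unaligned_le16() at offset 0 reads 0x3412;
+ *                   masking with FATENT_FAT12_VALID_MASK gives 0x412
+ *   entry 1 (odd) : get_unaligned_le16() at offset 1 reads 0x5634;
+ *                   shifting right by 4 gives 0x563
+ */
+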
+static s32 fat12_ent_set(struct super_block *sb, u32 loc, u32 content)
+{
+ u32 off;
+ u64 sec;
+ u8 *fat_sector, *fat_entry;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ content &= FATENT_FAT12_VALID_MASK;
+
+ sec = fsi->FAT1_start_sector + ((loc + (loc >> 1)) >> sb->s_blocksize_bits);
+ off = (loc + (loc >> 1)) & (u32)(sb->s_blocksize - 1);
+
+ fat_sector = fcache_getblk(sb, sec);
+ if (!fat_sector)
+ return -EIO;
+
+ if (loc & 1) { /* odd */
+
+ content <<= 4;
+
+ if (off == (u32)(sb->s_blocksize-1)) {
+ fat_sector[off] = (u8)(content | (fat_sector[off] & 0x0F));
+ if (fcache_modify(sb, sec))
+ return -EIO;
+
+ fat_sector = fcache_getblk(sb, ++sec);
+ if (!fat_sector)
+ return -EIO;
+
+ fat_sector[0] = (u8)(content >> 8);
+ } else {
+ fat_entry = &(fat_sector[off]);
+ content |= 0x000F & get_unaligned_le16(fat_entry);
+ put_unaligned_le16(content, fat_entry);
+ }
+ } else { /* even */
+ fat_sector[off] = (u8)(content);
+
+ if (off == (u32)(sb->s_blocksize-1)) {
+			/* low byte already stored at fat_sector[off] above */
+ if (fcache_modify(sb, sec))
+ return -EIO;
+
+ fat_sector = fcache_getblk(sb, ++sec);
+ if (!fat_sector)
+ return -EIO;
+
+ fat_sector[0] = (u8)((fat_sector[0] & 0xF0) | (content >> 8));
+ } else {
+ fat_entry = &(fat_sector[off]);
+ content |= 0xF000 & get_unaligned_le16(fat_entry);
+ put_unaligned_le16(content, fat_entry);
+ }
+ }
+ return fcache_modify(sb, sec);
+}
+
+
+static FATENT_OPS_T fat12_ent_ops = {
+ fat12_ent_get,
+ fat12_ent_set
+};
+
+static FATENT_OPS_T fat16_ent_ops = {
+ fat16_ent_get,
+ fat16_ent_set
+};
+
+static FATENT_OPS_T fat32_ent_ops = {
+ fat32_ent_get,
+ fat32_ent_set
+};
+
+static FATENT_OPS_T exfat_ent_ops = {
+ exfat_ent_get,
+ exfat_ent_set
+};
+
+s32 fat_ent_ops_init(struct super_block *sb)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ switch (fsi->vol_type) {
+ case EXFAT:
+ fsi->fatent_ops = &exfat_ent_ops;
+ break;
+ case FAT32:
+ fsi->fatent_ops = &fat32_ent_ops;
+ break;
+ case FAT16:
+ fsi->fatent_ops = &fat16_ent_ops;
+ break;
+ case FAT12:
+ fsi->fatent_ops = &fat12_ent_ops;
+ break;
+ default:
+ fsi->fatent_ops = NULL;
+ EMSG("Unknown volume type : %d", (int)fsi->vol_type);
+ return -ENOTSUPP;
+ }
+
+ return 0;
+}
+
+static inline bool is_reserved_clus(u32 clus)
+{
+ if (IS_CLUS_FREE(clus))
+ return true;
+ if (IS_CLUS_EOF(clus))
+ return true;
+ if (IS_CLUS_BAD(clus))
+ return true;
+ return false;
+}
+
+static inline bool is_valid_clus(FS_INFO_T *fsi, u32 clus)
+{
+ if (clus < CLUS_BASE || fsi->num_clusters <= clus)
+ return false;
+ return true;
+}
+
+s32 fat_ent_get(struct super_block *sb, u32 loc, u32 *content)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ s32 err;
+
+ if (!is_valid_clus(fsi, loc)) {
+ sdfat_fs_error(sb, "invalid access to FAT (entry 0x%08x)", loc);
+ return -EIO;
+ }
+
+ err = fsi->fatent_ops->ent_get(sb, loc, content);
+ if (err) {
+		sdfat_fs_error(sb, "failed to access FAT "
+ "(entry 0x%08x, err:%d)", loc, err);
+ return err;
+ }
+
+ if (!is_reserved_clus(*content) && !is_valid_clus(fsi, *content)) {
+ sdfat_fs_error(sb, "invalid access to FAT (entry 0x%08x) "
+ "bogus content (0x%08x)", loc, *content);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+s32 fat_ent_set(struct super_block *sb, u32 loc, u32 content)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ return fsi->fatent_ops->ent_set(sb, loc, content);
+}
+
+s32 fat_ent_get_safe(struct super_block *sb, u32 loc, u32 *content)
+{
+ s32 err = fat_ent_get(sb, loc, content);
+
+ if (err)
+ return err;
+
+ if (IS_CLUS_FREE(*content)) {
+ sdfat_fs_error(sb, "invalid access to FAT free cluster "
+ "(entry 0x%08x)", loc);
+ return -EIO;
+ }
+
+ if (IS_CLUS_BAD(*content)) {
+ sdfat_fs_error(sb, "invalid access to FAT bad cluster "
+ "(entry 0x%08x)", loc);
+ return -EIO;
+ }
+
+ return 0;
+}
+
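+#if 0	/* illustrative sketch only -- not part of the driver */
+/*
+ * How fat_ent_get_safe() is meant to be consumed when walking a
+ * cluster chain.  sdfat_count_chain() is a hypothetical helper, not
+ * an existing sdFAT function.
+ */
+static s32 sdfat_count_chain(struct super_block *sb, u32 start, u32 *count)
+{
+	u32 clu = start, n = 0;
+	s32 err;
+
+	/* every valid chain ends with an EOF mark remapped by ent_get */
+	while (!IS_CLUS_EOF(clu)) {
+		n++;
+		err = fat_ent_get_safe(sb, clu, &clu);
+		if (err)
+			return err;
+	}
+
+	*count = n;
+	return 0;
+}
+#endif
+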
+/* end of fatent.c */
diff --git a/fs/sdfat/misc.c b/fs/sdfat/misc.c
new file mode 100644
index 000000000000..a006e898816f
--- /dev/null
+++ b/fs/sdfat/misc.c
@@ -0,0 +1,464 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * linux/fs/fat/misc.c
+ *
+ * Written 1992,1993 by Werner Almesberger
+ * 22/11/2000 - Fixed fat_date_unix2dos for dates earlier than 01/01/1980
+ * and date_dos2unix for date==0 by Igor Zhbanov(bsg@uniyar.ac.ru)
+ */
+
+/************************************************************************/
+/* */
+/* PROJECT : exFAT & FAT12/16/32 File System */
+/* FILE : misc.c */
+/* PURPOSE : Helper functions for checksum and sdFAT error handling */
+/* */
+/*----------------------------------------------------------------------*/
+/* NOTES */
+/* */
+/* */
+/************************************************************************/
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/buffer_head.h>
+#include <linux/time.h>
+#include "sdfat.h"
+#include "version.h"
+
+#ifdef CONFIG_SDFAT_SUPPORT_STLOG
+#ifdef CONFIG_PROC_FSLOG
+#include <linux/fslog.h>
+#else
+#include <linux/stlog.h>
+#endif
+#else
+#define ST_LOG(fmt, ...)
+#endif
+
+/*************************************************************************
+ * FUNCTIONS WHICH HAVE KERNEL VERSION DEPENDENCIES
+ *************************************************************************/
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 18, 0)
+#define CURRENT_TIME_SEC timespec64_trunc(current_kernel_time64(), NSEC_PER_SEC)
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 12, 0)
+#define CURRENT_TIME_SEC timespec_trunc(current_kernel_time(), NSEC_PER_SEC)
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0) */
+ /* EMPTY */
+#endif
+
+
+#ifdef CONFIG_SDFAT_UEVENT
+static struct kobject sdfat_uevent_kobj;
+
+int sdfat_uevent_init(struct kset *sdfat_kset)
+{
+ int err;
+ struct kobj_type *ktype = get_ktype(&sdfat_kset->kobj);
+
+ sdfat_uevent_kobj.kset = sdfat_kset;
+ err = kobject_init_and_add(&sdfat_uevent_kobj, ktype, NULL, "uevent");
+ if (err)
+ pr_err("[SDFAT] Unable to create sdfat uevent kobj\n");
+
+ return err;
+}
+
+void sdfat_uevent_uninit(void)
+{
+ kobject_del(&sdfat_uevent_kobj);
+ memset(&sdfat_uevent_kobj, 0, sizeof(struct kobject));
+}
+
+void sdfat_uevent_ro_remount(struct super_block *sb)
+{
+ struct block_device *bdev = sb->s_bdev;
+ dev_t bd_dev = bdev ? bdev->bd_dev : 0;
+
+ char major[16], minor[16];
+ char *envp[] = { major, minor, NULL };
+
+ snprintf(major, sizeof(major), "MAJOR=%d", MAJOR(bd_dev));
+ snprintf(minor, sizeof(minor), "MINOR=%d", MINOR(bd_dev));
+
+ kobject_uevent_env(&sdfat_uevent_kobj, KOBJ_CHANGE, envp);
+
+ ST_LOG("[SDFAT](%s[%d:%d]): Uevent triggered\n",
+ sb->s_id, MAJOR(bd_dev), MINOR(bd_dev));
+}
+#endif
+
+/*
+ * sdfat_fs_error reports a file system problem that might indicate a data
+ * corruption/inconsistency. Depending on the 'errors' mount option, either
+ * panic() is called, or an error message is printed and nothing else is
+ * done, or the filesystem is remounted read-only (the default behavior).
+ * In case the file system is remounted read-only, it can be made writable
+ * again by remounting it.
+ */
+void __sdfat_fs_error(struct super_block *sb, int report, const char *fmt, ...)
+{
+ struct sdfat_mount_options *opts = &SDFAT_SB(sb)->options;
+ va_list args;
+ struct va_format vaf;
+ struct block_device *bdev = sb->s_bdev;
+ dev_t bd_dev = bdev ? bdev->bd_dev : 0;
+
+ if (report) {
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ pr_err("[SDFAT](%s[%d:%d]):ERR: %pV\n",
+ sb->s_id, MAJOR(bd_dev), MINOR(bd_dev), &vaf);
+#ifdef CONFIG_SDFAT_SUPPORT_STLOG
+ if (opts->errors == SDFAT_ERRORS_RO && !(sb->s_flags & MS_RDONLY)) {
+ ST_LOG("[SDFAT](%s[%d:%d]):ERR: %pV\n",
+ sb->s_id, MAJOR(bd_dev), MINOR(bd_dev), &vaf);
+ }
+#endif
+ va_end(args);
+ }
+
+ if (opts->errors == SDFAT_ERRORS_PANIC) {
+ panic("[SDFAT](%s[%d:%d]): fs panic from previous error\n",
+ sb->s_id, MAJOR(bd_dev), MINOR(bd_dev));
+ } else if (opts->errors == SDFAT_ERRORS_RO && !(sb->s_flags & MS_RDONLY)) {
+ sb->s_flags |= MS_RDONLY;
+ sdfat_statistics_set_mnt_ro();
+ pr_err("[SDFAT](%s[%d:%d]): Filesystem has been set "
+ "read-only\n", sb->s_id, MAJOR(bd_dev), MINOR(bd_dev));
+#ifdef CONFIG_SDFAT_SUPPORT_STLOG
+ ST_LOG("[SDFAT](%s[%d:%d]): Filesystem has been set read-only\n",
+ sb->s_id, MAJOR(bd_dev), MINOR(bd_dev));
+#endif
+ sdfat_uevent_ro_remount(sb);
+ }
+}
+EXPORT_SYMBOL(__sdfat_fs_error);
+
+/**
+ * __sdfat_msg() - print preformatted sdFAT-specific messages.
+ * All logs except those issued through sdfat_fs_error() should be written by __sdfat_msg().
+ * If 'st' is set, the log is propagated to ST_LOG.
+ */
+void __sdfat_msg(struct super_block *sb, const char *level, int st, const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list args;
+ struct block_device *bdev = sb->s_bdev;
+ dev_t bd_dev = bdev ? bdev->bd_dev : 0;
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+	/* 'level' is the KERN_ facility prefix */
+ printk("%s[SDFAT](%s[%d:%d]): %pV\n", level,
+ sb->s_id, MAJOR(bd_dev), MINOR(bd_dev), &vaf);
+#ifdef CONFIG_SDFAT_SUPPORT_STLOG
+ if (st) {
+ ST_LOG("[SDFAT](%s[%d:%d]): %pV\n",
+ sb->s_id, MAJOR(bd_dev), MINOR(bd_dev), &vaf);
+ }
+#endif
+ va_end(args);
+}
+EXPORT_SYMBOL(__sdfat_msg);
+
+void sdfat_log_version(void)
+{
+ pr_info("[SDFAT] Filesystem version %s\n", SDFAT_VERSION);
+#ifdef CONFIG_SDFAT_SUPPORT_STLOG
+ ST_LOG("[SDFAT] Filesystem version %s\n", SDFAT_VERSION);
+#endif
+}
+EXPORT_SYMBOL(sdfat_log_version);
+
+/* <linux/time.h> externs sys_tz
+ * extern struct timezone sys_tz;
+ */
+#define UNIX_SECS_1980 315532800L
+
+#if BITS_PER_LONG == 64
+#define UNIX_SECS_2108 4354819200L
+#endif
+
+/* days between 1970/01/01 and 1980/01/01 (2 leap days) */
+#define DAYS_DELTA_DECADE (365 * 10 + 2)
+/* 120 (2100 - 1980) isn't leap year */
+#define NO_LEAP_YEAR_2100 (120)
+#define IS_LEAP_YEAR(y) (!((y) & 0x3) && (y) != NO_LEAP_YEAR_2100)
+
+#define SECS_PER_MIN (60)
+#define SECS_PER_HOUR (60 * SECS_PER_MIN)
+#define SECS_PER_DAY (24 * SECS_PER_HOUR)
+
+#define MAKE_LEAP_YEAR(leap_year, year) \
+ do { \
+ /* 2100 isn't leap year */ \
+ if (unlikely(year > NO_LEAP_YEAR_2100)) \
+ leap_year = ((year + 3) / 4) - 1; \
+ else \
+ leap_year = ((year + 3) / 4); \
+ } while (0)
+
+/* Linear day numbers of the respective 1sts in non-leap years. */
+static time_t accum_days_in_year[] = {
+ /* Month : N 01 02 03 04 05 06 07 08 09 10 11 12 */
+ 0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 0, 0, 0,
+};
+
+#define TIMEZONE_SEC(x) ((x) * 15 * SECS_PER_MIN)
+/* Convert a FAT time/date pair to a UNIX date (seconds since 1 1 70). */
+void sdfat_time_fat2unix(struct sdfat_sb_info *sbi, sdfat_timespec_t *ts,
+ DATE_TIME_T *tp)
+{
+ time_t year = tp->Year;
+ time_t ld; /* leap day */
+
+ MAKE_LEAP_YEAR(ld, year);
+
+ if (IS_LEAP_YEAR(year) && (tp->Month) > 2)
+ ld++;
+
+ ts->tv_sec = tp->Second + tp->Minute * SECS_PER_MIN
+ + tp->Hour * SECS_PER_HOUR
+ + (year * 365 + ld + accum_days_in_year[tp->Month]
+ + (tp->Day - 1) + DAYS_DELTA_DECADE) * SECS_PER_DAY;
+
+ ts->tv_nsec = 0;
+
+ /* Treat as local time */
+ if (!sbi->options.tz_utc && !tp->Timezone.valid) {
+ ts->tv_sec += sys_tz.tz_minuteswest * SECS_PER_MIN;
+ return;
+ }
+
+ /* Treat as UTC time */
+ if (!tp->Timezone.valid)
+ return;
+
+ /* Treat as UTC time, but need to adjust timezone to UTC0 */
+ if (tp->Timezone.off <= 0x3F)
+ ts->tv_sec -= TIMEZONE_SEC(tp->Timezone.off);
+	else /* 0x40 <= tp->Timezone.off <= 0x7F */
+ ts->tv_sec += TIMEZONE_SEC(0x80 - tp->Timezone.off);
+}
+
+#define TIMEZONE_CUR_OFFSET() ((sys_tz.tz_minuteswest / (-15)) & 0x7F)
+/* Convert linear UNIX date to a FAT time/date pair. */
+void sdfat_time_unix2fat(struct sdfat_sb_info *sbi, sdfat_timespec_t *ts,
+ DATE_TIME_T *tp)
+{
+	bool tz_valid = (sbi->fsi.vol_type == EXFAT);
+ time_t second = ts->tv_sec;
+ time_t day, month, year;
+ time_t ld; /* leap day */
+
+ tp->Timezone.value = 0x00;
+
+	/* Treat as local time; record the UTC offset when supported (exFAT) */
+ if (tz_valid || !sbi->options.tz_utc) {
+ second -= sys_tz.tz_minuteswest * SECS_PER_MIN;
+ if (tz_valid) {
+ tp->Timezone.valid = 1;
+ tp->Timezone.off = TIMEZONE_CUR_OFFSET();
+ }
+ }
+
+ /* Jan 1 GMT 00:00:00 1980. But what about another time zone? */
+ if (second < UNIX_SECS_1980) {
+ tp->Second = 0;
+ tp->Minute = 0;
+ tp->Hour = 0;
+ tp->Day = 1;
+ tp->Month = 1;
+ tp->Year = 0;
+ return;
+ }
+#if (BITS_PER_LONG == 64)
+ if (second >= UNIX_SECS_2108) {
+ tp->Second = 59;
+ tp->Minute = 59;
+ tp->Hour = 23;
+ tp->Day = 31;
+ tp->Month = 12;
+ tp->Year = 127;
+ return;
+ }
+#endif
+
+ day = second / SECS_PER_DAY - DAYS_DELTA_DECADE;
+ year = day / 365;
+
+ MAKE_LEAP_YEAR(ld, year);
+ if (year * 365 + ld > day)
+ year--;
+
+ MAKE_LEAP_YEAR(ld, year);
+ day -= year * 365 + ld;
+
+ if (IS_LEAP_YEAR(year) && day == accum_days_in_year[3]) {
+ month = 2;
+ } else {
+ if (IS_LEAP_YEAR(year) && day > accum_days_in_year[3])
+ day--;
+ for (month = 1; month < 12; month++) {
+ if (accum_days_in_year[month + 1] > day)
+ break;
+ }
+ }
+ day -= accum_days_in_year[month];
+
+ tp->Second = second % SECS_PER_MIN;
+ tp->Minute = (second / SECS_PER_MIN) % 60;
+ tp->Hour = (second / SECS_PER_HOUR) % 24;
+ tp->Day = day + 1;
+ tp->Month = month;
+ tp->Year = year;
+}
+
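+/*
+ * Worked example for the conversion above (assuming UTC with no
+ * timezone adjustment): 2020-01-01 00:00:00 is 1577836800s since the
+ * Unix epoch, i.e. 18262 whole days; subtracting DAYS_DELTA_DECADE
+ * (3652) leaves day 14610 past 1980-01-01, so year == 40 with ld == 10
+ * leap days, and the loop resolves Month == 1, Day == 1, Year == 40
+ * (i.e. 2020).
+ */
+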
+TIMESTAMP_T *tm_now(struct sdfat_sb_info *sbi, TIMESTAMP_T *tp)
+{
+ sdfat_timespec_t ts = CURRENT_TIME_SEC;
+ DATE_TIME_T dt;
+
+ sdfat_time_unix2fat(sbi, &ts, &dt);
+
+ tp->year = dt.Year;
+ tp->mon = dt.Month;
+ tp->day = dt.Day;
+ tp->hour = dt.Hour;
+ tp->min = dt.Minute;
+ tp->sec = dt.Second;
+ tp->tz.value = dt.Timezone.value;
+
+ return tp;
+}
+
+u8 calc_chksum_1byte(void *data, s32 len, u8 chksum)
+{
+ s32 i;
+ u8 *c = (u8 *) data;
+
+ for (i = 0; i < len; i++, c++)
+ chksum = (((chksum & 1) << 7) | ((chksum & 0xFE) >> 1)) + *c;
+
+ return chksum;
+}
+
+u16 calc_chksum_2byte(void *data, s32 len, u16 chksum, s32 type)
+{
+ s32 i;
+ u8 *c = (u8 *) data;
+
+ for (i = 0; i < len; i++, c++) {
+ if (((i == 2) || (i == 3)) && (type == CS_DIR_ENTRY))
+ continue;
+ chksum = (((chksum & 1) << 15) | ((chksum & 0xFFFE) >> 1)) + (u16) *c;
+ }
+ return chksum;
+}
+
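+/*
+ * Checksum illustration: both helpers rotate the running checksum
+ * right by one bit, then add the next byte.  For example,
+ * calc_chksum_1byte() over the bytes {0x01, 0x02} with seed 0 yields
+ * 0x01 after the first byte and 0x82 after the second (0x01 rotated
+ * right is 0x80, plus 0x02).
+ */
+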
+#ifdef CONFIG_SDFAT_TRACE_ELAPSED_TIME
+struct timeval __t1, __t2;
+u32 sdfat_time_current_usec(struct timeval *tv)
+{
+ do_gettimeofday(tv);
+ return (u32)(tv->tv_sec*1000000 + tv->tv_usec);
+}
+#endif /* CONFIG_SDFAT_TRACE_ELAPSED_TIME */
+
+#ifdef CONFIG_SDFAT_DBG_CAREFUL
+/* Check the consistency of i_size_ondisk (FAT32, or flags 0x01 only) */
+void sdfat_debug_check_clusters(struct inode *inode)
+{
+ unsigned int num_clusters;
+ volatile uint32_t tmp_fat_chain[50];
+ volatile int tmp_i = 0;
+	volatile unsigned int num_clusters_org;
+ CHAIN_T clu;
+ FILE_ID_T *fid = &(SDFAT_I(inode)->fid);
+ FS_INFO_T *fsi = &(SDFAT_SB(inode->i_sb)->fsi);
+
+ if (SDFAT_I(inode)->i_size_ondisk == 0)
+ num_clusters = 0;
+ else
+ num_clusters = ((SDFAT_I(inode)->i_size_ondisk-1) >> fsi->cluster_size_bits) + 1;
+
+ clu.dir = fid->start_clu;
+ clu.size = num_clusters;
+ clu.flags = fid->flags;
+
+ num_clusters_org = num_clusters;
+
+ if (clu.flags == 0x03)
+ return;
+
+ while (num_clusters > 0) {
+ /* FAT chain logging */
+ tmp_fat_chain[tmp_i] = clu.dir;
+ tmp_i++;
+ if (tmp_i >= 50)
+ tmp_i = 0;
+
+ BUG_ON(IS_CLUS_EOF(clu.dir) || IS_CLUS_FREE(clu.dir));
+
+ if (get_next_clus_safe(inode->i_sb, &(clu.dir)))
+			EMSG("%s: failed to access FAT\n", __func__);
+
+ num_clusters--;
+ }
+
+ BUG_ON(!IS_CLUS_EOF(clu.dir));
+}
+
+#endif /* CONFIG_SDFAT_DBG_CAREFUL */
+
+#ifdef CONFIG_SDFAT_DBG_MSG
+void __sdfat_dmsg(int level, const char *fmt, ...)
+{
+#ifdef CONFIG_SDFAT_DBG_SHOW_PID
+ struct va_format vaf;
+ va_list args;
+
+ /* should check type */
+ if (level > SDFAT_MSG_LEVEL)
+ return;
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+	/* fmt already includes the KERN_ facility prefix */
+ printk("[%u] %pV", current->pid, &vaf);
+ va_end(args);
+#else
+ va_list args;
+
+ /* should check type */
+ if (level > SDFAT_MSG_LEVEL)
+ return;
+
+ va_start(args, fmt);
+	/* fmt already includes the KERN_ facility prefix */
+ vprintk(fmt, args);
+ va_end(args);
+#endif
+}
+#endif
+
diff --git a/fs/sdfat/mpage.c b/fs/sdfat/mpage.c
new file mode 100644
index 000000000000..f550fbb2204a
--- /dev/null
+++ b/fs/sdfat/mpage.c
@@ -0,0 +1,635 @@
+/*
+ * fs/mpage.c
+ *
+ * Copyright (C) 2002, Linus Torvalds.
+ *
+ * Contains functions related to preparing and submitting BIOs which contain
+ * multiple pagecache pages.
+ *
+ * 15May2002 Andrew Morton
+ * Initial version
+ * 27Jun2002 axboe@suse.de
+ * use bio_add_page() to build bio's just the right size
+ */
+
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/************************************************************************/
+/* */
+/* PROJECT : exFAT & FAT12/16/32 File System */
+/* FILE : mpage.c */
+/* PURPOSE : sdFAT aligned multipage BIO write */
+/* */
+/*----------------------------------------------------------------------*/
+/* NOTES */
+/* */
+/* */
+/************************************************************************/
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/time.h>
+#include <linux/buffer_head.h>
+#include <linux/exportfs.h>
+#include <linux/mount.h>
+#include <linux/vfs.h>
+#include <linux/parser.h>
+#include <linux/uio.h>
+#include <linux/writeback.h>
+#include <linux/log2.h>
+#include <linux/hash.h>
+#include <linux/backing-dev.h>
+#include <linux/sched.h>
+#include <linux/fs_struct.h>
+#include <linux/namei.h>
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/swap.h> /* for mark_page_accessed() */
+#include <asm/current.h>
+#include <asm/unaligned.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0)
+#include <linux/aio.h>
+#endif
+
+#include "sdfat.h"
+
+#ifdef CONFIG_SDFAT_ALIGNED_MPAGE_WRITE
+
+/*************************************************************************
+ * INNER FUNCTIONS FOR FUNCTIONS WHICH HAVE KERNEL VERSION DEPENDENCIES
+ *************************************************************************/
+static void __mpage_write_end_io(struct bio *bio, int err);
+
+/*************************************************************************
+ * FUNCTIONS WHICH HAVE KERNEL VERSION DEPENDENCIES
+ *************************************************************************/
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0)
+ /* EMPTY */
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0) */
+static inline void bio_set_dev(struct bio *bio, struct block_device *bdev)
+{
+ bio->bi_bdev = bdev;
+}
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+static inline void __sdfat_clean_bdev_aliases(struct block_device *bdev, sector_t block)
+{
+ clean_bdev_aliases(bdev, block, 1);
+}
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4,10,0) */
+static inline void __sdfat_clean_bdev_aliases(struct block_device *bdev, sector_t block)
+{
+ unmap_underlying_metadata(bdev, block);
+}
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)
+static inline void __sdfat_submit_bio_write2(int flags, struct bio *bio)
+{
+ bio_set_op_attrs(bio, REQ_OP_WRITE, flags);
+ submit_bio(bio);
+}
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4,8,0) */
+static inline void __sdfat_submit_bio_write2(int flags, struct bio *bio)
+{
+ submit_bio(WRITE | flags, bio);
+}
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0)
+static inline int bio_get_nr_vecs(struct block_device *bdev)
+{
+ return BIO_MAX_PAGES;
+}
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4,1,0) */
+ /* EMPTY */
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0)
+static inline sector_t __sdfat_bio_sector(struct bio *bio)
+{
+ return bio->bi_iter.bi_sector;
+}
+
+static inline void __sdfat_set_bio_sector(struct bio *bio, sector_t sector)
+{
+ bio->bi_iter.bi_sector = sector;
+}
+
+static inline unsigned int __sdfat_bio_size(struct bio *bio)
+{
+ return bio->bi_iter.bi_size;
+}
+
+static inline void __sdfat_set_bio_size(struct bio *bio, unsigned int size)
+{
+ bio->bi_iter.bi_size = size;
+}
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) */
+static inline sector_t __sdfat_bio_sector(struct bio *bio)
+{
+ return bio->bi_sector;
+}
+
+static inline void __sdfat_set_bio_sector(struct bio *bio, sector_t sector)
+{
+ bio->bi_sector = sector;
+}
+
+static inline unsigned int __sdfat_bio_size(struct bio *bio)
+{
+ return bio->bi_size;
+}
+
+static inline void __sdfat_set_bio_size(struct bio *bio, unsigned int size)
+{
+ bio->bi_size = size;
+}
+#endif
+
+/*************************************************************************
+ * MORE FUNCTIONS WHICH HAVE KERNEL VERSION DEPENDENCIES
+ *************************************************************************/
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0)
+static void mpage_write_end_io(struct bio *bio)
+{
+ __mpage_write_end_io(bio, blk_status_to_errno(bio->bi_status));
+}
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
+static void mpage_write_end_io(struct bio *bio)
+{
+ __mpage_write_end_io(bio, bio->bi_error);
+}
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4,3,0) */
+static void mpage_write_end_io(struct bio *bio, int err)
+{
+ if (test_bit(BIO_UPTODATE, &bio->bi_flags))
+ err = 0;
+ __mpage_write_end_io(bio, err);
+}
+#endif
+
+/* __check_dfr_on() and __dfr_writepage_end_io() functions
+ * are copied from sdfat.c.
+ * Each copy must be kept identical to the original.
+ */
+static inline int __check_dfr_on(struct inode *inode, loff_t start, loff_t end, const char *fname)
+{
+#ifdef CONFIG_SDFAT_DFR
+ struct defrag_info *ino_dfr = &(SDFAT_I(inode)->dfr_info);
+
+ if ((atomic_read(&ino_dfr->stat) == DFR_INO_STAT_REQ) &&
+ fsapi_dfr_check_dfr_on(inode, start, end, 0, fname))
+ return 1;
+#endif
+ return 0;
+}
+
+static inline int __dfr_writepage_end_io(struct page *page)
+{
+#ifdef CONFIG_SDFAT_DFR
+ struct defrag_info *ino_dfr = &(SDFAT_I(page->mapping->host)->dfr_info);
+
+ if (atomic_read(&ino_dfr->stat) == DFR_INO_STAT_REQ)
+ fsapi_dfr_writepage_endio(page);
+#endif
+ return 0;
+}
+
+
+static inline unsigned int __calc_size_to_align(struct super_block *sb)
+{
+ struct block_device *bdev = sb->s_bdev;
+ struct gendisk *disk;
+ struct request_queue *queue;
+ struct queue_limits *limit;
+ unsigned int max_sectors;
+ unsigned int aligned = 0;
+
+ disk = bdev->bd_disk;
+ if (!disk)
+ goto out;
+
+ queue = disk->queue;
+ if (!queue)
+ goto out;
+
+ limit = &queue->limits;
+ max_sectors = limit->max_sectors;
+ aligned = 1 << ilog2(max_sectors);
+
+ if (aligned && (max_sectors & (aligned - 1)))
+ aligned = 0;
+out:
+ return aligned;
+}
+
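+/*
+ * Example of the alignment rule above (assumed queue limits): with
+ * max_sectors == 512 the aligned write unit stays 512 sectors, but
+ * with max_sectors == 768 (not a power of two) 1 << ilog2(768) == 512
+ * and 768 & 511 != 0, so alignment is disabled (aligned == 0).
+ */
+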
+struct mpage_data {
+ struct bio *bio;
+ sector_t last_block_in_bio;
+ get_block_t *get_block;
+ unsigned int use_writepage;
+ unsigned int size_to_align;
+};
+
+/*
+ * I/O completion handler for multipage BIOs.
+ *
+ * The mpage code never puts partial pages into a BIO (except for end-of-file).
+ * If a page does not map to a contiguous run of blocks then it simply falls
+ * back to the single-page path (here, a_ops->writepage() via the 'confused'
+ * label).
+ *
+ * Why is this? If a page's completion depends on a number of different BIOs
+ * which can complete in any order (or at the same time) then determining the
+ * status of that page is hard. See end_buffer_async_read() for the details.
+ * There is no point in duplicating all that complexity.
+ */
+static void __mpage_write_end_io(struct bio *bio, int err)
+{
+ struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+
+ ASSERT(bio_data_dir(bio) == WRITE); /* only write */
+
+ do {
+ struct page *page = bvec->bv_page;
+
+ if (--bvec >= bio->bi_io_vec)
+ prefetchw(&bvec->bv_page->flags);
+ if (err) {
+ SetPageError(page);
+ if (page->mapping)
+ mapping_set_error(page->mapping, err);
+ }
+
+ __dfr_writepage_end_io(page);
+
+ end_page_writeback(page);
+ } while (bvec >= bio->bi_io_vec);
+ bio_put(bio);
+}
+
+static struct bio *mpage_bio_submit_write(int flags, struct bio *bio)
+{
+ bio->bi_end_io = mpage_write_end_io;
+ __sdfat_submit_bio_write2(flags, bio);
+ return NULL;
+}
+
+static struct bio *
+mpage_alloc(struct block_device *bdev,
+ sector_t first_sector, int nr_vecs,
+ gfp_t gfp_flags)
+{
+ struct bio *bio;
+
+ bio = bio_alloc(gfp_flags, nr_vecs);
+
+ if (bio == NULL && (current->flags & PF_MEMALLOC)) {
+ while (!bio && (nr_vecs /= 2))
+ bio = bio_alloc(gfp_flags, nr_vecs);
+ }
+
+ if (bio) {
+ bio_set_dev(bio, bdev);
+ __sdfat_set_bio_sector(bio, first_sector);
+ }
+ return bio;
+}
+
+static int sdfat_mpage_writepage(struct page *page,
+ struct writeback_control *wbc, void *data)
+{
+ struct mpage_data *mpd = data;
+ struct bio *bio = mpd->bio;
+ struct address_space *mapping = page->mapping;
+ struct inode *inode = page->mapping->host;
+ const unsigned int blkbits = inode->i_blkbits;
+ const unsigned int blocks_per_page = PAGE_SIZE >> blkbits;
+ sector_t last_block;
+ sector_t block_in_file;
+ sector_t blocks[MAX_BUF_PER_PAGE];
+ unsigned int page_block;
+ unsigned int first_unmapped = blocks_per_page;
+ struct block_device *bdev = NULL;
+ int boundary = 0;
+ sector_t boundary_block = 0;
+ struct block_device *boundary_bdev = NULL;
+ int length;
+ struct buffer_head map_bh;
+ loff_t i_size = i_size_read(inode);
+ unsigned long end_index = i_size >> PAGE_SHIFT;
+ int ret = 0;
+
+ if (page_has_buffers(page)) {
+ struct buffer_head *head = page_buffers(page);
+ struct buffer_head *bh = head;
+
+ /* If they're all mapped and dirty, do it */
+ page_block = 0;
+ do {
+ BUG_ON(buffer_locked(bh));
+ if (!buffer_mapped(bh)) {
+ /*
+ * unmapped dirty buffers are created by
+ * __set_page_dirty_buffers -> mmapped data
+ */
+ if (buffer_dirty(bh))
+ goto confused;
+ if (first_unmapped == blocks_per_page)
+ first_unmapped = page_block;
+ continue;
+ }
+
+ if (first_unmapped != blocks_per_page)
+ goto confused; /* hole -> non-hole */
+
+ if (!buffer_dirty(bh) || !buffer_uptodate(bh))
+ goto confused;
+
+ /* bh should be mapped if delay is set */
+ if (buffer_delay(bh)) {
+ sector_t blk_in_file =
+ (sector_t)(page->index << (PAGE_SHIFT - blkbits)) + page_block;
+
+ BUG_ON(bh->b_size != (1 << blkbits));
+ if (page->index > end_index) {
+ MMSG("%s(inode:%p) "
+					"over end with delayed buffer "
+ "(page_idx:%u, end_idx:%u)\n",
+ __func__, inode,
+ (u32)page->index,
+ (u32)end_index);
+ goto confused;
+ }
+
+ ret = mpd->get_block(inode, blk_in_file, bh, 1);
+ if (ret) {
+ MMSG("%s(inode:%p) "
+ "failed to getblk(ret:%d)\n",
+ __func__, inode, ret);
+ goto confused;
+ }
+
+ BUG_ON(buffer_delay(bh));
+
+ if (buffer_new(bh)) {
+ clear_buffer_new(bh);
+ __sdfat_clean_bdev_aliases(bh->b_bdev, bh->b_blocknr);
+ }
+ }
+
+ if (page_block) {
+ if (bh->b_blocknr != blocks[page_block-1] + 1) {
+ MMSG("%s(inode:%p) pblk(%d) "
+ "no_seq(prev:%lld, new:%lld)\n",
+ __func__, inode, page_block,
+ (u64)blocks[page_block-1],
+ (u64)bh->b_blocknr);
+ goto confused;
+ }
+ }
+ blocks[page_block++] = bh->b_blocknr;
+ boundary = buffer_boundary(bh);
+ if (boundary) {
+ boundary_block = bh->b_blocknr;
+ boundary_bdev = bh->b_bdev;
+ }
+ bdev = bh->b_bdev;
+ } while ((bh = bh->b_this_page) != head);
+
+ if (first_unmapped)
+ goto page_is_mapped;
+
+ /*
+ * Page has buffers, but they are all unmapped. The page was
+ * created by pagein or read over a hole which was handled by
+ * block_read_full_page(). If this address_space is also
+ * using mpage_readpages then this can rarely happen.
+ */
+ goto confused;
+ }
+
+ /*
+ * The page has no buffers: map it to disk
+ */
+ BUG_ON(!PageUptodate(page));
+ block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
+ last_block = (i_size - 1) >> blkbits;
+ map_bh.b_page = page;
+ for (page_block = 0; page_block < blocks_per_page; ) {
+
+ map_bh.b_state = 0;
+ map_bh.b_size = 1 << blkbits;
+ if (mpd->get_block(inode, block_in_file, &map_bh, 1))
+ goto confused;
+
+ if (buffer_new(&map_bh))
+ __sdfat_clean_bdev_aliases(map_bh.b_bdev, map_bh.b_blocknr);
+ if (buffer_boundary(&map_bh)) {
+ boundary_block = map_bh.b_blocknr;
+ boundary_bdev = map_bh.b_bdev;
+ }
+
+ if (page_block) {
+ if (map_bh.b_blocknr != blocks[page_block-1] + 1)
+ goto confused;
+ }
+ blocks[page_block++] = map_bh.b_blocknr;
+ boundary = buffer_boundary(&map_bh);
+ bdev = map_bh.b_bdev;
+ if (block_in_file == last_block)
+ break;
+ block_in_file++;
+ }
+ BUG_ON(page_block == 0);
+
+ first_unmapped = page_block;
+
+page_is_mapped:
+ if (page->index >= end_index) {
+ /*
+ * The page straddles i_size. It must be zeroed out on each
+ * and every writepage invocation because it may be mmapped.
+ * "A file is mapped in multiples of the page size. For a file
+ * that is not a multiple of the page size, the remaining memory
+ * is zeroed when mapped, and writes to that region are not
+ * written out to the file."
+ */
+ unsigned int offset = i_size & (PAGE_SIZE - 1);
+
+ if (page->index > end_index || !offset) {
+ MMSG("%s(inode:%p) over end "
+ "(page_idx:%u, end_idx:%u off:%u)\n",
+ __func__, inode, (u32)page->index,
+ (u32)end_index, (u32)offset);
+ goto confused;
+ }
+ zero_user_segment(page, offset, PAGE_SIZE);
+ }
+
+ /*
+ * This page will go to BIO. Do we need to send this BIO off first?
+ *
+	 * REMARK : the else-if branch below was added for sdFAT's aligned mpage write
+ */
+ if (bio) {
+ if (mpd->last_block_in_bio != blocks[0] - 1) {
+ bio = mpage_bio_submit_write(0, bio);
+ } else if (mpd->size_to_align) {
+ unsigned int mask = mpd->size_to_align - 1;
+ sector_t max_end_block =
+ (__sdfat_bio_sector(bio) & ~(mask)) + mask;
+
+ if ((__sdfat_bio_size(bio) != (1 << (mask + 1))) &&
+ (mpd->last_block_in_bio == max_end_block)) {
+ MMSG("%s(inode:%p) alignment mpage_bio_submit"
+ "(start:%u, len:%u aligned:%u)\n",
+ __func__, inode,
+ (unsigned int)__sdfat_bio_sector(bio),
+ (unsigned int)(mpd->last_block_in_bio -
+ __sdfat_bio_sector(bio) + 1),
+ (unsigned int)mpd->size_to_align);
+ bio = mpage_bio_submit_write(REQ_NOMERGE, bio);
+ }
+ }
+ }
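+	/*
+	 * Window illustration (assuming size_to_align == 512 sectors):
+	 * a bio starting at sector 1000 belongs to the aligned window
+	 * ending at max_end_block == (1000 & ~511) + 511 == 1023; a bio
+	 * whose last block already sits on that boundary is submitted
+	 * with REQ_NOMERGE so the block layer preserves the alignment.
+	 */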
+
+alloc_new:
+ if (!bio) {
+ bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
+ bio_get_nr_vecs(bdev), GFP_NOFS|__GFP_HIGH);
+ if (!bio)
+ goto confused;
+ }
+
+ /*
+ * Must try to add the page before marking the buffer clean or
+ * the confused fail path above (OOM) will be very confused when
+ * it finds all bh marked clean (i.e. it will not write anything)
+ */
+ length = first_unmapped << blkbits;
+ if (bio_add_page(bio, page, length, 0) < length) {
+ bio = mpage_bio_submit_write(0, bio);
+ goto alloc_new;
+ }
+
+ /*
+ * OK, we have our BIO, so we can now mark the buffers clean. Make
+ * sure to only clean buffers which we know we'll be writing.
+ */
+ if (page_has_buffers(page)) {
+ struct buffer_head *head = page_buffers(page);
+ struct buffer_head *bh = head;
+ unsigned int buffer_counter = 0;
+
+ do {
+ if (buffer_counter++ == first_unmapped)
+ break;
+ clear_buffer_dirty(bh);
+ bh = bh->b_this_page;
+ } while (bh != head);
+
+ /*
+ * we cannot drop the bh if the page is not uptodate
+ * or a concurrent readpage would fail to serialize with the bh
+ * and it would read from disk before we reach the platter.
+ */
+ if (buffer_heads_over_limit && PageUptodate(page))
+ try_to_free_buffers(page);
+ }
+
+ BUG_ON(PageWriteback(page));
+ set_page_writeback(page);
+
+ /*
+ * FIXME FOR DEFRAGMENTATION : CODE REVIEW IS REQUIRED
+ *
+	 * Turn off the MAPPED flag in the victim's bh if defrag is on.
+	 * Another write_begin can start after get_block has been called
+	 * for the defrag victims; in that case write_begin calls
+	 * get_block, gets the original block number, and the previous
+	 * defrag is canceled.
+ */
+ if (unlikely(__check_dfr_on(inode, (loff_t)(page->index << PAGE_SHIFT),
+ (loff_t)((page->index + 1) << PAGE_SHIFT), __func__))) {
+ struct buffer_head *head = page_buffers(page);
+ struct buffer_head *bh = head;
+
+ do {
+ clear_buffer_mapped(bh);
+ bh = bh->b_this_page;
+ } while (bh != head);
+ }
+
+ unlock_page(page);
+ if (boundary || (first_unmapped != blocks_per_page)) {
+ bio = mpage_bio_submit_write(0, bio);
+ if (boundary_block) {
+ write_boundary_block(boundary_bdev,
+ boundary_block, 1 << blkbits);
+ }
+ } else {
+ mpd->last_block_in_bio = blocks[blocks_per_page - 1];
+ }
+
+ goto out;
+
+confused:
+ if (bio)
+ bio = mpage_bio_submit_write(0, bio);
+
+ if (mpd->use_writepage) {
+ ret = mapping->a_ops->writepage(page, wbc);
+ } else {
+ ret = -EAGAIN;
+ goto out;
+ }
+ /*
+ * The caller has a ref on the inode, so *mapping is stable
+ */
+ mapping_set_error(mapping, ret);
+out:
+ mpd->bio = bio;
+ return ret;
+}
+
+int sdfat_mpage_writepages(struct address_space *mapping,
+ struct writeback_control *wbc, get_block_t *get_block)
+{
+ struct blk_plug plug;
+ int ret;
+ struct mpage_data mpd = {
+ .bio = NULL,
+ .last_block_in_bio = 0,
+ .get_block = get_block,
+ .use_writepage = 1,
+ .size_to_align = __calc_size_to_align(mapping->host->i_sb),
+ };
+
+ BUG_ON(!get_block);
+ blk_start_plug(&plug);
+ ret = write_cache_pages(mapping, wbc, sdfat_mpage_writepage, &mpd);
+ if (mpd.bio)
+ mpage_bio_submit_write(0, mpd.bio);
+ blk_finish_plug(&plug);
+ return ret;
+}
+
+#endif /* CONFIG_SDFAT_ALIGNED_MPAGE_WRITE */
+
diff --git a/fs/sdfat/nls.c b/fs/sdfat/nls.c
new file mode 100644
index 000000000000..b65634454c55
--- /dev/null
+++ b/fs/sdfat/nls.c
@@ -0,0 +1,478 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/************************************************************************/
+/* */
+/* PROJECT : exFAT & FAT12/16/32 File System */
+/* FILE : nls.c */
+/* PURPOSE : sdFAT NLS Manager */
+/* */
+/*----------------------------------------------------------------------*/
+/* NOTES */
+/* */
+/* */
+/************************************************************************/
+#include <linux/string.h>
+#include <linux/nls.h>
+
+#include "sdfat.h"
+#include "core.h"
+
+/*----------------------------------------------------------------------*/
+/* Global Variable Definitions */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/* Local Variable Definitions */
+/*----------------------------------------------------------------------*/
+
+static u16 bad_dos_chars[] = {
+ /* + , ; = [ ] */
+ 0x002B, 0x002C, 0x003B, 0x003D, 0x005B, 0x005D,
+ 0xFF0B, 0xFF0C, 0xFF1B, 0xFF1D, 0xFF3B, 0xFF3D,
+ 0
+};
+
+/*
+ * Allow the full-width variants of illegal characters:
+ * MS Windows 7 accepts full-width forms of characters that are invalid
+ * in names, so for compatibility we only need to reject the half-width
+ * (ASCII) invalid characters.
+ *
+ * " * / : < > ? \ |
+ *
+ * patch 1.2.0
+ */
+static u16 bad_uni_chars[] = {
+ 0x0022, 0x002A, 0x002F, 0x003A,
+ 0x003C, 0x003E, 0x003F, 0x005C, 0x007C,
+#if 0 /* allow full-width characters */
+ 0x201C, 0x201D, 0xFF0A, 0xFF0F, 0xFF1A,
+ 0xFF1C, 0xFF1E, 0xFF1F, 0xFF3C, 0xFF5C,
+#endif
+ 0
+};
+
+/*----------------------------------------------------------------------*/
+/* Local Function Declarations */
+/*----------------------------------------------------------------------*/
+static s32 convert_uni_to_ch(struct nls_table *nls, u16 uni, u8 *ch, s32 *lossy);
+static s32 convert_ch_to_uni(struct nls_table *nls, u8 *ch, u16 *uni, s32 *lossy);
+
+static u16 nls_upper(struct super_block *sb, u16 a)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ if (SDFAT_SB(sb)->options.casesensitive)
+ return a;
+ if ((fsi->vol_utbl)[get_col_index(a)] != NULL)
+ return (fsi->vol_utbl)[get_col_index(a)][get_row_index(a)];
+ else
+ return a;
+}
+/*======================================================================*/
+/* Global Function Definitions */
+/*======================================================================*/
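+/*
+ * Note: on a match this returns the position *after* the matching
+ * character, not the position of the match itself; every caller only
+ * uses the result as a boolean.
+ */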
+u16 *nls_wstrchr(u16 *str, u16 wchar)
+{
+ while (*str) {
+ if (*(str++) == wchar)
+ return str;
+ }
+
+	return NULL;
+}
+
+s32 nls_cmp_sfn(struct super_block *sb, u8 *a, u8 *b)
+{
+ return strncmp((void *)a, (void *)b, DOS_NAME_LENGTH);
+}
+
+s32 nls_cmp_uniname(struct super_block *sb, u16 *a, u16 *b)
+{
+ s32 i;
+
+ for (i = 0; i < MAX_NAME_LENGTH; i++, a++, b++) {
+ if (nls_upper(sb, *a) != nls_upper(sb, *b))
+ return 1;
+ if (*a == 0x0)
+ return 0;
+ }
+ return 0;
+}
+
+#define CASE_LOWER_BASE (0x08) /* base is lower case */
+#define CASE_LOWER_EXT (0x10) /* extension is lower case */
+
+s32 nls_uni16s_to_sfn(struct super_block *sb, UNI_NAME_T *p_uniname, DOS_NAME_T *p_dosname, s32 *p_lossy)
+{
+ s32 i, j, len, lossy = NLS_NAME_NO_LOSSY;
+ u8 buf[MAX_CHARSET_SIZE];
+ u8 lower = 0, upper = 0;
+ u8 *dosname = p_dosname->name;
+ u16 *uniname = p_uniname->name;
+ u16 *p, *last_period;
+ struct nls_table *nls = SDFAT_SB(sb)->nls_disk;
+
+ /* DOSNAME is filled with space */
+ for (i = 0; i < DOS_NAME_LENGTH; i++)
+ *(dosname+i) = ' ';
+
+ /* DOT and DOTDOT are handled by VFS layer */
+
+ /* search for the last embedded period */
+ last_period = NULL;
+ for (p = uniname; *p; p++) {
+ if (*p == (u16) '.')
+ last_period = p;
+ }
+
+ i = 0;
+ while (i < DOS_NAME_LENGTH) {
+ if (i == 8) {
+ if (last_period == NULL)
+ break;
+
+ if (uniname <= last_period) {
+ if (uniname < last_period)
+ lossy |= NLS_NAME_OVERLEN;
+ uniname = last_period + 1;
+ }
+ }
+
+ if (*uniname == (u16) '\0') {
+ break;
+ } else if (*uniname == (u16) ' ') {
+ lossy |= NLS_NAME_LOSSY;
+ } else if (*uniname == (u16) '.') {
+ if (uniname < last_period)
+ lossy |= NLS_NAME_LOSSY;
+ else
+ i = 8;
+ } else if (nls_wstrchr(bad_dos_chars, *uniname)) {
+ lossy |= NLS_NAME_LOSSY;
+ *(dosname+i) = '_';
+ i++;
+ } else {
+ len = convert_uni_to_ch(nls, *uniname, buf, &lossy);
+
+ if (len > 1) {
+ if ((i >= 8) && ((i+len) > DOS_NAME_LENGTH))
+ break;
+
+ if ((i < 8) && ((i+len) > 8)) {
+ i = 8;
+ continue;
+ }
+
+ lower = 0xFF;
+
+ for (j = 0; j < len; j++, i++)
+ *(dosname+i) = *(buf+j);
+ } else { /* len == 1 */
+ if ((*buf >= 'a') && (*buf <= 'z')) {
+ *(dosname+i) = *buf - ('a' - 'A');
+
+ lower |= (i < 8) ?
+ CASE_LOWER_BASE :
+ CASE_LOWER_EXT;
+ } else if ((*buf >= 'A') && (*buf <= 'Z')) {
+ *(dosname+i) = *buf;
+
+ upper |= (i < 8) ?
+ CASE_LOWER_BASE :
+ CASE_LOWER_EXT;
+ } else {
+ *(dosname+i) = *buf;
+ }
+ i++;
+ }
+ }
+
+ uniname++;
+ }
+
+ if (*dosname == 0xE5)
+ *dosname = 0x05;
+ if (*uniname != 0x0)
+ lossy |= NLS_NAME_OVERLEN;
+
+ if (upper & lower)
+ p_dosname->name_case = 0xFF;
+ else
+ p_dosname->name_case = lower;
+
+ if (p_lossy)
+ *p_lossy = lossy;
+ return i;
+}
+
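+/*
+ * Example of the case flags above: a (hypothetical) name "readme.txt"
+ * converts to the 8.3 form "README  TXT" with name_case ==
+ * CASE_LOWER_BASE | CASE_LOWER_EXT (0x18), while a mixed-case name
+ * such as "Readme.txt" sets both 'upper' and 'lower' and forces
+ * name_case to 0xFF, signalling that an LFN entry is needed to
+ * preserve the case.
+ */
+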
+s32 nls_sfn_to_uni16s(struct super_block *sb, DOS_NAME_T *p_dosname, UNI_NAME_T *p_uniname)
+{
+ s32 i = 0, j, n = 0;
+ u8 buf[MAX_DOSNAME_BUF_SIZE];
+ u8 *dosname = p_dosname->name;
+ u16 *uniname = p_uniname->name;
+ struct nls_table *nls = SDFAT_SB(sb)->nls_disk;
+
+ if (*dosname == 0x05) {
+ *buf = 0xE5;
+ i++;
+ n++;
+ }
+
+ for ( ; i < 8; i++, n++) {
+ if (*(dosname+i) == ' ')
+ break;
+
+ if ((*(dosname+i) >= 'A') && (*(dosname+i) <= 'Z') &&
+ (p_dosname->name_case & CASE_LOWER_BASE))
+ *(buf+n) = *(dosname+i) + ('a' - 'A');
+ else
+ *(buf+n) = *(dosname+i);
+ }
+ if (*(dosname+8) != ' ') {
+ *(buf+n) = '.';
+ n++;
+ }
+
+ for (i = 8; i < DOS_NAME_LENGTH; i++, n++) {
+ if (*(dosname+i) == ' ')
+ break;
+
+ if ((*(dosname+i) >= 'A') && (*(dosname+i) <= 'Z') &&
+ (p_dosname->name_case & CASE_LOWER_EXT))
+ *(buf+n) = *(dosname+i) + ('a' - 'A');
+ else
+ *(buf+n) = *(dosname+i);
+ }
+ *(buf+n) = '\0';
+
+ i = j = 0;
+ while (j < MAX_NAME_LENGTH) {
+ if (*(buf+i) == '\0')
+ break;
+
+ i += convert_ch_to_uni(nls, (buf+i), uniname, NULL);
+
+ uniname++;
+ j++;
+ }
+
+ *uniname = (u16) '\0';
+ return j;
+}
+
+static s32 __nls_utf16s_to_vfsname(struct super_block *sb, UNI_NAME_T *p_uniname, u8 *p_cstring, s32 buflen)
+{
+ s32 len;
+ const u16 *uniname = p_uniname->name;
+
+ /* always len >= 0 */
+ len = utf16s_to_utf8s(uniname, MAX_NAME_LENGTH, UTF16_HOST_ENDIAN,
+ p_cstring, buflen);
+ p_cstring[len] = '\0';
+ return len;
+}
+
+static s32 __nls_vfsname_to_utf16s(struct super_block *sb, const u8 *p_cstring,
+ const s32 len, UNI_NAME_T *p_uniname, s32 *p_lossy)
+{
+ s32 i, unilen, lossy = NLS_NAME_NO_LOSSY;
+ u16 upname[MAX_NAME_LENGTH+1];
+ u16 *uniname = p_uniname->name;
+
+ BUG_ON(!len);
+
+ unilen = utf8s_to_utf16s(p_cstring, len, UTF16_HOST_ENDIAN,
+ (wchar_t *)uniname, MAX_NAME_LENGTH+2);
+ if (unilen < 0) {
+ MMSG("%s: failed to vfsname_to_utf16(err:%d) "
+ "vfsnamelen:%d", __func__, unilen, len);
+ return unilen;
+ }
+
+ if (unilen > MAX_NAME_LENGTH) {
+ MMSG("%s: failed to vfsname_to_utf16(estr:ENAMETOOLONG) "
+ "vfsnamelen:%d, unilen:%d>%d",
+ __func__, len, unilen, MAX_NAME_LENGTH);
+ return -ENAMETOOLONG;
+ }
+
+ for (i = 0; i < unilen; i++) {
+ if ((*uniname < 0x0020) || nls_wstrchr(bad_uni_chars, *uniname))
+ lossy |= NLS_NAME_LOSSY;
+
+ *(upname+i) = nls_upper(sb, *uniname);
+ uniname++;
+ }
+
+ *uniname = (u16)'\0';
+ p_uniname->name_len = unilen;
+ p_uniname->name_hash = calc_chksum_2byte((void *) upname,
+ unilen << 1, 0, CS_DEFAULT);
+
+ if (p_lossy)
+ *p_lossy = lossy;
+
+ return unilen;
+}
+
+static s32 __nls_uni16s_to_vfsname(struct super_block *sb, UNI_NAME_T *p_uniname, u8 *p_cstring, s32 buflen)
+{
+ s32 i, j, len, out_len = 0;
+ u8 buf[MAX_CHARSET_SIZE];
+ const u16 *uniname = p_uniname->name;
+ struct nls_table *nls = SDFAT_SB(sb)->nls_io;
+
+ i = 0;
+ while ((i < MAX_NAME_LENGTH) && (out_len < (buflen-1))) {
+ if (*uniname == (u16)'\0')
+ break;
+
+ len = convert_uni_to_ch(nls, *uniname, buf, NULL);
+
+ if (out_len + len >= buflen)
+ len = (buflen - 1) - out_len;
+
+ out_len += len;
+
+ if (len > 1) {
+ for (j = 0; j < len; j++)
+ *p_cstring++ = (s8) *(buf+j);
+ } else { /* len == 1 */
+ *p_cstring++ = (s8) *buf;
+ }
+
+ uniname++;
+ i++;
+ }
+
+ *p_cstring = '\0';
+ return out_len;
+}
+
+static s32 __nls_vfsname_to_uni16s(struct super_block *sb, const u8 *p_cstring,
+ const s32 len, UNI_NAME_T *p_uniname, s32 *p_lossy)
+{
+ s32 i, unilen, lossy = NLS_NAME_NO_LOSSY;
+ u16 upname[MAX_NAME_LENGTH+1];
+ u16 *uniname = p_uniname->name;
+ struct nls_table *nls = SDFAT_SB(sb)->nls_io;
+
+ BUG_ON(!len);
+
+ i = unilen = 0;
+ while ((unilen < MAX_NAME_LENGTH) && (i < len)) {
+ i += convert_ch_to_uni(nls, (u8 *)(p_cstring+i), uniname, &lossy);
+
+ if ((*uniname < 0x0020) || nls_wstrchr(bad_uni_chars, *uniname))
+ lossy |= NLS_NAME_LOSSY;
+
+ *(upname+unilen) = nls_upper(sb, *uniname);
+
+ uniname++;
+ unilen++;
+ }
+
+ if (*(p_cstring+i) != '\0')
+ lossy |= NLS_NAME_OVERLEN;
+
+ *uniname = (u16)'\0';
+ p_uniname->name_len = unilen;
+ p_uniname->name_hash =
+ calc_chksum_2byte((void *) upname, unilen<<1, 0, CS_DEFAULT);
+
+ if (p_lossy)
+ *p_lossy = lossy;
+
+ return unilen;
+}
+
+s32 nls_uni16s_to_vfsname(struct super_block *sb, UNI_NAME_T *uniname, u8 *p_cstring, s32 buflen)
+{
+ if (SDFAT_SB(sb)->options.utf8)
+ return __nls_utf16s_to_vfsname(sb, uniname, p_cstring, buflen);
+
+ return __nls_uni16s_to_vfsname(sb, uniname, p_cstring, buflen);
+}
+
+s32 nls_vfsname_to_uni16s(struct super_block *sb, const u8 *p_cstring, const s32 len, UNI_NAME_T *uniname, s32 *p_lossy)
+{
+ if (SDFAT_SB(sb)->options.utf8)
+ return __nls_vfsname_to_utf16s(sb, p_cstring, len, uniname, p_lossy);
+ return __nls_vfsname_to_uni16s(sb, p_cstring, len, uniname, p_lossy);
+}
+
+/*======================================================================*/
+/* Local Function Definitions */
+/*======================================================================*/
+
+static s32 convert_ch_to_uni(struct nls_table *nls, u8 *ch, u16 *uni, s32 *lossy)
+{
+ int len;
+
+ *uni = 0x0;
+
+ if (ch[0] < 0x80) {
+ *uni = (u16) ch[0];
+ return 1;
+ }
+
+ len = nls->char2uni(ch, MAX_CHARSET_SIZE, uni);
+ if (len < 0) {
+ /* conversion failed */
+		DMSG("%s: failed to use nls\n", __func__);
+ if (lossy != NULL)
+ *lossy |= NLS_NAME_LOSSY;
+ *uni = (u16) '_';
+ if (!strcmp(nls->charset, "utf8"))
+ return 1;
+ return 2;
+ }
+
+ return len;
+} /* end of convert_ch_to_uni */
+
+static s32 convert_uni_to_ch(struct nls_table *nls, u16 uni, u8 *ch, s32 *lossy)
+{
+ int len;
+
+ ch[0] = 0x0;
+
+ if (uni < 0x0080) {
+ ch[0] = (u8) uni;
+ return 1;
+ }
+
+ len = nls->uni2char(uni, ch, MAX_CHARSET_SIZE);
+ if (len < 0) {
+ /* conversion failed */
+		DMSG("%s: failed to use nls\n", __func__);
+ if (lossy != NULL)
+ *lossy |= NLS_NAME_LOSSY;
+ ch[0] = '_';
+ return 1;
+ }
+
+ return len;
+
+} /* end of convert_uni_to_ch */
+
+/* end of nls.c */
diff --git a/fs/sdfat/sdfat.c b/fs/sdfat/sdfat.c
new file mode 100644
index 000000000000..2d9955cfc993
--- /dev/null
+++ b/fs/sdfat/sdfat.c
@@ -0,0 +1,5255 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/************************************************************************/
+/* */
+/* PROJECT : exFAT & FAT12/16/32 File System */
+/* FILE : sdfat.c */
+/* PURPOSE : sdFAT glue layer for supporting VFS */
+/* */
+/*----------------------------------------------------------------------*/
+/* NOTES */
+/* */
+/* */
+/************************************************************************/
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/time.h>
+#include <linux/slab.h>
+#include <linux/seq_file.h>
+#include <linux/pagemap.h>
+#include <linux/mpage.h>
+#include <linux/buffer_head.h>
+#include <linux/exportfs.h>
+#include <linux/mount.h>
+#include <linux/vfs.h>
+#include <linux/parser.h>
+#include <linux/uio.h>
+#include <linux/writeback.h>
+#include <linux/log2.h>
+#include <linux/hash.h>
+#include <linux/backing-dev.h>
+#include <linux/sched.h>
+#include <linux/fs_struct.h>
+#include <linux/namei.h>
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/swap.h> /* for mark_page_accessed() */
+#include <linux/vmalloc.h>
+#include <asm/current.h>
+#include <asm/unaligned.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 16, 0)
+#include <linux/iversion.h>
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0)
+#include <linux/aio.h>
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0)
+#error SDFAT only supports linux kernel version 3.0 or higher
+#endif
+
+#include "sdfat.h"
+#include "version.h"
+
+/* skip iterating emit_dots when dir is empty */
+#define ITER_POS_FILLED_DOTS (2)
+
+/* type index declared in sdfat.h */
+const char *FS_TYPE_STR[] = {
+ "auto",
+ "exfat",
+ "vfat"
+};
+
+static struct kset *sdfat_kset;
+static struct kmem_cache *sdfat_inode_cachep;
+
+
+static int sdfat_default_codepage = CONFIG_SDFAT_DEFAULT_CODEPAGE;
+static char sdfat_default_iocharset[] = CONFIG_SDFAT_DEFAULT_IOCHARSET;
+static const char sdfat_iocharset_with_utf8[] = "iso8859-1";
+
+#ifdef CONFIG_SDFAT_TRACE_SB_LOCK
+static unsigned long __lock_jiffies;
+#endif
+
+static void sdfat_truncate(struct inode *inode, loff_t old_size);
+static int sdfat_get_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create);
+
+static struct inode *sdfat_iget(struct super_block *sb, loff_t i_pos);
+static struct inode *sdfat_build_inode(struct super_block *sb, const FILE_ID_T *fid, loff_t i_pos);
+static void sdfat_detach(struct inode *inode);
+static void sdfat_attach(struct inode *inode, loff_t i_pos);
+static inline unsigned long sdfat_hash(loff_t i_pos);
+static int __sdfat_write_inode(struct inode *inode, int sync);
+static int sdfat_sync_inode(struct inode *inode);
+static int sdfat_write_inode(struct inode *inode, struct writeback_control *wbc);
+static void sdfat_write_super(struct super_block *sb);
+static void sdfat_write_failed(struct address_space *mapping, loff_t to);
+
+static void sdfat_init_namebuf(DENTRY_NAMEBUF_T *nb);
+static int sdfat_alloc_namebuf(DENTRY_NAMEBUF_T *nb);
+static void sdfat_free_namebuf(DENTRY_NAMEBUF_T *nb);
+
+/*************************************************************************
+ * INNER FUNCTIONS FOR FUNCTIONS WHICH HAVE KERNEL VERSION DEPENDENCIES
+ *************************************************************************/
+static int __sdfat_getattr(struct inode *inode, struct kstat *stat);
+static void __sdfat_writepage_end_io(struct bio *bio, int err);
+static inline void __lock_super(struct super_block *sb);
+static inline void __unlock_super(struct super_block *sb);
+static int __sdfat_create(struct inode *dir, struct dentry *dentry);
+static int __sdfat_revalidate(struct dentry *dentry);
+static int __sdfat_revalidate_ci(struct dentry *dentry, unsigned int flags);
+static int __sdfat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync);
+static struct dentry *__sdfat_lookup(struct inode *dir, struct dentry *dentry);
+static int __sdfat_mkdir(struct inode *dir, struct dentry *dentry);
+static int __sdfat_rename(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry);
+static int __sdfat_show_options(struct seq_file *m, struct super_block *sb);
+static inline ssize_t __sdfat_blkdev_direct_IO(int rw, struct kiocb *iocb,
+ struct inode *inode, void *iov_u, loff_t offset,
+ unsigned long nr_segs);
+static inline ssize_t __sdfat_direct_IO(int rw, struct kiocb *iocb,
+ struct inode *inode, void *iov_u, loff_t offset,
+ loff_t count, unsigned long nr_segs);
+static int __sdfat_d_hash(const struct dentry *dentry, struct qstr *qstr);
+static int __sdfat_d_hashi(const struct dentry *dentry, struct qstr *qstr);
+static int __sdfat_cmp(const struct dentry *dentry, unsigned int len,
+ const char *str, const struct qstr *name);
+static int __sdfat_cmpi(const struct dentry *dentry, unsigned int len,
+ const char *str, const struct qstr *name);
+
+/*************************************************************************
+ * FUNCTIONS WHICH HAVE KERNEL VERSION DEPENDENCIES
+ *************************************************************************/
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 16, 0)
+static inline void inode_set_iversion(struct inode *inode, u64 val)
+{
+ inode->i_version = val;
+}
+static inline u64 inode_peek_iversion(struct inode *inode)
+{
+ return inode->i_version;
+}
+#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0)
+ /* EMPTY */
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0) */
+static inline void bio_set_dev(struct bio *bio, struct block_device *bdev)
+{
+ bio->bi_bdev = bdev;
+}
+#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
+static int sdfat_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags)
+{
+ struct inode *inode = d_backing_inode(path->dentry);
+
+ return __sdfat_getattr(inode, stat);
+}
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) */
+static int sdfat_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+{
+ struct inode *inode = dentry->d_inode;
+
+ return __sdfat_getattr(inode, stat);
+}
+#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+static inline void __sdfat_clean_bdev_aliases(struct block_device *bdev, sector_t block)
+{
+ clean_bdev_aliases(bdev, block, 1);
+}
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4,10,0) */
+static inline void __sdfat_clean_bdev_aliases(struct block_device *bdev, sector_t block)
+{
+ unmap_underlying_metadata(bdev, block);
+}
+#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)
+static int sdfat_rename(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags)
+{
+ /*
+ * The VFS already checks for existence, so for local filesystems
+ * the RENAME_NOREPLACE implementation is equivalent to plain rename.
+ * Don't support any other flags
+ */
+ if (flags & ~RENAME_NOREPLACE)
+ return -EINVAL;
+ return __sdfat_rename(old_dir, old_dentry, new_dir, new_dentry);
+}
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) */
+static int sdfat_rename(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry)
+{
+ return __sdfat_rename(old_dir, old_dentry, new_dir, new_dentry);
+}
+
+static int setattr_prepare(struct dentry *dentry, struct iattr *attr)
+{
+ struct inode *inode = dentry->d_inode;
+
+ return inode_change_ok(inode, attr);
+}
+#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)
+static inline void __sdfat_submit_bio_write(struct bio *bio)
+{
+ bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+ submit_bio(bio);
+}
+
+static inline unsigned int __sdfat_full_name_hash(const struct dentry *dentry, const char *name, unsigned int len)
+{
+ return full_name_hash(dentry, name, len);
+}
+
+static inline unsigned long __sdfat_init_name_hash(const struct dentry *dentry)
+{
+ return init_name_hash(dentry);
+}
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0) */
+static inline void __sdfat_submit_bio_write(struct bio *bio)
+{
+ submit_bio(WRITE, bio);
+}
+
+static inline unsigned int __sdfat_full_name_hash(const struct dentry *unused, const char *name, unsigned int len)
+{
+ return full_name_hash(name, len);
+}
+
+static inline unsigned long __sdfat_init_name_hash(const struct dentry *unused)
+{
+ return init_name_hash();
+}
+#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 21)
+ /* EMPTY */
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 21) */
+static inline void inode_lock(struct inode *inode)
+{
+ mutex_lock(&inode->i_mutex);
+}
+
+static inline void inode_unlock(struct inode *inode)
+{
+ mutex_unlock(&inode->i_mutex);
+}
+#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
+static inline int sdfat_remount_syncfs(struct super_block *sb)
+{
+ sync_filesystem(sb);
+ return 0;
+}
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0) */
+static inline int sdfat_remount_syncfs(struct super_block *sb)
+{
+	/*
+	 * We don't need to call sync_filesystem(sb) here,
+	 * because the VFS has already called it.
+	 */
+ return 0;
+}
+#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0)
+static inline sector_t __sdfat_bio_sector(struct bio *bio)
+{
+ return bio->bi_iter.bi_sector;
+}
+
+static inline void __sdfat_set_bio_iterate(struct bio *bio, sector_t sector,
+ unsigned int size, unsigned int idx, unsigned int done)
+{
+ struct bvec_iter *iter = &(bio->bi_iter);
+
+ iter->bi_sector = sector;
+ iter->bi_size = size;
+ iter->bi_idx = idx;
+ iter->bi_bvec_done = done;
+}
+
+static void __sdfat_truncate_pagecache(struct inode *inode,
+ loff_t to, loff_t newsize)
+{
+ truncate_pagecache(inode, newsize);
+}
+
+static int sdfat_d_hash(const struct dentry *dentry, struct qstr *qstr)
+{
+ return __sdfat_d_hash(dentry, qstr);
+}
+
+static int sdfat_d_hashi(const struct dentry *dentry, struct qstr *qstr)
+{
+ return __sdfat_d_hashi(dentry, qstr);
+}
+
+/* ->iterate() handler, used instead of sdfat_readdir() on kernels >= 3.14 */
+static int sdfat_iterate(struct file *filp, struct dir_context *ctx)
+{
+ struct inode *inode = filp->f_path.dentry->d_inode;
+ struct super_block *sb = inode->i_sb;
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ FS_INFO_T *fsi = &(sbi->fsi);
+ DIR_ENTRY_T de;
+ DENTRY_NAMEBUF_T *nb = &(de.NameBuf);
+ unsigned long inum;
+ loff_t cpos;
+ int err = 0, fake_offset = 0;
+
+ sdfat_init_namebuf(nb);
+ __lock_super(sb);
+
+ cpos = ctx->pos;
+ if ((fsi->vol_type == EXFAT) || (inode->i_ino == SDFAT_ROOT_INO)) {
+ if (!dir_emit_dots(filp, ctx))
+ goto out;
+ if (ctx->pos == ITER_POS_FILLED_DOTS) {
+ cpos = 0;
+ fake_offset = 1;
+ }
+ }
+ if (cpos & (DENTRY_SIZE - 1)) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ /* name buffer should be allocated before use */
+ err = sdfat_alloc_namebuf(nb);
+ if (err)
+ goto out;
+get_new:
+ SDFAT_I(inode)->fid.size = i_size_read(inode);
+ SDFAT_I(inode)->fid.rwoffset = cpos >> DENTRY_SIZE_BITS;
+
+ if (cpos >= SDFAT_I(inode)->fid.size)
+ goto end_of_dir;
+
+ err = fsapi_readdir(inode, &de);
+ if (err) {
+ // at least we tried to read a sector
+ // move cpos to next sector position (should be aligned)
+ if (err == -EIO) {
+ cpos += 1 << (sb->s_blocksize_bits);
+ cpos &= ~((u32)sb->s_blocksize-1);
+ }
+
+ err = -EIO;
+ goto end_of_dir;
+ }
+
+ cpos = SDFAT_I(inode)->fid.rwoffset << DENTRY_SIZE_BITS;
+
+ if (!nb->lfn[0])
+ goto end_of_dir;
+
+ if (!memcmp(nb->sfn, DOS_CUR_DIR_NAME, DOS_NAME_LENGTH)) {
+ inum = inode->i_ino;
+ } else if (!memcmp(nb->sfn, DOS_PAR_DIR_NAME, DOS_NAME_LENGTH)) {
+ inum = parent_ino(filp->f_path.dentry);
+ } else {
+ loff_t i_pos = ((loff_t) SDFAT_I(inode)->fid.start_clu << 32) |
+ ((SDFAT_I(inode)->fid.rwoffset-1) & 0xffffffff);
+ struct inode *tmp = sdfat_iget(sb, i_pos);
+
+ if (tmp) {
+ inum = tmp->i_ino;
+ iput(tmp);
+ } else {
+ inum = iunique(sb, SDFAT_ROOT_INO);
+ }
+ }
+
+	/* sb_lock must be released before calling dir_emit(), because
+	 * dir_emit() can take a page fault when the user-supplied buffer
+	 * is larger than one page.
+	 */
+ __unlock_super(sb);
+ if (!dir_emit(ctx, nb->lfn, strlen(nb->lfn), inum,
+ (de.Attr & ATTR_SUBDIR) ? DT_DIR : DT_REG))
+ goto out_unlocked;
+ __lock_super(sb);
+
+ ctx->pos = cpos;
+ goto get_new;
+
+end_of_dir:
+ if (!cpos && fake_offset)
+ cpos = ITER_POS_FILLED_DOTS;
+ ctx->pos = cpos;
+out:
+ __unlock_super(sb);
+out_unlocked:
+	/*
+	 * To improve performance, free the namebuf after releasing sb_lock.
+	 * If the namebuf is not allocated, sdfat_free_namebuf() does nothing.
+	 */
+ sdfat_free_namebuf(nb);
+ return err;
+}
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) */
+static inline sector_t __sdfat_bio_sector(struct bio *bio)
+{
+ return bio->bi_sector;
+}
+
+static inline void __sdfat_set_bio_iterate(struct bio *bio, sector_t sector,
+ unsigned int size, unsigned int idx, unsigned int done)
+{
+ bio->bi_sector = sector;
+ bio->bi_idx = idx;
+	bio->bi_size = size;
+}
+
+static void __sdfat_truncate_pagecache(struct inode *inode,
+ loff_t to, loff_t newsize)
+{
+ truncate_pagecache(inode, to, newsize);
+}
+
+static int sdfat_d_hash(const struct dentry *dentry,
+ const struct inode *inode, struct qstr *qstr)
+{
+ return __sdfat_d_hash(dentry, qstr);
+}
+
+static int sdfat_d_hashi(const struct dentry *dentry,
+ const struct inode *inode, struct qstr *qstr)
+{
+ return __sdfat_d_hashi(dentry, qstr);
+}
+
+static int sdfat_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+ struct inode *inode = filp->f_path.dentry->d_inode;
+ struct super_block *sb = inode->i_sb;
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ FS_INFO_T *fsi = &(sbi->fsi);
+ DIR_ENTRY_T de;
+ DENTRY_NAMEBUF_T *nb = &(de.NameBuf);
+ unsigned long inum;
+ loff_t cpos;
+ int err = 0, fake_offset = 0;
+
+ sdfat_init_namebuf(nb);
+ __lock_super(sb);
+
+ cpos = filp->f_pos;
+ /* Fake . and .. for the root directory. */
+ if ((fsi->vol_type == EXFAT) || (inode->i_ino == SDFAT_ROOT_INO)) {
+ while (cpos < ITER_POS_FILLED_DOTS) {
+ if (inode->i_ino == SDFAT_ROOT_INO)
+ inum = SDFAT_ROOT_INO;
+ else if (cpos == 0)
+ inum = inode->i_ino;
+ else /* (cpos == 1) */
+ inum = parent_ino(filp->f_path.dentry);
+
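+			/* Illustrative note: passing ".." with name length
+			 * cpos+1 emits "." (len 1) when cpos == 0 and ".."
+			 * (len 2) when cpos == 1. */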
+ if (filldir(dirent, "..", cpos+1, cpos, inum, DT_DIR) < 0)
+ goto out;
+ cpos++;
+ filp->f_pos++;
+ }
+ if (cpos == ITER_POS_FILLED_DOTS) {
+ cpos = 0;
+ fake_offset = 1;
+ }
+ }
+ if (cpos & (DENTRY_SIZE - 1)) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ /* name buffer should be allocated before use */
+ err = sdfat_alloc_namebuf(nb);
+ if (err)
+ goto out;
+get_new:
+ SDFAT_I(inode)->fid.size = i_size_read(inode);
+ SDFAT_I(inode)->fid.rwoffset = cpos >> DENTRY_SIZE_BITS;
+
+ if (cpos >= SDFAT_I(inode)->fid.size)
+ goto end_of_dir;
+
+ err = fsapi_readdir(inode, &de);
+ if (err) {
+ // at least we tried to read a sector
+ // move cpos to next sector position (should be aligned)
+ if (err == -EIO) {
+ cpos += 1 << (sb->s_blocksize_bits);
+ cpos &= ~((u32)sb->s_blocksize-1);
+ }
+
+ err = -EIO;
+ goto end_of_dir;
+ }
+
+ cpos = SDFAT_I(inode)->fid.rwoffset << DENTRY_SIZE_BITS;
+
+ if (!nb->lfn[0])
+ goto end_of_dir;
+
+ if (!memcmp(nb->sfn, DOS_CUR_DIR_NAME, DOS_NAME_LENGTH)) {
+ inum = inode->i_ino;
+ } else if (!memcmp(nb->sfn, DOS_PAR_DIR_NAME, DOS_NAME_LENGTH)) {
+ inum = parent_ino(filp->f_path.dentry);
+ } else {
+ loff_t i_pos = ((loff_t) SDFAT_I(inode)->fid.start_clu << 32) |
+ ((SDFAT_I(inode)->fid.rwoffset-1) & 0xffffffff);
+ struct inode *tmp = sdfat_iget(sb, i_pos);
+
+ if (tmp) {
+ inum = tmp->i_ino;
+ iput(tmp);
+ } else {
+ inum = iunique(sb, SDFAT_ROOT_INO);
+ }
+ }
+
+	/* sb_lock must be released before calling filldir(), because
+	 * filldir() can take a page fault when the user-supplied buffer
+	 * is larger than one page.
+	 */
+ __unlock_super(sb);
+ if (filldir(dirent, nb->lfn, strlen(nb->lfn), cpos, inum,
+ (de.Attr & ATTR_SUBDIR) ? DT_DIR : DT_REG) < 0)
+ goto out_unlocked;
+ __lock_super(sb);
+
+ filp->f_pos = cpos;
+ goto get_new;
+
+end_of_dir:
+ if (!cpos && fake_offset)
+ cpos = ITER_POS_FILLED_DOTS;
+ filp->f_pos = cpos;
+out:
+ __unlock_super(sb);
+out_unlocked:
+	/*
+	 * To improve performance, free the namebuf after releasing sb_lock.
+	 * If the namebuf is not allocated, sdfat_free_namebuf() does nothing.
+	 */
+ sdfat_free_namebuf(nb);
+ return err;
+}
+#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
+ /* EMPTY */
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0) */
+static inline struct inode *file_inode(const struct file *f)
+{
+ return f->f_dentry->d_inode;
+}
+#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
+static inline int __is_sb_dirty(struct super_block *sb)
+{
+ return SDFAT_SB(sb)->s_dirt;
+}
+
+static inline void __set_sb_clean(struct super_block *sb)
+{
+ SDFAT_SB(sb)->s_dirt = 0;
+}
+
+/* Workqueue wrapper for sdfat_write_super() */
+static void __write_super_delayed(struct work_struct *work)
+{
+ struct sdfat_sb_info *sbi;
+ struct super_block *sb;
+
+ sbi = container_of(work, struct sdfat_sb_info, write_super_work.work);
+ sb = sbi->host_sb;
+
+ /* XXX: Is this needed? */
+ if (!sb || !down_read_trylock(&sb->s_umount)) {
+ DMSG("%s: skip delayed work(write_super).\n", __func__);
+ return;
+ }
+
+ DMSG("%s: do delayed_work(write_super).\n", __func__);
+
+ spin_lock(&sbi->work_lock);
+ sbi->write_super_queued = 0;
+ spin_unlock(&sbi->work_lock);
+
+ sdfat_write_super(sb);
+
+ up_read(&sb->s_umount);
+}
+
+static void setup_sdfat_sync_super_wq(struct super_block *sb)
+{
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+
+ mutex_init(&sbi->s_lock);
+ spin_lock_init(&sbi->work_lock);
+ INIT_DELAYED_WORK(&sbi->write_super_work, __write_super_delayed);
+ sbi->host_sb = sb;
+}
+
+static inline bool __cancel_delayed_work_sync(struct sdfat_sb_info *sbi)
+{
+ return cancel_delayed_work_sync(&sbi->write_super_work);
+}
+
+static inline void lock_super(struct super_block *sb)
+{
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+
+ mutex_lock(&sbi->s_lock);
+}
+
+static inline void unlock_super(struct super_block *sb)
+{
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+
+ mutex_unlock(&sbi->s_lock);
+}
+
+static int sdfat_revalidate(struct dentry *dentry, unsigned int flags)
+{
+ if (flags & LOOKUP_RCU)
+ return -ECHILD;
+
+ return __sdfat_revalidate(dentry);
+}
+
+static int sdfat_revalidate_ci(struct dentry *dentry, unsigned int flags)
+{
+ if (flags & LOOKUP_RCU)
+ return -ECHILD;
+
+ return __sdfat_revalidate_ci(dentry, flags);
+}
+
+static struct inode *sdfat_iget(struct super_block *sb, loff_t i_pos)
+{
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ struct sdfat_inode_info *info;
+ struct hlist_head *head = sbi->inode_hashtable + sdfat_hash(i_pos);
+ struct inode *inode = NULL;
+
+ spin_lock(&sbi->inode_hash_lock);
+ hlist_for_each_entry(info, head, i_hash_fat) {
+ BUG_ON(info->vfs_inode.i_sb != sb);
+
+ if (i_pos != info->i_pos)
+ continue;
+ inode = igrab(&info->vfs_inode);
+ if (inode)
+ break;
+ }
+ spin_unlock(&sbi->inode_hash_lock);
+ return inode;
+}
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0) */
+static inline int __is_sb_dirty(struct super_block *sb)
+{
+ return sb->s_dirt;
+}
+
+static inline void __set_sb_clean(struct super_block *sb)
+{
+ sb->s_dirt = 0;
+}
+
+static void setup_sdfat_sync_super_wq(struct super_block *sb)
+{
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+
+ sbi->host_sb = sb;
+}
+
+static inline bool __cancel_delayed_work_sync(struct sdfat_sb_info *sbi)
+{
+ /* DO NOTHING */
+ return 0;
+}
+
+static inline void clear_inode(struct inode *inode)
+{
+ end_writeback(inode);
+}
+
+static int sdfat_revalidate(struct dentry *dentry, struct nameidata *nd)
+{
+ if (nd && nd->flags & LOOKUP_RCU)
+ return -ECHILD;
+
+ return __sdfat_revalidate(dentry);
+}
+
+static int sdfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
+{
+ if (nd && nd->flags & LOOKUP_RCU)
+ return -ECHILD;
+
+	return __sdfat_revalidate_ci(dentry, nd ? nd->flags : 0);
+}
+
+static struct inode *sdfat_iget(struct super_block *sb, loff_t i_pos)
+{
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ struct sdfat_inode_info *info;
+ struct hlist_node *node;
+ struct hlist_head *head = sbi->inode_hashtable + sdfat_hash(i_pos);
+ struct inode *inode = NULL;
+
+ spin_lock(&sbi->inode_hash_lock);
+ hlist_for_each_entry(info, node, head, i_hash_fat) {
+ BUG_ON(info->vfs_inode.i_sb != sb);
+
+ if (i_pos != info->i_pos)
+ continue;
+ inode = igrab(&info->vfs_inode);
+ if (inode)
+ break;
+ }
+ spin_unlock(&sbi->inode_hash_lock);
+ return inode;
+}
+#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0)
+static struct dentry *sdfat_lookup(struct inode *dir, struct dentry *dentry,
+ unsigned int flags)
+{
+ return __sdfat_lookup(dir, dentry);
+}
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 6, 0) */
+static struct dentry *sdfat_lookup(struct inode *dir, struct dentry *dentry,
+ struct nameidata *nd)
+{
+ return __sdfat_lookup(dir, dentry);
+}
+#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)
+ /* NOTHING NOW */
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0) */
+#define GLOBAL_ROOT_UID (0)
+#define GLOBAL_ROOT_GID (0)
+
+static inline bool uid_eq(uid_t left, uid_t right)
+{
+ return left == right;
+}
+
+static inline bool gid_eq(gid_t left, gid_t right)
+{
+ return left == right;
+}
+
+static inline uid_t from_kuid_munged(struct user_namespace *to, uid_t kuid)
+{
+ return kuid;
+}
+
+static inline gid_t from_kgid_munged(struct user_namespace *to, gid_t kgid)
+{
+ return kgid;
+}
+
+static inline uid_t make_kuid(struct user_namespace *from, uid_t uid)
+{
+ return uid;
+}
+
+static inline gid_t make_kgid(struct user_namespace *from, gid_t gid)
+{
+ return gid;
+}
+#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
+static struct dentry *__d_make_root(struct inode *root_inode)
+{
+ return d_make_root(root_inode);
+}
+
+static void __sdfat_do_truncate(struct inode *inode, loff_t old, loff_t new)
+{
+ down_write(&SDFAT_I(inode)->truncate_lock);
+ truncate_setsize(inode, new);
+ sdfat_truncate(inode, old);
+ up_write(&SDFAT_I(inode)->truncate_lock);
+}
+
+static sector_t sdfat_aop_bmap(struct address_space *mapping, sector_t block)
+{
+ sector_t blocknr;
+
+ /* sdfat_get_cluster() assumes the requested blocknr isn't truncated. */
+ down_read(&SDFAT_I(mapping->host)->truncate_lock);
+ blocknr = generic_block_bmap(mapping, block, sdfat_get_block);
+ up_read(&SDFAT_I(mapping->host)->truncate_lock);
+ return blocknr;
+}
+
+static int sdfat_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+ return __sdfat_mkdir(dir, dentry);
+}
+
+static int sdfat_show_options(struct seq_file *m, struct dentry *root)
+{
+ return __sdfat_show_options(m, root->d_sb);
+}
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0) */
+static inline void set_nlink(struct inode *inode, unsigned int nlink)
+{
+ inode->i_nlink = nlink;
+}
+
+static struct dentry *__d_make_root(struct inode *root_inode)
+{
+ return d_alloc_root(root_inode);
+}
+
+static void __sdfat_do_truncate(struct inode *inode, loff_t old, loff_t new)
+{
+ truncate_setsize(inode, new);
+ sdfat_truncate(inode, old);
+}
+
+static sector_t sdfat_aop_bmap(struct address_space *mapping, sector_t block)
+{
+ sector_t blocknr;
+
+ /* sdfat_get_cluster() assumes the requested blocknr isn't truncated. */
+ down_read(&mapping->host->i_alloc_sem);
+ blocknr = generic_block_bmap(mapping, block, sdfat_get_block);
+ up_read(&mapping->host->i_alloc_sem);
+ return blocknr;
+}
+
+static int sdfat_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+{
+ return __sdfat_mkdir(dir, dentry);
+}
+
+static int sdfat_show_options(struct seq_file *m, struct vfsmount *mnt)
+{
+ return __sdfat_show_options(m, mnt->mnt_sb);
+}
+#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
+#define __sdfat_generic_file_fsync(filp, start, end, datasync) \
+ generic_file_fsync(filp, start, end, datasync)
+
+static int sdfat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
+{
+ return __sdfat_file_fsync(filp, start, end, datasync);
+}
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0) */
+#define __sdfat_generic_file_fsync(filp, start, end, datasync) \
+ generic_file_fsync(filp, datasync)
+static int sdfat_file_fsync(struct file *filp, int datasync)
+{
+ return __sdfat_file_fsync(filp, 0, 0, datasync);
+}
+#endif
+
+/*************************************************************************
+ * MORE FUNCTIONS WHICH HAVE KERNEL VERSION DEPENDENCIES
+ *************************************************************************/
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 18, 0)
+#define CURRENT_TIME_SEC timespec64_trunc(current_kernel_time64(), NSEC_PER_SEC)
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 12, 0)
+#define CURRENT_TIME_SEC timespec_trunc(current_kernel_time(), NSEC_PER_SEC)
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0) */
+ /* EMPTY */
+#endif
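+/*
+ * Note: CURRENT_TIME_SEC was removed from mainline around v4.12, so the
+ * definitions above reintroduce it with one-second granularity; FAT
+ * on-disk timestamps have at best 1-2s resolution anyway, so nothing
+ * finer is needed here.
+ */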
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0)
+static void sdfat_writepage_end_io(struct bio *bio)
+{
+ __sdfat_writepage_end_io(bio, blk_status_to_errno(bio->bi_status));
+}
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
+static void sdfat_writepage_end_io(struct bio *bio)
+{
+ __sdfat_writepage_end_io(bio, bio->bi_error);
+}
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) */
+static void sdfat_writepage_end_io(struct bio *bio, int err)
+{
+ if (test_bit(BIO_UPTODATE, &bio->bi_flags))
+ err = 0;
+ __sdfat_writepage_end_io(bio, err);
+}
+#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)
+static int sdfat_cmp(const struct dentry *dentry,
+ unsigned int len, const char *str, const struct qstr *name)
+{
+ return __sdfat_cmp(dentry, len, str, name);
+}
+
+static int sdfat_cmpi(const struct dentry *dentry,
+ unsigned int len, const char *str, const struct qstr *name)
+{
+ return __sdfat_cmpi(dentry, len, str, name);
+}
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0)
+static int sdfat_cmp(const struct dentry *parent, const struct dentry *dentry,
+ unsigned int len, const char *str, const struct qstr *name)
+{
+ return __sdfat_cmp(dentry, len, str, name);
+}
+
+static int sdfat_cmpi(const struct dentry *parent, const struct dentry *dentry,
+ unsigned int len, const char *str, const struct qstr *name)
+{
+ return __sdfat_cmpi(dentry, len, str, name);
+}
+#else
+static int sdfat_cmp(const struct dentry *parent, const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name)
+{
+ return __sdfat_cmp(dentry, len, str, name);
+}
+
+static int sdfat_cmpi(const struct dentry *parent, const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name)
+{
+ return __sdfat_cmpi(dentry, len, str, name);
+}
+#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+static ssize_t sdfat_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
+{
+ struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
+ size_t count = iov_iter_count(iter);
+ int rw = iov_iter_rw(iter);
+ loff_t offset = iocb->ki_pos;
+
+ return __sdfat_direct_IO(rw, iocb, inode,
+ (void *)iter, offset, count, 0 /* UNUSED */);
+}
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0)
+static ssize_t sdfat_direct_IO(struct kiocb *iocb,
+ struct iov_iter *iter,
+ loff_t offset)
+{
+ struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
+ size_t count = iov_iter_count(iter);
+ int rw = iov_iter_rw(iter);
+
+ return __sdfat_direct_IO(rw, iocb, inode,
+ (void *)iter, offset, count, 0 /* UNUSED */);
+}
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
+static ssize_t sdfat_direct_IO(int rw, struct kiocb *iocb,
+ struct iov_iter *iter,
+ loff_t offset)
+{
+ struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
+ size_t count = iov_iter_count(iter);
+
+ return __sdfat_direct_IO(rw, iocb, inode,
+ (void *)iter, offset, count, 0 /* UNUSED */);
+}
+#else
+static ssize_t sdfat_direct_IO(int rw, struct kiocb *iocb,
+ const struct iovec *iov, loff_t offset, unsigned long nr_segs)
+{
+ struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
+ size_t count = iov_length(iov, nr_segs);
+
+ return __sdfat_direct_IO(rw, iocb, inode,
+ (void *)iov, offset, count, nr_segs);
+}
+#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+static inline ssize_t __sdfat_blkdev_direct_IO(int unused, struct kiocb *iocb,
+ struct inode *inode, void *iov_u, loff_t unused_1,
+ unsigned long nr_segs)
+{
+ struct iov_iter *iter = (struct iov_iter *)iov_u;
+
+ return blockdev_direct_IO(iocb, inode, iter, sdfat_get_block);
+}
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0)
+static inline ssize_t __sdfat_blkdev_direct_IO(int unused, struct kiocb *iocb,
+ struct inode *inode, void *iov_u, loff_t offset,
+ unsigned long nr_segs)
+{
+ struct iov_iter *iter = (struct iov_iter *)iov_u;
+
+ return blockdev_direct_IO(iocb, inode, iter, offset, sdfat_get_block);
+}
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
+static inline ssize_t __sdfat_blkdev_direct_IO(int rw, struct kiocb *iocb,
+ struct inode *inode, void *iov_u, loff_t offset,
+ unsigned long nr_segs)
+{
+ struct iov_iter *iter = (struct iov_iter *)iov_u;
+
+ return blockdev_direct_IO(rw, iocb, inode, iter,
+ offset, sdfat_get_block);
+}
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
+static inline ssize_t __sdfat_blkdev_direct_IO(int rw, struct kiocb *iocb,
+ struct inode *inode, void *iov_u, loff_t offset,
+ unsigned long nr_segs)
+{
+ const struct iovec *iov = (const struct iovec *)iov_u;
+
+ return blockdev_direct_IO(rw, iocb, inode, iov,
+ offset, nr_segs, sdfat_get_block);
+}
+#else
+static inline ssize_t __sdfat_blkdev_direct_IO(int rw, struct kiocb *iocb,
+ struct inode *inode, void *iov_u, loff_t offset,
+ unsigned long nr_segs)
+{
+ const struct iovec *iov = (const struct iovec *)iov_u;
+
+ return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
+ offset, nr_segs, sdfat_get_block, NULL);
+}
+#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0)
+static const char *sdfat_follow_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done)
+{
+ struct sdfat_inode_info *ei = SDFAT_I(inode);
+
+ return (char *)(ei->target);
+}
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0)
+static const char *sdfat_follow_link(struct dentry *dentry, void **cookie)
+{
+ struct sdfat_inode_info *ei = SDFAT_I(dentry->d_inode);
+
+ return *cookie = (char *)(ei->target);
+}
+#else
+static void *sdfat_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+ struct sdfat_inode_info *ei = SDFAT_I(dentry->d_inode);
+
+ nd_set_link(nd, (char *)(ei->target));
+ return NULL;
+}
+#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0)
+static int sdfat_create(struct inode *dir, struct dentry *dentry, umode_t mode,
+ bool excl)
+{
+ return __sdfat_create(dir, dentry);
+}
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
+static int sdfat_create(struct inode *dir, struct dentry *dentry, umode_t mode,
+ struct nameidata *nd)
+{
+ return __sdfat_create(dir, dentry);
+}
+#else
+static int sdfat_create(struct inode *dir, struct dentry *dentry, int mode,
+ struct nameidata *nd)
+{
+ return __sdfat_create(dir, dentry);
+}
+#endif
+
+
+/*************************************************************************
+ * WRAP FUNCTIONS FOR DEBUGGING
+ *************************************************************************/
+#ifdef CONFIG_SDFAT_TRACE_SB_LOCK
+static inline void __lock_super(struct super_block *sb)
+{
+ lock_super(sb);
+ __lock_jiffies = jiffies;
+}
+
+static inline void __unlock_super(struct super_block *sb)
+{
+ int time = ((jiffies - __lock_jiffies) * 1000 / HZ);
+ /* FIXME : error message should be modified */
+ if (time > 10)
+ EMSG("lock_super in %s (%d ms)\n", __func__, time);
+
+ unlock_super(sb);
+}
+#else /* CONFIG_SDFAT_TRACE_SB_LOCK */
+static inline void __lock_super(struct super_block *sb)
+{
+ lock_super(sb);
+}
+
+static inline void __unlock_super(struct super_block *sb)
+{
+ unlock_super(sb);
+}
+#endif /* CONFIG_SDFAT_TRACE_SB_LOCK */
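+/*
+ * Note: "(jiffies - __lock_jiffies) * 1000 / HZ" converts the hold time
+ * to milliseconds by hand; e.g. with HZ == 250, 5 jiffies -> 20 ms.
+ */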
+
+/*************************************************************************
+ * NORMAL FUNCTIONS
+ *************************************************************************/
+static inline loff_t sdfat_make_i_pos(FILE_ID_T *fid)
+{
+ return ((loff_t) fid->dir.dir << 32) | (fid->entry & 0xffffffff);
+}
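+/*
+ * Illustrative note: i_pos packs the directory's first cluster into the
+ * upper 32 bits and the dentry index into the lower 32 bits, e.g.
+ * fid->dir.dir == 0x1234 and fid->entry == 5 yield 0x0000123400000005.
+ * sdfat_iget() above uses this value as the inode hash key.
+ */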
+
+/*======================================================================*/
+/* Directory Entry Name Buffer Operations */
+/*======================================================================*/
+static void sdfat_init_namebuf(DENTRY_NAMEBUF_T *nb)
+{
+ nb->lfn = NULL;
+ nb->sfn = NULL;
+ nb->lfnbuf_len = 0;
+ nb->sfnbuf_len = 0;
+}
+
+static int sdfat_alloc_namebuf(DENTRY_NAMEBUF_T *nb)
+{
+ nb->lfn = __getname();
+ if (!nb->lfn)
+ return -ENOMEM;
+ nb->sfn = nb->lfn + MAX_VFSNAME_BUF_SIZE;
+ nb->lfnbuf_len = MAX_VFSNAME_BUF_SIZE;
+ nb->sfnbuf_len = MAX_VFSNAME_BUF_SIZE;
+ return 0;
+}
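+/*
+ * Note (assumption): __getname() returns a PATH_MAX-sized buffer, which
+ * is split into two MAX_VFSNAME_BUF_SIZE halves above, one for the long
+ * name and one for the short name; this presumes MAX_VFSNAME_BUF_SIZE is
+ * no larger than PATH_MAX / 2.
+ */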
+
+static void sdfat_free_namebuf(DENTRY_NAMEBUF_T *nb)
+{
+ if (!nb->lfn)
+ return;
+
+ __putname(nb->lfn);
+ sdfat_init_namebuf(nb);
+}
+
+/*======================================================================*/
+/* Directory Entry Operations */
+/*======================================================================*/
+#define SDFAT_DSTATE_LOCKED (void *)(0xCAFE2016)
+#define SDFAT_DSTATE_UNLOCKED (void *)(0x00000000)
+
+static inline void __lock_d_revalidate(struct dentry *dentry)
+{
+ spin_lock(&dentry->d_lock);
+ dentry->d_fsdata = SDFAT_DSTATE_LOCKED;
+ spin_unlock(&dentry->d_lock);
+}
+
+static inline void __unlock_d_revalidate(struct dentry *dentry)
+{
+ spin_lock(&dentry->d_lock);
+ dentry->d_fsdata = SDFAT_DSTATE_UNLOCKED;
+ spin_unlock(&dentry->d_lock);
+}
+
+/* __check_dstate_locked requires dentry->d_lock */
+static inline int __check_dstate_locked(struct dentry *dentry)
+{
+ if (dentry->d_fsdata == SDFAT_DSTATE_LOCKED)
+ return 1;
+
+ return 0;
+}
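+/*
+ * Note: d_fsdata serves as a small revalidation latch here. Directory
+ * operations (see __lock_d_revalidate() used by __sdfat_create() below)
+ * set it to SDFAT_DSTATE_LOCKED for the window in which the dentry may
+ * briefly look stale, and __sdfat_revalidate_common() keeps such dentries
+ * valid instead of dropping them.
+ */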
+
+/*
+ * If a new entry was created in the parent, it may also have created the
+ * 8.3 alias (the short name of the long name). So the parent may hold a
+ * negative dentry that matches the created 8.3 alias.
+ *
+ * If that happened, the negative dentry isn't actually negative anymore,
+ * so drop it.
+ */
+static int __sdfat_revalidate_common(struct dentry *dentry)
+{
+ int ret = 1;
+
+ spin_lock(&dentry->d_lock);
+ if ((!dentry->d_inode) && (!__check_dstate_locked(dentry) &&
+ (dentry->d_time !=
+ (unsigned long)inode_peek_iversion(dentry->d_parent->d_inode)))) {
+ ret = 0;
+ }
+ spin_unlock(&dentry->d_lock);
+ return ret;
+}
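+/*
+ * Note: a negative dentry stays valid only while its d_time still matches
+ * the parent's iversion; any create in the parent bumps the iversion (see
+ * inode_inc_iversion() in __sdfat_create() below), which invalidates
+ * stale negative dentries on the next lookup.
+ */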
+
+static int __sdfat_revalidate(struct dentry *dentry)
+{
+	/* This is not a negative dentry; it is always valid. */
+ if (dentry->d_inode)
+ return 1;
+ return __sdfat_revalidate_common(dentry);
+}
+
+static int __sdfat_revalidate_ci(struct dentry *dentry, unsigned int flags)
+{
+	/*
+	 * This is not a negative dentry; it is always valid.
+	 *
+	 * Note: rename() onto an existing directory entry will have
+	 * ->d_inode and will keep using the existing name, which is not
+	 * the name specified by the user.
+	 *
+	 * We might be able to drop this positive dentry here, but dropping
+	 * a positive dentry isn't a good idea. So this stays unsupported,
+	 * like rename("filename", "FILENAME"), for now.
+	 */
+ if (dentry->d_inode)
+ return 1;
+#if 0 /* Disabled because lookup_one_len(), called by stackable filesystems, passes no flags */
+ /*
+ * This may be nfsd (or something), anyway, we can't see the
+ * intent of this. So, since this can be for creation, drop it.
+ */
+ if (!flags)
+ return 0;
+#endif
+	/*
+	 * Drop the negative dentry to make sure the case-sensitive name
+	 * specified by the user is used if this lookup is for creation.
+	 */
+ if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
+ return 0;
+ return __sdfat_revalidate_common(dentry);
+}
+
+
+/* returns the length of a struct qstr, ignoring trailing dots */
+static unsigned int __sdfat_striptail_len(unsigned int len, const char *name)
+{
+ while (len && name[len - 1] == '.')
+ len--;
+ return len;
+}
+
+static unsigned int sdfat_striptail_len(const struct qstr *qstr)
+{
+ return __sdfat_striptail_len(qstr->len, qstr->name);
+}
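+/* e.g. __sdfat_striptail_len(5, "foo..") returns 3, and a name that is
+ * all dots strips down to length 0. */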
+
+/*
+ * Compute the hash for the sdfat name corresponding to the dentry.
+ * Note: if the name is invalid, we leave the hash code unchanged so
+ * that the existing dentry can be used. The sdfat fs routines will
+ * return ENOENT or EINVAL as appropriate.
+ */
+static int __sdfat_d_hash(const struct dentry *dentry, struct qstr *qstr)
+{
+ unsigned int len = sdfat_striptail_len(qstr);
+
+ qstr->hash = __sdfat_full_name_hash(dentry, qstr->name, len);
+ return 0;
+}
+
+/*
+ * Compute the case-insensitive hash for the sdfat name corresponding to
+ * the dentry. Note: if the name is invalid, we leave the hash code
+ * unchanged so that the existing dentry can be used. The sdfat fs
+ * routines will return ENOENT or EINVAL as appropriate.
+ */
+static int __sdfat_d_hashi(const struct dentry *dentry, struct qstr *qstr)
+{
+ struct nls_table *t = SDFAT_SB(dentry->d_sb)->nls_io;
+ const unsigned char *name;
+ unsigned int len;
+ unsigned long hash;
+
+ name = qstr->name;
+ len = sdfat_striptail_len(qstr);
+
+ hash = __sdfat_init_name_hash(dentry);
+ while (len--)
+ hash = partial_name_hash(nls_tolower(t, *name++), hash);
+ qstr->hash = end_name_hash(hash);
+
+ return 0;
+}
+
+/*
+ * Case sensitive compare of two sdfat names.
+ */
+static int __sdfat_cmp(const struct dentry *dentry, unsigned int len,
+ const char *str, const struct qstr *name)
+{
+ unsigned int alen, blen;
+
+ /* A filename cannot end in '.' or we treat it like it has none */
+ alen = sdfat_striptail_len(name);
+ blen = __sdfat_striptail_len(len, str);
+ if (alen == blen) {
+ if (strncmp(name->name, str, alen) == 0)
+ return 0;
+ }
+ return 1;
+}
+
+/*
+ * Case insensitive compare of two sdfat names.
+ */
+static int __sdfat_cmpi(const struct dentry *dentry, unsigned int len,
+ const char *str, const struct qstr *name)
+{
+ struct nls_table *t = SDFAT_SB(dentry->d_sb)->nls_io;
+ unsigned int alen, blen;
+
+ /* A filename cannot end in '.' or we treat it like it has none */
+ alen = sdfat_striptail_len(name);
+ blen = __sdfat_striptail_len(len, str);
+ if (alen == blen) {
+ if (nls_strnicmp(t, name->name, str, alen) == 0)
+ return 0;
+ }
+ return 1;
+}
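+/* As with generic ->d_compare() implementations, both helpers above
+ * return 0 on a match and 1 on a mismatch. */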
+
+static const struct dentry_operations sdfat_dentry_ops = {
+ .d_revalidate = sdfat_revalidate,
+ .d_hash = sdfat_d_hash,
+ .d_compare = sdfat_cmp,
+};
+
+static const struct dentry_operations sdfat_ci_dentry_ops = {
+ .d_revalidate = sdfat_revalidate_ci,
+ .d_hash = sdfat_d_hashi,
+ .d_compare = sdfat_cmpi,
+};
+
+#ifdef CONFIG_SDFAT_DFR
+/*----------------------------------------------------------------------*/
+/* Defragmentation related */
+/*----------------------------------------------------------------------*/
+/**
+ * @fn defrag_cleanup_reqs
+ * @brief clean-up defrag info depending on error flag
+ * @return void
+ * @param sb super block
+ * @param error error flag
+ */
+static void defrag_cleanup_reqs(INOUT struct super_block *sb, IN int error)
+{
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ struct defrag_info *sb_dfr = &(sbi->dfr_info);
+ struct defrag_info *ino_dfr = NULL, *tmp = NULL;
+ /* sdfat patch 0.96 : sbi->dfr_info crash problem */
+ __lock_super(sb);
+
+ /* Clean-up ino_dfr */
+ if (!error) {
+ list_for_each_entry_safe(ino_dfr, tmp, &sb_dfr->entry, entry) {
+ struct inode *inode = &(container_of(ino_dfr, struct sdfat_inode_info, dfr_info)->vfs_inode);
+
+ mutex_lock(&ino_dfr->lock);
+
+ atomic_set(&ino_dfr->stat, DFR_INO_STAT_IDLE);
+
+ list_del(&ino_dfr->entry);
+
+ ino_dfr->chunks = NULL;
+ ino_dfr->nr_chunks = 0;
+ INIT_LIST_HEAD(&ino_dfr->entry);
+
+ BUG_ON(!mutex_is_locked(&ino_dfr->lock));
+ mutex_unlock(&ino_dfr->lock);
+
+ iput(inode);
+ }
+ }
+
+ /* Clean-up sb_dfr */
+ sb_dfr->chunks = NULL;
+ sb_dfr->nr_chunks = 0;
+ INIT_LIST_HEAD(&sb_dfr->entry);
+
+ /* Clear dfr_new_clus page */
+ memset(sbi->dfr_new_clus, 0, PAGE_SIZE);
+ sbi->dfr_new_idx = 1;
+ memset(sbi->dfr_page_wb, 0, PAGE_SIZE);
+
+ sbi->dfr_hint_clus = sbi->dfr_hint_idx = sbi->dfr_reserved_clus = 0;
+
+ __unlock_super(sb);
+}
+
+/**
+ * @fn defrag_validate_pages
+ * @brief validate and mark dirty for victim pages
+ * @return 0 on success, -errno otherwise
+ * @param inode inode
+ * @param chunk given chunk
+ * @remark protected by inode_lock and super_lock
+ */
+static int
+defrag_validate_pages(
+ IN struct inode *inode,
+ IN struct defrag_chunk_info *chunk)
+{
+ struct super_block *sb = inode->i_sb;
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ struct page *page = NULL;
+ unsigned int i_size = 0, page_off = 0, page_nr = 0;
+ int buf_i = 0, i = 0, err = 0;
+
+ i_size = i_size_read(inode);
+ page_off = chunk->f_clus * PAGES_PER_CLUS(sb);
+ page_nr = (i_size / PAGE_SIZE) + ((i_size % PAGE_SIZE) ? 1 : 0);
+ if ((i_size <= 0) || (page_nr <= 0)) {
+ dfr_err("inode %p, i_size %d, page_nr %d", inode, i_size, page_nr);
+ return -EINVAL;
+ }
+
+	/* Get the victim pages and check their
+	 * dirty/writeback/mapped state.
+	 */
+ for (i = 0;
+ i < min((int)(page_nr - page_off), (int)(chunk->nr_clus * PAGES_PER_CLUS(sb)));
+ i++) {
+ page = find_get_page(inode->i_mapping, page_off + i);
+ if (page)
+ if (!trylock_page(page)) {
+ put_page(page);
+ page = NULL;
+ }
+
+ if (!page) {
+ dfr_debug("get/lock_page() failed, index %d", i);
+ err = -EINVAL;
+ goto error;
+ }
+
+ sbi->dfr_pagep[buf_i++] = page;
+ if (PageError(page) || !PageUptodate(page) || PageDirty(page) ||
+ PageWriteback(page) || page_mapped(page)) {
+ dfr_debug("page %p, err %d, uptodate %d, "
+ "dirty %d, wb %d, mapped %d",
+ page, PageError(page), PageUptodate(page),
+ PageDirty(page), PageWriteback(page),
+ page_mapped(page));
+ err = -EINVAL;
+ goto error;
+ }
+
+ set_bit((page->index & (PAGES_PER_CLUS(sb) - 1)),
+ (volatile unsigned long *)&(sbi->dfr_page_wb[chunk->new_idx + i / PAGES_PER_CLUS(sb)]));
+
+ page = NULL;
+ }
+
+	/* All pages in the chunk are valid. */
+ i_size -= (chunk->f_clus * (sbi->fsi.cluster_size));
+ BUG_ON(((i_size / PAGE_SIZE) + ((i_size % PAGE_SIZE) ? 1 : 0)) != (page_nr - page_off));
+
+ for (i = 0; i < buf_i; i++) {
+ struct buffer_head *bh = NULL, *head = NULL;
+ int bh_idx = 0;
+
+ page = sbi->dfr_pagep[i];
+ BUG_ON(!page);
+
+ /* Mark dirty in page */
+ set_page_dirty(page);
+ mark_page_accessed(page);
+
+ /* Attach empty BHs */
+ if (!page_has_buffers(page))
+ create_empty_buffers(page, 1 << inode->i_blkbits, 0);
+
+ /* Mark dirty in BHs */
+ bh = head = page_buffers(page);
+ BUG_ON(!bh && !i_size);
+ do {
+ if ((bh_idx >= 1) && (bh_idx >= (i_size >> inode->i_blkbits))) {
+ clear_buffer_dirty(bh);
+ } else {
+ if (PageUptodate(page))
+ if (!buffer_uptodate(bh))
+ set_buffer_uptodate(bh);
+
+ /* Set this bh as delay */
+ set_buffer_new(bh);
+ set_buffer_delay(bh);
+
+ mark_buffer_dirty(bh);
+ }
+
+ bh_idx++;
+ bh = bh->b_this_page;
+ } while (bh != head);
+
+ /* Mark this page accessed */
+ mark_page_accessed(page);
+
+ i_size -= PAGE_SIZE;
+ }
+
+error:
+ /* Unlock and put refs for pages */
+ for (i = 0; i < buf_i; i++) {
+ BUG_ON(!sbi->dfr_pagep[i]);
+ unlock_page(sbi->dfr_pagep[i]);
+ put_page(sbi->dfr_pagep[i]);
+ }
+	/* Reset the victim-page pointer array for the next request */
+	memset(sbi->dfr_pagep, 0, sizeof(struct page *) * PAGES_PER_AU(sb));
+
+ return err;
+}
+
+
+/**
+ * @fn defrag_validate_reqs
+ * @brief validate defrag requests
+ * @return negative if no request is valid, 0 otherwise
+ * @param sb super block
+ * @param chunks given chunks
+ */
+static int
+defrag_validate_reqs(
+ IN struct super_block *sb,
+ INOUT struct defrag_chunk_info *chunks)
+{
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ struct defrag_info *sb_dfr = &(sbi->dfr_info);
+ int i = 0, err = 0, err_cnt = 0;
+
+ /* Validate all reqs */
+ for (i = REQ_HEADER_IDX + 1; i < sb_dfr->nr_chunks; i++) {
+ struct defrag_chunk_info *chunk = NULL;
+ struct inode *inode = NULL;
+ struct defrag_info *ino_dfr = NULL;
+
+ chunk = &chunks[i];
+
+ /* Check inode */
+ __lock_super(sb);
+ inode = sdfat_iget(sb, chunk->i_pos);
+ if (!inode) {
+ dfr_debug("inode not found, i_pos %08llx", chunk->i_pos);
+ chunk->stat = DFR_CHUNK_STAT_ERR;
+ err_cnt++;
+ __unlock_super(sb);
+ continue;
+ }
+ __unlock_super(sb);
+
+ dfr_debug("req[%d] inode %p, i_pos %08llx, f_clus %d, "
+ "d_clus %08x, nr %d, prev %08x, next %08x",
+ i, inode, chunk->i_pos, chunk->f_clus, chunk->d_clus,
+ chunk->nr_clus, chunk->prev_clus, chunk->next_clus);
+ /**
+ * Lock ordering: inode_lock -> lock_super
+ */
+ inode_lock(inode);
+ __lock_super(sb);
+
+ /* Check if enough buffers exist for chunk->new_idx */
+ if ((sbi->dfr_new_idx + chunk->nr_clus) >= (PAGE_SIZE / sizeof(int))) {
+ dfr_err("dfr_new_idx %d, chunk->nr_clus %d",
+ sbi->dfr_new_idx, chunk->nr_clus);
+ err = -ENOSPC;
+ goto unlock;
+ }
+
+ /* Reserve clusters for defrag with DA */
+ err = fsapi_dfr_reserve_clus(sb, chunk->nr_clus);
+ if (err)
+ goto unlock;
+
+ /* Check clusters */
+ err = fsapi_dfr_validate_clus(inode, chunk, 0);
+ if (err) {
+ fsapi_dfr_reserve_clus(sb, 0 - chunk->nr_clus);
+ dfr_debug("Cluster validation: err %d", err);
+ goto unlock;
+ }
+
+ /* Check pages */
+ err = defrag_validate_pages(inode, chunk);
+ if (err) {
+ fsapi_dfr_reserve_clus(sb, 0 - chunk->nr_clus);
+ dfr_debug("Page validation: err %d", err);
+ goto unlock;
+ }
+
+ /* Mark IGNORE flag to victim AU */
+ if (sbi->options.improved_allocation & SDFAT_ALLOC_SMART)
+ fsapi_dfr_mark_ignore(sb, chunk->d_clus);
+
+ ino_dfr = &(SDFAT_I(inode)->dfr_info);
+ mutex_lock(&ino_dfr->lock);
+
+ /* Update chunk info */
+ chunk->stat = DFR_CHUNK_STAT_REQ;
+ chunk->new_idx = sbi->dfr_new_idx;
+
+ /* Update ino_dfr info */
+ if (list_empty(&(ino_dfr->entry))) {
+ list_add_tail(&ino_dfr->entry, &sb_dfr->entry);
+ ino_dfr->chunks = chunk;
+ igrab(inode);
+ }
+ ino_dfr->nr_chunks++;
+
+ atomic_set(&ino_dfr->stat, DFR_INO_STAT_REQ);
+
+ BUG_ON(!mutex_is_locked(&ino_dfr->lock));
+ mutex_unlock(&ino_dfr->lock);
+
+ /* Reserved buffers for chunk->new_idx */
+ sbi->dfr_new_idx += chunk->nr_clus;
+
+unlock:
+ if (err) {
+ chunk->stat = DFR_CHUNK_STAT_ERR;
+ err_cnt++;
+ }
+ iput(inode);
+ __unlock_super(sb);
+ inode_unlock(inode);
+ }
+
+ /* Return error if all chunks are invalid */
+ if (err_cnt == sb_dfr->nr_chunks - 1) {
+ dfr_debug("%s failed (err_cnt %d)", __func__, err_cnt);
+ return -ENXIO;
+ }
+
+ return 0;
+}
+
+
+/**
+ * @fn defrag_check_fs_busy
+ * @brief check if this module busy
+ * @return 0 when idle, 1 otherwise
+ * @param sb super block
+ * @param reserved_clus # of reserved clusters
+ * @param queued_pages # of queued pages
+ */
+static int
+defrag_check_fs_busy(
+ IN struct super_block *sb,
+ OUT int *reserved_clus,
+ OUT int *queued_pages)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ int err = 0;
+
+ *reserved_clus = *queued_pages = 0;
+
+ __lock_super(sb);
+ *reserved_clus = fsi->reserved_clusters;
+ *queued_pages = atomic_read(&SDFAT_SB(sb)->stat_n_pages_queued);
+
+ if (*reserved_clus || *queued_pages)
+ err = 1;
+ __unlock_super(sb);
+
+ return err;
+}
+
+
+/**
+ * @fn sdfat_ioctl_defrag_req
+ * @brief ioctl to send defrag requests
+ * @return 0 on success, -errno otherwise
+ * @param inode inode
+ * @param uarg given requests
+ */
+static int
+sdfat_ioctl_defrag_req(
+ IN struct inode *inode,
+ INOUT unsigned int *uarg)
+{
+ struct super_block *sb = inode->i_sb;
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ struct defrag_info *sb_dfr = &(sbi->dfr_info);
+ struct defrag_chunk_header head;
+ struct defrag_chunk_info *chunks = NULL;
+ unsigned int len = 0;
+ int err = 0;
+ unsigned long timeout = 0;
+
+ /* Check overlapped defrag */
+ if (atomic_cmpxchg(&sb_dfr->stat, DFR_SB_STAT_IDLE, DFR_SB_STAT_REQ)) {
+ dfr_debug("sb_dfr->stat %d", atomic_read(&sb_dfr->stat));
+ return -EBUSY;
+ }
+
+ /* Check if defrag required */
+ __lock_super(sb);
+ if (!fsapi_dfr_check_dfr_required(sb, NULL, NULL, NULL)) {
+ dfr_debug("Not enough space left for defrag (err %d)", -ENOSPC);
+ atomic_set(&sb_dfr->stat, DFR_SB_STAT_IDLE);
+ __unlock_super(sb);
+ return -ENOSPC;
+ }
+ __unlock_super(sb);
+
+ /* Copy args */
+ memset(&head, 0, sizeof(struct defrag_chunk_header));
+ err = copy_from_user(&head, uarg, sizeof(struct defrag_chunk_info));
+ ERR_HANDLE(err);
+
+ /* If FS busy, cancel defrag */
+ if (!(head.mode == DFR_MODE_TEST)) {
+ int reserved_clus = 0, queued_pages = 0;
+
+ err = defrag_check_fs_busy(sb, &reserved_clus, &queued_pages);
+ if (err) {
+ dfr_debug("FS busy, cancel defrag (reserved_clus %d, queued_pages %d)",
+ reserved_clus, queued_pages);
+ err = -EBUSY;
+ goto error;
+ }
+ }
+
+ /* Total length is saved in the chunk header's nr_chunks field */
+ len = head.nr_chunks;
+ ERR_HANDLE2(!len, err, -EINVAL);
+
+ dfr_debug("IOC_DFR_REQ started (mode %d, nr_req %d)", head.mode, len - 1);
+ if (get_order(len * sizeof(struct defrag_chunk_info)) > MAX_ORDER) {
+ dfr_debug("len %d, sizeof(struct defrag_chunk_info) %lu, MAX_ORDER %d",
+ len, sizeof(struct defrag_chunk_info), MAX_ORDER);
+ err = -EINVAL;
+ goto error;
+ }
+ chunks = alloc_pages_exact(len * sizeof(struct defrag_chunk_info),
+ GFP_KERNEL | __GFP_ZERO);
+	ERR_HANDLE2(!chunks, err, -ENOMEM);
+
+ err = copy_from_user(chunks, uarg, len * sizeof(struct defrag_chunk_info));
+ ERR_HANDLE(err);
+
+ /* Initialize sb_dfr */
+ sb_dfr->chunks = chunks;
+ sb_dfr->nr_chunks = len;
+
+ /* Validate reqs & mark defrag/dirty */
+ err = defrag_validate_reqs(sb, sb_dfr->chunks);
+ ERR_HANDLE(err);
+
+ atomic_set(&sb_dfr->stat, DFR_SB_STAT_VALID);
+
+ /* Wait for defrag completion */
+ if (head.mode == DFR_MODE_ONESHOT)
+ timeout = 0;
+ else if (head.mode & DFR_MODE_BACKGROUND)
+ timeout = DFR_DEFAULT_TIMEOUT;
+ else
+ timeout = DFR_MIN_TIMEOUT;
+
+ dfr_debug("Wait for completion (timeout %ld)", timeout);
+ init_completion(&sbi->dfr_complete);
+ timeout = wait_for_completion_timeout(&sbi->dfr_complete, timeout);
+
+ if (!timeout) {
+		/* Force the FAT update (fsapi_dfr_update_fat_*) after timeout. */
+ dfr_debug("Force sync(), mode %d, left-timeout %ld", head.mode, timeout);
+
+ down_read(&sb->s_umount);
+
+ sync_inodes_sb(sb);
+
+ __lock_super(sb);
+ fsapi_dfr_update_fat_next(sb);
+
+ fsapi_sync_fs(sb, 1);
+
+#ifdef CONFIG_SDFAT_DFR_DEBUG
+ /* SPO test */
+ fsapi_dfr_spo_test(sb, DFR_SPO_FAT_NEXT, __func__);
+#endif
+
+ fsapi_dfr_update_fat_prev(sb, 1);
+ fsapi_sync_fs(sb, 1);
+
+ __unlock_super(sb);
+
+ up_read(&sb->s_umount);
+ }
+
+#ifdef CONFIG_SDFAT_DFR_DEBUG
+ /* SPO test */
+ fsapi_dfr_spo_test(sb, DFR_SPO_NORMAL, __func__);
+#endif
+
+ __lock_super(sb);
+ /* Send DISCARD to clean-ed AUs */
+ fsapi_dfr_check_discard(sb);
+
+#ifdef CONFIG_SDFAT_DFR_DEBUG
+ /* SPO test */
+ fsapi_dfr_spo_test(sb, DFR_SPO_DISCARD, __func__);
+#endif
+
+ /* Unmark IGNORE flag to all victim AUs */
+ fsapi_dfr_unmark_ignore_all(sb);
+ __unlock_super(sb);
+
+ err = copy_to_user(uarg, sb_dfr->chunks, sizeof(struct defrag_chunk_info) * len);
+ ERR_HANDLE(err);
+
+error:
+ /* Clean-up sb_dfr & ino_dfr */
+ defrag_cleanup_reqs(sb, err);
+
+ if (chunks)
+ free_pages_exact(chunks, len * sizeof(struct defrag_chunk_info));
+
+ /* Set sb_dfr's state as IDLE */
+ atomic_set(&sb_dfr->stat, DFR_SB_STAT_IDLE);
+
+ dfr_debug("IOC_DFR_REQ done (err %d)", err);
+ return err;
+}
+
+/**
+ * @fn sdfat_ioctl_defrag_trav
+ * @brief ioctl to traverse given directory for defrag
+ * @return 0 on success, -errno otherwise
+ * @param inode inode
+ * @param uarg output buffer
+ */
+static int
+sdfat_ioctl_defrag_trav(
+ IN struct inode *inode,
+ INOUT unsigned int *uarg)
+{
+ struct super_block *sb = inode->i_sb;
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ struct defrag_info *sb_dfr = &(sbi->dfr_info);
+ struct defrag_trav_arg *args = (struct defrag_trav_arg *) sbi->dfr_pagep;
+ struct defrag_trav_header *header = (struct defrag_trav_header *) args;
+ int err = 0;
+
+ /* Check overlapped defrag */
+ if (atomic_cmpxchg(&sb_dfr->stat, DFR_SB_STAT_IDLE, DFR_SB_STAT_REQ)) {
+ dfr_debug("sb_dfr->stat %d", atomic_read(&sb_dfr->stat));
+ return -EBUSY;
+ }
+
+ /* Check if defrag required */
+ __lock_super(sb);
+ if (!fsapi_dfr_check_dfr_required(sb, NULL, NULL, NULL)) {
+ dfr_debug("Not enough space left for defrag (err %d)", -ENOSPC);
+ atomic_set(&sb_dfr->stat, DFR_SB_STAT_IDLE);
+ __unlock_super(sb);
+ return -ENOSPC;
+ }
+ __unlock_super(sb);
+
+ /* Copy args */
+ err = copy_from_user(args, uarg, PAGE_SIZE);
+ ERR_HANDLE(err);
+
+	/*
+	 * Check args.
+	 * The ROOT directory has i_pos == 0 and start_clus == 0.
+	 */
+ if (!(header->type & DFR_TRAV_TYPE_HEADER)) {
+ err = -EINVAL;
+ dfr_debug("type %d, i_pos %08llx, start_clus %08x",
+ header->type, header->i_pos, header->start_clus);
+ goto error;
+ }
+
+ /* If FS busy, cancel defrag */
+ if (!(header->type & DFR_TRAV_TYPE_TEST)) {
+		int reserved_clus = 0, queued_pages = 0;
+
+ err = defrag_check_fs_busy(sb, &reserved_clus, &queued_pages);
+ if (err) {
+ dfr_debug("FS busy, cancel defrag (reserved_clus %d, queued_pages %d)",
+ reserved_clus, queued_pages);
+ err = -EBUSY;
+ goto error;
+ }
+ }
+
+ /* Scan given directory and gather info */
+ inode_lock(inode);
+ __lock_super(sb);
+ err = fsapi_dfr_scan_dir(sb, (void *)args);
+ __unlock_super(sb);
+ inode_unlock(inode);
+ ERR_HANDLE(err);
+
+ /* Copy the result to user */
+ err = copy_to_user(uarg, args, PAGE_SIZE);
+ ERR_HANDLE(err);
+
+error:
+ memset(sbi->dfr_pagep, 0, PAGE_SIZE);
+
+ atomic_set(&sb_dfr->stat, DFR_SB_STAT_IDLE);
+ return err;
+}
+
+/**
+ * @fn sdfat_ioctl_defrag_info
+ * @brief ioctl to get HW param info
+ * @return 0 on success, -errno otherwise
+ * @param sb super block
+ * @param uarg output buffer
+ */
+static int
+sdfat_ioctl_defrag_info(
+ IN struct super_block *sb,
+ OUT unsigned int *uarg)
+{
+ struct defrag_info_arg info_arg;
+ int err = 0;
+
+ memset(&info_arg, 0, sizeof(struct defrag_info_arg));
+
+ __lock_super(sb);
+ err = fsapi_dfr_get_info(sb, &info_arg);
+ __unlock_super(sb);
+ ERR_HANDLE(err);
+ dfr_debug("IOC_DFR_INFO: sec_per_au %d, hidden_sectors %d",
+ info_arg.sec_per_au, info_arg.hidden_sectors);
+
+ err = copy_to_user(uarg, &info_arg, sizeof(struct defrag_info_arg));
+error:
+ return err;
+}
+
+#endif /* CONFIG_SDFAT_DFR */
+
+static inline int __do_dfr_map_cluster(struct inode *inode, u32 clu_offset, unsigned int *clus_ptr)
+{
+#ifdef CONFIG_SDFAT_DFR
+ return fsapi_dfr_map_clus(inode, clu_offset, clus_ptr);
+#else
+ return 0;
+#endif
+}
+
+static inline int __check_dfr_on(struct inode *inode, loff_t start, loff_t end, const char *fname)
+{
+#ifdef CONFIG_SDFAT_DFR
+ struct defrag_info *ino_dfr = &(SDFAT_I(inode)->dfr_info);
+
+ if ((atomic_read(&ino_dfr->stat) == DFR_INO_STAT_REQ) &&
+ fsapi_dfr_check_dfr_on(inode, start, end, 0, fname))
+ return 1;
+#endif
+ return 0;
+}
+
+static inline int __cancel_dfr_work(struct inode *inode, loff_t start, loff_t end, const char *fname)
+{
+#ifdef CONFIG_SDFAT_DFR
+ struct defrag_info *ino_dfr = &(SDFAT_I(inode)->dfr_info);
+ /* Cancel DEFRAG */
+ if (atomic_read(&ino_dfr->stat) == DFR_INO_STAT_REQ)
+ fsapi_dfr_check_dfr_on(inode, start, end, 1, fname);
+#endif
+ return 0;
+}
+
+static inline int __dfr_writepage_end_io(struct page *page)
+{
+#ifdef CONFIG_SDFAT_DFR
+ struct defrag_info *ino_dfr = &(SDFAT_I(page->mapping->host)->dfr_info);
+
+ if (atomic_read(&ino_dfr->stat) == DFR_INO_STAT_REQ)
+ fsapi_dfr_writepage_endio(page);
+#endif
+ return 0;
+}
+
+static inline void __init_dfr_info(struct inode *inode)
+{
+#ifdef CONFIG_SDFAT_DFR
+ memset(&(SDFAT_I(inode)->dfr_info), 0, sizeof(struct defrag_info));
+ INIT_LIST_HEAD(&(SDFAT_I(inode)->dfr_info.entry));
+ mutex_init(&(SDFAT_I(inode)->dfr_info.lock));
+#endif
+}
+
+static inline int __alloc_dfr_mem_if_required(struct super_block *sb)
+{
+#ifdef CONFIG_SDFAT_DFR
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+
+ if (!sbi->options.defrag)
+ return 0;
+
+ memset(&sbi->dfr_info, 0, sizeof(struct defrag_info));
+ INIT_LIST_HEAD(&(sbi->dfr_info.entry));
+ mutex_init(&(sbi->dfr_info.lock));
+
+ sbi->dfr_new_clus = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!sbi->dfr_new_clus) {
+ dfr_debug("error %d", -ENOMEM);
+ return -ENOMEM;
+ }
+ sbi->dfr_new_idx = 1;
+
+ sbi->dfr_page_wb = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!sbi->dfr_page_wb) {
+ dfr_debug("error %d", -ENOMEM);
+ return -ENOMEM;
+ }
+
+ sbi->dfr_pagep = alloc_pages_exact(sizeof(struct page *) *
+ PAGES_PER_AU(sb), GFP_KERNEL | __GFP_ZERO);
+ if (!sbi->dfr_pagep) {
+ dfr_debug("error %d", -ENOMEM);
+ return -ENOMEM;
+ }
+#endif
+ return 0;
+}
+
+static void __free_dfr_mem_if_required(struct super_block *sb)
+{
+#ifdef CONFIG_SDFAT_DFR
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+
+ if (sbi->dfr_pagep) {
+ free_pages_exact(sbi->dfr_pagep, sizeof(struct page *) * PAGES_PER_AU(sb));
+ sbi->dfr_pagep = NULL;
+ }
+
+	/* kfree(NULL) is a no-op, so no NULL checks are needed */
+ kfree(sbi->dfr_page_wb);
+ sbi->dfr_page_wb = NULL;
+
+ kfree(sbi->dfr_new_clus);
+ sbi->dfr_new_clus = NULL;
+#endif
+}
+
+
+static int sdfat_file_mmap(struct file *file, struct vm_area_struct *vm_struct)
+{
+ __cancel_dfr_work(file->f_mapping->host,
+ (loff_t)vm_struct->vm_start,
+ (loff_t)(vm_struct->vm_end - 1),
+ __func__);
+
+ return generic_file_mmap(file, vm_struct);
+}
+
+static int sdfat_ioctl_volume_id(struct inode *dir)
+{
+ struct sdfat_sb_info *sbi = SDFAT_SB(dir->i_sb);
+ FS_INFO_T *fsi = &(sbi->fsi);
+
+ return fsi->vol_id;
+}
+
+static int sdfat_dfr_ioctl(struct inode *inode, struct file *filp,
+ unsigned int cmd, unsigned long arg)
+{
+#ifdef CONFIG_SDFAT_DFR
+ switch (cmd) {
+ case SDFAT_IOCTL_DFR_INFO: {
+ struct super_block *sb = inode->i_sb;
+ FS_INFO_T *fsi = &SDFAT_SB(sb)->fsi;
+ unsigned int __user *uarg = (unsigned int __user *) arg;
+
+ __lock_super(sb);
+ /* Check FS type (FAT32 only) */
+ if (fsi->vol_type != FAT32) {
+ dfr_err("Defrag not supported, vol_type %d", fsi->vol_type);
+ __unlock_super(sb);
+ return -EPERM;
+
+ }
+
+ /* Check if SB's defrag option enabled */
+ if (!(SDFAT_SB(sb)->options.defrag)) {
+ dfr_err("Defrag not supported, sbi->options.defrag %d", SDFAT_SB(sb)->options.defrag);
+ __unlock_super(sb);
+ return -EPERM;
+ }
+
+ /* Only IOCTL on mount-point allowed */
+ if (filp->f_path.mnt->mnt_root != filp->f_path.dentry) {
+ dfr_err("IOC_DFR_INFO only allowed on ROOT, root %p, dentry %p",
+ filp->f_path.mnt->mnt_root, filp->f_path.dentry);
+ __unlock_super(sb);
+ return -EPERM;
+ }
+ __unlock_super(sb);
+
+ return sdfat_ioctl_defrag_info(sb, uarg);
+ }
+ case SDFAT_IOCTL_DFR_TRAV: {
+ struct super_block *sb = inode->i_sb;
+ FS_INFO_T *fsi = &SDFAT_SB(sb)->fsi;
+ unsigned int __user *uarg = (unsigned int __user *) arg;
+
+ __lock_super(sb);
+ /* Check FS type (FAT32 only) */
+ if (fsi->vol_type != FAT32) {
+ dfr_err("Defrag not supported, vol_type %d", fsi->vol_type);
+ __unlock_super(sb);
+ return -EPERM;
+
+ }
+
+ /* Check if SB's defrag option enabled */
+ if (!(SDFAT_SB(sb)->options.defrag)) {
+ dfr_err("Defrag not supported, sbi->options.defrag %d", SDFAT_SB(sb)->options.defrag);
+ __unlock_super(sb);
+ return -EPERM;
+ }
+ __unlock_super(sb);
+
+ return sdfat_ioctl_defrag_trav(inode, uarg);
+ }
+ case SDFAT_IOCTL_DFR_REQ: {
+ struct super_block *sb = inode->i_sb;
+ FS_INFO_T *fsi = &SDFAT_SB(sb)->fsi;
+ unsigned int __user *uarg = (unsigned int __user *) arg;
+
+ __lock_super(sb);
+
+ /* Check if FS_ERROR occurred */
+ if (sb->s_flags & MS_RDONLY) {
+ dfr_err("RDONLY partition (err %d)", -EPERM);
+ __unlock_super(sb);
+ return -EPERM;
+ }
+
+ /* Check FS type (FAT32 only) */
+ if (fsi->vol_type != FAT32) {
+ dfr_err("Defrag not supported, vol_type %d", fsi->vol_type);
+ __unlock_super(sb);
+ return -EINVAL;
+
+ }
+
+ /* Check if SB's defrag option enabled */
+ if (!(SDFAT_SB(sb)->options.defrag)) {
+ dfr_err("Defrag not supported, sbi->options.defrag %d", SDFAT_SB(sb)->options.defrag);
+ __unlock_super(sb);
+ return -EPERM;
+ }
+
+ /* Only IOCTL on mount-point allowed */
+ if (filp->f_path.mnt->mnt_root != filp->f_path.dentry) {
+			dfr_err("IOC_DFR_REQ only allowed on ROOT, root %p, dentry %p",
+ filp->f_path.mnt->mnt_root, filp->f_path.dentry);
+ __unlock_super(sb);
+ return -EINVAL;
+ }
+ __unlock_super(sb);
+
+ return sdfat_ioctl_defrag_req(inode, uarg);
+ }
+#ifdef CONFIG_SDFAT_DFR_DEBUG
+ case SDFAT_IOCTL_DFR_SPO_FLAG: {
+ struct sdfat_sb_info *sbi = SDFAT_SB(inode->i_sb);
+ int ret = 0;
+
+ ret = get_user(sbi->dfr_spo_flag, (int __user *)arg);
+ dfr_debug("dfr_spo_flag %d", sbi->dfr_spo_flag);
+
+ return ret;
+ }
+#endif /* CONFIG_SDFAT_DFR_DEBUG */
+ }
+#endif /* CONFIG_SDFAT_DFR */
+
+ /* Inappropriate ioctl for device */
+ return -ENOTTY;
+}
+
+static int sdfat_dbg_ioctl(struct inode *inode, struct file *filp,
+ unsigned int cmd, unsigned long arg)
+{
+#ifdef CONFIG_SDFAT_DBG_IOCTL
+ struct super_block *sb = inode->i_sb;
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ unsigned int flags;
+
+ switch (cmd) {
+ case SDFAT_IOC_GET_DEBUGFLAGS:
+ flags = sbi->debug_flags;
+ return put_user(flags, (int __user *)arg);
+ case SDFAT_IOC_SET_DEBUGFLAGS:
+ flags = 0;
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (get_user(flags, (int __user *) arg))
+ return -EFAULT;
+
+ __lock_super(sb);
+ sbi->debug_flags = flags;
+ __unlock_super(sb);
+ return 0;
+ case SDFAT_IOCTL_PANIC:
+ panic("ioctl panic for test");
+
+		/* NOT REACHED */
+ return 0;
+ }
+#endif /* CONFIG_SDFAT_DBG_IOCTL */
+ return -ENOTTY;
+}
+
+static long sdfat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+ struct inode *inode = file_inode(filp);
+ int err;
+
+ if (cmd == SDFAT_IOCTL_GET_VOLUME_ID)
+ return sdfat_ioctl_volume_id(inode);
+
+ err = sdfat_dfr_ioctl(inode, filp, cmd, arg);
+ if (err != -ENOTTY)
+ return err;
+
+ /* -ENOTTY if inappropriate ioctl for device */
+ return sdfat_dbg_ioctl(inode, filp, cmd, arg);
+}
+
+static int __sdfat_getattr(struct inode *inode, struct kstat *stat)
+{
+ TMSG("%s entered\n", __func__);
+
+ generic_fillattr(inode, stat);
+ stat->blksize = SDFAT_SB(inode->i_sb)->fsi.cluster_size;
+
+ TMSG("%s exited\n", __func__);
+ return 0;
+}
+
+static void __sdfat_writepage_end_io(struct bio *bio, int err)
+{
+ struct page *page = bio->bi_io_vec->bv_page;
+ struct super_block *sb = page->mapping->host->i_sb;
+
+ ASSERT(bio->bi_vcnt == 1); /* Single page endio */
+ ASSERT(bio_data_dir(bio)); /* Write */
+
+ if (err) {
+ SetPageError(page);
+ mapping_set_error(page->mapping, err);
+ }
+
+ __dfr_writepage_end_io(page);
+
+#ifdef CONFIG_SDFAT_TRACE_IO
+ {
+ //struct sdfat_sb_info *sbi = SDFAT_SB(bio->bi_bdev->bd_super);
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+
+ sbi->stat_n_pages_written++;
+ if (page->mapping->host == sb->s_bdev->bd_inode)
+ sbi->stat_n_bdev_pages_written++;
+
+ /* 4 MB = 1024 pages => 0.4 sec (approx.)
+ * 32 KB = 64 pages => 0.025 sec
+ * Min. average latency b/w msgs. ~= 0.025 sec
+ */
+ if ((sbi->stat_n_pages_written & 63) == 0) {
+ DMSG("STAT:%u, %u, %u, %u (Sector #: %u)\n",
+ sbi->stat_n_pages_added, sbi->stat_n_pages_written,
+				sbi->stat_n_bdev_pages_written,
+ sbi->stat_n_pages_confused,
+ (unsigned int)__sdfat_bio_sector(bio));
+ }
+ }
+#endif
+ end_page_writeback(page);
+ bio_put(bio);
+
+ // Update trace info.
+ atomic_dec(&SDFAT_SB(sb)->stat_n_pages_queued);
+}
+
+
+static int __support_write_inode_sync(struct super_block *sb)
+{
+#ifdef CONFIG_SDFAT_SUPPORT_DIR_SYNC
+#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+
+ if (sbi->fsi.vol_type != EXFAT)
+ return 0;
+#endif
+ return 1;
+#endif
+ return 0;
+}
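+/*
+ * Note: with CONFIG_SDFAT_SUPPORT_DIR_SYNC the helper above reports 1
+ * (inode writes are already synchronous), optionally restricted to exFAT
+ * volumes by CONFIG_SDFAT_DELAYED_META_DIRTY; when it reports 0,
+ * __sdfat_file_fsync() below falls back to fsapi_sync_fs().
+ */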
+
+
+static int __sdfat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
+{
+ struct inode *inode = filp->f_mapping->host;
+ struct super_block *sb = inode->i_sb;
+ int res, err = 0;
+
+ res = __sdfat_generic_file_fsync(filp, start, end, datasync);
+
+ if (!__support_write_inode_sync(sb))
+ err = fsapi_sync_fs(sb, 1);
+
+ return res ? res : err;
+}
+
+
+static const struct file_operations sdfat_dir_operations = {
+ .llseek = generic_file_llseek,
+ .read = generic_read_dir,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0)
+ .iterate = sdfat_iterate,
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) */
+ .readdir = sdfat_readdir,
+#endif
+ .fsync = sdfat_file_fsync,
+ .unlocked_ioctl = sdfat_generic_ioctl,
+};
+
+static int __sdfat_create(struct inode *dir, struct dentry *dentry)
+{
+ struct super_block *sb = dir->i_sb;
+ struct inode *inode;
+ sdfat_timespec_t ts;
+ FILE_ID_T fid;
+ loff_t i_pos;
+ int err;
+
+ __lock_super(sb);
+
+ TMSG("%s entered\n", __func__);
+
+ ts = CURRENT_TIME_SEC;
+
+ err = fsapi_create(dir, (u8 *) dentry->d_name.name, FM_REGULAR, &fid);
+ if (err)
+ goto out;
+
+ __lock_d_revalidate(dentry);
+
+ inode_inc_iversion(dir);
+ dir->i_ctime = dir->i_mtime = dir->i_atime = ts;
+ if (IS_DIRSYNC(dir))
+ (void) sdfat_sync_inode(dir);
+ else
+ mark_inode_dirty(dir);
+
+ i_pos = sdfat_make_i_pos(&fid);
+ inode = sdfat_build_inode(sb, &fid, i_pos);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ goto out;
+ }
+ inode_inc_iversion(inode);
+ inode->i_mtime = inode->i_atime = inode->i_ctime = ts;
+ /* timestamp is already written, so mark_inode_dirty() is unneeded. */
+
+ d_instantiate(dentry, inode);
+out:
+ __unlock_d_revalidate(dentry);
+ __unlock_super(sb);
+ TMSG("%s exited with err(%d)\n", __func__, err);
+ if (!err)
+ sdfat_statistics_set_create(fid.flags);
+ return err;
+}
+
+
+static int sdfat_find(struct inode *dir, struct qstr *qname, FILE_ID_T *fid)
+{
+ int err;
+
+ if (qname->len == 0)
+ return -ENOENT;
+
+ err = fsapi_lookup(dir, (u8 *) qname->name, fid);
+ if (err)
+ return -ENOENT;
+
+ return 0;
+}
+
+static int sdfat_d_anon_disconn(struct dentry *dentry)
+{
+ return IS_ROOT(dentry) && (dentry->d_flags & DCACHE_DISCONNECTED);
+}
+
+static struct dentry *__sdfat_lookup(struct inode *dir, struct dentry *dentry)
+{
+ struct super_block *sb = dir->i_sb;
+ struct inode *inode;
+ struct dentry *alias;
+ int err;
+ FILE_ID_T fid;
+ loff_t i_pos;
+ u64 ret;
+ mode_t i_mode;
+
+ __lock_super(sb);
+ TMSG("%s entered\n", __func__);
+ err = sdfat_find(dir, &dentry->d_name, &fid);
+ if (err) {
+ if (err == -ENOENT) {
+ inode = NULL;
+ goto out;
+ }
+ goto error;
+ }
+
+ i_pos = sdfat_make_i_pos(&fid);
+ inode = sdfat_build_inode(sb, &fid, i_pos);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ goto error;
+ }
+
+ i_mode = inode->i_mode;
+ if (S_ISLNK(i_mode) && !SDFAT_I(inode)->target) {
+ SDFAT_I(inode)->target = kmalloc((i_size_read(inode)+1), GFP_KERNEL);
+ if (!SDFAT_I(inode)->target) {
+ err = -ENOMEM;
+ goto error;
+ }
+ fsapi_read_link(dir, &fid, SDFAT_I(inode)->target, i_size_read(inode), &ret);
+ *(SDFAT_I(inode)->target + i_size_read(inode)) = '\0';
+ }
+
+ alias = d_find_alias(inode);
+
+ /*
+ * Checking "alias->d_parent == dentry->d_parent" to make sure
+ * FS is not corrupted (especially double linked dir).
+ */
+ if (alias && alias->d_parent == dentry->d_parent &&
+ !sdfat_d_anon_disconn(alias)) {
+
+ /*
+ * An unhashed alias can exist because of revalidate()
+ * called by lookup_fast. This state is easily reproduced
+ * by running create and lookup concurrently.
+ * In such a case, we reuse the alias instead of a new dentry.
+ */
+ if (d_unhashed(alias)) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0)
+ BUG_ON(alias->d_name.hash != dentry->d_name.hash && alias->d_name.len != dentry->d_name.len);
+#else
+ BUG_ON(alias->d_name.hash_len != dentry->d_name.hash_len);
+#endif
+ sdfat_msg(sb, KERN_INFO, "rehashed a dentry(%p) "
+ "in read lookup", alias);
+ d_drop(dentry);
+ d_rehash(alias);
+ } else if (!S_ISDIR(i_mode)) {
+ /*
+ * This inode has a dentry that is not an anonymous
+ * (DCACHE_DISCONNECTED) one. This means the user did
+ * ->lookup() under another name (longname vs. its 8.3
+ * alias) in the past.
+ *
+ * Switch to the new name for locality, if possible.
+ */
+ d_move(alias, dentry);
+ }
+ iput(inode);
+ __unlock_super(sb);
+ TMSG("%s exited\n", __func__);
+ return alias;
+ }
+ dput(alias);
+out:
+ /* initialize d_time even though it is a positive dentry */
+ dentry->d_time = (unsigned long)inode_peek_iversion(dir);
+ __unlock_super(sb);
+
+ dentry = d_splice_alias(inode, dentry);
+
+ TMSG("%s exited\n", __func__);
+ return dentry;
+error:
+ __unlock_super(sb);
+ TMSG("%s exited with err(%d)\n", __func__, err);
+ return ERR_PTR(err);
+}
+
+
+static int sdfat_unlink(struct inode *dir, struct dentry *dentry)
+{
+ struct inode *inode = dentry->d_inode;
+ struct super_block *sb = dir->i_sb;
+ sdfat_timespec_t ts;
+ int err;
+
+ __lock_super(sb);
+
+ TMSG("%s entered\n", __func__);
+
+ ts = CURRENT_TIME_SEC;
+
+ SDFAT_I(inode)->fid.size = i_size_read(inode);
+
+ __cancel_dfr_work(inode, 0, SDFAT_I(inode)->fid.size, __func__);
+
+ err = fsapi_unlink(dir, &(SDFAT_I(inode)->fid));
+ if (err)
+ goto out;
+
+ __lock_d_revalidate(dentry);
+
+ inode_inc_iversion(dir);
+ dir->i_mtime = dir->i_atime = ts;
+ if (IS_DIRSYNC(dir))
+ (void) sdfat_sync_inode(dir);
+ else
+ mark_inode_dirty(dir);
+
+ clear_nlink(inode);
+ inode->i_mtime = inode->i_atime = ts;
+ sdfat_detach(inode);
+ dentry->d_time = (unsigned long)inode_peek_iversion(dir);
+out:
+ __unlock_d_revalidate(dentry);
+ __unlock_super(sb);
+ TMSG("%s exited with err(%d)\n", __func__, err);
+ return err;
+}
+
+static int sdfat_symlink(struct inode *dir, struct dentry *dentry, const char *target)
+{
+ struct super_block *sb = dir->i_sb;
+ struct inode *inode;
+ sdfat_timespec_t ts;
+ FILE_ID_T fid;
+ loff_t i_pos;
+ int err;
+ u64 len = (u64) strlen(target);
+ u64 ret;
+
+ /* symlink option check */
+ if (!SDFAT_SB(sb)->options.symlink)
+ return -ENOTSUPP;
+
+ __lock_super(sb);
+
+ TMSG("%s entered\n", __func__);
+
+ ts = CURRENT_TIME_SEC;
+
+ err = fsapi_create(dir, (u8 *) dentry->d_name.name, FM_SYMLINK, &fid);
+ if (err)
+ goto out;
+
+ err = fsapi_write_link(dir, &fid, (char *) target, len, &ret);
+
+ if (err) {
+ fsapi_remove(dir, &fid);
+ goto out;
+ }
+
+ __lock_d_revalidate(dentry);
+
+ inode_inc_iversion(dir);
+ dir->i_ctime = dir->i_mtime = dir->i_atime = ts;
+ if (IS_DIRSYNC(dir))
+ (void) sdfat_sync_inode(dir);
+ else
+ mark_inode_dirty(dir);
+
+ i_pos = sdfat_make_i_pos(&fid);
+ inode = sdfat_build_inode(sb, &fid, i_pos);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ goto out;
+ }
+ inode_inc_iversion(inode);
+ inode->i_mtime = inode->i_atime = inode->i_ctime = ts;
+ /* timestamp is already written, so mark_inode_dirty() is unneeded. */
+
+ SDFAT_I(inode)->target = kmalloc((len+1), GFP_KERNEL);
+ if (!SDFAT_I(inode)->target) {
+ err = -ENOMEM;
+ goto out;
+ }
+ memcpy(SDFAT_I(inode)->target, target, len+1);
+
+ d_instantiate(dentry, inode);
+out:
+ __unlock_d_revalidate(dentry);
+ __unlock_super(sb);
+ TMSG("%s exited with err(%d)\n", __func__, err);
+ return err;
+}
+
+
+static int __sdfat_mkdir(struct inode *dir, struct dentry *dentry)
+{
+ struct super_block *sb = dir->i_sb;
+ struct inode *inode;
+ sdfat_timespec_t ts;
+ FILE_ID_T fid;
+ loff_t i_pos;
+ int err;
+
+ __lock_super(sb);
+
+ TMSG("%s entered\n", __func__);
+
+ ts = CURRENT_TIME_SEC;
+
+ err = fsapi_mkdir(dir, (u8 *) dentry->d_name.name, &fid);
+ if (err)
+ goto out;
+
+ __lock_d_revalidate(dentry);
+
+ inode_inc_iversion(dir);
+ dir->i_ctime = dir->i_mtime = dir->i_atime = ts;
+ if (IS_DIRSYNC(dir))
+ (void) sdfat_sync_inode(dir);
+ else
+ mark_inode_dirty(dir);
+ inc_nlink(dir);
+
+ i_pos = sdfat_make_i_pos(&fid);
+ inode = sdfat_build_inode(sb, &fid, i_pos);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ goto out;
+ }
+ inode_inc_iversion(inode);
+ inode->i_mtime = inode->i_atime = inode->i_ctime = ts;
+ /* timestamp is already written, so mark_inode_dirty() is unneeded. */
+
+ d_instantiate(dentry, inode);
+
+out:
+ __unlock_d_revalidate(dentry);
+ __unlock_super(sb);
+ TMSG("%s exited with err(%d)\n", __func__, err);
+ if (!err)
+ sdfat_statistics_set_mkdir(fid.flags);
+ return err;
+}
+
+
+static int sdfat_rmdir(struct inode *dir, struct dentry *dentry)
+{
+ struct inode *inode = dentry->d_inode;
+ struct super_block *sb = dir->i_sb;
+ sdfat_timespec_t ts;
+ int err;
+
+ __lock_super(sb);
+
+ TMSG("%s entered\n", __func__);
+
+ ts = CURRENT_TIME_SEC;
+
+ SDFAT_I(inode)->fid.size = i_size_read(inode);
+
+ err = fsapi_rmdir(dir, &(SDFAT_I(inode)->fid));
+ if (err)
+ goto out;
+
+ __lock_d_revalidate(dentry);
+
+ inode_inc_iversion(dir);
+ dir->i_mtime = dir->i_atime = ts;
+ if (IS_DIRSYNC(dir))
+ (void) sdfat_sync_inode(dir);
+ else
+ mark_inode_dirty(dir);
+ drop_nlink(dir);
+
+ clear_nlink(inode);
+ inode->i_mtime = inode->i_atime = ts;
+ sdfat_detach(inode);
+ dentry->d_time = (unsigned long)inode_peek_iversion(dir);
+out:
+ __unlock_d_revalidate(dentry);
+ __unlock_super(sb);
+ TMSG("%s exited with err(%d)\n", __func__, err);
+ return err;
+}
+
+static int __sdfat_rename(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry)
+{
+ struct inode *old_inode, *new_inode;
+ struct super_block *sb = old_dir->i_sb;
+ sdfat_timespec_t ts;
+ loff_t i_pos;
+ int err;
+
+ __lock_super(sb);
+
+ TMSG("%s entered\n", __func__);
+
+ old_inode = old_dentry->d_inode;
+ new_inode = new_dentry->d_inode;
+
+ ts = CURRENT_TIME_SEC;
+
+ SDFAT_I(old_inode)->fid.size = i_size_read(old_inode);
+
+ __cancel_dfr_work(old_inode, 0, 1, __func__);
+
+ err = fsapi_rename(old_dir, &(SDFAT_I(old_inode)->fid), new_dir, new_dentry);
+ if (err)
+ goto out;
+
+ __lock_d_revalidate(old_dentry);
+ __lock_d_revalidate(new_dentry);
+
+ inode_inc_iversion(new_dir);
+ new_dir->i_ctime = new_dir->i_mtime = new_dir->i_atime = ts;
+ if (IS_DIRSYNC(new_dir))
+ (void) sdfat_sync_inode(new_dir);
+ else
+ mark_inode_dirty(new_dir);
+
+ i_pos = sdfat_make_i_pos(&(SDFAT_I(old_inode)->fid));
+ sdfat_detach(old_inode);
+ sdfat_attach(old_inode, i_pos);
+ if (IS_DIRSYNC(new_dir))
+ (void) sdfat_sync_inode(old_inode);
+ else
+ mark_inode_dirty(old_inode);
+
+ if ((S_ISDIR(old_inode->i_mode)) && (old_dir != new_dir)) {
+ drop_nlink(old_dir);
+ if (!new_inode)
+ inc_nlink(new_dir);
+ }
+
+ inode_inc_iversion(old_dir);
+ old_dir->i_ctime = old_dir->i_mtime = ts;
+ if (IS_DIRSYNC(old_dir))
+ (void) sdfat_sync_inode(old_dir);
+ else
+ mark_inode_dirty(old_dir);
+
+ if (new_inode) {
+ sdfat_detach(new_inode);
+
+ /* skip drop_nlink if new_inode already has been dropped */
+ if (new_inode->i_nlink) {
+ drop_nlink(new_inode);
+ if (S_ISDIR(new_inode->i_mode))
+ drop_nlink(new_inode);
+ } else {
+ EMSG("%s : abnormal access to an inode dropped\n",
+ __func__);
+ WARN_ON(new_inode->i_nlink == 0);
+ }
+ new_inode->i_ctime = ts;
+#if 0
+ (void) sdfat_sync_inode(new_inode);
+#endif
+ }
+
+out:
+ __unlock_d_revalidate(old_dentry);
+ __unlock_d_revalidate(new_dentry);
+ __unlock_super(sb);
+ TMSG("%s exited with err(%d)\n", __func__, err);
+ return err;
+}
+
+static int sdfat_cont_expand(struct inode *inode, loff_t size)
+{
+ struct address_space *mapping = inode->i_mapping;
+ loff_t start = i_size_read(inode), count = size - i_size_read(inode);
+ int err, err2;
+
+ err = generic_cont_expand_simple(inode, size);
+ if (err)
+ return err;
+
+ inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
+ mark_inode_dirty(inode);
+
+ if (!IS_SYNC(inode))
+ return 0;
+
+ err = filemap_fdatawrite_range(mapping, start, start + count - 1);
+ err2 = sync_mapping_buffers(mapping);
+ err = (err)?(err):(err2);
+ err2 = write_inode_now(inode, 1);
+ err = (err)?(err):(err2);
+ if (err)
+ return err;
+
+ return filemap_fdatawait_range(mapping, start, start + count - 1);
+}
+
+static int sdfat_allow_set_time(struct sdfat_sb_info *sbi, struct inode *inode)
+{
+ mode_t allow_utime = sbi->options.allow_utime;
+
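+ /*
+ * allow_utime holds write-permission bits for group/other (e.g. the
+ * mount option allow_utime=0022). Shifting right by 3 moves the
+ * group-write bit into the MAY_WRITE position, so members of the
+ * owning group are checked against the 'g' bit and everyone else
+ * against the 'o' bit.
+ */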
+ if (!uid_eq(current_fsuid(), inode->i_uid)) {
+ if (in_group_p(inode->i_gid))
+ allow_utime >>= 3;
+ if (allow_utime & MAY_WRITE)
+ return 1;
+ }
+
+ /* use a default check */
+ return 0;
+}
+
+static int sdfat_sanitize_mode(const struct sdfat_sb_info *sbi,
+ struct inode *inode, umode_t *mode_ptr)
+{
+ mode_t i_mode, mask, perm;
+
+ i_mode = inode->i_mode;
+
+ if (S_ISREG(i_mode) || S_ISLNK(i_mode))
+ mask = sbi->options.fs_fmask;
+ else
+ mask = sbi->options.fs_dmask;
+
+ perm = *mode_ptr & ~(S_IFMT | mask);
+
+ /* Of the r and x bits, all (subject to umask) must be present. */
+ if ((perm & (S_IRUGO | S_IXUGO)) != (i_mode & (S_IRUGO | S_IXUGO)))
+ return -EPERM;
+
+ if (sdfat_mode_can_hold_ro(inode)) {
+ /* Of the w bits, either all (subject to umask) or none must be present. */
+ if ((perm & S_IWUGO) && ((perm & S_IWUGO) != (S_IWUGO & ~mask)))
+ return -EPERM;
+ } else {
+ /* If sdfat_mode_can_hold_ro(inode) is false, can't change w bits. */
+ if ((perm & S_IWUGO) != (S_IWUGO & ~mask))
+ return -EPERM;
+ }
+
+ *mode_ptr &= S_IFMT | perm;
+
+ return 0;
+}
+
+/*
+ * sdfat_block_truncate_page() zeroes out a mapping from file offset `from'
+ * up to the end of the block which corresponds to `from'.
+ * This is required during truncate to physically zeroout the tail end
+ * of that block so it doesn't yield old data if the file is later grown.
+ * Also, avoid causing failure from fsx for cases of "data past EOF"
+ */
+static int sdfat_block_truncate_page(struct inode *inode, loff_t from)
+{
+ return block_truncate_page(inode->i_mapping, from, sdfat_get_block);
+}
+
+static int sdfat_setattr(struct dentry *dentry, struct iattr *attr)
+{
+
+ struct sdfat_sb_info *sbi = SDFAT_SB(dentry->d_sb);
+ struct inode *inode = dentry->d_inode;
+ unsigned int ia_valid;
+ int error;
+ loff_t old_size;
+
+ TMSG("%s entered\n", __func__);
+
+ if ((attr->ia_valid & ATTR_SIZE)
+ && (attr->ia_size > i_size_read(inode))) {
+ error = sdfat_cont_expand(inode, attr->ia_size);
+ if (error || attr->ia_valid == ATTR_SIZE)
+ goto out;
+ attr->ia_valid &= ~ATTR_SIZE;
+ }
+
+ /* Check for setting the inode time. */
+ ia_valid = attr->ia_valid;
+ if ((ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET))
+ && sdfat_allow_set_time(sbi, inode)) {
+ attr->ia_valid &= ~(ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET);
+ }
+
+ error = setattr_prepare(dentry, attr);
+ attr->ia_valid = ia_valid;
+ if (error)
+ goto out;
+
+ if (((attr->ia_valid & ATTR_UID) &&
+ (!uid_eq(attr->ia_uid, sbi->options.fs_uid))) ||
+ ((attr->ia_valid & ATTR_GID) &&
+ (!gid_eq(attr->ia_gid, sbi->options.fs_gid))) ||
+ ((attr->ia_valid & ATTR_MODE) &&
+ (attr->ia_mode & ~(S_IFREG | S_IFLNK | S_IFDIR | S_IRWXUGO)))) {
+ error = -EPERM;
+ goto out;
+ }
+
+ /*
+ * We don't return -EPERM here. Yes, strange, but this is too
+ * old behavior.
+ */
+ if (attr->ia_valid & ATTR_MODE) {
+ if (sdfat_sanitize_mode(sbi, inode, &attr->ia_mode) < 0)
+ attr->ia_valid &= ~ATTR_MODE;
+ }
+
+ SDFAT_I(inode)->fid.size = i_size_read(inode);
+
+ /* patch 1.2.0 : fixed the problem of size mismatch. */
+ if (attr->ia_valid & ATTR_SIZE) {
+ error = sdfat_block_truncate_page(inode, attr->ia_size);
+ if (error)
+ goto out;
+
+ old_size = i_size_read(inode);
+
+ /* TO CHECK evicting directory works correctly */
+ MMSG("%s: inode(%p) truncate size (%llu->%llu)\n", __func__,
+ inode, (u64)old_size, (u64)attr->ia_size);
+ __sdfat_do_truncate(inode, old_size, attr->ia_size);
+ }
+ setattr_copy(inode, attr);
+ mark_inode_dirty(inode);
+out:
+ TMSG("%s exited with err(%d)\n", __func__, error);
+ return error;
+}
+
+static const struct inode_operations sdfat_dir_inode_operations = {
+ .create = sdfat_create,
+ .lookup = sdfat_lookup,
+ .unlink = sdfat_unlink,
+ .symlink = sdfat_symlink,
+ .mkdir = sdfat_mkdir,
+ .rmdir = sdfat_rmdir,
+ .rename = sdfat_rename,
+ .setattr = sdfat_setattr,
+ .getattr = sdfat_getattr,
+#ifdef CONFIG_SDFAT_VIRTUAL_XATTR
+ .listxattr = sdfat_listxattr,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)
+ .setxattr = sdfat_setxattr,
+ .getxattr = sdfat_getxattr,
+ .removexattr = sdfat_removexattr,
+#endif
+#endif
+};
+
+/*======================================================================*/
+/* File Operations */
+/*======================================================================*/
+static const struct inode_operations sdfat_symlink_inode_operations = {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
+ .readlink = generic_readlink,
+#endif
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0)
+ .get_link = sdfat_follow_link,
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) */
+ .follow_link = sdfat_follow_link,
+#endif
+#ifdef CONFIG_SDFAT_VIRTUAL_XATTR
+ .listxattr = sdfat_listxattr,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)
+ .setxattr = sdfat_setxattr,
+ .getxattr = sdfat_getxattr,
+ .removexattr = sdfat_removexattr,
+#endif
+#endif
+};
+
+static int sdfat_file_release(struct inode *inode, struct file *filp)
+{
+ struct super_block *sb = inode->i_sb;
+
+ /* Moved this code from sdfat_write_inode
+ * to fix a size-mismatch problem.
+ */
+ /* FIXME : Added bug_on to confirm that there is no size mismatch */
+ sdfat_debug_bug_on(SDFAT_I(inode)->fid.size != i_size_read(inode));
+ SDFAT_I(inode)->fid.size = i_size_read(inode);
+ fsapi_sync_fs(sb, 0);
+ return 0;
+}
+
+static const struct file_operations sdfat_file_operations = {
+ .llseek = generic_file_llseek,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0)
+ .read_iter = generic_file_read_iter,
+ .write_iter = generic_file_write_iter,
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
+ .read = new_sync_read,
+ .write = new_sync_write,
+ .read_iter = generic_file_read_iter,
+ .write_iter = generic_file_write_iter,
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0) */
+ .read = do_sync_read,
+ .write = do_sync_write,
+ .aio_read = generic_file_aio_read,
+ .aio_write = generic_file_aio_write,
+#endif
+ .mmap = sdfat_file_mmap,
+ .release = sdfat_file_release,
+ .unlocked_ioctl = sdfat_generic_ioctl,
+ .fsync = sdfat_file_fsync,
+ .splice_read = generic_file_splice_read,
+};
+
+static const struct address_space_operations sdfat_da_aops;
+static const struct address_space_operations sdfat_aops;
+
+static void sdfat_truncate(struct inode *inode, loff_t old_size)
+{
+ struct super_block *sb = inode->i_sb;
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ FS_INFO_T *fsi = &(sbi->fsi);
+ unsigned int blocksize = 1 << inode->i_blkbits;
+ loff_t aligned_size;
+ int err;
+
+ __lock_super(sb);
+
+ if (SDFAT_I(inode)->fid.start_clu == 0) {
+ /* Strange state:
+ * start_clu is 0 (empty), not ~0 (not allocated)
+ */
+ sdfat_fs_error(sb, "tried to truncate zeroed cluster.");
+ goto out;
+ }
+
+ sdfat_debug_check_clusters(inode);
+
+ __cancel_dfr_work(inode, (loff_t)i_size_read(inode), (loff_t)old_size, __func__);
+
+ err = fsapi_truncate(inode, old_size, i_size_read(inode));
+ if (err)
+ goto out;
+
+ inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
+ if (IS_DIRSYNC(inode))
+ (void) sdfat_sync_inode(inode);
+ else
+ mark_inode_dirty(inode);
+
+ // FIXME: needs verification
+ // inode->i_blocks = ((SDFAT_I(inode)->i_size_ondisk + (fsi->cluster_size - 1))
+ inode->i_blocks = ((i_size_read(inode) + (fsi->cluster_size - 1)) &
+ ~((loff_t)fsi->cluster_size - 1)) >> inode->i_blkbits;
+out:
+ /*
+ * This protects against truncating a file bigger than it was and
+ * then trying to write into the hole.
+ *
+ * comment by sh.hong:
+ * This seems to mean 'intra page/block' truncate and writing.
+ * I couldn't find a reason to change the values prior to fsapi_truncate.
+ * Therefore, I switched the order of operations
+ * so that it's possible to utilize i_size_ondisk in fsapi_truncate.
+ */
+
+ aligned_size = i_size_read(inode);
+ if (aligned_size & (blocksize - 1)) {
+ aligned_size |= (blocksize - 1);
+ aligned_size++;
+ }
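+ /*
+ * Illustrative example: with a 4096-byte blocksize, i_size 5000
+ * rounds up to aligned_size 8192, while an already block-aligned
+ * i_size (e.g. 8192) is left unchanged.
+ */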
+
+ if (SDFAT_I(inode)->i_size_ondisk > i_size_read(inode))
+ SDFAT_I(inode)->i_size_ondisk = aligned_size;
+
+ sdfat_debug_check_clusters(inode);
+
+ if (SDFAT_I(inode)->i_size_aligned > i_size_read(inode))
+ SDFAT_I(inode)->i_size_aligned = aligned_size;
+
+ /* After truncation :
+ * 1) Delayed allocation is OFF
+ * i_size = i_size_ondisk <= i_size_aligned
+ * (useless size var.)
+ * (block-aligned)
+ * 2) Delayed allocation is ON
+ * i_size = i_size_ondisk = i_size_aligned
+ * (will be block-aligned after write)
+ * or
+ * i_size_ondisk < i_size <= i_size_aligned (block_aligned)
+ * (will be block-aligned after write)
+ */
+
+ __unlock_super(sb);
+}
+
+static const struct inode_operations sdfat_file_inode_operations = {
+ .setattr = sdfat_setattr,
+ .getattr = sdfat_getattr,
+#ifdef CONFIG_SDFAT_VIRTUAL_XATTR
+ .listxattr = sdfat_listxattr,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)
+ .setxattr = sdfat_setxattr,
+ .getxattr = sdfat_getxattr,
+ .removexattr = sdfat_removexattr,
+#endif
+#endif
+};
+
+/*======================================================================*/
+/* Address Space Operations */
+/*======================================================================*/
+/* 2-level option flag */
+#define BMAP_NOT_CREATE 0
+#define BMAP_ADD_BLOCK 1
+#define BMAP_ADD_CLUSTER 2
+#define BLOCK_ADDED(bmap_ops) (bmap_ops)
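+/*
+ * Note: sdfat_bmap() downgrades *create in place; when the requested
+ * sector already lies below the last block of the file, *create is
+ * reset to BMAP_NOT_CREATE, so BLOCK_ADDED(*create) after the call
+ * tells whether a new block was actually appended.
+ */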
+static int sdfat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
+ unsigned long *mapped_blocks, int *create)
+{
+ struct super_block *sb = inode->i_sb;
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ FS_INFO_T *fsi = &(sbi->fsi);
+ const unsigned long blocksize = sb->s_blocksize;
+ const unsigned char blocksize_bits = sb->s_blocksize_bits;
+ sector_t last_block;
+ unsigned int cluster, clu_offset, sec_offset;
+ int err = 0;
+
+ *phys = 0;
+ *mapped_blocks = 0;
+
+ /* core code should handle EIO */
+#if 0
+ if (fsi->prev_eio && BLOCK_ADDED(*create))
+ return -EIO;
+#endif
+
+ if (((fsi->vol_type == FAT12) || (fsi->vol_type == FAT16)) &&
+ (inode->i_ino == SDFAT_ROOT_INO)) {
+ if (sector < (fsi->dentries_in_root >>
+ (sb->s_blocksize_bits - DENTRY_SIZE_BITS))) {
+ *phys = sector + fsi->root_start_sector;
+ *mapped_blocks = 1;
+ }
+ return 0;
+ }
+
+ last_block = (i_size_read(inode) + (blocksize - 1)) >> blocksize_bits;
+ if ((sector >= last_block) && (*create == BMAP_NOT_CREATE))
+ return 0;
+
+ /* Is this block already allocated? */
+ clu_offset = sector >> fsi->sect_per_clus_bits; /* cluster offset */
+
+ SDFAT_I(inode)->fid.size = i_size_read(inode);
+
+
+ if (unlikely(__check_dfr_on(inode,
+ (loff_t)((loff_t)clu_offset << fsi->cluster_size_bits),
+ (loff_t)((loff_t)(clu_offset + 1) << fsi->cluster_size_bits),
+ __func__))) {
+ err = __do_dfr_map_cluster(inode, clu_offset, &cluster);
+ } else {
+ if (*create & BMAP_ADD_CLUSTER)
+ err = fsapi_map_clus(inode, clu_offset, &cluster, 1);
+ else
+ err = fsapi_map_clus(inode, clu_offset, &cluster, ALLOC_NOWHERE);
+ }
+
+ if (err) {
+ if (err != -ENOSPC)
+ return -EIO;
+ return err;
+ }
+
+ /* FOR BIGDATA */
+ sdfat_statistics_set_rw(SDFAT_I(inode)->fid.flags,
+ clu_offset, *create & BMAP_ADD_CLUSTER);
+
+ if (!IS_CLUS_EOF(cluster)) {
+ /* sector offset in cluster */
+ sec_offset = sector & (fsi->sect_per_clus - 1);
+
+ *phys = CLUS_TO_SECT(fsi, cluster) + sec_offset;
+ *mapped_blocks = fsi->sect_per_clus - sec_offset;
+ }
+#if 0
+ else {
+ /* Debug purpose (new clu needed) */
+ ASSERT((*create & BMAP_ADD_CLUSTER) == 0);
+ ASSERT(sector >= last_block);
+ }
+#endif
+
+ if (sector < last_block)
+ *create = BMAP_NOT_CREATE;
+#if 0
+ else if (sector >= last_block)
+ *create = non-zero;
+
+ if (iblock <= last mapped-block)
+ *phys != 0
+ *create = BMAP_NOT_CREATE
+ else if (iblock <= last cluster)
+ *phys != 0
+ *create = non-zero
+#endif
+ return 0;
+}
+
+static int sdfat_da_prep_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
+{
+ struct super_block *sb = inode->i_sb;
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ FS_INFO_T *fsi = &(sbi->fsi);
+ unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits;
+ unsigned long mapped_blocks;
+ sector_t phys;
+ loff_t pos;
+ int sec_offset;
+ int bmap_create = create ? BMAP_ADD_BLOCK : BMAP_NOT_CREATE;
+ int err = 0;
+
+ __lock_super(sb);
+
+ /* FAT32 only */
+ ASSERT(fsi->vol_type == FAT32);
+
+ err = sdfat_bmap(inode, iblock, &phys, &mapped_blocks, &bmap_create);
+ if (err) {
+ if (err != -ENOSPC)
+ sdfat_fs_error_ratelimit(sb, "%s: failed to bmap "
+ "(iblock:%u, err:%d)", __func__,
+ (u32)iblock, err);
+ goto unlock_ret;
+ }
+
+ sec_offset = iblock & (fsi->sect_per_clus - 1);
+
+ if (phys) {
+ /* the block is within the mapped cluster boundary */
+ max_blocks = min(mapped_blocks, max_blocks);
+ map_bh(bh_result, sb, phys);
+
+ BUG_ON(BLOCK_ADDED(bmap_create) && (sec_offset == 0));
+
+ } else if (create == 1) {
+ /* Not exist: new cluster needed */
+ if (!BLOCK_ADDED(bmap_create)) {
+ sector_t last_block;
+ last_block = (i_size_read(inode) + (sb->s_blocksize - 1))
+ >> sb->s_blocksize_bits;
+ sdfat_fs_error(sb, "%s: new cluster need, but "
+ "bmap_create == BMAP_NOT_CREATE(iblock:%lld, "
+ "last_block:%lld)", __func__,
+ (s64)iblock, (s64)last_block);
+ err = -EIO;
+ goto unlock_ret;
+ }
+
+ // Reserved Cluster (only if iblock is the first sector in a clu)
+ if (sec_offset == 0) {
+ err = fsapi_reserve_clus(inode);
+ if (err) {
+ if (err != -ENOSPC)
+ sdfat_fs_error_ratelimit(sb,
+ "%s: failed to bmap "
+ "(iblock:%u, err:%d)", __func__,
+ (u32)iblock, err);
+
+ goto unlock_ret;
+ }
+ }
+
+ // Delayed mapping
+ map_bh(bh_result, sb, ~((sector_t) 0xffff));
+ set_buffer_new(bh_result);
+ set_buffer_delay(bh_result);
+
+ } else {
+ /* get_block on non-existing addr. with create==0 */
+ /*
+ * CHECKME:
+ * If the address is below i_size_aligned, shouldn't we
+ * enable delayed mapping here for now?
+ * - Since we always 0-fill, this is not a problem on FAT.
+ *   Once an intermediate region is fully written, it is
+ *   never invalidated without first being flushed to disk.
+ */
+ goto unlock_ret;
+ }
+
+
+ /* Newly added blocks */
+ if (BLOCK_ADDED(bmap_create)) {
+ set_buffer_new(bh_result);
+
+ SDFAT_I(inode)->i_size_aligned += max_blocks << sb->s_blocksize_bits;
+ if (phys) {
+ /* i_size_ondisk changes if a block added in the existing cluster */
+ #define num_clusters(value) ((value) ? (s32)((value - 1) >> fsi->cluster_size_bits) + 1 : 0)
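+ /* e.g. with 32 KiB clusters (cluster_size_bits == 15):
+ * num_clusters(0) == 0, num_clusters(32768) == 1,
+ * num_clusters(32769) == 2.
+ */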
+
+ /* FOR GRACEFUL ERROR HANDLING */
+ if (num_clusters(SDFAT_I(inode)->i_size_aligned) !=
+ num_clusters(SDFAT_I(inode)->i_size_ondisk)) {
+ EMSG("%s: inode(%p) invalid size (create(%d) "
+ "bmap_create(%d) phys(%lld) aligned(%lld) "
+ "on_disk(%lld) iblock(%u) sec_off(%d))\n",
+ __func__, inode, create, bmap_create, (s64)phys,
+ (s64)SDFAT_I(inode)->i_size_aligned,
+ (s64)SDFAT_I(inode)->i_size_ondisk,
+ (u32)iblock,
+ (s32)sec_offset);
+ sdfat_debug_bug_on(1);
+ }
+ SDFAT_I(inode)->i_size_ondisk = SDFAT_I(inode)->i_size_aligned;
+ }
+
+ pos = (iblock + 1) << sb->s_blocksize_bits;
+ /* Debug purpose - defensive coding */
+ ASSERT(SDFAT_I(inode)->i_size_aligned == pos);
+ if (SDFAT_I(inode)->i_size_aligned < pos)
+ SDFAT_I(inode)->i_size_aligned = pos;
+ /* Debug end */
+
+#ifdef CONFIG_SDFAT_TRACE_IO
+ /* New page added (ASSERTION: 8 blocks per page) */
+ if ((sec_offset & 7) == 0)
+ sbi->stat_n_pages_added++;
+#endif
+ }
+
+ /* FOR GRACEFUL ERROR HANDLING */
+ if (i_size_read(inode) > SDFAT_I(inode)->i_size_aligned) {
+ sdfat_fs_error_ratelimit(sb, "%s: invalid size (inode(%p), "
+ "size(%llu) > aligned(%llu)\n", __func__, inode,
+ i_size_read(inode), SDFAT_I(inode)->i_size_aligned);
+ sdfat_debug_bug_on(1);
+ }
+
+ bh_result->b_size = max_blocks << sb->s_blocksize_bits;
+
+unlock_ret:
+ __unlock_super(sb);
+ return err;
+}
+
+static int sdfat_get_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
+{
+ struct super_block *sb = inode->i_sb;
+ unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits;
+ int err = 0;
+ unsigned long mapped_blocks;
+ sector_t phys;
+ loff_t pos;
+ int bmap_create = create ? BMAP_ADD_CLUSTER : BMAP_NOT_CREATE;
+
+ __lock_super(sb);
+ err = sdfat_bmap(inode, iblock, &phys, &mapped_blocks, &bmap_create);
+ if (err) {
+ if (err != -ENOSPC)
+ sdfat_fs_error_ratelimit(sb, "%s: failed to bmap "
+ "(inode:%p iblock:%u, err:%d)",
+ __func__, inode, (u32)iblock, err);
+ goto unlock_ret;
+ }
+
+ if (phys) {
+ max_blocks = min(mapped_blocks, max_blocks);
+
+ /* Treat newly added block / cluster */
+ if (BLOCK_ADDED(bmap_create) || buffer_delay(bh_result)) {
+
+ /* Update i_size_ondisk */
+ pos = (iblock + 1) << sb->s_blocksize_bits;
+ if (SDFAT_I(inode)->i_size_ondisk < pos) {
+ /* Debug purpose */
+ if ((pos - SDFAT_I(inode)->i_size_ondisk) > bh_result->b_size) {
+ /* This never happens without DA */
+ MMSG("Jumping get_block\n");
+ }
+
+ SDFAT_I(inode)->i_size_ondisk = pos;
+ sdfat_debug_check_clusters(inode);
+ }
+
+ if (BLOCK_ADDED(bmap_create)) {
+ /* Old way (w/o DA)
+ * create == 1 only if iblock > i_size
+ * (in block unit)
+ */
+
+ /* 20130723 CHECK
+ * If this races with a truncate, buffer_delay()
+ * may be set while i_size < (position of iblock).
+ *
+ * Since we are only rewriting an already-allocated
+ * area this is not a big problem, but in that case
+ * the area must already lie within the extended
+ * i_size_aligned.
+ */
+
+ /* FOR GRACEFUL ERROR HANDLING */
+ if (buffer_delay(bh_result) &&
+ (pos > SDFAT_I(inode)->i_size_aligned)) {
+ sdfat_fs_error(sb, "requested for bmap "
+ "out of range(pos:(%llu)>i_size_aligned(%llu)\n",
+ pos, SDFAT_I(inode)->i_size_aligned);
+ sdfat_debug_bug_on(1);
+ err = -EIO;
+ goto unlock_ret;
+ }
+ set_buffer_new(bh_result);
+
+ /*
+ * adjust i_size_aligned if i_size_ondisk is
+ * bigger than it. (i.e. non-DA)
+ */
+ if (SDFAT_I(inode)->i_size_ondisk >
+ SDFAT_I(inode)->i_size_aligned) {
+ SDFAT_I(inode)->i_size_aligned =
+ SDFAT_I(inode)->i_size_ondisk;
+ }
+ }
+
+ if (buffer_delay(bh_result))
+ clear_buffer_delay(bh_result);
+
+#if 0
+ /* Debug purpose */
+ if (SDFAT_I(inode)->i_size_ondisk >
+ SDFAT_I(inode)->i_size_aligned) {
+ /* Only after truncate
+ * and the two size variables should indicate
+ * same i_block
+ */
+ unsigned int blocksize = 1 << inode->i_blkbits;
+ BUG_ON(SDFAT_I(inode)->i_size_ondisk -
+ SDFAT_I(inode)->i_size_aligned >= blocksize);
+ }
+#endif
+ }
+ map_bh(bh_result, sb, phys);
+ }
+
+ bh_result->b_size = max_blocks << sb->s_blocksize_bits;
+unlock_ret:
+ __unlock_super(sb);
+ return err;
+}
+
+static int sdfat_readpage(struct file *file, struct page *page)
+{
+ int ret;
+
+ ret = mpage_readpage(page, sdfat_get_block);
+ return ret;
+}
+
+static int sdfat_readpages(struct file *file, struct address_space *mapping,
+ struct list_head *pages, unsigned int nr_pages)
+{
+ int ret;
+
+ ret = mpage_readpages(mapping, pages, nr_pages, sdfat_get_block);
+ return ret;
+}
+
+static inline void sdfat_submit_fullpage_bio(struct block_device *bdev,
+ sector_t sector, unsigned int length, struct page *page)
+{
+ /* Single page bio submit */
+ struct bio *bio;
+
+ BUG_ON((length > PAGE_SIZE) || (length == 0));
+
+ /*
+ * If __GFP_WAIT is set, then bio_alloc will always be able to allocate
+ * a bio. This is due to the mempool guarantees. To make this work, callers
+ * must never allocate more than 1 bio at a time from this pool.
+ *
+ * #define GFP_NOIO (__GFP_WAIT)
+ */
+ bio = bio_alloc(GFP_NOIO, 1);
+
+ bio_set_dev(bio, bdev);
+ bio->bi_vcnt = 1;
+ bio->bi_io_vec[0].bv_page = page; /* Inline vec */
+ bio->bi_io_vec[0].bv_len = length; /* PAGE_SIZE */
+ bio->bi_io_vec[0].bv_offset = 0;
+ __sdfat_set_bio_iterate(bio, sector, length, 0, 0);
+
+ bio->bi_end_io = sdfat_writepage_end_io;
+ __sdfat_submit_bio_write(bio);
+}
+
+static int sdfat_writepage(struct page *page, struct writeback_control *wbc)
+{
+ struct inode * const inode = page->mapping->host;
+ struct super_block *sb = inode->i_sb;
+ loff_t i_size = i_size_read(inode);
+ const pgoff_t end_index = i_size >> PAGE_SHIFT;
+ const unsigned int blocks_per_page = PAGE_SIZE >> inode->i_blkbits;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ struct buffer_head *bh, *head;
+ sector_t block, block_0, last_phys;
+ int ret;
+ unsigned int nr_blocks_towrite = blocks_per_page;
+
+ /* Don't distinguish 0-filled/clean block.
+ * Just write back the whole page
+ */
+ if (fsi->cluster_size < PAGE_SIZE)
+ goto confused;
+
+ if (!PageUptodate(page)) {
+ MMSG("%s: Not up-to-date page -> block_write_full_page\n",
+ __func__);
+ goto confused;
+ }
+
+ if (page->index >= end_index) {
+ /* last page or outside i_size */
+ unsigned int offset = i_size & (PAGE_SIZE-1);
+
+ /* If a truncation is in progress */
+ if (page->index > end_index || !offset)
+ goto confused;
+
+ /* 0-fill after i_size */
+ zero_user_segment(page, offset, PAGE_SIZE);
+ }
+
+ if (!page_has_buffers(page)) {
+ MMSG("WP: No buffers -> block_write_full_page\n");
+ goto confused;
+ }
+
+ block = (sector_t)page->index << (PAGE_SHIFT - inode->i_blkbits);
+ block_0 = block; /* first block */
+ head = page_buffers(page);
+ bh = head;
+
+ last_phys = 0;
+ do {
+ BUG_ON(buffer_locked(bh));
+
+ if (!buffer_dirty(bh) || !buffer_uptodate(bh)) {
+ if (nr_blocks_towrite == blocks_per_page)
+ nr_blocks_towrite = (unsigned int) (block - block_0);
+
+ BUG_ON(nr_blocks_towrite >= blocks_per_page);
+
+ // !uptodate but dirty??
+ if (buffer_dirty(bh))
+ goto confused;
+
+ // Nothing to writeback in this block
+ bh = bh->b_this_page;
+ block++;
+ continue;
+ }
+
+ if (nr_blocks_towrite != blocks_per_page)
+ // Dirty -> Non-dirty -> Dirty again case
+ goto confused;
+
+ /* Map if needed */
+ if (!buffer_mapped(bh) || buffer_delay(bh)) {
+ BUG_ON(bh->b_size != (1 << (inode->i_blkbits)));
+ ret = sdfat_get_block(inode, block, bh, 1);
+ if (ret)
+ goto confused;
+
+ if (buffer_new(bh)) {
+ clear_buffer_new(bh);
+ __sdfat_clean_bdev_aliases(bh->b_bdev, bh->b_blocknr);
+ }
+ }
+
+ /* continuity check */
+ if (((last_phys + 1) != bh->b_blocknr) && (last_phys != 0)) {
+ DMSG("Non-contiguous block mapping in single page");
+ goto confused;
+ }
+
+ last_phys = bh->b_blocknr;
+ bh = bh->b_this_page;
+ block++;
+ } while (bh != head);
+
+ if (nr_blocks_towrite == 0) {
+ DMSG("Page dirty but no dirty bh? alloc_208\n");
+ goto confused;
+ }
+
+
+ /* Write-back */
+ do {
+ clear_buffer_dirty(bh);
+ bh = bh->b_this_page;
+ } while (bh != head);
+
+ BUG_ON(PageWriteback(page));
+ set_page_writeback(page);
+
+ /*
+ * Turn off the MAPPED flag in the victim's bh if defrag is on.
+ * Another write_begin may start after get_block has been called
+ * for the defrag victims. In that case, write_begin calls
+ * get_block, gets the original block number, and the previous
+ * defrag is canceled.
+ */
+ if (unlikely(__check_dfr_on(inode,
+ (loff_t)(page->index << PAGE_SHIFT),
+ (loff_t)((page->index + 1) << PAGE_SHIFT),
+ __func__))) {
+ do {
+ clear_buffer_mapped(bh);
+ bh = bh->b_this_page;
+ } while (bh != head);
+ }
+
+ // Trace # of pages queued (Approx.)
+ atomic_inc(&SDFAT_SB(sb)->stat_n_pages_queued);
+
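+ /* The shift converts the filesystem block number into 512-byte
+ * sector units (e.g. with 4 KiB blocks, s_blocksize_bits -
+ * SECTOR_SIZE_BITS == 3, so block 100 becomes sector 800).
+ */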
+ sdfat_submit_fullpage_bio(head->b_bdev,
+ head->b_blocknr << (sb->s_blocksize_bits - SECTOR_SIZE_BITS),
+ nr_blocks_towrite << inode->i_blkbits,
+ page);
+
+ unlock_page(page);
+
+ return 0;
+
+confused:
+#ifdef CONFIG_SDFAT_TRACE_IO
+ SDFAT_SB(sb)->stat_n_pages_confused++;
+#endif
+ ret = block_write_full_page(page, sdfat_get_block, wbc);
+ return ret;
+}
+
+static int sdfat_da_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ MMSG("%s(inode:%p) with nr_to_write = 0x%08lx "
+ "(ku %d, bg %d, tag %d, rc %d )\n",
+ __func__, mapping->host, wbc->nr_to_write,
+ wbc->for_kupdate, wbc->for_background, wbc->tagged_writepages,
+ wbc->for_reclaim);
+
+ ASSERT(mapping->a_ops == &sdfat_da_aops);
+
+#ifdef CONFIG_SDFAT_ALIGNED_MPAGE_WRITE
+ if (SDFAT_SB(mapping->host->i_sb)->options.adj_req)
+ return sdfat_mpage_writepages(mapping, wbc, sdfat_get_block);
+#endif
+ return generic_writepages(mapping, wbc);
+}
+
+static int sdfat_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ MMSG("%s(inode:%p) with nr_to_write = 0x%08lx "
+ "(ku %d, bg %d, tag %d, rc %d )\n",
+ __func__, mapping->host, wbc->nr_to_write,
+ wbc->for_kupdate, wbc->for_background, wbc->tagged_writepages,
+ wbc->for_reclaim);
+
+ ASSERT(mapping->a_ops == &sdfat_aops);
+
+#ifdef CONFIG_SDFAT_ALIGNED_MPAGE_WRITE
+ if (SDFAT_SB(mapping->host->i_sb)->options.adj_req)
+ return sdfat_mpage_writepages(mapping, wbc, sdfat_get_block);
+#endif
+ return mpage_writepages(mapping, wbc, sdfat_get_block);
+}
+
+static void sdfat_write_failed(struct address_space *mapping, loff_t to)
+{
+ struct inode *inode = mapping->host;
+
+ if (to > i_size_read(inode)) {
+ __sdfat_truncate_pagecache(inode, to, i_size_read(inode));
+ sdfat_truncate(inode, SDFAT_I(inode)->i_size_aligned);
+ }
+}
+
+static int sdfat_check_writable(struct super_block *sb)
+{
+ if (fsapi_check_bdi_valid(sb))
+ return -EIO;
+
+ if (sb->s_flags & MS_RDONLY)
+ return -EROFS;
+
+ return 0;
+}
+
+static int __sdfat_write_begin(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned int len,
+ unsigned int flags, struct page **pagep,
+ void **fsdata, get_block_t *get_block,
+ loff_t *bytes, const char *fname)
+{
+ struct super_block *sb = mapping->host->i_sb;
+ int ret;
+
+ __cancel_dfr_work(mapping->host, pos, (loff_t)(pos + len), fname);
+
+ ret = sdfat_check_writable(sb);
+ if (unlikely(ret < 0))
+ return ret;
+
+ *pagep = NULL;
+ ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+ get_block, bytes);
+
+ if (ret < 0)
+ sdfat_write_failed(mapping, pos+len);
+
+ return ret;
+}
+
+
+static int sdfat_da_write_begin(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned int len, unsigned int flags,
+ struct page **pagep, void **fsdata)
+{
+ return __sdfat_write_begin(file, mapping, pos, len, flags,
+ pagep, fsdata, sdfat_da_prep_block,
+ &SDFAT_I(mapping->host)->i_size_aligned,
+ __func__);
+}
+
+
+static int sdfat_write_begin(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned int len, unsigned int flags,
+ struct page **pagep, void **fsdata)
+{
+ return __sdfat_write_begin(file, mapping, pos, len, flags,
+ pagep, fsdata, sdfat_get_block,
+ &SDFAT_I(mapping->host)->i_size_ondisk,
+ __func__);
+}
+
+static int sdfat_write_end(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned int len, unsigned int copied,
+ struct page *pagep, void *fsdata)
+{
+ struct inode *inode = mapping->host;
+ FILE_ID_T *fid = &(SDFAT_I(inode)->fid);
+ int err;
+
+ err = generic_write_end(file, mapping, pos, len, copied, pagep, fsdata);
+
+ /* FOR GRACEFUL ERROR HANDLING */
+ if (SDFAT_I(inode)->i_size_aligned < i_size_read(inode)) {
+ sdfat_fs_error(inode->i_sb, "invalid size(size(%llu) "
+ "> aligned(%llu)\n", i_size_read(inode),
+ SDFAT_I(inode)->i_size_aligned);
+ sdfat_debug_bug_on(1);
+ }
+
+ if (err < len)
+ sdfat_write_failed(mapping, pos+len);
+
+ if (!(err < 0) && !(fid->attr & ATTR_ARCHIVE)) {
+ inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
+ fid->attr |= ATTR_ARCHIVE;
+ mark_inode_dirty(inode);
+ }
+
+ return err;
+}
+
+static inline ssize_t __sdfat_direct_IO(int rw, struct kiocb *iocb,
+ struct inode *inode, void *iov_u, loff_t offset,
+ loff_t count, unsigned long nr_segs)
+{
+ struct address_space *mapping = inode->i_mapping;
+ loff_t size = offset + count;
+ ssize_t ret;
+
+ if (rw == WRITE) {
+ /*
+ * FIXME: blockdev_direct_IO() doesn't use ->write_begin(),
+ * so we would need to round ->i_size_aligned up to the block
+ * boundary, which in turn requires zero-filling the remaining
+ * area or hole first.
+ *
+ * Return 0 instead, and fall back to a normal buffered write.
+ */
+ if (SDFAT_I(inode)->i_size_aligned < size)
+ return 0;
+ }
+
+ /*
+ * sdFAT need to use the DIO_LOCKING for avoiding the race
+ * condition of sdfat_get_block() and ->truncate().
+ */
+ ret = __sdfat_blkdev_direct_IO(rw, iocb, inode, iov_u, offset, nr_segs);
+ if (ret < 0 && (rw & WRITE))
+ sdfat_write_failed(mapping, size);
+
+ return ret;
+}
+
+static const struct address_space_operations sdfat_aops = {
+ .readpage = sdfat_readpage,
+ .readpages = sdfat_readpages,
+ .writepage = sdfat_writepage,
+ .writepages = sdfat_writepages,
+ .write_begin = sdfat_write_begin,
+ .write_end = sdfat_write_end,
+ .direct_IO = sdfat_direct_IO,
+ .bmap = sdfat_aop_bmap
+};
+
+static const struct address_space_operations sdfat_da_aops = {
+ .readpage = sdfat_readpage,
+ .readpages = sdfat_readpages,
+ .writepage = sdfat_writepage,
+ .writepages = sdfat_da_writepages,
+ .write_begin = sdfat_da_write_begin,
+ .write_end = sdfat_write_end,
+ .direct_IO = sdfat_direct_IO,
+ .bmap = sdfat_aop_bmap
+};
+
+/*======================================================================*/
+/* Super Operations */
+/*======================================================================*/
+
+static inline unsigned long sdfat_hash(loff_t i_pos)
+{
+ return hash_32(i_pos, SDFAT_HASH_BITS);
+}
+
+static void sdfat_attach(struct inode *inode, loff_t i_pos)
+{
+ struct sdfat_sb_info *sbi = SDFAT_SB(inode->i_sb);
+ struct hlist_head *head = sbi->inode_hashtable + sdfat_hash(i_pos);
+
+ spin_lock(&sbi->inode_hash_lock);
+ SDFAT_I(inode)->i_pos = i_pos;
+ hlist_add_head(&SDFAT_I(inode)->i_hash_fat, head);
+ spin_unlock(&sbi->inode_hash_lock);
+}
+
+static void sdfat_detach(struct inode *inode)
+{
+ struct sdfat_sb_info *sbi = SDFAT_SB(inode->i_sb);
+
+ spin_lock(&sbi->inode_hash_lock);
+ hlist_del_init(&SDFAT_I(inode)->i_hash_fat);
+ SDFAT_I(inode)->i_pos = 0;
+ spin_unlock(&sbi->inode_hash_lock);
+}
+
+
+/* doesn't deal with root inode */
+static int sdfat_fill_inode(struct inode *inode, const FILE_ID_T *fid)
+{
+ struct sdfat_sb_info *sbi = SDFAT_SB(inode->i_sb);
+ FS_INFO_T *fsi = &(sbi->fsi);
+ DIR_ENTRY_T info;
+ u64 size = fid->size;
+
+ memcpy(&(SDFAT_I(inode)->fid), fid, sizeof(FILE_ID_T));
+
+ SDFAT_I(inode)->i_pos = 0;
+ SDFAT_I(inode)->target = NULL;
+ inode->i_uid = sbi->options.fs_uid;
+ inode->i_gid = sbi->options.fs_gid;
+ inode_inc_iversion(inode);
+ inode->i_generation = get_seconds();
+
+ if (fsapi_read_inode(inode, &info) < 0) {
+ MMSG("%s: failed to read stat!\n", __func__);
+ return -EIO;
+ }
+
+ if (info.Attr & ATTR_SUBDIR) { /* directory */
+ inode->i_generation &= ~1;
+ inode->i_mode = sdfat_make_mode(sbi, info.Attr, S_IRWXUGO);
+ inode->i_op = &sdfat_dir_inode_operations;
+ inode->i_fop = &sdfat_dir_operations;
+
+ set_nlink(inode, info.NumSubdirs);
+ } else if (info.Attr & ATTR_SYMLINK) { /* symbolic link */
+ inode->i_op = &sdfat_symlink_inode_operations;
+ inode->i_generation |= 1;
+ inode->i_mode = sdfat_make_mode(sbi, info.Attr, S_IRWXUGO);
+ } else { /* regular file */
+ inode->i_generation |= 1;
+ inode->i_mode = sdfat_make_mode(sbi, info.Attr, S_IRWXUGO);
+ inode->i_op = &sdfat_file_inode_operations;
+ inode->i_fop = &sdfat_file_operations;
+
+ if (sbi->options.improved_allocation & SDFAT_ALLOC_DELAY)
+ inode->i_mapping->a_ops = &sdfat_da_aops;
+ else
+ inode->i_mapping->a_ops = &sdfat_aops;
+
+ inode->i_mapping->nrpages = 0;
+
+ }
+
+ /*
+ * Use fid->size instead of info.Size
+ * because info.Size means the value saved on disk
+ */
+ i_size_write(inode, size);
+
+ /* ondisk and aligned size should be aligned with block size */
+ if (size & (inode->i_sb->s_blocksize - 1)) {
+ size |= (inode->i_sb->s_blocksize - 1);
+ size++;
+ }
+
+ SDFAT_I(inode)->i_size_aligned = size;
+ SDFAT_I(inode)->i_size_ondisk = size;
+ sdfat_debug_check_clusters(inode);
+
+ sdfat_save_attr(inode, info.Attr);
+
+ inode->i_blocks = ((i_size_read(inode) + (fsi->cluster_size - 1))
+ & ~((loff_t)fsi->cluster_size - 1)) >> inode->i_blkbits;
+
+ sdfat_time_fat2unix(sbi, &inode->i_mtime, &info.ModifyTimestamp);
+ sdfat_time_fat2unix(sbi, &inode->i_ctime, &info.CreateTimestamp);
+ sdfat_time_fat2unix(sbi, &inode->i_atime, &info.AccessTimestamp);
+
+ __init_dfr_info(inode);
+
+ return 0;
+}
+
+static struct inode *sdfat_build_inode(struct super_block *sb,
+ const FILE_ID_T *fid, loff_t i_pos)
+{
+ struct inode *inode;
+ int err;
+
+ inode = sdfat_iget(sb, i_pos);
+ if (inode)
+ goto out;
+ inode = new_inode(sb);
+ if (!inode) {
+ inode = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+ inode->i_ino = iunique(sb, SDFAT_ROOT_INO);
+ inode_set_iversion(inode, 1);
+ err = sdfat_fill_inode(inode, fid);
+ if (err) {
+ iput(inode);
+ inode = ERR_PTR(err);
+ goto out;
+ }
+ sdfat_attach(inode, i_pos);
+ insert_inode_hash(inode);
+out:
+ return inode;
+}
+
+static struct inode *sdfat_alloc_inode(struct super_block *sb)
+{
+ struct sdfat_inode_info *ei;
+
+ ei = kmem_cache_alloc(sdfat_inode_cachep, GFP_NOFS);
+ if (!ei)
+ return NULL;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
+ init_rwsem(&ei->truncate_lock);
+#endif
+ return &ei->vfs_inode;
+}
+
+static void sdfat_destroy_inode(struct inode *inode)
+{
+ if (SDFAT_I(inode)->target) {
+ kfree(SDFAT_I(inode)->target);
+ SDFAT_I(inode)->target = NULL;
+ }
+
+ kmem_cache_free(sdfat_inode_cachep, SDFAT_I(inode));
+}
+
+static int __sdfat_write_inode(struct inode *inode, int sync)
+{
+ struct super_block *sb = inode->i_sb;
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ DIR_ENTRY_T info;
+
+ if (inode->i_ino == SDFAT_ROOT_INO)
+ return 0;
+
+ info.Attr = sdfat_make_attr(inode);
+ info.Size = i_size_read(inode);
+
+ sdfat_time_unix2fat(sbi, &inode->i_mtime, &info.ModifyTimestamp);
+ sdfat_time_unix2fat(sbi, &inode->i_ctime, &info.CreateTimestamp);
+ sdfat_time_unix2fat(sbi, &inode->i_atime, &info.AccessTimestamp);
+
+ if (!__support_write_inode_sync(sb))
+ sync = 0;
+
+ /* FIXME : Do we need handling error? */
+ return fsapi_write_inode(inode, &info, sync);
+}
+
+static int sdfat_sync_inode(struct inode *inode)
+{
+ return __sdfat_write_inode(inode, 1);
+}
+
+static int sdfat_write_inode(struct inode *inode, struct writeback_control *wbc)
+{
+ return __sdfat_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
+}
+
+static void sdfat_evict_inode(struct inode *inode)
+{
+ truncate_inode_pages(&inode->i_data, 0);
+
+ if (!inode->i_nlink) {
+ loff_t old_size = i_size_read(inode);
+
+ i_size_write(inode, 0);
+
+ SDFAT_I(inode)->fid.size = old_size;
+
+ __cancel_dfr_work(inode, 0, (loff_t)old_size, __func__);
+
+ /* TO CHECK evicting directory works correctly */
+ MMSG("%s: inode(%p) evict %s (size(%llu) to zero)\n",
+ __func__, inode,
+ S_ISDIR(inode->i_mode) ? "directory" : "file",
+ (u64)old_size);
+ fsapi_truncate(inode, old_size, 0);
+ }
+
+ invalidate_inode_buffers(inode);
+ clear_inode(inode);
+ fsapi_invalidate_extent(inode);
+ sdfat_detach(inode);
+
+ /* after end of this function, caller will remove inode hash */
+ /* remove_inode_hash(inode); */
+}
+
+
+
+static void sdfat_put_super(struct super_block *sb)
+{
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ int err;
+
+ sdfat_log_msg(sb, KERN_INFO, "trying to unmount...");
+
+ __cancel_delayed_work_sync(sbi);
+
+ if (__is_sb_dirty(sb))
+ sdfat_write_super(sb);
+
+ __free_dfr_mem_if_required(sb);
+ err = fsapi_umount(sb);
+
+ if (sbi->nls_disk) {
+ unload_nls(sbi->nls_disk);
+ sbi->nls_disk = NULL;
+ sbi->options.codepage = sdfat_default_codepage;
+ }
+ if (sbi->nls_io) {
+ unload_nls(sbi->nls_io);
+ sbi->nls_io = NULL;
+ }
+ if (sbi->options.iocharset != sdfat_default_iocharset) {
+ kfree(sbi->options.iocharset);
+ sbi->options.iocharset = sdfat_default_iocharset;
+ }
+
+ sb->s_fs_info = NULL;
+
+ kobject_del(&sbi->sb_kobj);
+ kobject_put(&sbi->sb_kobj);
+ if (!sbi->use_vmalloc)
+ kfree(sbi);
+ else
+ vfree(sbi);
+
+ sdfat_log_msg(sb, KERN_INFO, "unmounted successfully! %s",
+ err ? "(with previous I/O errors)" : "");
+}
+
+static inline void __flush_delayed_meta(struct super_block *sb, s32 sync)
+{
+#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY
+ fsapi_cache_flush(sb, sync);
+#else
+ /* DO NOTHING */
+#endif
+}
+
+static void sdfat_write_super(struct super_block *sb)
+{
+ int time = 0;
+
+ __lock_super(sb);
+
+ __set_sb_clean(sb);
+
+#ifdef CONFIG_SDFAT_DFR
+ if (atomic_read(&(SDFAT_SB(sb)->dfr_info.stat)) == DFR_SB_STAT_VALID)
+ fsapi_dfr_update_fat_next(sb);
+#endif
+
+ /* flush delayed FAT/DIR dirty */
+ __flush_delayed_meta(sb, 0);
+
+ if (!(sb->s_flags & MS_RDONLY))
+ fsapi_sync_fs(sb, 0);
+
+ __unlock_super(sb);
+
+ time = jiffies;
+
+ /* Issuing bdev requests is needed
+ * to guarantee DIR updates in time
+ * whether w/ or w/o delayed DIR dirty feature.
+ * (otherwise DIR updates could be delayed for 5 + 5 secs at max.)
+ */
+ sync_blockdev(sb->s_bdev);
+
+#if (defined(CONFIG_SDFAT_DFR) && defined(CONFIG_SDFAT_DFR_DEBUG))
+ /* SPO test */
+ fsapi_dfr_spo_test(sb, DFR_SPO_FAT_NEXT, __func__);
+#endif
+ MMSG("BD: sdfat_write_super (bdev_sync for %ld ms)\n",
+ (jiffies - time) * 1000 / HZ);
+}
+
+
+static void __dfr_update_fat_next(struct super_block *sb)
+{
+#ifdef CONFIG_SDFAT_DFR
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+
+ if (sbi->options.defrag &&
+ (atomic_read(&sbi->dfr_info.stat) == DFR_SB_STAT_VALID)) {
+ fsapi_dfr_update_fat_next(sb);
+ }
+#endif
+}
+
+static void __dfr_update_fat_prev(struct super_block *sb, int wait)
+{
+#ifdef CONFIG_SDFAT_DFR
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ struct defrag_info *sb_dfr = &sbi->dfr_info;
+ /* static time available? */
+ static int time; /* initialized to zero */
+ int uevent = 0, total = 0, clean = 0, full = 0;
+ int spent = jiffies - time;
+
+ if (!(sbi->options.defrag && wait))
+ return;
+
+ __lock_super(sb);
+ /* Update FAT for defrag */
+ if (atomic_read(&(sbi->dfr_info.stat)) == DFR_SB_STAT_VALID) {
+
+ fsapi_dfr_update_fat_prev(sb, 0);
+
+ /* flush delayed FAT/DIR dirty */
+ __flush_delayed_meta(sb, 0);
+
+ /* Complete defrag req */
+ fsapi_sync_fs(sb, 1);
+ atomic_set(&sb_dfr->stat, DFR_SB_STAT_REQ);
+ complete_all(&sbi->dfr_complete);
+ } else if (((spent < 0) || (spent > DFR_DEFAULT_TIMEOUT)) &&
+ (atomic_read(&(sbi->dfr_info.stat)) == DFR_SB_STAT_IDLE)) {
+ uevent = fsapi_dfr_check_dfr_required(sb, &total, &clean, &full);
+ time = jiffies;
+ }
+ __unlock_super(sb);
+
+ if (uevent) {
+ kobject_uevent(&SDFAT_SB(sb)->sb_kobj, KOBJ_CHANGE);
+ dfr_debug("uevent for defrag_daemon, total_au %d, "
+ "clean_au %d, full_au %d", total, clean, full);
+ }
+#endif
+}
+
+static int sdfat_sync_fs(struct super_block *sb, int wait)
+{
+ int err = 0;
+
+ /* If there are some dirty buffers in the bdev inode */
+ if (__is_sb_dirty(sb)) {
+ __lock_super(sb);
+ __set_sb_clean(sb);
+
+ __dfr_update_fat_next(sb);
+
+ err = fsapi_sync_fs(sb, 1);
+
+#if (defined(CONFIG_SDFAT_DFR) && defined(CONFIG_SDFAT_DFR_DEBUG))
+ /* SPO test */
+ fsapi_dfr_spo_test(sb, DFR_SPO_FAT_NEXT, __func__);
+#endif
+
+ __unlock_super(sb);
+ }
+
+ __dfr_update_fat_prev(sb, wait);
+
+ return err;
+}
+
+static int sdfat_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+ /*
+ * patch 1.2.2 :
+ * fixed the slow-call problem caused by volume-lock contention.
+ */
+ struct super_block *sb = dentry->d_sb;
+ u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ VOL_INFO_T info;
+
+ /* fsapi_statfs will try to get a volume lock if needed */
+ if (fsapi_statfs(sb, &info))
+ return -EIO;
+
+ if (fsi->prev_eio)
+ sdfat_msg(sb, KERN_INFO, "called statfs with previous"
+ " I/O error(0x%02X).", fsi->prev_eio);
+
+ buf->f_type = sb->s_magic;
+ buf->f_bsize = info.ClusterSize;
+ buf->f_blocks = info.NumClusters;
+ buf->f_bfree = info.FreeClusters;
+ buf->f_bavail = info.FreeClusters;
+ buf->f_fsid.val[0] = (u32)id;
+ buf->f_fsid.val[1] = (u32)(id >> 32);
+ buf->f_namelen = 260;
+
+ return 0;
+}
+
+static int sdfat_remount(struct super_block *sb, int *flags, char *data)
+{
+ unsigned long prev_sb_flags;
+ char *orig_data = kstrdup(data, GFP_KERNEL);
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ FS_INFO_T *fsi = &(sbi->fsi);
+
+ *flags |= MS_NODIRATIME;
+
+ prev_sb_flags = sb->s_flags;
+
+ sdfat_remount_syncfs(sb);
+
+ fsapi_set_vol_flags(sb, VOL_CLEAN, 1);
+
+ sdfat_log_msg(sb, KERN_INFO, "re-mounted(%s->%s), eio=0x%x, Opts: %s",
+ (prev_sb_flags & MS_RDONLY) ? "ro" : "rw",
+ (*flags & MS_RDONLY) ? "ro" : "rw",
+ fsi->prev_eio, orig_data);
+ kfree(orig_data);
+ return 0;
+}
+
+static int __sdfat_show_options(struct seq_file *m, struct super_block *sb)
+{
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ struct sdfat_mount_options *opts = &sbi->options;
+ FS_INFO_T *fsi = &(sbi->fsi);
+
+ /* Show partition info */
+ seq_printf(m, ",fs=%s", sdfat_get_vol_type_str(fsi->vol_type));
+ if (fsi->prev_eio)
+ seq_printf(m, ",eio=0x%x", fsi->prev_eio);
+ if (!uid_eq(opts->fs_uid, GLOBAL_ROOT_UID))
+ seq_printf(m, ",uid=%u",
+ from_kuid_munged(&init_user_ns, opts->fs_uid));
+ if (!gid_eq(opts->fs_gid, GLOBAL_ROOT_GID))
+ seq_printf(m, ",gid=%u",
+ from_kgid_munged(&init_user_ns, opts->fs_gid));
+ seq_printf(m, ",fmask=%04o", opts->fs_fmask);
+ seq_printf(m, ",dmask=%04o", opts->fs_dmask);
+ if (opts->allow_utime)
+ seq_printf(m, ",allow_utime=%04o", opts->allow_utime);
+ if (sbi->nls_disk)
+ seq_printf(m, ",codepage=%s", sbi->nls_disk->charset);
+ if (sbi->nls_io)
+ seq_printf(m, ",iocharset=%s", sbi->nls_io->charset);
+ if (opts->utf8)
+ seq_puts(m, ",utf8");
+ if (sbi->fsi.vol_type != EXFAT)
+ seq_puts(m, ",shortname=winnt");
+ seq_printf(m, ",namecase=%u", opts->casesensitive);
+ if (opts->tz_utc)
+ seq_puts(m, ",tz=UTC");
+ if (opts->improved_allocation & SDFAT_ALLOC_DELAY)
+ seq_puts(m, ",delay");
+ if (opts->improved_allocation & SDFAT_ALLOC_SMART)
+ seq_printf(m, ",smart,ausize=%u", opts->amap_opt.sect_per_au);
+ if (opts->defrag)
+ seq_puts(m, ",defrag");
+ if (opts->adj_hidsect)
+ seq_puts(m, ",adj_hid");
+ if (opts->adj_req)
+ seq_puts(m, ",adj_req");
+ seq_printf(m, ",symlink=%u", opts->symlink);
+ seq_printf(m, ",bps=%ld", sb->s_blocksize);
+ if (opts->errors == SDFAT_ERRORS_CONT)
+ seq_puts(m, ",errors=continue");
+ else if (opts->errors == SDFAT_ERRORS_PANIC)
+ seq_puts(m, ",errors=panic");
+ else
+ seq_puts(m, ",errors=remount-ro");
+ if (opts->discard)
+ seq_puts(m, ",discard");
+
+ return 0;
+}
+
+static const struct super_operations sdfat_sops = {
+ .alloc_inode = sdfat_alloc_inode,
+ .destroy_inode = sdfat_destroy_inode,
+ .write_inode = sdfat_write_inode,
+ .evict_inode = sdfat_evict_inode,
+ .put_super = sdfat_put_super,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0)
+ .write_super = sdfat_write_super,
+#endif
+ .sync_fs = sdfat_sync_fs,
+ .statfs = sdfat_statfs,
+ .remount_fs = sdfat_remount,
+ .show_options = sdfat_show_options,
+};
+
+/*======================================================================*/
+/* SYSFS Operations */
+/*======================================================================*/
+#define SDFAT_ATTR(name, mode, show, store) \
+static struct sdfat_attr sdfat_attr_##name = __ATTR(name, mode, show, store)
+
+struct sdfat_attr {
+ struct attribute attr;
+ ssize_t (*show)(struct sdfat_sb_info *, char *);
+ ssize_t (*store)(struct sdfat_sb_info *, const char *, size_t);
+};
+
+static ssize_t sdfat_attr_show(struct kobject *kobj, struct attribute *attr, char *buf)
+{
+ struct sdfat_sb_info *sbi = container_of(kobj, struct sdfat_sb_info, sb_kobj);
+ struct sdfat_attr *a = container_of(attr, struct sdfat_attr, attr);
+
+ return a->show ? a->show(sbi, buf) : 0;
+}
+
+static ssize_t sdfat_attr_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t len)
+{
+ struct sdfat_sb_info *sbi = container_of(kobj, struct sdfat_sb_info, sb_kobj);
+ struct sdfat_attr *a = container_of(attr, struct sdfat_attr, attr);
+
+ return a->store ? a->store(sbi, buf, len) : len;
+}
+
+static const struct sysfs_ops sdfat_attr_ops = {
+ .show = sdfat_attr_show,
+ .store = sdfat_attr_store,
+};
+
+
+static ssize_t type_show(struct sdfat_sb_info *sbi, char *buf)
+{
+ FS_INFO_T *fsi = &(sbi->fsi);
+
+ return snprintf(buf, PAGE_SIZE, "%s\n", sdfat_get_vol_type_str(fsi->vol_type));
+}
+SDFAT_ATTR(type, 0444, type_show, NULL);
+
+static ssize_t eio_show(struct sdfat_sb_info *sbi, char *buf)
+{
+ FS_INFO_T *fsi = &(sbi->fsi);
+
+ return snprintf(buf, PAGE_SIZE, "0x%x\n", fsi->prev_eio);
+}
+SDFAT_ATTR(eio, 0444, eio_show, NULL);
+
+static ssize_t fratio_show(struct sdfat_sb_info *sbi, char *buf)
+{
+ unsigned int n_total_au = 0;
+ unsigned int n_clean_au = 0;
+ unsigned int n_full_au = 0;
+ unsigned int n_dirty_au = 0;
+ unsigned int fr = 0;
+
+ n_total_au = fsapi_get_au_stat(sbi->host_sb, VOL_AU_STAT_TOTAL);
+ n_clean_au = fsapi_get_au_stat(sbi->host_sb, VOL_AU_STAT_CLEAN);
+ n_full_au = fsapi_get_au_stat(sbi->host_sb, VOL_AU_STAT_FULL);
+ n_dirty_au = n_total_au - (n_full_au + n_clean_au);
+
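+ /* Worked example: total 100 AUs, full 50, clean 40 -> dirty 10,
+ * so fr = 10 * 100 / (40 + 10) = 20 (%).
+ */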
+ if (!n_dirty_au)
+ fr = 0;
+ else if (!n_clean_au)
+ fr = 100;
+ else
+ fr = (n_dirty_au * 100) / (n_clean_au + n_dirty_au);
+
+ return snprintf(buf, PAGE_SIZE, "%u\n", fr);
+}
+SDFAT_ATTR(fratio, 0444, fratio_show, NULL);
+
+static ssize_t totalau_show(struct sdfat_sb_info *sbi, char *buf)
+{
+ unsigned int n_au = 0;
+
+ n_au = fsapi_get_au_stat(sbi->host_sb, VOL_AU_STAT_TOTAL);
+ return snprintf(buf, PAGE_SIZE, "%u\n", n_au);
+}
+SDFAT_ATTR(totalau, 0444, totalau_show, NULL);
+
+static ssize_t cleanau_show(struct sdfat_sb_info *sbi, char *buf)
+{
+ unsigned int n_clean_au = 0;
+
+ n_clean_au = fsapi_get_au_stat(sbi->host_sb, VOL_AU_STAT_CLEAN);
+ return snprintf(buf, PAGE_SIZE, "%u\n", n_clean_au);
+}
+SDFAT_ATTR(cleanau, 0444, cleanau_show, NULL);
+
+static ssize_t fullau_show(struct sdfat_sb_info *sbi, char *buf)
+{
+ unsigned int n_full_au = 0;
+
+ n_full_au = fsapi_get_au_stat(sbi->host_sb, VOL_AU_STAT_FULL);
+ return snprintf(buf, PAGE_SIZE, "%u\n", n_full_au);
+}
+SDFAT_ATTR(fullau, 0444, fullau_show, NULL);
+
+static struct attribute *sdfat_attrs[] = {
+ &sdfat_attr_type.attr,
+ &sdfat_attr_eio.attr,
+ &sdfat_attr_fratio.attr,
+ &sdfat_attr_totalau.attr,
+ &sdfat_attr_cleanau.attr,
+ &sdfat_attr_fullau.attr,
+ NULL,
+};
+
+static struct kobj_type sdfat_ktype = {
+ .default_attrs = sdfat_attrs,
+ .sysfs_ops = &sdfat_attr_ops,
+};
+
+static ssize_t version_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buff)
+{
+ return snprintf(buff, PAGE_SIZE, "FS Version %s\n", SDFAT_VERSION);
+}
+
+static struct kobj_attribute version_attr = __ATTR_RO(version);
+
+static struct attribute *attributes[] = {
+ &version_attr.attr,
+ NULL,
+};
+
+static struct attribute_group attr_group = {
+ .attrs = attributes,
+};
+
+/*======================================================================*/
+/* Super Block Read Operations */
+/*======================================================================*/
+
+enum {
+ Opt_uid,
+ Opt_gid,
+ Opt_umask,
+ Opt_dmask,
+ Opt_fmask,
+ Opt_allow_utime,
+ Opt_codepage,
+ Opt_charset,
+ Opt_utf8,
+ Opt_namecase,
+ Opt_tz_utc,
+ Opt_adj_hidsect,
+ Opt_delay,
+ Opt_smart,
+ Opt_ausize,
+ Opt_packing,
+ Opt_defrag,
+ Opt_symlink,
+ Opt_debug,
+ Opt_err_cont,
+ Opt_err_panic,
+ Opt_err_ro,
+ Opt_err,
+ Opt_discard,
+ Opt_fs,
+ Opt_adj_req,
+#ifdef CONFIG_SDFAT_USE_FOR_VFAT
+ Opt_shortname_lower,
+ Opt_shortname_win95,
+ Opt_shortname_winnt,
+ Opt_shortname_mixed,
+#endif /* CONFIG_SDFAT_USE_FOR_VFAT */
+};
+
+static const match_table_t sdfat_tokens = {
+ {Opt_uid, "uid=%u"},
+ {Opt_gid, "gid=%u"},
+ {Opt_umask, "umask=%o"},
+ {Opt_dmask, "dmask=%o"},
+ {Opt_fmask, "fmask=%o"},
+ {Opt_allow_utime, "allow_utime=%o"},
+ {Opt_codepage, "codepage=%u"},
+ {Opt_charset, "iocharset=%s"},
+ {Opt_utf8, "utf8"},
+ {Opt_namecase, "namecase=%u"},
+ {Opt_tz_utc, "tz=UTC"},
+ {Opt_adj_hidsect, "adj_hid"},
+ {Opt_delay, "delay"},
+ {Opt_smart, "smart"},
+ {Opt_ausize, "ausize=%u"},
+ {Opt_packing, "packing=%u"},
+ {Opt_defrag, "defrag"},
+ {Opt_symlink, "symlink=%u"},
+ {Opt_debug, "debug"},
+ {Opt_err_cont, "errors=continue"},
+ {Opt_err_panic, "errors=panic"},
+ {Opt_err_ro, "errors=remount-ro"},
+ {Opt_discard, "discard"},
+ {Opt_fs, "fs=%s"},
+ {Opt_adj_req, "adj_req"},
+#ifdef CONFIG_SDFAT_USE_FOR_VFAT
+ {Opt_shortname_lower, "shortname=lower"},
+ {Opt_shortname_win95, "shortname=win95"},
+ {Opt_shortname_winnt, "shortname=winnt"},
+ {Opt_shortname_mixed, "shortname=mixed"},
+#endif /* CONFIG_SDFAT_USE_FOR_VFAT */
+ {Opt_err, NULL}
+};
+
+static int parse_options(struct super_block *sb, char *options, int silent,
+ int *debug, struct sdfat_mount_options *opts)
+{
+ char *p;
+ substring_t args[MAX_OPT_ARGS];
+ int option, i;
+ char *tmpstr;
+
+ opts->fs_uid = current_uid();
+ opts->fs_gid = current_gid();
+ opts->fs_fmask = opts->fs_dmask = current->fs->umask;
+ opts->allow_utime = (unsigned short) -1;
+ opts->codepage = sdfat_default_codepage;
+ opts->iocharset = sdfat_default_iocharset;
+ opts->casesensitive = 0;
+ opts->utf8 = 0;
+ opts->adj_hidsect = 0;
+ opts->tz_utc = 0;
+ opts->improved_allocation = 0;
+ opts->amap_opt.pack_ratio = 0; // Default packing
+ opts->amap_opt.sect_per_au = 0;
+ opts->amap_opt.misaligned_sect = 0;
+ opts->symlink = 0;
+ opts->errors = SDFAT_ERRORS_RO;
+ opts->discard = 0;
+ *debug = 0;
+
+ if (!options)
+ goto out;
+
+ while ((p = strsep(&options, ",")) != NULL) {
+ int token;
+
+ if (!*p)
+ continue;
+ token = match_token(p, sdfat_tokens, args);
+ switch (token) {
+ case Opt_uid:
+ if (match_int(&args[0], &option))
+ return -EINVAL;
+ opts->fs_uid = make_kuid(current_user_ns(), option);
+ break;
+ case Opt_gid:
+ if (match_int(&args[0], &option))
+ return -EINVAL;
+ opts->fs_gid = make_kgid(current_user_ns(), option);
+ break;
+ case Opt_umask:
+ case Opt_dmask:
+ case Opt_fmask:
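+ /* "umask" sets both masks; "dmask"/"fmask" each update only their own */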
+ if (match_octal(&args[0], &option))
+ return -EINVAL;
+ if (token != Opt_dmask)
+ opts->fs_fmask = option;
+ if (token != Opt_fmask)
+ opts->fs_dmask = option;
+ break;
+ case Opt_allow_utime:
+ if (match_octal(&args[0], &option))
+ return -EINVAL;
+ opts->allow_utime = option & (S_IWGRP | S_IWOTH);
+ break;
+ case Opt_codepage:
+ if (match_int(&args[0], &option))
+ return -EINVAL;
+ opts->codepage = option;
+ break;
+ case Opt_charset:
+ if (opts->iocharset != sdfat_default_iocharset)
+ kfree(opts->iocharset);
+ tmpstr = match_strdup(&args[0]);
+ if (!tmpstr)
+ return -ENOMEM;
+ opts->iocharset = tmpstr;
+ break;
+ case Opt_namecase:
+ if (match_int(&args[0], &option))
+ return -EINVAL;
+ opts->casesensitive = (option > 0) ? 1 : 0;
+ break;
+ case Opt_utf8:
+ opts->utf8 = 1;
+ break;
+ case Opt_adj_hidsect:
+ opts->adj_hidsect = 1;
+ break;
+ case Opt_tz_utc:
+ opts->tz_utc = 1;
+ break;
+ case Opt_symlink:
+ if (match_int(&args[0], &option))
+ return -EINVAL;
+ opts->symlink = option > 0 ? 1 : 0;
+ break;
+ case Opt_delay:
+ opts->improved_allocation |= SDFAT_ALLOC_DELAY;
+ break;
+ case Opt_smart:
+ opts->improved_allocation |= SDFAT_ALLOC_SMART;
+ break;
+ case Opt_ausize:
+ if (match_int(&args[0], &option))
+ return -EINVAL;
+ if (!is_power_of_2(option))
+ return -EINVAL;
+ opts->amap_opt.sect_per_au = option;
+ IMSG("set AU size by option : %u sectors\n", option);
+ break;
+ case Opt_packing:
+ if (match_int(&args[0], &option))
+ return -EINVAL;
+ opts->amap_opt.pack_ratio = option;
+ break;
+ case Opt_defrag:
+#ifdef CONFIG_SDFAT_DFR
+ opts->defrag = 1;
+#else
+ IMSG("defragmentation config is not enabled. ignore\n");
+#endif
+ break;
+ case Opt_err_cont:
+ opts->errors = SDFAT_ERRORS_CONT;
+ break;
+ case Opt_err_panic:
+ opts->errors = SDFAT_ERRORS_PANIC;
+ break;
+ case Opt_err_ro:
+ opts->errors = SDFAT_ERRORS_RO;
+ break;
+ case Opt_debug:
+ *debug = 1;
+ break;
+ case Opt_discard:
+ opts->discard = 1;
+ break;
+ case Opt_fs:
+ tmpstr = match_strdup(&args[0]);
+ if (!tmpstr)
+ return -ENOMEM;
+ for (i = 0; i < FS_TYPE_MAX; i++) {
+ if (!strcmp(tmpstr, FS_TYPE_STR[i])) {
+ opts->fs_type = (unsigned char)i;
+ sdfat_log_msg(sb, KERN_ERR,
+ "set fs-type by option : %s",
+ FS_TYPE_STR[i]);
+ break;
+ }
+ }
+ kfree(tmpstr);
+ if (i == FS_TYPE_MAX) {
+ sdfat_log_msg(sb, KERN_ERR,
+ "invalid fs-type, "
+ "only allow auto, exfat, vfat");
+ return -EINVAL;
+ }
+ break;
+ case Opt_adj_req:
+#ifdef CONFIG_SDFAT_ALIGNED_MPAGE_WRITE
+ opts->adj_req = 1;
+#else
+ IMSG("adjust request config is not enabled. ignore\n");
+#endif
+ break;
+#ifdef CONFIG_SDFAT_USE_FOR_VFAT
+ case Opt_shortname_lower:
+ case Opt_shortname_win95:
+ case Opt_shortname_mixed:
+ pr_warn("[SDFAT] DRAGONS AHEAD! sdFAT only understands \"shortname=winnt\"!\n");
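+ /* fall through */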
+ case Opt_shortname_winnt:
+ break;
+#endif /* CONFIG_SDFAT_USE_FOR_VFAT */
+ default:
+ if (!silent) {
+ sdfat_msg(sb, KERN_ERR,
+ "unrecognized mount option \"%s\" "
+ "or missing value", p);
+ }
+ return -EINVAL;
+ }
+ }
+
+out:
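+ /* default: allow utime for group/other only where dmask leaves their write bit set */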
+ if (opts->allow_utime == (unsigned short) -1)
+ opts->allow_utime = ~opts->fs_dmask & (S_IWGRP | S_IWOTH);
+
+ if (opts->utf8 && strcmp(opts->iocharset, sdfat_iocharset_with_utf8)) {
+ sdfat_msg(sb, KERN_WARNING,
+ "utf8 enabled, \"iocharset=%s\" is recommended",
+ sdfat_iocharset_with_utf8);
+ }
+
+ if (opts->discard) {
+ struct request_queue *q = bdev_get_queue(sb->s_bdev);
+
+ if (!blk_queue_discard(q)) {
+ sdfat_msg(sb, KERN_WARNING,
+ "mounting with \"discard\" option, but "
+ "the device does not support discard");
+ opts->discard = 0;
+ }
+ }
+
+ return 0;
+}
+
+static void sdfat_hash_init(struct super_block *sb)
+{
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ int i;
+
+ spin_lock_init(&sbi->inode_hash_lock);
+ for (i = 0; i < SDFAT_HASH_SIZE; i++)
+ INIT_HLIST_HEAD(&sbi->inode_hashtable[i]);
+}
+
+static int sdfat_read_root(struct inode *inode)
+{
+ struct super_block *sb = inode->i_sb;
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ sdfat_timespec_t ts;
+ FS_INFO_T *fsi = &(sbi->fsi);
+ DIR_ENTRY_T info;
+
+ ts = CURRENT_TIME_SEC;
+
+ SDFAT_I(inode)->fid.dir.dir = fsi->root_dir;
+ SDFAT_I(inode)->fid.dir.flags = 0x01;
+ SDFAT_I(inode)->fid.entry = -1;
+ SDFAT_I(inode)->fid.start_clu = fsi->root_dir;
+ SDFAT_I(inode)->fid.flags = 0x01;
+ SDFAT_I(inode)->fid.type = TYPE_DIR;
+ SDFAT_I(inode)->fid.version = 0;
+ SDFAT_I(inode)->fid.rwoffset = 0;
+ SDFAT_I(inode)->fid.hint_bmap.off = CLUS_EOF;
+ SDFAT_I(inode)->fid.hint_stat.eidx = 0;
+ SDFAT_I(inode)->fid.hint_stat.clu = fsi->root_dir;
+ SDFAT_I(inode)->fid.hint_femp.eidx = -1;
+
+ SDFAT_I(inode)->target = NULL;
+
+ if (fsapi_read_inode(inode, &info) < 0)
+ return -EIO;
+
+ inode->i_uid = sbi->options.fs_uid;
+ inode->i_gid = sbi->options.fs_gid;
+ inode_inc_iversion(inode);
+ inode->i_generation = 0;
+ inode->i_mode = sdfat_make_mode(sbi, ATTR_SUBDIR, S_IRWXUGO);
+ inode->i_op = &sdfat_dir_inode_operations;
+ inode->i_fop = &sdfat_dir_operations;
+
+ i_size_write(inode, info.Size);
+ SDFAT_I(inode)->fid.size = info.Size;
+ inode->i_blocks = ((i_size_read(inode) + (fsi->cluster_size - 1))
+ & ~((loff_t)fsi->cluster_size - 1)) >> inode->i_blkbits;
+ SDFAT_I(inode)->i_pos = ((loff_t) fsi->root_dir << 32) | 0xffffffff;
+ SDFAT_I(inode)->i_size_aligned = i_size_read(inode);
+ SDFAT_I(inode)->i_size_ondisk = i_size_read(inode);
+
+ sdfat_save_attr(inode, ATTR_SUBDIR);
+ inode->i_mtime = inode->i_atime = inode->i_ctime = ts;
+ set_nlink(inode, info.NumSubdirs + 2);
+ return 0;
+}
+
+
+
+static void setup_dops(struct super_block *sb)
+{
+ if (SDFAT_SB(sb)->options.casesensitive == 0)
+ sb->s_d_op = &sdfat_ci_dentry_ops;
+ else
+ sb->s_d_op = &sdfat_dentry_ops;
+}
+
+static int sdfat_fill_super(struct super_block *sb, void *data, int silent)
+{
+ struct inode *root_inode = NULL;
+ struct sdfat_sb_info *sbi;
+ int debug;
+ int err;
+ char buf[50];
+ struct block_device *bdev = sb->s_bdev;
+ dev_t bd_dev = bdev ? bdev->bd_dev : 0;
+
+ sdfat_log_msg(sb, KERN_INFO, "trying to mount...");
+
+ /*
+ * GFP_KERNEL is ok here, because while we do hold the
+ * superblock lock, memory pressure can't call back into
+ * the filesystem, since we're only just about to mount
+ * it and have no inodes etc active!
+ */
+ sbi = kzalloc(sizeof(struct sdfat_sb_info), GFP_KERNEL);
+ if (!sbi) {
+ sdfat_log_msg(sb, KERN_INFO,
+ "trying to alloc sbi with vzalloc()");
+ sbi = vzalloc(sizeof(struct sdfat_sb_info));
+ if (!sbi) {
+ sdfat_log_msg(sb, KERN_ERR, "failed to mount! (ENOMEM)");
+ return -ENOMEM;
+ }
+ sbi->use_vmalloc = 1;
+ }
+
+ mutex_init(&sbi->s_vlock);
+ sb->s_fs_info = sbi;
+ sb->s_flags |= MS_NODIRATIME;
+ sb->s_magic = SDFAT_SUPER_MAGIC;
+ sb->s_op = &sdfat_sops;
+ ratelimit_state_init(&sbi->ratelimit, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
+ err = parse_options(sb, data, silent, &debug, &sbi->options);
+ if (err) {
+ sdfat_log_msg(sb, KERN_ERR, "failed to parse options");
+ goto failed_mount;
+ }
+
+ setup_sdfat_xattr_handler(sb);
+ setup_sdfat_sync_super_wq(sb);
+ setup_dops(sb);
+
+ err = fsapi_mount(sb);
+ if (err) {
+ sdfat_log_msg(sb, KERN_ERR, "failed to recognize fat type");
+ goto failed_mount;
+ }
+
+ /* set up enough so that it can read an inode */
+ sdfat_hash_init(sb);
+
+ /*
+ * The low byte of the FAT's first entry must match the
+ * media field. But in the real world, too many devices
+ * write the wrong value, so that validity check was removed.
+ *
+ * if (FAT_FIRST_ENT(sb, media) != first)
+ */
+
+ err = -EINVAL;
+ sprintf(buf, "cp%d", sbi->options.codepage);
+ sbi->nls_disk = load_nls(buf);
+ if (!sbi->nls_disk) {
+ sdfat_log_msg(sb, KERN_ERR, "codepage %s not found", buf);
+ goto failed_mount2;
+ }
+
+ sbi->nls_io = load_nls(sbi->options.iocharset);
+ if (!sbi->nls_io) {
+ sdfat_log_msg(sb, KERN_ERR, "IO charset %s not found",
+ sbi->options.iocharset);
+ goto failed_mount2;
+ }
+
+ err = __alloc_dfr_mem_if_required(sb);
+ if (err) {
+ sdfat_log_msg(sb, KERN_ERR, "failed to initialize a memory for "
+ "defragmentation");
+ goto failed_mount3;
+ }
+
+ err = -ENOMEM;
+ root_inode = new_inode(sb);
+ if (!root_inode) {
+ sdfat_log_msg(sb, KERN_ERR, "failed to allocate root inode.");
+ goto failed_mount3;
+ }
+
+ root_inode->i_ino = SDFAT_ROOT_INO;
+ inode_set_iversion(root_inode, 1);
+
+ err = sdfat_read_root(root_inode);
+ if (err) {
+ sdfat_log_msg(sb, KERN_ERR, "failed to initialize root inode.");
+ goto failed_mount3;
+ }
+
+ sdfat_attach(root_inode, SDFAT_I(root_inode)->i_pos);
+ insert_inode_hash(root_inode);
+
+ err = -ENOMEM;
+ sb->s_root = __d_make_root(root_inode);
+ if (!sb->s_root) {
+ sdfat_msg(sb, KERN_ERR, "failed to get the root dentry");
+ goto failed_mount3;
+ }
+
+ /*
+ * Initialize filesystem attributes (for sysfs)
+ * ex: /sys/fs/sdfat/mmcblk1[179:17]
+ */
+ sbi->sb_kobj.kset = sdfat_kset;
+ err = kobject_init_and_add(&sbi->sb_kobj, &sdfat_ktype, NULL,
+ "%s[%d:%d]", sb->s_id, MAJOR(bd_dev), MINOR(bd_dev));
+ if (err) {
+ sdfat_msg(sb, KERN_ERR, "Unable to create sdfat attributes for"
+ " %s[%d:%d](%d)", sb->s_id,
+ MAJOR(bd_dev), MINOR(bd_dev), err);
+ goto failed_mount3;
+ }
+
+ sdfat_log_msg(sb, KERN_INFO, "mounted successfully!");
+ /* FOR BIGDATA */
+ sdfat_statistics_set_mnt(&sbi->fsi);
+ sdfat_statistics_set_vol_size(sb);
+ return 0;
+
+failed_mount3:
+ __free_dfr_mem_if_required(sb);
+failed_mount2:
+ fsapi_umount(sb);
+failed_mount:
+ sdfat_log_msg(sb, KERN_INFO, "failed to mount! (%d)", err);
+
+ if (root_inode)
+ iput(root_inode);
+ sb->s_root = NULL;
+
+ if (sbi->nls_io)
+ unload_nls(sbi->nls_io);
+ if (sbi->nls_disk)
+ unload_nls(sbi->nls_disk);
+ if (sbi->options.iocharset != sdfat_default_iocharset)
+ kfree(sbi->options.iocharset);
+ sb->s_fs_info = NULL;
+ if (!sbi->use_vmalloc)
+ kfree(sbi);
+ else
+ vfree(sbi);
+ return err;
+}
+
+static struct dentry *sdfat_fs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data) {
+ return mount_bdev(fs_type, flags, dev_name, data, sdfat_fill_super);
+}
+
+static void init_once(void *foo)
+{
+ struct sdfat_inode_info *ei = (struct sdfat_inode_info *)foo;
+
+ INIT_HLIST_NODE(&ei->i_hash_fat);
+ inode_init_once(&ei->vfs_inode);
+}
+
+static int __init sdfat_init_inodecache(void)
+{
+ sdfat_inode_cachep = kmem_cache_create("sdfat_inode_cache",
+ sizeof(struct sdfat_inode_info),
+ 0, (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
+ init_once);
+ if (!sdfat_inode_cachep)
+ return -ENOMEM;
+ return 0;
+}
+
+static void sdfat_destroy_inodecache(void)
+{
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
+ kmem_cache_destroy(sdfat_inode_cachep);
+}
+
+#ifdef CONFIG_SDFAT_DBG_IOCTL
+static void sdfat_debug_kill_sb(struct super_block *sb)
+{
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ struct block_device *bdev = sb->s_bdev;
+
+ long flags;
+
+ if (sbi) {
+ flags = sbi->debug_flags;
+
+ if (flags & SDFAT_DEBUGFLAGS_INVALID_UMOUNT) {
+ /* invalidate_bdev drops all device cache include dirty.
+ * we use this to simulate device removal
+ */
+ fsapi_cache_release(sb);
+ invalidate_bdev(bdev);
+ }
+ }
+
+ kill_block_super(sb);
+}
+#endif /* CONFIG_SDFAT_DBG_IOCTL */
+
+static struct file_system_type sdfat_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "sdfat",
+ .mount = sdfat_fs_mount,
+#ifdef CONFIG_SDFAT_DBG_IOCTL
+ .kill_sb = sdfat_debug_kill_sb,
+#else
+ .kill_sb = kill_block_super,
+#endif /* CONFIG_SDFAT_DBG_IOCTL */
+ .fs_flags = FS_REQUIRES_DEV,
+};
+MODULE_ALIAS_FS("sdfat");
+
+#ifdef CONFIG_SDFAT_USE_FOR_EXFAT
+static struct file_system_type exfat_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "exfat",
+ .mount = sdfat_fs_mount,
+#ifdef CONFIG_SDFAT_DBG_IOCTL
+ .kill_sb = sdfat_debug_kill_sb,
+#else
+ .kill_sb = kill_block_super,
+#endif /* CONFIG_SDFAT_DBG_IOCTL */
+ .fs_flags = FS_REQUIRES_DEV,
+};
+MODULE_ALIAS_FS("exfat");
+#endif /* CONFIG_SDFAT_USE_FOR_EXFAT */
+
+#ifdef CONFIG_SDFAT_USE_FOR_VFAT
+static struct file_system_type vfat_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "vfat",
+ .mount = sdfat_fs_mount,
+#ifdef CONFIG_SDFAT_DBG_IOCTL
+ .kill_sb = sdfat_debug_kill_sb,
+#else
+ .kill_sb = kill_block_super,
+#endif /* CONFIG_SDFAT_DBG_IOCTL */
+ .fs_flags = FS_REQUIRES_DEV,
+};
+MODULE_ALIAS_FS("vfat");
+#endif /* CONFIG_SDFAT_USE_FOR_VFAT */
+
+static int __init init_sdfat_fs(void)
+{
+ int err;
+
+ sdfat_log_version();
+ err = fsapi_init();
+ if (err)
+ goto error;
+
+ sdfat_kset = kset_create_and_add("sdfat", NULL, fs_kobj);
+ if (!sdfat_kset) {
+ pr_err("[SDFAT] failed to create sdfat kset\n");
+ err = -ENOMEM;
+ goto error;
+ }
+
+ err = sysfs_create_group(&sdfat_kset->kobj, &attr_group);
+ if (err) {
+ pr_err("[SDFAT] failed to create sdfat version attributes\n");
+ goto error;
+ }
+
+ err = sdfat_statistics_init(sdfat_kset);
+ if (err)
+ goto error;
+
+ err = sdfat_uevent_init(sdfat_kset);
+ if (err)
+ goto error;
+
+ err = sdfat_init_inodecache();
+ if (err) {
+ pr_err("[SDFAT] failed to initialize inode cache\n");
+ goto error;
+ }
+
+ err = register_filesystem(&sdfat_fs_type);
+ if (err) {
+ pr_err("[SDFAT] failed to register filesystem\n");
+ goto error;
+ }
+
+#ifdef CONFIG_SDFAT_USE_FOR_EXFAT
+ err = register_filesystem(&exfat_fs_type);
+ if (err) {
+ pr_err("[SDFAT] failed to register for exfat filesystem\n");
+ goto error;
+ }
+#endif /* CONFIG_SDFAT_USE_FOR_EXFAT */
+
+#ifdef CONFIG_SDFAT_USE_FOR_VFAT
+ err = register_filesystem(&vfat_fs_type);
+ if (err) {
+ pr_err("[SDFAT] failed to register for vfat filesystem\n");
+ goto error;
+ }
+#endif /* CONFIG_SDFAT_USE_FOR_VFAT */
+
+ return 0;
+error:
+ sdfat_uevent_uninit();
+ sdfat_statistics_uninit();
+
+ if (sdfat_kset) {
+ sysfs_remove_group(&sdfat_kset->kobj, &attr_group);
+ kset_unregister(sdfat_kset);
+ sdfat_kset = NULL;
+ }
+
+ sdfat_destroy_inodecache();
+ fsapi_shutdown();
+
+ pr_err("[SDFAT] failed to initialize FS driver(err:%d)\n", err);
+ return err;
+}
+
+static void __exit exit_sdfat_fs(void)
+{
+ sdfat_uevent_uninit();
+ sdfat_statistics_uninit();
+
+ if (sdfat_kset) {
+ sysfs_remove_group(&sdfat_kset->kobj, &attr_group);
+ kset_unregister(sdfat_kset);
+ sdfat_kset = NULL;
+ }
+
+ sdfat_destroy_inodecache();
+ unregister_filesystem(&sdfat_fs_type);
+
+#ifdef CONFIG_SDFAT_USE_FOR_EXFAT
+ unregister_filesystem(&exfat_fs_type);
+#endif /* CONFIG_SDFAT_USE_FOR_EXFAT */
+#ifdef CONFIG_SDFAT_USE_FOR_VFAT
+ unregister_filesystem(&vfat_fs_type);
+#endif /* CONFIG_SDFAT_USE_FOR_VFAT */
+ fsapi_shutdown();
+}
+
+module_init(init_sdfat_fs);
+module_exit(exit_sdfat_fs);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("FAT/exFAT filesystem support");
+MODULE_AUTHOR("Samsung Electronics Co., Ltd.");
+
diff --git a/fs/sdfat/sdfat.h b/fs/sdfat/sdfat.h
new file mode 100644
index 000000000000..60f7811c7b99
--- /dev/null
+++ b/fs/sdfat/sdfat.h
@@ -0,0 +1,528 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SDFAT_H
+#define _SDFAT_H
+
+#include <linux/buffer_head.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/nls.h>
+#include <linux/fs.h>
+#include <linux/mutex.h>
+#include <linux/ratelimit.h>
+#include <linux/version.h>
+#include <linux/kobject.h>
+#include "api.h"
+
+#ifdef CONFIG_SDFAT_DFR
+#include "dfr.h"
+#endif
+
+/*
+ * sdfat error flags
+ */
+#define SDFAT_ERRORS_CONT (1) /* ignore error and continue */
+#define SDFAT_ERRORS_PANIC (2) /* panic on error */
+#define SDFAT_ERRORS_RO (3) /* remount r/o on error */
+
+/*
+ * sdfat allocator flags
+ */
+#define SDFAT_ALLOC_DELAY (1) /* Delayed allocation */
+#define SDFAT_ALLOC_SMART (2) /* Smart allocation */
+
+/*
+ * sdfat allocator destination for smart allocation
+ */
+#define ALLOC_NOWHERE (0)
+#define ALLOC_COLD (1)
+#define ALLOC_HOT (16)
+#define ALLOC_COLD_ALIGNED (1)
+#define ALLOC_COLD_PACKING (2)
+#define ALLOC_COLD_SEQ (4)
+
+/*
+ * sdfat nls lossy flag
+ */
+#define NLS_NAME_NO_LOSSY (0x00) /* no lossy */
+#define NLS_NAME_LOSSY (0x01) /* detected incorrect filename(s) */
+#define NLS_NAME_OVERLEN (0x02) /* the length exceeds its limit */
+
+/*
+ * sdfat common MACRO
+ */
+#define CLUSTER_16(x) ((u16)((x) & 0xFFFFU))
+#define CLUSTER_32(x) ((u32)((x) & 0xFFFFFFFFU))
+#define CLUS_EOF CLUSTER_32(~0)
+#define CLUS_BAD (0xFFFFFFF7U)
+#define CLUS_FREE (0)
+#define CLUS_BASE (2)
+#define IS_CLUS_EOF(x) ((x) == CLUS_EOF)
+#define IS_CLUS_BAD(x) ((x) == CLUS_BAD)
+#define IS_CLUS_FREE(x) ((x) == CLUS_FREE)
+#define IS_LAST_SECT_IN_CLUS(fsi, sec) \
+ ((((sec) - (fsi)->data_start_sector + 1) \
+ & ((1 << (fsi)->sect_per_clus_bits) - 1)) == 0)
+
+#define CLUS_TO_SECT(fsi, x) \
+ ((((unsigned long long)(x) - CLUS_BASE) << (fsi)->sect_per_clus_bits) + (fsi)->data_start_sector)
+
+#define SECT_TO_CLUS(fsi, sec) \
+ ((u32)((((sec) - (fsi)->data_start_sector) >> (fsi)->sect_per_clus_bits) + CLUS_BASE))
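+
+/* Example (assuming 8 sectors per cluster, i.e. sect_per_clus_bits == 3,
+ * and data_start_sector == 100): cluster 2 (the first data cluster) maps
+ * to sector 100, cluster 3 to sector 108, and so on.
+ */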
+
+/* variables defined at sdfat.c */
+extern const char *FS_TYPE_STR[];
+
+enum {
+ FS_TYPE_AUTO,
+ FS_TYPE_EXFAT,
+ FS_TYPE_VFAT,
+ FS_TYPE_MAX
+};
+
+/*
+ * sdfat mount in-memory data
+ */
+struct sdfat_mount_options {
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)
+ kuid_t fs_uid;
+ kgid_t fs_gid;
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0) */
+ uid_t fs_uid;
+ gid_t fs_gid;
+#endif
+ unsigned short fs_fmask;
+ unsigned short fs_dmask;
+ unsigned short allow_utime; /* permission for setting the [am]time */
+ unsigned short codepage; /* codepage for shortname conversions */
+ char *iocharset; /* charset for filename input/display */
+ struct {
+ unsigned int pack_ratio;
+ unsigned int sect_per_au;
+ unsigned int misaligned_sect;
+ } amap_opt; /* AMAP-related options (see amap.c) */
+
+ unsigned char utf8;
+ unsigned char casesensitive;
+ unsigned char adj_hidsect;
+ unsigned char tz_utc;
+ unsigned char improved_allocation;
+ unsigned char defrag;
+ unsigned char symlink; /* support symlink operation */
+ unsigned char errors; /* on error: continue, panic, remount-ro */
+ unsigned char discard; /* set if "-o discard" was specified and the device supports discard */
+ unsigned char fs_type; /* fs_type that user specified */
+ unsigned short adj_req; /* support aligned mpage write */
+};
+
+#define SDFAT_HASH_BITS 8
+#define SDFAT_HASH_SIZE (1UL << SDFAT_HASH_BITS)
+
+/*
+ * SDFAT file system superblock in-memory data
+ */
+struct sdfat_sb_info {
+ FS_INFO_T fsi; /* private filesystem info */
+
+ struct mutex s_vlock; /* volume lock */
+ int use_vmalloc;
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
+ int s_dirt;
+ struct mutex s_lock; /* superblock lock */
+ int write_super_queued; /* Write_super work is pending? */
+ struct delayed_work write_super_work; /* Workqueue data structure for write_super() */
+ spinlock_t work_lock; /* Lock for WQ */
+#endif
+ struct super_block *host_sb; /* sb pointer */
+ struct sdfat_mount_options options;
+ struct nls_table *nls_disk; /* Codepage used on disk */
+ struct nls_table *nls_io; /* Charset used for input and display */
+ struct ratelimit_state ratelimit;
+
+ spinlock_t inode_hash_lock;
+ struct hlist_head inode_hashtable[SDFAT_HASH_SIZE];
+ struct kobject sb_kobj;
+#ifdef CONFIG_SDFAT_DBG_IOCTL
+ long debug_flags;
+#endif /* CONFIG_SDFAT_DBG_IOCTL */
+
+#ifdef CONFIG_SDFAT_DFR
+ struct defrag_info dfr_info;
+ struct completion dfr_complete;
+ unsigned int *dfr_new_clus;
+ int dfr_new_idx;
+ unsigned int *dfr_page_wb;
+ void **dfr_pagep;
+ unsigned int dfr_hint_clus;
+ unsigned int dfr_hint_idx;
+ int dfr_reserved_clus;
+
+#ifdef CONFIG_SDFAT_DFR_DEBUG
+ int dfr_spo_flag;
+#endif /* CONFIG_SDFAT_DFR_DEBUG */
+
+#endif /* CONFIG_SDFAT_DFR */
+
+#ifdef CONFIG_SDFAT_TRACE_IO
+ /* Statistics for allocator */
+ unsigned int stat_n_pages_written; /* # of written pages in total */
+ unsigned int stat_n_pages_added; /* # of added blocks in total */
+ unsigned int stat_n_bdev_pages_written; /* # of written pages owned by bdev inode */
+ unsigned int stat_n_pages_confused;
+#endif
+ atomic_t stat_n_pages_queued; /* # of pages in the request queue (approx.) */
+};
+
+/*
+ * SDFAT file system inode in-memory data
+ */
+struct sdfat_inode_info {
+ FILE_ID_T fid;
+ char *target;
+ /* NOTE: i_size_ondisk is 64 bits, so ->inode_lock must be held to access it */
+ loff_t i_size_ondisk; /* physically allocated size */
+ loff_t i_size_aligned; /* block-aligned i_size (used in cont_write_begin) */
+ loff_t i_pos; /* on-disk position of directory entry or 0 */
+ struct hlist_node i_hash_fat; /* hash by i_location */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
+ struct rw_semaphore truncate_lock; /* protect bmap against truncate */
+#endif
+#ifdef CONFIG_SDFAT_DFR
+ struct defrag_info dfr_info;
+#endif
+ struct inode vfs_inode;
+};
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 18, 0)
+typedef struct timespec64 sdfat_timespec_t;
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 18, 0) */
+typedef struct timespec sdfat_timespec_t;
+#endif
+
+/*
+ * FIXME : needs on-disk-slot in-memory data
+ */
+
+/* static inline functions */
+static inline const char *sdfat_get_vol_type_str(unsigned int type)
+{
+ if (type == EXFAT)
+ return "exfat";
+ else if (type == FAT32)
+ return "vfat:32";
+ else if (type == FAT16)
+ return "vfat:16";
+ else if (type == FAT12)
+ return "vfat:12";
+
+ return "unknown";
+}
+
+static inline struct sdfat_sb_info *SDFAT_SB(struct super_block *sb)
+{
+ return (struct sdfat_sb_info *)sb->s_fs_info;
+}
+
+static inline struct sdfat_inode_info *SDFAT_I(struct inode *inode)
+{
+ return container_of(inode, struct sdfat_inode_info, vfs_inode);
+}
+
+/*
+ * If ->i_mode can't hold S_IWUGO (i.e. ATTR_RO), we use ->i_attrs to
+ * save ATTR_RO instead of ->i_mode.
+ *
+ * If it's a directory and !sbi->options.rodir, ATTR_RO isn't a read-only
+ * bit; it's just used as a flag for the application.
+ */
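+/*
+ * Example (assuming fmask == 0022): ~fmask & S_IWUGO == 0200, so a write
+ * bit survives the mask and ->i_mode can represent ATTR_RO by clearing it.
+ */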
+static inline int sdfat_mode_can_hold_ro(struct inode *inode)
+{
+ struct sdfat_sb_info *sbi = SDFAT_SB(inode->i_sb);
+
+ if (S_ISDIR(inode->i_mode))
+ return 0;
+
+ if ((~sbi->options.fs_fmask) & S_IWUGO)
+ return 1;
+ return 0;
+}
+
+/*
+ * FIXME : needs to check symlink option.
+ */
+/* Convert attribute bits and a mask to the UNIX mode. */
+static inline mode_t sdfat_make_mode(struct sdfat_sb_info *sbi,
+ u32 attr, mode_t mode)
+{
+ if ((attr & ATTR_READONLY) && !(attr & ATTR_SUBDIR))
+ mode &= ~S_IWUGO;
+
+ if (attr & ATTR_SUBDIR)
+ return (mode & ~sbi->options.fs_dmask) | S_IFDIR;
+ else if (attr & ATTR_SYMLINK)
+ return (mode & ~sbi->options.fs_dmask) | S_IFLNK;
+ else
+ return (mode & ~sbi->options.fs_fmask) | S_IFREG;
+}
+
+/* Return the FAT attribute byte for this inode */
+static inline u32 sdfat_make_attr(struct inode *inode)
+{
+ u32 attrs = SDFAT_I(inode)->fid.attr;
+
+ if (S_ISDIR(inode->i_mode))
+ attrs |= ATTR_SUBDIR;
+ if (sdfat_mode_can_hold_ro(inode) && !(inode->i_mode & S_IWUGO))
+ attrs |= ATTR_READONLY;
+ return attrs;
+}
+
+static inline void sdfat_save_attr(struct inode *inode, u32 attr)
+{
+ if (sdfat_mode_can_hold_ro(inode))
+ SDFAT_I(inode)->fid.attr = attr & ATTR_RWMASK;
+ else
+ SDFAT_I(inode)->fid.attr = attr & (ATTR_RWMASK | ATTR_READONLY);
+}
+
+/* sdfat/statistics.c */
+/* bigdata function */
+#ifdef CONFIG_SDFAT_STATISTICS
+extern int sdfat_statistics_init(struct kset *sdfat_kset);
+extern void sdfat_statistics_uninit(void);
+extern void sdfat_statistics_set_mnt(FS_INFO_T *fsi);
+extern void sdfat_statistics_set_mnt_ro(void);
+extern void sdfat_statistics_set_mkdir(u8 flags);
+extern void sdfat_statistics_set_create(u8 flags);
+extern void sdfat_statistics_set_rw(u8 flags, u32 clu_offset, s32 create);
+extern void sdfat_statistics_set_trunc(u8 flags, CHAIN_T *clu);
+extern void sdfat_statistics_set_vol_size(struct super_block *sb);
+#else
+static inline int sdfat_statistics_init(struct kset *sdfat_kset)
+{
+ return 0;
+}
+static inline void sdfat_statistics_uninit(void) {};
+static inline void sdfat_statistics_set_mnt(FS_INFO_T *fsi) {};
+static inline void sdfat_statistics_set_mnt_ro(void) {};
+static inline void sdfat_statistics_set_mkdir(u8 flags) {};
+static inline void sdfat_statistics_set_create(u8 flags) {};
+static inline void sdfat_statistics_set_rw(u8 flags, u32 clu_offset, s32 create) {};
+static inline void sdfat_statistics_set_trunc(u8 flags, CHAIN_T *clu) {};
+static inline void sdfat_statistics_set_vol_size(struct super_block *sb) {};
+#endif
+
+/* sdfat/nls.c */
+/* NLS management function */
+s32 nls_cmp_sfn(struct super_block *sb, u8 *a, u8 *b);
+s32 nls_cmp_uniname(struct super_block *sb, u16 *a, u16 *b);
+s32 nls_uni16s_to_sfn(struct super_block *sb, UNI_NAME_T *p_uniname, DOS_NAME_T *p_dosname, s32 *p_lossy);
+s32 nls_sfn_to_uni16s(struct super_block *sb, DOS_NAME_T *p_dosname, UNI_NAME_T *p_uniname);
+s32 nls_uni16s_to_vfsname(struct super_block *sb, UNI_NAME_T *uniname, u8 *p_cstring, s32 len);
+s32 nls_vfsname_to_uni16s(struct super_block *sb, const u8 *p_cstring,
+ const s32 len, UNI_NAME_T *uniname, s32 *p_lossy);
+
+/* sdfat/mpage.c */
+#ifdef CONFIG_SDFAT_ALIGNED_MPAGE_WRITE
+int sdfat_mpage_writepages(struct address_space *mapping,
+ struct writeback_control *wbc, get_block_t *get_block);
+#endif
+
+/* sdfat/xattr.c */
+#ifdef CONFIG_SDFAT_VIRTUAL_XATTR
+void setup_sdfat_xattr_handler(struct super_block *sb);
+extern int sdfat_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags);
+extern ssize_t sdfat_getxattr(struct dentry *dentry, const char *name, void *value, size_t size);
+extern ssize_t sdfat_listxattr(struct dentry *dentry, char *list, size_t size);
+extern int sdfat_removexattr(struct dentry *dentry, const char *name);
+#else
+static inline void setup_sdfat_xattr_handler(struct super_block *sb) {};
+#endif
+
+/* sdfat/misc.c */
+#ifdef CONFIG_SDFAT_UEVENT
+extern int sdfat_uevent_init(struct kset *sdfat_kset);
+extern void sdfat_uevent_uninit(void);
+extern void sdfat_uevent_ro_remount(struct super_block *sb);
+#else
+static inline int sdfat_uevent_init(struct kset *sdfat_kset)
+{
+ return 0;
+}
+static inline void sdfat_uevent_uninit(void) {};
+static inline void sdfat_uevent_ro_remount(struct super_block *sb) {};
+#endif
+extern void
+__sdfat_fs_error(struct super_block *sb, int report, const char *fmt, ...)
+ __printf(3, 4) __cold;
+#define sdfat_fs_error(sb, fmt, args...) \
+ __sdfat_fs_error(sb, 1, fmt, ## args)
+#define sdfat_fs_error_ratelimit(sb, fmt, args...) \
+ __sdfat_fs_error(sb, __ratelimit(&SDFAT_SB(sb)->ratelimit), fmt, ## args)
+extern void
+__sdfat_msg(struct super_block *sb, const char *lv, int st, const char *fmt, ...)
+ __printf(4, 5) __cold;
+#define sdfat_msg(sb, lv, fmt, args...) \
+ __sdfat_msg(sb, lv, 0, fmt, ## args)
+#define sdfat_log_msg(sb, lv, fmt, args...) \
+ __sdfat_msg(sb, lv, 1, fmt, ## args)
+extern void sdfat_log_version(void);
+extern void sdfat_time_fat2unix(struct sdfat_sb_info *sbi, sdfat_timespec_t *ts,
+ DATE_TIME_T *tp);
+extern void sdfat_time_unix2fat(struct sdfat_sb_info *sbi, sdfat_timespec_t *ts,
+ DATE_TIME_T *tp);
+extern TIMESTAMP_T *tm_now(struct sdfat_sb_info *sbi, TIMESTAMP_T *tm);
+
+#ifdef CONFIG_SDFAT_DEBUG
+
+#ifdef CONFIG_SDFAT_DBG_CAREFUL
+void sdfat_debug_check_clusters(struct inode *inode);
+#else
+#define sdfat_debug_check_clusters(inode)
+#endif /* CONFIG_SDFAT_DBG_CAREFUL */
+
+#ifdef CONFIG_SDFAT_DBG_BUGON
+#define sdfat_debug_bug_on(expr) BUG_ON(expr)
+#else
+#define sdfat_debug_bug_on(expr)
+#endif
+
+#ifdef CONFIG_SDFAT_DBG_WARNON
+#define sdfat_debug_warn_on(expr) WARN_ON(expr)
+#else
+#define sdfat_debug_warn_on(expr)
+#endif
+
+#else /* CONFIG_SDFAT_DEBUG */
+
+#define sdfat_debug_check_clusters(inode)
+#define sdfat_debug_bug_on(expr)
+#define sdfat_debug_warn_on(expr)
+
+#endif /* CONFIG_SDFAT_DEBUG */
+
+#ifdef CONFIG_SDFAT_TRACE_ELAPSED_TIME
+u32 sdfat_time_current_usec(struct timeval *tv);
+extern struct timeval __t1;
+extern struct timeval __t2;
+
+#define TIME_GET(tv) sdfat_time_current_usec(tv)
+#define TIME_START(s) sdfat_time_current_usec(s)
+#define TIME_END(e) sdfat_time_current_usec(e)
+#define TIME_ELAPSED(s, e) ((u32)(((e)->tv_sec - (s)->tv_sec) * 1000000 + \
+ ((e)->tv_usec - (s)->tv_usec)))
+#define PRINT_TIME(n) pr_info("[SDFAT] Elapsed time %d = %u (usec)\n", (n), TIME_ELAPSED(&__t1, &__t2))
+#else /* CONFIG_SDFAT_TRACE_ELAPSED_TIME */
+#define TIME_GET(tv) (0)
+#define TIME_START(s)
+#define TIME_END(e)
+#define TIME_ELAPSED(s, e) (0)
+#define PRINT_TIME(n)
+#endif /* CONFIG_SDFAT_TRACE_ELAPSED_TIME */
+
+#define SDFAT_MSG_LV_NONE (0x00000000)
+#define SDFAT_MSG_LV_ERR (0x00000001)
+#define SDFAT_MSG_LV_INFO (0x00000002)
+#define SDFAT_MSG_LV_DBG (0x00000003)
+#define SDFAT_MSG_LV_MORE (0x00000004)
+#define SDFAT_MSG_LV_TRACE (0x00000005)
+#define SDFAT_MSG_LV_ALL (0x00000006)
+
+#define SDFAT_MSG_LEVEL SDFAT_MSG_LV_INFO
+
+#define SDFAT_TAG_NAME "SDFAT"
+#define __S(x) #x
+#define _S(x) __S(x)
+
+extern void __sdfat_dmsg(int level, const char *fmt, ...) __printf(2, 3) __cold;
+
+#define SDFAT_EMSG_T(level, ...) \
+ __sdfat_dmsg(level, KERN_ERR "[" SDFAT_TAG_NAME "] [" _S(__FILE__) "(" _S(__LINE__) ")] " __VA_ARGS__)
+#define SDFAT_DMSG_T(level, ...) \
+ __sdfat_dmsg(level, KERN_INFO "[" SDFAT_TAG_NAME "] " __VA_ARGS__)
+
+#define SDFAT_EMSG(...) SDFAT_EMSG_T(SDFAT_MSG_LV_ERR, __VA_ARGS__)
+#define SDFAT_IMSG(...) SDFAT_DMSG_T(SDFAT_MSG_LV_INFO, __VA_ARGS__)
+#define SDFAT_DMSG(...) SDFAT_DMSG_T(SDFAT_MSG_LV_DBG, __VA_ARGS__)
+#define SDFAT_MMSG(...) SDFAT_DMSG_T(SDFAT_MSG_LV_MORE, __VA_ARGS__)
+#define SDFAT_TMSG(...) SDFAT_DMSG_T(SDFAT_MSG_LV_TRACE, __VA_ARGS__)
+
+#define EMSG(...)
+#define IMSG(...)
+#define DMSG(...)
+#define MMSG(...)
+#define TMSG(...)
+
+#define EMSG_VAR(exp)
+#define IMSG_VAR(exp)
+#define DMSG_VAR(exp)
+#define MMSG_VAR(exp)
+#define TMSG_VAR(exp)
+
+#ifdef CONFIG_SDFAT_DBG_MSG
+
+
+#if (SDFAT_MSG_LEVEL >= SDFAT_MSG_LV_ERR)
+#undef EMSG
+#undef EMSG_VAR
+#define EMSG(...) SDFAT_EMSG(__VA_ARGS__)
+#define EMSG_VAR(exp) exp
+#endif
+
+#if (SDFAT_MSG_LEVEL >= SDFAT_MSG_LV_INFO)
+#undef IMSG
+#undef IMSG_VAR
+#define IMSG(...) SDFAT_IMSG(__VA_ARGS__)
+#define IMSG_VAR(exp) exp
+#endif
+
+#if (SDFAT_MSG_LEVEL >= SDFAT_MSG_LV_DBG)
+#undef DMSG
+#undef DMSG_VAR
+#define DMSG(...) SDFAT_DMSG(__VA_ARGS__)
+#define DMSG_VAR(exp) exp
+#endif
+
+#if (SDFAT_MSG_LEVEL >= SDFAT_MSG_LV_MORE)
+#undef MMSG
+#undef MMSG_VAR
+#define MMSG(...) SDFAT_MMSG(__VA_ARGS__)
+#define MMSG_VAR(exp) exp
+#endif
+
+/* should replace with trace function */
+#if (SDFAT_MSG_LEVEL >= SDFAT_MSG_LV_TRACE)
+#undef TMSG
+#undef TMSG_VAR
+#define TMSG(...) SDFAT_TMSG(__VA_ARGS__)
+#define TMSG_VAR(exp) exp
+#endif
+
+#endif /* CONFIG_SDFAT_DBG_MSG */
+
+
+#define ASSERT(expr) { \
+ if (!(expr)) { \
+ pr_err("Assertion failed! %s\n", #expr); \
+ BUG_ON(1); \
+ } \
+}
+
+#endif /* !_SDFAT_H */
+
diff --git a/fs/sdfat/sdfat_fs.h b/fs/sdfat/sdfat_fs.h
new file mode 100644
index 000000000000..23b5cda2f58c
--- /dev/null
+++ b/fs/sdfat/sdfat_fs.h
@@ -0,0 +1,423 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SDFAT_FS_H
+#define _SDFAT_FS_H
+
+#include <linux/types.h>
+#include <linux/magic.h>
+#include <asm/byteorder.h>
+
+/*----------------------------------------------------------------------*/
+/* Constant & Macro Definitions */
+/*----------------------------------------------------------------------*/
+#ifndef MSDOS_SUPER_MAGIC
+#define MSDOS_SUPER_MAGIC 0x4d44 /* MD */
+#endif
+
+#ifndef EXFAT_SUPER_MAGIC
+#define EXFAT_SUPER_MAGIC (0x2011BAB0UL)
+#endif /* EXFAT_SUPER_MAGIC */
+
+#ifndef SDFAT_SUPER_MAGIC
+#define SDFAT_SUPER_MAGIC (0x5EC5DFA4UL)
+#endif /* SDFAT_SUPER_MAGIC */
+
+#define SDFAT_ROOT_INO 1
+
+/* FAT types */
+#define FAT12 0x01 // FAT12
+#define FAT16 0x0E // Win95 FAT16 (LBA)
+#define FAT32 0x0C // Win95 FAT32 (LBA)
+#define EXFAT 0x07 // exFAT
+
+/* directory file name */
+#define DOS_CUR_DIR_NAME ". "
+#define DOS_PAR_DIR_NAME ".. "
+
+#ifdef __LITTLE_ENDIAN
+#define UNI_CUR_DIR_NAME ".\0"
+#define UNI_PAR_DIR_NAME ".\0.\0"
+#else
+#define UNI_CUR_DIR_NAME "\0."
+#define UNI_PAR_DIR_NAME "\0.\0."
+#endif
+
+/* file name lengths */
+/* NOTE :
+ * The maximum length of input or output is limited to 256 including NULL.
+ * We allocate 4 extra bytes for the UTF-8 translation residing in the last
+ * position, because UTF-8 can use up to 6 bytes per character.
+ * Therefore, MAX_CHARSET_SIZE supports up to 6 bytes for UTF-8.
+ */
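+/*
+ * Worked example: MAX_VFSNAME_BUF_SIZE = (255 + 1) * 6 = 1536 bytes,
+ * enough for a 255-character name in the 6-byte-per-character worst case.
+ */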
+#define MAX_UNINAME_BUF_SIZE (((MAX_NAME_LENGTH+1)*2)+4)
+#define MAX_DOSNAME_BUF_SIZE ((DOS_NAME_LENGTH+2)+6)
+#define MAX_VFSNAME_BUF_SIZE ((MAX_NAME_LENGTH+1)*MAX_CHARSET_SIZE)
+#define MAX_CHARSET_SIZE 6 // max size of multi-byte character
+#define MAX_NAME_LENGTH 255 // max len of file name excluding NULL
+#define DOS_NAME_LENGTH 11 // DOS file name length excluding NULL
+
+#define SECTOR_SIZE_BITS 9 /* VFS sector size is 512 bytes */
+
+#define DENTRY_SIZE 32 /* directory entry size */
+#define DENTRY_SIZE_BITS 5
+
+#define MAX_FAT_DENTRIES 65536 /* FAT allows 65536 directory entries */
+#define MAX_EXFAT_DENTRIES 8388608 /* exFAT allows 8388608 (256 MB) directory entries */
+
+/* PBR entries */
+#define PBR_SIGNATURE 0xAA55
+#define EXT_SIGNATURE 0xAA550000
+#define VOL_LABEL "NO NAME " /* size should be 11 */
+#define OEM_NAME "MSWIN4.1" /* size should be 8 */
+#define STR_FAT12 "FAT12 " /* size should be 8 */
+#define STR_FAT16 "FAT16 " /* size should be 8 */
+#define STR_FAT32 "FAT32 " /* size should be 8 */
+#define STR_EXFAT "EXFAT " /* size should be 8 */
+
+#define VOL_CLEAN 0x0000
+#define VOL_DIRTY 0x0002
+
+#define FAT_VOL_DIRTY 0x01
+
+/* max number of clusters */
+#define FAT12_THRESHOLD 4087 // 2^12 - 1 + 2 (clu 0 & 1)
+#define FAT16_THRESHOLD 65527 // 2^16 - 1 + 2
+#define FAT32_THRESHOLD 268435457 // 2^28 - 1 + 2
+#define EXFAT_THRESHOLD 268435457 // 2^28 - 1 + 2
+
+/* dentry types */
+#define MSDOS_DELETED 0xE5 /* deleted mark */
+#define MSDOS_UNUSED 0x00 /* end of directory */
+
+#define EXFAT_UNUSED 0x00 /* end of directory */
+#define IS_EXFAT_DELETED(x) ((x) < 0x80) /* deleted file (0x01~0x7F) */
+#define EXFAT_INVAL 0x80 /* invalid value */
+#define EXFAT_BITMAP 0x81 /* allocation bitmap */
+#define EXFAT_UPCASE 0x82 /* upcase table */
+#define EXFAT_VOLUME 0x83 /* volume label */
+#define EXFAT_FILE 0x85 /* file or dir */
+#define EXFAT_STREAM 0xC0 /* stream entry */
+#define EXFAT_NAME 0xC1 /* file name entry */
+#define EXFAT_ACL 0xC2 /* ACL entry */
+
+/* specific flag */
+#define MSDOS_LAST_LFN 0x40
+
+/* file attributes */
+#define ATTR_NORMAL 0x0000
+#define ATTR_READONLY 0x0001
+#define ATTR_HIDDEN 0x0002
+#define ATTR_SYSTEM 0x0004
+#define ATTR_VOLUME 0x0008
+#define ATTR_SUBDIR 0x0010
+#define ATTR_ARCHIVE 0x0020
+#define ATTR_SYMLINK 0x0040
+#define ATTR_EXTEND (ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM | \
+ ATTR_VOLUME) /* 0x000F */
+
+#define ATTR_EXTEND_MASK (ATTR_EXTEND | ATTR_SUBDIR | ATTR_ARCHIVE)
+#define ATTR_RWMASK (ATTR_HIDDEN | ATTR_SYSTEM | ATTR_VOLUME | \
+ ATTR_SUBDIR | ATTR_ARCHIVE | ATTR_SYMLINK)/* 0x007E */
+
+/* file creation modes */
+#define FM_REGULAR 0x00
+#define FM_SYMLINK 0x40
+
+/* time modes */
+#define TM_CREATE 0
+#define TM_MODIFY 1
+#define TM_ACCESS 2
+
+/* checksum types */
+#define CS_DIR_ENTRY 0
+#define CS_PBR_SECTOR 1
+#define CS_DEFAULT 2
+
+/*
+ * ioctl command
+ */
+#define SDFAT_IOCTL_GET_VOLUME_ID _IOR('r', 0x12, __u32)
+#define SDFAT_IOCTL_DFR_INFO _IOC(_IOC_NONE, 'E', 0x13, sizeof(u32))
+#define SDFAT_IOCTL_DFR_TRAV _IOC(_IOC_NONE, 'E', 0x14, sizeof(u32))
+#define SDFAT_IOCTL_DFR_REQ _IOC(_IOC_NONE, 'E', 0x15, sizeof(u32))
+#define SDFAT_IOCTL_DFR_SPO_FLAG _IOC(_IOC_NONE, 'E', 0x16, sizeof(u32))
+#define SDFAT_IOCTL_PANIC _IOC(_IOC_NONE, 'E', 0x17, sizeof(u32))
+
+/*
+ * ioctl command for debugging
+ */
+
+/*
+ * IOCTL code 'f' used by
+ * - file systems typically #0~0x1F
+ * - embedded terminal devices #128~
+ * - exts for debugging purpose #99
+ * numbers 100 and 101 are available now but have possible conflicts
+ *
+ * NOTE : This is available only if CONFIG_SDFAT_DBG_IOCTL is enabled.
+ *
+ */
+#define SDFAT_IOC_GET_DEBUGFLAGS _IOR('f', 100, long)
+#define SDFAT_IOC_SET_DEBUGFLAGS _IOW('f', 101, long)
+
+#define SDFAT_DEBUGFLAGS_INVALID_UMOUNT 0x01
+#define SDFAT_DEBUGFLAGS_ERROR_RW 0x02
+
+/*----------------------------------------------------------------------*/
+/* On-Disk Type Definitions */
+/*----------------------------------------------------------------------*/
+
+/* FAT12/16 BIOS parameter block (64 bytes) */
+typedef struct {
+ __u8 jmp_boot[3];
+ __u8 oem_name[8];
+
+ __u8 sect_size[2]; /* unaligned */
+ __u8 sect_per_clus;
+ __le16 num_reserved; /* . */
+ __u8 num_fats;
+ __u8 num_root_entries[2]; /* unaligned */
+ __u8 num_sectors[2]; /* unaligned */
+ __u8 media_type;
+ __le16 num_fat_sectors;
+ __le16 sectors_in_track;
+ __le16 num_heads;
+ __le32 num_hid_sectors; /* . */
+ __le32 num_huge_sectors;
+
+ __u8 phy_drv_no;
+ __u8 state; /* used by WindowsNT for mount state */
+ __u8 ext_signature;
+ __u8 vol_serial[4];
+ __u8 vol_label[11];
+ __u8 vol_type[8];
+ __le16 dummy;
+} bpb16_t;
+
+/* FAT32 BIOS parameter block (64 bytes) */
+typedef struct {
+ __u8 jmp_boot[3];
+ __u8 oem_name[8];
+
+ __u8 sect_size[2]; /* unaligned */
+ __u8 sect_per_clus;
+ __le16 num_reserved;
+ __u8 num_fats;
+ __u8 num_root_entries[2]; /* unaligned */
+ __u8 num_sectors[2]; /* unaligned */
+ __u8 media_type;
+ __le16 num_fat_sectors; /* zero */
+ __le16 sectors_in_track;
+ __le16 num_heads;
+ __le32 num_hid_sectors; /* . */
+ __le32 num_huge_sectors;
+
+ __le32 num_fat32_sectors;
+ __le16 ext_flags;
+ __u8 fs_version[2];
+ __le32 root_cluster; /* . */
+ __le16 fsinfo_sector;
+ __le16 backup_sector;
+ __le16 reserved[6]; /* . */
+} bpb32_t;
+
+/* FAT32 EXTEND BIOS parameter block (32 bytes) */
+typedef struct {
+ __u8 phy_drv_no;
+ __u8 state; /* used by WindowsNT for mount state */
+ __u8 ext_signature;
+ __u8 vol_serial[4];
+ __u8 vol_label[11];
+ __u8 vol_type[8];
+ __le16 dummy[3];
+} bsx32_t;
+
+/* EXFAT BIOS parameter block (64 bytes) */
+typedef struct {
+ __u8 jmp_boot[3];
+ __u8 oem_name[8];
+ __u8 res_zero[53];
+} bpb64_t;
+
+/* EXFAT EXTEND BIOS parameter block (56 bytes) */
+typedef struct {
+ __le64 vol_offset;
+ __le64 vol_length;
+ __le32 fat_offset;
+ __le32 fat_length;
+ __le32 clu_offset;
+ __le32 clu_count;
+ __le32 root_cluster;
+ __le32 vol_serial;
+ __u8 fs_version[2];
+ __le16 vol_flags;
+ __u8 sect_size_bits;
+ __u8 sect_per_clus_bits;
+ __u8 num_fats;
+ __u8 phy_drv_no;
+ __u8 perc_in_use;
+ __u8 reserved2[7];
+} bsx64_t;
+
+/* FAT32 PBR (64 bytes) */
+typedef struct {
+ bpb16_t bpb;
+} pbr16_t;
+
+/* FAT32 PBR[BPB+BSX] (96 bytes) */
+typedef struct {
+ bpb32_t bpb;
+ bsx32_t bsx;
+} pbr32_t;
+
+/* EXFAT PBR[BPB+BSX] (120 bytes) */
+typedef struct {
+ bpb64_t bpb;
+ bsx64_t bsx;
+} pbr64_t;
+
+/* Common PBR[Partition Boot Record] (512 bytes) */
+typedef struct {
+ union {
+ __u8 raw[64];
+ bpb16_t f16;
+ bpb32_t f32;
+ bpb64_t f64;
+ } bpb;
+ union {
+ __u8 raw[56];
+ bsx32_t f32;
+ bsx64_t f64;
+ } bsx;
+ __u8 boot_code[390];
+ __le16 signature;
+} pbr_t;
+
+/* FAT32 filesystem information sector (512 bytes) */
+typedef struct {
+ __le32 signature1; // aligned
+ __u8 reserved1[480];
+ __le32 signature2; // aligned
+ __le32 free_cluster; // aligned
+ __le32 next_cluster; // aligned
+ __u8 reserved2[14];
+ __le16 signature3[2];
+} fat32_fsi_t;
+
+/* FAT directory entry (32 bytes) */
+typedef struct {
+ __u8 dummy[32];
+} DENTRY_T;
+
+typedef struct {
+ __u8 name[DOS_NAME_LENGTH]; /* 11 chars */
+ __u8 attr;
+ __u8 lcase;
+ __u8 create_time_ms;
+ __le16 create_time; // aligned
+ __le16 create_date; // aligned
+ __le16 access_date; // aligned
+ __le16 start_clu_hi; // aligned
+ __le16 modify_time; // aligned
+ __le16 modify_date; // aligned
+ __le16 start_clu_lo; // aligned
+ __le32 size; // aligned
+} DOS_DENTRY_T;
+
+/* FAT extended directory entry (32 bytes) */
+typedef struct {
+ __u8 order;
+ __u8 unicode_0_4[10];
+ __u8 attr;
+ __u8 sysid;
+ __u8 checksum;
+ __le16 unicode_5_10[6]; // aligned
+ __le16 start_clu; // aligned
+ __le16 unicode_11_12[2]; // aligned
+} EXT_DENTRY_T;
+
+/* EXFAT file directory entry (32 bytes) */
+typedef struct {
+ __u8 type;
+ __u8 num_ext;
+ __le16 checksum; // aligned
+ __le16 attr; // aligned
+ __le16 reserved1;
+ __le16 create_time; // aligned
+ __le16 create_date; // aligned
+ __le16 modify_time; // aligned
+ __le16 modify_date; // aligned
+ __le16 access_time; // aligned
+ __le16 access_date; // aligned
+ __u8 create_time_ms;
+ __u8 modify_time_ms;
+ __u8 create_tz;
+ __u8 modify_tz;
+ __u8 access_tz;
+ __u8 reserved2[7];
+} FILE_DENTRY_T;
+
+/* EXFAT stream extension directory entry (32 bytes) */
+typedef struct {
+ __u8 type;
+ __u8 flags;
+ __u8 reserved1;
+ __u8 name_len;
+ __le16 name_hash; // aligned
+ __le16 reserved2;
+ __le64 valid_size; // aligned
+ __le32 reserved3; // aligned
+ __le32 start_clu; // aligned
+ __le64 size; // aligned
+} STRM_DENTRY_T;
+
+/* EXFAT file name directory entry (32 bytes) */
+typedef struct {
+ __u8 type;
+ __u8 flags;
+ __le16 unicode_0_14[15]; // aligned
+} NAME_DENTRY_T;
+
+/* EXFAT allocation bitmap directory entry (32 bytes) */
+typedef struct {
+ __u8 type;
+ __u8 flags;
+ __u8 reserved[18];
+ __le32 start_clu; // aligned
+ __le64 size; // aligned
+} BMAP_DENTRY_T;
+
+/* EXFAT up-case table directory entry (32 bytes) */
+typedef struct {
+ __u8 type;
+ __u8 reserved1[3];
+ __le32 checksum; // aligned
+ __u8 reserved2[12];
+ __le32 start_clu; // aligned
+ __le64 size; // aligned
+} CASE_DENTRY_T;
+
+/* EXFAT volume label directory entry (32 bytes) */
+typedef struct {
+ __u8 type;
+ __u8 label_len;
+ __le16 unicode_0_10[11]; // aligned
+ __u8 reserved[8];
+} VOLM_DENTRY_T;
+
+#endif /* _SDFAT_FS_H */
diff --git a/fs/sdfat/statistics.c b/fs/sdfat/statistics.c
new file mode 100644
index 000000000000..099d9a358a76
--- /dev/null
+++ b/fs/sdfat/statistics.c
@@ -0,0 +1,281 @@
+#include "sdfat.h"
+
+#define SDFAT_VF_CLUS_MAX 7 /* 512 Byte ~ 32 KByte */
+#define SDFAT_EF_CLUS_MAX 17 /* 512 Byte ~ 32 MByte */
+
+enum {
+ SDFAT_MNT_FAT12,
+ SDFAT_MNT_FAT16,
+ SDFAT_MNT_FAT32,
+ SDFAT_MNT_EXFAT,
+ SDFAT_MNT_RO,
+ SDFAT_MNT_MAX
+};
+
+enum {
+ SDFAT_OP_EXFAT_MNT,
+ SDFAT_OP_MKDIR,
+ SDFAT_OP_CREATE,
+ SDFAT_OP_READ,
+ SDFAT_OP_WRITE,
+ SDFAT_OP_TRUNC,
+ SDFAT_OP_MAX
+};
+
+enum {
+ SDFAT_VOL_4G,
+ SDFAT_VOL_8G,
+ SDFAT_VOL_16G,
+ SDFAT_VOL_32G,
+ SDFAT_VOL_64G,
+ SDFAT_VOL_128G,
+ SDFAT_VOL_256G,
+ SDFAT_VOL_512G,
+ SDFAT_VOL_XTB,
+ SDFAT_VOL_MAX
+};
+
+static struct sdfat_statistics {
+ u32 clus_vfat[SDFAT_VF_CLUS_MAX];
+ u32 clus_exfat[SDFAT_EF_CLUS_MAX];
+ u32 mnt_cnt[SDFAT_MNT_MAX];
+ u32 nofat_op[SDFAT_OP_MAX];
+ u32 vol_size[SDFAT_VOL_MAX];
+} statistics;
+
+static struct kset *sdfat_statistics_kset;
+
+static ssize_t vfat_cl_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buff)
+{
+ return snprintf(buff, PAGE_SIZE, "\"VCL_512B_I\":\"%u\","
+ "\"VCL_1K_I\":\"%u\",\"VCL_2K_I\":\"%u\","
+ "\"VCL_4K_I\":\"%u\",\"VCL_8K_I\":\"%u\","
+ "\"VCL_16K_I\":\"%u\",\"VCL_32K_I\":\"%u\"\n",
+ statistics.clus_vfat[0], statistics.clus_vfat[1],
+ statistics.clus_vfat[2], statistics.clus_vfat[3],
+ statistics.clus_vfat[4], statistics.clus_vfat[5],
+ statistics.clus_vfat[6]);
+}
+
+static ssize_t exfat_cl_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buff)
+{
+ return snprintf(buff, PAGE_SIZE, "\"ECL_512B_I\":\"%u\","
+ "\"ECL_1K_I\":\"%u\",\"ECL_2K_I\":\"%u\","
+ "\"ECL_4K_I\":\"%u\",\"ECL_8K_I\":\"%u\","
+ "\"ECL_16K_I\":\"%u\",\"ECL_32K_I\":\"%u\","
+ "\"ECL_64K_I\":\"%u\",\"ECL_128K_I\":\"%u\","
+ "\"ECL_256K_I\":\"%u\",\"ECL_512K_I\":\"%u\","
+ "\"ECL_1M_I\":\"%u\",\"ECL_2M_I\":\"%u\","
+ "\"ECL_4M_I\":\"%u\",\"ECL_8M_I\":\"%u\","
+ "\"ECL_16M_I\":\"%u\",\"ECL_32M_I\":\"%u\"\n",
+ statistics.clus_exfat[0], statistics.clus_exfat[1],
+ statistics.clus_exfat[2], statistics.clus_exfat[3],
+ statistics.clus_exfat[4], statistics.clus_exfat[5],
+ statistics.clus_exfat[6], statistics.clus_exfat[7],
+ statistics.clus_exfat[8], statistics.clus_exfat[9],
+ statistics.clus_exfat[10], statistics.clus_exfat[11],
+ statistics.clus_exfat[12], statistics.clus_exfat[13],
+ statistics.clus_exfat[14], statistics.clus_exfat[15],
+ statistics.clus_exfat[16]);
+}
+
+static ssize_t mount_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buff)
+{
+ return snprintf(buff, PAGE_SIZE, "\"FAT12_MNT_I\":\"%u\","
+ "\"FAT16_MNT_I\":\"%u\",\"FAT32_MNT_I\":\"%u\","
+ "\"EXFAT_MNT_I\":\"%u\",\"RO_MNT_I\":\"%u\"\n",
+ statistics.mnt_cnt[SDFAT_MNT_FAT12],
+ statistics.mnt_cnt[SDFAT_MNT_FAT16],
+ statistics.mnt_cnt[SDFAT_MNT_FAT32],
+ statistics.mnt_cnt[SDFAT_MNT_EXFAT],
+ statistics.mnt_cnt[SDFAT_MNT_RO]);
+}
+
+static ssize_t nofat_op_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buff)
+{
+ return snprintf(buff, PAGE_SIZE, "\"NOFAT_MOUNT_I\":\"%u\","
+ "\"NOFAT_MKDIR_I\":\"%u\",\"NOFAT_CREATE_I\":\"%u\","
+ "\"NOFAT_READ_I\":\"%u\",\"NOFAT_WRITE_I\":\"%u\","
+ "\"NOFAT_TRUNC_I\":\"%u\"\n",
+ statistics.nofat_op[SDFAT_OP_EXFAT_MNT],
+ statistics.nofat_op[SDFAT_OP_MKDIR],
+ statistics.nofat_op[SDFAT_OP_CREATE],
+ statistics.nofat_op[SDFAT_OP_READ],
+ statistics.nofat_op[SDFAT_OP_WRITE],
+ statistics.nofat_op[SDFAT_OP_TRUNC]);
+}
+
+static ssize_t vol_size_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buff)
+{
+ return snprintf(buff, PAGE_SIZE, "\"VOL_4G_I\":\"%u\","
+ "\"VOL_8G_I\":\"%u\",\"VOL_16G_I\":\"%u\","
+ "\"VOL_32G_I\":\"%u\",\"VOL_64G_I\":\"%u\","
+ "\"VOL_128G_I\":\"%u\",\"VOL_256G_I\":\"%u\","
+ "\"VOL_512G_I\":\"%u\",\"VOL_XTB_I\":\"%u\"\n",
+ statistics.vol_size[SDFAT_VOL_4G],
+ statistics.vol_size[SDFAT_VOL_8G],
+ statistics.vol_size[SDFAT_VOL_16G],
+ statistics.vol_size[SDFAT_VOL_32G],
+ statistics.vol_size[SDFAT_VOL_64G],
+ statistics.vol_size[SDFAT_VOL_128G],
+ statistics.vol_size[SDFAT_VOL_256G],
+ statistics.vol_size[SDFAT_VOL_512G],
+ statistics.vol_size[SDFAT_VOL_XTB]);
+}
+
+static struct kobj_attribute vfat_cl_attr = __ATTR_RO(vfat_cl);
+static struct kobj_attribute exfat_cl_attr = __ATTR_RO(exfat_cl);
+static struct kobj_attribute mount_attr = __ATTR_RO(mount);
+static struct kobj_attribute nofat_op_attr = __ATTR_RO(nofat_op);
+static struct kobj_attribute vol_size_attr = __ATTR_RO(vol_size);
+
+static struct attribute *attributes_statistics[] = {
+ &vfat_cl_attr.attr,
+ &exfat_cl_attr.attr,
+ &mount_attr.attr,
+ &nofat_op_attr.attr,
+ &vol_size_attr.attr,
+ NULL,
+};
+
+static struct attribute_group attr_group_statistics = {
+ .attrs = attributes_statistics,
+};
+
+int sdfat_statistics_init(struct kset *sdfat_kset)
+{
+ int err;
+
+ sdfat_statistics_kset = kset_create_and_add("statistics", NULL, &sdfat_kset->kobj);
+ if (!sdfat_statistics_kset) {
+ pr_err("[SDFAT] failed to create sdfat statistics kobj\n");
+ return -ENOMEM;
+ }
+
+ err = sysfs_create_group(&sdfat_statistics_kset->kobj, &attr_group_statistics);
+ if (err) {
+ pr_err("[SDFAT] failed to create sdfat statistics attributes\n");
+ kset_unregister(sdfat_statistics_kset);
+ sdfat_statistics_kset = NULL;
+ return err;
+ }
+
+ return 0;
+}
+
+void sdfat_statistics_uninit(void)
+{
+ if (sdfat_statistics_kset) {
+ sysfs_remove_group(&sdfat_statistics_kset->kobj, &attr_group_statistics);
+ kset_unregister(sdfat_statistics_kset);
+ sdfat_statistics_kset = NULL;
+ }
+ memset(&statistics, 0, sizeof(struct sdfat_statistics));
+}
+
+void sdfat_statistics_set_mnt(FS_INFO_T *fsi)
+{
+ if (fsi->vol_type == EXFAT) {
+ statistics.mnt_cnt[SDFAT_MNT_EXFAT]++;
+ statistics.nofat_op[SDFAT_OP_EXFAT_MNT] = 1;
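+ /* index = log2(sectors per cluster); with 512 B sectors: 0 = 512 B ... 16 = 32 MB */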
+ if (fsi->sect_per_clus_bits < SDFAT_EF_CLUS_MAX)
+ statistics.clus_exfat[fsi->sect_per_clus_bits]++;
+ else
+ statistics.clus_exfat[SDFAT_EF_CLUS_MAX - 1]++;
+ return;
+ }
+
+ if (fsi->vol_type == FAT32)
+ statistics.mnt_cnt[SDFAT_MNT_FAT32]++;
+ else if (fsi->vol_type == FAT16)
+ statistics.mnt_cnt[SDFAT_MNT_FAT16]++;
+ else if (fsi->vol_type == FAT12)
+ statistics.mnt_cnt[SDFAT_MNT_FAT12]++;
+
+ if (fsi->sect_per_clus_bits < SDFAT_VF_CLUS_MAX)
+ statistics.clus_vfat[fsi->sect_per_clus_bits]++;
+ else
+ statistics.clus_vfat[SDFAT_VF_CLUS_MAX - 1]++;
+}
+
+void sdfat_statistics_set_mnt_ro(void)
+{
+ statistics.mnt_cnt[SDFAT_MNT_RO]++;
+}
+
+void sdfat_statistics_set_mkdir(u8 flags)
+{
+ if (flags != 0x03)
+ return;
+ statistics.nofat_op[SDFAT_OP_MKDIR] = 1;
+}
+
+void sdfat_statistics_set_create(u8 flags)
+{
+ if (flags != 0x03)
+ return;
+ statistics.nofat_op[SDFAT_OP_CREATE] = 1;
+}
+
+/* flags : file or dir flags; 0x03 means no FAT chain.
+ * clu_offset : logical cluster offset within the file or dir
+ * create : BMAP_ADD_CLUSTER or not
+ *
+ * An access with BMAP_ADD_CLUSTER counts as a no-FAT-chain write
+ * when the entry has the 0x03 flag and two or more clusters;
+ * without BMAP_ADD_CLUSTER, the same condition counts as a
+ * no-FAT-chain read.
+ */
+void sdfat_statistics_set_rw(u8 flags, u32 clu_offset, s32 create)
+{
+ if ((flags == 0x03) && (clu_offset > 1)) {
+ if (create)
+ statistics.nofat_op[SDFAT_OP_WRITE] = 1;
+ else
+ statistics.nofat_op[SDFAT_OP_READ] = 1;
+ }
+}
+
+/* flags : file or dir flags; 0x03 means no FAT chain.
+ * clu : cluster chain
+ *
+ * Mark a no-FAT-chain truncate when the file or dir has the 0x03
+ * flag and two or more clusters.
+ */
+void sdfat_statistics_set_trunc(u8 flags, CHAIN_T *clu)
+{
+ if ((flags == 0x03) && (clu->size > 1))
+ statistics.nofat_op[SDFAT_OP_TRUNC] = 1;
+}
+
+void sdfat_statistics_set_vol_size(struct super_block *sb)
+{
+ u64 vol_size;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ vol_size = (u64)fsi->num_sectors << sb->s_blocksize_bits;
+
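+ /* bucket by power-of-two capacity: 2^32 bytes = 4 GB, 2^33 = 8 GB, ... */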
+ if (vol_size <= ((u64)1 << 32))
+ statistics.vol_size[SDFAT_VOL_4G]++;
+ else if (vol_size <= ((u64)1 << 33))
+ statistics.vol_size[SDFAT_VOL_8G]++;
+ else if (vol_size <= ((u64)1 << 34))
+ statistics.vol_size[SDFAT_VOL_16G]++;
+ else if (vol_size <= ((u64)1 << 35))
+ statistics.vol_size[SDFAT_VOL_32G]++;
+ else if (vol_size <= ((u64)1 << 36))
+ statistics.vol_size[SDFAT_VOL_64G]++;
+ else if (vol_size <= ((u64)1 << 37))
+ statistics.vol_size[SDFAT_VOL_128G]++;
+ else if (vol_size <= ((u64)1 << 38))
+ statistics.vol_size[SDFAT_VOL_256G]++;
+ else if (vol_size <= ((u64)1 << 39))
+ statistics.vol_size[SDFAT_VOL_512G]++;
+ else
+ statistics.vol_size[SDFAT_VOL_XTB]++;
+}
diff --git a/fs/sdfat/upcase.h b/fs/sdfat/upcase.h
new file mode 100644
index 000000000000..386772c57f8d
--- /dev/null
+++ b/fs/sdfat/upcase.h
@@ -0,0 +1,407 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _UPCASE_H
+#define _UPCASE_H
+
+/* Upcase table macros */
+#define SDFAT_NUM_UPCASE 2918
+#define HIGH_INDEX_BIT (8)
+#define HIGH_INDEX_MASK (0xFF00)
+#define LOW_INDEX_BIT (16-HIGH_INDEX_BIT)
+#define UTBL_ROW_COUNT (1<<LOW_INDEX_BIT)
+#define UTBL_COL_COUNT (1<<HIGH_INDEX_BIT)
+
+static inline u16 get_col_index(u16 i)
+{
+ return i >> LOW_INDEX_BIT;
+}
+static inline u16 get_row_index(u16 i)
+{
+ return i & ~HIGH_INDEX_MASK;
+}
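+
+/* Example: for code point U+0171, get_col_index() yields 0x01 and
+ * get_row_index() yields 0x71, splitting the 16-bit value into
+ * upcase-table coordinates.
+ */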
+
+
+static const u8 uni_def_upcase[SDFAT_NUM_UPCASE<<1] = {
+ 0x00, 0x00, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04, 0x00, 0x05, 0x00, 0x06, 0x00, 0x07, 0x00,
+ 0x08, 0x00, 0x09, 0x00, 0x0A, 0x00, 0x0B, 0x00, 0x0C, 0x00, 0x0D, 0x00, 0x0E, 0x00, 0x0F, 0x00,
+ 0x10, 0x00, 0x11, 0x00, 0x12, 0x00, 0x13, 0x00, 0x14, 0x00, 0x15, 0x00, 0x16, 0x00, 0x17, 0x00,
+ 0x18, 0x00, 0x19, 0x00, 0x1A, 0x00, 0x1B, 0x00, 0x1C, 0x00, 0x1D, 0x00, 0x1E, 0x00, 0x1F, 0x00,
+ 0x20, 0x00, 0x21, 0x00, 0x22, 0x00, 0x23, 0x00, 0x24, 0x00, 0x25, 0x00, 0x26, 0x00, 0x27, 0x00,
+ 0x28, 0x00, 0x29, 0x00, 0x2A, 0x00, 0x2B, 0x00, 0x2C, 0x00, 0x2D, 0x00, 0x2E, 0x00, 0x2F, 0x00,
+ 0x30, 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x34, 0x00, 0x35, 0x00, 0x36, 0x00, 0x37, 0x00,
+ 0x38, 0x00, 0x39, 0x00, 0x3A, 0x00, 0x3B, 0x00, 0x3C, 0x00, 0x3D, 0x00, 0x3E, 0x00, 0x3F, 0x00,
+ 0x40, 0x00, 0x41, 0x00, 0x42, 0x00, 0x43, 0x00, 0x44, 0x00, 0x45, 0x00, 0x46, 0x00, 0x47, 0x00,
+ 0x48, 0x00, 0x49, 0x00, 0x4A, 0x00, 0x4B, 0x00, 0x4C, 0x00, 0x4D, 0x00, 0x4E, 0x00, 0x4F, 0x00,
+ 0x50, 0x00, 0x51, 0x00, 0x52, 0x00, 0x53, 0x00, 0x54, 0x00, 0x55, 0x00, 0x56, 0x00, 0x57, 0x00,
+ 0x58, 0x00, 0x59, 0x00, 0x5A, 0x00, 0x5B, 0x00, 0x5C, 0x00, 0x5D, 0x00, 0x5E, 0x00, 0x5F, 0x00,
+ 0x60, 0x00, 0x41, 0x00, 0x42, 0x00, 0x43, 0x00, 0x44, 0x00, 0x45, 0x00, 0x46, 0x00, 0x47, 0x00,
+ 0x48, 0x00, 0x49, 0x00, 0x4A, 0x00, 0x4B, 0x00, 0x4C, 0x00, 0x4D, 0x00, 0x4E, 0x00, 0x4F, 0x00,
+ 0x50, 0x00, 0x51, 0x00, 0x52, 0x00, 0x53, 0x00, 0x54, 0x00, 0x55, 0x00, 0x56, 0x00, 0x57, 0x00,
+ 0x58, 0x00, 0x59, 0x00, 0x5A, 0x00, 0x7B, 0x00, 0x7C, 0x00, 0x7D, 0x00, 0x7E, 0x00, 0x7F, 0x00,
+ 0x80, 0x00, 0x81, 0x00, 0x82, 0x00, 0x83, 0x00, 0x84, 0x00, 0x85, 0x00, 0x86, 0x00, 0x87, 0x00,
+ 0x88, 0x00, 0x89, 0x00, 0x8A, 0x00, 0x8B, 0x00, 0x8C, 0x00, 0x8D, 0x00, 0x8E, 0x00, 0x8F, 0x00,
+ 0x90, 0x00, 0x91, 0x00, 0x92, 0x00, 0x93, 0x00, 0x94, 0x00, 0x95, 0x00, 0x96, 0x00, 0x97, 0x00,
+ 0x98, 0x00, 0x99, 0x00, 0x9A, 0x00, 0x9B, 0x00, 0x9C, 0x00, 0x9D, 0x00, 0x9E, 0x00, 0x9F, 0x00,
+ 0xA0, 0x00, 0xA1, 0x00, 0xA2, 0x00, 0xA3, 0x00, 0xA4, 0x00, 0xA5, 0x00, 0xA6, 0x00, 0xA7, 0x00,
+ 0xA8, 0x00, 0xA9, 0x00, 0xAA, 0x00, 0xAB, 0x00, 0xAC, 0x00, 0xAD, 0x00, 0xAE, 0x00, 0xAF, 0x00,
+ 0xB0, 0x00, 0xB1, 0x00, 0xB2, 0x00, 0xB3, 0x00, 0xB4, 0x00, 0xB5, 0x00, 0xB6, 0x00, 0xB7, 0x00,
+ 0xB8, 0x00, 0xB9, 0x00, 0xBA, 0x00, 0xBB, 0x00, 0xBC, 0x00, 0xBD, 0x00, 0xBE, 0x00, 0xBF, 0x00,
+ 0xC0, 0x00, 0xC1, 0x00, 0xC2, 0x00, 0xC3, 0x00, 0xC4, 0x00, 0xC5, 0x00, 0xC6, 0x00, 0xC7, 0x00,
+ 0xC8, 0x00, 0xC9, 0x00, 0xCA, 0x00, 0xCB, 0x00, 0xCC, 0x00, 0xCD, 0x00, 0xCE, 0x00, 0xCF, 0x00,
+ 0xD0, 0x00, 0xD1, 0x00, 0xD2, 0x00, 0xD3, 0x00, 0xD4, 0x00, 0xD5, 0x00, 0xD6, 0x00, 0xD7, 0x00,
+ 0xD8, 0x00, 0xD9, 0x00, 0xDA, 0x00, 0xDB, 0x00, 0xDC, 0x00, 0xDD, 0x00, 0xDE, 0x00, 0xDF, 0x00,
+ 0xC0, 0x00, 0xC1, 0x00, 0xC2, 0x00, 0xC3, 0x00, 0xC4, 0x00, 0xC5, 0x00, 0xC6, 0x00, 0xC7, 0x00,
+ 0xC8, 0x00, 0xC9, 0x00, 0xCA, 0x00, 0xCB, 0x00, 0xCC, 0x00, 0xCD, 0x00, 0xCE, 0x00, 0xCF, 0x00,
+ 0xD0, 0x00, 0xD1, 0x00, 0xD2, 0x00, 0xD3, 0x00, 0xD4, 0x00, 0xD5, 0x00, 0xD6, 0x00, 0xF7, 0x00,
+ 0xD8, 0x00, 0xD9, 0x00, 0xDA, 0x00, 0xDB, 0x00, 0xDC, 0x00, 0xDD, 0x00, 0xDE, 0x00, 0x78, 0x01,
+ 0x00, 0x01, 0x00, 0x01, 0x02, 0x01, 0x02, 0x01, 0x04, 0x01, 0x04, 0x01, 0x06, 0x01, 0x06, 0x01,
+ 0x08, 0x01, 0x08, 0x01, 0x0A, 0x01, 0x0A, 0x01, 0x0C, 0x01, 0x0C, 0x01, 0x0E, 0x01, 0x0E, 0x01,
+ 0x10, 0x01, 0x10, 0x01, 0x12, 0x01, 0x12, 0x01, 0x14, 0x01, 0x14, 0x01, 0x16, 0x01, 0x16, 0x01,
+ 0x18, 0x01, 0x18, 0x01, 0x1A, 0x01, 0x1A, 0x01, 0x1C, 0x01, 0x1C, 0x01, 0x1E, 0x01, 0x1E, 0x01,
+ 0x20, 0x01, 0x20, 0x01, 0x22, 0x01, 0x22, 0x01, 0x24, 0x01, 0x24, 0x01, 0x26, 0x01, 0x26, 0x01,
+ 0x28, 0x01, 0x28, 0x01, 0x2A, 0x01, 0x2A, 0x01, 0x2C, 0x01, 0x2C, 0x01, 0x2E, 0x01, 0x2E, 0x01,
+ 0x30, 0x01, 0x31, 0x01, 0x32, 0x01, 0x32, 0x01, 0x34, 0x01, 0x34, 0x01, 0x36, 0x01, 0x36, 0x01,
+ 0x38, 0x01, 0x39, 0x01, 0x39, 0x01, 0x3B, 0x01, 0x3B, 0x01, 0x3D, 0x01, 0x3D, 0x01, 0x3F, 0x01,
+ 0x3F, 0x01, 0x41, 0x01, 0x41, 0x01, 0x43, 0x01, 0x43, 0x01, 0x45, 0x01, 0x45, 0x01, 0x47, 0x01,
+ 0x47, 0x01, 0x49, 0x01, 0x4A, 0x01, 0x4A, 0x01, 0x4C, 0x01, 0x4C, 0x01, 0x4E, 0x01, 0x4E, 0x01,
+ 0x50, 0x01, 0x50, 0x01, 0x52, 0x01, 0x52, 0x01, 0x54, 0x01, 0x54, 0x01, 0x56, 0x01, 0x56, 0x01,
+ 0x58, 0x01, 0x58, 0x01, 0x5A, 0x01, 0x5A, 0x01, 0x5C, 0x01, 0x5C, 0x01, 0x5E, 0x01, 0x5E, 0x01,
+ 0x60, 0x01, 0x60, 0x01, 0x62, 0x01, 0x62, 0x01, 0x64, 0x01, 0x64, 0x01, 0x66, 0x01, 0x66, 0x01,
+ 0x68, 0x01, 0x68, 0x01, 0x6A, 0x01, 0x6A, 0x01, 0x6C, 0x01, 0x6C, 0x01, 0x6E, 0x01, 0x6E, 0x01,
+ 0x70, 0x01, 0x70, 0x01, 0x72, 0x01, 0x72, 0x01, 0x74, 0x01, 0x74, 0x01, 0x76, 0x01, 0x76, 0x01,
+ 0x78, 0x01, 0x79, 0x01, 0x79, 0x01, 0x7B, 0x01, 0x7B, 0x01, 0x7D, 0x01, 0x7D, 0x01, 0x7F, 0x01,
+ 0x43, 0x02, 0x81, 0x01, 0x82, 0x01, 0x82, 0x01, 0x84, 0x01, 0x84, 0x01, 0x86, 0x01, 0x87, 0x01,
+ 0x87, 0x01, 0x89, 0x01, 0x8A, 0x01, 0x8B, 0x01, 0x8B, 0x01, 0x8D, 0x01, 0x8E, 0x01, 0x8F, 0x01,
+ 0x90, 0x01, 0x91, 0x01, 0x91, 0x01, 0x93, 0x01, 0x94, 0x01, 0xF6, 0x01, 0x96, 0x01, 0x97, 0x01,
+ 0x98, 0x01, 0x98, 0x01, 0x3D, 0x02, 0x9B, 0x01, 0x9C, 0x01, 0x9D, 0x01, 0x20, 0x02, 0x9F, 0x01,
+ 0xA0, 0x01, 0xA0, 0x01, 0xA2, 0x01, 0xA2, 0x01, 0xA4, 0x01, 0xA4, 0x01, 0xA6, 0x01, 0xA7, 0x01,
+ 0xA7, 0x01, 0xA9, 0x01, 0xAA, 0x01, 0xAB, 0x01, 0xAC, 0x01, 0xAC, 0x01, 0xAE, 0x01, 0xAF, 0x01,
+ 0xAF, 0x01, 0xB1, 0x01, 0xB2, 0x01, 0xB3, 0x01, 0xB3, 0x01, 0xB5, 0x01, 0xB5, 0x01, 0xB7, 0x01,
+ 0xB8, 0x01, 0xB8, 0x01, 0xBA, 0x01, 0xBB, 0x01, 0xBC, 0x01, 0xBC, 0x01, 0xBE, 0x01, 0xF7, 0x01,
+ 0xC0, 0x01, 0xC1, 0x01, 0xC2, 0x01, 0xC3, 0x01, 0xC4, 0x01, 0xC5, 0x01, 0xC4, 0x01, 0xC7, 0x01,
+ 0xC8, 0x01, 0xC7, 0x01, 0xCA, 0x01, 0xCB, 0x01, 0xCA, 0x01, 0xCD, 0x01, 0xCD, 0x01, 0xCF, 0x01,
+ 0xCF, 0x01, 0xD1, 0x01, 0xD1, 0x01, 0xD3, 0x01, 0xD3, 0x01, 0xD5, 0x01, 0xD5, 0x01, 0xD7, 0x01,
+ 0xD7, 0x01, 0xD9, 0x01, 0xD9, 0x01, 0xDB, 0x01, 0xDB, 0x01, 0x8E, 0x01, 0xDE, 0x01, 0xDE, 0x01,
+ 0xE0, 0x01, 0xE0, 0x01, 0xE2, 0x01, 0xE2, 0x01, 0xE4, 0x01, 0xE4, 0x01, 0xE6, 0x01, 0xE6, 0x01,
+ 0xE8, 0x01, 0xE8, 0x01, 0xEA, 0x01, 0xEA, 0x01, 0xEC, 0x01, 0xEC, 0x01, 0xEE, 0x01, 0xEE, 0x01,
+ 0xF0, 0x01, 0xF1, 0x01, 0xF2, 0x01, 0xF1, 0x01, 0xF4, 0x01, 0xF4, 0x01, 0xF6, 0x01, 0xF7, 0x01,
+ 0xF8, 0x01, 0xF8, 0x01, 0xFA, 0x01, 0xFA, 0x01, 0xFC, 0x01, 0xFC, 0x01, 0xFE, 0x01, 0xFE, 0x01,
+ 0x00, 0x02, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x04, 0x02, 0x04, 0x02, 0x06, 0x02, 0x06, 0x02,
+ 0x08, 0x02, 0x08, 0x02, 0x0A, 0x02, 0x0A, 0x02, 0x0C, 0x02, 0x0C, 0x02, 0x0E, 0x02, 0x0E, 0x02,
+ 0x10, 0x02, 0x10, 0x02, 0x12, 0x02, 0x12, 0x02, 0x14, 0x02, 0x14, 0x02, 0x16, 0x02, 0x16, 0x02,
+ 0x18, 0x02, 0x18, 0x02, 0x1A, 0x02, 0x1A, 0x02, 0x1C, 0x02, 0x1C, 0x02, 0x1E, 0x02, 0x1E, 0x02,
+ 0x20, 0x02, 0x21, 0x02, 0x22, 0x02, 0x22, 0x02, 0x24, 0x02, 0x24, 0x02, 0x26, 0x02, 0x26, 0x02,
+ 0x28, 0x02, 0x28, 0x02, 0x2A, 0x02, 0x2A, 0x02, 0x2C, 0x02, 0x2C, 0x02, 0x2E, 0x02, 0x2E, 0x02,
+ 0x30, 0x02, 0x30, 0x02, 0x32, 0x02, 0x32, 0x02, 0x34, 0x02, 0x35, 0x02, 0x36, 0x02, 0x37, 0x02,
+ 0x38, 0x02, 0x39, 0x02, 0x65, 0x2C, 0x3B, 0x02, 0x3B, 0x02, 0x3D, 0x02, 0x66, 0x2C, 0x3F, 0x02,
+ 0x40, 0x02, 0x41, 0x02, 0x41, 0x02, 0x43, 0x02, 0x44, 0x02, 0x45, 0x02, 0x46, 0x02, 0x46, 0x02,
+ 0x48, 0x02, 0x48, 0x02, 0x4A, 0x02, 0x4A, 0x02, 0x4C, 0x02, 0x4C, 0x02, 0x4E, 0x02, 0x4E, 0x02,
+ 0x50, 0x02, 0x51, 0x02, 0x52, 0x02, 0x81, 0x01, 0x86, 0x01, 0x55, 0x02, 0x89, 0x01, 0x8A, 0x01,
+ 0x58, 0x02, 0x8F, 0x01, 0x5A, 0x02, 0x90, 0x01, 0x5C, 0x02, 0x5D, 0x02, 0x5E, 0x02, 0x5F, 0x02,
+ 0x93, 0x01, 0x61, 0x02, 0x62, 0x02, 0x94, 0x01, 0x64, 0x02, 0x65, 0x02, 0x66, 0x02, 0x67, 0x02,
+ 0x97, 0x01, 0x96, 0x01, 0x6A, 0x02, 0x62, 0x2C, 0x6C, 0x02, 0x6D, 0x02, 0x6E, 0x02, 0x9C, 0x01,
+ 0x70, 0x02, 0x71, 0x02, 0x9D, 0x01, 0x73, 0x02, 0x74, 0x02, 0x9F, 0x01, 0x76, 0x02, 0x77, 0x02,
+ 0x78, 0x02, 0x79, 0x02, 0x7A, 0x02, 0x7B, 0x02, 0x7C, 0x02, 0x64, 0x2C, 0x7E, 0x02, 0x7F, 0x02,
+ 0xA6, 0x01, 0x81, 0x02, 0x82, 0x02, 0xA9, 0x01, 0x84, 0x02, 0x85, 0x02, 0x86, 0x02, 0x87, 0x02,
+ 0xAE, 0x01, 0x44, 0x02, 0xB1, 0x01, 0xB2, 0x01, 0x45, 0x02, 0x8D, 0x02, 0x8E, 0x02, 0x8F, 0x02,
+ 0x90, 0x02, 0x91, 0x02, 0xB7, 0x01, 0x93, 0x02, 0x94, 0x02, 0x95, 0x02, 0x96, 0x02, 0x97, 0x02,
+ 0x98, 0x02, 0x99, 0x02, 0x9A, 0x02, 0x9B, 0x02, 0x9C, 0x02, 0x9D, 0x02, 0x9E, 0x02, 0x9F, 0x02,
+ 0xA0, 0x02, 0xA1, 0x02, 0xA2, 0x02, 0xA3, 0x02, 0xA4, 0x02, 0xA5, 0x02, 0xA6, 0x02, 0xA7, 0x02,
+ 0xA8, 0x02, 0xA9, 0x02, 0xAA, 0x02, 0xAB, 0x02, 0xAC, 0x02, 0xAD, 0x02, 0xAE, 0x02, 0xAF, 0x02,
+ 0xB0, 0x02, 0xB1, 0x02, 0xB2, 0x02, 0xB3, 0x02, 0xB4, 0x02, 0xB5, 0x02, 0xB6, 0x02, 0xB7, 0x02,
+ 0xB8, 0x02, 0xB9, 0x02, 0xBA, 0x02, 0xBB, 0x02, 0xBC, 0x02, 0xBD, 0x02, 0xBE, 0x02, 0xBF, 0x02,
+ 0xC0, 0x02, 0xC1, 0x02, 0xC2, 0x02, 0xC3, 0x02, 0xC4, 0x02, 0xC5, 0x02, 0xC6, 0x02, 0xC7, 0x02,
+ 0xC8, 0x02, 0xC9, 0x02, 0xCA, 0x02, 0xCB, 0x02, 0xCC, 0x02, 0xCD, 0x02, 0xCE, 0x02, 0xCF, 0x02,
+ 0xD0, 0x02, 0xD1, 0x02, 0xD2, 0x02, 0xD3, 0x02, 0xD4, 0x02, 0xD5, 0x02, 0xD6, 0x02, 0xD7, 0x02,
+ 0xD8, 0x02, 0xD9, 0x02, 0xDA, 0x02, 0xDB, 0x02, 0xDC, 0x02, 0xDD, 0x02, 0xDE, 0x02, 0xDF, 0x02,
+ 0xE0, 0x02, 0xE1, 0x02, 0xE2, 0x02, 0xE3, 0x02, 0xE4, 0x02, 0xE5, 0x02, 0xE6, 0x02, 0xE7, 0x02,
+ 0xE8, 0x02, 0xE9, 0x02, 0xEA, 0x02, 0xEB, 0x02, 0xEC, 0x02, 0xED, 0x02, 0xEE, 0x02, 0xEF, 0x02,
+ 0xF0, 0x02, 0xF1, 0x02, 0xF2, 0x02, 0xF3, 0x02, 0xF4, 0x02, 0xF5, 0x02, 0xF6, 0x02, 0xF7, 0x02,
+ 0xF8, 0x02, 0xF9, 0x02, 0xFA, 0x02, 0xFB, 0x02, 0xFC, 0x02, 0xFD, 0x02, 0xFE, 0x02, 0xFF, 0x02,
+ 0x00, 0x03, 0x01, 0x03, 0x02, 0x03, 0x03, 0x03, 0x04, 0x03, 0x05, 0x03, 0x06, 0x03, 0x07, 0x03,
+ 0x08, 0x03, 0x09, 0x03, 0x0A, 0x03, 0x0B, 0x03, 0x0C, 0x03, 0x0D, 0x03, 0x0E, 0x03, 0x0F, 0x03,
+ 0x10, 0x03, 0x11, 0x03, 0x12, 0x03, 0x13, 0x03, 0x14, 0x03, 0x15, 0x03, 0x16, 0x03, 0x17, 0x03,
+ 0x18, 0x03, 0x19, 0x03, 0x1A, 0x03, 0x1B, 0x03, 0x1C, 0x03, 0x1D, 0x03, 0x1E, 0x03, 0x1F, 0x03,
+ 0x20, 0x03, 0x21, 0x03, 0x22, 0x03, 0x23, 0x03, 0x24, 0x03, 0x25, 0x03, 0x26, 0x03, 0x27, 0x03,
+ 0x28, 0x03, 0x29, 0x03, 0x2A, 0x03, 0x2B, 0x03, 0x2C, 0x03, 0x2D, 0x03, 0x2E, 0x03, 0x2F, 0x03,
+ 0x30, 0x03, 0x31, 0x03, 0x32, 0x03, 0x33, 0x03, 0x34, 0x03, 0x35, 0x03, 0x36, 0x03, 0x37, 0x03,
+ 0x38, 0x03, 0x39, 0x03, 0x3A, 0x03, 0x3B, 0x03, 0x3C, 0x03, 0x3D, 0x03, 0x3E, 0x03, 0x3F, 0x03,
+ 0x40, 0x03, 0x41, 0x03, 0x42, 0x03, 0x43, 0x03, 0x44, 0x03, 0x45, 0x03, 0x46, 0x03, 0x47, 0x03,
+ 0x48, 0x03, 0x49, 0x03, 0x4A, 0x03, 0x4B, 0x03, 0x4C, 0x03, 0x4D, 0x03, 0x4E, 0x03, 0x4F, 0x03,
+ 0x50, 0x03, 0x51, 0x03, 0x52, 0x03, 0x53, 0x03, 0x54, 0x03, 0x55, 0x03, 0x56, 0x03, 0x57, 0x03,
+ 0x58, 0x03, 0x59, 0x03, 0x5A, 0x03, 0x5B, 0x03, 0x5C, 0x03, 0x5D, 0x03, 0x5E, 0x03, 0x5F, 0x03,
+ 0x60, 0x03, 0x61, 0x03, 0x62, 0x03, 0x63, 0x03, 0x64, 0x03, 0x65, 0x03, 0x66, 0x03, 0x67, 0x03,
+ 0x68, 0x03, 0x69, 0x03, 0x6A, 0x03, 0x6B, 0x03, 0x6C, 0x03, 0x6D, 0x03, 0x6E, 0x03, 0x6F, 0x03,
+ 0x70, 0x03, 0x71, 0x03, 0x72, 0x03, 0x73, 0x03, 0x74, 0x03, 0x75, 0x03, 0x76, 0x03, 0x77, 0x03,
+ 0x78, 0x03, 0x79, 0x03, 0x7A, 0x03, 0xFD, 0x03, 0xFE, 0x03, 0xFF, 0x03, 0x7E, 0x03, 0x7F, 0x03,
+ 0x80, 0x03, 0x81, 0x03, 0x82, 0x03, 0x83, 0x03, 0x84, 0x03, 0x85, 0x03, 0x86, 0x03, 0x87, 0x03,
+ 0x88, 0x03, 0x89, 0x03, 0x8A, 0x03, 0x8B, 0x03, 0x8C, 0x03, 0x8D, 0x03, 0x8E, 0x03, 0x8F, 0x03,
+ 0x90, 0x03, 0x91, 0x03, 0x92, 0x03, 0x93, 0x03, 0x94, 0x03, 0x95, 0x03, 0x96, 0x03, 0x97, 0x03,
+ 0x98, 0x03, 0x99, 0x03, 0x9A, 0x03, 0x9B, 0x03, 0x9C, 0x03, 0x9D, 0x03, 0x9E, 0x03, 0x9F, 0x03,
+ 0xA0, 0x03, 0xA1, 0x03, 0xA2, 0x03, 0xA3, 0x03, 0xA4, 0x03, 0xA5, 0x03, 0xA6, 0x03, 0xA7, 0x03,
+ 0xA8, 0x03, 0xA9, 0x03, 0xAA, 0x03, 0xAB, 0x03, 0x86, 0x03, 0x88, 0x03, 0x89, 0x03, 0x8A, 0x03,
+ 0xB0, 0x03, 0x91, 0x03, 0x92, 0x03, 0x93, 0x03, 0x94, 0x03, 0x95, 0x03, 0x96, 0x03, 0x97, 0x03,
+ 0x98, 0x03, 0x99, 0x03, 0x9A, 0x03, 0x9B, 0x03, 0x9C, 0x03, 0x9D, 0x03, 0x9E, 0x03, 0x9F, 0x03,
+ 0xA0, 0x03, 0xA1, 0x03, 0xA3, 0x03, 0xA3, 0x03, 0xA4, 0x03, 0xA5, 0x03, 0xA6, 0x03, 0xA7, 0x03,
+ 0xA8, 0x03, 0xA9, 0x03, 0xAA, 0x03, 0xAB, 0x03, 0x8C, 0x03, 0x8E, 0x03, 0x8F, 0x03, 0xCF, 0x03,
+ 0xD0, 0x03, 0xD1, 0x03, 0xD2, 0x03, 0xD3, 0x03, 0xD4, 0x03, 0xD5, 0x03, 0xD6, 0x03, 0xD7, 0x03,
+ 0xD8, 0x03, 0xD8, 0x03, 0xDA, 0x03, 0xDA, 0x03, 0xDC, 0x03, 0xDC, 0x03, 0xDE, 0x03, 0xDE, 0x03,
+ 0xE0, 0x03, 0xE0, 0x03, 0xE2, 0x03, 0xE2, 0x03, 0xE4, 0x03, 0xE4, 0x03, 0xE6, 0x03, 0xE6, 0x03,
+ 0xE8, 0x03, 0xE8, 0x03, 0xEA, 0x03, 0xEA, 0x03, 0xEC, 0x03, 0xEC, 0x03, 0xEE, 0x03, 0xEE, 0x03,
+ 0xF0, 0x03, 0xF1, 0x03, 0xF9, 0x03, 0xF3, 0x03, 0xF4, 0x03, 0xF5, 0x03, 0xF6, 0x03, 0xF7, 0x03,
+ 0xF7, 0x03, 0xF9, 0x03, 0xFA, 0x03, 0xFA, 0x03, 0xFC, 0x03, 0xFD, 0x03, 0xFE, 0x03, 0xFF, 0x03,
+ 0x00, 0x04, 0x01, 0x04, 0x02, 0x04, 0x03, 0x04, 0x04, 0x04, 0x05, 0x04, 0x06, 0x04, 0x07, 0x04,
+ 0x08, 0x04, 0x09, 0x04, 0x0A, 0x04, 0x0B, 0x04, 0x0C, 0x04, 0x0D, 0x04, 0x0E, 0x04, 0x0F, 0x04,
+ 0x10, 0x04, 0x11, 0x04, 0x12, 0x04, 0x13, 0x04, 0x14, 0x04, 0x15, 0x04, 0x16, 0x04, 0x17, 0x04,
+ 0x18, 0x04, 0x19, 0x04, 0x1A, 0x04, 0x1B, 0x04, 0x1C, 0x04, 0x1D, 0x04, 0x1E, 0x04, 0x1F, 0x04,
+ 0x20, 0x04, 0x21, 0x04, 0x22, 0x04, 0x23, 0x04, 0x24, 0x04, 0x25, 0x04, 0x26, 0x04, 0x27, 0x04,
+ 0x28, 0x04, 0x29, 0x04, 0x2A, 0x04, 0x2B, 0x04, 0x2C, 0x04, 0x2D, 0x04, 0x2E, 0x04, 0x2F, 0x04,
+ 0x10, 0x04, 0x11, 0x04, 0x12, 0x04, 0x13, 0x04, 0x14, 0x04, 0x15, 0x04, 0x16, 0x04, 0x17, 0x04,
+ 0x18, 0x04, 0x19, 0x04, 0x1A, 0x04, 0x1B, 0x04, 0x1C, 0x04, 0x1D, 0x04, 0x1E, 0x04, 0x1F, 0x04,
+ 0x20, 0x04, 0x21, 0x04, 0x22, 0x04, 0x23, 0x04, 0x24, 0x04, 0x25, 0x04, 0x26, 0x04, 0x27, 0x04,
+ 0x28, 0x04, 0x29, 0x04, 0x2A, 0x04, 0x2B, 0x04, 0x2C, 0x04, 0x2D, 0x04, 0x2E, 0x04, 0x2F, 0x04,
+ 0x00, 0x04, 0x01, 0x04, 0x02, 0x04, 0x03, 0x04, 0x04, 0x04, 0x05, 0x04, 0x06, 0x04, 0x07, 0x04,
+ 0x08, 0x04, 0x09, 0x04, 0x0A, 0x04, 0x0B, 0x04, 0x0C, 0x04, 0x0D, 0x04, 0x0E, 0x04, 0x0F, 0x04,
+ 0x60, 0x04, 0x60, 0x04, 0x62, 0x04, 0x62, 0x04, 0x64, 0x04, 0x64, 0x04, 0x66, 0x04, 0x66, 0x04,
+ 0x68, 0x04, 0x68, 0x04, 0x6A, 0x04, 0x6A, 0x04, 0x6C, 0x04, 0x6C, 0x04, 0x6E, 0x04, 0x6E, 0x04,
+ 0x70, 0x04, 0x70, 0x04, 0x72, 0x04, 0x72, 0x04, 0x74, 0x04, 0x74, 0x04, 0x76, 0x04, 0x76, 0x04,
+ 0x78, 0x04, 0x78, 0x04, 0x7A, 0x04, 0x7A, 0x04, 0x7C, 0x04, 0x7C, 0x04, 0x7E, 0x04, 0x7E, 0x04,
+ 0x80, 0x04, 0x80, 0x04, 0x82, 0x04, 0x83, 0x04, 0x84, 0x04, 0x85, 0x04, 0x86, 0x04, 0x87, 0x04,
+ 0x88, 0x04, 0x89, 0x04, 0x8A, 0x04, 0x8A, 0x04, 0x8C, 0x04, 0x8C, 0x04, 0x8E, 0x04, 0x8E, 0x04,
+ 0x90, 0x04, 0x90, 0x04, 0x92, 0x04, 0x92, 0x04, 0x94, 0x04, 0x94, 0x04, 0x96, 0x04, 0x96, 0x04,
+ 0x98, 0x04, 0x98, 0x04, 0x9A, 0x04, 0x9A, 0x04, 0x9C, 0x04, 0x9C, 0x04, 0x9E, 0x04, 0x9E, 0x04,
+ 0xA0, 0x04, 0xA0, 0x04, 0xA2, 0x04, 0xA2, 0x04, 0xA4, 0x04, 0xA4, 0x04, 0xA6, 0x04, 0xA6, 0x04,
+ 0xA8, 0x04, 0xA8, 0x04, 0xAA, 0x04, 0xAA, 0x04, 0xAC, 0x04, 0xAC, 0x04, 0xAE, 0x04, 0xAE, 0x04,
+ 0xB0, 0x04, 0xB0, 0x04, 0xB2, 0x04, 0xB2, 0x04, 0xB4, 0x04, 0xB4, 0x04, 0xB6, 0x04, 0xB6, 0x04,
+ 0xB8, 0x04, 0xB8, 0x04, 0xBA, 0x04, 0xBA, 0x04, 0xBC, 0x04, 0xBC, 0x04, 0xBE, 0x04, 0xBE, 0x04,
+ 0xC0, 0x04, 0xC1, 0x04, 0xC1, 0x04, 0xC3, 0x04, 0xC3, 0x04, 0xC5, 0x04, 0xC5, 0x04, 0xC7, 0x04,
+ 0xC7, 0x04, 0xC9, 0x04, 0xC9, 0x04, 0xCB, 0x04, 0xCB, 0x04, 0xCD, 0x04, 0xCD, 0x04, 0xC0, 0x04,
+ 0xD0, 0x04, 0xD0, 0x04, 0xD2, 0x04, 0xD2, 0x04, 0xD4, 0x04, 0xD4, 0x04, 0xD6, 0x04, 0xD6, 0x04,
+ 0xD8, 0x04, 0xD8, 0x04, 0xDA, 0x04, 0xDA, 0x04, 0xDC, 0x04, 0xDC, 0x04, 0xDE, 0x04, 0xDE, 0x04,
+ 0xE0, 0x04, 0xE0, 0x04, 0xE2, 0x04, 0xE2, 0x04, 0xE4, 0x04, 0xE4, 0x04, 0xE6, 0x04, 0xE6, 0x04,
+ 0xE8, 0x04, 0xE8, 0x04, 0xEA, 0x04, 0xEA, 0x04, 0xEC, 0x04, 0xEC, 0x04, 0xEE, 0x04, 0xEE, 0x04,
+ 0xF0, 0x04, 0xF0, 0x04, 0xF2, 0x04, 0xF2, 0x04, 0xF4, 0x04, 0xF4, 0x04, 0xF6, 0x04, 0xF6, 0x04,
+ 0xF8, 0x04, 0xF8, 0x04, 0xFA, 0x04, 0xFA, 0x04, 0xFC, 0x04, 0xFC, 0x04, 0xFE, 0x04, 0xFE, 0x04,
+ 0x00, 0x05, 0x00, 0x05, 0x02, 0x05, 0x02, 0x05, 0x04, 0x05, 0x04, 0x05, 0x06, 0x05, 0x06, 0x05,
+ 0x08, 0x05, 0x08, 0x05, 0x0A, 0x05, 0x0A, 0x05, 0x0C, 0x05, 0x0C, 0x05, 0x0E, 0x05, 0x0E, 0x05,
+ 0x10, 0x05, 0x10, 0x05, 0x12, 0x05, 0x12, 0x05, 0x14, 0x05, 0x15, 0x05, 0x16, 0x05, 0x17, 0x05,
+ 0x18, 0x05, 0x19, 0x05, 0x1A, 0x05, 0x1B, 0x05, 0x1C, 0x05, 0x1D, 0x05, 0x1E, 0x05, 0x1F, 0x05,
+ 0x20, 0x05, 0x21, 0x05, 0x22, 0x05, 0x23, 0x05, 0x24, 0x05, 0x25, 0x05, 0x26, 0x05, 0x27, 0x05,
+ 0x28, 0x05, 0x29, 0x05, 0x2A, 0x05, 0x2B, 0x05, 0x2C, 0x05, 0x2D, 0x05, 0x2E, 0x05, 0x2F, 0x05,
+ 0x30, 0x05, 0x31, 0x05, 0x32, 0x05, 0x33, 0x05, 0x34, 0x05, 0x35, 0x05, 0x36, 0x05, 0x37, 0x05,
+ 0x38, 0x05, 0x39, 0x05, 0x3A, 0x05, 0x3B, 0x05, 0x3C, 0x05, 0x3D, 0x05, 0x3E, 0x05, 0x3F, 0x05,
+ 0x40, 0x05, 0x41, 0x05, 0x42, 0x05, 0x43, 0x05, 0x44, 0x05, 0x45, 0x05, 0x46, 0x05, 0x47, 0x05,
+ 0x48, 0x05, 0x49, 0x05, 0x4A, 0x05, 0x4B, 0x05, 0x4C, 0x05, 0x4D, 0x05, 0x4E, 0x05, 0x4F, 0x05,
+ 0x50, 0x05, 0x51, 0x05, 0x52, 0x05, 0x53, 0x05, 0x54, 0x05, 0x55, 0x05, 0x56, 0x05, 0x57, 0x05,
+ 0x58, 0x05, 0x59, 0x05, 0x5A, 0x05, 0x5B, 0x05, 0x5C, 0x05, 0x5D, 0x05, 0x5E, 0x05, 0x5F, 0x05,
+ 0x60, 0x05, 0x31, 0x05, 0x32, 0x05, 0x33, 0x05, 0x34, 0x05, 0x35, 0x05, 0x36, 0x05, 0x37, 0x05,
+ 0x38, 0x05, 0x39, 0x05, 0x3A, 0x05, 0x3B, 0x05, 0x3C, 0x05, 0x3D, 0x05, 0x3E, 0x05, 0x3F, 0x05,
+ 0x40, 0x05, 0x41, 0x05, 0x42, 0x05, 0x43, 0x05, 0x44, 0x05, 0x45, 0x05, 0x46, 0x05, 0x47, 0x05,
+ 0x48, 0x05, 0x49, 0x05, 0x4A, 0x05, 0x4B, 0x05, 0x4C, 0x05, 0x4D, 0x05, 0x4E, 0x05, 0x4F, 0x05,
+ 0x50, 0x05, 0x51, 0x05, 0x52, 0x05, 0x53, 0x05, 0x54, 0x05, 0x55, 0x05, 0x56, 0x05, 0xFF, 0xFF,
+ 0xF6, 0x17, 0x63, 0x2C, 0x7E, 0x1D, 0x7F, 0x1D, 0x80, 0x1D, 0x81, 0x1D, 0x82, 0x1D, 0x83, 0x1D,
+ 0x84, 0x1D, 0x85, 0x1D, 0x86, 0x1D, 0x87, 0x1D, 0x88, 0x1D, 0x89, 0x1D, 0x8A, 0x1D, 0x8B, 0x1D,
+ 0x8C, 0x1D, 0x8D, 0x1D, 0x8E, 0x1D, 0x8F, 0x1D, 0x90, 0x1D, 0x91, 0x1D, 0x92, 0x1D, 0x93, 0x1D,
+ 0x94, 0x1D, 0x95, 0x1D, 0x96, 0x1D, 0x97, 0x1D, 0x98, 0x1D, 0x99, 0x1D, 0x9A, 0x1D, 0x9B, 0x1D,
+ 0x9C, 0x1D, 0x9D, 0x1D, 0x9E, 0x1D, 0x9F, 0x1D, 0xA0, 0x1D, 0xA1, 0x1D, 0xA2, 0x1D, 0xA3, 0x1D,
+ 0xA4, 0x1D, 0xA5, 0x1D, 0xA6, 0x1D, 0xA7, 0x1D, 0xA8, 0x1D, 0xA9, 0x1D, 0xAA, 0x1D, 0xAB, 0x1D,
+ 0xAC, 0x1D, 0xAD, 0x1D, 0xAE, 0x1D, 0xAF, 0x1D, 0xB0, 0x1D, 0xB1, 0x1D, 0xB2, 0x1D, 0xB3, 0x1D,
+ 0xB4, 0x1D, 0xB5, 0x1D, 0xB6, 0x1D, 0xB7, 0x1D, 0xB8, 0x1D, 0xB9, 0x1D, 0xBA, 0x1D, 0xBB, 0x1D,
+ 0xBC, 0x1D, 0xBD, 0x1D, 0xBE, 0x1D, 0xBF, 0x1D, 0xC0, 0x1D, 0xC1, 0x1D, 0xC2, 0x1D, 0xC3, 0x1D,
+ 0xC4, 0x1D, 0xC5, 0x1D, 0xC6, 0x1D, 0xC7, 0x1D, 0xC8, 0x1D, 0xC9, 0x1D, 0xCA, 0x1D, 0xCB, 0x1D,
+ 0xCC, 0x1D, 0xCD, 0x1D, 0xCE, 0x1D, 0xCF, 0x1D, 0xD0, 0x1D, 0xD1, 0x1D, 0xD2, 0x1D, 0xD3, 0x1D,
+ 0xD4, 0x1D, 0xD5, 0x1D, 0xD6, 0x1D, 0xD7, 0x1D, 0xD8, 0x1D, 0xD9, 0x1D, 0xDA, 0x1D, 0xDB, 0x1D,
+ 0xDC, 0x1D, 0xDD, 0x1D, 0xDE, 0x1D, 0xDF, 0x1D, 0xE0, 0x1D, 0xE1, 0x1D, 0xE2, 0x1D, 0xE3, 0x1D,
+ 0xE4, 0x1D, 0xE5, 0x1D, 0xE6, 0x1D, 0xE7, 0x1D, 0xE8, 0x1D, 0xE9, 0x1D, 0xEA, 0x1D, 0xEB, 0x1D,
+ 0xEC, 0x1D, 0xED, 0x1D, 0xEE, 0x1D, 0xEF, 0x1D, 0xF0, 0x1D, 0xF1, 0x1D, 0xF2, 0x1D, 0xF3, 0x1D,
+ 0xF4, 0x1D, 0xF5, 0x1D, 0xF6, 0x1D, 0xF7, 0x1D, 0xF8, 0x1D, 0xF9, 0x1D, 0xFA, 0x1D, 0xFB, 0x1D,
+ 0xFC, 0x1D, 0xFD, 0x1D, 0xFE, 0x1D, 0xFF, 0x1D, 0x00, 0x1E, 0x00, 0x1E, 0x02, 0x1E, 0x02, 0x1E,
+ 0x04, 0x1E, 0x04, 0x1E, 0x06, 0x1E, 0x06, 0x1E, 0x08, 0x1E, 0x08, 0x1E, 0x0A, 0x1E, 0x0A, 0x1E,
+ 0x0C, 0x1E, 0x0C, 0x1E, 0x0E, 0x1E, 0x0E, 0x1E, 0x10, 0x1E, 0x10, 0x1E, 0x12, 0x1E, 0x12, 0x1E,
+ 0x14, 0x1E, 0x14, 0x1E, 0x16, 0x1E, 0x16, 0x1E, 0x18, 0x1E, 0x18, 0x1E, 0x1A, 0x1E, 0x1A, 0x1E,
+ 0x1C, 0x1E, 0x1C, 0x1E, 0x1E, 0x1E, 0x1E, 0x1E, 0x20, 0x1E, 0x20, 0x1E, 0x22, 0x1E, 0x22, 0x1E,
+ 0x24, 0x1E, 0x24, 0x1E, 0x26, 0x1E, 0x26, 0x1E, 0x28, 0x1E, 0x28, 0x1E, 0x2A, 0x1E, 0x2A, 0x1E,
+ 0x2C, 0x1E, 0x2C, 0x1E, 0x2E, 0x1E, 0x2E, 0x1E, 0x30, 0x1E, 0x30, 0x1E, 0x32, 0x1E, 0x32, 0x1E,
+ 0x34, 0x1E, 0x34, 0x1E, 0x36, 0x1E, 0x36, 0x1E, 0x38, 0x1E, 0x38, 0x1E, 0x3A, 0x1E, 0x3A, 0x1E,
+ 0x3C, 0x1E, 0x3C, 0x1E, 0x3E, 0x1E, 0x3E, 0x1E, 0x40, 0x1E, 0x40, 0x1E, 0x42, 0x1E, 0x42, 0x1E,
+ 0x44, 0x1E, 0x44, 0x1E, 0x46, 0x1E, 0x46, 0x1E, 0x48, 0x1E, 0x48, 0x1E, 0x4A, 0x1E, 0x4A, 0x1E,
+ 0x4C, 0x1E, 0x4C, 0x1E, 0x4E, 0x1E, 0x4E, 0x1E, 0x50, 0x1E, 0x50, 0x1E, 0x52, 0x1E, 0x52, 0x1E,
+ 0x54, 0x1E, 0x54, 0x1E, 0x56, 0x1E, 0x56, 0x1E, 0x58, 0x1E, 0x58, 0x1E, 0x5A, 0x1E, 0x5A, 0x1E,
+ 0x5C, 0x1E, 0x5C, 0x1E, 0x5E, 0x1E, 0x5E, 0x1E, 0x60, 0x1E, 0x60, 0x1E, 0x62, 0x1E, 0x62, 0x1E,
+ 0x64, 0x1E, 0x64, 0x1E, 0x66, 0x1E, 0x66, 0x1E, 0x68, 0x1E, 0x68, 0x1E, 0x6A, 0x1E, 0x6A, 0x1E,
+ 0x6C, 0x1E, 0x6C, 0x1E, 0x6E, 0x1E, 0x6E, 0x1E, 0x70, 0x1E, 0x70, 0x1E, 0x72, 0x1E, 0x72, 0x1E,
+ 0x74, 0x1E, 0x74, 0x1E, 0x76, 0x1E, 0x76, 0x1E, 0x78, 0x1E, 0x78, 0x1E, 0x7A, 0x1E, 0x7A, 0x1E,
+ 0x7C, 0x1E, 0x7C, 0x1E, 0x7E, 0x1E, 0x7E, 0x1E, 0x80, 0x1E, 0x80, 0x1E, 0x82, 0x1E, 0x82, 0x1E,
+ 0x84, 0x1E, 0x84, 0x1E, 0x86, 0x1E, 0x86, 0x1E, 0x88, 0x1E, 0x88, 0x1E, 0x8A, 0x1E, 0x8A, 0x1E,
+ 0x8C, 0x1E, 0x8C, 0x1E, 0x8E, 0x1E, 0x8E, 0x1E, 0x90, 0x1E, 0x90, 0x1E, 0x92, 0x1E, 0x92, 0x1E,
+ 0x94, 0x1E, 0x94, 0x1E, 0x96, 0x1E, 0x97, 0x1E, 0x98, 0x1E, 0x99, 0x1E, 0x9A, 0x1E, 0x9B, 0x1E,
+ 0x9C, 0x1E, 0x9D, 0x1E, 0x9E, 0x1E, 0x9F, 0x1E, 0xA0, 0x1E, 0xA0, 0x1E, 0xA2, 0x1E, 0xA2, 0x1E,
+ 0xA4, 0x1E, 0xA4, 0x1E, 0xA6, 0x1E, 0xA6, 0x1E, 0xA8, 0x1E, 0xA8, 0x1E, 0xAA, 0x1E, 0xAA, 0x1E,
+ 0xAC, 0x1E, 0xAC, 0x1E, 0xAE, 0x1E, 0xAE, 0x1E, 0xB0, 0x1E, 0xB0, 0x1E, 0xB2, 0x1E, 0xB2, 0x1E,
+ 0xB4, 0x1E, 0xB4, 0x1E, 0xB6, 0x1E, 0xB6, 0x1E, 0xB8, 0x1E, 0xB8, 0x1E, 0xBA, 0x1E, 0xBA, 0x1E,
+ 0xBC, 0x1E, 0xBC, 0x1E, 0xBE, 0x1E, 0xBE, 0x1E, 0xC0, 0x1E, 0xC0, 0x1E, 0xC2, 0x1E, 0xC2, 0x1E,
+ 0xC4, 0x1E, 0xC4, 0x1E, 0xC6, 0x1E, 0xC6, 0x1E, 0xC8, 0x1E, 0xC8, 0x1E, 0xCA, 0x1E, 0xCA, 0x1E,
+ 0xCC, 0x1E, 0xCC, 0x1E, 0xCE, 0x1E, 0xCE, 0x1E, 0xD0, 0x1E, 0xD0, 0x1E, 0xD2, 0x1E, 0xD2, 0x1E,
+ 0xD4, 0x1E, 0xD4, 0x1E, 0xD6, 0x1E, 0xD6, 0x1E, 0xD8, 0x1E, 0xD8, 0x1E, 0xDA, 0x1E, 0xDA, 0x1E,
+ 0xDC, 0x1E, 0xDC, 0x1E, 0xDE, 0x1E, 0xDE, 0x1E, 0xE0, 0x1E, 0xE0, 0x1E, 0xE2, 0x1E, 0xE2, 0x1E,
+ 0xE4, 0x1E, 0xE4, 0x1E, 0xE6, 0x1E, 0xE6, 0x1E, 0xE8, 0x1E, 0xE8, 0x1E, 0xEA, 0x1E, 0xEA, 0x1E,
+ 0xEC, 0x1E, 0xEC, 0x1E, 0xEE, 0x1E, 0xEE, 0x1E, 0xF0, 0x1E, 0xF0, 0x1E, 0xF2, 0x1E, 0xF2, 0x1E,
+ 0xF4, 0x1E, 0xF4, 0x1E, 0xF6, 0x1E, 0xF6, 0x1E, 0xF8, 0x1E, 0xF8, 0x1E, 0xFA, 0x1E, 0xFB, 0x1E,
+ 0xFC, 0x1E, 0xFD, 0x1E, 0xFE, 0x1E, 0xFF, 0x1E, 0x08, 0x1F, 0x09, 0x1F, 0x0A, 0x1F, 0x0B, 0x1F,
+ 0x0C, 0x1F, 0x0D, 0x1F, 0x0E, 0x1F, 0x0F, 0x1F, 0x08, 0x1F, 0x09, 0x1F, 0x0A, 0x1F, 0x0B, 0x1F,
+ 0x0C, 0x1F, 0x0D, 0x1F, 0x0E, 0x1F, 0x0F, 0x1F, 0x18, 0x1F, 0x19, 0x1F, 0x1A, 0x1F, 0x1B, 0x1F,
+ 0x1C, 0x1F, 0x1D, 0x1F, 0x16, 0x1F, 0x17, 0x1F, 0x18, 0x1F, 0x19, 0x1F, 0x1A, 0x1F, 0x1B, 0x1F,
+ 0x1C, 0x1F, 0x1D, 0x1F, 0x1E, 0x1F, 0x1F, 0x1F, 0x28, 0x1F, 0x29, 0x1F, 0x2A, 0x1F, 0x2B, 0x1F,
+ 0x2C, 0x1F, 0x2D, 0x1F, 0x2E, 0x1F, 0x2F, 0x1F, 0x28, 0x1F, 0x29, 0x1F, 0x2A, 0x1F, 0x2B, 0x1F,
+ 0x2C, 0x1F, 0x2D, 0x1F, 0x2E, 0x1F, 0x2F, 0x1F, 0x38, 0x1F, 0x39, 0x1F, 0x3A, 0x1F, 0x3B, 0x1F,
+ 0x3C, 0x1F, 0x3D, 0x1F, 0x3E, 0x1F, 0x3F, 0x1F, 0x38, 0x1F, 0x39, 0x1F, 0x3A, 0x1F, 0x3B, 0x1F,
+ 0x3C, 0x1F, 0x3D, 0x1F, 0x3E, 0x1F, 0x3F, 0x1F, 0x48, 0x1F, 0x49, 0x1F, 0x4A, 0x1F, 0x4B, 0x1F,
+ 0x4C, 0x1F, 0x4D, 0x1F, 0x46, 0x1F, 0x47, 0x1F, 0x48, 0x1F, 0x49, 0x1F, 0x4A, 0x1F, 0x4B, 0x1F,
+ 0x4C, 0x1F, 0x4D, 0x1F, 0x4E, 0x1F, 0x4F, 0x1F, 0x50, 0x1F, 0x59, 0x1F, 0x52, 0x1F, 0x5B, 0x1F,
+ 0x54, 0x1F, 0x5D, 0x1F, 0x56, 0x1F, 0x5F, 0x1F, 0x58, 0x1F, 0x59, 0x1F, 0x5A, 0x1F, 0x5B, 0x1F,
+ 0x5C, 0x1F, 0x5D, 0x1F, 0x5E, 0x1F, 0x5F, 0x1F, 0x68, 0x1F, 0x69, 0x1F, 0x6A, 0x1F, 0x6B, 0x1F,
+ 0x6C, 0x1F, 0x6D, 0x1F, 0x6E, 0x1F, 0x6F, 0x1F, 0x68, 0x1F, 0x69, 0x1F, 0x6A, 0x1F, 0x6B, 0x1F,
+ 0x6C, 0x1F, 0x6D, 0x1F, 0x6E, 0x1F, 0x6F, 0x1F, 0xBA, 0x1F, 0xBB, 0x1F, 0xC8, 0x1F, 0xC9, 0x1F,
+ 0xCA, 0x1F, 0xCB, 0x1F, 0xDA, 0x1F, 0xDB, 0x1F, 0xF8, 0x1F, 0xF9, 0x1F, 0xEA, 0x1F, 0xEB, 0x1F,
+ 0xFA, 0x1F, 0xFB, 0x1F, 0x7E, 0x1F, 0x7F, 0x1F, 0x88, 0x1F, 0x89, 0x1F, 0x8A, 0x1F, 0x8B, 0x1F,
+ 0x8C, 0x1F, 0x8D, 0x1F, 0x8E, 0x1F, 0x8F, 0x1F, 0x88, 0x1F, 0x89, 0x1F, 0x8A, 0x1F, 0x8B, 0x1F,
+ 0x8C, 0x1F, 0x8D, 0x1F, 0x8E, 0x1F, 0x8F, 0x1F, 0x98, 0x1F, 0x99, 0x1F, 0x9A, 0x1F, 0x9B, 0x1F,
+ 0x9C, 0x1F, 0x9D, 0x1F, 0x9E, 0x1F, 0x9F, 0x1F, 0x98, 0x1F, 0x99, 0x1F, 0x9A, 0x1F, 0x9B, 0x1F,
+ 0x9C, 0x1F, 0x9D, 0x1F, 0x9E, 0x1F, 0x9F, 0x1F, 0xA8, 0x1F, 0xA9, 0x1F, 0xAA, 0x1F, 0xAB, 0x1F,
+ 0xAC, 0x1F, 0xAD, 0x1F, 0xAE, 0x1F, 0xAF, 0x1F, 0xA8, 0x1F, 0xA9, 0x1F, 0xAA, 0x1F, 0xAB, 0x1F,
+ 0xAC, 0x1F, 0xAD, 0x1F, 0xAE, 0x1F, 0xAF, 0x1F, 0xB8, 0x1F, 0xB9, 0x1F, 0xB2, 0x1F, 0xBC, 0x1F,
+ 0xB4, 0x1F, 0xB5, 0x1F, 0xB6, 0x1F, 0xB7, 0x1F, 0xB8, 0x1F, 0xB9, 0x1F, 0xBA, 0x1F, 0xBB, 0x1F,
+ 0xBC, 0x1F, 0xBD, 0x1F, 0xBE, 0x1F, 0xBF, 0x1F, 0xC0, 0x1F, 0xC1, 0x1F, 0xC2, 0x1F, 0xC3, 0x1F,
+ 0xC4, 0x1F, 0xC5, 0x1F, 0xC6, 0x1F, 0xC7, 0x1F, 0xC8, 0x1F, 0xC9, 0x1F, 0xCA, 0x1F, 0xCB, 0x1F,
+ 0xC3, 0x1F, 0xCD, 0x1F, 0xCE, 0x1F, 0xCF, 0x1F, 0xD8, 0x1F, 0xD9, 0x1F, 0xD2, 0x1F, 0xD3, 0x1F,
+ 0xD4, 0x1F, 0xD5, 0x1F, 0xD6, 0x1F, 0xD7, 0x1F, 0xD8, 0x1F, 0xD9, 0x1F, 0xDA, 0x1F, 0xDB, 0x1F,
+ 0xDC, 0x1F, 0xDD, 0x1F, 0xDE, 0x1F, 0xDF, 0x1F, 0xE8, 0x1F, 0xE9, 0x1F, 0xE2, 0x1F, 0xE3, 0x1F,
+ 0xE4, 0x1F, 0xEC, 0x1F, 0xE6, 0x1F, 0xE7, 0x1F, 0xE8, 0x1F, 0xE9, 0x1F, 0xEA, 0x1F, 0xEB, 0x1F,
+ 0xEC, 0x1F, 0xED, 0x1F, 0xEE, 0x1F, 0xEF, 0x1F, 0xF0, 0x1F, 0xF1, 0x1F, 0xF2, 0x1F, 0xF3, 0x1F,
+ 0xF4, 0x1F, 0xF5, 0x1F, 0xF6, 0x1F, 0xF7, 0x1F, 0xF8, 0x1F, 0xF9, 0x1F, 0xFA, 0x1F, 0xFB, 0x1F,
+ 0xF3, 0x1F, 0xFD, 0x1F, 0xFE, 0x1F, 0xFF, 0x1F, 0x00, 0x20, 0x01, 0x20, 0x02, 0x20, 0x03, 0x20,
+ 0x04, 0x20, 0x05, 0x20, 0x06, 0x20, 0x07, 0x20, 0x08, 0x20, 0x09, 0x20, 0x0A, 0x20, 0x0B, 0x20,
+ 0x0C, 0x20, 0x0D, 0x20, 0x0E, 0x20, 0x0F, 0x20, 0x10, 0x20, 0x11, 0x20, 0x12, 0x20, 0x13, 0x20,
+ 0x14, 0x20, 0x15, 0x20, 0x16, 0x20, 0x17, 0x20, 0x18, 0x20, 0x19, 0x20, 0x1A, 0x20, 0x1B, 0x20,
+ 0x1C, 0x20, 0x1D, 0x20, 0x1E, 0x20, 0x1F, 0x20, 0x20, 0x20, 0x21, 0x20, 0x22, 0x20, 0x23, 0x20,
+ 0x24, 0x20, 0x25, 0x20, 0x26, 0x20, 0x27, 0x20, 0x28, 0x20, 0x29, 0x20, 0x2A, 0x20, 0x2B, 0x20,
+ 0x2C, 0x20, 0x2D, 0x20, 0x2E, 0x20, 0x2F, 0x20, 0x30, 0x20, 0x31, 0x20, 0x32, 0x20, 0x33, 0x20,
+ 0x34, 0x20, 0x35, 0x20, 0x36, 0x20, 0x37, 0x20, 0x38, 0x20, 0x39, 0x20, 0x3A, 0x20, 0x3B, 0x20,
+ 0x3C, 0x20, 0x3D, 0x20, 0x3E, 0x20, 0x3F, 0x20, 0x40, 0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20,
+ 0x44, 0x20, 0x45, 0x20, 0x46, 0x20, 0x47, 0x20, 0x48, 0x20, 0x49, 0x20, 0x4A, 0x20, 0x4B, 0x20,
+ 0x4C, 0x20, 0x4D, 0x20, 0x4E, 0x20, 0x4F, 0x20, 0x50, 0x20, 0x51, 0x20, 0x52, 0x20, 0x53, 0x20,
+ 0x54, 0x20, 0x55, 0x20, 0x56, 0x20, 0x57, 0x20, 0x58, 0x20, 0x59, 0x20, 0x5A, 0x20, 0x5B, 0x20,
+ 0x5C, 0x20, 0x5D, 0x20, 0x5E, 0x20, 0x5F, 0x20, 0x60, 0x20, 0x61, 0x20, 0x62, 0x20, 0x63, 0x20,
+ 0x64, 0x20, 0x65, 0x20, 0x66, 0x20, 0x67, 0x20, 0x68, 0x20, 0x69, 0x20, 0x6A, 0x20, 0x6B, 0x20,
+ 0x6C, 0x20, 0x6D, 0x20, 0x6E, 0x20, 0x6F, 0x20, 0x70, 0x20, 0x71, 0x20, 0x72, 0x20, 0x73, 0x20,
+ 0x74, 0x20, 0x75, 0x20, 0x76, 0x20, 0x77, 0x20, 0x78, 0x20, 0x79, 0x20, 0x7A, 0x20, 0x7B, 0x20,
+ 0x7C, 0x20, 0x7D, 0x20, 0x7E, 0x20, 0x7F, 0x20, 0x80, 0x20, 0x81, 0x20, 0x82, 0x20, 0x83, 0x20,
+ 0x84, 0x20, 0x85, 0x20, 0x86, 0x20, 0x87, 0x20, 0x88, 0x20, 0x89, 0x20, 0x8A, 0x20, 0x8B, 0x20,
+ 0x8C, 0x20, 0x8D, 0x20, 0x8E, 0x20, 0x8F, 0x20, 0x90, 0x20, 0x91, 0x20, 0x92, 0x20, 0x93, 0x20,
+ 0x94, 0x20, 0x95, 0x20, 0x96, 0x20, 0x97, 0x20, 0x98, 0x20, 0x99, 0x20, 0x9A, 0x20, 0x9B, 0x20,
+ 0x9C, 0x20, 0x9D, 0x20, 0x9E, 0x20, 0x9F, 0x20, 0xA0, 0x20, 0xA1, 0x20, 0xA2, 0x20, 0xA3, 0x20,
+ 0xA4, 0x20, 0xA5, 0x20, 0xA6, 0x20, 0xA7, 0x20, 0xA8, 0x20, 0xA9, 0x20, 0xAA, 0x20, 0xAB, 0x20,
+ 0xAC, 0x20, 0xAD, 0x20, 0xAE, 0x20, 0xAF, 0x20, 0xB0, 0x20, 0xB1, 0x20, 0xB2, 0x20, 0xB3, 0x20,
+ 0xB4, 0x20, 0xB5, 0x20, 0xB6, 0x20, 0xB7, 0x20, 0xB8, 0x20, 0xB9, 0x20, 0xBA, 0x20, 0xBB, 0x20,
+ 0xBC, 0x20, 0xBD, 0x20, 0xBE, 0x20, 0xBF, 0x20, 0xC0, 0x20, 0xC1, 0x20, 0xC2, 0x20, 0xC3, 0x20,
+ 0xC4, 0x20, 0xC5, 0x20, 0xC6, 0x20, 0xC7, 0x20, 0xC8, 0x20, 0xC9, 0x20, 0xCA, 0x20, 0xCB, 0x20,
+ 0xCC, 0x20, 0xCD, 0x20, 0xCE, 0x20, 0xCF, 0x20, 0xD0, 0x20, 0xD1, 0x20, 0xD2, 0x20, 0xD3, 0x20,
+ 0xD4, 0x20, 0xD5, 0x20, 0xD6, 0x20, 0xD7, 0x20, 0xD8, 0x20, 0xD9, 0x20, 0xDA, 0x20, 0xDB, 0x20,
+ 0xDC, 0x20, 0xDD, 0x20, 0xDE, 0x20, 0xDF, 0x20, 0xE0, 0x20, 0xE1, 0x20, 0xE2, 0x20, 0xE3, 0x20,
+ 0xE4, 0x20, 0xE5, 0x20, 0xE6, 0x20, 0xE7, 0x20, 0xE8, 0x20, 0xE9, 0x20, 0xEA, 0x20, 0xEB, 0x20,
+ 0xEC, 0x20, 0xED, 0x20, 0xEE, 0x20, 0xEF, 0x20, 0xF0, 0x20, 0xF1, 0x20, 0xF2, 0x20, 0xF3, 0x20,
+ 0xF4, 0x20, 0xF5, 0x20, 0xF6, 0x20, 0xF7, 0x20, 0xF8, 0x20, 0xF9, 0x20, 0xFA, 0x20, 0xFB, 0x20,
+ 0xFC, 0x20, 0xFD, 0x20, 0xFE, 0x20, 0xFF, 0x20, 0x00, 0x21, 0x01, 0x21, 0x02, 0x21, 0x03, 0x21,
+ 0x04, 0x21, 0x05, 0x21, 0x06, 0x21, 0x07, 0x21, 0x08, 0x21, 0x09, 0x21, 0x0A, 0x21, 0x0B, 0x21,
+ 0x0C, 0x21, 0x0D, 0x21, 0x0E, 0x21, 0x0F, 0x21, 0x10, 0x21, 0x11, 0x21, 0x12, 0x21, 0x13, 0x21,
+ 0x14, 0x21, 0x15, 0x21, 0x16, 0x21, 0x17, 0x21, 0x18, 0x21, 0x19, 0x21, 0x1A, 0x21, 0x1B, 0x21,
+ 0x1C, 0x21, 0x1D, 0x21, 0x1E, 0x21, 0x1F, 0x21, 0x20, 0x21, 0x21, 0x21, 0x22, 0x21, 0x23, 0x21,
+ 0x24, 0x21, 0x25, 0x21, 0x26, 0x21, 0x27, 0x21, 0x28, 0x21, 0x29, 0x21, 0x2A, 0x21, 0x2B, 0x21,
+ 0x2C, 0x21, 0x2D, 0x21, 0x2E, 0x21, 0x2F, 0x21, 0x30, 0x21, 0x31, 0x21, 0x32, 0x21, 0x33, 0x21,
+ 0x34, 0x21, 0x35, 0x21, 0x36, 0x21, 0x37, 0x21, 0x38, 0x21, 0x39, 0x21, 0x3A, 0x21, 0x3B, 0x21,
+ 0x3C, 0x21, 0x3D, 0x21, 0x3E, 0x21, 0x3F, 0x21, 0x40, 0x21, 0x41, 0x21, 0x42, 0x21, 0x43, 0x21,
+ 0x44, 0x21, 0x45, 0x21, 0x46, 0x21, 0x47, 0x21, 0x48, 0x21, 0x49, 0x21, 0x4A, 0x21, 0x4B, 0x21,
+ 0x4C, 0x21, 0x4D, 0x21, 0x32, 0x21, 0x4F, 0x21, 0x50, 0x21, 0x51, 0x21, 0x52, 0x21, 0x53, 0x21,
+ 0x54, 0x21, 0x55, 0x21, 0x56, 0x21, 0x57, 0x21, 0x58, 0x21, 0x59, 0x21, 0x5A, 0x21, 0x5B, 0x21,
+ 0x5C, 0x21, 0x5D, 0x21, 0x5E, 0x21, 0x5F, 0x21, 0x60, 0x21, 0x61, 0x21, 0x62, 0x21, 0x63, 0x21,
+ 0x64, 0x21, 0x65, 0x21, 0x66, 0x21, 0x67, 0x21, 0x68, 0x21, 0x69, 0x21, 0x6A, 0x21, 0x6B, 0x21,
+ 0x6C, 0x21, 0x6D, 0x21, 0x6E, 0x21, 0x6F, 0x21, 0x60, 0x21, 0x61, 0x21, 0x62, 0x21, 0x63, 0x21,
+ 0x64, 0x21, 0x65, 0x21, 0x66, 0x21, 0x67, 0x21, 0x68, 0x21, 0x69, 0x21, 0x6A, 0x21, 0x6B, 0x21,
+ 0x6C, 0x21, 0x6D, 0x21, 0x6E, 0x21, 0x6F, 0x21, 0x80, 0x21, 0x81, 0x21, 0x82, 0x21, 0x83, 0x21,
+ 0x83, 0x21, 0xFF, 0xFF, 0x4B, 0x03, 0xB6, 0x24, 0xB7, 0x24, 0xB8, 0x24, 0xB9, 0x24, 0xBA, 0x24,
+ 0xBB, 0x24, 0xBC, 0x24, 0xBD, 0x24, 0xBE, 0x24, 0xBF, 0x24, 0xC0, 0x24, 0xC1, 0x24, 0xC2, 0x24,
+ 0xC3, 0x24, 0xC4, 0x24, 0xC5, 0x24, 0xC6, 0x24, 0xC7, 0x24, 0xC8, 0x24, 0xC9, 0x24, 0xCA, 0x24,
+ 0xCB, 0x24, 0xCC, 0x24, 0xCD, 0x24, 0xCE, 0x24, 0xCF, 0x24, 0xFF, 0xFF, 0x46, 0x07, 0x00, 0x2C,
+ 0x01, 0x2C, 0x02, 0x2C, 0x03, 0x2C, 0x04, 0x2C, 0x05, 0x2C, 0x06, 0x2C, 0x07, 0x2C, 0x08, 0x2C,
+ 0x09, 0x2C, 0x0A, 0x2C, 0x0B, 0x2C, 0x0C, 0x2C, 0x0D, 0x2C, 0x0E, 0x2C, 0x0F, 0x2C, 0x10, 0x2C,
+ 0x11, 0x2C, 0x12, 0x2C, 0x13, 0x2C, 0x14, 0x2C, 0x15, 0x2C, 0x16, 0x2C, 0x17, 0x2C, 0x18, 0x2C,
+ 0x19, 0x2C, 0x1A, 0x2C, 0x1B, 0x2C, 0x1C, 0x2C, 0x1D, 0x2C, 0x1E, 0x2C, 0x1F, 0x2C, 0x20, 0x2C,
+ 0x21, 0x2C, 0x22, 0x2C, 0x23, 0x2C, 0x24, 0x2C, 0x25, 0x2C, 0x26, 0x2C, 0x27, 0x2C, 0x28, 0x2C,
+ 0x29, 0x2C, 0x2A, 0x2C, 0x2B, 0x2C, 0x2C, 0x2C, 0x2D, 0x2C, 0x2E, 0x2C, 0x5F, 0x2C, 0x60, 0x2C,
+ 0x60, 0x2C, 0x62, 0x2C, 0x63, 0x2C, 0x64, 0x2C, 0x65, 0x2C, 0x66, 0x2C, 0x67, 0x2C, 0x67, 0x2C,
+ 0x69, 0x2C, 0x69, 0x2C, 0x6B, 0x2C, 0x6B, 0x2C, 0x6D, 0x2C, 0x6E, 0x2C, 0x6F, 0x2C, 0x70, 0x2C,
+ 0x71, 0x2C, 0x72, 0x2C, 0x73, 0x2C, 0x74, 0x2C, 0x75, 0x2C, 0x75, 0x2C, 0x77, 0x2C, 0x78, 0x2C,
+ 0x79, 0x2C, 0x7A, 0x2C, 0x7B, 0x2C, 0x7C, 0x2C, 0x7D, 0x2C, 0x7E, 0x2C, 0x7F, 0x2C, 0x80, 0x2C,
+ 0x80, 0x2C, 0x82, 0x2C, 0x82, 0x2C, 0x84, 0x2C, 0x84, 0x2C, 0x86, 0x2C, 0x86, 0x2C, 0x88, 0x2C,
+ 0x88, 0x2C, 0x8A, 0x2C, 0x8A, 0x2C, 0x8C, 0x2C, 0x8C, 0x2C, 0x8E, 0x2C, 0x8E, 0x2C, 0x90, 0x2C,
+ 0x90, 0x2C, 0x92, 0x2C, 0x92, 0x2C, 0x94, 0x2C, 0x94, 0x2C, 0x96, 0x2C, 0x96, 0x2C, 0x98, 0x2C,
+ 0x98, 0x2C, 0x9A, 0x2C, 0x9A, 0x2C, 0x9C, 0x2C, 0x9C, 0x2C, 0x9E, 0x2C, 0x9E, 0x2C, 0xA0, 0x2C,
+ 0xA0, 0x2C, 0xA2, 0x2C, 0xA2, 0x2C, 0xA4, 0x2C, 0xA4, 0x2C, 0xA6, 0x2C, 0xA6, 0x2C, 0xA8, 0x2C,
+ 0xA8, 0x2C, 0xAA, 0x2C, 0xAA, 0x2C, 0xAC, 0x2C, 0xAC, 0x2C, 0xAE, 0x2C, 0xAE, 0x2C, 0xB0, 0x2C,
+ 0xB0, 0x2C, 0xB2, 0x2C, 0xB2, 0x2C, 0xB4, 0x2C, 0xB4, 0x2C, 0xB6, 0x2C, 0xB6, 0x2C, 0xB8, 0x2C,
+ 0xB8, 0x2C, 0xBA, 0x2C, 0xBA, 0x2C, 0xBC, 0x2C, 0xBC, 0x2C, 0xBE, 0x2C, 0xBE, 0x2C, 0xC0, 0x2C,
+ 0xC0, 0x2C, 0xC2, 0x2C, 0xC2, 0x2C, 0xC4, 0x2C, 0xC4, 0x2C, 0xC6, 0x2C, 0xC6, 0x2C, 0xC8, 0x2C,
+ 0xC8, 0x2C, 0xCA, 0x2C, 0xCA, 0x2C, 0xCC, 0x2C, 0xCC, 0x2C, 0xCE, 0x2C, 0xCE, 0x2C, 0xD0, 0x2C,
+ 0xD0, 0x2C, 0xD2, 0x2C, 0xD2, 0x2C, 0xD4, 0x2C, 0xD4, 0x2C, 0xD6, 0x2C, 0xD6, 0x2C, 0xD8, 0x2C,
+ 0xD8, 0x2C, 0xDA, 0x2C, 0xDA, 0x2C, 0xDC, 0x2C, 0xDC, 0x2C, 0xDE, 0x2C, 0xDE, 0x2C, 0xE0, 0x2C,
+ 0xE0, 0x2C, 0xE2, 0x2C, 0xE2, 0x2C, 0xE4, 0x2C, 0xE5, 0x2C, 0xE6, 0x2C, 0xE7, 0x2C, 0xE8, 0x2C,
+ 0xE9, 0x2C, 0xEA, 0x2C, 0xEB, 0x2C, 0xEC, 0x2C, 0xED, 0x2C, 0xEE, 0x2C, 0xEF, 0x2C, 0xF0, 0x2C,
+ 0xF1, 0x2C, 0xF2, 0x2C, 0xF3, 0x2C, 0xF4, 0x2C, 0xF5, 0x2C, 0xF6, 0x2C, 0xF7, 0x2C, 0xF8, 0x2C,
+ 0xF9, 0x2C, 0xFA, 0x2C, 0xFB, 0x2C, 0xFC, 0x2C, 0xFD, 0x2C, 0xFE, 0x2C, 0xFF, 0x2C, 0xA0, 0x10,
+ 0xA1, 0x10, 0xA2, 0x10, 0xA3, 0x10, 0xA4, 0x10, 0xA5, 0x10, 0xA6, 0x10, 0xA7, 0x10, 0xA8, 0x10,
+ 0xA9, 0x10, 0xAA, 0x10, 0xAB, 0x10, 0xAC, 0x10, 0xAD, 0x10, 0xAE, 0x10, 0xAF, 0x10, 0xB0, 0x10,
+ 0xB1, 0x10, 0xB2, 0x10, 0xB3, 0x10, 0xB4, 0x10, 0xB5, 0x10, 0xB6, 0x10, 0xB7, 0x10, 0xB8, 0x10,
+ 0xB9, 0x10, 0xBA, 0x10, 0xBB, 0x10, 0xBC, 0x10, 0xBD, 0x10, 0xBE, 0x10, 0xBF, 0x10, 0xC0, 0x10,
+ 0xC1, 0x10, 0xC2, 0x10, 0xC3, 0x10, 0xC4, 0x10, 0xC5, 0x10, 0xFF, 0xFF, 0x1B, 0xD2, 0x21, 0xFF,
+ 0x22, 0xFF, 0x23, 0xFF, 0x24, 0xFF, 0x25, 0xFF, 0x26, 0xFF, 0x27, 0xFF, 0x28, 0xFF, 0x29, 0xFF,
+ 0x2A, 0xFF, 0x2B, 0xFF, 0x2C, 0xFF, 0x2D, 0xFF, 0x2E, 0xFF, 0x2F, 0xFF, 0x30, 0xFF, 0x31, 0xFF,
+ 0x32, 0xFF, 0x33, 0xFF, 0x34, 0xFF, 0x35, 0xFF, 0x36, 0xFF, 0x37, 0xFF, 0x38, 0xFF, 0x39, 0xFF,
+ 0x3A, 0xFF, 0x5B, 0xFF, 0x5C, 0xFF, 0x5D, 0xFF, 0x5E, 0xFF, 0x5F, 0xFF, 0x60, 0xFF, 0x61, 0xFF,
+ 0x62, 0xFF, 0x63, 0xFF, 0x64, 0xFF, 0x65, 0xFF, 0x66, 0xFF, 0x67, 0xFF, 0x68, 0xFF, 0x69, 0xFF,
+ 0x6A, 0xFF, 0x6B, 0xFF, 0x6C, 0xFF, 0x6D, 0xFF, 0x6E, 0xFF, 0x6F, 0xFF, 0x70, 0xFF, 0x71, 0xFF,
+ 0x72, 0xFF, 0x73, 0xFF, 0x74, 0xFF, 0x75, 0xFF, 0x76, 0xFF, 0x77, 0xFF, 0x78, 0xFF, 0x79, 0xFF,
+ 0x7A, 0xFF, 0x7B, 0xFF, 0x7C, 0xFF, 0x7D, 0xFF, 0x7E, 0xFF, 0x7F, 0xFF, 0x80, 0xFF, 0x81, 0xFF,
+ 0x82, 0xFF, 0x83, 0xFF, 0x84, 0xFF, 0x85, 0xFF, 0x86, 0xFF, 0x87, 0xFF, 0x88, 0xFF, 0x89, 0xFF,
+ 0x8A, 0xFF, 0x8B, 0xFF, 0x8C, 0xFF, 0x8D, 0xFF, 0x8E, 0xFF, 0x8F, 0xFF, 0x90, 0xFF, 0x91, 0xFF,
+ 0x92, 0xFF, 0x93, 0xFF, 0x94, 0xFF, 0x95, 0xFF, 0x96, 0xFF, 0x97, 0xFF, 0x98, 0xFF, 0x99, 0xFF,
+ 0x9A, 0xFF, 0x9B, 0xFF, 0x9C, 0xFF, 0x9D, 0xFF, 0x9E, 0xFF, 0x9F, 0xFF, 0xA0, 0xFF, 0xA1, 0xFF,
+ 0xA2, 0xFF, 0xA3, 0xFF, 0xA4, 0xFF, 0xA5, 0xFF, 0xA6, 0xFF, 0xA7, 0xFF, 0xA8, 0xFF, 0xA9, 0xFF,
+ 0xAA, 0xFF, 0xAB, 0xFF, 0xAC, 0xFF, 0xAD, 0xFF, 0xAE, 0xFF, 0xAF, 0xFF, 0xB0, 0xFF, 0xB1, 0xFF,
+ 0xB2, 0xFF, 0xB3, 0xFF, 0xB4, 0xFF, 0xB5, 0xFF, 0xB6, 0xFF, 0xB7, 0xFF, 0xB8, 0xFF, 0xB9, 0xFF,
+ 0xBA, 0xFF, 0xBB, 0xFF, 0xBC, 0xFF, 0xBD, 0xFF, 0xBE, 0xFF, 0xBF, 0xFF, 0xC0, 0xFF, 0xC1, 0xFF,
+ 0xC2, 0xFF, 0xC3, 0xFF, 0xC4, 0xFF, 0xC5, 0xFF, 0xC6, 0xFF, 0xC7, 0xFF, 0xC8, 0xFF, 0xC9, 0xFF,
+ 0xCA, 0xFF, 0xCB, 0xFF, 0xCC, 0xFF, 0xCD, 0xFF, 0xCE, 0xFF, 0xCF, 0xFF, 0xD0, 0xFF, 0xD1, 0xFF,
+ 0xD2, 0xFF, 0xD3, 0xFF, 0xD4, 0xFF, 0xD5, 0xFF, 0xD6, 0xFF, 0xD7, 0xFF, 0xD8, 0xFF, 0xD9, 0xFF,
+ 0xDA, 0xFF, 0xDB, 0xFF, 0xDC, 0xFF, 0xDD, 0xFF, 0xDE, 0xFF, 0xDF, 0xFF, 0xE0, 0xFF, 0xE1, 0xFF,
+ 0xE2, 0xFF, 0xE3, 0xFF, 0xE4, 0xFF, 0xE5, 0xFF, 0xE6, 0xFF, 0xE7, 0xFF, 0xE8, 0xFF, 0xE9, 0xFF,
+ 0xEA, 0xFF, 0xEB, 0xFF, 0xEC, 0xFF, 0xED, 0xFF, 0xEE, 0xFF, 0xEF, 0xFF, 0xF0, 0xFF, 0xF1, 0xFF,
+ 0xF2, 0xFF, 0xF3, 0xFF, 0xF4, 0xFF, 0xF5, 0xFF, 0xF6, 0xFF, 0xF7, 0xFF, 0xF8, 0xFF, 0xF9, 0xFF,
+ 0xFA, 0xFF, 0xFB, 0xFF, 0xFC, 0xFF, 0xFD, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF
+};
+
+#endif /* _UPCASE_H */
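
The index helpers above describe a two-level (column/row) layout for the expanded up-case table, while uni_def_upcase itself is kept in the compressed exFAT format: two bytes per entry, little-endian, with a 0xFFFF entry marking an identity-mapped run whose length follows in the next entry. A lookup against an expanded table of that shape might read as follows — a sketch under assumed naming, not code from the patch:

	static inline u16 sdfat_toupper(u16 **utbl, u16 c)
	{
		/* utbl: UTBL_COL_COUNT row pointers, each row holding
		 * UTBL_ROW_COUNT u16 entries; a missing row means the
		 * code points in it map to themselves. */
		u16 *row = utbl[get_col_index(c)];

		return row ? row[get_row_index(c)] : c;
	}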
diff --git a/fs/sdfat/version.h b/fs/sdfat/version.h
new file mode 100644
index 000000000000..44e44e03d847
--- /dev/null
+++ b/fs/sdfat/version.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/************************************************************************/
+/* */
+/* PROJECT : exFAT & FAT12/16/32 File System */
+/* FILE : version.h */
+/* PURPOSE : sdFAT File Manager */
+/* */
+/************************************************************************/
+#define SDFAT_VERSION "2.3.0-lineage"
diff --git a/fs/sdfat/xattr.c b/fs/sdfat/xattr.c
new file mode 100644
index 000000000000..40bb850711be
--- /dev/null
+++ b/fs/sdfat/xattr.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/************************************************************************/
+/* */
+/* PROJECT : exFAT & FAT12/16/32 File System */
+/* FILE : xattr.c */
+/* PURPOSE : sdFAT code for supporting xattr (Extended File Attributes) */
+/* */
+/*----------------------------------------------------------------------*/
+/* NOTES */
+/* */
+/* */
+/************************************************************************/
+
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/xattr.h>
+#include <linux/dcache.h>
+#include "sdfat.h"
+
+#ifndef CONFIG_SDFAT_VIRTUAL_XATTR_SELINUX_LABEL
+#define CONFIG_SDFAT_VIRTUAL_XATTR_SELINUX_LABEL ("undefined")
+#endif
+
+static const char default_xattr[] = CONFIG_SDFAT_VIRTUAL_XATTR_SELINUX_LABEL;
+
+static int can_support(const char *name)
+{
+ if (!name || strcmp(name, "security.selinux"))
+ return -1;
+ return 0;
+}
+
+ssize_t sdfat_listxattr(struct dentry *dentry, char *list, size_t size)
+{
+ return 0;
+}
+
+
+/*************************************************************************
+ * INNER FUNCTIONS WHICH HAVE KERNEL VERSION DEPENDENCIES
+ *************************************************************************/
+static int __sdfat_xattr_check_support(const char *name)
+{
+ if (can_support(name))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+ssize_t __sdfat_getxattr(const char *name, void *value, size_t size)
+{
+ if (can_support(name))
+ return -EOPNOTSUPP;
+
+ if ((size > strlen(default_xattr)+1) && value)
+ strcpy(value, default_xattr);
+
+ return strlen(default_xattr);
+}
+
+
+/*************************************************************************
+ * FUNCTIONS WHICH HAVE KERNEL VERSION DEPENDENCIES
+ *************************************************************************/
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)
+static int sdfat_xattr_get(const struct xattr_handler *handler,
+ struct dentry *dentry, struct inode *inode,
+ const char *name, void *buffer, size_t size)
+{
+ return __sdfat_getxattr(name, buffer, size);
+}
+
+static int sdfat_xattr_set(const struct xattr_handler *handler,
+ struct dentry *dentry, struct inode *inode,
+ const char *name, const void *value, size_t size,
+ int flags)
+{
+ return __sdfat_xattr_check_support(name);
+}
+
+const struct xattr_handler sdfat_xattr_handler = {
+ .prefix = "", /* match anything */
+ .get = sdfat_xattr_get,
+ .set = sdfat_xattr_set,
+};
+
+const struct xattr_handler *sdfat_xattr_handlers[] = {
+ &sdfat_xattr_handler,
+ NULL
+};
+
+void setup_sdfat_xattr_handler(struct super_block *sb)
+{
+ sb->s_xattr = sdfat_xattr_handlers;
+}
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) */
+int sdfat_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags)
+{
+ return __sdfat_xattr_check_support(name);
+}
+
+ssize_t sdfat_getxattr(struct dentry *dentry, const char *name, void *value, size_t size)
+{
+ return __sdfat_getxattr(name, value, size);
+}
+
+int sdfat_removexattr(struct dentry *dentry, const char *name)
+{
+ return __sdfat_xattr_check_support(name);
+}
+
+void setup_sdfat_xattr_handler(struct super_block *sb)
+{
+ /* DO NOTHING */
+}
+#endif
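
sdFAT stores no extended attributes on disk; the handlers above synthesize one virtual security.selinux value (the build-time label, or "undefined") and refuse every other name. A usage sketch — buffer and attribute names are hypothetical:

	char buf[64];
	ssize_t len;

	len = __sdfat_getxattr("security.selinux", buf, sizeof(buf));
	/* len == strlen(default_xattr); buf now holds the fixed label */

	len = __sdfat_getxattr("user.comment", buf, sizeof(buf));
	/* len == -EOPNOTSUPP: only "security.selinux" is recognized */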
diff --git a/fs/select.c b/fs/select.c
index 3d38808dbcb6..f7667f2c6d0d 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -239,7 +239,8 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
set_current_state(state);
if (!pwq->triggered)
- rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS);
+ rc = freezable_schedule_hrtimeout_range(expires, slack,
+ HRTIMER_MODE_ABS);
__set_current_state(TASK_RUNNING);
/*
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 95e730506ad2..0004df800c88 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -683,11 +683,11 @@ EXPORT_SYMBOL(seq_puts);
/*
* A helper routine for putting decimal numbers without rich format of printf().
* only 'unsigned long long' is supported.
- * This routine will put one byte delimiter + number into seq_file.
+ * This routine will put strlen(delimiter) + number into seq_file.
* This routine is very quick when you show lots of numbers.
* In usual cases, it will be better to use seq_printf(). It's easier to read.
*/
-void seq_put_decimal_ull(struct seq_file *m, char delimiter,
+void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
unsigned long long num)
{
int len;
@@ -695,8 +695,15 @@ void seq_put_decimal_ull(struct seq_file *m, char delimiter,
if (m->count + 2 >= m->size) /* we'll write 2 bytes at least */
goto overflow;
- if (delimiter)
- m->buf[m->count++] = delimiter;
+ len = strlen(delimiter);
+ if (m->count + len >= m->size)
+ goto overflow;
+
+ memcpy(m->buf + m->count, delimiter, len);
+ m->count += len;
+
+ if (m->count + 1 >= m->size)
+ goto overflow;
if (num < 10) {
m->buf[m->count++] = num + '0';
@@ -706,6 +713,7 @@ void seq_put_decimal_ull(struct seq_file *m, char delimiter,
len = num_to_str(m->buf + m->count, m->size - m->count, num);
if (!len)
goto overflow;
+
m->count += len;
return;
@@ -714,19 +722,42 @@ overflow:
}
EXPORT_SYMBOL(seq_put_decimal_ull);
-void seq_put_decimal_ll(struct seq_file *m, char delimiter, long long num)
+void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num)
{
+ int len;
+
+ if (m->count + 3 >= m->size) /* we may write a sign and at least one digit */
+ goto overflow;
+
+ len = strlen(delimiter);
+ if (m->count + len >= m->size)
+ goto overflow;
+
+ memcpy(m->buf + m->count, delimiter, len);
+ m->count += len;
+
+ if (m->count + 2 >= m->size)
+ goto overflow;
+
if (num < 0) {
- if (m->count + 3 >= m->size) {
- seq_set_overflow(m);
- return;
- }
- if (delimiter)
- m->buf[m->count++] = delimiter;
+ m->buf[m->count++] = '-';
num = -num;
- delimiter = '-';
}
- seq_put_decimal_ull(m, delimiter, num);
+
+ if (num < 10) {
+ m->buf[m->count++] = num + '0';
+ return;
+ }
+
+ len = num_to_str(m->buf + m->count, m->size - m->count, num);
+ if (!len)
+ goto overflow;
+
+ m->count += len;
+ return;
+
+overflow:
+ seq_set_overflow(m);
}
EXPORT_SYMBOL(seq_put_decimal_ll);
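
With the new signatures, callers pass a string delimiter — possibly empty, possibly several characters — which is copied verbatim ahead of the number. A sketch of the converted calling convention (value names are placeholders):

	seq_put_decimal_ull(m, " ", utime);	/* appends " 1234" */
	seq_put_decimal_ll(m, " ", nice);	/* appends " -5" */
	seq_put_decimal_ull(m, "", total);	/* number only, no delimiter */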
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
index 6dd158a216f4..ffb093e72b6c 100644
--- a/fs/squashfs/Kconfig
+++ b/fs/squashfs/Kconfig
@@ -26,6 +26,34 @@ config SQUASHFS
If unsure, say N.
choice
+ prompt "File decompression options"
+ depends on SQUASHFS
+ help
+ Squashfs supports two options for decompressing file
+ data. Traditionally, Squashfs has decompressed into an
+ intermediate buffer and then memcopied it into the page
+ cache. It can now also decompress directly into the
+ page cache.
+
+ If unsure, select "Decompress file data into an intermediate buffer"
+
+config SQUASHFS_FILE_CACHE
+ bool "Decompress file data into an intermediate buffer"
+ help
+ Decompress file data into an intermediate buffer and then
+ memcopy it into the page cache.
+
+config SQUASHFS_FILE_DIRECT
+ bool "Decompress files directly into the page cache"
+ help
+ Directly decompress file data into the page cache.
+ Doing so can significantly improve performance because
+ it eliminates a memcpy and it also removes the lock contention
+ on the single buffer.
+
+endchoice
+
+choice
prompt "Decompressor parallelisation options"
depends on SQUASHFS
help
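
For illustration, a .config fragment selecting the traditional intermediate-buffer path (the choice the help text recommends when unsure):

	CONFIG_SQUASHFS=y
	CONFIG_SQUASHFS_FILE_CACHE=y
	# CONFIG_SQUASHFS_FILE_DIRECT is not set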
diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile
index fe51f1507ed1..246a6f329d89 100644
--- a/fs/squashfs/Makefile
+++ b/fs/squashfs/Makefile
@@ -5,7 +5,8 @@
obj-$(CONFIG_SQUASHFS) += squashfs.o
squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o
squashfs-y += namei.o super.o symlink.o decompressor.o
-squashfs-y += file_direct.o page_actor.o
+squashfs-$(CONFIG_SQUASHFS_FILE_CACHE) += file_cache.o
+squashfs-$(CONFIG_SQUASHFS_FILE_DIRECT) += file_direct.o page_actor.o
squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o
squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o
squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index b3b95e2ae2ff..82bc942fc437 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -28,12 +28,9 @@
#include <linux/fs.h>
#include <linux/vfs.h>
-#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/string.h>
-#include <linux/pagemap.h>
#include <linux/buffer_head.h>
-#include <linux/workqueue.h>
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
@@ -41,381 +38,45 @@
#include "decompressor.h"
#include "page_actor.h"
-static struct workqueue_struct *squashfs_read_wq;
-
-struct squashfs_read_request {
- struct super_block *sb;
- u64 index;
- int length;
- int compressed;
- int offset;
- u64 read_end;
- struct squashfs_page_actor *output;
- enum {
- SQUASHFS_COPY,
- SQUASHFS_DECOMPRESS,
- SQUASHFS_METADATA,
- } data_processing;
- bool synchronous;
-
- /*
- * If the read is synchronous, it is possible to retrieve information
- * about the request by setting these pointers.
- */
- int *res;
- int *bytes_read;
- int *bytes_uncompressed;
-
- int nr_buffers;
- struct buffer_head **bh;
- struct work_struct offload;
-};
-
-struct squashfs_bio_request {
- struct buffer_head **bh;
- int nr_buffers;
-};
-
-static int squashfs_bio_submit(struct squashfs_read_request *req);
-
-int squashfs_init_read_wq(void)
-{
- squashfs_read_wq = create_workqueue("SquashFS read wq");
- return !!squashfs_read_wq;
-}
-
-void squashfs_destroy_read_wq(void)
-{
- flush_workqueue(squashfs_read_wq);
- destroy_workqueue(squashfs_read_wq);
-}
-
-static void free_read_request(struct squashfs_read_request *req, int error)
-{
- if (!req->synchronous)
- squashfs_page_actor_free(req->output, error);
- if (req->res)
- *(req->res) = error;
- kfree(req->bh);
- kfree(req);
-}
-
-static void squashfs_process_blocks(struct squashfs_read_request *req)
-{
- int error = 0;
- int bytes, i, length;
- struct squashfs_sb_info *msblk = req->sb->s_fs_info;
- struct squashfs_page_actor *actor = req->output;
- struct buffer_head **bh = req->bh;
- int nr_buffers = req->nr_buffers;
-
- for (i = 0; i < nr_buffers; ++i) {
- if (!bh[i])
- continue;
- wait_on_buffer(bh[i]);
- if (!buffer_uptodate(bh[i]))
- error = -EIO;
- }
- if (error)
- goto cleanup;
-
- if (req->data_processing == SQUASHFS_METADATA) {
- /* Extract the length of the metadata block */
- if (req->offset != msblk->devblksize - 1) {
- length = le16_to_cpup((__le16 *)
- (bh[0]->b_data + req->offset));
- } else {
- length = (unsigned char)bh[0]->b_data[req->offset];
- length |= (unsigned char)bh[1]->b_data[0] << 8;
- }
- req->compressed = SQUASHFS_COMPRESSED(length);
- req->data_processing = req->compressed ? SQUASHFS_DECOMPRESS
- : SQUASHFS_COPY;
- length = SQUASHFS_COMPRESSED_SIZE(length);
- if (req->index + length + 2 > req->read_end) {
- for (i = 0; i < nr_buffers; ++i)
- put_bh(bh[i]);
- kfree(bh);
- req->length = length;
- req->index += 2;
- squashfs_bio_submit(req);
- return;
- }
- req->length = length;
- req->offset = (req->offset + 2) % PAGE_SIZE;
- if (req->offset < 2) {
- put_bh(bh[0]);
- ++bh;
- --nr_buffers;
- }
- }
- if (req->bytes_read)
- *(req->bytes_read) = req->length;
-
- if (req->data_processing == SQUASHFS_COPY) {
- squashfs_bh_to_actor(bh, nr_buffers, req->output, req->offset,
- req->length, msblk->devblksize);
- } else if (req->data_processing == SQUASHFS_DECOMPRESS) {
- req->length = squashfs_decompress(msblk, bh, nr_buffers,
- req->offset, req->length, actor);
- if (req->length < 0) {
- error = -EIO;
- goto cleanup;
- }
- }
-
- /* Last page may have trailing bytes not filled */
- bytes = req->length % PAGE_SIZE;
- if (bytes && actor->page[actor->pages - 1])
- zero_user_segment(actor->page[actor->pages - 1], bytes,
- PAGE_SIZE);
-
-cleanup:
- if (req->bytes_uncompressed)
- *(req->bytes_uncompressed) = req->length;
- if (error) {
- for (i = 0; i < nr_buffers; ++i)
- if (bh[i])
- put_bh(bh[i]);
- }
- free_read_request(req, error);
-}
-
-static void read_wq_handler(struct work_struct *work)
-{
- squashfs_process_blocks(container_of(work,
- struct squashfs_read_request, offload));
-}
-
-static void squashfs_bio_end_io(struct bio *bio)
-{
- int i;
- int error = bio->bi_error;
- struct squashfs_bio_request *bio_req = bio->bi_private;
-
- bio_put(bio);
-
- for (i = 0; i < bio_req->nr_buffers; ++i) {
- if (!bio_req->bh[i])
- continue;
- if (!error)
- set_buffer_uptodate(bio_req->bh[i]);
- else
- clear_buffer_uptodate(bio_req->bh[i]);
- unlock_buffer(bio_req->bh[i]);
- }
- kfree(bio_req);
-}
-
-static int bh_is_optional(struct squashfs_read_request *req, int idx)
-{
- int start_idx, end_idx;
- struct squashfs_sb_info *msblk = req->sb->s_fs_info;
-
- start_idx = (idx * msblk->devblksize - req->offset) >> PAGE_SHIFT;
- end_idx = ((idx + 1) * msblk->devblksize - req->offset + 1) >> PAGE_SHIFT;
- if (start_idx >= req->output->pages)
- return 1;
- if (start_idx < 0)
- start_idx = end_idx;
- if (end_idx >= req->output->pages)
- end_idx = start_idx;
- return !req->output->page[start_idx] && !req->output->page[end_idx];
-}
-
-static int actor_getblks(struct squashfs_read_request *req, u64 block)
-{
- int i;
-
- req->bh = kmalloc_array(req->nr_buffers, sizeof(*(req->bh)), GFP_NOIO);
- if (!req->bh)
- return -ENOMEM;
-
- for (i = 0; i < req->nr_buffers; ++i) {
- /*
- * When dealing with an uncompressed block, the actor may
- * contains NULL pages. There's no need to read the buffers
- * associated with these pages.
- */
- if (!req->compressed && bh_is_optional(req, i)) {
- req->bh[i] = NULL;
- continue;
- }
- req->bh[i] = sb_getblk(req->sb, block + i);
- if (!req->bh[i]) {
- while (--i) {
- if (req->bh[i])
- put_bh(req->bh[i]);
- }
- return -1;
- }
- }
- return 0;
-}
-
-static int squashfs_bio_submit(struct squashfs_read_request *req)
+/*
+ * Read the metadata block length; this is stored in the first two
+ * bytes of the metadata block.
+ */
+static struct buffer_head *get_block_length(struct super_block *sb,
+ u64 *cur_index, int *offset, int *length)
{
- struct bio *bio = NULL;
+ struct squashfs_sb_info *msblk = sb->s_fs_info;
struct buffer_head *bh;
- struct squashfs_bio_request *bio_req = NULL;
- int b = 0, prev_block = 0;
- struct squashfs_sb_info *msblk = req->sb->s_fs_info;
-
- u64 read_start = round_down(req->index, msblk->devblksize);
- u64 read_end = round_up(req->index + req->length, msblk->devblksize);
- sector_t block = read_start >> msblk->devblksize_log2;
- sector_t block_end = read_end >> msblk->devblksize_log2;
- int offset = read_start - round_down(req->index, PAGE_SIZE);
- int nr_buffers = block_end - block;
- int blksz = msblk->devblksize;
- int bio_max_pages = nr_buffers > BIO_MAX_PAGES ? BIO_MAX_PAGES
- : nr_buffers;
- /* Setup the request */
- req->read_end = read_end;
- req->offset = req->index - read_start;
- req->nr_buffers = nr_buffers;
- if (actor_getblks(req, block) < 0)
- goto getblk_failed;
-
- /* Create and submit the BIOs */
- for (b = 0; b < nr_buffers; ++b, offset += blksz) {
- bh = req->bh[b];
- if (!bh || !trylock_buffer(bh))
- continue;
- if (buffer_uptodate(bh)) {
- unlock_buffer(bh);
- continue;
+ bh = sb_bread(sb, *cur_index);
+ if (bh == NULL)
+ return NULL;
+
+ if (msblk->devblksize - *offset == 1) {
+ *length = (unsigned char) bh->b_data[*offset];
+ put_bh(bh);
+ bh = sb_bread(sb, ++(*cur_index));
+ if (bh == NULL)
+ return NULL;
+ *length |= (unsigned char) bh->b_data[0] << 8;
+ *offset = 1;
+ } else {
+ *length = (unsigned char) bh->b_data[*offset] |
+ (unsigned char) bh->b_data[*offset + 1] << 8;
+ *offset += 2;
+
+ if (*offset == msblk->devblksize) {
+ put_bh(bh);
+ bh = sb_bread(sb, ++(*cur_index));
+ if (bh == NULL)
+ return NULL;
+ *offset = 0;
}
- offset %= PAGE_SIZE;
-
- /* Append the buffer to the current BIO if it is contiguous */
- if (bio && bio_req && prev_block + 1 == b) {
- if (bio_add_page(bio, bh->b_page, blksz, offset)) {
- bio_req->nr_buffers += 1;
- prev_block = b;
- continue;
- }
- }
-
- /* Otherwise, submit the current BIO and create a new one */
- if (bio)
- submit_bio(READ, bio);
- bio_req = kcalloc(1, sizeof(struct squashfs_bio_request),
- GFP_NOIO);
- if (!bio_req)
- goto req_alloc_failed;
- bio_req->bh = &req->bh[b];
- bio = bio_alloc(GFP_NOIO, bio_max_pages);
- if (!bio)
- goto bio_alloc_failed;
- bio->bi_bdev = req->sb->s_bdev;
- bio->bi_iter.bi_sector = (block + b)
- << (msblk->devblksize_log2 - 9);
- bio->bi_private = bio_req;
- bio->bi_end_io = squashfs_bio_end_io;
-
- bio_add_page(bio, bh->b_page, blksz, offset);
- bio_req->nr_buffers += 1;
- prev_block = b;
}
- if (bio)
- submit_bio(READ, bio);
- if (req->synchronous)
- squashfs_process_blocks(req);
- else {
- INIT_WORK(&req->offload, read_wq_handler);
- schedule_work(&req->offload);
- }
- return 0;
-
-bio_alloc_failed:
- kfree(bio_req);
-req_alloc_failed:
- unlock_buffer(bh);
- while (--nr_buffers >= b)
- if (req->bh[nr_buffers])
- put_bh(req->bh[nr_buffers]);
- while (--b >= 0)
- if (req->bh[b])
- wait_on_buffer(req->bh[b]);
-getblk_failed:
- free_read_request(req, -ENOMEM);
- return -ENOMEM;
+ return bh;
}
-static int read_metadata_block(struct squashfs_read_request *req,
- u64 *next_index)
-{
- int ret, error, bytes_read = 0, bytes_uncompressed = 0;
- struct squashfs_sb_info *msblk = req->sb->s_fs_info;
-
- if (req->index + 2 > msblk->bytes_used) {
- free_read_request(req, -EINVAL);
- return -EINVAL;
- }
- req->length = 2;
-
- /* Do not read beyond the end of the device */
- if (req->index + req->length > msblk->bytes_used)
- req->length = msblk->bytes_used - req->index;
- req->data_processing = SQUASHFS_METADATA;
-
- /*
- * Reading metadata is always synchronous because we don't know the
- * length in advance and the function is expected to update
- * 'next_index' and return the length.
- */
- req->synchronous = true;
- req->res = &error;
- req->bytes_read = &bytes_read;
- req->bytes_uncompressed = &bytes_uncompressed;
-
- TRACE("Metadata block @ 0x%llx, %scompressed size %d, src size %d\n",
- req->index, req->compressed ? "" : "un", bytes_read,
- req->output->length);
-
- ret = squashfs_bio_submit(req);
- if (ret)
- return ret;
- if (error)
- return error;
- if (next_index)
- *next_index += 2 + bytes_read;
- return bytes_uncompressed;
-}
-
-static int read_data_block(struct squashfs_read_request *req, int length,
- u64 *next_index, bool synchronous)
-{
- int ret, error = 0, bytes_uncompressed = 0, bytes_read = 0;
-
- req->compressed = SQUASHFS_COMPRESSED_BLOCK(length);
- req->length = length = SQUASHFS_COMPRESSED_SIZE_BLOCK(length);
- req->data_processing = req->compressed ? SQUASHFS_DECOMPRESS
- : SQUASHFS_COPY;
-
- req->synchronous = synchronous;
- if (synchronous) {
- req->res = &error;
- req->bytes_read = &bytes_read;
- req->bytes_uncompressed = &bytes_uncompressed;
- }
-
- TRACE("Data block @ 0x%llx, %scompressed size %d, src size %d\n",
- req->index, req->compressed ? "" : "un", req->length,
- req->output->length);
-
- ret = squashfs_bio_submit(req);
- if (ret)
- return ret;
- if (synchronous)
- ret = error ? error : bytes_uncompressed;
- if (next_index)
- *next_index += length;
- return ret;
-}
/*
* Read and decompress a metadata block or datablock. Length is non-zero
@@ -426,50 +87,130 @@ static int read_data_block(struct squashfs_read_request *req, int length,
* generated a larger block - this does occasionally happen with compression
* algorithms).
*/
-static int __squashfs_read_data(struct super_block *sb, u64 index, int length,
- u64 *next_index, struct squashfs_page_actor *output, bool sync)
+int squashfs_read_data(struct super_block *sb, u64 index, int length,
+ u64 *next_index, struct squashfs_page_actor *output)
{
- struct squashfs_read_request *req;
+ struct squashfs_sb_info *msblk = sb->s_fs_info;
+ struct buffer_head **bh;
+ int offset = index & ((1 << msblk->devblksize_log2) - 1);
+ u64 cur_index = index >> msblk->devblksize_log2;
+ int bytes, compressed, b = 0, k = 0, avail, i;
- req = kcalloc(1, sizeof(struct squashfs_read_request), GFP_KERNEL);
- if (!req) {
- if (!sync)
- squashfs_page_actor_free(output, -ENOMEM);
+ bh = kcalloc(((output->length + msblk->devblksize - 1)
+ >> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL);
+ if (bh == NULL)
return -ENOMEM;
- }
- req->sb = sb;
- req->index = index;
- req->output = output;
+ if (length) {
+ /*
+ * Datablock.
+ */
+ bytes = -offset;
+ compressed = SQUASHFS_COMPRESSED_BLOCK(length);
+ length = SQUASHFS_COMPRESSED_SIZE_BLOCK(length);
+ if (next_index)
+ *next_index = index + length;
+
+ TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n",
+ index, compressed ? "" : "un", length, output->length);
+
+ if (length < 0 || length > output->length ||
+ (index + length) > msblk->bytes_used)
+ goto read_failure;
+
+ for (b = 0; bytes < length; b++, cur_index++) {
+ bh[b] = sb_getblk(sb, cur_index);
+ if (bh[b] == NULL)
+ goto block_release;
+ bytes += msblk->devblksize;
+ }
+ ll_rw_block(READ, b, bh);
+ } else {
+ /*
+ * Metadata block.
+ */
+ if ((index + 2) > msblk->bytes_used)
+ goto read_failure;
+
+ bh[0] = get_block_length(sb, &cur_index, &offset, &length);
+ if (bh[0] == NULL)
+ goto read_failure;
+ b = 1;
+
+ bytes = msblk->devblksize - offset;
+ compressed = SQUASHFS_COMPRESSED(length);
+ length = SQUASHFS_COMPRESSED_SIZE(length);
+ if (next_index)
+ *next_index = index + length + 2;
- if (next_index)
- *next_index = index;
+ TRACE("Block @ 0x%llx, %scompressed size %d\n", index,
+ compressed ? "" : "un", length);
- if (length)
- length = read_data_block(req, length, next_index, sync);
- else
- length = read_metadata_block(req, next_index);
+ if (length < 0 || length > output->length ||
+ (index + length) > msblk->bytes_used)
+ goto block_release;
- if (length < 0) {
- ERROR("squashfs_read_data failed to read block 0x%llx\n",
- (unsigned long long)index);
- return -EIO;
+ for (; bytes < length; b++) {
+ bh[b] = sb_getblk(sb, ++cur_index);
+ if (bh[b] == NULL)
+ goto block_release;
+ bytes += msblk->devblksize;
+ }
+ ll_rw_block(READ, b - 1, bh + 1);
}
- return length;
-}
+ for (i = 0; i < b; i++) {
+ wait_on_buffer(bh[i]);
+ if (!buffer_uptodate(bh[i]))
+ goto block_release;
+ }
-int squashfs_read_data(struct super_block *sb, u64 index, int length,
- u64 *next_index, struct squashfs_page_actor *output)
-{
- return __squashfs_read_data(sb, index, length, next_index, output,
- true);
-}
+ if (compressed) {
+ if (!msblk->stream)
+ goto read_failure;
+ length = squashfs_decompress(msblk, bh, b, offset, length,
+ output);
+ if (length < 0)
+ goto read_failure;
+ } else {
+ /*
+ * Block is uncompressed.
+ */
+ int in, pg_offset = 0;
+ void *data = squashfs_first_page(output);
+
+ for (bytes = length; k < b; k++) {
+ in = min(bytes, msblk->devblksize - offset);
+ bytes -= in;
+ while (in) {
+ if (pg_offset == PAGE_CACHE_SIZE) {
+ data = squashfs_next_page(output);
+ pg_offset = 0;
+ }
+ avail = min_t(int, in, PAGE_CACHE_SIZE -
+ pg_offset);
+ memcpy(data + pg_offset, bh[k]->b_data + offset,
+ avail);
+ in -= avail;
+ pg_offset += avail;
+ offset += avail;
+ }
+ offset = 0;
+ put_bh(bh[k]);
+ }
+ squashfs_finish_page(output);
+ }
-int squashfs_read_data_async(struct super_block *sb, u64 index, int length,
- u64 *next_index, struct squashfs_page_actor *output)
-{
+ kfree(bh);
+ return length;
+
+block_release:
+ for (; k < b; k++)
+ put_bh(bh[k]);
- return __squashfs_read_data(sb, index, length, next_index, output,
- false);
+read_failure:
+ ERROR("squashfs_read_data failed to read block 0x%llx\n",
+ (unsigned long long) index);
+ kfree(bh);
+ return -EIO;
}
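
For readers tracing the restored synchronous path, the index arithmetic at the top of squashfs_read_data() can be exercised on its own. A minimal userspace sketch with illustrative values (1024-byte device blocks; numbers are made up, not taken from the patch):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t index = 70000;		/* byte offset of a squashfs block */
	int devblksize_log2 = 10;	/* log2 of a 1024-byte device block */
	int devblksize = 1 << devblksize_log2;

	/* Split the byte offset into device-block number + in-block offset,
	 * as squashfs_read_data() does with msblk->devblksize_log2. */
	int offset = index & (devblksize - 1);
	uint64_t first_block = index >> devblksize_log2;

	/* 'bytes' starts at -offset so enough whole blocks are read to
	 * cover 'length' bytes that begin mid-block (the datablock path). */
	int length = 3000, bytes, b;
	for (b = 0, bytes = -offset; bytes < length; b++)
		bytes += devblksize;

	printf("offset=%d first_block=%llu blocks_needed=%d\n",
	       offset, (unsigned long long)first_block, b);
	return 0;
}
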
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index 7d78c3d28bbd..91ce49c05b7c 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -209,14 +209,17 @@ void squashfs_cache_put(struct squashfs_cache_entry *entry)
*/
void squashfs_cache_delete(struct squashfs_cache *cache)
{
- int i;
+ int i, j;
if (cache == NULL)
return;
for (i = 0; i < cache->entries; i++) {
- if (cache->entry[i].page)
- free_page_array(cache->entry[i].page, cache->pages);
+ if (cache->entry[i].data) {
+ for (j = 0; j < cache->pages; j++)
+ kfree(cache->entry[i].data[j]);
+ kfree(cache->entry[i].data);
+ }
kfree(cache->entry[i].actor);
}
@@ -233,7 +236,7 @@ void squashfs_cache_delete(struct squashfs_cache *cache)
struct squashfs_cache *squashfs_cache_init(char *name, int entries,
int block_size)
{
- int i;
+ int i, j;
struct squashfs_cache *cache = kzalloc(sizeof(*cache), GFP_KERNEL);
if (cache == NULL) {
@@ -265,13 +268,22 @@ struct squashfs_cache *squashfs_cache_init(char *name, int entries,
init_waitqueue_head(&cache->entry[i].wait_queue);
entry->cache = cache;
entry->block = SQUASHFS_INVALID_BLK;
- entry->page = alloc_page_array(cache->pages, GFP_KERNEL);
- if (!entry->page) {
+ entry->data = kcalloc(cache->pages, sizeof(void *), GFP_KERNEL);
+ if (entry->data == NULL) {
ERROR("Failed to allocate %s cache entry\n", name);
goto cleanup;
}
- entry->actor = squashfs_page_actor_init(entry->page,
- cache->pages, 0, NULL);
+
+ for (j = 0; j < cache->pages; j++) {
+ entry->data[j] = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
+ if (entry->data[j] == NULL) {
+ ERROR("Failed to allocate %s buffer\n", name);
+ goto cleanup;
+ }
+ }
+
+ entry->actor = squashfs_page_actor_init(entry->data,
+ cache->pages, 0);
if (entry->actor == NULL) {
ERROR("Failed to allocate %s cache entry\n", name);
goto cleanup;
@@ -302,20 +314,18 @@ int squashfs_copy_data(void *buffer, struct squashfs_cache_entry *entry,
return min(length, entry->length - offset);
while (offset < entry->length) {
- void *buff = kmap_atomic(entry->page[offset / PAGE_CACHE_SIZE])
- + (offset % PAGE_CACHE_SIZE);
+ void *buff = entry->data[offset / PAGE_CACHE_SIZE]
+ + (offset % PAGE_CACHE_SIZE);
int bytes = min_t(int, entry->length - offset,
PAGE_CACHE_SIZE - (offset % PAGE_CACHE_SIZE));
if (bytes >= remaining) {
memcpy(buffer, buff, remaining);
- kunmap_atomic(buff);
remaining = 0;
break;
}
memcpy(buffer, buff, bytes);
- kunmap_atomic(buff);
buffer += bytes;
remaining -= bytes;
offset += bytes;
@@ -409,38 +419,43 @@ struct squashfs_cache_entry *squashfs_get_datablock(struct super_block *sb,
void *squashfs_read_table(struct super_block *sb, u64 block, int length)
{
int pages = (length + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
- struct page **page;
- void *buff;
- int res;
+ int i, res;
+ void *table, *buffer, **data;
struct squashfs_page_actor *actor;
- page = alloc_page_array(pages, GFP_KERNEL);
- if (!page)
+ table = buffer = kmalloc(length, GFP_KERNEL);
+ if (table == NULL)
return ERR_PTR(-ENOMEM);
- actor = squashfs_page_actor_init(page, pages, length, NULL);
- if (actor == NULL) {
+ data = kcalloc(pages, sizeof(void *), GFP_KERNEL);
+ if (data == NULL) {
res = -ENOMEM;
goto failed;
}
+ actor = squashfs_page_actor_init(data, pages, length);
+ if (actor == NULL) {
+ res = -ENOMEM;
+ goto failed2;
+ }
+
+ for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE)
+ data[i] = buffer;
+
res = squashfs_read_data(sb, block, length |
SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, actor);
+ kfree(data);
+ kfree(actor);
+
if (res < 0)
- goto failed2;
+ goto failed;
- buff = kmalloc(length, GFP_KERNEL);
- if (!buff)
- goto failed2;
- squashfs_actor_to_buf(actor, buff, length);
- squashfs_page_actor_free(actor, 0);
- free_page_array(page, pages);
- return buff;
+ return table;
failed2:
- squashfs_page_actor_free(actor, 0);
+ kfree(data);
failed:
- free_page_array(page, pages);
+ kfree(table);
return ERR_PTR(res);
}
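
The cache entries above now hold plain kmalloc'd buffers instead of pages, so squashfs_copy_data() indexes them directly. A standalone sketch of that indexing, with hypothetical sizes and no kernel dependencies (error handling elided):

#include <stdio.h>
#include <stdlib.h>

#define PAGE_SZ 4096	/* stands in for PAGE_CACHE_SIZE */

int main(void)
{
	int pages = 3, i;
	void **data = calloc(pages, sizeof(void *));

	for (i = 0; i < pages; i++)
		data[i] = malloc(PAGE_SZ);

	/* Absolute offset 5000 lands in buffer 1 at in-buffer offset 904,
	 * the same arithmetic squashfs_copy_data() uses above. */
	int offset = 5000;
	char *pos = (char *)data[offset / PAGE_SZ] + (offset % PAGE_SZ);
	*pos = 'X';

	printf("offset %d -> buffer %d, in-buffer offset %d, byte '%c'\n",
	       offset, offset / PAGE_SZ, offset % PAGE_SZ, *pos);

	for (i = 0; i < pages; i++)
		free(data[i]);
	free(data);
	return 0;
}
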
diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c
index 7de35bf297aa..e9034bf6e5ae 100644
--- a/fs/squashfs/decompressor.c
+++ b/fs/squashfs/decompressor.c
@@ -24,8 +24,7 @@
#include <linux/types.h>
#include <linux/mutex.h>
#include <linux/slab.h>
-#include <linux/highmem.h>
-#include <linux/fs.h>
+#include <linux/buffer_head.h>
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
@@ -95,44 +94,40 @@ const struct squashfs_decompressor *squashfs_lookup_decompressor(int id)
static void *get_comp_opts(struct super_block *sb, unsigned short flags)
{
struct squashfs_sb_info *msblk = sb->s_fs_info;
- void *comp_opts, *buffer = NULL;
- struct page *page;
+ void *buffer = NULL, *comp_opts;
struct squashfs_page_actor *actor = NULL;
int length = 0;
- if (!SQUASHFS_COMP_OPTS(flags))
- return squashfs_comp_opts(msblk, buffer, length);
-
/*
* Read decompressor specific options from file system if present
*/
-
- page = alloc_page(GFP_KERNEL);
- if (!page)
- return ERR_PTR(-ENOMEM);
-
- actor = squashfs_page_actor_init(&page, 1, 0, NULL);
- if (actor == NULL) {
- comp_opts = ERR_PTR(-ENOMEM);
- goto actor_error;
- }
-
- length = squashfs_read_data(sb,
- sizeof(struct squashfs_super_block), 0, NULL, actor);
-
- if (length < 0) {
- comp_opts = ERR_PTR(length);
- goto read_error;
+ if (SQUASHFS_COMP_OPTS(flags)) {
+ buffer = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
+ if (buffer == NULL) {
+ comp_opts = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ actor = squashfs_page_actor_init(&buffer, 1, 0);
+ if (actor == NULL) {
+ comp_opts = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ length = squashfs_read_data(sb,
+ sizeof(struct squashfs_super_block), 0, NULL, actor);
+
+ if (length < 0) {
+ comp_opts = ERR_PTR(length);
+ goto out;
+ }
}
- buffer = kmap_atomic(page);
comp_opts = squashfs_comp_opts(msblk, buffer, length);
- kunmap_atomic(buffer);
-read_error:
- squashfs_page_actor_free(actor, 0);
-actor_error:
- __free_page(page);
+out:
+ kfree(actor);
+ kfree(buffer);
return comp_opts;
}
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index 377be131d23b..979da17cbbf3 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -47,16 +47,12 @@
#include <linux/string.h>
#include <linux/pagemap.h>
#include <linux/mutex.h>
-#include <linux/mm_inline.h>
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
#include "squashfs_fs_i.h"
#include "squashfs.h"
-// Backported from 4.5
-#define lru_to_page(head) (list_entry((head)->prev, struct page, lru))
-
/*
* Locate cache slot in range [offset, index] for specified inode. If
* there's more than one return the slot closest to index.
@@ -446,21 +442,6 @@ static int squashfs_readpage_fragment(struct page *page)
return res;
}
-static int squashfs_readpages_fragment(struct page *page,
- struct list_head *readahead_pages, struct address_space *mapping)
-{
- if (!page) {
- page = lru_to_page(readahead_pages);
- list_del(&page->lru);
- if (add_to_page_cache_lru(page, mapping, page->index,
- mapping_gfp_constraint(mapping, GFP_KERNEL))) {
- put_page(page);
- return 0;
- }
- }
- return squashfs_readpage_fragment(page);
-}
-
static int squashfs_readpage_sparse(struct page *page, int index, int file_end)
{
struct inode *inode = page->mapping->host;
@@ -473,105 +454,54 @@ static int squashfs_readpage_sparse(struct page *page, int index, int file_end)
return 0;
}
-static int squashfs_readpages_sparse(struct page *page,
- struct list_head *readahead_pages, int index, int file_end,
- struct address_space *mapping)
-{
- if (!page) {
- page = lru_to_page(readahead_pages);
- list_del(&page->lru);
- if (add_to_page_cache_lru(page, mapping, page->index,
- mapping_gfp_constraint(mapping, GFP_KERNEL))) {
- put_page(page);
- return 0;
- }
- }
- return squashfs_readpage_sparse(page, index, file_end);
-}
-
-static int __squashfs_readpages(struct file *file, struct page *page,
- struct list_head *readahead_pages, unsigned int nr_pages,
- struct address_space *mapping)
+static int squashfs_readpage(struct file *file, struct page *page)
{
- struct inode *inode = mapping->host;
+ struct inode *inode = page->mapping->host;
struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+ int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT);
int file_end = i_size_read(inode) >> msblk->block_log;
int res;
-
- do {
- struct page *cur_page = page ? page
- : lru_to_page(readahead_pages);
- int page_index = cur_page->index;
- int index = page_index >> (msblk->block_log - PAGE_CACHE_SHIFT);
-
- if (page_index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
- PAGE_CACHE_SHIFT))
- return 1;
-
- if (index < file_end || squashfs_i(inode)->fragment_block ==
- SQUASHFS_INVALID_BLK) {
- u64 block = 0;
- int bsize = read_blocklist(inode, index, &block);
-
- if (bsize < 0)
- return -1;
-
- if (bsize == 0) {
- res = squashfs_readpages_sparse(page,
- readahead_pages, index, file_end,
- mapping);
- } else {
- res = squashfs_readpages_block(page,
- readahead_pages, &nr_pages, mapping,
- page_index, block, bsize);
- }
- } else {
- res = squashfs_readpages_fragment(page,
- readahead_pages, mapping);
- }
- if (res)
- return 0;
- page = NULL;
- } while (readahead_pages && !list_empty(readahead_pages));
-
- return 0;
-}
-
-static int squashfs_readpage(struct file *file, struct page *page)
-{
- int ret;
+ void *pageaddr;
TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n",
- page->index, squashfs_i(page->mapping->host)->start);
+ page->index, squashfs_i(inode)->start);
- get_page(page);
+ if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
+ PAGE_CACHE_SHIFT))
+ goto out;
- ret = __squashfs_readpages(file, page, NULL, 1, page->mapping);
- if (ret) {
- flush_dcache_page(page);
- if (ret < 0)
- SetPageError(page);
- else
- SetPageUptodate(page);
- zero_user_segment(page, 0, PAGE_CACHE_SIZE);
- unlock_page(page);
- put_page(page);
- }
+ if (index < file_end || squashfs_i(inode)->fragment_block ==
+ SQUASHFS_INVALID_BLK) {
+ u64 block = 0;
+ int bsize = read_blocklist(inode, index, &block);
+ if (bsize < 0)
+ goto error_out;
- return 0;
-}
+ if (bsize == 0)
+ res = squashfs_readpage_sparse(page, index, file_end);
+ else
+ res = squashfs_readpage_block(page, block, bsize);
+ } else
+ res = squashfs_readpage_fragment(page);
+
+ if (!res)
+ return 0;
+
+error_out:
+ SetPageError(page);
+out:
+ pageaddr = kmap_atomic(page);
+ memset(pageaddr, 0, PAGE_CACHE_SIZE);
+ kunmap_atomic(pageaddr);
+ flush_dcache_page(page);
+ if (!PageError(page))
+ SetPageUptodate(page);
+ unlock_page(page);
-static int squashfs_readpages(struct file *file, struct address_space *mapping,
- struct list_head *pages, unsigned int nr_pages)
-{
- TRACE("Entered squashfs_readpages, %u pages, first page index %lx\n",
- nr_pages, lru_to_page(pages)->index);
- __squashfs_readpages(file, NULL, pages, nr_pages, mapping);
return 0;
}
const struct address_space_operations squashfs_aops = {
- .readpage = squashfs_readpage,
- .readpages = squashfs_readpages,
+ .readpage = squashfs_readpage
};
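
The restored squashfs_readpage() converts a page cache index into a squashfs datablock index by shifting. A small userspace sketch of the math, assuming 128 KiB squashfs blocks and 4 KiB pages (illustrative values only):

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12			/* 4 KiB pages */
#define PAGE_SZ (1 << PAGE_SHIFT)

int main(void)
{
	int block_log = 17;			/* 128 KiB squashfs blocks */
	uint64_t i_size = 500000;		/* file size in bytes */
	unsigned long page_index = 40;		/* page being read */

	int block = page_index >> (block_log - PAGE_SHIFT);
	int file_end = i_size >> block_log;	/* index of last full block */
	int eof_pages = (i_size + PAGE_SZ - 1) >> PAGE_SHIFT;

	/* Page 40 sits in datablock 40 >> 5 = 1; pages at or beyond
	 * eof_pages take the zero-fill path in squashfs_readpage(). */
	printf("page %lu -> datablock %d (file_end %d, pages %d)\n",
	       page_index, block, file_end, eof_pages);
	return 0;
}
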
diff --git a/fs/squashfs/file_cache.c b/fs/squashfs/file_cache.c
new file mode 100644
index 000000000000..f2310d2a2019
--- /dev/null
+++ b/fs/squashfs/file_cache.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2013
+ * Phillip Lougher <phillip@squashfs.org.uk>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/fs.h>
+#include <linux/vfs.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/pagemap.h>
+#include <linux/mutex.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "squashfs_fs_i.h"
+#include "squashfs.h"
+
+/* Read separately compressed datablock and memcopy into page cache */
+int squashfs_readpage_block(struct page *page, u64 block, int bsize)
+{
+ struct inode *i = page->mapping->host;
+ struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb,
+ block, bsize);
+ int res = buffer->error;
+
+ if (res)
+ ERROR("Unable to read page, block %llx, size %x\n", block,
+ bsize);
+ else
+ squashfs_copy_cache(page, buffer, buffer->length, 0);
+
+ squashfs_cache_put(buffer);
+ return res;
+}
diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c
index c97af4c6ccd0..43e7a7eddac0 100644
--- a/fs/squashfs/file_direct.c
+++ b/fs/squashfs/file_direct.c
@@ -13,7 +13,6 @@
#include <linux/string.h>
#include <linux/pagemap.h>
#include <linux/mutex.h>
-#include <linux/mm_inline.h>
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
@@ -21,139 +20,157 @@
#include "squashfs.h"
#include "page_actor.h"
-// Backported from 4.5
-#define lru_to_page(head) (list_entry((head)->prev, struct page, lru))
+static int squashfs_read_cache(struct page *target_page, u64 block, int bsize,
+ int pages, struct page **page);
-static void release_actor_pages(struct page **page, int pages, int error)
-{
- int i;
-
- for (i = 0; i < pages; i++) {
- if (!page[i])
- continue;
- flush_dcache_page(page[i]);
- if (!error)
- SetPageUptodate(page[i]);
- else {
- SetPageError(page[i]);
- zero_user_segment(page[i], 0, PAGE_CACHE_SIZE);
- }
- unlock_page(page[i]);
- put_page(page[i]);
- }
- kfree(page);
-}
+/* Read separately compressed datablock directly into page cache */
+int squashfs_readpage_block(struct page *target_page, u64 block, int bsize)
-/*
- * Create a "page actor" which will kmap and kunmap the
- * page cache pages appropriately within the decompressor
- */
-static struct squashfs_page_actor *actor_from_page_cache(
- unsigned int actor_pages, struct page *target_page,
- struct list_head *rpages, unsigned int *nr_pages, int start_index,
- struct address_space *mapping)
{
+ struct inode *inode = target_page->mapping->host;
+ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+
+ int file_end = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
+ int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1;
+ int start_index = target_page->index & ~mask;
+ int end_index = start_index | mask;
+ int i, n, pages, missing_pages, bytes, res = -ENOMEM;
struct page **page;
struct squashfs_page_actor *actor;
- int i, n;
- gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL);
-
- page = kmalloc_array(actor_pages, sizeof(void *), GFP_KERNEL);
- if (!page)
- return NULL;
-
- for (i = 0, n = start_index; i < actor_pages; i++, n++) {
- if (target_page == NULL && rpages && !list_empty(rpages)) {
- struct page *cur_page = lru_to_page(rpages);
-
- if (cur_page->index < start_index + actor_pages) {
- list_del(&cur_page->lru);
- --(*nr_pages);
- if (add_to_page_cache_lru(cur_page, mapping,
- cur_page->index, gfp))
- put_page(cur_page);
- else
- target_page = cur_page;
- } else
- rpages = NULL;
- }
+ void *pageaddr;
+
+ if (end_index > file_end)
+ end_index = file_end;
+
+ pages = end_index - start_index + 1;
- if (target_page && target_page->index == n) {
- page[i] = target_page;
- target_page = NULL;
- } else {
- page[i] = grab_cache_page_nowait(mapping, n);
- if (page[i] == NULL)
- continue;
+ page = kmalloc_array(pages, sizeof(void *), GFP_KERNEL);
+ if (page == NULL)
+ return res;
+
+ /*
+ * Create a "page actor" which will kmap and kunmap the
+ * page cache pages appropriately within the decompressor
+ */
+ actor = squashfs_page_actor_init_special(page, pages, 0);
+ if (actor == NULL)
+ goto out;
+
+ /* Try to grab all the pages covered by the Squashfs block */
+ for (missing_pages = 0, i = 0, n = start_index; i < pages; i++, n++) {
+ page[i] = (n == target_page->index) ? target_page :
+ grab_cache_page_nowait(target_page->mapping, n);
+
+ if (page[i] == NULL) {
+ missing_pages++;
+ continue;
}
if (PageUptodate(page[i])) {
unlock_page(page[i]);
- put_page(page[i]);
+ page_cache_release(page[i]);
page[i] = NULL;
+ missing_pages++;
}
}
- actor = squashfs_page_actor_init(page, actor_pages, 0,
- release_actor_pages);
- if (!actor) {
- release_actor_pages(page, actor_pages, -ENOMEM);
- kfree(page);
- return NULL;
+ if (missing_pages) {
+ /*
+ * Couldn't get one or more pages: either this page has been
+ * VM reclaimed while others are still in the page cache and
+ * uptodate, or we're racing with another thread in
+ * squashfs_readpage also trying to grab them. Fall back to
+ * using an intermediate buffer.
+ */
+ res = squashfs_read_cache(target_page, block, bsize, pages,
+ page);
+ if (res < 0)
+ goto mark_errored;
+
+ goto out;
}
- return actor;
-}
-int squashfs_readpages_block(struct page *target_page,
- struct list_head *readahead_pages,
- unsigned int *nr_pages,
- struct address_space *mapping,
- int page_index, u64 block, int bsize)
+ /* Decompress directly into the page cache buffers */
+ res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor);
+ if (res < 0)
+ goto mark_errored;
+
+ /* Last page may have trailing bytes not filled */
+ bytes = res % PAGE_CACHE_SIZE;
+ if (bytes) {
+ pageaddr = kmap_atomic(page[pages - 1]);
+ memset(pageaddr + bytes, 0, PAGE_CACHE_SIZE - bytes);
+ kunmap_atomic(pageaddr);
+ }
-{
- struct squashfs_page_actor *actor;
- struct inode *inode = mapping->host;
- struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
- int start_index, end_index, file_end, actor_pages, res;
- int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1;
+ /* Mark pages as uptodate, unlock and release */
+ for (i = 0; i < pages; i++) {
+ flush_dcache_page(page[i]);
+ SetPageUptodate(page[i]);
+ unlock_page(page[i]);
+ if (page[i] != target_page)
+ page_cache_release(page[i]);
+ }
- /*
- * If readpage() is called on an uncompressed datablock, we can just
- * read the pages instead of fetching the whole block.
- * This greatly improves the performance when a process keep doing
- * random reads because we only fetch the necessary data.
- * The readahead algorithm will take care of doing speculative reads
- * if necessary.
- * We can't read more than 1 block even if readahead provides use more
- * pages because we don't know yet if the next block is compressed or
- * not.
+ kfree(actor);
+ kfree(page);
+
+ return 0;
+
+mark_errored:
+ /* Decompression failed, mark pages as errored. Target_page is
+ * dealt with by the caller
*/
- if (bsize && !SQUASHFS_COMPRESSED_BLOCK(bsize)) {
- u64 block_end = block + msblk->block_size;
-
- block += (page_index & mask) * PAGE_CACHE_SIZE;
- actor_pages = (block_end - block) / PAGE_CACHE_SIZE;
- if (*nr_pages < actor_pages)
- actor_pages = *nr_pages;
- start_index = page_index;
- bsize = min_t(int, bsize, (PAGE_CACHE_SIZE * actor_pages)
- | SQUASHFS_COMPRESSED_BIT_BLOCK);
- } else {
- file_end = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
- start_index = page_index & ~mask;
- end_index = start_index | mask;
- if (end_index > file_end)
- end_index = file_end;
- actor_pages = end_index - start_index + 1;
+ for (i = 0; i < pages; i++) {
+ if (page[i] == NULL || page[i] == target_page)
+ continue;
+ flush_dcache_page(page[i]);
+ SetPageError(page[i]);
+ unlock_page(page[i]);
+ page_cache_release(page[i]);
}
- actor = actor_from_page_cache(actor_pages, target_page,
- readahead_pages, nr_pages, start_index,
- mapping);
- if (!actor)
- return -ENOMEM;
+out:
+ kfree(actor);
+ kfree(page);
+ return res;
+}
+
+
+static int squashfs_read_cache(struct page *target_page, u64 block, int bsize,
+ int pages, struct page **page)
+{
+ struct inode *i = target_page->mapping->host;
+ struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb,
+ block, bsize);
+ int bytes = buffer->length, res = buffer->error, n, offset = 0;
+ void *pageaddr;
+
+ if (res) {
+ ERROR("Unable to read page, block %llx, size %x\n", block,
+ bsize);
+ goto out;
+ }
+
+ for (n = 0; n < pages && bytes > 0; n++,
+ bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) {
+ int avail = min_t(int, bytes, PAGE_CACHE_SIZE);
+
+ if (page[n] == NULL)
+ continue;
+
+ pageaddr = kmap_atomic(page[n]);
+ squashfs_copy_data(pageaddr, buffer, offset, avail);
+ memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail);
+ kunmap_atomic(pageaddr);
+ flush_dcache_page(page[n]);
+ SetPageUptodate(page[n]);
+ unlock_page(page[n]);
+ if (page[n] != target_page)
+ page_cache_release(page[n]);
+ }
- res = squashfs_read_data_async(inode->i_sb, block, bsize, NULL,
- actor);
- return res < 0 ? res : 0;
+out:
+ squashfs_cache_put(buffer);
+ return res;
}
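
The mask arithmetic near the top of squashfs_readpage_block() selects every page cache page covered by the same squashfs block. A standalone sketch with illustrative parameters:

#include <stdio.h>

int main(void)
{
	int block_log = 17, page_shift = 12;	/* 128 KiB block, 4 KiB page */
	int mask = (1 << (block_log - page_shift)) - 1;		/* 31 */
	unsigned long target = 70;		/* target page index */

	/* Pages covered by one block share their upper index bits. */
	unsigned long start_index = target & ~(unsigned long)mask;
	unsigned long end_index = start_index | mask;

	printf("pages %lu..%lu (%d pages) share the block of page %lu\n",
	       start_index, end_index, (int)(end_index - start_index + 1),
	       target);
	return 0;
}
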
diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c
index df4fa3c7ddd0..c31e2bc9c081 100644
--- a/fs/squashfs/lz4_wrapper.c
+++ b/fs/squashfs/lz4_wrapper.c
@@ -94,17 +94,39 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
struct buffer_head **bh, int b, int offset, int length,
struct squashfs_page_actor *output)
{
- int res;
- size_t dest_len = output->length;
struct squashfs_lz4 *stream = strm;
+ void *buff = stream->input, *data;
+ int avail, i, bytes = length, res;
+ size_t dest_len = output->length;
+
+ for (i = 0; i < b; i++) {
+ avail = min(bytes, msblk->devblksize - offset);
+ memcpy(buff, bh[i]->b_data + offset, avail);
+ buff += avail;
+ bytes -= avail;
+ offset = 0;
+ put_bh(bh[i]);
+ }
- squashfs_bh_to_buf(bh, b, stream->input, offset, length,
- msblk->devblksize);
res = lz4_decompress_unknownoutputsize(stream->input, length,
stream->output, &dest_len);
if (res)
return -EIO;
- squashfs_buf_to_actor(stream->output, output, dest_len);
+
+ bytes = dest_len;
+ data = squashfs_first_page(output);
+ buff = stream->output;
+ while (data) {
+ if (bytes <= PAGE_CACHE_SIZE) {
+ memcpy(data, buff, bytes);
+ break;
+ }
+ memcpy(data, buff, PAGE_CACHE_SIZE);
+ buff += PAGE_CACHE_SIZE;
+ bytes -= PAGE_CACHE_SIZE;
+ data = squashfs_next_page(output);
+ }
+ squashfs_finish_page(output);
return dest_len;
}
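
Both the LZ4 and LZO wrappers now gather the buffer_head contents into a linear input buffer with the same loop; only the first copy honours the in-block offset. A runnable model of that gather loop (sizes are made up):

#include <stdio.h>

int main(void)
{
	int devblksize = 1024, length = 2500, offset = 300;
	int bytes = length, avail, i, copied = 0;
	int b = 3;	/* number of buffer_heads covering the data */

	for (i = 0; i < b && bytes > 0; i++) {
		/* First block: at most devblksize - offset bytes;
		 * offset resets to zero for every later block. */
		avail = bytes < devblksize - offset ? bytes
						    : devblksize - offset;
		printf("bh[%d]: copy %d bytes from offset %d\n",
		       i, avail, offset);
		copied += avail;
		bytes -= avail;
		offset = 0;
	}
	printf("copied %d of %d\n", copied, length);
	return 0;
}
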
diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c
index 2c844d53a59e..244b9fbfff7b 100644
--- a/fs/squashfs/lzo_wrapper.c
+++ b/fs/squashfs/lzo_wrapper.c
@@ -79,19 +79,45 @@ static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm,
struct buffer_head **bh, int b, int offset, int length,
struct squashfs_page_actor *output)
{
- int res;
- size_t out_len = output->length;
struct squashfs_lzo *stream = strm;
+ void *buff = stream->input, *data;
+ int avail, i, bytes = length, res;
+ size_t out_len = output->length;
+
+ for (i = 0; i < b; i++) {
+ avail = min(bytes, msblk->devblksize - offset);
+ memcpy(buff, bh[i]->b_data + offset, avail);
+ buff += avail;
+ bytes -= avail;
+ offset = 0;
+ put_bh(bh[i]);
+ }
- squashfs_bh_to_buf(bh, b, stream->input, offset, length,
- msblk->devblksize);
res = lzo1x_decompress_safe(stream->input, (size_t)length,
stream->output, &out_len);
if (res != LZO_E_OK)
- return -EIO;
- squashfs_buf_to_actor(stream->output, output, out_len);
+ goto failed;
- return out_len;
+ res = bytes = (int)out_len;
+ data = squashfs_first_page(output);
+ buff = stream->output;
+ while (data) {
+ if (bytes <= PAGE_CACHE_SIZE) {
+ memcpy(data, buff, bytes);
+ break;
+ } else {
+ memcpy(data, buff, PAGE_CACHE_SIZE);
+ buff += PAGE_CACHE_SIZE;
+ bytes -= PAGE_CACHE_SIZE;
+ data = squashfs_next_page(output);
+ }
+ }
+ squashfs_finish_page(output);
+
+ return res;
+
+failed:
+ return -EIO;
}
const struct squashfs_decompressor squashfs_lzo_comp_ops = {
diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c
index 53863508e400..5a1c11f56441 100644
--- a/fs/squashfs/page_actor.c
+++ b/fs/squashfs/page_actor.c
@@ -9,145 +9,92 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
-#include <linux/buffer_head.h>
#include "page_actor.h"
-struct squashfs_page_actor *squashfs_page_actor_init(struct page **page,
- int pages, int length, void (*release_pages)(struct page **, int, int))
-{
- struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
-
- if (actor == NULL)
- return NULL;
+/*
+ * This file contains implementations of page_actor for decompressing into
+ * an intermediate buffer, and for decompressing directly into the
+ * page cache.
+ *
+ * Calling code should avoid sleeping between calls to squashfs_first_page()
+ * and squashfs_finish_page().
+ */
- actor->length = length ? : pages * PAGE_CACHE_SIZE;
- actor->page = page;
- actor->pages = pages;
- actor->next_page = 0;
- actor->pageaddr = NULL;
- actor->release_pages = release_pages;
- return actor;
+/* Implementation of page_actor for decompressing into intermediate buffer */
+static void *cache_first_page(struct squashfs_page_actor *actor)
+{
+ actor->next_page = 1;
+ return actor->buffer[0];
}
-void squashfs_page_actor_free(struct squashfs_page_actor *actor, int error)
+static void *cache_next_page(struct squashfs_page_actor *actor)
{
- if (!actor)
- return;
+ if (actor->next_page == actor->pages)
+ return NULL;
- if (actor->release_pages)
- actor->release_pages(actor->page, actor->pages, error);
- kfree(actor);
+ return actor->buffer[actor->next_page++];
}
-void squashfs_actor_to_buf(struct squashfs_page_actor *actor, void *buf,
- int length)
+static void cache_finish_page(struct squashfs_page_actor *actor)
{
- void *pageaddr;
- int pos = 0, avail, i;
-
- for (i = 0; i < actor->pages && pos < length; ++i) {
- avail = min_t(int, length - pos, PAGE_CACHE_SIZE);
- if (actor->page[i]) {
- pageaddr = kmap_atomic(actor->page[i]);
- memcpy(buf + pos, pageaddr, avail);
- kunmap_atomic(pageaddr);
- }
- pos += avail;
- }
+ /* empty */
}
-void squashfs_buf_to_actor(void *buf, struct squashfs_page_actor *actor,
- int length)
+struct squashfs_page_actor *squashfs_page_actor_init(void **buffer,
+ int pages, int length)
{
- void *pageaddr;
- int pos = 0, avail, i;
-
- for (i = 0; i < actor->pages && pos < length; ++i) {
- avail = min_t(int, length - pos, PAGE_CACHE_SIZE);
- if (actor->page[i]) {
- pageaddr = kmap_atomic(actor->page[i]);
- memcpy(pageaddr, buf + pos, avail);
- kunmap_atomic(pageaddr);
- }
- pos += avail;
- }
+ struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
+
+ if (actor == NULL)
+ return NULL;
+
+ actor->length = length ? : pages * PAGE_CACHE_SIZE;
+ actor->buffer = buffer;
+ actor->pages = pages;
+ actor->next_page = 0;
+ actor->squashfs_first_page = cache_first_page;
+ actor->squashfs_next_page = cache_next_page;
+ actor->squashfs_finish_page = cache_finish_page;
+ return actor;
}
-void squashfs_bh_to_actor(struct buffer_head **bh, int nr_buffers,
- struct squashfs_page_actor *actor, int offset, int length, int blksz)
+/* Implementation of page_actor for decompressing directly into page cache. */
+static void *direct_first_page(struct squashfs_page_actor *actor)
{
- void *kaddr = NULL;
- int bytes = 0, pgoff = 0, b = 0, p = 0, avail, i;
-
- while (bytes < length) {
- if (actor->page[p]) {
- kaddr = kmap_atomic(actor->page[p]);
- while (pgoff < PAGE_CACHE_SIZE && bytes < length) {
- avail = min_t(int, blksz - offset,
- PAGE_CACHE_SIZE - pgoff);
- memcpy(kaddr + pgoff, bh[b]->b_data + offset,
- avail);
- pgoff += avail;
- bytes += avail;
- offset = (offset + avail) % blksz;
- if (!offset) {
- put_bh(bh[b]);
- ++b;
- }
- }
- kunmap_atomic(kaddr);
- pgoff = 0;
- } else {
- for (i = 0; i < PAGE_CACHE_SIZE / blksz; ++i) {
- if (bh[b])
- put_bh(bh[b]);
- ++b;
- }
- bytes += PAGE_CACHE_SIZE;
- }
- ++p;
- }
+ actor->next_page = 1;
+ return actor->pageaddr = kmap_atomic(actor->page[0]);
}
-void squashfs_bh_to_buf(struct buffer_head **bh, int nr_buffers, void *buf,
- int offset, int length, int blksz)
+static void *direct_next_page(struct squashfs_page_actor *actor)
{
- int i, avail, bytes = 0;
-
- for (i = 0; i < nr_buffers && bytes < length; ++i) {
- avail = min_t(int, length - bytes, blksz - offset);
- if (bh[i]) {
- memcpy(buf + bytes, bh[i]->b_data + offset, avail);
- put_bh(bh[i]);
- }
- bytes += avail;
- offset = 0;
- }
+ if (actor->pageaddr)
+ kunmap_atomic(actor->pageaddr);
+
+ return actor->pageaddr = actor->next_page == actor->pages ? NULL :
+ kmap_atomic(actor->page[actor->next_page++]);
}
-void free_page_array(struct page **page, int nr_pages)
+static void direct_finish_page(struct squashfs_page_actor *actor)
{
- int i;
-
- for (i = 0; i < nr_pages; ++i)
- __free_page(page[i]);
- kfree(page);
+ if (actor->pageaddr)
+ kunmap_atomic(actor->pageaddr);
}
-struct page **alloc_page_array(int nr_pages, int gfp_mask)
+struct squashfs_page_actor *squashfs_page_actor_init_special(struct page **page,
+ int pages, int length)
{
- int i;
- struct page **page;
+ struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
- page = kcalloc(nr_pages, sizeof(struct page *), gfp_mask);
- if (!page)
+ if (actor == NULL)
return NULL;
- for (i = 0; i < nr_pages; ++i) {
- page[i] = alloc_page(gfp_mask);
- if (!page[i]) {
- free_page_array(page, i);
- return NULL;
- }
- }
- return page;
+
+ actor->length = length ? : pages * PAGE_CACHE_SIZE;
+ actor->page = page;
+ actor->pages = pages;
+ actor->next_page = 0;
+ actor->pageaddr = NULL;
+ actor->squashfs_first_page = direct_first_page;
+ actor->squashfs_next_page = direct_next_page;
+ actor->squashfs_finish_page = direct_finish_page;
+ return actor;
}
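
Decompressors consume either actor flavour through the same three calls. A userspace sketch of the contract, modelling only the intermediate-buffer flavour (the direct flavour additionally kmaps page cache pages, hence the no-sleep rule in the file comment above):

#include <stdio.h>
#include <string.h>

#define PAGE_SZ 4096

struct actor { void **buffer; int pages; int next_page; };

static void *first_page(struct actor *a)
{
	a->next_page = 1;
	return a->buffer[0];
}

static void *next_page(struct actor *a)
{
	/* NULL signals the output is exhausted, as in cache_next_page(). */
	return a->next_page == a->pages ? NULL : a->buffer[a->next_page++];
}

int main(void)
{
	static char b0[PAGE_SZ], b1[PAGE_SZ];
	void *bufs[] = { b0, b1 };
	struct actor a = { bufs, 2, 0 };
	int filled = 0;

	/* Walk the output one page at a time, as the wrappers above do;
	 * a real finish_page() call would end the walk (a no-op here). */
	for (void *data = first_page(&a); data; data = next_page(&a)) {
		memset(data, 0xAB, PAGE_SZ);
		filled += PAGE_SZ;
	}
	printf("filled %d bytes across %d pages\n", filled, a.pages);
	return 0;
}
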
diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h
index aa1ed790b5a3..26dd82008b82 100644
--- a/fs/squashfs/page_actor.h
+++ b/fs/squashfs/page_actor.h
@@ -5,61 +5,77 @@
* Phillip Lougher <phillip@squashfs.org.uk>
*
* This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level squashfsory.
+ * the COPYING file in the top-level directory.
*/
+#ifndef CONFIG_SQUASHFS_FILE_DIRECT
struct squashfs_page_actor {
- struct page **page;
- void *pageaddr;
+ void **page;
int pages;
int length;
int next_page;
- void (*release_pages)(struct page **, int, int);
};
-extern struct squashfs_page_actor *squashfs_page_actor_init(struct page **,
- int, int, void (*)(struct page **, int, int));
-extern void squashfs_page_actor_free(struct squashfs_page_actor *, int);
+static inline struct squashfs_page_actor *squashfs_page_actor_init(void **page,
+ int pages, int length)
+{
+ struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
-extern void squashfs_actor_to_buf(struct squashfs_page_actor *, void *, int);
-extern void squashfs_buf_to_actor(void *, struct squashfs_page_actor *, int);
-extern void squashfs_bh_to_actor(struct buffer_head **, int,
- struct squashfs_page_actor *, int, int, int);
-extern void squashfs_bh_to_buf(struct buffer_head **, int, void *, int, int,
- int);
+ if (actor == NULL)
+ return NULL;
+
+ actor->length = length ? : pages * PAGE_CACHE_SIZE;
+ actor->page = page;
+ actor->pages = pages;
+ actor->next_page = 0;
+ return actor;
+}
-/*
- * Calling code should avoid sleeping between calls to squashfs_first_page()
- * and squashfs_finish_page().
- */
static inline void *squashfs_first_page(struct squashfs_page_actor *actor)
{
actor->next_page = 1;
- return actor->pageaddr = actor->page[0] ? kmap_atomic(actor->page[0])
- : NULL;
+ return actor->page[0];
}
static inline void *squashfs_next_page(struct squashfs_page_actor *actor)
{
- if (!IS_ERR_OR_NULL(actor->pageaddr))
- kunmap_atomic(actor->pageaddr);
-
- if (actor->next_page == actor->pages)
- return actor->pageaddr = ERR_PTR(-ENODATA);
-
- actor->pageaddr = actor->page[actor->next_page] ?
- kmap_atomic(actor->page[actor->next_page]) : NULL;
- ++actor->next_page;
- return actor->pageaddr;
+ return actor->next_page == actor->pages ? NULL :
+ actor->page[actor->next_page++];
}
static inline void squashfs_finish_page(struct squashfs_page_actor *actor)
{
- if (!IS_ERR_OR_NULL(actor->pageaddr))
- kunmap_atomic(actor->pageaddr);
+ /* empty */
}
+#else
+struct squashfs_page_actor {
+ union {
+ void **buffer;
+ struct page **page;
+ };
+ void *pageaddr;
+ void *(*squashfs_first_page)(struct squashfs_page_actor *);
+ void *(*squashfs_next_page)(struct squashfs_page_actor *);
+ void (*squashfs_finish_page)(struct squashfs_page_actor *);
+ int pages;
+ int length;
+ int next_page;
+};
-extern struct page **alloc_page_array(int, int);
-extern void free_page_array(struct page **, int);
-
+extern struct squashfs_page_actor *squashfs_page_actor_init(void **, int, int);
+extern struct squashfs_page_actor *squashfs_page_actor_init_special(struct page
+ **, int, int);
+static inline void *squashfs_first_page(struct squashfs_page_actor *actor)
+{
+ return actor->squashfs_first_page(actor);
+}
+static inline void *squashfs_next_page(struct squashfs_page_actor *actor)
+{
+ return actor->squashfs_next_page(actor);
+}
+static inline void squashfs_finish_page(struct squashfs_page_actor *actor)
+{
+ actor->squashfs_finish_page(actor);
+}
+#endif
#endif
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
index 6093579c6c5d..887d6d270080 100644
--- a/fs/squashfs/squashfs.h
+++ b/fs/squashfs/squashfs.h
@@ -28,14 +28,8 @@
#define WARNING(s, args...) pr_warn("SQUASHFS: "s, ## args)
/* block.c */
-extern int squashfs_init_read_wq(void);
-extern void squashfs_destroy_read_wq(void);
extern int squashfs_read_data(struct super_block *, u64, int, u64 *,
struct squashfs_page_actor *);
-extern int squashfs_read_data(struct super_block *, u64, int, u64 *,
- struct squashfs_page_actor *);
-extern int squashfs_read_data_async(struct super_block *, u64, int, u64 *,
- struct squashfs_page_actor *);
/* cache.c */
extern struct squashfs_cache *squashfs_cache_init(char *, int, int);
@@ -76,9 +70,8 @@ extern __le64 *squashfs_read_fragment_index_table(struct super_block *,
void squashfs_copy_cache(struct page *, struct squashfs_cache_entry *, int,
int);
-/* file_direct.c */
-extern int squashfs_readpages_block(struct page *, struct list_head *,
- unsigned int *, struct address_space *, int, u64, int);
+/* file_xxx.c */
+extern int squashfs_readpage_block(struct page *, u64, int);
/* id.c */
extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *);
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
index 9dc66d0003a6..5234c19a0eab 100644
--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h
@@ -49,7 +49,7 @@ struct squashfs_cache_entry {
int num_waiters;
wait_queue_head_t wait_queue;
struct squashfs_cache *cache;
- struct page **page;
+ void **data;
struct squashfs_page_actor *actor;
};
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 9602ce9ddfc7..44500dcf1805 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -445,15 +445,9 @@ static int __init init_squashfs_fs(void)
if (err)
return err;
- if (!squashfs_init_read_wq()) {
- destroy_inodecache();
- return -ENOMEM;
- }
-
err = register_filesystem(&squashfs_fs_type);
if (err) {
destroy_inodecache();
- squashfs_destroy_read_wq();
return err;
}
@@ -467,7 +461,6 @@ static void __exit exit_squashfs_fs(void)
{
unregister_filesystem(&squashfs_fs_type);
destroy_inodecache();
- squashfs_destroy_read_wq();
}
diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c
index 14cd373e1897..c609624e4b8a 100644
--- a/fs/squashfs/xz_wrapper.c
+++ b/fs/squashfs/xz_wrapper.c
@@ -55,7 +55,7 @@ static void *squashfs_xz_comp_opts(struct squashfs_sb_info *msblk,
struct comp_opts *opts;
int err = 0, n;
- opts = kmalloc(sizeof(*opts), GFP_ATOMIC);
+ opts = kmalloc(sizeof(*opts), GFP_KERNEL);
if (opts == NULL) {
err = -ENOMEM;
goto out2;
@@ -136,7 +136,6 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm,
enum xz_ret xz_err;
int avail, total = 0, k = 0;
struct squashfs_xz *stream = strm;
- void *buf = NULL;
xz_dec_reset(stream->state);
stream->buf.in_pos = 0;
@@ -157,20 +156,12 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm,
if (stream->buf.out_pos == stream->buf.out_size) {
stream->buf.out = squashfs_next_page(output);
- if (!IS_ERR(stream->buf.out)) {
+ if (stream->buf.out != NULL) {
stream->buf.out_pos = 0;
total += PAGE_CACHE_SIZE;
}
}
- if (!stream->buf.out) {
- if (!buf) {
- buf = kmalloc(PAGE_CACHE_SIZE, GFP_ATOMIC);
- if (!buf)
- goto out;
- }
- stream->buf.out = buf;
- }
xz_err = xz_dec_run(stream->state, &stream->buf);
if (stream->buf.in_pos == stream->buf.in_size && k < b)
@@ -182,13 +173,11 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm,
if (xz_err != XZ_STREAM_END || k < b)
goto out;
- kfree(buf);
return total + stream->buf.out_pos;
out:
for (; k < b; k++)
put_bh(bh[k]);
- kfree(buf);
return -EIO;
}
diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c
index 09c892b5308e..8727caba6882 100644
--- a/fs/squashfs/zlib_wrapper.c
+++ b/fs/squashfs/zlib_wrapper.c
@@ -66,7 +66,6 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm,
struct buffer_head **bh, int b, int offset, int length,
struct squashfs_page_actor *output)
{
- void *buf = NULL;
int zlib_err, zlib_init = 0, k = 0;
z_stream *stream = strm;
@@ -85,19 +84,10 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm,
if (stream->avail_out == 0) {
stream->next_out = squashfs_next_page(output);
- if (!IS_ERR(stream->next_out))
+ if (stream->next_out != NULL)
stream->avail_out = PAGE_CACHE_SIZE;
}
- if (!stream->next_out) {
- if (!buf) {
- buf = kmalloc(PAGE_CACHE_SIZE, GFP_ATOMIC);
- if (!buf)
- goto out;
- }
- stream->next_out = buf;
- }
-
if (!zlib_init) {
zlib_err = zlib_inflateInit(stream);
if (zlib_err != Z_OK) {
@@ -125,13 +115,11 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm,
if (k < b)
goto out;
- kfree(buf);
return stream->total_out;
out:
for (; k < b; k++)
put_bh(bh[k]);
- kfree(buf);
return -EIO;
}
diff --git a/fs/super.c b/fs/super.c
index eb27955a229a..689ec96c43f8 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -982,7 +982,7 @@ static int set_bdev_super(struct super_block *s, void *data)
* We set the bdi here to the queue backing, file systems can
* overwrite this in ->fill_super()
*/
- s->s_bdi = &bdev_get_queue(s->s_bdev)->backing_dev_info;
+ s->s_bdi = bdev_get_queue(s->s_bdev)->backing_dev_info;
return 0;
}
diff --git a/fs/timerfd.c b/fs/timerfd.c
index ab8dd1538381..147b72349d3b 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -44,6 +44,8 @@ struct timerfd_ctx {
bool might_cancel;
};
+static atomic_t instance_count = ATOMIC_INIT(0);
+
static LIST_HEAD(cancel_list);
static DEFINE_SPINLOCK(cancel_lock);
@@ -387,6 +389,9 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
{
int ufd;
struct timerfd_ctx *ctx;
+ char task_comm_buf[TASK_COMM_LEN];
+ char file_name_buf[32];
+ int instance;
/* Check the TFD_* constants for consistency. */
BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC);
@@ -423,7 +428,12 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
ctx->moffs = ktime_mono_to_real((ktime_t){ .tv64 = 0 });
- ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
+ instance = atomic_inc_return(&instance_count);
+ get_task_comm(task_comm_buf, current);
+ snprintf(file_name_buf, sizeof(file_name_buf), "[timerfd%d_%.*s]",
+ instance, (int)sizeof(task_comm_buf), task_comm_buf);
+
+ ufd = anon_inode_getfd(file_name_buf, &timerfd_fops, ctx,
O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
if (ufd < 0)
kfree(ctx);
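
The new name is visible wherever anon-inode fd names surface, e.g. in /proc/<pid>/fd. A userspace sketch to observe it; note that unpatched kernels print plain "[timerfd]" and the numbered, comm-tagged form only appears with this change applied:

#include <stdio.h>
#include <unistd.h>
#include <time.h>
#include <sys/timerfd.h>

int main(void)
{
	char path[64], target[128];
	int fd = timerfd_create(CLOCK_MONOTONIC, 0);
	ssize_t n;

	if (fd < 0) {
		perror("timerfd_create");
		return 1;
	}
	snprintf(path, sizeof(path), "/proc/self/fd/%d", fd);
	n = readlink(path, target, sizeof(target) - 1);
	if (n < 0) {
		perror("readlink");
		return 1;
	}
	target[n] = '\0';
	/* e.g. "anon_inode:[timerfd3_a.out]" with this patch */
	printf("%s -> %s\n", path, target);
	return 0;
}
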
diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h
index 92a8491a8f8c..c0a95e393347 100644
--- a/fs/ubifs/key.h
+++ b/fs/ubifs/key.h
@@ -34,6 +34,12 @@
* node. We use "r5" hash borrowed from reiserfs.
*/
+/*
+ * Lots of the key helpers require a struct ubifs_info *c as the first
+ * parameter, but none of them uses it yet; it is reserved for future
+ * extensions with different c->key_format values. Right now there is only
+ * one key type, UBIFS_SIMPLE_KEY_FMT.
+ */
+
#ifndef __UBIFS_KEY_H__
#define __UBIFS_KEY_H__
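
To make the convention in the new comment concrete, here is a standalone sketch of the helper shape it describes; every name in it is illustrative, not the real key.h API:

#include <stdio.h>
#include <stdint.h>

struct fs_info { int key_format; };	/* stands in for ubifs_info */
union key { uint32_t u32[2]; uint64_t u64; };

static inline void ino_key_init_sketch(const struct fs_info *c,
					union key *key, uint32_t inum)
{
	(void)c;	/* unused while only one key format exists */
	key->u32[0] = inum;
	key->u32[1] = 0;
}

int main(void)
{
	struct fs_info c = { 0 };
	union key k;

	ino_key_init_sketch(&c, &k, 42);
	printf("key for inode 42: %08x %08x\n", k.u32[0], k.u32[1]);
	return 0;
}
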
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index b5bf23b34241..de6f82d4eda2 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -268,7 +268,7 @@ static int check_namespace(const struct qstr *nm)
if (!strncmp(nm->name, XATTR_TRUSTED_PREFIX,
XATTR_TRUSTED_PREFIX_LEN)) {
- if (nm->name[sizeof(XATTR_TRUSTED_PREFIX) - 1] == '\0')
+ if (nm->name[XATTR_TRUSTED_PREFIX_LEN] == '\0')
return -EINVAL;
type = TRUSTED_XATTR;
} else if (!strncmp(nm->name, XATTR_USER_PREFIX,
@@ -278,7 +278,7 @@ static int check_namespace(const struct qstr *nm)
type = USER_XATTR;
} else if (!strncmp(nm->name, XATTR_SECURITY_PREFIX,
XATTR_SECURITY_PREFIX_LEN)) {
- if (nm->name[sizeof(XATTR_SECURITY_PREFIX) - 1] == '\0')
+ if (nm->name[XATTR_SECURITY_PREFIX_LEN] == '\0')
return -EINVAL;
type = SECURITY_XATTR;
} else
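
The two hunks above are behaviour-preserving: XATTR_TRUSTED_PREFIX_LEN is defined in include/uapi/linux/xattr.h as sizeof(XATTR_TRUSTED_PREFIX) - 1, so both spellings index the byte right after the prefix; the new spelling merely matches the strncmp() length. A standalone check:

#include <stdio.h>
#include <string.h>

#define XATTR_TRUSTED_PREFIX "trusted."
#define XATTR_TRUSTED_PREFIX_LEN (sizeof(XATTR_TRUSTED_PREFIX) - 1)

static int check(const char *name)
{
	/* A name that is the bare prefix has an empty suffix: reject,
	 * as check_namespace() does with -EINVAL. */
	if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) &&
	    name[XATTR_TRUSTED_PREFIX_LEN] == '\0')
		return -1;
	return 0;
}

int main(void)
{
	printf("sizeof-1 == LEN: %d\n",
	       (int)(sizeof(XATTR_TRUSTED_PREFIX) - 1) ==
	       (int)XATTR_TRUSTED_PREFIX_LEN);
	printf("\"trusted.\"  -> %d (rejected)\n", check("trusted."));
	printf("\"trusted.x\" -> %d (accepted)\n", check("trusted.x"));
	return 0;
}
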
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 074560ad190e..af6fd442b9d8 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -611,7 +611,8 @@ static int udf_add_nondir(struct dentry *dentry, struct inode *inode)
if (fibh.sbh != fibh.ebh)
brelse(fibh.ebh);
brelse(fibh.sbh);
- d_instantiate_new(dentry, inode);
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
return 0;
}
@@ -721,7 +722,8 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
inc_nlink(dir);
dir->i_ctime = dir->i_mtime = current_fs_time(dir->i_sb);
mark_inode_dirty(dir);
- d_instantiate_new(dentry, inode);
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
if (fibh.sbh != fibh.ebh)
brelse(fibh.ebh);
brelse(fibh.sbh);
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 2ec7689c25cf..47966554317c 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -38,7 +38,8 @@ static inline int ufs_add_nondir(struct dentry *dentry, struct inode *inode)
{
int err = ufs_add_link(dentry, inode);
if (!err) {
- d_instantiate_new(dentry, inode);
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
return 0;
}
inode_dec_link_count(inode);
@@ -190,7 +191,8 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
if (err)
goto out_fail;
- d_instantiate_new(dentry, inode);
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
return 0;
out_fail:
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 9d20f75c1209..85877c5da0df 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -805,6 +805,18 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
goto out_unlock;
/*
+ * UFFDIO_COPY will fill file holes even without
+ * PROT_WRITE. This check enforces that if this is a
+ * MAP_SHARED, the process has write permission to the backing
+ * file. If VM_MAYWRITE is set it also enforces that on a
+ * MAP_SHARED vma: there is no F_WRITE_SEAL and no further
+ * F_WRITE_SEAL can be taken until the vma is destroyed.
+ */
+ ret = -EPERM;
+ if (unlikely(!(cur->vm_flags & VM_MAYWRITE)))
+ goto out_unlock;
+
+ /*
* Check that this vma isn't already owned by a
* different userfaultfd. We can't allow more than one
* userfaultfd to own a single vma simultaneously or we
@@ -829,6 +841,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
BUG_ON(vma->vm_ops);
BUG_ON(vma->vm_userfaultfd_ctx.ctx &&
vma->vm_userfaultfd_ctx.ctx != ctx);
+ WARN_ON(!(vma->vm_flags & VM_MAYWRITE));
/*
* Nothing to do: this vma is already registered into this
@@ -970,6 +983,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
cond_resched();
BUG_ON(vma->vm_ops);
+ WARN_ON(!(vma->vm_flags & VM_MAYWRITE));
/*
* Nothing to do: this vma is already registered into this
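
A simplified model of why VM_MAYWRITE is the right gate here: UFFDIO_COPY fills the backing store directly, bypassing PROT_WRITE, so registration must require a mapping that could legitimately be written. The sketch below mimics, under stated assumptions, how mmap() derives the flag; it is a model, not the kernel code path:

#include <stdio.h>

#define VM_MAYWRITE 0x20UL

static unsigned long vma_flags(int map_shared, int file_writable)
{
	unsigned long flags = VM_MAYWRITE;	/* default, as in do_mmap() */

	/* A MAP_SHARED mapping of a read-only file loses VM_MAYWRITE;
	 * a private mapping keeps it (mprotect could add PROT_WRITE). */
	if (map_shared && !file_writable)
		flags &= ~VM_MAYWRITE;
	return flags;
}

static const char *register_result(unsigned long flags)
{
	return (flags & VM_MAYWRITE) ? "ok" : "-EPERM";
}

int main(void)
{
	printf("shared, file O_RDWR   : %s\n", register_result(vma_flags(1, 1)));
	printf("shared, file O_RDONLY : %s\n", register_result(vma_flags(1, 0)));
	printf("private, file O_RDONLY: %s\n", register_result(vma_flags(0, 0)));
	return 0;
}
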
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index c5101a3295d8..62ba66e1c598 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -289,8 +289,10 @@ xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
if (type == ACL_TYPE_ACCESS) {
umode_t mode;
-
+ struct posix_acl *old_acl = acl;
error = posix_acl_update_mode(inode, &mode, &acl);
+ if (!acl)
+ posix_acl_release(old_acl);
if (error)
return error;
error = xfs_set_mode(inode, mode);
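
The xfs_set_acl() hunk fixes a reference leak: posix_acl_update_mode() may clear *acl when the ACL collapses into the mode bits, leaving the caller holding the only reference to the old object. A standalone model of the fixed pattern, with simplified stand-in types:

#include <stdio.h>
#include <stdlib.h>

struct acl { int refcount; };

static void acl_release(struct acl *a)
{
	if (a && --a->refcount == 0) {
		printf("acl freed\n");
		free(a);
	}
}

/* Model: the helper folds the ACL into mode bits and clears the pointer. */
static int update_mode(unsigned int *mode, struct acl **acl)
{
	*mode = 0644;
	*acl = NULL;	/* ACL now fully represented by the mode */
	return 0;
}

int main(void)
{
	struct acl *a = malloc(sizeof(*a));
	unsigned int mode;

	a->refcount = 1;

	/* The fixed pattern: remember the old pointer and release it if
	 * the helper dropped it from *acl, as xfs_set_acl() now does. */
	struct acl *old = a;
	int err = update_mode(&mode, &a);
	if (!a)
		acl_release(old);
	if (err)
		return 1;

	printf("mode now %o, acl %s\n", mode, a ? "kept" : "dropped");
	return 0;
}
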